{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 8228, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00024307243558580456, "grad_norm": 1.4945179703455433, "learning_rate": 0.0, "loss": 1.1829731464385986, "step": 1 }, { "epoch": 0.0004861448711716091, "grad_norm": 1.3005812735084505, "learning_rate": 4.854368932038835e-08, "loss": 1.224231481552124, "step": 2 }, { "epoch": 0.0007292173067574137, "grad_norm": 1.344991425755796, "learning_rate": 9.70873786407767e-08, "loss": 1.1904892921447754, "step": 3 }, { "epoch": 0.0009722897423432182, "grad_norm": 1.4628892356191403, "learning_rate": 1.4563106796116507e-07, "loss": 1.2003729343414307, "step": 4 }, { "epoch": 0.0012153621779290229, "grad_norm": 1.4746255643784159, "learning_rate": 1.941747572815534e-07, "loss": 1.1297242641448975, "step": 5 }, { "epoch": 0.0014584346135148275, "grad_norm": 1.49149378743173, "learning_rate": 2.4271844660194176e-07, "loss": 1.116420030593872, "step": 6 }, { "epoch": 0.001701507049100632, "grad_norm": 1.448208255805919, "learning_rate": 2.9126213592233014e-07, "loss": 1.2470757961273193, "step": 7 }, { "epoch": 0.0019445794846864365, "grad_norm": 1.212579912153243, "learning_rate": 3.398058252427185e-07, "loss": 1.2697489261627197, "step": 8 }, { "epoch": 0.0021876519202722413, "grad_norm": 1.189455733579834, "learning_rate": 3.883495145631068e-07, "loss": 1.053025484085083, "step": 9 }, { "epoch": 0.0024307243558580457, "grad_norm": 1.2228815779140838, "learning_rate": 4.368932038834952e-07, "loss": 1.101393461227417, "step": 10 }, { "epoch": 0.00267379679144385, "grad_norm": 1.22261881424171, "learning_rate": 4.854368932038835e-07, "loss": 1.1143680810928345, "step": 11 }, { "epoch": 0.002916869227029655, "grad_norm": 1.4353173827679346, "learning_rate": 5.339805825242719e-07, "loss": 1.2657065391540527, "step": 12 }, { "epoch": 0.0031599416626154593, "grad_norm": 1.3924444517070427, "learning_rate": 5.825242718446603e-07, "loss": 1.1039724349975586, "step": 13 }, { "epoch": 0.003403014098201264, "grad_norm": 1.5153889005422245, "learning_rate": 6.310679611650486e-07, "loss": 1.2360996007919312, "step": 14 }, { "epoch": 0.0036460865337870686, "grad_norm": 1.362619834424756, "learning_rate": 6.79611650485437e-07, "loss": 1.2099463939666748, "step": 15 }, { "epoch": 0.003889158969372873, "grad_norm": 1.3807535631342016, "learning_rate": 7.281553398058253e-07, "loss": 1.2920161485671997, "step": 16 }, { "epoch": 0.004132231404958678, "grad_norm": 1.3335851598824386, "learning_rate": 7.766990291262136e-07, "loss": 1.1319366693496704, "step": 17 }, { "epoch": 0.004375303840544483, "grad_norm": 1.187300721446578, "learning_rate": 8.25242718446602e-07, "loss": 1.0261366367340088, "step": 18 }, { "epoch": 0.004618376276130287, "grad_norm": 1.4170389289251408, "learning_rate": 8.737864077669904e-07, "loss": 1.1337122917175293, "step": 19 }, { "epoch": 0.004861448711716091, "grad_norm": 1.5956799002440687, "learning_rate": 9.223300970873787e-07, "loss": 1.139801025390625, "step": 20 }, { "epoch": 0.005104521147301896, "grad_norm": 1.3542723342599645, "learning_rate": 9.70873786407767e-07, "loss": 1.2212281227111816, "step": 21 }, { "epoch": 0.0053475935828877, "grad_norm": 1.34488539588145, "learning_rate": 1.0194174757281554e-06, "loss": 1.0724375247955322, "step": 22 }, { "epoch": 0.005590666018473505, "grad_norm": 1.4475926208238636, "learning_rate": 1.0679611650485437e-06, "loss": 1.1120295524597168, "step": 23 }, { "epoch": 0.00583373845405931, "grad_norm": 1.770121019868134, "learning_rate": 1.1165048543689322e-06, "loss": 1.160451889038086, "step": 24 }, { "epoch": 0.006076810889645114, "grad_norm": 1.4551387510184235, "learning_rate": 1.1650485436893206e-06, "loss": 1.2004809379577637, "step": 25 }, { "epoch": 0.006319883325230919, "grad_norm": 1.2646975221510282, "learning_rate": 1.213592233009709e-06, "loss": 1.1485308408737183, "step": 26 }, { "epoch": 0.0065629557608167235, "grad_norm": 1.1603542983360726, "learning_rate": 1.2621359223300972e-06, "loss": 1.1057801246643066, "step": 27 }, { "epoch": 0.006806028196402528, "grad_norm": 1.493085696168619, "learning_rate": 1.3106796116504856e-06, "loss": 1.3008511066436768, "step": 28 }, { "epoch": 0.007049100631988332, "grad_norm": 1.152017247784078, "learning_rate": 1.359223300970874e-06, "loss": 1.1857595443725586, "step": 29 }, { "epoch": 0.007292173067574137, "grad_norm": 1.4327611237739384, "learning_rate": 1.4077669902912622e-06, "loss": 1.0703740119934082, "step": 30 }, { "epoch": 0.007535245503159942, "grad_norm": 1.6091785524307607, "learning_rate": 1.4563106796116506e-06, "loss": 1.1891390085220337, "step": 31 }, { "epoch": 0.007778317938745746, "grad_norm": 1.4338690682976611, "learning_rate": 1.5048543689320389e-06, "loss": 1.1940207481384277, "step": 32 }, { "epoch": 0.008021390374331552, "grad_norm": 1.5016182334913741, "learning_rate": 1.5533980582524272e-06, "loss": 1.2921146154403687, "step": 33 }, { "epoch": 0.008264462809917356, "grad_norm": 1.3256840046178118, "learning_rate": 1.6019417475728158e-06, "loss": 1.122525691986084, "step": 34 }, { "epoch": 0.00850753524550316, "grad_norm": 1.6464766566616056, "learning_rate": 1.650485436893204e-06, "loss": 1.3157933950424194, "step": 35 }, { "epoch": 0.008750607681088965, "grad_norm": 1.4924986234432271, "learning_rate": 1.6990291262135924e-06, "loss": 1.0907602310180664, "step": 36 }, { "epoch": 0.00899368011667477, "grad_norm": 1.1806543997199919, "learning_rate": 1.7475728155339808e-06, "loss": 1.128176212310791, "step": 37 }, { "epoch": 0.009236752552260573, "grad_norm": 1.5136974610117053, "learning_rate": 1.796116504854369e-06, "loss": 1.1955599784851074, "step": 38 }, { "epoch": 0.009479824987846379, "grad_norm": 1.5358381314436964, "learning_rate": 1.8446601941747574e-06, "loss": 1.1773334741592407, "step": 39 }, { "epoch": 0.009722897423432183, "grad_norm": 1.2620255541697332, "learning_rate": 1.8932038834951458e-06, "loss": 1.2818598747253418, "step": 40 }, { "epoch": 0.009965969859017987, "grad_norm": 1.2687667154541724, "learning_rate": 1.941747572815534e-06, "loss": 1.142372965812683, "step": 41 }, { "epoch": 0.010209042294603793, "grad_norm": 1.3178562249821943, "learning_rate": 1.9902912621359226e-06, "loss": 1.0289628505706787, "step": 42 }, { "epoch": 0.010452114730189596, "grad_norm": 1.3431760075866757, "learning_rate": 2.0388349514563107e-06, "loss": 1.202725887298584, "step": 43 }, { "epoch": 0.0106951871657754, "grad_norm": 1.358673112547129, "learning_rate": 2.0873786407766993e-06, "loss": 1.0584251880645752, "step": 44 }, { "epoch": 0.010938259601361206, "grad_norm": 1.3075890863450201, "learning_rate": 2.1359223300970874e-06, "loss": 1.035726547241211, "step": 45 }, { "epoch": 0.01118133203694701, "grad_norm": 1.350873272215216, "learning_rate": 2.184466019417476e-06, "loss": 1.2708501815795898, "step": 46 }, { "epoch": 0.011424404472532814, "grad_norm": 1.0886670559656044, "learning_rate": 2.2330097087378645e-06, "loss": 0.9220836758613586, "step": 47 }, { "epoch": 0.01166747690811862, "grad_norm": 1.3220416798498007, "learning_rate": 2.2815533980582526e-06, "loss": 1.1464056968688965, "step": 48 }, { "epoch": 0.011910549343704424, "grad_norm": 1.3427628586722355, "learning_rate": 2.330097087378641e-06, "loss": 1.120340347290039, "step": 49 }, { "epoch": 0.012153621779290228, "grad_norm": 1.2365948889380012, "learning_rate": 2.3786407766990293e-06, "loss": 1.0211321115493774, "step": 50 }, { "epoch": 0.012396694214876033, "grad_norm": 1.3164681857642004, "learning_rate": 2.427184466019418e-06, "loss": 1.046913981437683, "step": 51 }, { "epoch": 0.012639766650461837, "grad_norm": 1.2045106500987024, "learning_rate": 2.475728155339806e-06, "loss": 0.9406787157058716, "step": 52 }, { "epoch": 0.012882839086047641, "grad_norm": 1.156000472544257, "learning_rate": 2.5242718446601945e-06, "loss": 0.7364840507507324, "step": 53 }, { "epoch": 0.013125911521633447, "grad_norm": 1.2445396945922922, "learning_rate": 2.5728155339805826e-06, "loss": 1.1726189851760864, "step": 54 }, { "epoch": 0.013368983957219251, "grad_norm": 1.1498650954000842, "learning_rate": 2.621359223300971e-06, "loss": 0.803581714630127, "step": 55 }, { "epoch": 0.013612056392805057, "grad_norm": 1.1723514595611066, "learning_rate": 2.6699029126213593e-06, "loss": 1.0402333736419678, "step": 56 }, { "epoch": 0.01385512882839086, "grad_norm": 1.0677482082182768, "learning_rate": 2.718446601941748e-06, "loss": 1.062371015548706, "step": 57 }, { "epoch": 0.014098201263976665, "grad_norm": 1.2328273501801228, "learning_rate": 2.766990291262136e-06, "loss": 1.005713939666748, "step": 58 }, { "epoch": 0.01434127369956247, "grad_norm": 1.0195916984780928, "learning_rate": 2.8155339805825245e-06, "loss": 0.955734372138977, "step": 59 }, { "epoch": 0.014584346135148274, "grad_norm": 1.168589325391793, "learning_rate": 2.8640776699029126e-06, "loss": 1.0755832195281982, "step": 60 }, { "epoch": 0.014827418570734078, "grad_norm": 0.9411763236219243, "learning_rate": 2.912621359223301e-06, "loss": 0.9812177419662476, "step": 61 }, { "epoch": 0.015070491006319884, "grad_norm": 1.2942198579969975, "learning_rate": 2.9611650485436892e-06, "loss": 1.1038448810577393, "step": 62 }, { "epoch": 0.015313563441905688, "grad_norm": 1.1240087675005523, "learning_rate": 3.0097087378640778e-06, "loss": 1.0442405939102173, "step": 63 }, { "epoch": 0.015556635877491492, "grad_norm": 0.998922018456209, "learning_rate": 3.058252427184466e-06, "loss": 0.9702444672584534, "step": 64 }, { "epoch": 0.015799708313077296, "grad_norm": 1.0234653743044662, "learning_rate": 3.1067961165048544e-06, "loss": 0.7638298273086548, "step": 65 }, { "epoch": 0.016042780748663103, "grad_norm": 0.9895775708011185, "learning_rate": 3.1553398058252434e-06, "loss": 1.0299046039581299, "step": 66 }, { "epoch": 0.016285853184248907, "grad_norm": 1.1618329391398912, "learning_rate": 3.2038834951456315e-06, "loss": 1.1800503730773926, "step": 67 }, { "epoch": 0.01652892561983471, "grad_norm": 0.9919154648057328, "learning_rate": 3.25242718446602e-06, "loss": 1.08077871799469, "step": 68 }, { "epoch": 0.016771998055420515, "grad_norm": 1.117735859892588, "learning_rate": 3.300970873786408e-06, "loss": 0.8943830728530884, "step": 69 }, { "epoch": 0.01701507049100632, "grad_norm": 1.0739227876883284, "learning_rate": 3.3495145631067967e-06, "loss": 1.0423531532287598, "step": 70 }, { "epoch": 0.017258142926592123, "grad_norm": 0.890662678626169, "learning_rate": 3.398058252427185e-06, "loss": 0.9554758071899414, "step": 71 }, { "epoch": 0.01750121536217793, "grad_norm": 0.8787334230910545, "learning_rate": 3.4466019417475734e-06, "loss": 0.9003157615661621, "step": 72 }, { "epoch": 0.017744287797763734, "grad_norm": 1.008104304563339, "learning_rate": 3.4951456310679615e-06, "loss": 0.8566453456878662, "step": 73 }, { "epoch": 0.01798736023334954, "grad_norm": 1.0522176909787149, "learning_rate": 3.54368932038835e-06, "loss": 0.9483290314674377, "step": 74 }, { "epoch": 0.018230432668935342, "grad_norm": 0.9765534149095073, "learning_rate": 3.592233009708738e-06, "loss": 0.9985085129737854, "step": 75 }, { "epoch": 0.018473505104521146, "grad_norm": 0.9225202711091245, "learning_rate": 3.6407766990291267e-06, "loss": 0.8131860494613647, "step": 76 }, { "epoch": 0.01871657754010695, "grad_norm": 0.8920536577893772, "learning_rate": 3.689320388349515e-06, "loss": 0.9511319398880005, "step": 77 }, { "epoch": 0.018959649975692758, "grad_norm": 0.8749922521205141, "learning_rate": 3.7378640776699034e-06, "loss": 0.9576234817504883, "step": 78 }, { "epoch": 0.019202722411278562, "grad_norm": 0.8509214817370616, "learning_rate": 3.7864077669902915e-06, "loss": 0.8994898200035095, "step": 79 }, { "epoch": 0.019445794846864366, "grad_norm": 1.0529364077315602, "learning_rate": 3.83495145631068e-06, "loss": 0.9690864682197571, "step": 80 }, { "epoch": 0.01968886728245017, "grad_norm": 0.9057857331846272, "learning_rate": 3.883495145631068e-06, "loss": 1.0193897485733032, "step": 81 }, { "epoch": 0.019931939718035974, "grad_norm": 0.8973177851815463, "learning_rate": 3.932038834951457e-06, "loss": 0.7184461355209351, "step": 82 }, { "epoch": 0.020175012153621778, "grad_norm": 0.9089041906798602, "learning_rate": 3.980582524271845e-06, "loss": 0.9568495154380798, "step": 83 }, { "epoch": 0.020418084589207585, "grad_norm": 0.8944099490514492, "learning_rate": 4.029126213592233e-06, "loss": 0.8547254800796509, "step": 84 }, { "epoch": 0.02066115702479339, "grad_norm": 0.9516945765243496, "learning_rate": 4.0776699029126215e-06, "loss": 0.8345698118209839, "step": 85 }, { "epoch": 0.020904229460379193, "grad_norm": 0.9425924374319592, "learning_rate": 4.12621359223301e-06, "loss": 0.9776610136032104, "step": 86 }, { "epoch": 0.021147301895964997, "grad_norm": 1.0812856862474052, "learning_rate": 4.1747572815533986e-06, "loss": 0.9984065294265747, "step": 87 }, { "epoch": 0.0213903743315508, "grad_norm": 1.114980055471667, "learning_rate": 4.223300970873786e-06, "loss": 1.1613221168518066, "step": 88 }, { "epoch": 0.02163344676713661, "grad_norm": 1.0369411015148666, "learning_rate": 4.271844660194175e-06, "loss": 0.9812633991241455, "step": 89 }, { "epoch": 0.021876519202722412, "grad_norm": 1.1615522393311088, "learning_rate": 4.320388349514563e-06, "loss": 0.8968325257301331, "step": 90 }, { "epoch": 0.022119591638308216, "grad_norm": 0.9691495424767824, "learning_rate": 4.368932038834952e-06, "loss": 0.9509971737861633, "step": 91 }, { "epoch": 0.02236266407389402, "grad_norm": 0.879226210794504, "learning_rate": 4.4174757281553404e-06, "loss": 0.8027092218399048, "step": 92 }, { "epoch": 0.022605736509479824, "grad_norm": 0.866498467628097, "learning_rate": 4.466019417475729e-06, "loss": 0.9532610177993774, "step": 93 }, { "epoch": 0.022848808945065628, "grad_norm": 0.9589854725489302, "learning_rate": 4.514563106796117e-06, "loss": 0.8471971750259399, "step": 94 }, { "epoch": 0.023091881380651436, "grad_norm": 0.9365527362739762, "learning_rate": 4.563106796116505e-06, "loss": 0.8442251086235046, "step": 95 }, { "epoch": 0.02333495381623724, "grad_norm": 1.3744600154599003, "learning_rate": 4.611650485436894e-06, "loss": 1.2609193325042725, "step": 96 }, { "epoch": 0.023578026251823044, "grad_norm": 1.0360456489379062, "learning_rate": 4.660194174757282e-06, "loss": 1.0048513412475586, "step": 97 }, { "epoch": 0.023821098687408847, "grad_norm": 1.1925092963661712, "learning_rate": 4.70873786407767e-06, "loss": 0.9697086811065674, "step": 98 }, { "epoch": 0.02406417112299465, "grad_norm": 1.0044607758427717, "learning_rate": 4.7572815533980585e-06, "loss": 0.8832486271858215, "step": 99 }, { "epoch": 0.024307243558580455, "grad_norm": 0.9956129998451999, "learning_rate": 4.805825242718447e-06, "loss": 0.9490481615066528, "step": 100 }, { "epoch": 0.024550315994166263, "grad_norm": 0.9807577260105411, "learning_rate": 4.854368932038836e-06, "loss": 0.9071600437164307, "step": 101 }, { "epoch": 0.024793388429752067, "grad_norm": 1.0752847338123173, "learning_rate": 4.902912621359223e-06, "loss": 1.0347933769226074, "step": 102 }, { "epoch": 0.02503646086533787, "grad_norm": 1.1570576949100315, "learning_rate": 4.951456310679612e-06, "loss": 1.0358091592788696, "step": 103 }, { "epoch": 0.025279533300923675, "grad_norm": 0.895638031989607, "learning_rate": 5e-06, "loss": 1.0516788959503174, "step": 104 }, { "epoch": 0.02552260573650948, "grad_norm": 1.0990255176384425, "learning_rate": 5.048543689320389e-06, "loss": 0.9298542737960815, "step": 105 }, { "epoch": 0.025765678172095283, "grad_norm": 1.1049546100389447, "learning_rate": 5.0970873786407775e-06, "loss": 1.1358405351638794, "step": 106 }, { "epoch": 0.02600875060768109, "grad_norm": 0.9368095334701799, "learning_rate": 5.145631067961165e-06, "loss": 0.8658401966094971, "step": 107 }, { "epoch": 0.026251823043266894, "grad_norm": 0.8986461559493466, "learning_rate": 5.194174757281554e-06, "loss": 1.0139635801315308, "step": 108 }, { "epoch": 0.026494895478852698, "grad_norm": 1.0514324598945364, "learning_rate": 5.242718446601942e-06, "loss": 0.8422435522079468, "step": 109 }, { "epoch": 0.026737967914438502, "grad_norm": 1.2333925304602789, "learning_rate": 5.291262135922331e-06, "loss": 0.8926314115524292, "step": 110 }, { "epoch": 0.026981040350024306, "grad_norm": 1.1055211789875905, "learning_rate": 5.3398058252427185e-06, "loss": 1.087392807006836, "step": 111 }, { "epoch": 0.027224112785610113, "grad_norm": 0.9648753262016097, "learning_rate": 5.388349514563107e-06, "loss": 0.9406963586807251, "step": 112 }, { "epoch": 0.027467185221195917, "grad_norm": 1.028952690183109, "learning_rate": 5.436893203883496e-06, "loss": 0.826431155204773, "step": 113 }, { "epoch": 0.02771025765678172, "grad_norm": 1.1684516618412641, "learning_rate": 5.485436893203884e-06, "loss": 0.9458146095275879, "step": 114 }, { "epoch": 0.027953330092367525, "grad_norm": 1.0674933649808702, "learning_rate": 5.533980582524272e-06, "loss": 0.8754380345344543, "step": 115 }, { "epoch": 0.02819640252795333, "grad_norm": 1.0436615757975236, "learning_rate": 5.58252427184466e-06, "loss": 1.037347435951233, "step": 116 }, { "epoch": 0.028439474963539133, "grad_norm": 0.969605685610838, "learning_rate": 5.631067961165049e-06, "loss": 0.9354009032249451, "step": 117 }, { "epoch": 0.02868254739912494, "grad_norm": 0.8973669770273304, "learning_rate": 5.6796116504854375e-06, "loss": 0.8921153545379639, "step": 118 }, { "epoch": 0.028925619834710745, "grad_norm": 1.121684789035623, "learning_rate": 5.728155339805825e-06, "loss": 0.9506028294563293, "step": 119 }, { "epoch": 0.02916869227029655, "grad_norm": 1.072308854070988, "learning_rate": 5.776699029126214e-06, "loss": 0.9824037551879883, "step": 120 }, { "epoch": 0.029411764705882353, "grad_norm": 1.00129346655751, "learning_rate": 5.825242718446602e-06, "loss": 0.9732852578163147, "step": 121 }, { "epoch": 0.029654837141468156, "grad_norm": 1.090440479545138, "learning_rate": 5.873786407766991e-06, "loss": 0.9663233757019043, "step": 122 }, { "epoch": 0.02989790957705396, "grad_norm": 1.0148514518696274, "learning_rate": 5.9223300970873785e-06, "loss": 0.8841441869735718, "step": 123 }, { "epoch": 0.030140982012639768, "grad_norm": 1.0776214690222068, "learning_rate": 5.970873786407767e-06, "loss": 0.8916454315185547, "step": 124 }, { "epoch": 0.030384054448225572, "grad_norm": 1.0816106061056552, "learning_rate": 6.0194174757281556e-06, "loss": 0.7443137764930725, "step": 125 }, { "epoch": 0.030627126883811376, "grad_norm": 0.9720493842281391, "learning_rate": 6.067961165048544e-06, "loss": 0.9374991655349731, "step": 126 }, { "epoch": 0.03087019931939718, "grad_norm": 1.0800076084276362, "learning_rate": 6.116504854368932e-06, "loss": 0.9155739545822144, "step": 127 }, { "epoch": 0.031113271754982984, "grad_norm": 0.949349033988199, "learning_rate": 6.16504854368932e-06, "loss": 1.0265107154846191, "step": 128 }, { "epoch": 0.03135634419056879, "grad_norm": 1.3704433186198721, "learning_rate": 6.213592233009709e-06, "loss": 1.228090763092041, "step": 129 }, { "epoch": 0.03159941662615459, "grad_norm": 0.897589482153592, "learning_rate": 6.262135922330098e-06, "loss": 0.7131045460700989, "step": 130 }, { "epoch": 0.0318424890617404, "grad_norm": 1.0015279877373742, "learning_rate": 6.310679611650487e-06, "loss": 0.8757247924804688, "step": 131 }, { "epoch": 0.03208556149732621, "grad_norm": 1.2546952015533315, "learning_rate": 6.3592233009708745e-06, "loss": 0.8684989213943481, "step": 132 }, { "epoch": 0.03232863393291201, "grad_norm": 1.045266501509933, "learning_rate": 6.407766990291263e-06, "loss": 1.0263044834136963, "step": 133 }, { "epoch": 0.032571706368497814, "grad_norm": 1.0947630128338088, "learning_rate": 6.456310679611652e-06, "loss": 0.8954870700836182, "step": 134 }, { "epoch": 0.032814778804083615, "grad_norm": 1.2014860763033854, "learning_rate": 6.50485436893204e-06, "loss": 1.0375757217407227, "step": 135 }, { "epoch": 0.03305785123966942, "grad_norm": 1.029494925643112, "learning_rate": 6.553398058252428e-06, "loss": 0.924638032913208, "step": 136 }, { "epoch": 0.03330092367525522, "grad_norm": 1.1314773066771278, "learning_rate": 6.601941747572816e-06, "loss": 0.799332320690155, "step": 137 }, { "epoch": 0.03354399611084103, "grad_norm": 1.0849706287507974, "learning_rate": 6.650485436893205e-06, "loss": 1.1166467666625977, "step": 138 }, { "epoch": 0.03378706854642684, "grad_norm": 1.0746298535944523, "learning_rate": 6.6990291262135935e-06, "loss": 0.7291650176048279, "step": 139 }, { "epoch": 0.03403014098201264, "grad_norm": 1.0870696991546844, "learning_rate": 6.747572815533981e-06, "loss": 0.939744234085083, "step": 140 }, { "epoch": 0.034273213417598446, "grad_norm": 1.0985371261357713, "learning_rate": 6.79611650485437e-06, "loss": 0.9719498157501221, "step": 141 }, { "epoch": 0.034516285853184246, "grad_norm": 1.0276788149883902, "learning_rate": 6.844660194174758e-06, "loss": 0.9445546865463257, "step": 142 }, { "epoch": 0.034759358288770054, "grad_norm": 0.9328713125670449, "learning_rate": 6.893203883495147e-06, "loss": 0.6778570413589478, "step": 143 }, { "epoch": 0.03500243072435586, "grad_norm": 1.0978316755038868, "learning_rate": 6.9417475728155345e-06, "loss": 0.9000784158706665, "step": 144 }, { "epoch": 0.03524550315994166, "grad_norm": 0.9986590481541544, "learning_rate": 6.990291262135923e-06, "loss": 0.7730683088302612, "step": 145 }, { "epoch": 0.03548857559552747, "grad_norm": 1.159677147805435, "learning_rate": 7.0388349514563116e-06, "loss": 0.835922122001648, "step": 146 }, { "epoch": 0.03573164803111327, "grad_norm": 1.0565059873494331, "learning_rate": 7.0873786407767e-06, "loss": 0.9465033411979675, "step": 147 }, { "epoch": 0.03597472046669908, "grad_norm": 0.9415047697140807, "learning_rate": 7.135922330097088e-06, "loss": 0.8419917821884155, "step": 148 }, { "epoch": 0.036217792902284884, "grad_norm": 1.1381538442466994, "learning_rate": 7.184466019417476e-06, "loss": 0.8304937481880188, "step": 149 }, { "epoch": 0.036460865337870685, "grad_norm": 1.0168684581758485, "learning_rate": 7.233009708737865e-06, "loss": 0.8216912746429443, "step": 150 }, { "epoch": 0.03670393777345649, "grad_norm": 1.128815343789225, "learning_rate": 7.2815533980582534e-06, "loss": 0.942093014717102, "step": 151 }, { "epoch": 0.03694701020904229, "grad_norm": 1.1865493110260037, "learning_rate": 7.330097087378641e-06, "loss": 0.9196211695671082, "step": 152 }, { "epoch": 0.0371900826446281, "grad_norm": 1.1102937857601045, "learning_rate": 7.37864077669903e-06, "loss": 0.9383392333984375, "step": 153 }, { "epoch": 0.0374331550802139, "grad_norm": 1.3552076171490928, "learning_rate": 7.427184466019418e-06, "loss": 0.976478099822998, "step": 154 }, { "epoch": 0.03767622751579971, "grad_norm": 1.061989915897806, "learning_rate": 7.475728155339807e-06, "loss": 0.7817899584770203, "step": 155 }, { "epoch": 0.037919299951385516, "grad_norm": 0.9475682602854878, "learning_rate": 7.5242718446601945e-06, "loss": 0.8938120603561401, "step": 156 }, { "epoch": 0.038162372386971316, "grad_norm": 1.0556192852925774, "learning_rate": 7.572815533980583e-06, "loss": 1.0537114143371582, "step": 157 }, { "epoch": 0.038405444822557123, "grad_norm": 1.2909957441817104, "learning_rate": 7.6213592233009715e-06, "loss": 0.9488391876220703, "step": 158 }, { "epoch": 0.038648517258142924, "grad_norm": 1.1560063923515804, "learning_rate": 7.66990291262136e-06, "loss": 0.8627606630325317, "step": 159 }, { "epoch": 0.03889158969372873, "grad_norm": 1.1405002337538006, "learning_rate": 7.718446601941748e-06, "loss": 0.7222731113433838, "step": 160 }, { "epoch": 0.03913466212931454, "grad_norm": 1.1119018338776998, "learning_rate": 7.766990291262136e-06, "loss": 0.8468010425567627, "step": 161 }, { "epoch": 0.03937773456490034, "grad_norm": 1.0148372822738254, "learning_rate": 7.815533980582525e-06, "loss": 0.8182731866836548, "step": 162 }, { "epoch": 0.03962080700048615, "grad_norm": 1.0118831637104708, "learning_rate": 7.864077669902913e-06, "loss": 0.886298656463623, "step": 163 }, { "epoch": 0.03986387943607195, "grad_norm": 1.2486335579605516, "learning_rate": 7.912621359223302e-06, "loss": 0.809931755065918, "step": 164 }, { "epoch": 0.040106951871657755, "grad_norm": 1.115888047023517, "learning_rate": 7.96116504854369e-06, "loss": 0.784578800201416, "step": 165 }, { "epoch": 0.040350024307243555, "grad_norm": 1.2461358082194267, "learning_rate": 8.009708737864077e-06, "loss": 0.9750135540962219, "step": 166 }, { "epoch": 0.04059309674282936, "grad_norm": 1.1129203199268616, "learning_rate": 8.058252427184466e-06, "loss": 0.8382435441017151, "step": 167 }, { "epoch": 0.04083616917841517, "grad_norm": 1.055610650030913, "learning_rate": 8.106796116504854e-06, "loss": 0.8109841346740723, "step": 168 }, { "epoch": 0.04107924161400097, "grad_norm": 1.191682142198999, "learning_rate": 8.155339805825243e-06, "loss": 0.8274809122085571, "step": 169 }, { "epoch": 0.04132231404958678, "grad_norm": 1.4044317869625467, "learning_rate": 8.203883495145632e-06, "loss": 0.9143906831741333, "step": 170 }, { "epoch": 0.04156538648517258, "grad_norm": 1.0837078639516509, "learning_rate": 8.25242718446602e-06, "loss": 0.8184795379638672, "step": 171 }, { "epoch": 0.041808458920758386, "grad_norm": 1.015872797802403, "learning_rate": 8.300970873786409e-06, "loss": 0.7018345594406128, "step": 172 }, { "epoch": 0.04205153135634419, "grad_norm": 1.056134794231764, "learning_rate": 8.349514563106797e-06, "loss": 0.7307000160217285, "step": 173 }, { "epoch": 0.042294603791929994, "grad_norm": 1.2169659912394082, "learning_rate": 8.398058252427184e-06, "loss": 0.9226215481758118, "step": 174 }, { "epoch": 0.0425376762275158, "grad_norm": 1.2445400539131395, "learning_rate": 8.446601941747573e-06, "loss": 0.8784397840499878, "step": 175 }, { "epoch": 0.0427807486631016, "grad_norm": 1.099362923185479, "learning_rate": 8.495145631067961e-06, "loss": 0.7663447856903076, "step": 176 }, { "epoch": 0.04302382109868741, "grad_norm": 1.1595849805617604, "learning_rate": 8.54368932038835e-06, "loss": 0.6555088758468628, "step": 177 }, { "epoch": 0.04326689353427322, "grad_norm": 1.205371169125159, "learning_rate": 8.592233009708738e-06, "loss": 0.7448751926422119, "step": 178 }, { "epoch": 0.04350996596985902, "grad_norm": 1.3748762181378134, "learning_rate": 8.640776699029127e-06, "loss": 0.8839258551597595, "step": 179 }, { "epoch": 0.043753038405444825, "grad_norm": 1.2551214719180324, "learning_rate": 8.689320388349515e-06, "loss": 0.9498693943023682, "step": 180 }, { "epoch": 0.043996110841030625, "grad_norm": 1.3075622273496146, "learning_rate": 8.737864077669904e-06, "loss": 0.9740331172943115, "step": 181 }, { "epoch": 0.04423918327661643, "grad_norm": 1.1762144661285512, "learning_rate": 8.786407766990292e-06, "loss": 0.9271747469902039, "step": 182 }, { "epoch": 0.04448225571220223, "grad_norm": 1.2275984122786294, "learning_rate": 8.834951456310681e-06, "loss": 0.7130942940711975, "step": 183 }, { "epoch": 0.04472532814778804, "grad_norm": 1.4380895368701607, "learning_rate": 8.88349514563107e-06, "loss": 0.800206184387207, "step": 184 }, { "epoch": 0.04496840058337385, "grad_norm": 1.0533449197387257, "learning_rate": 8.932038834951458e-06, "loss": 0.5782434344291687, "step": 185 }, { "epoch": 0.04521147301895965, "grad_norm": 1.1294790190203667, "learning_rate": 8.980582524271847e-06, "loss": 0.7924538850784302, "step": 186 }, { "epoch": 0.045454545454545456, "grad_norm": 1.1911904854069384, "learning_rate": 9.029126213592233e-06, "loss": 0.8357741236686707, "step": 187 }, { "epoch": 0.045697617890131256, "grad_norm": 1.207303710922082, "learning_rate": 9.077669902912622e-06, "loss": 0.9291259050369263, "step": 188 }, { "epoch": 0.045940690325717064, "grad_norm": 1.1155792981673942, "learning_rate": 9.12621359223301e-06, "loss": 0.8548524379730225, "step": 189 }, { "epoch": 0.04618376276130287, "grad_norm": 1.23722706959497, "learning_rate": 9.174757281553399e-06, "loss": 0.8148719668388367, "step": 190 }, { "epoch": 0.04642683519688867, "grad_norm": 1.1760195296794045, "learning_rate": 9.223300970873788e-06, "loss": 0.8400400876998901, "step": 191 }, { "epoch": 0.04666990763247448, "grad_norm": 1.194347557676156, "learning_rate": 9.271844660194176e-06, "loss": 0.816845715045929, "step": 192 }, { "epoch": 0.04691298006806028, "grad_norm": 1.4585456715848628, "learning_rate": 9.320388349514565e-06, "loss": 0.8348129391670227, "step": 193 }, { "epoch": 0.04715605250364609, "grad_norm": 1.2498286910549352, "learning_rate": 9.368932038834953e-06, "loss": 0.7381201982498169, "step": 194 }, { "epoch": 0.047399124939231894, "grad_norm": 1.5462101465082645, "learning_rate": 9.41747572815534e-06, "loss": 1.0396337509155273, "step": 195 }, { "epoch": 0.047642197374817695, "grad_norm": 1.068476568768427, "learning_rate": 9.466019417475729e-06, "loss": 0.7295995354652405, "step": 196 }, { "epoch": 0.0478852698104035, "grad_norm": 1.3653738844274566, "learning_rate": 9.514563106796117e-06, "loss": 0.9926124215126038, "step": 197 }, { "epoch": 0.0481283422459893, "grad_norm": 1.2812540526110088, "learning_rate": 9.563106796116506e-06, "loss": 0.9588726758956909, "step": 198 }, { "epoch": 0.04837141468157511, "grad_norm": 1.2538134630501816, "learning_rate": 9.611650485436894e-06, "loss": 0.959867537021637, "step": 199 }, { "epoch": 0.04861448711716091, "grad_norm": 1.3289870005176543, "learning_rate": 9.660194174757283e-06, "loss": 0.8349640369415283, "step": 200 }, { "epoch": 0.04885755955274672, "grad_norm": 1.302895422434068, "learning_rate": 9.708737864077671e-06, "loss": 0.7360700964927673, "step": 201 }, { "epoch": 0.049100631988332526, "grad_norm": 1.215556622580874, "learning_rate": 9.75728155339806e-06, "loss": 0.9024494886398315, "step": 202 }, { "epoch": 0.049343704423918326, "grad_norm": 2.4471418375020653, "learning_rate": 9.805825242718447e-06, "loss": 0.9280184507369995, "step": 203 }, { "epoch": 0.049586776859504134, "grad_norm": 1.4374029141037317, "learning_rate": 9.854368932038835e-06, "loss": 0.6851783990859985, "step": 204 }, { "epoch": 0.049829849295089934, "grad_norm": 1.252155782730683, "learning_rate": 9.902912621359224e-06, "loss": 0.8498727083206177, "step": 205 }, { "epoch": 0.05007292173067574, "grad_norm": 1.150117221890676, "learning_rate": 9.951456310679612e-06, "loss": 0.8200715780258179, "step": 206 }, { "epoch": 0.05031599416626155, "grad_norm": 1.2484202723986133, "learning_rate": 1e-05, "loss": 0.9752775430679321, "step": 207 }, { "epoch": 0.05055906660184735, "grad_norm": 1.5666376941429163, "learning_rate": 1.004854368932039e-05, "loss": 0.8442133069038391, "step": 208 }, { "epoch": 0.05080213903743316, "grad_norm": 1.2154126379654808, "learning_rate": 1.0097087378640778e-05, "loss": 1.0954525470733643, "step": 209 }, { "epoch": 0.05104521147301896, "grad_norm": 1.038085616955461, "learning_rate": 1.0145631067961166e-05, "loss": 0.7235355377197266, "step": 210 }, { "epoch": 0.051288283908604765, "grad_norm": 1.2572201994007612, "learning_rate": 1.0194174757281555e-05, "loss": 0.7975475788116455, "step": 211 }, { "epoch": 0.051531356344190565, "grad_norm": 1.2044284314308495, "learning_rate": 1.0242718446601942e-05, "loss": 0.741875410079956, "step": 212 }, { "epoch": 0.05177442877977637, "grad_norm": 1.5122974710480432, "learning_rate": 1.029126213592233e-05, "loss": 1.0505986213684082, "step": 213 }, { "epoch": 0.05201750121536218, "grad_norm": 1.2128675353904717, "learning_rate": 1.0339805825242719e-05, "loss": 0.7253590822219849, "step": 214 }, { "epoch": 0.05226057365094798, "grad_norm": 1.605841081281062, "learning_rate": 1.0388349514563107e-05, "loss": 1.165531873703003, "step": 215 }, { "epoch": 0.05250364608653379, "grad_norm": 1.4322266496975906, "learning_rate": 1.0436893203883496e-05, "loss": 0.9187163710594177, "step": 216 }, { "epoch": 0.05274671852211959, "grad_norm": 1.1926305140386333, "learning_rate": 1.0485436893203885e-05, "loss": 0.7790827751159668, "step": 217 }, { "epoch": 0.052989790957705396, "grad_norm": 1.2575631106403362, "learning_rate": 1.0533980582524273e-05, "loss": 0.8605998754501343, "step": 218 }, { "epoch": 0.053232863393291203, "grad_norm": 1.1127804488283586, "learning_rate": 1.0582524271844662e-05, "loss": 0.6373496055603027, "step": 219 }, { "epoch": 0.053475935828877004, "grad_norm": 1.2153934984062484, "learning_rate": 1.0631067961165048e-05, "loss": 0.8142063617706299, "step": 220 }, { "epoch": 0.05371900826446281, "grad_norm": 1.2405755748235623, "learning_rate": 1.0679611650485437e-05, "loss": 0.8347150683403015, "step": 221 }, { "epoch": 0.05396208070004861, "grad_norm": 1.1265370255262368, "learning_rate": 1.0728155339805826e-05, "loss": 0.7540105581283569, "step": 222 }, { "epoch": 0.05420515313563442, "grad_norm": 1.5161975965541512, "learning_rate": 1.0776699029126214e-05, "loss": 1.020809531211853, "step": 223 }, { "epoch": 0.05444822557122023, "grad_norm": 1.300497184230585, "learning_rate": 1.0825242718446603e-05, "loss": 0.8361014127731323, "step": 224 }, { "epoch": 0.05469129800680603, "grad_norm": 1.2831127036170449, "learning_rate": 1.0873786407766991e-05, "loss": 0.7840157747268677, "step": 225 }, { "epoch": 0.054934370442391835, "grad_norm": 1.2942591602958031, "learning_rate": 1.092233009708738e-05, "loss": 0.6489155292510986, "step": 226 }, { "epoch": 0.055177442877977635, "grad_norm": 1.3312259453614461, "learning_rate": 1.0970873786407768e-05, "loss": 0.7866085767745972, "step": 227 }, { "epoch": 0.05542051531356344, "grad_norm": 1.3739015541943906, "learning_rate": 1.1019417475728155e-05, "loss": 0.8729261159896851, "step": 228 }, { "epoch": 0.05566358774914924, "grad_norm": 1.2687765244224003, "learning_rate": 1.1067961165048544e-05, "loss": 0.7445756196975708, "step": 229 }, { "epoch": 0.05590666018473505, "grad_norm": 1.4930288845728374, "learning_rate": 1.1116504854368932e-05, "loss": 0.8581697344779968, "step": 230 }, { "epoch": 0.05614973262032086, "grad_norm": 1.335308013008052, "learning_rate": 1.116504854368932e-05, "loss": 0.9341096878051758, "step": 231 }, { "epoch": 0.05639280505590666, "grad_norm": 1.2688639857472173, "learning_rate": 1.121359223300971e-05, "loss": 0.7580307126045227, "step": 232 }, { "epoch": 0.056635877491492466, "grad_norm": 1.5158617528775211, "learning_rate": 1.1262135922330098e-05, "loss": 0.7317906618118286, "step": 233 }, { "epoch": 0.056878949927078266, "grad_norm": 1.2515824679352057, "learning_rate": 1.1310679611650486e-05, "loss": 0.7324402332305908, "step": 234 }, { "epoch": 0.057122022362664074, "grad_norm": 1.1382486678759505, "learning_rate": 1.1359223300970875e-05, "loss": 0.6487488746643066, "step": 235 }, { "epoch": 0.05736509479824988, "grad_norm": 1.424391068527372, "learning_rate": 1.1407766990291262e-05, "loss": 0.7964101433753967, "step": 236 }, { "epoch": 0.05760816723383568, "grad_norm": 1.3362600318557663, "learning_rate": 1.145631067961165e-05, "loss": 0.8242725133895874, "step": 237 }, { "epoch": 0.05785123966942149, "grad_norm": 1.363105907713923, "learning_rate": 1.1504854368932039e-05, "loss": 0.771286129951477, "step": 238 }, { "epoch": 0.05809431210500729, "grad_norm": 1.3506893398590893, "learning_rate": 1.1553398058252427e-05, "loss": 0.7679290771484375, "step": 239 }, { "epoch": 0.0583373845405931, "grad_norm": 1.2286575084884725, "learning_rate": 1.1601941747572816e-05, "loss": 0.7652332782745361, "step": 240 }, { "epoch": 0.058580456976178905, "grad_norm": 1.4635200473843706, "learning_rate": 1.1650485436893204e-05, "loss": 0.723844051361084, "step": 241 }, { "epoch": 0.058823529411764705, "grad_norm": 1.2467700268376247, "learning_rate": 1.1699029126213593e-05, "loss": 0.6942906379699707, "step": 242 }, { "epoch": 0.05906660184735051, "grad_norm": 1.1864169435770218, "learning_rate": 1.1747572815533982e-05, "loss": 0.7390443086624146, "step": 243 }, { "epoch": 0.05930967428293631, "grad_norm": 1.2938494493841786, "learning_rate": 1.1796116504854368e-05, "loss": 1.0136103630065918, "step": 244 }, { "epoch": 0.05955274671852212, "grad_norm": 1.389152484825638, "learning_rate": 1.1844660194174757e-05, "loss": 0.8648906946182251, "step": 245 }, { "epoch": 0.05979581915410792, "grad_norm": 1.1838292659208378, "learning_rate": 1.1893203883495146e-05, "loss": 0.8057101964950562, "step": 246 }, { "epoch": 0.06003889158969373, "grad_norm": 1.4015422177881327, "learning_rate": 1.1941747572815534e-05, "loss": 0.8503154516220093, "step": 247 }, { "epoch": 0.060281964025279536, "grad_norm": 1.207610482057617, "learning_rate": 1.1990291262135923e-05, "loss": 0.7716975212097168, "step": 248 }, { "epoch": 0.060525036460865336, "grad_norm": 1.4492440183037898, "learning_rate": 1.2038834951456311e-05, "loss": 0.712080717086792, "step": 249 }, { "epoch": 0.060768108896451144, "grad_norm": 1.4373835265399857, "learning_rate": 1.20873786407767e-05, "loss": 0.8427804708480835, "step": 250 }, { "epoch": 0.061011181332036944, "grad_norm": 1.6402197330177284, "learning_rate": 1.2135922330097088e-05, "loss": 0.884907603263855, "step": 251 }, { "epoch": 0.06125425376762275, "grad_norm": 1.4766017920819519, "learning_rate": 1.2184466019417475e-05, "loss": 0.9101190567016602, "step": 252 }, { "epoch": 0.06149732620320856, "grad_norm": 1.211003909038649, "learning_rate": 1.2233009708737864e-05, "loss": 0.7934457063674927, "step": 253 }, { "epoch": 0.06174039863879436, "grad_norm": 1.5436808829418012, "learning_rate": 1.2281553398058252e-05, "loss": 0.845151424407959, "step": 254 }, { "epoch": 0.06198347107438017, "grad_norm": 1.372407290867082, "learning_rate": 1.233009708737864e-05, "loss": 0.7216074466705322, "step": 255 }, { "epoch": 0.06222654350996597, "grad_norm": 1.421292852649878, "learning_rate": 1.237864077669903e-05, "loss": 0.9070807695388794, "step": 256 }, { "epoch": 0.062469615945551775, "grad_norm": 1.4417195366291997, "learning_rate": 1.2427184466019418e-05, "loss": 0.94444340467453, "step": 257 }, { "epoch": 0.06271268838113758, "grad_norm": 1.3545299824776502, "learning_rate": 1.2475728155339806e-05, "loss": 0.8773320913314819, "step": 258 }, { "epoch": 0.06295576081672338, "grad_norm": 1.286349814967923, "learning_rate": 1.2524271844660197e-05, "loss": 0.8204959630966187, "step": 259 }, { "epoch": 0.06319883325230918, "grad_norm": 1.2075404800446359, "learning_rate": 1.2572815533980585e-05, "loss": 0.6121944189071655, "step": 260 }, { "epoch": 0.063441905687895, "grad_norm": 1.1528163164243674, "learning_rate": 1.2621359223300974e-05, "loss": 0.864553689956665, "step": 261 }, { "epoch": 0.0636849781234808, "grad_norm": 1.4075785643612575, "learning_rate": 1.266990291262136e-05, "loss": 0.7788692712783813, "step": 262 }, { "epoch": 0.0639280505590666, "grad_norm": 1.4309256173633635, "learning_rate": 1.2718446601941749e-05, "loss": 0.823415994644165, "step": 263 }, { "epoch": 0.06417112299465241, "grad_norm": 1.3318849311714447, "learning_rate": 1.2766990291262138e-05, "loss": 0.8447093367576599, "step": 264 }, { "epoch": 0.06441419543023821, "grad_norm": 1.3643554911726354, "learning_rate": 1.2815533980582526e-05, "loss": 0.7716794013977051, "step": 265 }, { "epoch": 0.06465726786582401, "grad_norm": 1.482682008824273, "learning_rate": 1.2864077669902915e-05, "loss": 0.7615909576416016, "step": 266 }, { "epoch": 0.06490034030140981, "grad_norm": 1.4611786142067125, "learning_rate": 1.2912621359223303e-05, "loss": 0.7779632806777954, "step": 267 }, { "epoch": 0.06514341273699563, "grad_norm": 1.753007537630538, "learning_rate": 1.2961165048543692e-05, "loss": 1.044161081314087, "step": 268 }, { "epoch": 0.06538648517258143, "grad_norm": 1.1470670161268908, "learning_rate": 1.300970873786408e-05, "loss": 0.6635681390762329, "step": 269 }, { "epoch": 0.06562955760816723, "grad_norm": 1.3106925766988682, "learning_rate": 1.3058252427184467e-05, "loss": 0.8217509984970093, "step": 270 }, { "epoch": 0.06587263004375304, "grad_norm": 1.3450456377262414, "learning_rate": 1.3106796116504856e-05, "loss": 0.8500254154205322, "step": 271 }, { "epoch": 0.06611570247933884, "grad_norm": 1.244783497128625, "learning_rate": 1.3155339805825244e-05, "loss": 0.6788653135299683, "step": 272 }, { "epoch": 0.06635877491492465, "grad_norm": 1.6581964775040778, "learning_rate": 1.3203883495145633e-05, "loss": 1.0021288394927979, "step": 273 }, { "epoch": 0.06660184735051045, "grad_norm": 1.6843562720446819, "learning_rate": 1.3252427184466021e-05, "loss": 0.8636997938156128, "step": 274 }, { "epoch": 0.06684491978609626, "grad_norm": 1.3254681029465685, "learning_rate": 1.330097087378641e-05, "loss": 0.789723813533783, "step": 275 }, { "epoch": 0.06708799222168206, "grad_norm": 1.246071525746493, "learning_rate": 1.3349514563106798e-05, "loss": 0.8471893072128296, "step": 276 }, { "epoch": 0.06733106465726786, "grad_norm": 1.3028982140383472, "learning_rate": 1.3398058252427187e-05, "loss": 0.9488403797149658, "step": 277 }, { "epoch": 0.06757413709285368, "grad_norm": 1.2714867381330932, "learning_rate": 1.3446601941747574e-05, "loss": 0.8667684197425842, "step": 278 }, { "epoch": 0.06781720952843948, "grad_norm": 1.370867021240955, "learning_rate": 1.3495145631067962e-05, "loss": 0.7139316201210022, "step": 279 }, { "epoch": 0.06806028196402528, "grad_norm": 1.297579939664304, "learning_rate": 1.3543689320388351e-05, "loss": 0.9581413269042969, "step": 280 }, { "epoch": 0.06830335439961109, "grad_norm": 1.242090285668229, "learning_rate": 1.359223300970874e-05, "loss": 0.607937216758728, "step": 281 }, { "epoch": 0.06854642683519689, "grad_norm": 1.2707098703359316, "learning_rate": 1.3640776699029128e-05, "loss": 0.9260252714157104, "step": 282 }, { "epoch": 0.06878949927078269, "grad_norm": 1.4713630915557385, "learning_rate": 1.3689320388349517e-05, "loss": 0.9052764177322388, "step": 283 }, { "epoch": 0.06903257170636849, "grad_norm": 1.5527794932552554, "learning_rate": 1.3737864077669905e-05, "loss": 0.8559961318969727, "step": 284 }, { "epoch": 0.0692756441419543, "grad_norm": 1.4486235656865767, "learning_rate": 1.3786407766990294e-05, "loss": 0.999858021736145, "step": 285 }, { "epoch": 0.06951871657754011, "grad_norm": 1.2143958427111003, "learning_rate": 1.383495145631068e-05, "loss": 0.798923671245575, "step": 286 }, { "epoch": 0.06976178901312591, "grad_norm": 1.288375544169899, "learning_rate": 1.3883495145631069e-05, "loss": 0.979544997215271, "step": 287 }, { "epoch": 0.07000486144871172, "grad_norm": 1.2135631359136942, "learning_rate": 1.3932038834951458e-05, "loss": 0.878167450428009, "step": 288 }, { "epoch": 0.07024793388429752, "grad_norm": 1.4930955999211986, "learning_rate": 1.3980582524271846e-05, "loss": 0.7250772714614868, "step": 289 }, { "epoch": 0.07049100631988332, "grad_norm": 1.3953057240726376, "learning_rate": 1.4029126213592235e-05, "loss": 0.8967674374580383, "step": 290 }, { "epoch": 0.07073407875546912, "grad_norm": 1.4644519856007672, "learning_rate": 1.4077669902912623e-05, "loss": 0.7309169769287109, "step": 291 }, { "epoch": 0.07097715119105494, "grad_norm": 1.227461293597686, "learning_rate": 1.4126213592233012e-05, "loss": 0.7034288644790649, "step": 292 }, { "epoch": 0.07122022362664074, "grad_norm": 1.3798707003731503, "learning_rate": 1.41747572815534e-05, "loss": 0.8545830249786377, "step": 293 }, { "epoch": 0.07146329606222654, "grad_norm": 1.2784171660693902, "learning_rate": 1.4223300970873787e-05, "loss": 0.7917525768280029, "step": 294 }, { "epoch": 0.07170636849781235, "grad_norm": 1.3234811165028983, "learning_rate": 1.4271844660194176e-05, "loss": 0.8956096172332764, "step": 295 }, { "epoch": 0.07194944093339815, "grad_norm": 1.498002817453801, "learning_rate": 1.4320388349514564e-05, "loss": 0.7109057903289795, "step": 296 }, { "epoch": 0.07219251336898395, "grad_norm": 1.2541722132336612, "learning_rate": 1.4368932038834953e-05, "loss": 0.7911465764045715, "step": 297 }, { "epoch": 0.07243558580456977, "grad_norm": 1.514132106562257, "learning_rate": 1.4417475728155341e-05, "loss": 0.7365239262580872, "step": 298 }, { "epoch": 0.07267865824015557, "grad_norm": 1.3686294630216307, "learning_rate": 1.446601941747573e-05, "loss": 0.6961990594863892, "step": 299 }, { "epoch": 0.07292173067574137, "grad_norm": 1.4193249081172432, "learning_rate": 1.4514563106796118e-05, "loss": 0.8218226432800293, "step": 300 }, { "epoch": 0.07316480311132717, "grad_norm": 1.311567772701485, "learning_rate": 1.4563106796116507e-05, "loss": 0.8144648671150208, "step": 301 }, { "epoch": 0.07340787554691298, "grad_norm": 1.356199543164654, "learning_rate": 1.4611650485436894e-05, "loss": 0.9220322370529175, "step": 302 }, { "epoch": 0.07365094798249879, "grad_norm": 1.5090098368849294, "learning_rate": 1.4660194174757282e-05, "loss": 0.7196826338768005, "step": 303 }, { "epoch": 0.07389402041808459, "grad_norm": 1.3426981324467115, "learning_rate": 1.470873786407767e-05, "loss": 0.7935036420822144, "step": 304 }, { "epoch": 0.0741370928536704, "grad_norm": 1.4128697744589778, "learning_rate": 1.475728155339806e-05, "loss": 0.8244994878768921, "step": 305 }, { "epoch": 0.0743801652892562, "grad_norm": 1.562638265645535, "learning_rate": 1.4805825242718448e-05, "loss": 0.9429188966751099, "step": 306 }, { "epoch": 0.074623237724842, "grad_norm": 1.3681065860207782, "learning_rate": 1.4854368932038836e-05, "loss": 0.6425460577011108, "step": 307 }, { "epoch": 0.0748663101604278, "grad_norm": 1.3767879501997984, "learning_rate": 1.4902912621359225e-05, "loss": 0.7051057815551758, "step": 308 }, { "epoch": 0.07510938259601362, "grad_norm": 1.4074396239712288, "learning_rate": 1.4951456310679614e-05, "loss": 0.7629784345626831, "step": 309 }, { "epoch": 0.07535245503159942, "grad_norm": 1.5652245065439132, "learning_rate": 1.5000000000000002e-05, "loss": 1.0058857202529907, "step": 310 }, { "epoch": 0.07559552746718522, "grad_norm": 1.5853613679492697, "learning_rate": 1.5048543689320389e-05, "loss": 0.7072176933288574, "step": 311 }, { "epoch": 0.07583859990277103, "grad_norm": 1.4084627006084631, "learning_rate": 1.5097087378640777e-05, "loss": 0.7966986894607544, "step": 312 }, { "epoch": 0.07608167233835683, "grad_norm": 1.4359667892380177, "learning_rate": 1.5145631067961166e-05, "loss": 0.7189505696296692, "step": 313 }, { "epoch": 0.07632474477394263, "grad_norm": 1.3582452867316328, "learning_rate": 1.5194174757281555e-05, "loss": 0.7253684997558594, "step": 314 }, { "epoch": 0.07656781720952843, "grad_norm": 1.511327991500767, "learning_rate": 1.5242718446601943e-05, "loss": 0.8617204427719116, "step": 315 }, { "epoch": 0.07681088964511425, "grad_norm": 1.561497018164261, "learning_rate": 1.529126213592233e-05, "loss": 0.7857822179794312, "step": 316 }, { "epoch": 0.07705396208070005, "grad_norm": 1.3772452529216028, "learning_rate": 1.533980582524272e-05, "loss": 0.88658207654953, "step": 317 }, { "epoch": 0.07729703451628585, "grad_norm": 1.3458386130778595, "learning_rate": 1.5388349514563107e-05, "loss": 0.8470664024353027, "step": 318 }, { "epoch": 0.07754010695187166, "grad_norm": 1.187815327443866, "learning_rate": 1.5436893203883496e-05, "loss": 0.6520171165466309, "step": 319 }, { "epoch": 0.07778317938745746, "grad_norm": 1.262790413558124, "learning_rate": 1.5485436893203884e-05, "loss": 0.8208167552947998, "step": 320 }, { "epoch": 0.07802625182304326, "grad_norm": 1.4679701854700533, "learning_rate": 1.5533980582524273e-05, "loss": 0.7363773584365845, "step": 321 }, { "epoch": 0.07826932425862908, "grad_norm": 1.3080981601863995, "learning_rate": 1.558252427184466e-05, "loss": 0.8038463592529297, "step": 322 }, { "epoch": 0.07851239669421488, "grad_norm": 1.4932577566299499, "learning_rate": 1.563106796116505e-05, "loss": 0.8908462524414062, "step": 323 }, { "epoch": 0.07875546912980068, "grad_norm": 1.477134393735411, "learning_rate": 1.5679611650485438e-05, "loss": 0.771199643611908, "step": 324 }, { "epoch": 0.07899854156538648, "grad_norm": 1.1563199325390565, "learning_rate": 1.5728155339805827e-05, "loss": 0.8690524697303772, "step": 325 }, { "epoch": 0.0792416140009723, "grad_norm": 1.3752585239812463, "learning_rate": 1.5776699029126215e-05, "loss": 0.7339615821838379, "step": 326 }, { "epoch": 0.0794846864365581, "grad_norm": 1.3932563859174973, "learning_rate": 1.5825242718446604e-05, "loss": 0.8263595104217529, "step": 327 }, { "epoch": 0.0797277588721439, "grad_norm": 1.4063318431519096, "learning_rate": 1.5873786407766992e-05, "loss": 0.7599712610244751, "step": 328 }, { "epoch": 0.07997083130772971, "grad_norm": 1.439952427307247, "learning_rate": 1.592233009708738e-05, "loss": 0.7736619710922241, "step": 329 }, { "epoch": 0.08021390374331551, "grad_norm": 1.3435836777441696, "learning_rate": 1.597087378640777e-05, "loss": 0.7436249852180481, "step": 330 }, { "epoch": 0.08045697617890131, "grad_norm": 1.394561597333321, "learning_rate": 1.6019417475728155e-05, "loss": 0.6456092596054077, "step": 331 }, { "epoch": 0.08070004861448711, "grad_norm": 1.4801638114948072, "learning_rate": 1.6067961165048543e-05, "loss": 0.7940018177032471, "step": 332 }, { "epoch": 0.08094312105007292, "grad_norm": 1.317781902699669, "learning_rate": 1.6116504854368932e-05, "loss": 0.9618918895721436, "step": 333 }, { "epoch": 0.08118619348565873, "grad_norm": 1.358203057812716, "learning_rate": 1.616504854368932e-05, "loss": 0.9295963048934937, "step": 334 }, { "epoch": 0.08142926592124453, "grad_norm": 1.2938282952508955, "learning_rate": 1.621359223300971e-05, "loss": 0.747465968132019, "step": 335 }, { "epoch": 0.08167233835683034, "grad_norm": 1.292434220602592, "learning_rate": 1.6262135922330097e-05, "loss": 0.8069223165512085, "step": 336 }, { "epoch": 0.08191541079241614, "grad_norm": 1.4495972452419703, "learning_rate": 1.6310679611650486e-05, "loss": 0.8151869773864746, "step": 337 }, { "epoch": 0.08215848322800194, "grad_norm": 1.3816726189062734, "learning_rate": 1.6359223300970874e-05, "loss": 0.802410364151001, "step": 338 }, { "epoch": 0.08240155566358776, "grad_norm": 1.3392544270047886, "learning_rate": 1.6407766990291263e-05, "loss": 0.6879445314407349, "step": 339 }, { "epoch": 0.08264462809917356, "grad_norm": 1.245940781897636, "learning_rate": 1.645631067961165e-05, "loss": 0.6663963794708252, "step": 340 }, { "epoch": 0.08288770053475936, "grad_norm": 1.2601500862719994, "learning_rate": 1.650485436893204e-05, "loss": 0.7094843983650208, "step": 341 }, { "epoch": 0.08313077297034516, "grad_norm": 1.4508621393869596, "learning_rate": 1.655339805825243e-05, "loss": 0.9038093090057373, "step": 342 }, { "epoch": 0.08337384540593097, "grad_norm": 1.386991248949534, "learning_rate": 1.6601941747572817e-05, "loss": 0.8653950691223145, "step": 343 }, { "epoch": 0.08361691784151677, "grad_norm": 1.4609410979300144, "learning_rate": 1.6650485436893206e-05, "loss": 0.6954107284545898, "step": 344 }, { "epoch": 0.08385999027710257, "grad_norm": 1.8327514927975503, "learning_rate": 1.6699029126213594e-05, "loss": 0.8208533525466919, "step": 345 }, { "epoch": 0.08410306271268839, "grad_norm": 1.4842308743940662, "learning_rate": 1.6747572815533983e-05, "loss": 0.68990159034729, "step": 346 }, { "epoch": 0.08434613514827419, "grad_norm": 1.69806844030035, "learning_rate": 1.6796116504854368e-05, "loss": 0.7612667083740234, "step": 347 }, { "epoch": 0.08458920758385999, "grad_norm": 1.4583300910584431, "learning_rate": 1.6844660194174757e-05, "loss": 0.8407200574874878, "step": 348 }, { "epoch": 0.08483228001944579, "grad_norm": 1.4285288686165436, "learning_rate": 1.6893203883495145e-05, "loss": 0.8334863185882568, "step": 349 }, { "epoch": 0.0850753524550316, "grad_norm": 1.543612887256739, "learning_rate": 1.6941747572815534e-05, "loss": 0.7645490169525146, "step": 350 }, { "epoch": 0.0853184248906174, "grad_norm": 1.2549409921088184, "learning_rate": 1.6990291262135922e-05, "loss": 0.7498242855072021, "step": 351 }, { "epoch": 0.0855614973262032, "grad_norm": 1.3575465925428474, "learning_rate": 1.703883495145631e-05, "loss": 0.634401261806488, "step": 352 }, { "epoch": 0.08580456976178902, "grad_norm": 1.344435131770316, "learning_rate": 1.70873786407767e-05, "loss": 0.6866642832756042, "step": 353 }, { "epoch": 0.08604764219737482, "grad_norm": 1.4876916105455047, "learning_rate": 1.7135922330097088e-05, "loss": 0.8875761032104492, "step": 354 }, { "epoch": 0.08629071463296062, "grad_norm": 1.482940102894624, "learning_rate": 1.7184466019417476e-05, "loss": 0.7058879137039185, "step": 355 }, { "epoch": 0.08653378706854643, "grad_norm": 1.3903093422569106, "learning_rate": 1.7233009708737865e-05, "loss": 0.7706238627433777, "step": 356 }, { "epoch": 0.08677685950413223, "grad_norm": 1.405964065664742, "learning_rate": 1.7281553398058253e-05, "loss": 0.7643134593963623, "step": 357 }, { "epoch": 0.08701993193971803, "grad_norm": 1.537227521744467, "learning_rate": 1.7330097087378642e-05, "loss": 0.7891362905502319, "step": 358 }, { "epoch": 0.08726300437530383, "grad_norm": 1.3523395987424045, "learning_rate": 1.737864077669903e-05, "loss": 0.7884392142295837, "step": 359 }, { "epoch": 0.08750607681088965, "grad_norm": 1.4453428826807977, "learning_rate": 1.742718446601942e-05, "loss": 0.6782532930374146, "step": 360 }, { "epoch": 0.08774914924647545, "grad_norm": 1.6088817721522708, "learning_rate": 1.7475728155339808e-05, "loss": 0.9734393358230591, "step": 361 }, { "epoch": 0.08799222168206125, "grad_norm": 1.2338196099403562, "learning_rate": 1.7524271844660196e-05, "loss": 0.6186429262161255, "step": 362 }, { "epoch": 0.08823529411764706, "grad_norm": 1.4731627486733452, "learning_rate": 1.7572815533980585e-05, "loss": 0.8287703990936279, "step": 363 }, { "epoch": 0.08847836655323287, "grad_norm": 1.3712251542161975, "learning_rate": 1.7621359223300973e-05, "loss": 0.9445688724517822, "step": 364 }, { "epoch": 0.08872143898881867, "grad_norm": 1.4080147821948745, "learning_rate": 1.7669902912621362e-05, "loss": 0.8601852655410767, "step": 365 }, { "epoch": 0.08896451142440447, "grad_norm": 1.3908597236108093, "learning_rate": 1.771844660194175e-05, "loss": 0.8293633460998535, "step": 366 }, { "epoch": 0.08920758385999028, "grad_norm": 1.4396974279768828, "learning_rate": 1.776699029126214e-05, "loss": 0.8080475330352783, "step": 367 }, { "epoch": 0.08945065629557608, "grad_norm": 1.3386081648603576, "learning_rate": 1.7815533980582527e-05, "loss": 0.7597870826721191, "step": 368 }, { "epoch": 0.08969372873116188, "grad_norm": 1.2932445375615598, "learning_rate": 1.7864077669902916e-05, "loss": 0.7974927425384521, "step": 369 }, { "epoch": 0.0899368011667477, "grad_norm": 1.3332432137152261, "learning_rate": 1.7912621359223304e-05, "loss": 0.8415985107421875, "step": 370 }, { "epoch": 0.0901798736023335, "grad_norm": 2.0622969585885293, "learning_rate": 1.7961165048543693e-05, "loss": 0.9827492237091064, "step": 371 }, { "epoch": 0.0904229460379193, "grad_norm": 1.1843457544916591, "learning_rate": 1.800970873786408e-05, "loss": 0.6470409631729126, "step": 372 }, { "epoch": 0.09066601847350511, "grad_norm": 1.3870903847529903, "learning_rate": 1.8058252427184467e-05, "loss": 0.8084561228752136, "step": 373 }, { "epoch": 0.09090909090909091, "grad_norm": 1.6327310009600895, "learning_rate": 1.8106796116504855e-05, "loss": 0.8695342540740967, "step": 374 }, { "epoch": 0.09115216334467671, "grad_norm": 1.3975730500282215, "learning_rate": 1.8155339805825244e-05, "loss": 0.6765704154968262, "step": 375 }, { "epoch": 0.09139523578026251, "grad_norm": 1.532395312541951, "learning_rate": 1.8203883495145632e-05, "loss": 0.8007533550262451, "step": 376 }, { "epoch": 0.09163830821584833, "grad_norm": 1.5795556657507253, "learning_rate": 1.825242718446602e-05, "loss": 0.7467725872993469, "step": 377 }, { "epoch": 0.09188138065143413, "grad_norm": 1.3652066684618573, "learning_rate": 1.830097087378641e-05, "loss": 0.7362604141235352, "step": 378 }, { "epoch": 0.09212445308701993, "grad_norm": 1.702381030465842, "learning_rate": 1.8349514563106798e-05, "loss": 0.8352253437042236, "step": 379 }, { "epoch": 0.09236752552260574, "grad_norm": 1.5278879386384543, "learning_rate": 1.8398058252427187e-05, "loss": 0.837522566318512, "step": 380 }, { "epoch": 0.09261059795819154, "grad_norm": 1.2269595871903078, "learning_rate": 1.8446601941747575e-05, "loss": 0.7218831181526184, "step": 381 }, { "epoch": 0.09285367039377734, "grad_norm": 1.47372472936495, "learning_rate": 1.8495145631067964e-05, "loss": 0.808685302734375, "step": 382 }, { "epoch": 0.09309674282936314, "grad_norm": 1.1480778780395606, "learning_rate": 1.8543689320388352e-05, "loss": 0.718202531337738, "step": 383 }, { "epoch": 0.09333981526494896, "grad_norm": 1.3996727261345605, "learning_rate": 1.859223300970874e-05, "loss": 0.7360048890113831, "step": 384 }, { "epoch": 0.09358288770053476, "grad_norm": 1.7920496874661205, "learning_rate": 1.864077669902913e-05, "loss": 0.7881697416305542, "step": 385 }, { "epoch": 0.09382596013612056, "grad_norm": 1.3989369870759867, "learning_rate": 1.8689320388349518e-05, "loss": 0.7383959293365479, "step": 386 }, { "epoch": 0.09406903257170637, "grad_norm": 1.417891059120657, "learning_rate": 1.8737864077669906e-05, "loss": 0.788063108921051, "step": 387 }, { "epoch": 0.09431210500729217, "grad_norm": 1.3091321230806552, "learning_rate": 1.8786407766990295e-05, "loss": 0.7901182770729065, "step": 388 }, { "epoch": 0.09455517744287797, "grad_norm": 1.3115640712700476, "learning_rate": 1.883495145631068e-05, "loss": 0.6841474771499634, "step": 389 }, { "epoch": 0.09479824987846379, "grad_norm": 1.6268573858283795, "learning_rate": 1.888349514563107e-05, "loss": 0.6854604482650757, "step": 390 }, { "epoch": 0.09504132231404959, "grad_norm": 1.5925128547965568, "learning_rate": 1.8932038834951457e-05, "loss": 0.8900717496871948, "step": 391 }, { "epoch": 0.09528439474963539, "grad_norm": 1.3198067585364808, "learning_rate": 1.8980582524271846e-05, "loss": 0.6053980588912964, "step": 392 }, { "epoch": 0.09552746718522119, "grad_norm": 1.3743286817526674, "learning_rate": 1.9029126213592234e-05, "loss": 0.6322689056396484, "step": 393 }, { "epoch": 0.095770539620807, "grad_norm": 1.4789092097590673, "learning_rate": 1.9077669902912623e-05, "loss": 0.638930082321167, "step": 394 }, { "epoch": 0.0960136120563928, "grad_norm": 1.571360214168761, "learning_rate": 1.912621359223301e-05, "loss": 0.8435162305831909, "step": 395 }, { "epoch": 0.0962566844919786, "grad_norm": 1.2698100091471392, "learning_rate": 1.91747572815534e-05, "loss": 0.6762062311172485, "step": 396 }, { "epoch": 0.09649975692756442, "grad_norm": 1.3592780192200569, "learning_rate": 1.922330097087379e-05, "loss": 0.6646108627319336, "step": 397 }, { "epoch": 0.09674282936315022, "grad_norm": 1.5101469096546594, "learning_rate": 1.9271844660194177e-05, "loss": 0.7649343013763428, "step": 398 }, { "epoch": 0.09698590179873602, "grad_norm": 1.5559501234364277, "learning_rate": 1.9320388349514565e-05, "loss": 0.7467461824417114, "step": 399 }, { "epoch": 0.09722897423432182, "grad_norm": 1.6803801707734096, "learning_rate": 1.9368932038834954e-05, "loss": 0.7170389890670776, "step": 400 }, { "epoch": 0.09747204666990764, "grad_norm": 1.5837916760604458, "learning_rate": 1.9417475728155343e-05, "loss": 0.8880615234375, "step": 401 }, { "epoch": 0.09771511910549344, "grad_norm": 1.3337936455519972, "learning_rate": 1.946601941747573e-05, "loss": 0.708240270614624, "step": 402 }, { "epoch": 0.09795819154107924, "grad_norm": 1.5079057271371696, "learning_rate": 1.951456310679612e-05, "loss": 0.7462232112884521, "step": 403 }, { "epoch": 0.09820126397666505, "grad_norm": 1.5508108666557934, "learning_rate": 1.9563106796116508e-05, "loss": 0.7584505081176758, "step": 404 }, { "epoch": 0.09844433641225085, "grad_norm": 1.4947778859524279, "learning_rate": 1.9611650485436893e-05, "loss": 0.808042585849762, "step": 405 }, { "epoch": 0.09868740884783665, "grad_norm": 1.5582336572310673, "learning_rate": 1.9660194174757282e-05, "loss": 0.7659244537353516, "step": 406 }, { "epoch": 0.09893048128342247, "grad_norm": 1.5112645622430085, "learning_rate": 1.970873786407767e-05, "loss": 0.8649922013282776, "step": 407 }, { "epoch": 0.09917355371900827, "grad_norm": 1.3393164401971809, "learning_rate": 1.975728155339806e-05, "loss": 0.7030999660491943, "step": 408 }, { "epoch": 0.09941662615459407, "grad_norm": 1.3199559443200952, "learning_rate": 1.9805825242718447e-05, "loss": 0.855226993560791, "step": 409 }, { "epoch": 0.09965969859017987, "grad_norm": 1.5472162191855747, "learning_rate": 1.9854368932038836e-05, "loss": 0.857399046421051, "step": 410 }, { "epoch": 0.09990277102576568, "grad_norm": 1.205346010537893, "learning_rate": 1.9902912621359225e-05, "loss": 0.7722647786140442, "step": 411 }, { "epoch": 0.10014584346135148, "grad_norm": 1.591555159431319, "learning_rate": 1.9951456310679613e-05, "loss": 0.6607739925384521, "step": 412 }, { "epoch": 0.10038891589693728, "grad_norm": 1.3724120951746344, "learning_rate": 2e-05, "loss": 0.7376465797424316, "step": 413 }, { "epoch": 0.1006319883325231, "grad_norm": 1.3584058606979919, "learning_rate": 1.9999999192205964e-05, "loss": 0.8531791567802429, "step": 414 }, { "epoch": 0.1008750607681089, "grad_norm": 1.0352105812389625, "learning_rate": 1.9999996768823987e-05, "loss": 0.6406031847000122, "step": 415 }, { "epoch": 0.1011181332036947, "grad_norm": 1.1355468737472176, "learning_rate": 1.999999272985446e-05, "loss": 0.6960590481758118, "step": 416 }, { "epoch": 0.1013612056392805, "grad_norm": 1.2400704283402177, "learning_rate": 1.9999987075298037e-05, "loss": 0.7201956510543823, "step": 417 }, { "epoch": 0.10160427807486631, "grad_norm": 1.272876672781831, "learning_rate": 1.9999979805155626e-05, "loss": 0.8252406120300293, "step": 418 }, { "epoch": 0.10184735051045211, "grad_norm": 1.4489564426213926, "learning_rate": 1.9999970919428406e-05, "loss": 0.7149620056152344, "step": 419 }, { "epoch": 0.10209042294603791, "grad_norm": 1.2774960135939386, "learning_rate": 1.9999960418117813e-05, "loss": 0.6678721904754639, "step": 420 }, { "epoch": 0.10233349538162373, "grad_norm": 1.3977350418562948, "learning_rate": 1.9999948301225546e-05, "loss": 0.8625020980834961, "step": 421 }, { "epoch": 0.10257656781720953, "grad_norm": 1.5785403041248987, "learning_rate": 1.9999934568753555e-05, "loss": 0.7730829119682312, "step": 422 }, { "epoch": 0.10281964025279533, "grad_norm": 1.601905547547853, "learning_rate": 1.9999919220704064e-05, "loss": 0.8652878999710083, "step": 423 }, { "epoch": 0.10306271268838113, "grad_norm": 1.3640579790564227, "learning_rate": 1.999990225707955e-05, "loss": 0.8400906324386597, "step": 424 }, { "epoch": 0.10330578512396695, "grad_norm": 1.3827154022567498, "learning_rate": 1.9999883677882756e-05, "loss": 0.791741132736206, "step": 425 }, { "epoch": 0.10354885755955275, "grad_norm": 1.369713966437185, "learning_rate": 1.9999863483116683e-05, "loss": 0.8143365383148193, "step": 426 }, { "epoch": 0.10379192999513855, "grad_norm": 1.5988405144241735, "learning_rate": 1.9999841672784596e-05, "loss": 0.8591856956481934, "step": 427 }, { "epoch": 0.10403500243072436, "grad_norm": 1.2585646358118838, "learning_rate": 1.9999818246890013e-05, "loss": 0.7983167767524719, "step": 428 }, { "epoch": 0.10427807486631016, "grad_norm": 1.51516260796851, "learning_rate": 1.999979320543672e-05, "loss": 0.7876371145248413, "step": 429 }, { "epoch": 0.10452114730189596, "grad_norm": 1.3661723442421156, "learning_rate": 1.9999766548428766e-05, "loss": 0.9360877275466919, "step": 430 }, { "epoch": 0.10476421973748178, "grad_norm": 1.46749530141963, "learning_rate": 1.999973827587046e-05, "loss": 0.9561746120452881, "step": 431 }, { "epoch": 0.10500729217306758, "grad_norm": 1.3425340407793926, "learning_rate": 1.999970838776636e-05, "loss": 0.7775603532791138, "step": 432 }, { "epoch": 0.10525036460865338, "grad_norm": 1.2338583489694541, "learning_rate": 1.9999676884121303e-05, "loss": 0.5482093095779419, "step": 433 }, { "epoch": 0.10549343704423918, "grad_norm": 1.3025476959695594, "learning_rate": 1.9999643764940376e-05, "loss": 0.6663684248924255, "step": 434 }, { "epoch": 0.10573650947982499, "grad_norm": 1.3829921118754664, "learning_rate": 1.999960903022893e-05, "loss": 0.950234055519104, "step": 435 }, { "epoch": 0.10597958191541079, "grad_norm": 1.3842649937619411, "learning_rate": 1.9999572679992576e-05, "loss": 0.8826330304145813, "step": 436 }, { "epoch": 0.10622265435099659, "grad_norm": 1.1781446441304, "learning_rate": 1.999953471423719e-05, "loss": 0.8750373125076294, "step": 437 }, { "epoch": 0.10646572678658241, "grad_norm": 1.170225827770101, "learning_rate": 1.99994951329689e-05, "loss": 0.7012651562690735, "step": 438 }, { "epoch": 0.10670879922216821, "grad_norm": 1.331545120993243, "learning_rate": 1.99994539361941e-05, "loss": 0.684269905090332, "step": 439 }, { "epoch": 0.10695187165775401, "grad_norm": 1.231329138105433, "learning_rate": 1.9999411123919454e-05, "loss": 0.7722471952438354, "step": 440 }, { "epoch": 0.10719494409333981, "grad_norm": 1.2731964599313517, "learning_rate": 1.9999366696151874e-05, "loss": 0.7870053052902222, "step": 441 }, { "epoch": 0.10743801652892562, "grad_norm": 1.3361759512432734, "learning_rate": 1.9999320652898533e-05, "loss": 0.6787863969802856, "step": 442 }, { "epoch": 0.10768108896451142, "grad_norm": 1.3257063385239323, "learning_rate": 1.999927299416688e-05, "loss": 0.6974327564239502, "step": 443 }, { "epoch": 0.10792416140009722, "grad_norm": 1.3903411759873132, "learning_rate": 1.9999223719964608e-05, "loss": 0.8200607299804688, "step": 444 }, { "epoch": 0.10816723383568304, "grad_norm": 1.762603624135142, "learning_rate": 1.999917283029968e-05, "loss": 0.8251653909683228, "step": 445 }, { "epoch": 0.10841030627126884, "grad_norm": 1.6095237792099844, "learning_rate": 1.9999120325180313e-05, "loss": 0.7842493057250977, "step": 446 }, { "epoch": 0.10865337870685464, "grad_norm": 1.643285144411804, "learning_rate": 1.9999066204614995e-05, "loss": 0.8334354162216187, "step": 447 }, { "epoch": 0.10889645114244045, "grad_norm": 1.4240422324734587, "learning_rate": 1.9999010468612464e-05, "loss": 0.8315508961677551, "step": 448 }, { "epoch": 0.10913952357802625, "grad_norm": 1.4653223661771768, "learning_rate": 1.999895311718173e-05, "loss": 0.7379433512687683, "step": 449 }, { "epoch": 0.10938259601361205, "grad_norm": 1.5845277270422828, "learning_rate": 1.999889415033206e-05, "loss": 0.8615049123764038, "step": 450 }, { "epoch": 0.10962566844919786, "grad_norm": 1.3763276852860842, "learning_rate": 1.9998833568072976e-05, "loss": 0.6901631355285645, "step": 451 }, { "epoch": 0.10986874088478367, "grad_norm": 1.3180265484919715, "learning_rate": 1.999877137041427e-05, "loss": 0.767656683921814, "step": 452 }, { "epoch": 0.11011181332036947, "grad_norm": 1.2563706643767532, "learning_rate": 1.9998707557365983e-05, "loss": 0.8108268976211548, "step": 453 }, { "epoch": 0.11035488575595527, "grad_norm": 1.3891314888870063, "learning_rate": 1.9998642128938432e-05, "loss": 0.8159427642822266, "step": 454 }, { "epoch": 0.11059795819154108, "grad_norm": 1.2045541658026022, "learning_rate": 1.9998575085142183e-05, "loss": 0.8089761734008789, "step": 455 }, { "epoch": 0.11084103062712689, "grad_norm": 1.3622814172981805, "learning_rate": 1.999850642598807e-05, "loss": 0.7314633727073669, "step": 456 }, { "epoch": 0.11108410306271269, "grad_norm": 1.1872317978008826, "learning_rate": 1.9998436151487187e-05, "loss": 0.6517931818962097, "step": 457 }, { "epoch": 0.11132717549829849, "grad_norm": 1.6235022959820344, "learning_rate": 1.9998364261650886e-05, "loss": 0.8892073035240173, "step": 458 }, { "epoch": 0.1115702479338843, "grad_norm": 1.4965300002670392, "learning_rate": 1.999829075649078e-05, "loss": 0.672325849533081, "step": 459 }, { "epoch": 0.1118133203694701, "grad_norm": 1.5293205220004327, "learning_rate": 1.999821563601874e-05, "loss": 0.7605061531066895, "step": 460 }, { "epoch": 0.1120563928050559, "grad_norm": 1.4021685566730608, "learning_rate": 1.9998138900246916e-05, "loss": 0.7092502117156982, "step": 461 }, { "epoch": 0.11229946524064172, "grad_norm": 1.3009860299537734, "learning_rate": 1.999806054918769e-05, "loss": 0.7096859812736511, "step": 462 }, { "epoch": 0.11254253767622752, "grad_norm": 1.3261684338904547, "learning_rate": 1.9997980582853728e-05, "loss": 0.6822162866592407, "step": 463 }, { "epoch": 0.11278561011181332, "grad_norm": 1.3922278169333013, "learning_rate": 1.9997899001257953e-05, "loss": 0.8326777219772339, "step": 464 }, { "epoch": 0.11302868254739913, "grad_norm": 1.4977139792341623, "learning_rate": 1.9997815804413537e-05, "loss": 0.8791807889938354, "step": 465 }, { "epoch": 0.11327175498298493, "grad_norm": 1.5428680756937747, "learning_rate": 1.9997730992333926e-05, "loss": 0.639805793762207, "step": 466 }, { "epoch": 0.11351482741857073, "grad_norm": 1.7503858052356687, "learning_rate": 1.999764456503282e-05, "loss": 0.8292750120162964, "step": 467 }, { "epoch": 0.11375789985415653, "grad_norm": 1.353468040680582, "learning_rate": 1.9997556522524186e-05, "loss": 0.7509797811508179, "step": 468 }, { "epoch": 0.11400097228974235, "grad_norm": 1.3495330246996708, "learning_rate": 1.999746686482224e-05, "loss": 0.7874879837036133, "step": 469 }, { "epoch": 0.11424404472532815, "grad_norm": 1.4848863070645695, "learning_rate": 1.9997375591941478e-05, "loss": 0.8062007427215576, "step": 470 }, { "epoch": 0.11448711716091395, "grad_norm": 1.2416459115764549, "learning_rate": 1.9997282703896637e-05, "loss": 0.678037166595459, "step": 471 }, { "epoch": 0.11473018959649976, "grad_norm": 1.4868288812905586, "learning_rate": 1.9997188200702725e-05, "loss": 0.6715686321258545, "step": 472 }, { "epoch": 0.11497326203208556, "grad_norm": 1.2908633122756803, "learning_rate": 1.9997092082375015e-05, "loss": 0.6653944849967957, "step": 473 }, { "epoch": 0.11521633446767136, "grad_norm": 1.5008551494516393, "learning_rate": 1.9996994348929035e-05, "loss": 0.7632204294204712, "step": 474 }, { "epoch": 0.11545940690325716, "grad_norm": 1.5239857337758784, "learning_rate": 1.9996895000380566e-05, "loss": 0.7749382853507996, "step": 475 }, { "epoch": 0.11570247933884298, "grad_norm": 1.3660337663758597, "learning_rate": 1.9996794036745666e-05, "loss": 0.7735611796379089, "step": 476 }, { "epoch": 0.11594555177442878, "grad_norm": 1.7669127476570303, "learning_rate": 1.999669145804065e-05, "loss": 0.9216466546058655, "step": 477 }, { "epoch": 0.11618862421001458, "grad_norm": 1.3751833222017675, "learning_rate": 1.9996587264282084e-05, "loss": 0.6701909303665161, "step": 478 }, { "epoch": 0.1164316966456004, "grad_norm": 1.3853202626238945, "learning_rate": 1.9996481455486803e-05, "loss": 0.764870285987854, "step": 479 }, { "epoch": 0.1166747690811862, "grad_norm": 1.3798233402845759, "learning_rate": 1.99963740316719e-05, "loss": 0.5955643653869629, "step": 480 }, { "epoch": 0.116917841516772, "grad_norm": 1.516469690299404, "learning_rate": 1.9996264992854733e-05, "loss": 0.7799233794212341, "step": 481 }, { "epoch": 0.11716091395235781, "grad_norm": 1.5707976671944963, "learning_rate": 1.9996154339052917e-05, "loss": 0.7163299918174744, "step": 482 }, { "epoch": 0.11740398638794361, "grad_norm": 1.3419843409867271, "learning_rate": 1.9996042070284332e-05, "loss": 0.6769019365310669, "step": 483 }, { "epoch": 0.11764705882352941, "grad_norm": 1.4062556290029227, "learning_rate": 1.999592818656711e-05, "loss": 0.8044548630714417, "step": 484 }, { "epoch": 0.11789013125911521, "grad_norm": 1.3041129616414615, "learning_rate": 1.9995812687919653e-05, "loss": 0.675125002861023, "step": 485 }, { "epoch": 0.11813320369470102, "grad_norm": 1.3023663534293206, "learning_rate": 1.9995695574360626e-05, "loss": 0.7121978998184204, "step": 486 }, { "epoch": 0.11837627613028683, "grad_norm": 1.6573179728121898, "learning_rate": 1.999557684590894e-05, "loss": 0.8366292119026184, "step": 487 }, { "epoch": 0.11861934856587263, "grad_norm": 1.2272274074003602, "learning_rate": 1.9995456502583782e-05, "loss": 0.6278789043426514, "step": 488 }, { "epoch": 0.11886242100145844, "grad_norm": 1.536881118467519, "learning_rate": 1.9995334544404594e-05, "loss": 0.7399742603302002, "step": 489 }, { "epoch": 0.11910549343704424, "grad_norm": 1.386615408331633, "learning_rate": 1.999521097139108e-05, "loss": 0.8581811785697937, "step": 490 }, { "epoch": 0.11934856587263004, "grad_norm": 1.3730070845386146, "learning_rate": 1.9995085783563203e-05, "loss": 0.8921172618865967, "step": 491 }, { "epoch": 0.11959163830821584, "grad_norm": 1.3872549996639514, "learning_rate": 1.9994958980941188e-05, "loss": 0.7164539694786072, "step": 492 }, { "epoch": 0.11983471074380166, "grad_norm": 1.2904950738421, "learning_rate": 1.9994830563545524e-05, "loss": 0.7452867031097412, "step": 493 }, { "epoch": 0.12007778317938746, "grad_norm": 1.3968654254642288, "learning_rate": 1.9994700531396955e-05, "loss": 0.6565608978271484, "step": 494 }, { "epoch": 0.12032085561497326, "grad_norm": 1.5604747495206774, "learning_rate": 1.999456888451649e-05, "loss": 0.724810004234314, "step": 495 }, { "epoch": 0.12056392805055907, "grad_norm": 1.556473002464419, "learning_rate": 1.9994435622925396e-05, "loss": 0.8569102883338928, "step": 496 }, { "epoch": 0.12080700048614487, "grad_norm": 1.2922820250625522, "learning_rate": 1.9994300746645204e-05, "loss": 0.6696858406066895, "step": 497 }, { "epoch": 0.12105007292173067, "grad_norm": 1.3096808161141538, "learning_rate": 1.9994164255697706e-05, "loss": 0.5995984077453613, "step": 498 }, { "epoch": 0.12129314535731649, "grad_norm": 1.3483022511161509, "learning_rate": 1.999402615010495e-05, "loss": 0.8167673349380493, "step": 499 }, { "epoch": 0.12153621779290229, "grad_norm": 1.5582923767776897, "learning_rate": 1.999388642988925e-05, "loss": 0.8909902572631836, "step": 500 }, { "epoch": 0.12177929022848809, "grad_norm": 1.3125558608283698, "learning_rate": 1.999374509507318e-05, "loss": 0.6474215984344482, "step": 501 }, { "epoch": 0.12202236266407389, "grad_norm": 1.2956194361277755, "learning_rate": 1.9993602145679577e-05, "loss": 0.7204924821853638, "step": 502 }, { "epoch": 0.1222654350996597, "grad_norm": 1.2603834529847189, "learning_rate": 1.9993457581731525e-05, "loss": 0.678870439529419, "step": 503 }, { "epoch": 0.1225085075352455, "grad_norm": 1.2795617384078903, "learning_rate": 1.999331140325239e-05, "loss": 0.9004747867584229, "step": 504 }, { "epoch": 0.1227515799708313, "grad_norm": 1.2027275771109203, "learning_rate": 1.999316361026578e-05, "loss": 0.6452218294143677, "step": 505 }, { "epoch": 0.12299465240641712, "grad_norm": 1.2790377810489866, "learning_rate": 1.999301420279558e-05, "loss": 0.8020837306976318, "step": 506 }, { "epoch": 0.12323772484200292, "grad_norm": 1.4235523061549644, "learning_rate": 1.9992863180865924e-05, "loss": 0.8302303552627563, "step": 507 }, { "epoch": 0.12348079727758872, "grad_norm": 1.4310638872839359, "learning_rate": 1.9992710544501215e-05, "loss": 0.7641040682792664, "step": 508 }, { "epoch": 0.12372386971317452, "grad_norm": 1.5390454525655395, "learning_rate": 1.999255629372611e-05, "loss": 0.8621032238006592, "step": 509 }, { "epoch": 0.12396694214876033, "grad_norm": 1.3396538300761456, "learning_rate": 1.9992400428565524e-05, "loss": 0.8388805985450745, "step": 510 }, { "epoch": 0.12421001458434613, "grad_norm": 1.3364767892841867, "learning_rate": 1.9992242949044645e-05, "loss": 0.8929553627967834, "step": 511 }, { "epoch": 0.12445308701993194, "grad_norm": 1.4294731458049337, "learning_rate": 1.9992083855188914e-05, "loss": 0.7112388014793396, "step": 512 }, { "epoch": 0.12469615945551775, "grad_norm": 1.613405308200654, "learning_rate": 1.9991923147024036e-05, "loss": 0.7540104389190674, "step": 513 }, { "epoch": 0.12493923189110355, "grad_norm": 1.3450112814429138, "learning_rate": 1.999176082457597e-05, "loss": 0.8249487280845642, "step": 514 }, { "epoch": 0.12518230432668936, "grad_norm": 1.3626254983164052, "learning_rate": 1.9991596887870946e-05, "loss": 0.8185168504714966, "step": 515 }, { "epoch": 0.12542537676227516, "grad_norm": 1.2754912854651854, "learning_rate": 1.9991431336935444e-05, "loss": 0.5937727093696594, "step": 516 }, { "epoch": 0.12566844919786097, "grad_norm": 1.2855489105142124, "learning_rate": 1.9991264171796213e-05, "loss": 0.9256038069725037, "step": 517 }, { "epoch": 0.12591152163344677, "grad_norm": 1.348754258599677, "learning_rate": 1.9991095392480264e-05, "loss": 0.8640462160110474, "step": 518 }, { "epoch": 0.12615459406903257, "grad_norm": 1.6808733138225005, "learning_rate": 1.9990924999014854e-05, "loss": 0.904654324054718, "step": 519 }, { "epoch": 0.12639766650461837, "grad_norm": 1.2261947973992946, "learning_rate": 1.999075299142752e-05, "loss": 0.8341623544692993, "step": 520 }, { "epoch": 0.12664073894020417, "grad_norm": 1.4090436789690182, "learning_rate": 1.9990579369746054e-05, "loss": 0.7295001745223999, "step": 521 }, { "epoch": 0.12688381137579, "grad_norm": 1.1884441585840573, "learning_rate": 1.99904041339985e-05, "loss": 0.7214516401290894, "step": 522 }, { "epoch": 0.1271268838113758, "grad_norm": 1.3790228681177206, "learning_rate": 1.999022728421317e-05, "loss": 0.6953664422035217, "step": 523 }, { "epoch": 0.1273699562469616, "grad_norm": 1.2116353320889663, "learning_rate": 1.999004882041863e-05, "loss": 0.7195498943328857, "step": 524 }, { "epoch": 0.1276130286825474, "grad_norm": 1.3640495670481052, "learning_rate": 1.9989868742643725e-05, "loss": 0.7388461232185364, "step": 525 }, { "epoch": 0.1278561011181332, "grad_norm": 1.4247044187128157, "learning_rate": 1.9989687050917543e-05, "loss": 0.7764464616775513, "step": 526 }, { "epoch": 0.128099173553719, "grad_norm": 1.422502100555569, "learning_rate": 1.9989503745269434e-05, "loss": 0.5995833873748779, "step": 527 }, { "epoch": 0.12834224598930483, "grad_norm": 1.4128308746120923, "learning_rate": 1.9989318825729016e-05, "loss": 0.8536065816879272, "step": 528 }, { "epoch": 0.12858531842489063, "grad_norm": 1.2554538564517772, "learning_rate": 1.9989132292326163e-05, "loss": 0.8471872806549072, "step": 529 }, { "epoch": 0.12882839086047643, "grad_norm": 1.5186530981522917, "learning_rate": 1.9988944145091013e-05, "loss": 0.6897215843200684, "step": 530 }, { "epoch": 0.12907146329606223, "grad_norm": 1.4500718946708042, "learning_rate": 1.9988754384053962e-05, "loss": 0.781897246837616, "step": 531 }, { "epoch": 0.12931453573164803, "grad_norm": 1.4986993807172846, "learning_rate": 1.9988563009245662e-05, "loss": 0.6296090483665466, "step": 532 }, { "epoch": 0.12955760816723383, "grad_norm": 1.1891676289339366, "learning_rate": 1.998837002069704e-05, "loss": 0.6561669707298279, "step": 533 }, { "epoch": 0.12980068060281963, "grad_norm": 1.290381228631031, "learning_rate": 1.9988175418439275e-05, "loss": 0.7062938213348389, "step": 534 }, { "epoch": 0.13004375303840546, "grad_norm": 1.6197652043969222, "learning_rate": 1.9987979202503797e-05, "loss": 0.6563053131103516, "step": 535 }, { "epoch": 0.13028682547399126, "grad_norm": 1.2914922005604688, "learning_rate": 1.9987781372922318e-05, "loss": 0.7568116188049316, "step": 536 }, { "epoch": 0.13052989790957706, "grad_norm": 1.3390147933764283, "learning_rate": 1.9987581929726788e-05, "loss": 0.8073443174362183, "step": 537 }, { "epoch": 0.13077297034516286, "grad_norm": 1.8715504379467185, "learning_rate": 1.9987380872949438e-05, "loss": 0.9267756938934326, "step": 538 }, { "epoch": 0.13101604278074866, "grad_norm": 1.3476167393555143, "learning_rate": 1.9987178202622746e-05, "loss": 0.8518221378326416, "step": 539 }, { "epoch": 0.13125911521633446, "grad_norm": 1.2929880674010128, "learning_rate": 1.9986973918779457e-05, "loss": 0.7201565504074097, "step": 540 }, { "epoch": 0.13150218765192026, "grad_norm": 1.2622606848841766, "learning_rate": 1.9986768021452575e-05, "loss": 0.7583926916122437, "step": 541 }, { "epoch": 0.1317452600875061, "grad_norm": 1.368986022484321, "learning_rate": 1.998656051067536e-05, "loss": 0.6475867033004761, "step": 542 }, { "epoch": 0.1319883325230919, "grad_norm": 1.3884189121949697, "learning_rate": 1.9986351386481345e-05, "loss": 0.7223794460296631, "step": 543 }, { "epoch": 0.1322314049586777, "grad_norm": 1.182779111583729, "learning_rate": 1.998614064890431e-05, "loss": 0.6516745686531067, "step": 544 }, { "epoch": 0.1324744773942635, "grad_norm": 1.3254866081530865, "learning_rate": 1.99859282979783e-05, "loss": 0.6797758340835571, "step": 545 }, { "epoch": 0.1327175498298493, "grad_norm": 1.6121729804613087, "learning_rate": 1.9985714333737628e-05, "loss": 0.9135996103286743, "step": 546 }, { "epoch": 0.1329606222654351, "grad_norm": 1.2899029440336753, "learning_rate": 1.998549875621686e-05, "loss": 0.9069602489471436, "step": 547 }, { "epoch": 0.1332036947010209, "grad_norm": 1.2869575414882202, "learning_rate": 1.998528156545082e-05, "loss": 0.6616719961166382, "step": 548 }, { "epoch": 0.13344676713660672, "grad_norm": 1.2833789045126722, "learning_rate": 1.9985062761474605e-05, "loss": 0.7369615435600281, "step": 549 }, { "epoch": 0.13368983957219252, "grad_norm": 1.4080300015728002, "learning_rate": 1.998484234432356e-05, "loss": 0.8767683506011963, "step": 550 }, { "epoch": 0.13393291200777832, "grad_norm": 1.3583113209553492, "learning_rate": 1.9984620314033294e-05, "loss": 0.6262576580047607, "step": 551 }, { "epoch": 0.13417598444336412, "grad_norm": 1.3634379042189466, "learning_rate": 1.9984396670639678e-05, "loss": 0.8054844737052917, "step": 552 }, { "epoch": 0.13441905687894992, "grad_norm": 1.3459178371014222, "learning_rate": 1.998417141417885e-05, "loss": 0.7966904640197754, "step": 553 }, { "epoch": 0.13466212931453572, "grad_norm": 1.1950407850350617, "learning_rate": 1.998394454468719e-05, "loss": 0.6585164070129395, "step": 554 }, { "epoch": 0.13490520175012152, "grad_norm": 1.3140695563314364, "learning_rate": 1.9983716062201363e-05, "loss": 0.7371630668640137, "step": 555 }, { "epoch": 0.13514827418570735, "grad_norm": 1.3102586236466234, "learning_rate": 1.9983485966758274e-05, "loss": 0.7778677940368652, "step": 556 }, { "epoch": 0.13539134662129315, "grad_norm": 1.1295886813798905, "learning_rate": 1.9983254258395105e-05, "loss": 0.8782382011413574, "step": 557 }, { "epoch": 0.13563441905687895, "grad_norm": 1.3245194680760564, "learning_rate": 1.9983020937149284e-05, "loss": 0.7176942825317383, "step": 558 }, { "epoch": 0.13587749149246475, "grad_norm": 1.4318718560765546, "learning_rate": 1.998278600305851e-05, "loss": 0.8424107432365417, "step": 559 }, { "epoch": 0.13612056392805055, "grad_norm": 1.5219341902934989, "learning_rate": 1.9982549456160735e-05, "loss": 0.7997035980224609, "step": 560 }, { "epoch": 0.13636363636363635, "grad_norm": 1.343263051891143, "learning_rate": 1.9982311296494173e-05, "loss": 0.7772162556648254, "step": 561 }, { "epoch": 0.13660670879922218, "grad_norm": 1.3593907570927035, "learning_rate": 1.998207152409731e-05, "loss": 0.7486333847045898, "step": 562 }, { "epoch": 0.13684978123480798, "grad_norm": 1.223126231452975, "learning_rate": 1.9981830139008875e-05, "loss": 0.7076734304428101, "step": 563 }, { "epoch": 0.13709285367039378, "grad_norm": 1.2023454868187964, "learning_rate": 1.998158714126787e-05, "loss": 0.6798034310340881, "step": 564 }, { "epoch": 0.13733592610597958, "grad_norm": 1.3019567345845828, "learning_rate": 1.9981342530913556e-05, "loss": 0.7665469646453857, "step": 565 }, { "epoch": 0.13757899854156538, "grad_norm": 1.6277700540875344, "learning_rate": 1.9981096307985442e-05, "loss": 0.7924169898033142, "step": 566 }, { "epoch": 0.13782207097715118, "grad_norm": 1.4597797109425028, "learning_rate": 1.9980848472523315e-05, "loss": 0.7586926817893982, "step": 567 }, { "epoch": 0.13806514341273698, "grad_norm": 1.5165039323367162, "learning_rate": 1.9980599024567215e-05, "loss": 0.804307222366333, "step": 568 }, { "epoch": 0.1383082158483228, "grad_norm": 1.2266895447546797, "learning_rate": 1.9980347964157442e-05, "loss": 0.8253638744354248, "step": 569 }, { "epoch": 0.1385512882839086, "grad_norm": 1.3120643665710756, "learning_rate": 1.9980095291334555e-05, "loss": 0.6986863017082214, "step": 570 }, { "epoch": 0.13879436071949441, "grad_norm": 1.372283702897699, "learning_rate": 1.997984100613938e-05, "loss": 0.7765489816665649, "step": 571 }, { "epoch": 0.13903743315508021, "grad_norm": 1.3106047553762863, "learning_rate": 1.997958510861299e-05, "loss": 0.7361048460006714, "step": 572 }, { "epoch": 0.13928050559066601, "grad_norm": 1.481759880758782, "learning_rate": 1.997932759879674e-05, "loss": 0.9374935030937195, "step": 573 }, { "epoch": 0.13952357802625182, "grad_norm": 1.6472797347398613, "learning_rate": 1.9979068476732226e-05, "loss": 0.7305525541305542, "step": 574 }, { "epoch": 0.13976665046183762, "grad_norm": 1.2933417932876863, "learning_rate": 1.9978807742461307e-05, "loss": 0.8099181652069092, "step": 575 }, { "epoch": 0.14000972289742344, "grad_norm": 1.4192586332786548, "learning_rate": 1.9978545396026116e-05, "loss": 0.8853392601013184, "step": 576 }, { "epoch": 0.14025279533300924, "grad_norm": 1.255687788880191, "learning_rate": 1.997828143746903e-05, "loss": 0.7434967756271362, "step": 577 }, { "epoch": 0.14049586776859505, "grad_norm": 1.3378028125865473, "learning_rate": 1.9978015866832697e-05, "loss": 0.6883610486984253, "step": 578 }, { "epoch": 0.14073894020418085, "grad_norm": 1.2462621452815201, "learning_rate": 1.9977748684160022e-05, "loss": 0.9394352436065674, "step": 579 }, { "epoch": 0.14098201263976665, "grad_norm": 1.6005922217564694, "learning_rate": 1.9977479889494173e-05, "loss": 0.7575966119766235, "step": 580 }, { "epoch": 0.14122508507535245, "grad_norm": 1.2668862945887045, "learning_rate": 1.9977209482878576e-05, "loss": 0.7737035155296326, "step": 581 }, { "epoch": 0.14146815751093825, "grad_norm": 1.0322327251685914, "learning_rate": 1.9976937464356912e-05, "loss": 0.5839126110076904, "step": 582 }, { "epoch": 0.14171122994652408, "grad_norm": 1.3037188575666143, "learning_rate": 1.997666383397313e-05, "loss": 0.5638519525527954, "step": 583 }, { "epoch": 0.14195430238210988, "grad_norm": 1.6483292230020612, "learning_rate": 1.9976388591771442e-05, "loss": 0.6596106290817261, "step": 584 }, { "epoch": 0.14219737481769568, "grad_norm": 1.5042680573508942, "learning_rate": 1.9976111737796312e-05, "loss": 0.6547865271568298, "step": 585 }, { "epoch": 0.14244044725328148, "grad_norm": 1.2295777965597998, "learning_rate": 1.9975833272092468e-05, "loss": 0.6757097840309143, "step": 586 }, { "epoch": 0.14268351968886728, "grad_norm": 1.4224090490504195, "learning_rate": 1.9975553194704903e-05, "loss": 0.7585034370422363, "step": 587 }, { "epoch": 0.14292659212445308, "grad_norm": 1.4408967449757468, "learning_rate": 1.9975271505678858e-05, "loss": 0.6978644132614136, "step": 588 }, { "epoch": 0.14316966456003888, "grad_norm": 1.3263910149685063, "learning_rate": 1.9974988205059848e-05, "loss": 0.6152922511100769, "step": 589 }, { "epoch": 0.1434127369956247, "grad_norm": 1.3614160763795784, "learning_rate": 1.9974703292893648e-05, "loss": 0.7995831370353699, "step": 590 }, { "epoch": 0.1436558094312105, "grad_norm": 1.7276748160820736, "learning_rate": 1.9974416769226273e-05, "loss": 0.8297946453094482, "step": 591 }, { "epoch": 0.1438988818667963, "grad_norm": 1.540876263283875, "learning_rate": 1.9974128634104025e-05, "loss": 0.8067132234573364, "step": 592 }, { "epoch": 0.1441419543023821, "grad_norm": 1.3221940554683913, "learning_rate": 1.9973838887573454e-05, "loss": 0.7378815412521362, "step": 593 }, { "epoch": 0.1443850267379679, "grad_norm": 1.2808452892065831, "learning_rate": 1.9973547529681367e-05, "loss": 0.6834346055984497, "step": 594 }, { "epoch": 0.1446280991735537, "grad_norm": 1.5551544786204423, "learning_rate": 1.997325456047484e-05, "loss": 0.8581990003585815, "step": 595 }, { "epoch": 0.14487117160913954, "grad_norm": 1.360977970278577, "learning_rate": 1.9972959980001197e-05, "loss": 0.7241047620773315, "step": 596 }, { "epoch": 0.14511424404472534, "grad_norm": 1.1847938242725595, "learning_rate": 1.997266378830804e-05, "loss": 0.5485596656799316, "step": 597 }, { "epoch": 0.14535731648031114, "grad_norm": 1.4605725002244445, "learning_rate": 1.9972365985443213e-05, "loss": 0.8140109181404114, "step": 598 }, { "epoch": 0.14560038891589694, "grad_norm": 1.669076457434598, "learning_rate": 1.9972066571454835e-05, "loss": 0.847004771232605, "step": 599 }, { "epoch": 0.14584346135148274, "grad_norm": 1.3533890637144335, "learning_rate": 1.9971765546391276e-05, "loss": 0.699756920337677, "step": 600 }, { "epoch": 0.14608653378706854, "grad_norm": 1.311629462135118, "learning_rate": 1.9971462910301168e-05, "loss": 0.6278185248374939, "step": 601 }, { "epoch": 0.14632960622265434, "grad_norm": 1.3668629723745818, "learning_rate": 1.997115866323341e-05, "loss": 0.822769284248352, "step": 602 }, { "epoch": 0.14657267865824017, "grad_norm": 1.6035302988601898, "learning_rate": 1.9970852805237148e-05, "loss": 0.88144850730896, "step": 603 }, { "epoch": 0.14681575109382597, "grad_norm": 1.3637014742552798, "learning_rate": 1.9970545336361802e-05, "loss": 0.6558984518051147, "step": 604 }, { "epoch": 0.14705882352941177, "grad_norm": 1.2971691245736108, "learning_rate": 1.9970236256657044e-05, "loss": 0.6752508878707886, "step": 605 }, { "epoch": 0.14730189596499757, "grad_norm": 1.1285026166173608, "learning_rate": 1.9969925566172806e-05, "loss": 0.7163481116294861, "step": 606 }, { "epoch": 0.14754496840058337, "grad_norm": 1.3804295998704876, "learning_rate": 1.9969613264959287e-05, "loss": 0.8334599733352661, "step": 607 }, { "epoch": 0.14778804083616917, "grad_norm": 1.3844678216484876, "learning_rate": 1.996929935306694e-05, "loss": 0.7542829513549805, "step": 608 }, { "epoch": 0.14803111327175497, "grad_norm": 1.3378649389421586, "learning_rate": 1.996898383054648e-05, "loss": 0.8293688893318176, "step": 609 }, { "epoch": 0.1482741857073408, "grad_norm": 1.310840261496358, "learning_rate": 1.9968666697448885e-05, "loss": 0.7456485629081726, "step": 610 }, { "epoch": 0.1485172581429266, "grad_norm": 1.3928796891779567, "learning_rate": 1.996834795382539e-05, "loss": 0.7719817161560059, "step": 611 }, { "epoch": 0.1487603305785124, "grad_norm": 1.3492792043019917, "learning_rate": 1.996802759972749e-05, "loss": 0.7813098430633545, "step": 612 }, { "epoch": 0.1490034030140982, "grad_norm": 1.4370882456647838, "learning_rate": 1.9967705635206937e-05, "loss": 0.855487585067749, "step": 613 }, { "epoch": 0.149246475449684, "grad_norm": 1.412080317461785, "learning_rate": 1.996738206031575e-05, "loss": 0.6924394369125366, "step": 614 }, { "epoch": 0.1494895478852698, "grad_norm": 1.2400583417018614, "learning_rate": 1.996705687510621e-05, "loss": 0.8513713479042053, "step": 615 }, { "epoch": 0.1497326203208556, "grad_norm": 1.3643899267757682, "learning_rate": 1.996673007963085e-05, "loss": 0.8821783065795898, "step": 616 }, { "epoch": 0.14997569275644143, "grad_norm": 1.378088107788351, "learning_rate": 1.9966401673942468e-05, "loss": 0.7023358345031738, "step": 617 }, { "epoch": 0.15021876519202723, "grad_norm": 1.3002858441748768, "learning_rate": 1.9966071658094116e-05, "loss": 0.7026766538619995, "step": 618 }, { "epoch": 0.15046183762761303, "grad_norm": 1.3987565977116412, "learning_rate": 1.9965740032139114e-05, "loss": 0.738823413848877, "step": 619 }, { "epoch": 0.15070491006319883, "grad_norm": 1.3174129585831378, "learning_rate": 1.996540679613104e-05, "loss": 0.5979900360107422, "step": 620 }, { "epoch": 0.15094798249878463, "grad_norm": 1.39568064509608, "learning_rate": 1.9965071950123732e-05, "loss": 0.8568977117538452, "step": 621 }, { "epoch": 0.15119105493437043, "grad_norm": 1.4015259555848358, "learning_rate": 1.9964735494171284e-05, "loss": 0.8640449047088623, "step": 622 }, { "epoch": 0.15143412736995623, "grad_norm": 1.3161524921002272, "learning_rate": 1.9964397428328058e-05, "loss": 0.8959442377090454, "step": 623 }, { "epoch": 0.15167719980554206, "grad_norm": 1.2247084791959344, "learning_rate": 1.996405775264867e-05, "loss": 0.688657283782959, "step": 624 }, { "epoch": 0.15192027224112786, "grad_norm": 1.224844393871019, "learning_rate": 1.9963716467187992e-05, "loss": 0.701055645942688, "step": 625 }, { "epoch": 0.15216334467671366, "grad_norm": 1.1772580565130777, "learning_rate": 1.996337357200117e-05, "loss": 0.8780609369277954, "step": 626 }, { "epoch": 0.15240641711229946, "grad_norm": 1.245984483543194, "learning_rate": 1.9963029067143595e-05, "loss": 0.6402156949043274, "step": 627 }, { "epoch": 0.15264948954788526, "grad_norm": 1.3109395766495369, "learning_rate": 1.9962682952670927e-05, "loss": 0.7409800887107849, "step": 628 }, { "epoch": 0.15289256198347106, "grad_norm": 1.4215910316024971, "learning_rate": 1.9962335228639084e-05, "loss": 0.7982556819915771, "step": 629 }, { "epoch": 0.15313563441905687, "grad_norm": 1.3792150236748664, "learning_rate": 1.9961985895104248e-05, "loss": 0.7142238616943359, "step": 630 }, { "epoch": 0.1533787068546427, "grad_norm": 1.4048591759740043, "learning_rate": 1.9961634952122853e-05, "loss": 0.9089837074279785, "step": 631 }, { "epoch": 0.1536217792902285, "grad_norm": 1.3693812262776266, "learning_rate": 1.9961282399751594e-05, "loss": 0.7895857691764832, "step": 632 }, { "epoch": 0.1538648517258143, "grad_norm": 1.3015034887530101, "learning_rate": 1.9960928238047436e-05, "loss": 0.6957216262817383, "step": 633 }, { "epoch": 0.1541079241614001, "grad_norm": 1.2349016475030155, "learning_rate": 1.996057246706759e-05, "loss": 0.7316403388977051, "step": 634 }, { "epoch": 0.1543509965969859, "grad_norm": 1.121278174326018, "learning_rate": 1.9960215086869538e-05, "loss": 0.6353187561035156, "step": 635 }, { "epoch": 0.1545940690325717, "grad_norm": 1.3138187180006446, "learning_rate": 1.995985609751102e-05, "loss": 0.8138369917869568, "step": 636 }, { "epoch": 0.15483714146815752, "grad_norm": 1.7608446353443954, "learning_rate": 1.9959495499050027e-05, "loss": 0.8303531408309937, "step": 637 }, { "epoch": 0.15508021390374332, "grad_norm": 1.3239468325184445, "learning_rate": 1.9959133291544824e-05, "loss": 0.6586894392967224, "step": 638 }, { "epoch": 0.15532328633932913, "grad_norm": 1.5697905556174228, "learning_rate": 1.995876947505392e-05, "loss": 0.7215726375579834, "step": 639 }, { "epoch": 0.15556635877491493, "grad_norm": 1.1824038998598267, "learning_rate": 1.9958404049636103e-05, "loss": 0.7229310870170593, "step": 640 }, { "epoch": 0.15580943121050073, "grad_norm": 1.4170203989705246, "learning_rate": 1.9958037015350404e-05, "loss": 0.6562180519104004, "step": 641 }, { "epoch": 0.15605250364608653, "grad_norm": 1.4259399214651685, "learning_rate": 1.9957668372256126e-05, "loss": 0.7005153894424438, "step": 642 }, { "epoch": 0.15629557608167233, "grad_norm": 1.2079905850396528, "learning_rate": 1.995729812041282e-05, "loss": 0.833433210849762, "step": 643 }, { "epoch": 0.15653864851725816, "grad_norm": 1.3600685527563023, "learning_rate": 1.9956926259880306e-05, "loss": 0.8431481122970581, "step": 644 }, { "epoch": 0.15678172095284396, "grad_norm": 1.5580273660166413, "learning_rate": 1.9956552790718663e-05, "loss": 0.8271298408508301, "step": 645 }, { "epoch": 0.15702479338842976, "grad_norm": 1.3836013944011991, "learning_rate": 1.995617771298823e-05, "loss": 0.5935542583465576, "step": 646 }, { "epoch": 0.15726786582401556, "grad_norm": 1.2851479447911553, "learning_rate": 1.9955801026749598e-05, "loss": 0.712337076663971, "step": 647 }, { "epoch": 0.15751093825960136, "grad_norm": 1.544081290923422, "learning_rate": 1.9955422732063626e-05, "loss": 0.6450101733207703, "step": 648 }, { "epoch": 0.15775401069518716, "grad_norm": 1.4211896189507953, "learning_rate": 1.9955042828991433e-05, "loss": 0.7137094736099243, "step": 649 }, { "epoch": 0.15799708313077296, "grad_norm": 1.3146017912443966, "learning_rate": 1.9954661317594398e-05, "loss": 0.6726163625717163, "step": 650 }, { "epoch": 0.1582401555663588, "grad_norm": 1.29652511565198, "learning_rate": 1.995427819793415e-05, "loss": 0.769389271736145, "step": 651 }, { "epoch": 0.1584832280019446, "grad_norm": 1.3521133732762989, "learning_rate": 1.9953893470072592e-05, "loss": 0.761976957321167, "step": 652 }, { "epoch": 0.1587263004375304, "grad_norm": 1.182021112398883, "learning_rate": 1.995350713407188e-05, "loss": 0.7065908908843994, "step": 653 }, { "epoch": 0.1589693728731162, "grad_norm": 1.2818798043098125, "learning_rate": 1.9953119189994422e-05, "loss": 0.6712616682052612, "step": 654 }, { "epoch": 0.159212445308702, "grad_norm": 1.2055599156522012, "learning_rate": 1.9952729637902903e-05, "loss": 0.7089111804962158, "step": 655 }, { "epoch": 0.1594555177442878, "grad_norm": 1.2562291888720607, "learning_rate": 1.9952338477860257e-05, "loss": 0.6865633726119995, "step": 656 }, { "epoch": 0.1596985901798736, "grad_norm": 1.2781067547712979, "learning_rate": 1.9951945709929675e-05, "loss": 0.6119183897972107, "step": 657 }, { "epoch": 0.15994166261545942, "grad_norm": 1.2728642520683122, "learning_rate": 1.9951551334174616e-05, "loss": 0.7437613010406494, "step": 658 }, { "epoch": 0.16018473505104522, "grad_norm": 1.3456588766189932, "learning_rate": 1.9951155350658792e-05, "loss": 0.7903302311897278, "step": 659 }, { "epoch": 0.16042780748663102, "grad_norm": 1.4868276130637557, "learning_rate": 1.995075775944618e-05, "loss": 0.9019405841827393, "step": 660 }, { "epoch": 0.16067087992221682, "grad_norm": 1.35988506081264, "learning_rate": 1.9950358560601014e-05, "loss": 0.7340210676193237, "step": 661 }, { "epoch": 0.16091395235780262, "grad_norm": 1.231661388968633, "learning_rate": 1.9949957754187785e-05, "loss": 0.6469219923019409, "step": 662 }, { "epoch": 0.16115702479338842, "grad_norm": 1.0483582592992535, "learning_rate": 1.9949555340271254e-05, "loss": 0.7349631786346436, "step": 663 }, { "epoch": 0.16140009722897422, "grad_norm": 1.1738392679721574, "learning_rate": 1.9949151318916427e-05, "loss": 0.7847577333450317, "step": 664 }, { "epoch": 0.16164316966456005, "grad_norm": 1.4493910724468126, "learning_rate": 1.994874569018858e-05, "loss": 0.7863664627075195, "step": 665 }, { "epoch": 0.16188624210014585, "grad_norm": 1.4427866973808954, "learning_rate": 1.9948338454153246e-05, "loss": 0.8297349214553833, "step": 666 }, { "epoch": 0.16212931453573165, "grad_norm": 1.337667710995936, "learning_rate": 1.994792961087622e-05, "loss": 0.6671820282936096, "step": 667 }, { "epoch": 0.16237238697131745, "grad_norm": 1.2777947884542102, "learning_rate": 1.994751916042355e-05, "loss": 0.6374891400337219, "step": 668 }, { "epoch": 0.16261545940690325, "grad_norm": 1.5007546406325378, "learning_rate": 1.994710710286155e-05, "loss": 0.6449031829833984, "step": 669 }, { "epoch": 0.16285853184248905, "grad_norm": 1.2972559148688296, "learning_rate": 1.994669343825679e-05, "loss": 0.8347816467285156, "step": 670 }, { "epoch": 0.16310160427807488, "grad_norm": 1.5101799612396185, "learning_rate": 1.9946278166676103e-05, "loss": 0.7626581192016602, "step": 671 }, { "epoch": 0.16334467671366068, "grad_norm": 1.4790114468069664, "learning_rate": 1.994586128818658e-05, "loss": 0.8175934553146362, "step": 672 }, { "epoch": 0.16358774914924648, "grad_norm": 1.1824910736653258, "learning_rate": 1.994544280285557e-05, "loss": 0.7499988675117493, "step": 673 }, { "epoch": 0.16383082158483228, "grad_norm": 1.293043963910582, "learning_rate": 1.9945022710750682e-05, "loss": 0.7456365823745728, "step": 674 }, { "epoch": 0.16407389402041808, "grad_norm": 1.1732739549857976, "learning_rate": 1.994460101193979e-05, "loss": 0.7100453972816467, "step": 675 }, { "epoch": 0.16431696645600388, "grad_norm": 1.3812349795021273, "learning_rate": 1.994417770649102e-05, "loss": 0.8912467956542969, "step": 676 }, { "epoch": 0.16456003889158968, "grad_norm": 1.451477226672852, "learning_rate": 1.994375279447276e-05, "loss": 0.6647613048553467, "step": 677 }, { "epoch": 0.1648031113271755, "grad_norm": 1.491279023945506, "learning_rate": 1.9943326275953658e-05, "loss": 0.8079770803451538, "step": 678 }, { "epoch": 0.1650461837627613, "grad_norm": 1.411000664251662, "learning_rate": 1.9942898151002626e-05, "loss": 0.9490137100219727, "step": 679 }, { "epoch": 0.1652892561983471, "grad_norm": 1.2602048300996056, "learning_rate": 1.9942468419688827e-05, "loss": 0.7458792924880981, "step": 680 }, { "epoch": 0.1655323286339329, "grad_norm": 1.2563742670451483, "learning_rate": 1.9942037082081688e-05, "loss": 0.6962332725524902, "step": 681 }, { "epoch": 0.1657754010695187, "grad_norm": 1.5689774772186988, "learning_rate": 1.9941604138250898e-05, "loss": 0.7320666313171387, "step": 682 }, { "epoch": 0.1660184735051045, "grad_norm": 1.3295680222195363, "learning_rate": 1.99411695882664e-05, "loss": 0.7697545289993286, "step": 683 }, { "epoch": 0.16626154594069031, "grad_norm": 1.231079552837733, "learning_rate": 1.9940733432198406e-05, "loss": 0.6577486991882324, "step": 684 }, { "epoch": 0.16650461837627614, "grad_norm": 1.3452095900738334, "learning_rate": 1.9940295670117373e-05, "loss": 0.7305715084075928, "step": 685 }, { "epoch": 0.16674769081186194, "grad_norm": 1.4894396370412593, "learning_rate": 1.9939856302094024e-05, "loss": 0.7899668216705322, "step": 686 }, { "epoch": 0.16699076324744774, "grad_norm": 1.3651139219362936, "learning_rate": 1.9939415328199353e-05, "loss": 0.9621508121490479, "step": 687 }, { "epoch": 0.16723383568303354, "grad_norm": 1.4682152036598715, "learning_rate": 1.9938972748504594e-05, "loss": 0.7684491872787476, "step": 688 }, { "epoch": 0.16747690811861934, "grad_norm": 1.2361015377076912, "learning_rate": 1.9938528563081254e-05, "loss": 0.6241808533668518, "step": 689 }, { "epoch": 0.16771998055420514, "grad_norm": 1.1894367457328576, "learning_rate": 1.9938082772001094e-05, "loss": 0.6822741031646729, "step": 690 }, { "epoch": 0.16796305298979095, "grad_norm": 1.279210688692875, "learning_rate": 1.9937635375336134e-05, "loss": 0.693436861038208, "step": 691 }, { "epoch": 0.16820612542537677, "grad_norm": 1.4778583075434866, "learning_rate": 1.993718637315866e-05, "loss": 0.8159948587417603, "step": 692 }, { "epoch": 0.16844919786096257, "grad_norm": 1.5213051174880683, "learning_rate": 1.9936735765541202e-05, "loss": 0.740787148475647, "step": 693 }, { "epoch": 0.16869227029654837, "grad_norm": 1.3156075165031076, "learning_rate": 1.9936283552556574e-05, "loss": 0.5268902778625488, "step": 694 }, { "epoch": 0.16893534273213417, "grad_norm": 1.6839192278232082, "learning_rate": 1.993582973427782e-05, "loss": 0.5598750710487366, "step": 695 }, { "epoch": 0.16917841516771998, "grad_norm": 1.3847885349763855, "learning_rate": 1.993537431077827e-05, "loss": 0.7869741916656494, "step": 696 }, { "epoch": 0.16942148760330578, "grad_norm": 1.290863823294302, "learning_rate": 1.9934917282131497e-05, "loss": 0.6188144087791443, "step": 697 }, { "epoch": 0.16966456003889158, "grad_norm": 1.4190003588698161, "learning_rate": 1.9934458648411333e-05, "loss": 0.7879056930541992, "step": 698 }, { "epoch": 0.1699076324744774, "grad_norm": 1.5301914649037593, "learning_rate": 1.9933998409691883e-05, "loss": 0.6538465619087219, "step": 699 }, { "epoch": 0.1701507049100632, "grad_norm": 1.5115103500377405, "learning_rate": 1.9933536566047504e-05, "loss": 0.8802556395530701, "step": 700 }, { "epoch": 0.170393777345649, "grad_norm": 1.1973140239312114, "learning_rate": 1.9933073117552798e-05, "loss": 0.7256383895874023, "step": 701 }, { "epoch": 0.1706368497812348, "grad_norm": 1.2607991296728283, "learning_rate": 1.993260806428265e-05, "loss": 0.8517094850540161, "step": 702 }, { "epoch": 0.1708799222168206, "grad_norm": 1.6695458048351364, "learning_rate": 1.993214140631219e-05, "loss": 0.7663816809654236, "step": 703 }, { "epoch": 0.1711229946524064, "grad_norm": 1.0542101674106656, "learning_rate": 1.9931673143716816e-05, "loss": 0.626041054725647, "step": 704 }, { "epoch": 0.17136606708799224, "grad_norm": 1.4050465146012066, "learning_rate": 1.993120327657217e-05, "loss": 0.6519328355789185, "step": 705 }, { "epoch": 0.17160913952357804, "grad_norm": 1.2238692581517083, "learning_rate": 1.993073180495417e-05, "loss": 0.7415466904640198, "step": 706 }, { "epoch": 0.17185221195916384, "grad_norm": 1.3799676664071672, "learning_rate": 1.9930258728938983e-05, "loss": 0.7214398384094238, "step": 707 }, { "epoch": 0.17209528439474964, "grad_norm": 1.2504781183246485, "learning_rate": 1.992978404860304e-05, "loss": 0.7790747880935669, "step": 708 }, { "epoch": 0.17233835683033544, "grad_norm": 1.3755023828804547, "learning_rate": 1.9929307764023032e-05, "loss": 0.782207727432251, "step": 709 }, { "epoch": 0.17258142926592124, "grad_norm": 1.269091108784611, "learning_rate": 1.9928829875275906e-05, "loss": 0.7907794713973999, "step": 710 }, { "epoch": 0.17282450170150704, "grad_norm": 1.3764737444967474, "learning_rate": 1.9928350382438873e-05, "loss": 0.8974685668945312, "step": 711 }, { "epoch": 0.17306757413709287, "grad_norm": 1.361723523061707, "learning_rate": 1.9927869285589387e-05, "loss": 0.7602565288543701, "step": 712 }, { "epoch": 0.17331064657267867, "grad_norm": 1.1848126544653033, "learning_rate": 1.9927386584805184e-05, "loss": 0.726420521736145, "step": 713 }, { "epoch": 0.17355371900826447, "grad_norm": 1.522531588256821, "learning_rate": 1.9926902280164248e-05, "loss": 0.6588485240936279, "step": 714 }, { "epoch": 0.17379679144385027, "grad_norm": 1.1254960521515778, "learning_rate": 1.9926416371744814e-05, "loss": 0.6049520969390869, "step": 715 }, { "epoch": 0.17403986387943607, "grad_norm": 1.0489239545669646, "learning_rate": 1.9925928859625395e-05, "loss": 0.49786385893821716, "step": 716 }, { "epoch": 0.17428293631502187, "grad_norm": 1.249065303562916, "learning_rate": 1.992543974388475e-05, "loss": 0.7156479954719543, "step": 717 }, { "epoch": 0.17452600875060767, "grad_norm": 1.4667559943521773, "learning_rate": 1.9924949024601896e-05, "loss": 0.7554510235786438, "step": 718 }, { "epoch": 0.1747690811861935, "grad_norm": 1.3003662087735726, "learning_rate": 1.9924456701856116e-05, "loss": 0.6905642151832581, "step": 719 }, { "epoch": 0.1750121536217793, "grad_norm": 1.278014637767266, "learning_rate": 1.9923962775726953e-05, "loss": 0.7029860019683838, "step": 720 }, { "epoch": 0.1752552260573651, "grad_norm": 1.2244212382771325, "learning_rate": 1.9923467246294196e-05, "loss": 0.689764142036438, "step": 721 }, { "epoch": 0.1754982984929509, "grad_norm": 1.160863416893451, "learning_rate": 1.9922970113637913e-05, "loss": 0.7338434457778931, "step": 722 }, { "epoch": 0.1757413709285367, "grad_norm": 1.3508636536231464, "learning_rate": 1.9922471377838408e-05, "loss": 0.7068380117416382, "step": 723 }, { "epoch": 0.1759844433641225, "grad_norm": 1.3457245842623722, "learning_rate": 1.9921971038976266e-05, "loss": 0.75289386510849, "step": 724 }, { "epoch": 0.1762275157997083, "grad_norm": 1.3094149715406302, "learning_rate": 1.992146909713232e-05, "loss": 0.7537500858306885, "step": 725 }, { "epoch": 0.17647058823529413, "grad_norm": 1.2394291198364629, "learning_rate": 1.992096555238766e-05, "loss": 0.7119944095611572, "step": 726 }, { "epoch": 0.17671366067087993, "grad_norm": 1.5851974192120077, "learning_rate": 1.9920460404823636e-05, "loss": 0.7720211148262024, "step": 727 }, { "epoch": 0.17695673310646573, "grad_norm": 1.3286241337277722, "learning_rate": 1.991995365452186e-05, "loss": 0.6430656909942627, "step": 728 }, { "epoch": 0.17719980554205153, "grad_norm": 1.5342876106476968, "learning_rate": 1.9919445301564208e-05, "loss": 0.8547232747077942, "step": 729 }, { "epoch": 0.17744287797763733, "grad_norm": 1.4232704065874164, "learning_rate": 1.9918935346032806e-05, "loss": 0.6823368072509766, "step": 730 }, { "epoch": 0.17768595041322313, "grad_norm": 1.4779228041335233, "learning_rate": 1.9918423788010037e-05, "loss": 0.7233890295028687, "step": 731 }, { "epoch": 0.17792902284880893, "grad_norm": 1.0653150247553282, "learning_rate": 1.9917910627578554e-05, "loss": 0.6798068284988403, "step": 732 }, { "epoch": 0.17817209528439476, "grad_norm": 1.5704277716610797, "learning_rate": 1.9917395864821262e-05, "loss": 0.7163656949996948, "step": 733 }, { "epoch": 0.17841516771998056, "grad_norm": 1.256380034565514, "learning_rate": 1.9916879499821316e-05, "loss": 0.6609053611755371, "step": 734 }, { "epoch": 0.17865824015556636, "grad_norm": 1.300374373020181, "learning_rate": 1.9916361532662152e-05, "loss": 0.6997771263122559, "step": 735 }, { "epoch": 0.17890131259115216, "grad_norm": 1.5111366826774304, "learning_rate": 1.9915841963427447e-05, "loss": 0.7730140089988708, "step": 736 }, { "epoch": 0.17914438502673796, "grad_norm": 1.1530393414464766, "learning_rate": 1.991532079220114e-05, "loss": 0.5726412534713745, "step": 737 }, { "epoch": 0.17938745746232376, "grad_norm": 1.3205416737334452, "learning_rate": 1.9914798019067432e-05, "loss": 0.7310463786125183, "step": 738 }, { "epoch": 0.17963052989790956, "grad_norm": 1.378826154921754, "learning_rate": 1.9914273644110785e-05, "loss": 0.7463237047195435, "step": 739 }, { "epoch": 0.1798736023334954, "grad_norm": 1.437567086240816, "learning_rate": 1.9913747667415915e-05, "loss": 0.83243727684021, "step": 740 }, { "epoch": 0.1801166747690812, "grad_norm": 1.524088354479365, "learning_rate": 1.9913220089067794e-05, "loss": 0.7947529554367065, "step": 741 }, { "epoch": 0.180359747204667, "grad_norm": 1.8350698466671589, "learning_rate": 1.9912690909151663e-05, "loss": 0.8002713322639465, "step": 742 }, { "epoch": 0.1806028196402528, "grad_norm": 1.4373079409165568, "learning_rate": 1.9912160127753005e-05, "loss": 0.8011069297790527, "step": 743 }, { "epoch": 0.1808458920758386, "grad_norm": 1.2785017684495328, "learning_rate": 1.9911627744957588e-05, "loss": 0.5784062743186951, "step": 744 }, { "epoch": 0.1810889645114244, "grad_norm": 1.3371299980622846, "learning_rate": 1.991109376085141e-05, "loss": 0.7644676566123962, "step": 745 }, { "epoch": 0.18133203694701022, "grad_norm": 1.4013517001933016, "learning_rate": 1.9910558175520748e-05, "loss": 0.6947500705718994, "step": 746 }, { "epoch": 0.18157510938259602, "grad_norm": 1.2259040758930542, "learning_rate": 1.9910020989052124e-05, "loss": 0.5965503454208374, "step": 747 }, { "epoch": 0.18181818181818182, "grad_norm": 1.2003564464642573, "learning_rate": 1.9909482201532334e-05, "loss": 0.9018765091896057, "step": 748 }, { "epoch": 0.18206125425376762, "grad_norm": 1.4410239715340472, "learning_rate": 1.990894181304842e-05, "loss": 0.76260906457901, "step": 749 }, { "epoch": 0.18230432668935342, "grad_norm": 1.4640152028164597, "learning_rate": 1.9908399823687684e-05, "loss": 0.7481579780578613, "step": 750 }, { "epoch": 0.18254739912493922, "grad_norm": 1.261192550665943, "learning_rate": 1.9907856233537696e-05, "loss": 0.7346898317337036, "step": 751 }, { "epoch": 0.18279047156052503, "grad_norm": 1.139856371808916, "learning_rate": 1.9907311042686264e-05, "loss": 0.6093918085098267, "step": 752 }, { "epoch": 0.18303354399611085, "grad_norm": 1.3179913233221427, "learning_rate": 1.9906764251221484e-05, "loss": 0.668302059173584, "step": 753 }, { "epoch": 0.18327661643169665, "grad_norm": 1.3189771329334898, "learning_rate": 1.9906215859231686e-05, "loss": 0.7522164583206177, "step": 754 }, { "epoch": 0.18351968886728245, "grad_norm": 1.2578166445660346, "learning_rate": 1.9905665866805467e-05, "loss": 0.8581958413124084, "step": 755 }, { "epoch": 0.18376276130286825, "grad_norm": 1.6043971835664195, "learning_rate": 1.990511427403169e-05, "loss": 0.8684770464897156, "step": 756 }, { "epoch": 0.18400583373845406, "grad_norm": 1.5045500465305097, "learning_rate": 1.9904561080999466e-05, "loss": 0.8592104911804199, "step": 757 }, { "epoch": 0.18424890617403986, "grad_norm": 1.386802605577251, "learning_rate": 1.9904006287798164e-05, "loss": 0.752244234085083, "step": 758 }, { "epoch": 0.18449197860962566, "grad_norm": 1.377369644253823, "learning_rate": 1.9903449894517425e-05, "loss": 0.8606182336807251, "step": 759 }, { "epoch": 0.18473505104521148, "grad_norm": 1.2791733205779325, "learning_rate": 1.990289190124713e-05, "loss": 0.7119693756103516, "step": 760 }, { "epoch": 0.18497812348079729, "grad_norm": 1.338136023759425, "learning_rate": 1.990233230807743e-05, "loss": 0.7841159105300903, "step": 761 }, { "epoch": 0.18522119591638309, "grad_norm": 1.2321758844246122, "learning_rate": 1.9901771115098737e-05, "loss": 0.7450445890426636, "step": 762 }, { "epoch": 0.1854642683519689, "grad_norm": 1.5141795189452065, "learning_rate": 1.990120832240171e-05, "loss": 0.6196019649505615, "step": 763 }, { "epoch": 0.1857073407875547, "grad_norm": 1.5913574585316854, "learning_rate": 1.9900643930077275e-05, "loss": 0.9742822051048279, "step": 764 }, { "epoch": 0.1859504132231405, "grad_norm": 1.372792773927902, "learning_rate": 1.9900077938216623e-05, "loss": 0.7270567417144775, "step": 765 }, { "epoch": 0.1861934856587263, "grad_norm": 1.5084825766341428, "learning_rate": 1.9899510346911182e-05, "loss": 0.908442497253418, "step": 766 }, { "epoch": 0.18643655809431212, "grad_norm": 1.4185349412611736, "learning_rate": 1.989894115625266e-05, "loss": 0.6952733993530273, "step": 767 }, { "epoch": 0.18667963052989792, "grad_norm": 1.245761788037849, "learning_rate": 1.9898370366333012e-05, "loss": 0.6337939500808716, "step": 768 }, { "epoch": 0.18692270296548372, "grad_norm": 1.1642606482963536, "learning_rate": 1.9897797977244452e-05, "loss": 0.5364607572555542, "step": 769 }, { "epoch": 0.18716577540106952, "grad_norm": 1.5033478360149395, "learning_rate": 1.989722398907946e-05, "loss": 0.6837893724441528, "step": 770 }, { "epoch": 0.18740884783665532, "grad_norm": 1.060681040790922, "learning_rate": 1.9896648401930764e-05, "loss": 0.47675275802612305, "step": 771 }, { "epoch": 0.18765192027224112, "grad_norm": 1.2638858638669928, "learning_rate": 1.9896071215891356e-05, "loss": 0.6793588399887085, "step": 772 }, { "epoch": 0.18789499270782692, "grad_norm": 2.0412923190585266, "learning_rate": 1.9895492431054492e-05, "loss": 0.7396180629730225, "step": 773 }, { "epoch": 0.18813806514341275, "grad_norm": 1.2434441168951906, "learning_rate": 1.9894912047513667e-05, "loss": 0.6405335664749146, "step": 774 }, { "epoch": 0.18838113757899855, "grad_norm": 1.4995498471036939, "learning_rate": 1.989433006536266e-05, "loss": 0.7392584681510925, "step": 775 }, { "epoch": 0.18862421001458435, "grad_norm": 1.435536158750293, "learning_rate": 1.9893746484695483e-05, "loss": 0.7195072174072266, "step": 776 }, { "epoch": 0.18886728245017015, "grad_norm": 1.311675730383538, "learning_rate": 1.989316130560643e-05, "loss": 0.7674720287322998, "step": 777 }, { "epoch": 0.18911035488575595, "grad_norm": 1.5470498251560547, "learning_rate": 1.9892574528190033e-05, "loss": 0.6745381355285645, "step": 778 }, { "epoch": 0.18935342732134175, "grad_norm": 1.3911431727725032, "learning_rate": 1.9891986152541097e-05, "loss": 0.740946352481842, "step": 779 }, { "epoch": 0.18959649975692758, "grad_norm": 1.4800357747105208, "learning_rate": 1.9891396178754675e-05, "loss": 0.6192719340324402, "step": 780 }, { "epoch": 0.18983957219251338, "grad_norm": 1.3239599881524209, "learning_rate": 1.989080460692609e-05, "loss": 0.6775915622711182, "step": 781 }, { "epoch": 0.19008264462809918, "grad_norm": 1.547306857063923, "learning_rate": 1.9890211437150904e-05, "loss": 0.739737868309021, "step": 782 }, { "epoch": 0.19032571706368498, "grad_norm": 1.2635244846871365, "learning_rate": 1.988961666952496e-05, "loss": 0.6342816352844238, "step": 783 }, { "epoch": 0.19056878949927078, "grad_norm": 1.3020551958136233, "learning_rate": 1.988902030414434e-05, "loss": 0.8675709962844849, "step": 784 }, { "epoch": 0.19081186193485658, "grad_norm": 1.4230634074884687, "learning_rate": 1.9888422341105397e-05, "loss": 0.8335037231445312, "step": 785 }, { "epoch": 0.19105493437044238, "grad_norm": 1.2876997464024322, "learning_rate": 1.9887822780504736e-05, "loss": 0.5909615755081177, "step": 786 }, { "epoch": 0.1912980068060282, "grad_norm": 1.2648696687468295, "learning_rate": 1.9887221622439216e-05, "loss": 0.774581253528595, "step": 787 }, { "epoch": 0.191541079241614, "grad_norm": 1.4532624922625088, "learning_rate": 1.988661886700597e-05, "loss": 0.7638965845108032, "step": 788 }, { "epoch": 0.1917841516771998, "grad_norm": 1.2545824693889378, "learning_rate": 1.988601451430237e-05, "loss": 0.6616711616516113, "step": 789 }, { "epoch": 0.1920272241127856, "grad_norm": 1.3536738056369086, "learning_rate": 1.9885408564426055e-05, "loss": 0.7360454797744751, "step": 790 }, { "epoch": 0.1922702965483714, "grad_norm": 1.472586086497695, "learning_rate": 1.988480101747493e-05, "loss": 0.591162919998169, "step": 791 }, { "epoch": 0.1925133689839572, "grad_norm": 1.2999971750013062, "learning_rate": 1.9884191873547136e-05, "loss": 0.71843421459198, "step": 792 }, { "epoch": 0.192756441419543, "grad_norm": 1.4165503969215307, "learning_rate": 1.9883581132741093e-05, "loss": 0.873205304145813, "step": 793 }, { "epoch": 0.19299951385512884, "grad_norm": 1.3420744040089898, "learning_rate": 1.9882968795155474e-05, "loss": 0.640188992023468, "step": 794 }, { "epoch": 0.19324258629071464, "grad_norm": 1.3481880519755438, "learning_rate": 1.9882354860889202e-05, "loss": 0.6607303619384766, "step": 795 }, { "epoch": 0.19348565872630044, "grad_norm": 1.440841067578767, "learning_rate": 1.988173933004147e-05, "loss": 0.8559519648551941, "step": 796 }, { "epoch": 0.19372873116188624, "grad_norm": 1.3089730654919662, "learning_rate": 1.9881122202711717e-05, "loss": 0.7992410659790039, "step": 797 }, { "epoch": 0.19397180359747204, "grad_norm": 1.462108461861893, "learning_rate": 1.9880503478999648e-05, "loss": 0.716071605682373, "step": 798 }, { "epoch": 0.19421487603305784, "grad_norm": 1.646628764760811, "learning_rate": 1.987988315900522e-05, "loss": 0.7976816892623901, "step": 799 }, { "epoch": 0.19445794846864364, "grad_norm": 1.4290670402747447, "learning_rate": 1.9879261242828657e-05, "loss": 0.7929345965385437, "step": 800 }, { "epoch": 0.19470102090422947, "grad_norm": 1.578169738372141, "learning_rate": 1.9878637730570428e-05, "loss": 0.6978554129600525, "step": 801 }, { "epoch": 0.19494409333981527, "grad_norm": 1.3510275979479425, "learning_rate": 1.9878012622331272e-05, "loss": 0.6865094900131226, "step": 802 }, { "epoch": 0.19518716577540107, "grad_norm": 2.0411040354897345, "learning_rate": 1.9877385918212178e-05, "loss": 1.0242081880569458, "step": 803 }, { "epoch": 0.19543023821098687, "grad_norm": 1.4692529328703805, "learning_rate": 1.98767576183144e-05, "loss": 0.7384994029998779, "step": 804 }, { "epoch": 0.19567331064657267, "grad_norm": 1.3541176486681703, "learning_rate": 1.9876127722739436e-05, "loss": 0.7819240689277649, "step": 805 }, { "epoch": 0.19591638308215847, "grad_norm": 1.4171046070496063, "learning_rate": 1.987549623158906e-05, "loss": 0.7142330408096313, "step": 806 }, { "epoch": 0.19615945551774427, "grad_norm": 1.171302921689087, "learning_rate": 1.987486314496529e-05, "loss": 0.6266553401947021, "step": 807 }, { "epoch": 0.1964025279533301, "grad_norm": 1.3607768135913894, "learning_rate": 1.9874228462970415e-05, "loss": 0.8654865026473999, "step": 808 }, { "epoch": 0.1966456003889159, "grad_norm": 1.1897516564844897, "learning_rate": 1.9873592185706963e-05, "loss": 0.6725162267684937, "step": 809 }, { "epoch": 0.1968886728245017, "grad_norm": 1.1809324644896666, "learning_rate": 1.9872954313277736e-05, "loss": 0.5845681428909302, "step": 810 }, { "epoch": 0.1971317452600875, "grad_norm": 1.3403835412582712, "learning_rate": 1.9872314845785784e-05, "loss": 0.695735514163971, "step": 811 }, { "epoch": 0.1973748176956733, "grad_norm": 1.2550090815630934, "learning_rate": 1.9871673783334424e-05, "loss": 0.801529049873352, "step": 812 }, { "epoch": 0.1976178901312591, "grad_norm": 1.18609703071799, "learning_rate": 1.987103112602722e-05, "loss": 0.6542948484420776, "step": 813 }, { "epoch": 0.19786096256684493, "grad_norm": 1.4709132830322733, "learning_rate": 1.9870386873968002e-05, "loss": 0.7538840770721436, "step": 814 }, { "epoch": 0.19810403500243073, "grad_norm": 1.5363465956213669, "learning_rate": 1.9869741027260857e-05, "loss": 0.7425649166107178, "step": 815 }, { "epoch": 0.19834710743801653, "grad_norm": 1.334877276183261, "learning_rate": 1.986909358601012e-05, "loss": 0.782099187374115, "step": 816 }, { "epoch": 0.19859017987360233, "grad_norm": 1.4385395273450061, "learning_rate": 1.9868444550320397e-05, "loss": 0.698225736618042, "step": 817 }, { "epoch": 0.19883325230918814, "grad_norm": 1.2271318763320438, "learning_rate": 1.9867793920296544e-05, "loss": 0.7999368906021118, "step": 818 }, { "epoch": 0.19907632474477394, "grad_norm": 1.5446651326866048, "learning_rate": 1.9867141696043673e-05, "loss": 0.9080746173858643, "step": 819 }, { "epoch": 0.19931939718035974, "grad_norm": 1.5260041727770717, "learning_rate": 1.9866487877667162e-05, "loss": 0.7750047445297241, "step": 820 }, { "epoch": 0.19956246961594556, "grad_norm": 1.5233212930983175, "learning_rate": 1.9865832465272636e-05, "loss": 0.805847704410553, "step": 821 }, { "epoch": 0.19980554205153137, "grad_norm": 1.4331946721799638, "learning_rate": 1.9865175458965984e-05, "loss": 0.7903405427932739, "step": 822 }, { "epoch": 0.20004861448711717, "grad_norm": 1.1917457940587823, "learning_rate": 1.9864516858853356e-05, "loss": 0.6587435603141785, "step": 823 }, { "epoch": 0.20029168692270297, "grad_norm": 1.1278780389665954, "learning_rate": 1.9863856665041147e-05, "loss": 0.7216470241546631, "step": 824 }, { "epoch": 0.20053475935828877, "grad_norm": 1.104221755259535, "learning_rate": 1.986319487763602e-05, "loss": 0.7169907093048096, "step": 825 }, { "epoch": 0.20077783179387457, "grad_norm": 1.3938610397235232, "learning_rate": 1.9862531496744894e-05, "loss": 0.6143417954444885, "step": 826 }, { "epoch": 0.20102090422946037, "grad_norm": 1.2961566992005884, "learning_rate": 1.9861866522474944e-05, "loss": 0.7592300176620483, "step": 827 }, { "epoch": 0.2012639766650462, "grad_norm": 1.3695454889685281, "learning_rate": 1.9861199954933604e-05, "loss": 0.6898641586303711, "step": 828 }, { "epoch": 0.201507049100632, "grad_norm": 1.1900308398976154, "learning_rate": 1.986053179422856e-05, "loss": 0.6018139123916626, "step": 829 }, { "epoch": 0.2017501215362178, "grad_norm": 1.6051794055505288, "learning_rate": 1.9859862040467757e-05, "loss": 0.7747509479522705, "step": 830 }, { "epoch": 0.2019931939718036, "grad_norm": 1.3415812122800324, "learning_rate": 1.985919069375941e-05, "loss": 0.7955561876296997, "step": 831 }, { "epoch": 0.2022362664073894, "grad_norm": 1.2804462886450363, "learning_rate": 1.985851775421197e-05, "loss": 0.6289246082305908, "step": 832 }, { "epoch": 0.2024793388429752, "grad_norm": 1.4362014794993665, "learning_rate": 1.9857843221934158e-05, "loss": 0.6771042346954346, "step": 833 }, { "epoch": 0.202722411278561, "grad_norm": 1.5336564528494288, "learning_rate": 1.985716709703496e-05, "loss": 0.7407082915306091, "step": 834 }, { "epoch": 0.20296548371414683, "grad_norm": 1.1630778917059468, "learning_rate": 1.9856489379623598e-05, "loss": 0.6462366580963135, "step": 835 }, { "epoch": 0.20320855614973263, "grad_norm": 1.1376612549534173, "learning_rate": 1.985581006980957e-05, "loss": 0.6294227242469788, "step": 836 }, { "epoch": 0.20345162858531843, "grad_norm": 1.4148298342412617, "learning_rate": 1.9855129167702625e-05, "loss": 0.6545461416244507, "step": 837 }, { "epoch": 0.20369470102090423, "grad_norm": 1.4994494857143632, "learning_rate": 1.985444667341277e-05, "loss": 0.6621991991996765, "step": 838 }, { "epoch": 0.20393777345649003, "grad_norm": 1.1883675806215173, "learning_rate": 1.9853762587050257e-05, "loss": 0.6651076078414917, "step": 839 }, { "epoch": 0.20418084589207583, "grad_norm": 1.056438822830065, "learning_rate": 1.985307690872562e-05, "loss": 0.6343739032745361, "step": 840 }, { "epoch": 0.20442391832766163, "grad_norm": 1.39178088601338, "learning_rate": 1.9852389638549624e-05, "loss": 0.7873599529266357, "step": 841 }, { "epoch": 0.20466699076324746, "grad_norm": 1.3235887330443217, "learning_rate": 1.9851700776633313e-05, "loss": 0.548672080039978, "step": 842 }, { "epoch": 0.20491006319883326, "grad_norm": 1.3005178380642208, "learning_rate": 1.9851010323087976e-05, "loss": 0.5697452425956726, "step": 843 }, { "epoch": 0.20515313563441906, "grad_norm": 1.3176976668680602, "learning_rate": 1.9850318278025163e-05, "loss": 0.7445207834243774, "step": 844 }, { "epoch": 0.20539620807000486, "grad_norm": 1.2511739484223359, "learning_rate": 1.9849624641556676e-05, "loss": 0.6657576560974121, "step": 845 }, { "epoch": 0.20563928050559066, "grad_norm": 1.459668796641968, "learning_rate": 1.984892941379458e-05, "loss": 0.8286976218223572, "step": 846 }, { "epoch": 0.20588235294117646, "grad_norm": 1.3435056696846186, "learning_rate": 1.9848232594851196e-05, "loss": 0.8859411478042603, "step": 847 }, { "epoch": 0.20612542537676226, "grad_norm": 1.2502581312450274, "learning_rate": 1.9847534184839102e-05, "loss": 0.6942890882492065, "step": 848 }, { "epoch": 0.2063684978123481, "grad_norm": 1.3983496364306731, "learning_rate": 1.9846834183871128e-05, "loss": 0.7418497204780579, "step": 849 }, { "epoch": 0.2066115702479339, "grad_norm": 1.3297874348216763, "learning_rate": 1.984613259206037e-05, "loss": 0.6727807521820068, "step": 850 }, { "epoch": 0.2068546426835197, "grad_norm": 1.3178742265385193, "learning_rate": 1.9845429409520177e-05, "loss": 0.7161140441894531, "step": 851 }, { "epoch": 0.2070977151191055, "grad_norm": 1.463800888236399, "learning_rate": 1.984472463636415e-05, "loss": 0.6982980370521545, "step": 852 }, { "epoch": 0.2073407875546913, "grad_norm": 1.3162463957944288, "learning_rate": 1.9844018272706155e-05, "loss": 0.7797882556915283, "step": 853 }, { "epoch": 0.2075838599902771, "grad_norm": 1.2563397280353752, "learning_rate": 1.9843310318660308e-05, "loss": 0.7775920629501343, "step": 854 }, { "epoch": 0.20782693242586292, "grad_norm": 1.2366317969597844, "learning_rate": 1.984260077434099e-05, "loss": 0.8178389668464661, "step": 855 }, { "epoch": 0.20807000486144872, "grad_norm": 1.5171564266528754, "learning_rate": 1.9841889639862824e-05, "loss": 0.889212965965271, "step": 856 }, { "epoch": 0.20831307729703452, "grad_norm": 1.4867349034334385, "learning_rate": 1.9841176915340713e-05, "loss": 0.7833266258239746, "step": 857 }, { "epoch": 0.20855614973262032, "grad_norm": 1.446120853165858, "learning_rate": 1.9840462600889794e-05, "loss": 0.9117094874382019, "step": 858 }, { "epoch": 0.20879922216820612, "grad_norm": 1.3760620003321327, "learning_rate": 1.983974669662548e-05, "loss": 0.6953755617141724, "step": 859 }, { "epoch": 0.20904229460379192, "grad_norm": 1.1057962773978613, "learning_rate": 1.9839029202663424e-05, "loss": 0.6437187194824219, "step": 860 }, { "epoch": 0.20928536703937772, "grad_norm": 1.2839092417916549, "learning_rate": 1.9838310119119545e-05, "loss": 0.8944956064224243, "step": 861 }, { "epoch": 0.20952843947496355, "grad_norm": 1.6103885040805694, "learning_rate": 1.9837589446110018e-05, "loss": 0.8092770576477051, "step": 862 }, { "epoch": 0.20977151191054935, "grad_norm": 1.2909503650806258, "learning_rate": 1.9836867183751275e-05, "loss": 0.8158417344093323, "step": 863 }, { "epoch": 0.21001458434613515, "grad_norm": 1.2089489034843075, "learning_rate": 1.9836143332160005e-05, "loss": 0.8655812740325928, "step": 864 }, { "epoch": 0.21025765678172095, "grad_norm": 1.242143213316164, "learning_rate": 1.9835417891453147e-05, "loss": 0.7539697885513306, "step": 865 }, { "epoch": 0.21050072921730675, "grad_norm": 1.1487811632608234, "learning_rate": 1.983469086174791e-05, "loss": 0.8445411324501038, "step": 866 }, { "epoch": 0.21074380165289255, "grad_norm": 2.7519450747795253, "learning_rate": 1.9833962243161746e-05, "loss": 0.770806074142456, "step": 867 }, { "epoch": 0.21098687408847835, "grad_norm": 1.3319505213646605, "learning_rate": 1.983323203581237e-05, "loss": 0.8696194887161255, "step": 868 }, { "epoch": 0.21122994652406418, "grad_norm": 1.2743217201131782, "learning_rate": 1.983250023981776e-05, "loss": 0.7898038625717163, "step": 869 }, { "epoch": 0.21147301895964998, "grad_norm": 1.304567025894724, "learning_rate": 1.9831766855296132e-05, "loss": 0.7310956716537476, "step": 870 }, { "epoch": 0.21171609139523578, "grad_norm": 1.1309385612472382, "learning_rate": 1.9831031882365983e-05, "loss": 0.6995487213134766, "step": 871 }, { "epoch": 0.21195916383082158, "grad_norm": 1.1051437743044132, "learning_rate": 1.983029532114605e-05, "loss": 0.636116087436676, "step": 872 }, { "epoch": 0.21220223626640738, "grad_norm": 1.2829205858974075, "learning_rate": 1.982955717175533e-05, "loss": 0.8520756959915161, "step": 873 }, { "epoch": 0.21244530870199319, "grad_norm": 1.2639622583564951, "learning_rate": 1.9828817434313076e-05, "loss": 0.6569581031799316, "step": 874 }, { "epoch": 0.21268838113757899, "grad_norm": 1.3872017772495455, "learning_rate": 1.98280761089388e-05, "loss": 0.7238610982894897, "step": 875 }, { "epoch": 0.21293145357316481, "grad_norm": 1.3003231750171713, "learning_rate": 1.9827333195752275e-05, "loss": 0.6685988903045654, "step": 876 }, { "epoch": 0.21317452600875061, "grad_norm": 1.3173326672634522, "learning_rate": 1.9826588694873515e-05, "loss": 0.7482847571372986, "step": 877 }, { "epoch": 0.21341759844433641, "grad_norm": 1.2068208842713242, "learning_rate": 1.982584260642281e-05, "loss": 0.8192899227142334, "step": 878 }, { "epoch": 0.21366067087992222, "grad_norm": 1.3173189174643338, "learning_rate": 1.9825094930520693e-05, "loss": 0.6880079507827759, "step": 879 }, { "epoch": 0.21390374331550802, "grad_norm": 1.3174719571015434, "learning_rate": 1.982434566728796e-05, "loss": 0.7216985821723938, "step": 880 }, { "epoch": 0.21414681575109382, "grad_norm": 1.1691185924303837, "learning_rate": 1.982359481684566e-05, "loss": 0.7620134353637695, "step": 881 }, { "epoch": 0.21438988818667962, "grad_norm": 1.2025104687333885, "learning_rate": 1.9822842379315097e-05, "loss": 0.7272018194198608, "step": 882 }, { "epoch": 0.21463296062226545, "grad_norm": 1.4615153082408887, "learning_rate": 1.9822088354817834e-05, "loss": 0.7869863510131836, "step": 883 }, { "epoch": 0.21487603305785125, "grad_norm": 1.4260621200710963, "learning_rate": 1.9821332743475696e-05, "loss": 0.7602064609527588, "step": 884 }, { "epoch": 0.21511910549343705, "grad_norm": 1.3439320547046751, "learning_rate": 1.982057554541075e-05, "loss": 0.7487697601318359, "step": 885 }, { "epoch": 0.21536217792902285, "grad_norm": 1.2580397950486493, "learning_rate": 1.9819816760745335e-05, "loss": 0.6650209426879883, "step": 886 }, { "epoch": 0.21560525036460865, "grad_norm": 1.2374541468005742, "learning_rate": 1.9819056389602038e-05, "loss": 0.7556195259094238, "step": 887 }, { "epoch": 0.21584832280019445, "grad_norm": 1.2992403948548734, "learning_rate": 1.98182944321037e-05, "loss": 0.6978678703308105, "step": 888 }, { "epoch": 0.21609139523578028, "grad_norm": 1.4331905453054115, "learning_rate": 1.9817530888373427e-05, "loss": 0.796538233757019, "step": 889 }, { "epoch": 0.21633446767136608, "grad_norm": 1.3379849848290593, "learning_rate": 1.9816765758534573e-05, "loss": 0.8749228715896606, "step": 890 }, { "epoch": 0.21657754010695188, "grad_norm": 1.3433786334613698, "learning_rate": 1.9815999042710754e-05, "loss": 0.7528415322303772, "step": 891 }, { "epoch": 0.21682061254253768, "grad_norm": 1.5008328516851797, "learning_rate": 1.9815230741025837e-05, "loss": 0.5735311508178711, "step": 892 }, { "epoch": 0.21706368497812348, "grad_norm": 1.504832390744354, "learning_rate": 1.981446085360395e-05, "loss": 0.6692565083503723, "step": 893 }, { "epoch": 0.21730675741370928, "grad_norm": 1.202795174145097, "learning_rate": 1.9813689380569474e-05, "loss": 0.5808964967727661, "step": 894 }, { "epoch": 0.21754982984929508, "grad_norm": 1.2045901731797102, "learning_rate": 1.9812916322047044e-05, "loss": 0.6903188228607178, "step": 895 }, { "epoch": 0.2177929022848809, "grad_norm": 1.2526609469393934, "learning_rate": 1.981214167816156e-05, "loss": 0.7607161998748779, "step": 896 }, { "epoch": 0.2180359747204667, "grad_norm": 1.5417094853859215, "learning_rate": 1.981136544903817e-05, "loss": 0.6240228414535522, "step": 897 }, { "epoch": 0.2182790471560525, "grad_norm": 1.2910758179099115, "learning_rate": 1.981058763480228e-05, "loss": 0.6902782917022705, "step": 898 }, { "epoch": 0.2185221195916383, "grad_norm": 1.1733891843636202, "learning_rate": 1.9809808235579554e-05, "loss": 0.7836716175079346, "step": 899 }, { "epoch": 0.2187651920272241, "grad_norm": 1.3496263873532572, "learning_rate": 1.9809027251495913e-05, "loss": 0.7194226980209351, "step": 900 }, { "epoch": 0.2190082644628099, "grad_norm": 1.3873423466841543, "learning_rate": 1.980824468267753e-05, "loss": 0.6390199661254883, "step": 901 }, { "epoch": 0.2192513368983957, "grad_norm": 1.242606304141634, "learning_rate": 1.980746052925083e-05, "loss": 0.6453275680541992, "step": 902 }, { "epoch": 0.21949440933398154, "grad_norm": 1.4067150869065026, "learning_rate": 1.980667479134251e-05, "loss": 0.7001842856407166, "step": 903 }, { "epoch": 0.21973748176956734, "grad_norm": 1.0762246791648147, "learning_rate": 1.9805887469079505e-05, "loss": 0.5779491662979126, "step": 904 }, { "epoch": 0.21998055420515314, "grad_norm": 1.6021758572834284, "learning_rate": 1.9805098562589017e-05, "loss": 0.9043482542037964, "step": 905 }, { "epoch": 0.22022362664073894, "grad_norm": 1.3601831805068512, "learning_rate": 1.98043080719985e-05, "loss": 0.7055695056915283, "step": 906 }, { "epoch": 0.22046669907632474, "grad_norm": 1.3040032219258277, "learning_rate": 1.9803515997435667e-05, "loss": 0.7873729467391968, "step": 907 }, { "epoch": 0.22070977151191054, "grad_norm": 1.4235462449955514, "learning_rate": 1.980272233902848e-05, "loss": 0.867031455039978, "step": 908 }, { "epoch": 0.22095284394749634, "grad_norm": 1.391530365155797, "learning_rate": 1.980192709690517e-05, "loss": 0.7561995983123779, "step": 909 }, { "epoch": 0.22119591638308217, "grad_norm": 1.3405305067249094, "learning_rate": 1.9801130271194208e-05, "loss": 0.9125165343284607, "step": 910 }, { "epoch": 0.22143898881866797, "grad_norm": 1.3069888778250425, "learning_rate": 1.9800331862024328e-05, "loss": 0.7249249815940857, "step": 911 }, { "epoch": 0.22168206125425377, "grad_norm": 1.1878381442211134, "learning_rate": 1.9799531869524525e-05, "loss": 0.6460082530975342, "step": 912 }, { "epoch": 0.22192513368983957, "grad_norm": 1.145865557868039, "learning_rate": 1.9798730293824036e-05, "loss": 0.6819336414337158, "step": 913 }, { "epoch": 0.22216820612542537, "grad_norm": 1.244895439353974, "learning_rate": 1.9797927135052376e-05, "loss": 0.7429498434066772, "step": 914 }, { "epoch": 0.22241127856101117, "grad_norm": 1.4505207428973503, "learning_rate": 1.979712239333929e-05, "loss": 0.6616028547286987, "step": 915 }, { "epoch": 0.22265435099659697, "grad_norm": 1.158940954270746, "learning_rate": 1.97963160688148e-05, "loss": 0.5789275169372559, "step": 916 }, { "epoch": 0.2228974234321828, "grad_norm": 1.5071669070626357, "learning_rate": 1.9795508161609166e-05, "loss": 0.6183947324752808, "step": 917 }, { "epoch": 0.2231404958677686, "grad_norm": 1.3569753501858528, "learning_rate": 1.979469867185292e-05, "loss": 0.7974079847335815, "step": 918 }, { "epoch": 0.2233835683033544, "grad_norm": 1.3951555498574604, "learning_rate": 1.9793887599676838e-05, "loss": 0.7426151037216187, "step": 919 }, { "epoch": 0.2236266407389402, "grad_norm": 1.3662368222202916, "learning_rate": 1.9793074945211958e-05, "loss": 0.7204954624176025, "step": 920 }, { "epoch": 0.223869713174526, "grad_norm": 1.350657702754289, "learning_rate": 1.9792260708589573e-05, "loss": 0.8365118503570557, "step": 921 }, { "epoch": 0.2241127856101118, "grad_norm": 1.2524437665348644, "learning_rate": 1.9791444889941226e-05, "loss": 0.5988146066665649, "step": 922 }, { "epoch": 0.22435585804569763, "grad_norm": 1.2676695241198843, "learning_rate": 1.979062748939872e-05, "loss": 0.6612472534179688, "step": 923 }, { "epoch": 0.22459893048128343, "grad_norm": 1.3944716667597452, "learning_rate": 1.978980850709412e-05, "loss": 0.7233140468597412, "step": 924 }, { "epoch": 0.22484200291686923, "grad_norm": 1.3388774450633354, "learning_rate": 1.978898794315973e-05, "loss": 0.7873908281326294, "step": 925 }, { "epoch": 0.22508507535245503, "grad_norm": 1.1642962567844999, "learning_rate": 1.9788165797728124e-05, "loss": 0.7375775575637817, "step": 926 }, { "epoch": 0.22532814778804083, "grad_norm": 1.372422521850774, "learning_rate": 1.978734207093213e-05, "loss": 0.690101146697998, "step": 927 }, { "epoch": 0.22557122022362663, "grad_norm": 1.25970350555771, "learning_rate": 1.9786516762904823e-05, "loss": 0.670258641242981, "step": 928 }, { "epoch": 0.22581429265921243, "grad_norm": 1.3067165215632652, "learning_rate": 1.9785689873779543e-05, "loss": 0.6073256731033325, "step": 929 }, { "epoch": 0.22605736509479826, "grad_norm": 1.3462620536539938, "learning_rate": 1.978486140368988e-05, "loss": 0.7466164827346802, "step": 930 }, { "epoch": 0.22630043753038406, "grad_norm": 1.3188687472698257, "learning_rate": 1.9784031352769677e-05, "loss": 0.8783237338066101, "step": 931 }, { "epoch": 0.22654350996596986, "grad_norm": 1.303795183886654, "learning_rate": 1.9783199721153038e-05, "loss": 0.6909281015396118, "step": 932 }, { "epoch": 0.22678658240155566, "grad_norm": 1.4165655979357807, "learning_rate": 1.9782366508974325e-05, "loss": 0.722525954246521, "step": 933 }, { "epoch": 0.22702965483714146, "grad_norm": 1.5709477643147691, "learning_rate": 1.9781531716368146e-05, "loss": 0.9628894925117493, "step": 934 }, { "epoch": 0.22727272727272727, "grad_norm": 1.8662767514312038, "learning_rate": 1.9780695343469368e-05, "loss": 1.0064678192138672, "step": 935 }, { "epoch": 0.22751579970831307, "grad_norm": 1.1242781755719002, "learning_rate": 1.9779857390413117e-05, "loss": 0.6288625001907349, "step": 936 }, { "epoch": 0.2277588721438989, "grad_norm": 1.5594772167369242, "learning_rate": 1.9779017857334773e-05, "loss": 0.7072699069976807, "step": 937 }, { "epoch": 0.2280019445794847, "grad_norm": 1.1267240370238585, "learning_rate": 1.9778176744369966e-05, "loss": 0.6966124773025513, "step": 938 }, { "epoch": 0.2282450170150705, "grad_norm": 1.5493489947080423, "learning_rate": 1.977733405165459e-05, "loss": 0.7501797080039978, "step": 939 }, { "epoch": 0.2284880894506563, "grad_norm": 1.3448090001845372, "learning_rate": 1.977648977932479e-05, "loss": 0.8580607771873474, "step": 940 }, { "epoch": 0.2287311618862421, "grad_norm": 1.3683247374929532, "learning_rate": 1.9775643927516956e-05, "loss": 0.870781421661377, "step": 941 }, { "epoch": 0.2289742343218279, "grad_norm": 1.1522187948988154, "learning_rate": 1.9774796496367753e-05, "loss": 0.622211217880249, "step": 942 }, { "epoch": 0.2292173067574137, "grad_norm": 1.383808063178314, "learning_rate": 1.9773947486014084e-05, "loss": 0.7008114457130432, "step": 943 }, { "epoch": 0.22946037919299953, "grad_norm": 1.2473867433355268, "learning_rate": 1.977309689659312e-05, "loss": 0.8740452527999878, "step": 944 }, { "epoch": 0.22970345162858533, "grad_norm": 1.29904407991708, "learning_rate": 1.977224472824228e-05, "loss": 0.7280395030975342, "step": 945 }, { "epoch": 0.22994652406417113, "grad_norm": 1.1641309648226676, "learning_rate": 1.9771390981099233e-05, "loss": 0.627068281173706, "step": 946 }, { "epoch": 0.23018959649975693, "grad_norm": 1.164423900929844, "learning_rate": 1.977053565530192e-05, "loss": 0.7887665033340454, "step": 947 }, { "epoch": 0.23043266893534273, "grad_norm": 1.406342271372917, "learning_rate": 1.9769678750988516e-05, "loss": 0.7681204080581665, "step": 948 }, { "epoch": 0.23067574137092853, "grad_norm": 1.23844385812877, "learning_rate": 1.9768820268297467e-05, "loss": 0.6285523772239685, "step": 949 }, { "epoch": 0.23091881380651433, "grad_norm": 1.2280338124408874, "learning_rate": 1.976796020736747e-05, "loss": 0.8319772481918335, "step": 950 }, { "epoch": 0.23116188624210016, "grad_norm": 1.5787706153097025, "learning_rate": 1.976709856833747e-05, "loss": 0.8864173293113708, "step": 951 }, { "epoch": 0.23140495867768596, "grad_norm": 1.249286983800931, "learning_rate": 1.9766235351346678e-05, "loss": 0.722494900226593, "step": 952 }, { "epoch": 0.23164803111327176, "grad_norm": 1.3479741087331638, "learning_rate": 1.9765370556534555e-05, "loss": 0.6917625665664673, "step": 953 }, { "epoch": 0.23189110354885756, "grad_norm": 1.5551389654389889, "learning_rate": 1.9764504184040806e-05, "loss": 0.6916577816009521, "step": 954 }, { "epoch": 0.23213417598444336, "grad_norm": 1.2190478152570325, "learning_rate": 1.976363623400541e-05, "loss": 0.7268685102462769, "step": 955 }, { "epoch": 0.23237724842002916, "grad_norm": 1.3836120466185111, "learning_rate": 1.976276670656859e-05, "loss": 0.727493166923523, "step": 956 }, { "epoch": 0.232620320855615, "grad_norm": 1.3395668537996372, "learning_rate": 1.976189560187083e-05, "loss": 0.7122955322265625, "step": 957 }, { "epoch": 0.2328633932912008, "grad_norm": 1.3080160471334084, "learning_rate": 1.9761022920052855e-05, "loss": 0.6532683968544006, "step": 958 }, { "epoch": 0.2331064657267866, "grad_norm": 1.3852776246106013, "learning_rate": 1.976014866125566e-05, "loss": 0.7678740620613098, "step": 959 }, { "epoch": 0.2333495381623724, "grad_norm": 1.2031496399426718, "learning_rate": 1.975927282562049e-05, "loss": 0.6631525754928589, "step": 960 }, { "epoch": 0.2335926105979582, "grad_norm": 1.0389471165976947, "learning_rate": 1.9758395413288846e-05, "loss": 0.514837920665741, "step": 961 }, { "epoch": 0.233835683033544, "grad_norm": 1.2832621288038728, "learning_rate": 1.9757516424402476e-05, "loss": 0.6304782629013062, "step": 962 }, { "epoch": 0.2340787554691298, "grad_norm": 1.0940269181297213, "learning_rate": 1.975663585910339e-05, "loss": 0.5953484773635864, "step": 963 }, { "epoch": 0.23432182790471562, "grad_norm": 1.329943662623608, "learning_rate": 1.9755753717533854e-05, "loss": 0.6835619211196899, "step": 964 }, { "epoch": 0.23456490034030142, "grad_norm": 1.7166591020045767, "learning_rate": 1.9754869999836385e-05, "loss": 0.7361167669296265, "step": 965 }, { "epoch": 0.23480797277588722, "grad_norm": 1.0951234901067108, "learning_rate": 1.975398470615375e-05, "loss": 0.7202695608139038, "step": 966 }, { "epoch": 0.23505104521147302, "grad_norm": 1.1988099440181441, "learning_rate": 1.9753097836628985e-05, "loss": 0.735649824142456, "step": 967 }, { "epoch": 0.23529411764705882, "grad_norm": 1.5660571538209629, "learning_rate": 1.9752209391405365e-05, "loss": 0.812909722328186, "step": 968 }, { "epoch": 0.23553719008264462, "grad_norm": 1.4638111192409868, "learning_rate": 1.975131937062643e-05, "loss": 0.8296381831169128, "step": 969 }, { "epoch": 0.23578026251823042, "grad_norm": 1.6289800836692387, "learning_rate": 1.9750427774435966e-05, "loss": 0.7894198894500732, "step": 970 }, { "epoch": 0.23602333495381625, "grad_norm": 0.9708302954385315, "learning_rate": 1.974953460297802e-05, "loss": 0.49267131090164185, "step": 971 }, { "epoch": 0.23626640738940205, "grad_norm": 1.2136469783944932, "learning_rate": 1.9748639856396896e-05, "loss": 0.7378112077713013, "step": 972 }, { "epoch": 0.23650947982498785, "grad_norm": 1.2921528925590682, "learning_rate": 1.9747743534837143e-05, "loss": 0.704924464225769, "step": 973 }, { "epoch": 0.23675255226057365, "grad_norm": 1.2872534481999052, "learning_rate": 1.974684563844357e-05, "loss": 0.6917473077774048, "step": 974 }, { "epoch": 0.23699562469615945, "grad_norm": 1.3529302127101284, "learning_rate": 1.9745946167361245e-05, "loss": 0.6651617288589478, "step": 975 }, { "epoch": 0.23723869713174525, "grad_norm": 1.3581287344845963, "learning_rate": 1.974504512173548e-05, "loss": 0.6544866561889648, "step": 976 }, { "epoch": 0.23748176956733105, "grad_norm": 1.323506688296927, "learning_rate": 1.974414250171185e-05, "loss": 0.6719216108322144, "step": 977 }, { "epoch": 0.23772484200291688, "grad_norm": 1.2682474414772944, "learning_rate": 1.974323830743618e-05, "loss": 0.6026521921157837, "step": 978 }, { "epoch": 0.23796791443850268, "grad_norm": 1.2049065357714526, "learning_rate": 1.974233253905455e-05, "loss": 0.6734176874160767, "step": 979 }, { "epoch": 0.23821098687408848, "grad_norm": 1.4131644488597574, "learning_rate": 1.9741425196713294e-05, "loss": 0.6382790207862854, "step": 980 }, { "epoch": 0.23845405930967428, "grad_norm": 1.305673091702104, "learning_rate": 1.9740516280559005e-05, "loss": 0.6550023555755615, "step": 981 }, { "epoch": 0.23869713174526008, "grad_norm": 1.3330655223480996, "learning_rate": 1.9739605790738525e-05, "loss": 0.767907977104187, "step": 982 }, { "epoch": 0.23894020418084588, "grad_norm": 1.2775355473184342, "learning_rate": 1.9738693727398948e-05, "loss": 0.6630259156227112, "step": 983 }, { "epoch": 0.23918327661643168, "grad_norm": 1.3321894640192193, "learning_rate": 1.9737780090687628e-05, "loss": 0.673583447933197, "step": 984 }, { "epoch": 0.2394263490520175, "grad_norm": 1.3195405995515797, "learning_rate": 1.9736864880752172e-05, "loss": 0.7884759306907654, "step": 985 }, { "epoch": 0.2396694214876033, "grad_norm": 1.1336930815392217, "learning_rate": 1.9735948097740443e-05, "loss": 0.6026610136032104, "step": 986 }, { "epoch": 0.2399124939231891, "grad_norm": 1.230924987352827, "learning_rate": 1.9735029741800548e-05, "loss": 0.6142780780792236, "step": 987 }, { "epoch": 0.2401555663587749, "grad_norm": 1.2694501504529914, "learning_rate": 1.9734109813080864e-05, "loss": 0.5979452133178711, "step": 988 }, { "epoch": 0.24039863879436071, "grad_norm": 1.4433835775349164, "learning_rate": 1.973318831173001e-05, "loss": 0.7448813915252686, "step": 989 }, { "epoch": 0.24064171122994651, "grad_norm": 1.4726108027696008, "learning_rate": 1.9732265237896855e-05, "loss": 0.7537441253662109, "step": 990 }, { "epoch": 0.24088478366553231, "grad_norm": 1.1920364771474063, "learning_rate": 1.973134059173054e-05, "loss": 0.6468384861946106, "step": 991 }, { "epoch": 0.24112785610111814, "grad_norm": 1.1543756550947528, "learning_rate": 1.973041437338045e-05, "loss": 0.683343768119812, "step": 992 }, { "epoch": 0.24137092853670394, "grad_norm": 1.4082346589987882, "learning_rate": 1.972948658299622e-05, "loss": 0.6329813599586487, "step": 993 }, { "epoch": 0.24161400097228974, "grad_norm": 1.3570360106667267, "learning_rate": 1.9728557220727738e-05, "loss": 0.8212186694145203, "step": 994 }, { "epoch": 0.24185707340787554, "grad_norm": 1.5510349678580315, "learning_rate": 1.9727626286725157e-05, "loss": 0.7185649871826172, "step": 995 }, { "epoch": 0.24210014584346135, "grad_norm": 1.162639526889306, "learning_rate": 1.9726693781138876e-05, "loss": 0.7890462875366211, "step": 996 }, { "epoch": 0.24234321827904715, "grad_norm": 1.3423348896568914, "learning_rate": 1.972575970411955e-05, "loss": 0.7040582299232483, "step": 997 }, { "epoch": 0.24258629071463297, "grad_norm": 1.3408416549777893, "learning_rate": 1.972482405581809e-05, "loss": 0.705714762210846, "step": 998 }, { "epoch": 0.24282936315021877, "grad_norm": 1.2506089329689696, "learning_rate": 1.972388683638565e-05, "loss": 0.7233104705810547, "step": 999 }, { "epoch": 0.24307243558580457, "grad_norm": 1.357609444081543, "learning_rate": 1.9722948045973657e-05, "loss": 0.6491557955741882, "step": 1000 }, { "epoch": 0.24331550802139038, "grad_norm": 1.2945545242594487, "learning_rate": 1.972200768473377e-05, "loss": 0.584478497505188, "step": 1001 }, { "epoch": 0.24355858045697618, "grad_norm": 1.4838159126360146, "learning_rate": 1.972106575281792e-05, "loss": 0.7545945644378662, "step": 1002 }, { "epoch": 0.24380165289256198, "grad_norm": 1.2393312669173298, "learning_rate": 1.9720122250378285e-05, "loss": 0.6833957433700562, "step": 1003 }, { "epoch": 0.24404472532814778, "grad_norm": 1.1918818274663296, "learning_rate": 1.9719177177567287e-05, "loss": 0.7089433073997498, "step": 1004 }, { "epoch": 0.2442877977637336, "grad_norm": 1.5035214305497309, "learning_rate": 1.971823053453762e-05, "loss": 0.7561455368995667, "step": 1005 }, { "epoch": 0.2445308701993194, "grad_norm": 1.3440380393720912, "learning_rate": 1.971728232144222e-05, "loss": 0.7483510971069336, "step": 1006 }, { "epoch": 0.2447739426349052, "grad_norm": 1.4091576103056462, "learning_rate": 1.971633253843428e-05, "loss": 0.73689866065979, "step": 1007 }, { "epoch": 0.245017015070491, "grad_norm": 1.2666578745304686, "learning_rate": 1.9715381185667243e-05, "loss": 0.5807225108146667, "step": 1008 }, { "epoch": 0.2452600875060768, "grad_norm": 1.7004864724756676, "learning_rate": 1.971442826329481e-05, "loss": 0.6910436153411865, "step": 1009 }, { "epoch": 0.2455031599416626, "grad_norm": 1.4477383714193366, "learning_rate": 1.9713473771470933e-05, "loss": 0.7238394021987915, "step": 1010 }, { "epoch": 0.2457462323772484, "grad_norm": 1.3453249008147872, "learning_rate": 1.9712517710349824e-05, "loss": 0.7372255921363831, "step": 1011 }, { "epoch": 0.24598930481283424, "grad_norm": 1.1233173808293913, "learning_rate": 1.9711560080085932e-05, "loss": 0.6579360961914062, "step": 1012 }, { "epoch": 0.24623237724842004, "grad_norm": 1.4228811213857777, "learning_rate": 1.971060088083398e-05, "loss": 0.9169907569885254, "step": 1013 }, { "epoch": 0.24647544968400584, "grad_norm": 1.3292112496629338, "learning_rate": 1.9709640112748936e-05, "loss": 0.6709243059158325, "step": 1014 }, { "epoch": 0.24671852211959164, "grad_norm": 1.2462719743340254, "learning_rate": 1.9708677775986013e-05, "loss": 0.640405535697937, "step": 1015 }, { "epoch": 0.24696159455517744, "grad_norm": 1.6897308724997475, "learning_rate": 1.970771387070069e-05, "loss": 0.995284914970398, "step": 1016 }, { "epoch": 0.24720466699076324, "grad_norm": 1.2657159428765328, "learning_rate": 1.9706748397048693e-05, "loss": 0.72222900390625, "step": 1017 }, { "epoch": 0.24744773942634904, "grad_norm": 1.230512643586113, "learning_rate": 1.9705781355186006e-05, "loss": 0.6147174835205078, "step": 1018 }, { "epoch": 0.24769081186193487, "grad_norm": 1.3682987162644484, "learning_rate": 1.9704812745268855e-05, "loss": 0.582120954990387, "step": 1019 }, { "epoch": 0.24793388429752067, "grad_norm": 1.2819908850471806, "learning_rate": 1.970384256745374e-05, "loss": 0.6876624822616577, "step": 1020 }, { "epoch": 0.24817695673310647, "grad_norm": 1.3244392335122213, "learning_rate": 1.9702870821897385e-05, "loss": 0.6178814768791199, "step": 1021 }, { "epoch": 0.24842002916869227, "grad_norm": 1.444122015926929, "learning_rate": 1.97018975087568e-05, "loss": 0.8352615833282471, "step": 1022 }, { "epoch": 0.24866310160427807, "grad_norm": 1.2073478074196113, "learning_rate": 1.9700922628189222e-05, "loss": 0.6450971364974976, "step": 1023 }, { "epoch": 0.24890617403986387, "grad_norm": 1.2714006305875643, "learning_rate": 1.969994618035216e-05, "loss": 0.7238398790359497, "step": 1024 }, { "epoch": 0.24914924647544967, "grad_norm": 1.362842628398054, "learning_rate": 1.9698968165403357e-05, "loss": 0.7131849527359009, "step": 1025 }, { "epoch": 0.2493923189110355, "grad_norm": 1.293079628261536, "learning_rate": 1.9697988583500832e-05, "loss": 0.943638801574707, "step": 1026 }, { "epoch": 0.2496353913466213, "grad_norm": 1.494097867464259, "learning_rate": 1.9697007434802832e-05, "loss": 0.8177782297134399, "step": 1027 }, { "epoch": 0.2498784637822071, "grad_norm": 1.3625603872555503, "learning_rate": 1.9696024719467883e-05, "loss": 0.7169448137283325, "step": 1028 }, { "epoch": 0.25012153621779293, "grad_norm": 1.330137692785399, "learning_rate": 1.9695040437654744e-05, "loss": 0.8015018701553345, "step": 1029 }, { "epoch": 0.25036460865337873, "grad_norm": 1.4474739538783876, "learning_rate": 1.9694054589522435e-05, "loss": 0.7370535135269165, "step": 1030 }, { "epoch": 0.25060768108896453, "grad_norm": 1.1028926065335853, "learning_rate": 1.9693067175230235e-05, "loss": 0.6586948037147522, "step": 1031 }, { "epoch": 0.25085075352455033, "grad_norm": 1.3166241453558867, "learning_rate": 1.9692078194937657e-05, "loss": 0.828449010848999, "step": 1032 }, { "epoch": 0.25109382596013613, "grad_norm": 1.1501815688287609, "learning_rate": 1.969108764880449e-05, "loss": 0.7367572784423828, "step": 1033 }, { "epoch": 0.25133689839572193, "grad_norm": 1.6009864453414075, "learning_rate": 1.969009553699076e-05, "loss": 0.7721320390701294, "step": 1034 }, { "epoch": 0.25157997083130773, "grad_norm": 1.284861603228641, "learning_rate": 1.968910185965675e-05, "loss": 0.7400681376457214, "step": 1035 }, { "epoch": 0.25182304326689353, "grad_norm": 1.2607280564998316, "learning_rate": 1.968810661696301e-05, "loss": 0.7847380042076111, "step": 1036 }, { "epoch": 0.25206611570247933, "grad_norm": 1.2654664703630485, "learning_rate": 1.9687109809070314e-05, "loss": 0.5713176727294922, "step": 1037 }, { "epoch": 0.25230918813806513, "grad_norm": 1.5262508308078837, "learning_rate": 1.9686111436139712e-05, "loss": 0.6625922918319702, "step": 1038 }, { "epoch": 0.25255226057365093, "grad_norm": 1.2722414874320502, "learning_rate": 1.9685111498332504e-05, "loss": 0.7129775285720825, "step": 1039 }, { "epoch": 0.25279533300923673, "grad_norm": 1.5843243742364086, "learning_rate": 1.968410999581023e-05, "loss": 0.8006030917167664, "step": 1040 }, { "epoch": 0.25303840544482253, "grad_norm": 1.3108714655688232, "learning_rate": 1.9683106928734697e-05, "loss": 0.6976684331893921, "step": 1041 }, { "epoch": 0.25328147788040833, "grad_norm": 1.3461984913620317, "learning_rate": 1.968210229726796e-05, "loss": 0.7882629632949829, "step": 1042 }, { "epoch": 0.2535245503159942, "grad_norm": 1.2825128954385365, "learning_rate": 1.9681096101572325e-05, "loss": 0.6387531757354736, "step": 1043 }, { "epoch": 0.25376762275158, "grad_norm": 1.3237173690382993, "learning_rate": 1.968008834181035e-05, "loss": 0.6802419424057007, "step": 1044 }, { "epoch": 0.2540106951871658, "grad_norm": 1.5317100696788744, "learning_rate": 1.967907901814485e-05, "loss": 0.6645092964172363, "step": 1045 }, { "epoch": 0.2542537676227516, "grad_norm": 1.3384484032723352, "learning_rate": 1.9678068130738888e-05, "loss": 0.7638102769851685, "step": 1046 }, { "epoch": 0.2544968400583374, "grad_norm": 1.2655967730891358, "learning_rate": 1.967705567975578e-05, "loss": 0.7066786289215088, "step": 1047 }, { "epoch": 0.2547399124939232, "grad_norm": 1.2162734462511613, "learning_rate": 1.9676041665359103e-05, "loss": 0.713808536529541, "step": 1048 }, { "epoch": 0.254982984929509, "grad_norm": 1.1568887336227938, "learning_rate": 1.9675026087712676e-05, "loss": 0.6210687160491943, "step": 1049 }, { "epoch": 0.2552260573650948, "grad_norm": 1.4257030035635092, "learning_rate": 1.9674008946980572e-05, "loss": 0.6718668937683105, "step": 1050 }, { "epoch": 0.2554691298006806, "grad_norm": 1.1822988571328479, "learning_rate": 1.967299024332712e-05, "loss": 0.6374367475509644, "step": 1051 }, { "epoch": 0.2557122022362664, "grad_norm": 1.2689101770921016, "learning_rate": 1.9671969976916907e-05, "loss": 0.671148419380188, "step": 1052 }, { "epoch": 0.2559552746718522, "grad_norm": 1.23771902939707, "learning_rate": 1.9670948147914758e-05, "loss": 0.6492453813552856, "step": 1053 }, { "epoch": 0.256198347107438, "grad_norm": 1.263085023150389, "learning_rate": 1.9669924756485764e-05, "loss": 0.714105486869812, "step": 1054 }, { "epoch": 0.2564414195430238, "grad_norm": 1.3328010618276451, "learning_rate": 1.9668899802795257e-05, "loss": 0.8157996535301208, "step": 1055 }, { "epoch": 0.25668449197860965, "grad_norm": 1.3358943905203675, "learning_rate": 1.9667873287008835e-05, "loss": 0.7408866882324219, "step": 1056 }, { "epoch": 0.25692756441419545, "grad_norm": 1.1100666351339088, "learning_rate": 1.966684520929233e-05, "loss": 0.712753176689148, "step": 1057 }, { "epoch": 0.25717063684978125, "grad_norm": 1.1975023450613216, "learning_rate": 1.966581556981185e-05, "loss": 0.6031157374382019, "step": 1058 }, { "epoch": 0.25741370928536705, "grad_norm": 1.1696140534751676, "learning_rate": 1.9664784368733735e-05, "loss": 0.6785479784011841, "step": 1059 }, { "epoch": 0.25765678172095285, "grad_norm": 1.3872851942600046, "learning_rate": 1.9663751606224583e-05, "loss": 0.7467803955078125, "step": 1060 }, { "epoch": 0.25789985415653865, "grad_norm": 1.3246319111216065, "learning_rate": 1.9662717282451248e-05, "loss": 0.7580522298812866, "step": 1061 }, { "epoch": 0.25814292659212446, "grad_norm": 1.2360096440065076, "learning_rate": 1.9661681397580836e-05, "loss": 0.670934796333313, "step": 1062 }, { "epoch": 0.25838599902771026, "grad_norm": 1.3559046269077286, "learning_rate": 1.96606439517807e-05, "loss": 0.705123782157898, "step": 1063 }, { "epoch": 0.25862907146329606, "grad_norm": 1.350496843149349, "learning_rate": 1.9659604945218453e-05, "loss": 0.6685246229171753, "step": 1064 }, { "epoch": 0.25887214389888186, "grad_norm": 1.199920702782533, "learning_rate": 1.965856437806195e-05, "loss": 0.7157776355743408, "step": 1065 }, { "epoch": 0.25911521633446766, "grad_norm": 1.3088907147959554, "learning_rate": 1.965752225047931e-05, "loss": 0.8183010816574097, "step": 1066 }, { "epoch": 0.25935828877005346, "grad_norm": 1.3183326466497634, "learning_rate": 1.9656478562638895e-05, "loss": 0.7703589797019958, "step": 1067 }, { "epoch": 0.25960136120563926, "grad_norm": 1.6032201039661915, "learning_rate": 1.9655433314709317e-05, "loss": 0.8071427345275879, "step": 1068 }, { "epoch": 0.25984443364122506, "grad_norm": 1.4245513805734236, "learning_rate": 1.965438650685945e-05, "loss": 0.8223307132720947, "step": 1069 }, { "epoch": 0.2600875060768109, "grad_norm": 1.1996048137784394, "learning_rate": 1.9653338139258418e-05, "loss": 0.7251730561256409, "step": 1070 }, { "epoch": 0.2603305785123967, "grad_norm": 1.2201621237792926, "learning_rate": 1.9652288212075588e-05, "loss": 0.7626001238822937, "step": 1071 }, { "epoch": 0.2605736509479825, "grad_norm": 1.3254451947559733, "learning_rate": 1.965123672548059e-05, "loss": 0.7008861303329468, "step": 1072 }, { "epoch": 0.2608167233835683, "grad_norm": 1.428529230870143, "learning_rate": 1.9650183679643296e-05, "loss": 0.7036386728286743, "step": 1073 }, { "epoch": 0.2610597958191541, "grad_norm": 1.3261929768266185, "learning_rate": 1.964912907473384e-05, "loss": 0.7388180494308472, "step": 1074 }, { "epoch": 0.2613028682547399, "grad_norm": 1.3917642146567248, "learning_rate": 1.96480729109226e-05, "loss": 0.7426337599754333, "step": 1075 }, { "epoch": 0.2615459406903257, "grad_norm": 1.4716189661308399, "learning_rate": 1.9647015188380207e-05, "loss": 0.783772349357605, "step": 1076 }, { "epoch": 0.2617890131259115, "grad_norm": 1.2650099800438317, "learning_rate": 1.9645955907277544e-05, "loss": 0.6828936338424683, "step": 1077 }, { "epoch": 0.2620320855614973, "grad_norm": 1.1991781353391062, "learning_rate": 1.9644895067785755e-05, "loss": 0.5260371565818787, "step": 1078 }, { "epoch": 0.2622751579970831, "grad_norm": 1.2813793108555582, "learning_rate": 1.9643832670076222e-05, "loss": 0.7296266555786133, "step": 1079 }, { "epoch": 0.2625182304326689, "grad_norm": 1.2541617156164142, "learning_rate": 1.9642768714320584e-05, "loss": 0.6983985304832458, "step": 1080 }, { "epoch": 0.2627613028682547, "grad_norm": 1.1659379212837537, "learning_rate": 1.9641703200690735e-05, "loss": 0.6386688947677612, "step": 1081 }, { "epoch": 0.2630043753038405, "grad_norm": 1.3173920849964387, "learning_rate": 1.9640636129358816e-05, "loss": 0.7484480142593384, "step": 1082 }, { "epoch": 0.2632474477394264, "grad_norm": 1.509130655638839, "learning_rate": 1.9639567500497224e-05, "loss": 0.9588847160339355, "step": 1083 }, { "epoch": 0.2634905201750122, "grad_norm": 1.5065991964694885, "learning_rate": 1.963849731427861e-05, "loss": 0.7156849503517151, "step": 1084 }, { "epoch": 0.263733592610598, "grad_norm": 1.468075729702674, "learning_rate": 1.9637425570875857e-05, "loss": 0.8721189498901367, "step": 1085 }, { "epoch": 0.2639766650461838, "grad_norm": 1.5442174591419588, "learning_rate": 1.9636352270462132e-05, "loss": 0.7879074811935425, "step": 1086 }, { "epoch": 0.2642197374817696, "grad_norm": 1.2970738742994745, "learning_rate": 1.9635277413210826e-05, "loss": 0.6493673324584961, "step": 1087 }, { "epoch": 0.2644628099173554, "grad_norm": 1.5006325470973891, "learning_rate": 1.9634200999295592e-05, "loss": 0.7537651062011719, "step": 1088 }, { "epoch": 0.2647058823529412, "grad_norm": 1.3471468578583305, "learning_rate": 1.963312302889034e-05, "loss": 0.7515881061553955, "step": 1089 }, { "epoch": 0.264948954788527, "grad_norm": 1.4670845639111878, "learning_rate": 1.963204350216922e-05, "loss": 0.6617887020111084, "step": 1090 }, { "epoch": 0.2651920272241128, "grad_norm": 1.4008528157999196, "learning_rate": 1.9630962419306637e-05, "loss": 0.7486531138420105, "step": 1091 }, { "epoch": 0.2654350996596986, "grad_norm": 1.3989764645953002, "learning_rate": 1.9629879780477258e-05, "loss": 0.7168192863464355, "step": 1092 }, { "epoch": 0.2656781720952844, "grad_norm": 1.4198186074465513, "learning_rate": 1.9628795585855986e-05, "loss": 0.715804934501648, "step": 1093 }, { "epoch": 0.2659212445308702, "grad_norm": 1.1206641167655287, "learning_rate": 1.9627709835617985e-05, "loss": 0.7152094841003418, "step": 1094 }, { "epoch": 0.266164316966456, "grad_norm": 1.3881252597890306, "learning_rate": 1.9626622529938665e-05, "loss": 0.8965678215026855, "step": 1095 }, { "epoch": 0.2664073894020418, "grad_norm": 1.3300951526243208, "learning_rate": 1.9625533668993693e-05, "loss": 0.730974555015564, "step": 1096 }, { "epoch": 0.26665046183762764, "grad_norm": 1.3970341159512851, "learning_rate": 1.9624443252958986e-05, "loss": 0.7652744054794312, "step": 1097 }, { "epoch": 0.26689353427321344, "grad_norm": 1.1241594206669436, "learning_rate": 1.9623351282010704e-05, "loss": 0.7597827315330505, "step": 1098 }, { "epoch": 0.26713660670879924, "grad_norm": 1.1941940613768856, "learning_rate": 1.9622257756325266e-05, "loss": 0.7298950552940369, "step": 1099 }, { "epoch": 0.26737967914438504, "grad_norm": 1.3266478263037162, "learning_rate": 1.962116267607934e-05, "loss": 0.6328941583633423, "step": 1100 }, { "epoch": 0.26762275157997084, "grad_norm": 1.320279705665358, "learning_rate": 1.9620066041449854e-05, "loss": 0.7897543907165527, "step": 1101 }, { "epoch": 0.26786582401555664, "grad_norm": 1.1445792951308922, "learning_rate": 1.961896785261397e-05, "loss": 0.660751223564148, "step": 1102 }, { "epoch": 0.26810889645114244, "grad_norm": 1.1761749829271215, "learning_rate": 1.9617868109749113e-05, "loss": 0.7808287143707275, "step": 1103 }, { "epoch": 0.26835196888672824, "grad_norm": 1.2458072900808435, "learning_rate": 1.9616766813032955e-05, "loss": 0.7456170320510864, "step": 1104 }, { "epoch": 0.26859504132231404, "grad_norm": 1.4948466147379889, "learning_rate": 1.9615663962643422e-05, "loss": 0.6729317903518677, "step": 1105 }, { "epoch": 0.26883811375789984, "grad_norm": 1.1053860051896929, "learning_rate": 1.9614559558758692e-05, "loss": 0.7392110824584961, "step": 1106 }, { "epoch": 0.26908118619348564, "grad_norm": 1.0583695256540742, "learning_rate": 1.9613453601557184e-05, "loss": 0.5770108103752136, "step": 1107 }, { "epoch": 0.26932425862907144, "grad_norm": 1.1226783282372137, "learning_rate": 1.9612346091217582e-05, "loss": 0.6103205680847168, "step": 1108 }, { "epoch": 0.26956733106465725, "grad_norm": 1.430841897838905, "learning_rate": 1.961123702791881e-05, "loss": 0.7871972322463989, "step": 1109 }, { "epoch": 0.26981040350024305, "grad_norm": 1.2952485822623672, "learning_rate": 1.9610126411840048e-05, "loss": 0.7501130700111389, "step": 1110 }, { "epoch": 0.2700534759358289, "grad_norm": 1.6164162235992114, "learning_rate": 1.9609014243160726e-05, "loss": 0.7941956520080566, "step": 1111 }, { "epoch": 0.2702965483714147, "grad_norm": 1.4475009630751867, "learning_rate": 1.960790052206052e-05, "loss": 0.7974392771720886, "step": 1112 }, { "epoch": 0.2705396208070005, "grad_norm": 1.1508945452800277, "learning_rate": 1.9606785248719372e-05, "loss": 0.7107416987419128, "step": 1113 }, { "epoch": 0.2707826932425863, "grad_norm": 1.4201190289077672, "learning_rate": 1.9605668423317455e-05, "loss": 0.7939809560775757, "step": 1114 }, { "epoch": 0.2710257656781721, "grad_norm": 1.5943387473826909, "learning_rate": 1.9604550046035206e-05, "loss": 0.7721909284591675, "step": 1115 }, { "epoch": 0.2712688381137579, "grad_norm": 1.2498772363153476, "learning_rate": 1.9603430117053307e-05, "loss": 0.7269030809402466, "step": 1116 }, { "epoch": 0.2715119105493437, "grad_norm": 1.2828162865424018, "learning_rate": 1.9602308636552692e-05, "loss": 0.59881192445755, "step": 1117 }, { "epoch": 0.2717549829849295, "grad_norm": 1.340956138900654, "learning_rate": 1.9601185604714548e-05, "loss": 0.8054957389831543, "step": 1118 }, { "epoch": 0.2719980554205153, "grad_norm": 1.3375397394421946, "learning_rate": 1.960006102172031e-05, "loss": 0.8528215289115906, "step": 1119 }, { "epoch": 0.2722411278561011, "grad_norm": 1.7147480185134099, "learning_rate": 1.9598934887751664e-05, "loss": 0.7474032640457153, "step": 1120 }, { "epoch": 0.2724842002916869, "grad_norm": 1.1850201604951085, "learning_rate": 1.9597807202990548e-05, "loss": 0.8332722783088684, "step": 1121 }, { "epoch": 0.2727272727272727, "grad_norm": 1.3914522146341914, "learning_rate": 1.9596677967619148e-05, "loss": 0.7792162895202637, "step": 1122 }, { "epoch": 0.2729703451628585, "grad_norm": 1.2117990151913325, "learning_rate": 1.9595547181819902e-05, "loss": 0.7035485506057739, "step": 1123 }, { "epoch": 0.27321341759844436, "grad_norm": 1.1699483991453399, "learning_rate": 1.9594414845775498e-05, "loss": 0.6795021295547485, "step": 1124 }, { "epoch": 0.27345649003403016, "grad_norm": 1.354740874158286, "learning_rate": 1.9593280959668878e-05, "loss": 0.7092708349227905, "step": 1125 }, { "epoch": 0.27369956246961596, "grad_norm": 1.3057621965335409, "learning_rate": 1.9592145523683227e-05, "loss": 0.9082560539245605, "step": 1126 }, { "epoch": 0.27394263490520177, "grad_norm": 1.210969798968147, "learning_rate": 1.9591008538001987e-05, "loss": 0.6613690853118896, "step": 1127 }, { "epoch": 0.27418570734078757, "grad_norm": 1.3196779452621639, "learning_rate": 1.9589870002808848e-05, "loss": 0.6975858211517334, "step": 1128 }, { "epoch": 0.27442877977637337, "grad_norm": 1.217326159121742, "learning_rate": 1.958872991828775e-05, "loss": 0.6510100364685059, "step": 1129 }, { "epoch": 0.27467185221195917, "grad_norm": 1.2883839634925873, "learning_rate": 1.9587588284622883e-05, "loss": 0.9072951078414917, "step": 1130 }, { "epoch": 0.27491492464754497, "grad_norm": 1.2067155110925554, "learning_rate": 1.9586445101998687e-05, "loss": 0.6497021913528442, "step": 1131 }, { "epoch": 0.27515799708313077, "grad_norm": 1.3710490596255656, "learning_rate": 1.958530037059986e-05, "loss": 0.6819390058517456, "step": 1132 }, { "epoch": 0.27540106951871657, "grad_norm": 1.422412644707136, "learning_rate": 1.9584154090611335e-05, "loss": 0.799309253692627, "step": 1133 }, { "epoch": 0.27564414195430237, "grad_norm": 1.4091034676567433, "learning_rate": 1.9583006262218305e-05, "loss": 0.7875638008117676, "step": 1134 }, { "epoch": 0.27588721438988817, "grad_norm": 1.2361486106491923, "learning_rate": 1.9581856885606217e-05, "loss": 0.6311304569244385, "step": 1135 }, { "epoch": 0.27613028682547397, "grad_norm": 1.3223968584543104, "learning_rate": 1.958070596096076e-05, "loss": 0.8424077033996582, "step": 1136 }, { "epoch": 0.27637335926105977, "grad_norm": 1.4254911678241995, "learning_rate": 1.9579553488467874e-05, "loss": 0.8399142026901245, "step": 1137 }, { "epoch": 0.2766164316966456, "grad_norm": 1.0782687735455767, "learning_rate": 1.9578399468313752e-05, "loss": 0.508934736251831, "step": 1138 }, { "epoch": 0.2768595041322314, "grad_norm": 1.186127429745489, "learning_rate": 1.9577243900684838e-05, "loss": 0.7481350898742676, "step": 1139 }, { "epoch": 0.2771025765678172, "grad_norm": 1.468016232757045, "learning_rate": 1.9576086785767826e-05, "loss": 0.6939587593078613, "step": 1140 }, { "epoch": 0.277345649003403, "grad_norm": 1.3180015086482033, "learning_rate": 1.957492812374965e-05, "loss": 0.6031389236450195, "step": 1141 }, { "epoch": 0.27758872143898883, "grad_norm": 1.319338136372124, "learning_rate": 1.957376791481751e-05, "loss": 0.7576879262924194, "step": 1142 }, { "epoch": 0.27783179387457463, "grad_norm": 1.4041543055548236, "learning_rate": 1.9572606159158845e-05, "loss": 0.8493562936782837, "step": 1143 }, { "epoch": 0.27807486631016043, "grad_norm": 1.4575623455355162, "learning_rate": 1.9571442856961344e-05, "loss": 0.7430686354637146, "step": 1144 }, { "epoch": 0.27831793874574623, "grad_norm": 1.2364184307274977, "learning_rate": 1.9570278008412956e-05, "loss": 0.8078780174255371, "step": 1145 }, { "epoch": 0.27856101118133203, "grad_norm": 1.1946513705101574, "learning_rate": 1.9569111613701864e-05, "loss": 0.668956995010376, "step": 1146 }, { "epoch": 0.27880408361691783, "grad_norm": 1.3011154826488804, "learning_rate": 1.9567943673016517e-05, "loss": 0.7335695624351501, "step": 1147 }, { "epoch": 0.27904715605250363, "grad_norm": 1.244032542048773, "learning_rate": 1.9566774186545602e-05, "loss": 0.7434527277946472, "step": 1148 }, { "epoch": 0.27929022848808943, "grad_norm": 1.405783515964972, "learning_rate": 1.956560315447806e-05, "loss": 0.7821336984634399, "step": 1149 }, { "epoch": 0.27953330092367523, "grad_norm": 1.5081127399451473, "learning_rate": 1.9564430577003083e-05, "loss": 0.802778959274292, "step": 1150 }, { "epoch": 0.27977637335926103, "grad_norm": 1.4513777093609426, "learning_rate": 1.956325645431011e-05, "loss": 0.9098430871963501, "step": 1151 }, { "epoch": 0.2800194457948469, "grad_norm": 1.2776798201509534, "learning_rate": 1.956208078658883e-05, "loss": 0.7781152725219727, "step": 1152 }, { "epoch": 0.2802625182304327, "grad_norm": 1.740898486338533, "learning_rate": 1.9560903574029186e-05, "loss": 0.7476781606674194, "step": 1153 }, { "epoch": 0.2805055906660185, "grad_norm": 1.2702234522306015, "learning_rate": 1.9559724816821363e-05, "loss": 0.6566892862319946, "step": 1154 }, { "epoch": 0.2807486631016043, "grad_norm": 1.5172573872461783, "learning_rate": 1.9558544515155802e-05, "loss": 0.7001281976699829, "step": 1155 }, { "epoch": 0.2809917355371901, "grad_norm": 1.2678993165580303, "learning_rate": 1.955736266922319e-05, "loss": 0.6522821187973022, "step": 1156 }, { "epoch": 0.2812348079727759, "grad_norm": 1.3602967951260048, "learning_rate": 1.9556179279214464e-05, "loss": 0.6727923154830933, "step": 1157 }, { "epoch": 0.2814778804083617, "grad_norm": 1.3420303958978224, "learning_rate": 1.9554994345320813e-05, "loss": 0.7206603288650513, "step": 1158 }, { "epoch": 0.2817209528439475, "grad_norm": 1.2830463812352977, "learning_rate": 1.9553807867733675e-05, "loss": 0.7029585242271423, "step": 1159 }, { "epoch": 0.2819640252795333, "grad_norm": 1.1973427029121804, "learning_rate": 1.955261984664473e-05, "loss": 0.6072044372558594, "step": 1160 }, { "epoch": 0.2822070977151191, "grad_norm": 1.2906574826974364, "learning_rate": 1.955143028224592e-05, "loss": 0.8070724010467529, "step": 1161 }, { "epoch": 0.2824501701507049, "grad_norm": 1.6068866081931652, "learning_rate": 1.955023917472942e-05, "loss": 0.7850169539451599, "step": 1162 }, { "epoch": 0.2826932425862907, "grad_norm": 1.3797239458219024, "learning_rate": 1.9549046524287676e-05, "loss": 0.6523846983909607, "step": 1163 }, { "epoch": 0.2829363150218765, "grad_norm": 1.4365625890937788, "learning_rate": 1.9547852331113363e-05, "loss": 0.8283592462539673, "step": 1164 }, { "epoch": 0.28317938745746235, "grad_norm": 1.5328917612469226, "learning_rate": 1.9546656595399418e-05, "loss": 0.785429835319519, "step": 1165 }, { "epoch": 0.28342245989304815, "grad_norm": 1.240616997925057, "learning_rate": 1.9545459317339022e-05, "loss": 0.6104738116264343, "step": 1166 }, { "epoch": 0.28366553232863395, "grad_norm": 1.3364707914337481, "learning_rate": 1.95442604971256e-05, "loss": 0.6841458082199097, "step": 1167 }, { "epoch": 0.28390860476421975, "grad_norm": 1.2273072748984704, "learning_rate": 1.954306013495284e-05, "loss": 0.77220618724823, "step": 1168 }, { "epoch": 0.28415167719980555, "grad_norm": 1.3012353112813504, "learning_rate": 1.9541858231014665e-05, "loss": 0.6611150503158569, "step": 1169 }, { "epoch": 0.28439474963539135, "grad_norm": 1.2302314795162512, "learning_rate": 1.9540654785505255e-05, "loss": 0.570956289768219, "step": 1170 }, { "epoch": 0.28463782207097715, "grad_norm": 1.589728545624296, "learning_rate": 1.953944979861904e-05, "loss": 0.708979606628418, "step": 1171 }, { "epoch": 0.28488089450656295, "grad_norm": 1.3471161126328988, "learning_rate": 1.9538243270550695e-05, "loss": 0.6480371356010437, "step": 1172 }, { "epoch": 0.28512396694214875, "grad_norm": 1.0557507005211941, "learning_rate": 1.9537035201495144e-05, "loss": 0.7040462493896484, "step": 1173 }, { "epoch": 0.28536703937773455, "grad_norm": 1.156256803185789, "learning_rate": 1.953582559164756e-05, "loss": 0.5758742094039917, "step": 1174 }, { "epoch": 0.28561011181332036, "grad_norm": 1.455155313738038, "learning_rate": 1.9534614441203364e-05, "loss": 0.7899571657180786, "step": 1175 }, { "epoch": 0.28585318424890616, "grad_norm": 1.1839056787509856, "learning_rate": 1.9533401750358235e-05, "loss": 0.6536349058151245, "step": 1176 }, { "epoch": 0.28609625668449196, "grad_norm": 1.2993059539032612, "learning_rate": 1.9532187519308094e-05, "loss": 0.6515258550643921, "step": 1177 }, { "epoch": 0.28633932912007776, "grad_norm": 1.3337020097275698, "learning_rate": 1.9530971748249103e-05, "loss": 0.7045862078666687, "step": 1178 }, { "epoch": 0.2865824015556636, "grad_norm": 1.1966312930808718, "learning_rate": 1.9529754437377688e-05, "loss": 0.6706819534301758, "step": 1179 }, { "epoch": 0.2868254739912494, "grad_norm": 1.4090185960043415, "learning_rate": 1.9528535586890508e-05, "loss": 0.736205518245697, "step": 1180 }, { "epoch": 0.2870685464268352, "grad_norm": 1.1456420016033013, "learning_rate": 1.9527315196984488e-05, "loss": 0.6373763680458069, "step": 1181 }, { "epoch": 0.287311618862421, "grad_norm": 1.2033787743296953, "learning_rate": 1.952609326785679e-05, "loss": 0.6699745655059814, "step": 1182 }, { "epoch": 0.2875546912980068, "grad_norm": 1.1594181886169033, "learning_rate": 1.9524869799704824e-05, "loss": 0.5992273688316345, "step": 1183 }, { "epoch": 0.2877977637335926, "grad_norm": 1.3242306443799832, "learning_rate": 1.9523644792726254e-05, "loss": 0.7440169453620911, "step": 1184 }, { "epoch": 0.2880408361691784, "grad_norm": 1.2003185636090237, "learning_rate": 1.9522418247118993e-05, "loss": 0.7086882591247559, "step": 1185 }, { "epoch": 0.2882839086047642, "grad_norm": 1.2684494653496492, "learning_rate": 1.9521190163081196e-05, "loss": 0.7578713297843933, "step": 1186 }, { "epoch": 0.28852698104035, "grad_norm": 1.2416209199317294, "learning_rate": 1.9519960540811272e-05, "loss": 0.6735696792602539, "step": 1187 }, { "epoch": 0.2887700534759358, "grad_norm": 1.4456765556753028, "learning_rate": 1.951872938050788e-05, "loss": 0.7794386744499207, "step": 1188 }, { "epoch": 0.2890131259115216, "grad_norm": 1.3480552523350582, "learning_rate": 1.9517496682369925e-05, "loss": 0.6647989749908447, "step": 1189 }, { "epoch": 0.2892561983471074, "grad_norm": 1.1599228982902632, "learning_rate": 1.9516262446596554e-05, "loss": 0.5742471814155579, "step": 1190 }, { "epoch": 0.2894992707826932, "grad_norm": 1.3571307148879561, "learning_rate": 1.9515026673387178e-05, "loss": 0.7976232171058655, "step": 1191 }, { "epoch": 0.2897423432182791, "grad_norm": 1.4045711504936724, "learning_rate": 1.9513789362941437e-05, "loss": 0.6518054008483887, "step": 1192 }, { "epoch": 0.2899854156538649, "grad_norm": 1.5602111243183092, "learning_rate": 1.9512550515459237e-05, "loss": 0.7340199947357178, "step": 1193 }, { "epoch": 0.2902284880894507, "grad_norm": 1.2331218594469648, "learning_rate": 1.9511310131140726e-05, "loss": 0.5796368718147278, "step": 1194 }, { "epoch": 0.2904715605250365, "grad_norm": 2.8624353501226647, "learning_rate": 1.9510068210186287e-05, "loss": 0.9224033355712891, "step": 1195 }, { "epoch": 0.2907146329606223, "grad_norm": 1.2335478390275219, "learning_rate": 1.9508824752796576e-05, "loss": 0.7081613540649414, "step": 1196 }, { "epoch": 0.2909577053962081, "grad_norm": 1.265954956714463, "learning_rate": 1.950757975917248e-05, "loss": 0.7187343835830688, "step": 1197 }, { "epoch": 0.2912007778317939, "grad_norm": 1.3811626709099731, "learning_rate": 1.950633322951514e-05, "loss": 0.613788366317749, "step": 1198 }, { "epoch": 0.2914438502673797, "grad_norm": 1.4305609459496749, "learning_rate": 1.9505085164025938e-05, "loss": 0.7394440770149231, "step": 1199 }, { "epoch": 0.2916869227029655, "grad_norm": 1.103075912706183, "learning_rate": 1.950383556290652e-05, "loss": 0.6484329700469971, "step": 1200 }, { "epoch": 0.2919299951385513, "grad_norm": 1.7770766558477917, "learning_rate": 1.950258442635876e-05, "loss": 0.6649644374847412, "step": 1201 }, { "epoch": 0.2921730675741371, "grad_norm": 1.450298260398163, "learning_rate": 1.9501331754584797e-05, "loss": 0.7266194820404053, "step": 1202 }, { "epoch": 0.2924161400097229, "grad_norm": 1.1290643382615597, "learning_rate": 1.950007754778701e-05, "loss": 0.6539995670318604, "step": 1203 }, { "epoch": 0.2926592124453087, "grad_norm": 1.3143766813729714, "learning_rate": 1.9498821806168023e-05, "loss": 0.6144999265670776, "step": 1204 }, { "epoch": 0.2929022848808945, "grad_norm": 1.5667051237130958, "learning_rate": 1.9497564529930716e-05, "loss": 0.7564272880554199, "step": 1205 }, { "epoch": 0.29314535731648034, "grad_norm": 1.3480159164728416, "learning_rate": 1.9496305719278212e-05, "loss": 0.5948255062103271, "step": 1206 }, { "epoch": 0.29338842975206614, "grad_norm": 1.3217293250989528, "learning_rate": 1.949504537441388e-05, "loss": 0.6790245771408081, "step": 1207 }, { "epoch": 0.29363150218765194, "grad_norm": 1.3481104454419848, "learning_rate": 1.949378349554135e-05, "loss": 0.8325299024581909, "step": 1208 }, { "epoch": 0.29387457462323774, "grad_norm": 1.375436149695437, "learning_rate": 1.9492520082864476e-05, "loss": 0.6724599599838257, "step": 1209 }, { "epoch": 0.29411764705882354, "grad_norm": 1.2958872295378796, "learning_rate": 1.9491255136587384e-05, "loss": 0.6997729539871216, "step": 1210 }, { "epoch": 0.29436071949440934, "grad_norm": 1.3548507731591608, "learning_rate": 1.9489988656914432e-05, "loss": 0.8140059113502502, "step": 1211 }, { "epoch": 0.29460379192999514, "grad_norm": 1.3589177428489765, "learning_rate": 1.9488720644050233e-05, "loss": 0.7845596075057983, "step": 1212 }, { "epoch": 0.29484686436558094, "grad_norm": 1.290099337947367, "learning_rate": 1.9487451098199642e-05, "loss": 0.6646411418914795, "step": 1213 }, { "epoch": 0.29508993680116674, "grad_norm": 1.0836381730507896, "learning_rate": 1.9486180019567767e-05, "loss": 0.5649732947349548, "step": 1214 }, { "epoch": 0.29533300923675254, "grad_norm": 1.3564564759283808, "learning_rate": 1.9484907408359967e-05, "loss": 0.6794418692588806, "step": 1215 }, { "epoch": 0.29557608167233834, "grad_norm": 1.2352138443093834, "learning_rate": 1.948363326478184e-05, "loss": 0.7333462834358215, "step": 1216 }, { "epoch": 0.29581915410792414, "grad_norm": 1.4913809063054217, "learning_rate": 1.9482357589039235e-05, "loss": 0.703997015953064, "step": 1217 }, { "epoch": 0.29606222654350994, "grad_norm": 1.364003707547261, "learning_rate": 1.9481080381338245e-05, "loss": 0.6898313760757446, "step": 1218 }, { "epoch": 0.29630529897909574, "grad_norm": 1.106186470383619, "learning_rate": 1.947980164188522e-05, "loss": 0.6431204676628113, "step": 1219 }, { "epoch": 0.2965483714146816, "grad_norm": 1.298031612293443, "learning_rate": 1.9478521370886746e-05, "loss": 0.6733763813972473, "step": 1220 }, { "epoch": 0.2967914438502674, "grad_norm": 1.3289134815105215, "learning_rate": 1.9477239568549667e-05, "loss": 0.5754275321960449, "step": 1221 }, { "epoch": 0.2970345162858532, "grad_norm": 1.2729984231614149, "learning_rate": 1.947595623508107e-05, "loss": 0.5302947163581848, "step": 1222 }, { "epoch": 0.297277588721439, "grad_norm": 1.6100487676587487, "learning_rate": 1.947467137068828e-05, "loss": 0.5752731561660767, "step": 1223 }, { "epoch": 0.2975206611570248, "grad_norm": 1.4092878520217123, "learning_rate": 1.947338497557889e-05, "loss": 0.6633050441741943, "step": 1224 }, { "epoch": 0.2977637335926106, "grad_norm": 1.4334763692737134, "learning_rate": 1.9472097049960722e-05, "loss": 0.7356998920440674, "step": 1225 }, { "epoch": 0.2980068060281964, "grad_norm": 1.5480275895068198, "learning_rate": 1.9470807594041854e-05, "loss": 0.9016563892364502, "step": 1226 }, { "epoch": 0.2982498784637822, "grad_norm": 1.4250785868900264, "learning_rate": 1.9469516608030608e-05, "loss": 0.7636754512786865, "step": 1227 }, { "epoch": 0.298492950899368, "grad_norm": 1.2804652392365679, "learning_rate": 1.946822409213555e-05, "loss": 0.668316662311554, "step": 1228 }, { "epoch": 0.2987360233349538, "grad_norm": 1.2685883369909976, "learning_rate": 1.9466930046565504e-05, "loss": 0.5871998071670532, "step": 1229 }, { "epoch": 0.2989790957705396, "grad_norm": 1.4816059830970292, "learning_rate": 1.9465634471529532e-05, "loss": 0.715939462184906, "step": 1230 }, { "epoch": 0.2992221682061254, "grad_norm": 1.2467610610105362, "learning_rate": 1.9464337367236945e-05, "loss": 0.6646352410316467, "step": 1231 }, { "epoch": 0.2994652406417112, "grad_norm": 1.397051353105154, "learning_rate": 1.9463038733897298e-05, "loss": 0.7817435264587402, "step": 1232 }, { "epoch": 0.29970831307729706, "grad_norm": 1.4969832795869005, "learning_rate": 1.9461738571720406e-05, "loss": 0.6031773090362549, "step": 1233 }, { "epoch": 0.29995138551288286, "grad_norm": 1.4467850047572268, "learning_rate": 1.9460436880916314e-05, "loss": 0.8099570870399475, "step": 1234 }, { "epoch": 0.30019445794846866, "grad_norm": 1.3018373302349882, "learning_rate": 1.9459133661695323e-05, "loss": 0.642399787902832, "step": 1235 }, { "epoch": 0.30043753038405446, "grad_norm": 1.267122335191845, "learning_rate": 1.945782891426798e-05, "loss": 0.5703033208847046, "step": 1236 }, { "epoch": 0.30068060281964026, "grad_norm": 1.6404424937630435, "learning_rate": 1.9456522638845082e-05, "loss": 0.6990916132926941, "step": 1237 }, { "epoch": 0.30092367525522606, "grad_norm": 1.3925241448212093, "learning_rate": 1.9455214835637662e-05, "loss": 0.7059776782989502, "step": 1238 }, { "epoch": 0.30116674769081186, "grad_norm": 1.3297085783159661, "learning_rate": 1.9453905504857018e-05, "loss": 0.6387366056442261, "step": 1239 }, { "epoch": 0.30140982012639767, "grad_norm": 1.640626025162627, "learning_rate": 1.945259464671467e-05, "loss": 0.9020857810974121, "step": 1240 }, { "epoch": 0.30165289256198347, "grad_norm": 1.1920691577124651, "learning_rate": 1.945128226142241e-05, "loss": 0.6274569034576416, "step": 1241 }, { "epoch": 0.30189596499756927, "grad_norm": 1.2286475327047766, "learning_rate": 1.9449968349192257e-05, "loss": 0.6162442564964294, "step": 1242 }, { "epoch": 0.30213903743315507, "grad_norm": 1.4702304911120958, "learning_rate": 1.9448652910236494e-05, "loss": 0.8009412884712219, "step": 1243 }, { "epoch": 0.30238210986874087, "grad_norm": 1.329576374289821, "learning_rate": 1.9447335944767632e-05, "loss": 0.6560027599334717, "step": 1244 }, { "epoch": 0.30262518230432667, "grad_norm": 1.2579414684372616, "learning_rate": 1.9446017452998448e-05, "loss": 0.6115477085113525, "step": 1245 }, { "epoch": 0.30286825473991247, "grad_norm": 1.5196793702506386, "learning_rate": 1.944469743514195e-05, "loss": 0.767625629901886, "step": 1246 }, { "epoch": 0.3031113271754983, "grad_norm": 1.3984296108630352, "learning_rate": 1.94433758914114e-05, "loss": 0.6015191078186035, "step": 1247 }, { "epoch": 0.3033543996110841, "grad_norm": 1.3724108496197611, "learning_rate": 1.9442052822020305e-05, "loss": 0.7660267353057861, "step": 1248 }, { "epoch": 0.3035974720466699, "grad_norm": 1.3016542829168238, "learning_rate": 1.9440728227182417e-05, "loss": 0.6839306354522705, "step": 1249 }, { "epoch": 0.3038405444822557, "grad_norm": 1.3887688463206416, "learning_rate": 1.943940210711174e-05, "loss": 0.6888152360916138, "step": 1250 }, { "epoch": 0.3040836169178415, "grad_norm": 1.4380389075537243, "learning_rate": 1.9438074462022515e-05, "loss": 0.7070307731628418, "step": 1251 }, { "epoch": 0.3043266893534273, "grad_norm": 1.3790531671586372, "learning_rate": 1.943674529212924e-05, "loss": 0.7052490711212158, "step": 1252 }, { "epoch": 0.3045697617890131, "grad_norm": 1.2480953012458331, "learning_rate": 1.943541459764665e-05, "loss": 0.7471447587013245, "step": 1253 }, { "epoch": 0.3048128342245989, "grad_norm": 1.401212274006004, "learning_rate": 1.943408237878973e-05, "loss": 0.6302101612091064, "step": 1254 }, { "epoch": 0.30505590666018473, "grad_norm": 1.2661065943079213, "learning_rate": 1.9432748635773717e-05, "loss": 0.7943645715713501, "step": 1255 }, { "epoch": 0.30529897909577053, "grad_norm": 1.3439400370972592, "learning_rate": 1.9431413368814084e-05, "loss": 0.7809312343597412, "step": 1256 }, { "epoch": 0.30554205153135633, "grad_norm": 1.1320202713694418, "learning_rate": 1.9430076578126556e-05, "loss": 0.5952998399734497, "step": 1257 }, { "epoch": 0.30578512396694213, "grad_norm": 1.1646351768713488, "learning_rate": 1.9428738263927108e-05, "loss": 0.6619799137115479, "step": 1258 }, { "epoch": 0.30602819640252793, "grad_norm": 1.0833151811868824, "learning_rate": 1.942739842643195e-05, "loss": 0.6692537069320679, "step": 1259 }, { "epoch": 0.30627126883811373, "grad_norm": 1.2717054056406447, "learning_rate": 1.9426057065857543e-05, "loss": 0.5916615724563599, "step": 1260 }, { "epoch": 0.3065143412736996, "grad_norm": 1.3548694807076103, "learning_rate": 1.9424714182420606e-05, "loss": 0.7478951215744019, "step": 1261 }, { "epoch": 0.3067574137092854, "grad_norm": 1.4486739221150806, "learning_rate": 1.9423369776338086e-05, "loss": 0.626482367515564, "step": 1262 }, { "epoch": 0.3070004861448712, "grad_norm": 1.2475796433350042, "learning_rate": 1.9422023847827184e-05, "loss": 0.720130443572998, "step": 1263 }, { "epoch": 0.307243558580457, "grad_norm": 1.3909497725220237, "learning_rate": 1.9420676397105346e-05, "loss": 0.7202904224395752, "step": 1264 }, { "epoch": 0.3074866310160428, "grad_norm": 1.4112950801599438, "learning_rate": 1.941932742439027e-05, "loss": 0.7217140793800354, "step": 1265 }, { "epoch": 0.3077297034516286, "grad_norm": 1.2618417317315012, "learning_rate": 1.941797692989989e-05, "loss": 0.7501485347747803, "step": 1266 }, { "epoch": 0.3079727758872144, "grad_norm": 1.3078353314239037, "learning_rate": 1.941662491385239e-05, "loss": 0.572106122970581, "step": 1267 }, { "epoch": 0.3082158483228002, "grad_norm": 1.1866332113689395, "learning_rate": 1.9415271376466197e-05, "loss": 0.58750319480896, "step": 1268 }, { "epoch": 0.308458920758386, "grad_norm": 1.2872960538937506, "learning_rate": 1.9413916317959994e-05, "loss": 0.7867124080657959, "step": 1269 }, { "epoch": 0.3087019931939718, "grad_norm": 1.36565102683756, "learning_rate": 1.94125597385527e-05, "loss": 0.6561180353164673, "step": 1270 }, { "epoch": 0.3089450656295576, "grad_norm": 1.5240953031738287, "learning_rate": 1.9411201638463485e-05, "loss": 0.6397478580474854, "step": 1271 }, { "epoch": 0.3091881380651434, "grad_norm": 1.377829129186704, "learning_rate": 1.940984201791175e-05, "loss": 0.7400578260421753, "step": 1272 }, { "epoch": 0.3094312105007292, "grad_norm": 1.216522428271809, "learning_rate": 1.940848087711717e-05, "loss": 0.7224097847938538, "step": 1273 }, { "epoch": 0.30967428293631505, "grad_norm": 1.1677072099976031, "learning_rate": 1.940711821629964e-05, "loss": 0.7002249956130981, "step": 1274 }, { "epoch": 0.30991735537190085, "grad_norm": 1.371335771707034, "learning_rate": 1.9405754035679312e-05, "loss": 0.6944217085838318, "step": 1275 }, { "epoch": 0.31016042780748665, "grad_norm": 1.2676029069945858, "learning_rate": 1.940438833547658e-05, "loss": 0.6502590775489807, "step": 1276 }, { "epoch": 0.31040350024307245, "grad_norm": 1.5484139078407848, "learning_rate": 1.9403021115912083e-05, "loss": 0.7530246376991272, "step": 1277 }, { "epoch": 0.31064657267865825, "grad_norm": 1.246124641586971, "learning_rate": 1.9401652377206715e-05, "loss": 0.6207919120788574, "step": 1278 }, { "epoch": 0.31088964511424405, "grad_norm": 1.3920074379741272, "learning_rate": 1.9400282119581603e-05, "loss": 0.6973080039024353, "step": 1279 }, { "epoch": 0.31113271754982985, "grad_norm": 1.1749397519010567, "learning_rate": 1.9398910343258125e-05, "loss": 0.7051734924316406, "step": 1280 }, { "epoch": 0.31137578998541565, "grad_norm": 1.4111085479093122, "learning_rate": 1.93975370484579e-05, "loss": 0.6847357749938965, "step": 1281 }, { "epoch": 0.31161886242100145, "grad_norm": 1.3573332409719696, "learning_rate": 1.9396162235402797e-05, "loss": 0.6004730463027954, "step": 1282 }, { "epoch": 0.31186193485658725, "grad_norm": 1.3285019672363318, "learning_rate": 1.9394785904314934e-05, "loss": 0.7142876386642456, "step": 1283 }, { "epoch": 0.31210500729217305, "grad_norm": 1.4632751421130064, "learning_rate": 1.9393408055416665e-05, "loss": 0.7943194508552551, "step": 1284 }, { "epoch": 0.31234807972775885, "grad_norm": 1.3224274381930496, "learning_rate": 1.93920286889306e-05, "loss": 0.6950099468231201, "step": 1285 }, { "epoch": 0.31259115216334465, "grad_norm": 1.2014636172355544, "learning_rate": 1.9390647805079574e-05, "loss": 0.67397141456604, "step": 1286 }, { "epoch": 0.31283422459893045, "grad_norm": 1.3251205126695318, "learning_rate": 1.9389265404086695e-05, "loss": 0.8399792909622192, "step": 1287 }, { "epoch": 0.3130772970345163, "grad_norm": 1.7633483680266544, "learning_rate": 1.9387881486175295e-05, "loss": 0.6936146020889282, "step": 1288 }, { "epoch": 0.3133203694701021, "grad_norm": 1.236682098793898, "learning_rate": 1.9386496051568962e-05, "loss": 0.8003270626068115, "step": 1289 }, { "epoch": 0.3135634419056879, "grad_norm": 2.271799694966686, "learning_rate": 1.9385109100491518e-05, "loss": 0.6005741953849792, "step": 1290 }, { "epoch": 0.3138065143412737, "grad_norm": 1.2333411449448486, "learning_rate": 1.9383720633167045e-05, "loss": 0.6577644348144531, "step": 1291 }, { "epoch": 0.3140495867768595, "grad_norm": 1.4582149040726862, "learning_rate": 1.938233064981986e-05, "loss": 0.7494965195655823, "step": 1292 }, { "epoch": 0.3142926592124453, "grad_norm": 1.4195932964813867, "learning_rate": 1.938093915067453e-05, "loss": 0.6307618618011475, "step": 1293 }, { "epoch": 0.3145357316480311, "grad_norm": 1.2701801860701092, "learning_rate": 1.9379546135955855e-05, "loss": 0.7340924739837646, "step": 1294 }, { "epoch": 0.3147788040836169, "grad_norm": 1.1649265782484408, "learning_rate": 1.9378151605888894e-05, "loss": 0.6581038236618042, "step": 1295 }, { "epoch": 0.3150218765192027, "grad_norm": 1.5161512076344243, "learning_rate": 1.937675556069895e-05, "loss": 0.8531101942062378, "step": 1296 }, { "epoch": 0.3152649489547885, "grad_norm": 1.3941782471587345, "learning_rate": 1.9375358000611557e-05, "loss": 0.8068580627441406, "step": 1297 }, { "epoch": 0.3155080213903743, "grad_norm": 1.2140805760908195, "learning_rate": 1.937395892585251e-05, "loss": 0.6679761409759521, "step": 1298 }, { "epoch": 0.3157510938259601, "grad_norm": 1.4156271568519183, "learning_rate": 1.937255833664784e-05, "loss": 0.7548414468765259, "step": 1299 }, { "epoch": 0.3159941662615459, "grad_norm": 1.3469572601973059, "learning_rate": 1.9371156233223825e-05, "loss": 0.6944047212600708, "step": 1300 }, { "epoch": 0.3162372386971318, "grad_norm": 1.4730128740371282, "learning_rate": 1.936975261580699e-05, "loss": 0.8044272661209106, "step": 1301 }, { "epoch": 0.3164803111327176, "grad_norm": 1.505502165276979, "learning_rate": 1.9368347484624094e-05, "loss": 0.7356602549552917, "step": 1302 }, { "epoch": 0.3167233835683034, "grad_norm": 1.237233844478416, "learning_rate": 1.936694083990215e-05, "loss": 0.7879598140716553, "step": 1303 }, { "epoch": 0.3169664560038892, "grad_norm": 1.1361999042978415, "learning_rate": 1.9365532681868425e-05, "loss": 0.7993665933609009, "step": 1304 }, { "epoch": 0.317209528439475, "grad_norm": 1.408862453038674, "learning_rate": 1.9364123010750406e-05, "loss": 0.7486152648925781, "step": 1305 }, { "epoch": 0.3174526008750608, "grad_norm": 1.3442910513006894, "learning_rate": 1.9362711826775843e-05, "loss": 0.7278156280517578, "step": 1306 }, { "epoch": 0.3176956733106466, "grad_norm": 1.2660607476443662, "learning_rate": 1.9361299130172726e-05, "loss": 0.5805932283401489, "step": 1307 }, { "epoch": 0.3179387457462324, "grad_norm": 1.3797615262527405, "learning_rate": 1.9359884921169286e-05, "loss": 0.725109875202179, "step": 1308 }, { "epoch": 0.3181818181818182, "grad_norm": 1.3202418021685258, "learning_rate": 1.9358469199994006e-05, "loss": 0.7098106145858765, "step": 1309 }, { "epoch": 0.318424890617404, "grad_norm": 1.1104404110295003, "learning_rate": 1.93570519668756e-05, "loss": 0.5882269144058228, "step": 1310 }, { "epoch": 0.3186679630529898, "grad_norm": 1.264831872951071, "learning_rate": 1.9355633222043045e-05, "loss": 0.7007155418395996, "step": 1311 }, { "epoch": 0.3189110354885756, "grad_norm": 1.1047790941328155, "learning_rate": 1.935421296572554e-05, "loss": 0.6948978900909424, "step": 1312 }, { "epoch": 0.3191541079241614, "grad_norm": 1.2516144063698087, "learning_rate": 1.9352791198152547e-05, "loss": 0.7422938346862793, "step": 1313 }, { "epoch": 0.3193971803597472, "grad_norm": 1.285063888356021, "learning_rate": 1.9351367919553766e-05, "loss": 0.6477510929107666, "step": 1314 }, { "epoch": 0.31964025279533304, "grad_norm": 1.2977886162138326, "learning_rate": 1.934994313015914e-05, "loss": 0.7980877757072449, "step": 1315 }, { "epoch": 0.31988332523091884, "grad_norm": 1.2833717398830622, "learning_rate": 1.934851683019885e-05, "loss": 0.5815136432647705, "step": 1316 }, { "epoch": 0.32012639766650464, "grad_norm": 1.4353510535134058, "learning_rate": 1.9347089019903333e-05, "loss": 0.5848857164382935, "step": 1317 }, { "epoch": 0.32036947010209044, "grad_norm": 1.6871282422438998, "learning_rate": 1.9345659699503265e-05, "loss": 0.699210524559021, "step": 1318 }, { "epoch": 0.32061254253767624, "grad_norm": 1.1497435952652606, "learning_rate": 1.934422886922956e-05, "loss": 0.5540732741355896, "step": 1319 }, { "epoch": 0.32085561497326204, "grad_norm": 1.4675395800037536, "learning_rate": 1.9342796529313385e-05, "loss": 0.6023752689361572, "step": 1320 }, { "epoch": 0.32109868740884784, "grad_norm": 1.066861998713457, "learning_rate": 1.934136267998615e-05, "loss": 0.44056665897369385, "step": 1321 }, { "epoch": 0.32134175984443364, "grad_norm": 1.5088457237993278, "learning_rate": 1.9339927321479498e-05, "loss": 0.7372761964797974, "step": 1322 }, { "epoch": 0.32158483228001944, "grad_norm": 1.4047416053443131, "learning_rate": 1.9338490454025334e-05, "loss": 0.6667118668556213, "step": 1323 }, { "epoch": 0.32182790471560524, "grad_norm": 1.2586335122121715, "learning_rate": 1.933705207785579e-05, "loss": 0.714116096496582, "step": 1324 }, { "epoch": 0.32207097715119104, "grad_norm": 1.4458112084074326, "learning_rate": 1.9335612193203243e-05, "loss": 0.699466347694397, "step": 1325 }, { "epoch": 0.32231404958677684, "grad_norm": 1.4408838306849079, "learning_rate": 1.9334170800300334e-05, "loss": 0.7776508927345276, "step": 1326 }, { "epoch": 0.32255712202236264, "grad_norm": 1.423628249123153, "learning_rate": 1.9332727899379918e-05, "loss": 0.8575199842453003, "step": 1327 }, { "epoch": 0.32280019445794844, "grad_norm": 1.3649039518679513, "learning_rate": 1.9331283490675117e-05, "loss": 0.6443345546722412, "step": 1328 }, { "epoch": 0.3230432668935343, "grad_norm": 1.2220777633727158, "learning_rate": 1.9329837574419287e-05, "loss": 0.7533725500106812, "step": 1329 }, { "epoch": 0.3232863393291201, "grad_norm": 1.7151954501670517, "learning_rate": 1.9328390150846023e-05, "loss": 0.7031955718994141, "step": 1330 }, { "epoch": 0.3235294117647059, "grad_norm": 1.438167240345701, "learning_rate": 1.9326941220189174e-05, "loss": 0.7015735507011414, "step": 1331 }, { "epoch": 0.3237724842002917, "grad_norm": 1.4027229858779324, "learning_rate": 1.932549078268283e-05, "loss": 0.6353121995925903, "step": 1332 }, { "epoch": 0.3240155566358775, "grad_norm": 1.3792969811178049, "learning_rate": 1.9324038838561317e-05, "loss": 0.7690932750701904, "step": 1333 }, { "epoch": 0.3242586290714633, "grad_norm": 1.4515366165267918, "learning_rate": 1.932258538805921e-05, "loss": 0.7469145059585571, "step": 1334 }, { "epoch": 0.3245017015070491, "grad_norm": 1.392533155377005, "learning_rate": 1.9321130431411328e-05, "loss": 0.6168932914733887, "step": 1335 }, { "epoch": 0.3247447739426349, "grad_norm": 1.1875964257106835, "learning_rate": 1.931967396885273e-05, "loss": 0.6546919345855713, "step": 1336 }, { "epoch": 0.3249878463782207, "grad_norm": 1.437630915798022, "learning_rate": 1.931821600061872e-05, "loss": 0.727165699005127, "step": 1337 }, { "epoch": 0.3252309188138065, "grad_norm": 1.3261473730637299, "learning_rate": 1.9316756526944855e-05, "loss": 0.7885606288909912, "step": 1338 }, { "epoch": 0.3254739912493923, "grad_norm": 1.5587414568015596, "learning_rate": 1.9315295548066913e-05, "loss": 0.549826979637146, "step": 1339 }, { "epoch": 0.3257170636849781, "grad_norm": 1.4897180685965443, "learning_rate": 1.9313833064220935e-05, "loss": 0.8652329444885254, "step": 1340 }, { "epoch": 0.3259601361205639, "grad_norm": 1.3511955608947042, "learning_rate": 1.9312369075643197e-05, "loss": 0.7582256197929382, "step": 1341 }, { "epoch": 0.32620320855614976, "grad_norm": 1.3052786713382465, "learning_rate": 1.9310903582570216e-05, "loss": 0.8220837712287903, "step": 1342 }, { "epoch": 0.32644628099173556, "grad_norm": 1.474018868303645, "learning_rate": 1.930943658523876e-05, "loss": 0.7856172323226929, "step": 1343 }, { "epoch": 0.32668935342732136, "grad_norm": 1.1064893727443275, "learning_rate": 1.9307968083885835e-05, "loss": 0.5643701553344727, "step": 1344 }, { "epoch": 0.32693242586290716, "grad_norm": 1.369959501283088, "learning_rate": 1.9306498078748687e-05, "loss": 0.8407614827156067, "step": 1345 }, { "epoch": 0.32717549829849296, "grad_norm": 1.3418214728772435, "learning_rate": 1.9305026570064812e-05, "loss": 0.6934142112731934, "step": 1346 }, { "epoch": 0.32741857073407876, "grad_norm": 1.2739891078973755, "learning_rate": 1.930355355807194e-05, "loss": 0.5368447303771973, "step": 1347 }, { "epoch": 0.32766164316966456, "grad_norm": 1.0817363966312223, "learning_rate": 1.9302079043008052e-05, "loss": 0.5872273445129395, "step": 1348 }, { "epoch": 0.32790471560525036, "grad_norm": 1.4791393501461243, "learning_rate": 1.9300603025111374e-05, "loss": 0.6144394278526306, "step": 1349 }, { "epoch": 0.32814778804083616, "grad_norm": 1.5329610144077008, "learning_rate": 1.929912550462036e-05, "loss": 0.6835021376609802, "step": 1350 }, { "epoch": 0.32839086047642196, "grad_norm": 1.2163685880947355, "learning_rate": 1.9297646481773725e-05, "loss": 0.6218926310539246, "step": 1351 }, { "epoch": 0.32863393291200776, "grad_norm": 1.3350353680461025, "learning_rate": 1.9296165956810414e-05, "loss": 0.6322596073150635, "step": 1352 }, { "epoch": 0.32887700534759357, "grad_norm": 1.4131449741298672, "learning_rate": 1.9294683929969616e-05, "loss": 0.6715003252029419, "step": 1353 }, { "epoch": 0.32912007778317937, "grad_norm": 1.547793449347504, "learning_rate": 1.929320040149077e-05, "loss": 0.8886430263519287, "step": 1354 }, { "epoch": 0.32936315021876517, "grad_norm": 1.364356885117769, "learning_rate": 1.9291715371613552e-05, "loss": 0.6287528276443481, "step": 1355 }, { "epoch": 0.329606222654351, "grad_norm": 1.2324111024299238, "learning_rate": 1.9290228840577886e-05, "loss": 0.688846230506897, "step": 1356 }, { "epoch": 0.3298492950899368, "grad_norm": 1.2616457251612812, "learning_rate": 1.9288740808623923e-05, "loss": 0.7069304585456848, "step": 1357 }, { "epoch": 0.3300923675255226, "grad_norm": 1.4143646214394123, "learning_rate": 1.928725127599208e-05, "loss": 0.7301149964332581, "step": 1358 }, { "epoch": 0.3303354399611084, "grad_norm": 1.3574079601236635, "learning_rate": 1.9285760242922996e-05, "loss": 0.8140010237693787, "step": 1359 }, { "epoch": 0.3305785123966942, "grad_norm": 1.3955487753568454, "learning_rate": 1.9284267709657563e-05, "loss": 0.7492258548736572, "step": 1360 }, { "epoch": 0.33082158483228, "grad_norm": 1.301303634873325, "learning_rate": 1.9282773676436914e-05, "loss": 0.6458590030670166, "step": 1361 }, { "epoch": 0.3310646572678658, "grad_norm": 1.4939081781576948, "learning_rate": 1.9281278143502422e-05, "loss": 0.7350530624389648, "step": 1362 }, { "epoch": 0.3313077297034516, "grad_norm": 1.1453100534389569, "learning_rate": 1.9279781111095702e-05, "loss": 0.681709885597229, "step": 1363 }, { "epoch": 0.3315508021390374, "grad_norm": 1.518882678967011, "learning_rate": 1.9278282579458616e-05, "loss": 0.897990882396698, "step": 1364 }, { "epoch": 0.3317938745746232, "grad_norm": 1.386575163664412, "learning_rate": 1.9276782548833262e-05, "loss": 0.7720742225646973, "step": 1365 }, { "epoch": 0.332036947010209, "grad_norm": 1.2016645909652068, "learning_rate": 1.9275281019461988e-05, "loss": 0.819422721862793, "step": 1366 }, { "epoch": 0.3322800194457948, "grad_norm": 1.2138285913453057, "learning_rate": 1.9273777991587375e-05, "loss": 0.8380371332168579, "step": 1367 }, { "epoch": 0.33252309188138063, "grad_norm": 1.6495463486896151, "learning_rate": 1.927227346545225e-05, "loss": 0.7914010882377625, "step": 1368 }, { "epoch": 0.33276616431696643, "grad_norm": 1.2334796485663122, "learning_rate": 1.9270767441299685e-05, "loss": 0.5854858160018921, "step": 1369 }, { "epoch": 0.3330092367525523, "grad_norm": 1.6379496270021254, "learning_rate": 1.926925991937299e-05, "loss": 0.6789106130599976, "step": 1370 }, { "epoch": 0.3332523091881381, "grad_norm": 1.0313202755671398, "learning_rate": 1.9267750899915717e-05, "loss": 0.6292887926101685, "step": 1371 }, { "epoch": 0.3334953816237239, "grad_norm": 1.286845770602046, "learning_rate": 1.9266240383171662e-05, "loss": 0.6893868446350098, "step": 1372 }, { "epoch": 0.3337384540593097, "grad_norm": 1.4556908834591429, "learning_rate": 1.9264728369384867e-05, "loss": 0.6835194826126099, "step": 1373 }, { "epoch": 0.3339815264948955, "grad_norm": 1.1467139265322173, "learning_rate": 1.9263214858799608e-05, "loss": 0.6858971118927002, "step": 1374 }, { "epoch": 0.3342245989304813, "grad_norm": 1.2569194988416172, "learning_rate": 1.92616998516604e-05, "loss": 0.6544447541236877, "step": 1375 }, { "epoch": 0.3344676713660671, "grad_norm": 1.5798765238178332, "learning_rate": 1.9260183348212015e-05, "loss": 0.7115499973297119, "step": 1376 }, { "epoch": 0.3347107438016529, "grad_norm": 1.372766753397567, "learning_rate": 1.9258665348699453e-05, "loss": 0.7365099191665649, "step": 1377 }, { "epoch": 0.3349538162372387, "grad_norm": 1.2501606810823027, "learning_rate": 1.925714585336796e-05, "loss": 0.5267406105995178, "step": 1378 }, { "epoch": 0.3351968886728245, "grad_norm": 1.3723006357615832, "learning_rate": 1.9255624862463026e-05, "loss": 0.6202298998832703, "step": 1379 }, { "epoch": 0.3354399611084103, "grad_norm": 1.2261269049636645, "learning_rate": 1.9254102376230376e-05, "loss": 0.7659646272659302, "step": 1380 }, { "epoch": 0.3356830335439961, "grad_norm": 1.1695062708766621, "learning_rate": 1.925257839491599e-05, "loss": 0.7038124203681946, "step": 1381 }, { "epoch": 0.3359261059795819, "grad_norm": 1.3647671088500448, "learning_rate": 1.9251052918766072e-05, "loss": 0.5693999528884888, "step": 1382 }, { "epoch": 0.33616917841516775, "grad_norm": 1.2567770044576314, "learning_rate": 1.924952594802708e-05, "loss": 0.6171048879623413, "step": 1383 }, { "epoch": 0.33641225085075355, "grad_norm": 1.22850095127295, "learning_rate": 1.9247997482945706e-05, "loss": 0.6908570528030396, "step": 1384 }, { "epoch": 0.33665532328633935, "grad_norm": 1.1704227544208003, "learning_rate": 1.924646752376889e-05, "loss": 0.673170804977417, "step": 1385 }, { "epoch": 0.33689839572192515, "grad_norm": 1.353795640606133, "learning_rate": 1.924493607074381e-05, "loss": 0.8364904522895813, "step": 1386 }, { "epoch": 0.33714146815751095, "grad_norm": 1.5231458711576782, "learning_rate": 1.924340312411789e-05, "loss": 0.6522639989852905, "step": 1387 }, { "epoch": 0.33738454059309675, "grad_norm": 1.3968163883140827, "learning_rate": 1.9241868684138783e-05, "loss": 0.7021592855453491, "step": 1388 }, { "epoch": 0.33762761302868255, "grad_norm": 1.1953920103286346, "learning_rate": 1.9240332751054397e-05, "loss": 0.6639422178268433, "step": 1389 }, { "epoch": 0.33787068546426835, "grad_norm": 1.207005280100032, "learning_rate": 1.9238795325112867e-05, "loss": 0.625278651714325, "step": 1390 }, { "epoch": 0.33811375789985415, "grad_norm": 1.4302916867554012, "learning_rate": 1.923725640656259e-05, "loss": 0.6809590458869934, "step": 1391 }, { "epoch": 0.33835683033543995, "grad_norm": 1.319367768170793, "learning_rate": 1.9235715995652187e-05, "loss": 0.8403590321540833, "step": 1392 }, { "epoch": 0.33859990277102575, "grad_norm": 1.3364149615970424, "learning_rate": 1.9234174092630522e-05, "loss": 0.9001204371452332, "step": 1393 }, { "epoch": 0.33884297520661155, "grad_norm": 1.2035573202347383, "learning_rate": 1.9232630697746702e-05, "loss": 0.6882377862930298, "step": 1394 }, { "epoch": 0.33908604764219735, "grad_norm": 1.2953747407656047, "learning_rate": 1.9231085811250083e-05, "loss": 0.5794740319252014, "step": 1395 }, { "epoch": 0.33932912007778315, "grad_norm": 1.3267638075105195, "learning_rate": 1.9229539433390248e-05, "loss": 0.6350187063217163, "step": 1396 }, { "epoch": 0.339572192513369, "grad_norm": 1.1942567510692184, "learning_rate": 1.9227991564417038e-05, "loss": 0.7828056812286377, "step": 1397 }, { "epoch": 0.3398152649489548, "grad_norm": 1.0601841031251864, "learning_rate": 1.922644220458051e-05, "loss": 0.5974648594856262, "step": 1398 }, { "epoch": 0.3400583373845406, "grad_norm": 1.4169358765126174, "learning_rate": 1.922489135413099e-05, "loss": 0.6229907870292664, "step": 1399 }, { "epoch": 0.3403014098201264, "grad_norm": 1.312505335567181, "learning_rate": 1.9223339013319023e-05, "loss": 0.7648439407348633, "step": 1400 }, { "epoch": 0.3405444822557122, "grad_norm": 1.2131182170127566, "learning_rate": 1.9221785182395404e-05, "loss": 0.7984778881072998, "step": 1401 }, { "epoch": 0.340787554691298, "grad_norm": 1.2319780386527013, "learning_rate": 1.9220229861611176e-05, "loss": 0.7539113759994507, "step": 1402 }, { "epoch": 0.3410306271268838, "grad_norm": 1.3926406168285566, "learning_rate": 1.9218673051217608e-05, "loss": 0.7642449736595154, "step": 1403 }, { "epoch": 0.3412736995624696, "grad_norm": 1.3755016010806322, "learning_rate": 1.9217114751466215e-05, "loss": 0.7642135620117188, "step": 1404 }, { "epoch": 0.3415167719980554, "grad_norm": 1.2801100106368186, "learning_rate": 1.9215554962608757e-05, "loss": 0.6357026100158691, "step": 1405 }, { "epoch": 0.3417598444336412, "grad_norm": 1.3615452213143133, "learning_rate": 1.9213993684897235e-05, "loss": 0.7203260660171509, "step": 1406 }, { "epoch": 0.342002916869227, "grad_norm": 1.3434707642466852, "learning_rate": 1.921243091858388e-05, "loss": 0.8018672466278076, "step": 1407 }, { "epoch": 0.3422459893048128, "grad_norm": 1.3590675534217356, "learning_rate": 1.9210866663921178e-05, "loss": 0.6697542667388916, "step": 1408 }, { "epoch": 0.3424890617403986, "grad_norm": 1.2436994629255582, "learning_rate": 1.920930092116184e-05, "loss": 0.6087051630020142, "step": 1409 }, { "epoch": 0.34273213417598447, "grad_norm": 1.3102930211129504, "learning_rate": 1.9207733690558832e-05, "loss": 0.6712015867233276, "step": 1410 }, { "epoch": 0.34297520661157027, "grad_norm": 1.2478431960699268, "learning_rate": 1.9206164972365354e-05, "loss": 0.7048311233520508, "step": 1411 }, { "epoch": 0.34321827904715607, "grad_norm": 1.0321888117978244, "learning_rate": 1.920459476683484e-05, "loss": 0.5784353017807007, "step": 1412 }, { "epoch": 0.3434613514827419, "grad_norm": 1.54654633526688, "learning_rate": 1.9203023074220976e-05, "loss": 0.7457172870635986, "step": 1413 }, { "epoch": 0.3437044239183277, "grad_norm": 1.188755182794177, "learning_rate": 1.9201449894777683e-05, "loss": 0.7760995030403137, "step": 1414 }, { "epoch": 0.3439474963539135, "grad_norm": 1.428721139921117, "learning_rate": 1.919987522875912e-05, "loss": 0.7549236416816711, "step": 1415 }, { "epoch": 0.3441905687894993, "grad_norm": 1.3363028708628215, "learning_rate": 1.9198299076419688e-05, "loss": 0.6017575263977051, "step": 1416 }, { "epoch": 0.3444336412250851, "grad_norm": 1.412008831156872, "learning_rate": 1.9196721438014028e-05, "loss": 0.7349345684051514, "step": 1417 }, { "epoch": 0.3446767136606709, "grad_norm": 1.5342524351752678, "learning_rate": 1.9195142313797023e-05, "loss": 0.6756756901741028, "step": 1418 }, { "epoch": 0.3449197860962567, "grad_norm": 1.3285915431104602, "learning_rate": 1.9193561704023796e-05, "loss": 0.8266932964324951, "step": 1419 }, { "epoch": 0.3451628585318425, "grad_norm": 1.2916854486975005, "learning_rate": 1.91919796089497e-05, "loss": 0.9227912425994873, "step": 1420 }, { "epoch": 0.3454059309674283, "grad_norm": 1.3688444497268968, "learning_rate": 1.919039602883035e-05, "loss": 0.5746219158172607, "step": 1421 }, { "epoch": 0.3456490034030141, "grad_norm": 1.5563258556877062, "learning_rate": 1.9188810963921575e-05, "loss": 0.7427229881286621, "step": 1422 }, { "epoch": 0.3458920758385999, "grad_norm": 1.4835515414181142, "learning_rate": 1.9187224414479463e-05, "loss": 0.6014330983161926, "step": 1423 }, { "epoch": 0.34613514827418573, "grad_norm": 1.5425675017256941, "learning_rate": 1.9185636380760327e-05, "loss": 0.7249922752380371, "step": 1424 }, { "epoch": 0.34637822070977153, "grad_norm": 1.2834826488731257, "learning_rate": 1.918404686302074e-05, "loss": 0.7978817224502563, "step": 1425 }, { "epoch": 0.34662129314535733, "grad_norm": 1.364352132995379, "learning_rate": 1.9182455861517494e-05, "loss": 0.6353025436401367, "step": 1426 }, { "epoch": 0.34686436558094313, "grad_norm": 1.2522642315205348, "learning_rate": 1.9180863376507633e-05, "loss": 0.6531208753585815, "step": 1427 }, { "epoch": 0.34710743801652894, "grad_norm": 1.1699816352502244, "learning_rate": 1.9179269408248437e-05, "loss": 0.5672242641448975, "step": 1428 }, { "epoch": 0.34735051045211474, "grad_norm": 1.2442816131410808, "learning_rate": 1.917767395699742e-05, "loss": 0.7409714460372925, "step": 1429 }, { "epoch": 0.34759358288770054, "grad_norm": 1.3060194380126418, "learning_rate": 1.917607702301235e-05, "loss": 0.6771764755249023, "step": 1430 }, { "epoch": 0.34783665532328634, "grad_norm": 1.1474250992217252, "learning_rate": 1.9174478606551217e-05, "loss": 0.5425859689712524, "step": 1431 }, { "epoch": 0.34807972775887214, "grad_norm": 1.4305268761293597, "learning_rate": 1.917287870787227e-05, "loss": 0.7616360187530518, "step": 1432 }, { "epoch": 0.34832280019445794, "grad_norm": 1.549393164954607, "learning_rate": 1.9171277327233975e-05, "loss": 0.7534044981002808, "step": 1433 }, { "epoch": 0.34856587263004374, "grad_norm": 1.2501211562108512, "learning_rate": 1.9169674464895056e-05, "loss": 0.8750678300857544, "step": 1434 }, { "epoch": 0.34880894506562954, "grad_norm": 1.4157664035105757, "learning_rate": 1.9168070121114464e-05, "loss": 0.8399330377578735, "step": 1435 }, { "epoch": 0.34905201750121534, "grad_norm": 1.3359617313504601, "learning_rate": 1.9166464296151407e-05, "loss": 0.6697971224784851, "step": 1436 }, { "epoch": 0.34929508993680114, "grad_norm": 1.6235596769309975, "learning_rate": 1.9164856990265304e-05, "loss": 0.7522168159484863, "step": 1437 }, { "epoch": 0.349538162372387, "grad_norm": 1.179196802072765, "learning_rate": 1.916324820371584e-05, "loss": 0.6478496789932251, "step": 1438 }, { "epoch": 0.3497812348079728, "grad_norm": 1.3381433398176221, "learning_rate": 1.916163793676293e-05, "loss": 0.7635068297386169, "step": 1439 }, { "epoch": 0.3500243072435586, "grad_norm": 1.3106925959817428, "learning_rate": 1.9160026189666717e-05, "loss": 0.8201820254325867, "step": 1440 }, { "epoch": 0.3502673796791444, "grad_norm": 1.3946385458835253, "learning_rate": 1.9158412962687604e-05, "loss": 0.7063964605331421, "step": 1441 }, { "epoch": 0.3505104521147302, "grad_norm": 1.1540115167873053, "learning_rate": 1.9156798256086212e-05, "loss": 0.7905441522598267, "step": 1442 }, { "epoch": 0.350753524550316, "grad_norm": 1.2393924844925672, "learning_rate": 1.9155182070123418e-05, "loss": 0.5918336510658264, "step": 1443 }, { "epoch": 0.3509965969859018, "grad_norm": 1.1049033406312319, "learning_rate": 1.9153564405060328e-05, "loss": 0.6354016065597534, "step": 1444 }, { "epoch": 0.3512396694214876, "grad_norm": 1.1728921914600685, "learning_rate": 1.9151945261158295e-05, "loss": 0.6912660598754883, "step": 1445 }, { "epoch": 0.3514827418570734, "grad_norm": 1.181923580068804, "learning_rate": 1.9150324638678896e-05, "loss": 0.7930930852890015, "step": 1446 }, { "epoch": 0.3517258142926592, "grad_norm": 1.1738699967993051, "learning_rate": 1.9148702537883964e-05, "loss": 0.8027001619338989, "step": 1447 }, { "epoch": 0.351968886728245, "grad_norm": 1.1572915447310774, "learning_rate": 1.9147078959035563e-05, "loss": 0.6558631062507629, "step": 1448 }, { "epoch": 0.3522119591638308, "grad_norm": 1.2839202590590253, "learning_rate": 1.9145453902396e-05, "loss": 0.7203360795974731, "step": 1449 }, { "epoch": 0.3524550315994166, "grad_norm": 1.5369302039397275, "learning_rate": 1.914382736822781e-05, "loss": 0.5998920202255249, "step": 1450 }, { "epoch": 0.35269810403500246, "grad_norm": 1.4389127404410458, "learning_rate": 1.9142199356793772e-05, "loss": 0.666991651058197, "step": 1451 }, { "epoch": 0.35294117647058826, "grad_norm": 1.2679418967385472, "learning_rate": 1.914056986835692e-05, "loss": 0.6492263078689575, "step": 1452 }, { "epoch": 0.35318424890617406, "grad_norm": 1.2333153061474342, "learning_rate": 1.9138938903180496e-05, "loss": 0.8534078598022461, "step": 1453 }, { "epoch": 0.35342732134175986, "grad_norm": 1.2489840372842829, "learning_rate": 1.9137306461528006e-05, "loss": 0.6485500931739807, "step": 1454 }, { "epoch": 0.35367039377734566, "grad_norm": 1.4579638676626407, "learning_rate": 1.9135672543663185e-05, "loss": 0.7620161771774292, "step": 1455 }, { "epoch": 0.35391346621293146, "grad_norm": 1.397292051081568, "learning_rate": 1.913403714985e-05, "loss": 0.7917711734771729, "step": 1456 }, { "epoch": 0.35415653864851726, "grad_norm": 1.361871415542578, "learning_rate": 1.9132400280352672e-05, "loss": 0.7129045724868774, "step": 1457 }, { "epoch": 0.35439961108410306, "grad_norm": 1.3295046327129754, "learning_rate": 1.9130761935435648e-05, "loss": 0.8113116025924683, "step": 1458 }, { "epoch": 0.35464268351968886, "grad_norm": 1.2839740009259697, "learning_rate": 1.912912211536361e-05, "loss": 0.6051050424575806, "step": 1459 }, { "epoch": 0.35488575595527466, "grad_norm": 1.3456338758982525, "learning_rate": 1.91274808204015e-05, "loss": 0.807937741279602, "step": 1460 }, { "epoch": 0.35512882839086046, "grad_norm": 1.2251455444363193, "learning_rate": 1.9125838050814472e-05, "loss": 0.6446660757064819, "step": 1461 }, { "epoch": 0.35537190082644626, "grad_norm": 1.4761092268844294, "learning_rate": 1.9124193806867936e-05, "loss": 0.7891366481781006, "step": 1462 }, { "epoch": 0.35561497326203206, "grad_norm": 1.4721135392278706, "learning_rate": 1.912254808882753e-05, "loss": 0.795897364616394, "step": 1463 }, { "epoch": 0.35585804569761786, "grad_norm": 1.2708529904042454, "learning_rate": 1.9120900896959134e-05, "loss": 0.6449159383773804, "step": 1464 }, { "epoch": 0.3561011181332037, "grad_norm": 1.248083747392446, "learning_rate": 1.9119252231528874e-05, "loss": 0.6848781108856201, "step": 1465 }, { "epoch": 0.3563441905687895, "grad_norm": 1.273175973889451, "learning_rate": 1.9117602092803095e-05, "loss": 0.8199577331542969, "step": 1466 }, { "epoch": 0.3565872630043753, "grad_norm": 1.4384688586406318, "learning_rate": 1.91159504810484e-05, "loss": 0.6632829308509827, "step": 1467 }, { "epoch": 0.3568303354399611, "grad_norm": 1.644479008648437, "learning_rate": 1.911429739653162e-05, "loss": 0.7537722587585449, "step": 1468 }, { "epoch": 0.3570734078755469, "grad_norm": 1.2602812807642185, "learning_rate": 1.911264283951982e-05, "loss": 0.6308338642120361, "step": 1469 }, { "epoch": 0.3573164803111327, "grad_norm": 1.4456122847662753, "learning_rate": 1.9110986810280315e-05, "loss": 0.7581157684326172, "step": 1470 }, { "epoch": 0.3575595527467185, "grad_norm": 1.5093633560858812, "learning_rate": 1.9109329309080648e-05, "loss": 0.730604350566864, "step": 1471 }, { "epoch": 0.3578026251823043, "grad_norm": 1.444702140171859, "learning_rate": 1.9107670336188605e-05, "loss": 0.7349516153335571, "step": 1472 }, { "epoch": 0.3580456976178901, "grad_norm": 1.4896795144406174, "learning_rate": 1.9106009891872203e-05, "loss": 0.7161098718643188, "step": 1473 }, { "epoch": 0.3582887700534759, "grad_norm": 1.6526114538161294, "learning_rate": 1.9104347976399708e-05, "loss": 0.7113515734672546, "step": 1474 }, { "epoch": 0.3585318424890617, "grad_norm": 1.5132889296180427, "learning_rate": 1.910268459003961e-05, "loss": 0.7652366161346436, "step": 1475 }, { "epoch": 0.3587749149246475, "grad_norm": 1.5103501947173423, "learning_rate": 1.9101019733060647e-05, "loss": 0.6387653350830078, "step": 1476 }, { "epoch": 0.3590179873602333, "grad_norm": 1.315121103174933, "learning_rate": 1.9099353405731794e-05, "loss": 0.6991258859634399, "step": 1477 }, { "epoch": 0.3592610597958191, "grad_norm": 1.1224698607676518, "learning_rate": 1.909768560832226e-05, "loss": 0.6015151739120483, "step": 1478 }, { "epoch": 0.359504132231405, "grad_norm": 1.3667002475681738, "learning_rate": 1.9096016341101488e-05, "loss": 0.648205041885376, "step": 1479 }, { "epoch": 0.3597472046669908, "grad_norm": 1.4355150213082153, "learning_rate": 1.9094345604339164e-05, "loss": 0.611162543296814, "step": 1480 }, { "epoch": 0.3599902771025766, "grad_norm": 1.313510392030864, "learning_rate": 1.9092673398305216e-05, "loss": 0.7861765027046204, "step": 1481 }, { "epoch": 0.3602333495381624, "grad_norm": 1.0822710681886245, "learning_rate": 1.9090999723269793e-05, "loss": 0.4856724739074707, "step": 1482 }, { "epoch": 0.3604764219737482, "grad_norm": 1.456741318942601, "learning_rate": 1.9089324579503303e-05, "loss": 0.7492325901985168, "step": 1483 }, { "epoch": 0.360719494409334, "grad_norm": 1.8728054229742095, "learning_rate": 1.9087647967276374e-05, "loss": 0.7435591816902161, "step": 1484 }, { "epoch": 0.3609625668449198, "grad_norm": 1.314109755751763, "learning_rate": 1.908596988685988e-05, "loss": 0.6217185258865356, "step": 1485 }, { "epoch": 0.3612056392805056, "grad_norm": 1.3860946572236452, "learning_rate": 1.908429033852493e-05, "loss": 0.8971785306930542, "step": 1486 }, { "epoch": 0.3614487117160914, "grad_norm": 1.241937136675419, "learning_rate": 1.9082609322542866e-05, "loss": 0.7954738736152649, "step": 1487 }, { "epoch": 0.3616917841516772, "grad_norm": 1.2702906591716183, "learning_rate": 1.9080926839185275e-05, "loss": 0.8019617795944214, "step": 1488 }, { "epoch": 0.361934856587263, "grad_norm": 1.2155422090435568, "learning_rate": 1.9079242888723977e-05, "loss": 0.6003443002700806, "step": 1489 }, { "epoch": 0.3621779290228488, "grad_norm": 1.5703451625420914, "learning_rate": 1.9077557471431024e-05, "loss": 0.7049282789230347, "step": 1490 }, { "epoch": 0.3624210014584346, "grad_norm": 1.322815432230437, "learning_rate": 1.9075870587578715e-05, "loss": 0.7148921489715576, "step": 1491 }, { "epoch": 0.36266407389402044, "grad_norm": 1.2058971182626177, "learning_rate": 1.907418223743958e-05, "loss": 0.6837296485900879, "step": 1492 }, { "epoch": 0.36290714632960624, "grad_norm": 1.386996593622911, "learning_rate": 1.9072492421286384e-05, "loss": 0.611450731754303, "step": 1493 }, { "epoch": 0.36315021876519205, "grad_norm": 1.172382380331113, "learning_rate": 1.9070801139392136e-05, "loss": 0.7703243494033813, "step": 1494 }, { "epoch": 0.36339329120077785, "grad_norm": 1.2535858003133884, "learning_rate": 1.9069108392030076e-05, "loss": 0.709388017654419, "step": 1495 }, { "epoch": 0.36363636363636365, "grad_norm": 1.2967353493765983, "learning_rate": 1.906741417947368e-05, "loss": 0.5839183330535889, "step": 1496 }, { "epoch": 0.36387943607194945, "grad_norm": 1.177869892234178, "learning_rate": 1.9065718501996663e-05, "loss": 0.6880878210067749, "step": 1497 }, { "epoch": 0.36412250850753525, "grad_norm": 1.1178033952317072, "learning_rate": 1.9064021359872983e-05, "loss": 0.5742567777633667, "step": 1498 }, { "epoch": 0.36436558094312105, "grad_norm": 1.253422375352366, "learning_rate": 1.906232275337682e-05, "loss": 0.8475919961929321, "step": 1499 }, { "epoch": 0.36460865337870685, "grad_norm": 1.3498546733218368, "learning_rate": 1.9060622682782605e-05, "loss": 0.6032763719558716, "step": 1500 }, { "epoch": 0.36485172581429265, "grad_norm": 1.3594278823455546, "learning_rate": 1.9058921148364996e-05, "loss": 0.6421465873718262, "step": 1501 }, { "epoch": 0.36509479824987845, "grad_norm": 1.230280845720498, "learning_rate": 1.905721815039889e-05, "loss": 0.7958266735076904, "step": 1502 }, { "epoch": 0.36533787068546425, "grad_norm": 1.4024272582115473, "learning_rate": 1.9055513689159423e-05, "loss": 0.6471452116966248, "step": 1503 }, { "epoch": 0.36558094312105005, "grad_norm": 1.1298870936782666, "learning_rate": 1.9053807764921966e-05, "loss": 0.6187902092933655, "step": 1504 }, { "epoch": 0.36582401555663585, "grad_norm": 1.3268546058517956, "learning_rate": 1.9052100377962127e-05, "loss": 0.7356570959091187, "step": 1505 }, { "epoch": 0.3660670879922217, "grad_norm": 1.2777744913562588, "learning_rate": 1.9050391528555747e-05, "loss": 0.7959809899330139, "step": 1506 }, { "epoch": 0.3663101604278075, "grad_norm": 1.4255106046504062, "learning_rate": 1.9048681216978908e-05, "loss": 0.6436896324157715, "step": 1507 }, { "epoch": 0.3665532328633933, "grad_norm": 1.4223728882726805, "learning_rate": 1.9046969443507923e-05, "loss": 0.8452944755554199, "step": 1508 }, { "epoch": 0.3667963052989791, "grad_norm": 1.4000573789582549, "learning_rate": 1.904525620841935e-05, "loss": 0.7722115516662598, "step": 1509 }, { "epoch": 0.3670393777345649, "grad_norm": 1.5282399181541704, "learning_rate": 1.9043541511989967e-05, "loss": 0.6969318389892578, "step": 1510 }, { "epoch": 0.3672824501701507, "grad_norm": 1.2891608280384586, "learning_rate": 1.9041825354496807e-05, "loss": 0.7600188255310059, "step": 1511 }, { "epoch": 0.3675255226057365, "grad_norm": 1.3495559367041463, "learning_rate": 1.904010773621713e-05, "loss": 0.6158434152603149, "step": 1512 }, { "epoch": 0.3677685950413223, "grad_norm": 1.2022814108252022, "learning_rate": 1.9038388657428428e-05, "loss": 0.6658272743225098, "step": 1513 }, { "epoch": 0.3680116674769081, "grad_norm": 1.2355202617016965, "learning_rate": 1.9036668118408436e-05, "loss": 0.6755532622337341, "step": 1514 }, { "epoch": 0.3682547399124939, "grad_norm": 1.2025130153762469, "learning_rate": 1.9034946119435117e-05, "loss": 0.7855615615844727, "step": 1515 }, { "epoch": 0.3684978123480797, "grad_norm": 1.5825948944719326, "learning_rate": 1.9033222660786685e-05, "loss": 0.6769456267356873, "step": 1516 }, { "epoch": 0.3687408847836655, "grad_norm": 1.5220057447884325, "learning_rate": 1.9031497742741573e-05, "loss": 0.6338355541229248, "step": 1517 }, { "epoch": 0.3689839572192513, "grad_norm": 1.2162825154615091, "learning_rate": 1.902977136557846e-05, "loss": 0.7081514596939087, "step": 1518 }, { "epoch": 0.36922702965483717, "grad_norm": 1.2656351534888268, "learning_rate": 1.9028043529576253e-05, "loss": 0.669585108757019, "step": 1519 }, { "epoch": 0.36947010209042297, "grad_norm": 1.2843823353683543, "learning_rate": 1.9026314235014105e-05, "loss": 0.5191412568092346, "step": 1520 }, { "epoch": 0.36971317452600877, "grad_norm": 1.364432159469831, "learning_rate": 1.9024583482171392e-05, "loss": 0.6209925413131714, "step": 1521 }, { "epoch": 0.36995624696159457, "grad_norm": 1.4187445298301478, "learning_rate": 1.902285127132774e-05, "loss": 0.7775666117668152, "step": 1522 }, { "epoch": 0.37019931939718037, "grad_norm": 1.132829370444938, "learning_rate": 1.9021117602762994e-05, "loss": 0.567660391330719, "step": 1523 }, { "epoch": 0.37044239183276617, "grad_norm": 1.2936552026019317, "learning_rate": 1.901938247675725e-05, "loss": 0.6729240417480469, "step": 1524 }, { "epoch": 0.37068546426835197, "grad_norm": 1.1313121878241124, "learning_rate": 1.9017645893590834e-05, "loss": 0.8145706653594971, "step": 1525 }, { "epoch": 0.3709285367039378, "grad_norm": 1.5966417688762844, "learning_rate": 1.9015907853544304e-05, "loss": 0.907994270324707, "step": 1526 }, { "epoch": 0.3711716091395236, "grad_norm": 1.2473847888933878, "learning_rate": 1.901416835689845e-05, "loss": 0.6636871695518494, "step": 1527 }, { "epoch": 0.3714146815751094, "grad_norm": 1.3720566116185744, "learning_rate": 1.9012427403934314e-05, "loss": 0.7303731441497803, "step": 1528 }, { "epoch": 0.3716577540106952, "grad_norm": 1.4849323798881797, "learning_rate": 1.9010684994933154e-05, "loss": 0.6694195866584778, "step": 1529 }, { "epoch": 0.371900826446281, "grad_norm": 1.2117806790239218, "learning_rate": 1.9008941130176474e-05, "loss": 0.6277292966842651, "step": 1530 }, { "epoch": 0.3721438988818668, "grad_norm": 1.5250842115620806, "learning_rate": 1.9007195809946012e-05, "loss": 0.7104818820953369, "step": 1531 }, { "epoch": 0.3723869713174526, "grad_norm": 1.4458240454522155, "learning_rate": 1.9005449034523735e-05, "loss": 0.6606448888778687, "step": 1532 }, { "epoch": 0.37263004375303843, "grad_norm": 1.1091420337315836, "learning_rate": 1.900370080419186e-05, "loss": 0.5677917003631592, "step": 1533 }, { "epoch": 0.37287311618862423, "grad_norm": 1.3607904741628551, "learning_rate": 1.9001951119232814e-05, "loss": 0.6626967787742615, "step": 1534 }, { "epoch": 0.37311618862421003, "grad_norm": 1.3572079732958984, "learning_rate": 1.9000199979929288e-05, "loss": 0.7062698602676392, "step": 1535 }, { "epoch": 0.37335926105979583, "grad_norm": 1.352449110232025, "learning_rate": 1.899844738656419e-05, "loss": 0.8443552851676941, "step": 1536 }, { "epoch": 0.37360233349538163, "grad_norm": 1.3585168464207735, "learning_rate": 1.8996693339420662e-05, "loss": 0.7096296548843384, "step": 1537 }, { "epoch": 0.37384540593096743, "grad_norm": 1.4736260159387362, "learning_rate": 1.8994937838782092e-05, "loss": 0.6613030433654785, "step": 1538 }, { "epoch": 0.37408847836655323, "grad_norm": 1.3993733434893876, "learning_rate": 1.899318088493209e-05, "loss": 0.6889849901199341, "step": 1539 }, { "epoch": 0.37433155080213903, "grad_norm": 1.5068254356846904, "learning_rate": 1.8991422478154513e-05, "loss": 0.7741506695747375, "step": 1540 }, { "epoch": 0.37457462323772484, "grad_norm": 1.3050063171442567, "learning_rate": 1.8989662618733446e-05, "loss": 0.6420302987098694, "step": 1541 }, { "epoch": 0.37481769567331064, "grad_norm": 1.446005688432262, "learning_rate": 1.8987901306953208e-05, "loss": 0.6833999752998352, "step": 1542 }, { "epoch": 0.37506076810889644, "grad_norm": 1.1720953131106733, "learning_rate": 1.8986138543098357e-05, "loss": 0.7232087254524231, "step": 1543 }, { "epoch": 0.37530384054448224, "grad_norm": 1.496763665130026, "learning_rate": 1.898437432745368e-05, "loss": 0.8455933332443237, "step": 1544 }, { "epoch": 0.37554691298006804, "grad_norm": 1.267788586643059, "learning_rate": 1.8982608660304204e-05, "loss": 0.7700250744819641, "step": 1545 }, { "epoch": 0.37578998541565384, "grad_norm": 1.1743922000130151, "learning_rate": 1.8980841541935188e-05, "loss": 0.6741982698440552, "step": 1546 }, { "epoch": 0.3760330578512397, "grad_norm": 1.4881132241861352, "learning_rate": 1.8979072972632124e-05, "loss": 0.7654949426651001, "step": 1547 }, { "epoch": 0.3762761302868255, "grad_norm": 1.6378341493848927, "learning_rate": 1.897730295268074e-05, "loss": 0.6314157247543335, "step": 1548 }, { "epoch": 0.3765192027224113, "grad_norm": 1.1827508787353678, "learning_rate": 1.8975531482366998e-05, "loss": 0.7103589773178101, "step": 1549 }, { "epoch": 0.3767622751579971, "grad_norm": 1.3805361847325017, "learning_rate": 1.8973758561977097e-05, "loss": 0.7558531761169434, "step": 1550 }, { "epoch": 0.3770053475935829, "grad_norm": 1.3193168834189162, "learning_rate": 1.897198419179747e-05, "loss": 0.6878142356872559, "step": 1551 }, { "epoch": 0.3772484200291687, "grad_norm": 1.5805106311504087, "learning_rate": 1.8970208372114772e-05, "loss": 0.7506130337715149, "step": 1552 }, { "epoch": 0.3774914924647545, "grad_norm": 1.27650461895392, "learning_rate": 1.8968431103215912e-05, "loss": 0.6849209070205688, "step": 1553 }, { "epoch": 0.3777345649003403, "grad_norm": 1.3126300779668076, "learning_rate": 1.896665238538802e-05, "loss": 0.7303389310836792, "step": 1554 }, { "epoch": 0.3779776373359261, "grad_norm": 1.05536111023623, "learning_rate": 1.8964872218918463e-05, "loss": 0.6179507970809937, "step": 1555 }, { "epoch": 0.3782207097715119, "grad_norm": 1.4227341702694372, "learning_rate": 1.8963090604094846e-05, "loss": 0.7396026253700256, "step": 1556 }, { "epoch": 0.3784637822070977, "grad_norm": 1.0499535840217318, "learning_rate": 1.8961307541205003e-05, "loss": 0.7363289594650269, "step": 1557 }, { "epoch": 0.3787068546426835, "grad_norm": 1.1931808072695824, "learning_rate": 1.8959523030536998e-05, "loss": 0.652863621711731, "step": 1558 }, { "epoch": 0.3789499270782693, "grad_norm": 1.2034570649582366, "learning_rate": 1.8957737072379143e-05, "loss": 0.6277251243591309, "step": 1559 }, { "epoch": 0.37919299951385516, "grad_norm": 1.6945346210637027, "learning_rate": 1.8955949667019967e-05, "loss": 0.7690232992172241, "step": 1560 }, { "epoch": 0.37943607194944096, "grad_norm": 1.2268961883726326, "learning_rate": 1.895416081474825e-05, "loss": 0.6320134401321411, "step": 1561 }, { "epoch": 0.37967914438502676, "grad_norm": 1.3743001833326267, "learning_rate": 1.8952370515852992e-05, "loss": 0.7502520084381104, "step": 1562 }, { "epoch": 0.37992221682061256, "grad_norm": 1.2798005028172375, "learning_rate": 1.8950578770623427e-05, "loss": 0.7665501236915588, "step": 1563 }, { "epoch": 0.38016528925619836, "grad_norm": 1.4062556658843763, "learning_rate": 1.8948785579349034e-05, "loss": 0.6748603582382202, "step": 1564 }, { "epoch": 0.38040836169178416, "grad_norm": 1.4037695225544748, "learning_rate": 1.8946990942319518e-05, "loss": 0.7547556161880493, "step": 1565 }, { "epoch": 0.38065143412736996, "grad_norm": 1.1644353056940522, "learning_rate": 1.8945194859824818e-05, "loss": 0.6673097610473633, "step": 1566 }, { "epoch": 0.38089450656295576, "grad_norm": 1.0876592612313325, "learning_rate": 1.8943397332155108e-05, "loss": 0.5768203139305115, "step": 1567 }, { "epoch": 0.38113757899854156, "grad_norm": 1.5017263405634274, "learning_rate": 1.8941598359600787e-05, "loss": 0.6109243035316467, "step": 1568 }, { "epoch": 0.38138065143412736, "grad_norm": 1.522985359165199, "learning_rate": 1.8939797942452503e-05, "loss": 0.6068160533905029, "step": 1569 }, { "epoch": 0.38162372386971316, "grad_norm": 1.314571384949586, "learning_rate": 1.8937996081001127e-05, "loss": 0.6284438371658325, "step": 1570 }, { "epoch": 0.38186679630529896, "grad_norm": 1.3063297250460315, "learning_rate": 1.8936192775537764e-05, "loss": 0.8061387538909912, "step": 1571 }, { "epoch": 0.38210986874088476, "grad_norm": 1.1882103877560881, "learning_rate": 1.8934388026353756e-05, "loss": 0.758294939994812, "step": 1572 }, { "epoch": 0.38235294117647056, "grad_norm": 1.418945651363421, "learning_rate": 1.8932581833740676e-05, "loss": 0.6596380472183228, "step": 1573 }, { "epoch": 0.3825960136120564, "grad_norm": 1.357863007176774, "learning_rate": 1.893077419799033e-05, "loss": 0.6576957702636719, "step": 1574 }, { "epoch": 0.3828390860476422, "grad_norm": 1.304673092512403, "learning_rate": 1.8928965119394754e-05, "loss": 0.6882742643356323, "step": 1575 }, { "epoch": 0.383082158483228, "grad_norm": 1.385226401300088, "learning_rate": 1.8927154598246226e-05, "loss": 0.7289403676986694, "step": 1576 }, { "epoch": 0.3833252309188138, "grad_norm": 1.4066811958023162, "learning_rate": 1.892534263483725e-05, "loss": 0.7741618156433105, "step": 1577 }, { "epoch": 0.3835683033543996, "grad_norm": 1.2794026096764544, "learning_rate": 1.8923529229460563e-05, "loss": 0.6583641767501831, "step": 1578 }, { "epoch": 0.3838113757899854, "grad_norm": 1.1360754158769313, "learning_rate": 1.892171438240914e-05, "loss": 0.6292515993118286, "step": 1579 }, { "epoch": 0.3840544482255712, "grad_norm": 1.339433095807259, "learning_rate": 1.8919898093976177e-05, "loss": 0.7694394588470459, "step": 1580 }, { "epoch": 0.384297520661157, "grad_norm": 1.1779587198095196, "learning_rate": 1.8918080364455122e-05, "loss": 0.5844123363494873, "step": 1581 }, { "epoch": 0.3845405930967428, "grad_norm": 1.3420687369456348, "learning_rate": 1.8916261194139637e-05, "loss": 0.6190102696418762, "step": 1582 }, { "epoch": 0.3847836655323286, "grad_norm": 1.479365964848955, "learning_rate": 1.8914440583323634e-05, "loss": 0.6997513771057129, "step": 1583 }, { "epoch": 0.3850267379679144, "grad_norm": 1.2467203395891868, "learning_rate": 1.8912618532301242e-05, "loss": 0.6881051063537598, "step": 1584 }, { "epoch": 0.3852698104035002, "grad_norm": 1.4114297191494987, "learning_rate": 1.8910795041366827e-05, "loss": 0.8315212726593018, "step": 1585 }, { "epoch": 0.385512882839086, "grad_norm": 1.3286473868222894, "learning_rate": 1.8908970110814996e-05, "loss": 0.5767401456832886, "step": 1586 }, { "epoch": 0.3857559552746718, "grad_norm": 1.1867795535840784, "learning_rate": 1.890714374094058e-05, "loss": 0.6203277111053467, "step": 1587 }, { "epoch": 0.3859990277102577, "grad_norm": 1.3510278930094173, "learning_rate": 1.8905315932038647e-05, "loss": 0.6508141756057739, "step": 1588 }, { "epoch": 0.3862421001458435, "grad_norm": 1.4445568271862776, "learning_rate": 1.8903486684404496e-05, "loss": 0.7425417900085449, "step": 1589 }, { "epoch": 0.3864851725814293, "grad_norm": 1.3521755692343622, "learning_rate": 1.890165599833365e-05, "loss": 0.6390239000320435, "step": 1590 }, { "epoch": 0.3867282450170151, "grad_norm": 1.2012683495617937, "learning_rate": 1.8899823874121885e-05, "loss": 0.6042712926864624, "step": 1591 }, { "epoch": 0.3869713174526009, "grad_norm": 1.234146175800257, "learning_rate": 1.889799031206519e-05, "loss": 0.7147626876831055, "step": 1592 }, { "epoch": 0.3872143898881867, "grad_norm": 1.3533107377193914, "learning_rate": 1.889615531245979e-05, "loss": 0.5991209745407104, "step": 1593 }, { "epoch": 0.3874574623237725, "grad_norm": 1.2774169245496478, "learning_rate": 1.8894318875602148e-05, "loss": 0.5505607724189758, "step": 1594 }, { "epoch": 0.3877005347593583, "grad_norm": 1.5158421292135986, "learning_rate": 1.889248100178896e-05, "loss": 0.652824342250824, "step": 1595 }, { "epoch": 0.3879436071949441, "grad_norm": 1.2037828853844155, "learning_rate": 1.889064169131715e-05, "loss": 0.5546165704727173, "step": 1596 }, { "epoch": 0.3881866796305299, "grad_norm": 1.355390152793672, "learning_rate": 1.888880094448387e-05, "loss": 0.7655389904975891, "step": 1597 }, { "epoch": 0.3884297520661157, "grad_norm": 1.4212531014932488, "learning_rate": 1.8886958761586512e-05, "loss": 0.6246581077575684, "step": 1598 }, { "epoch": 0.3886728245017015, "grad_norm": 1.4840721852805308, "learning_rate": 1.88851151429227e-05, "loss": 0.7996087074279785, "step": 1599 }, { "epoch": 0.3889158969372873, "grad_norm": 1.3393055089292154, "learning_rate": 1.888327008879028e-05, "loss": 0.8242354989051819, "step": 1600 }, { "epoch": 0.38915896937287314, "grad_norm": 1.4187567727974169, "learning_rate": 1.888142359948734e-05, "loss": 0.6817584037780762, "step": 1601 }, { "epoch": 0.38940204180845894, "grad_norm": 1.5975799519442953, "learning_rate": 1.88795756753122e-05, "loss": 0.7136951088905334, "step": 1602 }, { "epoch": 0.38964511424404474, "grad_norm": 1.4333181723378674, "learning_rate": 1.8877726316563404e-05, "loss": 0.7801160216331482, "step": 1603 }, { "epoch": 0.38988818667963054, "grad_norm": 1.0888332548202053, "learning_rate": 1.8875875523539733e-05, "loss": 0.6369972229003906, "step": 1604 }, { "epoch": 0.39013125911521634, "grad_norm": 1.38677119203936, "learning_rate": 1.88740232965402e-05, "loss": 0.6709492206573486, "step": 1605 }, { "epoch": 0.39037433155080214, "grad_norm": 1.548200069201089, "learning_rate": 1.8872169635864046e-05, "loss": 0.5790488719940186, "step": 1606 }, { "epoch": 0.39061740398638795, "grad_norm": 1.1099895854117487, "learning_rate": 1.887031454181075e-05, "loss": 0.7414664030075073, "step": 1607 }, { "epoch": 0.39086047642197375, "grad_norm": 1.3062682432576107, "learning_rate": 1.8868458014680015e-05, "loss": 0.6435613632202148, "step": 1608 }, { "epoch": 0.39110354885755955, "grad_norm": 1.2125360838970793, "learning_rate": 1.8866600054771783e-05, "loss": 0.6498529314994812, "step": 1609 }, { "epoch": 0.39134662129314535, "grad_norm": 1.301370453325965, "learning_rate": 1.8864740662386224e-05, "loss": 0.7235862016677856, "step": 1610 }, { "epoch": 0.39158969372873115, "grad_norm": 1.1562531310195503, "learning_rate": 1.886287983782374e-05, "loss": 0.7913183569908142, "step": 1611 }, { "epoch": 0.39183276616431695, "grad_norm": 1.2366129991563428, "learning_rate": 1.8861017581384954e-05, "loss": 0.6701571941375732, "step": 1612 }, { "epoch": 0.39207583859990275, "grad_norm": 1.4702928313388306, "learning_rate": 1.8859153893370737e-05, "loss": 0.6352282762527466, "step": 1613 }, { "epoch": 0.39231891103548855, "grad_norm": 1.3322684600970913, "learning_rate": 1.8857288774082188e-05, "loss": 0.7277434468269348, "step": 1614 }, { "epoch": 0.3925619834710744, "grad_norm": 1.2202637501083013, "learning_rate": 1.885542222382063e-05, "loss": 0.6661701202392578, "step": 1615 }, { "epoch": 0.3928050559066602, "grad_norm": 1.346138171880016, "learning_rate": 1.885355424288762e-05, "loss": 0.5959542989730835, "step": 1616 }, { "epoch": 0.393048128342246, "grad_norm": 1.2578121059779186, "learning_rate": 1.885168483158494e-05, "loss": 0.6395928859710693, "step": 1617 }, { "epoch": 0.3932912007778318, "grad_norm": 1.4724160008887153, "learning_rate": 1.8849813990214623e-05, "loss": 0.7747945189476013, "step": 1618 }, { "epoch": 0.3935342732134176, "grad_norm": 1.1760654196611162, "learning_rate": 1.8847941719078912e-05, "loss": 0.740959644317627, "step": 1619 }, { "epoch": 0.3937773456490034, "grad_norm": 1.238617734725801, "learning_rate": 1.8846068018480292e-05, "loss": 0.8213573694229126, "step": 1620 }, { "epoch": 0.3940204180845892, "grad_norm": 1.3294241984542854, "learning_rate": 1.8844192888721473e-05, "loss": 0.7148739695549011, "step": 1621 }, { "epoch": 0.394263490520175, "grad_norm": 1.3554342043146335, "learning_rate": 1.88423163301054e-05, "loss": 0.6508815288543701, "step": 1622 }, { "epoch": 0.3945065629557608, "grad_norm": 1.2847719417039316, "learning_rate": 1.884043834293525e-05, "loss": 0.6488940715789795, "step": 1623 }, { "epoch": 0.3947496353913466, "grad_norm": 1.3452016293137532, "learning_rate": 1.8838558927514424e-05, "loss": 0.7051806449890137, "step": 1624 }, { "epoch": 0.3949927078269324, "grad_norm": 1.3247924946768852, "learning_rate": 1.883667808414656e-05, "loss": 0.8175389766693115, "step": 1625 }, { "epoch": 0.3952357802625182, "grad_norm": 1.2887012799423683, "learning_rate": 1.8834795813135523e-05, "loss": 0.7071004509925842, "step": 1626 }, { "epoch": 0.395478852698104, "grad_norm": 1.9147216305440184, "learning_rate": 1.883291211478542e-05, "loss": 0.7153759598731995, "step": 1627 }, { "epoch": 0.39572192513368987, "grad_norm": 1.2526604028767592, "learning_rate": 1.8831026989400562e-05, "loss": 0.6840586066246033, "step": 1628 }, { "epoch": 0.39596499756927567, "grad_norm": 1.1721199736962764, "learning_rate": 1.8829140437285525e-05, "loss": 0.5924205183982849, "step": 1629 }, { "epoch": 0.39620807000486147, "grad_norm": 1.267914936584069, "learning_rate": 1.8827252458745086e-05, "loss": 0.6841392517089844, "step": 1630 }, { "epoch": 0.39645114244044727, "grad_norm": 1.303631117825759, "learning_rate": 1.882536305408427e-05, "loss": 0.7208907604217529, "step": 1631 }, { "epoch": 0.39669421487603307, "grad_norm": 1.2753539952681516, "learning_rate": 1.8823472223608323e-05, "loss": 0.6297111511230469, "step": 1632 }, { "epoch": 0.39693728731161887, "grad_norm": 1.3283961850873407, "learning_rate": 1.8821579967622733e-05, "loss": 0.7237212657928467, "step": 1633 }, { "epoch": 0.39718035974720467, "grad_norm": 1.2905250404505175, "learning_rate": 1.8819686286433204e-05, "loss": 0.6745909452438354, "step": 1634 }, { "epoch": 0.39742343218279047, "grad_norm": 1.233718366392807, "learning_rate": 1.8817791180345674e-05, "loss": 0.7571200132369995, "step": 1635 }, { "epoch": 0.39766650461837627, "grad_norm": 1.4214868148436264, "learning_rate": 1.8815894649666323e-05, "loss": 0.7527279853820801, "step": 1636 }, { "epoch": 0.39790957705396207, "grad_norm": 1.2402969341233432, "learning_rate": 1.8813996694701548e-05, "loss": 0.7128415107727051, "step": 1637 }, { "epoch": 0.39815264948954787, "grad_norm": 1.2397994503367362, "learning_rate": 1.881209731575798e-05, "loss": 0.6955904960632324, "step": 1638 }, { "epoch": 0.3983957219251337, "grad_norm": 1.285296008291614, "learning_rate": 1.881019651314248e-05, "loss": 0.7172486782073975, "step": 1639 }, { "epoch": 0.3986387943607195, "grad_norm": 1.394707973129383, "learning_rate": 1.880829428716214e-05, "loss": 0.629897952079773, "step": 1640 }, { "epoch": 0.3988818667963053, "grad_norm": 1.3236662344364614, "learning_rate": 1.8806390638124284e-05, "loss": 0.6187134385108948, "step": 1641 }, { "epoch": 0.39912493923189113, "grad_norm": 1.3967743507352304, "learning_rate": 1.8804485566336456e-05, "loss": 0.6415038108825684, "step": 1642 }, { "epoch": 0.39936801166747693, "grad_norm": 1.3418433895330726, "learning_rate": 1.8802579072106445e-05, "loss": 0.808864951133728, "step": 1643 }, { "epoch": 0.39961108410306273, "grad_norm": 1.5995403346687154, "learning_rate": 1.8800671155742256e-05, "loss": 0.828477144241333, "step": 1644 }, { "epoch": 0.39985415653864853, "grad_norm": 1.1911468551305453, "learning_rate": 1.8798761817552135e-05, "loss": 0.666496992111206, "step": 1645 }, { "epoch": 0.40009722897423433, "grad_norm": 1.1468948079949144, "learning_rate": 1.879685105784455e-05, "loss": 0.6489532589912415, "step": 1646 }, { "epoch": 0.40034030140982013, "grad_norm": 1.2918475397077962, "learning_rate": 1.87949388769282e-05, "loss": 0.5374866724014282, "step": 1647 }, { "epoch": 0.40058337384540593, "grad_norm": 1.85633437017578, "learning_rate": 1.8793025275112015e-05, "loss": 0.7204412221908569, "step": 1648 }, { "epoch": 0.40082644628099173, "grad_norm": 1.31852212090768, "learning_rate": 1.8791110252705152e-05, "loss": 0.7240411043167114, "step": 1649 }, { "epoch": 0.40106951871657753, "grad_norm": 1.3132167180458814, "learning_rate": 1.8789193810017008e-05, "loss": 0.6461485624313354, "step": 1650 }, { "epoch": 0.40131259115216333, "grad_norm": 1.445080417428181, "learning_rate": 1.878727594735719e-05, "loss": 0.6811811327934265, "step": 1651 }, { "epoch": 0.40155566358774913, "grad_norm": 1.1805600764302902, "learning_rate": 1.8785356665035556e-05, "loss": 0.6833555102348328, "step": 1652 }, { "epoch": 0.40179873602333493, "grad_norm": 1.3290033804628547, "learning_rate": 1.8783435963362178e-05, "loss": 0.5717357397079468, "step": 1653 }, { "epoch": 0.40204180845892074, "grad_norm": 1.178677464445936, "learning_rate": 1.878151384264736e-05, "loss": 0.5903016328811646, "step": 1654 }, { "epoch": 0.40228488089450654, "grad_norm": 1.2649398057619736, "learning_rate": 1.877959030320164e-05, "loss": 0.7832697629928589, "step": 1655 }, { "epoch": 0.4025279533300924, "grad_norm": 1.4611027412289916, "learning_rate": 1.8777665345335786e-05, "loss": 0.8351551294326782, "step": 1656 }, { "epoch": 0.4027710257656782, "grad_norm": 1.3295841349801616, "learning_rate": 1.877573896936079e-05, "loss": 0.6658279895782471, "step": 1657 }, { "epoch": 0.403014098201264, "grad_norm": 1.2617705768226417, "learning_rate": 1.8773811175587866e-05, "loss": 0.8216201663017273, "step": 1658 }, { "epoch": 0.4032571706368498, "grad_norm": 1.3626679450126828, "learning_rate": 1.8771881964328478e-05, "loss": 0.6120314002037048, "step": 1659 }, { "epoch": 0.4035002430724356, "grad_norm": 1.3801234345436173, "learning_rate": 1.8769951335894304e-05, "loss": 0.6707159280776978, "step": 1660 }, { "epoch": 0.4037433155080214, "grad_norm": 1.2806986382712446, "learning_rate": 1.8768019290597254e-05, "loss": 0.7029656171798706, "step": 1661 }, { "epoch": 0.4039863879436072, "grad_norm": 1.4597064647271014, "learning_rate": 1.8766085828749462e-05, "loss": 0.6535124778747559, "step": 1662 }, { "epoch": 0.404229460379193, "grad_norm": 1.3949136913088305, "learning_rate": 1.8764150950663304e-05, "loss": 0.7188602685928345, "step": 1663 }, { "epoch": 0.4044725328147788, "grad_norm": 1.3490399864744664, "learning_rate": 1.8762214656651365e-05, "loss": 0.5911474823951721, "step": 1664 }, { "epoch": 0.4047156052503646, "grad_norm": 1.1993556610661342, "learning_rate": 1.8760276947026483e-05, "loss": 0.6236312389373779, "step": 1665 }, { "epoch": 0.4049586776859504, "grad_norm": 1.2702786057253765, "learning_rate": 1.8758337822101706e-05, "loss": 0.6580731868743896, "step": 1666 }, { "epoch": 0.4052017501215362, "grad_norm": 1.275211035890556, "learning_rate": 1.8756397282190318e-05, "loss": 0.5718228816986084, "step": 1667 }, { "epoch": 0.405444822557122, "grad_norm": 1.2649548023670767, "learning_rate": 1.8754455327605827e-05, "loss": 0.672907829284668, "step": 1668 }, { "epoch": 0.40568789499270785, "grad_norm": 1.3442424248704656, "learning_rate": 1.8752511958661977e-05, "loss": 0.7931507229804993, "step": 1669 }, { "epoch": 0.40593096742829365, "grad_norm": 1.1862280222914505, "learning_rate": 1.8750567175672735e-05, "loss": 0.7038679122924805, "step": 1670 }, { "epoch": 0.40617403986387945, "grad_norm": 1.3899797592297118, "learning_rate": 1.8748620978952298e-05, "loss": 0.6592289209365845, "step": 1671 }, { "epoch": 0.40641711229946526, "grad_norm": 1.3585724626578537, "learning_rate": 1.874667336881509e-05, "loss": 0.6307651996612549, "step": 1672 }, { "epoch": 0.40666018473505106, "grad_norm": 1.5768228664964068, "learning_rate": 1.8744724345575764e-05, "loss": 0.7091399431228638, "step": 1673 }, { "epoch": 0.40690325717063686, "grad_norm": 1.8493324249564531, "learning_rate": 1.8742773909549206e-05, "loss": 0.6029103994369507, "step": 1674 }, { "epoch": 0.40714632960622266, "grad_norm": 1.3810981071681039, "learning_rate": 1.8740822061050522e-05, "loss": 0.6625827550888062, "step": 1675 }, { "epoch": 0.40738940204180846, "grad_norm": 1.3598756295088323, "learning_rate": 1.8738868800395053e-05, "loss": 0.7558481693267822, "step": 1676 }, { "epoch": 0.40763247447739426, "grad_norm": 1.5343444611694264, "learning_rate": 1.873691412789836e-05, "loss": 0.7254225015640259, "step": 1677 }, { "epoch": 0.40787554691298006, "grad_norm": 1.516482769668994, "learning_rate": 1.8734958043876247e-05, "loss": 0.8636709451675415, "step": 1678 }, { "epoch": 0.40811861934856586, "grad_norm": 1.358278450682691, "learning_rate": 1.873300054864473e-05, "loss": 0.9230217933654785, "step": 1679 }, { "epoch": 0.40836169178415166, "grad_norm": 1.4082991599105623, "learning_rate": 1.873104164252006e-05, "loss": 0.6415776014328003, "step": 1680 }, { "epoch": 0.40860476421973746, "grad_norm": 1.242886520281229, "learning_rate": 1.8729081325818716e-05, "loss": 0.6771867275238037, "step": 1681 }, { "epoch": 0.40884783665532326, "grad_norm": 1.3299451329362848, "learning_rate": 1.8727119598857407e-05, "loss": 0.8993208408355713, "step": 1682 }, { "epoch": 0.4090909090909091, "grad_norm": 1.3221330868354058, "learning_rate": 1.8725156461953065e-05, "loss": 0.5849524140357971, "step": 1683 }, { "epoch": 0.4093339815264949, "grad_norm": 1.3470178198228653, "learning_rate": 1.872319191542285e-05, "loss": 0.721538782119751, "step": 1684 }, { "epoch": 0.4095770539620807, "grad_norm": 1.4871577005216192, "learning_rate": 1.8721225959584156e-05, "loss": 0.7916505932807922, "step": 1685 }, { "epoch": 0.4098201263976665, "grad_norm": 1.337373334561388, "learning_rate": 1.87192585947546e-05, "loss": 0.5196511149406433, "step": 1686 }, { "epoch": 0.4100631988332523, "grad_norm": 1.5266391764534655, "learning_rate": 1.8717289821252023e-05, "loss": 0.8273380994796753, "step": 1687 }, { "epoch": 0.4103062712688381, "grad_norm": 1.2255347671336856, "learning_rate": 1.87153196393945e-05, "loss": 0.7085404396057129, "step": 1688 }, { "epoch": 0.4105493437044239, "grad_norm": 1.2831878744134133, "learning_rate": 1.871334804950033e-05, "loss": 0.6945085525512695, "step": 1689 }, { "epoch": 0.4107924161400097, "grad_norm": 1.2387489569661194, "learning_rate": 1.871137505188805e-05, "loss": 0.6103079319000244, "step": 1690 }, { "epoch": 0.4110354885755955, "grad_norm": 1.612379529767458, "learning_rate": 1.87094006468764e-05, "loss": 0.7369747161865234, "step": 1691 }, { "epoch": 0.4112785610111813, "grad_norm": 1.0874763803143503, "learning_rate": 1.8707424834784372e-05, "loss": 0.6075820922851562, "step": 1692 }, { "epoch": 0.4115216334467671, "grad_norm": 1.5057433857238527, "learning_rate": 1.8705447615931172e-05, "loss": 0.6570993661880493, "step": 1693 }, { "epoch": 0.4117647058823529, "grad_norm": 1.1135629705747687, "learning_rate": 1.870346899063624e-05, "loss": 0.6673749089241028, "step": 1694 }, { "epoch": 0.4120077783179387, "grad_norm": 1.2737081027729529, "learning_rate": 1.8701488959219243e-05, "loss": 0.6269890666007996, "step": 1695 }, { "epoch": 0.4122508507535245, "grad_norm": 1.2434563327205888, "learning_rate": 1.8699507522000068e-05, "loss": 0.7597387433052063, "step": 1696 }, { "epoch": 0.4124939231891104, "grad_norm": 1.3319132998964023, "learning_rate": 1.8697524679298832e-05, "loss": 0.6546935439109802, "step": 1697 }, { "epoch": 0.4127369956246962, "grad_norm": 1.3442187689751077, "learning_rate": 1.8695540431435883e-05, "loss": 0.6884953379631042, "step": 1698 }, { "epoch": 0.412980068060282, "grad_norm": 1.3095302460854934, "learning_rate": 1.8693554778731793e-05, "loss": 0.601271390914917, "step": 1699 }, { "epoch": 0.4132231404958678, "grad_norm": 1.517395440013041, "learning_rate": 1.8691567721507363e-05, "loss": 0.8655628561973572, "step": 1700 }, { "epoch": 0.4134662129314536, "grad_norm": 1.5007232004015243, "learning_rate": 1.8689579260083622e-05, "loss": 0.6479473114013672, "step": 1701 }, { "epoch": 0.4137092853670394, "grad_norm": 1.4130123136830464, "learning_rate": 1.8687589394781816e-05, "loss": 0.7770971059799194, "step": 1702 }, { "epoch": 0.4139523578026252, "grad_norm": 1.347894462157954, "learning_rate": 1.8685598125923432e-05, "loss": 0.6797027587890625, "step": 1703 }, { "epoch": 0.414195430238211, "grad_norm": 1.1785990737866336, "learning_rate": 1.8683605453830176e-05, "loss": 0.5638515949249268, "step": 1704 }, { "epoch": 0.4144385026737968, "grad_norm": 1.4463974380957505, "learning_rate": 1.8681611378823977e-05, "loss": 0.7704883813858032, "step": 1705 }, { "epoch": 0.4146815751093826, "grad_norm": 1.4937603587356834, "learning_rate": 1.8679615901227004e-05, "loss": 0.7458217740058899, "step": 1706 }, { "epoch": 0.4149246475449684, "grad_norm": 1.1759998827554738, "learning_rate": 1.8677619021361636e-05, "loss": 0.6845710873603821, "step": 1707 }, { "epoch": 0.4151677199805542, "grad_norm": 1.0956827268051699, "learning_rate": 1.867562073955049e-05, "loss": 0.7292819023132324, "step": 1708 }, { "epoch": 0.41541079241614, "grad_norm": 1.6232792638260771, "learning_rate": 1.8673621056116405e-05, "loss": 0.652341365814209, "step": 1709 }, { "epoch": 0.41565386485172584, "grad_norm": 1.4676301178521423, "learning_rate": 1.8671619971382447e-05, "loss": 0.6864720582962036, "step": 1710 }, { "epoch": 0.41589693728731164, "grad_norm": 1.722090793784531, "learning_rate": 1.8669617485671912e-05, "loss": 0.696182131767273, "step": 1711 }, { "epoch": 0.41614000972289744, "grad_norm": 1.275355553666848, "learning_rate": 1.866761359930832e-05, "loss": 0.7000823020935059, "step": 1712 }, { "epoch": 0.41638308215848324, "grad_norm": 1.5044615035192475, "learning_rate": 1.866560831261541e-05, "loss": 0.5850309133529663, "step": 1713 }, { "epoch": 0.41662615459406904, "grad_norm": 1.357396844983364, "learning_rate": 1.8663601625917153e-05, "loss": 0.7549617886543274, "step": 1714 }, { "epoch": 0.41686922702965484, "grad_norm": 1.1093550332169484, "learning_rate": 1.8661593539537753e-05, "loss": 0.5510966181755066, "step": 1715 }, { "epoch": 0.41711229946524064, "grad_norm": 1.3346932655039678, "learning_rate": 1.865958405380164e-05, "loss": 0.7287638187408447, "step": 1716 }, { "epoch": 0.41735537190082644, "grad_norm": 1.482424650710343, "learning_rate": 1.865757316903345e-05, "loss": 0.6593359112739563, "step": 1717 }, { "epoch": 0.41759844433641224, "grad_norm": 1.3751335791941148, "learning_rate": 1.8655560885558064e-05, "loss": 0.7917884588241577, "step": 1718 }, { "epoch": 0.41784151677199804, "grad_norm": 1.5169815656249892, "learning_rate": 1.8653547203700587e-05, "loss": 0.7484254240989685, "step": 1719 }, { "epoch": 0.41808458920758385, "grad_norm": 1.4584603931049556, "learning_rate": 1.8651532123786347e-05, "loss": 0.653382420539856, "step": 1720 }, { "epoch": 0.41832766164316965, "grad_norm": 1.2267308830656947, "learning_rate": 1.8649515646140896e-05, "loss": 0.584089457988739, "step": 1721 }, { "epoch": 0.41857073407875545, "grad_norm": 1.3656252800387558, "learning_rate": 1.8647497771090012e-05, "loss": 0.8105655908584595, "step": 1722 }, { "epoch": 0.41881380651434125, "grad_norm": 1.9889420698629006, "learning_rate": 1.8645478498959705e-05, "loss": 0.9724801182746887, "step": 1723 }, { "epoch": 0.4190568789499271, "grad_norm": 1.1897231797255812, "learning_rate": 1.8643457830076202e-05, "loss": 0.8427716493606567, "step": 1724 }, { "epoch": 0.4192999513855129, "grad_norm": 1.8414705920927334, "learning_rate": 1.8641435764765966e-05, "loss": 0.6717017889022827, "step": 1725 }, { "epoch": 0.4195430238210987, "grad_norm": 1.3091192604492872, "learning_rate": 1.8639412303355674e-05, "loss": 0.6695753931999207, "step": 1726 }, { "epoch": 0.4197860962566845, "grad_norm": 1.2416718434440528, "learning_rate": 1.8637387446172233e-05, "loss": 0.6133882999420166, "step": 1727 }, { "epoch": 0.4200291686922703, "grad_norm": 1.2671135465650014, "learning_rate": 1.8635361193542778e-05, "loss": 0.6571630239486694, "step": 1728 }, { "epoch": 0.4202722411278561, "grad_norm": 1.3573387283019966, "learning_rate": 1.8633333545794672e-05, "loss": 0.641566276550293, "step": 1729 }, { "epoch": 0.4205153135634419, "grad_norm": 1.3576749497619298, "learning_rate": 1.8631304503255495e-05, "loss": 0.7216918468475342, "step": 1730 }, { "epoch": 0.4207583859990277, "grad_norm": 1.2434132101330297, "learning_rate": 1.8629274066253057e-05, "loss": 0.750120222568512, "step": 1731 }, { "epoch": 0.4210014584346135, "grad_norm": 1.3377154898249455, "learning_rate": 1.8627242235115392e-05, "loss": 0.8462324142456055, "step": 1732 }, { "epoch": 0.4212445308701993, "grad_norm": 1.5069869862854157, "learning_rate": 1.8625209010170766e-05, "loss": 0.7679934501647949, "step": 1733 }, { "epoch": 0.4214876033057851, "grad_norm": 1.359938266956452, "learning_rate": 1.8623174391747656e-05, "loss": 0.769467830657959, "step": 1734 }, { "epoch": 0.4217306757413709, "grad_norm": 1.1790166438840934, "learning_rate": 1.8621138380174778e-05, "loss": 0.6604779958724976, "step": 1735 }, { "epoch": 0.4219737481769567, "grad_norm": 1.1730003529233899, "learning_rate": 1.8619100975781067e-05, "loss": 0.770888090133667, "step": 1736 }, { "epoch": 0.42221682061254256, "grad_norm": 1.3817681880791106, "learning_rate": 1.8617062178895684e-05, "loss": 0.6268760561943054, "step": 1737 }, { "epoch": 0.42245989304812837, "grad_norm": 1.266391577058236, "learning_rate": 1.8615021989848016e-05, "loss": 0.5864993929862976, "step": 1738 }, { "epoch": 0.42270296548371417, "grad_norm": 1.1240563026741064, "learning_rate": 1.8612980408967668e-05, "loss": 0.6210548281669617, "step": 1739 }, { "epoch": 0.42294603791929997, "grad_norm": 1.3317581071478808, "learning_rate": 1.8610937436584477e-05, "loss": 0.7054125666618347, "step": 1740 }, { "epoch": 0.42318911035488577, "grad_norm": 1.2806972778096768, "learning_rate": 1.8608893073028506e-05, "loss": 0.6000015139579773, "step": 1741 }, { "epoch": 0.42343218279047157, "grad_norm": 1.1818257296144963, "learning_rate": 1.8606847318630042e-05, "loss": 0.6011673212051392, "step": 1742 }, { "epoch": 0.42367525522605737, "grad_norm": 1.4117933947797345, "learning_rate": 1.8604800173719585e-05, "loss": 0.6365809440612793, "step": 1743 }, { "epoch": 0.42391832766164317, "grad_norm": 1.434081170195468, "learning_rate": 1.860275163862788e-05, "loss": 0.6154351830482483, "step": 1744 }, { "epoch": 0.42416140009722897, "grad_norm": 1.353067669611154, "learning_rate": 1.860070171368588e-05, "loss": 0.7834395170211792, "step": 1745 }, { "epoch": 0.42440447253281477, "grad_norm": 1.6185339576051565, "learning_rate": 1.8598650399224773e-05, "loss": 0.6788878440856934, "step": 1746 }, { "epoch": 0.42464754496840057, "grad_norm": 1.4479589009768474, "learning_rate": 1.859659769557596e-05, "loss": 0.7276086211204529, "step": 1747 }, { "epoch": 0.42489061740398637, "grad_norm": 1.409088395160234, "learning_rate": 1.859454360307108e-05, "loss": 0.6880705952644348, "step": 1748 }, { "epoch": 0.42513368983957217, "grad_norm": 1.572240223473282, "learning_rate": 1.8592488122041987e-05, "loss": 0.7385244965553284, "step": 1749 }, { "epoch": 0.42537676227515797, "grad_norm": 1.3780926295242544, "learning_rate": 1.8590431252820763e-05, "loss": 0.6709411144256592, "step": 1750 }, { "epoch": 0.4256198347107438, "grad_norm": 1.4617972321826227, "learning_rate": 1.8588372995739708e-05, "loss": 0.9633876085281372, "step": 1751 }, { "epoch": 0.42586290714632963, "grad_norm": 1.588419139016173, "learning_rate": 1.858631335113136e-05, "loss": 0.7127997875213623, "step": 1752 }, { "epoch": 0.42610597958191543, "grad_norm": 1.3004490913712874, "learning_rate": 1.8584252319328463e-05, "loss": 0.6478302478790283, "step": 1753 }, { "epoch": 0.42634905201750123, "grad_norm": 1.2616698987783057, "learning_rate": 1.8582189900664003e-05, "loss": 0.6426427364349365, "step": 1754 }, { "epoch": 0.42659212445308703, "grad_norm": 1.3422911440406475, "learning_rate": 1.8580126095471184e-05, "loss": 0.7243432998657227, "step": 1755 }, { "epoch": 0.42683519688867283, "grad_norm": 1.4193557616173216, "learning_rate": 1.8578060904083423e-05, "loss": 0.7005342841148376, "step": 1756 }, { "epoch": 0.42707826932425863, "grad_norm": 1.3690176176719937, "learning_rate": 1.8575994326834378e-05, "loss": 0.5599045753479004, "step": 1757 }, { "epoch": 0.42732134175984443, "grad_norm": 1.4450756308165866, "learning_rate": 1.857392636405791e-05, "loss": 0.7861731648445129, "step": 1758 }, { "epoch": 0.42756441419543023, "grad_norm": 1.2557502493037527, "learning_rate": 1.8571857016088134e-05, "loss": 0.6609847545623779, "step": 1759 }, { "epoch": 0.42780748663101603, "grad_norm": 1.3178380806149093, "learning_rate": 1.8569786283259362e-05, "loss": 0.7222149968147278, "step": 1760 }, { "epoch": 0.42805055906660183, "grad_norm": 1.1693415650696382, "learning_rate": 1.8567714165906138e-05, "loss": 0.7081785202026367, "step": 1761 }, { "epoch": 0.42829363150218763, "grad_norm": 1.177277232101964, "learning_rate": 1.8565640664363235e-05, "loss": 0.7651737928390503, "step": 1762 }, { "epoch": 0.42853670393777343, "grad_norm": 1.313622516948698, "learning_rate": 1.8563565778965638e-05, "loss": 0.6585830450057983, "step": 1763 }, { "epoch": 0.42877977637335923, "grad_norm": 1.4402363165667942, "learning_rate": 1.8561489510048572e-05, "loss": 0.8085141777992249, "step": 1764 }, { "epoch": 0.4290228488089451, "grad_norm": 1.2539245645627466, "learning_rate": 1.8559411857947472e-05, "loss": 0.6308262348175049, "step": 1765 }, { "epoch": 0.4292659212445309, "grad_norm": 1.1998103351729286, "learning_rate": 1.8557332822998e-05, "loss": 0.5969010591506958, "step": 1766 }, { "epoch": 0.4295089936801167, "grad_norm": 2.53648194208898, "learning_rate": 1.8555252405536048e-05, "loss": 0.8128308057785034, "step": 1767 }, { "epoch": 0.4297520661157025, "grad_norm": 1.1553002223586113, "learning_rate": 1.8553170605897722e-05, "loss": 0.6097874641418457, "step": 1768 }, { "epoch": 0.4299951385512883, "grad_norm": 1.569152249547088, "learning_rate": 1.855108742441935e-05, "loss": 0.5943145751953125, "step": 1769 }, { "epoch": 0.4302382109868741, "grad_norm": 1.255079091249289, "learning_rate": 1.85490028614375e-05, "loss": 0.7562021017074585, "step": 1770 }, { "epoch": 0.4304812834224599, "grad_norm": 1.5139697551449405, "learning_rate": 1.854691691728894e-05, "loss": 0.6728061437606812, "step": 1771 }, { "epoch": 0.4307243558580457, "grad_norm": 1.27764135137225, "learning_rate": 1.854482959231068e-05, "loss": 0.7972357273101807, "step": 1772 }, { "epoch": 0.4309674282936315, "grad_norm": 1.4544961680818091, "learning_rate": 1.8542740886839942e-05, "loss": 0.7786002159118652, "step": 1773 }, { "epoch": 0.4312105007292173, "grad_norm": 1.393225972224507, "learning_rate": 1.8540650801214174e-05, "loss": 0.6248173117637634, "step": 1774 }, { "epoch": 0.4314535731648031, "grad_norm": 1.4632426228819444, "learning_rate": 1.8538559335771053e-05, "loss": 0.7003263235092163, "step": 1775 }, { "epoch": 0.4316966456003889, "grad_norm": 1.122454179342716, "learning_rate": 1.853646649084847e-05, "loss": 0.8159335255622864, "step": 1776 }, { "epoch": 0.4319397180359747, "grad_norm": 1.3253350366563412, "learning_rate": 1.8534372266784543e-05, "loss": 0.6225530505180359, "step": 1777 }, { "epoch": 0.43218279047156055, "grad_norm": 1.1909540774178293, "learning_rate": 1.853227666391761e-05, "loss": 0.5519946217536926, "step": 1778 }, { "epoch": 0.43242586290714635, "grad_norm": 1.565136983011389, "learning_rate": 1.8530179682586237e-05, "loss": 0.7334610223770142, "step": 1779 }, { "epoch": 0.43266893534273215, "grad_norm": 1.3513839074336387, "learning_rate": 1.8528081323129213e-05, "loss": 0.6658732295036316, "step": 1780 }, { "epoch": 0.43291200777831795, "grad_norm": 1.3881689301823985, "learning_rate": 1.8525981585885537e-05, "loss": 0.7559835910797119, "step": 1781 }, { "epoch": 0.43315508021390375, "grad_norm": 1.1077495407073257, "learning_rate": 1.852388047119445e-05, "loss": 0.7635390162467957, "step": 1782 }, { "epoch": 0.43339815264948955, "grad_norm": 1.1330879836713712, "learning_rate": 1.8521777979395398e-05, "loss": 0.6406751871109009, "step": 1783 }, { "epoch": 0.43364122508507535, "grad_norm": 1.37267484390866, "learning_rate": 1.8519674110828065e-05, "loss": 0.8009914755821228, "step": 1784 }, { "epoch": 0.43388429752066116, "grad_norm": 1.244880313239792, "learning_rate": 1.8517568865832338e-05, "loss": 0.7687413692474365, "step": 1785 }, { "epoch": 0.43412736995624696, "grad_norm": 1.1592971603601279, "learning_rate": 1.851546224474835e-05, "loss": 0.7734946012496948, "step": 1786 }, { "epoch": 0.43437044239183276, "grad_norm": 1.1247979898345277, "learning_rate": 1.8513354247916436e-05, "loss": 0.7218956351280212, "step": 1787 }, { "epoch": 0.43461351482741856, "grad_norm": 1.641988390418595, "learning_rate": 1.8511244875677166e-05, "loss": 0.6542111039161682, "step": 1788 }, { "epoch": 0.43485658726300436, "grad_norm": 1.2408641131277276, "learning_rate": 1.8509134128371325e-05, "loss": 0.7899558544158936, "step": 1789 }, { "epoch": 0.43509965969859016, "grad_norm": 1.3904529282585978, "learning_rate": 1.8507022006339924e-05, "loss": 0.5551470518112183, "step": 1790 }, { "epoch": 0.43534273213417596, "grad_norm": 1.3691356928573515, "learning_rate": 1.8504908509924197e-05, "loss": 0.8577858209609985, "step": 1791 }, { "epoch": 0.4355858045697618, "grad_norm": 1.3212289440261276, "learning_rate": 1.850279363946559e-05, "loss": 0.7467259168624878, "step": 1792 }, { "epoch": 0.4358288770053476, "grad_norm": 1.4090142071934417, "learning_rate": 1.8500677395305788e-05, "loss": 0.8178038597106934, "step": 1793 }, { "epoch": 0.4360719494409334, "grad_norm": 1.5594019349699435, "learning_rate": 1.8498559777786686e-05, "loss": 0.7155634164810181, "step": 1794 }, { "epoch": 0.4363150218765192, "grad_norm": 1.1911721927031542, "learning_rate": 1.8496440787250406e-05, "loss": 0.536474347114563, "step": 1795 }, { "epoch": 0.436558094312105, "grad_norm": 1.4021055342556372, "learning_rate": 1.8494320424039284e-05, "loss": 0.6644238233566284, "step": 1796 }, { "epoch": 0.4368011667476908, "grad_norm": 1.1060117734681267, "learning_rate": 1.8492198688495884e-05, "loss": 0.5808477401733398, "step": 1797 }, { "epoch": 0.4370442391832766, "grad_norm": 4.895688574636102, "learning_rate": 1.8490075580962994e-05, "loss": 0.6417444944381714, "step": 1798 }, { "epoch": 0.4372873116188624, "grad_norm": 1.3336343038038183, "learning_rate": 1.8487951101783622e-05, "loss": 0.5710353851318359, "step": 1799 }, { "epoch": 0.4375303840544482, "grad_norm": 1.1358947969418978, "learning_rate": 1.8485825251300994e-05, "loss": 0.6941722631454468, "step": 1800 }, { "epoch": 0.437773456490034, "grad_norm": 1.2552042950598064, "learning_rate": 1.848369802985856e-05, "loss": 0.816792905330658, "step": 1801 }, { "epoch": 0.4380165289256198, "grad_norm": 0.9895309376255292, "learning_rate": 1.848156943779999e-05, "loss": 0.6694289445877075, "step": 1802 }, { "epoch": 0.4382596013612056, "grad_norm": 1.442185769648169, "learning_rate": 1.847943947546918e-05, "loss": 0.5636759400367737, "step": 1803 }, { "epoch": 0.4385026737967914, "grad_norm": 1.3930895178828993, "learning_rate": 1.8477308143210244e-05, "loss": 0.8085829019546509, "step": 1804 }, { "epoch": 0.4387457462323773, "grad_norm": 1.3367546326696538, "learning_rate": 1.8475175441367514e-05, "loss": 0.7422942519187927, "step": 1805 }, { "epoch": 0.4389888186679631, "grad_norm": 1.4515904311284638, "learning_rate": 1.8473041370285548e-05, "loss": 0.763198971748352, "step": 1806 }, { "epoch": 0.4392318911035489, "grad_norm": 1.4749936210600452, "learning_rate": 1.8470905930309127e-05, "loss": 0.6188011169433594, "step": 1807 }, { "epoch": 0.4394749635391347, "grad_norm": 1.2374798544364023, "learning_rate": 1.8468769121783246e-05, "loss": 0.7238942384719849, "step": 1808 }, { "epoch": 0.4397180359747205, "grad_norm": 1.4122634475137668, "learning_rate": 1.8466630945053127e-05, "loss": 0.7059062719345093, "step": 1809 }, { "epoch": 0.4399611084103063, "grad_norm": 1.22668903748182, "learning_rate": 1.8464491400464212e-05, "loss": 0.879237949848175, "step": 1810 }, { "epoch": 0.4402041808458921, "grad_norm": 1.3646390828841475, "learning_rate": 1.8462350488362163e-05, "loss": 0.7083419561386108, "step": 1811 }, { "epoch": 0.4404472532814779, "grad_norm": 1.7968882949978058, "learning_rate": 1.8460208209092863e-05, "loss": 0.6421512365341187, "step": 1812 }, { "epoch": 0.4406903257170637, "grad_norm": 1.1970559214681329, "learning_rate": 1.8458064563002417e-05, "loss": 0.5784375667572021, "step": 1813 }, { "epoch": 0.4409333981526495, "grad_norm": 1.3366453547222439, "learning_rate": 1.8455919550437147e-05, "loss": 0.6079528331756592, "step": 1814 }, { "epoch": 0.4411764705882353, "grad_norm": 1.2022831121167918, "learning_rate": 1.84537731717436e-05, "loss": 0.7338815927505493, "step": 1815 }, { "epoch": 0.4414195430238211, "grad_norm": 1.538231438923874, "learning_rate": 1.8451625427268545e-05, "loss": 0.7378086447715759, "step": 1816 }, { "epoch": 0.4416626154594069, "grad_norm": 1.1654690585276437, "learning_rate": 1.8449476317358964e-05, "loss": 0.5728856921195984, "step": 1817 }, { "epoch": 0.4419056878949927, "grad_norm": 1.404528619091447, "learning_rate": 1.8447325842362073e-05, "loss": 0.6139215230941772, "step": 1818 }, { "epoch": 0.44214876033057854, "grad_norm": 1.4425562236579241, "learning_rate": 1.8445174002625287e-05, "loss": 0.7781782746315002, "step": 1819 }, { "epoch": 0.44239183276616434, "grad_norm": 1.3151332836768805, "learning_rate": 1.844302079849627e-05, "loss": 0.6524670124053955, "step": 1820 }, { "epoch": 0.44263490520175014, "grad_norm": 1.251581405615055, "learning_rate": 1.8440866230322877e-05, "loss": 0.6251903772354126, "step": 1821 }, { "epoch": 0.44287797763733594, "grad_norm": 1.2173466387528544, "learning_rate": 1.843871029845321e-05, "loss": 0.6527047753334045, "step": 1822 }, { "epoch": 0.44312105007292174, "grad_norm": 1.3807572639378083, "learning_rate": 1.8436553003235568e-05, "loss": 0.6940528154373169, "step": 1823 }, { "epoch": 0.44336412250850754, "grad_norm": 1.3610781226576196, "learning_rate": 1.843439434501849e-05, "loss": 0.6290214657783508, "step": 1824 }, { "epoch": 0.44360719494409334, "grad_norm": 1.1944691954650426, "learning_rate": 1.8432234324150716e-05, "loss": 0.6147068738937378, "step": 1825 }, { "epoch": 0.44385026737967914, "grad_norm": 1.612972850053772, "learning_rate": 1.843007294098123e-05, "loss": 0.7515444755554199, "step": 1826 }, { "epoch": 0.44409333981526494, "grad_norm": 1.2970195110705343, "learning_rate": 1.842791019585921e-05, "loss": 0.6028544902801514, "step": 1827 }, { "epoch": 0.44433641225085074, "grad_norm": 1.3982628796891543, "learning_rate": 1.8425746089134075e-05, "loss": 0.6710168123245239, "step": 1828 }, { "epoch": 0.44457948468643654, "grad_norm": 1.3940094982239926, "learning_rate": 1.8423580621155447e-05, "loss": 0.6782693266868591, "step": 1829 }, { "epoch": 0.44482255712202234, "grad_norm": 1.207778719165753, "learning_rate": 1.8421413792273184e-05, "loss": 0.5028232932090759, "step": 1830 }, { "epoch": 0.44506562955760814, "grad_norm": 1.3077828581975697, "learning_rate": 1.841924560283735e-05, "loss": 0.590801477432251, "step": 1831 }, { "epoch": 0.44530870199319394, "grad_norm": 1.5213240012687985, "learning_rate": 1.8417076053198248e-05, "loss": 0.693977952003479, "step": 1832 }, { "epoch": 0.4455517744287798, "grad_norm": 1.4301919118760318, "learning_rate": 1.841490514370637e-05, "loss": 0.7143231630325317, "step": 1833 }, { "epoch": 0.4457948468643656, "grad_norm": 1.5781573157784425, "learning_rate": 1.8412732874712458e-05, "loss": 0.738835334777832, "step": 1834 }, { "epoch": 0.4460379192999514, "grad_norm": 1.500419719268955, "learning_rate": 1.8410559246567453e-05, "loss": 0.7328401803970337, "step": 1835 }, { "epoch": 0.4462809917355372, "grad_norm": 1.3927247205160804, "learning_rate": 1.8408384259622535e-05, "loss": 0.7893892526626587, "step": 1836 }, { "epoch": 0.446524064171123, "grad_norm": 1.3639235142726698, "learning_rate": 1.8406207914229083e-05, "loss": 0.5805754065513611, "step": 1837 }, { "epoch": 0.4467671366067088, "grad_norm": 1.3000579852021668, "learning_rate": 1.8404030210738705e-05, "loss": 0.6393237113952637, "step": 1838 }, { "epoch": 0.4470102090422946, "grad_norm": 1.2534140243801442, "learning_rate": 1.8401851149503233e-05, "loss": 0.8025974035263062, "step": 1839 }, { "epoch": 0.4472532814778804, "grad_norm": 1.4241061425742223, "learning_rate": 1.8399670730874705e-05, "loss": 0.7419394254684448, "step": 1840 }, { "epoch": 0.4474963539134662, "grad_norm": 1.2338338918312255, "learning_rate": 1.83974889552054e-05, "loss": 0.6826434135437012, "step": 1841 }, { "epoch": 0.447739426349052, "grad_norm": 1.3848551144381096, "learning_rate": 1.8395305822847792e-05, "loss": 0.6430392861366272, "step": 1842 }, { "epoch": 0.4479824987846378, "grad_norm": 1.3257692616455223, "learning_rate": 1.839312133415459e-05, "loss": 0.5826048851013184, "step": 1843 }, { "epoch": 0.4482255712202236, "grad_norm": 1.3653067108746826, "learning_rate": 1.839093548947872e-05, "loss": 0.5992681384086609, "step": 1844 }, { "epoch": 0.4484686436558094, "grad_norm": 1.2687933988092004, "learning_rate": 1.8388748289173315e-05, "loss": 0.6926472187042236, "step": 1845 }, { "epoch": 0.44871171609139526, "grad_norm": 1.2971127475731825, "learning_rate": 1.8386559733591745e-05, "loss": 0.6341466903686523, "step": 1846 }, { "epoch": 0.44895478852698106, "grad_norm": 1.3652613902413115, "learning_rate": 1.838436982308759e-05, "loss": 0.6937010288238525, "step": 1847 }, { "epoch": 0.44919786096256686, "grad_norm": 1.21197511951975, "learning_rate": 1.838217855801464e-05, "loss": 0.8400942087173462, "step": 1848 }, { "epoch": 0.44944093339815266, "grad_norm": 1.4386539906689464, "learning_rate": 1.8379985938726926e-05, "loss": 0.7685527801513672, "step": 1849 }, { "epoch": 0.44968400583373846, "grad_norm": 1.302185790940337, "learning_rate": 1.837779196557868e-05, "loss": 0.6106222867965698, "step": 1850 }, { "epoch": 0.44992707826932427, "grad_norm": 1.0991432954516134, "learning_rate": 1.8375596638924355e-05, "loss": 0.6414058208465576, "step": 1851 }, { "epoch": 0.45017015070491007, "grad_norm": 1.2400167355309961, "learning_rate": 1.837339995911863e-05, "loss": 0.7034006118774414, "step": 1852 }, { "epoch": 0.45041322314049587, "grad_norm": 1.257203510115155, "learning_rate": 1.837120192651639e-05, "loss": 0.6194571256637573, "step": 1853 }, { "epoch": 0.45065629557608167, "grad_norm": 1.3123310860527182, "learning_rate": 1.8369002541472756e-05, "loss": 0.7145599722862244, "step": 1854 }, { "epoch": 0.45089936801166747, "grad_norm": 1.7154801639781263, "learning_rate": 1.8366801804343054e-05, "loss": 0.8116759657859802, "step": 1855 }, { "epoch": 0.45114244044725327, "grad_norm": 1.4891028243899216, "learning_rate": 1.836459971548283e-05, "loss": 0.670893669128418, "step": 1856 }, { "epoch": 0.45138551288283907, "grad_norm": 1.2847333297190966, "learning_rate": 1.836239627524785e-05, "loss": 0.8003957271575928, "step": 1857 }, { "epoch": 0.45162858531842487, "grad_norm": 1.3150488763821055, "learning_rate": 1.836019148399411e-05, "loss": 0.6188127994537354, "step": 1858 }, { "epoch": 0.45187165775401067, "grad_norm": 1.3037091942589623, "learning_rate": 1.83579853420778e-05, "loss": 0.6735391616821289, "step": 1859 }, { "epoch": 0.4521147301895965, "grad_norm": 1.323472526007604, "learning_rate": 1.835577784985535e-05, "loss": 0.571670651435852, "step": 1860 }, { "epoch": 0.4523578026251823, "grad_norm": 1.567961896034971, "learning_rate": 1.8353569007683397e-05, "loss": 0.6415408849716187, "step": 1861 }, { "epoch": 0.4526008750607681, "grad_norm": 1.2340170201084295, "learning_rate": 1.8351358815918798e-05, "loss": 0.7571409940719604, "step": 1862 }, { "epoch": 0.4528439474963539, "grad_norm": 1.433474701954068, "learning_rate": 1.8349147274918633e-05, "loss": 0.6303682327270508, "step": 1863 }, { "epoch": 0.4530870199319397, "grad_norm": 1.2942220222820913, "learning_rate": 1.834693438504019e-05, "loss": 0.7735791206359863, "step": 1864 }, { "epoch": 0.45333009236752553, "grad_norm": 1.087676296656075, "learning_rate": 1.8344720146640985e-05, "loss": 0.6199684739112854, "step": 1865 }, { "epoch": 0.45357316480311133, "grad_norm": 1.1909275991312285, "learning_rate": 1.8342504560078748e-05, "loss": 0.4860400855541229, "step": 1866 }, { "epoch": 0.45381623723869713, "grad_norm": 1.249276755813787, "learning_rate": 1.8340287625711423e-05, "loss": 0.7880285978317261, "step": 1867 }, { "epoch": 0.45405930967428293, "grad_norm": 1.3353808094991093, "learning_rate": 1.8338069343897177e-05, "loss": 0.6775597333908081, "step": 1868 }, { "epoch": 0.45430238210986873, "grad_norm": 1.5460738426678406, "learning_rate": 1.8335849714994394e-05, "loss": 0.6822093725204468, "step": 1869 }, { "epoch": 0.45454545454545453, "grad_norm": 1.2132363379177484, "learning_rate": 1.8333628739361673e-05, "loss": 0.5832206010818481, "step": 1870 }, { "epoch": 0.45478852698104033, "grad_norm": 1.672033908659176, "learning_rate": 1.8331406417357834e-05, "loss": 0.5680962204933167, "step": 1871 }, { "epoch": 0.45503159941662613, "grad_norm": 1.4062712548343523, "learning_rate": 1.8329182749341915e-05, "loss": 0.7256317138671875, "step": 1872 }, { "epoch": 0.45527467185221193, "grad_norm": 1.2575788829391925, "learning_rate": 1.8326957735673165e-05, "loss": 0.6504052877426147, "step": 1873 }, { "epoch": 0.4555177442877978, "grad_norm": 1.3379534446190977, "learning_rate": 1.832473137671105e-05, "loss": 0.8249928951263428, "step": 1874 }, { "epoch": 0.4557608167233836, "grad_norm": 1.3069338701855082, "learning_rate": 1.832250367281527e-05, "loss": 0.6448577642440796, "step": 1875 }, { "epoch": 0.4560038891589694, "grad_norm": 1.255633469117492, "learning_rate": 1.832027462434572e-05, "loss": 0.544681966304779, "step": 1876 }, { "epoch": 0.4562469615945552, "grad_norm": 1.237334818806487, "learning_rate": 1.8318044231662525e-05, "loss": 0.6915925741195679, "step": 1877 }, { "epoch": 0.456490034030141, "grad_norm": 1.3318788902360692, "learning_rate": 1.8315812495126026e-05, "loss": 0.7340656518936157, "step": 1878 }, { "epoch": 0.4567331064657268, "grad_norm": 1.2885701927327682, "learning_rate": 1.831357941509678e-05, "loss": 0.7290533781051636, "step": 1879 }, { "epoch": 0.4569761789013126, "grad_norm": 1.1816092654299326, "learning_rate": 1.831134499193556e-05, "loss": 0.7211471796035767, "step": 1880 }, { "epoch": 0.4572192513368984, "grad_norm": 1.2715197939152287, "learning_rate": 1.8309109226003358e-05, "loss": 0.6092303991317749, "step": 1881 }, { "epoch": 0.4574623237724842, "grad_norm": 1.3709871141442709, "learning_rate": 1.830687211766138e-05, "loss": 0.7080460786819458, "step": 1882 }, { "epoch": 0.45770539620807, "grad_norm": 1.194389333841586, "learning_rate": 1.8304633667271053e-05, "loss": 0.6349884271621704, "step": 1883 }, { "epoch": 0.4579484686436558, "grad_norm": 1.268071775729875, "learning_rate": 1.830239387519401e-05, "loss": 0.7368249893188477, "step": 1884 }, { "epoch": 0.4581915410792416, "grad_norm": 1.3381462586825998, "learning_rate": 1.8300152741792122e-05, "loss": 0.6830856800079346, "step": 1885 }, { "epoch": 0.4584346135148274, "grad_norm": 1.1231804537650887, "learning_rate": 1.8297910267427455e-05, "loss": 0.613176703453064, "step": 1886 }, { "epoch": 0.45867768595041325, "grad_norm": 1.3515940395009882, "learning_rate": 1.8295666452462302e-05, "loss": 0.6098706126213074, "step": 1887 }, { "epoch": 0.45892075838599905, "grad_norm": 1.2623042587480977, "learning_rate": 1.8293421297259173e-05, "loss": 0.5546097159385681, "step": 1888 }, { "epoch": 0.45916383082158485, "grad_norm": 1.3915763398777328, "learning_rate": 1.829117480218079e-05, "loss": 0.660478949546814, "step": 1889 }, { "epoch": 0.45940690325717065, "grad_norm": 1.711691053685325, "learning_rate": 1.8288926967590098e-05, "loss": 0.7760985493659973, "step": 1890 }, { "epoch": 0.45964997569275645, "grad_norm": 1.9135789864819506, "learning_rate": 1.8286677793850248e-05, "loss": 0.603452205657959, "step": 1891 }, { "epoch": 0.45989304812834225, "grad_norm": 1.097745276719622, "learning_rate": 1.8284427281324624e-05, "loss": 0.5905625224113464, "step": 1892 }, { "epoch": 0.46013612056392805, "grad_norm": 1.3767840903738917, "learning_rate": 1.8282175430376804e-05, "loss": 0.7072231769561768, "step": 1893 }, { "epoch": 0.46037919299951385, "grad_norm": 1.5339804224885452, "learning_rate": 1.82799222413706e-05, "loss": 0.7119278907775879, "step": 1894 }, { "epoch": 0.46062226543509965, "grad_norm": 1.4051433788888736, "learning_rate": 1.8277667714670042e-05, "loss": 0.772661566734314, "step": 1895 }, { "epoch": 0.46086533787068545, "grad_norm": 1.2306245799434035, "learning_rate": 1.8275411850639354e-05, "loss": 0.6157815456390381, "step": 1896 }, { "epoch": 0.46110841030627125, "grad_norm": 1.2616556011121036, "learning_rate": 1.8273154649643002e-05, "loss": 0.643831193447113, "step": 1897 }, { "epoch": 0.46135148274185706, "grad_norm": 1.3259945403392546, "learning_rate": 1.8270896112045654e-05, "loss": 0.6735708117485046, "step": 1898 }, { "epoch": 0.46159455517744286, "grad_norm": 1.2744165515068082, "learning_rate": 1.8268636238212196e-05, "loss": 0.7230342626571655, "step": 1899 }, { "epoch": 0.46183762761302866, "grad_norm": 1.3273868480328193, "learning_rate": 1.8266375028507724e-05, "loss": 0.726370096206665, "step": 1900 }, { "epoch": 0.4620807000486145, "grad_norm": 1.390409871543361, "learning_rate": 1.826411248329757e-05, "loss": 0.7144424915313721, "step": 1901 }, { "epoch": 0.4623237724842003, "grad_norm": 1.4920812907020555, "learning_rate": 1.8261848602947254e-05, "loss": 0.4875728487968445, "step": 1902 }, { "epoch": 0.4625668449197861, "grad_norm": 1.3522030375685712, "learning_rate": 1.8259583387822536e-05, "loss": 0.6252853870391846, "step": 1903 }, { "epoch": 0.4628099173553719, "grad_norm": 1.193574486886362, "learning_rate": 1.8257316838289378e-05, "loss": 0.7739750146865845, "step": 1904 }, { "epoch": 0.4630529897909577, "grad_norm": 1.2357688510531404, "learning_rate": 1.8255048954713957e-05, "loss": 0.6101632714271545, "step": 1905 }, { "epoch": 0.4632960622265435, "grad_norm": 1.3662425541802914, "learning_rate": 1.825277973746268e-05, "loss": 0.8003696203231812, "step": 1906 }, { "epoch": 0.4635391346621293, "grad_norm": 1.5589563594237212, "learning_rate": 1.825050918690215e-05, "loss": 0.7173917293548584, "step": 1907 }, { "epoch": 0.4637822070977151, "grad_norm": 1.2110469796980703, "learning_rate": 1.8248237303399193e-05, "loss": 0.4603995084762573, "step": 1908 }, { "epoch": 0.4640252795333009, "grad_norm": 1.1042863918394485, "learning_rate": 1.8245964087320856e-05, "loss": 0.5523555278778076, "step": 1909 }, { "epoch": 0.4642683519688867, "grad_norm": 1.1985930192576222, "learning_rate": 1.8243689539034403e-05, "loss": 0.5847833156585693, "step": 1910 }, { "epoch": 0.4645114244044725, "grad_norm": 1.3302470071672763, "learning_rate": 1.8241413658907296e-05, "loss": 0.6942115426063538, "step": 1911 }, { "epoch": 0.4647544968400583, "grad_norm": 1.1494719143437204, "learning_rate": 1.823913644730723e-05, "loss": 0.6544106602668762, "step": 1912 }, { "epoch": 0.4649975692756441, "grad_norm": 1.4722380418131737, "learning_rate": 1.8236857904602104e-05, "loss": 0.8295291066169739, "step": 1913 }, { "epoch": 0.46524064171123, "grad_norm": 1.0730422301386133, "learning_rate": 1.8234578031160045e-05, "loss": 0.43567121028900146, "step": 1914 }, { "epoch": 0.4654837141468158, "grad_norm": 1.3804466203253019, "learning_rate": 1.8232296827349376e-05, "loss": 0.5595642924308777, "step": 1915 }, { "epoch": 0.4657267865824016, "grad_norm": 1.378346254613674, "learning_rate": 1.8230014293538653e-05, "loss": 0.6688446998596191, "step": 1916 }, { "epoch": 0.4659698590179874, "grad_norm": 1.6348891435244608, "learning_rate": 1.822773043009664e-05, "loss": 0.5906520485877991, "step": 1917 }, { "epoch": 0.4662129314535732, "grad_norm": 1.6146510971202777, "learning_rate": 1.822544523739231e-05, "loss": 0.7821112871170044, "step": 1918 }, { "epoch": 0.466456003889159, "grad_norm": 1.2133921766472322, "learning_rate": 1.822315871579486e-05, "loss": 0.6317603588104248, "step": 1919 }, { "epoch": 0.4666990763247448, "grad_norm": 1.6849995929473236, "learning_rate": 1.8220870865673694e-05, "loss": 0.6430929899215698, "step": 1920 }, { "epoch": 0.4669421487603306, "grad_norm": 1.2502489260098733, "learning_rate": 1.8218581687398438e-05, "loss": 0.6589436531066895, "step": 1921 }, { "epoch": 0.4671852211959164, "grad_norm": 1.2088011014473456, "learning_rate": 1.8216291181338928e-05, "loss": 0.7362215518951416, "step": 1922 }, { "epoch": 0.4674282936315022, "grad_norm": 1.6453505620262296, "learning_rate": 1.8213999347865216e-05, "loss": 0.7399969100952148, "step": 1923 }, { "epoch": 0.467671366067088, "grad_norm": 1.2352264257100245, "learning_rate": 1.8211706187347564e-05, "loss": 0.5974297523498535, "step": 1924 }, { "epoch": 0.4679144385026738, "grad_norm": 1.2611686848310533, "learning_rate": 1.8209411700156457e-05, "loss": 0.6953728199005127, "step": 1925 }, { "epoch": 0.4681575109382596, "grad_norm": 1.4118932783995208, "learning_rate": 1.8207115886662588e-05, "loss": 0.5949053764343262, "step": 1926 }, { "epoch": 0.4684005833738454, "grad_norm": 1.187869054977078, "learning_rate": 1.8204818747236865e-05, "loss": 0.6325860023498535, "step": 1927 }, { "epoch": 0.46864365580943124, "grad_norm": 1.4270789633223093, "learning_rate": 1.8202520282250415e-05, "loss": 0.5885562300682068, "step": 1928 }, { "epoch": 0.46888672824501704, "grad_norm": 1.5322778179067702, "learning_rate": 1.820022049207457e-05, "loss": 0.7775979042053223, "step": 1929 }, { "epoch": 0.46912980068060284, "grad_norm": 1.3902110407964279, "learning_rate": 1.819791937708088e-05, "loss": 0.7179657220840454, "step": 1930 }, { "epoch": 0.46937287311618864, "grad_norm": 1.295185641638586, "learning_rate": 1.8195616937641118e-05, "loss": 0.5970947742462158, "step": 1931 }, { "epoch": 0.46961594555177444, "grad_norm": 1.4288461443024332, "learning_rate": 1.8193313174127256e-05, "loss": 0.5880861282348633, "step": 1932 }, { "epoch": 0.46985901798736024, "grad_norm": 1.335533158211669, "learning_rate": 1.8191008086911494e-05, "loss": 0.8478316068649292, "step": 1933 }, { "epoch": 0.47010209042294604, "grad_norm": 1.2351136848566737, "learning_rate": 1.8188701676366234e-05, "loss": 0.6953327059745789, "step": 1934 }, { "epoch": 0.47034516285853184, "grad_norm": 1.1947217120571805, "learning_rate": 1.8186393942864097e-05, "loss": 0.7097251415252686, "step": 1935 }, { "epoch": 0.47058823529411764, "grad_norm": 1.3318622147377863, "learning_rate": 1.818408488677792e-05, "loss": 0.697570264339447, "step": 1936 }, { "epoch": 0.47083130772970344, "grad_norm": 1.3013638000237542, "learning_rate": 1.818177450848075e-05, "loss": 0.6385073661804199, "step": 1937 }, { "epoch": 0.47107438016528924, "grad_norm": 1.6234398209251593, "learning_rate": 1.8179462808345853e-05, "loss": 0.5991809368133545, "step": 1938 }, { "epoch": 0.47131745260087504, "grad_norm": 1.1783803677591755, "learning_rate": 1.81771497867467e-05, "loss": 0.5778635740280151, "step": 1939 }, { "epoch": 0.47156052503646084, "grad_norm": 1.1813876547103928, "learning_rate": 1.817483544405698e-05, "loss": 0.7232409119606018, "step": 1940 }, { "epoch": 0.47180359747204664, "grad_norm": 1.5169073725568745, "learning_rate": 1.8172519780650594e-05, "loss": 0.636803150177002, "step": 1941 }, { "epoch": 0.4720466699076325, "grad_norm": 1.2378847617674504, "learning_rate": 1.817020279690166e-05, "loss": 0.7045572996139526, "step": 1942 }, { "epoch": 0.4722897423432183, "grad_norm": 1.1591449836376415, "learning_rate": 1.8167884493184512e-05, "loss": 0.6376838684082031, "step": 1943 }, { "epoch": 0.4725328147788041, "grad_norm": 1.2947336534118012, "learning_rate": 1.8165564869873688e-05, "loss": 0.7660641670227051, "step": 1944 }, { "epoch": 0.4727758872143899, "grad_norm": 1.183398424855334, "learning_rate": 1.816324392734394e-05, "loss": 0.550136923789978, "step": 1945 }, { "epoch": 0.4730189596499757, "grad_norm": 1.4332773966729235, "learning_rate": 1.816092166597024e-05, "loss": 0.6699485778808594, "step": 1946 }, { "epoch": 0.4732620320855615, "grad_norm": 1.2513650027622354, "learning_rate": 1.815859808612777e-05, "loss": 0.6650853157043457, "step": 1947 }, { "epoch": 0.4735051045211473, "grad_norm": 1.543135606180228, "learning_rate": 1.8156273188191927e-05, "loss": 0.6780376434326172, "step": 1948 }, { "epoch": 0.4737481769567331, "grad_norm": 1.5901905028844463, "learning_rate": 1.8153946972538315e-05, "loss": 0.6574380397796631, "step": 1949 }, { "epoch": 0.4739912493923189, "grad_norm": 1.3341714976793375, "learning_rate": 1.8151619439542757e-05, "loss": 0.6185526847839355, "step": 1950 }, { "epoch": 0.4742343218279047, "grad_norm": 1.454672899591814, "learning_rate": 1.8149290589581286e-05, "loss": 0.6623290777206421, "step": 1951 }, { "epoch": 0.4744773942634905, "grad_norm": 1.3931760797963657, "learning_rate": 1.8146960423030143e-05, "loss": 0.7017488479614258, "step": 1952 }, { "epoch": 0.4747204666990763, "grad_norm": 1.2948794900336587, "learning_rate": 1.81446289402658e-05, "loss": 0.7254677414894104, "step": 1953 }, { "epoch": 0.4749635391346621, "grad_norm": 1.175794352545914, "learning_rate": 1.8142296141664912e-05, "loss": 0.6842788457870483, "step": 1954 }, { "epoch": 0.47520661157024796, "grad_norm": 1.480438008965234, "learning_rate": 1.8139962027604375e-05, "loss": 0.8557650446891785, "step": 1955 }, { "epoch": 0.47544968400583376, "grad_norm": 1.2560856583038544, "learning_rate": 1.813762659846128e-05, "loss": 0.673088788986206, "step": 1956 }, { "epoch": 0.47569275644141956, "grad_norm": 1.4590508794437214, "learning_rate": 1.8135289854612942e-05, "loss": 0.5863760113716125, "step": 1957 }, { "epoch": 0.47593582887700536, "grad_norm": 1.1572705850849747, "learning_rate": 1.8132951796436877e-05, "loss": 0.49971717596054077, "step": 1958 }, { "epoch": 0.47617890131259116, "grad_norm": 1.2453374402613206, "learning_rate": 1.813061242431082e-05, "loss": 0.692674994468689, "step": 1959 }, { "epoch": 0.47642197374817696, "grad_norm": 1.5113086779707114, "learning_rate": 1.8128271738612718e-05, "loss": 0.6225232481956482, "step": 1960 }, { "epoch": 0.47666504618376276, "grad_norm": 1.3096595006321161, "learning_rate": 1.812592973972073e-05, "loss": 0.8124737739562988, "step": 1961 }, { "epoch": 0.47690811861934856, "grad_norm": 1.194209637665102, "learning_rate": 1.812358642801322e-05, "loss": 0.5609695911407471, "step": 1962 }, { "epoch": 0.47715119105493436, "grad_norm": 1.251607717466938, "learning_rate": 1.8121241803868786e-05, "loss": 0.6568284630775452, "step": 1963 }, { "epoch": 0.47739426349052017, "grad_norm": 1.4076687077557908, "learning_rate": 1.8118895867666206e-05, "loss": 0.801343560218811, "step": 1964 }, { "epoch": 0.47763733592610597, "grad_norm": 1.273672875223145, "learning_rate": 1.8116548619784497e-05, "loss": 0.567151665687561, "step": 1965 }, { "epoch": 0.47788040836169177, "grad_norm": 1.3521462610770698, "learning_rate": 1.8114200060602876e-05, "loss": 0.8432581424713135, "step": 1966 }, { "epoch": 0.47812348079727757, "grad_norm": 1.1449067011926848, "learning_rate": 1.8111850190500768e-05, "loss": 0.6480542421340942, "step": 1967 }, { "epoch": 0.47836655323286337, "grad_norm": 1.2336124004524154, "learning_rate": 1.810949900985782e-05, "loss": 0.6252350211143494, "step": 1968 }, { "epoch": 0.4786096256684492, "grad_norm": 1.4668160120597469, "learning_rate": 1.8107146519053883e-05, "loss": 0.6043447256088257, "step": 1969 }, { "epoch": 0.478852698104035, "grad_norm": 1.5407740077910723, "learning_rate": 1.8104792718469027e-05, "loss": 0.7024151086807251, "step": 1970 }, { "epoch": 0.4790957705396208, "grad_norm": 1.3470434485127085, "learning_rate": 1.810243760848353e-05, "loss": 0.7931290864944458, "step": 1971 }, { "epoch": 0.4793388429752066, "grad_norm": 1.105681280251983, "learning_rate": 1.8100081189477873e-05, "loss": 0.6190937757492065, "step": 1972 }, { "epoch": 0.4795819154107924, "grad_norm": 1.3758189951432798, "learning_rate": 1.8097723461832762e-05, "loss": 0.5848296284675598, "step": 1973 }, { "epoch": 0.4798249878463782, "grad_norm": 1.175872634364554, "learning_rate": 1.8095364425929106e-05, "loss": 0.6657400727272034, "step": 1974 }, { "epoch": 0.480068060281964, "grad_norm": 1.4710735798349195, "learning_rate": 1.809300408214803e-05, "loss": 0.7330705523490906, "step": 1975 }, { "epoch": 0.4803111327175498, "grad_norm": 1.6396026238985317, "learning_rate": 1.8090642430870865e-05, "loss": 0.6433785557746887, "step": 1976 }, { "epoch": 0.4805542051531356, "grad_norm": 1.1822718723182857, "learning_rate": 1.8088279472479162e-05, "loss": 0.6010866165161133, "step": 1977 }, { "epoch": 0.48079727758872143, "grad_norm": 1.2613226138070694, "learning_rate": 1.8085915207354675e-05, "loss": 0.7677274942398071, "step": 1978 }, { "epoch": 0.48104035002430723, "grad_norm": 1.4019988740131264, "learning_rate": 1.808354963587937e-05, "loss": 0.8621988892555237, "step": 1979 }, { "epoch": 0.48128342245989303, "grad_norm": 1.421258364656576, "learning_rate": 1.8081182758435435e-05, "loss": 0.6460142135620117, "step": 1980 }, { "epoch": 0.48152649489547883, "grad_norm": 1.4205283029469455, "learning_rate": 1.8078814575405245e-05, "loss": 0.6735793352127075, "step": 1981 }, { "epoch": 0.48176956733106463, "grad_norm": 1.3244844783137721, "learning_rate": 1.807644508717141e-05, "loss": 0.66620934009552, "step": 1982 }, { "epoch": 0.4820126397666505, "grad_norm": 1.2854743073268489, "learning_rate": 1.8074074294116744e-05, "loss": 0.6375657916069031, "step": 1983 }, { "epoch": 0.4822557122022363, "grad_norm": 1.1810407878280498, "learning_rate": 1.8071702196624262e-05, "loss": 0.6444462537765503, "step": 1984 }, { "epoch": 0.4824987846378221, "grad_norm": 1.544197038331106, "learning_rate": 1.80693287950772e-05, "loss": 0.7535640597343445, "step": 1985 }, { "epoch": 0.4827418570734079, "grad_norm": 1.2214433118139838, "learning_rate": 1.8066954089859007e-05, "loss": 0.5631632804870605, "step": 1986 }, { "epoch": 0.4829849295089937, "grad_norm": 1.3822699387329362, "learning_rate": 1.806457808135333e-05, "loss": 0.6625458598136902, "step": 1987 }, { "epoch": 0.4832280019445795, "grad_norm": 1.2531741081021661, "learning_rate": 1.806220076994404e-05, "loss": 0.6278354525566101, "step": 1988 }, { "epoch": 0.4834710743801653, "grad_norm": 1.3350349576775962, "learning_rate": 1.8059822156015208e-05, "loss": 0.7411869764328003, "step": 1989 }, { "epoch": 0.4837141468157511, "grad_norm": 1.5118638609272395, "learning_rate": 1.805744223995112e-05, "loss": 0.7508984804153442, "step": 1990 }, { "epoch": 0.4839572192513369, "grad_norm": 1.4250831245010656, "learning_rate": 1.8055061022136277e-05, "loss": 0.7396284341812134, "step": 1991 }, { "epoch": 0.4842002916869227, "grad_norm": 1.323028940916798, "learning_rate": 1.8052678502955385e-05, "loss": 0.5300865769386292, "step": 1992 }, { "epoch": 0.4844433641225085, "grad_norm": 1.4380597972588594, "learning_rate": 1.8050294682793356e-05, "loss": 0.7540422677993774, "step": 1993 }, { "epoch": 0.4846864365580943, "grad_norm": 1.4442438826339736, "learning_rate": 1.8047909562035324e-05, "loss": 0.8447569608688354, "step": 1994 }, { "epoch": 0.4849295089936801, "grad_norm": 1.3149983788668012, "learning_rate": 1.804552314106662e-05, "loss": 0.566381573677063, "step": 1995 }, { "epoch": 0.48517258142926595, "grad_norm": 1.332898716941146, "learning_rate": 1.8043135420272794e-05, "loss": 0.7247072458267212, "step": 1996 }, { "epoch": 0.48541565386485175, "grad_norm": 1.27295983562267, "learning_rate": 1.8040746400039604e-05, "loss": 0.5683351755142212, "step": 1997 }, { "epoch": 0.48565872630043755, "grad_norm": 1.4300482011694937, "learning_rate": 1.8038356080753017e-05, "loss": 0.629919171333313, "step": 1998 }, { "epoch": 0.48590179873602335, "grad_norm": 1.2652911798538613, "learning_rate": 1.803596446279921e-05, "loss": 0.5571404695510864, "step": 1999 }, { "epoch": 0.48614487117160915, "grad_norm": 1.1749128414599097, "learning_rate": 1.8033571546564567e-05, "loss": 0.6728399395942688, "step": 2000 }, { "epoch": 0.48638794360719495, "grad_norm": 1.391071473400533, "learning_rate": 1.803117733243569e-05, "loss": 0.6762497425079346, "step": 2001 }, { "epoch": 0.48663101604278075, "grad_norm": 1.364455760499344, "learning_rate": 1.8028781820799382e-05, "loss": 0.5682691335678101, "step": 2002 }, { "epoch": 0.48687408847836655, "grad_norm": 1.2200224520177436, "learning_rate": 1.8026385012042657e-05, "loss": 0.5550578832626343, "step": 2003 }, { "epoch": 0.48711716091395235, "grad_norm": 1.4147025042230796, "learning_rate": 1.8023986906552747e-05, "loss": 0.6660405397415161, "step": 2004 }, { "epoch": 0.48736023334953815, "grad_norm": 1.306540126893956, "learning_rate": 1.802158750471708e-05, "loss": 0.7602781653404236, "step": 2005 }, { "epoch": 0.48760330578512395, "grad_norm": 1.487336040784825, "learning_rate": 1.8019186806923305e-05, "loss": 0.6867932081222534, "step": 2006 }, { "epoch": 0.48784637822070975, "grad_norm": 1.1181207816897218, "learning_rate": 1.8016784813559275e-05, "loss": 0.5537831783294678, "step": 2007 }, { "epoch": 0.48808945065629555, "grad_norm": 1.344728758884877, "learning_rate": 1.8014381525013053e-05, "loss": 0.6218934059143066, "step": 2008 }, { "epoch": 0.48833252309188135, "grad_norm": 1.343854442671351, "learning_rate": 1.801197694167291e-05, "loss": 0.7641550302505493, "step": 2009 }, { "epoch": 0.4885755955274672, "grad_norm": 1.3053286799767083, "learning_rate": 1.800957106392733e-05, "loss": 0.7963595986366272, "step": 2010 }, { "epoch": 0.488818667963053, "grad_norm": 1.1869875789660664, "learning_rate": 1.8007163892165005e-05, "loss": 0.5336430072784424, "step": 2011 }, { "epoch": 0.4890617403986388, "grad_norm": 1.1517486381084678, "learning_rate": 1.800475542677483e-05, "loss": 0.7545665502548218, "step": 2012 }, { "epoch": 0.4893048128342246, "grad_norm": 1.2755554537278568, "learning_rate": 1.8002345668145916e-05, "loss": 0.5165852308273315, "step": 2013 }, { "epoch": 0.4895478852698104, "grad_norm": 1.253865547086637, "learning_rate": 1.7999934616667582e-05, "loss": 0.7696032524108887, "step": 2014 }, { "epoch": 0.4897909577053962, "grad_norm": 1.2963001527331561, "learning_rate": 1.799752227272935e-05, "loss": 0.6692056655883789, "step": 2015 }, { "epoch": 0.490034030140982, "grad_norm": 1.2132484386565527, "learning_rate": 1.7995108636720963e-05, "loss": 0.5760196447372437, "step": 2016 }, { "epoch": 0.4902771025765678, "grad_norm": 1.2767994453985025, "learning_rate": 1.799269370903236e-05, "loss": 0.601669192314148, "step": 2017 }, { "epoch": 0.4905201750121536, "grad_norm": 1.2135750873666453, "learning_rate": 1.7990277490053693e-05, "loss": 0.6637371778488159, "step": 2018 }, { "epoch": 0.4907632474477394, "grad_norm": 1.303599147202857, "learning_rate": 1.7987859980175328e-05, "loss": 0.5473656058311462, "step": 2019 }, { "epoch": 0.4910063198833252, "grad_norm": 1.5250272705735801, "learning_rate": 1.798544117978783e-05, "loss": 0.6600735187530518, "step": 2020 }, { "epoch": 0.491249392318911, "grad_norm": 1.2070684015102267, "learning_rate": 1.7983021089281983e-05, "loss": 0.5980600714683533, "step": 2021 }, { "epoch": 0.4914924647544968, "grad_norm": 1.276139207710852, "learning_rate": 1.798059970904877e-05, "loss": 0.8142338395118713, "step": 2022 }, { "epoch": 0.49173553719008267, "grad_norm": 1.4478312211501743, "learning_rate": 1.7978177039479386e-05, "loss": 0.6594194769859314, "step": 2023 }, { "epoch": 0.4919786096256685, "grad_norm": 1.5703148245903698, "learning_rate": 1.7975753080965236e-05, "loss": 0.6989774703979492, "step": 2024 }, { "epoch": 0.4922216820612543, "grad_norm": 1.4408993841765643, "learning_rate": 1.797332783389793e-05, "loss": 0.6889493465423584, "step": 2025 }, { "epoch": 0.4924647544968401, "grad_norm": 1.5364618467796987, "learning_rate": 1.7970901298669293e-05, "loss": 0.7594355344772339, "step": 2026 }, { "epoch": 0.4927078269324259, "grad_norm": 1.3708862796478254, "learning_rate": 1.7968473475671347e-05, "loss": 0.5904159545898438, "step": 2027 }, { "epoch": 0.4929508993680117, "grad_norm": 1.1839443434186667, "learning_rate": 1.7966044365296332e-05, "loss": 0.5868058800697327, "step": 2028 }, { "epoch": 0.4931939718035975, "grad_norm": 1.5489073339977877, "learning_rate": 1.7963613967936693e-05, "loss": 0.8019232749938965, "step": 2029 }, { "epoch": 0.4934370442391833, "grad_norm": 1.2087108581116126, "learning_rate": 1.7961182283985076e-05, "loss": 0.5031205415725708, "step": 2030 }, { "epoch": 0.4936801166747691, "grad_norm": 1.3715281472746583, "learning_rate": 1.795874931383435e-05, "loss": 0.7745482921600342, "step": 2031 }, { "epoch": 0.4939231891103549, "grad_norm": 1.0900862215473313, "learning_rate": 1.7956315057877572e-05, "loss": 0.5238996744155884, "step": 2032 }, { "epoch": 0.4941662615459407, "grad_norm": 1.3588115738154258, "learning_rate": 1.795387951650803e-05, "loss": 0.7115267515182495, "step": 2033 }, { "epoch": 0.4944093339815265, "grad_norm": 1.42739998073913, "learning_rate": 1.7951442690119196e-05, "loss": 0.7796096801757812, "step": 2034 }, { "epoch": 0.4946524064171123, "grad_norm": 1.2748877321665664, "learning_rate": 1.794900457910477e-05, "loss": 0.6621671915054321, "step": 2035 }, { "epoch": 0.4948954788526981, "grad_norm": 1.2557081807946249, "learning_rate": 1.7946565183858638e-05, "loss": 0.7159013748168945, "step": 2036 }, { "epoch": 0.49513855128828393, "grad_norm": 1.4502456406950606, "learning_rate": 1.794412450477492e-05, "loss": 0.693280816078186, "step": 2037 }, { "epoch": 0.49538162372386974, "grad_norm": 1.2569665294976247, "learning_rate": 1.7941682542247923e-05, "loss": 0.5894519686698914, "step": 2038 }, { "epoch": 0.49562469615945554, "grad_norm": 1.4530369824010754, "learning_rate": 1.7939239296672164e-05, "loss": 0.7377382516860962, "step": 2039 }, { "epoch": 0.49586776859504134, "grad_norm": 1.5311010935611795, "learning_rate": 1.7936794768442375e-05, "loss": 0.6741394996643066, "step": 2040 }, { "epoch": 0.49611084103062714, "grad_norm": 1.552726961841577, "learning_rate": 1.793434895795349e-05, "loss": 0.7025201320648193, "step": 2041 }, { "epoch": 0.49635391346621294, "grad_norm": 1.3346135912315775, "learning_rate": 1.793190186560065e-05, "loss": 0.6400142908096313, "step": 2042 }, { "epoch": 0.49659698590179874, "grad_norm": 1.2719346449075806, "learning_rate": 1.792945349177921e-05, "loss": 0.7396837472915649, "step": 2043 }, { "epoch": 0.49684005833738454, "grad_norm": 1.3132909410876574, "learning_rate": 1.7927003836884718e-05, "loss": 0.6652066707611084, "step": 2044 }, { "epoch": 0.49708313077297034, "grad_norm": 1.6393670051952902, "learning_rate": 1.7924552901312943e-05, "loss": 0.6095765233039856, "step": 2045 }, { "epoch": 0.49732620320855614, "grad_norm": 1.4716053105815257, "learning_rate": 1.7922100685459853e-05, "loss": 0.6379678845405579, "step": 2046 }, { "epoch": 0.49756927564414194, "grad_norm": 1.3377295140359466, "learning_rate": 1.7919647189721627e-05, "loss": 0.615787148475647, "step": 2047 }, { "epoch": 0.49781234807972774, "grad_norm": 1.2330196440474617, "learning_rate": 1.7917192414494644e-05, "loss": 0.6827255487442017, "step": 2048 }, { "epoch": 0.49805542051531354, "grad_norm": 1.3466592779796203, "learning_rate": 1.79147363601755e-05, "loss": 0.7033535242080688, "step": 2049 }, { "epoch": 0.49829849295089934, "grad_norm": 1.4308208486694383, "learning_rate": 1.791227902716099e-05, "loss": 0.6934987902641296, "step": 2050 }, { "epoch": 0.4985415653864852, "grad_norm": 1.43512336265814, "learning_rate": 1.790982041584812e-05, "loss": 0.6886148452758789, "step": 2051 }, { "epoch": 0.498784637822071, "grad_norm": 1.3943318234256246, "learning_rate": 1.7907360526634097e-05, "loss": 0.7554881572723389, "step": 2052 }, { "epoch": 0.4990277102576568, "grad_norm": 1.5235781778541335, "learning_rate": 1.7904899359916337e-05, "loss": 0.6504961848258972, "step": 2053 }, { "epoch": 0.4992707826932426, "grad_norm": 1.594843564039134, "learning_rate": 1.7902436916092467e-05, "loss": 0.7520538568496704, "step": 2054 }, { "epoch": 0.4995138551288284, "grad_norm": 1.298970188823874, "learning_rate": 1.7899973195560315e-05, "loss": 0.7103005647659302, "step": 2055 }, { "epoch": 0.4997569275644142, "grad_norm": 1.5471991797490516, "learning_rate": 1.789750819871792e-05, "loss": 0.8207725286483765, "step": 2056 }, { "epoch": 0.5, "grad_norm": 1.4209018880718582, "learning_rate": 1.7895041925963515e-05, "loss": 0.7793287634849548, "step": 2057 }, { "epoch": 0.5002430724355859, "grad_norm": 1.4139578636349077, "learning_rate": 1.7892574377695556e-05, "loss": 0.8910761475563049, "step": 2058 }, { "epoch": 0.5004861448711716, "grad_norm": 1.4427765151477754, "learning_rate": 1.7890105554312693e-05, "loss": 0.7117195129394531, "step": 2059 }, { "epoch": 0.5007292173067575, "grad_norm": 1.1553642341877588, "learning_rate": 1.788763545621379e-05, "loss": 0.7101486325263977, "step": 2060 }, { "epoch": 0.5009722897423432, "grad_norm": 1.534117368742507, "learning_rate": 1.788516408379791e-05, "loss": 0.8506888151168823, "step": 2061 }, { "epoch": 0.5012153621779291, "grad_norm": 1.3564685939949725, "learning_rate": 1.7882691437464324e-05, "loss": 0.6369701027870178, "step": 2062 }, { "epoch": 0.5014584346135148, "grad_norm": 1.3742521733161004, "learning_rate": 1.7880217517612515e-05, "loss": 0.772948145866394, "step": 2063 }, { "epoch": 0.5017015070491007, "grad_norm": 1.2866799729670804, "learning_rate": 1.7877742324642163e-05, "loss": 0.6171262264251709, "step": 2064 }, { "epoch": 0.5019445794846864, "grad_norm": 1.4269183614202121, "learning_rate": 1.7875265858953156e-05, "loss": 0.6372554302215576, "step": 2065 }, { "epoch": 0.5021876519202723, "grad_norm": 1.4277486210509942, "learning_rate": 1.787278812094559e-05, "loss": 0.5110150575637817, "step": 2066 }, { "epoch": 0.502430724355858, "grad_norm": 1.127813461192754, "learning_rate": 1.7870309111019767e-05, "loss": 0.6987369060516357, "step": 2067 }, { "epoch": 0.5026737967914439, "grad_norm": 1.3428224485862805, "learning_rate": 1.786782882957619e-05, "loss": 0.6545054912567139, "step": 2068 }, { "epoch": 0.5029168692270296, "grad_norm": 1.4188344464355567, "learning_rate": 1.7865347277015575e-05, "loss": 0.6689258813858032, "step": 2069 }, { "epoch": 0.5031599416626155, "grad_norm": 1.2723454116891382, "learning_rate": 1.7862864453738832e-05, "loss": 0.6690998077392578, "step": 2070 }, { "epoch": 0.5034030140982013, "grad_norm": 1.6882723754617295, "learning_rate": 1.7860380360147087e-05, "loss": 0.6315518617630005, "step": 2071 }, { "epoch": 0.5036460865337871, "grad_norm": 1.1145622200028018, "learning_rate": 1.785789499664167e-05, "loss": 0.6922605037689209, "step": 2072 }, { "epoch": 0.5038891589693729, "grad_norm": 1.0901555004252474, "learning_rate": 1.7855408363624106e-05, "loss": 0.6321655511856079, "step": 2073 }, { "epoch": 0.5041322314049587, "grad_norm": 1.5658971567252002, "learning_rate": 1.785292046149614e-05, "loss": 0.592839777469635, "step": 2074 }, { "epoch": 0.5043753038405445, "grad_norm": 1.486861224377476, "learning_rate": 1.7850431290659707e-05, "loss": 0.7549514770507812, "step": 2075 }, { "epoch": 0.5046183762761303, "grad_norm": 1.3723799663831542, "learning_rate": 1.7847940851516962e-05, "loss": 0.6616325378417969, "step": 2076 }, { "epoch": 0.5048614487117161, "grad_norm": 1.3642132504127427, "learning_rate": 1.7845449144470256e-05, "loss": 0.7422086000442505, "step": 2077 }, { "epoch": 0.5051045211473019, "grad_norm": 1.3107560811500087, "learning_rate": 1.7842956169922143e-05, "loss": 0.6204961538314819, "step": 2078 }, { "epoch": 0.5053475935828877, "grad_norm": 1.1385472368692626, "learning_rate": 1.784046192827538e-05, "loss": 0.5308598279953003, "step": 2079 }, { "epoch": 0.5055906660184735, "grad_norm": 1.4580797857024568, "learning_rate": 1.783796641993295e-05, "loss": 0.7284260988235474, "step": 2080 }, { "epoch": 0.5058337384540593, "grad_norm": 1.4431437176228887, "learning_rate": 1.7835469645298004e-05, "loss": 0.6874702572822571, "step": 2081 }, { "epoch": 0.5060768108896451, "grad_norm": 1.3996360287485063, "learning_rate": 1.7832971604773936e-05, "loss": 0.8214660882949829, "step": 2082 }, { "epoch": 0.5063198833252309, "grad_norm": 1.22305333184156, "learning_rate": 1.783047229876431e-05, "loss": 0.698309063911438, "step": 2083 }, { "epoch": 0.5065629557608167, "grad_norm": 1.4749376786980317, "learning_rate": 1.782797172767293e-05, "loss": 0.6415195465087891, "step": 2084 }, { "epoch": 0.5068060281964025, "grad_norm": 1.2646377590986437, "learning_rate": 1.7825469891903768e-05, "loss": 0.7189385890960693, "step": 2085 }, { "epoch": 0.5070491006319884, "grad_norm": 1.1425594281176923, "learning_rate": 1.7822966791861028e-05, "loss": 0.6724643111228943, "step": 2086 }, { "epoch": 0.5072921730675741, "grad_norm": 1.2824079829139232, "learning_rate": 1.7820462427949098e-05, "loss": 0.5908191204071045, "step": 2087 }, { "epoch": 0.50753524550316, "grad_norm": 2.0207771520653366, "learning_rate": 1.7817956800572595e-05, "loss": 0.7510062456130981, "step": 2088 }, { "epoch": 0.5077783179387457, "grad_norm": 1.313557745324939, "learning_rate": 1.7815449910136312e-05, "loss": 0.6665521860122681, "step": 2089 }, { "epoch": 0.5080213903743316, "grad_norm": 1.3377693244737476, "learning_rate": 1.781294175704526e-05, "loss": 0.7130563259124756, "step": 2090 }, { "epoch": 0.5082644628099173, "grad_norm": 1.2335164837783312, "learning_rate": 1.7810432341704664e-05, "loss": 0.7163683176040649, "step": 2091 }, { "epoch": 0.5085075352455032, "grad_norm": 1.3814525458933924, "learning_rate": 1.7807921664519933e-05, "loss": 0.592750072479248, "step": 2092 }, { "epoch": 0.5087506076810889, "grad_norm": 1.1442241459144462, "learning_rate": 1.7805409725896687e-05, "loss": 0.5742543339729309, "step": 2093 }, { "epoch": 0.5089936801166748, "grad_norm": 1.3092681383493483, "learning_rate": 1.7802896526240758e-05, "loss": 0.6194922924041748, "step": 2094 }, { "epoch": 0.5092367525522605, "grad_norm": 1.1144941149089498, "learning_rate": 1.7800382065958175e-05, "loss": 0.5108276605606079, "step": 2095 }, { "epoch": 0.5094798249878464, "grad_norm": 1.4175866475036973, "learning_rate": 1.779786634545517e-05, "loss": 0.7157620191574097, "step": 2096 }, { "epoch": 0.5097228974234321, "grad_norm": 1.3696742441026055, "learning_rate": 1.7795349365138176e-05, "loss": 0.585184633731842, "step": 2097 }, { "epoch": 0.509965969859018, "grad_norm": 1.551624322754488, "learning_rate": 1.7792831125413838e-05, "loss": 0.7877108454704285, "step": 2098 }, { "epoch": 0.5102090422946038, "grad_norm": 1.1601741580214417, "learning_rate": 1.7790311626689e-05, "loss": 0.549664318561554, "step": 2099 }, { "epoch": 0.5104521147301896, "grad_norm": 1.0960498513142856, "learning_rate": 1.7787790869370706e-05, "loss": 0.7049285173416138, "step": 2100 }, { "epoch": 0.5106951871657754, "grad_norm": 1.2828301156033266, "learning_rate": 1.7785268853866213e-05, "loss": 0.7020139694213867, "step": 2101 }, { "epoch": 0.5109382596013612, "grad_norm": 1.488386377301556, "learning_rate": 1.7782745580582963e-05, "loss": 0.7329096794128418, "step": 2102 }, { "epoch": 0.511181332036947, "grad_norm": 1.3269225402936418, "learning_rate": 1.7780221049928625e-05, "loss": 0.6609591245651245, "step": 2103 }, { "epoch": 0.5114244044725328, "grad_norm": 1.4048910423751484, "learning_rate": 1.777769526231105e-05, "loss": 0.7194134593009949, "step": 2104 }, { "epoch": 0.5116674769081186, "grad_norm": 1.4289125534125808, "learning_rate": 1.7775168218138314e-05, "loss": 0.6810678243637085, "step": 2105 }, { "epoch": 0.5119105493437044, "grad_norm": 1.2095911683154916, "learning_rate": 1.7772639917818667e-05, "loss": 0.5404350757598877, "step": 2106 }, { "epoch": 0.5121536217792902, "grad_norm": 1.3356433361608842, "learning_rate": 1.7770110361760587e-05, "loss": 0.5637277364730835, "step": 2107 }, { "epoch": 0.512396694214876, "grad_norm": 1.2705265433245645, "learning_rate": 1.7767579550372748e-05, "loss": 0.5252575874328613, "step": 2108 }, { "epoch": 0.5126397666504618, "grad_norm": 1.2500194034763383, "learning_rate": 1.776504748406402e-05, "loss": 0.6211093068122864, "step": 2109 }, { "epoch": 0.5128828390860476, "grad_norm": 1.4324883466037057, "learning_rate": 1.7762514163243483e-05, "loss": 0.761886477470398, "step": 2110 }, { "epoch": 0.5131259115216334, "grad_norm": 1.3171803946715996, "learning_rate": 1.7759979588320417e-05, "loss": 0.667776882648468, "step": 2111 }, { "epoch": 0.5133689839572193, "grad_norm": 1.4378409627549615, "learning_rate": 1.7757443759704306e-05, "loss": 0.702157735824585, "step": 2112 }, { "epoch": 0.513612056392805, "grad_norm": 1.3621602167188553, "learning_rate": 1.775490667780483e-05, "loss": 0.7086688280105591, "step": 2113 }, { "epoch": 0.5138551288283909, "grad_norm": 1.1098630466620736, "learning_rate": 1.7752368343031886e-05, "loss": 0.7456492185592651, "step": 2114 }, { "epoch": 0.5140982012639767, "grad_norm": 1.3277530295390303, "learning_rate": 1.7749828755795556e-05, "loss": 0.7464662790298462, "step": 2115 }, { "epoch": 0.5143412736995625, "grad_norm": 1.2407156057111282, "learning_rate": 1.774728791650614e-05, "loss": 0.6974163055419922, "step": 2116 }, { "epoch": 0.5145843461351483, "grad_norm": 1.2665647746643296, "learning_rate": 1.7744745825574123e-05, "loss": 0.638512134552002, "step": 2117 }, { "epoch": 0.5148274185707341, "grad_norm": 1.0438618721561397, "learning_rate": 1.774220248341021e-05, "loss": 0.6231693029403687, "step": 2118 }, { "epoch": 0.5150704910063199, "grad_norm": 1.2740665200407235, "learning_rate": 1.77396578904253e-05, "loss": 0.6474120616912842, "step": 2119 }, { "epoch": 0.5153135634419057, "grad_norm": 1.253678832809389, "learning_rate": 1.7737112047030494e-05, "loss": 0.7036040425300598, "step": 2120 }, { "epoch": 0.5155566358774915, "grad_norm": 1.1731537629187834, "learning_rate": 1.773456495363709e-05, "loss": 0.5210830569267273, "step": 2121 }, { "epoch": 0.5157997083130773, "grad_norm": 1.2864077956895674, "learning_rate": 1.7732016610656605e-05, "loss": 0.6057080030441284, "step": 2122 }, { "epoch": 0.516042780748663, "grad_norm": 1.4141362995786528, "learning_rate": 1.7729467018500734e-05, "loss": 0.7071092128753662, "step": 2123 }, { "epoch": 0.5162858531842489, "grad_norm": 1.2494700480807728, "learning_rate": 1.772691617758139e-05, "loss": 0.7126401662826538, "step": 2124 }, { "epoch": 0.5165289256198347, "grad_norm": 1.4257327856319884, "learning_rate": 1.772436408831069e-05, "loss": 0.675320029258728, "step": 2125 }, { "epoch": 0.5167719980554205, "grad_norm": 1.4241818110652644, "learning_rate": 1.7721810751100938e-05, "loss": 0.7672979831695557, "step": 2126 }, { "epoch": 0.5170150704910064, "grad_norm": 1.2706542264475635, "learning_rate": 1.771925616636465e-05, "loss": 0.5822427272796631, "step": 2127 }, { "epoch": 0.5172581429265921, "grad_norm": 1.4227056835812737, "learning_rate": 1.771670033451455e-05, "loss": 0.8230865597724915, "step": 2128 }, { "epoch": 0.517501215362178, "grad_norm": 1.1711064589318254, "learning_rate": 1.771414325596354e-05, "loss": 0.7106782793998718, "step": 2129 }, { "epoch": 0.5177442877977637, "grad_norm": 1.4387557675685203, "learning_rate": 1.7711584931124753e-05, "loss": 0.6389893293380737, "step": 2130 }, { "epoch": 0.5179873602333496, "grad_norm": 1.3853992263236343, "learning_rate": 1.7709025360411497e-05, "loss": 0.542579174041748, "step": 2131 }, { "epoch": 0.5182304326689353, "grad_norm": 1.2650201041109894, "learning_rate": 1.7706464544237306e-05, "loss": 0.6393819451332092, "step": 2132 }, { "epoch": 0.5184735051045212, "grad_norm": 1.6453688944072766, "learning_rate": 1.770390248301589e-05, "loss": 0.8241571187973022, "step": 2133 }, { "epoch": 0.5187165775401069, "grad_norm": 1.1194268474833486, "learning_rate": 1.7701339177161182e-05, "loss": 0.5341123342514038, "step": 2134 }, { "epoch": 0.5189596499756928, "grad_norm": 1.3145550267700907, "learning_rate": 1.76987746270873e-05, "loss": 0.7397980690002441, "step": 2135 }, { "epoch": 0.5192027224112785, "grad_norm": 1.5136839899472159, "learning_rate": 1.7696208833208574e-05, "loss": 0.7416543960571289, "step": 2136 }, { "epoch": 0.5194457948468644, "grad_norm": 1.2763832340354553, "learning_rate": 1.7693641795939525e-05, "loss": 0.5328516960144043, "step": 2137 }, { "epoch": 0.5196888672824501, "grad_norm": 1.2953381649549232, "learning_rate": 1.769107351569489e-05, "loss": 0.864984393119812, "step": 2138 }, { "epoch": 0.519931939718036, "grad_norm": 1.3989827645380104, "learning_rate": 1.7688503992889585e-05, "loss": 0.7481319904327393, "step": 2139 }, { "epoch": 0.5201750121536218, "grad_norm": 1.2451227939084473, "learning_rate": 1.768593322793875e-05, "loss": 0.7311174869537354, "step": 2140 }, { "epoch": 0.5204180845892076, "grad_norm": 1.3697759701737395, "learning_rate": 1.7683361221257705e-05, "loss": 0.7090219259262085, "step": 2141 }, { "epoch": 0.5206611570247934, "grad_norm": 1.2128157889052908, "learning_rate": 1.768078797326199e-05, "loss": 0.6693586707115173, "step": 2142 }, { "epoch": 0.5209042294603792, "grad_norm": 1.280529226395373, "learning_rate": 1.7678213484367336e-05, "loss": 0.7180107831954956, "step": 2143 }, { "epoch": 0.521147301895965, "grad_norm": 1.1766024756246527, "learning_rate": 1.7675637754989667e-05, "loss": 0.7092503309249878, "step": 2144 }, { "epoch": 0.5213903743315508, "grad_norm": 1.3212386738992565, "learning_rate": 1.767306078554511e-05, "loss": 0.6465872526168823, "step": 2145 }, { "epoch": 0.5216334467671366, "grad_norm": 1.3623506278806194, "learning_rate": 1.7670482576450014e-05, "loss": 0.6425137519836426, "step": 2146 }, { "epoch": 0.5218765192027224, "grad_norm": 1.3054291256353416, "learning_rate": 1.76679031281209e-05, "loss": 0.641539990901947, "step": 2147 }, { "epoch": 0.5221195916383082, "grad_norm": 1.217290434339397, "learning_rate": 1.7665322440974502e-05, "loss": 0.6743576526641846, "step": 2148 }, { "epoch": 0.522362664073894, "grad_norm": 1.4185940119903524, "learning_rate": 1.7662740515427753e-05, "loss": 0.7801876068115234, "step": 2149 }, { "epoch": 0.5226057365094798, "grad_norm": 1.176511640494014, "learning_rate": 1.7660157351897788e-05, "loss": 0.6221873760223389, "step": 2150 }, { "epoch": 0.5228488089450656, "grad_norm": 1.2076521090789754, "learning_rate": 1.7657572950801936e-05, "loss": 0.6922479867935181, "step": 2151 }, { "epoch": 0.5230918813806514, "grad_norm": 1.1846884345596476, "learning_rate": 1.7654987312557733e-05, "loss": 0.5154560804367065, "step": 2152 }, { "epoch": 0.5233349538162373, "grad_norm": 1.4392904931840371, "learning_rate": 1.765240043758291e-05, "loss": 0.6060595512390137, "step": 2153 }, { "epoch": 0.523578026251823, "grad_norm": 1.2074922109724422, "learning_rate": 1.76498123262954e-05, "loss": 0.5533969402313232, "step": 2154 }, { "epoch": 0.5238210986874089, "grad_norm": 1.2606715438647191, "learning_rate": 1.7647222979113335e-05, "loss": 0.7089070081710815, "step": 2155 }, { "epoch": 0.5240641711229946, "grad_norm": 1.4929528185083876, "learning_rate": 1.764463239645505e-05, "loss": 0.8139622211456299, "step": 2156 }, { "epoch": 0.5243072435585805, "grad_norm": 1.4910515195798089, "learning_rate": 1.764204057873907e-05, "loss": 0.6835958957672119, "step": 2157 }, { "epoch": 0.5245503159941662, "grad_norm": 1.1514116578621594, "learning_rate": 1.763944752638413e-05, "loss": 0.5694040060043335, "step": 2158 }, { "epoch": 0.5247933884297521, "grad_norm": 1.4009852886926746, "learning_rate": 1.763685323980916e-05, "loss": 0.644755482673645, "step": 2159 }, { "epoch": 0.5250364608653378, "grad_norm": 1.3208933771858098, "learning_rate": 1.7634257719433292e-05, "loss": 0.7200300693511963, "step": 2160 }, { "epoch": 0.5252795333009237, "grad_norm": 1.4190687227389656, "learning_rate": 1.763166096567585e-05, "loss": 0.761385977268219, "step": 2161 }, { "epoch": 0.5255226057365094, "grad_norm": 1.5256651676139306, "learning_rate": 1.7629062978956367e-05, "loss": 0.6877377033233643, "step": 2162 }, { "epoch": 0.5257656781720953, "grad_norm": 1.4600332290001181, "learning_rate": 1.7626463759694567e-05, "loss": 0.8399922847747803, "step": 2163 }, { "epoch": 0.526008750607681, "grad_norm": 1.5034302539132203, "learning_rate": 1.7623863308310382e-05, "loss": 0.7634553909301758, "step": 2164 }, { "epoch": 0.5262518230432669, "grad_norm": 1.345353174892017, "learning_rate": 1.762126162522393e-05, "loss": 0.6427427530288696, "step": 2165 }, { "epoch": 0.5264948954788528, "grad_norm": 1.2214226434583055, "learning_rate": 1.761865871085554e-05, "loss": 0.6581236124038696, "step": 2166 }, { "epoch": 0.5267379679144385, "grad_norm": 1.2196324474601914, "learning_rate": 1.7616054565625742e-05, "loss": 0.6554121971130371, "step": 2167 }, { "epoch": 0.5269810403500244, "grad_norm": 1.4109331203476807, "learning_rate": 1.761344918995525e-05, "loss": 0.745789110660553, "step": 2168 }, { "epoch": 0.5272241127856101, "grad_norm": 1.3905407329722017, "learning_rate": 1.761084258426499e-05, "loss": 0.8578720092773438, "step": 2169 }, { "epoch": 0.527467185221196, "grad_norm": 1.2736112650675453, "learning_rate": 1.7608234748976075e-05, "loss": 0.8366312980651855, "step": 2170 }, { "epoch": 0.5277102576567817, "grad_norm": 1.2127721638603184, "learning_rate": 1.7605625684509833e-05, "loss": 0.6770468950271606, "step": 2171 }, { "epoch": 0.5279533300923676, "grad_norm": 1.1463877333884915, "learning_rate": 1.7603015391287773e-05, "loss": 0.7063940763473511, "step": 2172 }, { "epoch": 0.5281964025279533, "grad_norm": 1.4209071249619813, "learning_rate": 1.760040386973162e-05, "loss": 0.8296723365783691, "step": 2173 }, { "epoch": 0.5284394749635392, "grad_norm": 1.3025896544349962, "learning_rate": 1.7597791120263284e-05, "loss": 0.6159169673919678, "step": 2174 }, { "epoch": 0.5286825473991249, "grad_norm": 1.460321372413045, "learning_rate": 1.7595177143304877e-05, "loss": 0.6673615574836731, "step": 2175 }, { "epoch": 0.5289256198347108, "grad_norm": 1.117381228388742, "learning_rate": 1.7592561939278706e-05, "loss": 0.6567777395248413, "step": 2176 }, { "epoch": 0.5291686922702965, "grad_norm": 1.4006391661517783, "learning_rate": 1.7589945508607288e-05, "loss": 0.5588729381561279, "step": 2177 }, { "epoch": 0.5294117647058824, "grad_norm": 1.529599098471468, "learning_rate": 1.7587327851713323e-05, "loss": 0.6823455095291138, "step": 2178 }, { "epoch": 0.5296548371414681, "grad_norm": 1.3125083148848375, "learning_rate": 1.7584708969019725e-05, "loss": 0.6662975549697876, "step": 2179 }, { "epoch": 0.529897909577054, "grad_norm": 1.7196237174144597, "learning_rate": 1.758208886094959e-05, "loss": 0.7732728719711304, "step": 2180 }, { "epoch": 0.5301409820126398, "grad_norm": 1.229876765358833, "learning_rate": 1.7579467527926223e-05, "loss": 0.6647286415100098, "step": 2181 }, { "epoch": 0.5303840544482256, "grad_norm": 1.2002253984438402, "learning_rate": 1.7576844970373123e-05, "loss": 0.6900025606155396, "step": 2182 }, { "epoch": 0.5306271268838114, "grad_norm": 1.3273145700241835, "learning_rate": 1.7574221188713987e-05, "loss": 0.8184098601341248, "step": 2183 }, { "epoch": 0.5308701993193972, "grad_norm": 1.2135086978256242, "learning_rate": 1.757159618337271e-05, "loss": 0.7439125180244446, "step": 2184 }, { "epoch": 0.531113271754983, "grad_norm": 1.465555690141435, "learning_rate": 1.7568969954773382e-05, "loss": 0.6237946152687073, "step": 2185 }, { "epoch": 0.5313563441905688, "grad_norm": 1.3149381366479955, "learning_rate": 1.7566342503340303e-05, "loss": 0.8222059011459351, "step": 2186 }, { "epoch": 0.5315994166261546, "grad_norm": 1.2744156494386152, "learning_rate": 1.7563713829497947e-05, "loss": 0.6644878387451172, "step": 2187 }, { "epoch": 0.5318424890617404, "grad_norm": 1.4620605654855405, "learning_rate": 1.756108393367101e-05, "loss": 0.7588918805122375, "step": 2188 }, { "epoch": 0.5320855614973262, "grad_norm": 1.3726948446154745, "learning_rate": 1.7558452816284374e-05, "loss": 0.7795801758766174, "step": 2189 }, { "epoch": 0.532328633932912, "grad_norm": 1.4107706205056834, "learning_rate": 1.7555820477763113e-05, "loss": 0.6530126333236694, "step": 2190 }, { "epoch": 0.5325717063684978, "grad_norm": 1.4504425878250742, "learning_rate": 1.755318691853251e-05, "loss": 0.8496966361999512, "step": 2191 }, { "epoch": 0.5328147788040836, "grad_norm": 1.1559902107135709, "learning_rate": 1.755055213901804e-05, "loss": 0.6980334520339966, "step": 2192 }, { "epoch": 0.5330578512396694, "grad_norm": 1.3526823130012353, "learning_rate": 1.7547916139645367e-05, "loss": 0.6565884947776794, "step": 2193 }, { "epoch": 0.5333009236752553, "grad_norm": 1.3303733249560863, "learning_rate": 1.754527892084037e-05, "loss": 0.6052879691123962, "step": 2194 }, { "epoch": 0.533543996110841, "grad_norm": 1.1489679413683995, "learning_rate": 1.7542640483029107e-05, "loss": 0.6792871356010437, "step": 2195 }, { "epoch": 0.5337870685464269, "grad_norm": 1.4221666990581021, "learning_rate": 1.7540000826637845e-05, "loss": 0.5287636518478394, "step": 2196 }, { "epoch": 0.5340301409820126, "grad_norm": 8.521102208782295, "learning_rate": 1.7537359952093046e-05, "loss": 0.5338659286499023, "step": 2197 }, { "epoch": 0.5342732134175985, "grad_norm": 1.266581000137266, "learning_rate": 1.7534717859821362e-05, "loss": 0.6076993346214294, "step": 2198 }, { "epoch": 0.5345162858531842, "grad_norm": 1.2652984216828613, "learning_rate": 1.7532074550249648e-05, "loss": 0.6744606494903564, "step": 2199 }, { "epoch": 0.5347593582887701, "grad_norm": 1.3411255873117833, "learning_rate": 1.7529430023804954e-05, "loss": 0.7083104252815247, "step": 2200 }, { "epoch": 0.5350024307243558, "grad_norm": 1.5132243354499393, "learning_rate": 1.7526784280914522e-05, "loss": 0.6905088424682617, "step": 2201 }, { "epoch": 0.5352455031599417, "grad_norm": 1.4483227266919287, "learning_rate": 1.7524137322005804e-05, "loss": 0.7546573877334595, "step": 2202 }, { "epoch": 0.5354885755955274, "grad_norm": 1.3616106878217291, "learning_rate": 1.7521489147506435e-05, "loss": 0.6411089897155762, "step": 2203 }, { "epoch": 0.5357316480311133, "grad_norm": 1.28398229745553, "learning_rate": 1.751883975784425e-05, "loss": 0.6345380544662476, "step": 2204 }, { "epoch": 0.535974720466699, "grad_norm": 1.3544529479223835, "learning_rate": 1.7516189153447283e-05, "loss": 0.634385347366333, "step": 2205 }, { "epoch": 0.5362177929022849, "grad_norm": 1.0455312167462616, "learning_rate": 1.751353733474376e-05, "loss": 0.6075072288513184, "step": 2206 }, { "epoch": 0.5364608653378707, "grad_norm": 1.2342867344393107, "learning_rate": 1.751088430216211e-05, "loss": 0.747698962688446, "step": 2207 }, { "epoch": 0.5367039377734565, "grad_norm": 1.5588829555849155, "learning_rate": 1.7508230056130947e-05, "loss": 0.7779765129089355, "step": 2208 }, { "epoch": 0.5369470102090423, "grad_norm": 1.2581999074936714, "learning_rate": 1.7505574597079094e-05, "loss": 0.817065954208374, "step": 2209 }, { "epoch": 0.5371900826446281, "grad_norm": 1.530311266576102, "learning_rate": 1.750291792543556e-05, "loss": 0.6439622640609741, "step": 2210 }, { "epoch": 0.5374331550802139, "grad_norm": 1.1970130992745294, "learning_rate": 1.7500260041629558e-05, "loss": 0.6609777212142944, "step": 2211 }, { "epoch": 0.5376762275157997, "grad_norm": 1.3358202055723987, "learning_rate": 1.749760094609049e-05, "loss": 0.628070592880249, "step": 2212 }, { "epoch": 0.5379192999513855, "grad_norm": 1.3556009780983045, "learning_rate": 1.7494940639247953e-05, "loss": 0.6394476294517517, "step": 2213 }, { "epoch": 0.5381623723869713, "grad_norm": 1.2888027030557057, "learning_rate": 1.7492279121531746e-05, "loss": 0.7376281023025513, "step": 2214 }, { "epoch": 0.5384054448225571, "grad_norm": 1.3087095389464831, "learning_rate": 1.7489616393371865e-05, "loss": 0.6051928997039795, "step": 2215 }, { "epoch": 0.5386485172581429, "grad_norm": 1.2334295401375672, "learning_rate": 1.748695245519849e-05, "loss": 0.6339110136032104, "step": 2216 }, { "epoch": 0.5388915896937287, "grad_norm": 1.2834091480124248, "learning_rate": 1.748428730744201e-05, "loss": 0.5634702444076538, "step": 2217 }, { "epoch": 0.5391346621293145, "grad_norm": 1.2442272683430444, "learning_rate": 1.7481620950532994e-05, "loss": 0.7456949949264526, "step": 2218 }, { "epoch": 0.5393777345649003, "grad_norm": 1.354778000292604, "learning_rate": 1.7478953384902225e-05, "loss": 0.7499444484710693, "step": 2219 }, { "epoch": 0.5396208070004861, "grad_norm": 1.3572550212531682, "learning_rate": 1.747628461098067e-05, "loss": 0.5670989155769348, "step": 2220 }, { "epoch": 0.539863879436072, "grad_norm": 1.2563000255975094, "learning_rate": 1.747361462919949e-05, "loss": 0.7386449575424194, "step": 2221 }, { "epoch": 0.5401069518716578, "grad_norm": 1.350339400671981, "learning_rate": 1.747094343999004e-05, "loss": 0.6043144464492798, "step": 2222 }, { "epoch": 0.5403500243072435, "grad_norm": 1.2158020458393544, "learning_rate": 1.7468271043783883e-05, "loss": 0.696865439414978, "step": 2223 }, { "epoch": 0.5405930967428294, "grad_norm": 1.5554313913288953, "learning_rate": 1.7465597441012762e-05, "loss": 0.6491040587425232, "step": 2224 }, { "epoch": 0.5408361691784151, "grad_norm": 1.4757970423030562, "learning_rate": 1.7462922632108625e-05, "loss": 0.7988091707229614, "step": 2225 }, { "epoch": 0.541079241614001, "grad_norm": 1.3198948573886424, "learning_rate": 1.7460246617503612e-05, "loss": 0.528863787651062, "step": 2226 }, { "epoch": 0.5413223140495868, "grad_norm": 1.171769613723508, "learning_rate": 1.745756939763005e-05, "loss": 0.4917582869529724, "step": 2227 }, { "epoch": 0.5415653864851726, "grad_norm": 1.2555737494424941, "learning_rate": 1.7454890972920472e-05, "loss": 0.5640554428100586, "step": 2228 }, { "epoch": 0.5418084589207584, "grad_norm": 1.1290892982089982, "learning_rate": 1.74522113438076e-05, "loss": 0.5338954925537109, "step": 2229 }, { "epoch": 0.5420515313563442, "grad_norm": 1.2825260648905863, "learning_rate": 1.744953051072435e-05, "loss": 0.5701193809509277, "step": 2230 }, { "epoch": 0.54229460379193, "grad_norm": 1.330615204288298, "learning_rate": 1.744684847410384e-05, "loss": 0.6635949611663818, "step": 2231 }, { "epoch": 0.5425376762275158, "grad_norm": 1.1320530518364558, "learning_rate": 1.7444165234379376e-05, "loss": 0.5190848708152771, "step": 2232 }, { "epoch": 0.5427807486631016, "grad_norm": 1.281524399625512, "learning_rate": 1.744148079198445e-05, "loss": 0.5895777940750122, "step": 2233 }, { "epoch": 0.5430238210986874, "grad_norm": 1.357749127828359, "learning_rate": 1.7438795147352768e-05, "loss": 0.8755898475646973, "step": 2234 }, { "epoch": 0.5432668935342733, "grad_norm": 1.4369746858986319, "learning_rate": 1.743610830091821e-05, "loss": 0.6616729497909546, "step": 2235 }, { "epoch": 0.543509965969859, "grad_norm": 1.2230983950335745, "learning_rate": 1.743342025311487e-05, "loss": 0.6096769571304321, "step": 2236 }, { "epoch": 0.5437530384054449, "grad_norm": 1.5325707532034805, "learning_rate": 1.7430731004377016e-05, "loss": 0.7045573592185974, "step": 2237 }, { "epoch": 0.5439961108410306, "grad_norm": 1.3284326132046298, "learning_rate": 1.7428040555139128e-05, "loss": 0.7218019962310791, "step": 2238 }, { "epoch": 0.5442391832766165, "grad_norm": 1.2935961175162445, "learning_rate": 1.7425348905835867e-05, "loss": 0.5894906520843506, "step": 2239 }, { "epoch": 0.5444822557122022, "grad_norm": 1.3177340603459404, "learning_rate": 1.742265605690209e-05, "loss": 0.6623934507369995, "step": 2240 }, { "epoch": 0.5447253281477881, "grad_norm": 1.431942967293658, "learning_rate": 1.7419962008772858e-05, "loss": 0.7920516729354858, "step": 2241 }, { "epoch": 0.5449684005833738, "grad_norm": 1.1645589231260758, "learning_rate": 1.7417266761883412e-05, "loss": 0.731785237789154, "step": 2242 }, { "epoch": 0.5452114730189597, "grad_norm": 1.2183798644760435, "learning_rate": 1.7414570316669195e-05, "loss": 0.6436094045639038, "step": 2243 }, { "epoch": 0.5454545454545454, "grad_norm": 1.43982819047194, "learning_rate": 1.7411872673565843e-05, "loss": 0.6999650001525879, "step": 2244 }, { "epoch": 0.5456976178901313, "grad_norm": 1.4052305160088494, "learning_rate": 1.7409173833009184e-05, "loss": 0.7623887658119202, "step": 2245 }, { "epoch": 0.545940690325717, "grad_norm": 1.2358596890508469, "learning_rate": 1.7406473795435234e-05, "loss": 0.5626802444458008, "step": 2246 }, { "epoch": 0.5461837627613029, "grad_norm": 1.21436869889493, "learning_rate": 1.7403772561280217e-05, "loss": 0.8012993335723877, "step": 2247 }, { "epoch": 0.5464268351968887, "grad_norm": 1.437138142910235, "learning_rate": 1.7401070130980533e-05, "loss": 0.7596534490585327, "step": 2248 }, { "epoch": 0.5466699076324745, "grad_norm": 1.138452119863049, "learning_rate": 1.739836650497279e-05, "loss": 0.6983548402786255, "step": 2249 }, { "epoch": 0.5469129800680603, "grad_norm": 1.336032136581129, "learning_rate": 1.7395661683693774e-05, "loss": 0.6362632513046265, "step": 2250 }, { "epoch": 0.5471560525036461, "grad_norm": 1.2810013077400255, "learning_rate": 1.739295566758048e-05, "loss": 0.6393342614173889, "step": 2251 }, { "epoch": 0.5473991249392319, "grad_norm": 1.2565833906335582, "learning_rate": 1.7390248457070088e-05, "loss": 0.672199010848999, "step": 2252 }, { "epoch": 0.5476421973748177, "grad_norm": 1.511437082304273, "learning_rate": 1.738754005259997e-05, "loss": 0.7306243181228638, "step": 2253 }, { "epoch": 0.5478852698104035, "grad_norm": 1.320142441374393, "learning_rate": 1.7384830454607687e-05, "loss": 0.5873227119445801, "step": 2254 }, { "epoch": 0.5481283422459893, "grad_norm": 1.3017756184714104, "learning_rate": 1.738211966353101e-05, "loss": 0.6225727200508118, "step": 2255 }, { "epoch": 0.5483714146815751, "grad_norm": 1.6168612764246324, "learning_rate": 1.7379407679807885e-05, "loss": 0.6494383811950684, "step": 2256 }, { "epoch": 0.5486144871171609, "grad_norm": 1.472744531708639, "learning_rate": 1.7376694503876457e-05, "loss": 0.6867406368255615, "step": 2257 }, { "epoch": 0.5488575595527467, "grad_norm": 1.5230854146086585, "learning_rate": 1.7373980136175063e-05, "loss": 0.7658612132072449, "step": 2258 }, { "epoch": 0.5491006319883325, "grad_norm": 1.2125977197691327, "learning_rate": 1.7371264577142235e-05, "loss": 0.5251045227050781, "step": 2259 }, { "epoch": 0.5493437044239183, "grad_norm": 1.2299319021181816, "learning_rate": 1.7368547827216693e-05, "loss": 0.5169223546981812, "step": 2260 }, { "epoch": 0.5495867768595041, "grad_norm": 1.4035147061505568, "learning_rate": 1.7365829886837355e-05, "loss": 0.6556458473205566, "step": 2261 }, { "epoch": 0.5498298492950899, "grad_norm": 1.2847037520147224, "learning_rate": 1.7363110756443323e-05, "loss": 0.6037243008613586, "step": 2262 }, { "epoch": 0.5500729217306758, "grad_norm": 1.2199371218360093, "learning_rate": 1.7360390436473903e-05, "loss": 0.5513937473297119, "step": 2263 }, { "epoch": 0.5503159941662615, "grad_norm": 1.114802687250484, "learning_rate": 1.735766892736858e-05, "loss": 0.6649704575538635, "step": 2264 }, { "epoch": 0.5505590666018474, "grad_norm": 1.0527927474913616, "learning_rate": 1.7354946229567048e-05, "loss": 0.606670081615448, "step": 2265 }, { "epoch": 0.5508021390374331, "grad_norm": 1.0729474544805504, "learning_rate": 1.735222234350917e-05, "loss": 0.4332551062107086, "step": 2266 }, { "epoch": 0.551045211473019, "grad_norm": 1.1730177725526796, "learning_rate": 1.734949726963502e-05, "loss": 0.6582907438278198, "step": 2267 }, { "epoch": 0.5512882839086047, "grad_norm": 1.0932244440374244, "learning_rate": 1.7346771008384856e-05, "loss": 0.6123228669166565, "step": 2268 }, { "epoch": 0.5515313563441906, "grad_norm": 1.20168319072111, "learning_rate": 1.7344043560199137e-05, "loss": 0.5748087763786316, "step": 2269 }, { "epoch": 0.5517744287797763, "grad_norm": 1.4907089346869593, "learning_rate": 1.7341314925518496e-05, "loss": 0.672819972038269, "step": 2270 }, { "epoch": 0.5520175012153622, "grad_norm": 1.4200919793176965, "learning_rate": 1.7338585104783775e-05, "loss": 0.6217366456985474, "step": 2271 }, { "epoch": 0.5522605736509479, "grad_norm": 1.4160805670579835, "learning_rate": 1.7335854098435995e-05, "loss": 0.6333853006362915, "step": 2272 }, { "epoch": 0.5525036460865338, "grad_norm": 1.315739576621934, "learning_rate": 1.733312190691638e-05, "loss": 0.694475531578064, "step": 2273 }, { "epoch": 0.5527467185221195, "grad_norm": 1.4792485241333029, "learning_rate": 1.7330388530666334e-05, "loss": 0.7899181246757507, "step": 2274 }, { "epoch": 0.5529897909577054, "grad_norm": 1.2632392382576716, "learning_rate": 1.732765397012746e-05, "loss": 0.7306502461433411, "step": 2275 }, { "epoch": 0.5532328633932913, "grad_norm": 1.1389786135753202, "learning_rate": 1.7324918225741555e-05, "loss": 0.6097330451011658, "step": 2276 }, { "epoch": 0.553475935828877, "grad_norm": 1.4929234803652676, "learning_rate": 1.7322181297950595e-05, "loss": 0.7614322900772095, "step": 2277 }, { "epoch": 0.5537190082644629, "grad_norm": 1.5195953909224613, "learning_rate": 1.7319443187196758e-05, "loss": 0.66962730884552, "step": 2278 }, { "epoch": 0.5539620807000486, "grad_norm": 1.2205573055747168, "learning_rate": 1.7316703893922412e-05, "loss": 0.6076210737228394, "step": 2279 }, { "epoch": 0.5542051531356345, "grad_norm": 1.2939344680112441, "learning_rate": 1.731396341857011e-05, "loss": 0.6146469116210938, "step": 2280 }, { "epoch": 0.5544482255712202, "grad_norm": 1.4595717476460737, "learning_rate": 1.7311221761582605e-05, "loss": 0.874970555305481, "step": 2281 }, { "epoch": 0.554691298006806, "grad_norm": 1.2241087643942998, "learning_rate": 1.730847892340283e-05, "loss": 0.6318050622940063, "step": 2282 }, { "epoch": 0.5549343704423918, "grad_norm": 1.399479284183202, "learning_rate": 1.730573490447392e-05, "loss": 0.6737504005432129, "step": 2283 }, { "epoch": 0.5551774428779777, "grad_norm": 1.1532695568572104, "learning_rate": 1.730298970523919e-05, "loss": 0.6773949861526489, "step": 2284 }, { "epoch": 0.5554205153135634, "grad_norm": 1.382576032263945, "learning_rate": 1.7300243326142157e-05, "loss": 0.6087077260017395, "step": 2285 }, { "epoch": 0.5556635877491493, "grad_norm": 1.3810122702285306, "learning_rate": 1.729749576762652e-05, "loss": 0.7191706895828247, "step": 2286 }, { "epoch": 0.555906660184735, "grad_norm": 1.6188281606083956, "learning_rate": 1.729474703013617e-05, "loss": 0.6869033575057983, "step": 2287 }, { "epoch": 0.5561497326203209, "grad_norm": 1.1880452654936753, "learning_rate": 1.729199711411519e-05, "loss": 0.696262001991272, "step": 2288 }, { "epoch": 0.5563928050559067, "grad_norm": 1.236431804128441, "learning_rate": 1.7289246020007856e-05, "loss": 0.6988202929496765, "step": 2289 }, { "epoch": 0.5566358774914925, "grad_norm": 1.208224372898281, "learning_rate": 1.728649374825863e-05, "loss": 0.6856625080108643, "step": 2290 }, { "epoch": 0.5568789499270783, "grad_norm": 1.405497836708729, "learning_rate": 1.7283740299312165e-05, "loss": 0.7800748348236084, "step": 2291 }, { "epoch": 0.5571220223626641, "grad_norm": 1.1185869607398287, "learning_rate": 1.72809856736133e-05, "loss": 0.6343153715133667, "step": 2292 }, { "epoch": 0.5573650947982499, "grad_norm": 1.3488902268346914, "learning_rate": 1.7278229871607083e-05, "loss": 0.7592332363128662, "step": 2293 }, { "epoch": 0.5576081672338357, "grad_norm": 1.3973223206412684, "learning_rate": 1.7275472893738724e-05, "loss": 0.5690466165542603, "step": 2294 }, { "epoch": 0.5578512396694215, "grad_norm": 1.3245012570204613, "learning_rate": 1.727271474045364e-05, "loss": 0.7100390195846558, "step": 2295 }, { "epoch": 0.5580943121050073, "grad_norm": 1.645637905477716, "learning_rate": 1.7269955412197443e-05, "loss": 0.8531458377838135, "step": 2296 }, { "epoch": 0.5583373845405931, "grad_norm": 1.0867010024074215, "learning_rate": 1.7267194909415917e-05, "loss": 0.5456601977348328, "step": 2297 }, { "epoch": 0.5585804569761789, "grad_norm": 1.2824203891526302, "learning_rate": 1.7264433232555054e-05, "loss": 0.7834700345993042, "step": 2298 }, { "epoch": 0.5588235294117647, "grad_norm": 1.4055029843874791, "learning_rate": 1.7261670382061017e-05, "loss": 0.6839419007301331, "step": 2299 }, { "epoch": 0.5590666018473505, "grad_norm": 1.234813712840271, "learning_rate": 1.725890635838018e-05, "loss": 0.6465340852737427, "step": 2300 }, { "epoch": 0.5593096742829363, "grad_norm": 1.2139850156818985, "learning_rate": 1.7256141161959087e-05, "loss": 0.6952481269836426, "step": 2301 }, { "epoch": 0.5595527467185221, "grad_norm": 1.5186488197195074, "learning_rate": 1.7253374793244484e-05, "loss": 0.6596106290817261, "step": 2302 }, { "epoch": 0.5597958191541079, "grad_norm": 1.2658598026240229, "learning_rate": 1.72506072526833e-05, "loss": 0.7438603043556213, "step": 2303 }, { "epoch": 0.5600388915896938, "grad_norm": 1.4544642911543688, "learning_rate": 1.7247838540722657e-05, "loss": 0.6509888172149658, "step": 2304 }, { "epoch": 0.5602819640252795, "grad_norm": 1.1944099689720116, "learning_rate": 1.7245068657809864e-05, "loss": 0.8324712514877319, "step": 2305 }, { "epoch": 0.5605250364608654, "grad_norm": 1.2532694668379962, "learning_rate": 1.7242297604392422e-05, "loss": 0.6971974968910217, "step": 2306 }, { "epoch": 0.5607681088964511, "grad_norm": 1.2527253533581009, "learning_rate": 1.7239525380918016e-05, "loss": 0.7425002455711365, "step": 2307 }, { "epoch": 0.561011181332037, "grad_norm": 1.473663658302495, "learning_rate": 1.7236751987834526e-05, "loss": 0.7019491195678711, "step": 2308 }, { "epoch": 0.5612542537676227, "grad_norm": 1.369835559337756, "learning_rate": 1.7233977425590015e-05, "loss": 0.5679735541343689, "step": 2309 }, { "epoch": 0.5614973262032086, "grad_norm": 1.3402567335048967, "learning_rate": 1.7231201694632744e-05, "loss": 0.6598294377326965, "step": 2310 }, { "epoch": 0.5617403986387943, "grad_norm": 1.372174097856622, "learning_rate": 1.722842479541115e-05, "loss": 0.6983537077903748, "step": 2311 }, { "epoch": 0.5619834710743802, "grad_norm": 1.2197529801297193, "learning_rate": 1.7225646728373866e-05, "loss": 0.6384561061859131, "step": 2312 }, { "epoch": 0.5622265435099659, "grad_norm": 1.2290467250549455, "learning_rate": 1.7222867493969717e-05, "loss": 0.5363515019416809, "step": 2313 }, { "epoch": 0.5624696159455518, "grad_norm": 1.3669281879928596, "learning_rate": 1.722008709264771e-05, "loss": 0.785210371017456, "step": 2314 }, { "epoch": 0.5627126883811375, "grad_norm": 1.4207117300972576, "learning_rate": 1.7217305524857043e-05, "loss": 0.812528133392334, "step": 2315 }, { "epoch": 0.5629557608167234, "grad_norm": 1.2622335052572202, "learning_rate": 1.7214522791047108e-05, "loss": 0.7232229709625244, "step": 2316 }, { "epoch": 0.5631988332523092, "grad_norm": 1.3191685193262537, "learning_rate": 1.7211738891667474e-05, "loss": 0.8933693170547485, "step": 2317 }, { "epoch": 0.563441905687895, "grad_norm": 1.6448771345730915, "learning_rate": 1.720895382716791e-05, "loss": 0.6644321084022522, "step": 2318 }, { "epoch": 0.5636849781234808, "grad_norm": 1.2425146730943815, "learning_rate": 1.720616759799836e-05, "loss": 0.5742087960243225, "step": 2319 }, { "epoch": 0.5639280505590666, "grad_norm": 1.3518222361468843, "learning_rate": 1.720338020460897e-05, "loss": 0.5504908561706543, "step": 2320 }, { "epoch": 0.5641711229946524, "grad_norm": 1.3455370101858377, "learning_rate": 1.7200591647450065e-05, "loss": 0.6313724517822266, "step": 2321 }, { "epoch": 0.5644141954302382, "grad_norm": 1.1657959912639082, "learning_rate": 1.7197801926972165e-05, "loss": 0.5363976359367371, "step": 2322 }, { "epoch": 0.564657267865824, "grad_norm": 1.309980170209733, "learning_rate": 1.719501104362597e-05, "loss": 0.6915791034698486, "step": 2323 }, { "epoch": 0.5649003403014098, "grad_norm": 1.3398502856093089, "learning_rate": 1.719221899786237e-05, "loss": 0.5828843116760254, "step": 2324 }, { "epoch": 0.5651434127369956, "grad_norm": 1.4178536493766738, "learning_rate": 1.7189425790132452e-05, "loss": 0.6683758497238159, "step": 2325 }, { "epoch": 0.5653864851725814, "grad_norm": 1.488113044155623, "learning_rate": 1.7186631420887477e-05, "loss": 0.7003852725028992, "step": 2326 }, { "epoch": 0.5656295576081672, "grad_norm": 1.5893224190137216, "learning_rate": 1.71838358905789e-05, "loss": 0.5776286125183105, "step": 2327 }, { "epoch": 0.565872630043753, "grad_norm": 1.1728316037486934, "learning_rate": 1.7181039199658368e-05, "loss": 0.5635907053947449, "step": 2328 }, { "epoch": 0.5661157024793388, "grad_norm": 1.6756314290709213, "learning_rate": 1.7178241348577707e-05, "loss": 0.7823673486709595, "step": 2329 }, { "epoch": 0.5663587749149247, "grad_norm": 1.1970420511760989, "learning_rate": 1.7175442337788933e-05, "loss": 0.6989782452583313, "step": 2330 }, { "epoch": 0.5666018473505104, "grad_norm": 1.5364921529283482, "learning_rate": 1.717264216774426e-05, "loss": 0.546104371547699, "step": 2331 }, { "epoch": 0.5668449197860963, "grad_norm": 1.38105483196573, "learning_rate": 1.716984083889607e-05, "loss": 0.760187566280365, "step": 2332 }, { "epoch": 0.567087992221682, "grad_norm": 1.2111760841276284, "learning_rate": 1.7167038351696947e-05, "loss": 0.7350693941116333, "step": 2333 }, { "epoch": 0.5673310646572679, "grad_norm": 1.875453878538169, "learning_rate": 1.7164234706599656e-05, "loss": 0.5548731684684753, "step": 2334 }, { "epoch": 0.5675741370928536, "grad_norm": 1.3641789072240438, "learning_rate": 1.716142990405715e-05, "loss": 0.5955467224121094, "step": 2335 }, { "epoch": 0.5678172095284395, "grad_norm": 1.315167744654201, "learning_rate": 1.7158623944522572e-05, "loss": 0.7042475938796997, "step": 2336 }, { "epoch": 0.5680602819640252, "grad_norm": 1.3432684779782857, "learning_rate": 1.715581682844925e-05, "loss": 0.7370121479034424, "step": 2337 }, { "epoch": 0.5683033543996111, "grad_norm": 1.1622562363485547, "learning_rate": 1.7153008556290696e-05, "loss": 0.5831730961799622, "step": 2338 }, { "epoch": 0.5685464268351968, "grad_norm": 1.4460763446285667, "learning_rate": 1.7150199128500608e-05, "loss": 0.5469090938568115, "step": 2339 }, { "epoch": 0.5687894992707827, "grad_norm": 1.1812809878855528, "learning_rate": 1.714738854553288e-05, "loss": 0.6156921982765198, "step": 2340 }, { "epoch": 0.5690325717063685, "grad_norm": 1.5073603228161108, "learning_rate": 1.7144576807841583e-05, "loss": 0.892522931098938, "step": 2341 }, { "epoch": 0.5692756441419543, "grad_norm": 1.2233354989508212, "learning_rate": 1.7141763915880982e-05, "loss": 0.6304531693458557, "step": 2342 }, { "epoch": 0.56951871657754, "grad_norm": 1.2987733499347183, "learning_rate": 1.713894987010552e-05, "loss": 0.5960790514945984, "step": 2343 }, { "epoch": 0.5697617890131259, "grad_norm": 1.28598012554552, "learning_rate": 1.713613467096983e-05, "loss": 0.6085141897201538, "step": 2344 }, { "epoch": 0.5700048614487118, "grad_norm": 1.2645052470276779, "learning_rate": 1.7133318318928735e-05, "loss": 0.6958321332931519, "step": 2345 }, { "epoch": 0.5702479338842975, "grad_norm": 1.288051632861737, "learning_rate": 1.7130500814437244e-05, "loss": 0.755528450012207, "step": 2346 }, { "epoch": 0.5704910063198834, "grad_norm": 1.234808738415357, "learning_rate": 1.7127682157950546e-05, "loss": 0.6767792701721191, "step": 2347 }, { "epoch": 0.5707340787554691, "grad_norm": 1.2150197775663325, "learning_rate": 1.7124862349924017e-05, "loss": 0.7536126375198364, "step": 2348 }, { "epoch": 0.570977151191055, "grad_norm": 1.053412343762457, "learning_rate": 1.7122041390813228e-05, "loss": 0.5596300363540649, "step": 2349 }, { "epoch": 0.5712202236266407, "grad_norm": 1.3631631537363171, "learning_rate": 1.7119219281073923e-05, "loss": 0.7423297166824341, "step": 2350 }, { "epoch": 0.5714632960622266, "grad_norm": 1.2764569191679658, "learning_rate": 1.7116396021162044e-05, "loss": 0.6182795763015747, "step": 2351 }, { "epoch": 0.5717063684978123, "grad_norm": 1.3791598436620136, "learning_rate": 1.7113571611533718e-05, "loss": 0.6903343796730042, "step": 2352 }, { "epoch": 0.5719494409333982, "grad_norm": 1.3994133844609191, "learning_rate": 1.711074605264524e-05, "loss": 0.7808687090873718, "step": 2353 }, { "epoch": 0.5721925133689839, "grad_norm": 1.4448888833440512, "learning_rate": 1.7107919344953115e-05, "loss": 0.6163545846939087, "step": 2354 }, { "epoch": 0.5724355858045698, "grad_norm": 1.1287968542957916, "learning_rate": 1.710509148891402e-05, "loss": 0.6247868537902832, "step": 2355 }, { "epoch": 0.5726786582401555, "grad_norm": 1.0650282385419876, "learning_rate": 1.7102262484984814e-05, "loss": 0.610042929649353, "step": 2356 }, { "epoch": 0.5729217306757414, "grad_norm": 1.3066903594761374, "learning_rate": 1.709943233362256e-05, "loss": 0.7710957527160645, "step": 2357 }, { "epoch": 0.5731648031113272, "grad_norm": 1.230775430436628, "learning_rate": 1.709660103528448e-05, "loss": 0.6172122955322266, "step": 2358 }, { "epoch": 0.573407875546913, "grad_norm": 1.1398508753899896, "learning_rate": 1.7093768590428004e-05, "loss": 0.49898552894592285, "step": 2359 }, { "epoch": 0.5736509479824988, "grad_norm": 1.2537873105189645, "learning_rate": 1.7090934999510732e-05, "loss": 0.6351790428161621, "step": 2360 }, { "epoch": 0.5738940204180846, "grad_norm": 1.475473906253978, "learning_rate": 1.7088100262990466e-05, "loss": 0.6026169061660767, "step": 2361 }, { "epoch": 0.5741370928536704, "grad_norm": 1.300872714741105, "learning_rate": 1.7085264381325172e-05, "loss": 0.6741676926612854, "step": 2362 }, { "epoch": 0.5743801652892562, "grad_norm": 1.3182974740882327, "learning_rate": 1.7082427354973017e-05, "loss": 0.644690990447998, "step": 2363 }, { "epoch": 0.574623237724842, "grad_norm": 1.5604076146393053, "learning_rate": 1.7079589184392345e-05, "loss": 0.789586067199707, "step": 2364 }, { "epoch": 0.5748663101604278, "grad_norm": 1.2942229327343981, "learning_rate": 1.7076749870041692e-05, "loss": 0.8125302195549011, "step": 2365 }, { "epoch": 0.5751093825960136, "grad_norm": 1.1179169687757418, "learning_rate": 1.7073909412379766e-05, "loss": 0.7897821664810181, "step": 2366 }, { "epoch": 0.5753524550315994, "grad_norm": 1.6156678747628095, "learning_rate": 1.7071067811865477e-05, "loss": 0.770258367061615, "step": 2367 }, { "epoch": 0.5755955274671852, "grad_norm": 1.4864843434917923, "learning_rate": 1.7068225068957906e-05, "loss": 0.6383078098297119, "step": 2368 }, { "epoch": 0.575838599902771, "grad_norm": 1.2527364598269106, "learning_rate": 1.7065381184116322e-05, "loss": 0.5515803694725037, "step": 2369 }, { "epoch": 0.5760816723383568, "grad_norm": 1.0995131068927977, "learning_rate": 1.706253615780018e-05, "loss": 0.619393527507782, "step": 2370 }, { "epoch": 0.5763247447739427, "grad_norm": 1.1803015354981834, "learning_rate": 1.7059689990469126e-05, "loss": 0.5726116895675659, "step": 2371 }, { "epoch": 0.5765678172095284, "grad_norm": 1.2755644024682353, "learning_rate": 1.7056842682582975e-05, "loss": 0.6985629200935364, "step": 2372 }, { "epoch": 0.5768108896451143, "grad_norm": 1.3700118385698916, "learning_rate": 1.7053994234601736e-05, "loss": 0.6532431840896606, "step": 2373 }, { "epoch": 0.5770539620807, "grad_norm": 1.3028074415222393, "learning_rate": 1.7051144646985605e-05, "loss": 0.7665505409240723, "step": 2374 }, { "epoch": 0.5772970345162859, "grad_norm": 1.232892932666319, "learning_rate": 1.7048293920194952e-05, "loss": 0.6299135684967041, "step": 2375 }, { "epoch": 0.5775401069518716, "grad_norm": 1.2870226087414278, "learning_rate": 1.7045442054690345e-05, "loss": 0.6194169521331787, "step": 2376 }, { "epoch": 0.5777831793874575, "grad_norm": 1.3333311502047904, "learning_rate": 1.704258905093252e-05, "loss": 0.7427995204925537, "step": 2377 }, { "epoch": 0.5780262518230432, "grad_norm": 1.5527490764681626, "learning_rate": 1.7039734909382407e-05, "loss": 0.6581906676292419, "step": 2378 }, { "epoch": 0.5782693242586291, "grad_norm": 1.183611760491557, "learning_rate": 1.7036879630501123e-05, "loss": 0.8517650961875916, "step": 2379 }, { "epoch": 0.5785123966942148, "grad_norm": 1.2818709859484032, "learning_rate": 1.7034023214749957e-05, "loss": 0.651162326335907, "step": 2380 }, { "epoch": 0.5787554691298007, "grad_norm": 1.3499908516582884, "learning_rate": 1.703116566259039e-05, "loss": 0.7472814321517944, "step": 2381 }, { "epoch": 0.5789985415653864, "grad_norm": 1.1702457509875996, "learning_rate": 1.7028306974484085e-05, "loss": 0.5506311655044556, "step": 2382 }, { "epoch": 0.5792416140009723, "grad_norm": 0.9746307636425674, "learning_rate": 1.7025447150892892e-05, "loss": 0.509488046169281, "step": 2383 }, { "epoch": 0.5794846864365581, "grad_norm": 1.385079966689904, "learning_rate": 1.7022586192278834e-05, "loss": 0.7165625095367432, "step": 2384 }, { "epoch": 0.5797277588721439, "grad_norm": 1.2827999671436598, "learning_rate": 1.701972409910413e-05, "loss": 0.6079535484313965, "step": 2385 }, { "epoch": 0.5799708313077298, "grad_norm": 1.410477217248654, "learning_rate": 1.701686087183117e-05, "loss": 0.7215386629104614, "step": 2386 }, { "epoch": 0.5802139037433155, "grad_norm": 1.3474539669007388, "learning_rate": 1.7013996510922538e-05, "loss": 0.6761656999588013, "step": 2387 }, { "epoch": 0.5804569761789014, "grad_norm": 1.30978513183714, "learning_rate": 1.7011131016840996e-05, "loss": 0.6745128035545349, "step": 2388 }, { "epoch": 0.5807000486144871, "grad_norm": 1.2103448007562225, "learning_rate": 1.7008264390049493e-05, "loss": 0.5891454219818115, "step": 2389 }, { "epoch": 0.580943121050073, "grad_norm": 1.7644820335936293, "learning_rate": 1.7005396631011147e-05, "loss": 0.6340436339378357, "step": 2390 }, { "epoch": 0.5811861934856587, "grad_norm": 1.3118493104798101, "learning_rate": 1.7002527740189284e-05, "loss": 0.7168579697608948, "step": 2391 }, { "epoch": 0.5814292659212446, "grad_norm": 1.268717299555155, "learning_rate": 1.6999657718047392e-05, "loss": 0.6453925371170044, "step": 2392 }, { "epoch": 0.5816723383568303, "grad_norm": 1.32083160860312, "learning_rate": 1.6996786565049145e-05, "loss": 0.6038765907287598, "step": 2393 }, { "epoch": 0.5819154107924162, "grad_norm": 1.4818061112489462, "learning_rate": 1.6993914281658406e-05, "loss": 0.6628803014755249, "step": 2394 }, { "epoch": 0.5821584832280019, "grad_norm": 1.3008017605687803, "learning_rate": 1.6991040868339217e-05, "loss": 0.8285216093063354, "step": 2395 }, { "epoch": 0.5824015556635878, "grad_norm": 1.7619622948699392, "learning_rate": 1.6988166325555807e-05, "loss": 0.6498672366142273, "step": 2396 }, { "epoch": 0.5826446280991735, "grad_norm": 1.407090884840984, "learning_rate": 1.6985290653772583e-05, "loss": 0.5817669630050659, "step": 2397 }, { "epoch": 0.5828877005347594, "grad_norm": 1.0775810108010266, "learning_rate": 1.698241385345413e-05, "loss": 0.6204513311386108, "step": 2398 }, { "epoch": 0.5831307729703452, "grad_norm": 1.2880051813075424, "learning_rate": 1.6979535925065222e-05, "loss": 0.566169023513794, "step": 2399 }, { "epoch": 0.583373845405931, "grad_norm": 1.0680269147222556, "learning_rate": 1.6976656869070816e-05, "loss": 0.6547212600708008, "step": 2400 }, { "epoch": 0.5836169178415168, "grad_norm": 1.1813137265824092, "learning_rate": 1.6973776685936047e-05, "loss": 0.7607733011245728, "step": 2401 }, { "epoch": 0.5838599902771026, "grad_norm": 1.1325737568376844, "learning_rate": 1.6970895376126234e-05, "loss": 0.6728355884552002, "step": 2402 }, { "epoch": 0.5841030627126884, "grad_norm": 1.2254356327318479, "learning_rate": 1.6968012940106883e-05, "loss": 0.7573181390762329, "step": 2403 }, { "epoch": 0.5843461351482742, "grad_norm": 1.163870313058274, "learning_rate": 1.696512937834367e-05, "loss": 0.6339029669761658, "step": 2404 }, { "epoch": 0.58458920758386, "grad_norm": 1.1667301900305516, "learning_rate": 1.6962244691302466e-05, "loss": 0.7011980414390564, "step": 2405 }, { "epoch": 0.5848322800194458, "grad_norm": 1.1272029047538905, "learning_rate": 1.695935887944931e-05, "loss": 0.48009124398231506, "step": 2406 }, { "epoch": 0.5850753524550316, "grad_norm": 1.2836159284669622, "learning_rate": 1.6956471943250435e-05, "loss": 0.6798129081726074, "step": 2407 }, { "epoch": 0.5853184248906174, "grad_norm": 1.424873845249997, "learning_rate": 1.695358388317225e-05, "loss": 0.665704607963562, "step": 2408 }, { "epoch": 0.5855614973262032, "grad_norm": 1.2199631779879372, "learning_rate": 1.6950694699681348e-05, "loss": 0.7458101511001587, "step": 2409 }, { "epoch": 0.585804569761789, "grad_norm": 1.3420067293781048, "learning_rate": 1.6947804393244504e-05, "loss": 0.5978899002075195, "step": 2410 }, { "epoch": 0.5860476421973748, "grad_norm": 1.235917647136676, "learning_rate": 1.6944912964328664e-05, "loss": 0.648500382900238, "step": 2411 }, { "epoch": 0.5862907146329607, "grad_norm": 1.1541003967697976, "learning_rate": 1.6942020413400975e-05, "loss": 0.6089125871658325, "step": 2412 }, { "epoch": 0.5865337870685464, "grad_norm": 1.536176182713376, "learning_rate": 1.6939126740928745e-05, "loss": 0.8351122140884399, "step": 2413 }, { "epoch": 0.5867768595041323, "grad_norm": 1.19417352999259, "learning_rate": 1.6936231947379477e-05, "loss": 0.5235804915428162, "step": 2414 }, { "epoch": 0.587019931939718, "grad_norm": 1.6187512055565354, "learning_rate": 1.6933336033220847e-05, "loss": 0.7379255294799805, "step": 2415 }, { "epoch": 0.5872630043753039, "grad_norm": 1.1588066785948692, "learning_rate": 1.693043899892072e-05, "loss": 0.5831902027130127, "step": 2416 }, { "epoch": 0.5875060768108896, "grad_norm": 1.4849132173236972, "learning_rate": 1.692754084494713e-05, "loss": 0.8032386302947998, "step": 2417 }, { "epoch": 0.5877491492464755, "grad_norm": 1.1885900408439412, "learning_rate": 1.692464157176831e-05, "loss": 0.5507861375808716, "step": 2418 }, { "epoch": 0.5879922216820612, "grad_norm": 1.3638608251374216, "learning_rate": 1.6921741179852654e-05, "loss": 0.6156383752822876, "step": 2419 }, { "epoch": 0.5882352941176471, "grad_norm": 1.1696649749910226, "learning_rate": 1.691883966966875e-05, "loss": 0.5652095079421997, "step": 2420 }, { "epoch": 0.5884783665532328, "grad_norm": 1.2823842037411932, "learning_rate": 1.691593704168536e-05, "loss": 0.7694916725158691, "step": 2421 }, { "epoch": 0.5887214389888187, "grad_norm": 1.2487953376370846, "learning_rate": 1.6913033296371433e-05, "loss": 0.6157475113868713, "step": 2422 }, { "epoch": 0.5889645114244044, "grad_norm": 1.408002769294989, "learning_rate": 1.691012843419609e-05, "loss": 0.711162805557251, "step": 2423 }, { "epoch": 0.5892075838599903, "grad_norm": 1.3624877944106646, "learning_rate": 1.6907222455628644e-05, "loss": 0.7088662385940552, "step": 2424 }, { "epoch": 0.5894506562955761, "grad_norm": 1.211664381454468, "learning_rate": 1.6904315361138573e-05, "loss": 0.7093095779418945, "step": 2425 }, { "epoch": 0.5896937287311619, "grad_norm": 1.381301850180953, "learning_rate": 1.690140715119555e-05, "loss": 0.6110198497772217, "step": 2426 }, { "epoch": 0.5899368011667477, "grad_norm": 1.492534383162528, "learning_rate": 1.6898497826269418e-05, "loss": 0.7354232668876648, "step": 2427 }, { "epoch": 0.5901798736023335, "grad_norm": 1.2056224206723432, "learning_rate": 1.6895587386830205e-05, "loss": 0.6459695100784302, "step": 2428 }, { "epoch": 0.5904229460379193, "grad_norm": 1.1181575853749257, "learning_rate": 1.6892675833348124e-05, "loss": 0.6401344537734985, "step": 2429 }, { "epoch": 0.5906660184735051, "grad_norm": 1.181218515271394, "learning_rate": 1.688976316629355e-05, "loss": 0.5909767746925354, "step": 2430 }, { "epoch": 0.5909090909090909, "grad_norm": 1.347567274352636, "learning_rate": 1.688684938613706e-05, "loss": 0.658682107925415, "step": 2431 }, { "epoch": 0.5911521633446767, "grad_norm": 1.3955247075292951, "learning_rate": 1.6883934493349396e-05, "loss": 0.7156231999397278, "step": 2432 }, { "epoch": 0.5913952357802625, "grad_norm": 1.4310220375778915, "learning_rate": 1.6881018488401487e-05, "loss": 0.6134390830993652, "step": 2433 }, { "epoch": 0.5916383082158483, "grad_norm": 1.3213060314233505, "learning_rate": 1.687810137176444e-05, "loss": 0.684491753578186, "step": 2434 }, { "epoch": 0.5918813806514341, "grad_norm": 1.5280900232499925, "learning_rate": 1.6875183143909537e-05, "loss": 0.6773227453231812, "step": 2435 }, { "epoch": 0.5921244530870199, "grad_norm": 1.3014067668694072, "learning_rate": 1.6872263805308246e-05, "loss": 0.8106643557548523, "step": 2436 }, { "epoch": 0.5923675255226057, "grad_norm": 1.3858363959598698, "learning_rate": 1.686934335643222e-05, "loss": 0.7973667979240417, "step": 2437 }, { "epoch": 0.5926105979581915, "grad_norm": 1.0896069529843406, "learning_rate": 1.6866421797753264e-05, "loss": 0.5290188789367676, "step": 2438 }, { "epoch": 0.5928536703937773, "grad_norm": 1.2072218275541617, "learning_rate": 1.6863499129743397e-05, "loss": 0.6423479318618774, "step": 2439 }, { "epoch": 0.5930967428293632, "grad_norm": 1.311260825970017, "learning_rate": 1.6860575352874797e-05, "loss": 0.6023501753807068, "step": 2440 }, { "epoch": 0.5933398152649489, "grad_norm": 1.3697545618043365, "learning_rate": 1.685765046761983e-05, "loss": 0.6064696907997131, "step": 2441 }, { "epoch": 0.5935828877005348, "grad_norm": 1.1243954680115729, "learning_rate": 1.6854724474451027e-05, "loss": 0.6306662559509277, "step": 2442 }, { "epoch": 0.5938259601361205, "grad_norm": 1.3304275010509805, "learning_rate": 1.6851797373841114e-05, "loss": 0.671352744102478, "step": 2443 }, { "epoch": 0.5940690325717064, "grad_norm": 1.4330938752605245, "learning_rate": 1.6848869166262992e-05, "loss": 0.6474078893661499, "step": 2444 }, { "epoch": 0.5943121050072921, "grad_norm": 1.30946494829301, "learning_rate": 1.684593985218974e-05, "loss": 0.6402463912963867, "step": 2445 }, { "epoch": 0.594555177442878, "grad_norm": 1.2781246285695007, "learning_rate": 1.684300943209461e-05, "loss": 0.6826629638671875, "step": 2446 }, { "epoch": 0.5947982498784637, "grad_norm": 1.2889868818809194, "learning_rate": 1.6840077906451037e-05, "loss": 0.7128267884254456, "step": 2447 }, { "epoch": 0.5950413223140496, "grad_norm": 1.2610831752459761, "learning_rate": 1.683714527573263e-05, "loss": 0.6171326041221619, "step": 2448 }, { "epoch": 0.5952843947496353, "grad_norm": 1.2454477240688688, "learning_rate": 1.6834211540413196e-05, "loss": 0.6316275000572205, "step": 2449 }, { "epoch": 0.5955274671852212, "grad_norm": 1.2333544240503826, "learning_rate": 1.6831276700966697e-05, "loss": 0.5756258368492126, "step": 2450 }, { "epoch": 0.595770539620807, "grad_norm": 1.2212392972142654, "learning_rate": 1.682834075786728e-05, "loss": 0.6502866148948669, "step": 2451 }, { "epoch": 0.5960136120563928, "grad_norm": 1.3950365058640517, "learning_rate": 1.6825403711589274e-05, "loss": 0.7489467263221741, "step": 2452 }, { "epoch": 0.5962566844919787, "grad_norm": 1.1146190720454925, "learning_rate": 1.6822465562607186e-05, "loss": 0.7205348014831543, "step": 2453 }, { "epoch": 0.5964997569275644, "grad_norm": 1.1688013005987257, "learning_rate": 1.6819526311395703e-05, "loss": 0.6801983714103699, "step": 2454 }, { "epoch": 0.5967428293631503, "grad_norm": 1.0766511779627324, "learning_rate": 1.681658595842968e-05, "loss": 0.5943965911865234, "step": 2455 }, { "epoch": 0.596985901798736, "grad_norm": 1.308069161514502, "learning_rate": 1.681364450418416e-05, "loss": 0.7977367639541626, "step": 2456 }, { "epoch": 0.5972289742343219, "grad_norm": 1.3424712495628077, "learning_rate": 1.6810701949134364e-05, "loss": 0.6988658905029297, "step": 2457 }, { "epoch": 0.5974720466699076, "grad_norm": 1.518721136132832, "learning_rate": 1.6807758293755682e-05, "loss": 0.7280202507972717, "step": 2458 }, { "epoch": 0.5977151191054935, "grad_norm": 1.2784217942703073, "learning_rate": 1.680481353852369e-05, "loss": 0.6083294749259949, "step": 2459 }, { "epoch": 0.5979581915410792, "grad_norm": 1.4618539302393154, "learning_rate": 1.6801867683914143e-05, "loss": 0.6914151310920715, "step": 2460 }, { "epoch": 0.5982012639766651, "grad_norm": 1.1776911137470025, "learning_rate": 1.6798920730402962e-05, "loss": 0.6399229168891907, "step": 2461 }, { "epoch": 0.5984443364122508, "grad_norm": 1.2982081734033921, "learning_rate": 1.6795972678466255e-05, "loss": 0.529417097568512, "step": 2462 }, { "epoch": 0.5986874088478367, "grad_norm": 1.6453800300382238, "learning_rate": 1.6793023528580316e-05, "loss": 0.5431444644927979, "step": 2463 }, { "epoch": 0.5989304812834224, "grad_norm": 1.3645609719254967, "learning_rate": 1.6790073281221593e-05, "loss": 0.6394801139831543, "step": 2464 }, { "epoch": 0.5991735537190083, "grad_norm": 1.1986166982111746, "learning_rate": 1.6787121936866732e-05, "loss": 0.7249040007591248, "step": 2465 }, { "epoch": 0.5994166261545941, "grad_norm": 1.2855969860239609, "learning_rate": 1.678416949599255e-05, "loss": 0.6946894526481628, "step": 2466 }, { "epoch": 0.5996596985901799, "grad_norm": 1.4111752685215064, "learning_rate": 1.678121595907603e-05, "loss": 0.6082057952880859, "step": 2467 }, { "epoch": 0.5999027710257657, "grad_norm": 1.1893621200329592, "learning_rate": 1.677826132659435e-05, "loss": 0.6523765921592712, "step": 2468 }, { "epoch": 0.6001458434613515, "grad_norm": 1.2582206263414932, "learning_rate": 1.6775305599024853e-05, "loss": 0.5133974552154541, "step": 2469 }, { "epoch": 0.6003889158969373, "grad_norm": 1.2707462187438512, "learning_rate": 1.677234877684507e-05, "loss": 0.6002677083015442, "step": 2470 }, { "epoch": 0.6006319883325231, "grad_norm": 1.3423087557941185, "learning_rate": 1.6769390860532696e-05, "loss": 0.7243272066116333, "step": 2471 }, { "epoch": 0.6008750607681089, "grad_norm": 1.1266489386851317, "learning_rate": 1.6766431850565604e-05, "loss": 0.6574723720550537, "step": 2472 }, { "epoch": 0.6011181332036947, "grad_norm": 1.2059440829177028, "learning_rate": 1.6763471747421856e-05, "loss": 0.6808943748474121, "step": 2473 }, { "epoch": 0.6013612056392805, "grad_norm": 1.1845201943736834, "learning_rate": 1.676051055157968e-05, "loss": 0.7699984312057495, "step": 2474 }, { "epoch": 0.6016042780748663, "grad_norm": 1.3141077205064549, "learning_rate": 1.6757548263517484e-05, "loss": 0.8367524147033691, "step": 2475 }, { "epoch": 0.6018473505104521, "grad_norm": 1.499200727488509, "learning_rate": 1.675458488371385e-05, "loss": 0.7502768635749817, "step": 2476 }, { "epoch": 0.6020904229460379, "grad_norm": 1.335988572738552, "learning_rate": 1.675162041264754e-05, "loss": 0.6889176368713379, "step": 2477 }, { "epoch": 0.6023334953816237, "grad_norm": 1.2154491105095062, "learning_rate": 1.674865485079749e-05, "loss": 0.563042938709259, "step": 2478 }, { "epoch": 0.6025765678172095, "grad_norm": 1.1213753986427395, "learning_rate": 1.6745688198642808e-05, "loss": 0.6242265701293945, "step": 2479 }, { "epoch": 0.6028196402527953, "grad_norm": 1.3295408811218858, "learning_rate": 1.6742720456662787e-05, "loss": 0.6154987812042236, "step": 2480 }, { "epoch": 0.6030627126883812, "grad_norm": 1.1619747891445262, "learning_rate": 1.6739751625336898e-05, "loss": 0.6482241153717041, "step": 2481 }, { "epoch": 0.6033057851239669, "grad_norm": 1.5500713446006988, "learning_rate": 1.6736781705144768e-05, "loss": 0.6260034441947937, "step": 2482 }, { "epoch": 0.6035488575595528, "grad_norm": 1.4098764490284523, "learning_rate": 1.6733810696566228e-05, "loss": 0.6804739236831665, "step": 2483 }, { "epoch": 0.6037919299951385, "grad_norm": 1.425660523778267, "learning_rate": 1.6730838600081257e-05, "loss": 0.5812477469444275, "step": 2484 }, { "epoch": 0.6040350024307244, "grad_norm": 1.2693013401042375, "learning_rate": 1.6727865416170032e-05, "loss": 0.6296895742416382, "step": 2485 }, { "epoch": 0.6042780748663101, "grad_norm": 1.4879451008356448, "learning_rate": 1.6724891145312902e-05, "loss": 0.73710036277771, "step": 2486 }, { "epoch": 0.604521147301896, "grad_norm": 1.2445733153919076, "learning_rate": 1.6721915787990374e-05, "loss": 0.6577466726303101, "step": 2487 }, { "epoch": 0.6047642197374817, "grad_norm": 1.2934982338579628, "learning_rate": 1.6718939344683148e-05, "loss": 0.761938214302063, "step": 2488 }, { "epoch": 0.6050072921730676, "grad_norm": 1.3399336781342055, "learning_rate": 1.6715961815872096e-05, "loss": 0.6723355054855347, "step": 2489 }, { "epoch": 0.6052503646086533, "grad_norm": 1.29159037123674, "learning_rate": 1.671298320203827e-05, "loss": 0.6139325499534607, "step": 2490 }, { "epoch": 0.6054934370442392, "grad_norm": 1.4030107947708097, "learning_rate": 1.671000350366288e-05, "loss": 0.6906956434249878, "step": 2491 }, { "epoch": 0.6057365094798249, "grad_norm": 1.4198769691255146, "learning_rate": 1.670702272122733e-05, "loss": 0.7307583093643188, "step": 2492 }, { "epoch": 0.6059795819154108, "grad_norm": 1.2456120406031321, "learning_rate": 1.6704040855213182e-05, "loss": 0.5492756366729736, "step": 2493 }, { "epoch": 0.6062226543509966, "grad_norm": 1.3655161390692232, "learning_rate": 1.6701057906102197e-05, "loss": 0.5742740035057068, "step": 2494 }, { "epoch": 0.6064657267865824, "grad_norm": 1.3443632163071635, "learning_rate": 1.6698073874376287e-05, "loss": 0.632778525352478, "step": 2495 }, { "epoch": 0.6067087992221682, "grad_norm": 1.326168073436114, "learning_rate": 1.6695088760517554e-05, "loss": 0.8588436841964722, "step": 2496 }, { "epoch": 0.606951871657754, "grad_norm": 1.2536429582361344, "learning_rate": 1.669210256500826e-05, "loss": 0.679466962814331, "step": 2497 }, { "epoch": 0.6071949440933399, "grad_norm": 1.300432038753172, "learning_rate": 1.6689115288330862e-05, "loss": 0.6865832209587097, "step": 2498 }, { "epoch": 0.6074380165289256, "grad_norm": 1.346780775419711, "learning_rate": 1.668612693096798e-05, "loss": 0.7309199571609497, "step": 2499 }, { "epoch": 0.6076810889645115, "grad_norm": 1.133689257677818, "learning_rate": 1.6683137493402405e-05, "loss": 0.6871769428253174, "step": 2500 }, { "epoch": 0.6079241614000972, "grad_norm": 1.1660654092554175, "learning_rate": 1.6680146976117105e-05, "loss": 0.6268882751464844, "step": 2501 }, { "epoch": 0.608167233835683, "grad_norm": 1.1937831384075777, "learning_rate": 1.667715537959523e-05, "loss": 0.7052068710327148, "step": 2502 }, { "epoch": 0.6084103062712688, "grad_norm": 1.1960266569419256, "learning_rate": 1.6674162704320096e-05, "loss": 0.6323356628417969, "step": 2503 }, { "epoch": 0.6086533787068547, "grad_norm": 1.2768880913328895, "learning_rate": 1.6671168950775194e-05, "loss": 0.6918904781341553, "step": 2504 }, { "epoch": 0.6088964511424404, "grad_norm": 1.2071335060361914, "learning_rate": 1.6668174119444195e-05, "loss": 0.82924884557724, "step": 2505 }, { "epoch": 0.6091395235780263, "grad_norm": 1.2322523559950158, "learning_rate": 1.666517821081094e-05, "loss": 0.7100269198417664, "step": 2506 }, { "epoch": 0.6093825960136121, "grad_norm": 1.381118586676163, "learning_rate": 1.666218122535944e-05, "loss": 0.6091960072517395, "step": 2507 }, { "epoch": 0.6096256684491979, "grad_norm": 1.3538290324075137, "learning_rate": 1.665918316357389e-05, "loss": 0.6303055882453918, "step": 2508 }, { "epoch": 0.6098687408847837, "grad_norm": 1.2148420772687152, "learning_rate": 1.6656184025938654e-05, "loss": 0.5716878175735474, "step": 2509 }, { "epoch": 0.6101118133203695, "grad_norm": 1.1775841908246005, "learning_rate": 1.6653183812938264e-05, "loss": 0.583625853061676, "step": 2510 }, { "epoch": 0.6103548857559553, "grad_norm": 1.287649093479535, "learning_rate": 1.665018252505743e-05, "loss": 0.6415659189224243, "step": 2511 }, { "epoch": 0.6105979581915411, "grad_norm": 1.245780585997354, "learning_rate": 1.6647180162781044e-05, "loss": 0.7167422771453857, "step": 2512 }, { "epoch": 0.6108410306271269, "grad_norm": 1.2643869194914492, "learning_rate": 1.6644176726594156e-05, "loss": 0.6072420477867126, "step": 2513 }, { "epoch": 0.6110841030627127, "grad_norm": 1.365029726470401, "learning_rate": 1.6641172216982003e-05, "loss": 0.768306314945221, "step": 2514 }, { "epoch": 0.6113271754982985, "grad_norm": 1.2451381068784526, "learning_rate": 1.663816663442999e-05, "loss": 0.6576324701309204, "step": 2515 }, { "epoch": 0.6115702479338843, "grad_norm": 1.3105508608776606, "learning_rate": 1.6635159979423693e-05, "loss": 0.6602170467376709, "step": 2516 }, { "epoch": 0.6118133203694701, "grad_norm": 1.2395554254669658, "learning_rate": 1.663215225244886e-05, "loss": 0.8035563230514526, "step": 2517 }, { "epoch": 0.6120563928050559, "grad_norm": 1.386399916854431, "learning_rate": 1.6629143453991422e-05, "loss": 0.7121725082397461, "step": 2518 }, { "epoch": 0.6122994652406417, "grad_norm": 1.5673137554073262, "learning_rate": 1.6626133584537474e-05, "loss": 0.7667017579078674, "step": 2519 }, { "epoch": 0.6125425376762275, "grad_norm": 1.21457024612981, "learning_rate": 1.662312264457329e-05, "loss": 0.6521978378295898, "step": 2520 }, { "epoch": 0.6127856101118133, "grad_norm": 1.3223571182003102, "learning_rate": 1.662011063458531e-05, "loss": 0.7283635139465332, "step": 2521 }, { "epoch": 0.6130286825473992, "grad_norm": 1.2092243887459355, "learning_rate": 1.6617097555060154e-05, "loss": 0.711355447769165, "step": 2522 }, { "epoch": 0.6132717549829849, "grad_norm": 1.3310357051073243, "learning_rate": 1.661408340648461e-05, "loss": 0.7144307494163513, "step": 2523 }, { "epoch": 0.6135148274185708, "grad_norm": 1.2390181574867312, "learning_rate": 1.6611068189345637e-05, "loss": 0.636370062828064, "step": 2524 }, { "epoch": 0.6137578998541565, "grad_norm": 1.3456762977118868, "learning_rate": 1.6608051904130375e-05, "loss": 0.609081506729126, "step": 2525 }, { "epoch": 0.6140009722897424, "grad_norm": 1.3305652745929815, "learning_rate": 1.6605034551326126e-05, "loss": 0.5718897581100464, "step": 2526 }, { "epoch": 0.6142440447253281, "grad_norm": 1.1896841873150947, "learning_rate": 1.6602016131420375e-05, "loss": 0.704580545425415, "step": 2527 }, { "epoch": 0.614487117160914, "grad_norm": 1.4169434115431792, "learning_rate": 1.6598996644900776e-05, "loss": 0.624963104724884, "step": 2528 }, { "epoch": 0.6147301895964997, "grad_norm": 1.1935208687155396, "learning_rate": 1.659597609225515e-05, "loss": 0.6413841247558594, "step": 2529 }, { "epoch": 0.6149732620320856, "grad_norm": 1.1956309543819135, "learning_rate": 1.6592954473971488e-05, "loss": 0.7159297466278076, "step": 2530 }, { "epoch": 0.6152163344676713, "grad_norm": 1.2644881165128943, "learning_rate": 1.658993179053797e-05, "loss": 0.609522819519043, "step": 2531 }, { "epoch": 0.6154594069032572, "grad_norm": 1.3178524978665767, "learning_rate": 1.658690804244293e-05, "loss": 0.624367892742157, "step": 2532 }, { "epoch": 0.6157024793388429, "grad_norm": 1.3328428812124493, "learning_rate": 1.6583883230174884e-05, "loss": 0.6993682384490967, "step": 2533 }, { "epoch": 0.6159455517744288, "grad_norm": 1.3807352511051922, "learning_rate": 1.6580857354222516e-05, "loss": 0.6995217800140381, "step": 2534 }, { "epoch": 0.6161886242100146, "grad_norm": 1.4705427799072213, "learning_rate": 1.6577830415074687e-05, "loss": 0.5997974872589111, "step": 2535 }, { "epoch": 0.6164316966456004, "grad_norm": 1.290754393431784, "learning_rate": 1.657480241322042e-05, "loss": 0.7391895055770874, "step": 2536 }, { "epoch": 0.6166747690811862, "grad_norm": 1.3646990950299256, "learning_rate": 1.6571773349148918e-05, "loss": 0.6656053066253662, "step": 2537 }, { "epoch": 0.616917841516772, "grad_norm": 1.2919115013796498, "learning_rate": 1.656874322334955e-05, "loss": 0.7882883548736572, "step": 2538 }, { "epoch": 0.6171609139523578, "grad_norm": 1.574916843380026, "learning_rate": 1.6565712036311865e-05, "loss": 0.7539361119270325, "step": 2539 }, { "epoch": 0.6174039863879436, "grad_norm": 1.2666644908166549, "learning_rate": 1.6562679788525572e-05, "loss": 0.6290011405944824, "step": 2540 }, { "epoch": 0.6176470588235294, "grad_norm": 1.2153323243571752, "learning_rate": 1.6559646480480563e-05, "loss": 0.5995693206787109, "step": 2541 }, { "epoch": 0.6178901312591152, "grad_norm": 1.3524140144074477, "learning_rate": 1.655661211266689e-05, "loss": 0.5336090922355652, "step": 2542 }, { "epoch": 0.618133203694701, "grad_norm": 1.2289745444734188, "learning_rate": 1.6553576685574787e-05, "loss": 0.7809706926345825, "step": 2543 }, { "epoch": 0.6183762761302868, "grad_norm": 1.203945488914083, "learning_rate": 1.655054019969465e-05, "loss": 0.5373551845550537, "step": 2544 }, { "epoch": 0.6186193485658726, "grad_norm": 1.3344142293784682, "learning_rate": 1.6547502655517052e-05, "loss": 0.5607507824897766, "step": 2545 }, { "epoch": 0.6188624210014584, "grad_norm": 1.4484182396780174, "learning_rate": 1.6544464053532735e-05, "loss": 0.6748601198196411, "step": 2546 }, { "epoch": 0.6191054934370442, "grad_norm": 1.3770750393440276, "learning_rate": 1.6541424394232613e-05, "loss": 0.6329579949378967, "step": 2547 }, { "epoch": 0.6193485658726301, "grad_norm": 1.2305271816788537, "learning_rate": 1.6538383678107768e-05, "loss": 0.6895967721939087, "step": 2548 }, { "epoch": 0.6195916383082158, "grad_norm": 1.3003954741868304, "learning_rate": 1.6535341905649453e-05, "loss": 0.7507784962654114, "step": 2549 }, { "epoch": 0.6198347107438017, "grad_norm": 1.304457856831692, "learning_rate": 1.6532299077349095e-05, "loss": 0.5055866241455078, "step": 2550 }, { "epoch": 0.6200777831793874, "grad_norm": 1.2824518158111087, "learning_rate": 1.6529255193698293e-05, "loss": 0.730968713760376, "step": 2551 }, { "epoch": 0.6203208556149733, "grad_norm": 1.4078076867663714, "learning_rate": 1.652621025518881e-05, "loss": 0.6669254302978516, "step": 2552 }, { "epoch": 0.620563928050559, "grad_norm": 1.3802526492805147, "learning_rate": 1.6523164262312578e-05, "loss": 0.6496238708496094, "step": 2553 }, { "epoch": 0.6208070004861449, "grad_norm": 1.5639698274291651, "learning_rate": 1.6520117215561714e-05, "loss": 0.6758877038955688, "step": 2554 }, { "epoch": 0.6210500729217306, "grad_norm": 1.3283073567182053, "learning_rate": 1.6517069115428483e-05, "loss": 0.5957851409912109, "step": 2555 }, { "epoch": 0.6212931453573165, "grad_norm": 1.4629182874184565, "learning_rate": 1.6514019962405347e-05, "loss": 0.600141167640686, "step": 2556 }, { "epoch": 0.6215362177929022, "grad_norm": 1.337531420665078, "learning_rate": 1.651096975698491e-05, "loss": 0.7104241847991943, "step": 2557 }, { "epoch": 0.6217792902284881, "grad_norm": 1.2063075957181069, "learning_rate": 1.6507918499659966e-05, "loss": 0.5577673316001892, "step": 2558 }, { "epoch": 0.6220223626640738, "grad_norm": 1.2101310927282976, "learning_rate": 1.650486619092347e-05, "loss": 0.5836781859397888, "step": 2559 }, { "epoch": 0.6222654350996597, "grad_norm": 1.4440721573007158, "learning_rate": 1.6501812831268557e-05, "loss": 0.7188368439674377, "step": 2560 }, { "epoch": 0.6225085075352454, "grad_norm": 1.2093855882180533, "learning_rate": 1.6498758421188516e-05, "loss": 0.6635130643844604, "step": 2561 }, { "epoch": 0.6227515799708313, "grad_norm": 1.25339402974168, "learning_rate": 1.6495702961176814e-05, "loss": 0.6849652528762817, "step": 2562 }, { "epoch": 0.6229946524064172, "grad_norm": 1.5109026508299717, "learning_rate": 1.649264645172709e-05, "loss": 0.7674351930618286, "step": 2563 }, { "epoch": 0.6232377248420029, "grad_norm": 1.5330024693677646, "learning_rate": 1.648958889333315e-05, "loss": 0.7617800235748291, "step": 2564 }, { "epoch": 0.6234807972775888, "grad_norm": 1.645980710917202, "learning_rate": 1.648653028648897e-05, "loss": 0.8192945718765259, "step": 2565 }, { "epoch": 0.6237238697131745, "grad_norm": 1.3198455439534014, "learning_rate": 1.6483470631688695e-05, "loss": 0.7252936363220215, "step": 2566 }, { "epoch": 0.6239669421487604, "grad_norm": 1.192060979627256, "learning_rate": 1.6480409929426634e-05, "loss": 0.626906156539917, "step": 2567 }, { "epoch": 0.6242100145843461, "grad_norm": 1.2332973370339522, "learning_rate": 1.6477348180197278e-05, "loss": 0.5872519612312317, "step": 2568 }, { "epoch": 0.624453087019932, "grad_norm": 1.341500046264096, "learning_rate": 1.6474285384495273e-05, "loss": 0.6230369806289673, "step": 2569 }, { "epoch": 0.6246961594555177, "grad_norm": 1.4733863195921897, "learning_rate": 1.647122154281545e-05, "loss": 0.6532584428787231, "step": 2570 }, { "epoch": 0.6249392318911036, "grad_norm": 1.122213625540698, "learning_rate": 1.6468156655652787e-05, "loss": 0.6198476552963257, "step": 2571 }, { "epoch": 0.6251823043266893, "grad_norm": 1.3310001844536077, "learning_rate": 1.6465090723502454e-05, "loss": 0.6082730293273926, "step": 2572 }, { "epoch": 0.6254253767622752, "grad_norm": 1.1632423879914209, "learning_rate": 1.6462023746859774e-05, "loss": 0.7357217073440552, "step": 2573 }, { "epoch": 0.6256684491978609, "grad_norm": 1.3252322253807112, "learning_rate": 1.645895572622024e-05, "loss": 0.8265800476074219, "step": 2574 }, { "epoch": 0.6259115216334468, "grad_norm": 1.4078107874555728, "learning_rate": 1.645588666207953e-05, "loss": 0.5130358934402466, "step": 2575 }, { "epoch": 0.6261545940690326, "grad_norm": 1.4701502502853967, "learning_rate": 1.645281655493347e-05, "loss": 0.7204645872116089, "step": 2576 }, { "epoch": 0.6263976665046184, "grad_norm": 1.1658513335985743, "learning_rate": 1.644974540527806e-05, "loss": 0.5995352268218994, "step": 2577 }, { "epoch": 0.6266407389402042, "grad_norm": 1.4207682824424674, "learning_rate": 1.644667321360948e-05, "loss": 0.6859217882156372, "step": 2578 }, { "epoch": 0.62688381137579, "grad_norm": 1.0456769325236235, "learning_rate": 1.6443599980424062e-05, "loss": 0.6071341037750244, "step": 2579 }, { "epoch": 0.6271268838113758, "grad_norm": 1.2685356197632078, "learning_rate": 1.644052570621832e-05, "loss": 0.53121417760849, "step": 2580 }, { "epoch": 0.6273699562469616, "grad_norm": 1.2769000775012258, "learning_rate": 1.6437450391488928e-05, "loss": 0.6226416826248169, "step": 2581 }, { "epoch": 0.6276130286825474, "grad_norm": 1.3901908295781729, "learning_rate": 1.6434374036732728e-05, "loss": 0.8086225986480713, "step": 2582 }, { "epoch": 0.6278561011181332, "grad_norm": 1.1245026550289567, "learning_rate": 1.6431296642446727e-05, "loss": 0.5353596210479736, "step": 2583 }, { "epoch": 0.628099173553719, "grad_norm": 1.3231683256259756, "learning_rate": 1.642821820912812e-05, "loss": 0.6732574701309204, "step": 2584 }, { "epoch": 0.6283422459893048, "grad_norm": 1.3536624714254935, "learning_rate": 1.6425138737274245e-05, "loss": 0.6768819093704224, "step": 2585 }, { "epoch": 0.6285853184248906, "grad_norm": 1.3504151582732407, "learning_rate": 1.642205822738262e-05, "loss": 0.5700419545173645, "step": 2586 }, { "epoch": 0.6288283908604764, "grad_norm": 1.3842254956358686, "learning_rate": 1.6418976679950922e-05, "loss": 0.7797165513038635, "step": 2587 }, { "epoch": 0.6290714632960622, "grad_norm": 1.4324967301347058, "learning_rate": 1.6415894095477015e-05, "loss": 0.5664600133895874, "step": 2588 }, { "epoch": 0.6293145357316481, "grad_norm": 1.1557305331026748, "learning_rate": 1.6412810474458906e-05, "loss": 0.6548989415168762, "step": 2589 }, { "epoch": 0.6295576081672338, "grad_norm": 1.4328825234025702, "learning_rate": 1.640972581739479e-05, "loss": 0.8615638017654419, "step": 2590 }, { "epoch": 0.6298006806028197, "grad_norm": 1.1020312181299954, "learning_rate": 1.6406640124783013e-05, "loss": 0.5733774900436401, "step": 2591 }, { "epoch": 0.6300437530384054, "grad_norm": 1.241018913595588, "learning_rate": 1.6403553397122098e-05, "loss": 0.6862307786941528, "step": 2592 }, { "epoch": 0.6302868254739913, "grad_norm": 1.6512385479056035, "learning_rate": 1.6400465634910738e-05, "loss": 0.7478636503219604, "step": 2593 }, { "epoch": 0.630529897909577, "grad_norm": 1.4526609800576065, "learning_rate": 1.6397376838647786e-05, "loss": 0.716595470905304, "step": 2594 }, { "epoch": 0.6307729703451629, "grad_norm": 1.258032602487574, "learning_rate": 1.6394287008832254e-05, "loss": 0.5770237445831299, "step": 2595 }, { "epoch": 0.6310160427807486, "grad_norm": 1.41734860076915, "learning_rate": 1.639119614596335e-05, "loss": 0.7221179604530334, "step": 2596 }, { "epoch": 0.6312591152163345, "grad_norm": 1.296158466569748, "learning_rate": 1.6388104250540414e-05, "loss": 0.8446171283721924, "step": 2597 }, { "epoch": 0.6315021876519202, "grad_norm": 1.2230735360542122, "learning_rate": 1.6385011323062977e-05, "loss": 0.5567861795425415, "step": 2598 }, { "epoch": 0.6317452600875061, "grad_norm": 1.3914265716719556, "learning_rate": 1.6381917364030725e-05, "loss": 0.6282297968864441, "step": 2599 }, { "epoch": 0.6319883325230918, "grad_norm": 1.3214766057266571, "learning_rate": 1.6378822373943518e-05, "loss": 0.5505380630493164, "step": 2600 }, { "epoch": 0.6322314049586777, "grad_norm": 1.3055756509140635, "learning_rate": 1.6375726353301377e-05, "loss": 0.7625084519386292, "step": 2601 }, { "epoch": 0.6324744773942635, "grad_norm": 1.1506969267196634, "learning_rate": 1.6372629302604485e-05, "loss": 0.741588830947876, "step": 2602 }, { "epoch": 0.6327175498298493, "grad_norm": 1.219808817927046, "learning_rate": 1.636953122235321e-05, "loss": 0.6309491395950317, "step": 2603 }, { "epoch": 0.6329606222654351, "grad_norm": 1.7956317967299122, "learning_rate": 1.636643211304807e-05, "loss": 0.8259873390197754, "step": 2604 }, { "epoch": 0.6332036947010209, "grad_norm": 1.2667128251433397, "learning_rate": 1.6363331975189748e-05, "loss": 0.6627964973449707, "step": 2605 }, { "epoch": 0.6334467671366067, "grad_norm": 1.148583982347593, "learning_rate": 1.63602308092791e-05, "loss": 0.6046761274337769, "step": 2606 }, { "epoch": 0.6336898395721925, "grad_norm": 1.326024185725544, "learning_rate": 1.6357128615817154e-05, "loss": 0.7416674494743347, "step": 2607 }, { "epoch": 0.6339329120077783, "grad_norm": 1.2674217860357089, "learning_rate": 1.6354025395305087e-05, "loss": 0.6830090284347534, "step": 2608 }, { "epoch": 0.6341759844433641, "grad_norm": 1.3423106861204932, "learning_rate": 1.635092114824426e-05, "loss": 0.6872683763504028, "step": 2609 }, { "epoch": 0.63441905687895, "grad_norm": 1.280993421453984, "learning_rate": 1.6347815875136186e-05, "loss": 0.6234767436981201, "step": 2610 }, { "epoch": 0.6346621293145357, "grad_norm": 1.2152397625754727, "learning_rate": 1.6344709576482554e-05, "loss": 0.8066385984420776, "step": 2611 }, { "epoch": 0.6349052017501216, "grad_norm": 1.4187418201717572, "learning_rate": 1.6341602252785204e-05, "loss": 0.6729259490966797, "step": 2612 }, { "epoch": 0.6351482741857073, "grad_norm": 1.2164252498054096, "learning_rate": 1.6338493904546163e-05, "loss": 0.6812874674797058, "step": 2613 }, { "epoch": 0.6353913466212932, "grad_norm": 1.2689345862997694, "learning_rate": 1.633538453226761e-05, "loss": 0.7941622734069824, "step": 2614 }, { "epoch": 0.6356344190568789, "grad_norm": 1.3342037628061196, "learning_rate": 1.6332274136451883e-05, "loss": 0.655566930770874, "step": 2615 }, { "epoch": 0.6358774914924648, "grad_norm": 1.3649772826508837, "learning_rate": 1.6329162717601497e-05, "loss": 0.6403402090072632, "step": 2616 }, { "epoch": 0.6361205639280506, "grad_norm": 1.3999695005673505, "learning_rate": 1.632605027621914e-05, "loss": 0.5533091425895691, "step": 2617 }, { "epoch": 0.6363636363636364, "grad_norm": 1.2480368554194743, "learning_rate": 1.632293681280764e-05, "loss": 0.8287243843078613, "step": 2618 }, { "epoch": 0.6366067087992222, "grad_norm": 1.3356875404504114, "learning_rate": 1.6319822327870014e-05, "loss": 0.6050065755844116, "step": 2619 }, { "epoch": 0.636849781234808, "grad_norm": 1.1877536239617685, "learning_rate": 1.631670682190943e-05, "loss": 0.5953094363212585, "step": 2620 }, { "epoch": 0.6370928536703938, "grad_norm": 1.3356227391748332, "learning_rate": 1.6313590295429224e-05, "loss": 0.672749400138855, "step": 2621 }, { "epoch": 0.6373359261059796, "grad_norm": 1.2916324271470516, "learning_rate": 1.6310472748932904e-05, "loss": 0.5948749780654907, "step": 2622 }, { "epoch": 0.6375789985415654, "grad_norm": 1.0747360462538038, "learning_rate": 1.6307354182924134e-05, "loss": 0.7997865676879883, "step": 2623 }, { "epoch": 0.6378220709771512, "grad_norm": 1.3365711339566446, "learning_rate": 1.630423459790674e-05, "loss": 0.6860148906707764, "step": 2624 }, { "epoch": 0.638065143412737, "grad_norm": 1.4286231019025952, "learning_rate": 1.630111399438473e-05, "loss": 0.5185575485229492, "step": 2625 }, { "epoch": 0.6383082158483228, "grad_norm": 1.3317149376033526, "learning_rate": 1.6297992372862257e-05, "loss": 0.6968897581100464, "step": 2626 }, { "epoch": 0.6385512882839086, "grad_norm": 1.2001530937818334, "learning_rate": 1.6294869733843647e-05, "loss": 0.6154395937919617, "step": 2627 }, { "epoch": 0.6387943607194944, "grad_norm": 1.1474278073471813, "learning_rate": 1.629174607783339e-05, "loss": 0.6545411348342896, "step": 2628 }, { "epoch": 0.6390374331550802, "grad_norm": 1.469402608561212, "learning_rate": 1.6288621405336144e-05, "loss": 0.7907732725143433, "step": 2629 }, { "epoch": 0.6392805055906661, "grad_norm": 1.1326543949341186, "learning_rate": 1.6285495716856728e-05, "loss": 0.5877563953399658, "step": 2630 }, { "epoch": 0.6395235780262518, "grad_norm": 1.6905501263815086, "learning_rate": 1.6282369012900116e-05, "loss": 0.6920328140258789, "step": 2631 }, { "epoch": 0.6397666504618377, "grad_norm": 1.4106841233381373, "learning_rate": 1.627924129397146e-05, "loss": 0.6961320042610168, "step": 2632 }, { "epoch": 0.6400097228974234, "grad_norm": 1.317818853980644, "learning_rate": 1.6276112560576072e-05, "loss": 0.7815544009208679, "step": 2633 }, { "epoch": 0.6402527953330093, "grad_norm": 1.279788618830764, "learning_rate": 1.6272982813219425e-05, "loss": 0.5944516658782959, "step": 2634 }, { "epoch": 0.640495867768595, "grad_norm": 1.1125911935700998, "learning_rate": 1.6269852052407155e-05, "loss": 0.4749506413936615, "step": 2635 }, { "epoch": 0.6407389402041809, "grad_norm": 1.20734040333701, "learning_rate": 1.6266720278645067e-05, "loss": 0.508438766002655, "step": 2636 }, { "epoch": 0.6409820126397666, "grad_norm": 1.226760439614529, "learning_rate": 1.6263587492439127e-05, "loss": 0.5623095035552979, "step": 2637 }, { "epoch": 0.6412250850753525, "grad_norm": 1.3194433886171977, "learning_rate": 1.626045369429546e-05, "loss": 0.5787800550460815, "step": 2638 }, { "epoch": 0.6414681575109382, "grad_norm": 1.4369697493581077, "learning_rate": 1.6257318884720364e-05, "loss": 0.6681352853775024, "step": 2639 }, { "epoch": 0.6417112299465241, "grad_norm": 1.047677410094824, "learning_rate": 1.6254183064220294e-05, "loss": 0.5556990504264832, "step": 2640 }, { "epoch": 0.6419543023821098, "grad_norm": 1.2243904815799196, "learning_rate": 1.6251046233301866e-05, "loss": 0.5705106854438782, "step": 2641 }, { "epoch": 0.6421973748176957, "grad_norm": 2.018009543691601, "learning_rate": 1.6247908392471865e-05, "loss": 0.8600321412086487, "step": 2642 }, { "epoch": 0.6424404472532815, "grad_norm": 1.2588396783786915, "learning_rate": 1.6244769542237238e-05, "loss": 0.6855157613754272, "step": 2643 }, { "epoch": 0.6426835196888673, "grad_norm": 1.2435305452428398, "learning_rate": 1.624162968310509e-05, "loss": 0.4418838620185852, "step": 2644 }, { "epoch": 0.6429265921244531, "grad_norm": 1.360698567212452, "learning_rate": 1.6238488815582693e-05, "loss": 0.6724461317062378, "step": 2645 }, { "epoch": 0.6431696645600389, "grad_norm": 1.2761555189218767, "learning_rate": 1.623534694017749e-05, "loss": 0.5923624038696289, "step": 2646 }, { "epoch": 0.6434127369956247, "grad_norm": 1.3248679054184826, "learning_rate": 1.623220405739707e-05, "loss": 0.5763329267501831, "step": 2647 }, { "epoch": 0.6436558094312105, "grad_norm": 1.2660305006471837, "learning_rate": 1.6229060167749197e-05, "loss": 0.6984878778457642, "step": 2648 }, { "epoch": 0.6438988818667963, "grad_norm": 1.3025668944352427, "learning_rate": 1.622591527174179e-05, "loss": 0.5550881624221802, "step": 2649 }, { "epoch": 0.6441419543023821, "grad_norm": 1.2342756508740125, "learning_rate": 1.6222769369882942e-05, "loss": 0.5228486061096191, "step": 2650 }, { "epoch": 0.6443850267379679, "grad_norm": 1.3363595869323919, "learning_rate": 1.6219622462680895e-05, "loss": 0.5680413842201233, "step": 2651 }, { "epoch": 0.6446280991735537, "grad_norm": 1.2113039315106289, "learning_rate": 1.6216474550644063e-05, "loss": 0.6077527403831482, "step": 2652 }, { "epoch": 0.6448711716091395, "grad_norm": 1.2262235231050276, "learning_rate": 1.6213325634281017e-05, "loss": 0.6571810245513916, "step": 2653 }, { "epoch": 0.6451142440447253, "grad_norm": 1.4652537914343826, "learning_rate": 1.6210175714100492e-05, "loss": 0.6226509213447571, "step": 2654 }, { "epoch": 0.6453573164803111, "grad_norm": 1.5163249779392738, "learning_rate": 1.6207024790611386e-05, "loss": 0.6190806031227112, "step": 2655 }, { "epoch": 0.6456003889158969, "grad_norm": 1.3790717197754225, "learning_rate": 1.620387286432276e-05, "loss": 0.6359691619873047, "step": 2656 }, { "epoch": 0.6458434613514827, "grad_norm": 1.236113269256931, "learning_rate": 1.620071993574383e-05, "loss": 0.6212655305862427, "step": 2657 }, { "epoch": 0.6460865337870686, "grad_norm": 1.4653492500531877, "learning_rate": 1.6197566005383988e-05, "loss": 0.781832218170166, "step": 2658 }, { "epoch": 0.6463296062226543, "grad_norm": 1.5658044973218916, "learning_rate": 1.6194411073752774e-05, "loss": 0.6414213180541992, "step": 2659 }, { "epoch": 0.6465726786582402, "grad_norm": 1.4640900223811613, "learning_rate": 1.6191255141359894e-05, "loss": 0.6955232620239258, "step": 2660 }, { "epoch": 0.6468157510938259, "grad_norm": 1.59743637780091, "learning_rate": 1.6188098208715216e-05, "loss": 0.6377576589584351, "step": 2661 }, { "epoch": 0.6470588235294118, "grad_norm": 1.2024289783175668, "learning_rate": 1.6184940276328776e-05, "loss": 0.6782079935073853, "step": 2662 }, { "epoch": 0.6473018959649975, "grad_norm": 1.297322679793175, "learning_rate": 1.618178134471076e-05, "loss": 0.5734342336654663, "step": 2663 }, { "epoch": 0.6475449684005834, "grad_norm": 1.162793759098744, "learning_rate": 1.6178621414371525e-05, "loss": 0.6951073408126831, "step": 2664 }, { "epoch": 0.6477880408361691, "grad_norm": 1.162482338421661, "learning_rate": 1.6175460485821583e-05, "loss": 0.5631749629974365, "step": 2665 }, { "epoch": 0.648031113271755, "grad_norm": 1.4631239185011764, "learning_rate": 1.6172298559571614e-05, "loss": 0.7621433734893799, "step": 2666 }, { "epoch": 0.6482741857073407, "grad_norm": 1.3986712533321084, "learning_rate": 1.616913563613245e-05, "loss": 0.5985628366470337, "step": 2667 }, { "epoch": 0.6485172581429266, "grad_norm": 1.3121207558158297, "learning_rate": 1.616597171601509e-05, "loss": 0.6598663330078125, "step": 2668 }, { "epoch": 0.6487603305785123, "grad_norm": 1.423335049039213, "learning_rate": 1.6162806799730694e-05, "loss": 0.7193120718002319, "step": 2669 }, { "epoch": 0.6490034030140982, "grad_norm": 1.2739748637226767, "learning_rate": 1.6159640887790583e-05, "loss": 0.7843843698501587, "step": 2670 }, { "epoch": 0.6492464754496841, "grad_norm": 1.2956854731578311, "learning_rate": 1.615647398070624e-05, "loss": 0.596282958984375, "step": 2671 }, { "epoch": 0.6494895478852698, "grad_norm": 1.2053509462565224, "learning_rate": 1.6153306078989298e-05, "loss": 0.6690568923950195, "step": 2672 }, { "epoch": 0.6497326203208557, "grad_norm": 1.5301458125704326, "learning_rate": 1.6150137183151567e-05, "loss": 0.5992845296859741, "step": 2673 }, { "epoch": 0.6499756927564414, "grad_norm": 1.3711439470530282, "learning_rate": 1.6146967293705015e-05, "loss": 0.7604213953018188, "step": 2674 }, { "epoch": 0.6502187651920273, "grad_norm": 1.3137280862309384, "learning_rate": 1.6143796411161755e-05, "loss": 0.5856381058692932, "step": 2675 }, { "epoch": 0.650461837627613, "grad_norm": 1.2185693111964, "learning_rate": 1.6140624536034072e-05, "loss": 0.6126827597618103, "step": 2676 }, { "epoch": 0.6507049100631989, "grad_norm": 1.389493513691983, "learning_rate": 1.6137451668834415e-05, "loss": 0.5527020692825317, "step": 2677 }, { "epoch": 0.6509479824987846, "grad_norm": 1.4437702768840426, "learning_rate": 1.6134277810075388e-05, "loss": 0.6097639799118042, "step": 2678 }, { "epoch": 0.6511910549343705, "grad_norm": 1.3644064022950164, "learning_rate": 1.6131102960269757e-05, "loss": 0.7363405227661133, "step": 2679 }, { "epoch": 0.6514341273699562, "grad_norm": 1.5638329021535093, "learning_rate": 1.6127927119930442e-05, "loss": 0.6238608956336975, "step": 2680 }, { "epoch": 0.6516771998055421, "grad_norm": 1.5035133941363883, "learning_rate": 1.612475028957053e-05, "loss": 0.6854282021522522, "step": 2681 }, { "epoch": 0.6519202722411278, "grad_norm": 1.2755034963842844, "learning_rate": 1.6121572469703268e-05, "loss": 0.7653785943984985, "step": 2682 }, { "epoch": 0.6521633446767137, "grad_norm": 1.2782261853394927, "learning_rate": 1.6118393660842063e-05, "loss": 0.6029286980628967, "step": 2683 }, { "epoch": 0.6524064171122995, "grad_norm": 1.3212527437125872, "learning_rate": 1.611521386350047e-05, "loss": 0.5652360916137695, "step": 2684 }, { "epoch": 0.6526494895478853, "grad_norm": 1.242730023091643, "learning_rate": 1.6112033078192225e-05, "loss": 0.6278326511383057, "step": 2685 }, { "epoch": 0.6528925619834711, "grad_norm": 1.29293826084712, "learning_rate": 1.61088513054312e-05, "loss": 0.6079814434051514, "step": 2686 }, { "epoch": 0.6531356344190569, "grad_norm": 1.191925485565603, "learning_rate": 1.610566854573145e-05, "loss": 0.6056979894638062, "step": 2687 }, { "epoch": 0.6533787068546427, "grad_norm": 1.4363483735044684, "learning_rate": 1.6102484799607173e-05, "loss": 0.6018837094306946, "step": 2688 }, { "epoch": 0.6536217792902285, "grad_norm": 1.6476622007966941, "learning_rate": 1.6099300067572728e-05, "loss": 0.7431480884552002, "step": 2689 }, { "epoch": 0.6538648517258143, "grad_norm": 1.5825130244671148, "learning_rate": 1.609611435014264e-05, "loss": 0.6512801647186279, "step": 2690 }, { "epoch": 0.6541079241614001, "grad_norm": 1.5240668170159082, "learning_rate": 1.6092927647831588e-05, "loss": 0.8494164943695068, "step": 2691 }, { "epoch": 0.6543509965969859, "grad_norm": 1.2908706175456974, "learning_rate": 1.608973996115441e-05, "loss": 0.7648401260375977, "step": 2692 }, { "epoch": 0.6545940690325717, "grad_norm": 1.336380021176423, "learning_rate": 1.6086551290626117e-05, "loss": 0.7002145648002625, "step": 2693 }, { "epoch": 0.6548371414681575, "grad_norm": 1.2285475515916646, "learning_rate": 1.608336163676185e-05, "loss": 0.6200157403945923, "step": 2694 }, { "epoch": 0.6550802139037433, "grad_norm": 1.3195699614870005, "learning_rate": 1.608017100007693e-05, "loss": 0.5660499334335327, "step": 2695 }, { "epoch": 0.6553232863393291, "grad_norm": 1.360982787029036, "learning_rate": 1.6076979381086843e-05, "loss": 0.6781564950942993, "step": 2696 }, { "epoch": 0.6555663587749149, "grad_norm": 1.3234252228040007, "learning_rate": 1.6073786780307212e-05, "loss": 0.5358541011810303, "step": 2697 }, { "epoch": 0.6558094312105007, "grad_norm": 1.3439137683765618, "learning_rate": 1.6070593198253834e-05, "loss": 0.6485497951507568, "step": 2698 }, { "epoch": 0.6560525036460866, "grad_norm": 1.34470583194786, "learning_rate": 1.6067398635442663e-05, "loss": 0.6237227916717529, "step": 2699 }, { "epoch": 0.6562955760816723, "grad_norm": 1.4158446850894424, "learning_rate": 1.60642030923898e-05, "loss": 0.6143203377723694, "step": 2700 }, { "epoch": 0.6565386485172582, "grad_norm": 1.3580419995810846, "learning_rate": 1.6061006569611524e-05, "loss": 0.5362731218338013, "step": 2701 }, { "epoch": 0.6567817209528439, "grad_norm": 1.407434249941418, "learning_rate": 1.605780906762425e-05, "loss": 0.6732631325721741, "step": 2702 }, { "epoch": 0.6570247933884298, "grad_norm": 1.5147884089327786, "learning_rate": 1.6054610586944574e-05, "loss": 0.8627110123634338, "step": 2703 }, { "epoch": 0.6572678658240155, "grad_norm": 1.2850637628918191, "learning_rate": 1.605141112808923e-05, "loss": 0.7733258008956909, "step": 2704 }, { "epoch": 0.6575109382596014, "grad_norm": 1.2414249215546602, "learning_rate": 1.6048210691575124e-05, "loss": 0.7832586765289307, "step": 2705 }, { "epoch": 0.6577540106951871, "grad_norm": 1.2007757984610528, "learning_rate": 1.6045009277919314e-05, "loss": 0.5585370063781738, "step": 2706 }, { "epoch": 0.657997083130773, "grad_norm": 1.3711724766478561, "learning_rate": 1.6041806887639015e-05, "loss": 0.6572721600532532, "step": 2707 }, { "epoch": 0.6582401555663587, "grad_norm": 1.3988302661359784, "learning_rate": 1.60386035212516e-05, "loss": 0.7289643883705139, "step": 2708 }, { "epoch": 0.6584832280019446, "grad_norm": 1.241403232698244, "learning_rate": 1.6035399179274604e-05, "loss": 0.7216721773147583, "step": 2709 }, { "epoch": 0.6587263004375303, "grad_norm": 1.4624701063817522, "learning_rate": 1.6032193862225716e-05, "loss": 0.6032732725143433, "step": 2710 }, { "epoch": 0.6589693728731162, "grad_norm": 1.126808912591003, "learning_rate": 1.6028987570622782e-05, "loss": 0.7425667643547058, "step": 2711 }, { "epoch": 0.659212445308702, "grad_norm": 1.360950009332943, "learning_rate": 1.6025780304983808e-05, "loss": 0.821877658367157, "step": 2712 }, { "epoch": 0.6594555177442878, "grad_norm": 1.1936980472812748, "learning_rate": 1.6022572065826957e-05, "loss": 0.6774115562438965, "step": 2713 }, { "epoch": 0.6596985901798736, "grad_norm": 1.1529457563280743, "learning_rate": 1.6019362853670545e-05, "loss": 0.5909900665283203, "step": 2714 }, { "epoch": 0.6599416626154594, "grad_norm": 1.2700488297315664, "learning_rate": 1.6016152669033046e-05, "loss": 0.6470006704330444, "step": 2715 }, { "epoch": 0.6601847350510452, "grad_norm": 1.2805725255142948, "learning_rate": 1.6012941512433103e-05, "loss": 0.6401129961013794, "step": 2716 }, { "epoch": 0.660427807486631, "grad_norm": 1.291586236559284, "learning_rate": 1.60097293843895e-05, "loss": 0.7397843599319458, "step": 2717 }, { "epoch": 0.6606708799222168, "grad_norm": 1.1695408301311447, "learning_rate": 1.6006516285421186e-05, "loss": 0.7222949266433716, "step": 2718 }, { "epoch": 0.6609139523578026, "grad_norm": 1.1987907473997352, "learning_rate": 1.6003302216047265e-05, "loss": 0.6309598088264465, "step": 2719 }, { "epoch": 0.6611570247933884, "grad_norm": 1.3012901115740882, "learning_rate": 1.6000087176787e-05, "loss": 0.7407472133636475, "step": 2720 }, { "epoch": 0.6614000972289742, "grad_norm": 1.5146138929134794, "learning_rate": 1.5996871168159808e-05, "loss": 0.6692293286323547, "step": 2721 }, { "epoch": 0.66164316966456, "grad_norm": 1.1931570235191515, "learning_rate": 1.599365419068526e-05, "loss": 0.7014262676239014, "step": 2722 }, { "epoch": 0.6618862421001458, "grad_norm": 1.2892708656942387, "learning_rate": 1.599043624488309e-05, "loss": 0.5931549072265625, "step": 2723 }, { "epoch": 0.6621293145357317, "grad_norm": 1.3660005039124754, "learning_rate": 1.5987217331273188e-05, "loss": 0.5980316400527954, "step": 2724 }, { "epoch": 0.6623723869713175, "grad_norm": 1.5402603376127288, "learning_rate": 1.5983997450375594e-05, "loss": 0.764158308506012, "step": 2725 }, { "epoch": 0.6626154594069033, "grad_norm": 1.092433159060527, "learning_rate": 1.5980776602710514e-05, "loss": 0.5219804048538208, "step": 2726 }, { "epoch": 0.6628585318424891, "grad_norm": 1.2637037118681127, "learning_rate": 1.5977554788798292e-05, "loss": 0.6344265341758728, "step": 2727 }, { "epoch": 0.6631016042780749, "grad_norm": 1.2211190132833787, "learning_rate": 1.5974332009159453e-05, "loss": 0.6474542617797852, "step": 2728 }, { "epoch": 0.6633446767136607, "grad_norm": 1.153865946120938, "learning_rate": 1.597110826431466e-05, "loss": 0.6193657517433167, "step": 2729 }, { "epoch": 0.6635877491492465, "grad_norm": 1.2220586353064702, "learning_rate": 1.596788355478474e-05, "loss": 0.6420034170150757, "step": 2730 }, { "epoch": 0.6638308215848323, "grad_norm": 1.1510617785306705, "learning_rate": 1.5964657881090667e-05, "loss": 0.6318395733833313, "step": 2731 }, { "epoch": 0.664073894020418, "grad_norm": 1.629803549226739, "learning_rate": 1.5961431243753586e-05, "loss": 0.6377270221710205, "step": 2732 }, { "epoch": 0.6643169664560039, "grad_norm": 1.3382574956009385, "learning_rate": 1.595820364329478e-05, "loss": 0.5677398443222046, "step": 2733 }, { "epoch": 0.6645600388915897, "grad_norm": 1.3030436731793356, "learning_rate": 1.5954975080235705e-05, "loss": 0.7283809185028076, "step": 2734 }, { "epoch": 0.6648031113271755, "grad_norm": 1.0779061909564167, "learning_rate": 1.5951745555097955e-05, "loss": 0.7681365013122559, "step": 2735 }, { "epoch": 0.6650461837627613, "grad_norm": 1.1781139108375418, "learning_rate": 1.5948515068403293e-05, "loss": 0.641319751739502, "step": 2736 }, { "epoch": 0.6652892561983471, "grad_norm": 1.2123917357949194, "learning_rate": 1.5945283620673633e-05, "loss": 0.6296244859695435, "step": 2737 }, { "epoch": 0.6655323286339329, "grad_norm": 1.2441063943022288, "learning_rate": 1.5942051212431042e-05, "loss": 0.7407407760620117, "step": 2738 }, { "epoch": 0.6657754010695187, "grad_norm": 1.3266756968686628, "learning_rate": 1.5938817844197742e-05, "loss": 0.5009981989860535, "step": 2739 }, { "epoch": 0.6660184735051046, "grad_norm": 1.6557788460236422, "learning_rate": 1.593558351649612e-05, "loss": 0.6600494384765625, "step": 2740 }, { "epoch": 0.6662615459406903, "grad_norm": 1.1710320263846636, "learning_rate": 1.5932348229848704e-05, "loss": 0.5372241735458374, "step": 2741 }, { "epoch": 0.6665046183762762, "grad_norm": 1.154480003612171, "learning_rate": 1.5929111984778184e-05, "loss": 0.6384588479995728, "step": 2742 }, { "epoch": 0.6667476908118619, "grad_norm": 1.1264978888984114, "learning_rate": 1.59258747818074e-05, "loss": 0.5653824210166931, "step": 2743 }, { "epoch": 0.6669907632474478, "grad_norm": 1.2590061347628447, "learning_rate": 1.5922636621459357e-05, "loss": 0.6504442691802979, "step": 2744 }, { "epoch": 0.6672338356830335, "grad_norm": 1.3518611289147828, "learning_rate": 1.5919397504257203e-05, "loss": 0.5894525051116943, "step": 2745 }, { "epoch": 0.6674769081186194, "grad_norm": 1.3447734019482953, "learning_rate": 1.5916157430724254e-05, "loss": 0.7705122828483582, "step": 2746 }, { "epoch": 0.6677199805542051, "grad_norm": 1.4903927648903221, "learning_rate": 1.5912916401383962e-05, "loss": 0.8570685386657715, "step": 2747 }, { "epoch": 0.667963052989791, "grad_norm": 1.3773034501528392, "learning_rate": 1.5909674416759956e-05, "loss": 0.6534131765365601, "step": 2748 }, { "epoch": 0.6682061254253767, "grad_norm": 1.3243465459292945, "learning_rate": 1.5906431477375994e-05, "loss": 0.5664116740226746, "step": 2749 }, { "epoch": 0.6684491978609626, "grad_norm": 1.2941028590103043, "learning_rate": 1.590318758375601e-05, "loss": 0.5451105237007141, "step": 2750 }, { "epoch": 0.6686922702965483, "grad_norm": 1.4431076994928815, "learning_rate": 1.589994273642408e-05, "loss": 0.6546218395233154, "step": 2751 }, { "epoch": 0.6689353427321342, "grad_norm": 1.295140586554525, "learning_rate": 1.5896696935904442e-05, "loss": 0.7902648448944092, "step": 2752 }, { "epoch": 0.66917841516772, "grad_norm": 1.3457915870125283, "learning_rate": 1.5893450182721476e-05, "loss": 0.748934268951416, "step": 2753 }, { "epoch": 0.6694214876033058, "grad_norm": 1.25068367243146, "learning_rate": 1.589020247739973e-05, "loss": 0.5538425445556641, "step": 2754 }, { "epoch": 0.6696645600388916, "grad_norm": 1.3524775064786758, "learning_rate": 1.5886953820463896e-05, "loss": 0.69386887550354, "step": 2755 }, { "epoch": 0.6699076324744774, "grad_norm": 1.3252270551242453, "learning_rate": 1.5883704212438827e-05, "loss": 0.6915777325630188, "step": 2756 }, { "epoch": 0.6701507049100632, "grad_norm": 1.379754301511843, "learning_rate": 1.5880453653849523e-05, "loss": 0.6298439502716064, "step": 2757 }, { "epoch": 0.670393777345649, "grad_norm": 1.3247377033842482, "learning_rate": 1.5877202145221146e-05, "loss": 0.6546323299407959, "step": 2758 }, { "epoch": 0.6706368497812348, "grad_norm": 1.3774797225796434, "learning_rate": 1.5873949687078993e-05, "loss": 0.7776068449020386, "step": 2759 }, { "epoch": 0.6708799222168206, "grad_norm": 1.4490108360810265, "learning_rate": 1.5870696279948536e-05, "loss": 0.6285706758499146, "step": 2760 }, { "epoch": 0.6711229946524064, "grad_norm": 1.488729087292918, "learning_rate": 1.5867441924355392e-05, "loss": 0.6652545928955078, "step": 2761 }, { "epoch": 0.6713660670879922, "grad_norm": 1.3743259484407992, "learning_rate": 1.586418662082533e-05, "loss": 0.9326362013816833, "step": 2762 }, { "epoch": 0.671609139523578, "grad_norm": 1.364322894541048, "learning_rate": 1.586093036988427e-05, "loss": 0.6187559962272644, "step": 2763 }, { "epoch": 0.6718522119591638, "grad_norm": 1.1131089892546329, "learning_rate": 1.5857673172058292e-05, "loss": 0.5604510307312012, "step": 2764 }, { "epoch": 0.6720952843947496, "grad_norm": 1.2669247551631013, "learning_rate": 1.5854415027873624e-05, "loss": 0.6485719680786133, "step": 2765 }, { "epoch": 0.6723383568303355, "grad_norm": 1.4755867967092833, "learning_rate": 1.5851155937856646e-05, "loss": 0.7271683216094971, "step": 2766 }, { "epoch": 0.6725814292659212, "grad_norm": 1.2850980555372062, "learning_rate": 1.5847895902533894e-05, "loss": 0.6134635210037231, "step": 2767 }, { "epoch": 0.6728245017015071, "grad_norm": 1.1304332945537723, "learning_rate": 1.5844634922432053e-05, "loss": 0.6789560317993164, "step": 2768 }, { "epoch": 0.6730675741370928, "grad_norm": 1.391372624081287, "learning_rate": 1.584137299807797e-05, "loss": 0.5880626440048218, "step": 2769 }, { "epoch": 0.6733106465726787, "grad_norm": 1.3286704670904395, "learning_rate": 1.583811012999863e-05, "loss": 0.7310691475868225, "step": 2770 }, { "epoch": 0.6735537190082644, "grad_norm": 1.287847596826846, "learning_rate": 1.5834846318721183e-05, "loss": 0.7019942998886108, "step": 2771 }, { "epoch": 0.6737967914438503, "grad_norm": 1.3840717903440756, "learning_rate": 1.5831581564772923e-05, "loss": 0.6219842433929443, "step": 2772 }, { "epoch": 0.674039863879436, "grad_norm": 1.7512841949323041, "learning_rate": 1.58283158686813e-05, "loss": 0.6133899092674255, "step": 2773 }, { "epoch": 0.6742829363150219, "grad_norm": 1.2646371362243285, "learning_rate": 1.582504923097392e-05, "loss": 0.79256671667099, "step": 2774 }, { "epoch": 0.6745260087506076, "grad_norm": 1.1337889303934143, "learning_rate": 1.5821781652178533e-05, "loss": 0.49680274724960327, "step": 2775 }, { "epoch": 0.6747690811861935, "grad_norm": 1.5188766595198198, "learning_rate": 1.5818513132823043e-05, "loss": 0.8870993852615356, "step": 2776 }, { "epoch": 0.6750121536217792, "grad_norm": 1.2990778617983858, "learning_rate": 1.5815243673435515e-05, "loss": 0.6165190935134888, "step": 2777 }, { "epoch": 0.6752552260573651, "grad_norm": 1.2424257944493617, "learning_rate": 1.5811973274544157e-05, "loss": 0.7019470930099487, "step": 2778 }, { "epoch": 0.675498298492951, "grad_norm": 1.2245432398162626, "learning_rate": 1.5808701936677327e-05, "loss": 0.6771552562713623, "step": 2779 }, { "epoch": 0.6757413709285367, "grad_norm": 1.0623744708815686, "learning_rate": 1.580542966036354e-05, "loss": 0.6636519432067871, "step": 2780 }, { "epoch": 0.6759844433641226, "grad_norm": 1.4182337504033091, "learning_rate": 1.5802156446131463e-05, "loss": 0.7032849192619324, "step": 2781 }, { "epoch": 0.6762275157997083, "grad_norm": 1.2483528588726556, "learning_rate": 1.5798882294509913e-05, "loss": 0.639146089553833, "step": 2782 }, { "epoch": 0.6764705882352942, "grad_norm": 1.0860922842047809, "learning_rate": 1.5795607206027853e-05, "loss": 0.6750848293304443, "step": 2783 }, { "epoch": 0.6767136606708799, "grad_norm": 1.2703475341487425, "learning_rate": 1.5792331181214408e-05, "loss": 0.8205346465110779, "step": 2784 }, { "epoch": 0.6769567331064658, "grad_norm": 1.3614842615717948, "learning_rate": 1.5789054220598844e-05, "loss": 0.834734320640564, "step": 2785 }, { "epoch": 0.6771998055420515, "grad_norm": 1.6869832374206013, "learning_rate": 1.5785776324710587e-05, "loss": 0.5769445300102234, "step": 2786 }, { "epoch": 0.6774428779776374, "grad_norm": 1.2784092071216482, "learning_rate": 1.578249749407921e-05, "loss": 0.6695569753646851, "step": 2787 }, { "epoch": 0.6776859504132231, "grad_norm": 1.457668120872005, "learning_rate": 1.577921772923443e-05, "loss": 0.6813271641731262, "step": 2788 }, { "epoch": 0.677929022848809, "grad_norm": 1.2176208669028434, "learning_rate": 1.577593703070613e-05, "loss": 0.6006495952606201, "step": 2789 }, { "epoch": 0.6781720952843947, "grad_norm": 1.1898683492128617, "learning_rate": 1.5772655399024332e-05, "loss": 0.49420633912086487, "step": 2790 }, { "epoch": 0.6784151677199806, "grad_norm": 1.4172495415646207, "learning_rate": 1.5769372834719218e-05, "loss": 0.5808334350585938, "step": 2791 }, { "epoch": 0.6786582401555663, "grad_norm": 1.2193265276405216, "learning_rate": 1.57660893383211e-05, "loss": 0.6598371863365173, "step": 2792 }, { "epoch": 0.6789013125911522, "grad_norm": 1.1548112834536024, "learning_rate": 1.5762804910360476e-05, "loss": 0.6407128572463989, "step": 2793 }, { "epoch": 0.679144385026738, "grad_norm": 1.0295296643521914, "learning_rate": 1.575951955136796e-05, "loss": 0.6431955099105835, "step": 2794 }, { "epoch": 0.6793874574623238, "grad_norm": 1.3261959489700763, "learning_rate": 1.5756233261874337e-05, "loss": 0.8128819465637207, "step": 2795 }, { "epoch": 0.6796305298979096, "grad_norm": 1.2871526138905172, "learning_rate": 1.5752946042410532e-05, "loss": 0.5168901085853577, "step": 2796 }, { "epoch": 0.6798736023334954, "grad_norm": 1.2457245192557116, "learning_rate": 1.5749657893507627e-05, "loss": 0.6886518001556396, "step": 2797 }, { "epoch": 0.6801166747690812, "grad_norm": 1.5474924537296637, "learning_rate": 1.5746368815696856e-05, "loss": 0.841214656829834, "step": 2798 }, { "epoch": 0.680359747204667, "grad_norm": 1.3336207811260472, "learning_rate": 1.5743078809509586e-05, "loss": 0.6784530878067017, "step": 2799 }, { "epoch": 0.6806028196402528, "grad_norm": 1.2404909123762016, "learning_rate": 1.573978787547736e-05, "loss": 0.8150078654289246, "step": 2800 }, { "epoch": 0.6808458920758386, "grad_norm": 1.330013058192731, "learning_rate": 1.5736496014131847e-05, "loss": 0.5218961834907532, "step": 2801 }, { "epoch": 0.6810889645114244, "grad_norm": 1.3974388882034958, "learning_rate": 1.573320322600488e-05, "loss": 0.5013446807861328, "step": 2802 }, { "epoch": 0.6813320369470102, "grad_norm": 1.2104535254295425, "learning_rate": 1.572990951162844e-05, "loss": 0.5093057155609131, "step": 2803 }, { "epoch": 0.681575109382596, "grad_norm": 1.1886501601894002, "learning_rate": 1.5726614871534657e-05, "loss": 0.5367323160171509, "step": 2804 }, { "epoch": 0.6818181818181818, "grad_norm": 1.3371112104328449, "learning_rate": 1.5723319306255798e-05, "loss": 0.49254468083381653, "step": 2805 }, { "epoch": 0.6820612542537676, "grad_norm": 1.263867818731903, "learning_rate": 1.5720022816324305e-05, "loss": 0.6922764182090759, "step": 2806 }, { "epoch": 0.6823043266893535, "grad_norm": 1.2337141232945854, "learning_rate": 1.5716725402272748e-05, "loss": 0.48445457220077515, "step": 2807 }, { "epoch": 0.6825473991249392, "grad_norm": 1.5027995282339162, "learning_rate": 1.5713427064633853e-05, "loss": 0.7550041675567627, "step": 2808 }, { "epoch": 0.6827904715605251, "grad_norm": 1.4648121942804424, "learning_rate": 1.5710127803940496e-05, "loss": 0.5260303020477295, "step": 2809 }, { "epoch": 0.6830335439961108, "grad_norm": 1.0736307171284056, "learning_rate": 1.57068276207257e-05, "loss": 0.4924715459346771, "step": 2810 }, { "epoch": 0.6832766164316967, "grad_norm": 1.3365132952845353, "learning_rate": 1.5703526515522643e-05, "loss": 0.7750240564346313, "step": 2811 }, { "epoch": 0.6835196888672824, "grad_norm": 1.2987004710932997, "learning_rate": 1.5700224488864644e-05, "loss": 0.6226062178611755, "step": 2812 }, { "epoch": 0.6837627613028683, "grad_norm": 1.4663266170476332, "learning_rate": 1.5696921541285176e-05, "loss": 0.7293719053268433, "step": 2813 }, { "epoch": 0.684005833738454, "grad_norm": 1.4256272879657101, "learning_rate": 1.5693617673317862e-05, "loss": 0.6654415726661682, "step": 2814 }, { "epoch": 0.6842489061740399, "grad_norm": 1.2628466989406066, "learning_rate": 1.5690312885496463e-05, "loss": 0.5081272125244141, "step": 2815 }, { "epoch": 0.6844919786096256, "grad_norm": 1.423548895859012, "learning_rate": 1.5687007178354906e-05, "loss": 0.7072639465332031, "step": 2816 }, { "epoch": 0.6847350510452115, "grad_norm": 1.1636217271656486, "learning_rate": 1.568370055242725e-05, "loss": 0.6776266098022461, "step": 2817 }, { "epoch": 0.6849781234807972, "grad_norm": 1.43261489325714, "learning_rate": 1.5680393008247713e-05, "loss": 0.5951050519943237, "step": 2818 }, { "epoch": 0.6852211959163831, "grad_norm": 1.259384854867727, "learning_rate": 1.5677084546350656e-05, "loss": 0.6848317384719849, "step": 2819 }, { "epoch": 0.6854642683519689, "grad_norm": 1.3714005119091945, "learning_rate": 1.5673775167270593e-05, "loss": 0.6184905171394348, "step": 2820 }, { "epoch": 0.6857073407875547, "grad_norm": 1.6094049319199337, "learning_rate": 1.5670464871542178e-05, "loss": 0.698298990726471, "step": 2821 }, { "epoch": 0.6859504132231405, "grad_norm": 1.303079210816341, "learning_rate": 1.5667153659700226e-05, "loss": 0.7876139879226685, "step": 2822 }, { "epoch": 0.6861934856587263, "grad_norm": 1.8176277111871606, "learning_rate": 1.5663841532279688e-05, "loss": 0.8400242924690247, "step": 2823 }, { "epoch": 0.6864365580943121, "grad_norm": 1.2040374042519504, "learning_rate": 1.5660528489815667e-05, "loss": 0.5982959270477295, "step": 2824 }, { "epoch": 0.6866796305298979, "grad_norm": 1.4859680238460207, "learning_rate": 1.565721453284341e-05, "loss": 0.7639615535736084, "step": 2825 }, { "epoch": 0.6869227029654837, "grad_norm": 1.2427186196907771, "learning_rate": 1.5653899661898328e-05, "loss": 0.6201870441436768, "step": 2826 }, { "epoch": 0.6871657754010695, "grad_norm": 1.4812864537014039, "learning_rate": 1.565058387751596e-05, "loss": 0.557530403137207, "step": 2827 }, { "epoch": 0.6874088478366553, "grad_norm": 1.3716816660117543, "learning_rate": 1.5647267180232e-05, "loss": 0.7430696487426758, "step": 2828 }, { "epoch": 0.6876519202722411, "grad_norm": 1.4347529998603503, "learning_rate": 1.564394957058229e-05, "loss": 0.6536284685134888, "step": 2829 }, { "epoch": 0.687894992707827, "grad_norm": 1.2805059907826803, "learning_rate": 1.564063104910282e-05, "loss": 0.5856568813323975, "step": 2830 }, { "epoch": 0.6881380651434127, "grad_norm": 1.2417254289976394, "learning_rate": 1.563731161632972e-05, "loss": 0.6549757122993469, "step": 2831 }, { "epoch": 0.6883811375789985, "grad_norm": 1.3782958000031935, "learning_rate": 1.563399127279929e-05, "loss": 0.5926462411880493, "step": 2832 }, { "epoch": 0.6886242100145843, "grad_norm": 1.1845719548165619, "learning_rate": 1.563067001904794e-05, "loss": 0.5863439440727234, "step": 2833 }, { "epoch": 0.6888672824501701, "grad_norm": 1.2759298035061877, "learning_rate": 1.5627347855612263e-05, "loss": 0.5875333547592163, "step": 2834 }, { "epoch": 0.689110354885756, "grad_norm": 1.2971070149596675, "learning_rate": 1.5624024783028977e-05, "loss": 0.8616706728935242, "step": 2835 }, { "epoch": 0.6893534273213417, "grad_norm": 1.4371806439051933, "learning_rate": 1.562070080183496e-05, "loss": 0.643057107925415, "step": 2836 }, { "epoch": 0.6895964997569276, "grad_norm": 1.253634083393738, "learning_rate": 1.5617375912567218e-05, "loss": 0.5767829418182373, "step": 2837 }, { "epoch": 0.6898395721925134, "grad_norm": 1.4454402734709924, "learning_rate": 1.5614050115762926e-05, "loss": 0.6631444692611694, "step": 2838 }, { "epoch": 0.6900826446280992, "grad_norm": 1.2016495044655886, "learning_rate": 1.5610723411959396e-05, "loss": 0.5936226844787598, "step": 2839 }, { "epoch": 0.690325717063685, "grad_norm": 1.3623750472449565, "learning_rate": 1.5607395801694084e-05, "loss": 0.7566401958465576, "step": 2840 }, { "epoch": 0.6905687894992708, "grad_norm": 1.206516232270828, "learning_rate": 1.5604067285504596e-05, "loss": 0.6332771182060242, "step": 2841 }, { "epoch": 0.6908118619348566, "grad_norm": 1.2396820417268932, "learning_rate": 1.560073786392868e-05, "loss": 0.7711570262908936, "step": 2842 }, { "epoch": 0.6910549343704424, "grad_norm": 1.3985294430324418, "learning_rate": 1.559740753750423e-05, "loss": 0.664010763168335, "step": 2843 }, { "epoch": 0.6912980068060282, "grad_norm": 1.2893413273426033, "learning_rate": 1.55940763067693e-05, "loss": 0.7304702997207642, "step": 2844 }, { "epoch": 0.691541079241614, "grad_norm": 1.166571720588949, "learning_rate": 1.5590744172262076e-05, "loss": 0.5421541929244995, "step": 2845 }, { "epoch": 0.6917841516771998, "grad_norm": 1.6115761972872835, "learning_rate": 1.558741113452089e-05, "loss": 0.6820032596588135, "step": 2846 }, { "epoch": 0.6920272241127856, "grad_norm": 1.3332020156249977, "learning_rate": 1.5584077194084226e-05, "loss": 0.7832927703857422, "step": 2847 }, { "epoch": 0.6922702965483715, "grad_norm": 1.406131129583171, "learning_rate": 1.558074235149071e-05, "loss": 0.5773596167564392, "step": 2848 }, { "epoch": 0.6925133689839572, "grad_norm": 1.1265465866804631, "learning_rate": 1.557740660727912e-05, "loss": 0.6400434970855713, "step": 2849 }, { "epoch": 0.6927564414195431, "grad_norm": 1.2788124495511928, "learning_rate": 1.557406996198837e-05, "loss": 0.6600591540336609, "step": 2850 }, { "epoch": 0.6929995138551288, "grad_norm": 1.3309046148212544, "learning_rate": 1.557073241615752e-05, "loss": 0.6014285087585449, "step": 2851 }, { "epoch": 0.6932425862907147, "grad_norm": 1.4941875701846192, "learning_rate": 1.5567393970325793e-05, "loss": 0.6844323873519897, "step": 2852 }, { "epoch": 0.6934856587263004, "grad_norm": 1.2907489913343497, "learning_rate": 1.5564054625032534e-05, "loss": 0.7235721349716187, "step": 2853 }, { "epoch": 0.6937287311618863, "grad_norm": 1.249289754271873, "learning_rate": 1.5560714380817245e-05, "loss": 0.6502597332000732, "step": 2854 }, { "epoch": 0.693971803597472, "grad_norm": 1.2932964970592642, "learning_rate": 1.5557373238219574e-05, "loss": 0.5760338306427002, "step": 2855 }, { "epoch": 0.6942148760330579, "grad_norm": 1.3321912994261502, "learning_rate": 1.5554031197779312e-05, "loss": 0.6832517385482788, "step": 2856 }, { "epoch": 0.6944579484686436, "grad_norm": 1.4205484146390035, "learning_rate": 1.555068826003639e-05, "loss": 0.7040055990219116, "step": 2857 }, { "epoch": 0.6947010209042295, "grad_norm": 1.1925743057518645, "learning_rate": 1.55473444255309e-05, "loss": 0.4484000504016876, "step": 2858 }, { "epoch": 0.6949440933398152, "grad_norm": 1.4193233426464227, "learning_rate": 1.554399969480306e-05, "loss": 0.6228945851325989, "step": 2859 }, { "epoch": 0.6951871657754011, "grad_norm": 1.3608098873645782, "learning_rate": 1.554065406839324e-05, "loss": 0.5802443027496338, "step": 2860 }, { "epoch": 0.6954302382109869, "grad_norm": 1.4337790128163335, "learning_rate": 1.553730754684196e-05, "loss": 0.6474890112876892, "step": 2861 }, { "epoch": 0.6956733106465727, "grad_norm": 1.2143557083975434, "learning_rate": 1.5533960130689877e-05, "loss": 0.7060925960540771, "step": 2862 }, { "epoch": 0.6959163830821585, "grad_norm": 1.45536669016592, "learning_rate": 1.5530611820477797e-05, "loss": 0.8382987976074219, "step": 2863 }, { "epoch": 0.6961594555177443, "grad_norm": 1.2666576345417933, "learning_rate": 1.5527262616746667e-05, "loss": 0.6764518618583679, "step": 2864 }, { "epoch": 0.6964025279533301, "grad_norm": 1.2983874937103463, "learning_rate": 1.552391252003758e-05, "loss": 0.572725236415863, "step": 2865 }, { "epoch": 0.6966456003889159, "grad_norm": 1.298445821467661, "learning_rate": 1.5520561530891776e-05, "loss": 0.594947338104248, "step": 2866 }, { "epoch": 0.6968886728245017, "grad_norm": 1.319788278876263, "learning_rate": 1.5517209649850635e-05, "loss": 0.7263680696487427, "step": 2867 }, { "epoch": 0.6971317452600875, "grad_norm": 1.246067680742453, "learning_rate": 1.5513856877455688e-05, "loss": 0.6023887395858765, "step": 2868 }, { "epoch": 0.6973748176956733, "grad_norm": 1.4127193848235235, "learning_rate": 1.5510503214248597e-05, "loss": 0.512161910533905, "step": 2869 }, { "epoch": 0.6976178901312591, "grad_norm": 1.6843308258882874, "learning_rate": 1.550714866077118e-05, "loss": 0.7295210957527161, "step": 2870 }, { "epoch": 0.6978609625668449, "grad_norm": 1.4374211473829392, "learning_rate": 1.5503793217565393e-05, "loss": 0.5865526795387268, "step": 2871 }, { "epoch": 0.6981040350024307, "grad_norm": 1.23736790468701, "learning_rate": 1.550043688517334e-05, "loss": 0.6509865522384644, "step": 2872 }, { "epoch": 0.6983471074380165, "grad_norm": 1.302380161843073, "learning_rate": 1.5497079664137267e-05, "loss": 0.6692261695861816, "step": 2873 }, { "epoch": 0.6985901798736023, "grad_norm": 1.2480221350570784, "learning_rate": 1.5493721554999556e-05, "loss": 0.6199846267700195, "step": 2874 }, { "epoch": 0.6988332523091881, "grad_norm": 1.2236336237527632, "learning_rate": 1.5490362558302746e-05, "loss": 0.6904421448707581, "step": 2875 }, { "epoch": 0.699076324744774, "grad_norm": 1.2953248934144403, "learning_rate": 1.548700267458951e-05, "loss": 0.5278189182281494, "step": 2876 }, { "epoch": 0.6993193971803597, "grad_norm": 1.293834279871088, "learning_rate": 1.5483641904402666e-05, "loss": 0.7064348459243774, "step": 2877 }, { "epoch": 0.6995624696159456, "grad_norm": 1.4612625964821992, "learning_rate": 1.5480280248285174e-05, "loss": 0.6895464658737183, "step": 2878 }, { "epoch": 0.6998055420515313, "grad_norm": 1.3217610859582696, "learning_rate": 1.5476917706780145e-05, "loss": 0.6149169206619263, "step": 2879 }, { "epoch": 0.7000486144871172, "grad_norm": 1.471909707455173, "learning_rate": 1.5473554280430824e-05, "loss": 0.7029409408569336, "step": 2880 }, { "epoch": 0.7002916869227029, "grad_norm": 1.177574411862601, "learning_rate": 1.5470189969780604e-05, "loss": 0.5953884124755859, "step": 2881 }, { "epoch": 0.7005347593582888, "grad_norm": 1.2352702596738714, "learning_rate": 1.5466824775373016e-05, "loss": 0.8397929668426514, "step": 2882 }, { "epoch": 0.7007778317938745, "grad_norm": 1.5220222541829305, "learning_rate": 1.5463458697751736e-05, "loss": 0.807993471622467, "step": 2883 }, { "epoch": 0.7010209042294604, "grad_norm": 1.130715109825616, "learning_rate": 1.546009173746059e-05, "loss": 0.5585217475891113, "step": 2884 }, { "epoch": 0.7012639766650461, "grad_norm": 1.3670534625694712, "learning_rate": 1.545672389504353e-05, "loss": 0.6619375944137573, "step": 2885 }, { "epoch": 0.701507049100632, "grad_norm": 1.4242898036684324, "learning_rate": 1.545335517104467e-05, "loss": 0.7592509388923645, "step": 2886 }, { "epoch": 0.7017501215362177, "grad_norm": 1.232441947151398, "learning_rate": 1.544998556600825e-05, "loss": 0.5034295320510864, "step": 2887 }, { "epoch": 0.7019931939718036, "grad_norm": 1.2583362625938395, "learning_rate": 1.5446615080478664e-05, "loss": 0.6262809038162231, "step": 2888 }, { "epoch": 0.7022362664073895, "grad_norm": 1.3019294447623353, "learning_rate": 1.5443243715000442e-05, "loss": 0.6897479295730591, "step": 2889 }, { "epoch": 0.7024793388429752, "grad_norm": 1.3285893440548877, "learning_rate": 1.5439871470118263e-05, "loss": 0.8258323073387146, "step": 2890 }, { "epoch": 0.702722411278561, "grad_norm": 1.5676247435678068, "learning_rate": 1.543649834637693e-05, "loss": 0.6395833492279053, "step": 2891 }, { "epoch": 0.7029654837141468, "grad_norm": 1.1656090781624078, "learning_rate": 1.5433124344321412e-05, "loss": 0.6115076541900635, "step": 2892 }, { "epoch": 0.7032085561497327, "grad_norm": 1.5682388985035673, "learning_rate": 1.5429749464496808e-05, "loss": 0.7539764642715454, "step": 2893 }, { "epoch": 0.7034516285853184, "grad_norm": 1.3548530032559543, "learning_rate": 1.5426373707448353e-05, "loss": 0.7594554424285889, "step": 2894 }, { "epoch": 0.7036947010209043, "grad_norm": 1.4429386083972573, "learning_rate": 1.5422997073721438e-05, "loss": 0.5763460397720337, "step": 2895 }, { "epoch": 0.70393777345649, "grad_norm": 1.2516336658837635, "learning_rate": 1.5419619563861582e-05, "loss": 0.6851105093955994, "step": 2896 }, { "epoch": 0.7041808458920759, "grad_norm": 1.3168411727413616, "learning_rate": 1.5416241178414455e-05, "loss": 0.6080679893493652, "step": 2897 }, { "epoch": 0.7044239183276616, "grad_norm": 1.503535860595978, "learning_rate": 1.5412861917925864e-05, "loss": 0.7823940515518188, "step": 2898 }, { "epoch": 0.7046669907632475, "grad_norm": 1.345741273511091, "learning_rate": 1.5409481782941756e-05, "loss": 0.8159829378128052, "step": 2899 }, { "epoch": 0.7049100631988332, "grad_norm": 1.2036538119401012, "learning_rate": 1.5406100774008222e-05, "loss": 0.6942399144172668, "step": 2900 }, { "epoch": 0.7051531356344191, "grad_norm": 1.275713772898277, "learning_rate": 1.54027188916715e-05, "loss": 0.6223607659339905, "step": 2901 }, { "epoch": 0.7053962080700049, "grad_norm": 1.23856352136328, "learning_rate": 1.539933613647796e-05, "loss": 0.47341451048851013, "step": 2902 }, { "epoch": 0.7056392805055907, "grad_norm": 1.4063186356462771, "learning_rate": 1.5395952508974107e-05, "loss": 0.5532781481742859, "step": 2903 }, { "epoch": 0.7058823529411765, "grad_norm": 1.3436211023415767, "learning_rate": 1.5392568009706606e-05, "loss": 0.7713290452957153, "step": 2904 }, { "epoch": 0.7061254253767623, "grad_norm": 1.3698937178403985, "learning_rate": 1.538918263922225e-05, "loss": 0.6594874858856201, "step": 2905 }, { "epoch": 0.7063684978123481, "grad_norm": 1.2362450482080716, "learning_rate": 1.5385796398067975e-05, "loss": 0.7923039197921753, "step": 2906 }, { "epoch": 0.7066115702479339, "grad_norm": 1.2965505545649796, "learning_rate": 1.5382409286790853e-05, "loss": 0.5468047857284546, "step": 2907 }, { "epoch": 0.7068546426835197, "grad_norm": 1.2452878472326903, "learning_rate": 1.537902130593811e-05, "loss": 0.6709779500961304, "step": 2908 }, { "epoch": 0.7070977151191055, "grad_norm": 1.252154633897821, "learning_rate": 1.5375632456057104e-05, "loss": 0.5519700050354004, "step": 2909 }, { "epoch": 0.7073407875546913, "grad_norm": 1.1055762798389452, "learning_rate": 1.5372242737695332e-05, "loss": 0.587855339050293, "step": 2910 }, { "epoch": 0.7075838599902771, "grad_norm": 1.3625003804962217, "learning_rate": 1.536885215140042e-05, "loss": 0.6511951684951782, "step": 2911 }, { "epoch": 0.7078269324258629, "grad_norm": 1.5781128582912032, "learning_rate": 1.536546069772017e-05, "loss": 0.679621696472168, "step": 2912 }, { "epoch": 0.7080700048614487, "grad_norm": 1.1174236760187255, "learning_rate": 1.5362068377202483e-05, "loss": 0.6416900753974915, "step": 2913 }, { "epoch": 0.7083130772970345, "grad_norm": 1.354941132372432, "learning_rate": 1.535867519039543e-05, "loss": 0.9183820486068726, "step": 2914 }, { "epoch": 0.7085561497326203, "grad_norm": 1.3928797543988958, "learning_rate": 1.5355281137847202e-05, "loss": 0.5864881277084351, "step": 2915 }, { "epoch": 0.7087992221682061, "grad_norm": 1.4668808741462318, "learning_rate": 1.535188622010614e-05, "loss": 0.7103961110115051, "step": 2916 }, { "epoch": 0.709042294603792, "grad_norm": 1.3807687212988362, "learning_rate": 1.5348490437720725e-05, "loss": 0.6117606163024902, "step": 2917 }, { "epoch": 0.7092853670393777, "grad_norm": 1.3552325213764227, "learning_rate": 1.5345093791239572e-05, "loss": 0.6722307205200195, "step": 2918 }, { "epoch": 0.7095284394749636, "grad_norm": 1.3174523469673887, "learning_rate": 1.5341696281211447e-05, "loss": 0.5184582471847534, "step": 2919 }, { "epoch": 0.7097715119105493, "grad_norm": 1.4531601537524832, "learning_rate": 1.533829790818524e-05, "loss": 0.592485785484314, "step": 2920 }, { "epoch": 0.7100145843461352, "grad_norm": 1.4291759263720611, "learning_rate": 1.533489867270999e-05, "loss": 0.5341998934745789, "step": 2921 }, { "epoch": 0.7102576567817209, "grad_norm": 1.2990357525311809, "learning_rate": 1.5331498575334873e-05, "loss": 0.7131499648094177, "step": 2922 }, { "epoch": 0.7105007292173068, "grad_norm": 1.366988860127899, "learning_rate": 1.5328097616609206e-05, "loss": 0.6959762573242188, "step": 2923 }, { "epoch": 0.7107438016528925, "grad_norm": 1.577005049156542, "learning_rate": 1.5324695797082444e-05, "loss": 0.7818055152893066, "step": 2924 }, { "epoch": 0.7109868740884784, "grad_norm": 1.1590435270746688, "learning_rate": 1.532129311730418e-05, "loss": 0.5199344158172607, "step": 2925 }, { "epoch": 0.7112299465240641, "grad_norm": 1.9126682694600465, "learning_rate": 1.5317889577824146e-05, "loss": 0.5861135721206665, "step": 2926 }, { "epoch": 0.71147301895965, "grad_norm": 1.1251431157284097, "learning_rate": 1.5314485179192215e-05, "loss": 0.6397766470909119, "step": 2927 }, { "epoch": 0.7117160913952357, "grad_norm": 1.2490152517006317, "learning_rate": 1.5311079921958398e-05, "loss": 0.5571025609970093, "step": 2928 }, { "epoch": 0.7119591638308216, "grad_norm": 1.1216700221787048, "learning_rate": 1.5307673806672846e-05, "loss": 0.6180055141448975, "step": 2929 }, { "epoch": 0.7122022362664074, "grad_norm": 1.3333741101425658, "learning_rate": 1.5304266833885843e-05, "loss": 0.7039819359779358, "step": 2930 }, { "epoch": 0.7124453087019932, "grad_norm": 1.5747359733721558, "learning_rate": 1.5300859004147818e-05, "loss": 0.7712098360061646, "step": 2931 }, { "epoch": 0.712688381137579, "grad_norm": 1.4641225579219497, "learning_rate": 1.5297450318009332e-05, "loss": 0.6136038303375244, "step": 2932 }, { "epoch": 0.7129314535731648, "grad_norm": 1.2200823058957477, "learning_rate": 1.5294040776021092e-05, "loss": 0.6112949848175049, "step": 2933 }, { "epoch": 0.7131745260087506, "grad_norm": 1.3746735960212406, "learning_rate": 1.529063037873394e-05, "loss": 0.6799354553222656, "step": 2934 }, { "epoch": 0.7134175984443364, "grad_norm": 1.2659687093302006, "learning_rate": 1.5287219126698856e-05, "loss": 0.6039535403251648, "step": 2935 }, { "epoch": 0.7136606708799222, "grad_norm": 1.3672031257043553, "learning_rate": 1.5283807020466955e-05, "loss": 0.5772846937179565, "step": 2936 }, { "epoch": 0.713903743315508, "grad_norm": 1.4563663243934966, "learning_rate": 1.5280394060589497e-05, "loss": 0.6707313060760498, "step": 2937 }, { "epoch": 0.7141468157510938, "grad_norm": 1.1540351478637296, "learning_rate": 1.527698024761787e-05, "loss": 0.647850513458252, "step": 2938 }, { "epoch": 0.7143898881866796, "grad_norm": 1.5448277873920602, "learning_rate": 1.527356558210361e-05, "loss": 0.7359193563461304, "step": 2939 }, { "epoch": 0.7146329606222654, "grad_norm": 1.2184679120544184, "learning_rate": 1.527015006459838e-05, "loss": 0.5647385716438293, "step": 2940 }, { "epoch": 0.7148760330578512, "grad_norm": 1.3908358899004223, "learning_rate": 1.5266733695653998e-05, "loss": 0.7036861181259155, "step": 2941 }, { "epoch": 0.715119105493437, "grad_norm": 1.4022260730859275, "learning_rate": 1.52633164758224e-05, "loss": 0.743507981300354, "step": 2942 }, { "epoch": 0.7153621779290229, "grad_norm": 1.2096447386907605, "learning_rate": 1.525989840565567e-05, "loss": 0.6267847418785095, "step": 2943 }, { "epoch": 0.7156052503646086, "grad_norm": 1.009998281769988, "learning_rate": 1.5256479485706027e-05, "loss": 0.5084853172302246, "step": 2944 }, { "epoch": 0.7158483228001945, "grad_norm": 1.2515130989154266, "learning_rate": 1.5253059716525828e-05, "loss": 0.8058874607086182, "step": 2945 }, { "epoch": 0.7160913952357802, "grad_norm": 1.3894528002582536, "learning_rate": 1.5249639098667569e-05, "loss": 0.6529557704925537, "step": 2946 }, { "epoch": 0.7163344676713661, "grad_norm": 1.29101700162949, "learning_rate": 1.524621763268388e-05, "loss": 0.7805825471878052, "step": 2947 }, { "epoch": 0.7165775401069518, "grad_norm": 1.3660657164965624, "learning_rate": 1.5242795319127522e-05, "loss": 0.7071174383163452, "step": 2948 }, { "epoch": 0.7168206125425377, "grad_norm": 1.2517939120038846, "learning_rate": 1.523937215855141e-05, "loss": 0.7353390455245972, "step": 2949 }, { "epoch": 0.7170636849781235, "grad_norm": 1.2386938514485162, "learning_rate": 1.5235948151508582e-05, "loss": 0.6463958024978638, "step": 2950 }, { "epoch": 0.7173067574137093, "grad_norm": 1.2174739087728803, "learning_rate": 1.5232523298552214e-05, "loss": 0.5290624499320984, "step": 2951 }, { "epoch": 0.717549829849295, "grad_norm": 1.2126422624079534, "learning_rate": 1.5229097600235624e-05, "loss": 0.6849427223205566, "step": 2952 }, { "epoch": 0.7177929022848809, "grad_norm": 1.5986899386565365, "learning_rate": 1.5225671057112262e-05, "loss": 0.7090630531311035, "step": 2953 }, { "epoch": 0.7180359747204667, "grad_norm": 2.092752648427203, "learning_rate": 1.5222243669735716e-05, "loss": 0.822968602180481, "step": 2954 }, { "epoch": 0.7182790471560525, "grad_norm": 1.3412241302007952, "learning_rate": 1.5218815438659715e-05, "loss": 0.6749368906021118, "step": 2955 }, { "epoch": 0.7185221195916383, "grad_norm": 1.2267205480810481, "learning_rate": 1.5215386364438116e-05, "loss": 0.6628969311714172, "step": 2956 }, { "epoch": 0.7187651920272241, "grad_norm": 1.2325291547879735, "learning_rate": 1.5211956447624916e-05, "loss": 0.5236489772796631, "step": 2957 }, { "epoch": 0.71900826446281, "grad_norm": 1.055036773182541, "learning_rate": 1.520852568877425e-05, "loss": 0.48264217376708984, "step": 2958 }, { "epoch": 0.7192513368983957, "grad_norm": 1.4430696890954648, "learning_rate": 1.5205094088440384e-05, "loss": 0.7175478935241699, "step": 2959 }, { "epoch": 0.7194944093339816, "grad_norm": 1.2724293598669894, "learning_rate": 1.5201661647177728e-05, "loss": 0.7663909196853638, "step": 2960 }, { "epoch": 0.7197374817695673, "grad_norm": 1.3369323922453877, "learning_rate": 1.5198228365540817e-05, "loss": 0.6001534461975098, "step": 2961 }, { "epoch": 0.7199805542051532, "grad_norm": 1.276301941859463, "learning_rate": 1.5194794244084337e-05, "loss": 0.5180256366729736, "step": 2962 }, { "epoch": 0.7202236266407389, "grad_norm": 1.3439999164947993, "learning_rate": 1.5191359283363091e-05, "loss": 0.6906589269638062, "step": 2963 }, { "epoch": 0.7204666990763248, "grad_norm": 1.3802181014648645, "learning_rate": 1.5187923483932037e-05, "loss": 0.7992970943450928, "step": 2964 }, { "epoch": 0.7207097715119105, "grad_norm": 1.3201551921317978, "learning_rate": 1.5184486846346248e-05, "loss": 0.6925965547561646, "step": 2965 }, { "epoch": 0.7209528439474964, "grad_norm": 1.12480725062194, "learning_rate": 1.5181049371160947e-05, "loss": 0.5406155586242676, "step": 2966 }, { "epoch": 0.7211959163830821, "grad_norm": 1.3111911139391763, "learning_rate": 1.5177611058931494e-05, "loss": 0.6087175607681274, "step": 2967 }, { "epoch": 0.721438988818668, "grad_norm": 1.4256869846162152, "learning_rate": 1.517417191021337e-05, "loss": 0.6996216773986816, "step": 2968 }, { "epoch": 0.7216820612542537, "grad_norm": 1.3037934724523208, "learning_rate": 1.5170731925562205e-05, "loss": 0.6094607710838318, "step": 2969 }, { "epoch": 0.7219251336898396, "grad_norm": 1.2068261561145612, "learning_rate": 1.516729110553376e-05, "loss": 0.6400936841964722, "step": 2970 }, { "epoch": 0.7221682061254254, "grad_norm": 1.207020027579516, "learning_rate": 1.5163849450683925e-05, "loss": 0.43114957213401794, "step": 2971 }, { "epoch": 0.7224112785610112, "grad_norm": 1.2368986323032078, "learning_rate": 1.5160406961568735e-05, "loss": 0.6464937925338745, "step": 2972 }, { "epoch": 0.722654350996597, "grad_norm": 1.3205513778509301, "learning_rate": 1.5156963638744348e-05, "loss": 0.6188036203384399, "step": 2973 }, { "epoch": 0.7228974234321828, "grad_norm": 1.5159283063091096, "learning_rate": 1.5153519482767069e-05, "loss": 0.6279233694076538, "step": 2974 }, { "epoch": 0.7231404958677686, "grad_norm": 1.1818530507366405, "learning_rate": 1.5150074494193328e-05, "loss": 0.5029953718185425, "step": 2975 }, { "epoch": 0.7233835683033544, "grad_norm": 1.461489252107919, "learning_rate": 1.5146628673579695e-05, "loss": 0.8650744557380676, "step": 2976 }, { "epoch": 0.7236266407389402, "grad_norm": 1.361855618536697, "learning_rate": 1.5143182021482871e-05, "loss": 0.8463134765625, "step": 2977 }, { "epoch": 0.723869713174526, "grad_norm": 1.229682822920255, "learning_rate": 1.5139734538459698e-05, "loss": 0.5291097164154053, "step": 2978 }, { "epoch": 0.7241127856101118, "grad_norm": 1.4626472016235046, "learning_rate": 1.513628622506714e-05, "loss": 0.7153373956680298, "step": 2979 }, { "epoch": 0.7243558580456976, "grad_norm": 1.252104548398461, "learning_rate": 1.5132837081862309e-05, "loss": 0.7276603579521179, "step": 2980 }, { "epoch": 0.7245989304812834, "grad_norm": 1.5729594519380277, "learning_rate": 1.512938710940244e-05, "loss": 0.7451938390731812, "step": 2981 }, { "epoch": 0.7248420029168692, "grad_norm": 1.1111511069757471, "learning_rate": 1.5125936308244909e-05, "loss": 0.6352068185806274, "step": 2982 }, { "epoch": 0.725085075352455, "grad_norm": 1.3409687318729142, "learning_rate": 1.512248467894722e-05, "loss": 0.6240894198417664, "step": 2983 }, { "epoch": 0.7253281477880409, "grad_norm": 1.3477450072555541, "learning_rate": 1.5119032222067019e-05, "loss": 0.7216370105743408, "step": 2984 }, { "epoch": 0.7255712202236266, "grad_norm": 1.2906472353590752, "learning_rate": 1.5115578938162072e-05, "loss": 0.651203453540802, "step": 2985 }, { "epoch": 0.7258142926592125, "grad_norm": 1.137430269963228, "learning_rate": 1.51121248277903e-05, "loss": 0.6686685085296631, "step": 2986 }, { "epoch": 0.7260573650947982, "grad_norm": 1.341870635318092, "learning_rate": 1.5108669891509738e-05, "loss": 0.6248022317886353, "step": 2987 }, { "epoch": 0.7263004375303841, "grad_norm": 1.4098708966912756, "learning_rate": 1.5105214129878558e-05, "loss": 0.7057205438613892, "step": 2988 }, { "epoch": 0.7265435099659698, "grad_norm": 1.578597264170143, "learning_rate": 1.5101757543455074e-05, "loss": 0.8599420189857483, "step": 2989 }, { "epoch": 0.7267865824015557, "grad_norm": 1.5823158883904849, "learning_rate": 1.509830013279773e-05, "loss": 0.5787392854690552, "step": 2990 }, { "epoch": 0.7270296548371414, "grad_norm": 1.5518528526522881, "learning_rate": 1.5094841898465093e-05, "loss": 0.7425066232681274, "step": 2991 }, { "epoch": 0.7272727272727273, "grad_norm": 1.368231221224006, "learning_rate": 1.5091382841015881e-05, "loss": 0.6077810525894165, "step": 2992 }, { "epoch": 0.727515799708313, "grad_norm": 1.3441577996830354, "learning_rate": 1.5087922961008927e-05, "loss": 0.5360199213027954, "step": 2993 }, { "epoch": 0.7277588721438989, "grad_norm": 1.3426661698104365, "learning_rate": 1.5084462259003206e-05, "loss": 0.5573140382766724, "step": 2994 }, { "epoch": 0.7280019445794846, "grad_norm": 1.2497834456406047, "learning_rate": 1.5081000735557831e-05, "loss": 0.5516963005065918, "step": 2995 }, { "epoch": 0.7282450170150705, "grad_norm": 1.2163714952286035, "learning_rate": 1.5077538391232036e-05, "loss": 0.652902364730835, "step": 2996 }, { "epoch": 0.7284880894506564, "grad_norm": 1.2342435119689106, "learning_rate": 1.5074075226585195e-05, "loss": 0.5021283030509949, "step": 2997 }, { "epoch": 0.7287311618862421, "grad_norm": 1.1902070467988586, "learning_rate": 1.5070611242176814e-05, "loss": 0.668896496295929, "step": 2998 }, { "epoch": 0.728974234321828, "grad_norm": 1.4926112112640544, "learning_rate": 1.5067146438566527e-05, "loss": 0.8528822660446167, "step": 2999 }, { "epoch": 0.7292173067574137, "grad_norm": 1.617710405553233, "learning_rate": 1.5063680816314108e-05, "loss": 0.6777299642562866, "step": 3000 }, { "epoch": 0.7294603791929996, "grad_norm": 1.2558003998418332, "learning_rate": 1.5060214375979453e-05, "loss": 0.5508057475090027, "step": 3001 }, { "epoch": 0.7297034516285853, "grad_norm": 1.2907999232529397, "learning_rate": 1.5056747118122598e-05, "loss": 0.6440062522888184, "step": 3002 }, { "epoch": 0.7299465240641712, "grad_norm": 1.411169020837615, "learning_rate": 1.5053279043303715e-05, "loss": 0.5055438876152039, "step": 3003 }, { "epoch": 0.7301895964997569, "grad_norm": 1.2649094410983897, "learning_rate": 1.5049810152083095e-05, "loss": 0.6786594986915588, "step": 3004 }, { "epoch": 0.7304326689353428, "grad_norm": 1.2879238520438085, "learning_rate": 1.5046340445021171e-05, "loss": 0.6574575901031494, "step": 3005 }, { "epoch": 0.7306757413709285, "grad_norm": 1.2011114430743222, "learning_rate": 1.5042869922678502e-05, "loss": 0.5681808590888977, "step": 3006 }, { "epoch": 0.7309188138065144, "grad_norm": 1.3519061182595349, "learning_rate": 1.5039398585615783e-05, "loss": 0.5495287179946899, "step": 3007 }, { "epoch": 0.7311618862421001, "grad_norm": 1.4646000922708864, "learning_rate": 1.5035926434393844e-05, "loss": 0.7005153894424438, "step": 3008 }, { "epoch": 0.731404958677686, "grad_norm": 1.228526614997082, "learning_rate": 1.5032453469573636e-05, "loss": 0.6511509418487549, "step": 3009 }, { "epoch": 0.7316480311132717, "grad_norm": 1.687760795561552, "learning_rate": 1.5028979691716243e-05, "loss": 0.7328258752822876, "step": 3010 }, { "epoch": 0.7318911035488576, "grad_norm": 1.4891990691189085, "learning_rate": 1.5025505101382894e-05, "loss": 0.7107021808624268, "step": 3011 }, { "epoch": 0.7321341759844434, "grad_norm": 1.4289562789961001, "learning_rate": 1.5022029699134932e-05, "loss": 0.6150474548339844, "step": 3012 }, { "epoch": 0.7323772484200292, "grad_norm": 1.7159914832799676, "learning_rate": 1.5018553485533844e-05, "loss": 0.5658533573150635, "step": 3013 }, { "epoch": 0.732620320855615, "grad_norm": 1.2165900740753661, "learning_rate": 1.501507646114124e-05, "loss": 0.6608140468597412, "step": 3014 }, { "epoch": 0.7328633932912008, "grad_norm": 1.3011676742694123, "learning_rate": 1.5011598626518866e-05, "loss": 0.5124791264533997, "step": 3015 }, { "epoch": 0.7331064657267866, "grad_norm": 1.435724707672408, "learning_rate": 1.5008119982228596e-05, "loss": 0.6596618890762329, "step": 3016 }, { "epoch": 0.7333495381623724, "grad_norm": 1.4746674925512253, "learning_rate": 1.5004640528832433e-05, "loss": 0.7670319080352783, "step": 3017 }, { "epoch": 0.7335926105979582, "grad_norm": 1.6266292946634366, "learning_rate": 1.5001160266892518e-05, "loss": 0.5143306255340576, "step": 3018 }, { "epoch": 0.733835683033544, "grad_norm": 1.437314345724597, "learning_rate": 1.4997679196971115e-05, "loss": 0.6663611531257629, "step": 3019 }, { "epoch": 0.7340787554691298, "grad_norm": 1.3087916313691483, "learning_rate": 1.4994197319630623e-05, "loss": 0.6673795580863953, "step": 3020 }, { "epoch": 0.7343218279047156, "grad_norm": 1.302382639430848, "learning_rate": 1.4990714635433568e-05, "loss": 0.7084215879440308, "step": 3021 }, { "epoch": 0.7345649003403014, "grad_norm": 1.332134886947343, "learning_rate": 1.4987231144942607e-05, "loss": 0.6312086582183838, "step": 3022 }, { "epoch": 0.7348079727758872, "grad_norm": 1.215259435807219, "learning_rate": 1.4983746848720534e-05, "loss": 0.7243598699569702, "step": 3023 }, { "epoch": 0.735051045211473, "grad_norm": 1.4780993357674148, "learning_rate": 1.4980261747330262e-05, "loss": 0.65815269947052, "step": 3024 }, { "epoch": 0.7352941176470589, "grad_norm": 1.1430218940681154, "learning_rate": 1.4976775841334847e-05, "loss": 0.6015812754631042, "step": 3025 }, { "epoch": 0.7355371900826446, "grad_norm": 1.1949770318397974, "learning_rate": 1.4973289131297462e-05, "loss": 0.6556439399719238, "step": 3026 }, { "epoch": 0.7357802625182305, "grad_norm": 1.664822381388327, "learning_rate": 1.4969801617781415e-05, "loss": 0.7657078504562378, "step": 3027 }, { "epoch": 0.7360233349538162, "grad_norm": 1.2293110401369471, "learning_rate": 1.4966313301350147e-05, "loss": 0.6016138792037964, "step": 3028 }, { "epoch": 0.7362664073894021, "grad_norm": 1.5958508512961982, "learning_rate": 1.4962824182567226e-05, "loss": 0.709162712097168, "step": 3029 }, { "epoch": 0.7365094798249878, "grad_norm": 1.3010789490458232, "learning_rate": 1.4959334261996348e-05, "loss": 0.7032933235168457, "step": 3030 }, { "epoch": 0.7367525522605737, "grad_norm": 1.1877288801950503, "learning_rate": 1.4955843540201345e-05, "loss": 0.5360292196273804, "step": 3031 }, { "epoch": 0.7369956246961594, "grad_norm": 1.504818727854604, "learning_rate": 1.4952352017746168e-05, "loss": 0.6124420166015625, "step": 3032 }, { "epoch": 0.7372386971317453, "grad_norm": 1.2919715440228452, "learning_rate": 1.4948859695194908e-05, "loss": 0.6746879816055298, "step": 3033 }, { "epoch": 0.737481769567331, "grad_norm": 1.134814183788939, "learning_rate": 1.4945366573111779e-05, "loss": 0.581417977809906, "step": 3034 }, { "epoch": 0.7377248420029169, "grad_norm": 1.1911707894758015, "learning_rate": 1.4941872652061125e-05, "loss": 0.6640629172325134, "step": 3035 }, { "epoch": 0.7379679144385026, "grad_norm": 1.3717140121305793, "learning_rate": 1.493837793260742e-05, "loss": 0.5163942575454712, "step": 3036 }, { "epoch": 0.7382109868740885, "grad_norm": 1.5360462715145804, "learning_rate": 1.4934882415315266e-05, "loss": 0.7417742013931274, "step": 3037 }, { "epoch": 0.7384540593096743, "grad_norm": 1.326576532610092, "learning_rate": 1.4931386100749396e-05, "loss": 0.5918583869934082, "step": 3038 }, { "epoch": 0.7386971317452601, "grad_norm": 1.3473850474900004, "learning_rate": 1.4927888989474668e-05, "loss": 0.5425455570220947, "step": 3039 }, { "epoch": 0.7389402041808459, "grad_norm": 1.3610036082207981, "learning_rate": 1.4924391082056074e-05, "loss": 0.6684151887893677, "step": 3040 }, { "epoch": 0.7391832766164317, "grad_norm": 1.4156053230183312, "learning_rate": 1.492089237905873e-05, "loss": 0.7094928026199341, "step": 3041 }, { "epoch": 0.7394263490520175, "grad_norm": 1.1417261849154199, "learning_rate": 1.4917392881047883e-05, "loss": 0.6163800358772278, "step": 3042 }, { "epoch": 0.7396694214876033, "grad_norm": 1.4827673753405402, "learning_rate": 1.4913892588588905e-05, "loss": 0.5348718166351318, "step": 3043 }, { "epoch": 0.7399124939231891, "grad_norm": 1.2560468467579975, "learning_rate": 1.4910391502247308e-05, "loss": 0.4960557818412781, "step": 3044 }, { "epoch": 0.7401555663587749, "grad_norm": 1.1503314701645873, "learning_rate": 1.4906889622588714e-05, "loss": 0.6730480790138245, "step": 3045 }, { "epoch": 0.7403986387943607, "grad_norm": 1.684310870778246, "learning_rate": 1.4903386950178884e-05, "loss": 0.6353576183319092, "step": 3046 }, { "epoch": 0.7406417112299465, "grad_norm": 1.2925506054266214, "learning_rate": 1.4899883485583705e-05, "loss": 0.5742325782775879, "step": 3047 }, { "epoch": 0.7408847836655323, "grad_norm": 1.2438482997788702, "learning_rate": 1.4896379229369198e-05, "loss": 0.6792517900466919, "step": 3048 }, { "epoch": 0.7411278561011181, "grad_norm": 1.30989099492291, "learning_rate": 1.4892874182101502e-05, "loss": 0.651475191116333, "step": 3049 }, { "epoch": 0.7413709285367039, "grad_norm": 1.1596181670501668, "learning_rate": 1.488936834434689e-05, "loss": 0.5450453162193298, "step": 3050 }, { "epoch": 0.7416140009722897, "grad_norm": 1.2490728400915674, "learning_rate": 1.4885861716671759e-05, "loss": 0.5437303781509399, "step": 3051 }, { "epoch": 0.7418570734078755, "grad_norm": 1.2035205913864881, "learning_rate": 1.488235429964264e-05, "loss": 0.5488492846488953, "step": 3052 }, { "epoch": 0.7421001458434614, "grad_norm": 1.4786838334978307, "learning_rate": 1.487884609382618e-05, "loss": 0.5930954217910767, "step": 3053 }, { "epoch": 0.7423432182790471, "grad_norm": 1.216297025087322, "learning_rate": 1.4875337099789166e-05, "loss": 0.7751185894012451, "step": 3054 }, { "epoch": 0.742586290714633, "grad_norm": 1.2642757142642183, "learning_rate": 1.4871827318098506e-05, "loss": 0.501360297203064, "step": 3055 }, { "epoch": 0.7428293631502187, "grad_norm": 1.1928161015638044, "learning_rate": 1.4868316749321234e-05, "loss": 0.4676039218902588, "step": 3056 }, { "epoch": 0.7430724355858046, "grad_norm": 1.17924631966759, "learning_rate": 1.4864805394024517e-05, "loss": 0.6265438795089722, "step": 3057 }, { "epoch": 0.7433155080213903, "grad_norm": 1.3227245470263547, "learning_rate": 1.4861293252775642e-05, "loss": 0.5963374376296997, "step": 3058 }, { "epoch": 0.7435585804569762, "grad_norm": 1.3909864104485505, "learning_rate": 1.4857780326142026e-05, "loss": 0.801801323890686, "step": 3059 }, { "epoch": 0.743801652892562, "grad_norm": 1.4577114297577607, "learning_rate": 1.4854266614691217e-05, "loss": 0.7144230604171753, "step": 3060 }, { "epoch": 0.7440447253281478, "grad_norm": 1.1477954131257586, "learning_rate": 1.4850752118990884e-05, "loss": 0.5752678513526917, "step": 3061 }, { "epoch": 0.7442877977637335, "grad_norm": 1.6218150217851048, "learning_rate": 1.4847236839608823e-05, "loss": 0.8516794443130493, "step": 3062 }, { "epoch": 0.7445308701993194, "grad_norm": 1.1384802693465081, "learning_rate": 1.4843720777112962e-05, "loss": 0.5991549491882324, "step": 3063 }, { "epoch": 0.7447739426349052, "grad_norm": 1.2858205530046396, "learning_rate": 1.4840203932071349e-05, "loss": 0.6681170463562012, "step": 3064 }, { "epoch": 0.745017015070491, "grad_norm": 1.4817738669257616, "learning_rate": 1.4836686305052162e-05, "loss": 0.7794027924537659, "step": 3065 }, { "epoch": 0.7452600875060769, "grad_norm": 1.4204118806803134, "learning_rate": 1.4833167896623704e-05, "loss": 0.5874036550521851, "step": 3066 }, { "epoch": 0.7455031599416626, "grad_norm": 1.3054518843775487, "learning_rate": 1.4829648707354405e-05, "loss": 0.7328076958656311, "step": 3067 }, { "epoch": 0.7457462323772485, "grad_norm": 1.1295648355297347, "learning_rate": 1.4826128737812821e-05, "loss": 0.5223658084869385, "step": 3068 }, { "epoch": 0.7459893048128342, "grad_norm": 1.5350496461022982, "learning_rate": 1.4822607988567637e-05, "loss": 0.7552604675292969, "step": 3069 }, { "epoch": 0.7462323772484201, "grad_norm": 1.492347451271152, "learning_rate": 1.4819086460187659e-05, "loss": 0.5760813355445862, "step": 3070 }, { "epoch": 0.7464754496840058, "grad_norm": 1.3723555612491882, "learning_rate": 1.4815564153241819e-05, "loss": 0.7343590259552002, "step": 3071 }, { "epoch": 0.7467185221195917, "grad_norm": 1.1522230279981784, "learning_rate": 1.4812041068299179e-05, "loss": 0.5833715200424194, "step": 3072 }, { "epoch": 0.7469615945551774, "grad_norm": 1.4821291084098638, "learning_rate": 1.4808517205928924e-05, "loss": 0.7424473762512207, "step": 3073 }, { "epoch": 0.7472046669907633, "grad_norm": 1.330248303290362, "learning_rate": 1.4804992566700364e-05, "loss": 0.6809432506561279, "step": 3074 }, { "epoch": 0.747447739426349, "grad_norm": 1.363661290526408, "learning_rate": 1.4801467151182938e-05, "loss": 0.6368609666824341, "step": 3075 }, { "epoch": 0.7476908118619349, "grad_norm": 1.2648664771635736, "learning_rate": 1.4797940959946202e-05, "loss": 0.6868221759796143, "step": 3076 }, { "epoch": 0.7479338842975206, "grad_norm": 1.295466724630276, "learning_rate": 1.4794413993559851e-05, "loss": 0.6860477924346924, "step": 3077 }, { "epoch": 0.7481769567331065, "grad_norm": 1.876878849004374, "learning_rate": 1.4790886252593693e-05, "loss": 0.875023365020752, "step": 3078 }, { "epoch": 0.7484200291686923, "grad_norm": 1.337080242447071, "learning_rate": 1.4787357737617667e-05, "loss": 0.735135555267334, "step": 3079 }, { "epoch": 0.7486631016042781, "grad_norm": 1.2521378945137647, "learning_rate": 1.4783828449201835e-05, "loss": 0.673550009727478, "step": 3080 }, { "epoch": 0.7489061740398639, "grad_norm": 1.2055910644577184, "learning_rate": 1.4780298387916385e-05, "loss": 0.589159369468689, "step": 3081 }, { "epoch": 0.7491492464754497, "grad_norm": 1.4797414247369483, "learning_rate": 1.477676755433163e-05, "loss": 0.7704734206199646, "step": 3082 }, { "epoch": 0.7493923189110355, "grad_norm": 1.4541993432438163, "learning_rate": 1.4773235949018005e-05, "loss": 0.5316981077194214, "step": 3083 }, { "epoch": 0.7496353913466213, "grad_norm": 1.20320164822704, "learning_rate": 1.4769703572546073e-05, "loss": 0.647808849811554, "step": 3084 }, { "epoch": 0.7498784637822071, "grad_norm": 1.3383543448706352, "learning_rate": 1.476617042548652e-05, "loss": 0.6882001757621765, "step": 3085 }, { "epoch": 0.7501215362177929, "grad_norm": 1.3987115195576256, "learning_rate": 1.4762636508410163e-05, "loss": 0.5419360399246216, "step": 3086 }, { "epoch": 0.7503646086533787, "grad_norm": 1.4850889219544652, "learning_rate": 1.4759101821887929e-05, "loss": 0.7874752283096313, "step": 3087 }, { "epoch": 0.7506076810889645, "grad_norm": 1.3116931762180517, "learning_rate": 1.4755566366490884e-05, "loss": 0.8672266006469727, "step": 3088 }, { "epoch": 0.7508507535245503, "grad_norm": 1.3464360804548132, "learning_rate": 1.4752030142790206e-05, "loss": 0.7073047161102295, "step": 3089 }, { "epoch": 0.7510938259601361, "grad_norm": 1.3044868860118393, "learning_rate": 1.4748493151357209e-05, "loss": 0.6122909188270569, "step": 3090 }, { "epoch": 0.7513368983957219, "grad_norm": 1.414258454404336, "learning_rate": 1.4744955392763318e-05, "loss": 0.8055797219276428, "step": 3091 }, { "epoch": 0.7515799708313077, "grad_norm": 1.3779199747301394, "learning_rate": 1.4741416867580096e-05, "loss": 0.5824984312057495, "step": 3092 }, { "epoch": 0.7518230432668935, "grad_norm": 1.4772036501158756, "learning_rate": 1.473787757637922e-05, "loss": 0.5997207164764404, "step": 3093 }, { "epoch": 0.7520661157024794, "grad_norm": 1.2935346231340141, "learning_rate": 1.4734337519732496e-05, "loss": 0.787879467010498, "step": 3094 }, { "epoch": 0.7523091881380651, "grad_norm": 1.397722508918233, "learning_rate": 1.4730796698211849e-05, "loss": 0.7074623703956604, "step": 3095 }, { "epoch": 0.752552260573651, "grad_norm": 1.1822994629242956, "learning_rate": 1.4727255112389326e-05, "loss": 0.6745275259017944, "step": 3096 }, { "epoch": 0.7527953330092367, "grad_norm": 1.1360812876660262, "learning_rate": 1.472371276283711e-05, "loss": 0.6499507427215576, "step": 3097 }, { "epoch": 0.7530384054448226, "grad_norm": 1.3360957417174502, "learning_rate": 1.4720169650127496e-05, "loss": 0.6116020679473877, "step": 3098 }, { "epoch": 0.7532814778804083, "grad_norm": 1.3423818617285264, "learning_rate": 1.47166257748329e-05, "loss": 0.6666703224182129, "step": 3099 }, { "epoch": 0.7535245503159942, "grad_norm": 1.3019994369851253, "learning_rate": 1.4713081137525871e-05, "loss": 0.6466065049171448, "step": 3100 }, { "epoch": 0.7537676227515799, "grad_norm": 1.2871531110928582, "learning_rate": 1.4709535738779075e-05, "loss": 0.6274001598358154, "step": 3101 }, { "epoch": 0.7540106951871658, "grad_norm": 1.2931416038296615, "learning_rate": 1.4705989579165301e-05, "loss": 0.7332309484481812, "step": 3102 }, { "epoch": 0.7542537676227515, "grad_norm": 1.2416790591259839, "learning_rate": 1.4702442659257466e-05, "loss": 0.6907967329025269, "step": 3103 }, { "epoch": 0.7544968400583374, "grad_norm": 1.4364328989559034, "learning_rate": 1.4698894979628602e-05, "loss": 0.5853050351142883, "step": 3104 }, { "epoch": 0.7547399124939231, "grad_norm": 1.353930993443688, "learning_rate": 1.469534654085187e-05, "loss": 0.61375892162323, "step": 3105 }, { "epoch": 0.754982984929509, "grad_norm": 1.2472609315919896, "learning_rate": 1.4691797343500553e-05, "loss": 0.48542630672454834, "step": 3106 }, { "epoch": 0.7552260573650948, "grad_norm": 1.280230784255727, "learning_rate": 1.4688247388148055e-05, "loss": 0.7379501461982727, "step": 3107 }, { "epoch": 0.7554691298006806, "grad_norm": 1.3705408402207158, "learning_rate": 1.4684696675367898e-05, "loss": 0.5794673562049866, "step": 3108 }, { "epoch": 0.7557122022362665, "grad_norm": 1.3628413146857916, "learning_rate": 1.4681145205733736e-05, "loss": 0.6651345491409302, "step": 3109 }, { "epoch": 0.7559552746718522, "grad_norm": 1.2098183708181327, "learning_rate": 1.4677592979819336e-05, "loss": 0.7303791046142578, "step": 3110 }, { "epoch": 0.756198347107438, "grad_norm": 1.321871146873751, "learning_rate": 1.4674039998198595e-05, "loss": 0.7597941160202026, "step": 3111 }, { "epoch": 0.7564414195430238, "grad_norm": 1.4093790765184695, "learning_rate": 1.4670486261445526e-05, "loss": 0.7833495140075684, "step": 3112 }, { "epoch": 0.7566844919786097, "grad_norm": 1.349324893651385, "learning_rate": 1.4666931770134267e-05, "loss": 0.5815238356590271, "step": 3113 }, { "epoch": 0.7569275644141954, "grad_norm": 1.822099918331838, "learning_rate": 1.4663376524839079e-05, "loss": 0.7860262989997864, "step": 3114 }, { "epoch": 0.7571706368497813, "grad_norm": 1.2148199563106195, "learning_rate": 1.4659820526134344e-05, "loss": 0.7117588520050049, "step": 3115 }, { "epoch": 0.757413709285367, "grad_norm": 1.278216646864825, "learning_rate": 1.4656263774594562e-05, "loss": 0.6032896041870117, "step": 3116 }, { "epoch": 0.7576567817209529, "grad_norm": 1.8863070205237524, "learning_rate": 1.4652706270794354e-05, "loss": 0.5447248816490173, "step": 3117 }, { "epoch": 0.7578998541565386, "grad_norm": 1.286624938889448, "learning_rate": 1.4649148015308477e-05, "loss": 0.6072052717208862, "step": 3118 }, { "epoch": 0.7581429265921245, "grad_norm": 1.2499776192245589, "learning_rate": 1.4645589008711789e-05, "loss": 0.5911890864372253, "step": 3119 }, { "epoch": 0.7583859990277103, "grad_norm": 1.3929250223840033, "learning_rate": 1.4642029251579283e-05, "loss": 0.5967658162117004, "step": 3120 }, { "epoch": 0.7586290714632961, "grad_norm": 1.30104078014148, "learning_rate": 1.4638468744486065e-05, "loss": 0.5841137170791626, "step": 3121 }, { "epoch": 0.7588721438988819, "grad_norm": 1.3454058836354954, "learning_rate": 1.4634907488007372e-05, "loss": 0.9344394207000732, "step": 3122 }, { "epoch": 0.7591152163344677, "grad_norm": 1.592478641224931, "learning_rate": 1.4631345482718554e-05, "loss": 0.6628988981246948, "step": 3123 }, { "epoch": 0.7593582887700535, "grad_norm": 1.4153454197611244, "learning_rate": 1.4627782729195082e-05, "loss": 0.7054605484008789, "step": 3124 }, { "epoch": 0.7596013612056393, "grad_norm": 1.3266947997522123, "learning_rate": 1.4624219228012555e-05, "loss": 0.5903563499450684, "step": 3125 }, { "epoch": 0.7598444336412251, "grad_norm": 1.3050453473324857, "learning_rate": 1.4620654979746682e-05, "loss": 0.6876928806304932, "step": 3126 }, { "epoch": 0.7600875060768109, "grad_norm": 1.4537007126610662, "learning_rate": 1.4617089984973306e-05, "loss": 0.6664167642593384, "step": 3127 }, { "epoch": 0.7603305785123967, "grad_norm": 1.4085392096643616, "learning_rate": 1.4613524244268375e-05, "loss": 0.5857996344566345, "step": 3128 }, { "epoch": 0.7605736509479825, "grad_norm": 1.490154009774733, "learning_rate": 1.4609957758207971e-05, "loss": 0.8388849496841431, "step": 3129 }, { "epoch": 0.7608167233835683, "grad_norm": 1.2103627543160174, "learning_rate": 1.4606390527368292e-05, "loss": 0.6118670701980591, "step": 3130 }, { "epoch": 0.7610597958191541, "grad_norm": 1.784864529832506, "learning_rate": 1.4602822552325654e-05, "loss": 0.7662287950515747, "step": 3131 }, { "epoch": 0.7613028682547399, "grad_norm": 1.5989901102546131, "learning_rate": 1.4599253833656492e-05, "loss": 0.7478861808776855, "step": 3132 }, { "epoch": 0.7615459406903257, "grad_norm": 1.2851858286122313, "learning_rate": 1.4595684371937369e-05, "loss": 0.5548959970474243, "step": 3133 }, { "epoch": 0.7617890131259115, "grad_norm": 1.4369323365578754, "learning_rate": 1.4592114167744962e-05, "loss": 0.6894211173057556, "step": 3134 }, { "epoch": 0.7620320855614974, "grad_norm": 1.4672133937259209, "learning_rate": 1.4588543221656064e-05, "loss": 0.6778537034988403, "step": 3135 }, { "epoch": 0.7622751579970831, "grad_norm": 1.501982340165732, "learning_rate": 1.4584971534247599e-05, "loss": 0.6802605390548706, "step": 3136 }, { "epoch": 0.762518230432669, "grad_norm": 1.304980287545884, "learning_rate": 1.4581399106096597e-05, "loss": 0.48984014987945557, "step": 3137 }, { "epoch": 0.7627613028682547, "grad_norm": 1.151690565113529, "learning_rate": 1.4577825937780223e-05, "loss": 0.7157946228981018, "step": 3138 }, { "epoch": 0.7630043753038406, "grad_norm": 1.277531346996837, "learning_rate": 1.4574252029875752e-05, "loss": 0.6074331998825073, "step": 3139 }, { "epoch": 0.7632474477394263, "grad_norm": 1.3805505221762435, "learning_rate": 1.4570677382960578e-05, "loss": 0.6721192002296448, "step": 3140 }, { "epoch": 0.7634905201750122, "grad_norm": 1.2314751892901388, "learning_rate": 1.4567101997612216e-05, "loss": 0.7126098871231079, "step": 3141 }, { "epoch": 0.7637335926105979, "grad_norm": 1.379417073855596, "learning_rate": 1.4563525874408309e-05, "loss": 0.614651083946228, "step": 3142 }, { "epoch": 0.7639766650461838, "grad_norm": 1.500496694380757, "learning_rate": 1.4559949013926602e-05, "loss": 0.525662899017334, "step": 3143 }, { "epoch": 0.7642197374817695, "grad_norm": 1.2921873185858284, "learning_rate": 1.455637141674497e-05, "loss": 0.6908317804336548, "step": 3144 }, { "epoch": 0.7644628099173554, "grad_norm": 1.2774335581759986, "learning_rate": 1.4552793083441405e-05, "loss": 0.6092959046363831, "step": 3145 }, { "epoch": 0.7647058823529411, "grad_norm": 1.5456326272646512, "learning_rate": 1.4549214014594021e-05, "loss": 0.7369235754013062, "step": 3146 }, { "epoch": 0.764948954788527, "grad_norm": 1.2348304800741612, "learning_rate": 1.454563421078105e-05, "loss": 0.6961265802383423, "step": 3147 }, { "epoch": 0.7651920272241128, "grad_norm": 1.4462077354597398, "learning_rate": 1.4542053672580836e-05, "loss": 0.5986684560775757, "step": 3148 }, { "epoch": 0.7654350996596986, "grad_norm": 1.2614040032907041, "learning_rate": 1.453847240057185e-05, "loss": 0.7535760402679443, "step": 3149 }, { "epoch": 0.7656781720952844, "grad_norm": 1.3727461662372673, "learning_rate": 1.4534890395332672e-05, "loss": 0.6469095945358276, "step": 3150 }, { "epoch": 0.7659212445308702, "grad_norm": 1.1666278953305866, "learning_rate": 1.4531307657442013e-05, "loss": 0.46708226203918457, "step": 3151 }, { "epoch": 0.766164316966456, "grad_norm": 1.3875563369165842, "learning_rate": 1.4527724187478699e-05, "loss": 0.6777981519699097, "step": 3152 }, { "epoch": 0.7664073894020418, "grad_norm": 1.2136423644364716, "learning_rate": 1.4524139986021659e-05, "loss": 0.5168048739433289, "step": 3153 }, { "epoch": 0.7666504618376276, "grad_norm": 1.299053098234715, "learning_rate": 1.452055505364996e-05, "loss": 0.671873152256012, "step": 3154 }, { "epoch": 0.7668935342732134, "grad_norm": 1.3761530948500733, "learning_rate": 1.4516969390942782e-05, "loss": 0.7005735635757446, "step": 3155 }, { "epoch": 0.7671366067087992, "grad_norm": 1.675225214129756, "learning_rate": 1.4513382998479416e-05, "loss": 0.5807546377182007, "step": 3156 }, { "epoch": 0.767379679144385, "grad_norm": 1.2962731134106729, "learning_rate": 1.4509795876839274e-05, "loss": 0.6375076174736023, "step": 3157 }, { "epoch": 0.7676227515799708, "grad_norm": 1.3584596633368564, "learning_rate": 1.4506208026601888e-05, "loss": 0.5855531096458435, "step": 3158 }, { "epoch": 0.7678658240155566, "grad_norm": 1.111656867257623, "learning_rate": 1.4502619448346913e-05, "loss": 0.6682655811309814, "step": 3159 }, { "epoch": 0.7681088964511424, "grad_norm": 1.2719651118185082, "learning_rate": 1.4499030142654108e-05, "loss": 0.7422876358032227, "step": 3160 }, { "epoch": 0.7683519688867283, "grad_norm": 1.1390618264832464, "learning_rate": 1.4495440110103361e-05, "loss": 0.597569465637207, "step": 3161 }, { "epoch": 0.768595041322314, "grad_norm": 1.2295507417935245, "learning_rate": 1.449184935127467e-05, "loss": 0.6041349172592163, "step": 3162 }, { "epoch": 0.7688381137578999, "grad_norm": 1.3301369373892873, "learning_rate": 1.4488257866748156e-05, "loss": 0.5629080533981323, "step": 3163 }, { "epoch": 0.7690811861934856, "grad_norm": 1.3740877005942227, "learning_rate": 1.4484665657104055e-05, "loss": 0.6498855352401733, "step": 3164 }, { "epoch": 0.7693242586290715, "grad_norm": 1.4096889915831592, "learning_rate": 1.4481072722922721e-05, "loss": 0.7027394771575928, "step": 3165 }, { "epoch": 0.7695673310646572, "grad_norm": 1.3128138780362044, "learning_rate": 1.4477479064784621e-05, "loss": 0.6050020456314087, "step": 3166 }, { "epoch": 0.7698104035002431, "grad_norm": 1.4242019535797954, "learning_rate": 1.4473884683270343e-05, "loss": 0.703642725944519, "step": 3167 }, { "epoch": 0.7700534759358288, "grad_norm": 1.2642507482652385, "learning_rate": 1.4470289578960595e-05, "loss": 0.7142714262008667, "step": 3168 }, { "epoch": 0.7702965483714147, "grad_norm": 1.2462860735321342, "learning_rate": 1.4466693752436193e-05, "loss": 0.5616710186004639, "step": 3169 }, { "epoch": 0.7705396208070004, "grad_norm": 1.0688528986575963, "learning_rate": 1.4463097204278076e-05, "loss": 0.6975635290145874, "step": 3170 }, { "epoch": 0.7707826932425863, "grad_norm": 1.296717333543229, "learning_rate": 1.44594999350673e-05, "loss": 0.582169771194458, "step": 3171 }, { "epoch": 0.771025765678172, "grad_norm": 1.7395038082626857, "learning_rate": 1.4455901945385034e-05, "loss": 0.8596820831298828, "step": 3172 }, { "epoch": 0.7712688381137579, "grad_norm": 1.625672805957919, "learning_rate": 1.4452303235812562e-05, "loss": 0.7238490581512451, "step": 3173 }, { "epoch": 0.7715119105493436, "grad_norm": 1.4157038016325472, "learning_rate": 1.4448703806931288e-05, "loss": 0.770596981048584, "step": 3174 }, { "epoch": 0.7717549829849295, "grad_norm": 1.362068547623348, "learning_rate": 1.4445103659322736e-05, "loss": 0.5970790386199951, "step": 3175 }, { "epoch": 0.7719980554205154, "grad_norm": 1.210526597568003, "learning_rate": 1.444150279356854e-05, "loss": 0.64018714427948, "step": 3176 }, { "epoch": 0.7722411278561011, "grad_norm": 1.3892074572875588, "learning_rate": 1.4437901210250451e-05, "loss": 0.7057356238365173, "step": 3177 }, { "epoch": 0.772484200291687, "grad_norm": 1.2973363686850596, "learning_rate": 1.4434298909950336e-05, "loss": 0.7073919773101807, "step": 3178 }, { "epoch": 0.7727272727272727, "grad_norm": 1.1666688857725318, "learning_rate": 1.4430695893250175e-05, "loss": 0.6458086967468262, "step": 3179 }, { "epoch": 0.7729703451628586, "grad_norm": 1.2998970793593971, "learning_rate": 1.4427092160732072e-05, "loss": 0.6612899303436279, "step": 3180 }, { "epoch": 0.7732134175984443, "grad_norm": 1.4253176207997402, "learning_rate": 1.442348771297824e-05, "loss": 0.5069724321365356, "step": 3181 }, { "epoch": 0.7734564900340302, "grad_norm": 1.6403998678641387, "learning_rate": 1.4419882550571007e-05, "loss": 0.691826581954956, "step": 3182 }, { "epoch": 0.7736995624696159, "grad_norm": 1.1952062402287074, "learning_rate": 1.4416276674092827e-05, "loss": 0.4967353343963623, "step": 3183 }, { "epoch": 0.7739426349052018, "grad_norm": 1.4324910697194224, "learning_rate": 1.441267008412625e-05, "loss": 0.6148166656494141, "step": 3184 }, { "epoch": 0.7741857073407875, "grad_norm": 1.38142844960301, "learning_rate": 1.440906278125396e-05, "loss": 0.8596159219741821, "step": 3185 }, { "epoch": 0.7744287797763734, "grad_norm": 1.3510045311649672, "learning_rate": 1.4405454766058744e-05, "loss": 0.5823229551315308, "step": 3186 }, { "epoch": 0.7746718522119591, "grad_norm": 1.3962235588472296, "learning_rate": 1.4401846039123512e-05, "loss": 0.5646376013755798, "step": 3187 }, { "epoch": 0.774914924647545, "grad_norm": 1.191787225706107, "learning_rate": 1.4398236601031288e-05, "loss": 0.689294159412384, "step": 3188 }, { "epoch": 0.7751579970831308, "grad_norm": 1.405113295507345, "learning_rate": 1.4394626452365202e-05, "loss": 0.7216447591781616, "step": 3189 }, { "epoch": 0.7754010695187166, "grad_norm": 1.0378911224919767, "learning_rate": 1.4391015593708506e-05, "loss": 0.5872023701667786, "step": 3190 }, { "epoch": 0.7756441419543024, "grad_norm": 1.194615866660791, "learning_rate": 1.438740402564457e-05, "loss": 0.6346553564071655, "step": 3191 }, { "epoch": 0.7758872143898882, "grad_norm": 1.3018276097458086, "learning_rate": 1.4383791748756873e-05, "loss": 0.6417597532272339, "step": 3192 }, { "epoch": 0.776130286825474, "grad_norm": 1.3506957359953082, "learning_rate": 1.438017876362901e-05, "loss": 0.8235218524932861, "step": 3193 }, { "epoch": 0.7763733592610598, "grad_norm": 1.2710067939298102, "learning_rate": 1.437656507084469e-05, "loss": 0.6570519208908081, "step": 3194 }, { "epoch": 0.7766164316966456, "grad_norm": 1.2392675081549, "learning_rate": 1.4372950670987735e-05, "loss": 0.6371831893920898, "step": 3195 }, { "epoch": 0.7768595041322314, "grad_norm": 1.1865395438368118, "learning_rate": 1.4369335564642089e-05, "loss": 0.6223856210708618, "step": 3196 }, { "epoch": 0.7771025765678172, "grad_norm": 1.4182651450956236, "learning_rate": 1.4365719752391805e-05, "loss": 0.6820264458656311, "step": 3197 }, { "epoch": 0.777345649003403, "grad_norm": 1.2387659606275068, "learning_rate": 1.4362103234821039e-05, "loss": 0.5733577013015747, "step": 3198 }, { "epoch": 0.7775887214389888, "grad_norm": 1.3099458449951873, "learning_rate": 1.4358486012514076e-05, "loss": 0.7360103726387024, "step": 3199 }, { "epoch": 0.7778317938745746, "grad_norm": 1.2729796044931085, "learning_rate": 1.4354868086055316e-05, "loss": 0.5869238376617432, "step": 3200 }, { "epoch": 0.7780748663101604, "grad_norm": 1.387310061662279, "learning_rate": 1.4351249456029262e-05, "loss": 0.6257840394973755, "step": 3201 }, { "epoch": 0.7783179387457463, "grad_norm": 1.3008681322773927, "learning_rate": 1.4347630123020534e-05, "loss": 0.5089088678359985, "step": 3202 }, { "epoch": 0.778561011181332, "grad_norm": 1.6634270397996196, "learning_rate": 1.4344010087613869e-05, "loss": 0.7171367406845093, "step": 3203 }, { "epoch": 0.7788040836169179, "grad_norm": 1.6518376194856002, "learning_rate": 1.4340389350394117e-05, "loss": 0.6594990491867065, "step": 3204 }, { "epoch": 0.7790471560525036, "grad_norm": 1.6727332363626013, "learning_rate": 1.4336767911946238e-05, "loss": 0.6728452444076538, "step": 3205 }, { "epoch": 0.7792902284880895, "grad_norm": 1.2748922293131637, "learning_rate": 1.433314577285531e-05, "loss": 0.6158827543258667, "step": 3206 }, { "epoch": 0.7795333009236752, "grad_norm": 1.23567202552885, "learning_rate": 1.4329522933706516e-05, "loss": 0.6093946695327759, "step": 3207 }, { "epoch": 0.7797763733592611, "grad_norm": 1.2876500934362969, "learning_rate": 1.4325899395085166e-05, "loss": 0.7009349465370178, "step": 3208 }, { "epoch": 0.7800194457948468, "grad_norm": 1.3359528895512631, "learning_rate": 1.4322275157576666e-05, "loss": 0.6032897233963013, "step": 3209 }, { "epoch": 0.7802625182304327, "grad_norm": 1.5134005826417796, "learning_rate": 1.4318650221766548e-05, "loss": 0.49914735555648804, "step": 3210 }, { "epoch": 0.7805055906660184, "grad_norm": 1.2870875151448502, "learning_rate": 1.431502458824045e-05, "loss": 0.6528562903404236, "step": 3211 }, { "epoch": 0.7807486631016043, "grad_norm": 1.3152495446070671, "learning_rate": 1.4311398257584126e-05, "loss": 0.8420534729957581, "step": 3212 }, { "epoch": 0.78099173553719, "grad_norm": 1.5698727984323082, "learning_rate": 1.4307771230383446e-05, "loss": 0.6550133228302002, "step": 3213 }, { "epoch": 0.7812348079727759, "grad_norm": 1.2978122220232642, "learning_rate": 1.430414350722438e-05, "loss": 0.6193492412567139, "step": 3214 }, { "epoch": 0.7814778804083617, "grad_norm": 1.3801001639932529, "learning_rate": 1.4300515088693026e-05, "loss": 0.7699429988861084, "step": 3215 }, { "epoch": 0.7817209528439475, "grad_norm": 1.2908328581230895, "learning_rate": 1.4296885975375584e-05, "loss": 0.6462844610214233, "step": 3216 }, { "epoch": 0.7819640252795333, "grad_norm": 1.3068961076982242, "learning_rate": 1.4293256167858366e-05, "loss": 0.6888947486877441, "step": 3217 }, { "epoch": 0.7822070977151191, "grad_norm": 1.3183990506562004, "learning_rate": 1.4289625666727805e-05, "loss": 0.5762566328048706, "step": 3218 }, { "epoch": 0.782450170150705, "grad_norm": 1.3206230109777017, "learning_rate": 1.4285994472570434e-05, "loss": 0.665572464466095, "step": 3219 }, { "epoch": 0.7826932425862907, "grad_norm": 1.3354244411116543, "learning_rate": 1.4282362585972914e-05, "loss": 0.7463701963424683, "step": 3220 }, { "epoch": 0.7829363150218765, "grad_norm": 1.311759532911567, "learning_rate": 1.4278730007521996e-05, "loss": 0.569445788860321, "step": 3221 }, { "epoch": 0.7831793874574623, "grad_norm": 1.5807914338477578, "learning_rate": 1.4275096737804568e-05, "loss": 0.7464574575424194, "step": 3222 }, { "epoch": 0.7834224598930482, "grad_norm": 1.3502496844308383, "learning_rate": 1.4271462777407608e-05, "loss": 0.6073175668716431, "step": 3223 }, { "epoch": 0.7836655323286339, "grad_norm": 1.3050443797186428, "learning_rate": 1.4267828126918217e-05, "loss": 0.6284259557723999, "step": 3224 }, { "epoch": 0.7839086047642198, "grad_norm": 1.3829549381171264, "learning_rate": 1.4264192786923603e-05, "loss": 0.6985249519348145, "step": 3225 }, { "epoch": 0.7841516771998055, "grad_norm": 1.281334606621654, "learning_rate": 1.426055675801109e-05, "loss": 0.5662108659744263, "step": 3226 }, { "epoch": 0.7843947496353914, "grad_norm": 1.423423323478589, "learning_rate": 1.425692004076811e-05, "loss": 0.6667766571044922, "step": 3227 }, { "epoch": 0.7846378220709771, "grad_norm": 1.2685945693113312, "learning_rate": 1.4253282635782202e-05, "loss": 0.5111671686172485, "step": 3228 }, { "epoch": 0.784880894506563, "grad_norm": 1.2957767433964094, "learning_rate": 1.4249644543641026e-05, "loss": 0.5691450834274292, "step": 3229 }, { "epoch": 0.7851239669421488, "grad_norm": 1.3837075718091105, "learning_rate": 1.4246005764932348e-05, "loss": 0.6229475736618042, "step": 3230 }, { "epoch": 0.7853670393777346, "grad_norm": 1.2487427387438657, "learning_rate": 1.4242366300244043e-05, "loss": 0.5047698020935059, "step": 3231 }, { "epoch": 0.7856101118133204, "grad_norm": 1.2707058781037333, "learning_rate": 1.4238726150164099e-05, "loss": 0.5654227137565613, "step": 3232 }, { "epoch": 0.7858531842489062, "grad_norm": 1.311005063538903, "learning_rate": 1.4235085315280614e-05, "loss": 0.6215063333511353, "step": 3233 }, { "epoch": 0.786096256684492, "grad_norm": 1.267538693425498, "learning_rate": 1.4231443796181797e-05, "loss": 0.5352468490600586, "step": 3234 }, { "epoch": 0.7863393291200778, "grad_norm": 1.0459336906685122, "learning_rate": 1.4227801593455968e-05, "loss": 0.6619670987129211, "step": 3235 }, { "epoch": 0.7865824015556636, "grad_norm": 1.3434438890708824, "learning_rate": 1.422415870769155e-05, "loss": 0.6714826822280884, "step": 3236 }, { "epoch": 0.7868254739912494, "grad_norm": 1.5586214383815487, "learning_rate": 1.4220515139477099e-05, "loss": 0.695481538772583, "step": 3237 }, { "epoch": 0.7870685464268352, "grad_norm": 1.0828144659972831, "learning_rate": 1.4216870889401251e-05, "loss": 0.535078227519989, "step": 3238 }, { "epoch": 0.787311618862421, "grad_norm": 1.2079845543603653, "learning_rate": 1.4213225958052774e-05, "loss": 0.6107180714607239, "step": 3239 }, { "epoch": 0.7875546912980068, "grad_norm": 1.240521304553456, "learning_rate": 1.4209580346020535e-05, "loss": 0.6312179565429688, "step": 3240 }, { "epoch": 0.7877977637335926, "grad_norm": 1.4212460025419666, "learning_rate": 1.4205934053893517e-05, "loss": 0.7060067057609558, "step": 3241 }, { "epoch": 0.7880408361691784, "grad_norm": 1.1868976827930477, "learning_rate": 1.4202287082260813e-05, "loss": 0.5731377601623535, "step": 3242 }, { "epoch": 0.7882839086047643, "grad_norm": 1.084031558389863, "learning_rate": 1.4198639431711619e-05, "loss": 0.6227153539657593, "step": 3243 }, { "epoch": 0.78852698104035, "grad_norm": 1.3025497506230332, "learning_rate": 1.4194991102835242e-05, "loss": 0.5324302911758423, "step": 3244 }, { "epoch": 0.7887700534759359, "grad_norm": 1.4240313571667453, "learning_rate": 1.419134209622111e-05, "loss": 0.6873039603233337, "step": 3245 }, { "epoch": 0.7890131259115216, "grad_norm": 1.664476173426213, "learning_rate": 1.4187692412458746e-05, "loss": 0.6215645670890808, "step": 3246 }, { "epoch": 0.7892561983471075, "grad_norm": 1.4512474298950675, "learning_rate": 1.4184042052137793e-05, "loss": 0.6690313816070557, "step": 3247 }, { "epoch": 0.7894992707826932, "grad_norm": 1.376377587947515, "learning_rate": 1.4180391015847996e-05, "loss": 0.7395232915878296, "step": 3248 }, { "epoch": 0.7897423432182791, "grad_norm": 1.6067551850002795, "learning_rate": 1.4176739304179214e-05, "loss": 0.6784043312072754, "step": 3249 }, { "epoch": 0.7899854156538648, "grad_norm": 1.2481325180096712, "learning_rate": 1.4173086917721413e-05, "loss": 0.7441916465759277, "step": 3250 }, { "epoch": 0.7902284880894507, "grad_norm": 1.420379077596559, "learning_rate": 1.4169433857064667e-05, "loss": 0.7185336351394653, "step": 3251 }, { "epoch": 0.7904715605250364, "grad_norm": 1.1186631057993492, "learning_rate": 1.4165780122799155e-05, "loss": 0.6659038662910461, "step": 3252 }, { "epoch": 0.7907146329606223, "grad_norm": 1.393525259754233, "learning_rate": 1.416212571551518e-05, "loss": 0.6249270439147949, "step": 3253 }, { "epoch": 0.790957705396208, "grad_norm": 1.4044569577753594, "learning_rate": 1.4158470635803138e-05, "loss": 0.6490393280982971, "step": 3254 }, { "epoch": 0.7912007778317939, "grad_norm": 1.6571003284151795, "learning_rate": 1.415481488425354e-05, "loss": 0.8298455476760864, "step": 3255 }, { "epoch": 0.7914438502673797, "grad_norm": 1.4041536708475746, "learning_rate": 1.4151158461457005e-05, "loss": 0.551676869392395, "step": 3256 }, { "epoch": 0.7916869227029655, "grad_norm": 1.2784225296453484, "learning_rate": 1.414750136800426e-05, "loss": 0.6508073806762695, "step": 3257 }, { "epoch": 0.7919299951385513, "grad_norm": 1.233114298598364, "learning_rate": 1.4143843604486143e-05, "loss": 0.5930670499801636, "step": 3258 }, { "epoch": 0.7921730675741371, "grad_norm": 1.1579281246063537, "learning_rate": 1.4140185171493595e-05, "loss": 0.6099998354911804, "step": 3259 }, { "epoch": 0.7924161400097229, "grad_norm": 1.1636114703609717, "learning_rate": 1.4136526069617667e-05, "loss": 0.5900183320045471, "step": 3260 }, { "epoch": 0.7926592124453087, "grad_norm": 1.4957587517286408, "learning_rate": 1.4132866299449523e-05, "loss": 0.548791229724884, "step": 3261 }, { "epoch": 0.7929022848808945, "grad_norm": 1.1676437273941866, "learning_rate": 1.412920586158043e-05, "loss": 0.48889297246932983, "step": 3262 }, { "epoch": 0.7931453573164803, "grad_norm": 1.148366546746287, "learning_rate": 1.4125544756601764e-05, "loss": 0.5912967920303345, "step": 3263 }, { "epoch": 0.7933884297520661, "grad_norm": 1.3378759468746997, "learning_rate": 1.4121882985105005e-05, "loss": 0.5638710260391235, "step": 3264 }, { "epoch": 0.7936315021876519, "grad_norm": 1.2418318719590173, "learning_rate": 1.4118220547681747e-05, "loss": 0.4490705728530884, "step": 3265 }, { "epoch": 0.7938745746232377, "grad_norm": 1.2345006494742903, "learning_rate": 1.411455744492369e-05, "loss": 0.7071206569671631, "step": 3266 }, { "epoch": 0.7941176470588235, "grad_norm": 1.4067789477580288, "learning_rate": 1.4110893677422643e-05, "loss": 0.765895426273346, "step": 3267 }, { "epoch": 0.7943607194944093, "grad_norm": 1.388780262457993, "learning_rate": 1.4107229245770517e-05, "loss": 0.6812349557876587, "step": 3268 }, { "epoch": 0.7946037919299951, "grad_norm": 1.3420561964060163, "learning_rate": 1.410356415055933e-05, "loss": 0.7340677380561829, "step": 3269 }, { "epoch": 0.7948468643655809, "grad_norm": 1.5770142747779183, "learning_rate": 1.4099898392381214e-05, "loss": 0.7417305707931519, "step": 3270 }, { "epoch": 0.7950899368011668, "grad_norm": 1.3631057053324003, "learning_rate": 1.4096231971828407e-05, "loss": 0.606789231300354, "step": 3271 }, { "epoch": 0.7953330092367525, "grad_norm": 1.4596923670150082, "learning_rate": 1.4092564889493248e-05, "loss": 0.5375059843063354, "step": 3272 }, { "epoch": 0.7955760816723384, "grad_norm": 1.196643225092049, "learning_rate": 1.4088897145968182e-05, "loss": 0.6872411966323853, "step": 3273 }, { "epoch": 0.7958191541079241, "grad_norm": 1.3771908776191921, "learning_rate": 1.4085228741845774e-05, "loss": 0.576101541519165, "step": 3274 }, { "epoch": 0.79606222654351, "grad_norm": 1.4649683238550835, "learning_rate": 1.4081559677718683e-05, "loss": 0.6861557960510254, "step": 3275 }, { "epoch": 0.7963052989790957, "grad_norm": 1.1248752037662852, "learning_rate": 1.407788995417968e-05, "loss": 0.5526548624038696, "step": 3276 }, { "epoch": 0.7965483714146816, "grad_norm": 1.3242698178726602, "learning_rate": 1.407421957182164e-05, "loss": 0.5995655059814453, "step": 3277 }, { "epoch": 0.7967914438502673, "grad_norm": 1.3193013039064503, "learning_rate": 1.4070548531237545e-05, "loss": 0.4995250701904297, "step": 3278 }, { "epoch": 0.7970345162858532, "grad_norm": 1.3653911093857312, "learning_rate": 1.4066876833020481e-05, "loss": 0.6581698656082153, "step": 3279 }, { "epoch": 0.797277588721439, "grad_norm": 1.3117229770427725, "learning_rate": 1.4063204477763652e-05, "loss": 0.5475736856460571, "step": 3280 }, { "epoch": 0.7975206611570248, "grad_norm": 1.6116746429942297, "learning_rate": 1.405953146606035e-05, "loss": 0.8809107542037964, "step": 3281 }, { "epoch": 0.7977637335926105, "grad_norm": 1.3204945121085552, "learning_rate": 1.405585779850399e-05, "loss": 0.6004889011383057, "step": 3282 }, { "epoch": 0.7980068060281964, "grad_norm": 1.1192722790396592, "learning_rate": 1.405218347568808e-05, "loss": 0.509606122970581, "step": 3283 }, { "epoch": 0.7982498784637823, "grad_norm": 1.3865169348442437, "learning_rate": 1.4048508498206241e-05, "loss": 0.6768496632575989, "step": 3284 }, { "epoch": 0.798492950899368, "grad_norm": 1.4060878411262039, "learning_rate": 1.4044832866652198e-05, "loss": 0.7651362419128418, "step": 3285 }, { "epoch": 0.7987360233349539, "grad_norm": 1.2895382257622503, "learning_rate": 1.4041156581619784e-05, "loss": 0.7569683194160461, "step": 3286 }, { "epoch": 0.7989790957705396, "grad_norm": 1.592009345747276, "learning_rate": 1.4037479643702932e-05, "loss": 0.7240776419639587, "step": 3287 }, { "epoch": 0.7992221682061255, "grad_norm": 1.4057244567372404, "learning_rate": 1.4033802053495684e-05, "loss": 0.6518229842185974, "step": 3288 }, { "epoch": 0.7994652406417112, "grad_norm": 1.2468817379848924, "learning_rate": 1.4030123811592185e-05, "loss": 0.7257641553878784, "step": 3289 }, { "epoch": 0.7997083130772971, "grad_norm": 1.2334302862871975, "learning_rate": 1.4026444918586693e-05, "loss": 0.6584875583648682, "step": 3290 }, { "epoch": 0.7999513855128828, "grad_norm": 1.3561569025205862, "learning_rate": 1.4022765375073562e-05, "loss": 0.658327579498291, "step": 3291 }, { "epoch": 0.8001944579484687, "grad_norm": 1.4542770012534132, "learning_rate": 1.4019085181647256e-05, "loss": 0.49983304738998413, "step": 3292 }, { "epoch": 0.8004375303840544, "grad_norm": 1.2560250405106017, "learning_rate": 1.4015404338902338e-05, "loss": 0.6926963329315186, "step": 3293 }, { "epoch": 0.8006806028196403, "grad_norm": 1.1094749786112965, "learning_rate": 1.4011722847433489e-05, "loss": 0.5306472182273865, "step": 3294 }, { "epoch": 0.800923675255226, "grad_norm": 1.2582972111899047, "learning_rate": 1.4008040707835484e-05, "loss": 0.6178785562515259, "step": 3295 }, { "epoch": 0.8011667476908119, "grad_norm": 1.4213278641717162, "learning_rate": 1.4004357920703199e-05, "loss": 0.5971562266349792, "step": 3296 }, { "epoch": 0.8014098201263977, "grad_norm": 1.0902005667202446, "learning_rate": 1.4000674486631624e-05, "loss": 0.5641102194786072, "step": 3297 }, { "epoch": 0.8016528925619835, "grad_norm": 1.4232312117577275, "learning_rate": 1.3996990406215854e-05, "loss": 0.7120606899261475, "step": 3298 }, { "epoch": 0.8018959649975693, "grad_norm": 1.243329416770405, "learning_rate": 1.3993305680051078e-05, "loss": 0.5730016231536865, "step": 3299 }, { "epoch": 0.8021390374331551, "grad_norm": 1.5346888979798887, "learning_rate": 1.3989620308732603e-05, "loss": 0.600745439529419, "step": 3300 }, { "epoch": 0.8023821098687409, "grad_norm": 1.148532224932669, "learning_rate": 1.3985934292855827e-05, "loss": 0.6042417287826538, "step": 3301 }, { "epoch": 0.8026251823043267, "grad_norm": 1.5800117578480941, "learning_rate": 1.3982247633016261e-05, "loss": 0.7253611087799072, "step": 3302 }, { "epoch": 0.8028682547399125, "grad_norm": 1.2169070655510985, "learning_rate": 1.3978560329809518e-05, "loss": 0.564416766166687, "step": 3303 }, { "epoch": 0.8031113271754983, "grad_norm": 1.4334451588057966, "learning_rate": 1.3974872383831316e-05, "loss": 0.6865569353103638, "step": 3304 }, { "epoch": 0.8033543996110841, "grad_norm": 1.2657267960926026, "learning_rate": 1.3971183795677471e-05, "loss": 0.6358678340911865, "step": 3305 }, { "epoch": 0.8035974720466699, "grad_norm": 1.3162499848106999, "learning_rate": 1.3967494565943908e-05, "loss": 0.8230589628219604, "step": 3306 }, { "epoch": 0.8038405444822557, "grad_norm": 1.4173914150587352, "learning_rate": 1.3963804695226657e-05, "loss": 0.7189098596572876, "step": 3307 }, { "epoch": 0.8040836169178415, "grad_norm": 1.350754471945116, "learning_rate": 1.3960114184121848e-05, "loss": 0.7684926986694336, "step": 3308 }, { "epoch": 0.8043266893534273, "grad_norm": 1.3822363230114958, "learning_rate": 1.3956423033225713e-05, "loss": 0.6979205012321472, "step": 3309 }, { "epoch": 0.8045697617890131, "grad_norm": 1.2377808706925502, "learning_rate": 1.395273124313459e-05, "loss": 0.732252836227417, "step": 3310 }, { "epoch": 0.8048128342245989, "grad_norm": 1.1958656214677492, "learning_rate": 1.3949038814444924e-05, "loss": 0.6163087487220764, "step": 3311 }, { "epoch": 0.8050559066601848, "grad_norm": 1.201235887113162, "learning_rate": 1.3945345747753259e-05, "loss": 0.7299476861953735, "step": 3312 }, { "epoch": 0.8052989790957705, "grad_norm": 1.107321664727588, "learning_rate": 1.394165204365624e-05, "loss": 0.6244611144065857, "step": 3313 }, { "epoch": 0.8055420515313564, "grad_norm": 1.2468772724641142, "learning_rate": 1.3937957702750619e-05, "loss": 0.560451328754425, "step": 3314 }, { "epoch": 0.8057851239669421, "grad_norm": 1.1822843727685757, "learning_rate": 1.3934262725633247e-05, "loss": 0.5297389030456543, "step": 3315 }, { "epoch": 0.806028196402528, "grad_norm": 1.419269996743285, "learning_rate": 1.3930567112901084e-05, "loss": 0.7042943835258484, "step": 3316 }, { "epoch": 0.8062712688381137, "grad_norm": 1.2036444424646864, "learning_rate": 1.3926870865151185e-05, "loss": 0.5399294495582581, "step": 3317 }, { "epoch": 0.8065143412736996, "grad_norm": 1.350948238886548, "learning_rate": 1.3923173982980714e-05, "loss": 0.794937252998352, "step": 3318 }, { "epoch": 0.8067574137092853, "grad_norm": 1.1247862077442903, "learning_rate": 1.3919476466986932e-05, "loss": 0.4143857955932617, "step": 3319 }, { "epoch": 0.8070004861448712, "grad_norm": 1.1825066175705652, "learning_rate": 1.3915778317767208e-05, "loss": 0.5940282344818115, "step": 3320 }, { "epoch": 0.8072435585804569, "grad_norm": 1.2296534294284305, "learning_rate": 1.3912079535919009e-05, "loss": 0.6319648027420044, "step": 3321 }, { "epoch": 0.8074866310160428, "grad_norm": 1.3394679113439376, "learning_rate": 1.3908380122039908e-05, "loss": 0.6121023893356323, "step": 3322 }, { "epoch": 0.8077297034516285, "grad_norm": 1.3228908980205625, "learning_rate": 1.3904680076727576e-05, "loss": 0.7752237915992737, "step": 3323 }, { "epoch": 0.8079727758872144, "grad_norm": 1.430878948112627, "learning_rate": 1.3900979400579787e-05, "loss": 0.6227835416793823, "step": 3324 }, { "epoch": 0.8082158483228002, "grad_norm": 1.3516946325929622, "learning_rate": 1.3897278094194422e-05, "loss": 0.5558527708053589, "step": 3325 }, { "epoch": 0.808458920758386, "grad_norm": 1.086588578306708, "learning_rate": 1.3893576158169454e-05, "loss": 0.60658198595047, "step": 3326 }, { "epoch": 0.8087019931939718, "grad_norm": 1.406034528600147, "learning_rate": 1.3889873593102964e-05, "loss": 0.5055236220359802, "step": 3327 }, { "epoch": 0.8089450656295576, "grad_norm": 1.3849384125450603, "learning_rate": 1.3886170399593142e-05, "loss": 0.5984170436859131, "step": 3328 }, { "epoch": 0.8091881380651434, "grad_norm": 1.2359966316426272, "learning_rate": 1.3882466578238263e-05, "loss": 0.5145629644393921, "step": 3329 }, { "epoch": 0.8094312105007292, "grad_norm": 1.3978675685842903, "learning_rate": 1.3878762129636713e-05, "loss": 0.5630947947502136, "step": 3330 }, { "epoch": 0.809674282936315, "grad_norm": 1.30458548934023, "learning_rate": 1.3875057054386982e-05, "loss": 0.6854604482650757, "step": 3331 }, { "epoch": 0.8099173553719008, "grad_norm": 1.4234023743493014, "learning_rate": 1.3871351353087654e-05, "loss": 0.5667203068733215, "step": 3332 }, { "epoch": 0.8101604278074866, "grad_norm": 1.3213153031588267, "learning_rate": 1.386764502633742e-05, "loss": 0.6273561716079712, "step": 3333 }, { "epoch": 0.8104035002430724, "grad_norm": 1.311240948799004, "learning_rate": 1.3863938074735066e-05, "loss": 0.5438836812973022, "step": 3334 }, { "epoch": 0.8106465726786583, "grad_norm": 1.2610517503207803, "learning_rate": 1.3860230498879487e-05, "loss": 0.5952879190444946, "step": 3335 }, { "epoch": 0.810889645114244, "grad_norm": 1.4606889251171196, "learning_rate": 1.3856522299369674e-05, "loss": 0.6987404823303223, "step": 3336 }, { "epoch": 0.8111327175498299, "grad_norm": 1.427252669043088, "learning_rate": 1.3852813476804718e-05, "loss": 0.682471752166748, "step": 3337 }, { "epoch": 0.8113757899854157, "grad_norm": 1.445477950703363, "learning_rate": 1.384910403178381e-05, "loss": 0.5727691054344177, "step": 3338 }, { "epoch": 0.8116188624210015, "grad_norm": 1.4789763477553612, "learning_rate": 1.3845393964906251e-05, "loss": 0.6888909935951233, "step": 3339 }, { "epoch": 0.8118619348565873, "grad_norm": 1.3718817511307138, "learning_rate": 1.384168327677143e-05, "loss": 0.7626557350158691, "step": 3340 }, { "epoch": 0.812105007292173, "grad_norm": 1.4713292216200304, "learning_rate": 1.3837971967978836e-05, "loss": 0.6756061911582947, "step": 3341 }, { "epoch": 0.8123480797277589, "grad_norm": 1.3973032777531154, "learning_rate": 1.3834260039128068e-05, "loss": 0.568729817867279, "step": 3342 }, { "epoch": 0.8125911521633447, "grad_norm": 1.445439968592343, "learning_rate": 1.3830547490818826e-05, "loss": 0.68213951587677, "step": 3343 }, { "epoch": 0.8128342245989305, "grad_norm": 1.330028618604576, "learning_rate": 1.3826834323650899e-05, "loss": 0.723652184009552, "step": 3344 }, { "epoch": 0.8130772970345163, "grad_norm": 1.5013458626540588, "learning_rate": 1.3823120538224184e-05, "loss": 0.701892077922821, "step": 3345 }, { "epoch": 0.8133203694701021, "grad_norm": 1.19575384754867, "learning_rate": 1.3819406135138674e-05, "loss": 0.6341818571090698, "step": 3346 }, { "epoch": 0.8135634419056879, "grad_norm": 1.276471882357722, "learning_rate": 1.3815691114994463e-05, "loss": 0.8746600151062012, "step": 3347 }, { "epoch": 0.8138065143412737, "grad_norm": 1.5369850499567526, "learning_rate": 1.3811975478391748e-05, "loss": 0.6999603509902954, "step": 3348 }, { "epoch": 0.8140495867768595, "grad_norm": 1.4119086024403542, "learning_rate": 1.3808259225930825e-05, "loss": 0.6597062349319458, "step": 3349 }, { "epoch": 0.8142926592124453, "grad_norm": 1.354705203203758, "learning_rate": 1.3804542358212081e-05, "loss": 0.7371991276741028, "step": 3350 }, { "epoch": 0.8145357316480311, "grad_norm": 1.1631927675384313, "learning_rate": 1.380082487583601e-05, "loss": 0.6757198572158813, "step": 3351 }, { "epoch": 0.8147788040836169, "grad_norm": 1.3087265842629077, "learning_rate": 1.3797106779403206e-05, "loss": 0.6140809059143066, "step": 3352 }, { "epoch": 0.8150218765192028, "grad_norm": 1.5196216988265179, "learning_rate": 1.3793388069514362e-05, "loss": 0.570105791091919, "step": 3353 }, { "epoch": 0.8152649489547885, "grad_norm": 1.1724853105153494, "learning_rate": 1.3789668746770264e-05, "loss": 0.6339827179908752, "step": 3354 }, { "epoch": 0.8155080213903744, "grad_norm": 1.3253105903666067, "learning_rate": 1.3785948811771803e-05, "loss": 0.674517035484314, "step": 3355 }, { "epoch": 0.8157510938259601, "grad_norm": 1.2436320978951527, "learning_rate": 1.378222826511997e-05, "loss": 0.6301798820495605, "step": 3356 }, { "epoch": 0.815994166261546, "grad_norm": 1.2453013608534775, "learning_rate": 1.3778507107415849e-05, "loss": 0.688217043876648, "step": 3357 }, { "epoch": 0.8162372386971317, "grad_norm": 1.520769991122463, "learning_rate": 1.3774785339260627e-05, "loss": 0.806133508682251, "step": 3358 }, { "epoch": 0.8164803111327176, "grad_norm": 1.2197417458566193, "learning_rate": 1.3771062961255588e-05, "loss": 0.6545911431312561, "step": 3359 }, { "epoch": 0.8167233835683033, "grad_norm": 1.1611776790816537, "learning_rate": 1.3767339974002117e-05, "loss": 0.6903351545333862, "step": 3360 }, { "epoch": 0.8169664560038892, "grad_norm": 1.2139826440163202, "learning_rate": 1.3763616378101691e-05, "loss": 0.6340637803077698, "step": 3361 }, { "epoch": 0.8172095284394749, "grad_norm": 1.2425750373621292, "learning_rate": 1.3759892174155893e-05, "loss": 0.6709448099136353, "step": 3362 }, { "epoch": 0.8174526008750608, "grad_norm": 1.3724442465470368, "learning_rate": 1.3756167362766397e-05, "loss": 0.6924033164978027, "step": 3363 }, { "epoch": 0.8176956733106465, "grad_norm": 1.3748546014293435, "learning_rate": 1.3752441944534987e-05, "loss": 0.6841589212417603, "step": 3364 }, { "epoch": 0.8179387457462324, "grad_norm": 1.254808001203808, "learning_rate": 1.374871592006353e-05, "loss": 0.5799908638000488, "step": 3365 }, { "epoch": 0.8181818181818182, "grad_norm": 1.0722968861867936, "learning_rate": 1.3744989289954e-05, "loss": 0.5718205571174622, "step": 3366 }, { "epoch": 0.818424890617404, "grad_norm": 1.3482387448072028, "learning_rate": 1.3741262054808469e-05, "loss": 0.7012891173362732, "step": 3367 }, { "epoch": 0.8186679630529898, "grad_norm": 1.3124225100125548, "learning_rate": 1.3737534215229103e-05, "loss": 0.8079378008842468, "step": 3368 }, { "epoch": 0.8189110354885756, "grad_norm": 1.2996311122982873, "learning_rate": 1.3733805771818166e-05, "loss": 0.7087955474853516, "step": 3369 }, { "epoch": 0.8191541079241614, "grad_norm": 1.4185571443796452, "learning_rate": 1.3730076725178024e-05, "loss": 0.7183899879455566, "step": 3370 }, { "epoch": 0.8193971803597472, "grad_norm": 1.2898506348367342, "learning_rate": 1.3726347075911131e-05, "loss": 0.5699722766876221, "step": 3371 }, { "epoch": 0.819640252795333, "grad_norm": 1.2094433512822302, "learning_rate": 1.3722616824620052e-05, "loss": 0.460671603679657, "step": 3372 }, { "epoch": 0.8198833252309188, "grad_norm": 1.3263420006774076, "learning_rate": 1.371888597190744e-05, "loss": 0.5011776089668274, "step": 3373 }, { "epoch": 0.8201263976665046, "grad_norm": 1.3019206083088686, "learning_rate": 1.3715154518376044e-05, "loss": 0.5087471008300781, "step": 3374 }, { "epoch": 0.8203694701020904, "grad_norm": 1.1656638974244715, "learning_rate": 1.3711422464628716e-05, "loss": 0.6530722379684448, "step": 3375 }, { "epoch": 0.8206125425376762, "grad_norm": 1.273078559165652, "learning_rate": 1.3707689811268403e-05, "loss": 0.636615514755249, "step": 3376 }, { "epoch": 0.820855614973262, "grad_norm": 1.1674862943729458, "learning_rate": 1.3703956558898145e-05, "loss": 0.6119654774665833, "step": 3377 }, { "epoch": 0.8210986874088478, "grad_norm": 1.5613868940348457, "learning_rate": 1.3700222708121085e-05, "loss": 0.6263973712921143, "step": 3378 }, { "epoch": 0.8213417598444337, "grad_norm": 1.3016786734526262, "learning_rate": 1.3696488259540455e-05, "loss": 0.7485108971595764, "step": 3379 }, { "epoch": 0.8215848322800194, "grad_norm": 1.511450837754167, "learning_rate": 1.3692753213759593e-05, "loss": 0.8472733497619629, "step": 3380 }, { "epoch": 0.8218279047156053, "grad_norm": 1.352482357208376, "learning_rate": 1.3689017571381928e-05, "loss": 0.8387123346328735, "step": 3381 }, { "epoch": 0.822070977151191, "grad_norm": 1.2427303971786932, "learning_rate": 1.3685281333010982e-05, "loss": 0.6615711450576782, "step": 3382 }, { "epoch": 0.8223140495867769, "grad_norm": 1.274894007854942, "learning_rate": 1.3681544499250379e-05, "loss": 0.7315108776092529, "step": 3383 }, { "epoch": 0.8225571220223626, "grad_norm": 1.1197808183373765, "learning_rate": 1.3677807070703841e-05, "loss": 0.6535066962242126, "step": 3384 }, { "epoch": 0.8228001944579485, "grad_norm": 1.4042262543165278, "learning_rate": 1.3674069047975177e-05, "loss": 0.7488241195678711, "step": 3385 }, { "epoch": 0.8230432668935342, "grad_norm": 1.3107285078704458, "learning_rate": 1.3670330431668302e-05, "loss": 0.7754788398742676, "step": 3386 }, { "epoch": 0.8232863393291201, "grad_norm": 1.075270453690032, "learning_rate": 1.366659122238722e-05, "loss": 0.6348282694816589, "step": 3387 }, { "epoch": 0.8235294117647058, "grad_norm": 1.1712455143296159, "learning_rate": 1.3662851420736032e-05, "loss": 0.6698475480079651, "step": 3388 }, { "epoch": 0.8237724842002917, "grad_norm": 1.2414491253621494, "learning_rate": 1.3659111027318936e-05, "loss": 0.6117052435874939, "step": 3389 }, { "epoch": 0.8240155566358774, "grad_norm": 1.3551258120276939, "learning_rate": 1.365537004274023e-05, "loss": 0.7384674549102783, "step": 3390 }, { "epoch": 0.8242586290714633, "grad_norm": 1.3432001709359813, "learning_rate": 1.3651628467604301e-05, "loss": 0.6469642519950867, "step": 3391 }, { "epoch": 0.824501701507049, "grad_norm": 1.2659698451107055, "learning_rate": 1.3647886302515627e-05, "loss": 0.6711422204971313, "step": 3392 }, { "epoch": 0.8247447739426349, "grad_norm": 1.358277708518685, "learning_rate": 1.3644143548078797e-05, "loss": 0.5566897392272949, "step": 3393 }, { "epoch": 0.8249878463782208, "grad_norm": 1.222387930082958, "learning_rate": 1.3640400204898485e-05, "loss": 0.445487916469574, "step": 3394 }, { "epoch": 0.8252309188138065, "grad_norm": 1.346774490962312, "learning_rate": 1.3636656273579454e-05, "loss": 0.7174774408340454, "step": 3395 }, { "epoch": 0.8254739912493924, "grad_norm": 1.200245401268736, "learning_rate": 1.3632911754726573e-05, "loss": 0.7146060466766357, "step": 3396 }, { "epoch": 0.8257170636849781, "grad_norm": 1.3999709618354088, "learning_rate": 1.3629166648944803e-05, "loss": 0.6977970600128174, "step": 3397 }, { "epoch": 0.825960136120564, "grad_norm": 1.4358678469602515, "learning_rate": 1.36254209568392e-05, "loss": 0.8311485648155212, "step": 3398 }, { "epoch": 0.8262032085561497, "grad_norm": 1.308621259300901, "learning_rate": 1.3621674679014908e-05, "loss": 0.6236478090286255, "step": 3399 }, { "epoch": 0.8264462809917356, "grad_norm": 1.229023067491141, "learning_rate": 1.3617927816077177e-05, "loss": 0.5661340355873108, "step": 3400 }, { "epoch": 0.8266893534273213, "grad_norm": 1.4357245696632264, "learning_rate": 1.3614180368631342e-05, "loss": 0.6378030180931091, "step": 3401 }, { "epoch": 0.8269324258629072, "grad_norm": 1.5601979723366146, "learning_rate": 1.361043233728284e-05, "loss": 0.7155025601387024, "step": 3402 }, { "epoch": 0.8271754982984929, "grad_norm": 1.2018329281604558, "learning_rate": 1.3606683722637195e-05, "loss": 0.7064741849899292, "step": 3403 }, { "epoch": 0.8274185707340788, "grad_norm": 1.3043680297689388, "learning_rate": 1.360293452530003e-05, "loss": 0.6229931712150574, "step": 3404 }, { "epoch": 0.8276616431696645, "grad_norm": 1.2898097138627025, "learning_rate": 1.3599184745877059e-05, "loss": 0.5985409021377563, "step": 3405 }, { "epoch": 0.8279047156052504, "grad_norm": 1.3992387765117051, "learning_rate": 1.3595434384974093e-05, "loss": 0.5348461866378784, "step": 3406 }, { "epoch": 0.8281477880408362, "grad_norm": 1.405838735320811, "learning_rate": 1.359168344319704e-05, "loss": 0.5681272745132446, "step": 3407 }, { "epoch": 0.828390860476422, "grad_norm": 1.288523800858529, "learning_rate": 1.3587931921151888e-05, "loss": 0.7325516939163208, "step": 3408 }, { "epoch": 0.8286339329120078, "grad_norm": 1.4379573203497893, "learning_rate": 1.358417981944474e-05, "loss": 0.6946238875389099, "step": 3409 }, { "epoch": 0.8288770053475936, "grad_norm": 1.3403824950362964, "learning_rate": 1.3580427138681774e-05, "loss": 0.7469080686569214, "step": 3410 }, { "epoch": 0.8291200777831794, "grad_norm": 1.1945908319690899, "learning_rate": 1.3576673879469269e-05, "loss": 0.6493101119995117, "step": 3411 }, { "epoch": 0.8293631502187652, "grad_norm": 1.320817905976283, "learning_rate": 1.35729200424136e-05, "loss": 0.6832085847854614, "step": 3412 }, { "epoch": 0.829606222654351, "grad_norm": 1.280853690925648, "learning_rate": 1.356916562812123e-05, "loss": 0.6398648023605347, "step": 3413 }, { "epoch": 0.8298492950899368, "grad_norm": 1.5489088032806453, "learning_rate": 1.356541063719872e-05, "loss": 0.6646740436553955, "step": 3414 }, { "epoch": 0.8300923675255226, "grad_norm": 1.2051953655114476, "learning_rate": 1.3561655070252719e-05, "loss": 0.5894690155982971, "step": 3415 }, { "epoch": 0.8303354399611084, "grad_norm": 1.198773027811795, "learning_rate": 1.355789892788997e-05, "loss": 0.589983344078064, "step": 3416 }, { "epoch": 0.8305785123966942, "grad_norm": 1.3226808054813257, "learning_rate": 1.3554142210717317e-05, "loss": 0.5923150777816772, "step": 3417 }, { "epoch": 0.83082158483228, "grad_norm": 1.8254792183255788, "learning_rate": 1.3550384919341688e-05, "loss": 0.6869733333587646, "step": 3418 }, { "epoch": 0.8310646572678658, "grad_norm": 1.3449479145961338, "learning_rate": 1.3546627054370105e-05, "loss": 0.6276346445083618, "step": 3419 }, { "epoch": 0.8313077297034517, "grad_norm": 1.3617019745598988, "learning_rate": 1.3542868616409689e-05, "loss": 0.6636717319488525, "step": 3420 }, { "epoch": 0.8315508021390374, "grad_norm": 1.4628577324270273, "learning_rate": 1.3539109606067642e-05, "loss": 0.6999602913856506, "step": 3421 }, { "epoch": 0.8317938745746233, "grad_norm": 1.2594271109252144, "learning_rate": 1.3535350023951269e-05, "loss": 0.5994772911071777, "step": 3422 }, { "epoch": 0.832036947010209, "grad_norm": 1.2481066648424433, "learning_rate": 1.3531589870667963e-05, "loss": 0.5505393743515015, "step": 3423 }, { "epoch": 0.8322800194457949, "grad_norm": 1.3757298077161262, "learning_rate": 1.3527829146825208e-05, "loss": 0.6888733506202698, "step": 3424 }, { "epoch": 0.8325230918813806, "grad_norm": 1.243371552260501, "learning_rate": 1.3524067853030586e-05, "loss": 0.5627062320709229, "step": 3425 }, { "epoch": 0.8327661643169665, "grad_norm": 1.6370157815389037, "learning_rate": 1.3520305989891764e-05, "loss": 0.5923823118209839, "step": 3426 }, { "epoch": 0.8330092367525522, "grad_norm": 1.5086425048479988, "learning_rate": 1.3516543558016508e-05, "loss": 0.8397760391235352, "step": 3427 }, { "epoch": 0.8332523091881381, "grad_norm": 1.273965551150315, "learning_rate": 1.3512780558012665e-05, "loss": 0.6901479959487915, "step": 3428 }, { "epoch": 0.8334953816237238, "grad_norm": 1.2275761035930417, "learning_rate": 1.3509016990488191e-05, "loss": 0.5900871753692627, "step": 3429 }, { "epoch": 0.8337384540593097, "grad_norm": 1.4553837494900848, "learning_rate": 1.3505252856051114e-05, "loss": 0.5955662131309509, "step": 3430 }, { "epoch": 0.8339815264948954, "grad_norm": 1.2484931636645065, "learning_rate": 1.3501488155309566e-05, "loss": 0.6715408563613892, "step": 3431 }, { "epoch": 0.8342245989304813, "grad_norm": 1.2708158987515514, "learning_rate": 1.349772288887177e-05, "loss": 0.5673171281814575, "step": 3432 }, { "epoch": 0.8344676713660671, "grad_norm": 1.4452330812259389, "learning_rate": 1.3493957057346032e-05, "loss": 0.6877837181091309, "step": 3433 }, { "epoch": 0.8347107438016529, "grad_norm": 1.4422557736386437, "learning_rate": 1.3490190661340763e-05, "loss": 0.543488085269928, "step": 3434 }, { "epoch": 0.8349538162372387, "grad_norm": 1.510846531958858, "learning_rate": 1.3486423701464453e-05, "loss": 0.6466708183288574, "step": 3435 }, { "epoch": 0.8351968886728245, "grad_norm": 1.610179873852416, "learning_rate": 1.3482656178325689e-05, "loss": 0.6856960654258728, "step": 3436 }, { "epoch": 0.8354399611084103, "grad_norm": 1.2994890144698288, "learning_rate": 1.3478888092533145e-05, "loss": 0.8049095273017883, "step": 3437 }, { "epoch": 0.8356830335439961, "grad_norm": 1.3245615640654462, "learning_rate": 1.3475119444695595e-05, "loss": 0.6224536895751953, "step": 3438 }, { "epoch": 0.835926105979582, "grad_norm": 1.3903009210072275, "learning_rate": 1.3471350235421887e-05, "loss": 0.7762950658798218, "step": 3439 }, { "epoch": 0.8361691784151677, "grad_norm": 1.408400950409356, "learning_rate": 1.346758046532098e-05, "loss": 0.652751624584198, "step": 3440 }, { "epoch": 0.8364122508507535, "grad_norm": 1.4442478992024166, "learning_rate": 1.3463810135001904e-05, "loss": 0.5651956796646118, "step": 3441 }, { "epoch": 0.8366553232863393, "grad_norm": 1.3386589079163866, "learning_rate": 1.3460039245073798e-05, "loss": 0.5144855976104736, "step": 3442 }, { "epoch": 0.8368983957219251, "grad_norm": 1.3500383685178214, "learning_rate": 1.345626779614588e-05, "loss": 0.7165416479110718, "step": 3443 }, { "epoch": 0.8371414681575109, "grad_norm": 1.0974836374330421, "learning_rate": 1.3452495788827455e-05, "loss": 0.5714793801307678, "step": 3444 }, { "epoch": 0.8373845405930967, "grad_norm": 1.435826825316094, "learning_rate": 1.3448723223727929e-05, "loss": 0.77423095703125, "step": 3445 }, { "epoch": 0.8376276130286825, "grad_norm": 1.309899073498058, "learning_rate": 1.3444950101456792e-05, "loss": 0.6470581293106079, "step": 3446 }, { "epoch": 0.8378706854642683, "grad_norm": 1.3173440711839952, "learning_rate": 1.344117642262363e-05, "loss": 0.5502126216888428, "step": 3447 }, { "epoch": 0.8381137578998542, "grad_norm": 1.264720453756091, "learning_rate": 1.3437402187838111e-05, "loss": 0.8006755113601685, "step": 3448 }, { "epoch": 0.83835683033544, "grad_norm": 1.2530500164730796, "learning_rate": 1.3433627397709987e-05, "loss": 0.5237860679626465, "step": 3449 }, { "epoch": 0.8385999027710258, "grad_norm": 1.1061517222789772, "learning_rate": 1.3429852052849122e-05, "loss": 0.627091646194458, "step": 3450 }, { "epoch": 0.8388429752066116, "grad_norm": 1.221109839563721, "learning_rate": 1.342607615386545e-05, "loss": 0.6031063795089722, "step": 3451 }, { "epoch": 0.8390860476421974, "grad_norm": 1.4371400866811428, "learning_rate": 1.3422299701368999e-05, "loss": 0.6460427641868591, "step": 3452 }, { "epoch": 0.8393291200777832, "grad_norm": 1.5138807862868013, "learning_rate": 1.3418522695969892e-05, "loss": 0.6822220087051392, "step": 3453 }, { "epoch": 0.839572192513369, "grad_norm": 1.2256530796318437, "learning_rate": 1.3414745138278334e-05, "loss": 0.6775282025337219, "step": 3454 }, { "epoch": 0.8398152649489548, "grad_norm": 1.3127178955864012, "learning_rate": 1.3410967028904627e-05, "loss": 0.6692503690719604, "step": 3455 }, { "epoch": 0.8400583373845406, "grad_norm": 1.3603325869638292, "learning_rate": 1.3407188368459152e-05, "loss": 0.6912590265274048, "step": 3456 }, { "epoch": 0.8403014098201264, "grad_norm": 1.565559202754125, "learning_rate": 1.3403409157552391e-05, "loss": 0.8765546083450317, "step": 3457 }, { "epoch": 0.8405444822557122, "grad_norm": 1.5934920352945654, "learning_rate": 1.3399629396794907e-05, "loss": 0.7144431471824646, "step": 3458 }, { "epoch": 0.840787554691298, "grad_norm": 1.3712739187107708, "learning_rate": 1.339584908679735e-05, "loss": 0.6270037293434143, "step": 3459 }, { "epoch": 0.8410306271268838, "grad_norm": 1.4406516011546746, "learning_rate": 1.3392068228170467e-05, "loss": 0.6531490087509155, "step": 3460 }, { "epoch": 0.8412736995624697, "grad_norm": 1.341937747596504, "learning_rate": 1.3388286821525086e-05, "loss": 0.7785901427268982, "step": 3461 }, { "epoch": 0.8415167719980554, "grad_norm": 1.182107763278689, "learning_rate": 1.3384504867472126e-05, "loss": 0.5726717114448547, "step": 3462 }, { "epoch": 0.8417598444336413, "grad_norm": 1.3864796943336215, "learning_rate": 1.3380722366622598e-05, "loss": 0.7520691156387329, "step": 3463 }, { "epoch": 0.842002916869227, "grad_norm": 1.0370537248130607, "learning_rate": 1.3376939319587598e-05, "loss": 0.6989579200744629, "step": 3464 }, { "epoch": 0.8422459893048129, "grad_norm": 1.2030743583049281, "learning_rate": 1.3373155726978308e-05, "loss": 0.5495274066925049, "step": 3465 }, { "epoch": 0.8424890617403986, "grad_norm": 1.1891970421093527, "learning_rate": 1.3369371589406006e-05, "loss": 0.7489500641822815, "step": 3466 }, { "epoch": 0.8427321341759845, "grad_norm": 1.2414669155139706, "learning_rate": 1.3365586907482044e-05, "loss": 0.48740577697753906, "step": 3467 }, { "epoch": 0.8429752066115702, "grad_norm": 1.5735672404173275, "learning_rate": 1.3361801681817879e-05, "loss": 0.6991966962814331, "step": 3468 }, { "epoch": 0.8432182790471561, "grad_norm": 1.301086905900277, "learning_rate": 1.3358015913025045e-05, "loss": 0.628151535987854, "step": 3469 }, { "epoch": 0.8434613514827418, "grad_norm": 1.2529554257348463, "learning_rate": 1.3354229601715161e-05, "loss": 0.7473700046539307, "step": 3470 }, { "epoch": 0.8437044239183277, "grad_norm": 1.5272035056657076, "learning_rate": 1.3350442748499944e-05, "loss": 0.6111875772476196, "step": 3471 }, { "epoch": 0.8439474963539134, "grad_norm": 1.123167342167691, "learning_rate": 1.3346655353991195e-05, "loss": 0.536298930644989, "step": 3472 }, { "epoch": 0.8441905687894993, "grad_norm": 1.3076795379472494, "learning_rate": 1.3342867418800798e-05, "loss": 0.6450611352920532, "step": 3473 }, { "epoch": 0.8444336412250851, "grad_norm": 1.1354388310395114, "learning_rate": 1.3339078943540731e-05, "loss": 0.6297110319137573, "step": 3474 }, { "epoch": 0.8446767136606709, "grad_norm": 1.265326263839764, "learning_rate": 1.3335289928823049e-05, "loss": 0.5974615812301636, "step": 3475 }, { "epoch": 0.8449197860962567, "grad_norm": 1.2798160796131817, "learning_rate": 1.3331500375259907e-05, "loss": 0.5114350318908691, "step": 3476 }, { "epoch": 0.8451628585318425, "grad_norm": 1.3064748198110485, "learning_rate": 1.3327710283463535e-05, "loss": 0.6350747346878052, "step": 3477 }, { "epoch": 0.8454059309674283, "grad_norm": 1.4483585564410213, "learning_rate": 1.332391965404626e-05, "loss": 0.6874380111694336, "step": 3478 }, { "epoch": 0.8456490034030141, "grad_norm": 1.3357864344878536, "learning_rate": 1.3320128487620491e-05, "loss": 0.7498493790626526, "step": 3479 }, { "epoch": 0.8458920758385999, "grad_norm": 1.5486944108005019, "learning_rate": 1.3316336784798724e-05, "loss": 0.6143897771835327, "step": 3480 }, { "epoch": 0.8461351482741857, "grad_norm": 1.2923258668180693, "learning_rate": 1.3312544546193542e-05, "loss": 0.5002519488334656, "step": 3481 }, { "epoch": 0.8463782207097715, "grad_norm": 1.314274723518114, "learning_rate": 1.3308751772417611e-05, "loss": 0.5715277194976807, "step": 3482 }, { "epoch": 0.8466212931453573, "grad_norm": 1.2781035901129456, "learning_rate": 1.3304958464083698e-05, "loss": 0.5911047458648682, "step": 3483 }, { "epoch": 0.8468643655809431, "grad_norm": 1.5454616620232937, "learning_rate": 1.3301164621804632e-05, "loss": 0.5595042109489441, "step": 3484 }, { "epoch": 0.8471074380165289, "grad_norm": 1.4278185394368148, "learning_rate": 1.3297370246193349e-05, "loss": 0.6106480360031128, "step": 3485 }, { "epoch": 0.8473505104521147, "grad_norm": 1.325336204469548, "learning_rate": 1.3293575337862858e-05, "loss": 0.6244200468063354, "step": 3486 }, { "epoch": 0.8475935828877005, "grad_norm": 1.226967579549434, "learning_rate": 1.328977989742627e-05, "loss": 0.6943122148513794, "step": 3487 }, { "epoch": 0.8478366553232863, "grad_norm": 1.4878944841584685, "learning_rate": 1.3285983925496761e-05, "loss": 0.7581759095191956, "step": 3488 }, { "epoch": 0.8480797277588722, "grad_norm": 1.4408567090033553, "learning_rate": 1.3282187422687612e-05, "loss": 0.608312726020813, "step": 3489 }, { "epoch": 0.8483228001944579, "grad_norm": 1.490235901506079, "learning_rate": 1.3278390389612175e-05, "loss": 0.6848390698432922, "step": 3490 }, { "epoch": 0.8485658726300438, "grad_norm": 1.3369298873562792, "learning_rate": 1.3274592826883898e-05, "loss": 0.4980524182319641, "step": 3491 }, { "epoch": 0.8488089450656295, "grad_norm": 1.135375004751942, "learning_rate": 1.3270794735116312e-05, "loss": 0.572228193283081, "step": 3492 }, { "epoch": 0.8490520175012154, "grad_norm": 1.4001162001460974, "learning_rate": 1.3266996114923027e-05, "loss": 0.6502918004989624, "step": 3493 }, { "epoch": 0.8492950899368011, "grad_norm": 1.6385744571058423, "learning_rate": 1.3263196966917748e-05, "loss": 0.6824593544006348, "step": 3494 }, { "epoch": 0.849538162372387, "grad_norm": 1.3119858976010048, "learning_rate": 1.3259397291714258e-05, "loss": 0.7767385840415955, "step": 3495 }, { "epoch": 0.8497812348079727, "grad_norm": 1.307308190682041, "learning_rate": 1.3255597089926431e-05, "loss": 0.5725094079971313, "step": 3496 }, { "epoch": 0.8500243072435586, "grad_norm": 1.189699082804666, "learning_rate": 1.3251796362168222e-05, "loss": 0.5398495197296143, "step": 3497 }, { "epoch": 0.8502673796791443, "grad_norm": 1.442812189927797, "learning_rate": 1.3247995109053666e-05, "loss": 0.7230567932128906, "step": 3498 }, { "epoch": 0.8505104521147302, "grad_norm": 1.611489409925878, "learning_rate": 1.32441933311969e-05, "loss": 0.712658703327179, "step": 3499 }, { "epoch": 0.8507535245503159, "grad_norm": 1.3005198086865095, "learning_rate": 1.3240391029212127e-05, "loss": 0.46233534812927246, "step": 3500 }, { "epoch": 0.8509965969859018, "grad_norm": 1.2514119246602455, "learning_rate": 1.3236588203713645e-05, "loss": 0.5794561505317688, "step": 3501 }, { "epoch": 0.8512396694214877, "grad_norm": 1.7128927128400941, "learning_rate": 1.3232784855315836e-05, "loss": 0.6758724451065063, "step": 3502 }, { "epoch": 0.8514827418570734, "grad_norm": 1.473407400993565, "learning_rate": 1.3228980984633157e-05, "loss": 0.8903415203094482, "step": 3503 }, { "epoch": 0.8517258142926593, "grad_norm": 1.361332865959058, "learning_rate": 1.3225176592280164e-05, "loss": 0.6133327484130859, "step": 3504 }, { "epoch": 0.851968886728245, "grad_norm": 1.2975781371691033, "learning_rate": 1.3221371678871489e-05, "loss": 0.6404067873954773, "step": 3505 }, { "epoch": 0.8522119591638309, "grad_norm": 1.377470884492745, "learning_rate": 1.3217566245021845e-05, "loss": 0.5497263669967651, "step": 3506 }, { "epoch": 0.8524550315994166, "grad_norm": 1.2520938670441377, "learning_rate": 1.3213760291346036e-05, "loss": 0.5879902839660645, "step": 3507 }, { "epoch": 0.8526981040350025, "grad_norm": 1.172300452600444, "learning_rate": 1.320995381845895e-05, "loss": 0.6434237957000732, "step": 3508 }, { "epoch": 0.8529411764705882, "grad_norm": 1.4205454921989489, "learning_rate": 1.3206146826975554e-05, "loss": 0.7112480401992798, "step": 3509 }, { "epoch": 0.8531842489061741, "grad_norm": 1.183670042911237, "learning_rate": 1.3202339317510899e-05, "loss": 0.703721284866333, "step": 3510 }, { "epoch": 0.8534273213417598, "grad_norm": 1.3579829693361327, "learning_rate": 1.3198531290680126e-05, "loss": 0.7524465322494507, "step": 3511 }, { "epoch": 0.8536703937773457, "grad_norm": 1.3119966128301248, "learning_rate": 1.3194722747098451e-05, "loss": 0.6817705035209656, "step": 3512 }, { "epoch": 0.8539134662129314, "grad_norm": 1.4431248888500505, "learning_rate": 1.3190913687381182e-05, "loss": 0.606965184211731, "step": 3513 }, { "epoch": 0.8541565386485173, "grad_norm": 1.2128095634211074, "learning_rate": 1.31871041121437e-05, "loss": 0.7397875785827637, "step": 3514 }, { "epoch": 0.8543996110841031, "grad_norm": 1.215986367441501, "learning_rate": 1.318329402200148e-05, "loss": 0.6263875961303711, "step": 3515 }, { "epoch": 0.8546426835196889, "grad_norm": 1.302035161584853, "learning_rate": 1.3179483417570075e-05, "loss": 0.6281944513320923, "step": 3516 }, { "epoch": 0.8548857559552747, "grad_norm": 1.251599624232664, "learning_rate": 1.3175672299465124e-05, "loss": 0.6281805038452148, "step": 3517 }, { "epoch": 0.8551288283908605, "grad_norm": 1.6083069964687318, "learning_rate": 1.3171860668302343e-05, "loss": 0.7173851728439331, "step": 3518 }, { "epoch": 0.8553719008264463, "grad_norm": 1.7030185669480715, "learning_rate": 1.3168048524697534e-05, "loss": 0.6456143856048584, "step": 3519 }, { "epoch": 0.8556149732620321, "grad_norm": 1.1232993677699759, "learning_rate": 1.3164235869266585e-05, "loss": 0.6754023432731628, "step": 3520 }, { "epoch": 0.8558580456976179, "grad_norm": 1.366425788806003, "learning_rate": 1.3160422702625467e-05, "loss": 0.6581951379776001, "step": 3521 }, { "epoch": 0.8561011181332037, "grad_norm": 1.4664734948739313, "learning_rate": 1.3156609025390221e-05, "loss": 0.8366720676422119, "step": 3522 }, { "epoch": 0.8563441905687895, "grad_norm": 1.1776063508256962, "learning_rate": 1.3152794838176988e-05, "loss": 0.4579940438270569, "step": 3523 }, { "epoch": 0.8565872630043753, "grad_norm": 1.3284902123431441, "learning_rate": 1.3148980141601982e-05, "loss": 0.518406867980957, "step": 3524 }, { "epoch": 0.8568303354399611, "grad_norm": 1.7505193810723663, "learning_rate": 1.3145164936281501e-05, "loss": 0.6036260724067688, "step": 3525 }, { "epoch": 0.8570734078755469, "grad_norm": 1.4651811968953825, "learning_rate": 1.3141349222831923e-05, "loss": 0.7474085092544556, "step": 3526 }, { "epoch": 0.8573164803111327, "grad_norm": 1.6356012161206, "learning_rate": 1.3137533001869713e-05, "loss": 0.6469744443893433, "step": 3527 }, { "epoch": 0.8575595527467185, "grad_norm": 1.3435539211390495, "learning_rate": 1.3133716274011414e-05, "loss": 0.6184121370315552, "step": 3528 }, { "epoch": 0.8578026251823043, "grad_norm": 1.2878781145447682, "learning_rate": 1.312989903987365e-05, "loss": 0.5367239713668823, "step": 3529 }, { "epoch": 0.8580456976178902, "grad_norm": 1.3464896611289774, "learning_rate": 1.3126081300073132e-05, "loss": 0.6634431481361389, "step": 3530 }, { "epoch": 0.8582887700534759, "grad_norm": 1.2117867090980365, "learning_rate": 1.3122263055226645e-05, "loss": 0.4837067723274231, "step": 3531 }, { "epoch": 0.8585318424890618, "grad_norm": 1.3657975888583367, "learning_rate": 1.3118444305951065e-05, "loss": 0.581218957901001, "step": 3532 }, { "epoch": 0.8587749149246475, "grad_norm": 1.430704824587142, "learning_rate": 1.3114625052863343e-05, "loss": 0.6674765944480896, "step": 3533 }, { "epoch": 0.8590179873602334, "grad_norm": 1.4265631699070578, "learning_rate": 1.3110805296580513e-05, "loss": 0.6823673844337463, "step": 3534 }, { "epoch": 0.8592610597958191, "grad_norm": 1.2874530172888827, "learning_rate": 1.3106985037719686e-05, "loss": 0.5887505412101746, "step": 3535 }, { "epoch": 0.859504132231405, "grad_norm": 1.4702827303619967, "learning_rate": 1.3103164276898066e-05, "loss": 0.608604907989502, "step": 3536 }, { "epoch": 0.8597472046669907, "grad_norm": 1.4467627085617483, "learning_rate": 1.3099343014732928e-05, "loss": 0.5557856559753418, "step": 3537 }, { "epoch": 0.8599902771025766, "grad_norm": 1.915584330819148, "learning_rate": 1.3095521251841628e-05, "loss": 0.6017040610313416, "step": 3538 }, { "epoch": 0.8602333495381623, "grad_norm": 1.2181497239201986, "learning_rate": 1.3091698988841604e-05, "loss": 0.5646081566810608, "step": 3539 }, { "epoch": 0.8604764219737482, "grad_norm": 1.1969794052516074, "learning_rate": 1.3087876226350382e-05, "loss": 0.6779073476791382, "step": 3540 }, { "epoch": 0.8607194944093339, "grad_norm": 1.3126134910546667, "learning_rate": 1.3084052964985559e-05, "loss": 0.5554418563842773, "step": 3541 }, { "epoch": 0.8609625668449198, "grad_norm": 1.5251702448423772, "learning_rate": 1.3080229205364818e-05, "loss": 0.5849447250366211, "step": 3542 }, { "epoch": 0.8612056392805056, "grad_norm": 1.1692069232513638, "learning_rate": 1.3076404948105921e-05, "loss": 0.5729180574417114, "step": 3543 }, { "epoch": 0.8614487117160914, "grad_norm": 1.3415854532937441, "learning_rate": 1.3072580193826708e-05, "loss": 0.6699074506759644, "step": 3544 }, { "epoch": 0.8616917841516772, "grad_norm": 1.2937344394050023, "learning_rate": 1.3068754943145105e-05, "loss": 0.6714217662811279, "step": 3545 }, { "epoch": 0.861934856587263, "grad_norm": 1.3087028688827416, "learning_rate": 1.3064929196679114e-05, "loss": 0.5746918320655823, "step": 3546 }, { "epoch": 0.8621779290228488, "grad_norm": 1.5732493021356178, "learning_rate": 1.3061102955046822e-05, "loss": 0.682119607925415, "step": 3547 }, { "epoch": 0.8624210014584346, "grad_norm": 1.2324382494005468, "learning_rate": 1.3057276218866382e-05, "loss": 0.602458119392395, "step": 3548 }, { "epoch": 0.8626640738940204, "grad_norm": 1.4826862989558744, "learning_rate": 1.3053448988756046e-05, "loss": 0.6319684982299805, "step": 3549 }, { "epoch": 0.8629071463296062, "grad_norm": 1.393545397565116, "learning_rate": 1.3049621265334132e-05, "loss": 0.6992062330245972, "step": 3550 }, { "epoch": 0.863150218765192, "grad_norm": 1.401900475548958, "learning_rate": 1.3045793049219042e-05, "loss": 0.5461175441741943, "step": 3551 }, { "epoch": 0.8633932912007778, "grad_norm": 1.2063155380331008, "learning_rate": 1.3041964341029262e-05, "loss": 0.5258105993270874, "step": 3552 }, { "epoch": 0.8636363636363636, "grad_norm": 1.3083496784758746, "learning_rate": 1.303813514138335e-05, "loss": 0.653510570526123, "step": 3553 }, { "epoch": 0.8638794360719494, "grad_norm": 1.233738921217831, "learning_rate": 1.3034305450899951e-05, "loss": 0.7248444557189941, "step": 3554 }, { "epoch": 0.8641225085075352, "grad_norm": 1.303938054385844, "learning_rate": 1.3030475270197782e-05, "loss": 0.6570404767990112, "step": 3555 }, { "epoch": 0.8643655809431211, "grad_norm": 1.267794715005444, "learning_rate": 1.3026644599895644e-05, "loss": 0.7126563787460327, "step": 3556 }, { "epoch": 0.8646086533787068, "grad_norm": 1.3151754391207802, "learning_rate": 1.3022813440612411e-05, "loss": 0.4687907099723816, "step": 3557 }, { "epoch": 0.8648517258142927, "grad_norm": 1.4070392476940121, "learning_rate": 1.3018981792967048e-05, "loss": 0.6868255138397217, "step": 3558 }, { "epoch": 0.8650947982498784, "grad_norm": 1.3946184863041098, "learning_rate": 1.3015149657578585e-05, "loss": 0.5944437980651855, "step": 3559 }, { "epoch": 0.8653378706854643, "grad_norm": 1.7870760863983, "learning_rate": 1.301131703506614e-05, "loss": 0.6952168941497803, "step": 3560 }, { "epoch": 0.86558094312105, "grad_norm": 1.3151326218005501, "learning_rate": 1.3007483926048908e-05, "loss": 0.7825570106506348, "step": 3561 }, { "epoch": 0.8658240155566359, "grad_norm": 1.3269629527917683, "learning_rate": 1.3003650331146159e-05, "loss": 0.6702679395675659, "step": 3562 }, { "epoch": 0.8660670879922217, "grad_norm": 1.3774769570103855, "learning_rate": 1.2999816250977246e-05, "loss": 0.6452623605728149, "step": 3563 }, { "epoch": 0.8663101604278075, "grad_norm": 1.5331377011838085, "learning_rate": 1.2995981686161599e-05, "loss": 0.6708500385284424, "step": 3564 }, { "epoch": 0.8665532328633933, "grad_norm": 1.4909893072879303, "learning_rate": 1.2992146637318721e-05, "loss": 0.6214514970779419, "step": 3565 }, { "epoch": 0.8667963052989791, "grad_norm": 1.4077090785735826, "learning_rate": 1.2988311105068204e-05, "loss": 0.5763859748840332, "step": 3566 }, { "epoch": 0.8670393777345649, "grad_norm": 1.2792115005626226, "learning_rate": 1.2984475090029706e-05, "loss": 0.5286937952041626, "step": 3567 }, { "epoch": 0.8672824501701507, "grad_norm": 1.1843520044522655, "learning_rate": 1.2980638592822974e-05, "loss": 0.7669963836669922, "step": 3568 }, { "epoch": 0.8675255226057365, "grad_norm": 1.224589573216859, "learning_rate": 1.2976801614067826e-05, "loss": 0.6144599914550781, "step": 3569 }, { "epoch": 0.8677685950413223, "grad_norm": 1.1984547696470804, "learning_rate": 1.2972964154384163e-05, "loss": 0.6484661102294922, "step": 3570 }, { "epoch": 0.8680116674769082, "grad_norm": 1.557214924970859, "learning_rate": 1.2969126214391954e-05, "loss": 0.7990586161613464, "step": 3571 }, { "epoch": 0.8682547399124939, "grad_norm": 1.4886029432013073, "learning_rate": 1.2965287794711257e-05, "loss": 0.6027978658676147, "step": 3572 }, { "epoch": 0.8684978123480798, "grad_norm": 1.564518105750083, "learning_rate": 1.2961448895962199e-05, "loss": 0.642460823059082, "step": 3573 }, { "epoch": 0.8687408847836655, "grad_norm": 1.276380827418956, "learning_rate": 1.295760951876499e-05, "loss": 0.6525782346725464, "step": 3574 }, { "epoch": 0.8689839572192514, "grad_norm": 1.2692603865060341, "learning_rate": 1.2953769663739916e-05, "loss": 0.5512070059776306, "step": 3575 }, { "epoch": 0.8692270296548371, "grad_norm": 1.3542191912488621, "learning_rate": 1.2949929331507339e-05, "loss": 0.6553381681442261, "step": 3576 }, { "epoch": 0.869470102090423, "grad_norm": 1.209650304827051, "learning_rate": 1.2946088522687696e-05, "loss": 0.5551872849464417, "step": 3577 }, { "epoch": 0.8697131745260087, "grad_norm": 1.436174685754454, "learning_rate": 1.2942247237901507e-05, "loss": 0.665408730506897, "step": 3578 }, { "epoch": 0.8699562469615946, "grad_norm": 1.470962259867472, "learning_rate": 1.2938405477769363e-05, "loss": 0.6202508211135864, "step": 3579 }, { "epoch": 0.8701993193971803, "grad_norm": 1.5530306225362878, "learning_rate": 1.2934563242911934e-05, "loss": 0.6725844144821167, "step": 3580 }, { "epoch": 0.8704423918327662, "grad_norm": 1.4214170982542729, "learning_rate": 1.2930720533949967e-05, "loss": 0.7365444302558899, "step": 3581 }, { "epoch": 0.8706854642683519, "grad_norm": 1.2710210960027595, "learning_rate": 1.292687735150429e-05, "loss": 0.5514234900474548, "step": 3582 }, { "epoch": 0.8709285367039378, "grad_norm": 1.3998435925437802, "learning_rate": 1.2923033696195798e-05, "loss": 0.7304848432540894, "step": 3583 }, { "epoch": 0.8711716091395236, "grad_norm": 1.2404840470590779, "learning_rate": 1.2919189568645467e-05, "loss": 0.6707983016967773, "step": 3584 }, { "epoch": 0.8714146815751094, "grad_norm": 1.3593288238693717, "learning_rate": 1.2915344969474353e-05, "loss": 0.6484081745147705, "step": 3585 }, { "epoch": 0.8716577540106952, "grad_norm": 1.343979734840328, "learning_rate": 1.2911499899303581e-05, "loss": 0.5900115966796875, "step": 3586 }, { "epoch": 0.871900826446281, "grad_norm": 1.2380917565126304, "learning_rate": 1.290765435875436e-05, "loss": 0.5041496753692627, "step": 3587 }, { "epoch": 0.8721438988818668, "grad_norm": 1.4840568030919645, "learning_rate": 1.2903808348447969e-05, "loss": 0.6050943732261658, "step": 3588 }, { "epoch": 0.8723869713174526, "grad_norm": 1.3294006937270213, "learning_rate": 1.2899961869005761e-05, "loss": 0.592647910118103, "step": 3589 }, { "epoch": 0.8726300437530384, "grad_norm": 1.1829878291692826, "learning_rate": 1.2896114921049177e-05, "loss": 0.5244619846343994, "step": 3590 }, { "epoch": 0.8728731161886242, "grad_norm": 1.3079935164855108, "learning_rate": 1.289226750519972e-05, "loss": 0.614739179611206, "step": 3591 }, { "epoch": 0.87311618862421, "grad_norm": 1.699690527816476, "learning_rate": 1.2888419622078976e-05, "loss": 0.6849691867828369, "step": 3592 }, { "epoch": 0.8733592610597958, "grad_norm": 1.3603330403551084, "learning_rate": 1.28845712723086e-05, "loss": 0.6272097826004028, "step": 3593 }, { "epoch": 0.8736023334953816, "grad_norm": 1.4736138368222576, "learning_rate": 1.288072245651033e-05, "loss": 0.7712994813919067, "step": 3594 }, { "epoch": 0.8738454059309674, "grad_norm": 1.238645502371351, "learning_rate": 1.2876873175305977e-05, "loss": 0.6406420469284058, "step": 3595 }, { "epoch": 0.8740884783665532, "grad_norm": 1.4055362500526194, "learning_rate": 1.2873023429317426e-05, "loss": 0.6400429010391235, "step": 3596 }, { "epoch": 0.8743315508021391, "grad_norm": 1.1631184061630904, "learning_rate": 1.2869173219166632e-05, "loss": 0.43365901708602905, "step": 3597 }, { "epoch": 0.8745746232377248, "grad_norm": 1.32576790002033, "learning_rate": 1.286532254547564e-05, "loss": 0.6860702633857727, "step": 3598 }, { "epoch": 0.8748176956733107, "grad_norm": 1.265206500298662, "learning_rate": 1.2861471408866555e-05, "loss": 0.6037848591804504, "step": 3599 }, { "epoch": 0.8750607681088964, "grad_norm": 1.359641227189625, "learning_rate": 1.2857619809961563e-05, "loss": 0.6348063945770264, "step": 3600 }, { "epoch": 0.8753038405444823, "grad_norm": 1.2939601828738376, "learning_rate": 1.2853767749382922e-05, "loss": 0.5720525979995728, "step": 3601 }, { "epoch": 0.875546912980068, "grad_norm": 1.1965445712011085, "learning_rate": 1.2849915227752966e-05, "loss": 0.6841995120048523, "step": 3602 }, { "epoch": 0.8757899854156539, "grad_norm": 1.4806914364812267, "learning_rate": 1.2846062245694107e-05, "loss": 0.5891491770744324, "step": 3603 }, { "epoch": 0.8760330578512396, "grad_norm": 1.1474983324574495, "learning_rate": 1.2842208803828826e-05, "loss": 0.6216716766357422, "step": 3604 }, { "epoch": 0.8762761302868255, "grad_norm": 1.0400972578172545, "learning_rate": 1.283835490277968e-05, "loss": 0.4915117621421814, "step": 3605 }, { "epoch": 0.8765192027224112, "grad_norm": 1.2593803476947123, "learning_rate": 1.2834500543169299e-05, "loss": 0.6441718339920044, "step": 3606 }, { "epoch": 0.8767622751579971, "grad_norm": 1.3684271035089681, "learning_rate": 1.2830645725620397e-05, "loss": 0.7130417227745056, "step": 3607 }, { "epoch": 0.8770053475935828, "grad_norm": 1.2077837052366458, "learning_rate": 1.2826790450755744e-05, "loss": 0.6012754440307617, "step": 3608 }, { "epoch": 0.8772484200291687, "grad_norm": 1.2386340893856314, "learning_rate": 1.28229347191982e-05, "loss": 0.5893133878707886, "step": 3609 }, { "epoch": 0.8774914924647546, "grad_norm": 1.3790106946193423, "learning_rate": 1.2819078531570688e-05, "loss": 0.4912623167037964, "step": 3610 }, { "epoch": 0.8777345649003403, "grad_norm": 1.1580483513221869, "learning_rate": 1.2815221888496213e-05, "loss": 0.6452107429504395, "step": 3611 }, { "epoch": 0.8779776373359262, "grad_norm": 1.2062813735419795, "learning_rate": 1.2811364790597845e-05, "loss": 0.5988074541091919, "step": 3612 }, { "epoch": 0.8782207097715119, "grad_norm": 1.3467896124624803, "learning_rate": 1.2807507238498733e-05, "loss": 0.6441593170166016, "step": 3613 }, { "epoch": 0.8784637822070978, "grad_norm": 1.4714935908983058, "learning_rate": 1.2803649232822104e-05, "loss": 0.847917914390564, "step": 3614 }, { "epoch": 0.8787068546426835, "grad_norm": 1.3001395939131923, "learning_rate": 1.2799790774191248e-05, "loss": 0.7193191051483154, "step": 3615 }, { "epoch": 0.8789499270782694, "grad_norm": 1.3829037636807586, "learning_rate": 1.2795931863229534e-05, "loss": 0.6362663507461548, "step": 3616 }, { "epoch": 0.8791929995138551, "grad_norm": 1.4553118997281576, "learning_rate": 1.2792072500560404e-05, "loss": 0.6123595237731934, "step": 3617 }, { "epoch": 0.879436071949441, "grad_norm": 1.404125794001433, "learning_rate": 1.278821268680737e-05, "loss": 0.7522179484367371, "step": 3618 }, { "epoch": 0.8796791443850267, "grad_norm": 1.4085106247368528, "learning_rate": 1.2784352422594022e-05, "loss": 0.5903457403182983, "step": 3619 }, { "epoch": 0.8799222168206126, "grad_norm": 1.5323191248930812, "learning_rate": 1.2780491708544017e-05, "loss": 0.5456136465072632, "step": 3620 }, { "epoch": 0.8801652892561983, "grad_norm": 1.2818330125690673, "learning_rate": 1.2776630545281088e-05, "loss": 0.6164519786834717, "step": 3621 }, { "epoch": 0.8804083616917842, "grad_norm": 1.652434682632667, "learning_rate": 1.2772768933429036e-05, "loss": 0.8049845099449158, "step": 3622 }, { "epoch": 0.8806514341273699, "grad_norm": 1.345873659608999, "learning_rate": 1.2768906873611746e-05, "loss": 0.658787190914154, "step": 3623 }, { "epoch": 0.8808945065629558, "grad_norm": 1.761806294974668, "learning_rate": 1.2765044366453166e-05, "loss": 0.8353868722915649, "step": 3624 }, { "epoch": 0.8811375789985416, "grad_norm": 1.4246139315875106, "learning_rate": 1.2761181412577313e-05, "loss": 0.725865364074707, "step": 3625 }, { "epoch": 0.8813806514341274, "grad_norm": 1.274032160247022, "learning_rate": 1.2757318012608287e-05, "loss": 0.7154115438461304, "step": 3626 }, { "epoch": 0.8816237238697132, "grad_norm": 1.2660829942405805, "learning_rate": 1.2753454167170251e-05, "loss": 0.6783353686332703, "step": 3627 }, { "epoch": 0.881866796305299, "grad_norm": 1.3311510844122414, "learning_rate": 1.2749589876887444e-05, "loss": 0.6878551244735718, "step": 3628 }, { "epoch": 0.8821098687408848, "grad_norm": 1.2609800070885255, "learning_rate": 1.2745725142384176e-05, "loss": 0.5650619268417358, "step": 3629 }, { "epoch": 0.8823529411764706, "grad_norm": 1.305823470748817, "learning_rate": 1.2741859964284827e-05, "loss": 0.6459963917732239, "step": 3630 }, { "epoch": 0.8825960136120564, "grad_norm": 1.2358821731507452, "learning_rate": 1.2737994343213855e-05, "loss": 0.7714384198188782, "step": 3631 }, { "epoch": 0.8828390860476422, "grad_norm": 1.6494153356212748, "learning_rate": 1.2734128279795783e-05, "loss": 0.681789219379425, "step": 3632 }, { "epoch": 0.883082158483228, "grad_norm": 1.1928848997304038, "learning_rate": 1.2730261774655206e-05, "loss": 0.6580101251602173, "step": 3633 }, { "epoch": 0.8833252309188138, "grad_norm": 1.3790180593352497, "learning_rate": 1.2726394828416792e-05, "loss": 0.5614535808563232, "step": 3634 }, { "epoch": 0.8835683033543996, "grad_norm": 1.1997502989937923, "learning_rate": 1.2722527441705284e-05, "loss": 0.8539665341377258, "step": 3635 }, { "epoch": 0.8838113757899854, "grad_norm": 1.376746821385164, "learning_rate": 1.2718659615145493e-05, "loss": 0.5920838713645935, "step": 3636 }, { "epoch": 0.8840544482255712, "grad_norm": 1.4797150697157266, "learning_rate": 1.2714791349362293e-05, "loss": 0.60682213306427, "step": 3637 }, { "epoch": 0.8842975206611571, "grad_norm": 1.301368835884581, "learning_rate": 1.2710922644980638e-05, "loss": 0.6968247890472412, "step": 3638 }, { "epoch": 0.8845405930967428, "grad_norm": 1.404909119274298, "learning_rate": 1.270705350262556e-05, "loss": 0.650873064994812, "step": 3639 }, { "epoch": 0.8847836655323287, "grad_norm": 1.4444328924185488, "learning_rate": 1.2703183922922145e-05, "loss": 0.6028798222541809, "step": 3640 }, { "epoch": 0.8850267379679144, "grad_norm": 1.2281509993108015, "learning_rate": 1.2699313906495559e-05, "loss": 0.5837726593017578, "step": 3641 }, { "epoch": 0.8852698104035003, "grad_norm": 1.5430487447301535, "learning_rate": 1.2695443453971036e-05, "loss": 0.9982308149337769, "step": 3642 }, { "epoch": 0.885512882839086, "grad_norm": 1.4337273706430802, "learning_rate": 1.2691572565973887e-05, "loss": 0.7001684904098511, "step": 3643 }, { "epoch": 0.8857559552746719, "grad_norm": 1.2962758108406276, "learning_rate": 1.2687701243129484e-05, "loss": 0.7449139356613159, "step": 3644 }, { "epoch": 0.8859990277102576, "grad_norm": 1.3963780947985307, "learning_rate": 1.2683829486063275e-05, "loss": 0.7326188683509827, "step": 3645 }, { "epoch": 0.8862421001458435, "grad_norm": 1.7133808848934824, "learning_rate": 1.2679957295400773e-05, "loss": 0.6903387904167175, "step": 3646 }, { "epoch": 0.8864851725814292, "grad_norm": 1.4737014438723524, "learning_rate": 1.2676084671767568e-05, "loss": 0.682774007320404, "step": 3647 }, { "epoch": 0.8867282450170151, "grad_norm": 1.2492335648379946, "learning_rate": 1.2672211615789316e-05, "loss": 0.7150461673736572, "step": 3648 }, { "epoch": 0.8869713174526008, "grad_norm": 1.377531055535527, "learning_rate": 1.2668338128091742e-05, "loss": 0.7912008762359619, "step": 3649 }, { "epoch": 0.8872143898881867, "grad_norm": 1.4990189392756048, "learning_rate": 1.2664464209300642e-05, "loss": 0.7761310338973999, "step": 3650 }, { "epoch": 0.8874574623237725, "grad_norm": 1.3170329241204932, "learning_rate": 1.2660589860041883e-05, "loss": 0.7261946201324463, "step": 3651 }, { "epoch": 0.8877005347593583, "grad_norm": 1.2296058307637785, "learning_rate": 1.2656715080941399e-05, "loss": 0.5663048028945923, "step": 3652 }, { "epoch": 0.8879436071949441, "grad_norm": 1.3716700844962932, "learning_rate": 1.2652839872625198e-05, "loss": 0.5914954543113708, "step": 3653 }, { "epoch": 0.8881866796305299, "grad_norm": 1.2786142743516082, "learning_rate": 1.2648964235719352e-05, "loss": 0.5149909853935242, "step": 3654 }, { "epoch": 0.8884297520661157, "grad_norm": 1.4221900123534053, "learning_rate": 1.2645088170850002e-05, "loss": 0.7158361673355103, "step": 3655 }, { "epoch": 0.8886728245017015, "grad_norm": 1.3110056367466427, "learning_rate": 1.2641211678643362e-05, "loss": 0.5613404512405396, "step": 3656 }, { "epoch": 0.8889158969372873, "grad_norm": 1.3071809051313856, "learning_rate": 1.2637334759725712e-05, "loss": 0.7472246885299683, "step": 3657 }, { "epoch": 0.8891589693728731, "grad_norm": 1.187154709451313, "learning_rate": 1.2633457414723407e-05, "loss": 0.6028858423233032, "step": 3658 }, { "epoch": 0.8894020418084589, "grad_norm": 1.2654273775864533, "learning_rate": 1.262957964426286e-05, "loss": 0.5828535556793213, "step": 3659 }, { "epoch": 0.8896451142440447, "grad_norm": 1.35299212641508, "learning_rate": 1.2625701448970564e-05, "loss": 0.764906108379364, "step": 3660 }, { "epoch": 0.8898881866796305, "grad_norm": 1.6289151647618754, "learning_rate": 1.2621822829473074e-05, "loss": 0.7167148590087891, "step": 3661 }, { "epoch": 0.8901312591152163, "grad_norm": 1.2157533252435564, "learning_rate": 1.2617943786397016e-05, "loss": 0.5135914087295532, "step": 3662 }, { "epoch": 0.8903743315508021, "grad_norm": 1.1591254100448805, "learning_rate": 1.2614064320369082e-05, "loss": 0.4535629153251648, "step": 3663 }, { "epoch": 0.8906174039863879, "grad_norm": 1.2817627370811684, "learning_rate": 1.2610184432016032e-05, "loss": 0.6048192977905273, "step": 3664 }, { "epoch": 0.8908604764219737, "grad_norm": 1.335994444394673, "learning_rate": 1.26063041219647e-05, "loss": 0.7264999747276306, "step": 3665 }, { "epoch": 0.8911035488575596, "grad_norm": 1.2743162460984814, "learning_rate": 1.2602423390841984e-05, "loss": 0.6183239221572876, "step": 3666 }, { "epoch": 0.8913466212931453, "grad_norm": 1.2169491122815637, "learning_rate": 1.2598542239274847e-05, "loss": 0.6721915006637573, "step": 3667 }, { "epoch": 0.8915896937287312, "grad_norm": 1.4680766557601157, "learning_rate": 1.2594660667890328e-05, "loss": 0.7029007077217102, "step": 3668 }, { "epoch": 0.891832766164317, "grad_norm": 1.3774948876524007, "learning_rate": 1.2590778677315525e-05, "loss": 0.4739689528942108, "step": 3669 }, { "epoch": 0.8920758385999028, "grad_norm": 1.3616920019423897, "learning_rate": 1.258689626817761e-05, "loss": 0.7446638345718384, "step": 3670 }, { "epoch": 0.8923189110354885, "grad_norm": 1.4425320469644671, "learning_rate": 1.2583013441103817e-05, "loss": 0.707459568977356, "step": 3671 }, { "epoch": 0.8925619834710744, "grad_norm": 1.3473217471146908, "learning_rate": 1.2579130196721455e-05, "loss": 0.6093310713768005, "step": 3672 }, { "epoch": 0.8928050559066601, "grad_norm": 1.657195489505617, "learning_rate": 1.2575246535657897e-05, "loss": 0.6580034494400024, "step": 3673 }, { "epoch": 0.893048128342246, "grad_norm": 1.2706745793043175, "learning_rate": 1.2571362458540577e-05, "loss": 0.659744143486023, "step": 3674 }, { "epoch": 0.8932912007778318, "grad_norm": 1.2020911603100894, "learning_rate": 1.2567477965997003e-05, "loss": 0.6829582452774048, "step": 3675 }, { "epoch": 0.8935342732134176, "grad_norm": 1.230466103664099, "learning_rate": 1.2563593058654757e-05, "loss": 0.5823167562484741, "step": 3676 }, { "epoch": 0.8937773456490034, "grad_norm": 1.214600709304945, "learning_rate": 1.2559707737141471e-05, "loss": 0.5814991593360901, "step": 3677 }, { "epoch": 0.8940204180845892, "grad_norm": 1.398430766823864, "learning_rate": 1.255582200208486e-05, "loss": 0.7541642189025879, "step": 3678 }, { "epoch": 0.8942634905201751, "grad_norm": 1.425771338776452, "learning_rate": 1.2551935854112689e-05, "loss": 0.6288760304450989, "step": 3679 }, { "epoch": 0.8945065629557608, "grad_norm": 1.335801691097507, "learning_rate": 1.254804929385281e-05, "loss": 0.4935429096221924, "step": 3680 }, { "epoch": 0.8947496353913467, "grad_norm": 1.198420886183355, "learning_rate": 1.2544162321933127e-05, "loss": 0.577339768409729, "step": 3681 }, { "epoch": 0.8949927078269324, "grad_norm": 1.2582121930644758, "learning_rate": 1.2540274938981609e-05, "loss": 0.5944368839263916, "step": 3682 }, { "epoch": 0.8952357802625183, "grad_norm": 1.9193044790871978, "learning_rate": 1.2536387145626302e-05, "loss": 0.7364226579666138, "step": 3683 }, { "epoch": 0.895478852698104, "grad_norm": 1.203788137117401, "learning_rate": 1.2532498942495317e-05, "loss": 0.5209017992019653, "step": 3684 }, { "epoch": 0.8957219251336899, "grad_norm": 1.2108688211545402, "learning_rate": 1.2528610330216824e-05, "loss": 0.620879590511322, "step": 3685 }, { "epoch": 0.8959649975692756, "grad_norm": 1.2946334656015799, "learning_rate": 1.2524721309419058e-05, "loss": 0.6123335957527161, "step": 3686 }, { "epoch": 0.8962080700048615, "grad_norm": 1.179430035889209, "learning_rate": 1.252083188073033e-05, "loss": 0.6950648427009583, "step": 3687 }, { "epoch": 0.8964511424404472, "grad_norm": 1.3190984838500643, "learning_rate": 1.2516942044779012e-05, "loss": 0.5403076410293579, "step": 3688 }, { "epoch": 0.8966942148760331, "grad_norm": 1.4844645010668223, "learning_rate": 1.2513051802193539e-05, "loss": 0.7667686343193054, "step": 3689 }, { "epoch": 0.8969372873116188, "grad_norm": 1.2345342908705117, "learning_rate": 1.2509161153602415e-05, "loss": 0.4646822214126587, "step": 3690 }, { "epoch": 0.8971803597472047, "grad_norm": 1.230589218494095, "learning_rate": 1.2505270099634209e-05, "loss": 0.570980966091156, "step": 3691 }, { "epoch": 0.8974234321827905, "grad_norm": 1.47069351698544, "learning_rate": 1.250137864091755e-05, "loss": 0.8731635212898254, "step": 3692 }, { "epoch": 0.8976665046183763, "grad_norm": 1.284025169044106, "learning_rate": 1.2497486778081145e-05, "loss": 0.7434060573577881, "step": 3693 }, { "epoch": 0.8979095770539621, "grad_norm": 1.7739121338609676, "learning_rate": 1.2493594511753753e-05, "loss": 0.7490396499633789, "step": 3694 }, { "epoch": 0.8981526494895479, "grad_norm": 1.2561922566385846, "learning_rate": 1.2489701842564204e-05, "loss": 0.4983523488044739, "step": 3695 }, { "epoch": 0.8983957219251337, "grad_norm": 1.3398197991860126, "learning_rate": 1.2485808771141397e-05, "loss": 0.6540018916130066, "step": 3696 }, { "epoch": 0.8986387943607195, "grad_norm": 1.3815566619999848, "learning_rate": 1.248191529811429e-05, "loss": 0.7271016836166382, "step": 3697 }, { "epoch": 0.8988818667963053, "grad_norm": 1.771879206869176, "learning_rate": 1.247802142411191e-05, "loss": 0.6550743579864502, "step": 3698 }, { "epoch": 0.8991249392318911, "grad_norm": 1.377048281414995, "learning_rate": 1.2474127149763345e-05, "loss": 0.6715385913848877, "step": 3699 }, { "epoch": 0.8993680116674769, "grad_norm": 1.3010559935036925, "learning_rate": 1.2470232475697744e-05, "loss": 0.646979808807373, "step": 3700 }, { "epoch": 0.8996110841030627, "grad_norm": 1.2954726766251263, "learning_rate": 1.2466337402544333e-05, "loss": 0.5484892129898071, "step": 3701 }, { "epoch": 0.8998541565386485, "grad_norm": 1.481711449264923, "learning_rate": 1.2462441930932394e-05, "loss": 0.6565549373626709, "step": 3702 }, { "epoch": 0.9000972289742343, "grad_norm": 1.3266041739123702, "learning_rate": 1.2458546061491275e-05, "loss": 0.5821484327316284, "step": 3703 }, { "epoch": 0.9003403014098201, "grad_norm": 1.4628564940638138, "learning_rate": 1.2454649794850381e-05, "loss": 0.7788103818893433, "step": 3704 }, { "epoch": 0.9005833738454059, "grad_norm": 1.2023278226836809, "learning_rate": 1.2450753131639197e-05, "loss": 0.6249845027923584, "step": 3705 }, { "epoch": 0.9008264462809917, "grad_norm": 1.7396266589626788, "learning_rate": 1.2446856072487263e-05, "loss": 0.6508196592330933, "step": 3706 }, { "epoch": 0.9010695187165776, "grad_norm": 1.3641168508107044, "learning_rate": 1.2442958618024179e-05, "loss": 0.5786810517311096, "step": 3707 }, { "epoch": 0.9013125911521633, "grad_norm": 1.366013478760362, "learning_rate": 1.2439060768879614e-05, "loss": 0.6034128069877625, "step": 3708 }, { "epoch": 0.9015556635877492, "grad_norm": 1.5418951769682256, "learning_rate": 1.2435162525683303e-05, "loss": 0.6922034621238708, "step": 3709 }, { "epoch": 0.9017987360233349, "grad_norm": 1.3638904984642215, "learning_rate": 1.2431263889065036e-05, "loss": 0.6901249885559082, "step": 3710 }, { "epoch": 0.9020418084589208, "grad_norm": 1.242432035899852, "learning_rate": 1.2427364859654675e-05, "loss": 0.6590898036956787, "step": 3711 }, { "epoch": 0.9022848808945065, "grad_norm": 1.2580645437097582, "learning_rate": 1.242346543808214e-05, "loss": 0.6105222702026367, "step": 3712 }, { "epoch": 0.9025279533300924, "grad_norm": 1.4765431704286938, "learning_rate": 1.2419565624977426e-05, "loss": 0.7244282960891724, "step": 3713 }, { "epoch": 0.9027710257656781, "grad_norm": 1.2292942674191802, "learning_rate": 1.241566542097057e-05, "loss": 0.6703225374221802, "step": 3714 }, { "epoch": 0.903014098201264, "grad_norm": 1.0413791149395923, "learning_rate": 1.2411764826691695e-05, "loss": 0.6483434438705444, "step": 3715 }, { "epoch": 0.9032571706368497, "grad_norm": 1.840766800883894, "learning_rate": 1.2407863842770968e-05, "loss": 0.6977413892745972, "step": 3716 }, { "epoch": 0.9035002430724356, "grad_norm": 1.5574751044345785, "learning_rate": 1.2403962469838633e-05, "loss": 0.6872271299362183, "step": 3717 }, { "epoch": 0.9037433155080213, "grad_norm": 1.1391701281497233, "learning_rate": 1.2400060708524985e-05, "loss": 0.7349467873573303, "step": 3718 }, { "epoch": 0.9039863879436072, "grad_norm": 1.245937725009586, "learning_rate": 1.2396158559460396e-05, "loss": 0.4877455234527588, "step": 3719 }, { "epoch": 0.904229460379193, "grad_norm": 1.341358794338902, "learning_rate": 1.2392256023275284e-05, "loss": 0.7526109218597412, "step": 3720 }, { "epoch": 0.9044725328147788, "grad_norm": 1.4297657151210932, "learning_rate": 1.2388353100600143e-05, "loss": 0.6387484073638916, "step": 3721 }, { "epoch": 0.9047156052503647, "grad_norm": 1.2781192350742763, "learning_rate": 1.2384449792065527e-05, "loss": 0.566352367401123, "step": 3722 }, { "epoch": 0.9049586776859504, "grad_norm": 1.1307339715529257, "learning_rate": 1.2380546098302045e-05, "loss": 0.48210299015045166, "step": 3723 }, { "epoch": 0.9052017501215363, "grad_norm": 1.3477982795032344, "learning_rate": 1.2376642019940373e-05, "loss": 0.7133333683013916, "step": 3724 }, { "epoch": 0.905444822557122, "grad_norm": 1.30688718720159, "learning_rate": 1.2372737557611256e-05, "loss": 0.49489104747772217, "step": 3725 }, { "epoch": 0.9056878949927079, "grad_norm": 1.998131334136859, "learning_rate": 1.2368832711945487e-05, "loss": 0.6999320387840271, "step": 3726 }, { "epoch": 0.9059309674282936, "grad_norm": 1.3214383932933351, "learning_rate": 1.236492748357393e-05, "loss": 0.5510645508766174, "step": 3727 }, { "epoch": 0.9061740398638795, "grad_norm": 1.2868763218882886, "learning_rate": 1.2361021873127508e-05, "loss": 0.5975680351257324, "step": 3728 }, { "epoch": 0.9064171122994652, "grad_norm": 1.3250619115975497, "learning_rate": 1.235711588123721e-05, "loss": 0.7784155607223511, "step": 3729 }, { "epoch": 0.9066601847350511, "grad_norm": 1.2053422785952659, "learning_rate": 1.235320950853408e-05, "loss": 0.4920244812965393, "step": 3730 }, { "epoch": 0.9069032571706368, "grad_norm": 1.5077371676736677, "learning_rate": 1.234930275564923e-05, "loss": 0.6581258773803711, "step": 3731 }, { "epoch": 0.9071463296062227, "grad_norm": 1.14353373767442, "learning_rate": 1.2345395623213829e-05, "loss": 0.5621598362922668, "step": 3732 }, { "epoch": 0.9073894020418085, "grad_norm": 1.3877441711045886, "learning_rate": 1.2341488111859111e-05, "loss": 0.8193432092666626, "step": 3733 }, { "epoch": 0.9076324744773943, "grad_norm": 1.3546572764123948, "learning_rate": 1.2337580222216365e-05, "loss": 0.7565375566482544, "step": 3734 }, { "epoch": 0.9078755469129801, "grad_norm": 1.2822554589680821, "learning_rate": 1.2333671954916946e-05, "loss": 0.6280151605606079, "step": 3735 }, { "epoch": 0.9081186193485659, "grad_norm": 1.2268731447534664, "learning_rate": 1.2329763310592271e-05, "loss": 0.46533429622650146, "step": 3736 }, { "epoch": 0.9083616917841517, "grad_norm": 1.2505314829524006, "learning_rate": 1.2325854289873812e-05, "loss": 0.745795726776123, "step": 3737 }, { "epoch": 0.9086047642197375, "grad_norm": 1.2043546969373735, "learning_rate": 1.2321944893393107e-05, "loss": 0.5901858806610107, "step": 3738 }, { "epoch": 0.9088478366553233, "grad_norm": 1.2183021433458487, "learning_rate": 1.2318035121781757e-05, "loss": 0.7998614311218262, "step": 3739 }, { "epoch": 0.9090909090909091, "grad_norm": 1.2971440646102637, "learning_rate": 1.231412497567142e-05, "loss": 0.6108332872390747, "step": 3740 }, { "epoch": 0.9093339815264949, "grad_norm": 1.5958906386860903, "learning_rate": 1.2310214455693808e-05, "loss": 0.6883814334869385, "step": 3741 }, { "epoch": 0.9095770539620807, "grad_norm": 1.2664485565750203, "learning_rate": 1.2306303562480703e-05, "loss": 0.7229781150817871, "step": 3742 }, { "epoch": 0.9098201263976665, "grad_norm": 1.6384933863246776, "learning_rate": 1.230239229666395e-05, "loss": 0.6433151364326477, "step": 3743 }, { "epoch": 0.9100631988332523, "grad_norm": 1.4086917715425298, "learning_rate": 1.2298480658875445e-05, "loss": 0.7177548408508301, "step": 3744 }, { "epoch": 0.9103062712688381, "grad_norm": 1.4306573186185683, "learning_rate": 1.229456864974714e-05, "loss": 0.6601063013076782, "step": 3745 }, { "epoch": 0.9105493437044239, "grad_norm": 1.343553613144654, "learning_rate": 1.2290656269911066e-05, "loss": 0.5672842860221863, "step": 3746 }, { "epoch": 0.9107924161400097, "grad_norm": 1.333111404913533, "learning_rate": 1.2286743519999296e-05, "loss": 0.5683232545852661, "step": 3747 }, { "epoch": 0.9110354885755956, "grad_norm": 1.1586169955824797, "learning_rate": 1.2282830400643971e-05, "loss": 0.6230043768882751, "step": 3748 }, { "epoch": 0.9112785610111813, "grad_norm": 1.4946104442184287, "learning_rate": 1.2278916912477285e-05, "loss": 0.654283881187439, "step": 3749 }, { "epoch": 0.9115216334467672, "grad_norm": 1.1879966163423132, "learning_rate": 1.2275003056131505e-05, "loss": 0.5203551054000854, "step": 3750 }, { "epoch": 0.9117647058823529, "grad_norm": 1.4800888348493166, "learning_rate": 1.2271088832238945e-05, "loss": 0.7152693271636963, "step": 3751 }, { "epoch": 0.9120077783179388, "grad_norm": 1.6002672695006233, "learning_rate": 1.226717424143198e-05, "loss": 0.6060491800308228, "step": 3752 }, { "epoch": 0.9122508507535245, "grad_norm": 1.418774919647667, "learning_rate": 1.2263259284343053e-05, "loss": 0.7533012628555298, "step": 3753 }, { "epoch": 0.9124939231891104, "grad_norm": 1.6332438142431263, "learning_rate": 1.225934396160465e-05, "loss": 0.7625036239624023, "step": 3754 }, { "epoch": 0.9127369956246961, "grad_norm": 1.2225716646119176, "learning_rate": 1.2255428273849334e-05, "loss": 0.6789162755012512, "step": 3755 }, { "epoch": 0.912980068060282, "grad_norm": 1.2838140264947664, "learning_rate": 1.2251512221709718e-05, "loss": 0.537168025970459, "step": 3756 }, { "epoch": 0.9132231404958677, "grad_norm": 1.565885543217028, "learning_rate": 1.2247595805818471e-05, "loss": 0.6169619560241699, "step": 3757 }, { "epoch": 0.9134662129314536, "grad_norm": 1.1548643303191275, "learning_rate": 1.2243679026808325e-05, "loss": 0.6360597610473633, "step": 3758 }, { "epoch": 0.9137092853670393, "grad_norm": 1.3878923706777822, "learning_rate": 1.2239761885312076e-05, "loss": 0.6450039148330688, "step": 3759 }, { "epoch": 0.9139523578026252, "grad_norm": 1.3255274373038577, "learning_rate": 1.2235844381962565e-05, "loss": 0.6109063625335693, "step": 3760 }, { "epoch": 0.914195430238211, "grad_norm": 1.3944298131793282, "learning_rate": 1.2231926517392705e-05, "loss": 0.6002954244613647, "step": 3761 }, { "epoch": 0.9144385026737968, "grad_norm": 1.3643003447886066, "learning_rate": 1.2228008292235458e-05, "loss": 0.6377880573272705, "step": 3762 }, { "epoch": 0.9146815751093826, "grad_norm": 1.5434967442447838, "learning_rate": 1.222408970712385e-05, "loss": 0.6412267684936523, "step": 3763 }, { "epoch": 0.9149246475449684, "grad_norm": 1.2051289269167145, "learning_rate": 1.222017076269096e-05, "loss": 0.6701064705848694, "step": 3764 }, { "epoch": 0.9151677199805542, "grad_norm": 1.3029086536304417, "learning_rate": 1.2216251459569927e-05, "loss": 0.5844871997833252, "step": 3765 }, { "epoch": 0.91541079241614, "grad_norm": 1.325752365859962, "learning_rate": 1.2212331798393955e-05, "loss": 0.790337324142456, "step": 3766 }, { "epoch": 0.9156538648517258, "grad_norm": 1.2665510414445937, "learning_rate": 1.2208411779796297e-05, "loss": 0.6317154169082642, "step": 3767 }, { "epoch": 0.9158969372873116, "grad_norm": 1.3307764026281266, "learning_rate": 1.2204491404410265e-05, "loss": 0.5990461111068726, "step": 3768 }, { "epoch": 0.9161400097228974, "grad_norm": 1.1958813825031505, "learning_rate": 1.2200570672869232e-05, "loss": 0.5005378723144531, "step": 3769 }, { "epoch": 0.9163830821584832, "grad_norm": 1.3306756951706584, "learning_rate": 1.2196649585806626e-05, "loss": 0.5790282487869263, "step": 3770 }, { "epoch": 0.916626154594069, "grad_norm": 1.27199945853749, "learning_rate": 1.2192728143855931e-05, "loss": 0.6745859384536743, "step": 3771 }, { "epoch": 0.9168692270296548, "grad_norm": 2.1269307540443996, "learning_rate": 1.2188806347650696e-05, "loss": 0.7057361602783203, "step": 3772 }, { "epoch": 0.9171122994652406, "grad_norm": 1.3556489144111885, "learning_rate": 1.2184884197824513e-05, "loss": 0.5332280397415161, "step": 3773 }, { "epoch": 0.9173553719008265, "grad_norm": 1.4768209890766575, "learning_rate": 1.2180961695011049e-05, "loss": 0.8557421565055847, "step": 3774 }, { "epoch": 0.9175984443364122, "grad_norm": 1.1919534757730155, "learning_rate": 1.2177038839844015e-05, "loss": 0.5733343362808228, "step": 3775 }, { "epoch": 0.9178415167719981, "grad_norm": 4.459068858603627, "learning_rate": 1.2173115632957182e-05, "loss": 0.6332818269729614, "step": 3776 }, { "epoch": 0.9180845892075838, "grad_norm": 1.2359104997872483, "learning_rate": 1.216919207498438e-05, "loss": 0.6219059228897095, "step": 3777 }, { "epoch": 0.9183276616431697, "grad_norm": 1.2276501473465211, "learning_rate": 1.2165268166559493e-05, "loss": 0.714837908744812, "step": 3778 }, { "epoch": 0.9185707340787554, "grad_norm": 1.8420642631480595, "learning_rate": 1.2161343908316465e-05, "loss": 0.7665092945098877, "step": 3779 }, { "epoch": 0.9188138065143413, "grad_norm": 1.4300940200691064, "learning_rate": 1.2157419300889293e-05, "loss": 0.6148480176925659, "step": 3780 }, { "epoch": 0.919056878949927, "grad_norm": 1.389441407415601, "learning_rate": 1.2153494344912031e-05, "loss": 0.6692731380462646, "step": 3781 }, { "epoch": 0.9192999513855129, "grad_norm": 1.3301174818693604, "learning_rate": 1.214956904101879e-05, "loss": 0.7324767112731934, "step": 3782 }, { "epoch": 0.9195430238210986, "grad_norm": 1.311353943304261, "learning_rate": 1.214564338984374e-05, "loss": 0.46852701902389526, "step": 3783 }, { "epoch": 0.9197860962566845, "grad_norm": 1.3957516565650339, "learning_rate": 1.2141717392021104e-05, "loss": 0.646340012550354, "step": 3784 }, { "epoch": 0.9200291686922702, "grad_norm": 1.4539204666013008, "learning_rate": 1.2137791048185161e-05, "loss": 0.8532063961029053, "step": 3785 }, { "epoch": 0.9202722411278561, "grad_norm": 1.3732302971071688, "learning_rate": 1.2133864358970245e-05, "loss": 0.7229231595993042, "step": 3786 }, { "epoch": 0.9205153135634419, "grad_norm": 1.3047625559630718, "learning_rate": 1.2129937325010748e-05, "loss": 0.41690593957901, "step": 3787 }, { "epoch": 0.9207583859990277, "grad_norm": 1.3187564209520906, "learning_rate": 1.212600994694112e-05, "loss": 0.7222840189933777, "step": 3788 }, { "epoch": 0.9210014584346136, "grad_norm": 1.5706499682257533, "learning_rate": 1.212208222539586e-05, "loss": 0.629130482673645, "step": 3789 }, { "epoch": 0.9212445308701993, "grad_norm": 1.5241345720823152, "learning_rate": 1.2118154161009523e-05, "loss": 0.6864324808120728, "step": 3790 }, { "epoch": 0.9214876033057852, "grad_norm": 1.273948577786055, "learning_rate": 1.2114225754416728e-05, "loss": 0.5567405819892883, "step": 3791 }, { "epoch": 0.9217306757413709, "grad_norm": 1.4392310907630796, "learning_rate": 1.2110297006252145e-05, "loss": 0.587997317314148, "step": 3792 }, { "epoch": 0.9219737481769568, "grad_norm": 1.5551709822263606, "learning_rate": 1.2106367917150492e-05, "loss": 0.7278091907501221, "step": 3793 }, { "epoch": 0.9222168206125425, "grad_norm": 1.3618952229417542, "learning_rate": 1.210243848774655e-05, "loss": 0.5677658915519714, "step": 3794 }, { "epoch": 0.9224598930481284, "grad_norm": 1.5133534547402339, "learning_rate": 1.2098508718675156e-05, "loss": 0.6326487064361572, "step": 3795 }, { "epoch": 0.9227029654837141, "grad_norm": 1.028706340778269, "learning_rate": 1.2094578610571197e-05, "loss": 0.7757592797279358, "step": 3796 }, { "epoch": 0.9229460379193, "grad_norm": 1.1839967529306707, "learning_rate": 1.2090648164069613e-05, "loss": 0.6925965547561646, "step": 3797 }, { "epoch": 0.9231891103548857, "grad_norm": 1.3292284688709444, "learning_rate": 1.2086717379805408e-05, "loss": 0.6937450170516968, "step": 3798 }, { "epoch": 0.9234321827904716, "grad_norm": 1.5579474079044247, "learning_rate": 1.2082786258413632e-05, "loss": 0.6571778655052185, "step": 3799 }, { "epoch": 0.9236752552260573, "grad_norm": 1.5229778001192957, "learning_rate": 1.2078854800529393e-05, "loss": 0.8948364853858948, "step": 3800 }, { "epoch": 0.9239183276616432, "grad_norm": 1.2151430421511487, "learning_rate": 1.207492300678785e-05, "loss": 0.5635933876037598, "step": 3801 }, { "epoch": 0.924161400097229, "grad_norm": 1.2221746578177497, "learning_rate": 1.207099087782422e-05, "loss": 0.5723222494125366, "step": 3802 }, { "epoch": 0.9244044725328148, "grad_norm": 1.5389773620911713, "learning_rate": 1.2067058414273776e-05, "loss": 0.605961799621582, "step": 3803 }, { "epoch": 0.9246475449684006, "grad_norm": 1.0480831871545713, "learning_rate": 1.206312561677184e-05, "loss": 0.6015462875366211, "step": 3804 }, { "epoch": 0.9248906174039864, "grad_norm": 1.3808258897548187, "learning_rate": 1.205919248595379e-05, "loss": 0.6609705686569214, "step": 3805 }, { "epoch": 0.9251336898395722, "grad_norm": 1.318805128225778, "learning_rate": 1.205525902245506e-05, "loss": 0.6242004036903381, "step": 3806 }, { "epoch": 0.925376762275158, "grad_norm": 1.2409174492521329, "learning_rate": 1.2051325226911132e-05, "loss": 0.6498847603797913, "step": 3807 }, { "epoch": 0.9256198347107438, "grad_norm": 1.5233170939302418, "learning_rate": 1.2047391099957548e-05, "loss": 0.5552986860275269, "step": 3808 }, { "epoch": 0.9258629071463296, "grad_norm": 1.7010716013503244, "learning_rate": 1.2043456642229898e-05, "loss": 0.787147581577301, "step": 3809 }, { "epoch": 0.9261059795819154, "grad_norm": 1.4144700625029365, "learning_rate": 1.2039521854363829e-05, "loss": 0.8662188053131104, "step": 3810 }, { "epoch": 0.9263490520175012, "grad_norm": 1.1748863824229963, "learning_rate": 1.2035586736995047e-05, "loss": 0.8005936145782471, "step": 3811 }, { "epoch": 0.926592124453087, "grad_norm": 1.3338308509629366, "learning_rate": 1.2031651290759297e-05, "loss": 0.6617075800895691, "step": 3812 }, { "epoch": 0.9268351968886728, "grad_norm": 1.2828909008190659, "learning_rate": 1.2027715516292386e-05, "loss": 0.5126579403877258, "step": 3813 }, { "epoch": 0.9270782693242586, "grad_norm": 1.4327106372152543, "learning_rate": 1.202377941423018e-05, "loss": 0.5949416756629944, "step": 3814 }, { "epoch": 0.9273213417598445, "grad_norm": 1.2742292212347035, "learning_rate": 1.2019842985208581e-05, "loss": 0.654801607131958, "step": 3815 }, { "epoch": 0.9275644141954302, "grad_norm": 1.2983833852254811, "learning_rate": 1.201590622986356e-05, "loss": 0.6800340414047241, "step": 3816 }, { "epoch": 0.9278074866310161, "grad_norm": 1.40912751356604, "learning_rate": 1.2011969148831136e-05, "loss": 0.687713623046875, "step": 3817 }, { "epoch": 0.9280505590666018, "grad_norm": 1.3028995688419946, "learning_rate": 1.2008031742747373e-05, "loss": 0.4550766348838806, "step": 3818 }, { "epoch": 0.9282936315021877, "grad_norm": 1.2032033043934025, "learning_rate": 1.2004094012248396e-05, "loss": 0.5933477878570557, "step": 3819 }, { "epoch": 0.9285367039377734, "grad_norm": 1.557740277716701, "learning_rate": 1.2000155957970381e-05, "loss": 0.6672928929328918, "step": 3820 }, { "epoch": 0.9287797763733593, "grad_norm": 1.3703918538138247, "learning_rate": 1.1996217580549557e-05, "loss": 0.54453045129776, "step": 3821 }, { "epoch": 0.929022848808945, "grad_norm": 1.322745165136018, "learning_rate": 1.1992278880622198e-05, "loss": 0.560180127620697, "step": 3822 }, { "epoch": 0.9292659212445309, "grad_norm": 1.3756856696844402, "learning_rate": 1.1988339858824645e-05, "loss": 0.6178858876228333, "step": 3823 }, { "epoch": 0.9295089936801166, "grad_norm": 1.7245860258515764, "learning_rate": 1.1984400515793273e-05, "loss": 0.8331149816513062, "step": 3824 }, { "epoch": 0.9297520661157025, "grad_norm": 1.1784018028362082, "learning_rate": 1.1980460852164522e-05, "loss": 0.6358345746994019, "step": 3825 }, { "epoch": 0.9299951385512882, "grad_norm": 1.272786574733114, "learning_rate": 1.1976520868574877e-05, "loss": 0.6120679378509521, "step": 3826 }, { "epoch": 0.9302382109868741, "grad_norm": 1.347646979743435, "learning_rate": 1.1972580565660875e-05, "loss": 0.6302683353424072, "step": 3827 }, { "epoch": 0.93048128342246, "grad_norm": 1.403747132432601, "learning_rate": 1.1968639944059115e-05, "loss": 0.8250715732574463, "step": 3828 }, { "epoch": 0.9307243558580457, "grad_norm": 1.5119269056017333, "learning_rate": 1.1964699004406231e-05, "loss": 0.5961302518844604, "step": 3829 }, { "epoch": 0.9309674282936315, "grad_norm": 1.1601368373350616, "learning_rate": 1.196075774733892e-05, "loss": 0.6619002819061279, "step": 3830 }, { "epoch": 0.9312105007292173, "grad_norm": 1.5064991516925386, "learning_rate": 1.1956816173493924e-05, "loss": 0.7387260794639587, "step": 3831 }, { "epoch": 0.9314535731648032, "grad_norm": 1.404252742153749, "learning_rate": 1.1952874283508042e-05, "loss": 0.7702144980430603, "step": 3832 }, { "epoch": 0.9316966456003889, "grad_norm": 1.358205875823311, "learning_rate": 1.1948932078018125e-05, "loss": 0.6170068979263306, "step": 3833 }, { "epoch": 0.9319397180359748, "grad_norm": 1.5243085684530153, "learning_rate": 1.1944989557661063e-05, "loss": 0.5774174928665161, "step": 3834 }, { "epoch": 0.9321827904715605, "grad_norm": 1.5255375931125754, "learning_rate": 1.1941046723073806e-05, "loss": 0.6178733110427856, "step": 3835 }, { "epoch": 0.9324258629071464, "grad_norm": 1.5564224537389872, "learning_rate": 1.1937103574893356e-05, "loss": 0.6372858881950378, "step": 3836 }, { "epoch": 0.9326689353427321, "grad_norm": 1.3600476258891392, "learning_rate": 1.1933160113756764e-05, "loss": 0.738500714302063, "step": 3837 }, { "epoch": 0.932912007778318, "grad_norm": 1.5252807005803468, "learning_rate": 1.1929216340301131e-05, "loss": 0.6160486936569214, "step": 3838 }, { "epoch": 0.9331550802139037, "grad_norm": 1.5421508420737708, "learning_rate": 1.1925272255163604e-05, "loss": 0.8352925777435303, "step": 3839 }, { "epoch": 0.9333981526494896, "grad_norm": 1.3053940633187433, "learning_rate": 1.1921327858981392e-05, "loss": 0.5799093842506409, "step": 3840 }, { "epoch": 0.9336412250850753, "grad_norm": 1.4411659492850784, "learning_rate": 1.191738315239174e-05, "loss": 0.6046398878097534, "step": 3841 }, { "epoch": 0.9338842975206612, "grad_norm": 1.5850564253466222, "learning_rate": 1.1913438136031954e-05, "loss": 0.7190558910369873, "step": 3842 }, { "epoch": 0.934127369956247, "grad_norm": 1.1247624828894123, "learning_rate": 1.1909492810539385e-05, "loss": 0.46794143319129944, "step": 3843 }, { "epoch": 0.9343704423918328, "grad_norm": 1.1189623490445673, "learning_rate": 1.1905547176551436e-05, "loss": 0.695636510848999, "step": 3844 }, { "epoch": 0.9346135148274186, "grad_norm": 1.3626648873944602, "learning_rate": 1.1901601234705556e-05, "loss": 0.48845094442367554, "step": 3845 }, { "epoch": 0.9348565872630044, "grad_norm": 1.342881044763775, "learning_rate": 1.1897654985639249e-05, "loss": 0.6462626457214355, "step": 3846 }, { "epoch": 0.9350996596985902, "grad_norm": 1.396843222507574, "learning_rate": 1.1893708429990065e-05, "loss": 0.6835171580314636, "step": 3847 }, { "epoch": 0.935342732134176, "grad_norm": 1.2647689466903531, "learning_rate": 1.1889761568395609e-05, "loss": 0.7018706798553467, "step": 3848 }, { "epoch": 0.9355858045697618, "grad_norm": 1.3927178337941972, "learning_rate": 1.1885814401493524e-05, "loss": 0.7817326784133911, "step": 3849 }, { "epoch": 0.9358288770053476, "grad_norm": 1.119379040943024, "learning_rate": 1.1881866929921519e-05, "loss": 0.5739582777023315, "step": 3850 }, { "epoch": 0.9360719494409334, "grad_norm": 1.348809269534926, "learning_rate": 1.1877919154317335e-05, "loss": 0.7580307126045227, "step": 3851 }, { "epoch": 0.9363150218765192, "grad_norm": 1.4648658564674435, "learning_rate": 1.187397107531877e-05, "loss": 0.609451413154602, "step": 3852 }, { "epoch": 0.936558094312105, "grad_norm": 1.0855005949250982, "learning_rate": 1.187002269356368e-05, "loss": 0.6423404216766357, "step": 3853 }, { "epoch": 0.9368011667476908, "grad_norm": 1.2904689102265072, "learning_rate": 1.1866074009689947e-05, "loss": 0.5417962670326233, "step": 3854 }, { "epoch": 0.9370442391832766, "grad_norm": 1.326623462819545, "learning_rate": 1.1862125024335527e-05, "loss": 0.6859824061393738, "step": 3855 }, { "epoch": 0.9372873116188625, "grad_norm": 1.3386852469261277, "learning_rate": 1.1858175738138404e-05, "loss": 0.6482005715370178, "step": 3856 }, { "epoch": 0.9375303840544482, "grad_norm": 1.2448366565760762, "learning_rate": 1.185422615173663e-05, "loss": 0.6037243604660034, "step": 3857 }, { "epoch": 0.9377734564900341, "grad_norm": 1.3551878188662192, "learning_rate": 1.185027626576829e-05, "loss": 0.7097839117050171, "step": 3858 }, { "epoch": 0.9380165289256198, "grad_norm": 1.478051164430107, "learning_rate": 1.1846326080871524e-05, "loss": 0.5359722375869751, "step": 3859 }, { "epoch": 0.9382596013612057, "grad_norm": 1.267671441193548, "learning_rate": 1.1842375597684517e-05, "loss": 0.7998456954956055, "step": 3860 }, { "epoch": 0.9385026737967914, "grad_norm": 1.2012192704710585, "learning_rate": 1.1838424816845507e-05, "loss": 0.5852420926094055, "step": 3861 }, { "epoch": 0.9387457462323773, "grad_norm": 1.2585076687437258, "learning_rate": 1.1834473738992776e-05, "loss": 0.544956624507904, "step": 3862 }, { "epoch": 0.938988818667963, "grad_norm": 1.4659892372804932, "learning_rate": 1.1830522364764655e-05, "loss": 0.5756899118423462, "step": 3863 }, { "epoch": 0.9392318911035489, "grad_norm": 1.270915477878423, "learning_rate": 1.1826570694799525e-05, "loss": 0.7361704111099243, "step": 3864 }, { "epoch": 0.9394749635391346, "grad_norm": 1.3572583365270463, "learning_rate": 1.1822618729735813e-05, "loss": 0.7420721054077148, "step": 3865 }, { "epoch": 0.9397180359747205, "grad_norm": 1.4399378136408592, "learning_rate": 1.1818666470211992e-05, "loss": 0.6466599702835083, "step": 3866 }, { "epoch": 0.9399611084103062, "grad_norm": 1.3577966920268008, "learning_rate": 1.1814713916866585e-05, "loss": 0.6546827554702759, "step": 3867 }, { "epoch": 0.9402041808458921, "grad_norm": 1.2775322357086942, "learning_rate": 1.1810761070338164e-05, "loss": 0.7277987003326416, "step": 3868 }, { "epoch": 0.9404472532814779, "grad_norm": 1.2877090894894894, "learning_rate": 1.1806807931265344e-05, "loss": 0.5276011824607849, "step": 3869 }, { "epoch": 0.9406903257170637, "grad_norm": 1.136991889435165, "learning_rate": 1.1802854500286789e-05, "loss": 0.6479870080947876, "step": 3870 }, { "epoch": 0.9409333981526495, "grad_norm": 1.2767232167877098, "learning_rate": 1.179890077804121e-05, "loss": 0.6363948583602905, "step": 3871 }, { "epoch": 0.9411764705882353, "grad_norm": 1.0839876492957896, "learning_rate": 1.1794946765167367e-05, "loss": 0.5905882716178894, "step": 3872 }, { "epoch": 0.9414195430238211, "grad_norm": 1.3023355216078603, "learning_rate": 1.1790992462304066e-05, "loss": 0.5328924655914307, "step": 3873 }, { "epoch": 0.9416626154594069, "grad_norm": 1.2667416572960462, "learning_rate": 1.1787037870090162e-05, "loss": 0.8754028081893921, "step": 3874 }, { "epoch": 0.9419056878949927, "grad_norm": 1.3528405116016795, "learning_rate": 1.1783082989164549e-05, "loss": 0.6747527122497559, "step": 3875 }, { "epoch": 0.9421487603305785, "grad_norm": 1.2553631006605988, "learning_rate": 1.1779127820166174e-05, "loss": 0.67654949426651, "step": 3876 }, { "epoch": 0.9423918327661643, "grad_norm": 1.1385564413761056, "learning_rate": 1.177517236373403e-05, "loss": 0.5546597242355347, "step": 3877 }, { "epoch": 0.9426349052017501, "grad_norm": 1.4213326702024087, "learning_rate": 1.1771216620507162e-05, "loss": 0.7606573104858398, "step": 3878 }, { "epoch": 0.9428779776373359, "grad_norm": 1.3918933609677475, "learning_rate": 1.1767260591124645e-05, "loss": 0.44104301929473877, "step": 3879 }, { "epoch": 0.9431210500729217, "grad_norm": 1.2382543871558582, "learning_rate": 1.1763304276225615e-05, "loss": 0.5462988615036011, "step": 3880 }, { "epoch": 0.9433641225085075, "grad_norm": 1.3687650890342098, "learning_rate": 1.175934767644925e-05, "loss": 0.6231358647346497, "step": 3881 }, { "epoch": 0.9436071949440933, "grad_norm": 1.4695916393411357, "learning_rate": 1.1755390792434772e-05, "loss": 0.6573282480239868, "step": 3882 }, { "epoch": 0.9438502673796791, "grad_norm": 1.3015496927251577, "learning_rate": 1.1751433624821454e-05, "loss": 0.4887711703777313, "step": 3883 }, { "epoch": 0.944093339815265, "grad_norm": 1.3424812420956884, "learning_rate": 1.1747476174248604e-05, "loss": 0.6416893005371094, "step": 3884 }, { "epoch": 0.9443364122508507, "grad_norm": 1.499771260172155, "learning_rate": 1.174351844135559e-05, "loss": 0.6508835554122925, "step": 3885 }, { "epoch": 0.9445794846864366, "grad_norm": 1.3436129223817685, "learning_rate": 1.1739560426781817e-05, "loss": 0.6339862942695618, "step": 3886 }, { "epoch": 0.9448225571220223, "grad_norm": 1.4463478803264567, "learning_rate": 1.1735602131166739e-05, "loss": 0.5621591806411743, "step": 3887 }, { "epoch": 0.9450656295576082, "grad_norm": 1.5929424571800412, "learning_rate": 1.1731643555149845e-05, "loss": 0.48212969303131104, "step": 3888 }, { "epoch": 0.945308701993194, "grad_norm": 1.4613981418431494, "learning_rate": 1.1727684699370689e-05, "loss": 0.6654260754585266, "step": 3889 }, { "epoch": 0.9455517744287798, "grad_norm": 1.2748881395118832, "learning_rate": 1.172372556446885e-05, "loss": 0.6456059217453003, "step": 3890 }, { "epoch": 0.9457948468643655, "grad_norm": 1.545766151896617, "learning_rate": 1.1719766151083967e-05, "loss": 0.7229812145233154, "step": 3891 }, { "epoch": 0.9460379192999514, "grad_norm": 1.435177255351062, "learning_rate": 1.1715806459855717e-05, "loss": 0.6412845849990845, "step": 3892 }, { "epoch": 0.9462809917355371, "grad_norm": 1.4840240494310852, "learning_rate": 1.1711846491423818e-05, "loss": 0.6979078650474548, "step": 3893 }, { "epoch": 0.946524064171123, "grad_norm": 1.2958446260013154, "learning_rate": 1.1707886246428044e-05, "loss": 0.6121431589126587, "step": 3894 }, { "epoch": 0.9467671366067087, "grad_norm": 1.283325173411022, "learning_rate": 1.1703925725508205e-05, "loss": 0.6483502388000488, "step": 3895 }, { "epoch": 0.9470102090422946, "grad_norm": 1.5713373312050276, "learning_rate": 1.169996492930416e-05, "loss": 0.7241339683532715, "step": 3896 }, { "epoch": 0.9472532814778805, "grad_norm": 1.3243922710388287, "learning_rate": 1.1696003858455804e-05, "loss": 0.727902889251709, "step": 3897 }, { "epoch": 0.9474963539134662, "grad_norm": 1.412659473472214, "learning_rate": 1.1692042513603092e-05, "loss": 0.7113755941390991, "step": 3898 }, { "epoch": 0.9477394263490521, "grad_norm": 1.2393950733966728, "learning_rate": 1.1688080895386009e-05, "loss": 0.5563077926635742, "step": 3899 }, { "epoch": 0.9479824987846378, "grad_norm": 1.2127710094475117, "learning_rate": 1.168411900444459e-05, "loss": 0.5203859210014343, "step": 3900 }, { "epoch": 0.9482255712202237, "grad_norm": 1.2567508033329202, "learning_rate": 1.1680156841418911e-05, "loss": 0.6164199113845825, "step": 3901 }, { "epoch": 0.9484686436558094, "grad_norm": 1.4173718855947963, "learning_rate": 1.1676194406949097e-05, "loss": 0.6559708118438721, "step": 3902 }, { "epoch": 0.9487117160913953, "grad_norm": 1.3281842206349503, "learning_rate": 1.1672231701675317e-05, "loss": 0.6595553755760193, "step": 3903 }, { "epoch": 0.948954788526981, "grad_norm": 1.36967225169, "learning_rate": 1.1668268726237777e-05, "loss": 0.708945095539093, "step": 3904 }, { "epoch": 0.9491978609625669, "grad_norm": 1.1174438095985388, "learning_rate": 1.1664305481276733e-05, "loss": 0.5633033514022827, "step": 3905 }, { "epoch": 0.9494409333981526, "grad_norm": 1.4599547324522772, "learning_rate": 1.1660341967432475e-05, "loss": 0.5287760496139526, "step": 3906 }, { "epoch": 0.9496840058337385, "grad_norm": 1.5232773750162731, "learning_rate": 1.1656378185345355e-05, "loss": 0.810441255569458, "step": 3907 }, { "epoch": 0.9499270782693242, "grad_norm": 1.5654233444800953, "learning_rate": 1.1652414135655747e-05, "loss": 0.7300267219543457, "step": 3908 }, { "epoch": 0.9501701507049101, "grad_norm": 1.2115047568392208, "learning_rate": 1.1648449819004084e-05, "loss": 0.6642441749572754, "step": 3909 }, { "epoch": 0.9504132231404959, "grad_norm": 1.5229895309815795, "learning_rate": 1.1644485236030834e-05, "loss": 0.5790748000144958, "step": 3910 }, { "epoch": 0.9506562955760817, "grad_norm": 1.2607509872739808, "learning_rate": 1.1640520387376513e-05, "loss": 0.5894696712493896, "step": 3911 }, { "epoch": 0.9508993680116675, "grad_norm": 1.3048206458550906, "learning_rate": 1.1636555273681673e-05, "loss": 0.5375053286552429, "step": 3912 }, { "epoch": 0.9511424404472533, "grad_norm": 1.2476019800449751, "learning_rate": 1.1632589895586915e-05, "loss": 0.570576548576355, "step": 3913 }, { "epoch": 0.9513855128828391, "grad_norm": 1.14292318207459, "learning_rate": 1.1628624253732881e-05, "loss": 0.640049397945404, "step": 3914 }, { "epoch": 0.9516285853184249, "grad_norm": 1.2698624620974301, "learning_rate": 1.1624658348760255e-05, "loss": 0.7945722341537476, "step": 3915 }, { "epoch": 0.9518716577540107, "grad_norm": 1.580772375784929, "learning_rate": 1.1620692181309766e-05, "loss": 0.6840958595275879, "step": 3916 }, { "epoch": 0.9521147301895965, "grad_norm": 1.4733215643608437, "learning_rate": 1.1616725752022178e-05, "loss": 0.655019998550415, "step": 3917 }, { "epoch": 0.9523578026251823, "grad_norm": 1.9798520020698887, "learning_rate": 1.1612759061538309e-05, "loss": 0.6775059103965759, "step": 3918 }, { "epoch": 0.9526008750607681, "grad_norm": 1.4190562726627058, "learning_rate": 1.1608792110499008e-05, "loss": 0.857512354850769, "step": 3919 }, { "epoch": 0.9528439474963539, "grad_norm": 1.284048410763566, "learning_rate": 1.160482489954517e-05, "loss": 0.6651724576950073, "step": 3920 }, { "epoch": 0.9530870199319397, "grad_norm": 1.419825523183974, "learning_rate": 1.1600857429317738e-05, "loss": 0.6169350147247314, "step": 3921 }, { "epoch": 0.9533300923675255, "grad_norm": 1.3676800898733452, "learning_rate": 1.1596889700457691e-05, "loss": 0.4985705614089966, "step": 3922 }, { "epoch": 0.9535731648031113, "grad_norm": 1.6336441875403585, "learning_rate": 1.1592921713606049e-05, "loss": 0.5873897671699524, "step": 3923 }, { "epoch": 0.9538162372386971, "grad_norm": 1.364207189564027, "learning_rate": 1.1588953469403872e-05, "loss": 0.652809739112854, "step": 3924 }, { "epoch": 0.954059309674283, "grad_norm": 1.3463368237020261, "learning_rate": 1.1584984968492267e-05, "loss": 0.5625655055046082, "step": 3925 }, { "epoch": 0.9543023821098687, "grad_norm": 1.2560031067299564, "learning_rate": 1.1581016211512384e-05, "loss": 0.8446453213691711, "step": 3926 }, { "epoch": 0.9545454545454546, "grad_norm": 1.3156482025610008, "learning_rate": 1.1577047199105407e-05, "loss": 0.6383377313613892, "step": 3927 }, { "epoch": 0.9547885269810403, "grad_norm": 1.3170660185330807, "learning_rate": 1.1573077931912567e-05, "loss": 0.6981781721115112, "step": 3928 }, { "epoch": 0.9550315994166262, "grad_norm": 1.2011454193576085, "learning_rate": 1.1569108410575127e-05, "loss": 0.6451833248138428, "step": 3929 }, { "epoch": 0.9552746718522119, "grad_norm": 1.353546206640383, "learning_rate": 1.1565138635734408e-05, "loss": 0.6284748315811157, "step": 3930 }, { "epoch": 0.9555177442877978, "grad_norm": 1.0395567351213366, "learning_rate": 1.1561168608031759e-05, "loss": 0.5499582290649414, "step": 3931 }, { "epoch": 0.9557608167233835, "grad_norm": 1.4496540007020104, "learning_rate": 1.1557198328108573e-05, "loss": 0.60672527551651, "step": 3932 }, { "epoch": 0.9560038891589694, "grad_norm": 1.3055324511645925, "learning_rate": 1.155322779660628e-05, "loss": 0.5724654793739319, "step": 3933 }, { "epoch": 0.9562469615945551, "grad_norm": 1.313256399512337, "learning_rate": 1.1549257014166355e-05, "loss": 0.6025035977363586, "step": 3934 }, { "epoch": 0.956490034030141, "grad_norm": 1.235015648521735, "learning_rate": 1.1545285981430317e-05, "loss": 0.5694698095321655, "step": 3935 }, { "epoch": 0.9567331064657267, "grad_norm": 1.3794689794420267, "learning_rate": 1.1541314699039719e-05, "loss": 0.6385723352432251, "step": 3936 }, { "epoch": 0.9569761789013126, "grad_norm": 1.3843316164251214, "learning_rate": 1.1537343167636155e-05, "loss": 0.8113117218017578, "step": 3937 }, { "epoch": 0.9572192513368984, "grad_norm": 1.2855547184784706, "learning_rate": 1.1533371387861263e-05, "loss": 0.5284950733184814, "step": 3938 }, { "epoch": 0.9574623237724842, "grad_norm": 1.408184322380299, "learning_rate": 1.1529399360356719e-05, "loss": 0.5696093440055847, "step": 3939 }, { "epoch": 0.95770539620807, "grad_norm": 1.3361109550111199, "learning_rate": 1.1525427085764238e-05, "loss": 0.5361323356628418, "step": 3940 }, { "epoch": 0.9579484686436558, "grad_norm": 1.3962836377561634, "learning_rate": 1.152145456472558e-05, "loss": 0.5573095083236694, "step": 3941 }, { "epoch": 0.9581915410792416, "grad_norm": 1.3508356394205718, "learning_rate": 1.1517481797882533e-05, "loss": 0.6542431712150574, "step": 3942 }, { "epoch": 0.9584346135148274, "grad_norm": 1.2232554487625866, "learning_rate": 1.1513508785876939e-05, "loss": 0.5289784073829651, "step": 3943 }, { "epoch": 0.9586776859504132, "grad_norm": 1.5017965408033116, "learning_rate": 1.150953552935067e-05, "loss": 0.7289671897888184, "step": 3944 }, { "epoch": 0.958920758385999, "grad_norm": 1.2522931342006185, "learning_rate": 1.1505562028945643e-05, "loss": 0.6140431761741638, "step": 3945 }, { "epoch": 0.9591638308215849, "grad_norm": 1.221230660704461, "learning_rate": 1.1501588285303808e-05, "loss": 0.6112369298934937, "step": 3946 }, { "epoch": 0.9594069032571706, "grad_norm": 1.391439184648886, "learning_rate": 1.149761429906716e-05, "loss": 0.6274404525756836, "step": 3947 }, { "epoch": 0.9596499756927565, "grad_norm": 1.4681294759799322, "learning_rate": 1.1493640070877736e-05, "loss": 0.4717264175415039, "step": 3948 }, { "epoch": 0.9598930481283422, "grad_norm": 1.3265469323298358, "learning_rate": 1.1489665601377603e-05, "loss": 0.7260000705718994, "step": 3949 }, { "epoch": 0.960136120563928, "grad_norm": 1.446248733776445, "learning_rate": 1.1485690891208872e-05, "loss": 0.8170257806777954, "step": 3950 }, { "epoch": 0.9603791929995139, "grad_norm": 1.150400308545659, "learning_rate": 1.1481715941013692e-05, "loss": 0.6295474171638489, "step": 3951 }, { "epoch": 0.9606222654350997, "grad_norm": 1.5728284788934166, "learning_rate": 1.147774075143425e-05, "loss": 0.6721106171607971, "step": 3952 }, { "epoch": 0.9608653378706855, "grad_norm": 1.10307366040031, "learning_rate": 1.1473765323112777e-05, "loss": 0.6037554740905762, "step": 3953 }, { "epoch": 0.9611084103062713, "grad_norm": 1.306426630564027, "learning_rate": 1.1469789656691535e-05, "loss": 0.6590142250061035, "step": 3954 }, { "epoch": 0.9613514827418571, "grad_norm": 1.354439859769422, "learning_rate": 1.1465813752812831e-05, "loss": 0.5792948007583618, "step": 3955 }, { "epoch": 0.9615945551774429, "grad_norm": 1.2359541499641833, "learning_rate": 1.1461837612119004e-05, "loss": 0.5237072110176086, "step": 3956 }, { "epoch": 0.9618376276130287, "grad_norm": 1.2014693482047956, "learning_rate": 1.1457861235252436e-05, "loss": 0.5464988946914673, "step": 3957 }, { "epoch": 0.9620807000486145, "grad_norm": 1.32405456460863, "learning_rate": 1.1453884622855549e-05, "loss": 0.7339409589767456, "step": 3958 }, { "epoch": 0.9623237724842003, "grad_norm": 1.4040811644049769, "learning_rate": 1.1449907775570793e-05, "loss": 0.568790853023529, "step": 3959 }, { "epoch": 0.9625668449197861, "grad_norm": 1.4208742190943782, "learning_rate": 1.1445930694040667e-05, "loss": 0.5048806667327881, "step": 3960 }, { "epoch": 0.9628099173553719, "grad_norm": 1.0984268861728743, "learning_rate": 1.1441953378907705e-05, "loss": 0.6275051832199097, "step": 3961 }, { "epoch": 0.9630529897909577, "grad_norm": 1.6227891870695703, "learning_rate": 1.143797583081447e-05, "loss": 0.8062846660614014, "step": 3962 }, { "epoch": 0.9632960622265435, "grad_norm": 1.2175807406697232, "learning_rate": 1.1433998050403579e-05, "loss": 0.6340672969818115, "step": 3963 }, { "epoch": 0.9635391346621293, "grad_norm": 1.1645018961285172, "learning_rate": 1.143002003831767e-05, "loss": 0.6574032306671143, "step": 3964 }, { "epoch": 0.9637822070977151, "grad_norm": 1.3530563866867256, "learning_rate": 1.1426041795199434e-05, "loss": 0.5924806594848633, "step": 3965 }, { "epoch": 0.964025279533301, "grad_norm": 1.3024706817678562, "learning_rate": 1.1422063321691581e-05, "loss": 0.6510341763496399, "step": 3966 }, { "epoch": 0.9642683519688867, "grad_norm": 1.3299196611633983, "learning_rate": 1.141808461843688e-05, "loss": 0.6372146010398865, "step": 3967 }, { "epoch": 0.9645114244044726, "grad_norm": 1.4299222541400762, "learning_rate": 1.1414105686078117e-05, "loss": 0.7502787113189697, "step": 3968 }, { "epoch": 0.9647544968400583, "grad_norm": 1.4638592485839252, "learning_rate": 1.1410126525258125e-05, "loss": 0.6139510869979858, "step": 3969 }, { "epoch": 0.9649975692756442, "grad_norm": 1.1894970187916711, "learning_rate": 1.1406147136619776e-05, "loss": 0.6191460490226746, "step": 3970 }, { "epoch": 0.9652406417112299, "grad_norm": 1.3604626555258446, "learning_rate": 1.1402167520805969e-05, "loss": 0.691861629486084, "step": 3971 }, { "epoch": 0.9654837141468158, "grad_norm": 1.3001829532567828, "learning_rate": 1.1398187678459652e-05, "loss": 0.495763897895813, "step": 3972 }, { "epoch": 0.9657267865824015, "grad_norm": 1.3268197737040426, "learning_rate": 1.1394207610223802e-05, "loss": 0.6035501956939697, "step": 3973 }, { "epoch": 0.9659698590179874, "grad_norm": 1.2988437216372706, "learning_rate": 1.1390227316741434e-05, "loss": 0.7100608348846436, "step": 3974 }, { "epoch": 0.9662129314535731, "grad_norm": 1.19413567161668, "learning_rate": 1.1386246798655596e-05, "loss": 0.6964905261993408, "step": 3975 }, { "epoch": 0.966456003889159, "grad_norm": 1.3685939073525846, "learning_rate": 1.1382266056609382e-05, "loss": 0.6517294645309448, "step": 3976 }, { "epoch": 0.9666990763247447, "grad_norm": 1.51771598449506, "learning_rate": 1.137828509124591e-05, "loss": 0.6631888151168823, "step": 3977 }, { "epoch": 0.9669421487603306, "grad_norm": 1.395032839041885, "learning_rate": 1.1374303903208345e-05, "loss": 0.6375347375869751, "step": 3978 }, { "epoch": 0.9671852211959164, "grad_norm": 1.5045738447988213, "learning_rate": 1.1370322493139879e-05, "loss": 0.6718522310256958, "step": 3979 }, { "epoch": 0.9674282936315022, "grad_norm": 1.1637657579087282, "learning_rate": 1.1366340861683745e-05, "loss": 0.5923734903335571, "step": 3980 }, { "epoch": 0.967671366067088, "grad_norm": 1.1296744961773497, "learning_rate": 1.1362359009483213e-05, "loss": 0.5920511484146118, "step": 3981 }, { "epoch": 0.9679144385026738, "grad_norm": 1.5377626405389309, "learning_rate": 1.1358376937181584e-05, "loss": 0.8029991388320923, "step": 3982 }, { "epoch": 0.9681575109382596, "grad_norm": 1.2990161533935252, "learning_rate": 1.1354394645422195e-05, "loss": 0.803696870803833, "step": 3983 }, { "epoch": 0.9684005833738454, "grad_norm": 1.3876833662908794, "learning_rate": 1.1350412134848425e-05, "loss": 0.6575651168823242, "step": 3984 }, { "epoch": 0.9686436558094312, "grad_norm": 1.6279895851121484, "learning_rate": 1.134642940610368e-05, "loss": 0.675257682800293, "step": 3985 }, { "epoch": 0.968886728245017, "grad_norm": 1.3289674626003745, "learning_rate": 1.134244645983141e-05, "loss": 0.5872372388839722, "step": 3986 }, { "epoch": 0.9691298006806028, "grad_norm": 1.3208780439932966, "learning_rate": 1.1338463296675086e-05, "loss": 0.6960493326187134, "step": 3987 }, { "epoch": 0.9693728731161886, "grad_norm": 1.3742230132461581, "learning_rate": 1.1334479917278229e-05, "loss": 0.6476043462753296, "step": 3988 }, { "epoch": 0.9696159455517744, "grad_norm": 1.2627039326315161, "learning_rate": 1.1330496322284392e-05, "loss": 0.7478907108306885, "step": 3989 }, { "epoch": 0.9698590179873602, "grad_norm": 1.6207126489058652, "learning_rate": 1.1326512512337153e-05, "loss": 0.5710828304290771, "step": 3990 }, { "epoch": 0.970102090422946, "grad_norm": 1.1957457040977655, "learning_rate": 1.1322528488080134e-05, "loss": 0.6218490600585938, "step": 3991 }, { "epoch": 0.9703451628585319, "grad_norm": 1.8124068504515678, "learning_rate": 1.131854425015699e-05, "loss": 0.6311458945274353, "step": 3992 }, { "epoch": 0.9705882352941176, "grad_norm": 1.240971821253471, "learning_rate": 1.1314559799211413e-05, "loss": 0.5179867744445801, "step": 3993 }, { "epoch": 0.9708313077297035, "grad_norm": 1.6646073868436444, "learning_rate": 1.131057513588712e-05, "loss": 0.6026206016540527, "step": 3994 }, { "epoch": 0.9710743801652892, "grad_norm": 1.4885697395039332, "learning_rate": 1.1306590260827874e-05, "loss": 0.5955901145935059, "step": 3995 }, { "epoch": 0.9713174526008751, "grad_norm": 1.4359220969968927, "learning_rate": 1.1302605174677463e-05, "loss": 0.730080783367157, "step": 3996 }, { "epoch": 0.9715605250364608, "grad_norm": 1.2212454724061483, "learning_rate": 1.1298619878079713e-05, "loss": 0.5755631327629089, "step": 3997 }, { "epoch": 0.9718035974720467, "grad_norm": 1.1389714411372778, "learning_rate": 1.1294634371678486e-05, "loss": 0.584303617477417, "step": 3998 }, { "epoch": 0.9720466699076324, "grad_norm": 1.3561366564892616, "learning_rate": 1.1290648656117668e-05, "loss": 0.7461980581283569, "step": 3999 }, { "epoch": 0.9722897423432183, "grad_norm": 1.3090283587846108, "learning_rate": 1.1286662732041199e-05, "loss": 0.670059323310852, "step": 4000 }, { "epoch": 0.972532814778804, "grad_norm": 1.3063900833991868, "learning_rate": 1.1282676600093032e-05, "loss": 0.6780164241790771, "step": 4001 }, { "epoch": 0.9727758872143899, "grad_norm": 1.3640780825603964, "learning_rate": 1.1278690260917165e-05, "loss": 0.7526670694351196, "step": 4002 }, { "epoch": 0.9730189596499756, "grad_norm": 1.4429949667590034, "learning_rate": 1.1274703715157623e-05, "loss": 0.6293051242828369, "step": 4003 }, { "epoch": 0.9732620320855615, "grad_norm": 1.336421485837697, "learning_rate": 1.1270716963458469e-05, "loss": 0.6912798285484314, "step": 4004 }, { "epoch": 0.9735051045211472, "grad_norm": 1.564303338355627, "learning_rate": 1.1266730006463797e-05, "loss": 0.5970037579536438, "step": 4005 }, { "epoch": 0.9737481769567331, "grad_norm": 1.531050578037616, "learning_rate": 1.1262742844817737e-05, "loss": 0.7211542129516602, "step": 4006 }, { "epoch": 0.973991249392319, "grad_norm": 1.3379158882461706, "learning_rate": 1.125875547916445e-05, "loss": 0.6603879928588867, "step": 4007 }, { "epoch": 0.9742343218279047, "grad_norm": 1.2340391356078968, "learning_rate": 1.1254767910148127e-05, "loss": 0.6841497421264648, "step": 4008 }, { "epoch": 0.9744773942634906, "grad_norm": 1.5528354668845403, "learning_rate": 1.1250780138413e-05, "loss": 0.6881450414657593, "step": 4009 }, { "epoch": 0.9747204666990763, "grad_norm": 1.3386511927078717, "learning_rate": 1.1246792164603323e-05, "loss": 0.666118860244751, "step": 4010 }, { "epoch": 0.9749635391346622, "grad_norm": 1.2666384360421599, "learning_rate": 1.1242803989363395e-05, "loss": 0.6876596212387085, "step": 4011 }, { "epoch": 0.9752066115702479, "grad_norm": 1.1949134424482053, "learning_rate": 1.1238815613337535e-05, "loss": 0.5654401779174805, "step": 4012 }, { "epoch": 0.9754496840058338, "grad_norm": 1.2581914966986052, "learning_rate": 1.1234827037170101e-05, "loss": 0.5717900991439819, "step": 4013 }, { "epoch": 0.9756927564414195, "grad_norm": 1.3907077442749576, "learning_rate": 1.1230838261505486e-05, "loss": 0.6241086721420288, "step": 4014 }, { "epoch": 0.9759358288770054, "grad_norm": 1.212031858487598, "learning_rate": 1.1226849286988108e-05, "loss": 0.6201097965240479, "step": 4015 }, { "epoch": 0.9761789013125911, "grad_norm": 1.4084046829315207, "learning_rate": 1.1222860114262421e-05, "loss": 0.5770373344421387, "step": 4016 }, { "epoch": 0.976421973748177, "grad_norm": 1.3404394248475968, "learning_rate": 1.1218870743972914e-05, "loss": 0.7160965204238892, "step": 4017 }, { "epoch": 0.9766650461837627, "grad_norm": 1.620128123893688, "learning_rate": 1.1214881176764104e-05, "loss": 0.6674911975860596, "step": 4018 }, { "epoch": 0.9769081186193486, "grad_norm": 1.2811994490674217, "learning_rate": 1.121089141328054e-05, "loss": 0.5375953316688538, "step": 4019 }, { "epoch": 0.9771511910549344, "grad_norm": 1.3218140374675105, "learning_rate": 1.12069014541668e-05, "loss": 0.6973751783370972, "step": 4020 }, { "epoch": 0.9773942634905202, "grad_norm": 1.253596223878822, "learning_rate": 1.1202911300067508e-05, "loss": 0.6070988774299622, "step": 4021 }, { "epoch": 0.977637335926106, "grad_norm": 1.4186383121439634, "learning_rate": 1.1198920951627296e-05, "loss": 0.6304534673690796, "step": 4022 }, { "epoch": 0.9778804083616918, "grad_norm": 1.3271299293953869, "learning_rate": 1.1194930409490847e-05, "loss": 0.6116349697113037, "step": 4023 }, { "epoch": 0.9781234807972776, "grad_norm": 1.2704507681721289, "learning_rate": 1.1190939674302865e-05, "loss": 0.6137944459915161, "step": 4024 }, { "epoch": 0.9783665532328634, "grad_norm": 1.431723684190909, "learning_rate": 1.118694874670809e-05, "loss": 0.7552162408828735, "step": 4025 }, { "epoch": 0.9786096256684492, "grad_norm": 1.307872765360433, "learning_rate": 1.1182957627351292e-05, "loss": 0.6674940586090088, "step": 4026 }, { "epoch": 0.978852698104035, "grad_norm": 1.2719949906241537, "learning_rate": 1.1178966316877271e-05, "loss": 0.6211689710617065, "step": 4027 }, { "epoch": 0.9790957705396208, "grad_norm": 1.4171731996875516, "learning_rate": 1.1174974815930857e-05, "loss": 0.6588010787963867, "step": 4028 }, { "epoch": 0.9793388429752066, "grad_norm": 1.2163897911386767, "learning_rate": 1.1170983125156913e-05, "loss": 0.6882840394973755, "step": 4029 }, { "epoch": 0.9795819154107924, "grad_norm": 1.2692052482946832, "learning_rate": 1.1166991245200336e-05, "loss": 0.6642088890075684, "step": 4030 }, { "epoch": 0.9798249878463782, "grad_norm": 1.310492620308483, "learning_rate": 1.1162999176706042e-05, "loss": 0.7465799450874329, "step": 4031 }, { "epoch": 0.980068060281964, "grad_norm": 1.0730342701004196, "learning_rate": 1.1159006920318986e-05, "loss": 0.5421842932701111, "step": 4032 }, { "epoch": 0.9803111327175499, "grad_norm": 1.4951098922682136, "learning_rate": 1.1155014476684156e-05, "loss": 0.6630498766899109, "step": 4033 }, { "epoch": 0.9805542051531356, "grad_norm": 1.610509567218245, "learning_rate": 1.1151021846446568e-05, "loss": 0.6076828241348267, "step": 4034 }, { "epoch": 0.9807972775887215, "grad_norm": 1.3538667051251105, "learning_rate": 1.114702903025126e-05, "loss": 0.6713071465492249, "step": 4035 }, { "epoch": 0.9810403500243072, "grad_norm": 1.437465455382395, "learning_rate": 1.1143036028743311e-05, "loss": 0.5605980157852173, "step": 4036 }, { "epoch": 0.9812834224598931, "grad_norm": 1.2770906779714128, "learning_rate": 1.1139042842567824e-05, "loss": 0.562947154045105, "step": 4037 }, { "epoch": 0.9815264948954788, "grad_norm": 1.3339740298644516, "learning_rate": 1.1135049472369934e-05, "loss": 0.602645993232727, "step": 4038 }, { "epoch": 0.9817695673310647, "grad_norm": 1.4880430339567543, "learning_rate": 1.1131055918794806e-05, "loss": 0.6376075744628906, "step": 4039 }, { "epoch": 0.9820126397666504, "grad_norm": 1.3455246570114539, "learning_rate": 1.1127062182487634e-05, "loss": 0.6526479721069336, "step": 4040 }, { "epoch": 0.9822557122022363, "grad_norm": 1.501225041751381, "learning_rate": 1.1123068264093639e-05, "loss": 0.576090931892395, "step": 4041 }, { "epoch": 0.982498784637822, "grad_norm": 1.1848927736347377, "learning_rate": 1.1119074164258075e-05, "loss": 0.6726586222648621, "step": 4042 }, { "epoch": 0.9827418570734079, "grad_norm": 1.2371762312273116, "learning_rate": 1.1115079883626223e-05, "loss": 0.609606146812439, "step": 4043 }, { "epoch": 0.9829849295089936, "grad_norm": 1.4228460437313732, "learning_rate": 1.1111085422843392e-05, "loss": 0.7029669284820557, "step": 4044 }, { "epoch": 0.9832280019445795, "grad_norm": 1.3763110827548044, "learning_rate": 1.110709078255493e-05, "loss": 0.49953946471214294, "step": 4045 }, { "epoch": 0.9834710743801653, "grad_norm": 1.597269013337896, "learning_rate": 1.1103095963406201e-05, "loss": 0.6737143993377686, "step": 4046 }, { "epoch": 0.9837141468157511, "grad_norm": 1.3970954415957642, "learning_rate": 1.1099100966042605e-05, "loss": 0.6715126037597656, "step": 4047 }, { "epoch": 0.983957219251337, "grad_norm": 2.0130075112510766, "learning_rate": 1.1095105791109566e-05, "loss": 0.5073788166046143, "step": 4048 }, { "epoch": 0.9842002916869227, "grad_norm": 1.2726147045727225, "learning_rate": 1.1091110439252542e-05, "loss": 0.5678783655166626, "step": 4049 }, { "epoch": 0.9844433641225085, "grad_norm": 1.2824665226078735, "learning_rate": 1.1087114911117018e-05, "loss": 0.5407860279083252, "step": 4050 }, { "epoch": 0.9846864365580943, "grad_norm": 1.4417926036784787, "learning_rate": 1.1083119207348505e-05, "loss": 0.6692063212394714, "step": 4051 }, { "epoch": 0.9849295089936801, "grad_norm": 1.267575633886239, "learning_rate": 1.1079123328592546e-05, "loss": 0.7242779731750488, "step": 4052 }, { "epoch": 0.9851725814292659, "grad_norm": 1.288577875772416, "learning_rate": 1.1075127275494704e-05, "loss": 0.6667113304138184, "step": 4053 }, { "epoch": 0.9854156538648517, "grad_norm": 1.2964717145870766, "learning_rate": 1.1071131048700586e-05, "loss": 0.5726609826087952, "step": 4054 }, { "epoch": 0.9856587263004375, "grad_norm": 1.1833755491876998, "learning_rate": 1.1067134648855812e-05, "loss": 0.5280375480651855, "step": 4055 }, { "epoch": 0.9859017987360233, "grad_norm": 1.4572427127498102, "learning_rate": 1.1063138076606039e-05, "loss": 0.4388040006160736, "step": 4056 }, { "epoch": 0.9861448711716091, "grad_norm": 1.3161219254932237, "learning_rate": 1.1059141332596946e-05, "loss": 0.5926943421363831, "step": 4057 }, { "epoch": 0.986387943607195, "grad_norm": 1.6739590054479974, "learning_rate": 1.1055144417474241e-05, "loss": 0.6413019895553589, "step": 4058 }, { "epoch": 0.9866310160427807, "grad_norm": 1.2182236143050176, "learning_rate": 1.1051147331883665e-05, "loss": 0.6599177122116089, "step": 4059 }, { "epoch": 0.9868740884783666, "grad_norm": 1.2812700518308258, "learning_rate": 1.1047150076470978e-05, "loss": 0.5857417583465576, "step": 4060 }, { "epoch": 0.9871171609139524, "grad_norm": 1.3565196024525121, "learning_rate": 1.1043152651881972e-05, "loss": 0.575571596622467, "step": 4061 }, { "epoch": 0.9873602333495382, "grad_norm": 1.2453173474917245, "learning_rate": 1.103915505876247e-05, "loss": 0.5402329564094543, "step": 4062 }, { "epoch": 0.987603305785124, "grad_norm": 1.434770914814869, "learning_rate": 1.1035157297758315e-05, "loss": 0.8380438685417175, "step": 4063 }, { "epoch": 0.9878463782207098, "grad_norm": 1.3970070519595548, "learning_rate": 1.1031159369515382e-05, "loss": 0.5180463194847107, "step": 4064 }, { "epoch": 0.9880894506562956, "grad_norm": 1.471046868261319, "learning_rate": 1.1027161274679573e-05, "loss": 0.6638870239257812, "step": 4065 }, { "epoch": 0.9883325230918814, "grad_norm": 1.3366218842246236, "learning_rate": 1.1023163013896814e-05, "loss": 0.6365903615951538, "step": 4066 }, { "epoch": 0.9885755955274672, "grad_norm": 1.5064550237709418, "learning_rate": 1.1019164587813057e-05, "loss": 0.6397289037704468, "step": 4067 }, { "epoch": 0.988818667963053, "grad_norm": 1.3133754599351175, "learning_rate": 1.1015165997074283e-05, "loss": 0.712486982345581, "step": 4068 }, { "epoch": 0.9890617403986388, "grad_norm": 1.621615956210714, "learning_rate": 1.1011167242326504e-05, "loss": 0.5911146998405457, "step": 4069 }, { "epoch": 0.9893048128342246, "grad_norm": 1.2939844062176813, "learning_rate": 1.1007168324215751e-05, "loss": 0.6286430358886719, "step": 4070 }, { "epoch": 0.9895478852698104, "grad_norm": 1.2614981902514448, "learning_rate": 1.1003169243388085e-05, "loss": 0.6239272356033325, "step": 4071 }, { "epoch": 0.9897909577053962, "grad_norm": 1.4636965141006653, "learning_rate": 1.0999170000489591e-05, "loss": 0.5864056348800659, "step": 4072 }, { "epoch": 0.990034030140982, "grad_norm": 1.2010182394038298, "learning_rate": 1.0995170596166385e-05, "loss": 0.6377483010292053, "step": 4073 }, { "epoch": 0.9902771025765679, "grad_norm": 1.2914049133592493, "learning_rate": 1.0991171031064604e-05, "loss": 0.6960228085517883, "step": 4074 }, { "epoch": 0.9905201750121536, "grad_norm": 1.4312261370934283, "learning_rate": 1.0987171305830415e-05, "loss": 0.6598050594329834, "step": 4075 }, { "epoch": 0.9907632474477395, "grad_norm": 1.5497939910580505, "learning_rate": 1.0983171421110008e-05, "loss": 0.7660547494888306, "step": 4076 }, { "epoch": 0.9910063198833252, "grad_norm": 1.404499355988823, "learning_rate": 1.0979171377549595e-05, "loss": 0.5855003595352173, "step": 4077 }, { "epoch": 0.9912493923189111, "grad_norm": 1.579944819573736, "learning_rate": 1.0975171175795425e-05, "loss": 0.7398363351821899, "step": 4078 }, { "epoch": 0.9914924647544968, "grad_norm": 1.3383286298064578, "learning_rate": 1.0971170816493761e-05, "loss": 0.6856931447982788, "step": 4079 }, { "epoch": 0.9917355371900827, "grad_norm": 1.3431671520687298, "learning_rate": 1.0967170300290902e-05, "loss": 0.5468965768814087, "step": 4080 }, { "epoch": 0.9919786096256684, "grad_norm": 1.372731229825952, "learning_rate": 1.0963169627833157e-05, "loss": 0.534106433391571, "step": 4081 }, { "epoch": 0.9922216820612543, "grad_norm": 1.2704617417153994, "learning_rate": 1.095916879976688e-05, "loss": 0.5384896993637085, "step": 4082 }, { "epoch": 0.99246475449684, "grad_norm": 1.363663192521795, "learning_rate": 1.0955167816738437e-05, "loss": 0.7384361028671265, "step": 4083 }, { "epoch": 0.9927078269324259, "grad_norm": 1.2649834365960022, "learning_rate": 1.095116667939422e-05, "loss": 0.5187826156616211, "step": 4084 }, { "epoch": 0.9929508993680116, "grad_norm": 1.4040336136994584, "learning_rate": 1.0947165388380646e-05, "loss": 0.5333072543144226, "step": 4085 }, { "epoch": 0.9931939718035975, "grad_norm": 1.3855120696280772, "learning_rate": 1.0943163944344164e-05, "loss": 0.7403957843780518, "step": 4086 }, { "epoch": 0.9934370442391833, "grad_norm": 1.500690135221902, "learning_rate": 1.0939162347931241e-05, "loss": 0.7587080001831055, "step": 4087 }, { "epoch": 0.9936801166747691, "grad_norm": 1.3574389392359367, "learning_rate": 1.0935160599788366e-05, "loss": 0.6463054418563843, "step": 4088 }, { "epoch": 0.9939231891103549, "grad_norm": 1.546615896256837, "learning_rate": 1.0931158700562063e-05, "loss": 0.557939887046814, "step": 4089 }, { "epoch": 0.9941662615459407, "grad_norm": 1.265362523009272, "learning_rate": 1.0927156650898867e-05, "loss": 0.7343440651893616, "step": 4090 }, { "epoch": 0.9944093339815265, "grad_norm": 1.4110952147630822, "learning_rate": 1.0923154451445352e-05, "loss": 0.6899145841598511, "step": 4091 }, { "epoch": 0.9946524064171123, "grad_norm": 1.3062511211736736, "learning_rate": 1.0919152102848105e-05, "loss": 0.6045184135437012, "step": 4092 }, { "epoch": 0.9948954788526981, "grad_norm": 1.4914620684396538, "learning_rate": 1.091514960575374e-05, "loss": 0.6888771057128906, "step": 4093 }, { "epoch": 0.9951385512882839, "grad_norm": 1.406543690959564, "learning_rate": 1.0911146960808897e-05, "loss": 0.6739560961723328, "step": 4094 }, { "epoch": 0.9953816237238697, "grad_norm": 1.36439244894577, "learning_rate": 1.0907144168660235e-05, "loss": 0.6885221600532532, "step": 4095 }, { "epoch": 0.9956246961594555, "grad_norm": 1.4291231366943011, "learning_rate": 1.0903141229954445e-05, "loss": 0.4720054268836975, "step": 4096 }, { "epoch": 0.9958677685950413, "grad_norm": 1.3706191417400315, "learning_rate": 1.0899138145338237e-05, "loss": 0.4517384171485901, "step": 4097 }, { "epoch": 0.9961108410306271, "grad_norm": 1.236648849013862, "learning_rate": 1.0895134915458339e-05, "loss": 0.5246795415878296, "step": 4098 }, { "epoch": 0.9963539134662129, "grad_norm": 1.375945696241243, "learning_rate": 1.0891131540961513e-05, "loss": 0.5864747166633606, "step": 4099 }, { "epoch": 0.9965969859017987, "grad_norm": 1.5664970635731887, "learning_rate": 1.088712802249454e-05, "loss": 0.7400902509689331, "step": 4100 }, { "epoch": 0.9968400583373845, "grad_norm": 1.278347752136564, "learning_rate": 1.088312436070422e-05, "loss": 0.5821311473846436, "step": 4101 }, { "epoch": 0.9970831307729704, "grad_norm": 1.4077809825325691, "learning_rate": 1.0879120556237384e-05, "loss": 0.555831253528595, "step": 4102 }, { "epoch": 0.9973262032085561, "grad_norm": 1.4179159827201102, "learning_rate": 1.0875116609740876e-05, "loss": 0.6338759660720825, "step": 4103 }, { "epoch": 0.997569275644142, "grad_norm": 1.1775656103007872, "learning_rate": 1.0871112521861576e-05, "loss": 0.6465687155723572, "step": 4104 }, { "epoch": 0.9978123480797277, "grad_norm": 1.176394425140454, "learning_rate": 1.0867108293246374e-05, "loss": 0.6749840378761292, "step": 4105 }, { "epoch": 0.9980554205153136, "grad_norm": 1.2777235143947954, "learning_rate": 1.0863103924542189e-05, "loss": 0.7778739929199219, "step": 4106 }, { "epoch": 0.9982984929508993, "grad_norm": 1.6180073617978423, "learning_rate": 1.0859099416395964e-05, "loss": 0.6918681859970093, "step": 4107 }, { "epoch": 0.9985415653864852, "grad_norm": 1.3326380522640078, "learning_rate": 1.0855094769454664e-05, "loss": 0.6956212520599365, "step": 4108 }, { "epoch": 0.9987846378220709, "grad_norm": 1.2342162719087784, "learning_rate": 1.0851089984365272e-05, "loss": 0.7817381620407104, "step": 4109 }, { "epoch": 0.9990277102576568, "grad_norm": 1.435687282944255, "learning_rate": 1.0847085061774798e-05, "loss": 0.7088751792907715, "step": 4110 }, { "epoch": 0.9992707826932425, "grad_norm": 1.5266294787272823, "learning_rate": 1.084308000233027e-05, "loss": 0.6977641582489014, "step": 4111 }, { "epoch": 0.9995138551288284, "grad_norm": 1.3870069126898246, "learning_rate": 1.0839074806678744e-05, "loss": 0.6481819152832031, "step": 4112 }, { "epoch": 0.9997569275644141, "grad_norm": 1.2265129638599088, "learning_rate": 1.0835069475467296e-05, "loss": 0.6652039289474487, "step": 4113 }, { "epoch": 1.0, "grad_norm": 1.1413544776989926, "learning_rate": 1.0831064009343012e-05, "loss": 0.6095300912857056, "step": 4114 }, { "epoch": 1.0002430724355857, "grad_norm": 1.204407246584463, "learning_rate": 1.0827058408953025e-05, "loss": 0.8131661415100098, "step": 4115 }, { "epoch": 1.0004861448711717, "grad_norm": 1.2234698831416195, "learning_rate": 1.0823052674944464e-05, "loss": 0.5449469089508057, "step": 4116 }, { "epoch": 1.0007292173067575, "grad_norm": 1.2324667405938798, "learning_rate": 1.0819046807964495e-05, "loss": 0.6301008462905884, "step": 4117 }, { "epoch": 1.0009722897423432, "grad_norm": 1.2581530770069487, "learning_rate": 1.08150408086603e-05, "loss": 0.5995548963546753, "step": 4118 }, { "epoch": 1.001215362177929, "grad_norm": 1.261920353674723, "learning_rate": 1.0811034677679089e-05, "loss": 0.5509936809539795, "step": 4119 }, { "epoch": 1.001458434613515, "grad_norm": 1.158490391492896, "learning_rate": 1.080702841566808e-05, "loss": 0.5706968903541565, "step": 4120 }, { "epoch": 1.0017015070491007, "grad_norm": 1.3207290580868702, "learning_rate": 1.080302202327452e-05, "loss": 0.48553574085235596, "step": 4121 }, { "epoch": 1.0019445794846864, "grad_norm": 1.5193371748868403, "learning_rate": 1.0799015501145682e-05, "loss": 0.5928781032562256, "step": 4122 }, { "epoch": 1.0021876519202721, "grad_norm": 1.3502112668267612, "learning_rate": 1.0795008849928852e-05, "loss": 0.6879315376281738, "step": 4123 }, { "epoch": 1.0024307243558581, "grad_norm": 1.2630860906169705, "learning_rate": 1.0791002070271342e-05, "loss": 0.5027304887771606, "step": 4124 }, { "epoch": 1.0026737967914439, "grad_norm": 1.1022166866893408, "learning_rate": 1.078699516282048e-05, "loss": 0.6570000648498535, "step": 4125 }, { "epoch": 1.0029168692270296, "grad_norm": 1.5591144809143933, "learning_rate": 1.0782988128223618e-05, "loss": 0.7230250835418701, "step": 4126 }, { "epoch": 1.0031599416626154, "grad_norm": 1.2763403299300233, "learning_rate": 1.0778980967128128e-05, "loss": 0.7334213256835938, "step": 4127 }, { "epoch": 1.0034030140982013, "grad_norm": 1.3491886125137256, "learning_rate": 1.0774973680181403e-05, "loss": 0.655807614326477, "step": 4128 }, { "epoch": 1.003646086533787, "grad_norm": 1.2535508360399379, "learning_rate": 1.0770966268030853e-05, "loss": 0.5636923313140869, "step": 4129 }, { "epoch": 1.0038891589693728, "grad_norm": 1.1176130201290946, "learning_rate": 1.0766958731323915e-05, "loss": 0.5855267643928528, "step": 4130 }, { "epoch": 1.0041322314049588, "grad_norm": 1.4203208710049537, "learning_rate": 1.0762951070708036e-05, "loss": 0.7891652584075928, "step": 4131 }, { "epoch": 1.0043753038405445, "grad_norm": 1.1969490399931522, "learning_rate": 1.0758943286830695e-05, "loss": 0.6279709339141846, "step": 4132 }, { "epoch": 1.0046183762761303, "grad_norm": 1.3389648713706694, "learning_rate": 1.075493538033938e-05, "loss": 0.5691311359405518, "step": 4133 }, { "epoch": 1.004861448711716, "grad_norm": 1.9823150538193604, "learning_rate": 1.0750927351881606e-05, "loss": 0.5175744891166687, "step": 4134 }, { "epoch": 1.005104521147302, "grad_norm": 1.4378618165569983, "learning_rate": 1.0746919202104901e-05, "loss": 0.6599748134613037, "step": 4135 }, { "epoch": 1.0053475935828877, "grad_norm": 1.210483967185272, "learning_rate": 1.0742910931656825e-05, "loss": 0.5101252794265747, "step": 4136 }, { "epoch": 1.0055906660184735, "grad_norm": 1.4215208937713146, "learning_rate": 1.0738902541184944e-05, "loss": 0.4986645579338074, "step": 4137 }, { "epoch": 1.0058337384540592, "grad_norm": 1.5825661711528387, "learning_rate": 1.0734894031336851e-05, "loss": 0.7324646711349487, "step": 4138 }, { "epoch": 1.0060768108896452, "grad_norm": 1.4433685787435455, "learning_rate": 1.0730885402760155e-05, "loss": 0.8022934198379517, "step": 4139 }, { "epoch": 1.006319883325231, "grad_norm": 1.3850076918038299, "learning_rate": 1.0726876656102483e-05, "loss": 0.549091100692749, "step": 4140 }, { "epoch": 1.0065629557608167, "grad_norm": 1.2814198560177756, "learning_rate": 1.0722867792011486e-05, "loss": 0.6151405572891235, "step": 4141 }, { "epoch": 1.0068060281964026, "grad_norm": 1.2307147206907563, "learning_rate": 1.071885881113483e-05, "loss": 0.5278090238571167, "step": 4142 }, { "epoch": 1.0070491006319884, "grad_norm": 1.326493313391717, "learning_rate": 1.0714849714120203e-05, "loss": 0.5450903177261353, "step": 4143 }, { "epoch": 1.0072921730675741, "grad_norm": 2.1238836416494786, "learning_rate": 1.0710840501615309e-05, "loss": 0.43794724345207214, "step": 4144 }, { "epoch": 1.0075352455031599, "grad_norm": 1.6410800706173845, "learning_rate": 1.0706831174267873e-05, "loss": 0.6056283712387085, "step": 4145 }, { "epoch": 1.0077783179387458, "grad_norm": 1.4140133524539547, "learning_rate": 1.0702821732725632e-05, "loss": 0.5643320083618164, "step": 4146 }, { "epoch": 1.0080213903743316, "grad_norm": 1.3353040194559291, "learning_rate": 1.0698812177636353e-05, "loss": 0.5382602214813232, "step": 4147 }, { "epoch": 1.0082644628099173, "grad_norm": 1.23822125739445, "learning_rate": 1.0694802509647812e-05, "loss": 0.4229767918586731, "step": 4148 }, { "epoch": 1.008507535245503, "grad_norm": 1.3775938110135408, "learning_rate": 1.0690792729407807e-05, "loss": 0.5629650354385376, "step": 4149 }, { "epoch": 1.008750607681089, "grad_norm": 1.3197173638907578, "learning_rate": 1.0686782837564152e-05, "loss": 0.5988351106643677, "step": 4150 }, { "epoch": 1.0089936801166748, "grad_norm": 1.4790315788024693, "learning_rate": 1.0682772834764677e-05, "loss": 0.684394359588623, "step": 4151 }, { "epoch": 1.0092367525522605, "grad_norm": 1.5979481589188957, "learning_rate": 1.067876272165724e-05, "loss": 0.7354111075401306, "step": 4152 }, { "epoch": 1.0094798249878463, "grad_norm": 1.5499787184917098, "learning_rate": 1.0674752498889709e-05, "loss": 0.6146597266197205, "step": 4153 }, { "epoch": 1.0097228974234322, "grad_norm": 1.7003278320551176, "learning_rate": 1.0670742167109967e-05, "loss": 0.5318308472633362, "step": 4154 }, { "epoch": 1.009965969859018, "grad_norm": 1.5222038393281665, "learning_rate": 1.0666731726965922e-05, "loss": 0.6122541427612305, "step": 4155 }, { "epoch": 1.0102090422946037, "grad_norm": 1.2942427646451926, "learning_rate": 1.0662721179105492e-05, "loss": 0.5461985468864441, "step": 4156 }, { "epoch": 1.0104521147301897, "grad_norm": 1.2860239148254398, "learning_rate": 1.0658710524176623e-05, "loss": 0.5124581456184387, "step": 4157 }, { "epoch": 1.0106951871657754, "grad_norm": 1.4153420996230865, "learning_rate": 1.0654699762827264e-05, "loss": 0.5523080229759216, "step": 4158 }, { "epoch": 1.0109382596013612, "grad_norm": 1.39145398519339, "learning_rate": 1.0650688895705392e-05, "loss": 0.5292336344718933, "step": 4159 }, { "epoch": 1.011181332036947, "grad_norm": 1.2618562627392051, "learning_rate": 1.0646677923458998e-05, "loss": 0.48674261569976807, "step": 4160 }, { "epoch": 1.011424404472533, "grad_norm": 1.1824997006127433, "learning_rate": 1.0642666846736094e-05, "loss": 0.4505497217178345, "step": 4161 }, { "epoch": 1.0116674769081186, "grad_norm": 1.364442090546896, "learning_rate": 1.0638655666184697e-05, "loss": 0.6069837808609009, "step": 4162 }, { "epoch": 1.0119105493437044, "grad_norm": 1.4637395130443835, "learning_rate": 1.0634644382452852e-05, "loss": 0.6586427688598633, "step": 4163 }, { "epoch": 1.0121536217792901, "grad_norm": 1.7064592423332348, "learning_rate": 1.0630632996188621e-05, "loss": 0.6671651005744934, "step": 4164 }, { "epoch": 1.012396694214876, "grad_norm": 1.1946950173706743, "learning_rate": 1.0626621508040074e-05, "loss": 0.6110827326774597, "step": 4165 }, { "epoch": 1.0126397666504618, "grad_norm": 1.2411624526674023, "learning_rate": 1.0622609918655302e-05, "loss": 0.6196953654289246, "step": 4166 }, { "epoch": 1.0128828390860476, "grad_norm": 1.935952558647485, "learning_rate": 1.0618598228682417e-05, "loss": 0.745265007019043, "step": 4167 }, { "epoch": 1.0131259115216333, "grad_norm": 1.352760886644761, "learning_rate": 1.0614586438769536e-05, "loss": 0.654370903968811, "step": 4168 }, { "epoch": 1.0133689839572193, "grad_norm": 1.4976541835149495, "learning_rate": 1.0610574549564805e-05, "loss": 0.6052916646003723, "step": 4169 }, { "epoch": 1.013612056392805, "grad_norm": 1.292303385343712, "learning_rate": 1.0606562561716378e-05, "loss": 0.4807875156402588, "step": 4170 }, { "epoch": 1.0138551288283908, "grad_norm": 1.398746399197542, "learning_rate": 1.0602550475872428e-05, "loss": 0.5628845691680908, "step": 4171 }, { "epoch": 1.0140982012639768, "grad_norm": 1.5382401818268467, "learning_rate": 1.059853829268114e-05, "loss": 0.5427473783493042, "step": 4172 }, { "epoch": 1.0143412736995625, "grad_norm": 1.341515279597582, "learning_rate": 1.059452601279072e-05, "loss": 0.5855149030685425, "step": 4173 }, { "epoch": 1.0145843461351483, "grad_norm": 1.3752742947022893, "learning_rate": 1.059051363684939e-05, "loss": 0.620376467704773, "step": 4174 }, { "epoch": 1.014827418570734, "grad_norm": 1.2948416671786867, "learning_rate": 1.0586501165505378e-05, "loss": 0.623253345489502, "step": 4175 }, { "epoch": 1.01507049100632, "grad_norm": 1.3331143209305005, "learning_rate": 1.0582488599406937e-05, "loss": 0.6035810708999634, "step": 4176 }, { "epoch": 1.0153135634419057, "grad_norm": 1.5314936405767614, "learning_rate": 1.0578475939202333e-05, "loss": 0.6739188432693481, "step": 4177 }, { "epoch": 1.0155566358774915, "grad_norm": 1.3359731428761168, "learning_rate": 1.0574463185539847e-05, "loss": 0.625745415687561, "step": 4178 }, { "epoch": 1.0157997083130772, "grad_norm": 1.2681912191326719, "learning_rate": 1.0570450339067773e-05, "loss": 0.6058375835418701, "step": 4179 }, { "epoch": 1.0160427807486632, "grad_norm": 1.4969004842046825, "learning_rate": 1.0566437400434421e-05, "loss": 0.5924068093299866, "step": 4180 }, { "epoch": 1.016285853184249, "grad_norm": 1.3340219203342356, "learning_rate": 1.056242437028812e-05, "loss": 0.5685099959373474, "step": 4181 }, { "epoch": 1.0165289256198347, "grad_norm": 1.373508883432466, "learning_rate": 1.055841124927721e-05, "loss": 0.6829901933670044, "step": 4182 }, { "epoch": 1.0167719980554206, "grad_norm": 1.2101324182993025, "learning_rate": 1.0554398038050044e-05, "loss": 0.42198535799980164, "step": 4183 }, { "epoch": 1.0170150704910064, "grad_norm": 1.3121846661496375, "learning_rate": 1.055038473725499e-05, "loss": 0.5272603034973145, "step": 4184 }, { "epoch": 1.0172581429265921, "grad_norm": 1.4536931759804113, "learning_rate": 1.0546371347540439e-05, "loss": 0.5473986864089966, "step": 4185 }, { "epoch": 1.0175012153621779, "grad_norm": 1.2839167109548575, "learning_rate": 1.054235786955478e-05, "loss": 0.5241931676864624, "step": 4186 }, { "epoch": 1.0177442877977638, "grad_norm": 1.4811744235967415, "learning_rate": 1.0538344303946432e-05, "loss": 0.5829373002052307, "step": 4187 }, { "epoch": 1.0179873602333496, "grad_norm": 1.5765953504706447, "learning_rate": 1.053433065136382e-05, "loss": 0.6425580978393555, "step": 4188 }, { "epoch": 1.0182304326689353, "grad_norm": 1.4431220337703754, "learning_rate": 1.0530316912455387e-05, "loss": 0.5047342777252197, "step": 4189 }, { "epoch": 1.018473505104521, "grad_norm": 1.2961018836561808, "learning_rate": 1.0526303087869585e-05, "loss": 0.5016952157020569, "step": 4190 }, { "epoch": 1.018716577540107, "grad_norm": 1.4390166797083286, "learning_rate": 1.0522289178254883e-05, "loss": 0.7844235301017761, "step": 4191 }, { "epoch": 1.0189596499756928, "grad_norm": 1.6178701647439466, "learning_rate": 1.0518275184259767e-05, "loss": 0.708849310874939, "step": 4192 }, { "epoch": 1.0192027224112785, "grad_norm": 1.2839254259366801, "learning_rate": 1.0514261106532728e-05, "loss": 0.46745216846466064, "step": 4193 }, { "epoch": 1.0194457948468643, "grad_norm": 1.2911976540352792, "learning_rate": 1.051024694572228e-05, "loss": 0.5043485164642334, "step": 4194 }, { "epoch": 1.0196888672824502, "grad_norm": 1.2855544415586457, "learning_rate": 1.0506232702476944e-05, "loss": 0.496452271938324, "step": 4195 }, { "epoch": 1.019931939718036, "grad_norm": 1.5517629517334786, "learning_rate": 1.0502218377445252e-05, "loss": 0.5415823459625244, "step": 4196 }, { "epoch": 1.0201750121536217, "grad_norm": 1.4691795108752044, "learning_rate": 1.0498203971275762e-05, "loss": 0.5927358865737915, "step": 4197 }, { "epoch": 1.0204180845892077, "grad_norm": 1.482874565447108, "learning_rate": 1.0494189484617033e-05, "loss": 0.7212754487991333, "step": 4198 }, { "epoch": 1.0206611570247934, "grad_norm": 1.553624614217781, "learning_rate": 1.049017491811764e-05, "loss": 0.7246096134185791, "step": 4199 }, { "epoch": 1.0209042294603792, "grad_norm": 1.5527846066755677, "learning_rate": 1.048616027242617e-05, "loss": 0.5296695232391357, "step": 4200 }, { "epoch": 1.021147301895965, "grad_norm": 1.5644281720822804, "learning_rate": 1.048214554819123e-05, "loss": 0.6812100410461426, "step": 4201 }, { "epoch": 1.0213903743315509, "grad_norm": 1.729244193010369, "learning_rate": 1.0478130746061425e-05, "loss": 0.5348995923995972, "step": 4202 }, { "epoch": 1.0216334467671366, "grad_norm": 1.6037113849107514, "learning_rate": 1.0474115866685392e-05, "loss": 0.6201683878898621, "step": 4203 }, { "epoch": 1.0218765192027224, "grad_norm": 1.2326881732946096, "learning_rate": 1.0470100910711763e-05, "loss": 0.49071741104125977, "step": 4204 }, { "epoch": 1.0221195916383081, "grad_norm": 1.4040009956605795, "learning_rate": 1.046608587878919e-05, "loss": 0.5378491878509521, "step": 4205 }, { "epoch": 1.022362664073894, "grad_norm": 1.4079043148433674, "learning_rate": 1.0462070771566339e-05, "loss": 0.5706679821014404, "step": 4206 }, { "epoch": 1.0226057365094798, "grad_norm": 1.4098208948959572, "learning_rate": 1.0458055589691887e-05, "loss": 0.5221810340881348, "step": 4207 }, { "epoch": 1.0228488089450656, "grad_norm": 1.8051657703055553, "learning_rate": 1.0454040333814519e-05, "loss": 0.6556100845336914, "step": 4208 }, { "epoch": 1.0230918813806515, "grad_norm": 1.5803136095031423, "learning_rate": 1.0450025004582938e-05, "loss": 0.560401201248169, "step": 4209 }, { "epoch": 1.0233349538162373, "grad_norm": 1.1637029005103539, "learning_rate": 1.0446009602645852e-05, "loss": 0.5587201118469238, "step": 4210 }, { "epoch": 1.023578026251823, "grad_norm": 1.5126968140554278, "learning_rate": 1.0441994128651988e-05, "loss": 0.7516224384307861, "step": 4211 }, { "epoch": 1.0238210986874088, "grad_norm": 1.4096557889535581, "learning_rate": 1.043797858325008e-05, "loss": 0.6307817101478577, "step": 4212 }, { "epoch": 1.0240641711229947, "grad_norm": 1.4658414976593541, "learning_rate": 1.0433962967088871e-05, "loss": 0.6921050548553467, "step": 4213 }, { "epoch": 1.0243072435585805, "grad_norm": 1.3423678320758599, "learning_rate": 1.0429947280817126e-05, "loss": 0.48049789667129517, "step": 4214 }, { "epoch": 1.0245503159941662, "grad_norm": 1.447751715746082, "learning_rate": 1.0425931525083611e-05, "loss": 0.47232526540756226, "step": 4215 }, { "epoch": 1.024793388429752, "grad_norm": 1.5806108066118534, "learning_rate": 1.0421915700537107e-05, "loss": 0.4491407871246338, "step": 4216 }, { "epoch": 1.025036460865338, "grad_norm": 1.3996795839164131, "learning_rate": 1.0417899807826402e-05, "loss": 0.5356093645095825, "step": 4217 }, { "epoch": 1.0252795333009237, "grad_norm": 1.4229268246338003, "learning_rate": 1.0413883847600307e-05, "loss": 0.5987250208854675, "step": 4218 }, { "epoch": 1.0255226057365094, "grad_norm": 1.3665651711700633, "learning_rate": 1.0409867820507631e-05, "loss": 0.5473041534423828, "step": 4219 }, { "epoch": 1.0257656781720952, "grad_norm": 1.4063198579461453, "learning_rate": 1.0405851727197198e-05, "loss": 0.7035103440284729, "step": 4220 }, { "epoch": 1.0260087506076812, "grad_norm": 1.3999913307971652, "learning_rate": 1.0401835568317842e-05, "loss": 0.5832536220550537, "step": 4221 }, { "epoch": 1.026251823043267, "grad_norm": 1.3845691073071411, "learning_rate": 1.0397819344518411e-05, "loss": 0.5358266830444336, "step": 4222 }, { "epoch": 1.0264948954788526, "grad_norm": 1.4055336919428618, "learning_rate": 1.0393803056447765e-05, "loss": 0.6444931030273438, "step": 4223 }, { "epoch": 1.0267379679144386, "grad_norm": 1.5839218889595288, "learning_rate": 1.0389786704754765e-05, "loss": 0.6510050296783447, "step": 4224 }, { "epoch": 1.0269810403500244, "grad_norm": 1.6138610073514648, "learning_rate": 1.0385770290088286e-05, "loss": 0.6019341945648193, "step": 4225 }, { "epoch": 1.02722411278561, "grad_norm": 1.5953317988326552, "learning_rate": 1.0381753813097224e-05, "loss": 0.6469962000846863, "step": 4226 }, { "epoch": 1.0274671852211958, "grad_norm": 1.3149641909532375, "learning_rate": 1.037773727443047e-05, "loss": 0.6458128690719604, "step": 4227 }, { "epoch": 1.0277102576567818, "grad_norm": 1.7105647347080373, "learning_rate": 1.0373720674736935e-05, "loss": 0.7569371461868286, "step": 4228 }, { "epoch": 1.0279533300923676, "grad_norm": 1.393755610850855, "learning_rate": 1.036970401466553e-05, "loss": 0.6129064559936523, "step": 4229 }, { "epoch": 1.0281964025279533, "grad_norm": 1.3756114419307675, "learning_rate": 1.0365687294865185e-05, "loss": 0.5283613204956055, "step": 4230 }, { "epoch": 1.028439474963539, "grad_norm": 1.5223517375141742, "learning_rate": 1.0361670515984839e-05, "loss": 0.5370105504989624, "step": 4231 }, { "epoch": 1.028682547399125, "grad_norm": 1.3914370992635654, "learning_rate": 1.0357653678673437e-05, "loss": 0.6459052562713623, "step": 4232 }, { "epoch": 1.0289256198347108, "grad_norm": 1.282551635187669, "learning_rate": 1.0353636783579927e-05, "loss": 0.5675293207168579, "step": 4233 }, { "epoch": 1.0291686922702965, "grad_norm": 1.324120169650166, "learning_rate": 1.0349619831353285e-05, "loss": 0.5650746822357178, "step": 4234 }, { "epoch": 1.0294117647058822, "grad_norm": 1.44649256890644, "learning_rate": 1.034560282264248e-05, "loss": 0.5767570734024048, "step": 4235 }, { "epoch": 1.0296548371414682, "grad_norm": 2.697115216745666, "learning_rate": 1.0341585758096495e-05, "loss": 0.6091094017028809, "step": 4236 }, { "epoch": 1.029897909577054, "grad_norm": 1.2743898633200192, "learning_rate": 1.0337568638364322e-05, "loss": 0.6931119561195374, "step": 4237 }, { "epoch": 1.0301409820126397, "grad_norm": 1.451401832472921, "learning_rate": 1.0333551464094962e-05, "loss": 0.6286271214485168, "step": 4238 }, { "epoch": 1.0303840544482257, "grad_norm": 1.4387198315541063, "learning_rate": 1.0329534235937425e-05, "loss": 0.49176570773124695, "step": 4239 }, { "epoch": 1.0306271268838114, "grad_norm": 1.3363915751360786, "learning_rate": 1.0325516954540732e-05, "loss": 0.46616682410240173, "step": 4240 }, { "epoch": 1.0308701993193972, "grad_norm": 1.6417948791434736, "learning_rate": 1.0321499620553905e-05, "loss": 0.5996805429458618, "step": 4241 }, { "epoch": 1.031113271754983, "grad_norm": 1.3984983486212417, "learning_rate": 1.0317482234625983e-05, "loss": 0.5061130523681641, "step": 4242 }, { "epoch": 1.0313563441905689, "grad_norm": 1.5242987365913503, "learning_rate": 1.0313464797406014e-05, "loss": 0.522545337677002, "step": 4243 }, { "epoch": 1.0315994166261546, "grad_norm": 1.2878689393690093, "learning_rate": 1.0309447309543044e-05, "loss": 0.5574857592582703, "step": 4244 }, { "epoch": 1.0318424890617404, "grad_norm": 1.3462540544116193, "learning_rate": 1.0305429771686135e-05, "loss": 0.6553404331207275, "step": 4245 }, { "epoch": 1.032085561497326, "grad_norm": 1.4815451624213496, "learning_rate": 1.0301412184484356e-05, "loss": 0.6032779216766357, "step": 4246 }, { "epoch": 1.032328633932912, "grad_norm": 1.4153102499916963, "learning_rate": 1.0297394548586785e-05, "loss": 0.5540597438812256, "step": 4247 }, { "epoch": 1.0325717063684978, "grad_norm": 1.1643513395477025, "learning_rate": 1.0293376864642506e-05, "loss": 0.6531463861465454, "step": 4248 }, { "epoch": 1.0328147788040836, "grad_norm": 1.5562065718203282, "learning_rate": 1.0289359133300612e-05, "loss": 0.6873975396156311, "step": 4249 }, { "epoch": 1.0330578512396693, "grad_norm": 1.3925465471102716, "learning_rate": 1.0285341355210197e-05, "loss": 0.5276810526847839, "step": 4250 }, { "epoch": 1.0333009236752553, "grad_norm": 1.7978506494029525, "learning_rate": 1.0281323531020375e-05, "loss": 0.6486960649490356, "step": 4251 }, { "epoch": 1.033543996110841, "grad_norm": 1.5271987076572493, "learning_rate": 1.0277305661380258e-05, "loss": 0.6399163007736206, "step": 4252 }, { "epoch": 1.0337870685464268, "grad_norm": 1.469021692857841, "learning_rate": 1.0273287746938974e-05, "loss": 0.656109094619751, "step": 4253 }, { "epoch": 1.0340301409820127, "grad_norm": 1.489063064940581, "learning_rate": 1.0269269788345643e-05, "loss": 0.7575676441192627, "step": 4254 }, { "epoch": 1.0342732134175985, "grad_norm": 1.5673037030336994, "learning_rate": 1.0265251786249408e-05, "loss": 0.514525294303894, "step": 4255 }, { "epoch": 1.0345162858531842, "grad_norm": 1.2459619138374165, "learning_rate": 1.026123374129941e-05, "loss": 0.50077223777771, "step": 4256 }, { "epoch": 1.03475935828877, "grad_norm": 1.6013002553686613, "learning_rate": 1.02572156541448e-05, "loss": 0.5807116627693176, "step": 4257 }, { "epoch": 1.035002430724356, "grad_norm": 1.5294006017658377, "learning_rate": 1.0253197525434734e-05, "loss": 0.6903767585754395, "step": 4258 }, { "epoch": 1.0352455031599417, "grad_norm": 1.8723502641323009, "learning_rate": 1.0249179355818381e-05, "loss": 0.46293607354164124, "step": 4259 }, { "epoch": 1.0354885755955274, "grad_norm": 1.2880095494890333, "learning_rate": 1.0245161145944906e-05, "loss": 0.5472249984741211, "step": 4260 }, { "epoch": 1.0357316480311132, "grad_norm": 1.3024412639319105, "learning_rate": 1.0241142896463492e-05, "loss": 0.5253669023513794, "step": 4261 }, { "epoch": 1.0359747204666991, "grad_norm": 1.3002837573816224, "learning_rate": 1.0237124608023314e-05, "loss": 0.5033007264137268, "step": 4262 }, { "epoch": 1.0362177929022849, "grad_norm": 1.4884089142612933, "learning_rate": 1.0233106281273575e-05, "loss": 0.6836128234863281, "step": 4263 }, { "epoch": 1.0364608653378706, "grad_norm": 1.8521356504429125, "learning_rate": 1.0229087916863458e-05, "loss": 0.6379534602165222, "step": 4264 }, { "epoch": 1.0367039377734566, "grad_norm": 1.3800970038128613, "learning_rate": 1.0225069515442173e-05, "loss": 0.5293596386909485, "step": 4265 }, { "epoch": 1.0369470102090423, "grad_norm": 1.4270008629959479, "learning_rate": 1.0221051077658922e-05, "loss": 0.5267210006713867, "step": 4266 }, { "epoch": 1.037190082644628, "grad_norm": 1.413296271811314, "learning_rate": 1.0217032604162922e-05, "loss": 0.5677909255027771, "step": 4267 }, { "epoch": 1.0374331550802138, "grad_norm": 1.3514027186874578, "learning_rate": 1.0213014095603398e-05, "loss": 0.7087143063545227, "step": 4268 }, { "epoch": 1.0376762275157998, "grad_norm": 1.410381423443488, "learning_rate": 1.0208995552629568e-05, "loss": 0.5718128085136414, "step": 4269 }, { "epoch": 1.0379192999513855, "grad_norm": 1.5263301685873816, "learning_rate": 1.0204976975890664e-05, "loss": 0.8000034093856812, "step": 4270 }, { "epoch": 1.0381623723869713, "grad_norm": 1.4120941954740136, "learning_rate": 1.0200958366035926e-05, "loss": 0.6176852583885193, "step": 4271 }, { "epoch": 1.038405444822557, "grad_norm": 1.56969898692807, "learning_rate": 1.0196939723714598e-05, "loss": 0.6509557962417603, "step": 4272 }, { "epoch": 1.038648517258143, "grad_norm": 1.401721916416015, "learning_rate": 1.019292104957592e-05, "loss": 0.5263798832893372, "step": 4273 }, { "epoch": 1.0388915896937287, "grad_norm": 1.52109651908678, "learning_rate": 1.0188902344269146e-05, "loss": 0.7127325534820557, "step": 4274 }, { "epoch": 1.0391346621293145, "grad_norm": 1.5534505835163008, "learning_rate": 1.0184883608443534e-05, "loss": 0.5579734444618225, "step": 4275 }, { "epoch": 1.0393777345649002, "grad_norm": 1.26008996775576, "learning_rate": 1.018086484274835e-05, "loss": 0.5859545469284058, "step": 4276 }, { "epoch": 1.0396208070004862, "grad_norm": 1.154666094899605, "learning_rate": 1.0176846047832855e-05, "loss": 0.6119795441627502, "step": 4277 }, { "epoch": 1.039863879436072, "grad_norm": 1.4605849659844103, "learning_rate": 1.0172827224346323e-05, "loss": 0.7118932008743286, "step": 4278 }, { "epoch": 1.0401069518716577, "grad_norm": 1.4424723773631722, "learning_rate": 1.0168808372938032e-05, "loss": 0.5972070693969727, "step": 4279 }, { "epoch": 1.0403500243072437, "grad_norm": 1.423009469968282, "learning_rate": 1.0164789494257263e-05, "loss": 0.5551578998565674, "step": 4280 }, { "epoch": 1.0405930967428294, "grad_norm": 1.4765770676739904, "learning_rate": 1.01607705889533e-05, "loss": 0.47294336557388306, "step": 4281 }, { "epoch": 1.0408361691784151, "grad_norm": 1.3776892614499499, "learning_rate": 1.0156751657675432e-05, "loss": 0.531489908695221, "step": 4282 }, { "epoch": 1.041079241614001, "grad_norm": 1.393137558450675, "learning_rate": 1.0152732701072951e-05, "loss": 0.6352052688598633, "step": 4283 }, { "epoch": 1.0413223140495869, "grad_norm": 1.633452840181021, "learning_rate": 1.014871371979516e-05, "loss": 0.46850067377090454, "step": 4284 }, { "epoch": 1.0415653864851726, "grad_norm": 1.270599519732637, "learning_rate": 1.0144694714491356e-05, "loss": 0.49937522411346436, "step": 4285 }, { "epoch": 1.0418084589207584, "grad_norm": 1.3372936449459571, "learning_rate": 1.0140675685810845e-05, "loss": 0.6294598579406738, "step": 4286 }, { "epoch": 1.042051531356344, "grad_norm": 1.4920515447428766, "learning_rate": 1.013665663440294e-05, "loss": 0.7863240838050842, "step": 4287 }, { "epoch": 1.04229460379193, "grad_norm": 1.3671578489986405, "learning_rate": 1.0132637560916952e-05, "loss": 0.5748436450958252, "step": 4288 }, { "epoch": 1.0425376762275158, "grad_norm": 1.61350774820259, "learning_rate": 1.0128618466002199e-05, "loss": 0.7075313329696655, "step": 4289 }, { "epoch": 1.0427807486631016, "grad_norm": 1.5689977554285912, "learning_rate": 1.0124599350307997e-05, "loss": 0.6384955048561096, "step": 4290 }, { "epoch": 1.0430238210986875, "grad_norm": 1.4142765053186774, "learning_rate": 1.0120580214483675e-05, "loss": 0.6213991045951843, "step": 4291 }, { "epoch": 1.0432668935342733, "grad_norm": 1.4670278888385644, "learning_rate": 1.0116561059178557e-05, "loss": 0.6542593240737915, "step": 4292 }, { "epoch": 1.043509965969859, "grad_norm": 1.5971227143212998, "learning_rate": 1.0112541885041973e-05, "loss": 0.5805336833000183, "step": 4293 }, { "epoch": 1.0437530384054448, "grad_norm": 1.5587112396943186, "learning_rate": 1.0108522692723256e-05, "loss": 0.7053854465484619, "step": 4294 }, { "epoch": 1.0439961108410307, "grad_norm": 1.8499064559564387, "learning_rate": 1.0104503482871741e-05, "loss": 0.6922029256820679, "step": 4295 }, { "epoch": 1.0442391832766165, "grad_norm": 1.5686049931982304, "learning_rate": 1.0100484256136772e-05, "loss": 0.4964848458766937, "step": 4296 }, { "epoch": 1.0444822557122022, "grad_norm": 1.2903750886343073, "learning_rate": 1.0096465013167683e-05, "loss": 0.46670466661453247, "step": 4297 }, { "epoch": 1.044725328147788, "grad_norm": 1.5015736414602845, "learning_rate": 1.0092445754613823e-05, "loss": 0.6874164342880249, "step": 4298 }, { "epoch": 1.044968400583374, "grad_norm": 1.2368626524749127, "learning_rate": 1.0088426481124536e-05, "loss": 0.571291983127594, "step": 4299 }, { "epoch": 1.0452114730189597, "grad_norm": 1.3115167760052993, "learning_rate": 1.0084407193349172e-05, "loss": 0.6131864786148071, "step": 4300 }, { "epoch": 1.0454545454545454, "grad_norm": 1.382271542638267, "learning_rate": 1.0080387891937085e-05, "loss": 0.5338269472122192, "step": 4301 }, { "epoch": 1.0456976178901312, "grad_norm": 1.759170108364299, "learning_rate": 1.0076368577537626e-05, "loss": 0.6821557283401489, "step": 4302 }, { "epoch": 1.0459406903257171, "grad_norm": 1.5330896632342588, "learning_rate": 1.0072349250800146e-05, "loss": 0.5315020084381104, "step": 4303 }, { "epoch": 1.0461837627613029, "grad_norm": 1.210506185904285, "learning_rate": 1.0068329912374012e-05, "loss": 0.6181321144104004, "step": 4304 }, { "epoch": 1.0464268351968886, "grad_norm": 1.3910547720243776, "learning_rate": 1.0064310562908576e-05, "loss": 0.6929197311401367, "step": 4305 }, { "epoch": 1.0466699076324746, "grad_norm": 1.361943546732413, "learning_rate": 1.0060291203053206e-05, "loss": 0.5610368251800537, "step": 4306 }, { "epoch": 1.0469129800680603, "grad_norm": 1.4485330856975873, "learning_rate": 1.0056271833457257e-05, "loss": 0.6118466854095459, "step": 4307 }, { "epoch": 1.047156052503646, "grad_norm": 1.774162124561737, "learning_rate": 1.00522524547701e-05, "loss": 0.7722564339637756, "step": 4308 }, { "epoch": 1.0473991249392318, "grad_norm": 1.5250936339139165, "learning_rate": 1.0048233067641098e-05, "loss": 0.6089572906494141, "step": 4309 }, { "epoch": 1.0476421973748178, "grad_norm": 1.2639543269505418, "learning_rate": 1.004421367271962e-05, "loss": 0.5504823923110962, "step": 4310 }, { "epoch": 1.0478852698104035, "grad_norm": 1.8404550068899888, "learning_rate": 1.004019427065503e-05, "loss": 0.689555287361145, "step": 4311 }, { "epoch": 1.0481283422459893, "grad_norm": 1.645712870239147, "learning_rate": 1.0036174862096706e-05, "loss": 0.6746020317077637, "step": 4312 }, { "epoch": 1.048371414681575, "grad_norm": 1.386070766097781, "learning_rate": 1.0032155447694011e-05, "loss": 0.5839145183563232, "step": 4313 }, { "epoch": 1.048614487117161, "grad_norm": 1.4473294742671836, "learning_rate": 1.0028136028096325e-05, "loss": 0.6312017440795898, "step": 4314 }, { "epoch": 1.0488575595527467, "grad_norm": 1.311985164739139, "learning_rate": 1.002411660395301e-05, "loss": 0.6773381233215332, "step": 4315 }, { "epoch": 1.0491006319883325, "grad_norm": 1.4040942763017332, "learning_rate": 1.0020097175913448e-05, "loss": 0.46327289938926697, "step": 4316 }, { "epoch": 1.0493437044239182, "grad_norm": 1.3155617345551531, "learning_rate": 1.0016077744627012e-05, "loss": 0.5674548149108887, "step": 4317 }, { "epoch": 1.0495867768595042, "grad_norm": 1.339385455266433, "learning_rate": 1.0012058310743077e-05, "loss": 0.5395925045013428, "step": 4318 }, { "epoch": 1.04982984929509, "grad_norm": 1.749677487407595, "learning_rate": 1.0008038874911008e-05, "loss": 0.5870517492294312, "step": 4319 }, { "epoch": 1.0500729217306757, "grad_norm": 1.4442239203012521, "learning_rate": 1.0004019437780193e-05, "loss": 0.6661641597747803, "step": 4320 }, { "epoch": 1.0503159941662616, "grad_norm": 1.417267747624533, "learning_rate": 1e-05, "loss": 0.6371252536773682, "step": 4321 }, { "epoch": 1.0505590666018474, "grad_norm": 1.4187401129285382, "learning_rate": 9.99598056221981e-06, "loss": 0.592154324054718, "step": 4322 }, { "epoch": 1.0508021390374331, "grad_norm": 1.4669270132730179, "learning_rate": 9.991961125088994e-06, "loss": 0.6651446223258972, "step": 4323 }, { "epoch": 1.0510452114730189, "grad_norm": 1.7584298893026602, "learning_rate": 9.987941689256928e-06, "loss": 0.56876140832901, "step": 4324 }, { "epoch": 1.0512882839086048, "grad_norm": 1.4710214394982029, "learning_rate": 9.983922255372991e-06, "loss": 0.42117977142333984, "step": 4325 }, { "epoch": 1.0515313563441906, "grad_norm": 1.3841859375749361, "learning_rate": 9.979902824086552e-06, "loss": 0.7006247639656067, "step": 4326 }, { "epoch": 1.0517744287797763, "grad_norm": 1.716743286704126, "learning_rate": 9.97588339604699e-06, "loss": 0.5627484321594238, "step": 4327 }, { "epoch": 1.052017501215362, "grad_norm": 1.5632256131889815, "learning_rate": 9.971863971903678e-06, "loss": 0.6936711072921753, "step": 4328 }, { "epoch": 1.052260573650948, "grad_norm": 1.3216914763887952, "learning_rate": 9.96784455230599e-06, "loss": 0.5313664674758911, "step": 4329 }, { "epoch": 1.0525036460865338, "grad_norm": 1.428495071929408, "learning_rate": 9.963825137903297e-06, "loss": 0.6941556930541992, "step": 4330 }, { "epoch": 1.0527467185221195, "grad_norm": 1.4297151148648546, "learning_rate": 9.959805729344973e-06, "loss": 0.6996138691902161, "step": 4331 }, { "epoch": 1.0529897909577053, "grad_norm": 1.522564662230702, "learning_rate": 9.955786327280385e-06, "loss": 0.5533925294876099, "step": 4332 }, { "epoch": 1.0532328633932913, "grad_norm": 1.7495942509409121, "learning_rate": 9.951766932358907e-06, "loss": 0.612756609916687, "step": 4333 }, { "epoch": 1.053475935828877, "grad_norm": 1.6021643766667524, "learning_rate": 9.947747545229904e-06, "loss": 0.5131350755691528, "step": 4334 }, { "epoch": 1.0537190082644627, "grad_norm": 1.5993290645221205, "learning_rate": 9.943728166542745e-06, "loss": 0.6292746663093567, "step": 4335 }, { "epoch": 1.0539620807000487, "grad_norm": 1.3214399057100634, "learning_rate": 9.939708796946799e-06, "loss": 0.5237895250320435, "step": 4336 }, { "epoch": 1.0542051531356345, "grad_norm": 1.6280447998277825, "learning_rate": 9.935689437091425e-06, "loss": 0.6214008331298828, "step": 4337 }, { "epoch": 1.0544482255712202, "grad_norm": 1.2556378049635022, "learning_rate": 9.931670087625992e-06, "loss": 0.4255106747150421, "step": 4338 }, { "epoch": 1.054691298006806, "grad_norm": 1.7288890872180647, "learning_rate": 9.927650749199857e-06, "loss": 0.6478762626647949, "step": 4339 }, { "epoch": 1.054934370442392, "grad_norm": 1.4403781687851867, "learning_rate": 9.92363142246238e-06, "loss": 0.5409442186355591, "step": 4340 }, { "epoch": 1.0551774428779777, "grad_norm": 1.479424053353634, "learning_rate": 9.919612108062919e-06, "loss": 0.48689714074134827, "step": 4341 }, { "epoch": 1.0554205153135634, "grad_norm": 1.2877925867688664, "learning_rate": 9.91559280665083e-06, "loss": 0.6132897138595581, "step": 4342 }, { "epoch": 1.0556635877491491, "grad_norm": 1.654873485930006, "learning_rate": 9.911573518875466e-06, "loss": 0.6075420379638672, "step": 4343 }, { "epoch": 1.0559066601847351, "grad_norm": 1.34823661048539, "learning_rate": 9.907554245386179e-06, "loss": 0.5556469559669495, "step": 4344 }, { "epoch": 1.0561497326203209, "grad_norm": 1.7364497236874925, "learning_rate": 9.903534986832319e-06, "loss": 0.5499878525733948, "step": 4345 }, { "epoch": 1.0563928050559066, "grad_norm": 1.3133342618601263, "learning_rate": 9.899515743863231e-06, "loss": 0.6320040822029114, "step": 4346 }, { "epoch": 1.0566358774914926, "grad_norm": 1.3705478019193857, "learning_rate": 9.89549651712826e-06, "loss": 0.46836578845977783, "step": 4347 }, { "epoch": 1.0568789499270783, "grad_norm": 1.4369512940820377, "learning_rate": 9.891477307276749e-06, "loss": 0.5651748180389404, "step": 4348 }, { "epoch": 1.057122022362664, "grad_norm": 1.4738699570144826, "learning_rate": 9.887458114958032e-06, "loss": 0.6375240683555603, "step": 4349 }, { "epoch": 1.0573650947982498, "grad_norm": 1.347736329296389, "learning_rate": 9.883438940821444e-06, "loss": 0.5295228958129883, "step": 4350 }, { "epoch": 1.0576081672338358, "grad_norm": 1.493779569492715, "learning_rate": 9.879419785516325e-06, "loss": 0.6647850871086121, "step": 4351 }, { "epoch": 1.0578512396694215, "grad_norm": 1.496379896487364, "learning_rate": 9.875400649692004e-06, "loss": 0.6845108270645142, "step": 4352 }, { "epoch": 1.0580943121050073, "grad_norm": 1.3918286468815806, "learning_rate": 9.871381533997805e-06, "loss": 0.7417435646057129, "step": 4353 }, { "epoch": 1.058337384540593, "grad_norm": 1.3968462163841742, "learning_rate": 9.867362439083051e-06, "loss": 0.5650396943092346, "step": 4354 }, { "epoch": 1.058580456976179, "grad_norm": 1.500897681184572, "learning_rate": 9.863343365597064e-06, "loss": 0.5037485361099243, "step": 4355 }, { "epoch": 1.0588235294117647, "grad_norm": 1.435321421614887, "learning_rate": 9.859324314189158e-06, "loss": 0.5504486560821533, "step": 4356 }, { "epoch": 1.0590666018473505, "grad_norm": 1.3365316675812333, "learning_rate": 9.855305285508649e-06, "loss": 0.49435847997665405, "step": 4357 }, { "epoch": 1.0593096742829362, "grad_norm": 1.646260484764111, "learning_rate": 9.851286280204842e-06, "loss": 0.5609452128410339, "step": 4358 }, { "epoch": 1.0595527467185222, "grad_norm": 1.3536163132938666, "learning_rate": 9.847267298927049e-06, "loss": 0.6455153226852417, "step": 4359 }, { "epoch": 1.059795819154108, "grad_norm": 1.4342750191816342, "learning_rate": 9.84324834232457e-06, "loss": 0.47025033831596375, "step": 4360 }, { "epoch": 1.0600388915896937, "grad_norm": 1.582089942379999, "learning_rate": 9.839229411046704e-06, "loss": 0.5982058048248291, "step": 4361 }, { "epoch": 1.0602819640252796, "grad_norm": 1.3647634220568965, "learning_rate": 9.83521050574274e-06, "loss": 0.5727245807647705, "step": 4362 }, { "epoch": 1.0605250364608654, "grad_norm": 1.464387476025694, "learning_rate": 9.83119162706197e-06, "loss": 0.6031229496002197, "step": 4363 }, { "epoch": 1.0607681088964511, "grad_norm": 1.6558402585615597, "learning_rate": 9.827172775653679e-06, "loss": 0.6574050188064575, "step": 4364 }, { "epoch": 1.0610111813320369, "grad_norm": 1.3734124530834462, "learning_rate": 9.823153952167148e-06, "loss": 0.6378228664398193, "step": 4365 }, { "epoch": 1.0612542537676228, "grad_norm": 1.644802293269602, "learning_rate": 9.819135157251656e-06, "loss": 0.6739882230758667, "step": 4366 }, { "epoch": 1.0614973262032086, "grad_norm": 1.479985996320087, "learning_rate": 9.815116391556466e-06, "loss": 0.6380025744438171, "step": 4367 }, { "epoch": 1.0617403986387943, "grad_norm": 1.4318616265174464, "learning_rate": 9.811097655730856e-06, "loss": 0.4200194478034973, "step": 4368 }, { "epoch": 1.06198347107438, "grad_norm": 1.4727936204517431, "learning_rate": 9.807078950424084e-06, "loss": 0.5499867796897888, "step": 4369 }, { "epoch": 1.062226543509966, "grad_norm": 1.388871695746551, "learning_rate": 9.803060276285405e-06, "loss": 0.5729115009307861, "step": 4370 }, { "epoch": 1.0624696159455518, "grad_norm": 1.6179754260086554, "learning_rate": 9.799041633964075e-06, "loss": 0.4686933755874634, "step": 4371 }, { "epoch": 1.0627126883811375, "grad_norm": 1.3677512026418606, "learning_rate": 9.795023024109339e-06, "loss": 0.6734212636947632, "step": 4372 }, { "epoch": 1.0629557608167235, "grad_norm": 1.4834690385316711, "learning_rate": 9.791004447370439e-06, "loss": 0.5225131511688232, "step": 4373 }, { "epoch": 1.0631988332523092, "grad_norm": 1.6686566832139313, "learning_rate": 9.786985904396609e-06, "loss": 0.6681995391845703, "step": 4374 }, { "epoch": 1.063441905687895, "grad_norm": 1.4266421008277306, "learning_rate": 9.782967395837078e-06, "loss": 0.7611777782440186, "step": 4375 }, { "epoch": 1.0636849781234807, "grad_norm": 1.3572169881511305, "learning_rate": 9.77894892234108e-06, "loss": 0.7137064933776855, "step": 4376 }, { "epoch": 1.0639280505590667, "grad_norm": 1.519480172374902, "learning_rate": 9.77493048455783e-06, "loss": 0.43200016021728516, "step": 4377 }, { "epoch": 1.0641711229946524, "grad_norm": 1.4708812086977918, "learning_rate": 9.770912083136544e-06, "loss": 0.4991609752178192, "step": 4378 }, { "epoch": 1.0644141954302382, "grad_norm": 1.354439711324061, "learning_rate": 9.76689371872643e-06, "loss": 0.6499302983283997, "step": 4379 }, { "epoch": 1.064657267865824, "grad_norm": 1.4566210916881275, "learning_rate": 9.762875391976688e-06, "loss": 0.6372556686401367, "step": 4380 }, { "epoch": 1.06490034030141, "grad_norm": 1.5848510513258887, "learning_rate": 9.758857103536513e-06, "loss": 0.5133183002471924, "step": 4381 }, { "epoch": 1.0651434127369956, "grad_norm": 1.3105513661354637, "learning_rate": 9.754838854055097e-06, "loss": 0.400288462638855, "step": 4382 }, { "epoch": 1.0653864851725814, "grad_norm": 1.2912689579234304, "learning_rate": 9.75082064418162e-06, "loss": 0.5529296398162842, "step": 4383 }, { "epoch": 1.0656295576081671, "grad_norm": 1.62473854744099, "learning_rate": 9.746802474565267e-06, "loss": 0.5507123470306396, "step": 4384 }, { "epoch": 1.065872630043753, "grad_norm": 1.4088105404577402, "learning_rate": 9.742784345855203e-06, "loss": 0.6788464784622192, "step": 4385 }, { "epoch": 1.0661157024793388, "grad_norm": 1.4738572818771802, "learning_rate": 9.738766258700594e-06, "loss": 0.5540523529052734, "step": 4386 }, { "epoch": 1.0663587749149246, "grad_norm": 1.6552382743876837, "learning_rate": 9.734748213750595e-06, "loss": 0.6089738607406616, "step": 4387 }, { "epoch": 1.0666018473505106, "grad_norm": 1.4647018703320482, "learning_rate": 9.73073021165436e-06, "loss": 0.5719711184501648, "step": 4388 }, { "epoch": 1.0668449197860963, "grad_norm": 1.5115413135941624, "learning_rate": 9.726712253061031e-06, "loss": 0.5473837852478027, "step": 4389 }, { "epoch": 1.067087992221682, "grad_norm": 1.5341012107599157, "learning_rate": 9.722694338619745e-06, "loss": 0.6313230991363525, "step": 4390 }, { "epoch": 1.0673310646572678, "grad_norm": 1.4313329240305503, "learning_rate": 9.718676468979625e-06, "loss": 0.595430314540863, "step": 4391 }, { "epoch": 1.0675741370928538, "grad_norm": 1.5230302925080872, "learning_rate": 9.714658644789805e-06, "loss": 0.5578218102455139, "step": 4392 }, { "epoch": 1.0678172095284395, "grad_norm": 1.560731615496596, "learning_rate": 9.710640866699393e-06, "loss": 0.5656239986419678, "step": 4393 }, { "epoch": 1.0680602819640252, "grad_norm": 1.4254017374466472, "learning_rate": 9.706623135357495e-06, "loss": 0.6005108952522278, "step": 4394 }, { "epoch": 1.068303354399611, "grad_norm": 1.829338767970363, "learning_rate": 9.702605451413217e-06, "loss": 0.7066553831100464, "step": 4395 }, { "epoch": 1.068546426835197, "grad_norm": 1.3622406495742763, "learning_rate": 9.698587815515646e-06, "loss": 0.5017932057380676, "step": 4396 }, { "epoch": 1.0687894992707827, "grad_norm": 1.4513941014677507, "learning_rate": 9.694570228313868e-06, "loss": 0.5310795903205872, "step": 4397 }, { "epoch": 1.0690325717063685, "grad_norm": 1.358390735848485, "learning_rate": 9.69055269045696e-06, "loss": 0.6809477806091309, "step": 4398 }, { "epoch": 1.0692756441419542, "grad_norm": 1.2327561482362615, "learning_rate": 9.686535202593988e-06, "loss": 0.5803501605987549, "step": 4399 }, { "epoch": 1.0695187165775402, "grad_norm": 1.3995489614711096, "learning_rate": 9.682517765374015e-06, "loss": 0.6498831510543823, "step": 4400 }, { "epoch": 1.069761789013126, "grad_norm": 1.7645650732633045, "learning_rate": 9.678500379446095e-06, "loss": 0.7051591873168945, "step": 4401 }, { "epoch": 1.0700048614487117, "grad_norm": 1.3060526695764703, "learning_rate": 9.674483045459271e-06, "loss": 0.5678548216819763, "step": 4402 }, { "epoch": 1.0702479338842976, "grad_norm": 1.4251310002794817, "learning_rate": 9.670465764062576e-06, "loss": 0.8264667391777039, "step": 4403 }, { "epoch": 1.0704910063198834, "grad_norm": 1.4183257883344829, "learning_rate": 9.66644853590504e-06, "loss": 0.6251392364501953, "step": 4404 }, { "epoch": 1.070734078755469, "grad_norm": 1.4376195372556992, "learning_rate": 9.662431361635681e-06, "loss": 0.5488555431365967, "step": 4405 }, { "epoch": 1.0709771511910549, "grad_norm": 1.5224332523281419, "learning_rate": 9.658414241903508e-06, "loss": 0.7111132144927979, "step": 4406 }, { "epoch": 1.0712202236266408, "grad_norm": 1.4336513632161048, "learning_rate": 9.654397177357522e-06, "loss": 0.5820901393890381, "step": 4407 }, { "epoch": 1.0714632960622266, "grad_norm": 1.4640149002557463, "learning_rate": 9.650380168646716e-06, "loss": 0.6021502614021301, "step": 4408 }, { "epoch": 1.0717063684978123, "grad_norm": 1.4981682251081312, "learning_rate": 9.646363216420072e-06, "loss": 0.5995486974716187, "step": 4409 }, { "epoch": 1.071949440933398, "grad_norm": 1.4718135970677935, "learning_rate": 9.642346321326568e-06, "loss": 0.6426037549972534, "step": 4410 }, { "epoch": 1.072192513368984, "grad_norm": 1.4917535487888174, "learning_rate": 9.638329484015165e-06, "loss": 0.5789382457733154, "step": 4411 }, { "epoch": 1.0724355858045698, "grad_norm": 1.6697561772504235, "learning_rate": 9.634312705134818e-06, "loss": 0.574744701385498, "step": 4412 }, { "epoch": 1.0726786582401555, "grad_norm": 1.2402305087146726, "learning_rate": 9.630295985334474e-06, "loss": 0.4642086625099182, "step": 4413 }, { "epoch": 1.0729217306757413, "grad_norm": 1.3340702206199324, "learning_rate": 9.62627932526307e-06, "loss": 0.5582301616668701, "step": 4414 }, { "epoch": 1.0731648031113272, "grad_norm": 1.3829575403908727, "learning_rate": 9.622262725569533e-06, "loss": 0.47447896003723145, "step": 4415 }, { "epoch": 1.073407875546913, "grad_norm": 1.6500979677176177, "learning_rate": 9.618246186902776e-06, "loss": 0.5246997475624084, "step": 4416 }, { "epoch": 1.0736509479824987, "grad_norm": 1.5345839088758089, "learning_rate": 9.614229709911713e-06, "loss": 0.655734658241272, "step": 4417 }, { "epoch": 1.0738940204180847, "grad_norm": 1.3044143813443903, "learning_rate": 9.610213295245238e-06, "loss": 0.43485334515571594, "step": 4418 }, { "epoch": 1.0741370928536704, "grad_norm": 1.6650254535458795, "learning_rate": 9.606196943552238e-06, "loss": 0.5984123945236206, "step": 4419 }, { "epoch": 1.0743801652892562, "grad_norm": 1.4418277476253334, "learning_rate": 9.60218065548159e-06, "loss": 0.648768961429596, "step": 4420 }, { "epoch": 1.074623237724842, "grad_norm": 1.5286628285183557, "learning_rate": 9.598164431682161e-06, "loss": 0.6420633792877197, "step": 4421 }, { "epoch": 1.0748663101604279, "grad_norm": 1.599645711630344, "learning_rate": 9.594148272802807e-06, "loss": 0.5837013721466064, "step": 4422 }, { "epoch": 1.0751093825960136, "grad_norm": 1.6972899077239347, "learning_rate": 9.590132179492374e-06, "loss": 0.4830769896507263, "step": 4423 }, { "epoch": 1.0753524550315994, "grad_norm": 1.2853048705984014, "learning_rate": 9.586116152399694e-06, "loss": 0.5492796301841736, "step": 4424 }, { "epoch": 1.0755955274671851, "grad_norm": 1.5137170034314709, "learning_rate": 9.582100192173598e-06, "loss": 0.6672409772872925, "step": 4425 }, { "epoch": 1.075838599902771, "grad_norm": 1.4086389272343394, "learning_rate": 9.578084299462896e-06, "loss": 0.5592319369316101, "step": 4426 }, { "epoch": 1.0760816723383568, "grad_norm": 1.4128136841161663, "learning_rate": 9.57406847491639e-06, "loss": 0.5625807046890259, "step": 4427 }, { "epoch": 1.0763247447739426, "grad_norm": 1.4045282591653527, "learning_rate": 9.570052719182875e-06, "loss": 0.5439488887786865, "step": 4428 }, { "epoch": 1.0765678172095283, "grad_norm": 1.421437350380536, "learning_rate": 9.56603703291113e-06, "loss": 0.5639393329620361, "step": 4429 }, { "epoch": 1.0768108896451143, "grad_norm": 1.6128652863054236, "learning_rate": 9.562021416749926e-06, "loss": 0.6037842035293579, "step": 4430 }, { "epoch": 1.0770539620807, "grad_norm": 1.4864724419633142, "learning_rate": 9.558005871348017e-06, "loss": 0.6333089470863342, "step": 4431 }, { "epoch": 1.0772970345162858, "grad_norm": 1.337150030359404, "learning_rate": 9.55399039735415e-06, "loss": 0.5394765138626099, "step": 4432 }, { "epoch": 1.0775401069518717, "grad_norm": 1.3706538465725608, "learning_rate": 9.549974995417066e-06, "loss": 0.4958137273788452, "step": 4433 }, { "epoch": 1.0777831793874575, "grad_norm": 1.6877943180520685, "learning_rate": 9.545959666185483e-06, "loss": 0.6517987251281738, "step": 4434 }, { "epoch": 1.0780262518230432, "grad_norm": 1.465581471813415, "learning_rate": 9.541944410308116e-06, "loss": 0.483386367559433, "step": 4435 }, { "epoch": 1.078269324258629, "grad_norm": 1.5019937014841775, "learning_rate": 9.537929228433663e-06, "loss": 0.5452715158462524, "step": 4436 }, { "epoch": 1.078512396694215, "grad_norm": 1.4301464636269179, "learning_rate": 9.533914121210813e-06, "loss": 0.6069810390472412, "step": 4437 }, { "epoch": 1.0787554691298007, "grad_norm": 1.4304554717642284, "learning_rate": 9.529899089288242e-06, "loss": 0.5174189209938049, "step": 4438 }, { "epoch": 1.0789985415653864, "grad_norm": 1.6214868014307067, "learning_rate": 9.525884133314613e-06, "loss": 0.6289925575256348, "step": 4439 }, { "epoch": 1.0792416140009724, "grad_norm": 1.4371644586473789, "learning_rate": 9.521869253938578e-06, "loss": 0.5559602379798889, "step": 4440 }, { "epoch": 1.0794846864365581, "grad_norm": 3.2947835793631226, "learning_rate": 9.517854451808775e-06, "loss": 0.7200114130973816, "step": 4441 }, { "epoch": 1.079727758872144, "grad_norm": 1.657791481849857, "learning_rate": 9.513839727573833e-06, "loss": 0.612977147102356, "step": 4442 }, { "epoch": 1.0799708313077296, "grad_norm": 1.3896604372813826, "learning_rate": 9.509825081882363e-06, "loss": 0.5654135346412659, "step": 4443 }, { "epoch": 1.0802139037433156, "grad_norm": 1.590512085065839, "learning_rate": 9.50581051538297e-06, "loss": 0.57207190990448, "step": 4444 }, { "epoch": 1.0804569761789014, "grad_norm": 1.605877474766484, "learning_rate": 9.501796028724241e-06, "loss": 0.6177342534065247, "step": 4445 }, { "epoch": 1.080700048614487, "grad_norm": 1.3071394355434602, "learning_rate": 9.497781622554751e-06, "loss": 0.5740532875061035, "step": 4446 }, { "epoch": 1.0809431210500728, "grad_norm": 1.1945636296414686, "learning_rate": 9.493767297523062e-06, "loss": 0.4988701045513153, "step": 4447 }, { "epoch": 1.0811861934856588, "grad_norm": 1.3200964572100873, "learning_rate": 9.489753054277726e-06, "loss": 0.49479353427886963, "step": 4448 }, { "epoch": 1.0814292659212446, "grad_norm": 1.419834836371257, "learning_rate": 9.485738893467272e-06, "loss": 0.6027201414108276, "step": 4449 }, { "epoch": 1.0816723383568303, "grad_norm": 1.5078816950212304, "learning_rate": 9.481724815740237e-06, "loss": 0.597230076789856, "step": 4450 }, { "epoch": 1.081915410792416, "grad_norm": 1.4666138341663502, "learning_rate": 9.477710821745119e-06, "loss": 0.6900971531867981, "step": 4451 }, { "epoch": 1.082158483228002, "grad_norm": 1.4053303461327185, "learning_rate": 9.473696912130417e-06, "loss": 0.5595828294754028, "step": 4452 }, { "epoch": 1.0824015556635878, "grad_norm": 1.7112000441953805, "learning_rate": 9.469683087544616e-06, "loss": 0.5513526797294617, "step": 4453 }, { "epoch": 1.0826446280991735, "grad_norm": 1.4335889890072744, "learning_rate": 9.465669348636183e-06, "loss": 0.6884557008743286, "step": 4454 }, { "epoch": 1.0828877005347595, "grad_norm": 1.4288908636605029, "learning_rate": 9.461655696053573e-06, "loss": 0.5329287052154541, "step": 4455 }, { "epoch": 1.0831307729703452, "grad_norm": 1.4974489189458768, "learning_rate": 9.457642130445224e-06, "loss": 0.7207549810409546, "step": 4456 }, { "epoch": 1.083373845405931, "grad_norm": 1.4303062279581484, "learning_rate": 9.453628652459563e-06, "loss": 0.5233443975448608, "step": 4457 }, { "epoch": 1.0836169178415167, "grad_norm": 1.5652168815411087, "learning_rate": 9.44961526274501e-06, "loss": 0.5556023120880127, "step": 4458 }, { "epoch": 1.0838599902771027, "grad_norm": 1.372992289193468, "learning_rate": 9.445601961949957e-06, "loss": 0.5483829975128174, "step": 4459 }, { "epoch": 1.0841030627126884, "grad_norm": 1.489107720885321, "learning_rate": 9.441588750722794e-06, "loss": 0.5261953473091125, "step": 4460 }, { "epoch": 1.0843461351482742, "grad_norm": 1.7491464751019012, "learning_rate": 9.437575629711883e-06, "loss": 0.6078960299491882, "step": 4461 }, { "epoch": 1.08458920758386, "grad_norm": 1.7153611600635488, "learning_rate": 9.433562599565582e-06, "loss": 0.5649653673171997, "step": 4462 }, { "epoch": 1.0848322800194459, "grad_norm": 1.2033719033387862, "learning_rate": 9.429549660932232e-06, "loss": 0.6297459602355957, "step": 4463 }, { "epoch": 1.0850753524550316, "grad_norm": 1.3560638849807498, "learning_rate": 9.42553681446016e-06, "loss": 0.5644232034683228, "step": 4464 }, { "epoch": 1.0853184248906174, "grad_norm": 1.4397335533787459, "learning_rate": 9.421524060797667e-06, "loss": 0.5571460723876953, "step": 4465 }, { "epoch": 1.085561497326203, "grad_norm": 1.5329013234691535, "learning_rate": 9.417511400593064e-06, "loss": 0.5602021217346191, "step": 4466 }, { "epoch": 1.085804569761789, "grad_norm": 1.386982394664089, "learning_rate": 9.413498834494624e-06, "loss": 0.612522542476654, "step": 4467 }, { "epoch": 1.0860476421973748, "grad_norm": 1.1821976477567224, "learning_rate": 9.409486363150613e-06, "loss": 0.46090710163116455, "step": 4468 }, { "epoch": 1.0862907146329606, "grad_norm": 1.4246493770481738, "learning_rate": 9.405473987209283e-06, "loss": 0.5268405675888062, "step": 4469 }, { "epoch": 1.0865337870685465, "grad_norm": 1.5932451105293395, "learning_rate": 9.401461707318864e-06, "loss": 0.6416295766830444, "step": 4470 }, { "epoch": 1.0867768595041323, "grad_norm": 1.6304691087056276, "learning_rate": 9.397449524127577e-06, "loss": 0.6661291122436523, "step": 4471 }, { "epoch": 1.087019931939718, "grad_norm": 1.5315834030415423, "learning_rate": 9.393437438283626e-06, "loss": 0.6981563568115234, "step": 4472 }, { "epoch": 1.0872630043753038, "grad_norm": 1.4039514910271507, "learning_rate": 9.389425450435196e-06, "loss": 0.5417815446853638, "step": 4473 }, { "epoch": 1.0875060768108897, "grad_norm": 1.5820215761281518, "learning_rate": 9.385413561230464e-06, "loss": 0.6293705701828003, "step": 4474 }, { "epoch": 1.0877491492464755, "grad_norm": 1.486467048666407, "learning_rate": 9.381401771317586e-06, "loss": 0.6305831670761108, "step": 4475 }, { "epoch": 1.0879922216820612, "grad_norm": 1.5406204304365019, "learning_rate": 9.3773900813447e-06, "loss": 0.5265912413597107, "step": 4476 }, { "epoch": 1.088235294117647, "grad_norm": 1.5282166945592837, "learning_rate": 9.37337849195993e-06, "loss": 0.7201114296913147, "step": 4477 }, { "epoch": 1.088478366553233, "grad_norm": 1.5890802750377384, "learning_rate": 9.36936700381138e-06, "loss": 0.580277681350708, "step": 4478 }, { "epoch": 1.0887214389888187, "grad_norm": 1.3236028786371101, "learning_rate": 9.365355617547152e-06, "loss": 0.5584641098976135, "step": 4479 }, { "epoch": 1.0889645114244044, "grad_norm": 1.3747129323436549, "learning_rate": 9.361344333815308e-06, "loss": 0.5154616832733154, "step": 4480 }, { "epoch": 1.0892075838599902, "grad_norm": 1.6694477527992841, "learning_rate": 9.357333153263911e-06, "loss": 0.6881415843963623, "step": 4481 }, { "epoch": 1.0894506562955761, "grad_norm": 1.4950818637997099, "learning_rate": 9.353322076541002e-06, "loss": 0.5783987045288086, "step": 4482 }, { "epoch": 1.0896937287311619, "grad_norm": 1.5700737341033286, "learning_rate": 9.34931110429461e-06, "loss": 0.6185005903244019, "step": 4483 }, { "epoch": 1.0899368011667476, "grad_norm": 1.4235734615007352, "learning_rate": 9.345300237172739e-06, "loss": 0.535822868347168, "step": 4484 }, { "epoch": 1.0901798736023336, "grad_norm": 1.4645605157836166, "learning_rate": 9.34128947582338e-06, "loss": 0.5785428285598755, "step": 4485 }, { "epoch": 1.0904229460379193, "grad_norm": 1.5520498699642056, "learning_rate": 9.33727882089451e-06, "loss": 0.6933275461196899, "step": 4486 }, { "epoch": 1.090666018473505, "grad_norm": 1.4848149222155616, "learning_rate": 9.333268273034083e-06, "loss": 0.5077940225601196, "step": 4487 }, { "epoch": 1.0909090909090908, "grad_norm": 1.5632512352676704, "learning_rate": 9.329257832890036e-06, "loss": 0.540668249130249, "step": 4488 }, { "epoch": 1.0911521633446768, "grad_norm": 1.3309449918856342, "learning_rate": 9.325247501110298e-06, "loss": 0.4731217324733734, "step": 4489 }, { "epoch": 1.0913952357802625, "grad_norm": 1.447890468410441, "learning_rate": 9.321237278342761e-06, "loss": 0.6316136121749878, "step": 4490 }, { "epoch": 1.0916383082158483, "grad_norm": 1.8452298520297055, "learning_rate": 9.317227165235325e-06, "loss": 0.6185352206230164, "step": 4491 }, { "epoch": 1.091881380651434, "grad_norm": 1.6702759357209847, "learning_rate": 9.313217162435853e-06, "loss": 0.6698681116104126, "step": 4492 }, { "epoch": 1.09212445308702, "grad_norm": 1.3917840038826712, "learning_rate": 9.309207270592196e-06, "loss": 0.585076630115509, "step": 4493 }, { "epoch": 1.0923675255226057, "grad_norm": 1.2584719508506705, "learning_rate": 9.30519749035219e-06, "loss": 0.5810431838035583, "step": 4494 }, { "epoch": 1.0926105979581915, "grad_norm": 1.5808947842870646, "learning_rate": 9.301187822363649e-06, "loss": 0.6356669068336487, "step": 4495 }, { "epoch": 1.0928536703937772, "grad_norm": 1.3926338261266706, "learning_rate": 9.29717826727437e-06, "loss": 0.652472734451294, "step": 4496 }, { "epoch": 1.0930967428293632, "grad_norm": 1.8305251036497296, "learning_rate": 9.29316882573213e-06, "loss": 0.6673128604888916, "step": 4497 }, { "epoch": 1.093339815264949, "grad_norm": 1.4810880889210245, "learning_rate": 9.289159498384691e-06, "loss": 0.5190853476524353, "step": 4498 }, { "epoch": 1.0935828877005347, "grad_norm": 1.3959156311942702, "learning_rate": 9.285150285879797e-06, "loss": 0.5450589060783386, "step": 4499 }, { "epoch": 1.0938259601361207, "grad_norm": 1.4924096900850594, "learning_rate": 9.281141188865171e-06, "loss": 0.5568996667861938, "step": 4500 }, { "epoch": 1.0940690325717064, "grad_norm": 1.552345664239097, "learning_rate": 9.277132207988516e-06, "loss": 0.6572287678718567, "step": 4501 }, { "epoch": 1.0943121050072921, "grad_norm": 1.386021551860989, "learning_rate": 9.27312334389752e-06, "loss": 0.44370460510253906, "step": 4502 }, { "epoch": 1.094555177442878, "grad_norm": 1.317161144720726, "learning_rate": 9.269114597239849e-06, "loss": 0.4235595464706421, "step": 4503 }, { "epoch": 1.0947982498784639, "grad_norm": 1.593259186995376, "learning_rate": 9.265105968663152e-06, "loss": 0.5997068285942078, "step": 4504 }, { "epoch": 1.0950413223140496, "grad_norm": 1.5437864923341835, "learning_rate": 9.261097458815057e-06, "loss": 0.6476179361343384, "step": 4505 }, { "epoch": 1.0952843947496353, "grad_norm": 1.2616512652766603, "learning_rate": 9.257089068343175e-06, "loss": 0.6583454608917236, "step": 4506 }, { "epoch": 1.095527467185221, "grad_norm": 1.3895717052039962, "learning_rate": 9.253080797895099e-06, "loss": 0.5496999025344849, "step": 4507 }, { "epoch": 1.095770539620807, "grad_norm": 1.4109007377327052, "learning_rate": 9.249072648118397e-06, "loss": 0.5756127834320068, "step": 4508 }, { "epoch": 1.0960136120563928, "grad_norm": 1.5234357700832601, "learning_rate": 9.245064619660624e-06, "loss": 0.6089422106742859, "step": 4509 }, { "epoch": 1.0962566844919786, "grad_norm": 1.5308743143093033, "learning_rate": 9.24105671316931e-06, "loss": 0.5272327661514282, "step": 4510 }, { "epoch": 1.0964997569275645, "grad_norm": 1.7302506633145254, "learning_rate": 9.237048929291967e-06, "loss": 0.5474583506584167, "step": 4511 }, { "epoch": 1.0967428293631503, "grad_norm": 1.4278478168589472, "learning_rate": 9.23304126867609e-06, "loss": 0.4878554940223694, "step": 4512 }, { "epoch": 1.096985901798736, "grad_norm": 1.7833363713787231, "learning_rate": 9.22903373196915e-06, "loss": 0.6934905052185059, "step": 4513 }, { "epoch": 1.0972289742343218, "grad_norm": 1.5227443728562542, "learning_rate": 9.225026319818599e-06, "loss": 0.5671505331993103, "step": 4514 }, { "epoch": 1.0974720466699077, "grad_norm": 1.4501360887003154, "learning_rate": 9.221019032871874e-06, "loss": 0.6702126264572144, "step": 4515 }, { "epoch": 1.0977151191054935, "grad_norm": 1.4047482145811574, "learning_rate": 9.217011871776384e-06, "loss": 0.6828560829162598, "step": 4516 }, { "epoch": 1.0979581915410792, "grad_norm": 1.4137047243778962, "learning_rate": 9.213004837179524e-06, "loss": 0.4752919673919678, "step": 4517 }, { "epoch": 1.098201263976665, "grad_norm": 1.753237994671535, "learning_rate": 9.208997929728662e-06, "loss": 0.5145255327224731, "step": 4518 }, { "epoch": 1.098444336412251, "grad_norm": 1.4004712803565456, "learning_rate": 9.204991150071151e-06, "loss": 0.5569594502449036, "step": 4519 }, { "epoch": 1.0986874088478367, "grad_norm": 1.6836238080719546, "learning_rate": 9.200984498854322e-06, "loss": 0.5469307899475098, "step": 4520 }, { "epoch": 1.0989304812834224, "grad_norm": 1.4756462071004381, "learning_rate": 9.196977976725485e-06, "loss": 0.549341082572937, "step": 4521 }, { "epoch": 1.0991735537190084, "grad_norm": 1.62062281959583, "learning_rate": 9.192971584331927e-06, "loss": 0.5775541067123413, "step": 4522 }, { "epoch": 1.0994166261545941, "grad_norm": 1.2060746511131424, "learning_rate": 9.188965322320916e-06, "loss": 0.4432786703109741, "step": 4523 }, { "epoch": 1.0996596985901799, "grad_norm": 1.5982037420523136, "learning_rate": 9.1849591913397e-06, "loss": 0.5112117528915405, "step": 4524 }, { "epoch": 1.0999027710257656, "grad_norm": 1.4274432309831628, "learning_rate": 9.180953192035507e-06, "loss": 0.7239210605621338, "step": 4525 }, { "epoch": 1.1001458434613516, "grad_norm": 1.6065889395683983, "learning_rate": 9.17694732505554e-06, "loss": 0.6921788454055786, "step": 4526 }, { "epoch": 1.1003889158969373, "grad_norm": 1.5555842390784027, "learning_rate": 9.17294159104698e-06, "loss": 0.6132508516311646, "step": 4527 }, { "epoch": 1.100631988332523, "grad_norm": 1.4455422445337145, "learning_rate": 9.168935990656989e-06, "loss": 0.6064404249191284, "step": 4528 }, { "epoch": 1.1008750607681088, "grad_norm": 1.4883995977535283, "learning_rate": 9.16493052453271e-06, "loss": 0.5618476867675781, "step": 4529 }, { "epoch": 1.1011181332036948, "grad_norm": 1.5668385706870762, "learning_rate": 9.16092519332126e-06, "loss": 0.738191545009613, "step": 4530 }, { "epoch": 1.1013612056392805, "grad_norm": 1.7361221976500756, "learning_rate": 9.156919997669731e-06, "loss": 0.6013821363449097, "step": 4531 }, { "epoch": 1.1016042780748663, "grad_norm": 1.4699702580356226, "learning_rate": 9.152914938225204e-06, "loss": 0.5788896083831787, "step": 4532 }, { "epoch": 1.101847350510452, "grad_norm": 1.8163051069411735, "learning_rate": 9.14891001563473e-06, "loss": 0.6788231134414673, "step": 4533 }, { "epoch": 1.102090422946038, "grad_norm": 1.4348580665314699, "learning_rate": 9.14490523054534e-06, "loss": 0.5992722511291504, "step": 4534 }, { "epoch": 1.1023334953816237, "grad_norm": 1.5299784220045973, "learning_rate": 9.140900583604038e-06, "loss": 0.4748286008834839, "step": 4535 }, { "epoch": 1.1025765678172095, "grad_norm": 1.243538261505925, "learning_rate": 9.136896075457815e-06, "loss": 0.6114721298217773, "step": 4536 }, { "epoch": 1.1028196402527954, "grad_norm": 1.5310864732561666, "learning_rate": 9.132891706753631e-06, "loss": 0.6217843294143677, "step": 4537 }, { "epoch": 1.1030627126883812, "grad_norm": 1.318147537271728, "learning_rate": 9.12888747813843e-06, "loss": 0.5175364017486572, "step": 4538 }, { "epoch": 1.103305785123967, "grad_norm": 1.29443942578725, "learning_rate": 9.124883390259126e-06, "loss": 0.690614640712738, "step": 4539 }, { "epoch": 1.1035488575595527, "grad_norm": 1.329789468754849, "learning_rate": 9.12087944376262e-06, "loss": 0.5420909523963928, "step": 4540 }, { "epoch": 1.1037919299951386, "grad_norm": 1.6597086469110327, "learning_rate": 9.116875639295783e-06, "loss": 0.6526120901107788, "step": 4541 }, { "epoch": 1.1040350024307244, "grad_norm": 1.4724466227609774, "learning_rate": 9.112871977505463e-06, "loss": 0.4902859032154083, "step": 4542 }, { "epoch": 1.1042780748663101, "grad_norm": 1.4352531596744043, "learning_rate": 9.108868459038489e-06, "loss": 0.5357139110565186, "step": 4543 }, { "epoch": 1.1045211473018959, "grad_norm": 1.749032427958367, "learning_rate": 9.104865084541665e-06, "loss": 0.6677002906799316, "step": 4544 }, { "epoch": 1.1047642197374818, "grad_norm": 1.2764996822445402, "learning_rate": 9.10086185466177e-06, "loss": 0.5276771187782288, "step": 4545 }, { "epoch": 1.1050072921730676, "grad_norm": 1.450335371335477, "learning_rate": 9.09685877004556e-06, "loss": 0.46930354833602905, "step": 4546 }, { "epoch": 1.1052503646086533, "grad_norm": 1.725734358071503, "learning_rate": 9.092855831339764e-06, "loss": 0.6594897508621216, "step": 4547 }, { "epoch": 1.105493437044239, "grad_norm": 1.585208287642318, "learning_rate": 9.088853039191105e-06, "loss": 0.5222557187080383, "step": 4548 }, { "epoch": 1.105736509479825, "grad_norm": 1.4900360322697455, "learning_rate": 9.084850394246262e-06, "loss": 0.5919155478477478, "step": 4549 }, { "epoch": 1.1059795819154108, "grad_norm": 1.6164915674691405, "learning_rate": 9.080847897151898e-06, "loss": 0.5933804512023926, "step": 4550 }, { "epoch": 1.1062226543509965, "grad_norm": 1.46103711374267, "learning_rate": 9.07684554855465e-06, "loss": 0.5865103006362915, "step": 4551 }, { "epoch": 1.1064657267865825, "grad_norm": 1.4964294074969893, "learning_rate": 9.072843349101134e-06, "loss": 0.6167519092559814, "step": 4552 }, { "epoch": 1.1067087992221682, "grad_norm": 1.7033260741746454, "learning_rate": 9.068841299437943e-06, "loss": 0.7153698205947876, "step": 4553 }, { "epoch": 1.106951871657754, "grad_norm": 1.4514632797301523, "learning_rate": 9.064839400211638e-06, "loss": 0.5447901487350464, "step": 4554 }, { "epoch": 1.1071949440933397, "grad_norm": 1.4750713338506782, "learning_rate": 9.06083765206876e-06, "loss": 0.5092335343360901, "step": 4555 }, { "epoch": 1.1074380165289257, "grad_norm": 1.3690473248830526, "learning_rate": 9.056836055655836e-06, "loss": 0.574620246887207, "step": 4556 }, { "epoch": 1.1076810889645115, "grad_norm": 1.4604853802925681, "learning_rate": 9.052834611619354e-06, "loss": 0.7196298837661743, "step": 4557 }, { "epoch": 1.1079241614000972, "grad_norm": 1.8013391020906684, "learning_rate": 9.048833320605783e-06, "loss": 0.5634110569953918, "step": 4558 }, { "epoch": 1.108167233835683, "grad_norm": 1.6245354804632386, "learning_rate": 9.044832183261568e-06, "loss": 0.6721905469894409, "step": 4559 }, { "epoch": 1.108410306271269, "grad_norm": 1.6721250509022998, "learning_rate": 9.040831200233122e-06, "loss": 0.7050707936286926, "step": 4560 }, { "epoch": 1.1086533787068547, "grad_norm": 1.5258976091923993, "learning_rate": 9.036830372166844e-06, "loss": 0.6865085363388062, "step": 4561 }, { "epoch": 1.1088964511424404, "grad_norm": 1.5919336246376459, "learning_rate": 9.032829699709103e-06, "loss": 0.617920994758606, "step": 4562 }, { "epoch": 1.1091395235780261, "grad_norm": 1.4390210701307151, "learning_rate": 9.028829183506242e-06, "loss": 0.5968458652496338, "step": 4563 }, { "epoch": 1.109382596013612, "grad_norm": 1.6372062812666877, "learning_rate": 9.024828824204576e-06, "loss": 0.7750126123428345, "step": 4564 }, { "epoch": 1.1096256684491979, "grad_norm": 1.5889379666920127, "learning_rate": 9.020828622450406e-06, "loss": 0.6233677864074707, "step": 4565 }, { "epoch": 1.1098687408847836, "grad_norm": 1.4489529619040746, "learning_rate": 9.016828578889995e-06, "loss": 0.535889744758606, "step": 4566 }, { "epoch": 1.1101118133203696, "grad_norm": 1.6876862545664202, "learning_rate": 9.012828694169587e-06, "loss": 0.6625850200653076, "step": 4567 }, { "epoch": 1.1103548857559553, "grad_norm": 1.3799349890190393, "learning_rate": 9.008828968935399e-06, "loss": 0.46938803791999817, "step": 4568 }, { "epoch": 1.110597958191541, "grad_norm": 1.4698649748522894, "learning_rate": 9.00482940383362e-06, "loss": 0.62978196144104, "step": 4569 }, { "epoch": 1.1108410306271268, "grad_norm": 1.4230562887235596, "learning_rate": 9.000829999510414e-06, "loss": 0.4821575880050659, "step": 4570 }, { "epoch": 1.1110841030627128, "grad_norm": 1.4968904012908526, "learning_rate": 8.996830756611921e-06, "loss": 0.5447679758071899, "step": 4571 }, { "epoch": 1.1113271754982985, "grad_norm": 1.5161463030593694, "learning_rate": 8.99283167578425e-06, "loss": 0.5734270811080933, "step": 4572 }, { "epoch": 1.1115702479338843, "grad_norm": 2.2050618045519403, "learning_rate": 8.988832757673497e-06, "loss": 0.5967544913291931, "step": 4573 }, { "epoch": 1.11181332036947, "grad_norm": 1.6071425475783865, "learning_rate": 8.984834002925718e-06, "loss": 0.6182731986045837, "step": 4574 }, { "epoch": 1.112056392805056, "grad_norm": 1.559480343798165, "learning_rate": 8.980835412186947e-06, "loss": 0.6160401105880737, "step": 4575 }, { "epoch": 1.1122994652406417, "grad_norm": 1.4522496291612856, "learning_rate": 8.976836986103191e-06, "loss": 0.43602627515792847, "step": 4576 }, { "epoch": 1.1125425376762275, "grad_norm": 1.3349772820247272, "learning_rate": 8.972838725320429e-06, "loss": 0.5049014091491699, "step": 4577 }, { "epoch": 1.1127856101118132, "grad_norm": 1.7140193891779216, "learning_rate": 8.968840630484621e-06, "loss": 0.5820134878158569, "step": 4578 }, { "epoch": 1.1130286825473992, "grad_norm": 1.2584077689916797, "learning_rate": 8.96484270224169e-06, "loss": 0.4998313784599304, "step": 4579 }, { "epoch": 1.113271754982985, "grad_norm": 1.3291182390265526, "learning_rate": 8.960844941237532e-06, "loss": 0.6082589626312256, "step": 4580 }, { "epoch": 1.1135148274185707, "grad_norm": 1.5825933418480838, "learning_rate": 8.95684734811803e-06, "loss": 0.5086318850517273, "step": 4581 }, { "epoch": 1.1137578998541566, "grad_norm": 1.5498548873027667, "learning_rate": 8.952849923529025e-06, "loss": 0.7118325233459473, "step": 4582 }, { "epoch": 1.1140009722897424, "grad_norm": 1.6045040826425554, "learning_rate": 8.94885266811634e-06, "loss": 0.6973618865013123, "step": 4583 }, { "epoch": 1.1142440447253281, "grad_norm": 1.423022424169812, "learning_rate": 8.944855582525762e-06, "loss": 0.5000640749931335, "step": 4584 }, { "epoch": 1.1144871171609139, "grad_norm": 1.4633566557011863, "learning_rate": 8.940858667403058e-06, "loss": 0.5750479102134705, "step": 4585 }, { "epoch": 1.1147301895964998, "grad_norm": 1.5662791279221098, "learning_rate": 8.936861923393964e-06, "loss": 0.6258634328842163, "step": 4586 }, { "epoch": 1.1149732620320856, "grad_norm": 1.5512699533446064, "learning_rate": 8.932865351144193e-06, "loss": 0.6255465745925903, "step": 4587 }, { "epoch": 1.1152163344676713, "grad_norm": 1.7231115119738452, "learning_rate": 8.928868951299416e-06, "loss": 0.5494064688682556, "step": 4588 }, { "epoch": 1.115459406903257, "grad_norm": 1.3310648413950164, "learning_rate": 8.924872724505296e-06, "loss": 0.4971560537815094, "step": 4589 }, { "epoch": 1.115702479338843, "grad_norm": 1.430493013636122, "learning_rate": 8.92087667140746e-06, "loss": 0.6926105618476868, "step": 4590 }, { "epoch": 1.1159455517744288, "grad_norm": 1.5231034791759432, "learning_rate": 8.916880792651497e-06, "loss": 0.5524954795837402, "step": 4591 }, { "epoch": 1.1161886242100145, "grad_norm": 1.2901461746575251, "learning_rate": 8.912885088882985e-06, "loss": 0.5604850053787231, "step": 4592 }, { "epoch": 1.1164316966456005, "grad_norm": 1.4705694048181412, "learning_rate": 8.90888956074746e-06, "loss": 0.5870035290718079, "step": 4593 }, { "epoch": 1.1166747690811862, "grad_norm": 1.9628479083990418, "learning_rate": 8.904894208890436e-06, "loss": 0.5296616554260254, "step": 4594 }, { "epoch": 1.116917841516772, "grad_norm": 1.303090544032004, "learning_rate": 8.900899033957399e-06, "loss": 0.5314677953720093, "step": 4595 }, { "epoch": 1.1171609139523577, "grad_norm": 1.390186699547363, "learning_rate": 8.8969040365938e-06, "loss": 0.6549686789512634, "step": 4596 }, { "epoch": 1.1174039863879437, "grad_norm": 1.6961140211567474, "learning_rate": 8.892909217445069e-06, "loss": 0.6520552635192871, "step": 4597 }, { "epoch": 1.1176470588235294, "grad_norm": 1.4901991439428766, "learning_rate": 8.888914577156606e-06, "loss": 0.6394515037536621, "step": 4598 }, { "epoch": 1.1178901312591152, "grad_norm": 1.5193039777040824, "learning_rate": 8.88492011637378e-06, "loss": 0.5722911357879639, "step": 4599 }, { "epoch": 1.118133203694701, "grad_norm": 1.520820058858721, "learning_rate": 8.880925835741929e-06, "loss": 0.6783513426780701, "step": 4600 }, { "epoch": 1.118376276130287, "grad_norm": 1.4471225251550959, "learning_rate": 8.876931735906364e-06, "loss": 0.5892723798751831, "step": 4601 }, { "epoch": 1.1186193485658726, "grad_norm": 1.6556883850261095, "learning_rate": 8.872937817512369e-06, "loss": 0.6665138006210327, "step": 4602 }, { "epoch": 1.1188624210014584, "grad_norm": 1.3285334000241578, "learning_rate": 8.868944081205197e-06, "loss": 0.5266570448875427, "step": 4603 }, { "epoch": 1.1191054934370444, "grad_norm": 1.5126122038673628, "learning_rate": 8.86495052763007e-06, "loss": 0.530780553817749, "step": 4604 }, { "epoch": 1.11934856587263, "grad_norm": 1.4985153914943607, "learning_rate": 8.860957157432177e-06, "loss": 0.5890452265739441, "step": 4605 }, { "epoch": 1.1195916383082158, "grad_norm": 1.6758838071893654, "learning_rate": 8.85696397125669e-06, "loss": 0.6090855598449707, "step": 4606 }, { "epoch": 1.1198347107438016, "grad_norm": 1.7380790901036587, "learning_rate": 8.852970969748742e-06, "loss": 0.670281708240509, "step": 4607 }, { "epoch": 1.1200777831793876, "grad_norm": 1.455694183595228, "learning_rate": 8.848978153553435e-06, "loss": 0.5855671167373657, "step": 4608 }, { "epoch": 1.1203208556149733, "grad_norm": 1.434307543463257, "learning_rate": 8.844985523315845e-06, "loss": 0.6780800819396973, "step": 4609 }, { "epoch": 1.120563928050559, "grad_norm": 1.685481024584918, "learning_rate": 8.840993079681018e-06, "loss": 0.5737702250480652, "step": 4610 }, { "epoch": 1.1208070004861448, "grad_norm": 1.5873570474556753, "learning_rate": 8.837000823293965e-06, "loss": 0.6336060762405396, "step": 4611 }, { "epoch": 1.1210500729217308, "grad_norm": 1.5141000480189672, "learning_rate": 8.83300875479967e-06, "loss": 0.502005398273468, "step": 4612 }, { "epoch": 1.1212931453573165, "grad_norm": 1.5228396284956736, "learning_rate": 8.829016874843089e-06, "loss": 0.6946368217468262, "step": 4613 }, { "epoch": 1.1215362177929022, "grad_norm": 1.6159128317596145, "learning_rate": 8.825025184069144e-06, "loss": 0.5207350254058838, "step": 4614 }, { "epoch": 1.121779290228488, "grad_norm": 1.360349617360887, "learning_rate": 8.821033683122732e-06, "loss": 0.4538816809654236, "step": 4615 }, { "epoch": 1.122022362664074, "grad_norm": 1.4233786441659988, "learning_rate": 8.817042372648712e-06, "loss": 0.6683834791183472, "step": 4616 }, { "epoch": 1.1222654350996597, "grad_norm": 1.4138965040980283, "learning_rate": 8.813051253291913e-06, "loss": 0.5842849016189575, "step": 4617 }, { "epoch": 1.1225085075352454, "grad_norm": 1.4277682492518609, "learning_rate": 8.809060325697139e-06, "loss": 0.6197444200515747, "step": 4618 }, { "epoch": 1.1227515799708314, "grad_norm": 1.7801723347843694, "learning_rate": 8.805069590509156e-06, "loss": 0.6390373706817627, "step": 4619 }, { "epoch": 1.1229946524064172, "grad_norm": 1.6027521442734496, "learning_rate": 8.801079048372707e-06, "loss": 0.7431282997131348, "step": 4620 }, { "epoch": 1.123237724842003, "grad_norm": 1.3394167086525328, "learning_rate": 8.797088699932494e-06, "loss": 0.6017334461212158, "step": 4621 }, { "epoch": 1.1234807972775886, "grad_norm": 1.706468702687343, "learning_rate": 8.7930985458332e-06, "loss": 0.559158205986023, "step": 4622 }, { "epoch": 1.1237238697131746, "grad_norm": 1.4337520260918348, "learning_rate": 8.789108586719462e-06, "loss": 0.6457585096359253, "step": 4623 }, { "epoch": 1.1239669421487604, "grad_norm": 1.7513324988483017, "learning_rate": 8.785118823235898e-06, "loss": 0.6548459529876709, "step": 4624 }, { "epoch": 1.124210014584346, "grad_norm": 1.7293540797214675, "learning_rate": 8.78112925602709e-06, "loss": 0.5509489178657532, "step": 4625 }, { "epoch": 1.1244530870199319, "grad_norm": 1.3163586479277327, "learning_rate": 8.777139885737582e-06, "loss": 0.45506009459495544, "step": 4626 }, { "epoch": 1.1246961594555178, "grad_norm": 1.2906156219079468, "learning_rate": 8.773150713011897e-06, "loss": 0.4951667785644531, "step": 4627 }, { "epoch": 1.1249392318911036, "grad_norm": 1.4545323894272908, "learning_rate": 8.769161738494519e-06, "loss": 0.5558339953422546, "step": 4628 }, { "epoch": 1.1251823043266893, "grad_norm": 1.672946587271832, "learning_rate": 8.7651729628299e-06, "loss": 0.5274007320404053, "step": 4629 }, { "epoch": 1.125425376762275, "grad_norm": 1.3326941611520804, "learning_rate": 8.761184386662468e-06, "loss": 0.5876073241233826, "step": 4630 }, { "epoch": 1.125668449197861, "grad_norm": 1.812897513653009, "learning_rate": 8.757196010636608e-06, "loss": 0.5615643262863159, "step": 4631 }, { "epoch": 1.1259115216334468, "grad_norm": 1.43931247828565, "learning_rate": 8.753207835396678e-06, "loss": 0.5726405382156372, "step": 4632 }, { "epoch": 1.1261545940690325, "grad_norm": 1.3136645647420462, "learning_rate": 8.749219861587002e-06, "loss": 0.5505622625350952, "step": 4633 }, { "epoch": 1.1263976665046185, "grad_norm": 1.2772504534841305, "learning_rate": 8.745232089851876e-06, "loss": 0.6691713333129883, "step": 4634 }, { "epoch": 1.1266407389402042, "grad_norm": 1.5247871841318543, "learning_rate": 8.741244520835554e-06, "loss": 0.6704831123352051, "step": 4635 }, { "epoch": 1.12688381137579, "grad_norm": 1.4661162044604166, "learning_rate": 8.737257155182268e-06, "loss": 0.623543381690979, "step": 4636 }, { "epoch": 1.1271268838113757, "grad_norm": 1.4189090699998161, "learning_rate": 8.733269993536208e-06, "loss": 0.7477874755859375, "step": 4637 }, { "epoch": 1.1273699562469617, "grad_norm": 1.6030962495452965, "learning_rate": 8.729283036541535e-06, "loss": 0.6045582294464111, "step": 4638 }, { "epoch": 1.1276130286825474, "grad_norm": 1.395364602383253, "learning_rate": 8.725296284842383e-06, "loss": 0.5436173677444458, "step": 4639 }, { "epoch": 1.1278561011181332, "grad_norm": 1.5450437619436406, "learning_rate": 8.721309739082839e-06, "loss": 0.49683135747909546, "step": 4640 }, { "epoch": 1.128099173553719, "grad_norm": 1.5720704821472893, "learning_rate": 8.717323399906972e-06, "loss": 0.5644954442977905, "step": 4641 }, { "epoch": 1.1283422459893049, "grad_norm": 1.3976555329858438, "learning_rate": 8.713337267958805e-06, "loss": 0.5218707323074341, "step": 4642 }, { "epoch": 1.1285853184248906, "grad_norm": 1.5150624778920216, "learning_rate": 8.709351343882333e-06, "loss": 0.6573131084442139, "step": 4643 }, { "epoch": 1.1288283908604764, "grad_norm": 1.8410415006665326, "learning_rate": 8.705365628321521e-06, "loss": 0.6852938532829285, "step": 4644 }, { "epoch": 1.1290714632960621, "grad_norm": 1.47784377298489, "learning_rate": 8.701380121920292e-06, "loss": 0.5021111965179443, "step": 4645 }, { "epoch": 1.129314535731648, "grad_norm": 1.5262895602684576, "learning_rate": 8.697394825322538e-06, "loss": 0.6651133298873901, "step": 4646 }, { "epoch": 1.1295576081672338, "grad_norm": 1.569683938051877, "learning_rate": 8.693409739172128e-06, "loss": 0.6117218732833862, "step": 4647 }, { "epoch": 1.1298006806028196, "grad_norm": 1.4460918197417203, "learning_rate": 8.689424864112881e-06, "loss": 0.546112596988678, "step": 4648 }, { "epoch": 1.1300437530384055, "grad_norm": 1.5373684271680166, "learning_rate": 8.68544020078859e-06, "loss": 0.528468906879425, "step": 4649 }, { "epoch": 1.1302868254739913, "grad_norm": 1.5991110632397612, "learning_rate": 8.681455749843011e-06, "loss": 0.6293564438819885, "step": 4650 }, { "epoch": 1.130529897909577, "grad_norm": 1.5645496389662685, "learning_rate": 8.67747151191987e-06, "loss": 0.6611846685409546, "step": 4651 }, { "epoch": 1.1307729703451628, "grad_norm": 1.5624735191593218, "learning_rate": 8.673487487662852e-06, "loss": 0.5000607967376709, "step": 4652 }, { "epoch": 1.1310160427807487, "grad_norm": 1.5951889830591783, "learning_rate": 8.669503677715614e-06, "loss": 0.5163489580154419, "step": 4653 }, { "epoch": 1.1312591152163345, "grad_norm": 1.6056946466794786, "learning_rate": 8.66552008272177e-06, "loss": 0.7278372645378113, "step": 4654 }, { "epoch": 1.1315021876519202, "grad_norm": 1.2766818205669332, "learning_rate": 8.661536703324916e-06, "loss": 0.5491818785667419, "step": 4655 }, { "epoch": 1.1317452600875062, "grad_norm": 1.6307932433161845, "learning_rate": 8.657553540168592e-06, "loss": 0.6475831270217896, "step": 4656 }, { "epoch": 1.131988332523092, "grad_norm": 1.5774771507358152, "learning_rate": 8.653570593896323e-06, "loss": 0.7175897359848022, "step": 4657 }, { "epoch": 1.1322314049586777, "grad_norm": 1.7275175083998289, "learning_rate": 8.649587865151579e-06, "loss": 0.5836617946624756, "step": 4658 }, { "epoch": 1.1324744773942634, "grad_norm": 1.3133701246973835, "learning_rate": 8.645605354577808e-06, "loss": 0.5715504288673401, "step": 4659 }, { "epoch": 1.1327175498298492, "grad_norm": 1.6267519981990377, "learning_rate": 8.641623062818421e-06, "loss": 0.5559661984443665, "step": 4660 }, { "epoch": 1.1329606222654351, "grad_norm": 1.5224547892887077, "learning_rate": 8.637640990516793e-06, "loss": 0.5164235234260559, "step": 4661 }, { "epoch": 1.133203694701021, "grad_norm": 1.3804625626626217, "learning_rate": 8.633659138316255e-06, "loss": 0.41749054193496704, "step": 4662 }, { "epoch": 1.1334467671366066, "grad_norm": 1.4142310518532064, "learning_rate": 8.629677506860123e-06, "loss": 0.6887925863265991, "step": 4663 }, { "epoch": 1.1336898395721926, "grad_norm": 1.6902428213395075, "learning_rate": 8.625696096791656e-06, "loss": 0.566918134689331, "step": 4664 }, { "epoch": 1.1339329120077783, "grad_norm": 1.8333524939329056, "learning_rate": 8.62171490875409e-06, "loss": 0.4376637935638428, "step": 4665 }, { "epoch": 1.134175984443364, "grad_norm": 1.4556238074599108, "learning_rate": 8.61773394339062e-06, "loss": 0.5371979475021362, "step": 4666 }, { "epoch": 1.1344190568789498, "grad_norm": 1.425989117918489, "learning_rate": 8.613753201344407e-06, "loss": 0.615976095199585, "step": 4667 }, { "epoch": 1.1346621293145358, "grad_norm": 1.4130692403820562, "learning_rate": 8.609772683258573e-06, "loss": 0.536528468132019, "step": 4668 }, { "epoch": 1.1349052017501216, "grad_norm": 1.5341779418979367, "learning_rate": 8.605792389776203e-06, "loss": 0.6335446834564209, "step": 4669 }, { "epoch": 1.1351482741857073, "grad_norm": 1.7243632620351703, "learning_rate": 8.601812321540348e-06, "loss": 0.7035552263259888, "step": 4670 }, { "epoch": 1.1353913466212933, "grad_norm": 1.637872693918323, "learning_rate": 8.597832479194031e-06, "loss": 0.6584231853485107, "step": 4671 }, { "epoch": 1.135634419056879, "grad_norm": 1.5808506808978393, "learning_rate": 8.593852863380227e-06, "loss": 0.5354045033454895, "step": 4672 }, { "epoch": 1.1358774914924648, "grad_norm": 1.7187327461646178, "learning_rate": 8.589873474741877e-06, "loss": 0.5231483578681946, "step": 4673 }, { "epoch": 1.1361205639280505, "grad_norm": 1.5465504047198844, "learning_rate": 8.585894313921885e-06, "loss": 0.49123311042785645, "step": 4674 }, { "epoch": 1.1363636363636362, "grad_norm": 1.6888956834091855, "learning_rate": 8.581915381563122e-06, "loss": 0.48488175868988037, "step": 4675 }, { "epoch": 1.1366067087992222, "grad_norm": 1.5959382095393877, "learning_rate": 8.57793667830842e-06, "loss": 0.7920215129852295, "step": 4676 }, { "epoch": 1.136849781234808, "grad_norm": 1.6034880072438702, "learning_rate": 8.573958204800572e-06, "loss": 0.6222094893455505, "step": 4677 }, { "epoch": 1.1370928536703937, "grad_norm": 1.396905485961334, "learning_rate": 8.569979961682333e-06, "loss": 0.4224599003791809, "step": 4678 }, { "epoch": 1.1373359261059797, "grad_norm": 1.4090774907633377, "learning_rate": 8.566001949596423e-06, "loss": 0.6715916395187378, "step": 4679 }, { "epoch": 1.1375789985415654, "grad_norm": 1.63327923632172, "learning_rate": 8.562024169185532e-06, "loss": 0.7165378332138062, "step": 4680 }, { "epoch": 1.1378220709771512, "grad_norm": 1.8848135089310576, "learning_rate": 8.5580466210923e-06, "loss": 0.6406175494194031, "step": 4681 }, { "epoch": 1.138065143412737, "grad_norm": 1.4113765364137347, "learning_rate": 8.554069305959336e-06, "loss": 0.5127053260803223, "step": 4682 }, { "epoch": 1.1383082158483229, "grad_norm": 1.7077767820109238, "learning_rate": 8.550092224429209e-06, "loss": 0.608942985534668, "step": 4683 }, { "epoch": 1.1385512882839086, "grad_norm": 1.5629528824653334, "learning_rate": 8.546115377144454e-06, "loss": 0.8301626443862915, "step": 4684 }, { "epoch": 1.1387943607194944, "grad_norm": 1.8051848526369951, "learning_rate": 8.542138764747565e-06, "loss": 0.645798921585083, "step": 4685 }, { "epoch": 1.1390374331550803, "grad_norm": 1.4917468762132757, "learning_rate": 8.538162387881e-06, "loss": 0.655786395072937, "step": 4686 }, { "epoch": 1.139280505590666, "grad_norm": 1.4773257252283254, "learning_rate": 8.53418624718717e-06, "loss": 0.4979304075241089, "step": 4687 }, { "epoch": 1.1395235780262518, "grad_norm": 1.6462128500929343, "learning_rate": 8.530210343308467e-06, "loss": 0.5374335646629333, "step": 4688 }, { "epoch": 1.1397666504618376, "grad_norm": 1.7048447838959206, "learning_rate": 8.526234676887224e-06, "loss": 0.5713544487953186, "step": 4689 }, { "epoch": 1.1400097228974235, "grad_norm": 1.400115306484279, "learning_rate": 8.522259248565751e-06, "loss": 0.5917888879776001, "step": 4690 }, { "epoch": 1.1402527953330093, "grad_norm": 1.4844835367296685, "learning_rate": 8.518284058986313e-06, "loss": 0.5631924271583557, "step": 4691 }, { "epoch": 1.140495867768595, "grad_norm": 1.720043576777073, "learning_rate": 8.514309108791132e-06, "loss": 0.5149371027946472, "step": 4692 }, { "epoch": 1.1407389402041808, "grad_norm": 1.5908769609105327, "learning_rate": 8.5103343986224e-06, "loss": 0.5470885038375854, "step": 4693 }, { "epoch": 1.1409820126397667, "grad_norm": 1.4870577863150967, "learning_rate": 8.506359929122266e-06, "loss": 0.5591946840286255, "step": 4694 }, { "epoch": 1.1412250850753525, "grad_norm": 1.6887558577459854, "learning_rate": 8.50238570093284e-06, "loss": 0.629973292350769, "step": 4695 }, { "epoch": 1.1414681575109382, "grad_norm": 1.4576365090982737, "learning_rate": 8.498411714696194e-06, "loss": 0.6027295589447021, "step": 4696 }, { "epoch": 1.141711229946524, "grad_norm": 1.2836825562762837, "learning_rate": 8.494437971054358e-06, "loss": 0.5600541830062866, "step": 4697 }, { "epoch": 1.14195430238211, "grad_norm": 1.4604623422766505, "learning_rate": 8.490464470649332e-06, "loss": 0.7041088342666626, "step": 4698 }, { "epoch": 1.1421973748176957, "grad_norm": 1.3219344405945408, "learning_rate": 8.486491214123063e-06, "loss": 0.6558225154876709, "step": 4699 }, { "epoch": 1.1424404472532814, "grad_norm": 1.3363582021707252, "learning_rate": 8.482518202117468e-06, "loss": 0.5106720328330994, "step": 4700 }, { "epoch": 1.1426835196888674, "grad_norm": 1.583255335459784, "learning_rate": 8.478545435274424e-06, "loss": 0.5303730964660645, "step": 4701 }, { "epoch": 1.1429265921244531, "grad_norm": 1.3177813976851465, "learning_rate": 8.474572914235764e-06, "loss": 0.5502020716667175, "step": 4702 }, { "epoch": 1.1431696645600389, "grad_norm": 1.6678755493359483, "learning_rate": 8.470600639643281e-06, "loss": 0.650823712348938, "step": 4703 }, { "epoch": 1.1434127369956246, "grad_norm": 1.6634783065020093, "learning_rate": 8.466628612138738e-06, "loss": 0.6883741617202759, "step": 4704 }, { "epoch": 1.1436558094312106, "grad_norm": 1.5099002372178911, "learning_rate": 8.462656832363847e-06, "loss": 0.5063555240631104, "step": 4705 }, { "epoch": 1.1438988818667963, "grad_norm": 1.7730417563399044, "learning_rate": 8.458685300960286e-06, "loss": 0.4421682357788086, "step": 4706 }, { "epoch": 1.144141954302382, "grad_norm": 1.4675882513736793, "learning_rate": 8.454714018569687e-06, "loss": 0.48784005641937256, "step": 4707 }, { "epoch": 1.1443850267379678, "grad_norm": 1.516025322049905, "learning_rate": 8.450742985833648e-06, "loss": 0.6031169295310974, "step": 4708 }, { "epoch": 1.1446280991735538, "grad_norm": 1.5659548597179926, "learning_rate": 8.446772203393726e-06, "loss": 0.708555281162262, "step": 4709 }, { "epoch": 1.1448711716091395, "grad_norm": 1.2985723809872038, "learning_rate": 8.442801671891432e-06, "loss": 0.4962384104728699, "step": 4710 }, { "epoch": 1.1451142440447253, "grad_norm": 1.4702635927275498, "learning_rate": 8.438831391968243e-06, "loss": 0.6778659820556641, "step": 4711 }, { "epoch": 1.145357316480311, "grad_norm": 1.5066604411514306, "learning_rate": 8.434861364265592e-06, "loss": 0.5442681908607483, "step": 4712 }, { "epoch": 1.145600388915897, "grad_norm": 1.5926342733898309, "learning_rate": 8.430891589424872e-06, "loss": 0.5233851671218872, "step": 4713 }, { "epoch": 1.1458434613514827, "grad_norm": 1.6944690519953907, "learning_rate": 8.426922068087438e-06, "loss": 0.5143245458602905, "step": 4714 }, { "epoch": 1.1460865337870685, "grad_norm": 1.473127391034295, "learning_rate": 8.422952800894596e-06, "loss": 0.5158453583717346, "step": 4715 }, { "epoch": 1.1463296062226545, "grad_norm": 1.7724100328976637, "learning_rate": 8.41898378848762e-06, "loss": 0.6867268085479736, "step": 4716 }, { "epoch": 1.1465726786582402, "grad_norm": 1.567382379820179, "learning_rate": 8.415015031507734e-06, "loss": 0.6521567106246948, "step": 4717 }, { "epoch": 1.146815751093826, "grad_norm": 1.6350430342027582, "learning_rate": 8.411046530596131e-06, "loss": 0.5861766338348389, "step": 4718 }, { "epoch": 1.1470588235294117, "grad_norm": 1.5215949899331092, "learning_rate": 8.407078286393956e-06, "loss": 0.5732424855232239, "step": 4719 }, { "epoch": 1.1473018959649977, "grad_norm": 1.5981883215492807, "learning_rate": 8.40311029954231e-06, "loss": 0.6376622319221497, "step": 4720 }, { "epoch": 1.1475449684005834, "grad_norm": 1.3268637108783898, "learning_rate": 8.399142570682262e-06, "loss": 0.5310641527175903, "step": 4721 }, { "epoch": 1.1477880408361691, "grad_norm": 1.4764436582699578, "learning_rate": 8.395175100454832e-06, "loss": 0.5726916193962097, "step": 4722 }, { "epoch": 1.1480311132717549, "grad_norm": 1.542033595068861, "learning_rate": 8.391207889500997e-06, "loss": 0.506916344165802, "step": 4723 }, { "epoch": 1.1482741857073409, "grad_norm": 1.4064918078034303, "learning_rate": 8.387240938461696e-06, "loss": 0.6673961877822876, "step": 4724 }, { "epoch": 1.1485172581429266, "grad_norm": 1.6193945441896493, "learning_rate": 8.383274247977827e-06, "loss": 0.5649533271789551, "step": 4725 }, { "epoch": 1.1487603305785123, "grad_norm": 1.3829333561811696, "learning_rate": 8.37930781869024e-06, "loss": 0.5573838353157043, "step": 4726 }, { "epoch": 1.149003403014098, "grad_norm": 1.3728635065624069, "learning_rate": 8.37534165123975e-06, "loss": 0.5554089546203613, "step": 4727 }, { "epoch": 1.149246475449684, "grad_norm": 1.5526646332865568, "learning_rate": 8.371375746267122e-06, "loss": 0.49995046854019165, "step": 4728 }, { "epoch": 1.1494895478852698, "grad_norm": 1.2433656278042333, "learning_rate": 8.367410104413087e-06, "loss": 0.4268609881401062, "step": 4729 }, { "epoch": 1.1497326203208555, "grad_norm": 1.6753485748042005, "learning_rate": 8.36344472631833e-06, "loss": 0.5148785710334778, "step": 4730 }, { "epoch": 1.1499756927564415, "grad_norm": 1.489754696654763, "learning_rate": 8.359479612623492e-06, "loss": 0.5789898037910461, "step": 4731 }, { "epoch": 1.1502187651920273, "grad_norm": 1.6081205166188286, "learning_rate": 8.355514763969168e-06, "loss": 0.48006927967071533, "step": 4732 }, { "epoch": 1.150461837627613, "grad_norm": 1.5332363553343078, "learning_rate": 8.35155018099592e-06, "loss": 0.6770176887512207, "step": 4733 }, { "epoch": 1.1507049100631987, "grad_norm": 1.742472387197658, "learning_rate": 8.347585864344256e-06, "loss": 0.6008217334747314, "step": 4734 }, { "epoch": 1.1509479824987847, "grad_norm": 1.680541617715875, "learning_rate": 8.34362181465465e-06, "loss": 0.549445390701294, "step": 4735 }, { "epoch": 1.1511910549343705, "grad_norm": 1.8276254218697667, "learning_rate": 8.339658032567523e-06, "loss": 0.5781971216201782, "step": 4736 }, { "epoch": 1.1514341273699562, "grad_norm": 1.6868370010069555, "learning_rate": 8.335694518723272e-06, "loss": 0.6043404340744019, "step": 4737 }, { "epoch": 1.1516771998055422, "grad_norm": 1.4993100731180908, "learning_rate": 8.331731273762226e-06, "loss": 0.5940980315208435, "step": 4738 }, { "epoch": 1.151920272241128, "grad_norm": 1.38222144166632, "learning_rate": 8.327768298324685e-06, "loss": 0.6190036535263062, "step": 4739 }, { "epoch": 1.1521633446767137, "grad_norm": 1.3600255943732205, "learning_rate": 8.323805593050904e-06, "loss": 0.688663125038147, "step": 4740 }, { "epoch": 1.1524064171122994, "grad_norm": 1.605727870853516, "learning_rate": 8.319843158581092e-06, "loss": 0.5322248935699463, "step": 4741 }, { "epoch": 1.1526494895478852, "grad_norm": 1.4639448867343905, "learning_rate": 8.315880995555416e-06, "loss": 0.5294251441955566, "step": 4742 }, { "epoch": 1.1528925619834711, "grad_norm": 1.6498011195125453, "learning_rate": 8.311919104613996e-06, "loss": 0.5794675350189209, "step": 4743 }, { "epoch": 1.1531356344190569, "grad_norm": 1.5346208810055164, "learning_rate": 8.307957486396908e-06, "loss": 0.5935835838317871, "step": 4744 }, { "epoch": 1.1533787068546426, "grad_norm": 1.4891782473151693, "learning_rate": 8.303996141544196e-06, "loss": 0.48603111505508423, "step": 4745 }, { "epoch": 1.1536217792902286, "grad_norm": 1.4896665440847345, "learning_rate": 8.300035070695844e-06, "loss": 0.5451321601867676, "step": 4746 }, { "epoch": 1.1538648517258143, "grad_norm": 1.3475619465789253, "learning_rate": 8.296074274491798e-06, "loss": 0.4714433252811432, "step": 4747 }, { "epoch": 1.1541079241614, "grad_norm": 1.2687432329615127, "learning_rate": 8.292113753571959e-06, "loss": 0.4649322032928467, "step": 4748 }, { "epoch": 1.1543509965969858, "grad_norm": 1.4279433615671364, "learning_rate": 8.288153508576186e-06, "loss": 0.42881524562835693, "step": 4749 }, { "epoch": 1.1545940690325718, "grad_norm": 1.3691756816743463, "learning_rate": 8.284193540144288e-06, "loss": 0.5466026663780212, "step": 4750 }, { "epoch": 1.1548371414681575, "grad_norm": 1.5994170653460313, "learning_rate": 8.280233848916036e-06, "loss": 0.733158528804779, "step": 4751 }, { "epoch": 1.1550802139037433, "grad_norm": 1.5856180276507479, "learning_rate": 8.276274435531154e-06, "loss": 0.7286702394485474, "step": 4752 }, { "epoch": 1.1553232863393292, "grad_norm": 1.6368983316263217, "learning_rate": 8.272315300629313e-06, "loss": 0.4820973575115204, "step": 4753 }, { "epoch": 1.155566358774915, "grad_norm": 1.6468234920158125, "learning_rate": 8.268356444850154e-06, "loss": 0.6136516332626343, "step": 4754 }, { "epoch": 1.1558094312105007, "grad_norm": 1.499267377840064, "learning_rate": 8.264397868833263e-06, "loss": 0.6027719974517822, "step": 4755 }, { "epoch": 1.1560525036460865, "grad_norm": 1.615230207030876, "learning_rate": 8.260439573218185e-06, "loss": 0.5730490684509277, "step": 4756 }, { "epoch": 1.1562955760816722, "grad_norm": 1.6144766925876257, "learning_rate": 8.256481558644412e-06, "loss": 0.5130010843276978, "step": 4757 }, { "epoch": 1.1565386485172582, "grad_norm": 1.4873141973592126, "learning_rate": 8.2525238257514e-06, "loss": 0.5567917823791504, "step": 4758 }, { "epoch": 1.156781720952844, "grad_norm": 1.3680291735764043, "learning_rate": 8.248566375178551e-06, "loss": 0.49875783920288086, "step": 4759 }, { "epoch": 1.1570247933884297, "grad_norm": 1.4141942006052612, "learning_rate": 8.244609207565233e-06, "loss": 0.574736475944519, "step": 4760 }, { "epoch": 1.1572678658240156, "grad_norm": 1.3646398589165734, "learning_rate": 8.24065232355075e-06, "loss": 0.6418272852897644, "step": 4761 }, { "epoch": 1.1575109382596014, "grad_norm": 1.5244703146614424, "learning_rate": 8.236695723774386e-06, "loss": 0.563656210899353, "step": 4762 }, { "epoch": 1.1577540106951871, "grad_norm": 1.8304334805164695, "learning_rate": 8.232739408875357e-06, "loss": 0.5805160999298096, "step": 4763 }, { "epoch": 1.1579970831307729, "grad_norm": 1.4950155634867641, "learning_rate": 8.228783379492841e-06, "loss": 0.691533625125885, "step": 4764 }, { "epoch": 1.1582401555663588, "grad_norm": 1.4989837285140302, "learning_rate": 8.224827636265971e-06, "loss": 0.47680604457855225, "step": 4765 }, { "epoch": 1.1584832280019446, "grad_norm": 1.6412222557272016, "learning_rate": 8.22087217983383e-06, "loss": 0.5179800987243652, "step": 4766 }, { "epoch": 1.1587263004375303, "grad_norm": 1.3722684301472468, "learning_rate": 8.216917010835456e-06, "loss": 0.4444543123245239, "step": 4767 }, { "epoch": 1.1589693728731163, "grad_norm": 1.5989983971865698, "learning_rate": 8.212962129909843e-06, "loss": 0.6291130185127258, "step": 4768 }, { "epoch": 1.159212445308702, "grad_norm": 1.6556979238021354, "learning_rate": 8.209007537695934e-06, "loss": 0.603624165058136, "step": 4769 }, { "epoch": 1.1594555177442878, "grad_norm": 1.4579065557234496, "learning_rate": 8.205053234832634e-06, "loss": 0.5698781609535217, "step": 4770 }, { "epoch": 1.1596985901798735, "grad_norm": 1.4835759046827, "learning_rate": 8.201099221958793e-06, "loss": 0.616357147693634, "step": 4771 }, { "epoch": 1.1599416626154595, "grad_norm": 1.5627469468554522, "learning_rate": 8.197145499713214e-06, "loss": 0.614068865776062, "step": 4772 }, { "epoch": 1.1601847350510452, "grad_norm": 1.689618106190973, "learning_rate": 8.19319206873466e-06, "loss": 0.5766292214393616, "step": 4773 }, { "epoch": 1.160427807486631, "grad_norm": 1.6672854068408867, "learning_rate": 8.18923892966184e-06, "loss": 0.5523936152458191, "step": 4774 }, { "epoch": 1.1606708799222167, "grad_norm": 1.281723852175699, "learning_rate": 8.185286083133419e-06, "loss": 0.4749823212623596, "step": 4775 }, { "epoch": 1.1609139523578027, "grad_norm": 1.5034791337022018, "learning_rate": 8.181333529788013e-06, "loss": 0.6087266802787781, "step": 4776 }, { "epoch": 1.1611570247933884, "grad_norm": 1.2932662224240314, "learning_rate": 8.177381270264189e-06, "loss": 0.4909393787384033, "step": 4777 }, { "epoch": 1.1614000972289742, "grad_norm": 1.645507959219267, "learning_rate": 8.173429305200477e-06, "loss": 0.6168253421783447, "step": 4778 }, { "epoch": 1.16164316966456, "grad_norm": 1.5638012864048352, "learning_rate": 8.169477635235346e-06, "loss": 0.5512549877166748, "step": 4779 }, { "epoch": 1.161886242100146, "grad_norm": 1.6491886461632654, "learning_rate": 8.165526261007225e-06, "loss": 0.700494647026062, "step": 4780 }, { "epoch": 1.1621293145357317, "grad_norm": 1.6086361225064962, "learning_rate": 8.161575183154495e-06, "loss": 0.5377607345581055, "step": 4781 }, { "epoch": 1.1623723869713174, "grad_norm": 1.5875375552370294, "learning_rate": 8.157624402315487e-06, "loss": 0.5577132701873779, "step": 4782 }, { "epoch": 1.1626154594069034, "grad_norm": 1.4093396955258752, "learning_rate": 8.153673919128479e-06, "loss": 0.6905319094657898, "step": 4783 }, { "epoch": 1.162858531842489, "grad_norm": 1.528806481711937, "learning_rate": 8.149723734231712e-06, "loss": 0.6458002924919128, "step": 4784 }, { "epoch": 1.1631016042780749, "grad_norm": 1.751741810130294, "learning_rate": 8.14577384826337e-06, "loss": 0.5641061663627625, "step": 4785 }, { "epoch": 1.1633446767136606, "grad_norm": 1.5400625047756005, "learning_rate": 8.141824261861595e-06, "loss": 0.6010767221450806, "step": 4786 }, { "epoch": 1.1635877491492466, "grad_norm": 1.9015625047384548, "learning_rate": 8.137874975664478e-06, "loss": 0.7858662009239197, "step": 4787 }, { "epoch": 1.1638308215848323, "grad_norm": 1.6728661194662409, "learning_rate": 8.133925990310056e-06, "loss": 0.541273832321167, "step": 4788 }, { "epoch": 1.164073894020418, "grad_norm": 1.4102066554487702, "learning_rate": 8.129977306436326e-06, "loss": 0.49253326654434204, "step": 4789 }, { "epoch": 1.1643169664560038, "grad_norm": 1.489062011274401, "learning_rate": 8.126028924681231e-06, "loss": 0.5839226245880127, "step": 4790 }, { "epoch": 1.1645600388915898, "grad_norm": 1.6234474320954255, "learning_rate": 8.122080845682668e-06, "loss": 0.6660164594650269, "step": 4791 }, { "epoch": 1.1648031113271755, "grad_norm": 1.4113864604929742, "learning_rate": 8.118133070078485e-06, "loss": 0.5448316931724548, "step": 4792 }, { "epoch": 1.1650461837627613, "grad_norm": 1.5287526156301536, "learning_rate": 8.114185598506477e-06, "loss": 0.5800611972808838, "step": 4793 }, { "epoch": 1.165289256198347, "grad_norm": 1.583554510753844, "learning_rate": 8.110238431604393e-06, "loss": 0.5162487030029297, "step": 4794 }, { "epoch": 1.165532328633933, "grad_norm": 1.8842106596361115, "learning_rate": 8.106291570009935e-06, "loss": 0.6624077558517456, "step": 4795 }, { "epoch": 1.1657754010695187, "grad_norm": 1.6523254838659498, "learning_rate": 8.102345014360753e-06, "loss": 0.5917428731918335, "step": 4796 }, { "epoch": 1.1660184735051045, "grad_norm": 1.6306623849539663, "learning_rate": 8.098398765294447e-06, "loss": 0.686683177947998, "step": 4797 }, { "epoch": 1.1662615459406904, "grad_norm": 1.4414402652874685, "learning_rate": 8.094452823448569e-06, "loss": 0.4988190829753876, "step": 4798 }, { "epoch": 1.1665046183762762, "grad_norm": 1.483672053657575, "learning_rate": 8.090507189460619e-06, "loss": 0.6191856861114502, "step": 4799 }, { "epoch": 1.166747690811862, "grad_norm": 1.5546231687063352, "learning_rate": 8.08656186396805e-06, "loss": 0.5951703786849976, "step": 4800 }, { "epoch": 1.1669907632474477, "grad_norm": 1.4921471382466305, "learning_rate": 8.082616847608264e-06, "loss": 0.5569525957107544, "step": 4801 }, { "epoch": 1.1672338356830336, "grad_norm": 1.407467245449497, "learning_rate": 8.07867214101861e-06, "loss": 0.6066544055938721, "step": 4802 }, { "epoch": 1.1674769081186194, "grad_norm": 1.56342293999919, "learning_rate": 8.074727744836397e-06, "loss": 0.6438538432121277, "step": 4803 }, { "epoch": 1.1677199805542051, "grad_norm": 1.4671863661122524, "learning_rate": 8.070783659698872e-06, "loss": 0.5491798520088196, "step": 4804 }, { "epoch": 1.1679630529897909, "grad_norm": 1.471774765922662, "learning_rate": 8.066839886243238e-06, "loss": 0.5227756500244141, "step": 4805 }, { "epoch": 1.1682061254253768, "grad_norm": 1.3479747897135965, "learning_rate": 8.062896425106647e-06, "loss": 0.5302000045776367, "step": 4806 }, { "epoch": 1.1684491978609626, "grad_norm": 1.4685469973862124, "learning_rate": 8.058953276926197e-06, "loss": 0.6025059223175049, "step": 4807 }, { "epoch": 1.1686922702965483, "grad_norm": 1.5270090557884273, "learning_rate": 8.055010442338942e-06, "loss": 0.5213838219642639, "step": 4808 }, { "epoch": 1.168935342732134, "grad_norm": 1.5751129507718553, "learning_rate": 8.05106792198188e-06, "loss": 0.6835908889770508, "step": 4809 }, { "epoch": 1.16917841516772, "grad_norm": 1.7789648092628434, "learning_rate": 8.047125716491958e-06, "loss": 0.6186796426773071, "step": 4810 }, { "epoch": 1.1694214876033058, "grad_norm": 1.4142181428603684, "learning_rate": 8.043183826506078e-06, "loss": 0.5166891813278198, "step": 4811 }, { "epoch": 1.1696645600388915, "grad_norm": 1.3844481158453272, "learning_rate": 8.039242252661083e-06, "loss": 0.47805026173591614, "step": 4812 }, { "epoch": 1.1699076324744775, "grad_norm": 1.4571706783757858, "learning_rate": 8.035300995593772e-06, "loss": 0.5159022808074951, "step": 4813 }, { "epoch": 1.1701507049100632, "grad_norm": 1.6550749721158111, "learning_rate": 8.03136005594089e-06, "loss": 0.6992567181587219, "step": 4814 }, { "epoch": 1.170393777345649, "grad_norm": 1.5431569706276733, "learning_rate": 8.027419434339126e-06, "loss": 0.6660257577896118, "step": 4815 }, { "epoch": 1.1706368497812347, "grad_norm": 1.6426645367584838, "learning_rate": 8.023479131425129e-06, "loss": 0.6175377368927002, "step": 4816 }, { "epoch": 1.1708799222168207, "grad_norm": 1.467644497800832, "learning_rate": 8.019539147835483e-06, "loss": 0.7570216655731201, "step": 4817 }, { "epoch": 1.1711229946524064, "grad_norm": 1.6185097265802202, "learning_rate": 8.01559948420673e-06, "loss": 0.48509207367897034, "step": 4818 }, { "epoch": 1.1713660670879922, "grad_norm": 1.4725395337465756, "learning_rate": 8.011660141175358e-06, "loss": 0.6611501574516296, "step": 4819 }, { "epoch": 1.1716091395235781, "grad_norm": 1.5181025614183106, "learning_rate": 8.007721119377803e-06, "loss": 0.7014344930648804, "step": 4820 }, { "epoch": 1.171852211959164, "grad_norm": 1.5255599132104505, "learning_rate": 8.003782419450448e-06, "loss": 0.5580021739006042, "step": 4821 }, { "epoch": 1.1720952843947496, "grad_norm": 1.4938386356153481, "learning_rate": 7.999844042029624e-06, "loss": 0.5792073011398315, "step": 4822 }, { "epoch": 1.1723383568303354, "grad_norm": 1.6427189752541531, "learning_rate": 7.99590598775161e-06, "loss": 0.4686654210090637, "step": 4823 }, { "epoch": 1.1725814292659211, "grad_norm": 1.3011631463601367, "learning_rate": 7.991968257252634e-06, "loss": 0.5505779981613159, "step": 4824 }, { "epoch": 1.172824501701507, "grad_norm": 1.7867304163551052, "learning_rate": 7.988030851168871e-06, "loss": 0.7306749820709229, "step": 4825 }, { "epoch": 1.1730675741370928, "grad_norm": 1.7086976564110279, "learning_rate": 7.984093770136441e-06, "loss": 0.6742611527442932, "step": 4826 }, { "epoch": 1.1733106465726786, "grad_norm": 1.5513181065020847, "learning_rate": 7.98015701479142e-06, "loss": 0.47669917345046997, "step": 4827 }, { "epoch": 1.1735537190082646, "grad_norm": 1.6375440637964884, "learning_rate": 7.976220585769824e-06, "loss": 0.5777784585952759, "step": 4828 }, { "epoch": 1.1737967914438503, "grad_norm": 1.3705197656693413, "learning_rate": 7.972284483707615e-06, "loss": 0.8215893507003784, "step": 4829 }, { "epoch": 1.174039863879436, "grad_norm": 1.4327838065984964, "learning_rate": 7.968348709240706e-06, "loss": 0.612478494644165, "step": 4830 }, { "epoch": 1.1742829363150218, "grad_norm": 1.4975895563108412, "learning_rate": 7.964413263004957e-06, "loss": 0.5503253936767578, "step": 4831 }, { "epoch": 1.1745260087506078, "grad_norm": 1.239926581460509, "learning_rate": 7.960478145636173e-06, "loss": 0.47599634528160095, "step": 4832 }, { "epoch": 1.1747690811861935, "grad_norm": 1.4006591499459309, "learning_rate": 7.956543357770105e-06, "loss": 0.6257092952728271, "step": 4833 }, { "epoch": 1.1750121536217792, "grad_norm": 1.4810388108130925, "learning_rate": 7.952608900042457e-06, "loss": 0.570937991142273, "step": 4834 }, { "epoch": 1.1752552260573652, "grad_norm": 1.4262697316415716, "learning_rate": 7.948674773088871e-06, "loss": 0.49631378054618835, "step": 4835 }, { "epoch": 1.175498298492951, "grad_norm": 1.4608813145805661, "learning_rate": 7.944740977544944e-06, "loss": 0.5434062480926514, "step": 4836 }, { "epoch": 1.1757413709285367, "grad_norm": 1.4596133461575422, "learning_rate": 7.940807514046211e-06, "loss": 0.5137939453125, "step": 4837 }, { "epoch": 1.1759844433641224, "grad_norm": 1.6354136547573244, "learning_rate": 7.936874383228162e-06, "loss": 0.7691118717193604, "step": 4838 }, { "epoch": 1.1762275157997082, "grad_norm": 1.4792378394214385, "learning_rate": 7.932941585726226e-06, "loss": 0.5664836764335632, "step": 4839 }, { "epoch": 1.1764705882352942, "grad_norm": 1.5451083725955446, "learning_rate": 7.929009122175783e-06, "loss": 0.5267726182937622, "step": 4840 }, { "epoch": 1.17671366067088, "grad_norm": 1.6387054085906634, "learning_rate": 7.925076993212154e-06, "loss": 0.5479843616485596, "step": 4841 }, { "epoch": 1.1769567331064656, "grad_norm": 1.664799451160637, "learning_rate": 7.921145199470612e-06, "loss": 0.8985415697097778, "step": 4842 }, { "epoch": 1.1771998055420516, "grad_norm": 1.5145162311661158, "learning_rate": 7.917213741586368e-06, "loss": 0.6841373443603516, "step": 4843 }, { "epoch": 1.1774428779776374, "grad_norm": 1.3986361382021786, "learning_rate": 7.913282620194592e-06, "loss": 0.7083829641342163, "step": 4844 }, { "epoch": 1.177685950413223, "grad_norm": 1.543015740134499, "learning_rate": 7.909351835930389e-06, "loss": 0.6325552463531494, "step": 4845 }, { "epoch": 1.1779290228488088, "grad_norm": 1.3818727833426965, "learning_rate": 7.905421389428806e-06, "loss": 0.42906248569488525, "step": 4846 }, { "epoch": 1.1781720952843948, "grad_norm": 1.7391776402162182, "learning_rate": 7.901491281324847e-06, "loss": 0.6493852734565735, "step": 4847 }, { "epoch": 1.1784151677199806, "grad_norm": 1.7582934471736167, "learning_rate": 7.897561512253451e-06, "loss": 0.566387951374054, "step": 4848 }, { "epoch": 1.1786582401555663, "grad_norm": 1.6342911157175914, "learning_rate": 7.893632082849512e-06, "loss": 0.6444041132926941, "step": 4849 }, { "epoch": 1.1789013125911523, "grad_norm": 1.7510827588015525, "learning_rate": 7.88970299374786e-06, "loss": 0.6045805811882019, "step": 4850 }, { "epoch": 1.179144385026738, "grad_norm": 1.4894347966367099, "learning_rate": 7.885774245583272e-06, "loss": 0.5804477334022522, "step": 4851 }, { "epoch": 1.1793874574623238, "grad_norm": 1.3177388875461096, "learning_rate": 7.881845838990477e-06, "loss": 0.6141648888587952, "step": 4852 }, { "epoch": 1.1796305298979095, "grad_norm": 1.7181436895355784, "learning_rate": 7.877917774604144e-06, "loss": 0.6743941903114319, "step": 4853 }, { "epoch": 1.1798736023334955, "grad_norm": 1.4470019236355174, "learning_rate": 7.873990053058883e-06, "loss": 0.5142179131507874, "step": 4854 }, { "epoch": 1.1801166747690812, "grad_norm": 1.493718944259176, "learning_rate": 7.870062674989255e-06, "loss": 0.4750288128852844, "step": 4855 }, { "epoch": 1.180359747204667, "grad_norm": 1.6415342954563168, "learning_rate": 7.866135641029759e-06, "loss": 0.4603268802165985, "step": 4856 }, { "epoch": 1.1806028196402527, "grad_norm": 1.5289914294813634, "learning_rate": 7.862208951814846e-06, "loss": 0.582291841506958, "step": 4857 }, { "epoch": 1.1808458920758387, "grad_norm": 1.432005184594248, "learning_rate": 7.8582826079789e-06, "loss": 0.5088425874710083, "step": 4858 }, { "epoch": 1.1810889645114244, "grad_norm": 1.478210775063557, "learning_rate": 7.85435661015626e-06, "loss": 0.594950795173645, "step": 4859 }, { "epoch": 1.1813320369470102, "grad_norm": 1.743694770872368, "learning_rate": 7.85043095898121e-06, "loss": 0.5538040995597839, "step": 4860 }, { "epoch": 1.181575109382596, "grad_norm": 1.6395683609426894, "learning_rate": 7.846505655087972e-06, "loss": 0.6168408393859863, "step": 4861 }, { "epoch": 1.1818181818181819, "grad_norm": 1.5413389234744874, "learning_rate": 7.84258069911071e-06, "loss": 0.5799242854118347, "step": 4862 }, { "epoch": 1.1820612542537676, "grad_norm": 1.8300862332040748, "learning_rate": 7.838656091683537e-06, "loss": 0.5011212229728699, "step": 4863 }, { "epoch": 1.1823043266893534, "grad_norm": 1.4195958517839382, "learning_rate": 7.83473183344051e-06, "loss": 0.6520829796791077, "step": 4864 }, { "epoch": 1.1825473991249393, "grad_norm": 1.6348757140139178, "learning_rate": 7.830807925015624e-06, "loss": 0.6058746576309204, "step": 4865 }, { "epoch": 1.182790471560525, "grad_norm": 1.5905312589246934, "learning_rate": 7.826884367042821e-06, "loss": 0.6177981495857239, "step": 4866 }, { "epoch": 1.1830335439961108, "grad_norm": 1.7832281299472938, "learning_rate": 7.822961160155985e-06, "loss": 0.5191969871520996, "step": 4867 }, { "epoch": 1.1832766164316966, "grad_norm": 1.7311490350693752, "learning_rate": 7.81903830498895e-06, "loss": 0.5595442056655884, "step": 4868 }, { "epoch": 1.1835196888672825, "grad_norm": 1.6093785076335483, "learning_rate": 7.815115802175485e-06, "loss": 0.5879508256912231, "step": 4869 }, { "epoch": 1.1837627613028683, "grad_norm": 1.7580266891696015, "learning_rate": 7.811193652349307e-06, "loss": 0.4778252840042114, "step": 4870 }, { "epoch": 1.184005833738454, "grad_norm": 1.639075357770564, "learning_rate": 7.80727185614407e-06, "loss": 0.680820107460022, "step": 4871 }, { "epoch": 1.1842489061740398, "grad_norm": 1.6578293676013276, "learning_rate": 7.803350414193377e-06, "loss": 0.4367676377296448, "step": 4872 }, { "epoch": 1.1844919786096257, "grad_norm": 1.5603278816507107, "learning_rate": 7.799429327130771e-06, "loss": 0.5991494059562683, "step": 4873 }, { "epoch": 1.1847350510452115, "grad_norm": 1.452458742460692, "learning_rate": 7.795508595589738e-06, "loss": 0.5210568904876709, "step": 4874 }, { "epoch": 1.1849781234807972, "grad_norm": 1.5484852365587256, "learning_rate": 7.791588220203708e-06, "loss": 0.6502312421798706, "step": 4875 }, { "epoch": 1.185221195916383, "grad_norm": 1.4288757613543563, "learning_rate": 7.787668201606046e-06, "loss": 0.5377901196479797, "step": 4876 }, { "epoch": 1.185464268351969, "grad_norm": 1.3284416815526958, "learning_rate": 7.783748540430074e-06, "loss": 0.47621679306030273, "step": 4877 }, { "epoch": 1.1857073407875547, "grad_norm": 1.5842752722497841, "learning_rate": 7.779829237309044e-06, "loss": 0.6297405362129211, "step": 4878 }, { "epoch": 1.1859504132231404, "grad_norm": 1.5149742599587581, "learning_rate": 7.775910292876155e-06, "loss": 0.6323806643486023, "step": 4879 }, { "epoch": 1.1861934856587264, "grad_norm": 1.5503343900981743, "learning_rate": 7.771991707764545e-06, "loss": 0.5537708401679993, "step": 4880 }, { "epoch": 1.1864365580943121, "grad_norm": 1.680558017497662, "learning_rate": 7.768073482607298e-06, "loss": 0.6269676685333252, "step": 4881 }, { "epoch": 1.1866796305298979, "grad_norm": 1.6305825506466893, "learning_rate": 7.764155618037437e-06, "loss": 0.5571316480636597, "step": 4882 }, { "epoch": 1.1869227029654836, "grad_norm": 1.5587318491150142, "learning_rate": 7.76023811468793e-06, "loss": 0.6822373270988464, "step": 4883 }, { "epoch": 1.1871657754010696, "grad_norm": 1.352278552323818, "learning_rate": 7.756320973191675e-06, "loss": 0.5373497009277344, "step": 4884 }, { "epoch": 1.1874088478366553, "grad_norm": 1.576670556918278, "learning_rate": 7.75240419418153e-06, "loss": 0.5442001819610596, "step": 4885 }, { "epoch": 1.187651920272241, "grad_norm": 1.5254342115770407, "learning_rate": 7.748487778290284e-06, "loss": 0.6353763341903687, "step": 4886 }, { "epoch": 1.1878949927078268, "grad_norm": 1.6376125086694888, "learning_rate": 7.744571726150667e-06, "loss": 0.5845664739608765, "step": 4887 }, { "epoch": 1.1881380651434128, "grad_norm": 1.6237633419590836, "learning_rate": 7.740656038395353e-06, "loss": 0.6491991877555847, "step": 4888 }, { "epoch": 1.1883811375789985, "grad_norm": 1.3496358633003107, "learning_rate": 7.736740715656952e-06, "loss": 0.5601421594619751, "step": 4889 }, { "epoch": 1.1886242100145843, "grad_norm": 1.5680521052376066, "learning_rate": 7.732825758568022e-06, "loss": 0.5833006501197815, "step": 4890 }, { "epoch": 1.18886728245017, "grad_norm": 1.525002401231162, "learning_rate": 7.728911167761058e-06, "loss": 0.68497633934021, "step": 4891 }, { "epoch": 1.189110354885756, "grad_norm": 1.5855887695387563, "learning_rate": 7.724996943868495e-06, "loss": 0.5132187008857727, "step": 4892 }, { "epoch": 1.1893534273213417, "grad_norm": 1.832611825281125, "learning_rate": 7.721083087522717e-06, "loss": 0.6981265544891357, "step": 4893 }, { "epoch": 1.1895964997569275, "grad_norm": 1.625608275230516, "learning_rate": 7.717169599356034e-06, "loss": 0.6079316139221191, "step": 4894 }, { "epoch": 1.1898395721925135, "grad_norm": 1.5298491876955669, "learning_rate": 7.713256480000709e-06, "loss": 0.6106953024864197, "step": 4895 }, { "epoch": 1.1900826446280992, "grad_norm": 1.52707518559415, "learning_rate": 7.709343730088938e-06, "loss": 0.5613738894462585, "step": 4896 }, { "epoch": 1.190325717063685, "grad_norm": 1.5474196047016169, "learning_rate": 7.705431350252863e-06, "loss": 0.7108067274093628, "step": 4897 }, { "epoch": 1.1905687894992707, "grad_norm": 1.5757408409694793, "learning_rate": 7.70151934112456e-06, "loss": 0.6219706535339355, "step": 4898 }, { "epoch": 1.1908118619348567, "grad_norm": 1.512422847719208, "learning_rate": 7.697607703336052e-06, "loss": 0.5863091349601746, "step": 4899 }, { "epoch": 1.1910549343704424, "grad_norm": 1.6385594269343124, "learning_rate": 7.693696437519295e-06, "loss": 0.5579441785812378, "step": 4900 }, { "epoch": 1.1912980068060282, "grad_norm": 1.6035550116630668, "learning_rate": 7.689785544306194e-06, "loss": 0.5622843503952026, "step": 4901 }, { "epoch": 1.1915410792416141, "grad_norm": 1.443198798882003, "learning_rate": 7.685875024328584e-06, "loss": 0.5701436996459961, "step": 4902 }, { "epoch": 1.1917841516771999, "grad_norm": 1.766330183288853, "learning_rate": 7.681964878218244e-06, "loss": 0.5547758340835571, "step": 4903 }, { "epoch": 1.1920272241127856, "grad_norm": 1.6994763258641468, "learning_rate": 7.678055106606895e-06, "loss": 0.6367729306221008, "step": 4904 }, { "epoch": 1.1922702965483714, "grad_norm": 1.7540842529605194, "learning_rate": 7.674145710126193e-06, "loss": 0.644981861114502, "step": 4905 }, { "epoch": 1.192513368983957, "grad_norm": 1.5029209449098935, "learning_rate": 7.670236689407736e-06, "loss": 0.6859593987464905, "step": 4906 }, { "epoch": 1.192756441419543, "grad_norm": 1.4881719754741651, "learning_rate": 7.666328045083057e-06, "loss": 0.6803637742996216, "step": 4907 }, { "epoch": 1.1929995138551288, "grad_norm": 2.087517323689865, "learning_rate": 7.66241977778364e-06, "loss": 0.5910333395004272, "step": 4908 }, { "epoch": 1.1932425862907146, "grad_norm": 1.4173326377380724, "learning_rate": 7.65851188814089e-06, "loss": 0.6718213558197021, "step": 4909 }, { "epoch": 1.1934856587263005, "grad_norm": 1.5649779536994368, "learning_rate": 7.654604376786173e-06, "loss": 0.5856678485870361, "step": 4910 }, { "epoch": 1.1937287311618863, "grad_norm": 1.5600995703483875, "learning_rate": 7.650697244350772e-06, "loss": 0.647980809211731, "step": 4911 }, { "epoch": 1.193971803597472, "grad_norm": 1.4952984368329787, "learning_rate": 7.646790491465921e-06, "loss": 0.5500811338424683, "step": 4912 }, { "epoch": 1.1942148760330578, "grad_norm": 1.828168668384687, "learning_rate": 7.642884118762795e-06, "loss": 0.49470263719558716, "step": 4913 }, { "epoch": 1.1944579484686437, "grad_norm": 1.5342386305942206, "learning_rate": 7.638978126872495e-06, "loss": 0.6132679581642151, "step": 4914 }, { "epoch": 1.1947010209042295, "grad_norm": 1.552200717871224, "learning_rate": 7.635072516426077e-06, "loss": 0.4827370345592499, "step": 4915 }, { "epoch": 1.1949440933398152, "grad_norm": 1.5012606963414372, "learning_rate": 7.63116728805452e-06, "loss": 0.5980871319770813, "step": 4916 }, { "epoch": 1.1951871657754012, "grad_norm": 1.4539260320920817, "learning_rate": 7.627262442388747e-06, "loss": 0.716368556022644, "step": 4917 }, { "epoch": 1.195430238210987, "grad_norm": 1.1181850343489101, "learning_rate": 7.623357980059627e-06, "loss": 0.5711433291435242, "step": 4918 }, { "epoch": 1.1956733106465727, "grad_norm": 1.597187873711819, "learning_rate": 7.619453901697958e-06, "loss": 0.5147548317909241, "step": 4919 }, { "epoch": 1.1959163830821584, "grad_norm": 1.4148806512996048, "learning_rate": 7.6155502079344754e-06, "loss": 0.645781934261322, "step": 4920 }, { "epoch": 1.1961594555177442, "grad_norm": 1.6800565808190788, "learning_rate": 7.611646899399858e-06, "loss": 0.7296221256256104, "step": 4921 }, { "epoch": 1.1964025279533301, "grad_norm": 1.530335601615665, "learning_rate": 7.60774397672472e-06, "loss": 0.5158886313438416, "step": 4922 }, { "epoch": 1.1966456003889159, "grad_norm": 1.58364286563554, "learning_rate": 7.60384144053961e-06, "loss": 0.5373148918151855, "step": 4923 }, { "epoch": 1.1968886728245016, "grad_norm": 1.548837870486113, "learning_rate": 7.599939291475019e-06, "loss": 0.6459131836891174, "step": 4924 }, { "epoch": 1.1971317452600876, "grad_norm": 1.5270803299387248, "learning_rate": 7.596037530161371e-06, "loss": 0.5252430438995361, "step": 4925 }, { "epoch": 1.1973748176956733, "grad_norm": 1.7062726809249835, "learning_rate": 7.5921361572290355e-06, "loss": 0.5674902200698853, "step": 4926 }, { "epoch": 1.197617890131259, "grad_norm": 1.7639372588629385, "learning_rate": 7.588235173308309e-06, "loss": 0.7147097587585449, "step": 4927 }, { "epoch": 1.1978609625668448, "grad_norm": 1.6400871432980229, "learning_rate": 7.584334579029431e-06, "loss": 0.6076751947402954, "step": 4928 }, { "epoch": 1.1981040350024308, "grad_norm": 1.514832527568693, "learning_rate": 7.580434375022579e-06, "loss": 0.4698092043399811, "step": 4929 }, { "epoch": 1.1983471074380165, "grad_norm": 1.674424265033735, "learning_rate": 7.576534561917861e-06, "loss": 0.6781505346298218, "step": 4930 }, { "epoch": 1.1985901798736023, "grad_norm": 1.4443206922162157, "learning_rate": 7.57263514034533e-06, "loss": 0.5608323812484741, "step": 4931 }, { "epoch": 1.1988332523091882, "grad_norm": 1.4757603132339168, "learning_rate": 7.56873611093497e-06, "loss": 0.6274864673614502, "step": 4932 }, { "epoch": 1.199076324744774, "grad_norm": 1.5104450121424888, "learning_rate": 7.5648374743167e-06, "loss": 0.7501145005226135, "step": 4933 }, { "epoch": 1.1993193971803597, "grad_norm": 1.646907499366969, "learning_rate": 7.560939231120389e-06, "loss": 0.6545113325119019, "step": 4934 }, { "epoch": 1.1995624696159455, "grad_norm": 1.5790986230831685, "learning_rate": 7.557041381975825e-06, "loss": 0.48818638920783997, "step": 4935 }, { "epoch": 1.1998055420515314, "grad_norm": 1.7950741057511035, "learning_rate": 7.553143927512741e-06, "loss": 0.7200618982315063, "step": 4936 }, { "epoch": 1.2000486144871172, "grad_norm": 1.4996722196941488, "learning_rate": 7.5492468683608045e-06, "loss": 0.633243203163147, "step": 4937 }, { "epoch": 1.200291686922703, "grad_norm": 1.5513486678975539, "learning_rate": 7.545350205149622e-06, "loss": 0.7572793960571289, "step": 4938 }, { "epoch": 1.2005347593582887, "grad_norm": 1.9792062862712654, "learning_rate": 7.5414539385087315e-06, "loss": 0.7224884033203125, "step": 4939 }, { "epoch": 1.2007778317938747, "grad_norm": 1.3380833990494214, "learning_rate": 7.537558069067611e-06, "loss": 0.4477558732032776, "step": 4940 }, { "epoch": 1.2010209042294604, "grad_norm": 1.5670040390583304, "learning_rate": 7.533662597455667e-06, "loss": 0.5090813040733337, "step": 4941 }, { "epoch": 1.2012639766650461, "grad_norm": 1.5195533157677, "learning_rate": 7.529767524302256e-06, "loss": 0.5036583542823792, "step": 4942 }, { "epoch": 1.2015070491006319, "grad_norm": 1.8010204510597778, "learning_rate": 7.525872850236658e-06, "loss": 0.5949540734291077, "step": 4943 }, { "epoch": 1.2017501215362179, "grad_norm": 1.5721520018409454, "learning_rate": 7.521978575888094e-06, "loss": 0.4745265245437622, "step": 4944 }, { "epoch": 1.2019931939718036, "grad_norm": 1.4198967212778266, "learning_rate": 7.518084701885711e-06, "loss": 0.541222333908081, "step": 4945 }, { "epoch": 1.2022362664073893, "grad_norm": 1.5915655339183543, "learning_rate": 7.514191228858605e-06, "loss": 0.6327613592147827, "step": 4946 }, { "epoch": 1.2024793388429753, "grad_norm": 1.2694286514912012, "learning_rate": 7.510298157435799e-06, "loss": 0.5555896759033203, "step": 4947 }, { "epoch": 1.202722411278561, "grad_norm": 1.4619282985852524, "learning_rate": 7.506405488246252e-06, "loss": 0.5732243061065674, "step": 4948 }, { "epoch": 1.2029654837141468, "grad_norm": 1.9453674973847137, "learning_rate": 7.5025132219188615e-06, "loss": 0.6436343193054199, "step": 4949 }, { "epoch": 1.2032085561497325, "grad_norm": 1.594788667641881, "learning_rate": 7.498621359082451e-06, "loss": 0.5252099633216858, "step": 4950 }, { "epoch": 1.2034516285853185, "grad_norm": 1.4741336539494942, "learning_rate": 7.494729900365794e-06, "loss": 0.5066309571266174, "step": 4951 }, { "epoch": 1.2036947010209043, "grad_norm": 1.6366668023062447, "learning_rate": 7.4908388463975854e-06, "loss": 0.6171112656593323, "step": 4952 }, { "epoch": 1.20393777345649, "grad_norm": 1.6681083315923257, "learning_rate": 7.486948197806463e-06, "loss": 0.6105493307113647, "step": 4953 }, { "epoch": 1.2041808458920757, "grad_norm": 1.6436708911752342, "learning_rate": 7.4830579552209905e-06, "loss": 0.5322132110595703, "step": 4954 }, { "epoch": 1.2044239183276617, "grad_norm": 1.636189022866746, "learning_rate": 7.479168119269672e-06, "loss": 0.5286136865615845, "step": 4955 }, { "epoch": 1.2046669907632475, "grad_norm": 1.5214058682248017, "learning_rate": 7.475278690580945e-06, "loss": 0.5784083008766174, "step": 4956 }, { "epoch": 1.2049100631988332, "grad_norm": 1.442929715897689, "learning_rate": 7.471389669783183e-06, "loss": 0.5855112075805664, "step": 4957 }, { "epoch": 1.205153135634419, "grad_norm": 1.3983965168697619, "learning_rate": 7.4675010575046826e-06, "loss": 0.5427704453468323, "step": 4958 }, { "epoch": 1.205396208070005, "grad_norm": 1.5348475624039808, "learning_rate": 7.463612854373696e-06, "loss": 0.5863771438598633, "step": 4959 }, { "epoch": 1.2056392805055907, "grad_norm": 1.7204228284397531, "learning_rate": 7.4597250610183926e-06, "loss": 0.6047582626342773, "step": 4960 }, { "epoch": 1.2058823529411764, "grad_norm": 1.4954068986158189, "learning_rate": 7.455837678066877e-06, "loss": 0.533190131187439, "step": 4961 }, { "epoch": 1.2061254253767624, "grad_norm": 1.5927400041593736, "learning_rate": 7.451950706147193e-06, "loss": 0.6185314059257507, "step": 4962 }, { "epoch": 1.2063684978123481, "grad_norm": 1.644577361456067, "learning_rate": 7.448064145887315e-06, "loss": 0.5343207716941833, "step": 4963 }, { "epoch": 1.2066115702479339, "grad_norm": 1.7408760388598985, "learning_rate": 7.4441779979151475e-06, "loss": 0.5832515358924866, "step": 4964 }, { "epoch": 1.2068546426835196, "grad_norm": 1.359566808709789, "learning_rate": 7.440292262858533e-06, "loss": 0.5758440494537354, "step": 4965 }, { "epoch": 1.2070977151191056, "grad_norm": 1.6406183706345407, "learning_rate": 7.436406941345245e-06, "loss": 0.5440002679824829, "step": 4966 }, { "epoch": 1.2073407875546913, "grad_norm": 1.6218858002524639, "learning_rate": 7.432522034002996e-06, "loss": 0.5288457870483398, "step": 4967 }, { "epoch": 1.207583859990277, "grad_norm": 1.6189988250929284, "learning_rate": 7.428637541459426e-06, "loss": 0.5665003061294556, "step": 4968 }, { "epoch": 1.207826932425863, "grad_norm": 1.4736692708372576, "learning_rate": 7.4247534643421075e-06, "loss": 0.5310592651367188, "step": 4969 }, { "epoch": 1.2080700048614488, "grad_norm": 1.5168137533203427, "learning_rate": 7.420869803278548e-06, "loss": 0.6110148429870605, "step": 4970 }, { "epoch": 1.2083130772970345, "grad_norm": 1.3971240015194746, "learning_rate": 7.416986558896186e-06, "loss": 0.6831387877464294, "step": 4971 }, { "epoch": 1.2085561497326203, "grad_norm": 1.2650720366810364, "learning_rate": 7.4131037318223965e-06, "loss": 0.5337626934051514, "step": 4972 }, { "epoch": 1.208799222168206, "grad_norm": 1.6355068630385106, "learning_rate": 7.409221322684481e-06, "loss": 0.6973584890365601, "step": 4973 }, { "epoch": 1.209042294603792, "grad_norm": 1.505083269840388, "learning_rate": 7.405339332109673e-06, "loss": 0.597942590713501, "step": 4974 }, { "epoch": 1.2092853670393777, "grad_norm": 1.6589686682182256, "learning_rate": 7.401457760725154e-06, "loss": 0.5817058682441711, "step": 4975 }, { "epoch": 1.2095284394749635, "grad_norm": 1.5578024493672409, "learning_rate": 7.3975766091580185e-06, "loss": 0.522831916809082, "step": 4976 }, { "epoch": 1.2097715119105494, "grad_norm": 1.643471339307008, "learning_rate": 7.393695878035301e-06, "loss": 0.4770813286304474, "step": 4977 }, { "epoch": 1.2100145843461352, "grad_norm": 1.548944625098344, "learning_rate": 7.3898155679839704e-06, "loss": 0.4680410623550415, "step": 4978 }, { "epoch": 1.210257656781721, "grad_norm": 1.4132897778953382, "learning_rate": 7.385935679630922e-06, "loss": 0.7359215617179871, "step": 4979 }, { "epoch": 1.2105007292173067, "grad_norm": 1.7271306403219555, "learning_rate": 7.382056213602987e-06, "loss": 0.509162187576294, "step": 4980 }, { "epoch": 1.2107438016528926, "grad_norm": 1.7474222074332955, "learning_rate": 7.378177170526928e-06, "loss": 0.6609405279159546, "step": 4981 }, { "epoch": 1.2109868740884784, "grad_norm": 1.5992060016586285, "learning_rate": 7.374298551029436e-06, "loss": 0.5632736682891846, "step": 4982 }, { "epoch": 1.2112299465240641, "grad_norm": 1.3213814264345083, "learning_rate": 7.37042035573714e-06, "loss": 0.5102826356887817, "step": 4983 }, { "epoch": 1.21147301895965, "grad_norm": 1.502060846130976, "learning_rate": 7.3665425852765955e-06, "loss": 0.5287219882011414, "step": 4984 }, { "epoch": 1.2117160913952358, "grad_norm": 1.4384131529984594, "learning_rate": 7.3626652402742895e-06, "loss": 0.5245919227600098, "step": 4985 }, { "epoch": 1.2119591638308216, "grad_norm": 1.462791901738144, "learning_rate": 7.358788321356642e-06, "loss": 0.6382732391357422, "step": 4986 }, { "epoch": 1.2122022362664073, "grad_norm": 1.3717754394252457, "learning_rate": 7.354911829150002e-06, "loss": 0.5719157457351685, "step": 4987 }, { "epoch": 1.212445308701993, "grad_norm": 1.4547794666285396, "learning_rate": 7.351035764280652e-06, "loss": 0.6063718795776367, "step": 4988 }, { "epoch": 1.212688381137579, "grad_norm": 1.4432477589540718, "learning_rate": 7.347160127374804e-06, "loss": 0.7708503007888794, "step": 4989 }, { "epoch": 1.2129314535731648, "grad_norm": 1.4799498326771747, "learning_rate": 7.343284919058602e-06, "loss": 0.6058822274208069, "step": 4990 }, { "epoch": 1.2131745260087505, "grad_norm": 1.4694613405051007, "learning_rate": 7.339410139958117e-06, "loss": 0.5392019748687744, "step": 4991 }, { "epoch": 1.2134175984443365, "grad_norm": 2.0910293586383495, "learning_rate": 7.3355357906993595e-06, "loss": 0.5818665027618408, "step": 4992 }, { "epoch": 1.2136606708799222, "grad_norm": 1.4794385405674673, "learning_rate": 7.33166187190826e-06, "loss": 0.6233386993408203, "step": 4993 }, { "epoch": 1.213903743315508, "grad_norm": 1.5184653897357576, "learning_rate": 7.327788384210688e-06, "loss": 0.6568725109100342, "step": 4994 }, { "epoch": 1.2141468157510937, "grad_norm": 1.4671045659588708, "learning_rate": 7.323915328232435e-06, "loss": 0.46349895000457764, "step": 4995 }, { "epoch": 1.2143898881866797, "grad_norm": 1.5209189737222704, "learning_rate": 7.32004270459923e-06, "loss": 0.5898757576942444, "step": 4996 }, { "epoch": 1.2146329606222654, "grad_norm": 1.67764040925442, "learning_rate": 7.31617051393673e-06, "loss": 0.6229387521743774, "step": 4997 }, { "epoch": 1.2148760330578512, "grad_norm": 1.307211192340005, "learning_rate": 7.31229875687052e-06, "loss": 0.3836592435836792, "step": 4998 }, { "epoch": 1.2151191054934372, "grad_norm": 1.6873654369000421, "learning_rate": 7.308427434026115e-06, "loss": 0.6278901100158691, "step": 4999 }, { "epoch": 1.215362177929023, "grad_norm": 1.3979812422763662, "learning_rate": 7.304556546028964e-06, "loss": 0.5987281799316406, "step": 5000 }, { "epoch": 1.2156052503646086, "grad_norm": 1.571834335026013, "learning_rate": 7.300686093504444e-06, "loss": 0.7663929462432861, "step": 5001 }, { "epoch": 1.2158483228001944, "grad_norm": 1.6311683525560923, "learning_rate": 7.296816077077859e-06, "loss": 0.6058510541915894, "step": 5002 }, { "epoch": 1.2160913952357804, "grad_norm": 1.8587503687170368, "learning_rate": 7.292946497374443e-06, "loss": 0.6181536912918091, "step": 5003 }, { "epoch": 1.216334467671366, "grad_norm": 1.536567913329273, "learning_rate": 7.289077355019363e-06, "loss": 0.5061658620834351, "step": 5004 }, { "epoch": 1.2165775401069518, "grad_norm": 1.654672354811922, "learning_rate": 7.285208650637712e-06, "loss": 0.6444135904312134, "step": 5005 }, { "epoch": 1.2168206125425376, "grad_norm": 1.8318992938032403, "learning_rate": 7.281340384854514e-06, "loss": 0.5691784024238586, "step": 5006 }, { "epoch": 1.2170636849781236, "grad_norm": 1.6098266054165045, "learning_rate": 7.277472558294717e-06, "loss": 0.5011687278747559, "step": 5007 }, { "epoch": 1.2173067574137093, "grad_norm": 1.5961046414011997, "learning_rate": 7.273605171583209e-06, "loss": 0.5113722085952759, "step": 5008 }, { "epoch": 1.217549829849295, "grad_norm": 1.4294167339814108, "learning_rate": 7.269738225344797e-06, "loss": 0.4831927418708801, "step": 5009 }, { "epoch": 1.2177929022848808, "grad_norm": 1.4631733876467616, "learning_rate": 7.2658717202042206e-06, "loss": 0.5700019001960754, "step": 5010 }, { "epoch": 1.2180359747204668, "grad_norm": 1.467592896319532, "learning_rate": 7.2620056567861485e-06, "loss": 0.6451202630996704, "step": 5011 }, { "epoch": 1.2182790471560525, "grad_norm": 1.3379200724848248, "learning_rate": 7.258140035715176e-06, "loss": 0.5391086935997009, "step": 5012 }, { "epoch": 1.2185221195916383, "grad_norm": 1.5098920518580272, "learning_rate": 7.2542748576158284e-06, "loss": 0.47917822003364563, "step": 5013 }, { "epoch": 1.2187651920272242, "grad_norm": 1.7044085445246517, "learning_rate": 7.250410123112561e-06, "loss": 0.5366750955581665, "step": 5014 }, { "epoch": 1.21900826446281, "grad_norm": 1.5653878947368969, "learning_rate": 7.246545832829752e-06, "loss": 0.6647883653640747, "step": 5015 }, { "epoch": 1.2192513368983957, "grad_norm": 1.7965902563585716, "learning_rate": 7.242681987391716e-06, "loss": 0.549064040184021, "step": 5016 }, { "epoch": 1.2194944093339815, "grad_norm": 1.7420795306735724, "learning_rate": 7.238818587422689e-06, "loss": 0.47400325536727905, "step": 5017 }, { "epoch": 1.2197374817695674, "grad_norm": 1.5262719700471317, "learning_rate": 7.234955633546838e-06, "loss": 0.48885732889175415, "step": 5018 }, { "epoch": 1.2199805542051532, "grad_norm": 1.6903019916900601, "learning_rate": 7.231093126388255e-06, "loss": 0.5019851922988892, "step": 5019 }, { "epoch": 1.220223626640739, "grad_norm": 1.6551752518418463, "learning_rate": 7.227231066570967e-06, "loss": 0.4514006972312927, "step": 5020 }, { "epoch": 1.2204666990763247, "grad_norm": 1.6454426796873416, "learning_rate": 7.223369454718918e-06, "loss": 0.5587257146835327, "step": 5021 }, { "epoch": 1.2207097715119106, "grad_norm": 1.3060320122413793, "learning_rate": 7.219508291455989e-06, "loss": 0.556341290473938, "step": 5022 }, { "epoch": 1.2209528439474964, "grad_norm": 1.5250212511789083, "learning_rate": 7.215647577405982e-06, "loss": 0.3823384642601013, "step": 5023 }, { "epoch": 1.2211959163830821, "grad_norm": 1.451916234629867, "learning_rate": 7.2117873131926316e-06, "loss": 0.6696385741233826, "step": 5024 }, { "epoch": 1.2214389888186679, "grad_norm": 1.8022148382890182, "learning_rate": 7.207927499439599e-06, "loss": 0.6369652152061462, "step": 5025 }, { "epoch": 1.2216820612542538, "grad_norm": 1.4961971226973099, "learning_rate": 7.204068136770468e-06, "loss": 0.744279146194458, "step": 5026 }, { "epoch": 1.2219251336898396, "grad_norm": 1.6272093561196053, "learning_rate": 7.2002092258087555e-06, "loss": 0.4712027311325073, "step": 5027 }, { "epoch": 1.2221682061254253, "grad_norm": 1.7882726049346729, "learning_rate": 7.1963507671779e-06, "loss": 0.4833163321018219, "step": 5028 }, { "epoch": 1.2224112785610113, "grad_norm": 1.7768814950519791, "learning_rate": 7.19249276150127e-06, "loss": 0.4917518198490143, "step": 5029 }, { "epoch": 1.222654350996597, "grad_norm": 1.3994679038323514, "learning_rate": 7.188635209402161e-06, "loss": 0.4768206775188446, "step": 5030 }, { "epoch": 1.2228974234321828, "grad_norm": 1.577264401412888, "learning_rate": 7.184778111503794e-06, "loss": 0.6296635866165161, "step": 5031 }, { "epoch": 1.2231404958677685, "grad_norm": 1.5943103199670094, "learning_rate": 7.180921468429314e-06, "loss": 0.6231416463851929, "step": 5032 }, { "epoch": 1.2233835683033545, "grad_norm": 1.4710250984039548, "learning_rate": 7.1770652808018045e-06, "loss": 0.4463982582092285, "step": 5033 }, { "epoch": 1.2236266407389402, "grad_norm": 1.6262963522040923, "learning_rate": 7.173209549244259e-06, "loss": 0.6192830801010132, "step": 5034 }, { "epoch": 1.223869713174526, "grad_norm": 1.4305126040919165, "learning_rate": 7.169354274379607e-06, "loss": 0.4397585391998291, "step": 5035 }, { "epoch": 1.2241127856101117, "grad_norm": 1.690756603258714, "learning_rate": 7.165499456830702e-06, "loss": 0.5959782600402832, "step": 5036 }, { "epoch": 1.2243558580456977, "grad_norm": 1.644746996237651, "learning_rate": 7.161645097220324e-06, "loss": 0.5379733443260193, "step": 5037 }, { "epoch": 1.2245989304812834, "grad_norm": 1.7076461420913462, "learning_rate": 7.157791196171179e-06, "loss": 0.6368643641471863, "step": 5038 }, { "epoch": 1.2248420029168692, "grad_norm": 1.7023442836408627, "learning_rate": 7.153937754305897e-06, "loss": 0.4887056350708008, "step": 5039 }, { "epoch": 1.225085075352455, "grad_norm": 1.5822764416729511, "learning_rate": 7.150084772247034e-06, "loss": 0.5979699492454529, "step": 5040 }, { "epoch": 1.2253281477880409, "grad_norm": 1.8013170726279508, "learning_rate": 7.14623225061708e-06, "loss": 0.5148475170135498, "step": 5041 }, { "epoch": 1.2255712202236266, "grad_norm": 1.762397594884685, "learning_rate": 7.142380190038441e-06, "loss": 0.5718818306922913, "step": 5042 }, { "epoch": 1.2258142926592124, "grad_norm": 1.7044106452376084, "learning_rate": 7.138528591133447e-06, "loss": 0.5408533811569214, "step": 5043 }, { "epoch": 1.2260573650947983, "grad_norm": 1.4532564344047547, "learning_rate": 7.134677454524361e-06, "loss": 0.44172221422195435, "step": 5044 }, { "epoch": 1.226300437530384, "grad_norm": 1.5088095990738732, "learning_rate": 7.130826780833368e-06, "loss": 0.5026198625564575, "step": 5045 }, { "epoch": 1.2265435099659698, "grad_norm": 1.512206981733543, "learning_rate": 7.12697657068258e-06, "loss": 0.5710346698760986, "step": 5046 }, { "epoch": 1.2267865824015556, "grad_norm": 1.835302488980966, "learning_rate": 7.123126824694029e-06, "loss": 0.609849214553833, "step": 5047 }, { "epoch": 1.2270296548371415, "grad_norm": 1.6858569022200272, "learning_rate": 7.119277543489671e-06, "loss": 0.6825125217437744, "step": 5048 }, { "epoch": 1.2272727272727273, "grad_norm": 1.524155392758966, "learning_rate": 7.115428727691403e-06, "loss": 0.5204361081123352, "step": 5049 }, { "epoch": 1.227515799708313, "grad_norm": 1.4415727228368318, "learning_rate": 7.111580377921028e-06, "loss": 0.5916916131973267, "step": 5050 }, { "epoch": 1.227758872143899, "grad_norm": 1.6996461062269934, "learning_rate": 7.107732494800283e-06, "loss": 0.5168114900588989, "step": 5051 }, { "epoch": 1.2280019445794847, "grad_norm": 1.6229859070623074, "learning_rate": 7.103885078950828e-06, "loss": 0.5193149447441101, "step": 5052 }, { "epoch": 1.2282450170150705, "grad_norm": 1.4653897210290923, "learning_rate": 7.100038130994242e-06, "loss": 0.7705503702163696, "step": 5053 }, { "epoch": 1.2284880894506562, "grad_norm": 1.6068570845506613, "learning_rate": 7.096191651552037e-06, "loss": 0.5709680318832397, "step": 5054 }, { "epoch": 1.228731161886242, "grad_norm": 1.8122901299900276, "learning_rate": 7.092345641245646e-06, "loss": 0.6425989866256714, "step": 5055 }, { "epoch": 1.228974234321828, "grad_norm": 1.650689950854493, "learning_rate": 7.08850010069642e-06, "loss": 0.5418571829795837, "step": 5056 }, { "epoch": 1.2292173067574137, "grad_norm": 1.5576895983811985, "learning_rate": 7.084655030525649e-06, "loss": 0.7092200517654419, "step": 5057 }, { "epoch": 1.2294603791929994, "grad_norm": 1.4880773251413189, "learning_rate": 7.080810431354534e-06, "loss": 0.6460973024368286, "step": 5058 }, { "epoch": 1.2297034516285854, "grad_norm": 1.4749135445599242, "learning_rate": 7.076966303804205e-06, "loss": 0.6231157779693604, "step": 5059 }, { "epoch": 1.2299465240641712, "grad_norm": 1.4512124897870886, "learning_rate": 7.073122648495712e-06, "loss": 0.5557054281234741, "step": 5060 }, { "epoch": 1.230189596499757, "grad_norm": 1.4493783719353277, "learning_rate": 7.069279466050035e-06, "loss": 0.5868064761161804, "step": 5061 }, { "epoch": 1.2304326689353426, "grad_norm": 1.5161859553322412, "learning_rate": 7.06543675708807e-06, "loss": 0.4936080873012543, "step": 5062 }, { "epoch": 1.2306757413709286, "grad_norm": 1.6289974251592259, "learning_rate": 7.061594522230642e-06, "loss": 0.5217006206512451, "step": 5063 }, { "epoch": 1.2309188138065144, "grad_norm": 1.5653866759115076, "learning_rate": 7.057752762098497e-06, "loss": 0.5325466394424438, "step": 5064 }, { "epoch": 1.2311618862421, "grad_norm": 1.7287258540907364, "learning_rate": 7.053911477312305e-06, "loss": 0.5527167320251465, "step": 5065 }, { "epoch": 1.231404958677686, "grad_norm": 1.4745584590788376, "learning_rate": 7.0500706684926635e-06, "loss": 0.5622933506965637, "step": 5066 }, { "epoch": 1.2316480311132718, "grad_norm": 1.6000543186199097, "learning_rate": 7.046230336260085e-06, "loss": 0.5650856494903564, "step": 5067 }, { "epoch": 1.2318911035488576, "grad_norm": 1.3863814187226033, "learning_rate": 7.042390481235011e-06, "loss": 0.5050387382507324, "step": 5068 }, { "epoch": 1.2321341759844433, "grad_norm": 2.124557656615522, "learning_rate": 7.0385511040378026e-06, "loss": 0.4437069296836853, "step": 5069 }, { "epoch": 1.232377248420029, "grad_norm": 1.3689080348871556, "learning_rate": 7.0347122052887475e-06, "loss": 0.4334724545478821, "step": 5070 }, { "epoch": 1.232620320855615, "grad_norm": 1.395930822752634, "learning_rate": 7.030873785608051e-06, "loss": 0.36663031578063965, "step": 5071 }, { "epoch": 1.2328633932912008, "grad_norm": 1.4150577473015744, "learning_rate": 7.027035845615844e-06, "loss": 0.6525026559829712, "step": 5072 }, { "epoch": 1.2331064657267865, "grad_norm": 1.9112842350572428, "learning_rate": 7.023198385932174e-06, "loss": 0.6577969193458557, "step": 5073 }, { "epoch": 1.2333495381623725, "grad_norm": 2.1816766877821725, "learning_rate": 7.019361407177027e-06, "loss": 0.8510502576828003, "step": 5074 }, { "epoch": 1.2335926105979582, "grad_norm": 1.8867304198255503, "learning_rate": 7.0155249099702945e-06, "loss": 0.4732987582683563, "step": 5075 }, { "epoch": 1.233835683033544, "grad_norm": 1.7319274191963967, "learning_rate": 7.0116888949318e-06, "loss": 0.574127197265625, "step": 5076 }, { "epoch": 1.2340787554691297, "grad_norm": 1.6479862083423087, "learning_rate": 7.007853362681282e-06, "loss": 0.568663477897644, "step": 5077 }, { "epoch": 1.2343218279047157, "grad_norm": 1.5986151584650508, "learning_rate": 7.004018313838405e-06, "loss": 0.5647599101066589, "step": 5078 }, { "epoch": 1.2345649003403014, "grad_norm": 1.6270198219566563, "learning_rate": 7.000183749022758e-06, "loss": 0.5390947461128235, "step": 5079 }, { "epoch": 1.2348079727758872, "grad_norm": 1.8332504810426762, "learning_rate": 6.9963496688538435e-06, "loss": 0.656854510307312, "step": 5080 }, { "epoch": 1.2350510452114731, "grad_norm": 1.511042045119633, "learning_rate": 6.992516073951094e-06, "loss": 0.5251984596252441, "step": 5081 }, { "epoch": 1.2352941176470589, "grad_norm": 1.4288994341973251, "learning_rate": 6.988682964933862e-06, "loss": 0.6948416233062744, "step": 5082 }, { "epoch": 1.2355371900826446, "grad_norm": 1.7692742185971808, "learning_rate": 6.984850342421416e-06, "loss": 0.5903031229972839, "step": 5083 }, { "epoch": 1.2357802625182304, "grad_norm": 1.8166990791996065, "learning_rate": 6.981018207032955e-06, "loss": 0.5935291647911072, "step": 5084 }, { "epoch": 1.2360233349538163, "grad_norm": 1.8113467876262892, "learning_rate": 6.9771865593875895e-06, "loss": 0.5864508152008057, "step": 5085 }, { "epoch": 1.236266407389402, "grad_norm": 1.491876924520483, "learning_rate": 6.97335540010436e-06, "loss": 0.474734365940094, "step": 5086 }, { "epoch": 1.2365094798249878, "grad_norm": 1.6730541242922465, "learning_rate": 6.969524729802221e-06, "loss": 0.5516210794448853, "step": 5087 }, { "epoch": 1.2367525522605736, "grad_norm": 1.4485972023371148, "learning_rate": 6.965694549100051e-06, "loss": 0.6032391786575317, "step": 5088 }, { "epoch": 1.2369956246961595, "grad_norm": 1.5553988847777223, "learning_rate": 6.961864858616649e-06, "loss": 0.6772916316986084, "step": 5089 }, { "epoch": 1.2372386971317453, "grad_norm": 1.5560858238257917, "learning_rate": 6.958035658970738e-06, "loss": 0.650252640247345, "step": 5090 }, { "epoch": 1.237481769567331, "grad_norm": 1.3009483039749163, "learning_rate": 6.9542069507809596e-06, "loss": 0.538640022277832, "step": 5091 }, { "epoch": 1.2377248420029168, "grad_norm": 1.7038796343115854, "learning_rate": 6.950378734665873e-06, "loss": 0.48319417238235474, "step": 5092 }, { "epoch": 1.2379679144385027, "grad_norm": 1.503653098980973, "learning_rate": 6.946551011243958e-06, "loss": 0.5719033479690552, "step": 5093 }, { "epoch": 1.2382109868740885, "grad_norm": 1.5195510603085296, "learning_rate": 6.9427237811336205e-06, "loss": 0.5442984700202942, "step": 5094 }, { "epoch": 1.2384540593096742, "grad_norm": 1.6588864538034735, "learning_rate": 6.938897044953184e-06, "loss": 0.7082314491271973, "step": 5095 }, { "epoch": 1.2386971317452602, "grad_norm": 1.46182629583885, "learning_rate": 6.9350708033208865e-06, "loss": 0.581182599067688, "step": 5096 }, { "epoch": 1.238940204180846, "grad_norm": 1.492567091209306, "learning_rate": 6.931245056854894e-06, "loss": 0.5824824571609497, "step": 5097 }, { "epoch": 1.2391832766164317, "grad_norm": 1.506551246703069, "learning_rate": 6.927419806173292e-06, "loss": 0.5301380157470703, "step": 5098 }, { "epoch": 1.2394263490520174, "grad_norm": 1.6935306568081148, "learning_rate": 6.9235950518940806e-06, "loss": 0.5549139976501465, "step": 5099 }, { "epoch": 1.2396694214876034, "grad_norm": 1.3893341541561322, "learning_rate": 6.919770794635184e-06, "loss": 0.5655152797698975, "step": 5100 }, { "epoch": 1.2399124939231891, "grad_norm": 1.377578045991816, "learning_rate": 6.915947035014443e-06, "loss": 0.4230702519416809, "step": 5101 }, { "epoch": 1.2401555663587749, "grad_norm": 1.2933235761920425, "learning_rate": 6.912123773649622e-06, "loss": 0.5132251977920532, "step": 5102 }, { "epoch": 1.2403986387943606, "grad_norm": 1.4760470188665256, "learning_rate": 6.9083010111583985e-06, "loss": 0.4988866448402405, "step": 5103 }, { "epoch": 1.2406417112299466, "grad_norm": 1.8122128555073083, "learning_rate": 6.9044787481583775e-06, "loss": 0.4456004798412323, "step": 5104 }, { "epoch": 1.2408847836655323, "grad_norm": 1.6590906921891126, "learning_rate": 6.900656985267077e-06, "loss": 0.489495187997818, "step": 5105 }, { "epoch": 1.241127856101118, "grad_norm": 1.5551068789290168, "learning_rate": 6.896835723101935e-06, "loss": 0.6349695920944214, "step": 5106 }, { "epoch": 1.2413709285367038, "grad_norm": 1.3152642295739578, "learning_rate": 6.893014962280314e-06, "loss": 0.6062631607055664, "step": 5107 }, { "epoch": 1.2416140009722898, "grad_norm": 1.4525318173032973, "learning_rate": 6.8891947034194905e-06, "loss": 0.5663816928863525, "step": 5108 }, { "epoch": 1.2418570734078755, "grad_norm": 1.3583958347607605, "learning_rate": 6.88537494713666e-06, "loss": 0.42569661140441895, "step": 5109 }, { "epoch": 1.2421001458434613, "grad_norm": 1.5307697583180917, "learning_rate": 6.881555694048938e-06, "loss": 0.7667945623397827, "step": 5110 }, { "epoch": 1.2423432182790473, "grad_norm": 1.4578065805462141, "learning_rate": 6.877736944773358e-06, "loss": 0.5834788084030151, "step": 5111 }, { "epoch": 1.242586290714633, "grad_norm": 1.5948523111372979, "learning_rate": 6.873918699926874e-06, "loss": 0.6282802820205688, "step": 5112 }, { "epoch": 1.2428293631502187, "grad_norm": 1.6956232907317657, "learning_rate": 6.870100960126355e-06, "loss": 0.6533019542694092, "step": 5113 }, { "epoch": 1.2430724355858045, "grad_norm": 1.3275581888351737, "learning_rate": 6.86628372598859e-06, "loss": 0.5512486696243286, "step": 5114 }, { "epoch": 1.2433155080213905, "grad_norm": 1.656941643429608, "learning_rate": 6.86246699813029e-06, "loss": 0.6368464231491089, "step": 5115 }, { "epoch": 1.2435585804569762, "grad_norm": 1.5773227545281707, "learning_rate": 6.858650777168079e-06, "loss": 0.6159738898277283, "step": 5116 }, { "epoch": 1.243801652892562, "grad_norm": 1.8302315749935658, "learning_rate": 6.854835063718502e-06, "loss": 0.5908464789390564, "step": 5117 }, { "epoch": 1.2440447253281477, "grad_norm": 1.9667834714319437, "learning_rate": 6.85101985839802e-06, "loss": 0.5853084921836853, "step": 5118 }, { "epoch": 1.2442877977637337, "grad_norm": 1.4541496312674185, "learning_rate": 6.847205161823015e-06, "loss": 0.5009456872940063, "step": 5119 }, { "epoch": 1.2445308701993194, "grad_norm": 1.6280034172319449, "learning_rate": 6.8433909746097825e-06, "loss": 0.42878562211990356, "step": 5120 }, { "epoch": 1.2447739426349052, "grad_norm": 1.6305155923789716, "learning_rate": 6.83957729737454e-06, "loss": 0.7146632671356201, "step": 5121 }, { "epoch": 1.245017015070491, "grad_norm": 1.5092180204660668, "learning_rate": 6.835764130733417e-06, "loss": 0.5099636316299438, "step": 5122 }, { "epoch": 1.2452600875060769, "grad_norm": 1.3997745246219138, "learning_rate": 6.831951475302468e-06, "loss": 0.42935025691986084, "step": 5123 }, { "epoch": 1.2455031599416626, "grad_norm": 1.5021501039234837, "learning_rate": 6.828139331697661e-06, "loss": 0.6004469394683838, "step": 5124 }, { "epoch": 1.2457462323772484, "grad_norm": 1.5140611184940427, "learning_rate": 6.8243277005348794e-06, "loss": 0.45487481355667114, "step": 5125 }, { "epoch": 1.2459893048128343, "grad_norm": 1.615279928148699, "learning_rate": 6.820516582429927e-06, "loss": 0.4931142330169678, "step": 5126 }, { "epoch": 1.24623237724842, "grad_norm": 1.3580041301861945, "learning_rate": 6.816705977998523e-06, "loss": 0.4298231899738312, "step": 5127 }, { "epoch": 1.2464754496840058, "grad_norm": 1.6054142975264518, "learning_rate": 6.812895887856304e-06, "loss": 0.6620432138442993, "step": 5128 }, { "epoch": 1.2467185221195916, "grad_norm": 1.6857400331914005, "learning_rate": 6.8090863126188245e-06, "loss": 0.685625433921814, "step": 5129 }, { "epoch": 1.2469615945551775, "grad_norm": 1.6048325412388722, "learning_rate": 6.80527725290155e-06, "loss": 0.5514389276504517, "step": 5130 }, { "epoch": 1.2472046669907633, "grad_norm": 1.7125230118828036, "learning_rate": 6.801468709319877e-06, "loss": 0.4983324110507965, "step": 5131 }, { "epoch": 1.247447739426349, "grad_norm": 2.126772344260084, "learning_rate": 6.7976606824891035e-06, "loss": 0.5695187449455261, "step": 5132 }, { "epoch": 1.247690811861935, "grad_norm": 2.0397538940775495, "learning_rate": 6.793853173024449e-06, "loss": 0.5384870171546936, "step": 5133 }, { "epoch": 1.2479338842975207, "grad_norm": 1.4718907426299352, "learning_rate": 6.790046181541052e-06, "loss": 0.7354703545570374, "step": 5134 }, { "epoch": 1.2481769567331065, "grad_norm": 1.4379836716417467, "learning_rate": 6.786239708653967e-06, "loss": 0.5887376070022583, "step": 5135 }, { "epoch": 1.2484200291686922, "grad_norm": 1.4775117747235802, "learning_rate": 6.7824337549781595e-06, "loss": 0.555492103099823, "step": 5136 }, { "epoch": 1.248663101604278, "grad_norm": 1.5219154580382392, "learning_rate": 6.778628321128517e-06, "loss": 0.4465489685535431, "step": 5137 }, { "epoch": 1.248906174039864, "grad_norm": 1.2365427934268067, "learning_rate": 6.774823407719837e-06, "loss": 0.4543612003326416, "step": 5138 }, { "epoch": 1.2491492464754497, "grad_norm": 1.8343578685717277, "learning_rate": 6.771019015366843e-06, "loss": 0.5307114720344543, "step": 5139 }, { "epoch": 1.2493923189110354, "grad_norm": 1.597901881902944, "learning_rate": 6.767215144684167e-06, "loss": 0.5107459425926208, "step": 5140 }, { "epoch": 1.2496353913466214, "grad_norm": 1.6199020304674578, "learning_rate": 6.763411796286357e-06, "loss": 0.6858394145965576, "step": 5141 }, { "epoch": 1.2498784637822071, "grad_norm": 1.6840427488864087, "learning_rate": 6.759608970787875e-06, "loss": 0.4706222414970398, "step": 5142 }, { "epoch": 1.2501215362177929, "grad_norm": 1.3294518210267194, "learning_rate": 6.755806668803103e-06, "loss": 0.5088678598403931, "step": 5143 }, { "epoch": 1.2503646086533786, "grad_norm": 1.6874492813758728, "learning_rate": 6.752004890946335e-06, "loss": 0.43785130977630615, "step": 5144 }, { "epoch": 1.2506076810889646, "grad_norm": 1.5262759528655008, "learning_rate": 6.748203637831784e-06, "loss": 0.5443283319473267, "step": 5145 }, { "epoch": 1.2508507535245503, "grad_norm": 1.6433950565064352, "learning_rate": 6.744402910073574e-06, "loss": 0.5655784606933594, "step": 5146 }, { "epoch": 1.251093825960136, "grad_norm": 1.4953500474250379, "learning_rate": 6.740602708285742e-06, "loss": 0.6318101286888123, "step": 5147 }, { "epoch": 1.251336898395722, "grad_norm": 1.8534262082284154, "learning_rate": 6.736803033082253e-06, "loss": 0.5595259666442871, "step": 5148 }, { "epoch": 1.2515799708313078, "grad_norm": 1.8811995547061213, "learning_rate": 6.733003885076974e-06, "loss": 0.6343063116073608, "step": 5149 }, { "epoch": 1.2518230432668935, "grad_norm": 1.5433052976220905, "learning_rate": 6.729205264883691e-06, "loss": 0.5915542840957642, "step": 5150 }, { "epoch": 1.2520661157024793, "grad_norm": 1.9796851733813303, "learning_rate": 6.725407173116105e-06, "loss": 0.5812877416610718, "step": 5151 }, { "epoch": 1.252309188138065, "grad_norm": 1.5102276241969361, "learning_rate": 6.721609610387829e-06, "loss": 0.45947030186653137, "step": 5152 }, { "epoch": 1.252552260573651, "grad_norm": 1.5571112909999245, "learning_rate": 6.717812577312394e-06, "loss": 0.5283809304237366, "step": 5153 }, { "epoch": 1.2527953330092367, "grad_norm": 1.621582206265817, "learning_rate": 6.714016074503243e-06, "loss": 0.6842758655548096, "step": 5154 }, { "epoch": 1.2530384054448225, "grad_norm": 1.411869945758158, "learning_rate": 6.710220102573733e-06, "loss": 0.5765405893325806, "step": 5155 }, { "epoch": 1.2532814778804084, "grad_norm": 1.622040399965383, "learning_rate": 6.706424662137141e-06, "loss": 0.6465685367584229, "step": 5156 }, { "epoch": 1.2535245503159942, "grad_norm": 1.5638660677854106, "learning_rate": 6.702629753806653e-06, "loss": 0.578720211982727, "step": 5157 }, { "epoch": 1.25376762275158, "grad_norm": 1.7633001595192768, "learning_rate": 6.698835378195369e-06, "loss": 0.5112446546554565, "step": 5158 }, { "epoch": 1.2540106951871657, "grad_norm": 1.6262257537703717, "learning_rate": 6.695041535916305e-06, "loss": 0.7909001111984253, "step": 5159 }, { "epoch": 1.2542537676227516, "grad_norm": 1.7372762207617813, "learning_rate": 6.6912482275823895e-06, "loss": 0.6114823222160339, "step": 5160 }, { "epoch": 1.2544968400583374, "grad_norm": 1.4305373952425615, "learning_rate": 6.687455453806462e-06, "loss": 0.6374116539955139, "step": 5161 }, { "epoch": 1.2547399124939231, "grad_norm": 1.4039538407739838, "learning_rate": 6.683663215201279e-06, "loss": 0.667319655418396, "step": 5162 }, { "epoch": 1.254982984929509, "grad_norm": 1.7500421212166672, "learning_rate": 6.679871512379509e-06, "loss": 0.5742055177688599, "step": 5163 }, { "epoch": 1.2552260573650948, "grad_norm": 1.6241764312402047, "learning_rate": 6.67608034595374e-06, "loss": 0.5586709380149841, "step": 5164 }, { "epoch": 1.2554691298006806, "grad_norm": 1.5905782562789064, "learning_rate": 6.672289716536467e-06, "loss": 0.5895063877105713, "step": 5165 }, { "epoch": 1.2557122022362663, "grad_norm": 1.6067002508444892, "learning_rate": 6.668499624740097e-06, "loss": 0.4730837941169739, "step": 5166 }, { "epoch": 1.255955274671852, "grad_norm": 1.6359514844631173, "learning_rate": 6.664710071176953e-06, "loss": 0.6588560342788696, "step": 5167 }, { "epoch": 1.256198347107438, "grad_norm": 1.5111579459380762, "learning_rate": 6.660921056459273e-06, "loss": 0.43663614988327026, "step": 5168 }, { "epoch": 1.2564414195430238, "grad_norm": 1.58539630143306, "learning_rate": 6.657132581199204e-06, "loss": 0.6001375913619995, "step": 5169 }, { "epoch": 1.2566844919786098, "grad_norm": 1.5252262810422659, "learning_rate": 6.65334464600881e-06, "loss": 0.5756802558898926, "step": 5170 }, { "epoch": 1.2569275644141955, "grad_norm": 1.4442088888942544, "learning_rate": 6.649557251500056e-06, "loss": 0.46947380900382996, "step": 5171 }, { "epoch": 1.2571706368497813, "grad_norm": 1.8497803222671405, "learning_rate": 6.645770398284841e-06, "loss": 0.6083933115005493, "step": 5172 }, { "epoch": 1.257413709285367, "grad_norm": 1.3835986315570186, "learning_rate": 6.64198408697496e-06, "loss": 0.4563426673412323, "step": 5173 }, { "epoch": 1.2576567817209527, "grad_norm": 1.4276813951001093, "learning_rate": 6.638198318182124e-06, "loss": 0.539960503578186, "step": 5174 }, { "epoch": 1.2578998541565387, "grad_norm": 1.5118321583170222, "learning_rate": 6.634413092517957e-06, "loss": 0.5634695291519165, "step": 5175 }, { "epoch": 1.2581429265921245, "grad_norm": 1.4255405401260375, "learning_rate": 6.630628410593998e-06, "loss": 0.5663608312606812, "step": 5176 }, { "epoch": 1.2583859990277102, "grad_norm": 1.4801337211224603, "learning_rate": 6.626844273021693e-06, "loss": 0.6359717845916748, "step": 5177 }, { "epoch": 1.2586290714632962, "grad_norm": 1.5114982666355656, "learning_rate": 6.623060680412404e-06, "loss": 0.8000239133834839, "step": 5178 }, { "epoch": 1.258872143898882, "grad_norm": 1.2907461612223916, "learning_rate": 6.619277633377401e-06, "loss": 0.6134874224662781, "step": 5179 }, { "epoch": 1.2591152163344677, "grad_norm": 1.6811438619133787, "learning_rate": 6.615495132527874e-06, "loss": 0.5077862739562988, "step": 5180 }, { "epoch": 1.2593582887700534, "grad_norm": 1.3238914191991684, "learning_rate": 6.611713178474917e-06, "loss": 0.4859309792518616, "step": 5181 }, { "epoch": 1.2596013612056391, "grad_norm": 1.503838254192508, "learning_rate": 6.607931771829537e-06, "loss": 0.5637787580490112, "step": 5182 }, { "epoch": 1.2598444336412251, "grad_norm": 1.45312857987828, "learning_rate": 6.604150913202651e-06, "loss": 0.5278752446174622, "step": 5183 }, { "epoch": 1.2600875060768109, "grad_norm": 1.8514697271051392, "learning_rate": 6.600370603205098e-06, "loss": 0.5975170731544495, "step": 5184 }, { "epoch": 1.2603305785123968, "grad_norm": 1.6219502447094016, "learning_rate": 6.596590842447611e-06, "loss": 0.5727725028991699, "step": 5185 }, { "epoch": 1.2605736509479826, "grad_norm": 1.5335262735953246, "learning_rate": 6.592811631540849e-06, "loss": 0.6149719953536987, "step": 5186 }, { "epoch": 1.2608167233835683, "grad_norm": 1.4757767937500494, "learning_rate": 6.589032971095377e-06, "loss": 0.6618062257766724, "step": 5187 }, { "epoch": 1.261059795819154, "grad_norm": 1.6233825818199354, "learning_rate": 6.585254861721666e-06, "loss": 0.6015638113021851, "step": 5188 }, { "epoch": 1.2613028682547398, "grad_norm": 1.864976284937558, "learning_rate": 6.581477304030111e-06, "loss": 0.7099536657333374, "step": 5189 }, { "epoch": 1.2615459406903258, "grad_norm": 1.3809420928783207, "learning_rate": 6.577700298631002e-06, "loss": 0.6188540458679199, "step": 5190 }, { "epoch": 1.2617890131259115, "grad_norm": 1.2475141181333447, "learning_rate": 6.573923846134553e-06, "loss": 0.4721244275569916, "step": 5191 }, { "epoch": 1.2620320855614973, "grad_norm": 1.4618134873829718, "learning_rate": 6.57014794715088e-06, "loss": 0.4804081916809082, "step": 5192 }, { "epoch": 1.2622751579970832, "grad_norm": 1.601985445279244, "learning_rate": 6.566372602290015e-06, "loss": 0.6001747250556946, "step": 5193 }, { "epoch": 1.262518230432669, "grad_norm": 1.4590798221220036, "learning_rate": 6.562597812161895e-06, "loss": 0.5275921821594238, "step": 5194 }, { "epoch": 1.2627613028682547, "grad_norm": 1.8049512884987822, "learning_rate": 6.558823577376372e-06, "loss": 0.6027249693870544, "step": 5195 }, { "epoch": 1.2630043753038405, "grad_norm": 1.4240807175239685, "learning_rate": 6.555049898543207e-06, "loss": 0.6585351228713989, "step": 5196 }, { "epoch": 1.2632474477394264, "grad_norm": 1.562852852812785, "learning_rate": 6.551276776272072e-06, "loss": 0.6243377327919006, "step": 5197 }, { "epoch": 1.2634905201750122, "grad_norm": 1.5934023223538016, "learning_rate": 6.547504211172549e-06, "loss": 0.6848798394203186, "step": 5198 }, { "epoch": 1.263733592610598, "grad_norm": 1.42428181699547, "learning_rate": 6.543732203854125e-06, "loss": 0.514905571937561, "step": 5199 }, { "epoch": 1.263976665046184, "grad_norm": 1.5151539057873986, "learning_rate": 6.539960754926206e-06, "loss": 0.7881988286972046, "step": 5200 }, { "epoch": 1.2642197374817696, "grad_norm": 1.3620015302470303, "learning_rate": 6.5361898649980994e-06, "loss": 0.479971319437027, "step": 5201 }, { "epoch": 1.2644628099173554, "grad_norm": 2.094655373081221, "learning_rate": 6.532419534679027e-06, "loss": 0.7090268135070801, "step": 5202 }, { "epoch": 1.2647058823529411, "grad_norm": 1.8202675616133075, "learning_rate": 6.528649764578116e-06, "loss": 0.654216468334198, "step": 5203 }, { "epoch": 1.2649489547885269, "grad_norm": 1.4094351234304143, "learning_rate": 6.524880555304409e-06, "loss": 0.6251753568649292, "step": 5204 }, { "epoch": 1.2651920272241128, "grad_norm": 1.4040665997207502, "learning_rate": 6.521111907466856e-06, "loss": 0.7004821300506592, "step": 5205 }, { "epoch": 1.2654350996596986, "grad_norm": 1.7140241666194735, "learning_rate": 6.5173438216743135e-06, "loss": 0.6373515129089355, "step": 5206 }, { "epoch": 1.2656781720952843, "grad_norm": 1.606560208105215, "learning_rate": 6.513576298535549e-06, "loss": 0.5399128198623657, "step": 5207 }, { "epoch": 1.2659212445308703, "grad_norm": 1.4415519724224628, "learning_rate": 6.5098093386592395e-06, "loss": 0.6919000148773193, "step": 5208 }, { "epoch": 1.266164316966456, "grad_norm": 1.455799733781914, "learning_rate": 6.506042942653971e-06, "loss": 0.5711508989334106, "step": 5209 }, { "epoch": 1.2664073894020418, "grad_norm": 1.4680640229060054, "learning_rate": 6.502277111128236e-06, "loss": 0.6219677925109863, "step": 5210 }, { "epoch": 1.2666504618376275, "grad_norm": 1.6262183202512905, "learning_rate": 6.4985118446904385e-06, "loss": 0.6135144233703613, "step": 5211 }, { "epoch": 1.2668935342732135, "grad_norm": 1.5653291181949374, "learning_rate": 6.49474714394889e-06, "loss": 0.7476679086685181, "step": 5212 }, { "epoch": 1.2671366067087992, "grad_norm": 1.3134621714774182, "learning_rate": 6.490983009511813e-06, "loss": 0.46186596155166626, "step": 5213 }, { "epoch": 1.267379679144385, "grad_norm": 1.4982624132386166, "learning_rate": 6.4872194419873355e-06, "loss": 0.6272224187850952, "step": 5214 }, { "epoch": 1.267622751579971, "grad_norm": 1.5554470517546406, "learning_rate": 6.483456441983496e-06, "loss": 0.6116430759429932, "step": 5215 }, { "epoch": 1.2678658240155567, "grad_norm": 1.4078804672170386, "learning_rate": 6.479694010108237e-06, "loss": 0.5729094743728638, "step": 5216 }, { "epoch": 1.2681088964511424, "grad_norm": 1.7684111079010243, "learning_rate": 6.475932146969417e-06, "loss": 0.5684142708778381, "step": 5217 }, { "epoch": 1.2683519688867282, "grad_norm": 1.6378483395480448, "learning_rate": 6.472170853174795e-06, "loss": 0.6647416353225708, "step": 5218 }, { "epoch": 1.268595041322314, "grad_norm": 1.583055517519595, "learning_rate": 6.468410129332042e-06, "loss": 0.6077133417129517, "step": 5219 }, { "epoch": 1.2688381137579, "grad_norm": 1.6907437929197875, "learning_rate": 6.464649976048736e-06, "loss": 0.6566146612167358, "step": 5220 }, { "epoch": 1.2690811861934856, "grad_norm": 1.6890914263019619, "learning_rate": 6.460890393932362e-06, "loss": 0.5637683868408203, "step": 5221 }, { "epoch": 1.2693242586290714, "grad_norm": 1.5763146163161026, "learning_rate": 6.457131383590316e-06, "loss": 0.5659505128860474, "step": 5222 }, { "epoch": 1.2695673310646574, "grad_norm": 2.950438766559089, "learning_rate": 6.453372945629896e-06, "loss": 0.5361185669898987, "step": 5223 }, { "epoch": 1.269810403500243, "grad_norm": 1.5060337733036873, "learning_rate": 6.4496150806583145e-06, "loss": 0.6903121471405029, "step": 5224 }, { "epoch": 1.2700534759358288, "grad_norm": 1.36078191694349, "learning_rate": 6.445857789282686e-06, "loss": 0.4372348189353943, "step": 5225 }, { "epoch": 1.2702965483714146, "grad_norm": 1.3978917853777058, "learning_rate": 6.4421010721100315e-06, "loss": 0.5939218997955322, "step": 5226 }, { "epoch": 1.2705396208070006, "grad_norm": 1.4699545509570386, "learning_rate": 6.438344929747287e-06, "loss": 0.5934746265411377, "step": 5227 }, { "epoch": 1.2707826932425863, "grad_norm": 1.6779710501488012, "learning_rate": 6.434589362801286e-06, "loss": 0.5324106812477112, "step": 5228 }, { "epoch": 1.271025765678172, "grad_norm": 1.5405013401124141, "learning_rate": 6.430834371878769e-06, "loss": 0.5316939949989319, "step": 5229 }, { "epoch": 1.271268838113758, "grad_norm": 1.4097750743247994, "learning_rate": 6.427079957586402e-06, "loss": 0.5089868903160095, "step": 5230 }, { "epoch": 1.2715119105493438, "grad_norm": 1.9074932509064728, "learning_rate": 6.423326120530733e-06, "loss": 0.7304666042327881, "step": 5231 }, { "epoch": 1.2717549829849295, "grad_norm": 1.6353808549459024, "learning_rate": 6.419572861318229e-06, "loss": 0.6588101387023926, "step": 5232 }, { "epoch": 1.2719980554205153, "grad_norm": 1.7862851430694182, "learning_rate": 6.4158201805552635e-06, "loss": 0.717509388923645, "step": 5233 }, { "epoch": 1.272241127856101, "grad_norm": 1.429038024568339, "learning_rate": 6.412068078848113e-06, "loss": 0.6164833307266235, "step": 5234 }, { "epoch": 1.272484200291687, "grad_norm": 1.8016949877038413, "learning_rate": 6.408316556802966e-06, "loss": 0.6387877464294434, "step": 5235 }, { "epoch": 1.2727272727272727, "grad_norm": 1.5476666696100403, "learning_rate": 6.404565615025911e-06, "loss": 0.667172908782959, "step": 5236 }, { "epoch": 1.2729703451628585, "grad_norm": 1.651355727671361, "learning_rate": 6.400815254122943e-06, "loss": 0.4903181195259094, "step": 5237 }, { "epoch": 1.2732134175984444, "grad_norm": 1.7041122351195679, "learning_rate": 6.397065474699973e-06, "loss": 0.5561158657073975, "step": 5238 }, { "epoch": 1.2734564900340302, "grad_norm": 1.6005630004928029, "learning_rate": 6.393316277362808e-06, "loss": 0.4650590121746063, "step": 5239 }, { "epoch": 1.273699562469616, "grad_norm": 1.67127983352451, "learning_rate": 6.389567662717165e-06, "loss": 0.6089769601821899, "step": 5240 }, { "epoch": 1.2739426349052017, "grad_norm": 1.6695891808721357, "learning_rate": 6.3858196313686614e-06, "loss": 0.5383753180503845, "step": 5241 }, { "epoch": 1.2741857073407876, "grad_norm": 1.528049567059983, "learning_rate": 6.382072183922827e-06, "loss": 0.42563962936401367, "step": 5242 }, { "epoch": 1.2744287797763734, "grad_norm": 1.628423025203771, "learning_rate": 6.378325320985096e-06, "loss": 0.4397202432155609, "step": 5243 }, { "epoch": 1.2746718522119591, "grad_norm": 1.4056866745268615, "learning_rate": 6.374579043160807e-06, "loss": 0.5652577877044678, "step": 5244 }, { "epoch": 1.274914924647545, "grad_norm": 1.5066649026180536, "learning_rate": 6.3708333510551965e-06, "loss": 0.43195784091949463, "step": 5245 }, { "epoch": 1.2751579970831308, "grad_norm": 1.427065863099142, "learning_rate": 6.367088245273427e-06, "loss": 0.4820752441883087, "step": 5246 }, { "epoch": 1.2754010695187166, "grad_norm": 1.5106116077110108, "learning_rate": 6.363343726420548e-06, "loss": 0.49181216955184937, "step": 5247 }, { "epoch": 1.2756441419543023, "grad_norm": 1.8977105074808918, "learning_rate": 6.359599795101519e-06, "loss": 0.5101951360702515, "step": 5248 }, { "epoch": 1.275887214389888, "grad_norm": 1.7417491528227187, "learning_rate": 6.355856451921205e-06, "loss": 0.6113131046295166, "step": 5249 }, { "epoch": 1.276130286825474, "grad_norm": 1.4858690855626382, "learning_rate": 6.352113697484375e-06, "loss": 0.46299731731414795, "step": 5250 }, { "epoch": 1.2763733592610598, "grad_norm": 1.48833112267567, "learning_rate": 6.3483715323957056e-06, "loss": 0.6106514930725098, "step": 5251 }, { "epoch": 1.2766164316966457, "grad_norm": 1.7393833705941937, "learning_rate": 6.3446299572597735e-06, "loss": 0.7521138191223145, "step": 5252 }, { "epoch": 1.2768595041322315, "grad_norm": 1.688013354977256, "learning_rate": 6.340888972681063e-06, "loss": 0.5968804359436035, "step": 5253 }, { "epoch": 1.2771025765678172, "grad_norm": 1.5555953060507008, "learning_rate": 6.337148579263969e-06, "loss": 0.5969014167785645, "step": 5254 }, { "epoch": 1.277345649003403, "grad_norm": 1.613802992948838, "learning_rate": 6.333408777612783e-06, "loss": 0.5022306442260742, "step": 5255 }, { "epoch": 1.2775887214389887, "grad_norm": 1.7309197388099404, "learning_rate": 6.3296695683317e-06, "loss": 0.6929557919502258, "step": 5256 }, { "epoch": 1.2778317938745747, "grad_norm": 1.6054867852572605, "learning_rate": 6.3259309520248235e-06, "loss": 0.5119321346282959, "step": 5257 }, { "epoch": 1.2780748663101604, "grad_norm": 1.5421744471729883, "learning_rate": 6.322192929296161e-06, "loss": 0.5409209728240967, "step": 5258 }, { "epoch": 1.2783179387457462, "grad_norm": 1.731641624834208, "learning_rate": 6.318455500749624e-06, "loss": 0.6241094470024109, "step": 5259 }, { "epoch": 1.2785610111813321, "grad_norm": 1.464308851907881, "learning_rate": 6.314718666989022e-06, "loss": 0.6403630971908569, "step": 5260 }, { "epoch": 1.2788040836169179, "grad_norm": 1.6020165212329542, "learning_rate": 6.310982428618078e-06, "loss": 0.5395325422286987, "step": 5261 }, { "epoch": 1.2790471560525036, "grad_norm": 1.5760246190012959, "learning_rate": 6.307246786240406e-06, "loss": 0.536638617515564, "step": 5262 }, { "epoch": 1.2792902284880894, "grad_norm": 1.3632302877733613, "learning_rate": 6.303511740459545e-06, "loss": 0.5367751121520996, "step": 5263 }, { "epoch": 1.2795333009236751, "grad_norm": 1.5235046012087317, "learning_rate": 6.299777291878917e-06, "loss": 0.5922404527664185, "step": 5264 }, { "epoch": 1.279776373359261, "grad_norm": 1.7145672933633835, "learning_rate": 6.2960434411018564e-06, "loss": 0.7174407243728638, "step": 5265 }, { "epoch": 1.2800194457948468, "grad_norm": 1.4744580425242968, "learning_rate": 6.292310188731599e-06, "loss": 0.6530517339706421, "step": 5266 }, { "epoch": 1.2802625182304328, "grad_norm": 1.688579936597846, "learning_rate": 6.288577535371286e-06, "loss": 0.5670619606971741, "step": 5267 }, { "epoch": 1.2805055906660185, "grad_norm": 1.6639204290836112, "learning_rate": 6.2848454816239605e-06, "loss": 0.5512239933013916, "step": 5268 }, { "epoch": 1.2807486631016043, "grad_norm": 1.5217247778465943, "learning_rate": 6.281114028092567e-06, "loss": 0.4595481753349304, "step": 5269 }, { "epoch": 1.28099173553719, "grad_norm": 1.5290639878621695, "learning_rate": 6.27738317537995e-06, "loss": 0.46013978123664856, "step": 5270 }, { "epoch": 1.2812348079727758, "grad_norm": 1.3306679299349369, "learning_rate": 6.273652924088871e-06, "loss": 0.5399197936058044, "step": 5271 }, { "epoch": 1.2814778804083617, "grad_norm": 1.6328214200659994, "learning_rate": 6.2699232748219805e-06, "loss": 0.6688557863235474, "step": 5272 }, { "epoch": 1.2817209528439475, "grad_norm": 1.6313561960492131, "learning_rate": 6.266194228181837e-06, "loss": 0.6871187686920166, "step": 5273 }, { "epoch": 1.2819640252795332, "grad_norm": 1.5083623088537426, "learning_rate": 6.262465784770902e-06, "loss": 0.6173466444015503, "step": 5274 }, { "epoch": 1.2822070977151192, "grad_norm": 1.6907420413614764, "learning_rate": 6.258737945191533e-06, "loss": 0.6585355997085571, "step": 5275 }, { "epoch": 1.282450170150705, "grad_norm": 1.4067605210854304, "learning_rate": 6.255010710046001e-06, "loss": 0.48273104429244995, "step": 5276 }, { "epoch": 1.2826932425862907, "grad_norm": 1.683895255927075, "learning_rate": 6.251284079936473e-06, "loss": 0.5210840702056885, "step": 5277 }, { "epoch": 1.2829363150218764, "grad_norm": 1.619665100914258, "learning_rate": 6.247558055465015e-06, "loss": 0.46657896041870117, "step": 5278 }, { "epoch": 1.2831793874574624, "grad_norm": 1.3901020179521584, "learning_rate": 6.243832637233602e-06, "loss": 0.5119668841362, "step": 5279 }, { "epoch": 1.2834224598930482, "grad_norm": 1.7337164410882935, "learning_rate": 6.2401078258441095e-06, "loss": 0.6601573824882507, "step": 5280 }, { "epoch": 1.283665532328634, "grad_norm": 1.4165827735428818, "learning_rate": 6.236383621898312e-06, "loss": 0.5354973673820496, "step": 5281 }, { "epoch": 1.2839086047642199, "grad_norm": 1.468601375085598, "learning_rate": 6.232660025997887e-06, "loss": 0.5329231023788452, "step": 5282 }, { "epoch": 1.2841516771998056, "grad_norm": 1.659228687990688, "learning_rate": 6.228937038744414e-06, "loss": 0.5472928881645203, "step": 5283 }, { "epoch": 1.2843947496353914, "grad_norm": 2.76156093481022, "learning_rate": 6.225214660739375e-06, "loss": 0.5492330193519592, "step": 5284 }, { "epoch": 1.284637822070977, "grad_norm": 1.7365910665896007, "learning_rate": 6.221492892584153e-06, "loss": 0.5403812527656555, "step": 5285 }, { "epoch": 1.2848808945065628, "grad_norm": 1.6952893130680848, "learning_rate": 6.21777173488003e-06, "loss": 0.7138451337814331, "step": 5286 }, { "epoch": 1.2851239669421488, "grad_norm": 1.8140090854273458, "learning_rate": 6.214051188228197e-06, "loss": 0.5598385334014893, "step": 5287 }, { "epoch": 1.2853670393777346, "grad_norm": 1.5397438245409987, "learning_rate": 6.210331253229738e-06, "loss": 0.5787249803543091, "step": 5288 }, { "epoch": 1.2856101118133203, "grad_norm": 1.4431399354885739, "learning_rate": 6.206611930485642e-06, "loss": 0.5281804800033569, "step": 5289 }, { "epoch": 1.2858531842489063, "grad_norm": 1.603934101662011, "learning_rate": 6.202893220596797e-06, "loss": 0.5704047679901123, "step": 5290 }, { "epoch": 1.286096256684492, "grad_norm": 1.8490388771315667, "learning_rate": 6.199175124163994e-06, "loss": 0.5764999389648438, "step": 5291 }, { "epoch": 1.2863393291200778, "grad_norm": 1.611406538307708, "learning_rate": 6.1954576417879255e-06, "loss": 0.7389283180236816, "step": 5292 }, { "epoch": 1.2865824015556635, "grad_norm": 1.3449012079720257, "learning_rate": 6.19174077406918e-06, "loss": 0.3917507529258728, "step": 5293 }, { "epoch": 1.2868254739912495, "grad_norm": 1.5569566589285202, "learning_rate": 6.188024521608254e-06, "loss": 0.5301644802093506, "step": 5294 }, { "epoch": 1.2870685464268352, "grad_norm": 1.4371082739452312, "learning_rate": 6.184308885005539e-06, "loss": 0.6347904205322266, "step": 5295 }, { "epoch": 1.287311618862421, "grad_norm": 1.4989775435332195, "learning_rate": 6.180593864861329e-06, "loss": 0.5206936597824097, "step": 5296 }, { "epoch": 1.287554691298007, "grad_norm": 2.0347227607282803, "learning_rate": 6.1768794617758195e-06, "loss": 0.7701382040977478, "step": 5297 }, { "epoch": 1.2877977637335927, "grad_norm": 1.4677384495881496, "learning_rate": 6.173165676349103e-06, "loss": 0.664718508720398, "step": 5298 }, { "epoch": 1.2880408361691784, "grad_norm": 1.4101422363084766, "learning_rate": 6.1694525091811765e-06, "loss": 0.5925289988517761, "step": 5299 }, { "epoch": 1.2882839086047642, "grad_norm": 1.4392897200661323, "learning_rate": 6.1657399608719325e-06, "loss": 0.5321701169013977, "step": 5300 }, { "epoch": 1.28852698104035, "grad_norm": 1.6532079805779933, "learning_rate": 6.162028032021168e-06, "loss": 0.48764801025390625, "step": 5301 }, { "epoch": 1.2887700534759359, "grad_norm": 1.5279696273088788, "learning_rate": 6.158316723228577e-06, "loss": 0.43258196115493774, "step": 5302 }, { "epoch": 1.2890131259115216, "grad_norm": 1.6237187729055784, "learning_rate": 6.154606035093751e-06, "loss": 0.5877864360809326, "step": 5303 }, { "epoch": 1.2892561983471074, "grad_norm": 1.48333539527661, "learning_rate": 6.150895968216189e-06, "loss": 0.7269659042358398, "step": 5304 }, { "epoch": 1.2894992707826933, "grad_norm": 1.4983839679802886, "learning_rate": 6.147186523195283e-06, "loss": 0.5650476813316345, "step": 5305 }, { "epoch": 1.289742343218279, "grad_norm": 1.8082105279601057, "learning_rate": 6.143477700630328e-06, "loss": 0.6014181971549988, "step": 5306 }, { "epoch": 1.2899854156538648, "grad_norm": 1.4152506075011797, "learning_rate": 6.139769501120515e-06, "loss": 0.38871270418167114, "step": 5307 }, { "epoch": 1.2902284880894506, "grad_norm": 1.675141286355176, "learning_rate": 6.136061925264937e-06, "loss": 0.5541660785675049, "step": 5308 }, { "epoch": 1.2904715605250365, "grad_norm": 1.5385707748903505, "learning_rate": 6.1323549736625856e-06, "loss": 0.5898334980010986, "step": 5309 }, { "epoch": 1.2907146329606223, "grad_norm": 1.4963038176874692, "learning_rate": 6.128648646912352e-06, "loss": 0.6419920921325684, "step": 5310 }, { "epoch": 1.290957705396208, "grad_norm": 1.6837736491673456, "learning_rate": 6.124942945613021e-06, "loss": 0.5069506168365479, "step": 5311 }, { "epoch": 1.291200777831794, "grad_norm": 1.4407046131291872, "learning_rate": 6.12123787036329e-06, "loss": 0.47638124227523804, "step": 5312 }, { "epoch": 1.2914438502673797, "grad_norm": 1.624928538900863, "learning_rate": 6.117533421761741e-06, "loss": 0.597430944442749, "step": 5313 }, { "epoch": 1.2916869227029655, "grad_norm": 1.513855659994277, "learning_rate": 6.113829600406862e-06, "loss": 0.5756438374519348, "step": 5314 }, { "epoch": 1.2919299951385512, "grad_norm": 1.5445061272449931, "learning_rate": 6.110126406897037e-06, "loss": 0.7006171345710754, "step": 5315 }, { "epoch": 1.292173067574137, "grad_norm": 1.7069899579947885, "learning_rate": 6.10642384183055e-06, "loss": 0.5976263880729675, "step": 5316 }, { "epoch": 1.292416140009723, "grad_norm": 1.4170145192765728, "learning_rate": 6.102721905805583e-06, "loss": 0.5770970582962036, "step": 5317 }, { "epoch": 1.2926592124453087, "grad_norm": 1.2075569533146309, "learning_rate": 6.099020599420216e-06, "loss": 0.3544054627418518, "step": 5318 }, { "epoch": 1.2929022848808944, "grad_norm": 1.5941881653413208, "learning_rate": 6.095319923272427e-06, "loss": 0.4903009235858917, "step": 5319 }, { "epoch": 1.2931453573164804, "grad_norm": 1.5149440660753644, "learning_rate": 6.091619877960094e-06, "loss": 0.6242126226425171, "step": 5320 }, { "epoch": 1.2933884297520661, "grad_norm": 1.398308727035598, "learning_rate": 6.087920464080993e-06, "loss": 0.46557408571243286, "step": 5321 }, { "epoch": 1.2936315021876519, "grad_norm": 1.6265893987162712, "learning_rate": 6.084221682232795e-06, "loss": 0.5741023421287537, "step": 5322 }, { "epoch": 1.2938745746232376, "grad_norm": 1.5483774767846767, "learning_rate": 6.080523533013072e-06, "loss": 0.5440030097961426, "step": 5323 }, { "epoch": 1.2941176470588236, "grad_norm": 1.5696845437714992, "learning_rate": 6.07682601701929e-06, "loss": 0.5578352212905884, "step": 5324 }, { "epoch": 1.2943607194944093, "grad_norm": 1.5013100296274546, "learning_rate": 6.073129134848819e-06, "loss": 0.5745951533317566, "step": 5325 }, { "epoch": 1.294603791929995, "grad_norm": 1.3606866386324297, "learning_rate": 6.06943288709892e-06, "loss": 0.4520919919013977, "step": 5326 }, { "epoch": 1.294846864365581, "grad_norm": 1.590532795777967, "learning_rate": 6.065737274366753e-06, "loss": 0.6348037123680115, "step": 5327 }, { "epoch": 1.2950899368011668, "grad_norm": 1.6891075127298936, "learning_rate": 6.062042297249382e-06, "loss": 0.5243490934371948, "step": 5328 }, { "epoch": 1.2953330092367525, "grad_norm": 1.7462346230581363, "learning_rate": 6.0583479563437615e-06, "loss": 0.6447511911392212, "step": 5329 }, { "epoch": 1.2955760816723383, "grad_norm": 1.4087845486508803, "learning_rate": 6.0546542522467435e-06, "loss": 0.6372796297073364, "step": 5330 }, { "epoch": 1.295819154107924, "grad_norm": 1.6190986711897921, "learning_rate": 6.050961185555078e-06, "loss": 0.5782623887062073, "step": 5331 }, { "epoch": 1.29606222654351, "grad_norm": 1.5669054510701899, "learning_rate": 6.047268756865412e-06, "loss": 0.5629816055297852, "step": 5332 }, { "epoch": 1.2963052989790957, "grad_norm": 1.8168681012353036, "learning_rate": 6.043576966774292e-06, "loss": 0.6268275380134583, "step": 5333 }, { "epoch": 1.2965483714146817, "grad_norm": 1.2671583421043946, "learning_rate": 6.039885815878158e-06, "loss": 0.5787390470504761, "step": 5334 }, { "epoch": 1.2967914438502675, "grad_norm": 1.6313041453618904, "learning_rate": 6.0361953047733434e-06, "loss": 0.5987088084220886, "step": 5335 }, { "epoch": 1.2970345162858532, "grad_norm": 1.7591399498972748, "learning_rate": 6.032505434056092e-06, "loss": 0.5440841317176819, "step": 5336 }, { "epoch": 1.297277588721439, "grad_norm": 1.8338444714861264, "learning_rate": 6.0288162043225295e-06, "loss": 0.5358070135116577, "step": 5337 }, { "epoch": 1.2975206611570247, "grad_norm": 1.5764767392538226, "learning_rate": 6.025127616168687e-06, "loss": 0.5872736573219299, "step": 5338 }, { "epoch": 1.2977637335926107, "grad_norm": 1.5322938877321854, "learning_rate": 6.021439670190483e-06, "loss": 0.7078459858894348, "step": 5339 }, { "epoch": 1.2980068060281964, "grad_norm": 1.8260780783671526, "learning_rate": 6.017752366983741e-06, "loss": 0.5909385681152344, "step": 5340 }, { "epoch": 1.2982498784637821, "grad_norm": 1.521492390679405, "learning_rate": 6.014065707144177e-06, "loss": 0.7488293647766113, "step": 5341 }, { "epoch": 1.2984929508993681, "grad_norm": 1.500521870565984, "learning_rate": 6.010379691267403e-06, "loss": 0.6524522304534912, "step": 5342 }, { "epoch": 1.2987360233349539, "grad_norm": 1.8096182913030463, "learning_rate": 6.006694319948926e-06, "loss": 0.5591270923614502, "step": 5343 }, { "epoch": 1.2989790957705396, "grad_norm": 1.7539181480312094, "learning_rate": 6.003009593784148e-06, "loss": 0.4917130470275879, "step": 5344 }, { "epoch": 1.2992221682061253, "grad_norm": 1.8619669234971765, "learning_rate": 5.999325513368377e-06, "loss": 0.6284105777740479, "step": 5345 }, { "epoch": 1.299465240641711, "grad_norm": 1.491893622978199, "learning_rate": 5.9956420792968026e-06, "loss": 0.502288818359375, "step": 5346 }, { "epoch": 1.299708313077297, "grad_norm": 1.8728883733846646, "learning_rate": 5.99195929216452e-06, "loss": 0.5670517683029175, "step": 5347 }, { "epoch": 1.2999513855128828, "grad_norm": 1.6463004958805234, "learning_rate": 5.988277152566513e-06, "loss": 0.5811123847961426, "step": 5348 }, { "epoch": 1.3001944579484688, "grad_norm": 1.4983966327483964, "learning_rate": 5.984595661097663e-06, "loss": 0.5848182439804077, "step": 5349 }, { "epoch": 1.3004375303840545, "grad_norm": 1.787364097070272, "learning_rate": 5.98091481835275e-06, "loss": 0.5811821818351746, "step": 5350 }, { "epoch": 1.3006806028196403, "grad_norm": 1.7926336585438354, "learning_rate": 5.977234624926443e-06, "loss": 0.6102205514907837, "step": 5351 }, { "epoch": 1.300923675255226, "grad_norm": 1.4775222880070487, "learning_rate": 5.973555081413308e-06, "loss": 0.5296741724014282, "step": 5352 }, { "epoch": 1.3011667476908118, "grad_norm": 1.3161480337043405, "learning_rate": 5.969876188407816e-06, "loss": 0.5534130334854126, "step": 5353 }, { "epoch": 1.3014098201263977, "grad_norm": 1.4600324250457872, "learning_rate": 5.96619794650432e-06, "loss": 0.5482769012451172, "step": 5354 }, { "epoch": 1.3016528925619835, "grad_norm": 1.584696353050241, "learning_rate": 5.962520356297071e-06, "loss": 0.6857703924179077, "step": 5355 }, { "epoch": 1.3018959649975692, "grad_norm": 1.6849340231024919, "learning_rate": 5.958843418380219e-06, "loss": 0.5614318251609802, "step": 5356 }, { "epoch": 1.3021390374331552, "grad_norm": 1.530853842690666, "learning_rate": 5.9551671333478056e-06, "loss": 0.5040712356567383, "step": 5357 }, { "epoch": 1.302382109868741, "grad_norm": 1.4331709632553962, "learning_rate": 5.951491501793762e-06, "loss": 0.5370842218399048, "step": 5358 }, { "epoch": 1.3026251823043267, "grad_norm": 1.3112150821665283, "learning_rate": 5.947816524311924e-06, "loss": 0.5264583826065063, "step": 5359 }, { "epoch": 1.3028682547399124, "grad_norm": 1.5806293363133763, "learning_rate": 5.944142201496011e-06, "loss": 0.682226836681366, "step": 5360 }, { "epoch": 1.3031113271754984, "grad_norm": 1.5145230868523494, "learning_rate": 5.9404685339396494e-06, "loss": 0.6052165031433105, "step": 5361 }, { "epoch": 1.3033543996110841, "grad_norm": 1.407661429451131, "learning_rate": 5.93679552223635e-06, "loss": 0.6169764995574951, "step": 5362 }, { "epoch": 1.3035974720466699, "grad_norm": 1.265303847913882, "learning_rate": 5.9331231669795195e-06, "loss": 0.4023531675338745, "step": 5363 }, { "epoch": 1.3038405444822558, "grad_norm": 1.486347645315122, "learning_rate": 5.9294514687624595e-06, "loss": 0.4103936553001404, "step": 5364 }, { "epoch": 1.3040836169178416, "grad_norm": 1.4340462679509047, "learning_rate": 5.925780428178365e-06, "loss": 0.44300079345703125, "step": 5365 }, { "epoch": 1.3043266893534273, "grad_norm": 1.5810760977776601, "learning_rate": 5.922110045820324e-06, "loss": 0.6070108413696289, "step": 5366 }, { "epoch": 1.304569761789013, "grad_norm": 1.5237610235226189, "learning_rate": 5.918440322281321e-06, "loss": 0.5898382663726807, "step": 5367 }, { "epoch": 1.3048128342245988, "grad_norm": 1.5624006743229382, "learning_rate": 5.914771258154226e-06, "loss": 0.526418149471283, "step": 5368 }, { "epoch": 1.3050559066601848, "grad_norm": 1.7315602778305508, "learning_rate": 5.911102854031818e-06, "loss": 0.6091320514678955, "step": 5369 }, { "epoch": 1.3052989790957705, "grad_norm": 1.2136960784282997, "learning_rate": 5.907435110506756e-06, "loss": 0.4941347539424896, "step": 5370 }, { "epoch": 1.3055420515313563, "grad_norm": 1.6029381688089697, "learning_rate": 5.903768028171596e-06, "loss": 0.5007350444793701, "step": 5371 }, { "epoch": 1.3057851239669422, "grad_norm": 1.6357304265404433, "learning_rate": 5.9001016076187864e-06, "loss": 0.6731880903244019, "step": 5372 }, { "epoch": 1.306028196402528, "grad_norm": 1.758592217804813, "learning_rate": 5.896435849440671e-06, "loss": 0.5802782773971558, "step": 5373 }, { "epoch": 1.3062712688381137, "grad_norm": 1.4644326303329287, "learning_rate": 5.892770754229487e-06, "loss": 0.508793830871582, "step": 5374 }, { "epoch": 1.3065143412736995, "grad_norm": 1.7273800415963385, "learning_rate": 5.88910632257736e-06, "loss": 0.6289326548576355, "step": 5375 }, { "epoch": 1.3067574137092854, "grad_norm": 1.6447404913865502, "learning_rate": 5.885442555076308e-06, "loss": 0.5202768445014954, "step": 5376 }, { "epoch": 1.3070004861448712, "grad_norm": 1.39747341898858, "learning_rate": 5.881779452318254e-06, "loss": 0.47191691398620605, "step": 5377 }, { "epoch": 1.307243558580457, "grad_norm": 1.5091470176471393, "learning_rate": 5.878117014894998e-06, "loss": 0.5994022488594055, "step": 5378 }, { "epoch": 1.307486631016043, "grad_norm": 1.5641948110658577, "learning_rate": 5.874455243398241e-06, "loss": 0.5396936535835266, "step": 5379 }, { "epoch": 1.3077297034516286, "grad_norm": 1.366245858878424, "learning_rate": 5.870794138419573e-06, "loss": 0.5449162125587463, "step": 5380 }, { "epoch": 1.3079727758872144, "grad_norm": 1.6791802518217955, "learning_rate": 5.867133700550479e-06, "loss": 0.7038771510124207, "step": 5381 }, { "epoch": 1.3082158483228001, "grad_norm": 1.5622389369351173, "learning_rate": 5.863473930382335e-06, "loss": 0.49005627632141113, "step": 5382 }, { "epoch": 1.3084589207583859, "grad_norm": 1.6345686385435592, "learning_rate": 5.859814828506409e-06, "loss": 0.672613263130188, "step": 5383 }, { "epoch": 1.3087019931939718, "grad_norm": 1.5765760064662901, "learning_rate": 5.856156395513861e-06, "loss": 0.45427390933036804, "step": 5384 }, { "epoch": 1.3089450656295576, "grad_norm": 1.718915269111883, "learning_rate": 5.852498631995741e-06, "loss": 0.5199953317642212, "step": 5385 }, { "epoch": 1.3091881380651433, "grad_norm": 1.4879814960897737, "learning_rate": 5.848841538542998e-06, "loss": 0.5443390011787415, "step": 5386 }, { "epoch": 1.3094312105007293, "grad_norm": 1.5130805503196834, "learning_rate": 5.845185115746462e-06, "loss": 0.6010069847106934, "step": 5387 }, { "epoch": 1.309674282936315, "grad_norm": 1.4905006655122781, "learning_rate": 5.841529364196865e-06, "loss": 0.5760056376457214, "step": 5388 }, { "epoch": 1.3099173553719008, "grad_norm": 1.4161885369919713, "learning_rate": 5.837874284484825e-06, "loss": 0.48320767283439636, "step": 5389 }, { "epoch": 1.3101604278074865, "grad_norm": 1.5036516027297109, "learning_rate": 5.834219877200846e-06, "loss": 0.534325122833252, "step": 5390 }, { "epoch": 1.3104035002430725, "grad_norm": 2.1690307030291893, "learning_rate": 5.830566142935341e-06, "loss": 0.593902051448822, "step": 5391 }, { "epoch": 1.3106465726786583, "grad_norm": 1.620284542604428, "learning_rate": 5.8269130822785916e-06, "loss": 0.607881486415863, "step": 5392 }, { "epoch": 1.310889645114244, "grad_norm": 1.5552381589970485, "learning_rate": 5.823260695820787e-06, "loss": 0.4513486623764038, "step": 5393 }, { "epoch": 1.31113271754983, "grad_norm": 1.590942070619567, "learning_rate": 5.819608984152002e-06, "loss": 0.5971618890762329, "step": 5394 }, { "epoch": 1.3113757899854157, "grad_norm": 1.7198526490003738, "learning_rate": 5.8159579478622074e-06, "loss": 0.6166183352470398, "step": 5395 }, { "epoch": 1.3116188624210015, "grad_norm": 1.432945383408454, "learning_rate": 5.812307587541252e-06, "loss": 0.47077059745788574, "step": 5396 }, { "epoch": 1.3118619348565872, "grad_norm": 1.7214300246906233, "learning_rate": 5.808657903778894e-06, "loss": 0.6025532484054565, "step": 5397 }, { "epoch": 1.312105007292173, "grad_norm": 1.7641237271314887, "learning_rate": 5.805008897164763e-06, "loss": 0.6124259233474731, "step": 5398 }, { "epoch": 1.312348079727759, "grad_norm": 1.5680814240550116, "learning_rate": 5.801360568288388e-06, "loss": 0.5433374643325806, "step": 5399 }, { "epoch": 1.3125911521633447, "grad_norm": 1.6799284668414578, "learning_rate": 5.797712917739194e-06, "loss": 0.49727320671081543, "step": 5400 }, { "epoch": 1.3128342245989304, "grad_norm": 1.5225712395596849, "learning_rate": 5.794065946106482e-06, "loss": 0.6704481840133667, "step": 5401 }, { "epoch": 1.3130772970345164, "grad_norm": 1.5475752649563874, "learning_rate": 5.790419653979466e-06, "loss": 0.5929580926895142, "step": 5402 }, { "epoch": 1.3133203694701021, "grad_norm": 1.8369137452421387, "learning_rate": 5.786774041947227e-06, "loss": 0.5363010764122009, "step": 5403 }, { "epoch": 1.3135634419056879, "grad_norm": 1.6758326734260898, "learning_rate": 5.783129110598751e-06, "loss": 0.5288071632385254, "step": 5404 }, { "epoch": 1.3138065143412736, "grad_norm": 1.867170507052972, "learning_rate": 5.779484860522904e-06, "loss": 0.5982369184494019, "step": 5405 }, { "epoch": 1.3140495867768596, "grad_norm": 1.8241369670374437, "learning_rate": 5.7758412923084505e-06, "loss": 0.5460942387580872, "step": 5406 }, { "epoch": 1.3142926592124453, "grad_norm": 1.769665231659213, "learning_rate": 5.772198406544041e-06, "loss": 0.572271466255188, "step": 5407 }, { "epoch": 1.314535731648031, "grad_norm": 1.410306503252972, "learning_rate": 5.768556203818207e-06, "loss": 0.46316200494766235, "step": 5408 }, { "epoch": 1.314778804083617, "grad_norm": 1.5165231416027494, "learning_rate": 5.764914684719389e-06, "loss": 0.5277228355407715, "step": 5409 }, { "epoch": 1.3150218765192028, "grad_norm": 1.6116664465038035, "learning_rate": 5.761273849835903e-06, "loss": 0.6788420677185059, "step": 5410 }, { "epoch": 1.3152649489547885, "grad_norm": 1.6008422474511177, "learning_rate": 5.75763369975596e-06, "loss": 0.519313633441925, "step": 5411 }, { "epoch": 1.3155080213903743, "grad_norm": 1.358299316612755, "learning_rate": 5.753994235067652e-06, "loss": 0.44956910610198975, "step": 5412 }, { "epoch": 1.31575109382596, "grad_norm": 1.4575284524627925, "learning_rate": 5.750355456358976e-06, "loss": 0.5300643444061279, "step": 5413 }, { "epoch": 1.315994166261546, "grad_norm": 1.6501670239223363, "learning_rate": 5.7467173642177996e-06, "loss": 0.49038833379745483, "step": 5414 }, { "epoch": 1.3162372386971317, "grad_norm": 1.6951733104180264, "learning_rate": 5.743079959231896e-06, "loss": 0.4410272240638733, "step": 5415 }, { "epoch": 1.3164803111327177, "grad_norm": 1.594434820635216, "learning_rate": 5.739443241988917e-06, "loss": 0.5241360068321228, "step": 5416 }, { "epoch": 1.3167233835683034, "grad_norm": 1.551153979496306, "learning_rate": 5.735807213076401e-06, "loss": 0.603049635887146, "step": 5417 }, { "epoch": 1.3169664560038892, "grad_norm": 1.8421035032121742, "learning_rate": 5.732171873081787e-06, "loss": 0.5084812641143799, "step": 5418 }, { "epoch": 1.317209528439475, "grad_norm": 1.7217728858812715, "learning_rate": 5.728537222592393e-06, "loss": 0.593845784664154, "step": 5419 }, { "epoch": 1.3174526008750607, "grad_norm": 1.662841878839775, "learning_rate": 5.7249032621954344e-06, "loss": 0.6215741634368896, "step": 5420 }, { "epoch": 1.3176956733106466, "grad_norm": 1.911498021037758, "learning_rate": 5.721269992478002e-06, "loss": 0.6034639477729797, "step": 5421 }, { "epoch": 1.3179387457462324, "grad_norm": 1.5049448078769179, "learning_rate": 5.71763741402709e-06, "loss": 0.7035446166992188, "step": 5422 }, { "epoch": 1.3181818181818181, "grad_norm": 1.5385745998337852, "learning_rate": 5.714005527429567e-06, "loss": 0.5383206009864807, "step": 5423 }, { "epoch": 1.318424890617404, "grad_norm": 1.694945388795577, "learning_rate": 5.7103743332722e-06, "loss": 0.5978615283966064, "step": 5424 }, { "epoch": 1.3186679630529898, "grad_norm": 1.7538521231178215, "learning_rate": 5.706743832141637e-06, "loss": 0.5257077217102051, "step": 5425 }, { "epoch": 1.3189110354885756, "grad_norm": 1.7394562410013377, "learning_rate": 5.703114024624419e-06, "loss": 0.5892611742019653, "step": 5426 }, { "epoch": 1.3191541079241613, "grad_norm": 1.472471349057131, "learning_rate": 5.6994849113069785e-06, "loss": 0.6511085033416748, "step": 5427 }, { "epoch": 1.319397180359747, "grad_norm": 1.8334737596998898, "learning_rate": 5.69585649277562e-06, "loss": 0.600016713142395, "step": 5428 }, { "epoch": 1.319640252795333, "grad_norm": 1.5172199132679303, "learning_rate": 5.692228769616559e-06, "loss": 0.6560725569725037, "step": 5429 }, { "epoch": 1.3198833252309188, "grad_norm": 1.4817216120810732, "learning_rate": 5.688601742415874e-06, "loss": 0.5857517719268799, "step": 5430 }, { "epoch": 1.3201263976665047, "grad_norm": 1.6135474779019248, "learning_rate": 5.684975411759554e-06, "loss": 0.6180276870727539, "step": 5431 }, { "epoch": 1.3203694701020905, "grad_norm": 1.7139383097812668, "learning_rate": 5.681349778233456e-06, "loss": 0.6451040506362915, "step": 5432 }, { "epoch": 1.3206125425376762, "grad_norm": 1.441509003126418, "learning_rate": 5.67772484242334e-06, "loss": 0.5884961485862732, "step": 5433 }, { "epoch": 1.320855614973262, "grad_norm": 1.7643900317962011, "learning_rate": 5.6741006049148385e-06, "loss": 0.5610600709915161, "step": 5434 }, { "epoch": 1.3210986874088477, "grad_norm": 1.48563112597105, "learning_rate": 5.670477066293482e-06, "loss": 0.6146764755249023, "step": 5435 }, { "epoch": 1.3213417598444337, "grad_norm": 1.4922010918448743, "learning_rate": 5.6668542271446935e-06, "loss": 0.6182267665863037, "step": 5436 }, { "epoch": 1.3215848322800194, "grad_norm": 1.447222449870978, "learning_rate": 5.663232088053763e-06, "loss": 0.5381438732147217, "step": 5437 }, { "epoch": 1.3218279047156052, "grad_norm": 1.4926933331415184, "learning_rate": 5.659610649605887e-06, "loss": 0.5809948444366455, "step": 5438 }, { "epoch": 1.3220709771511912, "grad_norm": 1.6379625836505884, "learning_rate": 5.655989912386133e-06, "loss": 0.6800763607025146, "step": 5439 }, { "epoch": 1.322314049586777, "grad_norm": 1.5232621508331985, "learning_rate": 5.652369876979471e-06, "loss": 0.5611323714256287, "step": 5440 }, { "epoch": 1.3225571220223626, "grad_norm": 1.5019051640432388, "learning_rate": 5.648750543970743e-06, "loss": 0.43468624353408813, "step": 5441 }, { "epoch": 1.3228001944579484, "grad_norm": 1.431049715993674, "learning_rate": 5.645131913944684e-06, "loss": 0.5858216285705566, "step": 5442 }, { "epoch": 1.3230432668935344, "grad_norm": 1.6340853582124024, "learning_rate": 5.641513987485925e-06, "loss": 0.4991682469844818, "step": 5443 }, { "epoch": 1.32328633932912, "grad_norm": 1.7734521401051033, "learning_rate": 5.637896765178963e-06, "loss": 0.701723039150238, "step": 5444 }, { "epoch": 1.3235294117647058, "grad_norm": 1.7166577394722256, "learning_rate": 5.6342802476082014e-06, "loss": 0.567246675491333, "step": 5445 }, { "epoch": 1.3237724842002918, "grad_norm": 1.4734605252719084, "learning_rate": 5.630664435357911e-06, "loss": 0.48682940006256104, "step": 5446 }, { "epoch": 1.3240155566358776, "grad_norm": 1.532115796887814, "learning_rate": 5.627049329012266e-06, "loss": 0.5299606323242188, "step": 5447 }, { "epoch": 1.3242586290714633, "grad_norm": 1.4974427221312048, "learning_rate": 5.623434929155314e-06, "loss": 0.5758302211761475, "step": 5448 }, { "epoch": 1.324501701507049, "grad_norm": 1.4609347652704585, "learning_rate": 5.619821236370995e-06, "loss": 0.45335862040519714, "step": 5449 }, { "epoch": 1.3247447739426348, "grad_norm": 1.5018079849160553, "learning_rate": 5.616208251243131e-06, "loss": 0.4633961319923401, "step": 5450 }, { "epoch": 1.3249878463782208, "grad_norm": 1.5157008935009606, "learning_rate": 5.612595974355431e-06, "loss": 0.46459388732910156, "step": 5451 }, { "epoch": 1.3252309188138065, "grad_norm": 1.7650717604561428, "learning_rate": 5.6089844062914975e-06, "loss": 0.48655861616134644, "step": 5452 }, { "epoch": 1.3254739912493922, "grad_norm": 1.3328332609750317, "learning_rate": 5.6053735476348025e-06, "loss": 0.560539722442627, "step": 5453 }, { "epoch": 1.3257170636849782, "grad_norm": 1.9422754377811748, "learning_rate": 5.6017633989687184e-06, "loss": 0.6159456968307495, "step": 5454 }, { "epoch": 1.325960136120564, "grad_norm": 1.5624782267973905, "learning_rate": 5.598153960876489e-06, "loss": 0.5887136459350586, "step": 5455 }, { "epoch": 1.3262032085561497, "grad_norm": 1.7991185472511275, "learning_rate": 5.59454523394126e-06, "loss": 0.5594829320907593, "step": 5456 }, { "epoch": 1.3264462809917354, "grad_norm": 1.502862832391546, "learning_rate": 5.590937218746043e-06, "loss": 0.5928282141685486, "step": 5457 }, { "epoch": 1.3266893534273214, "grad_norm": 1.7539118890229364, "learning_rate": 5.587329915873755e-06, "loss": 0.4991530179977417, "step": 5458 }, { "epoch": 1.3269324258629072, "grad_norm": 1.5334700485029702, "learning_rate": 5.583723325907178e-06, "loss": 0.616547167301178, "step": 5459 }, { "epoch": 1.327175498298493, "grad_norm": 1.5088245764350514, "learning_rate": 5.580117449428992e-06, "loss": 0.5685099959373474, "step": 5460 }, { "epoch": 1.3274185707340789, "grad_norm": 1.546747554254726, "learning_rate": 5.576512287021765e-06, "loss": 0.5904390811920166, "step": 5461 }, { "epoch": 1.3276616431696646, "grad_norm": 1.4073697141470443, "learning_rate": 5.5729078392679316e-06, "loss": 0.44819098711013794, "step": 5462 }, { "epoch": 1.3279047156052504, "grad_norm": 1.5611713560168001, "learning_rate": 5.56930410674983e-06, "loss": 0.6710337400436401, "step": 5463 }, { "epoch": 1.328147788040836, "grad_norm": 1.7367709081130596, "learning_rate": 5.565701090049669e-06, "loss": 0.6127203702926636, "step": 5464 }, { "epoch": 1.3283908604764219, "grad_norm": 1.5020102161701456, "learning_rate": 5.562098789749555e-06, "loss": 0.4515858292579651, "step": 5465 }, { "epoch": 1.3286339329120078, "grad_norm": 1.4455924144211267, "learning_rate": 5.558497206431462e-06, "loss": 0.6707276701927185, "step": 5466 }, { "epoch": 1.3288770053475936, "grad_norm": 1.4819622402355088, "learning_rate": 5.554896340677264e-06, "loss": 0.46185988187789917, "step": 5467 }, { "epoch": 1.3291200777831793, "grad_norm": 1.3978149047338047, "learning_rate": 5.551296193068714e-06, "loss": 0.6497876644134521, "step": 5468 }, { "epoch": 1.3293631502187653, "grad_norm": 1.3079883250966229, "learning_rate": 5.5476967641874416e-06, "loss": 0.45839613676071167, "step": 5469 }, { "epoch": 1.329606222654351, "grad_norm": 1.4594702938988462, "learning_rate": 5.544098054614973e-06, "loss": 0.5419684052467346, "step": 5470 }, { "epoch": 1.3298492950899368, "grad_norm": 1.5434706234694742, "learning_rate": 5.5405000649327035e-06, "loss": 0.5955681204795837, "step": 5471 }, { "epoch": 1.3300923675255225, "grad_norm": 1.546393403197826, "learning_rate": 5.536902795721928e-06, "loss": 0.5193783640861511, "step": 5472 }, { "epoch": 1.3303354399611085, "grad_norm": 1.6184534857592179, "learning_rate": 5.53330624756381e-06, "loss": 0.4246614873409271, "step": 5473 }, { "epoch": 1.3305785123966942, "grad_norm": 1.873729363413162, "learning_rate": 5.529710421039409e-06, "loss": 0.5803015828132629, "step": 5474 }, { "epoch": 1.33082158483228, "grad_norm": 1.5209059934221296, "learning_rate": 5.526115316729654e-06, "loss": 0.6144113540649414, "step": 5475 }, { "epoch": 1.331064657267866, "grad_norm": 1.4735138071608016, "learning_rate": 5.5225209352153805e-06, "loss": 0.5035043954849243, "step": 5476 }, { "epoch": 1.3313077297034517, "grad_norm": 1.5182767329851037, "learning_rate": 5.518927277077284e-06, "loss": 0.5491610169410706, "step": 5477 }, { "epoch": 1.3315508021390374, "grad_norm": 1.5896144193215376, "learning_rate": 5.515334342895946e-06, "loss": 0.4852726459503174, "step": 5478 }, { "epoch": 1.3317938745746232, "grad_norm": 1.6455424874623596, "learning_rate": 5.511742133251847e-06, "loss": 0.39569026231765747, "step": 5479 }, { "epoch": 1.332036947010209, "grad_norm": 1.452456866574849, "learning_rate": 5.508150648725332e-06, "loss": 0.6382920742034912, "step": 5480 }, { "epoch": 1.3322800194457949, "grad_norm": 1.476657083211137, "learning_rate": 5.504559889896645e-06, "loss": 0.5662292242050171, "step": 5481 }, { "epoch": 1.3325230918813806, "grad_norm": 1.6909675168209963, "learning_rate": 5.500969857345895e-06, "loss": 0.4491804540157318, "step": 5482 }, { "epoch": 1.3327661643169664, "grad_norm": 1.5929798596560225, "learning_rate": 5.497380551653089e-06, "loss": 0.5992166996002197, "step": 5483 }, { "epoch": 1.3330092367525523, "grad_norm": 1.44931299947261, "learning_rate": 5.493791973398111e-06, "loss": 0.46261757612228394, "step": 5484 }, { "epoch": 1.333252309188138, "grad_norm": 1.5375747837961118, "learning_rate": 5.49020412316073e-06, "loss": 0.4617674648761749, "step": 5485 }, { "epoch": 1.3334953816237238, "grad_norm": 1.6516469324896939, "learning_rate": 5.486617001520591e-06, "loss": 0.5428470373153687, "step": 5486 }, { "epoch": 1.3337384540593096, "grad_norm": 1.584109600047559, "learning_rate": 5.4830306090572216e-06, "loss": 0.7237628698348999, "step": 5487 }, { "epoch": 1.3339815264948955, "grad_norm": 1.4729768240156793, "learning_rate": 5.479444946350043e-06, "loss": 0.5534055233001709, "step": 5488 }, { "epoch": 1.3342245989304813, "grad_norm": 1.525860633638968, "learning_rate": 5.475860013978344e-06, "loss": 0.5350134968757629, "step": 5489 }, { "epoch": 1.334467671366067, "grad_norm": 1.8193792313406747, "learning_rate": 5.472275812521309e-06, "loss": 0.5377146601676941, "step": 5490 }, { "epoch": 1.334710743801653, "grad_norm": 1.5586311069829697, "learning_rate": 5.468692342557984e-06, "loss": 0.4651396870613098, "step": 5491 }, { "epoch": 1.3349538162372387, "grad_norm": 1.4440526086693795, "learning_rate": 5.465109604667329e-06, "loss": 0.5023981928825378, "step": 5492 }, { "epoch": 1.3351968886728245, "grad_norm": 2.1722503834110207, "learning_rate": 5.4615275994281514e-06, "loss": 0.6030065417289734, "step": 5493 }, { "epoch": 1.3354399611084102, "grad_norm": 1.341289765301215, "learning_rate": 5.457946327419166e-06, "loss": 0.3874070644378662, "step": 5494 }, { "epoch": 1.335683033543996, "grad_norm": 1.4570124957160682, "learning_rate": 5.4543657892189505e-06, "loss": 0.5415362119674683, "step": 5495 }, { "epoch": 1.335926105979582, "grad_norm": 1.6281840917897308, "learning_rate": 5.450785985405979e-06, "loss": 0.731366753578186, "step": 5496 }, { "epoch": 1.3361691784151677, "grad_norm": 1.843398705113017, "learning_rate": 5.4472069165586e-06, "loss": 0.5770534873008728, "step": 5497 }, { "epoch": 1.3364122508507537, "grad_norm": 1.4987963883364257, "learning_rate": 5.443628583255035e-06, "loss": 0.4920906722545624, "step": 5498 }, { "epoch": 1.3366553232863394, "grad_norm": 1.5819903621684481, "learning_rate": 5.440050986073406e-06, "loss": 0.5310781002044678, "step": 5499 }, { "epoch": 1.3368983957219251, "grad_norm": 1.4950080631149105, "learning_rate": 5.436474125591692e-06, "loss": 0.6327425241470337, "step": 5500 }, { "epoch": 1.337141468157511, "grad_norm": 1.5047871166824311, "learning_rate": 5.432898002387783e-06, "loss": 0.5346336364746094, "step": 5501 }, { "epoch": 1.3373845405930966, "grad_norm": 1.6042898001053651, "learning_rate": 5.429322617039422e-06, "loss": 0.6558163166046143, "step": 5502 }, { "epoch": 1.3376276130286826, "grad_norm": 1.8383052756712237, "learning_rate": 5.425747970124251e-06, "loss": 0.6016097068786621, "step": 5503 }, { "epoch": 1.3378706854642683, "grad_norm": 1.9515250886672935, "learning_rate": 5.422174062219777e-06, "loss": 0.6383423805236816, "step": 5504 }, { "epoch": 1.338113757899854, "grad_norm": 1.6952453977355681, "learning_rate": 5.418600893903405e-06, "loss": 0.4837382137775421, "step": 5505 }, { "epoch": 1.33835683033544, "grad_norm": 1.4353032591203074, "learning_rate": 5.415028465752408e-06, "loss": 0.568092942237854, "step": 5506 }, { "epoch": 1.3385999027710258, "grad_norm": 1.6354287967284629, "learning_rate": 5.411456778343941e-06, "loss": 0.5811023712158203, "step": 5507 }, { "epoch": 1.3388429752066116, "grad_norm": 1.5806877414440956, "learning_rate": 5.407885832255042e-06, "loss": 0.5436564683914185, "step": 5508 }, { "epoch": 1.3390860476421973, "grad_norm": 1.527506393354411, "learning_rate": 5.404315628062631e-06, "loss": 0.5544165372848511, "step": 5509 }, { "epoch": 1.339329120077783, "grad_norm": 1.692930762844472, "learning_rate": 5.400746166343509e-06, "loss": 0.569503903388977, "step": 5510 }, { "epoch": 1.339572192513369, "grad_norm": 1.7104707064352282, "learning_rate": 5.397177447674347e-06, "loss": 0.6912986040115356, "step": 5511 }, { "epoch": 1.3398152649489548, "grad_norm": 1.4252544066868076, "learning_rate": 5.393609472631711e-06, "loss": 0.49989113211631775, "step": 5512 }, { "epoch": 1.3400583373845407, "grad_norm": 1.6777393914495198, "learning_rate": 5.3900422417920295e-06, "loss": 0.7136327028274536, "step": 5513 }, { "epoch": 1.3403014098201265, "grad_norm": 1.5083062801362965, "learning_rate": 5.386475755731628e-06, "loss": 0.5198869705200195, "step": 5514 }, { "epoch": 1.3405444822557122, "grad_norm": 1.8221638969794967, "learning_rate": 5.382910015026701e-06, "loss": 0.5084515810012817, "step": 5515 }, { "epoch": 1.340787554691298, "grad_norm": 1.5862410277597139, "learning_rate": 5.379345020253317e-06, "loss": 0.6144943237304688, "step": 5516 }, { "epoch": 1.3410306271268837, "grad_norm": 1.609282219383091, "learning_rate": 5.375780771987449e-06, "loss": 0.49942678213119507, "step": 5517 }, { "epoch": 1.3412736995624697, "grad_norm": 1.9506091313421134, "learning_rate": 5.372217270804918e-06, "loss": 0.7396729588508606, "step": 5518 }, { "epoch": 1.3415167719980554, "grad_norm": 1.579771398354327, "learning_rate": 5.36865451728145e-06, "loss": 0.549672544002533, "step": 5519 }, { "epoch": 1.3417598444336412, "grad_norm": 1.6850968976152954, "learning_rate": 5.365092511992629e-06, "loss": 0.564732551574707, "step": 5520 }, { "epoch": 1.3420029168692271, "grad_norm": 1.360968111349972, "learning_rate": 5.361531255513939e-06, "loss": 0.3781586289405823, "step": 5521 }, { "epoch": 1.3422459893048129, "grad_norm": 1.4341257344743088, "learning_rate": 5.35797074842072e-06, "loss": 0.5064780116081238, "step": 5522 }, { "epoch": 1.3424890617403986, "grad_norm": 1.8019550647653808, "learning_rate": 5.354410991288216e-06, "loss": 0.5289180278778076, "step": 5523 }, { "epoch": 1.3427321341759844, "grad_norm": 1.2735562602536756, "learning_rate": 5.350851984691526e-06, "loss": 0.4521237015724182, "step": 5524 }, { "epoch": 1.3429752066115703, "grad_norm": 1.7145923750328524, "learning_rate": 5.347293729205644e-06, "loss": 0.5405783653259277, "step": 5525 }, { "epoch": 1.343218279047156, "grad_norm": 1.577830202400393, "learning_rate": 5.343736225405442e-06, "loss": 0.5326539874076843, "step": 5526 }, { "epoch": 1.3434613514827418, "grad_norm": 1.4539993916165475, "learning_rate": 5.340179473865657e-06, "loss": 0.6413589715957642, "step": 5527 }, { "epoch": 1.3437044239183278, "grad_norm": 1.3989572905888974, "learning_rate": 5.336623475160922e-06, "loss": 0.48116549849510193, "step": 5528 }, { "epoch": 1.3439474963539135, "grad_norm": 1.7117311049808732, "learning_rate": 5.333068229865733e-06, "loss": 0.5772029161453247, "step": 5529 }, { "epoch": 1.3441905687894993, "grad_norm": 1.9055317900104287, "learning_rate": 5.329513738554477e-06, "loss": 0.6137700080871582, "step": 5530 }, { "epoch": 1.344433641225085, "grad_norm": 1.4073261094395533, "learning_rate": 5.325960001801408e-06, "loss": 0.5562955141067505, "step": 5531 }, { "epoch": 1.3446767136606708, "grad_norm": 1.3645291759068914, "learning_rate": 5.322407020180664e-06, "loss": 0.5077908635139465, "step": 5532 }, { "epoch": 1.3449197860962567, "grad_norm": 1.2488211668889424, "learning_rate": 5.318854794266268e-06, "loss": 0.639677882194519, "step": 5533 }, { "epoch": 1.3451628585318425, "grad_norm": 1.4965592633939735, "learning_rate": 5.315303324632103e-06, "loss": 0.5246465802192688, "step": 5534 }, { "epoch": 1.3454059309674282, "grad_norm": 1.5635575268878543, "learning_rate": 5.311752611851949e-06, "loss": 0.5721767544746399, "step": 5535 }, { "epoch": 1.3456490034030142, "grad_norm": 1.548597670935442, "learning_rate": 5.308202656499447e-06, "loss": 0.43356817960739136, "step": 5536 }, { "epoch": 1.3458920758386, "grad_norm": 1.8893448732035238, "learning_rate": 5.3046534591481326e-06, "loss": 0.5077556371688843, "step": 5537 }, { "epoch": 1.3461351482741857, "grad_norm": 1.7662462941518413, "learning_rate": 5.3011050203714e-06, "loss": 0.570101261138916, "step": 5538 }, { "epoch": 1.3463782207097714, "grad_norm": 1.707451677864387, "learning_rate": 5.2975573407425385e-06, "loss": 0.5844742655754089, "step": 5539 }, { "epoch": 1.3466212931453574, "grad_norm": 1.487619304422867, "learning_rate": 5.294010420834701e-06, "loss": 0.5931998491287231, "step": 5540 }, { "epoch": 1.3468643655809431, "grad_norm": 1.4032888322384685, "learning_rate": 5.290464261220927e-06, "loss": 0.551025927066803, "step": 5541 }, { "epoch": 1.3471074380165289, "grad_norm": 1.7690369192734632, "learning_rate": 5.286918862474133e-06, "loss": 0.5790389180183411, "step": 5542 }, { "epoch": 1.3473505104521148, "grad_norm": 1.907546327159503, "learning_rate": 5.283374225167102e-06, "loss": 0.5330567359924316, "step": 5543 }, { "epoch": 1.3475935828877006, "grad_norm": 1.5858841571792117, "learning_rate": 5.279830349872508e-06, "loss": 0.4614320695400238, "step": 5544 }, { "epoch": 1.3478366553232863, "grad_norm": 1.4183028850779953, "learning_rate": 5.27628723716289e-06, "loss": 0.5474405288696289, "step": 5545 }, { "epoch": 1.348079727758872, "grad_norm": 1.5436394927777664, "learning_rate": 5.272744887610675e-06, "loss": 0.6143761873245239, "step": 5546 }, { "epoch": 1.3483228001944578, "grad_norm": 1.1202250336319044, "learning_rate": 5.269203301788155e-06, "loss": 0.4159061908721924, "step": 5547 }, { "epoch": 1.3485658726300438, "grad_norm": 1.781654756106231, "learning_rate": 5.265662480267508e-06, "loss": 0.5281141996383667, "step": 5548 }, { "epoch": 1.3488089450656295, "grad_norm": 1.607447637786129, "learning_rate": 5.262122423620782e-06, "loss": 0.6113530993461609, "step": 5549 }, { "epoch": 1.3490520175012153, "grad_norm": 1.546805008083035, "learning_rate": 5.258583132419904e-06, "loss": 0.6020617485046387, "step": 5550 }, { "epoch": 1.3492950899368013, "grad_norm": 1.6059553020311688, "learning_rate": 5.255044607236686e-06, "loss": 0.7770763635635376, "step": 5551 }, { "epoch": 1.349538162372387, "grad_norm": 1.525526831814953, "learning_rate": 5.251506848642795e-06, "loss": 0.585595965385437, "step": 5552 }, { "epoch": 1.3497812348079727, "grad_norm": 1.9034930032523303, "learning_rate": 5.247969857209798e-06, "loss": 0.664043664932251, "step": 5553 }, { "epoch": 1.3500243072435585, "grad_norm": 1.465148898008547, "learning_rate": 5.244433633509121e-06, "loss": 0.5668501853942871, "step": 5554 }, { "epoch": 1.3502673796791445, "grad_norm": 1.7387467060840527, "learning_rate": 5.240898178112075e-06, "loss": 0.5639811754226685, "step": 5555 }, { "epoch": 1.3505104521147302, "grad_norm": 1.8225221063112356, "learning_rate": 5.2373634915898406e-06, "loss": 0.6188721656799316, "step": 5556 }, { "epoch": 1.350753524550316, "grad_norm": 1.6131354360035401, "learning_rate": 5.2338295745134795e-06, "loss": 0.5673990249633789, "step": 5557 }, { "epoch": 1.350996596985902, "grad_norm": 1.5493717900895119, "learning_rate": 5.2302964274539304e-06, "loss": 0.66934734582901, "step": 5558 }, { "epoch": 1.3512396694214877, "grad_norm": 1.587311870532019, "learning_rate": 5.226764050981999e-06, "loss": 0.6461911797523499, "step": 5559 }, { "epoch": 1.3514827418570734, "grad_norm": 1.6629819287133576, "learning_rate": 5.223232445668376e-06, "loss": 0.5411165952682495, "step": 5560 }, { "epoch": 1.3517258142926591, "grad_norm": 1.4815851079968307, "learning_rate": 5.219701612083619e-06, "loss": 0.5585911273956299, "step": 5561 }, { "epoch": 1.351968886728245, "grad_norm": 1.6771420226115246, "learning_rate": 5.216171550798171e-06, "loss": 0.6102634072303772, "step": 5562 }, { "epoch": 1.3522119591638309, "grad_norm": 1.8609658856242557, "learning_rate": 5.2126422623823355e-06, "loss": 0.560269832611084, "step": 5563 }, { "epoch": 1.3524550315994166, "grad_norm": 1.353414499501973, "learning_rate": 5.209113747406311e-06, "loss": 0.49924200773239136, "step": 5564 }, { "epoch": 1.3526981040350026, "grad_norm": 1.7802742965905423, "learning_rate": 5.205586006440149e-06, "loss": 0.6527038812637329, "step": 5565 }, { "epoch": 1.3529411764705883, "grad_norm": 1.7021700627318612, "learning_rate": 5.202059040053799e-06, "loss": 0.4553072452545166, "step": 5566 }, { "epoch": 1.353184248906174, "grad_norm": 1.6462096829522703, "learning_rate": 5.1985328488170675e-06, "loss": 0.6975047588348389, "step": 5567 }, { "epoch": 1.3534273213417598, "grad_norm": 1.4296635046442814, "learning_rate": 5.195007433299638e-06, "loss": 0.4505651593208313, "step": 5568 }, { "epoch": 1.3536703937773455, "grad_norm": 1.7441148557259536, "learning_rate": 5.191482794071081e-06, "loss": 0.6372737884521484, "step": 5569 }, { "epoch": 1.3539134662129315, "grad_norm": 1.6603697378683004, "learning_rate": 5.187958931700823e-06, "loss": 0.688565731048584, "step": 5570 }, { "epoch": 1.3541565386485173, "grad_norm": 1.6195783497927383, "learning_rate": 5.184435846758186e-06, "loss": 0.603460967540741, "step": 5571 }, { "epoch": 1.354399611084103, "grad_norm": 1.3180652670560389, "learning_rate": 5.180913539812344e-06, "loss": 0.3999135494232178, "step": 5572 }, { "epoch": 1.354642683519689, "grad_norm": 1.5905957653362202, "learning_rate": 5.177392011432368e-06, "loss": 0.5823802947998047, "step": 5573 }, { "epoch": 1.3548857559552747, "grad_norm": 1.377978254652724, "learning_rate": 5.173871262187176e-06, "loss": 0.40725767612457275, "step": 5574 }, { "epoch": 1.3551288283908605, "grad_norm": 1.767855188803756, "learning_rate": 5.170351292645597e-06, "loss": 0.6383568644523621, "step": 5575 }, { "epoch": 1.3553719008264462, "grad_norm": 1.5319130281694588, "learning_rate": 5.166832103376301e-06, "loss": 0.503623902797699, "step": 5576 }, { "epoch": 1.355614973262032, "grad_norm": 1.4795916655062689, "learning_rate": 5.163313694947841e-06, "loss": 0.640358030796051, "step": 5577 }, { "epoch": 1.355858045697618, "grad_norm": 1.5883619412311867, "learning_rate": 5.159796067928656e-06, "loss": 0.6225963830947876, "step": 5578 }, { "epoch": 1.3561011181332037, "grad_norm": 1.5649556411850134, "learning_rate": 5.156279222887042e-06, "loss": 0.5466405153274536, "step": 5579 }, { "epoch": 1.3563441905687896, "grad_norm": 1.6514888042654527, "learning_rate": 5.152763160391181e-06, "loss": 0.7237280607223511, "step": 5580 }, { "epoch": 1.3565872630043754, "grad_norm": 1.664720368991577, "learning_rate": 5.1492478810091184e-06, "loss": 0.55120450258255, "step": 5581 }, { "epoch": 1.3568303354399611, "grad_norm": 1.4574185858685573, "learning_rate": 5.145733385308784e-06, "loss": 0.5297062397003174, "step": 5582 }, { "epoch": 1.3570734078755469, "grad_norm": 1.2109196751675837, "learning_rate": 5.142219673857973e-06, "loss": 0.6016855835914612, "step": 5583 }, { "epoch": 1.3573164803111326, "grad_norm": 1.8082146145759839, "learning_rate": 5.13870674722436e-06, "loss": 0.5188559889793396, "step": 5584 }, { "epoch": 1.3575595527467186, "grad_norm": 1.4251122842885144, "learning_rate": 5.135194605975487e-06, "loss": 0.6285718679428101, "step": 5585 }, { "epoch": 1.3578026251823043, "grad_norm": 1.6880544977068914, "learning_rate": 5.131683250678768e-06, "loss": 0.7598580121994019, "step": 5586 }, { "epoch": 1.35804569761789, "grad_norm": 1.4786164509660398, "learning_rate": 5.1281726819014995e-06, "loss": 0.5104266405105591, "step": 5587 }, { "epoch": 1.358288770053476, "grad_norm": 1.651485902530322, "learning_rate": 5.1246629002108374e-06, "loss": 0.43228185176849365, "step": 5588 }, { "epoch": 1.3585318424890618, "grad_norm": 1.5501964354238735, "learning_rate": 5.121153906173826e-06, "loss": 0.7106503844261169, "step": 5589 }, { "epoch": 1.3587749149246475, "grad_norm": 1.6138242426294584, "learning_rate": 5.117645700357361e-06, "loss": 0.4874434471130371, "step": 5590 }, { "epoch": 1.3590179873602333, "grad_norm": 1.610372322881039, "learning_rate": 5.114138283328243e-06, "loss": 0.4900287389755249, "step": 5591 }, { "epoch": 1.359261059795819, "grad_norm": 1.718636940108825, "learning_rate": 5.110631655653111e-06, "loss": 0.5485798120498657, "step": 5592 }, { "epoch": 1.359504132231405, "grad_norm": 1.5462305558535212, "learning_rate": 5.107125817898499e-06, "loss": 0.6213563680648804, "step": 5593 }, { "epoch": 1.3597472046669907, "grad_norm": 1.6019562564716572, "learning_rate": 5.103620770630806e-06, "loss": 0.5600353479385376, "step": 5594 }, { "epoch": 1.3599902771025767, "grad_norm": 1.4023150004180143, "learning_rate": 5.100116514416298e-06, "loss": 0.6215177178382874, "step": 5595 }, { "epoch": 1.3602333495381624, "grad_norm": 1.3788717729636886, "learning_rate": 5.096613049821123e-06, "loss": 0.6165161728858948, "step": 5596 }, { "epoch": 1.3604764219737482, "grad_norm": 1.4305721188819458, "learning_rate": 5.093110377411292e-06, "loss": 0.5481202602386475, "step": 5597 }, { "epoch": 1.360719494409334, "grad_norm": 1.4447982777021235, "learning_rate": 5.089608497752696e-06, "loss": 0.6455329656600952, "step": 5598 }, { "epoch": 1.3609625668449197, "grad_norm": 1.5924449329632029, "learning_rate": 5.086107411411093e-06, "loss": 0.4209446907043457, "step": 5599 }, { "epoch": 1.3612056392805056, "grad_norm": 1.648805226501189, "learning_rate": 5.08260711895212e-06, "loss": 0.6766629815101624, "step": 5600 }, { "epoch": 1.3614487117160914, "grad_norm": 1.5463024311006885, "learning_rate": 5.079107620941272e-06, "loss": 0.5727681517601013, "step": 5601 }, { "epoch": 1.3616917841516771, "grad_norm": 1.8163131882407821, "learning_rate": 5.07560891794393e-06, "loss": 0.5395742654800415, "step": 5602 }, { "epoch": 1.361934856587263, "grad_norm": 1.6448151883917783, "learning_rate": 5.072111010525333e-06, "loss": 0.5275571346282959, "step": 5603 }, { "epoch": 1.3621779290228488, "grad_norm": 1.8372356301860806, "learning_rate": 5.06861389925061e-06, "loss": 0.7507506608963013, "step": 5604 }, { "epoch": 1.3624210014584346, "grad_norm": 1.347459058502806, "learning_rate": 5.06511758468474e-06, "loss": 0.505587637424469, "step": 5605 }, { "epoch": 1.3626640738940203, "grad_norm": 1.6557239075261605, "learning_rate": 5.061622067392581e-06, "loss": 0.5192150473594666, "step": 5606 }, { "epoch": 1.3629071463296063, "grad_norm": 1.6465204116334018, "learning_rate": 5.058127347938877e-06, "loss": 0.5034152269363403, "step": 5607 }, { "epoch": 1.363150218765192, "grad_norm": 2.0014316098077534, "learning_rate": 5.054633426888221e-06, "loss": 0.6496097445487976, "step": 5608 }, { "epoch": 1.3633932912007778, "grad_norm": 1.6530190169553238, "learning_rate": 5.051140304805093e-06, "loss": 0.5387247800827026, "step": 5609 }, { "epoch": 1.3636363636363638, "grad_norm": 1.5686783376907245, "learning_rate": 5.047647982253832e-06, "loss": 0.64671790599823, "step": 5610 }, { "epoch": 1.3638794360719495, "grad_norm": 1.6339881117315163, "learning_rate": 5.044156459798659e-06, "loss": 0.6016594171524048, "step": 5611 }, { "epoch": 1.3641225085075352, "grad_norm": 1.6494963795489945, "learning_rate": 5.040665738003653e-06, "loss": 0.598944365978241, "step": 5612 }, { "epoch": 1.364365580943121, "grad_norm": 1.4684530485720093, "learning_rate": 5.037175817432779e-06, "loss": 0.70772784948349, "step": 5613 }, { "epoch": 1.3646086533787067, "grad_norm": 1.5127655727551006, "learning_rate": 5.03368669864986e-06, "loss": 0.5683411955833435, "step": 5614 }, { "epoch": 1.3648517258142927, "grad_norm": 1.712447725639775, "learning_rate": 5.030198382218587e-06, "loss": 0.6073794960975647, "step": 5615 }, { "epoch": 1.3650947982498784, "grad_norm": 1.5443914161730337, "learning_rate": 5.026710868702542e-06, "loss": 0.4576241970062256, "step": 5616 }, { "epoch": 1.3653378706854642, "grad_norm": 1.6693571990655882, "learning_rate": 5.023224158665154e-06, "loss": 0.644744873046875, "step": 5617 }, { "epoch": 1.3655809431210502, "grad_norm": 1.9002011390740503, "learning_rate": 5.0197382526697395e-06, "loss": 0.538170337677002, "step": 5618 }, { "epoch": 1.365824015556636, "grad_norm": 1.5127951454315944, "learning_rate": 5.016253151279466e-06, "loss": 0.5387158393859863, "step": 5619 }, { "epoch": 1.3660670879922217, "grad_norm": 1.534338605249501, "learning_rate": 5.012768855057395e-06, "loss": 0.42914092540740967, "step": 5620 }, { "epoch": 1.3663101604278074, "grad_norm": 1.8793792425320148, "learning_rate": 5.009285364566435e-06, "loss": 0.5880754590034485, "step": 5621 }, { "epoch": 1.3665532328633934, "grad_norm": 1.6442208570493126, "learning_rate": 5.005802680369383e-06, "loss": 0.5169143080711365, "step": 5622 }, { "epoch": 1.366796305298979, "grad_norm": 1.4081172737803833, "learning_rate": 5.002320803028887e-06, "loss": 0.44560179114341736, "step": 5623 }, { "epoch": 1.3670393777345649, "grad_norm": 1.4556287469349378, "learning_rate": 4.998839733107482e-06, "loss": 0.5241987705230713, "step": 5624 }, { "epoch": 1.3672824501701508, "grad_norm": 1.7937199578565584, "learning_rate": 4.995359471167569e-06, "loss": 0.6434186697006226, "step": 5625 }, { "epoch": 1.3675255226057366, "grad_norm": 1.5374332564906574, "learning_rate": 4.991880017771406e-06, "loss": 0.5273117423057556, "step": 5626 }, { "epoch": 1.3677685950413223, "grad_norm": 1.6313200197542308, "learning_rate": 4.988401373481137e-06, "loss": 0.5670918822288513, "step": 5627 }, { "epoch": 1.368011667476908, "grad_norm": 1.4650350881778291, "learning_rate": 4.984923538858762e-06, "loss": 0.5519446730613708, "step": 5628 }, { "epoch": 1.3682547399124938, "grad_norm": 1.7924575765084356, "learning_rate": 4.98144651446616e-06, "loss": 0.5332597494125366, "step": 5629 }, { "epoch": 1.3684978123480798, "grad_norm": 1.4483845493761003, "learning_rate": 4.9779703008650695e-06, "loss": 0.5918233394622803, "step": 5630 }, { "epoch": 1.3687408847836655, "grad_norm": 1.714906424095293, "learning_rate": 4.974494898617108e-06, "loss": 0.7590543031692505, "step": 5631 }, { "epoch": 1.3689839572192513, "grad_norm": 1.4769613340167773, "learning_rate": 4.971020308283759e-06, "loss": 0.5953475832939148, "step": 5632 }, { "epoch": 1.3692270296548372, "grad_norm": 1.3957399216556718, "learning_rate": 4.967546530426368e-06, "loss": 0.5790746212005615, "step": 5633 }, { "epoch": 1.369470102090423, "grad_norm": 1.6087150638700483, "learning_rate": 4.96407356560616e-06, "loss": 0.5692381858825684, "step": 5634 }, { "epoch": 1.3697131745260087, "grad_norm": 1.1665687201865704, "learning_rate": 4.9606014143842165e-06, "loss": 0.3364342451095581, "step": 5635 }, { "epoch": 1.3699562469615945, "grad_norm": 1.3990610011934268, "learning_rate": 4.957130077321502e-06, "loss": 0.5450031757354736, "step": 5636 }, { "epoch": 1.3701993193971804, "grad_norm": 1.4918682282597773, "learning_rate": 4.953659554978831e-06, "loss": 0.5704122185707092, "step": 5637 }, { "epoch": 1.3704423918327662, "grad_norm": 1.4071135076391796, "learning_rate": 4.950189847916909e-06, "loss": 0.5369749665260315, "step": 5638 }, { "epoch": 1.370685464268352, "grad_norm": 1.6279140967189833, "learning_rate": 4.946720956696287e-06, "loss": 0.47082439064979553, "step": 5639 }, { "epoch": 1.3709285367039379, "grad_norm": 1.588372995931689, "learning_rate": 4.943252881877401e-06, "loss": 0.5571011304855347, "step": 5640 }, { "epoch": 1.3711716091395236, "grad_norm": 1.549059694377331, "learning_rate": 4.93978562402055e-06, "loss": 0.47489649057388306, "step": 5641 }, { "epoch": 1.3714146815751094, "grad_norm": 1.5843142773384549, "learning_rate": 4.9363191836858946e-06, "loss": 0.5622274279594421, "step": 5642 }, { "epoch": 1.3716577540106951, "grad_norm": 1.5489087872983511, "learning_rate": 4.932853561433476e-06, "loss": 0.6054266691207886, "step": 5643 }, { "epoch": 1.3719008264462809, "grad_norm": 1.9099956811539236, "learning_rate": 4.929388757823189e-06, "loss": 0.4265066385269165, "step": 5644 }, { "epoch": 1.3721438988818668, "grad_norm": 1.6858042767775492, "learning_rate": 4.925924773414809e-06, "loss": 0.639433741569519, "step": 5645 }, { "epoch": 1.3723869713174526, "grad_norm": 1.728541151048982, "learning_rate": 4.922461608767967e-06, "loss": 0.5889131426811218, "step": 5646 }, { "epoch": 1.3726300437530385, "grad_norm": 1.4519461263278082, "learning_rate": 4.91899926444217e-06, "loss": 0.5960249900817871, "step": 5647 }, { "epoch": 1.3728731161886243, "grad_norm": 1.6343574953413686, "learning_rate": 4.915537740996797e-06, "loss": 0.5517693758010864, "step": 5648 }, { "epoch": 1.37311618862421, "grad_norm": 1.8630684859773787, "learning_rate": 4.912077038991076e-06, "loss": 0.519920289516449, "step": 5649 }, { "epoch": 1.3733592610597958, "grad_norm": 1.5445841700460117, "learning_rate": 4.908617158984123e-06, "loss": 0.5293078422546387, "step": 5650 }, { "epoch": 1.3736023334953815, "grad_norm": 1.7457589353318437, "learning_rate": 4.9051581015349085e-06, "loss": 0.6188732385635376, "step": 5651 }, { "epoch": 1.3738454059309675, "grad_norm": 1.6543070419599275, "learning_rate": 4.901699867202275e-06, "loss": 0.5082491636276245, "step": 5652 }, { "epoch": 1.3740884783665532, "grad_norm": 1.7367506975968376, "learning_rate": 4.8982424565449274e-06, "loss": 0.4900452494621277, "step": 5653 }, { "epoch": 1.374331550802139, "grad_norm": 1.776574961580228, "learning_rate": 4.894785870121446e-06, "loss": 0.5143173933029175, "step": 5654 }, { "epoch": 1.374574623237725, "grad_norm": 1.5675456701627652, "learning_rate": 4.8913301084902665e-06, "loss": 0.5565172433853149, "step": 5655 }, { "epoch": 1.3748176956733107, "grad_norm": 1.5038011412504704, "learning_rate": 4.887875172209702e-06, "loss": 0.5633959770202637, "step": 5656 }, { "epoch": 1.3750607681088964, "grad_norm": 1.5801108113582802, "learning_rate": 4.884421061837929e-06, "loss": 0.5695955157279968, "step": 5657 }, { "epoch": 1.3753038405444822, "grad_norm": 1.67346361954297, "learning_rate": 4.880967777932985e-06, "loss": 0.6055481433868408, "step": 5658 }, { "epoch": 1.375546912980068, "grad_norm": 1.5149870309817914, "learning_rate": 4.877515321052785e-06, "loss": 0.616217851638794, "step": 5659 }, { "epoch": 1.375789985415654, "grad_norm": 1.3805266217153327, "learning_rate": 4.8740636917550945e-06, "loss": 0.5248686671257019, "step": 5660 }, { "epoch": 1.3760330578512396, "grad_norm": 1.8297231961033942, "learning_rate": 4.870612890597564e-06, "loss": 0.6300902366638184, "step": 5661 }, { "epoch": 1.3762761302868256, "grad_norm": 1.7028670711695628, "learning_rate": 4.867162918137693e-06, "loss": 0.6330562829971313, "step": 5662 }, { "epoch": 1.3765192027224114, "grad_norm": 1.6374056712852059, "learning_rate": 4.8637137749328635e-06, "loss": 0.5227974653244019, "step": 5663 }, { "epoch": 1.376762275157997, "grad_norm": 1.9861816039194067, "learning_rate": 4.860265461540305e-06, "loss": 0.6831798553466797, "step": 5664 }, { "epoch": 1.3770053475935828, "grad_norm": 1.5431044502259479, "learning_rate": 4.8568179785171285e-06, "loss": 0.6606643199920654, "step": 5665 }, { "epoch": 1.3772484200291686, "grad_norm": 1.6370560919913637, "learning_rate": 4.853371326420308e-06, "loss": 0.4666289985179901, "step": 5666 }, { "epoch": 1.3774914924647546, "grad_norm": 1.6003224662632276, "learning_rate": 4.849925505806674e-06, "loss": 0.7217381596565247, "step": 5667 }, { "epoch": 1.3777345649003403, "grad_norm": 1.812964225659456, "learning_rate": 4.846480517232935e-06, "loss": 0.6415361166000366, "step": 5668 }, { "epoch": 1.377977637335926, "grad_norm": 1.8615413225040254, "learning_rate": 4.843036361255654e-06, "loss": 0.5562778115272522, "step": 5669 }, { "epoch": 1.378220709771512, "grad_norm": 1.5056385547100832, "learning_rate": 4.839593038431271e-06, "loss": 0.4878072738647461, "step": 5670 }, { "epoch": 1.3784637822070978, "grad_norm": 1.7869958903494958, "learning_rate": 4.836150549316078e-06, "loss": 0.7209490537643433, "step": 5671 }, { "epoch": 1.3787068546426835, "grad_norm": 1.587471724824267, "learning_rate": 4.8327088944662414e-06, "loss": 0.5902667045593262, "step": 5672 }, { "epoch": 1.3789499270782692, "grad_norm": 1.6377923847053013, "learning_rate": 4.829268074437793e-06, "loss": 0.5588816404342651, "step": 5673 }, { "epoch": 1.3791929995138552, "grad_norm": 1.4428074888790499, "learning_rate": 4.825828089786632e-06, "loss": 0.6977479457855225, "step": 5674 }, { "epoch": 1.379436071949441, "grad_norm": 1.6422633825679434, "learning_rate": 4.822388941068511e-06, "loss": 0.613344132900238, "step": 5675 }, { "epoch": 1.3796791443850267, "grad_norm": 1.6846628442392428, "learning_rate": 4.818950628839054e-06, "loss": 0.6200190782546997, "step": 5676 }, { "epoch": 1.3799222168206127, "grad_norm": 1.5170809921256823, "learning_rate": 4.815513153653758e-06, "loss": 0.6166459918022156, "step": 5677 }, { "epoch": 1.3801652892561984, "grad_norm": 1.6786168597490538, "learning_rate": 4.8120765160679685e-06, "loss": 0.5843775868415833, "step": 5678 }, { "epoch": 1.3804083616917842, "grad_norm": 1.520927275199568, "learning_rate": 4.808640716636912e-06, "loss": 0.5402352809906006, "step": 5679 }, { "epoch": 1.38065143412737, "grad_norm": 1.4753361384490102, "learning_rate": 4.805205755915663e-06, "loss": 0.5267714262008667, "step": 5680 }, { "epoch": 1.3808945065629556, "grad_norm": 1.524981019697379, "learning_rate": 4.801771634459182e-06, "loss": 0.6275893449783325, "step": 5681 }, { "epoch": 1.3811375789985416, "grad_norm": 1.6388119654204585, "learning_rate": 4.798338352822273e-06, "loss": 0.5678187608718872, "step": 5682 }, { "epoch": 1.3813806514341274, "grad_norm": 1.2984355191280377, "learning_rate": 4.794905911559619e-06, "loss": 0.5696035027503967, "step": 5683 }, { "epoch": 1.381623723869713, "grad_norm": 1.5752931957344096, "learning_rate": 4.791474311225756e-06, "loss": 0.5250852108001709, "step": 5684 }, { "epoch": 1.381866796305299, "grad_norm": 1.5816578307445304, "learning_rate": 4.788043552375087e-06, "loss": 0.6015030145645142, "step": 5685 }, { "epoch": 1.3821098687408848, "grad_norm": 1.728823586185269, "learning_rate": 4.78461363556189e-06, "loss": 0.5990656614303589, "step": 5686 }, { "epoch": 1.3823529411764706, "grad_norm": 1.7229543590162333, "learning_rate": 4.781184561340288e-06, "loss": 0.5100196003913879, "step": 5687 }, { "epoch": 1.3825960136120563, "grad_norm": 1.5249950360549702, "learning_rate": 4.777756330264284e-06, "loss": 0.5572899580001831, "step": 5688 }, { "epoch": 1.3828390860476423, "grad_norm": 1.538983228485553, "learning_rate": 4.774328942887738e-06, "loss": 0.4903918504714966, "step": 5689 }, { "epoch": 1.383082158483228, "grad_norm": 1.3712081690477835, "learning_rate": 4.770902399764379e-06, "loss": 0.642586350440979, "step": 5690 }, { "epoch": 1.3833252309188138, "grad_norm": 1.6091972684467504, "learning_rate": 4.767476701447787e-06, "loss": 0.5592391490936279, "step": 5691 }, { "epoch": 1.3835683033543997, "grad_norm": 1.7771762952683363, "learning_rate": 4.764051848491422e-06, "loss": 0.567354679107666, "step": 5692 }, { "epoch": 1.3838113757899855, "grad_norm": 1.6844796061907068, "learning_rate": 4.760627841448595e-06, "loss": 0.5608678460121155, "step": 5693 }, { "epoch": 1.3840544482255712, "grad_norm": 1.8592257823507932, "learning_rate": 4.75720468087248e-06, "loss": 0.7117698192596436, "step": 5694 }, { "epoch": 1.384297520661157, "grad_norm": 1.7307615747494887, "learning_rate": 4.753782367316129e-06, "loss": 0.6164040565490723, "step": 5695 }, { "epoch": 1.3845405930967427, "grad_norm": 1.7203628690482011, "learning_rate": 4.750360901332435e-06, "loss": 0.5680033564567566, "step": 5696 }, { "epoch": 1.3847836655323287, "grad_norm": 1.8059809080511282, "learning_rate": 4.746940283474173e-06, "loss": 0.5668635368347168, "step": 5697 }, { "epoch": 1.3850267379679144, "grad_norm": 1.710226529661135, "learning_rate": 4.743520514293973e-06, "loss": 0.6740709543228149, "step": 5698 }, { "epoch": 1.3852698104035002, "grad_norm": 1.6222694894484386, "learning_rate": 4.740101594344333e-06, "loss": 0.6944831013679504, "step": 5699 }, { "epoch": 1.3855128828390861, "grad_norm": 1.497430627311122, "learning_rate": 4.736683524177601e-06, "loss": 0.5511426329612732, "step": 5700 }, { "epoch": 1.3857559552746719, "grad_norm": 1.6654756362524596, "learning_rate": 4.733266304346005e-06, "loss": 0.6235562562942505, "step": 5701 }, { "epoch": 1.3859990277102576, "grad_norm": 1.6337978592912508, "learning_rate": 4.729849935401619e-06, "loss": 0.476235032081604, "step": 5702 }, { "epoch": 1.3862421001458434, "grad_norm": 1.7093348189991135, "learning_rate": 4.726434417896395e-06, "loss": 0.7308721542358398, "step": 5703 }, { "epoch": 1.3864851725814293, "grad_norm": 1.4227337338234456, "learning_rate": 4.723019752382136e-06, "loss": 0.44790714979171753, "step": 5704 }, { "epoch": 1.386728245017015, "grad_norm": 1.5099048878345134, "learning_rate": 4.7196059394105045e-06, "loss": 0.5494951605796814, "step": 5705 }, { "epoch": 1.3869713174526008, "grad_norm": 1.3601435823242662, "learning_rate": 4.716192979533046e-06, "loss": 0.43779003620147705, "step": 5706 }, { "epoch": 1.3872143898881868, "grad_norm": 1.4954327972904555, "learning_rate": 4.7127808733011435e-06, "loss": 0.5069074630737305, "step": 5707 }, { "epoch": 1.3874574623237725, "grad_norm": 1.4027669692398188, "learning_rate": 4.7093696212660615e-06, "loss": 0.5223957300186157, "step": 5708 }, { "epoch": 1.3877005347593583, "grad_norm": 1.605515944397851, "learning_rate": 4.705959223978908e-06, "loss": 0.600683331489563, "step": 5709 }, { "epoch": 1.387943607194944, "grad_norm": 1.6480434657633982, "learning_rate": 4.7025496819906715e-06, "loss": 0.7203761339187622, "step": 5710 }, { "epoch": 1.3881866796305298, "grad_norm": 1.5840451825504125, "learning_rate": 4.699140995852185e-06, "loss": 0.5959616899490356, "step": 5711 }, { "epoch": 1.3884297520661157, "grad_norm": 1.8366396548083526, "learning_rate": 4.6957331661141614e-06, "loss": 0.5328337550163269, "step": 5712 }, { "epoch": 1.3886728245017015, "grad_norm": 1.920855054552912, "learning_rate": 4.692326193327157e-06, "loss": 0.5750507116317749, "step": 5713 }, { "epoch": 1.3889158969372872, "grad_norm": 1.8170233980586012, "learning_rate": 4.6889200780416015e-06, "loss": 0.5543847680091858, "step": 5714 }, { "epoch": 1.3891589693728732, "grad_norm": 1.6051018130566348, "learning_rate": 4.685514820807787e-06, "loss": 0.6098638772964478, "step": 5715 }, { "epoch": 1.389402041808459, "grad_norm": 1.5630965091808866, "learning_rate": 4.682110422175855e-06, "loss": 0.630082368850708, "step": 5716 }, { "epoch": 1.3896451142440447, "grad_norm": 1.8880023899796472, "learning_rate": 4.678706882695824e-06, "loss": 0.5094325542449951, "step": 5717 }, { "epoch": 1.3898881866796304, "grad_norm": 1.4983603583771383, "learning_rate": 4.6753042029175575e-06, "loss": 0.6858240962028503, "step": 5718 }, { "epoch": 1.3901312591152164, "grad_norm": 1.515576683099251, "learning_rate": 4.671902383390798e-06, "loss": 0.5283656716346741, "step": 5719 }, { "epoch": 1.3903743315508021, "grad_norm": 1.544840226853185, "learning_rate": 4.66850142466513e-06, "loss": 0.5539214611053467, "step": 5720 }, { "epoch": 1.390617403986388, "grad_norm": 1.8038103168123663, "learning_rate": 4.665101327290011e-06, "loss": 0.6096948385238647, "step": 5721 }, { "epoch": 1.3908604764219739, "grad_norm": 1.8436605195900841, "learning_rate": 4.661702091814763e-06, "loss": 0.5706946849822998, "step": 5722 }, { "epoch": 1.3911035488575596, "grad_norm": 1.5556114977557942, "learning_rate": 4.658303718788555e-06, "loss": 0.7005075216293335, "step": 5723 }, { "epoch": 1.3913466212931453, "grad_norm": 1.6313011450855324, "learning_rate": 4.6549062087604295e-06, "loss": 0.5582723617553711, "step": 5724 }, { "epoch": 1.391589693728731, "grad_norm": 1.6934714184364221, "learning_rate": 4.651509562279276e-06, "loss": 0.5114140510559082, "step": 5725 }, { "epoch": 1.3918327661643168, "grad_norm": 1.5562358374077234, "learning_rate": 4.648113779893863e-06, "loss": 0.7418616414070129, "step": 5726 }, { "epoch": 1.3920758385999028, "grad_norm": 1.3832344025817953, "learning_rate": 4.644718862152802e-06, "loss": 0.4269218444824219, "step": 5727 }, { "epoch": 1.3923189110354885, "grad_norm": 1.567174029920462, "learning_rate": 4.6413248096045745e-06, "loss": 0.5813754200935364, "step": 5728 }, { "epoch": 1.3925619834710745, "grad_norm": 1.5249962398004273, "learning_rate": 4.637931622797518e-06, "loss": 0.5012434124946594, "step": 5729 }, { "epoch": 1.3928050559066603, "grad_norm": 1.743680050026521, "learning_rate": 4.634539302279832e-06, "loss": 0.5623968839645386, "step": 5730 }, { "epoch": 1.393048128342246, "grad_norm": 1.8207897137300848, "learning_rate": 4.6311478485995786e-06, "loss": 0.520900309085846, "step": 5731 }, { "epoch": 1.3932912007778318, "grad_norm": 1.9556544044039266, "learning_rate": 4.627757262304673e-06, "loss": 0.6909269690513611, "step": 5732 }, { "epoch": 1.3935342732134175, "grad_norm": 1.635642888275135, "learning_rate": 4.624367543942899e-06, "loss": 0.4372294545173645, "step": 5733 }, { "epoch": 1.3937773456490035, "grad_norm": 1.7056038140677694, "learning_rate": 4.62097869406189e-06, "loss": 0.6538774967193604, "step": 5734 }, { "epoch": 1.3940204180845892, "grad_norm": 1.5456743553925933, "learning_rate": 4.617590713209149e-06, "loss": 0.5492606163024902, "step": 5735 }, { "epoch": 1.394263490520175, "grad_norm": 1.7130678159944845, "learning_rate": 4.61420360193203e-06, "loss": 0.5512596368789673, "step": 5736 }, { "epoch": 1.394506562955761, "grad_norm": 1.849353326168177, "learning_rate": 4.610817360777756e-06, "loss": 0.622870922088623, "step": 5737 }, { "epoch": 1.3947496353913467, "grad_norm": 1.677791495709844, "learning_rate": 4.607431990293398e-06, "loss": 0.6319071054458618, "step": 5738 }, { "epoch": 1.3949927078269324, "grad_norm": 1.6785798423380214, "learning_rate": 4.6040474910258945e-06, "loss": 0.5890023708343506, "step": 5739 }, { "epoch": 1.3952357802625182, "grad_norm": 1.4524054013665293, "learning_rate": 4.6006638635220465e-06, "loss": 0.6601066589355469, "step": 5740 }, { "epoch": 1.395478852698104, "grad_norm": 1.4844410007466475, "learning_rate": 4.597281108328502e-06, "loss": 0.5309164524078369, "step": 5741 }, { "epoch": 1.3957219251336899, "grad_norm": 1.441866841980882, "learning_rate": 4.593899225991779e-06, "loss": 0.511113703250885, "step": 5742 }, { "epoch": 1.3959649975692756, "grad_norm": 1.493987627163759, "learning_rate": 4.590518217058246e-06, "loss": 0.478415310382843, "step": 5743 }, { "epoch": 1.3962080700048616, "grad_norm": 1.3482406004986451, "learning_rate": 4.58713808207414e-06, "loss": 0.5266748666763306, "step": 5744 }, { "epoch": 1.3964511424404473, "grad_norm": 1.593568915081143, "learning_rate": 4.583758821585548e-06, "loss": 0.6622949838638306, "step": 5745 }, { "epoch": 1.396694214876033, "grad_norm": 1.5434865799563908, "learning_rate": 4.580380436138419e-06, "loss": 0.5315272808074951, "step": 5746 }, { "epoch": 1.3969372873116188, "grad_norm": 1.781842583153613, "learning_rate": 4.577002926278564e-06, "loss": 0.6833077669143677, "step": 5747 }, { "epoch": 1.3971803597472046, "grad_norm": 1.5042819881318241, "learning_rate": 4.573626292551646e-06, "loss": 0.5491222739219666, "step": 5748 }, { "epoch": 1.3974234321827905, "grad_norm": 1.3323438932898481, "learning_rate": 4.570250535503196e-06, "loss": 0.40704911947250366, "step": 5749 }, { "epoch": 1.3976665046183763, "grad_norm": 1.745590976786976, "learning_rate": 4.5668756556785875e-06, "loss": 0.7496710419654846, "step": 5750 }, { "epoch": 1.397909577053962, "grad_norm": 1.7042443007695771, "learning_rate": 4.563501653623073e-06, "loss": 0.5137702822685242, "step": 5751 }, { "epoch": 1.398152649489548, "grad_norm": 1.7934560428845434, "learning_rate": 4.560128529881742e-06, "loss": 0.638783872127533, "step": 5752 }, { "epoch": 1.3983957219251337, "grad_norm": 1.5219100061310222, "learning_rate": 4.55675628499956e-06, "loss": 0.5334228277206421, "step": 5753 }, { "epoch": 1.3986387943607195, "grad_norm": 1.5109106262651928, "learning_rate": 4.553384919521337e-06, "loss": 0.5311899185180664, "step": 5754 }, { "epoch": 1.3988818667963052, "grad_norm": 1.7552938636959705, "learning_rate": 4.550014433991751e-06, "loss": 0.6038998365402222, "step": 5755 }, { "epoch": 1.3991249392318912, "grad_norm": 1.792356893997138, "learning_rate": 4.546644828955336e-06, "loss": 0.5456304550170898, "step": 5756 }, { "epoch": 1.399368011667477, "grad_norm": 1.8966861335835585, "learning_rate": 4.543276104956472e-06, "loss": 0.6182126998901367, "step": 5757 }, { "epoch": 1.3996110841030627, "grad_norm": 1.7234473977710034, "learning_rate": 4.539908262539416e-06, "loss": 0.5185145139694214, "step": 5758 }, { "epoch": 1.3998541565386486, "grad_norm": 1.4729295058955292, "learning_rate": 4.536541302248266e-06, "loss": 0.6079758405685425, "step": 5759 }, { "epoch": 1.4000972289742344, "grad_norm": 1.5006597693514978, "learning_rate": 4.53317522462699e-06, "loss": 0.4671083092689514, "step": 5760 }, { "epoch": 1.4003403014098201, "grad_norm": 1.5041145310898199, "learning_rate": 4.529810030219399e-06, "loss": 0.4762170910835266, "step": 5761 }, { "epoch": 1.4005833738454059, "grad_norm": 1.3845796872388496, "learning_rate": 4.5264457195691756e-06, "loss": 0.6688416004180908, "step": 5762 }, { "epoch": 1.4008264462809916, "grad_norm": 1.342135950075817, "learning_rate": 4.523082293219857e-06, "loss": 0.5190792679786682, "step": 5763 }, { "epoch": 1.4010695187165776, "grad_norm": 1.9144434692155985, "learning_rate": 4.519719751714826e-06, "loss": 0.5356631875038147, "step": 5764 }, { "epoch": 1.4013125911521633, "grad_norm": 1.5876008928742429, "learning_rate": 4.5163580955973384e-06, "loss": 0.5523781776428223, "step": 5765 }, { "epoch": 1.401555663587749, "grad_norm": 1.5191718434483221, "learning_rate": 4.512997325410493e-06, "loss": 0.670745849609375, "step": 5766 }, { "epoch": 1.401798736023335, "grad_norm": 1.548328819264119, "learning_rate": 4.509637441697259e-06, "loss": 0.42685648798942566, "step": 5767 }, { "epoch": 1.4020418084589208, "grad_norm": 1.6067430234592157, "learning_rate": 4.506278445000447e-06, "loss": 0.6751025319099426, "step": 5768 }, { "epoch": 1.4022848808945065, "grad_norm": 1.2995052075540923, "learning_rate": 4.50292033586274e-06, "loss": 0.4906209111213684, "step": 5769 }, { "epoch": 1.4025279533300923, "grad_norm": 1.8312937058254144, "learning_rate": 4.4995631148266626e-06, "loss": 0.40579134225845337, "step": 5770 }, { "epoch": 1.4027710257656782, "grad_norm": 1.4830541198910243, "learning_rate": 4.496206782434609e-06, "loss": 0.5205479860305786, "step": 5771 }, { "epoch": 1.403014098201264, "grad_norm": 1.453922832000359, "learning_rate": 4.492851339228821e-06, "loss": 0.6394705772399902, "step": 5772 }, { "epoch": 1.4032571706368497, "grad_norm": 1.7839014731512837, "learning_rate": 4.489496785751407e-06, "loss": 0.5578072667121887, "step": 5773 }, { "epoch": 1.4035002430724357, "grad_norm": 1.6614439187092271, "learning_rate": 4.486143122544318e-06, "loss": 0.5303750038146973, "step": 5774 }, { "epoch": 1.4037433155080214, "grad_norm": 1.6642806976666302, "learning_rate": 4.482790350149366e-06, "loss": 0.5867366790771484, "step": 5775 }, { "epoch": 1.4039863879436072, "grad_norm": 1.5997013747298603, "learning_rate": 4.479438469108229e-06, "loss": 0.5529788732528687, "step": 5776 }, { "epoch": 1.404229460379193, "grad_norm": 1.825435149419184, "learning_rate": 4.476087479962424e-06, "loss": 0.6134921312332153, "step": 5777 }, { "epoch": 1.4044725328147787, "grad_norm": 1.3930081111466561, "learning_rate": 4.472737383253339e-06, "loss": 0.5353362560272217, "step": 5778 }, { "epoch": 1.4047156052503647, "grad_norm": 1.5229251149188785, "learning_rate": 4.469388179522204e-06, "loss": 0.5142531394958496, "step": 5779 }, { "epoch": 1.4049586776859504, "grad_norm": 1.7674250297563825, "learning_rate": 4.466039869310125e-06, "loss": 0.6815308332443237, "step": 5780 }, { "epoch": 1.4052017501215361, "grad_norm": 1.7872901308379838, "learning_rate": 4.4626924531580395e-06, "loss": 0.5757685899734497, "step": 5781 }, { "epoch": 1.405444822557122, "grad_norm": 1.4883951561771651, "learning_rate": 4.45934593160676e-06, "loss": 0.5472963452339172, "step": 5782 }, { "epoch": 1.4056878949927079, "grad_norm": 1.7261364047693704, "learning_rate": 4.4560003051969436e-06, "loss": 0.5868048667907715, "step": 5783 }, { "epoch": 1.4059309674282936, "grad_norm": 2.159908140310393, "learning_rate": 4.452655574469101e-06, "loss": 0.6476770639419556, "step": 5784 }, { "epoch": 1.4061740398638793, "grad_norm": 1.5554428413082897, "learning_rate": 4.449311739963611e-06, "loss": 0.5731048583984375, "step": 5785 }, { "epoch": 1.4064171122994653, "grad_norm": 1.451892590371698, "learning_rate": 4.445968802220692e-06, "loss": 0.5392138957977295, "step": 5786 }, { "epoch": 1.406660184735051, "grad_norm": 1.612589856669812, "learning_rate": 4.442626761780429e-06, "loss": 0.4823251962661743, "step": 5787 }, { "epoch": 1.4069032571706368, "grad_norm": 1.5221337932280397, "learning_rate": 4.439285619182756e-06, "loss": 0.507369875907898, "step": 5788 }, { "epoch": 1.4071463296062228, "grad_norm": 1.7068850251914383, "learning_rate": 4.435945374967471e-06, "loss": 0.4424838423728943, "step": 5789 }, { "epoch": 1.4073894020418085, "grad_norm": 1.7417999080601978, "learning_rate": 4.432606029674209e-06, "loss": 0.6163325309753418, "step": 5790 }, { "epoch": 1.4076324744773943, "grad_norm": 1.4694721077433428, "learning_rate": 4.429267583842482e-06, "loss": 0.6477739810943604, "step": 5791 }, { "epoch": 1.40787554691298, "grad_norm": 1.6635537797545592, "learning_rate": 4.4259300380116376e-06, "loss": 0.7291178703308105, "step": 5792 }, { "epoch": 1.4081186193485657, "grad_norm": 1.697885258224913, "learning_rate": 4.422593392720884e-06, "loss": 0.562110424041748, "step": 5793 }, { "epoch": 1.4083616917841517, "grad_norm": 1.519193131234094, "learning_rate": 4.419257648509293e-06, "loss": 0.6030217409133911, "step": 5794 }, { "epoch": 1.4086047642197375, "grad_norm": 1.5806464870294814, "learning_rate": 4.415922805915773e-06, "loss": 0.5627946257591248, "step": 5795 }, { "epoch": 1.4088478366553232, "grad_norm": 1.4101648918876537, "learning_rate": 4.412588865479112e-06, "loss": 0.4842926859855652, "step": 5796 }, { "epoch": 1.4090909090909092, "grad_norm": 1.968020544974963, "learning_rate": 4.4092558277379235e-06, "loss": 0.5275899171829224, "step": 5797 }, { "epoch": 1.409333981526495, "grad_norm": 1.9722382046760092, "learning_rate": 4.4059236932307e-06, "loss": 0.4823375940322876, "step": 5798 }, { "epoch": 1.4095770539620807, "grad_norm": 1.6691985281834192, "learning_rate": 4.402592462495768e-06, "loss": 0.7803664803504944, "step": 5799 }, { "epoch": 1.4098201263976664, "grad_norm": 1.6396559797295267, "learning_rate": 4.399262136071325e-06, "loss": 0.44395655393600464, "step": 5800 }, { "epoch": 1.4100631988332524, "grad_norm": 1.6380314266648557, "learning_rate": 4.395932714495411e-06, "loss": 0.5659920573234558, "step": 5801 }, { "epoch": 1.4103062712688381, "grad_norm": 1.5686156122615318, "learning_rate": 4.39260419830592e-06, "loss": 0.5248848795890808, "step": 5802 }, { "epoch": 1.4105493437044239, "grad_norm": 1.5195509486195422, "learning_rate": 4.389276588040605e-06, "loss": 0.5743666887283325, "step": 5803 }, { "epoch": 1.4107924161400098, "grad_norm": 1.3485460841170236, "learning_rate": 4.3859498842370726e-06, "loss": 0.4707481861114502, "step": 5804 }, { "epoch": 1.4110354885755956, "grad_norm": 1.4058880406091914, "learning_rate": 4.382624087432784e-06, "loss": 0.5559327602386475, "step": 5805 }, { "epoch": 1.4112785610111813, "grad_norm": 1.4591114159791148, "learning_rate": 4.379299198165045e-06, "loss": 0.6467941999435425, "step": 5806 }, { "epoch": 1.411521633446767, "grad_norm": 1.5136719639984342, "learning_rate": 4.375975216971026e-06, "loss": 0.698745846748352, "step": 5807 }, { "epoch": 1.4117647058823528, "grad_norm": 1.8074223507626912, "learning_rate": 4.372652144387739e-06, "loss": 0.6399469971656799, "step": 5808 }, { "epoch": 1.4120077783179388, "grad_norm": 1.5835053116592794, "learning_rate": 4.369329980952063e-06, "loss": 0.6579080820083618, "step": 5809 }, { "epoch": 1.4122508507535245, "grad_norm": 1.6226584934327988, "learning_rate": 4.366008727200715e-06, "loss": 0.5943614840507507, "step": 5810 }, { "epoch": 1.4124939231891105, "grad_norm": 1.6424393474770544, "learning_rate": 4.362688383670281e-06, "loss": 0.553381085395813, "step": 5811 }, { "epoch": 1.4127369956246962, "grad_norm": 1.4982218959292957, "learning_rate": 4.359368950897184e-06, "loss": 0.40938276052474976, "step": 5812 }, { "epoch": 1.412980068060282, "grad_norm": 1.950980006443775, "learning_rate": 4.356050429417711e-06, "loss": 0.5590344667434692, "step": 5813 }, { "epoch": 1.4132231404958677, "grad_norm": 1.339529589441458, "learning_rate": 4.352732819768003e-06, "loss": 0.49156856536865234, "step": 5814 }, { "epoch": 1.4134662129314535, "grad_norm": 1.7272767814422323, "learning_rate": 4.3494161224840404e-06, "loss": 0.5471696257591248, "step": 5815 }, { "epoch": 1.4137092853670394, "grad_norm": 2.096723204626928, "learning_rate": 4.346100338101673e-06, "loss": 0.6227936744689941, "step": 5816 }, { "epoch": 1.4139523578026252, "grad_norm": 1.9173203055033565, "learning_rate": 4.342785467156587e-06, "loss": 0.6234991550445557, "step": 5817 }, { "epoch": 1.414195430238211, "grad_norm": 1.4107001744127454, "learning_rate": 4.339471510184336e-06, "loss": 0.626317024230957, "step": 5818 }, { "epoch": 1.414438502673797, "grad_norm": 1.4412153765742333, "learning_rate": 4.336158467720314e-06, "loss": 0.46819859743118286, "step": 5819 }, { "epoch": 1.4146815751093826, "grad_norm": 1.5426687168561042, "learning_rate": 4.332846340299774e-06, "loss": 0.464332640171051, "step": 5820 }, { "epoch": 1.4149246475449684, "grad_norm": 2.1530716138165213, "learning_rate": 4.329535128457822e-06, "loss": 0.5396307706832886, "step": 5821 }, { "epoch": 1.4151677199805541, "grad_norm": 1.8532304923873433, "learning_rate": 4.3262248327294085e-06, "loss": 0.7301306128501892, "step": 5822 }, { "epoch": 1.4154107924161399, "grad_norm": 1.5414763805090845, "learning_rate": 4.322915453649347e-06, "loss": 0.4842376708984375, "step": 5823 }, { "epoch": 1.4156538648517258, "grad_norm": 1.4628277341718987, "learning_rate": 4.3196069917522895e-06, "loss": 0.6350372433662415, "step": 5824 }, { "epoch": 1.4158969372873116, "grad_norm": 1.707400512900268, "learning_rate": 4.316299447572753e-06, "loss": 0.5362594127655029, "step": 5825 }, { "epoch": 1.4161400097228976, "grad_norm": 1.814250574332531, "learning_rate": 4.312992821645097e-06, "loss": 0.5462625026702881, "step": 5826 }, { "epoch": 1.4163830821584833, "grad_norm": 1.5931133966970874, "learning_rate": 4.30968711450354e-06, "loss": 0.49921655654907227, "step": 5827 }, { "epoch": 1.416626154594069, "grad_norm": 1.8692888150195248, "learning_rate": 4.306382326682142e-06, "loss": 0.4615190923213959, "step": 5828 }, { "epoch": 1.4168692270296548, "grad_norm": 1.5533326417622384, "learning_rate": 4.303078458714824e-06, "loss": 0.6270062923431396, "step": 5829 }, { "epoch": 1.4171122994652405, "grad_norm": 1.8540916521765287, "learning_rate": 4.299775511135358e-06, "loss": 0.6012627482414246, "step": 5830 }, { "epoch": 1.4173553719008265, "grad_norm": 1.6752647413739117, "learning_rate": 4.296473484477358e-06, "loss": 0.3900397717952728, "step": 5831 }, { "epoch": 1.4175984443364122, "grad_norm": 1.572387242053971, "learning_rate": 4.293172379274303e-06, "loss": 0.5389912128448486, "step": 5832 }, { "epoch": 1.417841516771998, "grad_norm": 1.620984912490404, "learning_rate": 4.289872196059507e-06, "loss": 0.5727679133415222, "step": 5833 }, { "epoch": 1.418084589207584, "grad_norm": 1.6479949023279692, "learning_rate": 4.286572935366152e-06, "loss": 0.5535921454429626, "step": 5834 }, { "epoch": 1.4183276616431697, "grad_norm": 1.5661748815647303, "learning_rate": 4.283274597727255e-06, "loss": 0.5896140336990356, "step": 5835 }, { "epoch": 1.4185707340787554, "grad_norm": 1.606404862124366, "learning_rate": 4.2799771836756956e-06, "loss": 0.593326210975647, "step": 5836 }, { "epoch": 1.4188138065143412, "grad_norm": 1.5029006021221294, "learning_rate": 4.2766806937442025e-06, "loss": 0.6311753988265991, "step": 5837 }, { "epoch": 1.4190568789499272, "grad_norm": 1.4601754621816545, "learning_rate": 4.273385128465347e-06, "loss": 0.5076234936714172, "step": 5838 }, { "epoch": 1.419299951385513, "grad_norm": 1.942121886660383, "learning_rate": 4.270090488371562e-06, "loss": 0.5881384611129761, "step": 5839 }, { "epoch": 1.4195430238210986, "grad_norm": 1.583028931828058, "learning_rate": 4.26679677399512e-06, "loss": 0.6747634410858154, "step": 5840 }, { "epoch": 1.4197860962566846, "grad_norm": 1.6087809441935148, "learning_rate": 4.263503985868157e-06, "loss": 0.6732971668243408, "step": 5841 }, { "epoch": 1.4200291686922704, "grad_norm": 1.4578421360276117, "learning_rate": 4.260212124522644e-06, "loss": 0.539638876914978, "step": 5842 }, { "epoch": 1.420272241127856, "grad_norm": 1.6114573948266453, "learning_rate": 4.256921190490416e-06, "loss": 0.5682379603385925, "step": 5843 }, { "epoch": 1.4205153135634419, "grad_norm": 1.6638825797728338, "learning_rate": 4.253631184303148e-06, "loss": 0.7723763585090637, "step": 5844 }, { "epoch": 1.4207583859990276, "grad_norm": 1.643042879456665, "learning_rate": 4.250342106492371e-06, "loss": 0.6381250619888306, "step": 5845 }, { "epoch": 1.4210014584346136, "grad_norm": 1.4644620488239615, "learning_rate": 4.247053957589469e-06, "loss": 0.6663600206375122, "step": 5846 }, { "epoch": 1.4212445308701993, "grad_norm": 1.500171255574029, "learning_rate": 4.243766738125664e-06, "loss": 0.6813154816627502, "step": 5847 }, { "epoch": 1.421487603305785, "grad_norm": 1.6003795874843483, "learning_rate": 4.240480448632043e-06, "loss": 0.7869656085968018, "step": 5848 }, { "epoch": 1.421730675741371, "grad_norm": 1.5341074712155, "learning_rate": 4.237195089639526e-06, "loss": 0.5713797211647034, "step": 5849 }, { "epoch": 1.4219737481769568, "grad_norm": 1.4535278596817813, "learning_rate": 4.2339106616789e-06, "loss": 0.47910767793655396, "step": 5850 }, { "epoch": 1.4222168206125425, "grad_norm": 1.5733037455860301, "learning_rate": 4.230627165280787e-06, "loss": 0.5821409821510315, "step": 5851 }, { "epoch": 1.4224598930481283, "grad_norm": 1.4049105055279263, "learning_rate": 4.227344600975671e-06, "loss": 0.6403465270996094, "step": 5852 }, { "epoch": 1.4227029654837142, "grad_norm": 1.4077259127112076, "learning_rate": 4.224062969293873e-06, "loss": 0.6374199390411377, "step": 5853 }, { "epoch": 1.4229460379193, "grad_norm": 1.3755632519909593, "learning_rate": 4.220782270765571e-06, "loss": 0.615099310874939, "step": 5854 }, { "epoch": 1.4231891103548857, "grad_norm": 1.5634269623811359, "learning_rate": 4.217502505920797e-06, "loss": 0.4893762469291687, "step": 5855 }, { "epoch": 1.4234321827904717, "grad_norm": 1.3530902927706507, "learning_rate": 4.214223675289415e-06, "loss": 0.49922090768814087, "step": 5856 }, { "epoch": 1.4236752552260574, "grad_norm": 1.6037994521293075, "learning_rate": 4.21094577940116e-06, "loss": 0.5081870555877686, "step": 5857 }, { "epoch": 1.4239183276616432, "grad_norm": 1.5257452970590504, "learning_rate": 4.2076688187855965e-06, "loss": 0.5589135885238647, "step": 5858 }, { "epoch": 1.424161400097229, "grad_norm": 1.35891182689321, "learning_rate": 4.204392793972152e-06, "loss": 0.5766650438308716, "step": 5859 }, { "epoch": 1.4244044725328147, "grad_norm": 1.500633895465324, "learning_rate": 4.201117705490092e-06, "loss": 0.6023861169815063, "step": 5860 }, { "epoch": 1.4246475449684006, "grad_norm": 1.7963095906873952, "learning_rate": 4.197843553868538e-06, "loss": 0.5550529956817627, "step": 5861 }, { "epoch": 1.4248906174039864, "grad_norm": 1.7059246440264786, "learning_rate": 4.194570339636464e-06, "loss": 0.5390881299972534, "step": 5862 }, { "epoch": 1.4251336898395721, "grad_norm": 1.7287880768374597, "learning_rate": 4.191298063322675e-06, "loss": 0.49571704864501953, "step": 5863 }, { "epoch": 1.425376762275158, "grad_norm": 1.4791193558382485, "learning_rate": 4.188026725455848e-06, "loss": 0.5877512693405151, "step": 5864 }, { "epoch": 1.4256198347107438, "grad_norm": 1.621515267847552, "learning_rate": 4.184756326564487e-06, "loss": 0.6262039542198181, "step": 5865 }, { "epoch": 1.4258629071463296, "grad_norm": 1.7263058669206006, "learning_rate": 4.18148686717696e-06, "loss": 0.5768304467201233, "step": 5866 }, { "epoch": 1.4261059795819153, "grad_norm": 1.7790321829239137, "learning_rate": 4.178218347821471e-06, "loss": 0.5074553489685059, "step": 5867 }, { "epoch": 1.4263490520175013, "grad_norm": 1.5328714918876172, "learning_rate": 4.174950769026086e-06, "loss": 0.6901461482048035, "step": 5868 }, { "epoch": 1.426592124453087, "grad_norm": 1.3949606157022238, "learning_rate": 4.1716841313187e-06, "loss": 0.6654369831085205, "step": 5869 }, { "epoch": 1.4268351968886728, "grad_norm": 1.508450240497182, "learning_rate": 4.168418435227079e-06, "loss": 0.5528749227523804, "step": 5870 }, { "epoch": 1.4270782693242587, "grad_norm": 1.5295851421173379, "learning_rate": 4.165153681278822e-06, "loss": 0.5336122512817383, "step": 5871 }, { "epoch": 1.4273213417598445, "grad_norm": 1.9028584105198996, "learning_rate": 4.161889870001372e-06, "loss": 0.544122576713562, "step": 5872 }, { "epoch": 1.4275644141954302, "grad_norm": 1.7117648365394191, "learning_rate": 4.158627001922035e-06, "loss": 0.505651593208313, "step": 5873 }, { "epoch": 1.427807486631016, "grad_norm": 1.5585471470523686, "learning_rate": 4.155365077567948e-06, "loss": 0.5735711455345154, "step": 5874 }, { "epoch": 1.4280505590666017, "grad_norm": 1.8186351375169607, "learning_rate": 4.152104097466111e-06, "loss": 0.5483950972557068, "step": 5875 }, { "epoch": 1.4282936315021877, "grad_norm": 1.8484982936529255, "learning_rate": 4.148844062143358e-06, "loss": 0.6061592102050781, "step": 5876 }, { "epoch": 1.4285367039377734, "grad_norm": 1.7552430488003485, "learning_rate": 4.145584972126377e-06, "loss": 0.7646622657775879, "step": 5877 }, { "epoch": 1.4287797763733592, "grad_norm": 1.5195323934260572, "learning_rate": 4.142326827941708e-06, "loss": 0.6642887592315674, "step": 5878 }, { "epoch": 1.4290228488089451, "grad_norm": 1.581950966181706, "learning_rate": 4.139069630115731e-06, "loss": 0.552702784538269, "step": 5879 }, { "epoch": 1.429265921244531, "grad_norm": 1.7780526995450565, "learning_rate": 4.1358133791746715e-06, "loss": 0.6324317455291748, "step": 5880 }, { "epoch": 1.4295089936801166, "grad_norm": 1.7319552037517016, "learning_rate": 4.13255807564461e-06, "loss": 0.6046730279922485, "step": 5881 }, { "epoch": 1.4297520661157024, "grad_norm": 1.6038000649146522, "learning_rate": 4.129303720051469e-06, "loss": 0.6572452187538147, "step": 5882 }, { "epoch": 1.4299951385512883, "grad_norm": 1.6514424690910805, "learning_rate": 4.126050312921011e-06, "loss": 0.6096298098564148, "step": 5883 }, { "epoch": 1.430238210986874, "grad_norm": 1.5479562660141124, "learning_rate": 4.122797854778863e-06, "loss": 0.5488014221191406, "step": 5884 }, { "epoch": 1.4304812834224598, "grad_norm": 1.732087707352584, "learning_rate": 4.119546346150478e-06, "loss": 0.44228285551071167, "step": 5885 }, { "epoch": 1.4307243558580458, "grad_norm": 1.7973750581091215, "learning_rate": 4.116295787561173e-06, "loss": 0.6402746438980103, "step": 5886 }, { "epoch": 1.4309674282936315, "grad_norm": 1.722715349531868, "learning_rate": 4.113046179536102e-06, "loss": 0.6542291641235352, "step": 5887 }, { "epoch": 1.4312105007292173, "grad_norm": 1.7042360092683833, "learning_rate": 4.109797522600272e-06, "loss": 0.6198195219039917, "step": 5888 }, { "epoch": 1.431453573164803, "grad_norm": 1.49489845972242, "learning_rate": 4.106549817278525e-06, "loss": 0.4507770538330078, "step": 5889 }, { "epoch": 1.4316966456003888, "grad_norm": 1.5602174948968404, "learning_rate": 4.1033030640955625e-06, "loss": 0.6032634973526001, "step": 5890 }, { "epoch": 1.4319397180359748, "grad_norm": 1.6946651105839512, "learning_rate": 4.100057263575925e-06, "loss": 0.4765598773956299, "step": 5891 }, { "epoch": 1.4321827904715605, "grad_norm": 1.6164525316650433, "learning_rate": 4.096812416243993e-06, "loss": 0.5071738958358765, "step": 5892 }, { "epoch": 1.4324258629071465, "grad_norm": 1.5702861589240633, "learning_rate": 4.093568522624012e-06, "loss": 0.4626063406467438, "step": 5893 }, { "epoch": 1.4326689353427322, "grad_norm": 1.6749366315536556, "learning_rate": 4.090325583240047e-06, "loss": 0.6231892108917236, "step": 5894 }, { "epoch": 1.432912007778318, "grad_norm": 1.6493870165846254, "learning_rate": 4.087083598616039e-06, "loss": 0.6343954801559448, "step": 5895 }, { "epoch": 1.4331550802139037, "grad_norm": 1.6005908755671956, "learning_rate": 4.083842569275748e-06, "loss": 0.5313001275062561, "step": 5896 }, { "epoch": 1.4333981526494894, "grad_norm": 1.695887007773999, "learning_rate": 4.080602495742798e-06, "loss": 0.5713887810707092, "step": 5897 }, { "epoch": 1.4336412250850754, "grad_norm": 1.8542524018789146, "learning_rate": 4.077363378540645e-06, "loss": 0.576015830039978, "step": 5898 }, { "epoch": 1.4338842975206612, "grad_norm": 1.573748364629675, "learning_rate": 4.074125218192604e-06, "loss": 0.5996739864349365, "step": 5899 }, { "epoch": 1.434127369956247, "grad_norm": 1.5721524911016356, "learning_rate": 4.070888015221824e-06, "loss": 0.434653639793396, "step": 5900 }, { "epoch": 1.4343704423918329, "grad_norm": 1.7699779728051865, "learning_rate": 4.0676517701513015e-06, "loss": 0.5287355780601501, "step": 5901 }, { "epoch": 1.4346135148274186, "grad_norm": 1.6092679814240207, "learning_rate": 4.064416483503882e-06, "loss": 0.5057737231254578, "step": 5902 }, { "epoch": 1.4348565872630044, "grad_norm": 1.5375924868931548, "learning_rate": 4.061182155802255e-06, "loss": 0.6345394253730774, "step": 5903 }, { "epoch": 1.43509965969859, "grad_norm": 1.3939974976926608, "learning_rate": 4.05794878756896e-06, "loss": 0.40154746174812317, "step": 5904 }, { "epoch": 1.4353427321341758, "grad_norm": 1.709175235883134, "learning_rate": 4.0547163793263674e-06, "loss": 0.6484968066215515, "step": 5905 }, { "epoch": 1.4355858045697618, "grad_norm": 1.7502267161593519, "learning_rate": 4.05148493159671e-06, "loss": 0.5726499557495117, "step": 5906 }, { "epoch": 1.4358288770053476, "grad_norm": 1.7138759927903235, "learning_rate": 4.048254444902047e-06, "loss": 0.6281880140304565, "step": 5907 }, { "epoch": 1.4360719494409335, "grad_norm": 1.3869690186450785, "learning_rate": 4.0450249197642995e-06, "loss": 0.49727723002433777, "step": 5908 }, { "epoch": 1.4363150218765193, "grad_norm": 1.3397833124307157, "learning_rate": 4.04179635670522e-06, "loss": 0.5325289964675903, "step": 5909 }, { "epoch": 1.436558094312105, "grad_norm": 1.4978402242435427, "learning_rate": 4.038568756246414e-06, "loss": 0.5642017126083374, "step": 5910 }, { "epoch": 1.4368011667476908, "grad_norm": 1.8308502074811048, "learning_rate": 4.035342118909334e-06, "loss": 0.5542508363723755, "step": 5911 }, { "epoch": 1.4370442391832765, "grad_norm": 1.6964435320930227, "learning_rate": 4.032116445215261e-06, "loss": 0.5527987480163574, "step": 5912 }, { "epoch": 1.4372873116188625, "grad_norm": 1.6034370711425274, "learning_rate": 4.028891735685341e-06, "loss": 0.44822120666503906, "step": 5913 }, { "epoch": 1.4375303840544482, "grad_norm": 1.6400826062426135, "learning_rate": 4.025667990840547e-06, "loss": 0.48319166898727417, "step": 5914 }, { "epoch": 1.437773456490034, "grad_norm": 1.5394652962100546, "learning_rate": 4.0224452112017095e-06, "loss": 0.5400841236114502, "step": 5915 }, { "epoch": 1.43801652892562, "grad_norm": 1.5326278764763912, "learning_rate": 4.01922339728949e-06, "loss": 0.49273067712783813, "step": 5916 }, { "epoch": 1.4382596013612057, "grad_norm": 1.7160995583561622, "learning_rate": 4.016002549624408e-06, "loss": 0.6612290740013123, "step": 5917 }, { "epoch": 1.4385026737967914, "grad_norm": 1.752593215000814, "learning_rate": 4.012782668726814e-06, "loss": 0.49042826890945435, "step": 5918 }, { "epoch": 1.4387457462323772, "grad_norm": 1.4921783087525307, "learning_rate": 4.00956375511691e-06, "loss": 0.5207022428512573, "step": 5919 }, { "epoch": 1.4389888186679631, "grad_norm": 1.7498414534867, "learning_rate": 4.006345809314744e-06, "loss": 0.6758131980895996, "step": 5920 }, { "epoch": 1.4392318911035489, "grad_norm": 1.6630167778121998, "learning_rate": 4.003128831840195e-06, "loss": 0.4488527774810791, "step": 5921 }, { "epoch": 1.4394749635391346, "grad_norm": 1.5838759544697736, "learning_rate": 3.9999128232130034e-06, "loss": 0.5399035811424255, "step": 5922 }, { "epoch": 1.4397180359747206, "grad_norm": 1.9018959504482207, "learning_rate": 3.996697783952736e-06, "loss": 0.476990669965744, "step": 5923 }, { "epoch": 1.4399611084103063, "grad_norm": 1.6607935269366834, "learning_rate": 3.993483714578817e-06, "loss": 0.7660830020904541, "step": 5924 }, { "epoch": 1.440204180845892, "grad_norm": 1.7545050099701474, "learning_rate": 3.990270615610502e-06, "loss": 0.5645588636398315, "step": 5925 }, { "epoch": 1.4404472532814778, "grad_norm": 1.4934844393376416, "learning_rate": 3.9870584875669016e-06, "loss": 0.5269109010696411, "step": 5926 }, { "epoch": 1.4406903257170636, "grad_norm": 1.563070146735638, "learning_rate": 3.983847330966955e-06, "loss": 0.47041070461273193, "step": 5927 }, { "epoch": 1.4409333981526495, "grad_norm": 1.8080815847773575, "learning_rate": 3.980637146329459e-06, "loss": 0.635831892490387, "step": 5928 }, { "epoch": 1.4411764705882353, "grad_norm": 1.6547752366547583, "learning_rate": 3.977427934173048e-06, "loss": 0.7043871879577637, "step": 5929 }, { "epoch": 1.441419543023821, "grad_norm": 1.5585407267422995, "learning_rate": 3.9742196950161935e-06, "loss": 0.479947030544281, "step": 5930 }, { "epoch": 1.441662615459407, "grad_norm": 1.7225987662494986, "learning_rate": 3.971012429377221e-06, "loss": 0.7414509057998657, "step": 5931 }, { "epoch": 1.4419056878949927, "grad_norm": 1.5172245419945105, "learning_rate": 3.9678061377742864e-06, "loss": 0.6497827768325806, "step": 5932 }, { "epoch": 1.4421487603305785, "grad_norm": 1.5980789539455034, "learning_rate": 3.964600820725399e-06, "loss": 0.6753836870193481, "step": 5933 }, { "epoch": 1.4423918327661642, "grad_norm": 1.5967923366101349, "learning_rate": 3.961396478748402e-06, "loss": 0.5245517492294312, "step": 5934 }, { "epoch": 1.4426349052017502, "grad_norm": 1.555061119925157, "learning_rate": 3.958193112360986e-06, "loss": 0.5366497039794922, "step": 5935 }, { "epoch": 1.442877977637336, "grad_norm": 1.569824015661055, "learning_rate": 3.954990722080689e-06, "loss": 0.7375968098640442, "step": 5936 }, { "epoch": 1.4431210500729217, "grad_norm": 1.438902206304419, "learning_rate": 3.951789308424876e-06, "loss": 0.5223530530929565, "step": 5937 }, { "epoch": 1.4433641225085077, "grad_norm": 1.690465954136578, "learning_rate": 3.948588871910771e-06, "loss": 0.6211827993392944, "step": 5938 }, { "epoch": 1.4436071949440934, "grad_norm": 1.552532983187485, "learning_rate": 3.945389413055428e-06, "loss": 0.5697109699249268, "step": 5939 }, { "epoch": 1.4438502673796791, "grad_norm": 1.9243997604305394, "learning_rate": 3.942190932375752e-06, "loss": 0.5406290292739868, "step": 5940 }, { "epoch": 1.4440933398152649, "grad_norm": 1.3031595432155654, "learning_rate": 3.938993430388481e-06, "loss": 0.4574546813964844, "step": 5941 }, { "epoch": 1.4443364122508506, "grad_norm": 1.4968914650056808, "learning_rate": 3.935796907610203e-06, "loss": 0.577876091003418, "step": 5942 }, { "epoch": 1.4445794846864366, "grad_norm": 1.7067573889473997, "learning_rate": 3.932601364557342e-06, "loss": 0.5709360837936401, "step": 5943 }, { "epoch": 1.4448225571220223, "grad_norm": 1.6095134364971158, "learning_rate": 3.929406801746166e-06, "loss": 0.5892128348350525, "step": 5944 }, { "epoch": 1.445065629557608, "grad_norm": 1.7055675274885154, "learning_rate": 3.92621321969279e-06, "loss": 0.5953332185745239, "step": 5945 }, { "epoch": 1.445308701993194, "grad_norm": 1.8014693814233245, "learning_rate": 3.9230206189131595e-06, "loss": 0.5535039901733398, "step": 5946 }, { "epoch": 1.4455517744287798, "grad_norm": 1.370558532673409, "learning_rate": 3.919828999923071e-06, "loss": 0.4868595600128174, "step": 5947 }, { "epoch": 1.4457948468643655, "grad_norm": 1.741462186665139, "learning_rate": 3.916638363238155e-06, "loss": 0.5252515077590942, "step": 5948 }, { "epoch": 1.4460379192999513, "grad_norm": 1.5829818308378087, "learning_rate": 3.9134487093738906e-06, "loss": 0.4976832866668701, "step": 5949 }, { "epoch": 1.4462809917355373, "grad_norm": 2.3948389375184393, "learning_rate": 3.91026003884559e-06, "loss": 0.6288565397262573, "step": 5950 }, { "epoch": 1.446524064171123, "grad_norm": 1.9140656435365782, "learning_rate": 3.907072352168414e-06, "loss": 0.6291898488998413, "step": 5951 }, { "epoch": 1.4467671366067087, "grad_norm": 1.7669711175257565, "learning_rate": 3.903885649857365e-06, "loss": 0.5936671495437622, "step": 5952 }, { "epoch": 1.4470102090422947, "grad_norm": 1.4662652043210584, "learning_rate": 3.9006999324272756e-06, "loss": 0.5855964422225952, "step": 5953 }, { "epoch": 1.4472532814778805, "grad_norm": 1.5565592921090738, "learning_rate": 3.897515200392833e-06, "loss": 0.7011584043502808, "step": 5954 }, { "epoch": 1.4474963539134662, "grad_norm": 1.5132326216295584, "learning_rate": 3.894331454268552e-06, "loss": 0.5750668048858643, "step": 5955 }, { "epoch": 1.447739426349052, "grad_norm": 1.7633980661022692, "learning_rate": 3.891148694568802e-06, "loss": 0.7001341581344604, "step": 5956 }, { "epoch": 1.4479824987846377, "grad_norm": 1.579503275000081, "learning_rate": 3.8879669218077785e-06, "loss": 0.6015012264251709, "step": 5957 }, { "epoch": 1.4482255712202237, "grad_norm": 1.7605435756240333, "learning_rate": 3.884786136499533e-06, "loss": 0.6885270476341248, "step": 5958 }, { "epoch": 1.4484686436558094, "grad_norm": 1.6516187395263524, "learning_rate": 3.881606339157938e-06, "loss": 0.646409809589386, "step": 5959 }, { "epoch": 1.4487117160913954, "grad_norm": 1.6454630687349387, "learning_rate": 3.878427530296732e-06, "loss": 0.5803457498550415, "step": 5960 }, { "epoch": 1.4489547885269811, "grad_norm": 1.4301302769162243, "learning_rate": 3.875249710429472e-06, "loss": 0.4038790464401245, "step": 5961 }, { "epoch": 1.4491978609625669, "grad_norm": 1.9008473348498753, "learning_rate": 3.8720728800695605e-06, "loss": 0.634314775466919, "step": 5962 }, { "epoch": 1.4494409333981526, "grad_norm": 1.7433752715133786, "learning_rate": 3.868897039730249e-06, "loss": 0.4982941746711731, "step": 5963 }, { "epoch": 1.4496840058337384, "grad_norm": 1.6885227871801682, "learning_rate": 3.8657221899246136e-06, "loss": 0.5220828056335449, "step": 5964 }, { "epoch": 1.4499270782693243, "grad_norm": 1.9301516746847822, "learning_rate": 3.862548331165589e-06, "loss": 0.5481916666030884, "step": 5965 }, { "epoch": 1.45017015070491, "grad_norm": 1.9115283605076023, "learning_rate": 3.859375463965931e-06, "loss": 0.7583798170089722, "step": 5966 }, { "epoch": 1.4504132231404958, "grad_norm": 1.5905129536997438, "learning_rate": 3.856203588838252e-06, "loss": 0.49637728929519653, "step": 5967 }, { "epoch": 1.4506562955760818, "grad_norm": 1.520282591586628, "learning_rate": 3.853032706294987e-06, "loss": 0.4733145236968994, "step": 5968 }, { "epoch": 1.4508993680116675, "grad_norm": 1.7162497463824198, "learning_rate": 3.849862816848432e-06, "loss": 0.5830914974212646, "step": 5969 }, { "epoch": 1.4511424404472533, "grad_norm": 1.6119746396673391, "learning_rate": 3.846693921010706e-06, "loss": 0.48398613929748535, "step": 5970 }, { "epoch": 1.451385512882839, "grad_norm": 1.8453368330095825, "learning_rate": 3.843526019293765e-06, "loss": 0.7362502217292786, "step": 5971 }, { "epoch": 1.4516285853184248, "grad_norm": 1.5869322092375187, "learning_rate": 3.840359112209422e-06, "loss": 0.654278039932251, "step": 5972 }, { "epoch": 1.4518716577540107, "grad_norm": 1.7190345773480677, "learning_rate": 3.837193200269309e-06, "loss": 0.5066512823104858, "step": 5973 }, { "epoch": 1.4521147301895965, "grad_norm": 1.6020502572153577, "learning_rate": 3.834028283984917e-06, "loss": 0.5283300876617432, "step": 5974 }, { "epoch": 1.4523578026251824, "grad_norm": 1.9876641376351045, "learning_rate": 3.830864363867556e-06, "loss": 0.6783531904220581, "step": 5975 }, { "epoch": 1.4526008750607682, "grad_norm": 1.5021478337271754, "learning_rate": 3.827701440428389e-06, "loss": 0.7813847064971924, "step": 5976 }, { "epoch": 1.452843947496354, "grad_norm": 1.6207011938949083, "learning_rate": 3.824539514178416e-06, "loss": 0.47184211015701294, "step": 5977 }, { "epoch": 1.4530870199319397, "grad_norm": 1.6460146914713227, "learning_rate": 3.8213785856284765e-06, "loss": 0.4915250241756439, "step": 5978 }, { "epoch": 1.4533300923675254, "grad_norm": 1.6747241523988476, "learning_rate": 3.8182186552892395e-06, "loss": 0.6083568334579468, "step": 5979 }, { "epoch": 1.4535731648031114, "grad_norm": 1.6576860055203404, "learning_rate": 3.815059723671227e-06, "loss": 0.5642822980880737, "step": 5980 }, { "epoch": 1.4538162372386971, "grad_norm": 1.6521250417327058, "learning_rate": 3.811901791284788e-06, "loss": 0.4935462474822998, "step": 5981 }, { "epoch": 1.4540593096742829, "grad_norm": 1.4775064842982957, "learning_rate": 3.808744858640111e-06, "loss": 0.5039271116256714, "step": 5982 }, { "epoch": 1.4543023821098688, "grad_norm": 1.7325051931182738, "learning_rate": 3.8055889262472323e-06, "loss": 0.6336740255355835, "step": 5983 }, { "epoch": 1.4545454545454546, "grad_norm": 1.542985355734984, "learning_rate": 3.802433994616013e-06, "loss": 0.531844973564148, "step": 5984 }, { "epoch": 1.4547885269810403, "grad_norm": 1.760010986317142, "learning_rate": 3.79928006425617e-06, "loss": 0.690719723701477, "step": 5985 }, { "epoch": 1.455031599416626, "grad_norm": 1.6367504127420502, "learning_rate": 3.796127135677241e-06, "loss": 0.6318866610527039, "step": 5986 }, { "epoch": 1.4552746718522118, "grad_norm": 1.8444956363703378, "learning_rate": 3.7929752093886163e-06, "loss": 0.6020856499671936, "step": 5987 }, { "epoch": 1.4555177442877978, "grad_norm": 1.7166004699805169, "learning_rate": 3.789824285899509e-06, "loss": 0.6278111934661865, "step": 5988 }, { "epoch": 1.4557608167233835, "grad_norm": 1.4897661075115818, "learning_rate": 3.7866743657189863e-06, "loss": 0.736060619354248, "step": 5989 }, { "epoch": 1.4560038891589695, "grad_norm": 1.5850805994397987, "learning_rate": 3.7835254493559416e-06, "loss": 0.47902345657348633, "step": 5990 }, { "epoch": 1.4562469615945552, "grad_norm": 1.8035644388312875, "learning_rate": 3.780377537319108e-06, "loss": 0.7280597686767578, "step": 5991 }, { "epoch": 1.456490034030141, "grad_norm": 1.7937918291110577, "learning_rate": 3.777230630117059e-06, "loss": 0.49044734239578247, "step": 5992 }, { "epoch": 1.4567331064657267, "grad_norm": 1.6422579171601945, "learning_rate": 3.7740847282582083e-06, "loss": 0.6719037294387817, "step": 5993 }, { "epoch": 1.4569761789013125, "grad_norm": 1.3362577957264286, "learning_rate": 3.7709398322508063e-06, "loss": 0.49916979670524597, "step": 5994 }, { "epoch": 1.4572192513368984, "grad_norm": 1.9300609492950578, "learning_rate": 3.767795942602931e-06, "loss": 0.669909656047821, "step": 5995 }, { "epoch": 1.4574623237724842, "grad_norm": 1.9142258068566824, "learning_rate": 3.764653059822513e-06, "loss": 0.5746203660964966, "step": 5996 }, { "epoch": 1.45770539620807, "grad_norm": 1.6881850046599987, "learning_rate": 3.761511184417306e-06, "loss": 0.7085678577423096, "step": 5997 }, { "epoch": 1.457948468643656, "grad_norm": 1.6246110191177074, "learning_rate": 3.758370316894914e-06, "loss": 0.43206316232681274, "step": 5998 }, { "epoch": 1.4581915410792416, "grad_norm": 1.8378640225772789, "learning_rate": 3.7552304577627683e-06, "loss": 0.427664577960968, "step": 5999 }, { "epoch": 1.4584346135148274, "grad_norm": 1.5650340351673817, "learning_rate": 3.7520916075281354e-06, "loss": 0.47006481885910034, "step": 6000 }, { "epoch": 1.4586776859504131, "grad_norm": 1.80184042915263, "learning_rate": 3.748953766698136e-06, "loss": 0.7065495252609253, "step": 6001 }, { "epoch": 1.458920758385999, "grad_norm": 1.5905478891553542, "learning_rate": 3.745816935779707e-06, "loss": 0.5851840972900391, "step": 6002 }, { "epoch": 1.4591638308215849, "grad_norm": 1.5449109611772118, "learning_rate": 3.7426811152796373e-06, "loss": 0.6860973834991455, "step": 6003 }, { "epoch": 1.4594069032571706, "grad_norm": 1.559011218178358, "learning_rate": 3.739546305704539e-06, "loss": 0.6005682945251465, "step": 6004 }, { "epoch": 1.4596499756927566, "grad_norm": 1.6177692235770698, "learning_rate": 3.736412507560876e-06, "loss": 0.6938517093658447, "step": 6005 }, { "epoch": 1.4598930481283423, "grad_norm": 1.5661676484589964, "learning_rate": 3.7332797213549334e-06, "loss": 0.6371433734893799, "step": 6006 }, { "epoch": 1.460136120563928, "grad_norm": 1.6154600228935772, "learning_rate": 3.730147947592848e-06, "loss": 0.4661463797092438, "step": 6007 }, { "epoch": 1.4603791929995138, "grad_norm": 1.5975854787913844, "learning_rate": 3.7270171867805815e-06, "loss": 0.6597098112106323, "step": 6008 }, { "epoch": 1.4606222654350995, "grad_norm": 1.7664607233071252, "learning_rate": 3.723887439423929e-06, "loss": 0.6280114650726318, "step": 6009 }, { "epoch": 1.4608653378706855, "grad_norm": 1.7794494036133737, "learning_rate": 3.720758706028542e-06, "loss": 0.5707333087921143, "step": 6010 }, { "epoch": 1.4611084103062713, "grad_norm": 1.771879776567448, "learning_rate": 3.717630987099886e-06, "loss": 0.5557271242141724, "step": 6011 }, { "epoch": 1.461351482741857, "grad_norm": 1.8269947332219492, "learning_rate": 3.7145042831432775e-06, "loss": 0.5526280403137207, "step": 6012 }, { "epoch": 1.461594555177443, "grad_norm": 1.4974541882616603, "learning_rate": 3.711378594663857e-06, "loss": 0.6766061782836914, "step": 6013 }, { "epoch": 1.4618376276130287, "grad_norm": 1.6903528567019885, "learning_rate": 3.7082539221666123e-06, "loss": 0.6890079975128174, "step": 6014 }, { "epoch": 1.4620807000486145, "grad_norm": 1.5112995422995497, "learning_rate": 3.705130266156356e-06, "loss": 0.6478886008262634, "step": 6015 }, { "epoch": 1.4623237724842002, "grad_norm": 1.5448745633542234, "learning_rate": 3.702007627137748e-06, "loss": 0.5886574983596802, "step": 6016 }, { "epoch": 1.4625668449197862, "grad_norm": 1.5658614794227148, "learning_rate": 3.6988860056152744e-06, "loss": 0.7085119485855103, "step": 6017 }, { "epoch": 1.462809917355372, "grad_norm": 1.792499046035096, "learning_rate": 3.69576540209326e-06, "loss": 0.4665243625640869, "step": 6018 }, { "epoch": 1.4630529897909577, "grad_norm": 1.7060635056967817, "learning_rate": 3.692645817075872e-06, "loss": 0.6317046880722046, "step": 6019 }, { "epoch": 1.4632960622265436, "grad_norm": 1.5199407815585644, "learning_rate": 3.6895272510670976e-06, "loss": 0.6327306032180786, "step": 6020 }, { "epoch": 1.4635391346621294, "grad_norm": 1.603274995499827, "learning_rate": 3.6864097045707783e-06, "loss": 0.6128853559494019, "step": 6021 }, { "epoch": 1.4637822070977151, "grad_norm": 1.7307217727687405, "learning_rate": 3.6832931780905724e-06, "loss": 0.46318191289901733, "step": 6022 }, { "epoch": 1.4640252795333009, "grad_norm": 1.6888384233044522, "learning_rate": 3.6801776721299897e-06, "loss": 0.5959578156471252, "step": 6023 }, { "epoch": 1.4642683519688866, "grad_norm": 1.7064978363256764, "learning_rate": 3.677063187192361e-06, "loss": 0.6990222930908203, "step": 6024 }, { "epoch": 1.4645114244044726, "grad_norm": 1.7468216730517843, "learning_rate": 3.673949723780862e-06, "loss": 0.5831222534179688, "step": 6025 }, { "epoch": 1.4647544968400583, "grad_norm": 1.5093440252239905, "learning_rate": 3.6708372823985027e-06, "loss": 0.561195969581604, "step": 6026 }, { "epoch": 1.464997569275644, "grad_norm": 1.504876199094551, "learning_rate": 3.6677258635481196e-06, "loss": 0.5154291391372681, "step": 6027 }, { "epoch": 1.46524064171123, "grad_norm": 1.840350031303012, "learning_rate": 3.6646154677323963e-06, "loss": 0.5801303386688232, "step": 6028 }, { "epoch": 1.4654837141468158, "grad_norm": 1.5538875130492802, "learning_rate": 3.6615060954538383e-06, "loss": 0.6272231936454773, "step": 6029 }, { "epoch": 1.4657267865824015, "grad_norm": 1.6889638443602502, "learning_rate": 3.658397747214798e-06, "loss": 0.535873293876648, "step": 6030 }, { "epoch": 1.4659698590179873, "grad_norm": 1.542243598803172, "learning_rate": 3.6552904235174504e-06, "loss": 0.5238786339759827, "step": 6031 }, { "epoch": 1.4662129314535732, "grad_norm": 1.6489544506629459, "learning_rate": 3.652184124863817e-06, "loss": 0.6395345330238342, "step": 6032 }, { "epoch": 1.466456003889159, "grad_norm": 1.457033490653509, "learning_rate": 3.649078851755742e-06, "loss": 0.5732386112213135, "step": 6033 }, { "epoch": 1.4666990763247447, "grad_norm": 1.4158625290530649, "learning_rate": 3.645974604694912e-06, "loss": 0.5231861472129822, "step": 6034 }, { "epoch": 1.4669421487603307, "grad_norm": 1.642141302693059, "learning_rate": 3.642871384182849e-06, "loss": 0.6427352428436279, "step": 6035 }, { "epoch": 1.4671852211959164, "grad_norm": 1.7365293096290075, "learning_rate": 3.639769190720899e-06, "loss": 0.6152218580245972, "step": 6036 }, { "epoch": 1.4674282936315022, "grad_norm": 1.5324592608375982, "learning_rate": 3.636668024810256e-06, "loss": 0.4974938929080963, "step": 6037 }, { "epoch": 1.467671366067088, "grad_norm": 1.5871706259994114, "learning_rate": 3.633567886951933e-06, "loss": 0.5708749294281006, "step": 6038 }, { "epoch": 1.4679144385026737, "grad_norm": 1.405869804753898, "learning_rate": 3.630468777646793e-06, "loss": 0.6524933576583862, "step": 6039 }, { "epoch": 1.4681575109382596, "grad_norm": 1.7141459108591466, "learning_rate": 3.627370697395515e-06, "loss": 0.7037602663040161, "step": 6040 }, { "epoch": 1.4684005833738454, "grad_norm": 1.881632155371549, "learning_rate": 3.6242736466986307e-06, "loss": 0.7614222764968872, "step": 6041 }, { "epoch": 1.4686436558094313, "grad_norm": 1.705116033742337, "learning_rate": 3.6211776260564866e-06, "loss": 0.6970421075820923, "step": 6042 }, { "epoch": 1.468886728245017, "grad_norm": 1.5547771627306581, "learning_rate": 3.6180826359692767e-06, "loss": 0.5062484741210938, "step": 6043 }, { "epoch": 1.4691298006806028, "grad_norm": 1.6970726389164035, "learning_rate": 3.6149886769370278e-06, "loss": 0.735614538192749, "step": 6044 }, { "epoch": 1.4693728731161886, "grad_norm": 1.456770643280326, "learning_rate": 3.6118957494595885e-06, "loss": 0.5551865100860596, "step": 6045 }, { "epoch": 1.4696159455517743, "grad_norm": 1.5782048438791056, "learning_rate": 3.6088038540366556e-06, "loss": 0.7417107820510864, "step": 6046 }, { "epoch": 1.4698590179873603, "grad_norm": 1.4236614730893207, "learning_rate": 3.605712991167746e-06, "loss": 0.6016433238983154, "step": 6047 }, { "epoch": 1.470102090422946, "grad_norm": 1.7309746712684153, "learning_rate": 3.602623161352221e-06, "loss": 0.4559459388256073, "step": 6048 }, { "epoch": 1.4703451628585318, "grad_norm": 1.4187676656178994, "learning_rate": 3.5995343650892646e-06, "loss": 0.47498857975006104, "step": 6049 }, { "epoch": 1.4705882352941178, "grad_norm": 1.449046232384716, "learning_rate": 3.596446602877902e-06, "loss": 0.588019847869873, "step": 6050 }, { "epoch": 1.4708313077297035, "grad_norm": 1.6978173486758388, "learning_rate": 3.593359875216991e-06, "loss": 0.5894460082054138, "step": 6051 }, { "epoch": 1.4710743801652892, "grad_norm": 1.4054765618126335, "learning_rate": 3.5902741826052134e-06, "loss": 0.6863467693328857, "step": 6052 }, { "epoch": 1.471317452600875, "grad_norm": 1.7132814918792285, "learning_rate": 3.587189525541097e-06, "loss": 0.465196430683136, "step": 6053 }, { "epoch": 1.4715605250364607, "grad_norm": 1.7392743440631806, "learning_rate": 3.5841059045229886e-06, "loss": 0.6402699947357178, "step": 6054 }, { "epoch": 1.4718035974720467, "grad_norm": 1.7225388806157735, "learning_rate": 3.581023320049081e-06, "loss": 0.6842021942138672, "step": 6055 }, { "epoch": 1.4720466699076324, "grad_norm": 1.5247926710570658, "learning_rate": 3.577941772617386e-06, "loss": 0.48919743299484253, "step": 6056 }, { "epoch": 1.4722897423432184, "grad_norm": 1.5943901352701946, "learning_rate": 3.5748612627257604e-06, "loss": 0.6313661336898804, "step": 6057 }, { "epoch": 1.4725328147788042, "grad_norm": 1.6979989801943924, "learning_rate": 3.57178179087188e-06, "loss": 0.6875218749046326, "step": 6058 }, { "epoch": 1.47277588721439, "grad_norm": 1.6884111008785816, "learning_rate": 3.5687033575532714e-06, "loss": 0.5253579616546631, "step": 6059 }, { "epoch": 1.4730189596499756, "grad_norm": 1.5721249259656411, "learning_rate": 3.5656259632672775e-06, "loss": 0.4632294476032257, "step": 6060 }, { "epoch": 1.4732620320855614, "grad_norm": 1.3406850747468477, "learning_rate": 3.5625496085110757e-06, "loss": 0.5572640895843506, "step": 6061 }, { "epoch": 1.4735051045211474, "grad_norm": 1.4410070323986204, "learning_rate": 3.559474293781683e-06, "loss": 0.6440991163253784, "step": 6062 }, { "epoch": 1.473748176956733, "grad_norm": 1.4966922905075162, "learning_rate": 3.5564000195759385e-06, "loss": 0.6449762582778931, "step": 6063 }, { "epoch": 1.4739912493923188, "grad_norm": 1.6181847337580744, "learning_rate": 3.553326786390524e-06, "loss": 0.5391324758529663, "step": 6064 }, { "epoch": 1.4742343218279048, "grad_norm": 1.8186493698613513, "learning_rate": 3.5502545947219414e-06, "loss": 0.49028852581977844, "step": 6065 }, { "epoch": 1.4744773942634906, "grad_norm": 1.4629615942343255, "learning_rate": 3.5471834450665333e-06, "loss": 0.4874032735824585, "step": 6066 }, { "epoch": 1.4747204666990763, "grad_norm": 1.633183155755545, "learning_rate": 3.5441133379204708e-06, "loss": 0.5397500991821289, "step": 6067 }, { "epoch": 1.474963539134662, "grad_norm": 1.6350823334693596, "learning_rate": 3.54104427377976e-06, "loss": 0.5002152919769287, "step": 6068 }, { "epoch": 1.475206611570248, "grad_norm": 1.5919358822889411, "learning_rate": 3.537976253140232e-06, "loss": 0.48329684138298035, "step": 6069 }, { "epoch": 1.4754496840058338, "grad_norm": 1.6207321907495202, "learning_rate": 3.53490927649755e-06, "loss": 0.5565363168716431, "step": 6070 }, { "epoch": 1.4756927564414195, "grad_norm": 1.4273326538225417, "learning_rate": 3.5318433443472165e-06, "loss": 0.578497052192688, "step": 6071 }, { "epoch": 1.4759358288770055, "grad_norm": 1.7520416185792698, "learning_rate": 3.528778457184554e-06, "loss": 0.5200670957565308, "step": 6072 }, { "epoch": 1.4761789013125912, "grad_norm": 1.8089633192519392, "learning_rate": 3.5257146155047293e-06, "loss": 0.604102611541748, "step": 6073 }, { "epoch": 1.476421973748177, "grad_norm": 1.8855528497225347, "learning_rate": 3.5226518198027217e-06, "loss": 0.640713095664978, "step": 6074 }, { "epoch": 1.4766650461837627, "grad_norm": 1.6620533991979858, "learning_rate": 3.5195900705733667e-06, "loss": 0.6661645174026489, "step": 6075 }, { "epoch": 1.4769081186193485, "grad_norm": 1.5264910588280431, "learning_rate": 3.5165293683113066e-06, "loss": 0.527970552444458, "step": 6076 }, { "epoch": 1.4771511910549344, "grad_norm": 1.6112265404076345, "learning_rate": 3.513469713511032e-06, "loss": 0.6926937699317932, "step": 6077 }, { "epoch": 1.4773942634905202, "grad_norm": 1.860747686891746, "learning_rate": 3.5104111066668535e-06, "loss": 0.5533348321914673, "step": 6078 }, { "epoch": 1.477637335926106, "grad_norm": 1.4785320453395083, "learning_rate": 3.5073535482729116e-06, "loss": 0.5899041891098022, "step": 6079 }, { "epoch": 1.4778804083616919, "grad_norm": 1.565530211617039, "learning_rate": 3.50429703882319e-06, "loss": 0.4920912981033325, "step": 6080 }, { "epoch": 1.4781234807972776, "grad_norm": 1.8328880585003164, "learning_rate": 3.5012415788114884e-06, "loss": 0.5044493675231934, "step": 6081 }, { "epoch": 1.4783665532328634, "grad_norm": 1.5950845468978594, "learning_rate": 3.4981871687314484e-06, "loss": 0.5390356779098511, "step": 6082 }, { "epoch": 1.4786096256684491, "grad_norm": 1.866770821157686, "learning_rate": 3.4951338090765272e-06, "loss": 0.683129608631134, "step": 6083 }, { "epoch": 1.478852698104035, "grad_norm": 1.7679622358297453, "learning_rate": 3.4920815003400357e-06, "loss": 0.5747657418251038, "step": 6084 }, { "epoch": 1.4790957705396208, "grad_norm": 1.5917528221605093, "learning_rate": 3.4890302430150913e-06, "loss": 0.5005226731300354, "step": 6085 }, { "epoch": 1.4793388429752066, "grad_norm": 1.7917253198171108, "learning_rate": 3.4859800375946574e-06, "loss": 0.4873892068862915, "step": 6086 }, { "epoch": 1.4795819154107925, "grad_norm": 1.5853981860367117, "learning_rate": 3.482930884571516e-06, "loss": 0.5470404624938965, "step": 6087 }, { "epoch": 1.4798249878463783, "grad_norm": 1.7696473251779283, "learning_rate": 3.479882784438291e-06, "loss": 0.6524006128311157, "step": 6088 }, { "epoch": 1.480068060281964, "grad_norm": 1.920614146680613, "learning_rate": 3.4768357376874264e-06, "loss": 0.5122734308242798, "step": 6089 }, { "epoch": 1.4803111327175498, "grad_norm": 1.5767903253202458, "learning_rate": 3.4737897448111947e-06, "loss": 0.6506860852241516, "step": 6090 }, { "epoch": 1.4805542051531355, "grad_norm": 1.5426974951151704, "learning_rate": 3.470744806301708e-06, "loss": 0.46383458375930786, "step": 6091 }, { "epoch": 1.4807972775887215, "grad_norm": 1.6063489528274006, "learning_rate": 3.467700922650903e-06, "loss": 0.6723281145095825, "step": 6092 }, { "epoch": 1.4810403500243072, "grad_norm": 1.7641647182530642, "learning_rate": 3.4646580943505483e-06, "loss": 0.46522533893585205, "step": 6093 }, { "epoch": 1.481283422459893, "grad_norm": 1.5390371485657486, "learning_rate": 3.4616163218922326e-06, "loss": 0.7595668435096741, "step": 6094 }, { "epoch": 1.481526494895479, "grad_norm": 1.530761976583392, "learning_rate": 3.458575605767389e-06, "loss": 0.567911684513092, "step": 6095 }, { "epoch": 1.4817695673310647, "grad_norm": 1.3619788969308506, "learning_rate": 3.4555359464672657e-06, "loss": 0.43183860182762146, "step": 6096 }, { "epoch": 1.4820126397666504, "grad_norm": 1.6180536036217723, "learning_rate": 3.4524973444829503e-06, "loss": 0.5018681287765503, "step": 6097 }, { "epoch": 1.4822557122022362, "grad_norm": 1.4814615346651507, "learning_rate": 3.4494598003053546e-06, "loss": 0.5178461670875549, "step": 6098 }, { "epoch": 1.4824987846378221, "grad_norm": 1.4096753897429013, "learning_rate": 3.4464233144252133e-06, "loss": 0.45916372537612915, "step": 6099 }, { "epoch": 1.4827418570734079, "grad_norm": 1.646658331692377, "learning_rate": 3.4433878873331115e-06, "loss": 0.590222954750061, "step": 6100 }, { "epoch": 1.4829849295089936, "grad_norm": 1.449306152810644, "learning_rate": 3.4403535195194393e-06, "loss": 0.6310548782348633, "step": 6101 }, { "epoch": 1.4832280019445796, "grad_norm": 1.4298892990083079, "learning_rate": 3.437320211474431e-06, "loss": 0.576591968536377, "step": 6102 }, { "epoch": 1.4834710743801653, "grad_norm": 1.614364606487678, "learning_rate": 3.4342879636881376e-06, "loss": 0.7148545980453491, "step": 6103 }, { "epoch": 1.483714146815751, "grad_norm": 1.686482132436627, "learning_rate": 3.4312567766504535e-06, "loss": 0.6624016761779785, "step": 6104 }, { "epoch": 1.4839572192513368, "grad_norm": 1.5338464560611327, "learning_rate": 3.428226650851085e-06, "loss": 0.54718017578125, "step": 6105 }, { "epoch": 1.4842002916869226, "grad_norm": 1.6151544578095602, "learning_rate": 3.425197586779584e-06, "loss": 0.521480917930603, "step": 6106 }, { "epoch": 1.4844433641225085, "grad_norm": 1.5794430996921955, "learning_rate": 3.4221695849253155e-06, "loss": 0.6632183194160461, "step": 6107 }, { "epoch": 1.4846864365580943, "grad_norm": 1.3597006783689272, "learning_rate": 3.4191426457774822e-06, "loss": 0.5217209458351135, "step": 6108 }, { "epoch": 1.48492950899368, "grad_norm": 1.4972085964262956, "learning_rate": 3.4161167698251176e-06, "loss": 0.5001394748687744, "step": 6109 }, { "epoch": 1.485172581429266, "grad_norm": 1.5776936895727764, "learning_rate": 3.413091957557071e-06, "loss": 0.544420599937439, "step": 6110 }, { "epoch": 1.4854156538648517, "grad_norm": 1.8184035005378807, "learning_rate": 3.410068209462034e-06, "loss": 0.546515703201294, "step": 6111 }, { "epoch": 1.4856587263004375, "grad_norm": 1.5104989931940844, "learning_rate": 3.407045526028514e-06, "loss": 0.475619375705719, "step": 6112 }, { "epoch": 1.4859017987360232, "grad_norm": 1.4300508122474949, "learning_rate": 3.404023907744858e-06, "loss": 0.5223138928413391, "step": 6113 }, { "epoch": 1.4861448711716092, "grad_norm": 1.7434928378856163, "learning_rate": 3.401003355099227e-06, "loss": 0.47980648279190063, "step": 6114 }, { "epoch": 1.486387943607195, "grad_norm": 1.7607050402351436, "learning_rate": 3.3979838685796253e-06, "loss": 0.6217248439788818, "step": 6115 }, { "epoch": 1.4866310160427807, "grad_norm": 1.6525576412261282, "learning_rate": 3.394965448673876e-06, "loss": 0.7671569585800171, "step": 6116 }, { "epoch": 1.4868740884783667, "grad_norm": 1.6932655113880866, "learning_rate": 3.391948095869628e-06, "loss": 0.6154975891113281, "step": 6117 }, { "epoch": 1.4871171609139524, "grad_norm": 1.581955516960839, "learning_rate": 3.388931810654368e-06, "loss": 0.589623212814331, "step": 6118 }, { "epoch": 1.4873602333495382, "grad_norm": 1.218597863379371, "learning_rate": 3.385916593515395e-06, "loss": 0.39150768518447876, "step": 6119 }, { "epoch": 1.487603305785124, "grad_norm": 1.4747507792453574, "learning_rate": 3.3829024449398506e-06, "loss": 0.6230287551879883, "step": 6120 }, { "epoch": 1.4878463782207096, "grad_norm": 1.721765547948976, "learning_rate": 3.3798893654146915e-06, "loss": 0.5649079084396362, "step": 6121 }, { "epoch": 1.4880894506562956, "grad_norm": 1.7065482609398446, "learning_rate": 3.376877355426713e-06, "loss": 0.49950289726257324, "step": 6122 }, { "epoch": 1.4883325230918814, "grad_norm": 1.7642886117264938, "learning_rate": 3.373866415462527e-06, "loss": 0.5844733715057373, "step": 6123 }, { "epoch": 1.4885755955274673, "grad_norm": 1.644953028143122, "learning_rate": 3.3708565460085787e-06, "loss": 0.6171788573265076, "step": 6124 }, { "epoch": 1.488818667963053, "grad_norm": 1.7412013083690079, "learning_rate": 3.367847747551143e-06, "loss": 0.5515847206115723, "step": 6125 }, { "epoch": 1.4890617403986388, "grad_norm": 1.6190003013772916, "learning_rate": 3.36484002057631e-06, "loss": 0.6689252853393555, "step": 6126 }, { "epoch": 1.4893048128342246, "grad_norm": 1.3891582486348408, "learning_rate": 3.361833365570014e-06, "loss": 0.6174126863479614, "step": 6127 }, { "epoch": 1.4895478852698103, "grad_norm": 1.6598306165172023, "learning_rate": 3.3588277830179974e-06, "loss": 0.6565874814987183, "step": 6128 }, { "epoch": 1.4897909577053963, "grad_norm": 1.6183206202258151, "learning_rate": 3.355823273405846e-06, "loss": 0.5404124855995178, "step": 6129 }, { "epoch": 1.490034030140982, "grad_norm": 1.5150355437985776, "learning_rate": 3.352819837218958e-06, "loss": 0.5640057325363159, "step": 6130 }, { "epoch": 1.4902771025765678, "grad_norm": 1.5714323801120145, "learning_rate": 3.3498174749425717e-06, "loss": 0.6018704175949097, "step": 6131 }, { "epoch": 1.4905201750121537, "grad_norm": 1.648567069260723, "learning_rate": 3.346816187061739e-06, "loss": 0.5120040774345398, "step": 6132 }, { "epoch": 1.4907632474477395, "grad_norm": 1.7231622678336005, "learning_rate": 3.3438159740613474e-06, "loss": 0.5672017335891724, "step": 6133 }, { "epoch": 1.4910063198833252, "grad_norm": 1.8686020288690421, "learning_rate": 3.3408168364261104e-06, "loss": 0.5935721397399902, "step": 6134 }, { "epoch": 1.491249392318911, "grad_norm": 1.5846281423062543, "learning_rate": 3.33781877464056e-06, "loss": 0.5700930953025818, "step": 6135 }, { "epoch": 1.4914924647544967, "grad_norm": 1.9445325331307153, "learning_rate": 3.3348217891890645e-06, "loss": 0.4816528856754303, "step": 6136 }, { "epoch": 1.4917355371900827, "grad_norm": 1.736323746094503, "learning_rate": 3.331825880555807e-06, "loss": 0.5424349308013916, "step": 6137 }, { "epoch": 1.4919786096256684, "grad_norm": 1.648125351636026, "learning_rate": 3.32883104922481e-06, "loss": 0.4711175858974457, "step": 6138 }, { "epoch": 1.4922216820612544, "grad_norm": 1.557590204638322, "learning_rate": 3.3258372956799077e-06, "loss": 0.5153903365135193, "step": 6139 }, { "epoch": 1.4924647544968401, "grad_norm": 1.7167921694400634, "learning_rate": 3.3228446204047715e-06, "loss": 0.6070417761802673, "step": 6140 }, { "epoch": 1.4927078269324259, "grad_norm": 1.5406928707438916, "learning_rate": 3.3198530238828973e-06, "loss": 0.5548431873321533, "step": 6141 }, { "epoch": 1.4929508993680116, "grad_norm": 1.5731761256239867, "learning_rate": 3.316862506597598e-06, "loss": 0.6738398671150208, "step": 6142 }, { "epoch": 1.4931939718035974, "grad_norm": 1.5406040787269821, "learning_rate": 3.313873069032023e-06, "loss": 0.5756639242172241, "step": 6143 }, { "epoch": 1.4934370442391833, "grad_norm": 1.4999453901584725, "learning_rate": 3.3108847116691366e-06, "loss": 0.5368316173553467, "step": 6144 }, { "epoch": 1.493680116674769, "grad_norm": 1.5672536621323954, "learning_rate": 3.3078974349917413e-06, "loss": 0.6425111293792725, "step": 6145 }, { "epoch": 1.4939231891103548, "grad_norm": 1.58923275386933, "learning_rate": 3.30491123948245e-06, "loss": 0.4143001437187195, "step": 6146 }, { "epoch": 1.4941662615459408, "grad_norm": 1.895966024714462, "learning_rate": 3.301926125623717e-06, "loss": 0.4918839931488037, "step": 6147 }, { "epoch": 1.4944093339815265, "grad_norm": 1.563056346679132, "learning_rate": 3.2989420938978066e-06, "loss": 0.6243748068809509, "step": 6148 }, { "epoch": 1.4946524064171123, "grad_norm": 1.3410625720633622, "learning_rate": 3.2959591447868177e-06, "loss": 0.512616753578186, "step": 6149 }, { "epoch": 1.494895478852698, "grad_norm": 1.5176786523528483, "learning_rate": 3.292977278772678e-06, "loss": 0.5660443305969238, "step": 6150 }, { "epoch": 1.495138551288284, "grad_norm": 1.7265822641263842, "learning_rate": 3.289996496337123e-06, "loss": 0.5267668962478638, "step": 6151 }, { "epoch": 1.4953816237238697, "grad_norm": 1.54910378649384, "learning_rate": 3.2870167979617363e-06, "loss": 0.4811341166496277, "step": 6152 }, { "epoch": 1.4956246961594555, "grad_norm": 1.9737547387747207, "learning_rate": 3.2840381841279044e-06, "loss": 0.5815338492393494, "step": 6153 }, { "epoch": 1.4958677685950414, "grad_norm": 1.5512655072858674, "learning_rate": 3.2810606553168555e-06, "loss": 0.5285027027130127, "step": 6154 }, { "epoch": 1.4961108410306272, "grad_norm": 1.4403436542882873, "learning_rate": 3.2780842120096303e-06, "loss": 0.6546814441680908, "step": 6155 }, { "epoch": 1.496353913466213, "grad_norm": 1.6770201904636086, "learning_rate": 3.275108854687101e-06, "loss": 0.5796707272529602, "step": 6156 }, { "epoch": 1.4965969859017987, "grad_norm": 1.7630772029042274, "learning_rate": 3.272134583829966e-06, "loss": 0.5315073728561401, "step": 6157 }, { "epoch": 1.4968400583373844, "grad_norm": 1.5208196949285513, "learning_rate": 3.2691613999187434e-06, "loss": 0.46512529253959656, "step": 6158 }, { "epoch": 1.4970831307729704, "grad_norm": 1.81638991690402, "learning_rate": 3.266189303433778e-06, "loss": 0.5311630964279175, "step": 6159 }, { "epoch": 1.4973262032085561, "grad_norm": 1.6289402595318114, "learning_rate": 3.2632182948552327e-06, "loss": 0.4938935339450836, "step": 6160 }, { "epoch": 1.4975692756441419, "grad_norm": 1.4324421292794556, "learning_rate": 3.2602483746631087e-06, "loss": 0.6439932584762573, "step": 6161 }, { "epoch": 1.4978123480797279, "grad_norm": 1.5945319419703565, "learning_rate": 3.257279543337214e-06, "loss": 0.5845648050308228, "step": 6162 }, { "epoch": 1.4980554205153136, "grad_norm": 1.8098938810526912, "learning_rate": 3.254311801357196e-06, "loss": 0.44599753618240356, "step": 6163 }, { "epoch": 1.4982984929508993, "grad_norm": 1.4893348448988473, "learning_rate": 3.251345149202516e-06, "loss": 0.4843748211860657, "step": 6164 }, { "epoch": 1.498541565386485, "grad_norm": 1.4627820614019194, "learning_rate": 3.2483795873524625e-06, "loss": 0.542782187461853, "step": 6165 }, { "epoch": 1.498784637822071, "grad_norm": 1.7114416693166825, "learning_rate": 3.2454151162861503e-06, "loss": 0.5281667113304138, "step": 6166 }, { "epoch": 1.4990277102576568, "grad_norm": 1.609253193463236, "learning_rate": 3.242451736482518e-06, "loss": 0.7326400279998779, "step": 6167 }, { "epoch": 1.4992707826932425, "grad_norm": 1.5693236284042802, "learning_rate": 3.239489448420323e-06, "loss": 0.5919598340988159, "step": 6168 }, { "epoch": 1.4995138551288285, "grad_norm": 1.444965898203384, "learning_rate": 3.236528252578146e-06, "loss": 0.45607948303222656, "step": 6169 }, { "epoch": 1.4997569275644143, "grad_norm": 1.547191139763152, "learning_rate": 3.2335681494343994e-06, "loss": 0.5435362458229065, "step": 6170 }, { "epoch": 1.5, "grad_norm": 1.9055178650457436, "learning_rate": 3.2306091394673088e-06, "loss": 0.5199649333953857, "step": 6171 }, { "epoch": 1.5002430724355857, "grad_norm": 1.9451508026148074, "learning_rate": 3.2276512231549353e-06, "loss": 0.7384029626846313, "step": 6172 }, { "epoch": 1.5004861448711715, "grad_norm": 1.5606383089734053, "learning_rate": 3.224694400975145e-06, "loss": 0.5531542301177979, "step": 6173 }, { "epoch": 1.5007292173067575, "grad_norm": 1.740669821380043, "learning_rate": 3.221738673405652e-06, "loss": 0.5732556581497192, "step": 6174 }, { "epoch": 1.5009722897423432, "grad_norm": 1.5439844152871949, "learning_rate": 3.218784040923971e-06, "loss": 0.4996248483657837, "step": 6175 }, { "epoch": 1.5012153621779292, "grad_norm": 1.5616504638857474, "learning_rate": 3.2158305040074555e-06, "loss": 0.44825005531311035, "step": 6176 }, { "epoch": 1.501458434613515, "grad_norm": 1.8140753097744677, "learning_rate": 3.2128780631332712e-06, "loss": 0.7424123287200928, "step": 6177 }, { "epoch": 1.5017015070491007, "grad_norm": 1.8668818852404077, "learning_rate": 3.209926718778409e-06, "loss": 0.5905960202217102, "step": 6178 }, { "epoch": 1.5019445794846864, "grad_norm": 1.58684889287259, "learning_rate": 3.2069764714196894e-06, "loss": 0.47980114817619324, "step": 6179 }, { "epoch": 1.5021876519202721, "grad_norm": 1.6646195582027925, "learning_rate": 3.2040273215337447e-06, "loss": 0.6779584884643555, "step": 6180 }, { "epoch": 1.502430724355858, "grad_norm": 1.540615743428457, "learning_rate": 3.2010792695970407e-06, "loss": 0.5922008752822876, "step": 6181 }, { "epoch": 1.5026737967914439, "grad_norm": 1.875695189425896, "learning_rate": 3.19813231608586e-06, "loss": 0.5590148568153381, "step": 6182 }, { "epoch": 1.5029168692270296, "grad_norm": 1.8320286971821251, "learning_rate": 3.1951864614763127e-06, "loss": 0.5369859933853149, "step": 6183 }, { "epoch": 1.5031599416626156, "grad_norm": 1.6402498499598517, "learning_rate": 3.19224170624432e-06, "loss": 0.560536801815033, "step": 6184 }, { "epoch": 1.5034030140982013, "grad_norm": 1.5435623115575032, "learning_rate": 3.1892980508656414e-06, "loss": 0.4591201841831207, "step": 6185 }, { "epoch": 1.503646086533787, "grad_norm": 1.9004644462822555, "learning_rate": 3.186355495815845e-06, "loss": 0.5516560077667236, "step": 6186 }, { "epoch": 1.5038891589693728, "grad_norm": 1.5655950556688591, "learning_rate": 3.183414041570324e-06, "loss": 0.5857201218605042, "step": 6187 }, { "epoch": 1.5041322314049586, "grad_norm": 1.821890010984531, "learning_rate": 3.180473688604303e-06, "loss": 0.6055315732955933, "step": 6188 }, { "epoch": 1.5043753038405445, "grad_norm": 1.6425204718647062, "learning_rate": 3.177534437392813e-06, "loss": 0.731730580329895, "step": 6189 }, { "epoch": 1.5046183762761303, "grad_norm": 1.7940940202123015, "learning_rate": 3.1745962884107285e-06, "loss": 0.5595930218696594, "step": 6190 }, { "epoch": 1.5048614487117162, "grad_norm": 1.8378196787224, "learning_rate": 3.171659242132722e-06, "loss": 0.5745985507965088, "step": 6191 }, { "epoch": 1.505104521147302, "grad_norm": 1.668151736387817, "learning_rate": 3.168723299033307e-06, "loss": 0.46940910816192627, "step": 6192 }, { "epoch": 1.5053475935828877, "grad_norm": 1.5788697255234383, "learning_rate": 3.1657884595868037e-06, "loss": 0.5476244688034058, "step": 6193 }, { "epoch": 1.5055906660184735, "grad_norm": 1.669330124167921, "learning_rate": 3.16285472426737e-06, "loss": 0.5847263336181641, "step": 6194 }, { "epoch": 1.5058337384540592, "grad_norm": 1.7875384750903054, "learning_rate": 3.159922093548967e-06, "loss": 0.6399456262588501, "step": 6195 }, { "epoch": 1.506076810889645, "grad_norm": 1.5147187376682874, "learning_rate": 3.156990567905396e-06, "loss": 0.5685648918151855, "step": 6196 }, { "epoch": 1.506319883325231, "grad_norm": 1.6491846937252106, "learning_rate": 3.154060147810266e-06, "loss": 0.6018037796020508, "step": 6197 }, { "epoch": 1.5065629557608167, "grad_norm": 2.061809686421917, "learning_rate": 3.1511308337370073e-06, "loss": 0.660995364189148, "step": 6198 }, { "epoch": 1.5068060281964026, "grad_norm": 1.7615762721275952, "learning_rate": 3.148202626158887e-06, "loss": 0.584109902381897, "step": 6199 }, { "epoch": 1.5070491006319884, "grad_norm": 1.781228162286586, "learning_rate": 3.1452755255489754e-06, "loss": 0.5502222776412964, "step": 6200 }, { "epoch": 1.5072921730675741, "grad_norm": 1.5750903117113062, "learning_rate": 3.142349532380177e-06, "loss": 0.5526819229125977, "step": 6201 }, { "epoch": 1.5075352455031599, "grad_norm": 1.6519005301962746, "learning_rate": 3.1394246471252043e-06, "loss": 0.5872712731361389, "step": 6202 }, { "epoch": 1.5077783179387456, "grad_norm": 1.6601781285060595, "learning_rate": 3.136500870256607e-06, "loss": 0.6969867944717407, "step": 6203 }, { "epoch": 1.5080213903743316, "grad_norm": 1.8144349693546087, "learning_rate": 3.1335782022467376e-06, "loss": 0.5378497242927551, "step": 6204 }, { "epoch": 1.5082644628099173, "grad_norm": 1.5609800507092315, "learning_rate": 3.1306566435677877e-06, "loss": 0.5807783603668213, "step": 6205 }, { "epoch": 1.5085075352455033, "grad_norm": 1.5547103946362695, "learning_rate": 3.127736194691754e-06, "loss": 0.5365554690361023, "step": 6206 }, { "epoch": 1.508750607681089, "grad_norm": 1.752560819840873, "learning_rate": 3.1248168560904626e-06, "loss": 0.46362102031707764, "step": 6207 }, { "epoch": 1.5089936801166748, "grad_norm": 1.4562851907995749, "learning_rate": 3.1218986282355635e-06, "loss": 0.585692822933197, "step": 6208 }, { "epoch": 1.5092367525522605, "grad_norm": 1.5939790037421422, "learning_rate": 3.1189815115985136e-06, "loss": 0.5083990693092346, "step": 6209 }, { "epoch": 1.5094798249878463, "grad_norm": 1.942444448845612, "learning_rate": 3.1160655066506074e-06, "loss": 0.5584964752197266, "step": 6210 }, { "epoch": 1.509722897423432, "grad_norm": 1.5778081040629934, "learning_rate": 3.113150613862943e-06, "loss": 0.4855536222457886, "step": 6211 }, { "epoch": 1.509965969859018, "grad_norm": 1.6784910404937998, "learning_rate": 3.110236833706454e-06, "loss": 0.5641512870788574, "step": 6212 }, { "epoch": 1.510209042294604, "grad_norm": 1.8000560525255265, "learning_rate": 3.1073241666518817e-06, "loss": 0.6727893352508545, "step": 6213 }, { "epoch": 1.5104521147301897, "grad_norm": 1.3332631326972149, "learning_rate": 3.1044126131697947e-06, "loss": 0.4129648804664612, "step": 6214 }, { "epoch": 1.5106951871657754, "grad_norm": 1.7542843907895893, "learning_rate": 3.1015021737305837e-06, "loss": 0.5074397325515747, "step": 6215 }, { "epoch": 1.5109382596013612, "grad_norm": 1.879922207593419, "learning_rate": 3.0985928488044515e-06, "loss": 0.5928588509559631, "step": 6216 }, { "epoch": 1.511181332036947, "grad_norm": 1.6224282819603633, "learning_rate": 3.0956846388614294e-06, "loss": 0.5077654719352722, "step": 6217 }, { "epoch": 1.5114244044725327, "grad_norm": 1.6288606489295483, "learning_rate": 3.092777544371358e-06, "loss": 0.6404478549957275, "step": 6218 }, { "epoch": 1.5116674769081186, "grad_norm": 1.8136791808742678, "learning_rate": 3.0898715658039113e-06, "loss": 0.5502639412879944, "step": 6219 }, { "epoch": 1.5119105493437044, "grad_norm": 1.7466369852836763, "learning_rate": 3.0869667036285678e-06, "loss": 0.5950352549552917, "step": 6220 }, { "epoch": 1.5121536217792904, "grad_norm": 1.6630608500068824, "learning_rate": 3.084062958314642e-06, "loss": 0.5003497004508972, "step": 6221 }, { "epoch": 1.512396694214876, "grad_norm": 1.6182065983563874, "learning_rate": 3.081160330331253e-06, "loss": 0.6124330759048462, "step": 6222 }, { "epoch": 1.5126397666504618, "grad_norm": 1.340607073868128, "learning_rate": 3.0782588201473472e-06, "loss": 0.39436668157577515, "step": 6223 }, { "epoch": 1.5128828390860476, "grad_norm": 1.65296234001472, "learning_rate": 3.0753584282316938e-06, "loss": 0.508926510810852, "step": 6224 }, { "epoch": 1.5131259115216333, "grad_norm": 2.030878081667963, "learning_rate": 3.0724591550528705e-06, "loss": 0.5855730772018433, "step": 6225 }, { "epoch": 1.5133689839572193, "grad_norm": 1.9216502471351824, "learning_rate": 3.0695610010792854e-06, "loss": 0.5600732564926147, "step": 6226 }, { "epoch": 1.513612056392805, "grad_norm": 1.5714142574949979, "learning_rate": 3.0666639667791553e-06, "loss": 0.5123807787895203, "step": 6227 }, { "epoch": 1.513855128828391, "grad_norm": 1.5674257013580817, "learning_rate": 3.0637680526205282e-06, "loss": 0.48952487111091614, "step": 6228 }, { "epoch": 1.5140982012639768, "grad_norm": 1.7327335931708987, "learning_rate": 3.0608732590712574e-06, "loss": 0.6051017642021179, "step": 6229 }, { "epoch": 1.5143412736995625, "grad_norm": 1.5061339994186618, "learning_rate": 3.057979586599027e-06, "loss": 0.5487445592880249, "step": 6230 }, { "epoch": 1.5145843461351483, "grad_norm": 1.5612136794782534, "learning_rate": 3.055087035671337e-06, "loss": 0.4434965252876282, "step": 6231 }, { "epoch": 1.514827418570734, "grad_norm": 1.653722195972789, "learning_rate": 3.052195606755498e-06, "loss": 0.48344871401786804, "step": 6232 }, { "epoch": 1.5150704910063197, "grad_norm": 1.596769770250402, "learning_rate": 3.0493053003186524e-06, "loss": 0.553318202495575, "step": 6233 }, { "epoch": 1.5153135634419057, "grad_norm": 1.9864046220048552, "learning_rate": 3.0464161168277496e-06, "loss": 0.6797187328338623, "step": 6234 }, { "epoch": 1.5155566358774915, "grad_norm": 1.797513445413984, "learning_rate": 3.0435280567495673e-06, "loss": 0.39102980494499207, "step": 6235 }, { "epoch": 1.5157997083130774, "grad_norm": 1.7400821113805878, "learning_rate": 3.0406411205506924e-06, "loss": 0.5270326733589172, "step": 6236 }, { "epoch": 1.5160427807486632, "grad_norm": 1.4865805261871863, "learning_rate": 3.0377553086975397e-06, "loss": 0.5809503793716431, "step": 6237 }, { "epoch": 1.516285853184249, "grad_norm": 1.8249422426990494, "learning_rate": 3.0348706216563307e-06, "loss": 0.4396858215332031, "step": 6238 }, { "epoch": 1.5165289256198347, "grad_norm": 1.793628631704933, "learning_rate": 3.031987059893118e-06, "loss": 0.7467952966690063, "step": 6239 }, { "epoch": 1.5167719980554204, "grad_norm": 1.6084364122050823, "learning_rate": 3.0291046238737664e-06, "loss": 0.6015782952308655, "step": 6240 }, { "epoch": 1.5170150704910064, "grad_norm": 1.5664663695419994, "learning_rate": 3.026223314063954e-06, "loss": 0.7046892642974854, "step": 6241 }, { "epoch": 1.5172581429265921, "grad_norm": 1.8160974290541534, "learning_rate": 3.0233431309291883e-06, "loss": 0.7352972626686096, "step": 6242 }, { "epoch": 1.517501215362178, "grad_norm": 1.503518687801411, "learning_rate": 3.0204640749347814e-06, "loss": 0.5471131205558777, "step": 6243 }, { "epoch": 1.5177442877977638, "grad_norm": 1.6668544785566204, "learning_rate": 3.0175861465458755e-06, "loss": 0.5820969343185425, "step": 6244 }, { "epoch": 1.5179873602333496, "grad_norm": 1.6325330701391232, "learning_rate": 3.014709346227421e-06, "loss": 0.6443871259689331, "step": 6245 }, { "epoch": 1.5182304326689353, "grad_norm": 1.5733688262904442, "learning_rate": 3.011833674444197e-06, "loss": 0.43270134925842285, "step": 6246 }, { "epoch": 1.518473505104521, "grad_norm": 1.7666641095377007, "learning_rate": 3.008959131660785e-06, "loss": 0.5899980068206787, "step": 6247 }, { "epoch": 1.5187165775401068, "grad_norm": 1.5851168239269744, "learning_rate": 3.006085718341596e-06, "loss": 0.5503591299057007, "step": 6248 }, { "epoch": 1.5189596499756928, "grad_norm": 1.577792700762672, "learning_rate": 3.0032134349508603e-06, "loss": 0.4702845513820648, "step": 6249 }, { "epoch": 1.5192027224112785, "grad_norm": 1.4880714386785845, "learning_rate": 3.000342281952613e-06, "loss": 0.5835298895835876, "step": 6250 }, { "epoch": 1.5194457948468645, "grad_norm": 1.795195693911627, "learning_rate": 2.9974722598107197e-06, "loss": 0.5898001194000244, "step": 6251 }, { "epoch": 1.5196888672824502, "grad_norm": 1.6855675618280166, "learning_rate": 2.994603368988853e-06, "loss": 0.653517484664917, "step": 6252 }, { "epoch": 1.519931939718036, "grad_norm": 1.5736324377931117, "learning_rate": 2.9917356099505137e-06, "loss": 0.5123723745346069, "step": 6253 }, { "epoch": 1.5201750121536217, "grad_norm": 1.8354723404057005, "learning_rate": 2.988868983159007e-06, "loss": 0.4486982822418213, "step": 6254 }, { "epoch": 1.5204180845892075, "grad_norm": 1.655621450546648, "learning_rate": 2.9860034890774634e-06, "loss": 0.6372030973434448, "step": 6255 }, { "epoch": 1.5206611570247934, "grad_norm": 1.5485010393542875, "learning_rate": 2.9831391281688348e-06, "loss": 0.5090519189834595, "step": 6256 }, { "epoch": 1.5209042294603792, "grad_norm": 1.6887084815606697, "learning_rate": 2.980275900895875e-06, "loss": 0.4251630902290344, "step": 6257 }, { "epoch": 1.5211473018959651, "grad_norm": 1.735083505521331, "learning_rate": 2.97741380772117e-06, "loss": 0.5926200151443481, "step": 6258 }, { "epoch": 1.5213903743315509, "grad_norm": 1.3556272531655227, "learning_rate": 2.974552849107112e-06, "loss": 0.47338974475860596, "step": 6259 }, { "epoch": 1.5216334467671366, "grad_norm": 1.3144686504175311, "learning_rate": 2.971693025515917e-06, "loss": 0.4762915372848511, "step": 6260 }, { "epoch": 1.5218765192027224, "grad_norm": 1.5388454453493512, "learning_rate": 2.968834337409612e-06, "loss": 0.48438796401023865, "step": 6261 }, { "epoch": 1.5221195916383081, "grad_norm": 1.6909770256833665, "learning_rate": 2.9659767852500475e-06, "loss": 0.5389005541801453, "step": 6262 }, { "epoch": 1.5223626640738939, "grad_norm": 1.4997469784171618, "learning_rate": 2.9631203694988776e-06, "loss": 0.6412667036056519, "step": 6263 }, { "epoch": 1.5226057365094798, "grad_norm": 1.7667161235764741, "learning_rate": 2.9602650906175934e-06, "loss": 0.6092220544815063, "step": 6264 }, { "epoch": 1.5228488089450656, "grad_norm": 2.0523557641557795, "learning_rate": 2.9574109490674806e-06, "loss": 0.560950517654419, "step": 6265 }, { "epoch": 1.5230918813806515, "grad_norm": 1.6316242457078727, "learning_rate": 2.9545579453096586e-06, "loss": 0.4195300042629242, "step": 6266 }, { "epoch": 1.5233349538162373, "grad_norm": 1.646965259641154, "learning_rate": 2.95170607980505e-06, "loss": 0.6635493040084839, "step": 6267 }, { "epoch": 1.523578026251823, "grad_norm": 1.7400710772362178, "learning_rate": 2.9488553530143983e-06, "loss": 0.7825971245765686, "step": 6268 }, { "epoch": 1.5238210986874088, "grad_norm": 1.627177976705471, "learning_rate": 2.9460057653982676e-06, "loss": 0.550312340259552, "step": 6269 }, { "epoch": 1.5240641711229945, "grad_norm": 1.60856438916096, "learning_rate": 2.943157317417028e-06, "loss": 0.6811631917953491, "step": 6270 }, { "epoch": 1.5243072435585805, "grad_norm": 1.7642105737874219, "learning_rate": 2.9403100095308756e-06, "loss": 0.6356663703918457, "step": 6271 }, { "epoch": 1.5245503159941662, "grad_norm": 1.5742794871167711, "learning_rate": 2.937463842199817e-06, "loss": 0.5918157696723938, "step": 6272 }, { "epoch": 1.5247933884297522, "grad_norm": 1.5652403960054495, "learning_rate": 2.93461881588368e-06, "loss": 0.5137485265731812, "step": 6273 }, { "epoch": 1.525036460865338, "grad_norm": 1.8412635847945906, "learning_rate": 2.9317749310420953e-06, "loss": 0.4915546774864197, "step": 6274 }, { "epoch": 1.5252795333009237, "grad_norm": 1.5902713685728485, "learning_rate": 2.9289321881345257e-06, "loss": 0.680790364742279, "step": 6275 }, { "epoch": 1.5255226057365094, "grad_norm": 1.5558816948889889, "learning_rate": 2.9260905876202374e-06, "loss": 0.5523684024810791, "step": 6276 }, { "epoch": 1.5257656781720952, "grad_norm": 1.8803022569812675, "learning_rate": 2.9232501299583126e-06, "loss": 0.6290777921676636, "step": 6277 }, { "epoch": 1.526008750607681, "grad_norm": 1.4506187145468754, "learning_rate": 2.9204108156076593e-06, "loss": 0.6882456541061401, "step": 6278 }, { "epoch": 1.526251823043267, "grad_norm": 1.6064227178037391, "learning_rate": 2.917572645026987e-06, "loss": 0.656761646270752, "step": 6279 }, { "epoch": 1.5264948954788529, "grad_norm": 1.590540262306975, "learning_rate": 2.91473561867483e-06, "loss": 0.6494688987731934, "step": 6280 }, { "epoch": 1.5267379679144386, "grad_norm": 1.8086099684426016, "learning_rate": 2.9118997370095357e-06, "loss": 0.6749274730682373, "step": 6281 }, { "epoch": 1.5269810403500244, "grad_norm": 1.7386642871880977, "learning_rate": 2.9090650004892673e-06, "loss": 0.6735051274299622, "step": 6282 }, { "epoch": 1.52722411278561, "grad_norm": 2.1864056134390735, "learning_rate": 2.906231409571998e-06, "loss": 0.6370229721069336, "step": 6283 }, { "epoch": 1.5274671852211958, "grad_norm": 1.5528687936459287, "learning_rate": 2.9033989647155227e-06, "loss": 0.5296022295951843, "step": 6284 }, { "epoch": 1.5277102576567816, "grad_norm": 1.6200716580527688, "learning_rate": 2.9005676663774464e-06, "loss": 0.6445057392120361, "step": 6285 }, { "epoch": 1.5279533300923676, "grad_norm": 1.737140437386958, "learning_rate": 2.8977375150151865e-06, "loss": 0.5954973697662354, "step": 6286 }, { "epoch": 1.5281964025279533, "grad_norm": 2.129070826358343, "learning_rate": 2.8949085110859854e-06, "loss": 0.6248170137405396, "step": 6287 }, { "epoch": 1.5284394749635393, "grad_norm": 1.4586886904758851, "learning_rate": 2.892080655046885e-06, "loss": 0.6114848256111145, "step": 6288 }, { "epoch": 1.528682547399125, "grad_norm": 1.2821512343691477, "learning_rate": 2.889253947354761e-06, "loss": 0.5655726790428162, "step": 6289 }, { "epoch": 1.5289256198347108, "grad_norm": 1.5201257709954428, "learning_rate": 2.8864283884662845e-06, "loss": 0.505165159702301, "step": 6290 }, { "epoch": 1.5291686922702965, "grad_norm": 1.8199925154788579, "learning_rate": 2.8836039788379566e-06, "loss": 0.5878161191940308, "step": 6291 }, { "epoch": 1.5294117647058822, "grad_norm": 1.7109783767520317, "learning_rate": 2.8807807189260774e-06, "loss": 0.7107229232788086, "step": 6292 }, { "epoch": 1.529654837141468, "grad_norm": 1.5020701445906146, "learning_rate": 2.8779586091867774e-06, "loss": 0.6247086524963379, "step": 6293 }, { "epoch": 1.529897909577054, "grad_norm": 2.0332520229413675, "learning_rate": 2.8751376500759854e-06, "loss": 0.6709868907928467, "step": 6294 }, { "epoch": 1.53014098201264, "grad_norm": 1.6612158558373327, "learning_rate": 2.87231784204946e-06, "loss": 0.6290204524993896, "step": 6295 }, { "epoch": 1.5303840544482257, "grad_norm": 1.5099975549402174, "learning_rate": 2.8694991855627595e-06, "loss": 0.5361341238021851, "step": 6296 }, { "epoch": 1.5306271268838114, "grad_norm": 1.4493299953818322, "learning_rate": 2.866681681071265e-06, "loss": 0.6146445274353027, "step": 6297 }, { "epoch": 1.5308701993193972, "grad_norm": 1.7344292988122012, "learning_rate": 2.8638653290301723e-06, "loss": 0.504827618598938, "step": 6298 }, { "epoch": 1.531113271754983, "grad_norm": 1.8342134298886386, "learning_rate": 2.8610501298944827e-06, "loss": 0.5890001058578491, "step": 6299 }, { "epoch": 1.5313563441905687, "grad_norm": 1.5641628152087197, "learning_rate": 2.8582360841190217e-06, "loss": 0.6245530843734741, "step": 6300 }, { "epoch": 1.5315994166261546, "grad_norm": 1.481091349563085, "learning_rate": 2.8554231921584164e-06, "loss": 0.606757402420044, "step": 6301 }, { "epoch": 1.5318424890617404, "grad_norm": 1.771248244040117, "learning_rate": 2.8526114544671224e-06, "loss": 0.4765127897262573, "step": 6302 }, { "epoch": 1.5320855614973263, "grad_norm": 1.964415378488203, "learning_rate": 2.849800871499393e-06, "loss": 0.49782249331474304, "step": 6303 }, { "epoch": 1.532328633932912, "grad_norm": 1.4508882510633752, "learning_rate": 2.8469914437093062e-06, "loss": 0.5384153127670288, "step": 6304 }, { "epoch": 1.5325717063684978, "grad_norm": 1.5344291215345813, "learning_rate": 2.844183171550753e-06, "loss": 0.626730740070343, "step": 6305 }, { "epoch": 1.5328147788040836, "grad_norm": 1.7575911403273925, "learning_rate": 2.8413760554774285e-06, "loss": 0.5542163848876953, "step": 6306 }, { "epoch": 1.5330578512396693, "grad_norm": 1.6918813946264704, "learning_rate": 2.838570095942852e-06, "loss": 0.6135622262954712, "step": 6307 }, { "epoch": 1.5333009236752553, "grad_norm": 1.8016797516008465, "learning_rate": 2.8357652934003465e-06, "loss": 0.5157964825630188, "step": 6308 }, { "epoch": 1.533543996110841, "grad_norm": 1.6420725499287134, "learning_rate": 2.8329616483030574e-06, "loss": 0.7407187223434448, "step": 6309 }, { "epoch": 1.533787068546427, "grad_norm": 1.4922512480773695, "learning_rate": 2.8301591611039326e-06, "loss": 0.5825315117835999, "step": 6310 }, { "epoch": 1.5340301409820127, "grad_norm": 1.6612557965511459, "learning_rate": 2.827357832255745e-06, "loss": 0.6477112770080566, "step": 6311 }, { "epoch": 1.5342732134175985, "grad_norm": 1.5553953049731506, "learning_rate": 2.824557662211067e-06, "loss": 0.4336182475090027, "step": 6312 }, { "epoch": 1.5345162858531842, "grad_norm": 2.0325929694436113, "learning_rate": 2.821758651422294e-06, "loss": 0.5765884518623352, "step": 6313 }, { "epoch": 1.53475935828877, "grad_norm": 1.4576789182593877, "learning_rate": 2.8189608003416348e-06, "loss": 0.3972592353820801, "step": 6314 }, { "epoch": 1.5350024307243557, "grad_norm": 1.5826577447596344, "learning_rate": 2.8161641094211003e-06, "loss": 0.7254438400268555, "step": 6315 }, { "epoch": 1.5352455031599417, "grad_norm": 1.5248953278521837, "learning_rate": 2.8133685791125263e-06, "loss": 0.5280328989028931, "step": 6316 }, { "epoch": 1.5354885755955274, "grad_norm": 1.7593607987049482, "learning_rate": 2.8105742098675504e-06, "loss": 0.6178725361824036, "step": 6317 }, { "epoch": 1.5357316480311134, "grad_norm": 1.6638367986942268, "learning_rate": 2.8077810021376318e-06, "loss": 0.6791641712188721, "step": 6318 }, { "epoch": 1.5359747204666991, "grad_norm": 1.607222015779795, "learning_rate": 2.804988956374033e-06, "loss": 0.5436784625053406, "step": 6319 }, { "epoch": 1.5362177929022849, "grad_norm": 1.6643460737688178, "learning_rate": 2.8021980730278387e-06, "loss": 0.5232069492340088, "step": 6320 }, { "epoch": 1.5364608653378706, "grad_norm": 1.510773844671413, "learning_rate": 2.799408352549936e-06, "loss": 0.5581072568893433, "step": 6321 }, { "epoch": 1.5367039377734564, "grad_norm": 1.597930680347974, "learning_rate": 2.7966197953910314e-06, "loss": 0.5528696775436401, "step": 6322 }, { "epoch": 1.5369470102090423, "grad_norm": 1.642865924116791, "learning_rate": 2.7938324020016427e-06, "loss": 0.530005693435669, "step": 6323 }, { "epoch": 1.537190082644628, "grad_norm": 1.4342411770914287, "learning_rate": 2.7910461728320924e-06, "loss": 0.4400220811367035, "step": 6324 }, { "epoch": 1.537433155080214, "grad_norm": 1.4884477356651793, "learning_rate": 2.788261108332528e-06, "loss": 0.5560040473937988, "step": 6325 }, { "epoch": 1.5376762275157998, "grad_norm": 1.7689125387979157, "learning_rate": 2.7854772089528936e-06, "loss": 0.519048810005188, "step": 6326 }, { "epoch": 1.5379192999513855, "grad_norm": 1.5998651071115364, "learning_rate": 2.7826944751429584e-06, "loss": 0.6856247186660767, "step": 6327 }, { "epoch": 1.5381623723869713, "grad_norm": 1.6086110012455703, "learning_rate": 2.779912907352292e-06, "loss": 0.5612636804580688, "step": 6328 }, { "epoch": 1.538405444822557, "grad_norm": 1.4360594370432411, "learning_rate": 2.7771325060302844e-06, "loss": 0.4355263113975525, "step": 6329 }, { "epoch": 1.5386485172581428, "grad_norm": 1.6309665850349258, "learning_rate": 2.7743532716261366e-06, "loss": 0.7860581874847412, "step": 6330 }, { "epoch": 1.5388915896937287, "grad_norm": 1.7083518376245586, "learning_rate": 2.771575204588853e-06, "loss": 0.7362702488899231, "step": 6331 }, { "epoch": 1.5391346621293145, "grad_norm": 1.617102934761403, "learning_rate": 2.7687983053672605e-06, "loss": 0.5684025287628174, "step": 6332 }, { "epoch": 1.5393777345649005, "grad_norm": 1.7999170821123782, "learning_rate": 2.7660225744099858e-06, "loss": 0.5794370174407959, "step": 6333 }, { "epoch": 1.5396208070004862, "grad_norm": 1.5917421947760428, "learning_rate": 2.763248012165477e-06, "loss": 0.5916699171066284, "step": 6334 }, { "epoch": 1.539863879436072, "grad_norm": 1.3283504773539947, "learning_rate": 2.760474619081985e-06, "loss": 0.5106242895126343, "step": 6335 }, { "epoch": 1.5401069518716577, "grad_norm": 1.3600818483566457, "learning_rate": 2.757702395607582e-06, "loss": 0.49772730469703674, "step": 6336 }, { "epoch": 1.5403500243072434, "grad_norm": 1.7883065216693708, "learning_rate": 2.754931342190138e-06, "loss": 0.5369423627853394, "step": 6337 }, { "epoch": 1.5405930967428294, "grad_norm": 1.91095447454717, "learning_rate": 2.752161459277345e-06, "loss": 0.7401414513587952, "step": 6338 }, { "epoch": 1.5408361691784151, "grad_norm": 1.7368267252881104, "learning_rate": 2.749392747316704e-06, "loss": 0.5569890737533569, "step": 6339 }, { "epoch": 1.5410792416140011, "grad_norm": 1.756716407184983, "learning_rate": 2.7466252067555198e-06, "loss": 0.6278154850006104, "step": 6340 }, { "epoch": 1.5413223140495869, "grad_norm": 1.624789092122435, "learning_rate": 2.743858838040918e-06, "loss": 0.5620070695877075, "step": 6341 }, { "epoch": 1.5415653864851726, "grad_norm": 1.434796008019471, "learning_rate": 2.741093641619824e-06, "loss": 0.4651568531990051, "step": 6342 }, { "epoch": 1.5418084589207584, "grad_norm": 1.7684347152967572, "learning_rate": 2.7383296179389863e-06, "loss": 0.5567047595977783, "step": 6343 }, { "epoch": 1.542051531356344, "grad_norm": 1.535210597418177, "learning_rate": 2.7355667674449514e-06, "loss": 0.5281926393508911, "step": 6344 }, { "epoch": 1.5422946037919298, "grad_norm": 1.4975269298454337, "learning_rate": 2.7328050905840842e-06, "loss": 0.5414185523986816, "step": 6345 }, { "epoch": 1.5425376762275158, "grad_norm": 1.7747623409307456, "learning_rate": 2.7300445878025618e-06, "loss": 0.49654048681259155, "step": 6346 }, { "epoch": 1.5427807486631016, "grad_norm": 1.7703026346214592, "learning_rate": 2.7272852595463605e-06, "loss": 0.6363011002540588, "step": 6347 }, { "epoch": 1.5430238210986875, "grad_norm": 1.7335641062460705, "learning_rate": 2.724527106261281e-06, "loss": 0.6111865043640137, "step": 6348 }, { "epoch": 1.5432668935342733, "grad_norm": 1.4459367786608532, "learning_rate": 2.7217701283929208e-06, "loss": 0.5692081451416016, "step": 6349 }, { "epoch": 1.543509965969859, "grad_norm": 1.4321914119387706, "learning_rate": 2.719014326386701e-06, "loss": 0.38740473985671997, "step": 6350 }, { "epoch": 1.5437530384054448, "grad_norm": 1.3909115008719048, "learning_rate": 2.7162597006878387e-06, "loss": 0.5380280613899231, "step": 6351 }, { "epoch": 1.5439961108410305, "grad_norm": 1.532164587458081, "learning_rate": 2.7135062517413748e-06, "loss": 0.41632354259490967, "step": 6352 }, { "epoch": 1.5442391832766165, "grad_norm": 1.8511660945864463, "learning_rate": 2.7107539799921432e-06, "loss": 0.6329441070556641, "step": 6353 }, { "epoch": 1.5444822557122022, "grad_norm": 1.78766956800397, "learning_rate": 2.7080028858848097e-06, "loss": 0.673367977142334, "step": 6354 }, { "epoch": 1.5447253281477882, "grad_norm": 1.5546555251081737, "learning_rate": 2.705252969863833e-06, "loss": 0.4457361102104187, "step": 6355 }, { "epoch": 1.544968400583374, "grad_norm": 1.4216777408119017, "learning_rate": 2.702504232373482e-06, "loss": 0.4859536290168762, "step": 6356 }, { "epoch": 1.5452114730189597, "grad_norm": 1.8178470231017714, "learning_rate": 2.699756673857845e-06, "loss": 0.5930337905883789, "step": 6357 }, { "epoch": 1.5454545454545454, "grad_norm": 1.5852556263216326, "learning_rate": 2.69701029476081e-06, "loss": 0.7333319187164307, "step": 6358 }, { "epoch": 1.5456976178901312, "grad_norm": 1.5127803593618208, "learning_rate": 2.694265095526084e-06, "loss": 0.6309980154037476, "step": 6359 }, { "epoch": 1.545940690325717, "grad_norm": 1.5526622462608701, "learning_rate": 2.6915210765971724e-06, "loss": 0.5116623640060425, "step": 6360 }, { "epoch": 1.5461837627613029, "grad_norm": 1.750047891515587, "learning_rate": 2.6887782384174e-06, "loss": 0.552532434463501, "step": 6361 }, { "epoch": 1.5464268351968888, "grad_norm": 1.78582973086624, "learning_rate": 2.6860365814298894e-06, "loss": 0.6391340494155884, "step": 6362 }, { "epoch": 1.5466699076324746, "grad_norm": 1.739464599790241, "learning_rate": 2.683296106077591e-06, "loss": 0.6195827126502991, "step": 6363 }, { "epoch": 1.5469129800680603, "grad_norm": 1.5480474157835231, "learning_rate": 2.680556812803242e-06, "loss": 0.5072444081306458, "step": 6364 }, { "epoch": 1.547156052503646, "grad_norm": 1.6792776816601716, "learning_rate": 2.6778187020494086e-06, "loss": 0.642059326171875, "step": 6365 }, { "epoch": 1.5473991249392318, "grad_norm": 1.7809814980767658, "learning_rate": 2.675081774258451e-06, "loss": 0.6270232200622559, "step": 6366 }, { "epoch": 1.5476421973748176, "grad_norm": 1.3619567230276721, "learning_rate": 2.672346029872541e-06, "loss": 0.49714967608451843, "step": 6367 }, { "epoch": 1.5478852698104035, "grad_norm": 1.6130611400385295, "learning_rate": 2.6696114693336707e-06, "loss": 0.527662992477417, "step": 6368 }, { "epoch": 1.5481283422459893, "grad_norm": 1.6697296374339246, "learning_rate": 2.6668780930836245e-06, "loss": 0.5317908525466919, "step": 6369 }, { "epoch": 1.5483714146815752, "grad_norm": 1.7682738928764163, "learning_rate": 2.6641459015640058e-06, "loss": 0.46915972232818604, "step": 6370 }, { "epoch": 1.548614487117161, "grad_norm": 1.627022363323474, "learning_rate": 2.6614148952162255e-06, "loss": 0.7348861694335938, "step": 6371 }, { "epoch": 1.5488575595527467, "grad_norm": 1.2352958456189727, "learning_rate": 2.658685074481505e-06, "loss": 0.4280925393104553, "step": 6372 }, { "epoch": 1.5491006319883325, "grad_norm": 1.7256143484615343, "learning_rate": 2.6559564398008643e-06, "loss": 0.6272016167640686, "step": 6373 }, { "epoch": 1.5493437044239182, "grad_norm": 1.4422837838455238, "learning_rate": 2.6532289916151434e-06, "loss": 0.5551656484603882, "step": 6374 }, { "epoch": 1.549586776859504, "grad_norm": 1.6314424604442692, "learning_rate": 2.650502730364984e-06, "loss": 0.4858303666114807, "step": 6375 }, { "epoch": 1.54982984929509, "grad_norm": 2.2244153785825787, "learning_rate": 2.647777656490834e-06, "loss": 0.6839954853057861, "step": 6376 }, { "epoch": 1.550072921730676, "grad_norm": 1.4700777085093724, "learning_rate": 2.6450537704329593e-06, "loss": 0.6630826592445374, "step": 6377 }, { "epoch": 1.5503159941662616, "grad_norm": 1.9755894512007934, "learning_rate": 2.6423310726314187e-06, "loss": 0.5774043202400208, "step": 6378 }, { "epoch": 1.5505590666018474, "grad_norm": 1.2689557904061277, "learning_rate": 2.6396095635261e-06, "loss": 0.5795080661773682, "step": 6379 }, { "epoch": 1.5508021390374331, "grad_norm": 1.627702967241219, "learning_rate": 2.6368892435566774e-06, "loss": 0.811211347579956, "step": 6380 }, { "epoch": 1.5510452114730189, "grad_norm": 1.2982955045808091, "learning_rate": 2.6341701131626486e-06, "loss": 0.463978111743927, "step": 6381 }, { "epoch": 1.5512882839086046, "grad_norm": 1.7116213063776384, "learning_rate": 2.631452172783309e-06, "loss": 0.524966835975647, "step": 6382 }, { "epoch": 1.5515313563441906, "grad_norm": 2.2828833735419582, "learning_rate": 2.628735422857769e-06, "loss": 0.5398203134536743, "step": 6383 }, { "epoch": 1.5517744287797763, "grad_norm": 1.5688903509876964, "learning_rate": 2.626019863824941e-06, "loss": 0.7418181896209717, "step": 6384 }, { "epoch": 1.5520175012153623, "grad_norm": 1.4449921724145072, "learning_rate": 2.623305496123547e-06, "loss": 0.5524062514305115, "step": 6385 }, { "epoch": 1.552260573650948, "grad_norm": 1.7247842676165992, "learning_rate": 2.620592320192117e-06, "loss": 0.5621213912963867, "step": 6386 }, { "epoch": 1.5525036460865338, "grad_norm": 1.5632063074749718, "learning_rate": 2.617880336468991e-06, "loss": 0.5732566714286804, "step": 6387 }, { "epoch": 1.5527467185221195, "grad_norm": 1.4611654956074092, "learning_rate": 2.6151695453923142e-06, "loss": 0.6628602743148804, "step": 6388 }, { "epoch": 1.5529897909577053, "grad_norm": 1.5058764583039068, "learning_rate": 2.6124599474000347e-06, "loss": 0.5659058094024658, "step": 6389 }, { "epoch": 1.5532328633932913, "grad_norm": 1.7169183215421455, "learning_rate": 2.609751542929917e-06, "loss": 0.6611403226852417, "step": 6390 }, { "epoch": 1.553475935828877, "grad_norm": 1.5773217781423798, "learning_rate": 2.6070443324195228e-06, "loss": 0.6463104486465454, "step": 6391 }, { "epoch": 1.553719008264463, "grad_norm": 1.5995724073951554, "learning_rate": 2.60433831630623e-06, "loss": 0.579795777797699, "step": 6392 }, { "epoch": 1.5539620807000487, "grad_norm": 1.8171787126786816, "learning_rate": 2.601633495027218e-06, "loss": 0.6393204927444458, "step": 6393 }, { "epoch": 1.5542051531356345, "grad_norm": 1.7867786293734322, "learning_rate": 2.598929869019471e-06, "loss": 0.5954485535621643, "step": 6394 }, { "epoch": 1.5544482255712202, "grad_norm": 1.6704037439144277, "learning_rate": 2.596227438719786e-06, "loss": 0.44243693351745605, "step": 6395 }, { "epoch": 1.554691298006806, "grad_norm": 1.6396026337229193, "learning_rate": 2.5935262045647647e-06, "loss": 0.580448567867279, "step": 6396 }, { "epoch": 1.5549343704423917, "grad_norm": 1.5956566055067936, "learning_rate": 2.59082616699082e-06, "loss": 0.5020623207092285, "step": 6397 }, { "epoch": 1.5551774428779777, "grad_norm": 1.7454841186431131, "learning_rate": 2.588127326434158e-06, "loss": 0.5916997194290161, "step": 6398 }, { "epoch": 1.5554205153135634, "grad_norm": 1.7950607962456016, "learning_rate": 2.5854296833308067e-06, "loss": 0.6347808241844177, "step": 6399 }, { "epoch": 1.5556635877491494, "grad_norm": 1.7654549251472027, "learning_rate": 2.5827332381165894e-06, "loss": 0.504876971244812, "step": 6400 }, { "epoch": 1.5559066601847351, "grad_norm": 1.7301409381743156, "learning_rate": 2.5800379912271456e-06, "loss": 0.5674139857292175, "step": 6401 }, { "epoch": 1.5561497326203209, "grad_norm": 1.53476508249059, "learning_rate": 2.577343943097912e-06, "loss": 0.5412570834159851, "step": 6402 }, { "epoch": 1.5563928050559066, "grad_norm": 1.4992814277932736, "learning_rate": 2.5746510941641354e-06, "loss": 0.4624824523925781, "step": 6403 }, { "epoch": 1.5566358774914923, "grad_norm": 1.5379166366204275, "learning_rate": 2.5719594448608755e-06, "loss": 0.49646082520484924, "step": 6404 }, { "epoch": 1.5568789499270783, "grad_norm": 1.5203448912942736, "learning_rate": 2.5692689956229842e-06, "loss": 0.6223228573799133, "step": 6405 }, { "epoch": 1.557122022362664, "grad_norm": 1.6006683025373432, "learning_rate": 2.566579746885134e-06, "loss": 0.6011185050010681, "step": 6406 }, { "epoch": 1.55736509479825, "grad_norm": 1.6608995655998349, "learning_rate": 2.5638916990817897e-06, "loss": 0.5764430165290833, "step": 6407 }, { "epoch": 1.5576081672338358, "grad_norm": 1.6040401773273396, "learning_rate": 2.5612048526472356e-06, "loss": 0.5262502431869507, "step": 6408 }, { "epoch": 1.5578512396694215, "grad_norm": 1.652042762127184, "learning_rate": 2.5585192080155506e-06, "loss": 0.5903193950653076, "step": 6409 }, { "epoch": 1.5580943121050073, "grad_norm": 1.728066410297164, "learning_rate": 2.555834765620628e-06, "loss": 0.5622243285179138, "step": 6410 }, { "epoch": 1.558337384540593, "grad_norm": 1.642569717093, "learning_rate": 2.55315152589616e-06, "loss": 0.5641049742698669, "step": 6411 }, { "epoch": 1.5585804569761788, "grad_norm": 1.8950513252740908, "learning_rate": 2.5504694892756475e-06, "loss": 0.5297061204910278, "step": 6412 }, { "epoch": 1.5588235294117647, "grad_norm": 1.7321797650448723, "learning_rate": 2.5477886561924026e-06, "loss": 0.7391721606254578, "step": 6413 }, { "epoch": 1.5590666018473505, "grad_norm": 1.7537284987981583, "learning_rate": 2.54510902707953e-06, "loss": 0.5836952924728394, "step": 6414 }, { "epoch": 1.5593096742829364, "grad_norm": 1.8799115634565868, "learning_rate": 2.542430602369954e-06, "loss": 0.6242150068283081, "step": 6415 }, { "epoch": 1.5595527467185222, "grad_norm": 1.6361565862518168, "learning_rate": 2.5397533824963917e-06, "loss": 0.552191972732544, "step": 6416 }, { "epoch": 1.559795819154108, "grad_norm": 1.4500326651585878, "learning_rate": 2.5370773678913772e-06, "loss": 0.4470696449279785, "step": 6417 }, { "epoch": 1.5600388915896937, "grad_norm": 1.1618327410846427, "learning_rate": 2.534402558987239e-06, "loss": 0.46460193395614624, "step": 6418 }, { "epoch": 1.5602819640252794, "grad_norm": 1.628591070870872, "learning_rate": 2.5317289562161186e-06, "loss": 0.553207278251648, "step": 6419 }, { "epoch": 1.5605250364608654, "grad_norm": 1.6776331746680144, "learning_rate": 2.5290565600099624e-06, "loss": 0.4623348116874695, "step": 6420 }, { "epoch": 1.5607681088964511, "grad_norm": 1.3651490934873733, "learning_rate": 2.526385370800515e-06, "loss": 0.5056815147399902, "step": 6421 }, { "epoch": 1.561011181332037, "grad_norm": 1.8321477251513167, "learning_rate": 2.5237153890193343e-06, "loss": 0.5626817941665649, "step": 6422 }, { "epoch": 1.5612542537676228, "grad_norm": 1.6597947586786255, "learning_rate": 2.5210466150977754e-06, "loss": 0.6372174024581909, "step": 6423 }, { "epoch": 1.5614973262032086, "grad_norm": 1.3864560351193618, "learning_rate": 2.518379049467008e-06, "loss": 0.510715663433075, "step": 6424 }, { "epoch": 1.5617403986387943, "grad_norm": 1.4900961914436341, "learning_rate": 2.5157126925579933e-06, "loss": 0.4646923840045929, "step": 6425 }, { "epoch": 1.56198347107438, "grad_norm": 1.6809849524339253, "learning_rate": 2.5130475448015126e-06, "loss": 0.6994274854660034, "step": 6426 }, { "epoch": 1.5622265435099658, "grad_norm": 1.614453579585392, "learning_rate": 2.5103836066281374e-06, "loss": 0.5527176856994629, "step": 6427 }, { "epoch": 1.5624696159455518, "grad_norm": 1.8541317473514114, "learning_rate": 2.507720878468253e-06, "loss": 0.6147395372390747, "step": 6428 }, { "epoch": 1.5627126883811375, "grad_norm": 1.6917618961108642, "learning_rate": 2.505059360752049e-06, "loss": 0.44939035177230835, "step": 6429 }, { "epoch": 1.5629557608167235, "grad_norm": 1.5975448845685982, "learning_rate": 2.5023990539095133e-06, "loss": 0.5366876125335693, "step": 6430 }, { "epoch": 1.5631988332523092, "grad_norm": 2.0264180837468055, "learning_rate": 2.499739958370445e-06, "loss": 0.44634342193603516, "step": 6431 }, { "epoch": 1.563441905687895, "grad_norm": 1.212046696009497, "learning_rate": 2.497082074564441e-06, "loss": 0.4053598642349243, "step": 6432 }, { "epoch": 1.5636849781234807, "grad_norm": 1.5027664463424881, "learning_rate": 2.49442540292091e-06, "loss": 0.5091789960861206, "step": 6433 }, { "epoch": 1.5639280505590665, "grad_norm": 1.7203759347925145, "learning_rate": 2.4917699438690557e-06, "loss": 0.5968554019927979, "step": 6434 }, { "epoch": 1.5641711229946524, "grad_norm": 1.5491368715096716, "learning_rate": 2.489115697837896e-06, "loss": 0.5635174512863159, "step": 6435 }, { "epoch": 1.5644141954302382, "grad_norm": 1.824286852323672, "learning_rate": 2.486462665256243e-06, "loss": 0.4470716118812561, "step": 6436 }, { "epoch": 1.5646572678658242, "grad_norm": 1.6420765878100492, "learning_rate": 2.483810846552719e-06, "loss": 0.5822184085845947, "step": 6437 }, { "epoch": 1.56490034030141, "grad_norm": 1.4501393518855716, "learning_rate": 2.4811602421557525e-06, "loss": 0.4313852787017822, "step": 6438 }, { "epoch": 1.5651434127369956, "grad_norm": 1.6794345409010343, "learning_rate": 2.4785108524935666e-06, "loss": 0.5461034774780273, "step": 6439 }, { "epoch": 1.5653864851725814, "grad_norm": 1.536899299998034, "learning_rate": 2.4758626779941985e-06, "loss": 0.49594035744667053, "step": 6440 }, { "epoch": 1.5656295576081671, "grad_norm": 1.657003693584698, "learning_rate": 2.4732157190854777e-06, "loss": 0.5281510353088379, "step": 6441 }, { "epoch": 1.5658726300437529, "grad_norm": 1.7798488359027937, "learning_rate": 2.470569976195052e-06, "loss": 0.850968062877655, "step": 6442 }, { "epoch": 1.5661157024793388, "grad_norm": 1.5613596126394917, "learning_rate": 2.4679254497503556e-06, "loss": 0.618368923664093, "step": 6443 }, { "epoch": 1.5663587749149248, "grad_norm": 1.8648581090638996, "learning_rate": 2.4652821401786396e-06, "loss": 0.4874196946620941, "step": 6444 }, { "epoch": 1.5666018473505106, "grad_norm": 1.5904255419428386, "learning_rate": 2.462640047906958e-06, "loss": 0.5540282130241394, "step": 6445 }, { "epoch": 1.5668449197860963, "grad_norm": 1.6769648307571625, "learning_rate": 2.4599991733621553e-06, "loss": 0.5078459978103638, "step": 6446 }, { "epoch": 1.567087992221682, "grad_norm": 1.4966144341755927, "learning_rate": 2.4573595169708963e-06, "loss": 0.5751899480819702, "step": 6447 }, { "epoch": 1.5673310646572678, "grad_norm": 1.4739135225024975, "learning_rate": 2.454721079159633e-06, "loss": 0.5532150268554688, "step": 6448 }, { "epoch": 1.5675741370928535, "grad_norm": 1.5307009980859048, "learning_rate": 2.452083860354636e-06, "loss": 0.5304887294769287, "step": 6449 }, { "epoch": 1.5678172095284395, "grad_norm": 1.4799157540255188, "learning_rate": 2.449447860981965e-06, "loss": 0.61879563331604, "step": 6450 }, { "epoch": 1.5680602819640252, "grad_norm": 1.7319995435254967, "learning_rate": 2.446813081467494e-06, "loss": 0.5305595397949219, "step": 6451 }, { "epoch": 1.5683033543996112, "grad_norm": 1.7161258489596258, "learning_rate": 2.4441795222368858e-06, "loss": 0.7352566719055176, "step": 6452 }, { "epoch": 1.568546426835197, "grad_norm": 1.750243459279646, "learning_rate": 2.441547183715628e-06, "loss": 0.5340296626091003, "step": 6453 }, { "epoch": 1.5687894992707827, "grad_norm": 1.5940253663117019, "learning_rate": 2.438916066328991e-06, "loss": 0.53605717420578, "step": 6454 }, { "epoch": 1.5690325717063685, "grad_norm": 1.60017524935457, "learning_rate": 2.4362861705020537e-06, "loss": 0.6270985007286072, "step": 6455 }, { "epoch": 1.5692756441419542, "grad_norm": 1.787489402022983, "learning_rate": 2.4336574966597028e-06, "loss": 0.6474073529243469, "step": 6456 }, { "epoch": 1.56951871657754, "grad_norm": 1.8216349500052642, "learning_rate": 2.4310300452266187e-06, "loss": 0.6933295726776123, "step": 6457 }, { "epoch": 1.569761789013126, "grad_norm": 2.067194280134943, "learning_rate": 2.428403816627295e-06, "loss": 0.6832572221755981, "step": 6458 }, { "epoch": 1.5700048614487119, "grad_norm": 1.4308637469406766, "learning_rate": 2.425778811286017e-06, "loss": 0.565435528755188, "step": 6459 }, { "epoch": 1.5702479338842976, "grad_norm": 1.4175499054881213, "learning_rate": 2.4231550296268804e-06, "loss": 0.6000231504440308, "step": 6460 }, { "epoch": 1.5704910063198834, "grad_norm": 1.6021005940389355, "learning_rate": 2.4205324720737787e-06, "loss": 0.5912450551986694, "step": 6461 }, { "epoch": 1.570734078755469, "grad_norm": 1.7348385743520827, "learning_rate": 2.417911139050413e-06, "loss": 0.6455841660499573, "step": 6462 }, { "epoch": 1.5709771511910549, "grad_norm": 1.590874874314062, "learning_rate": 2.41529103098028e-06, "loss": 0.646645724773407, "step": 6463 }, { "epoch": 1.5712202236266406, "grad_norm": 1.545551070713062, "learning_rate": 2.4126721482866787e-06, "loss": 0.5233157873153687, "step": 6464 }, { "epoch": 1.5714632960622266, "grad_norm": 1.735968643481764, "learning_rate": 2.410054491392718e-06, "loss": 0.590900719165802, "step": 6465 }, { "epoch": 1.5717063684978123, "grad_norm": 1.7020864357428387, "learning_rate": 2.4074380607212977e-06, "loss": 0.52192622423172, "step": 6466 }, { "epoch": 1.5719494409333983, "grad_norm": 1.8488481735748181, "learning_rate": 2.4048228566951304e-06, "loss": 0.6167089939117432, "step": 6467 }, { "epoch": 1.572192513368984, "grad_norm": 1.6486762653752396, "learning_rate": 2.4022088797367173e-06, "loss": 0.6911454200744629, "step": 6468 }, { "epoch": 1.5724355858045698, "grad_norm": 1.6961516499377816, "learning_rate": 2.3995961302683803e-06, "loss": 0.465500146150589, "step": 6469 }, { "epoch": 1.5726786582401555, "grad_norm": 1.462958369956524, "learning_rate": 2.396984608712225e-06, "loss": 0.49689096212387085, "step": 6470 }, { "epoch": 1.5729217306757413, "grad_norm": 1.6649952127433174, "learning_rate": 2.39437431549017e-06, "loss": 0.5367533564567566, "step": 6471 }, { "epoch": 1.5731648031113272, "grad_norm": 1.5028140423963647, "learning_rate": 2.3917652510239253e-06, "loss": 0.618822455406189, "step": 6472 }, { "epoch": 1.573407875546913, "grad_norm": 1.5438676388278414, "learning_rate": 2.389157415735015e-06, "loss": 0.5044423341751099, "step": 6473 }, { "epoch": 1.573650947982499, "grad_norm": 1.4366569231118775, "learning_rate": 2.3865508100447545e-06, "loss": 0.4441247582435608, "step": 6474 }, { "epoch": 1.5738940204180847, "grad_norm": 1.7636679502795978, "learning_rate": 2.3839454343742607e-06, "loss": 0.5972170829772949, "step": 6475 }, { "epoch": 1.5741370928536704, "grad_norm": 1.5988994707524304, "learning_rate": 2.3813412891444608e-06, "loss": 0.717694103717804, "step": 6476 }, { "epoch": 1.5743801652892562, "grad_norm": 1.5319825551027857, "learning_rate": 2.3787383747760696e-06, "loss": 0.47854238748550415, "step": 6477 }, { "epoch": 1.574623237724842, "grad_norm": 1.9524648820787083, "learning_rate": 2.3761366916896223e-06, "loss": 0.7701624631881714, "step": 6478 }, { "epoch": 1.5748663101604277, "grad_norm": 1.6248726951707815, "learning_rate": 2.373536240305434e-06, "loss": 0.43426513671875, "step": 6479 }, { "epoch": 1.5751093825960136, "grad_norm": 1.6947580992542692, "learning_rate": 2.3709370210436366e-06, "loss": 0.5312819480895996, "step": 6480 }, { "epoch": 1.5753524550315994, "grad_norm": 1.5830273720673107, "learning_rate": 2.368339034324152e-06, "loss": 0.6143491864204407, "step": 6481 }, { "epoch": 1.5755955274671853, "grad_norm": 1.4809330424552065, "learning_rate": 2.3657422805667118e-06, "loss": 0.49762898683547974, "step": 6482 }, { "epoch": 1.575838599902771, "grad_norm": 1.4072807053180094, "learning_rate": 2.3631467601908442e-06, "loss": 0.6008909940719604, "step": 6483 }, { "epoch": 1.5760816723383568, "grad_norm": 1.5644812283188212, "learning_rate": 2.3605524736158736e-06, "loss": 0.5582218170166016, "step": 6484 }, { "epoch": 1.5763247447739426, "grad_norm": 1.5487358453281608, "learning_rate": 2.3579594212609325e-06, "loss": 0.6641724705696106, "step": 6485 }, { "epoch": 1.5765678172095283, "grad_norm": 1.684357695348579, "learning_rate": 2.3553676035449525e-06, "loss": 0.5124123096466064, "step": 6486 }, { "epoch": 1.5768108896451143, "grad_norm": 1.4998669789892067, "learning_rate": 2.3527770208866663e-06, "loss": 0.5733243823051453, "step": 6487 }, { "epoch": 1.5770539620807, "grad_norm": 1.714643974673212, "learning_rate": 2.3501876737046e-06, "loss": 0.5358884930610657, "step": 6488 }, { "epoch": 1.577297034516286, "grad_norm": 1.5727349812297373, "learning_rate": 2.3475995624170935e-06, "loss": 0.5724520683288574, "step": 6489 }, { "epoch": 1.5775401069518717, "grad_norm": 1.6056998345945854, "learning_rate": 2.3450126874422697e-06, "loss": 0.5905017852783203, "step": 6490 }, { "epoch": 1.5777831793874575, "grad_norm": 1.7750000444555918, "learning_rate": 2.3424270491980685e-06, "loss": 0.602049708366394, "step": 6491 }, { "epoch": 1.5780262518230432, "grad_norm": 1.55801155463495, "learning_rate": 2.3398426481022184e-06, "loss": 0.5642759799957275, "step": 6492 }, { "epoch": 1.578269324258629, "grad_norm": 1.5629133537323632, "learning_rate": 2.3372594845722483e-06, "loss": 0.5261588096618652, "step": 6493 }, { "epoch": 1.5785123966942147, "grad_norm": 1.4370312361101796, "learning_rate": 2.334677559025501e-06, "loss": 0.5429651141166687, "step": 6494 }, { "epoch": 1.5787554691298007, "grad_norm": 1.6437589148507394, "learning_rate": 2.3320968718791027e-06, "loss": 0.5595430731773376, "step": 6495 }, { "epoch": 1.5789985415653864, "grad_norm": 1.5872349611292174, "learning_rate": 2.3295174235499883e-06, "loss": 0.6324213147163391, "step": 6496 }, { "epoch": 1.5792416140009724, "grad_norm": 1.637526221339104, "learning_rate": 2.3269392144548887e-06, "loss": 0.5909008979797363, "step": 6497 }, { "epoch": 1.5794846864365581, "grad_norm": 2.640805359738065, "learning_rate": 2.3243622450103396e-06, "loss": 0.559730052947998, "step": 6498 }, { "epoch": 1.579727758872144, "grad_norm": 1.5990060584969905, "learning_rate": 2.321786515632668e-06, "loss": 0.6139917969703674, "step": 6499 }, { "epoch": 1.5799708313077296, "grad_norm": 1.7016048546841998, "learning_rate": 2.3192120267380113e-06, "loss": 0.7522554397583008, "step": 6500 }, { "epoch": 1.5802139037433154, "grad_norm": 1.729674929718575, "learning_rate": 2.316638778742295e-06, "loss": 0.6622560620307922, "step": 6501 }, { "epoch": 1.5804569761789014, "grad_norm": 1.6990883386756115, "learning_rate": 2.314066772061253e-06, "loss": 0.5480266809463501, "step": 6502 }, { "epoch": 1.580700048614487, "grad_norm": 1.4501995209563248, "learning_rate": 2.3114960071104186e-06, "loss": 0.5491713881492615, "step": 6503 }, { "epoch": 1.580943121050073, "grad_norm": 1.7915886395415477, "learning_rate": 2.308926484305115e-06, "loss": 0.5595465898513794, "step": 6504 }, { "epoch": 1.5811861934856588, "grad_norm": 1.9114552133596199, "learning_rate": 2.3063582040604782e-06, "loss": 0.5373883843421936, "step": 6505 }, { "epoch": 1.5814292659212446, "grad_norm": 1.4281630255820448, "learning_rate": 2.3037911667914305e-06, "loss": 0.4030282497406006, "step": 6506 }, { "epoch": 1.5816723383568303, "grad_norm": 1.8173406018307667, "learning_rate": 2.3012253729127045e-06, "loss": 0.5671696662902832, "step": 6507 }, { "epoch": 1.581915410792416, "grad_norm": 1.684561299151386, "learning_rate": 2.298660822838822e-06, "loss": 0.42370277643203735, "step": 6508 }, { "epoch": 1.5821584832280018, "grad_norm": 1.7349282412928486, "learning_rate": 2.2960975169841106e-06, "loss": 0.5339009761810303, "step": 6509 }, { "epoch": 1.5824015556635878, "grad_norm": 1.6644221085239888, "learning_rate": 2.293535455762698e-06, "loss": 0.4782200753688812, "step": 6510 }, { "epoch": 1.5826446280991735, "grad_norm": 1.3807052629618222, "learning_rate": 2.2909746395885013e-06, "loss": 0.622092604637146, "step": 6511 }, { "epoch": 1.5828877005347595, "grad_norm": 1.811039686097157, "learning_rate": 2.2884150688752506e-06, "loss": 0.5903230905532837, "step": 6512 }, { "epoch": 1.5831307729703452, "grad_norm": 1.8730667139223727, "learning_rate": 2.28585674403646e-06, "loss": 0.4885072410106659, "step": 6513 }, { "epoch": 1.583373845405931, "grad_norm": 1.546956309445208, "learning_rate": 2.2832996654854555e-06, "loss": 0.5549498796463013, "step": 6514 }, { "epoch": 1.5836169178415167, "grad_norm": 1.8271498790288005, "learning_rate": 2.2807438336353494e-06, "loss": 0.6520770788192749, "step": 6515 }, { "epoch": 1.5838599902771024, "grad_norm": 1.5613142870697896, "learning_rate": 2.2781892488990654e-06, "loss": 0.6394345760345459, "step": 6516 }, { "epoch": 1.5841030627126884, "grad_norm": 1.5110591509322562, "learning_rate": 2.2756359116893122e-06, "loss": 0.5604082345962524, "step": 6517 }, { "epoch": 1.5843461351482742, "grad_norm": 1.5351104441251124, "learning_rate": 2.273083822418608e-06, "loss": 0.5119533538818359, "step": 6518 }, { "epoch": 1.5845892075838601, "grad_norm": 1.7214975212825028, "learning_rate": 2.270532981499268e-06, "loss": 0.6604741811752319, "step": 6519 }, { "epoch": 1.5848322800194459, "grad_norm": 1.5050813982877853, "learning_rate": 2.267983389343397e-06, "loss": 0.4929693341255188, "step": 6520 }, { "epoch": 1.5850753524550316, "grad_norm": 1.465421982579449, "learning_rate": 2.2654350463629094e-06, "loss": 0.5146420001983643, "step": 6521 }, { "epoch": 1.5853184248906174, "grad_norm": 1.5824673698255078, "learning_rate": 2.262887952969508e-06, "loss": 0.5702130794525146, "step": 6522 }, { "epoch": 1.585561497326203, "grad_norm": 1.6118095841158684, "learning_rate": 2.2603421095747025e-06, "loss": 0.6489968299865723, "step": 6523 }, { "epoch": 1.5858045697617889, "grad_norm": 1.6718295927092277, "learning_rate": 2.2577975165897913e-06, "loss": 0.5392205119132996, "step": 6524 }, { "epoch": 1.5860476421973748, "grad_norm": 1.5609447336819549, "learning_rate": 2.2552541744258816e-06, "loss": 0.5423364639282227, "step": 6525 }, { "epoch": 1.5862907146329608, "grad_norm": 1.4670763296965814, "learning_rate": 2.2527120834938663e-06, "loss": 0.6265590786933899, "step": 6526 }, { "epoch": 1.5865337870685465, "grad_norm": 1.5969059210233187, "learning_rate": 2.2501712442044466e-06, "loss": 0.517956018447876, "step": 6527 }, { "epoch": 1.5867768595041323, "grad_norm": 1.6833426398776252, "learning_rate": 2.2476316569681188e-06, "loss": 0.5731427669525146, "step": 6528 }, { "epoch": 1.587019931939718, "grad_norm": 1.4394367570590738, "learning_rate": 2.245093322195171e-06, "loss": 0.5446950793266296, "step": 6529 }, { "epoch": 1.5872630043753038, "grad_norm": 1.5068081890110459, "learning_rate": 2.242556240295699e-06, "loss": 0.5351118445396423, "step": 6530 }, { "epoch": 1.5875060768108895, "grad_norm": 1.5247116648939731, "learning_rate": 2.2400204116795853e-06, "loss": 0.563130259513855, "step": 6531 }, { "epoch": 1.5877491492464755, "grad_norm": 1.4662977287074643, "learning_rate": 2.23748583675652e-06, "loss": 0.4440326690673828, "step": 6532 }, { "epoch": 1.5879922216820612, "grad_norm": 1.8492068884508563, "learning_rate": 2.234952515935982e-06, "loss": 0.48401281237602234, "step": 6533 }, { "epoch": 1.5882352941176472, "grad_norm": 1.6299206723005366, "learning_rate": 2.232420449627253e-06, "loss": 0.5484923124313354, "step": 6534 }, { "epoch": 1.588478366553233, "grad_norm": 1.4930002839911813, "learning_rate": 2.229889638239414e-06, "loss": 0.5963430404663086, "step": 6535 }, { "epoch": 1.5887214389888187, "grad_norm": 1.5372785272553042, "learning_rate": 2.2273600821813344e-06, "loss": 0.481428325176239, "step": 6536 }, { "epoch": 1.5889645114244044, "grad_norm": 1.969631300258035, "learning_rate": 2.2248317818616915e-06, "loss": 0.7245990037918091, "step": 6537 }, { "epoch": 1.5892075838599902, "grad_norm": 1.5056950070105608, "learning_rate": 2.2223047376889482e-06, "loss": 0.3609062433242798, "step": 6538 }, { "epoch": 1.5894506562955761, "grad_norm": 1.6846777773986383, "learning_rate": 2.2197789500713786e-06, "loss": 0.5384387969970703, "step": 6539 }, { "epoch": 1.5896937287311619, "grad_norm": 1.599480050131968, "learning_rate": 2.2172544194170374e-06, "loss": 0.526813268661499, "step": 6540 }, { "epoch": 1.5899368011667478, "grad_norm": 1.6864429708477773, "learning_rate": 2.214731146133793e-06, "loss": 0.6285924911499023, "step": 6541 }, { "epoch": 1.5901798736023336, "grad_norm": 1.6053991214518948, "learning_rate": 2.2122091306292926e-06, "loss": 0.6269700527191162, "step": 6542 }, { "epoch": 1.5904229460379193, "grad_norm": 1.618075514517955, "learning_rate": 2.2096883733110007e-06, "loss": 0.4668845534324646, "step": 6543 }, { "epoch": 1.590666018473505, "grad_norm": 1.7225026648606463, "learning_rate": 2.207168874586164e-06, "loss": 0.6521778106689453, "step": 6544 }, { "epoch": 1.5909090909090908, "grad_norm": 1.8910030135834868, "learning_rate": 2.2046506348618256e-06, "loss": 0.6526474952697754, "step": 6545 }, { "epoch": 1.5911521633446766, "grad_norm": 1.7948757162873343, "learning_rate": 2.2021336545448357e-06, "loss": 0.4490090310573578, "step": 6546 }, { "epoch": 1.5913952357802625, "grad_norm": 1.5670709081641438, "learning_rate": 2.199617934041828e-06, "loss": 0.6244315505027771, "step": 6547 }, { "epoch": 1.5916383082158483, "grad_norm": 1.5880597513225476, "learning_rate": 2.1971034737592455e-06, "loss": 0.7088150978088379, "step": 6548 }, { "epoch": 1.5918813806514343, "grad_norm": 1.8379527879847397, "learning_rate": 2.1945902741033154e-06, "loss": 0.626957356929779, "step": 6549 }, { "epoch": 1.59212445308702, "grad_norm": 1.7766539639506354, "learning_rate": 2.1920783354800746e-06, "loss": 0.482730507850647, "step": 6550 }, { "epoch": 1.5923675255226057, "grad_norm": 1.6071098071691867, "learning_rate": 2.1895676582953375e-06, "loss": 0.5455487966537476, "step": 6551 }, { "epoch": 1.5926105979581915, "grad_norm": 1.7181370898625952, "learning_rate": 2.1870582429547405e-06, "loss": 0.5804214477539062, "step": 6552 }, { "epoch": 1.5928536703937772, "grad_norm": 1.5332392308106884, "learning_rate": 2.1845500898636927e-06, "loss": 0.6370018720626831, "step": 6553 }, { "epoch": 1.5930967428293632, "grad_norm": 1.721408853664116, "learning_rate": 2.1820431994274096e-06, "loss": 0.7116737365722656, "step": 6554 }, { "epoch": 1.593339815264949, "grad_norm": 1.6578512365545344, "learning_rate": 2.179537572050903e-06, "loss": 0.46383386850357056, "step": 6555 }, { "epoch": 1.593582887700535, "grad_norm": 1.5009267759521967, "learning_rate": 2.1770332081389768e-06, "loss": 0.5890370607376099, "step": 6556 }, { "epoch": 1.5938259601361207, "grad_norm": 1.5996150155455764, "learning_rate": 2.1745301080962357e-06, "loss": 0.5405737161636353, "step": 6557 }, { "epoch": 1.5940690325717064, "grad_norm": 1.617429486913466, "learning_rate": 2.1720282723270748e-06, "loss": 0.5319201946258545, "step": 6558 }, { "epoch": 1.5943121050072921, "grad_norm": 1.5502509678686172, "learning_rate": 2.1695277012356888e-06, "loss": 0.5744831562042236, "step": 6559 }, { "epoch": 1.594555177442878, "grad_norm": 1.912310557722667, "learning_rate": 2.1670283952260663e-06, "loss": 0.43980348110198975, "step": 6560 }, { "epoch": 1.5947982498784636, "grad_norm": 1.4940955185758806, "learning_rate": 2.1645303547019967e-06, "loss": 0.6222119331359863, "step": 6561 }, { "epoch": 1.5950413223140496, "grad_norm": 1.5173064438031638, "learning_rate": 2.1620335800670567e-06, "loss": 0.46449702978134155, "step": 6562 }, { "epoch": 1.5952843947496353, "grad_norm": 1.5485067448459535, "learning_rate": 2.1595380717246206e-06, "loss": 0.5769492387771606, "step": 6563 }, { "epoch": 1.5955274671852213, "grad_norm": 1.4152831548647449, "learning_rate": 2.157043830077864e-06, "loss": 0.48981067538261414, "step": 6564 }, { "epoch": 1.595770539620807, "grad_norm": 1.6446080186486975, "learning_rate": 2.1545508555297478e-06, "loss": 0.4047904312610626, "step": 6565 }, { "epoch": 1.5960136120563928, "grad_norm": 1.9204510508409571, "learning_rate": 2.152059148483041e-06, "loss": 0.6674093008041382, "step": 6566 }, { "epoch": 1.5962566844919786, "grad_norm": 1.545815342307272, "learning_rate": 2.1495687093402904e-06, "loss": 0.5146921277046204, "step": 6567 }, { "epoch": 1.5964997569275643, "grad_norm": 1.7649525822620915, "learning_rate": 2.1470795385038625e-06, "loss": 0.6340919733047485, "step": 6568 }, { "epoch": 1.5967428293631503, "grad_norm": 1.7010583436139883, "learning_rate": 2.1445916363758932e-06, "loss": 0.5629911422729492, "step": 6569 }, { "epoch": 1.596985901798736, "grad_norm": 1.636192920405052, "learning_rate": 2.1421050033583333e-06, "loss": 0.5543312430381775, "step": 6570 }, { "epoch": 1.597228974234322, "grad_norm": 1.4523509859838855, "learning_rate": 2.1396196398529156e-06, "loss": 0.5133811831474304, "step": 6571 }, { "epoch": 1.5974720466699077, "grad_norm": 1.586258312730971, "learning_rate": 2.137135546261171e-06, "loss": 0.599245548248291, "step": 6572 }, { "epoch": 1.5977151191054935, "grad_norm": 1.5344751950481677, "learning_rate": 2.1346527229844305e-06, "loss": 0.46161210536956787, "step": 6573 }, { "epoch": 1.5979581915410792, "grad_norm": 1.636312697229632, "learning_rate": 2.1321711704238124e-06, "loss": 0.5803316831588745, "step": 6574 }, { "epoch": 1.598201263976665, "grad_norm": 1.6263861702056837, "learning_rate": 2.1296908889802358e-06, "loss": 0.4859824776649475, "step": 6575 }, { "epoch": 1.5984443364122507, "grad_norm": 1.5298164730001855, "learning_rate": 2.1272118790544106e-06, "loss": 0.570063054561615, "step": 6576 }, { "epoch": 1.5986874088478367, "grad_norm": 1.5655265767570608, "learning_rate": 2.124734141046847e-06, "loss": 0.5970048308372498, "step": 6577 }, { "epoch": 1.5989304812834224, "grad_norm": 1.7868598975624541, "learning_rate": 2.1222576753578395e-06, "loss": 0.6604928374290466, "step": 6578 }, { "epoch": 1.5991735537190084, "grad_norm": 1.4644923569189587, "learning_rate": 2.119782482387488e-06, "loss": 0.4546463191509247, "step": 6579 }, { "epoch": 1.5994166261545941, "grad_norm": 1.6755435940614845, "learning_rate": 2.117308562535676e-06, "loss": 0.5551490783691406, "step": 6580 }, { "epoch": 1.5996596985901799, "grad_norm": 1.824471235474142, "learning_rate": 2.114835916202094e-06, "loss": 0.5333787202835083, "step": 6581 }, { "epoch": 1.5999027710257656, "grad_norm": 1.6786953947194345, "learning_rate": 2.112364543786215e-06, "loss": 0.6628751754760742, "step": 6582 }, { "epoch": 1.6001458434613514, "grad_norm": 1.4800851721408852, "learning_rate": 2.109894445687307e-06, "loss": 0.45271092653274536, "step": 6583 }, { "epoch": 1.6003889158969373, "grad_norm": 1.6359262603797753, "learning_rate": 2.1074256223044464e-06, "loss": 0.43935859203338623, "step": 6584 }, { "epoch": 1.600631988332523, "grad_norm": 1.875341610807873, "learning_rate": 2.1049580740364863e-06, "loss": 0.56866455078125, "step": 6585 }, { "epoch": 1.600875060768109, "grad_norm": 1.5088745569124273, "learning_rate": 2.102491801282085e-06, "loss": 0.5522898435592651, "step": 6586 }, { "epoch": 1.6011181332036948, "grad_norm": 1.3623836645650516, "learning_rate": 2.1000268044396843e-06, "loss": 0.5075643062591553, "step": 6587 }, { "epoch": 1.6013612056392805, "grad_norm": 1.5817664206624193, "learning_rate": 2.0975630839075345e-06, "loss": 0.5496433973312378, "step": 6588 }, { "epoch": 1.6016042780748663, "grad_norm": 1.389487521382978, "learning_rate": 2.095100640083664e-06, "loss": 0.4283601939678192, "step": 6589 }, { "epoch": 1.601847350510452, "grad_norm": 1.597763737643725, "learning_rate": 2.0926394733659072e-06, "loss": 0.5170869827270508, "step": 6590 }, { "epoch": 1.6020904229460378, "grad_norm": 1.559400651678212, "learning_rate": 2.0901795841518844e-06, "loss": 0.4364579916000366, "step": 6591 }, { "epoch": 1.6023334953816237, "grad_norm": 1.6838506399782078, "learning_rate": 2.08772097283901e-06, "loss": 0.47166407108306885, "step": 6592 }, { "epoch": 1.6025765678172095, "grad_norm": 1.6032480163488207, "learning_rate": 2.085263639824502e-06, "loss": 0.5194869637489319, "step": 6593 }, { "epoch": 1.6028196402527954, "grad_norm": 1.6346357543220877, "learning_rate": 2.0828075855053565e-06, "loss": 0.4855474531650543, "step": 6594 }, { "epoch": 1.6030627126883812, "grad_norm": 1.772534541066963, "learning_rate": 2.0803528102783775e-06, "loss": 0.5985019207000732, "step": 6595 }, { "epoch": 1.603305785123967, "grad_norm": 1.540832904615958, "learning_rate": 2.0778993145401494e-06, "loss": 0.4777887463569641, "step": 6596 }, { "epoch": 1.6035488575595527, "grad_norm": 1.909213872080703, "learning_rate": 2.0754470986870602e-06, "loss": 0.6513022184371948, "step": 6597 }, { "epoch": 1.6037919299951384, "grad_norm": 1.77618513243968, "learning_rate": 2.0729961631152838e-06, "loss": 0.5765270590782166, "step": 6598 }, { "epoch": 1.6040350024307244, "grad_norm": 1.5949363606452036, "learning_rate": 2.0705465082207944e-06, "loss": 0.5667819976806641, "step": 6599 }, { "epoch": 1.6042780748663101, "grad_norm": 2.160622456713569, "learning_rate": 2.06809813439935e-06, "loss": 0.5744008421897888, "step": 6600 }, { "epoch": 1.604521147301896, "grad_norm": 1.7584601489166487, "learning_rate": 2.06565104204651e-06, "loss": 0.4991294741630554, "step": 6601 }, { "epoch": 1.6047642197374818, "grad_norm": 1.5579064059326477, "learning_rate": 2.063205231557628e-06, "loss": 0.4542378783226013, "step": 6602 }, { "epoch": 1.6050072921730676, "grad_norm": 1.8533868567603904, "learning_rate": 2.0607607033278374e-06, "loss": 0.6294225454330444, "step": 6603 }, { "epoch": 1.6052503646086533, "grad_norm": 1.531428826684572, "learning_rate": 2.0583174577520805e-06, "loss": 0.6616170406341553, "step": 6604 }, { "epoch": 1.605493437044239, "grad_norm": 1.8305050962449025, "learning_rate": 2.05587549522508e-06, "loss": 0.5698629021644592, "step": 6605 }, { "epoch": 1.6057365094798248, "grad_norm": 1.7874024889093125, "learning_rate": 2.053434816141362e-06, "loss": 0.541780948638916, "step": 6606 }, { "epoch": 1.6059795819154108, "grad_norm": 1.7568563103618982, "learning_rate": 2.050995420895233e-06, "loss": 0.604229211807251, "step": 6607 }, { "epoch": 1.6062226543509968, "grad_norm": 1.6789282560123517, "learning_rate": 2.048557309880803e-06, "loss": 0.7717465162277222, "step": 6608 }, { "epoch": 1.6064657267865825, "grad_norm": 1.8863604353994217, "learning_rate": 2.0461204834919725e-06, "loss": 0.5760974884033203, "step": 6609 }, { "epoch": 1.6067087992221682, "grad_norm": 1.6155904315911644, "learning_rate": 2.043684942122427e-06, "loss": 0.4807845652103424, "step": 6610 }, { "epoch": 1.606951871657754, "grad_norm": 1.6988499844753921, "learning_rate": 2.0412506861656533e-06, "loss": 0.5877646207809448, "step": 6611 }, { "epoch": 1.6071949440933397, "grad_norm": 1.6737327607736516, "learning_rate": 2.0388177160149236e-06, "loss": 0.7401211261749268, "step": 6612 }, { "epoch": 1.6074380165289255, "grad_norm": 1.7750310201628694, "learning_rate": 2.036386032063311e-06, "loss": 0.6217203140258789, "step": 6613 }, { "epoch": 1.6076810889645115, "grad_norm": 1.554781420309189, "learning_rate": 2.0339556347036694e-06, "loss": 0.5621060132980347, "step": 6614 }, { "epoch": 1.6079241614000972, "grad_norm": 1.8252351298615772, "learning_rate": 2.0315265243286564e-06, "loss": 0.5921005606651306, "step": 6615 }, { "epoch": 1.6081672338356832, "grad_norm": 1.8385582646167469, "learning_rate": 2.0290987013307096e-06, "loss": 0.4658951759338379, "step": 6616 }, { "epoch": 1.608410306271269, "grad_norm": 2.054142787890137, "learning_rate": 2.0266721661020705e-06, "loss": 0.4512670040130615, "step": 6617 }, { "epoch": 1.6086533787068547, "grad_norm": 1.7983389168167117, "learning_rate": 2.024246919034767e-06, "loss": 0.5109896659851074, "step": 6618 }, { "epoch": 1.6088964511424404, "grad_norm": 1.6762695989220153, "learning_rate": 2.0218229605206162e-06, "loss": 0.5486649870872498, "step": 6619 }, { "epoch": 1.6091395235780261, "grad_norm": 1.8723311335821, "learning_rate": 2.0194002909512346e-06, "loss": 0.7431430220603943, "step": 6620 }, { "epoch": 1.609382596013612, "grad_norm": 1.7449983913933105, "learning_rate": 2.0169789107180195e-06, "loss": 0.6085963249206543, "step": 6621 }, { "epoch": 1.6096256684491979, "grad_norm": 1.682015532590523, "learning_rate": 2.0145588202121723e-06, "loss": 0.45453399419784546, "step": 6622 }, { "epoch": 1.6098687408847838, "grad_norm": 1.6065280511579618, "learning_rate": 2.0121400198246744e-06, "loss": 0.5788406729698181, "step": 6623 }, { "epoch": 1.6101118133203696, "grad_norm": 1.6687539110818745, "learning_rate": 2.009722509946308e-06, "loss": 0.5859256982803345, "step": 6624 }, { "epoch": 1.6103548857559553, "grad_norm": 1.5844527804752813, "learning_rate": 2.0073062909676432e-06, "loss": 0.5210015773773193, "step": 6625 }, { "epoch": 1.610597958191541, "grad_norm": 1.8490113203642804, "learning_rate": 2.004891363279039e-06, "loss": 0.6299726963043213, "step": 6626 }, { "epoch": 1.6108410306271268, "grad_norm": 1.8402680724733793, "learning_rate": 2.002477727270652e-06, "loss": 0.5011122822761536, "step": 6627 }, { "epoch": 1.6110841030627125, "grad_norm": 1.6524678326944742, "learning_rate": 2.0000653833324223e-06, "loss": 0.5439075827598572, "step": 6628 }, { "epoch": 1.6113271754982985, "grad_norm": 1.7040973283122698, "learning_rate": 1.9976543318540887e-06, "loss": 0.454270601272583, "step": 6629 }, { "epoch": 1.6115702479338843, "grad_norm": 1.4310723990775465, "learning_rate": 1.9952445732251733e-06, "loss": 0.4934217929840088, "step": 6630 }, { "epoch": 1.6118133203694702, "grad_norm": 1.6981684604615743, "learning_rate": 1.992836107835e-06, "loss": 0.7206728458404541, "step": 6631 }, { "epoch": 1.612056392805056, "grad_norm": 1.924122649807708, "learning_rate": 1.9904289360726716e-06, "loss": 0.5483721494674683, "step": 6632 }, { "epoch": 1.6122994652406417, "grad_norm": 1.7428045726245636, "learning_rate": 1.9880230583270897e-06, "loss": 0.5039916634559631, "step": 6633 }, { "epoch": 1.6125425376762275, "grad_norm": 1.8923970856702523, "learning_rate": 1.98561847498695e-06, "loss": 0.6559092402458191, "step": 6634 }, { "epoch": 1.6127856101118132, "grad_norm": 1.8651913968991376, "learning_rate": 1.9832151864407266e-06, "loss": 0.6562027931213379, "step": 6635 }, { "epoch": 1.6130286825473992, "grad_norm": 1.4142521226161557, "learning_rate": 1.9808131930766985e-06, "loss": 0.5387916564941406, "step": 6636 }, { "epoch": 1.613271754982985, "grad_norm": 1.7347663043918933, "learning_rate": 1.978412495282922e-06, "loss": 0.6433638334274292, "step": 6637 }, { "epoch": 1.6135148274185709, "grad_norm": 1.6531811008018777, "learning_rate": 1.9760130934472577e-06, "loss": 0.37293750047683716, "step": 6638 }, { "epoch": 1.6137578998541566, "grad_norm": 1.4080696265129584, "learning_rate": 1.9736149879573453e-06, "loss": 0.5537489056587219, "step": 6639 }, { "epoch": 1.6140009722897424, "grad_norm": 1.6868275908240693, "learning_rate": 1.9712181792006234e-06, "loss": 0.5540642738342285, "step": 6640 }, { "epoch": 1.6142440447253281, "grad_norm": 1.3891800275298538, "learning_rate": 1.968822667564314e-06, "loss": 0.5542909502983093, "step": 6641 }, { "epoch": 1.6144871171609139, "grad_norm": 1.6362035335354657, "learning_rate": 1.966428453435434e-06, "loss": 0.5497745275497437, "step": 6642 }, { "epoch": 1.6147301895964996, "grad_norm": 1.6650180154161747, "learning_rate": 1.9640355372007945e-06, "loss": 0.4713360071182251, "step": 6643 }, { "epoch": 1.6149732620320856, "grad_norm": 1.597525715606267, "learning_rate": 1.961643919246985e-06, "loss": 0.7155966758728027, "step": 6644 }, { "epoch": 1.6152163344676713, "grad_norm": 1.5622739433348378, "learning_rate": 1.959253599960399e-06, "loss": 0.5566350221633911, "step": 6645 }, { "epoch": 1.6154594069032573, "grad_norm": 1.8110089362073605, "learning_rate": 1.956864579727207e-06, "loss": 0.7278127670288086, "step": 6646 }, { "epoch": 1.615702479338843, "grad_norm": 1.745008788854271, "learning_rate": 1.954476858933383e-06, "loss": 0.610866129398346, "step": 6647 }, { "epoch": 1.6159455517744288, "grad_norm": 1.4986115189181302, "learning_rate": 1.952090437964679e-06, "loss": 0.5415172576904297, "step": 6648 }, { "epoch": 1.6161886242100145, "grad_norm": 1.5574130126559496, "learning_rate": 1.9497053172066437e-06, "loss": 0.5683257579803467, "step": 6649 }, { "epoch": 1.6164316966456003, "grad_norm": 1.7572132945379895, "learning_rate": 1.9473214970446153e-06, "loss": 0.5748674273490906, "step": 6650 }, { "epoch": 1.6166747690811862, "grad_norm": 1.769799502760278, "learning_rate": 1.944938977863723e-06, "loss": 0.5411278009414673, "step": 6651 }, { "epoch": 1.616917841516772, "grad_norm": 1.7799381930665794, "learning_rate": 1.942557760048882e-06, "loss": 0.5169808268547058, "step": 6652 }, { "epoch": 1.617160913952358, "grad_norm": 1.5203953586657601, "learning_rate": 1.940177843984795e-06, "loss": 0.4408758282661438, "step": 6653 }, { "epoch": 1.6174039863879437, "grad_norm": 1.7633009759179232, "learning_rate": 1.9377992300559654e-06, "loss": 0.5851919054985046, "step": 6654 }, { "epoch": 1.6176470588235294, "grad_norm": 1.7137011609614954, "learning_rate": 1.9354219186466726e-06, "loss": 0.5942158699035645, "step": 6655 }, { "epoch": 1.6178901312591152, "grad_norm": 1.617554440836706, "learning_rate": 1.9330459101409982e-06, "loss": 0.5013660192489624, "step": 6656 }, { "epoch": 1.618133203694701, "grad_norm": 1.9594992864508567, "learning_rate": 1.9306712049227993e-06, "loss": 0.5832997560501099, "step": 6657 }, { "epoch": 1.6183762761302867, "grad_norm": 1.6540438054617457, "learning_rate": 1.9282978033757404e-06, "loss": 0.6430975198745728, "step": 6658 }, { "epoch": 1.6186193485658726, "grad_norm": 1.8725744935360429, "learning_rate": 1.925925705883258e-06, "loss": 0.5569758415222168, "step": 6659 }, { "epoch": 1.6188624210014584, "grad_norm": 1.5636790219970627, "learning_rate": 1.92355491282859e-06, "loss": 0.5148023366928101, "step": 6660 }, { "epoch": 1.6191054934370444, "grad_norm": 1.614727961441777, "learning_rate": 1.921185424594758e-06, "loss": 0.519223690032959, "step": 6661 }, { "epoch": 1.61934856587263, "grad_norm": 1.663599184356793, "learning_rate": 1.918817241564569e-06, "loss": 0.5354150533676147, "step": 6662 }, { "epoch": 1.6195916383082158, "grad_norm": 2.299398437386372, "learning_rate": 1.91645036412063e-06, "loss": 0.6754683256149292, "step": 6663 }, { "epoch": 1.6198347107438016, "grad_norm": 1.4658021106045722, "learning_rate": 1.9140847926453264e-06, "loss": 0.62517249584198, "step": 6664 }, { "epoch": 1.6200777831793873, "grad_norm": 2.059881555592222, "learning_rate": 1.9117205275208383e-06, "loss": 0.5757490396499634, "step": 6665 }, { "epoch": 1.6203208556149733, "grad_norm": 1.8382707094604982, "learning_rate": 1.9093575691291344e-06, "loss": 0.6277372241020203, "step": 6666 }, { "epoch": 1.620563928050559, "grad_norm": 1.7036850808617792, "learning_rate": 1.9069959178519737e-06, "loss": 0.6139748692512512, "step": 6667 }, { "epoch": 1.620807000486145, "grad_norm": 1.6311713740903535, "learning_rate": 1.9046355740708967e-06, "loss": 0.6353753805160522, "step": 6668 }, { "epoch": 1.6210500729217308, "grad_norm": 1.6839778698868046, "learning_rate": 1.9022765381672426e-06, "loss": 0.47249361872673035, "step": 6669 }, { "epoch": 1.6212931453573165, "grad_norm": 1.560435073439614, "learning_rate": 1.8999188105221321e-06, "loss": 0.36186498403549194, "step": 6670 }, { "epoch": 1.6215362177929022, "grad_norm": 1.4282815449010724, "learning_rate": 1.8975623915164753e-06, "loss": 0.4457515478134155, "step": 6671 }, { "epoch": 1.621779290228488, "grad_norm": 1.7058831848761995, "learning_rate": 1.8952072815309763e-06, "loss": 0.4869554936885834, "step": 6672 }, { "epoch": 1.6220223626640737, "grad_norm": 1.470477976898109, "learning_rate": 1.8928534809461186e-06, "loss": 0.6744360327720642, "step": 6673 }, { "epoch": 1.6222654350996597, "grad_norm": 1.6219461035781488, "learning_rate": 1.8905009901421822e-06, "loss": 0.5601799488067627, "step": 6674 }, { "epoch": 1.6225085075352454, "grad_norm": 1.566420671532256, "learning_rate": 1.8881498094992335e-06, "loss": 0.4626604914665222, "step": 6675 }, { "epoch": 1.6227515799708314, "grad_norm": 1.530057560531051, "learning_rate": 1.8857999393971283e-06, "loss": 0.4940527677536011, "step": 6676 }, { "epoch": 1.6229946524064172, "grad_norm": 1.8191303646810173, "learning_rate": 1.883451380215503e-06, "loss": 0.5270865559577942, "step": 6677 }, { "epoch": 1.623237724842003, "grad_norm": 1.7919140627143229, "learning_rate": 1.8811041323337954e-06, "loss": 0.5700557231903076, "step": 6678 }, { "epoch": 1.6234807972775886, "grad_norm": 1.8029978612385589, "learning_rate": 1.8787581961312163e-06, "loss": 0.6131875514984131, "step": 6679 }, { "epoch": 1.6237238697131744, "grad_norm": 1.652461169068027, "learning_rate": 1.8764135719867793e-06, "loss": 0.6308377981185913, "step": 6680 }, { "epoch": 1.6239669421487604, "grad_norm": 1.3402656878786035, "learning_rate": 1.874070260279276e-06, "loss": 0.4026739001274109, "step": 6681 }, { "epoch": 1.624210014584346, "grad_norm": 1.4072524324121232, "learning_rate": 1.8717282613872833e-06, "loss": 0.5455447435379028, "step": 6682 }, { "epoch": 1.624453087019932, "grad_norm": 1.4617719404374434, "learning_rate": 1.8693875756891833e-06, "loss": 0.5172713994979858, "step": 6683 }, { "epoch": 1.6246961594555178, "grad_norm": 1.4227023699866286, "learning_rate": 1.8670482035631254e-06, "loss": 0.5389741659164429, "step": 6684 }, { "epoch": 1.6249392318911036, "grad_norm": 2.025554626379306, "learning_rate": 1.8647101453870608e-06, "loss": 0.6324977874755859, "step": 6685 }, { "epoch": 1.6251823043266893, "grad_norm": 1.5387150845900932, "learning_rate": 1.8623734015387196e-06, "loss": 0.4594108462333679, "step": 6686 }, { "epoch": 1.625425376762275, "grad_norm": 1.5164456127101291, "learning_rate": 1.8600379723956286e-06, "loss": 0.5418657660484314, "step": 6687 }, { "epoch": 1.6256684491978608, "grad_norm": 1.8094283663812751, "learning_rate": 1.8577038583350903e-06, "loss": 0.5064224004745483, "step": 6688 }, { "epoch": 1.6259115216334468, "grad_norm": 1.5960393429668278, "learning_rate": 1.8553710597342067e-06, "loss": 0.4601883590221405, "step": 6689 }, { "epoch": 1.6261545940690327, "grad_norm": 2.1154190872537666, "learning_rate": 1.8530395769698572e-06, "loss": 0.540178656578064, "step": 6690 }, { "epoch": 1.6263976665046185, "grad_norm": 1.4764909494143008, "learning_rate": 1.8507094104187174e-06, "loss": 0.5847306251525879, "step": 6691 }, { "epoch": 1.6266407389402042, "grad_norm": 1.4528968337707813, "learning_rate": 1.8483805604572457e-06, "loss": 0.531794011592865, "step": 6692 }, { "epoch": 1.62688381137579, "grad_norm": 1.5313233608671795, "learning_rate": 1.846053027461686e-06, "loss": 0.6061419248580933, "step": 6693 }, { "epoch": 1.6271268838113757, "grad_norm": 1.4407770467407686, "learning_rate": 1.8437268118080753e-06, "loss": 0.636238694190979, "step": 6694 }, { "epoch": 1.6273699562469615, "grad_norm": 1.5780098004775927, "learning_rate": 1.8414019138722306e-06, "loss": 0.5509511232376099, "step": 6695 }, { "epoch": 1.6276130286825474, "grad_norm": 1.5950377075639026, "learning_rate": 1.8390783340297625e-06, "loss": 0.5760771036148071, "step": 6696 }, { "epoch": 1.6278561011181332, "grad_norm": 1.5562239613460553, "learning_rate": 1.8367560726560628e-06, "loss": 0.5693955421447754, "step": 6697 }, { "epoch": 1.6280991735537191, "grad_norm": 2.5267746911049267, "learning_rate": 1.8344351301263152e-06, "loss": 0.6527481079101562, "step": 6698 }, { "epoch": 1.6283422459893049, "grad_norm": 1.649829427906989, "learning_rate": 1.8321155068154906e-06, "loss": 0.4819604456424713, "step": 6699 }, { "epoch": 1.6285853184248906, "grad_norm": 1.349501854085777, "learning_rate": 1.829797203098339e-06, "loss": 0.4151954650878906, "step": 6700 }, { "epoch": 1.6288283908604764, "grad_norm": 1.581932132554141, "learning_rate": 1.827480219349409e-06, "loss": 0.4253559112548828, "step": 6701 }, { "epoch": 1.6290714632960621, "grad_norm": 1.7224409683593345, "learning_rate": 1.8251645559430242e-06, "loss": 0.5436816215515137, "step": 6702 }, { "epoch": 1.629314535731648, "grad_norm": 1.5004331307502654, "learning_rate": 1.8228502132533055e-06, "loss": 0.6574311852455139, "step": 6703 }, { "epoch": 1.6295576081672338, "grad_norm": 1.7259074092628917, "learning_rate": 1.8205371916541492e-06, "loss": 0.5513980388641357, "step": 6704 }, { "epoch": 1.6298006806028198, "grad_norm": 1.6185230354972004, "learning_rate": 1.818225491519252e-06, "loss": 0.42551395297050476, "step": 6705 }, { "epoch": 1.6300437530384055, "grad_norm": 1.9629634453238558, "learning_rate": 1.815915113222082e-06, "loss": 0.5910829901695251, "step": 6706 }, { "epoch": 1.6302868254739913, "grad_norm": 1.6460814842481997, "learning_rate": 1.8136060571359038e-06, "loss": 0.5254735946655273, "step": 6707 }, { "epoch": 1.630529897909577, "grad_norm": 1.5622218970036306, "learning_rate": 1.81129832363377e-06, "loss": 0.5693899393081665, "step": 6708 }, { "epoch": 1.6307729703451628, "grad_norm": 1.519197606036422, "learning_rate": 1.8089919130885081e-06, "loss": 0.543082594871521, "step": 6709 }, { "epoch": 1.6310160427807485, "grad_norm": 1.6099196110277467, "learning_rate": 1.8066868258727454e-06, "loss": 0.5031783580780029, "step": 6710 }, { "epoch": 1.6312591152163345, "grad_norm": 2.1412425413684546, "learning_rate": 1.8043830623588843e-06, "loss": 0.841117262840271, "step": 6711 }, { "epoch": 1.6315021876519202, "grad_norm": 1.5905483089780845, "learning_rate": 1.8020806229191223e-06, "loss": 0.48015105724334717, "step": 6712 }, { "epoch": 1.6317452600875062, "grad_norm": 1.7496766291836734, "learning_rate": 1.7997795079254343e-06, "loss": 0.5668493509292603, "step": 6713 }, { "epoch": 1.631988332523092, "grad_norm": 1.7365885769344653, "learning_rate": 1.7974797177495896e-06, "loss": 0.5761511325836182, "step": 6714 }, { "epoch": 1.6322314049586777, "grad_norm": 1.6254305417747839, "learning_rate": 1.7951812527631362e-06, "loss": 0.5026800036430359, "step": 6715 }, { "epoch": 1.6324744773942634, "grad_norm": 1.5416371171995822, "learning_rate": 1.792884113337412e-06, "loss": 0.6078391671180725, "step": 6716 }, { "epoch": 1.6327175498298492, "grad_norm": 1.7842208032259301, "learning_rate": 1.7905882998435443e-06, "loss": 0.6261169910430908, "step": 6717 }, { "epoch": 1.6329606222654351, "grad_norm": 1.8193593086389221, "learning_rate": 1.7882938126524363e-06, "loss": 0.6803112030029297, "step": 6718 }, { "epoch": 1.633203694701021, "grad_norm": 1.7812306109850389, "learning_rate": 1.7860006521347873e-06, "loss": 0.587918758392334, "step": 6719 }, { "epoch": 1.6334467671366069, "grad_norm": 2.1285116489738667, "learning_rate": 1.783708818661074e-06, "loss": 0.5546055436134338, "step": 6720 }, { "epoch": 1.6336898395721926, "grad_norm": 1.7152332942557484, "learning_rate": 1.7814183126015649e-06, "loss": 0.6159456372261047, "step": 6721 }, { "epoch": 1.6339329120077783, "grad_norm": 1.5508157843134853, "learning_rate": 1.7791291343263085e-06, "loss": 0.46767860651016235, "step": 6722 }, { "epoch": 1.634175984443364, "grad_norm": 1.7885433155637525, "learning_rate": 1.776841284205143e-06, "loss": 0.614267885684967, "step": 6723 }, { "epoch": 1.6344190568789498, "grad_norm": 1.523932032834384, "learning_rate": 1.7745547626076931e-06, "loss": 0.5033255219459534, "step": 6724 }, { "epoch": 1.6346621293145356, "grad_norm": 1.7112888980995038, "learning_rate": 1.7722695699033632e-06, "loss": 0.572610080242157, "step": 6725 }, { "epoch": 1.6349052017501216, "grad_norm": 1.668225596329694, "learning_rate": 1.769985706461349e-06, "loss": 0.6161785125732422, "step": 6726 }, { "epoch": 1.6351482741857073, "grad_norm": 1.8001198543567116, "learning_rate": 1.7677031726506256e-06, "loss": 0.79952073097229, "step": 6727 }, { "epoch": 1.6353913466212933, "grad_norm": 1.7881503225194717, "learning_rate": 1.765421968839961e-06, "loss": 0.6637136340141296, "step": 6728 }, { "epoch": 1.635634419056879, "grad_norm": 1.4986846922311516, "learning_rate": 1.7631420953978974e-06, "loss": 0.49292367696762085, "step": 6729 }, { "epoch": 1.6358774914924648, "grad_norm": 1.9327370260659105, "learning_rate": 1.7608635526927754e-06, "loss": 0.6831244230270386, "step": 6730 }, { "epoch": 1.6361205639280505, "grad_norm": 1.6839440125955065, "learning_rate": 1.7585863410927085e-06, "loss": 0.5000861287117004, "step": 6731 }, { "epoch": 1.6363636363636362, "grad_norm": 1.6915985229083281, "learning_rate": 1.756310460965601e-06, "loss": 0.6323164701461792, "step": 6732 }, { "epoch": 1.6366067087992222, "grad_norm": 1.5173946637446116, "learning_rate": 1.754035912679145e-06, "loss": 0.6094322204589844, "step": 6733 }, { "epoch": 1.636849781234808, "grad_norm": 1.7302292741178296, "learning_rate": 1.7517626966008095e-06, "loss": 0.6800304055213928, "step": 6734 }, { "epoch": 1.637092853670394, "grad_norm": 1.5535288026969514, "learning_rate": 1.749490813097856e-06, "loss": 0.5781310200691223, "step": 6735 }, { "epoch": 1.6373359261059797, "grad_norm": 1.6131496979706483, "learning_rate": 1.7472202625373236e-06, "loss": 0.4247131943702698, "step": 6736 }, { "epoch": 1.6375789985415654, "grad_norm": 1.7379260899547222, "learning_rate": 1.744951045286044e-06, "loss": 0.5815620422363281, "step": 6737 }, { "epoch": 1.6378220709771512, "grad_norm": 1.559566843454697, "learning_rate": 1.7426831617106243e-06, "loss": 0.3900752663612366, "step": 6738 }, { "epoch": 1.638065143412737, "grad_norm": 1.530687130278235, "learning_rate": 1.7404166121774646e-06, "loss": 0.7034904956817627, "step": 6739 }, { "epoch": 1.6383082158483226, "grad_norm": 1.5782667171489702, "learning_rate": 1.7381513970527474e-06, "loss": 0.5769414901733398, "step": 6740 }, { "epoch": 1.6385512882839086, "grad_norm": 1.9164450959251944, "learning_rate": 1.7358875167024336e-06, "loss": 0.6816832423210144, "step": 6741 }, { "epoch": 1.6387943607194944, "grad_norm": 1.6123160667138206, "learning_rate": 1.733624971492277e-06, "loss": 0.490073025226593, "step": 6742 }, { "epoch": 1.6390374331550803, "grad_norm": 1.703439838585199, "learning_rate": 1.7313637617878076e-06, "loss": 0.6000887155532837, "step": 6743 }, { "epoch": 1.639280505590666, "grad_norm": 1.5462275181068927, "learning_rate": 1.7291038879543487e-06, "loss": 0.496196448802948, "step": 6744 }, { "epoch": 1.6395235780262518, "grad_norm": 1.7074982189190944, "learning_rate": 1.7268453503569982e-06, "loss": 0.5574652552604675, "step": 6745 }, { "epoch": 1.6397666504618376, "grad_norm": 1.5385440246547741, "learning_rate": 1.7245881493606476e-06, "loss": 0.5046147704124451, "step": 6746 }, { "epoch": 1.6400097228974233, "grad_norm": 1.4497449995128189, "learning_rate": 1.7223322853299618e-06, "loss": 0.5339906215667725, "step": 6747 }, { "epoch": 1.6402527953330093, "grad_norm": 1.6591628374224534, "learning_rate": 1.7200777586293983e-06, "loss": 0.4912545680999756, "step": 6748 }, { "epoch": 1.640495867768595, "grad_norm": 1.6581746003078117, "learning_rate": 1.7178245696231953e-06, "loss": 0.702823281288147, "step": 6749 }, { "epoch": 1.640738940204181, "grad_norm": 1.3634011092667693, "learning_rate": 1.71557271867538e-06, "loss": 0.5311009883880615, "step": 6750 }, { "epoch": 1.6409820126397667, "grad_norm": 1.9983689748422642, "learning_rate": 1.7133222061497534e-06, "loss": 0.6139790415763855, "step": 6751 }, { "epoch": 1.6412250850753525, "grad_norm": 1.6685517158074832, "learning_rate": 1.7110730324099057e-06, "loss": 0.5733411908149719, "step": 6752 }, { "epoch": 1.6414681575109382, "grad_norm": 1.4677892502128091, "learning_rate": 1.7088251978192127e-06, "loss": 0.5966470241546631, "step": 6753 }, { "epoch": 1.641711229946524, "grad_norm": 2.1026713459098696, "learning_rate": 1.7065787027408298e-06, "loss": 0.5635460019111633, "step": 6754 }, { "epoch": 1.6419543023821097, "grad_norm": 1.665652669515399, "learning_rate": 1.704333547537702e-06, "loss": 0.7157745361328125, "step": 6755 }, { "epoch": 1.6421973748176957, "grad_norm": 1.4854113564407592, "learning_rate": 1.702089732572546e-06, "loss": 0.6291275024414062, "step": 6756 }, { "epoch": 1.6424404472532816, "grad_norm": 1.582816916017405, "learning_rate": 1.6998472582078806e-06, "loss": 0.6413069367408752, "step": 6757 }, { "epoch": 1.6426835196888674, "grad_norm": 1.6554991480404777, "learning_rate": 1.6976061248059894e-06, "loss": 0.5047941207885742, "step": 6758 }, { "epoch": 1.6429265921244531, "grad_norm": 1.7034661919254943, "learning_rate": 1.6953663327289516e-06, "loss": 0.6055202484130859, "step": 6759 }, { "epoch": 1.6431696645600389, "grad_norm": 1.8121099468694013, "learning_rate": 1.6931278823386233e-06, "loss": 0.6287364959716797, "step": 6760 }, { "epoch": 1.6434127369956246, "grad_norm": 1.7312364471224706, "learning_rate": 1.690890773996644e-06, "loss": 0.47307801246643066, "step": 6761 }, { "epoch": 1.6436558094312104, "grad_norm": 1.4320704681070187, "learning_rate": 1.6886550080644426e-06, "loss": 0.6820114850997925, "step": 6762 }, { "epoch": 1.6438988818667963, "grad_norm": 1.6559925601730858, "learning_rate": 1.6864205849032223e-06, "loss": 0.5476333498954773, "step": 6763 }, { "epoch": 1.644141954302382, "grad_norm": 1.6312941024392764, "learning_rate": 1.6841875048739753e-06, "loss": 0.42487144470214844, "step": 6764 }, { "epoch": 1.644385026737968, "grad_norm": 1.547763256614197, "learning_rate": 1.6819557683374766e-06, "loss": 0.5830655097961426, "step": 6765 }, { "epoch": 1.6446280991735538, "grad_norm": 1.8099844513032604, "learning_rate": 1.6797253756542843e-06, "loss": 0.6558496952056885, "step": 6766 }, { "epoch": 1.6448711716091395, "grad_norm": 1.6085986969769022, "learning_rate": 1.6774963271847334e-06, "loss": 0.514545738697052, "step": 6767 }, { "epoch": 1.6451142440447253, "grad_norm": 1.690369246455795, "learning_rate": 1.6752686232889527e-06, "loss": 0.603320300579071, "step": 6768 }, { "epoch": 1.645357316480311, "grad_norm": 1.6759914416946433, "learning_rate": 1.6730422643268418e-06, "loss": 0.5794844031333923, "step": 6769 }, { "epoch": 1.6456003889158968, "grad_norm": 1.7082266286542773, "learning_rate": 1.670817250658089e-06, "loss": 0.5503374338150024, "step": 6770 }, { "epoch": 1.6458434613514827, "grad_norm": 1.7505148766000374, "learning_rate": 1.6685935826421685e-06, "loss": 0.5809928178787231, "step": 6771 }, { "epoch": 1.6460865337870687, "grad_norm": 1.676316830149759, "learning_rate": 1.6663712606383263e-06, "loss": 0.644917368888855, "step": 6772 }, { "epoch": 1.6463296062226545, "grad_norm": 1.4459849660050899, "learning_rate": 1.6641502850056078e-06, "loss": 0.46593785285949707, "step": 6773 }, { "epoch": 1.6465726786582402, "grad_norm": 1.5082149490334988, "learning_rate": 1.6619306561028236e-06, "loss": 0.5401439666748047, "step": 6774 }, { "epoch": 1.646815751093826, "grad_norm": 1.4909295716403375, "learning_rate": 1.65971237428858e-06, "loss": 0.6361878514289856, "step": 6775 }, { "epoch": 1.6470588235294117, "grad_norm": 1.8326846362205664, "learning_rate": 1.6574954399212551e-06, "loss": 0.5014734268188477, "step": 6776 }, { "epoch": 1.6473018959649974, "grad_norm": 1.4980615011053513, "learning_rate": 1.6552798533590176e-06, "loss": 0.41770729422569275, "step": 6777 }, { "epoch": 1.6475449684005834, "grad_norm": 1.5424725597671052, "learning_rate": 1.6530656149598134e-06, "loss": 0.49931034445762634, "step": 6778 }, { "epoch": 1.6477880408361691, "grad_norm": 1.7789109585291503, "learning_rate": 1.6508527250813711e-06, "loss": 0.5574613809585571, "step": 6779 }, { "epoch": 1.648031113271755, "grad_norm": 1.660929138401887, "learning_rate": 1.6486411840812032e-06, "loss": 0.4816969633102417, "step": 6780 }, { "epoch": 1.6482741857073409, "grad_norm": 1.591304766061164, "learning_rate": 1.6464309923166033e-06, "loss": 0.5964572429656982, "step": 6781 }, { "epoch": 1.6485172581429266, "grad_norm": 1.699746849774768, "learning_rate": 1.644222150144652e-06, "loss": 0.5806484222412109, "step": 6782 }, { "epoch": 1.6487603305785123, "grad_norm": 1.889285340253048, "learning_rate": 1.6420146579222008e-06, "loss": 0.6022319793701172, "step": 6783 }, { "epoch": 1.649003403014098, "grad_norm": 1.3786468894185133, "learning_rate": 1.639808516005894e-06, "loss": 0.520696759223938, "step": 6784 }, { "epoch": 1.649246475449684, "grad_norm": 1.5616527000339775, "learning_rate": 1.6376037247521493e-06, "loss": 0.6408368945121765, "step": 6785 }, { "epoch": 1.6494895478852698, "grad_norm": 1.5947197839867986, "learning_rate": 1.6354002845171746e-06, "loss": 0.5224895477294922, "step": 6786 }, { "epoch": 1.6497326203208558, "grad_norm": 1.6107271206484937, "learning_rate": 1.6331981956569499e-06, "loss": 0.5514097213745117, "step": 6787 }, { "epoch": 1.6499756927564415, "grad_norm": 1.6950191057974904, "learning_rate": 1.6309974585272482e-06, "loss": 0.501939594745636, "step": 6788 }, { "epoch": 1.6502187651920273, "grad_norm": 1.6074234567383283, "learning_rate": 1.6287980734836118e-06, "loss": 0.6203149557113647, "step": 6789 }, { "epoch": 1.650461837627613, "grad_norm": 1.8425479386201118, "learning_rate": 1.6266000408813742e-06, "loss": 0.5975390076637268, "step": 6790 }, { "epoch": 1.6507049100631987, "grad_norm": 1.8718783928089888, "learning_rate": 1.6244033610756482e-06, "loss": 0.44059503078460693, "step": 6791 }, { "epoch": 1.6509479824987845, "grad_norm": 1.9182708934871486, "learning_rate": 1.622208034421322e-06, "loss": 0.5057539939880371, "step": 6792 }, { "epoch": 1.6511910549343705, "grad_norm": 1.6699873926864865, "learning_rate": 1.6200140612730765e-06, "loss": 0.6093566417694092, "step": 6793 }, { "epoch": 1.6514341273699562, "grad_norm": 1.6090971673672907, "learning_rate": 1.61782144198536e-06, "loss": 0.538998007774353, "step": 6794 }, { "epoch": 1.6516771998055422, "grad_norm": 1.8315000627606943, "learning_rate": 1.615630176912416e-06, "loss": 0.49603259563446045, "step": 6795 }, { "epoch": 1.651920272241128, "grad_norm": 2.0899581600850334, "learning_rate": 1.6134402664082572e-06, "loss": 0.4983118176460266, "step": 6796 }, { "epoch": 1.6521633446767137, "grad_norm": 1.6339848644768646, "learning_rate": 1.6112517108266867e-06, "loss": 0.4502371549606323, "step": 6797 }, { "epoch": 1.6524064171122994, "grad_norm": 1.8445020709798505, "learning_rate": 1.6090645105212854e-06, "loss": 0.6291937232017517, "step": 6798 }, { "epoch": 1.6526494895478852, "grad_norm": 1.6336462724043697, "learning_rate": 1.6068786658454105e-06, "loss": 0.5643644332885742, "step": 6799 }, { "epoch": 1.6528925619834711, "grad_norm": 1.6997106296577766, "learning_rate": 1.6046941771522106e-06, "loss": 0.5648398399353027, "step": 6800 }, { "epoch": 1.6531356344190569, "grad_norm": 1.5710210675368463, "learning_rate": 1.6025110447946024e-06, "loss": 0.5198243856430054, "step": 6801 }, { "epoch": 1.6533787068546428, "grad_norm": 1.6095898619810827, "learning_rate": 1.6003292691252958e-06, "loss": 0.5342311263084412, "step": 6802 }, { "epoch": 1.6536217792902286, "grad_norm": 1.859236144447321, "learning_rate": 1.5981488504967712e-06, "loss": 0.5389232039451599, "step": 6803 }, { "epoch": 1.6538648517258143, "grad_norm": 1.7702881456868798, "learning_rate": 1.5959697892612991e-06, "loss": 0.5431442856788635, "step": 6804 }, { "epoch": 1.6541079241614, "grad_norm": 1.5562854824971386, "learning_rate": 1.593792085770921e-06, "loss": 0.5731674432754517, "step": 6805 }, { "epoch": 1.6543509965969858, "grad_norm": 1.524983199124142, "learning_rate": 1.5916157403774667e-06, "loss": 0.5432218313217163, "step": 6806 }, { "epoch": 1.6545940690325716, "grad_norm": 1.6528922231460856, "learning_rate": 1.5894407534325474e-06, "loss": 0.6593745350837708, "step": 6807 }, { "epoch": 1.6548371414681575, "grad_norm": 1.601969892263279, "learning_rate": 1.587267125287545e-06, "loss": 0.569566547870636, "step": 6808 }, { "epoch": 1.6550802139037433, "grad_norm": 1.8647685013881647, "learning_rate": 1.585094856293633e-06, "loss": 0.587228000164032, "step": 6809 }, { "epoch": 1.6553232863393292, "grad_norm": 1.6886596419709003, "learning_rate": 1.5829239468017565e-06, "loss": 0.4876830577850342, "step": 6810 }, { "epoch": 1.655566358774915, "grad_norm": 1.7758865511455943, "learning_rate": 1.5807543971626505e-06, "loss": 0.5632275938987732, "step": 6811 }, { "epoch": 1.6558094312105007, "grad_norm": 1.6603589465294966, "learning_rate": 1.5785862077268187e-06, "loss": 0.6607354879379272, "step": 6812 }, { "epoch": 1.6560525036460865, "grad_norm": 1.8002613119642856, "learning_rate": 1.5764193788445548e-06, "loss": 0.5308077335357666, "step": 6813 }, { "epoch": 1.6562955760816722, "grad_norm": 1.7431878475353633, "learning_rate": 1.5742539108659305e-06, "loss": 0.6197848320007324, "step": 6814 }, { "epoch": 1.6565386485172582, "grad_norm": 1.7270942706099692, "learning_rate": 1.5720898041407928e-06, "loss": 0.5124588012695312, "step": 6815 }, { "epoch": 1.656781720952844, "grad_norm": 1.8793063307027544, "learning_rate": 1.5699270590187753e-06, "loss": 0.5822991728782654, "step": 6816 }, { "epoch": 1.65702479338843, "grad_norm": 1.6682095347419794, "learning_rate": 1.5677656758492843e-06, "loss": 0.5134687423706055, "step": 6817 }, { "epoch": 1.6572678658240156, "grad_norm": 1.701858895878732, "learning_rate": 1.5656056549815158e-06, "loss": 0.6112955808639526, "step": 6818 }, { "epoch": 1.6575109382596014, "grad_norm": 1.8076063037913552, "learning_rate": 1.5634469967644338e-06, "loss": 0.656975269317627, "step": 6819 }, { "epoch": 1.6577540106951871, "grad_norm": 1.6215755304942903, "learning_rate": 1.5612897015467953e-06, "loss": 0.5936046838760376, "step": 6820 }, { "epoch": 1.6579970831307729, "grad_norm": 1.6700320438941294, "learning_rate": 1.5591337696771247e-06, "loss": 0.47113245725631714, "step": 6821 }, { "epoch": 1.6582401555663586, "grad_norm": 1.5164631189679434, "learning_rate": 1.5569792015037334e-06, "loss": 0.5393043756484985, "step": 6822 }, { "epoch": 1.6584832280019446, "grad_norm": 1.827986935360495, "learning_rate": 1.5548259973747148e-06, "loss": 0.6330162882804871, "step": 6823 }, { "epoch": 1.6587263004375303, "grad_norm": 1.6125314799107768, "learning_rate": 1.5526741576379311e-06, "loss": 0.6221135854721069, "step": 6824 }, { "epoch": 1.6589693728731163, "grad_norm": 1.8878890699955202, "learning_rate": 1.5505236826410385e-06, "loss": 0.459486186504364, "step": 6825 }, { "epoch": 1.659212445308702, "grad_norm": 1.952223496565309, "learning_rate": 1.548374572731458e-06, "loss": 0.6421010494232178, "step": 6826 }, { "epoch": 1.6594555177442878, "grad_norm": 1.8431496240136251, "learning_rate": 1.546226828256403e-06, "loss": 0.6428818702697754, "step": 6827 }, { "epoch": 1.6596985901798735, "grad_norm": 1.6364476631585159, "learning_rate": 1.5440804495628558e-06, "loss": 0.5652294754981995, "step": 6828 }, { "epoch": 1.6599416626154593, "grad_norm": 2.060562223975504, "learning_rate": 1.541935436997588e-06, "loss": 0.6369094848632812, "step": 6829 }, { "epoch": 1.6601847350510452, "grad_norm": 1.6061639477446032, "learning_rate": 1.5397917909071392e-06, "loss": 0.47631746530532837, "step": 6830 }, { "epoch": 1.660427807486631, "grad_norm": 1.7913703145625464, "learning_rate": 1.5376495116378387e-06, "loss": 0.6011338829994202, "step": 6831 }, { "epoch": 1.660670879922217, "grad_norm": 1.284575418426272, "learning_rate": 1.53550859953579e-06, "loss": 0.5809845924377441, "step": 6832 }, { "epoch": 1.6609139523578027, "grad_norm": 1.5139724097894667, "learning_rate": 1.533369054946875e-06, "loss": 0.4710683524608612, "step": 6833 }, { "epoch": 1.6611570247933884, "grad_norm": 1.8981461837647164, "learning_rate": 1.5312308782167573e-06, "loss": 0.6972324848175049, "step": 6834 }, { "epoch": 1.6614000972289742, "grad_norm": 1.888728627973235, "learning_rate": 1.5290940696908762e-06, "loss": 0.6906039714813232, "step": 6835 }, { "epoch": 1.66164316966456, "grad_norm": 1.5294084241069923, "learning_rate": 1.5269586297144546e-06, "loss": 0.5468201041221619, "step": 6836 }, { "epoch": 1.6618862421001457, "grad_norm": 1.8373578130906945, "learning_rate": 1.5248245586324883e-06, "loss": 0.4896007180213928, "step": 6837 }, { "epoch": 1.6621293145357317, "grad_norm": 1.6072919203108744, "learning_rate": 1.5226918567897576e-06, "loss": 0.5412893295288086, "step": 6838 }, { "epoch": 1.6623723869713176, "grad_norm": 1.7061247958230203, "learning_rate": 1.5205605245308218e-06, "loss": 0.5446433424949646, "step": 6839 }, { "epoch": 1.6626154594069034, "grad_norm": 1.7524775479702586, "learning_rate": 1.5184305622000106e-06, "loss": 0.6725059151649475, "step": 6840 }, { "epoch": 1.662858531842489, "grad_norm": 1.729580437853065, "learning_rate": 1.5163019701414428e-06, "loss": 0.5589271783828735, "step": 6841 }, { "epoch": 1.6631016042780749, "grad_norm": 1.93851481044877, "learning_rate": 1.5141747486990077e-06, "loss": 0.5661444664001465, "step": 6842 }, { "epoch": 1.6633446767136606, "grad_norm": 1.5380996649227399, "learning_rate": 1.5120488982163805e-06, "loss": 0.7603115439414978, "step": 6843 }, { "epoch": 1.6635877491492463, "grad_norm": 1.8170744274955761, "learning_rate": 1.5099244190370066e-06, "loss": 0.4759223759174347, "step": 6844 }, { "epoch": 1.6638308215848323, "grad_norm": 1.809558800122264, "learning_rate": 1.507801311504119e-06, "loss": 0.587895929813385, "step": 6845 }, { "epoch": 1.664073894020418, "grad_norm": 1.6505300949761061, "learning_rate": 1.5056795759607179e-06, "loss": 0.5329710245132446, "step": 6846 }, { "epoch": 1.664316966456004, "grad_norm": 1.9275840292512845, "learning_rate": 1.5035592127495968e-06, "loss": 0.591010570526123, "step": 6847 }, { "epoch": 1.6645600388915898, "grad_norm": 1.795404631901292, "learning_rate": 1.5014402222133163e-06, "loss": 0.5224888920783997, "step": 6848 }, { "epoch": 1.6648031113271755, "grad_norm": 1.4924437443746823, "learning_rate": 1.4993226046942121e-06, "loss": 0.4432429075241089, "step": 6849 }, { "epoch": 1.6650461837627613, "grad_norm": 1.792920321659074, "learning_rate": 1.4972063605344122e-06, "loss": 0.5524827241897583, "step": 6850 }, { "epoch": 1.665289256198347, "grad_norm": 1.9475441967169622, "learning_rate": 1.4950914900758073e-06, "loss": 0.5402528643608093, "step": 6851 }, { "epoch": 1.6655323286339327, "grad_norm": 1.9025756895982175, "learning_rate": 1.4929779936600797e-06, "loss": 0.6143530011177063, "step": 6852 }, { "epoch": 1.6657754010695187, "grad_norm": 1.8349403073243649, "learning_rate": 1.4908658716286784e-06, "loss": 0.5568746328353882, "step": 6853 }, { "epoch": 1.6660184735051047, "grad_norm": 1.5972644477526001, "learning_rate": 1.488755124322836e-06, "loss": 0.67160964012146, "step": 6854 }, { "epoch": 1.6662615459406904, "grad_norm": 1.505735038071414, "learning_rate": 1.4866457520835653e-06, "loss": 0.5968840718269348, "step": 6855 }, { "epoch": 1.6665046183762762, "grad_norm": 1.6142473384469584, "learning_rate": 1.4845377552516537e-06, "loss": 0.7363849878311157, "step": 6856 }, { "epoch": 1.666747690811862, "grad_norm": 1.6365522510823491, "learning_rate": 1.4824311341676622e-06, "loss": 0.4877338409423828, "step": 6857 }, { "epoch": 1.6669907632474477, "grad_norm": 1.6847290837059745, "learning_rate": 1.4803258891719407e-06, "loss": 0.5696061253547668, "step": 6858 }, { "epoch": 1.6672338356830334, "grad_norm": 1.7271892936819335, "learning_rate": 1.4782220206046061e-06, "loss": 0.6307550668716431, "step": 6859 }, { "epoch": 1.6674769081186194, "grad_norm": 1.4749223631138288, "learning_rate": 1.476119528805554e-06, "loss": 0.50021892786026, "step": 6860 }, { "epoch": 1.6677199805542051, "grad_norm": 1.22155658363768, "learning_rate": 1.4740184141144664e-06, "loss": 0.5426176190376282, "step": 6861 }, { "epoch": 1.667963052989791, "grad_norm": 1.5533230940795106, "learning_rate": 1.4719186768707916e-06, "loss": 0.49054622650146484, "step": 6862 }, { "epoch": 1.6682061254253768, "grad_norm": 1.429705285859648, "learning_rate": 1.4698203174137637e-06, "loss": 0.4988883137702942, "step": 6863 }, { "epoch": 1.6684491978609626, "grad_norm": 1.5225374365957975, "learning_rate": 1.4677233360823906e-06, "loss": 0.6755847334861755, "step": 6864 }, { "epoch": 1.6686922702965483, "grad_norm": 1.6087099276181414, "learning_rate": 1.46562773321546e-06, "loss": 0.5722277164459229, "step": 6865 }, { "epoch": 1.668935342732134, "grad_norm": 1.842000316309544, "learning_rate": 1.4635335091515312e-06, "loss": 0.49241986870765686, "step": 6866 }, { "epoch": 1.66917841516772, "grad_norm": 1.710283572430324, "learning_rate": 1.4614406642289492e-06, "loss": 0.428937166929245, "step": 6867 }, { "epoch": 1.6694214876033058, "grad_norm": 1.891181529200258, "learning_rate": 1.4593491987858288e-06, "loss": 0.503006100654602, "step": 6868 }, { "epoch": 1.6696645600388917, "grad_norm": 1.7307351428930862, "learning_rate": 1.457259113160061e-06, "loss": 0.5286684632301331, "step": 6869 }, { "epoch": 1.6699076324744775, "grad_norm": 1.8703982083757957, "learning_rate": 1.4551704076893247e-06, "loss": 0.5568320751190186, "step": 6870 }, { "epoch": 1.6701507049100632, "grad_norm": 1.8205245734915607, "learning_rate": 1.4530830827110608e-06, "loss": 0.7652924656867981, "step": 6871 }, { "epoch": 1.670393777345649, "grad_norm": 1.7305644591453633, "learning_rate": 1.4509971385625033e-06, "loss": 0.5560042858123779, "step": 6872 }, { "epoch": 1.6706368497812347, "grad_norm": 1.8151430497825758, "learning_rate": 1.4489125755806487e-06, "loss": 0.5341084003448486, "step": 6873 }, { "epoch": 1.6708799222168205, "grad_norm": 1.6588368378984981, "learning_rate": 1.4468293941022815e-06, "loss": 0.6260244846343994, "step": 6874 }, { "epoch": 1.6711229946524064, "grad_norm": 1.7212050894486834, "learning_rate": 1.444747594463952e-06, "loss": 0.7269504070281982, "step": 6875 }, { "epoch": 1.6713660670879922, "grad_norm": 1.7394291846167347, "learning_rate": 1.4426671770020007e-06, "loss": 0.6549230813980103, "step": 6876 }, { "epoch": 1.6716091395235781, "grad_norm": 1.9108816514129152, "learning_rate": 1.4405881420525315e-06, "loss": 0.609656572341919, "step": 6877 }, { "epoch": 1.671852211959164, "grad_norm": 1.6957996370610084, "learning_rate": 1.4385104899514314e-06, "loss": 0.6338484287261963, "step": 6878 }, { "epoch": 1.6720952843947496, "grad_norm": 1.5502817881113482, "learning_rate": 1.436434221034364e-06, "loss": 0.5458695888519287, "step": 6879 }, { "epoch": 1.6723383568303354, "grad_norm": 1.7849988379319326, "learning_rate": 1.434359335636769e-06, "loss": 0.6378799676895142, "step": 6880 }, { "epoch": 1.6725814292659211, "grad_norm": 1.782939024138446, "learning_rate": 1.432285834093865e-06, "loss": 0.5976606607437134, "step": 6881 }, { "epoch": 1.672824501701507, "grad_norm": 1.6893087957854007, "learning_rate": 1.4302137167406404e-06, "loss": 0.6453735828399658, "step": 6882 }, { "epoch": 1.6730675741370928, "grad_norm": 1.480257015972683, "learning_rate": 1.4281429839118677e-06, "loss": 0.7453477382659912, "step": 6883 }, { "epoch": 1.6733106465726788, "grad_norm": 1.877757263536859, "learning_rate": 1.426073635942088e-06, "loss": 0.6263900995254517, "step": 6884 }, { "epoch": 1.6735537190082646, "grad_norm": 1.509697424143534, "learning_rate": 1.4240056731656271e-06, "loss": 0.5935059189796448, "step": 6885 }, { "epoch": 1.6737967914438503, "grad_norm": 1.4797538245376745, "learning_rate": 1.4219390959165779e-06, "loss": 0.5955191254615784, "step": 6886 }, { "epoch": 1.674039863879436, "grad_norm": 1.5046985951481848, "learning_rate": 1.4198739045288168e-06, "loss": 0.5541762113571167, "step": 6887 }, { "epoch": 1.6742829363150218, "grad_norm": 1.6767123748063077, "learning_rate": 1.4178100993359967e-06, "loss": 0.4822057783603668, "step": 6888 }, { "epoch": 1.6745260087506075, "grad_norm": 1.5392387308088962, "learning_rate": 1.4157476806715364e-06, "loss": 0.4945920705795288, "step": 6889 }, { "epoch": 1.6747690811861935, "grad_norm": 1.694039576959565, "learning_rate": 1.4136866488686441e-06, "loss": 0.729863166809082, "step": 6890 }, { "epoch": 1.6750121536217792, "grad_norm": 1.7076680324192188, "learning_rate": 1.411627004260293e-06, "loss": 0.779350757598877, "step": 6891 }, { "epoch": 1.6752552260573652, "grad_norm": 1.711281267312653, "learning_rate": 1.409568747179242e-06, "loss": 0.5285130739212036, "step": 6892 }, { "epoch": 1.675498298492951, "grad_norm": 1.41049383558305, "learning_rate": 1.4075118779580155e-06, "loss": 0.4921248257160187, "step": 6893 }, { "epoch": 1.6757413709285367, "grad_norm": 1.7504250060390676, "learning_rate": 1.4054563969289225e-06, "loss": 0.5812284350395203, "step": 6894 }, { "epoch": 1.6759844433641224, "grad_norm": 2.0573014629169015, "learning_rate": 1.4034023044240396e-06, "loss": 0.5348492860794067, "step": 6895 }, { "epoch": 1.6762275157997082, "grad_norm": 1.8947199690288585, "learning_rate": 1.401349600775228e-06, "loss": 0.6222202777862549, "step": 6896 }, { "epoch": 1.6764705882352942, "grad_norm": 1.5677373461920527, "learning_rate": 1.3992982863141203e-06, "loss": 0.5152330994606018, "step": 6897 }, { "epoch": 1.67671366067088, "grad_norm": 1.6014389303090548, "learning_rate": 1.39724836137212e-06, "loss": 0.6952754855155945, "step": 6898 }, { "epoch": 1.6769567331064659, "grad_norm": 1.9211035490044186, "learning_rate": 1.3951998262804156e-06, "loss": 0.5845398902893066, "step": 6899 }, { "epoch": 1.6771998055420516, "grad_norm": 1.9486046308031124, "learning_rate": 1.3931526813699614e-06, "loss": 0.5825624465942383, "step": 6900 }, { "epoch": 1.6774428779776374, "grad_norm": 1.5425630658661205, "learning_rate": 1.391106926971496e-06, "loss": 0.5391628742218018, "step": 6901 }, { "epoch": 1.677685950413223, "grad_norm": 1.8628338312524855, "learning_rate": 1.3890625634155254e-06, "loss": 0.6256569623947144, "step": 6902 }, { "epoch": 1.6779290228488088, "grad_norm": 1.456240331900216, "learning_rate": 1.387019591032337e-06, "loss": 0.4733671545982361, "step": 6903 }, { "epoch": 1.6781720952843946, "grad_norm": 1.5051229327243447, "learning_rate": 1.3849780101519883e-06, "loss": 0.4688500165939331, "step": 6904 }, { "epoch": 1.6784151677199806, "grad_norm": 1.6690800013966653, "learning_rate": 1.3829378211043166e-06, "loss": 0.46389997005462646, "step": 6905 }, { "epoch": 1.6786582401555663, "grad_norm": 1.7182753517200269, "learning_rate": 1.3808990242189346e-06, "loss": 0.6015380620956421, "step": 6906 }, { "epoch": 1.6789013125911523, "grad_norm": 1.7136821735974122, "learning_rate": 1.3788616198252236e-06, "loss": 0.6193168759346008, "step": 6907 }, { "epoch": 1.679144385026738, "grad_norm": 1.8194876195336478, "learning_rate": 1.3768256082523479e-06, "loss": 0.5663849115371704, "step": 6908 }, { "epoch": 1.6793874574623238, "grad_norm": 1.646931539153424, "learning_rate": 1.3747909898292389e-06, "loss": 0.45200055837631226, "step": 6909 }, { "epoch": 1.6796305298979095, "grad_norm": 1.590553864355272, "learning_rate": 1.3727577648846113e-06, "loss": 0.6649762392044067, "step": 6910 }, { "epoch": 1.6798736023334953, "grad_norm": 1.4870879623152091, "learning_rate": 1.370725933746947e-06, "loss": 0.6039979457855225, "step": 6911 }, { "epoch": 1.6801166747690812, "grad_norm": 1.8565527604805767, "learning_rate": 1.3686954967445088e-06, "loss": 0.5758146047592163, "step": 6912 }, { "epoch": 1.680359747204667, "grad_norm": 1.5849857992300809, "learning_rate": 1.366666454205332e-06, "loss": 0.5833969116210938, "step": 6913 }, { "epoch": 1.680602819640253, "grad_norm": 1.8154640435865588, "learning_rate": 1.3646388064572235e-06, "loss": 0.4750775694847107, "step": 6914 }, { "epoch": 1.6808458920758387, "grad_norm": 1.3165968536648942, "learning_rate": 1.3626125538277712e-06, "loss": 0.5380561351776123, "step": 6915 }, { "epoch": 1.6810889645114244, "grad_norm": 1.7497772742851343, "learning_rate": 1.36058769664433e-06, "loss": 0.5456632375717163, "step": 6916 }, { "epoch": 1.6813320369470102, "grad_norm": 1.8249413408091644, "learning_rate": 1.3585642352340377e-06, "loss": 0.5394749641418457, "step": 6917 }, { "epoch": 1.681575109382596, "grad_norm": 1.656081996930278, "learning_rate": 1.3565421699237979e-06, "loss": 0.5916489362716675, "step": 6918 }, { "epoch": 1.6818181818181817, "grad_norm": 1.5798082814996992, "learning_rate": 1.3545215010402979e-06, "loss": 0.6462844610214233, "step": 6919 }, { "epoch": 1.6820612542537676, "grad_norm": 1.4987525079368298, "learning_rate": 1.3525022289099897e-06, "loss": 0.41152477264404297, "step": 6920 }, { "epoch": 1.6823043266893536, "grad_norm": 1.5326781754890455, "learning_rate": 1.350484353859106e-06, "loss": 0.42211490869522095, "step": 6921 }, { "epoch": 1.6825473991249393, "grad_norm": 1.6629203169015108, "learning_rate": 1.348467876213656e-06, "loss": 0.6173069477081299, "step": 6922 }, { "epoch": 1.682790471560525, "grad_norm": 1.876463469421557, "learning_rate": 1.3464527962994145e-06, "loss": 0.5996475219726562, "step": 6923 }, { "epoch": 1.6830335439961108, "grad_norm": 1.9101293857247117, "learning_rate": 1.3444391144419388e-06, "loss": 0.6588393449783325, "step": 6924 }, { "epoch": 1.6832766164316966, "grad_norm": 2.484748032156152, "learning_rate": 1.3424268309665546e-06, "loss": 0.5916286706924438, "step": 6925 }, { "epoch": 1.6835196888672823, "grad_norm": 1.643503108419761, "learning_rate": 1.3404159461983657e-06, "loss": 0.6659595966339111, "step": 6926 }, { "epoch": 1.6837627613028683, "grad_norm": 1.953613114637106, "learning_rate": 1.3384064604622459e-06, "loss": 0.5740668773651123, "step": 6927 }, { "epoch": 1.684005833738454, "grad_norm": 1.5240214079448693, "learning_rate": 1.336398374082848e-06, "loss": 0.4766296446323395, "step": 6928 }, { "epoch": 1.68424890617404, "grad_norm": 1.761975493982454, "learning_rate": 1.3343916873845952e-06, "loss": 0.606937825679779, "step": 6929 }, { "epoch": 1.6844919786096257, "grad_norm": 1.6357411205064512, "learning_rate": 1.3323864006916843e-06, "loss": 0.5279386043548584, "step": 6930 }, { "epoch": 1.6847350510452115, "grad_norm": 1.8365946294693598, "learning_rate": 1.3303825143280903e-06, "loss": 0.5901325941085815, "step": 6931 }, { "epoch": 1.6849781234807972, "grad_norm": 1.9842723946452383, "learning_rate": 1.328380028617553e-06, "loss": 0.5961786508560181, "step": 6932 }, { "epoch": 1.685221195916383, "grad_norm": 1.4315461772529887, "learning_rate": 1.3263789438835973e-06, "loss": 0.568176805973053, "step": 6933 }, { "epoch": 1.685464268351969, "grad_norm": 1.5294332360897849, "learning_rate": 1.324379260449512e-06, "loss": 0.47881242632865906, "step": 6934 }, { "epoch": 1.6857073407875547, "grad_norm": 1.9821302541024255, "learning_rate": 1.3223809786383669e-06, "loss": 0.5883041620254517, "step": 6935 }, { "epoch": 1.6859504132231407, "grad_norm": 1.7049739443125191, "learning_rate": 1.3203840987729966e-06, "loss": 0.6652065515518188, "step": 6936 }, { "epoch": 1.6861934856587264, "grad_norm": 1.6014511777274905, "learning_rate": 1.3183886211760222e-06, "loss": 0.5857974290847778, "step": 6937 }, { "epoch": 1.6864365580943121, "grad_norm": 1.757359593708071, "learning_rate": 1.3163945461698268e-06, "loss": 0.4982866048812866, "step": 6938 }, { "epoch": 1.6866796305298979, "grad_norm": 1.4232994909622319, "learning_rate": 1.3144018740765685e-06, "loss": 0.5156645774841309, "step": 6939 }, { "epoch": 1.6869227029654836, "grad_norm": 1.6762641515953844, "learning_rate": 1.3124106052181861e-06, "loss": 0.5693603754043579, "step": 6940 }, { "epoch": 1.6871657754010694, "grad_norm": 1.729825823436965, "learning_rate": 1.3104207399163816e-06, "loss": 0.5381431579589844, "step": 6941 }, { "epoch": 1.6874088478366553, "grad_norm": 1.591256666306224, "learning_rate": 1.3084322784926395e-06, "loss": 0.4393288195133209, "step": 6942 }, { "epoch": 1.687651920272241, "grad_norm": 1.4855732919521285, "learning_rate": 1.30644522126821e-06, "loss": 0.514999508857727, "step": 6943 }, { "epoch": 1.687894992707827, "grad_norm": 1.749757743071344, "learning_rate": 1.3044595685641225e-06, "loss": 0.6140667200088501, "step": 6944 }, { "epoch": 1.6881380651434128, "grad_norm": 1.8074289305272213, "learning_rate": 1.302475320701171e-06, "loss": 0.5616350769996643, "step": 6945 }, { "epoch": 1.6883811375789985, "grad_norm": 1.706083367515065, "learning_rate": 1.3004924779999362e-06, "loss": 0.6422631144523621, "step": 6946 }, { "epoch": 1.6886242100145843, "grad_norm": 1.8156872975320408, "learning_rate": 1.29851104078076e-06, "loss": 0.7340966463088989, "step": 6947 }, { "epoch": 1.68886728245017, "grad_norm": 1.4816124501183945, "learning_rate": 1.2965310093637595e-06, "loss": 0.4956680238246918, "step": 6948 }, { "epoch": 1.689110354885756, "grad_norm": 1.606677948174798, "learning_rate": 1.294552384068829e-06, "loss": 0.5721871852874756, "step": 6949 }, { "epoch": 1.6893534273213417, "grad_norm": 1.6482122141147142, "learning_rate": 1.2925751652156304e-06, "loss": 0.49387967586517334, "step": 6950 }, { "epoch": 1.6895964997569277, "grad_norm": 1.8131764478206212, "learning_rate": 1.2905993531236038e-06, "loss": 0.6402489542961121, "step": 6951 }, { "epoch": 1.6898395721925135, "grad_norm": 1.6305982836295652, "learning_rate": 1.288624948111955e-06, "loss": 0.5132129192352295, "step": 6952 }, { "epoch": 1.6900826446280992, "grad_norm": 1.871835156323756, "learning_rate": 1.2866519504996688e-06, "loss": 0.6318644285202026, "step": 6953 }, { "epoch": 1.690325717063685, "grad_norm": 1.6416572354865753, "learning_rate": 1.2846803606055003e-06, "loss": 0.5977357029914856, "step": 6954 }, { "epoch": 1.6905687894992707, "grad_norm": 1.5677997491702271, "learning_rate": 1.2827101787479802e-06, "loss": 0.5275963544845581, "step": 6955 }, { "epoch": 1.6908118619348564, "grad_norm": 1.4607167982324099, "learning_rate": 1.2807414052454026e-06, "loss": 0.47936517000198364, "step": 6956 }, { "epoch": 1.6910549343704424, "grad_norm": 1.4491984547847465, "learning_rate": 1.2787740404158455e-06, "loss": 0.4017765522003174, "step": 6957 }, { "epoch": 1.6912980068060282, "grad_norm": 1.60616977198248, "learning_rate": 1.2768080845771525e-06, "loss": 0.6580475568771362, "step": 6958 }, { "epoch": 1.6915410792416141, "grad_norm": 1.70886163432742, "learning_rate": 1.274843538046937e-06, "loss": 0.6339048147201538, "step": 6959 }, { "epoch": 1.6917841516771999, "grad_norm": 4.012144127810884, "learning_rate": 1.2728804011425955e-06, "loss": 0.4829130172729492, "step": 6960 }, { "epoch": 1.6920272241127856, "grad_norm": 1.6828042334901656, "learning_rate": 1.2709186741812828e-06, "loss": 0.40293067693710327, "step": 6961 }, { "epoch": 1.6922702965483714, "grad_norm": 1.5926926915154596, "learning_rate": 1.2689583574799414e-06, "loss": 0.5188173055648804, "step": 6962 }, { "epoch": 1.692513368983957, "grad_norm": 1.593743279831298, "learning_rate": 1.2669994513552707e-06, "loss": 0.6240876913070679, "step": 6963 }, { "epoch": 1.692756441419543, "grad_norm": 1.6605385166337059, "learning_rate": 1.2650419561237537e-06, "loss": 0.5418802499771118, "step": 6964 }, { "epoch": 1.6929995138551288, "grad_norm": 1.589408716066917, "learning_rate": 1.263085872101638e-06, "loss": 0.3610803186893463, "step": 6965 }, { "epoch": 1.6932425862907148, "grad_norm": 1.5493149369674548, "learning_rate": 1.2611311996049502e-06, "loss": 0.5869201421737671, "step": 6966 }, { "epoch": 1.6934856587263005, "grad_norm": 1.5048445955896181, "learning_rate": 1.2591779389494818e-06, "loss": 0.5667721033096313, "step": 6967 }, { "epoch": 1.6937287311618863, "grad_norm": 1.5340279717812288, "learning_rate": 1.257226090450797e-06, "loss": 0.4752860367298126, "step": 6968 }, { "epoch": 1.693971803597472, "grad_norm": 1.6782956670410059, "learning_rate": 1.2552756544242374e-06, "loss": 0.550703763961792, "step": 6969 }, { "epoch": 1.6942148760330578, "grad_norm": 1.672995721475227, "learning_rate": 1.2533266311849112e-06, "loss": 0.5905759334564209, "step": 6970 }, { "epoch": 1.6944579484686435, "grad_norm": 1.5581985584611082, "learning_rate": 1.2513790210477051e-06, "loss": 0.4538385570049286, "step": 6971 }, { "epoch": 1.6947010209042295, "grad_norm": 1.9958558919679559, "learning_rate": 1.249432824327267e-06, "loss": 0.41183096170425415, "step": 6972 }, { "epoch": 1.6949440933398152, "grad_norm": 1.6592877753979827, "learning_rate": 1.2474880413380253e-06, "loss": 0.4584425091743469, "step": 6973 }, { "epoch": 1.6951871657754012, "grad_norm": 1.5455006546119414, "learning_rate": 1.2455446723941744e-06, "loss": 0.6445673704147339, "step": 6974 }, { "epoch": 1.695430238210987, "grad_norm": 1.5179560831812795, "learning_rate": 1.2436027178096854e-06, "loss": 0.5245462656021118, "step": 6975 }, { "epoch": 1.6956733106465727, "grad_norm": 1.5634878425590437, "learning_rate": 1.2416621778982984e-06, "loss": 0.5284894108772278, "step": 6976 }, { "epoch": 1.6959163830821584, "grad_norm": 1.620770584532979, "learning_rate": 1.2397230529735171e-06, "loss": 0.6569940447807312, "step": 6977 }, { "epoch": 1.6961594555177442, "grad_norm": 1.6385629560218626, "learning_rate": 1.2377853433486353e-06, "loss": 0.6083174347877502, "step": 6978 }, { "epoch": 1.6964025279533301, "grad_norm": 1.5504961360948322, "learning_rate": 1.2358490493367003e-06, "loss": 0.46189555525779724, "step": 6979 }, { "epoch": 1.6966456003889159, "grad_norm": 1.550934997535082, "learning_rate": 1.233914171250541e-06, "loss": 0.6151854991912842, "step": 6980 }, { "epoch": 1.6968886728245018, "grad_norm": 1.66280838207167, "learning_rate": 1.2319807094027492e-06, "loss": 0.5295751094818115, "step": 6981 }, { "epoch": 1.6971317452600876, "grad_norm": 1.5814870251192557, "learning_rate": 1.2300486641056985e-06, "loss": 0.563846230506897, "step": 6982 }, { "epoch": 1.6973748176956733, "grad_norm": 1.632560752746041, "learning_rate": 1.2281180356715227e-06, "loss": 0.6036282181739807, "step": 6983 }, { "epoch": 1.697617890131259, "grad_norm": 1.6160479268825754, "learning_rate": 1.2261888244121357e-06, "loss": 0.6582450270652771, "step": 6984 }, { "epoch": 1.6978609625668448, "grad_norm": 1.7209989219486792, "learning_rate": 1.2242610306392178e-06, "loss": 0.554923415184021, "step": 6985 }, { "epoch": 1.6981040350024306, "grad_norm": 1.6941804323969736, "learning_rate": 1.2223346546642156e-06, "loss": 0.648877739906311, "step": 6986 }, { "epoch": 1.6983471074380165, "grad_norm": 1.966173034389134, "learning_rate": 1.2204096967983603e-06, "loss": 0.5729527473449707, "step": 6987 }, { "epoch": 1.6985901798736023, "grad_norm": 1.6564432449798079, "learning_rate": 1.2184861573526407e-06, "loss": 0.5336763858795166, "step": 6988 }, { "epoch": 1.6988332523091882, "grad_norm": 1.4227319959847669, "learning_rate": 1.216564036637825e-06, "loss": 0.46252453327178955, "step": 6989 }, { "epoch": 1.699076324744774, "grad_norm": 1.5628738853922324, "learning_rate": 1.214643334964445e-06, "loss": 0.6031650900840759, "step": 6990 }, { "epoch": 1.6993193971803597, "grad_norm": 1.643746752203988, "learning_rate": 1.2127240526428108e-06, "loss": 0.6100730299949646, "step": 6991 }, { "epoch": 1.6995624696159455, "grad_norm": 1.5676446212248378, "learning_rate": 1.2108061899829949e-06, "loss": 0.6429834961891174, "step": 6992 }, { "epoch": 1.6998055420515312, "grad_norm": 1.6771953137212023, "learning_rate": 1.2088897472948492e-06, "loss": 0.39521318674087524, "step": 6993 }, { "epoch": 1.7000486144871172, "grad_norm": 1.6491378398043044, "learning_rate": 1.206974724887988e-06, "loss": 0.7188616394996643, "step": 6994 }, { "epoch": 1.700291686922703, "grad_norm": 1.8908109101326611, "learning_rate": 1.2050611230718024e-06, "loss": 0.6481412649154663, "step": 6995 }, { "epoch": 1.700534759358289, "grad_norm": 1.65762028580079, "learning_rate": 1.2031489421554543e-06, "loss": 0.5433286428451538, "step": 6996 }, { "epoch": 1.7007778317938747, "grad_norm": 1.7609136705736337, "learning_rate": 1.201238182447867e-06, "loss": 0.5327658653259277, "step": 6997 }, { "epoch": 1.7010209042294604, "grad_norm": 1.6579199619841285, "learning_rate": 1.1993288442577455e-06, "loss": 0.6163473129272461, "step": 6998 }, { "epoch": 1.7012639766650461, "grad_norm": 1.6336678278868693, "learning_rate": 1.1974209278935579e-06, "loss": 0.5545359253883362, "step": 6999 }, { "epoch": 1.7015070491006319, "grad_norm": 1.5102778517971265, "learning_rate": 1.1955144336635472e-06, "loss": 0.5048406720161438, "step": 7000 }, { "epoch": 1.7017501215362176, "grad_norm": 1.8643809871323775, "learning_rate": 1.1936093618757205e-06, "loss": 0.6181983947753906, "step": 7001 }, { "epoch": 1.7019931939718036, "grad_norm": 1.7702722610824682, "learning_rate": 1.1917057128378606e-06, "loss": 0.6327251195907593, "step": 7002 }, { "epoch": 1.7022362664073896, "grad_norm": 1.8330002263078482, "learning_rate": 1.1898034868575226e-06, "loss": 0.44232177734375, "step": 7003 }, { "epoch": 1.7024793388429753, "grad_norm": 1.519316218970577, "learning_rate": 1.1879026842420215e-06, "loss": 0.45845192670822144, "step": 7004 }, { "epoch": 1.702722411278561, "grad_norm": 1.7133341018862, "learning_rate": 1.1860033052984544e-06, "loss": 0.6225829124450684, "step": 7005 }, { "epoch": 1.7029654837141468, "grad_norm": 1.6690149173592213, "learning_rate": 1.1841053503336776e-06, "loss": 0.5592281222343445, "step": 7006 }, { "epoch": 1.7032085561497325, "grad_norm": 1.7398958995196623, "learning_rate": 1.1822088196543268e-06, "loss": 0.5095856785774231, "step": 7007 }, { "epoch": 1.7034516285853183, "grad_norm": 1.2943910478858722, "learning_rate": 1.1803137135667998e-06, "loss": 0.430867075920105, "step": 7008 }, { "epoch": 1.7036947010209043, "grad_norm": 1.7928836032511566, "learning_rate": 1.178420032377271e-06, "loss": 0.5781290531158447, "step": 7009 }, { "epoch": 1.70393777345649, "grad_norm": 1.487029895076521, "learning_rate": 1.1765277763916782e-06, "loss": 0.7603342533111572, "step": 7010 }, { "epoch": 1.704180845892076, "grad_norm": 1.5264388313824817, "learning_rate": 1.1746369459157324e-06, "loss": 0.5057399272918701, "step": 7011 }, { "epoch": 1.7044239183276617, "grad_norm": 1.7368078706775634, "learning_rate": 1.172747541254917e-06, "loss": 0.4730629324913025, "step": 7012 }, { "epoch": 1.7046669907632475, "grad_norm": 1.5735661733021797, "learning_rate": 1.1708595627144782e-06, "loss": 0.46421319246292114, "step": 7013 }, { "epoch": 1.7049100631988332, "grad_norm": 1.5430217403094764, "learning_rate": 1.168973010599439e-06, "loss": 0.44152718782424927, "step": 7014 }, { "epoch": 1.705153135634419, "grad_norm": 1.477036859440865, "learning_rate": 1.167087885214585e-06, "loss": 0.45652082562446594, "step": 7015 }, { "epoch": 1.705396208070005, "grad_norm": 1.7249542775304152, "learning_rate": 1.165204186864478e-06, "loss": 0.5520972013473511, "step": 7016 }, { "epoch": 1.7056392805055907, "grad_norm": 1.777489875824628, "learning_rate": 1.1633219158534426e-06, "loss": 0.6130070090293884, "step": 7017 }, { "epoch": 1.7058823529411766, "grad_norm": 1.7355516694017412, "learning_rate": 1.1614410724855806e-06, "loss": 0.6580911874771118, "step": 7018 }, { "epoch": 1.7061254253767624, "grad_norm": 1.6011883474753725, "learning_rate": 1.1595616570647538e-06, "loss": 0.6733517646789551, "step": 7019 }, { "epoch": 1.7063684978123481, "grad_norm": 1.539076108461274, "learning_rate": 1.1576836698946014e-06, "loss": 0.5557100176811218, "step": 7020 }, { "epoch": 1.7066115702479339, "grad_norm": 1.569151127160824, "learning_rate": 1.1558071112785297e-06, "loss": 0.5482790470123291, "step": 7021 }, { "epoch": 1.7068546426835196, "grad_norm": 1.8381687027899811, "learning_rate": 1.1539319815197103e-06, "loss": 0.5913633108139038, "step": 7022 }, { "epoch": 1.7070977151191054, "grad_norm": 1.5491604301083566, "learning_rate": 1.1520582809210912e-06, "loss": 0.5215263962745667, "step": 7023 }, { "epoch": 1.7073407875546913, "grad_norm": 1.695600511816215, "learning_rate": 1.1501860097853789e-06, "loss": 0.765160322189331, "step": 7024 }, { "epoch": 1.707583859990277, "grad_norm": 1.7141811406992824, "learning_rate": 1.148315168415063e-06, "loss": 0.5156651139259338, "step": 7025 }, { "epoch": 1.707826932425863, "grad_norm": 1.693807787549538, "learning_rate": 1.1464457571123867e-06, "loss": 0.6203553676605225, "step": 7026 }, { "epoch": 1.7080700048614488, "grad_norm": 1.7791958069291178, "learning_rate": 1.1445777761793741e-06, "loss": 0.5610600709915161, "step": 7027 }, { "epoch": 1.7083130772970345, "grad_norm": 1.5740968362141456, "learning_rate": 1.1427112259178153e-06, "loss": 0.5588027834892273, "step": 7028 }, { "epoch": 1.7085561497326203, "grad_norm": 1.6227038719126259, "learning_rate": 1.1408461066292643e-06, "loss": 0.5190004706382751, "step": 7029 }, { "epoch": 1.708799222168206, "grad_norm": 1.7076757969830498, "learning_rate": 1.13898241861505e-06, "loss": 0.4808519184589386, "step": 7030 }, { "epoch": 1.709042294603792, "grad_norm": 1.6361748285041506, "learning_rate": 1.1371201621762662e-06, "loss": 0.5810747146606445, "step": 7031 }, { "epoch": 1.7092853670393777, "grad_norm": 1.6034490172520588, "learning_rate": 1.1352593376137789e-06, "loss": 0.5361249446868896, "step": 7032 }, { "epoch": 1.7095284394749637, "grad_norm": 1.808658052528641, "learning_rate": 1.1333999452282173e-06, "loss": 0.6881250143051147, "step": 7033 }, { "epoch": 1.7097715119105494, "grad_norm": 1.5145103324288711, "learning_rate": 1.131541985319986e-06, "loss": 0.6427826285362244, "step": 7034 }, { "epoch": 1.7100145843461352, "grad_norm": 1.6707093287070307, "learning_rate": 1.1296854581892502e-06, "loss": 0.5406540632247925, "step": 7035 }, { "epoch": 1.710257656781721, "grad_norm": 1.8272264193191614, "learning_rate": 1.1278303641359556e-06, "loss": 0.5722986459732056, "step": 7036 }, { "epoch": 1.7105007292173067, "grad_norm": 1.9221457918995037, "learning_rate": 1.1259767034598046e-06, "loss": 0.6329094171524048, "step": 7037 }, { "epoch": 1.7107438016528924, "grad_norm": 1.8237422317624759, "learning_rate": 1.1241244764602699e-06, "loss": 0.6043883562088013, "step": 7038 }, { "epoch": 1.7109868740884784, "grad_norm": 2.102857192656174, "learning_rate": 1.1222736834366e-06, "loss": 0.5785830616950989, "step": 7039 }, { "epoch": 1.7112299465240641, "grad_norm": 1.8187171175655592, "learning_rate": 1.1204243246878033e-06, "loss": 0.5294018983840942, "step": 7040 }, { "epoch": 1.71147301895965, "grad_norm": 1.8066550968039727, "learning_rate": 1.1185764005126632e-06, "loss": 0.7663992047309875, "step": 7041 }, { "epoch": 1.7117160913952358, "grad_norm": 1.4959535256059755, "learning_rate": 1.116729911209723e-06, "loss": 0.5952847003936768, "step": 7042 }, { "epoch": 1.7119591638308216, "grad_norm": 1.770953561285208, "learning_rate": 1.114884857077303e-06, "loss": 0.5025338530540466, "step": 7043 }, { "epoch": 1.7122022362664073, "grad_norm": 1.667945846216907, "learning_rate": 1.113041238413488e-06, "loss": 0.49013739824295044, "step": 7044 }, { "epoch": 1.712445308701993, "grad_norm": 1.6995393968289014, "learning_rate": 1.1111990555161322e-06, "loss": 0.6210087537765503, "step": 7045 }, { "epoch": 1.712688381137579, "grad_norm": 1.8772683289387837, "learning_rate": 1.1093583086828541e-06, "loss": 0.6244876384735107, "step": 7046 }, { "epoch": 1.7129314535731648, "grad_norm": 1.8833642303335147, "learning_rate": 1.1075189982110413e-06, "loss": 0.5329588055610657, "step": 7047 }, { "epoch": 1.7131745260087508, "grad_norm": 1.4291832330386098, "learning_rate": 1.105681124397854e-06, "loss": 0.5990760326385498, "step": 7048 }, { "epoch": 1.7134175984443365, "grad_norm": 2.0907301742894258, "learning_rate": 1.1038446875402132e-06, "loss": 0.6969426274299622, "step": 7049 }, { "epoch": 1.7136606708799222, "grad_norm": 2.049489141402783, "learning_rate": 1.1020096879348164e-06, "loss": 0.69802325963974, "step": 7050 }, { "epoch": 1.713903743315508, "grad_norm": 1.6873349677062666, "learning_rate": 1.1001761258781163e-06, "loss": 0.5412179231643677, "step": 7051 }, { "epoch": 1.7141468157510937, "grad_norm": 1.618413685871969, "learning_rate": 1.0983440016663494e-06, "loss": 0.42051851749420166, "step": 7052 }, { "epoch": 1.7143898881866795, "grad_norm": 1.6894459250536256, "learning_rate": 1.0965133155955066e-06, "loss": 0.5694934129714966, "step": 7053 }, { "epoch": 1.7146329606222654, "grad_norm": 1.7454589472581241, "learning_rate": 1.0946840679613536e-06, "loss": 0.561693549156189, "step": 7054 }, { "epoch": 1.7148760330578512, "grad_norm": 1.6151849955633606, "learning_rate": 1.0928562590594217e-06, "loss": 0.6643763780593872, "step": 7055 }, { "epoch": 1.7151191054934372, "grad_norm": 1.9816149347808965, "learning_rate": 1.0910298891850046e-06, "loss": 0.6967281699180603, "step": 7056 }, { "epoch": 1.715362177929023, "grad_norm": 1.555924183033048, "learning_rate": 1.0892049586331754e-06, "loss": 0.523347020149231, "step": 7057 }, { "epoch": 1.7156052503646086, "grad_norm": 1.7643116325134496, "learning_rate": 1.0873814676987625e-06, "loss": 0.6424588561058044, "step": 7058 }, { "epoch": 1.7158483228001944, "grad_norm": 1.652930555263912, "learning_rate": 1.0855594166763706e-06, "loss": 0.6308034658432007, "step": 7059 }, { "epoch": 1.7160913952357801, "grad_norm": 1.7022977173052574, "learning_rate": 1.0837388058603616e-06, "loss": 0.5960665941238403, "step": 7060 }, { "epoch": 1.716334467671366, "grad_norm": 1.4420781296886316, "learning_rate": 1.0819196355448801e-06, "loss": 0.4999016523361206, "step": 7061 }, { "epoch": 1.7165775401069518, "grad_norm": 1.7407133961411287, "learning_rate": 1.0801019060238238e-06, "loss": 0.5890176296234131, "step": 7062 }, { "epoch": 1.7168206125425378, "grad_norm": 1.579727364943343, "learning_rate": 1.0782856175908651e-06, "loss": 0.465996652841568, "step": 7063 }, { "epoch": 1.7170636849781236, "grad_norm": 1.4790256473071224, "learning_rate": 1.0764707705394395e-06, "loss": 0.57140052318573, "step": 7064 }, { "epoch": 1.7173067574137093, "grad_norm": 1.462143368220335, "learning_rate": 1.0746573651627534e-06, "loss": 0.5663843750953674, "step": 7065 }, { "epoch": 1.717549829849295, "grad_norm": 1.8121822682932225, "learning_rate": 1.0728454017537771e-06, "loss": 0.682969331741333, "step": 7066 }, { "epoch": 1.7177929022848808, "grad_norm": 1.6581333296998917, "learning_rate": 1.0710348806052483e-06, "loss": 0.5177004337310791, "step": 7067 }, { "epoch": 1.7180359747204665, "grad_norm": 1.6915894275942562, "learning_rate": 1.0692258020096736e-06, "loss": 0.7258341312408447, "step": 7068 }, { "epoch": 1.7182790471560525, "grad_norm": 2.086839695237079, "learning_rate": 1.0674181662593253e-06, "loss": 0.6007675528526306, "step": 7069 }, { "epoch": 1.7185221195916383, "grad_norm": 1.818617467665979, "learning_rate": 1.0656119736462456e-06, "loss": 0.6179118156433105, "step": 7070 }, { "epoch": 1.7187651920272242, "grad_norm": 1.7682139722247388, "learning_rate": 1.0638072244622367e-06, "loss": 0.5354841947555542, "step": 7071 }, { "epoch": 1.71900826446281, "grad_norm": 1.850060112253181, "learning_rate": 1.0620039189988763e-06, "loss": 0.49829432368278503, "step": 7072 }, { "epoch": 1.7192513368983957, "grad_norm": 1.8679833173325449, "learning_rate": 1.0602020575474992e-06, "loss": 0.7114384174346924, "step": 7073 }, { "epoch": 1.7194944093339815, "grad_norm": 1.5527481080806063, "learning_rate": 1.0584016403992159e-06, "loss": 0.518661379814148, "step": 7074 }, { "epoch": 1.7197374817695672, "grad_norm": 1.5625809038214824, "learning_rate": 1.0566026678448982e-06, "loss": 0.5743809342384338, "step": 7075 }, { "epoch": 1.7199805542051532, "grad_norm": 1.539543904506258, "learning_rate": 1.0548051401751824e-06, "loss": 0.42892998456954956, "step": 7076 }, { "epoch": 1.720223626640739, "grad_norm": 1.7191605393116745, "learning_rate": 1.053009057680483e-06, "loss": 0.540953516960144, "step": 7077 }, { "epoch": 1.7204666990763249, "grad_norm": 1.6882142819888641, "learning_rate": 1.0512144206509656e-06, "loss": 0.593490719795227, "step": 7078 }, { "epoch": 1.7207097715119106, "grad_norm": 1.7911564519783814, "learning_rate": 1.0494212293765749e-06, "loss": 0.5055816769599915, "step": 7079 }, { "epoch": 1.7209528439474964, "grad_norm": 1.3486395212491338, "learning_rate": 1.0476294841470114e-06, "loss": 0.5547653436660767, "step": 7080 }, { "epoch": 1.7211959163830821, "grad_norm": 1.6610028845931193, "learning_rate": 1.0458391852517523e-06, "loss": 0.5421024560928345, "step": 7081 }, { "epoch": 1.7214389888186679, "grad_norm": 1.4191406383055263, "learning_rate": 1.0440503329800323e-06, "loss": 0.5401644706726074, "step": 7082 }, { "epoch": 1.7216820612542536, "grad_norm": 1.540646373919477, "learning_rate": 1.0422629276208606e-06, "loss": 0.5638446807861328, "step": 7083 }, { "epoch": 1.7219251336898396, "grad_norm": 1.7140137988812914, "learning_rate": 1.0404769694630035e-06, "loss": 0.746790885925293, "step": 7084 }, { "epoch": 1.7221682061254255, "grad_norm": 1.49479348790079, "learning_rate": 1.0386924587949998e-06, "loss": 0.6844606399536133, "step": 7085 }, { "epoch": 1.7224112785610113, "grad_norm": 1.5633214693330206, "learning_rate": 1.0369093959051558e-06, "loss": 0.51324462890625, "step": 7086 }, { "epoch": 1.722654350996597, "grad_norm": 1.493798029364528, "learning_rate": 1.0351277810815364e-06, "loss": 0.4941144585609436, "step": 7087 }, { "epoch": 1.7228974234321828, "grad_norm": 1.7257647609529216, "learning_rate": 1.0333476146119814e-06, "loss": 0.5186027884483337, "step": 7088 }, { "epoch": 1.7231404958677685, "grad_norm": 1.4112928466460806, "learning_rate": 1.0315688967840898e-06, "loss": 0.45268940925598145, "step": 7089 }, { "epoch": 1.7233835683033543, "grad_norm": 1.5239100409924184, "learning_rate": 1.0297916278852304e-06, "loss": 0.4641229510307312, "step": 7090 }, { "epoch": 1.7236266407389402, "grad_norm": 1.653831520317405, "learning_rate": 1.0280158082025348e-06, "loss": 0.6302483081817627, "step": 7091 }, { "epoch": 1.723869713174526, "grad_norm": 2.0984163690606934, "learning_rate": 1.0262414380229035e-06, "loss": 0.7158781290054321, "step": 7092 }, { "epoch": 1.724112785610112, "grad_norm": 1.4657986970567705, "learning_rate": 1.0244685176330027e-06, "loss": 0.5358500480651855, "step": 7093 }, { "epoch": 1.7243558580456977, "grad_norm": 1.7431163104092617, "learning_rate": 1.022697047319261e-06, "loss": 0.6447398662567139, "step": 7094 }, { "epoch": 1.7245989304812834, "grad_norm": 1.7213816550198144, "learning_rate": 1.0209270273678784e-06, "loss": 0.40171656012535095, "step": 7095 }, { "epoch": 1.7248420029168692, "grad_norm": 1.5795908302515205, "learning_rate": 1.019158458064814e-06, "loss": 0.5895754098892212, "step": 7096 }, { "epoch": 1.725085075352455, "grad_norm": 1.5279459981078138, "learning_rate": 1.017391339695798e-06, "loss": 0.5296128988265991, "step": 7097 }, { "epoch": 1.7253281477880409, "grad_norm": 1.662634636449033, "learning_rate": 1.015625672546321e-06, "loss": 0.5732979774475098, "step": 7098 }, { "epoch": 1.7255712202236266, "grad_norm": 1.5415045396788971, "learning_rate": 1.013861456901646e-06, "loss": 0.5662792921066284, "step": 7099 }, { "epoch": 1.7258142926592126, "grad_norm": 1.879979655081414, "learning_rate": 1.0120986930467925e-06, "loss": 0.5464475154876709, "step": 7100 }, { "epoch": 1.7260573650947983, "grad_norm": 1.688481495926475, "learning_rate": 1.0103373812665552e-06, "loss": 0.5140405297279358, "step": 7101 }, { "epoch": 1.726300437530384, "grad_norm": 1.7121285200559666, "learning_rate": 1.0085775218454884e-06, "loss": 0.41442224383354187, "step": 7102 }, { "epoch": 1.7265435099659698, "grad_norm": 1.677799327123369, "learning_rate": 1.0068191150679107e-06, "loss": 0.6059253215789795, "step": 7103 }, { "epoch": 1.7267865824015556, "grad_norm": 2.0968827702460455, "learning_rate": 1.0050621612179123e-06, "loss": 0.6581311225891113, "step": 7104 }, { "epoch": 1.7270296548371413, "grad_norm": 1.904878435454076, "learning_rate": 1.0033066605793396e-06, "loss": 0.605432391166687, "step": 7105 }, { "epoch": 1.7272727272727273, "grad_norm": 1.6135585868470823, "learning_rate": 1.0015526134358133e-06, "loss": 0.6220988631248474, "step": 7106 }, { "epoch": 1.727515799708313, "grad_norm": 1.708394510420748, "learning_rate": 9.998000200707124e-07, "loss": 0.6674602031707764, "step": 7107 }, { "epoch": 1.727758872143899, "grad_norm": 1.78422755503454, "learning_rate": 9.980488807671873e-07, "loss": 0.5683202743530273, "step": 7108 }, { "epoch": 1.7280019445794847, "grad_norm": 1.6857789604231885, "learning_rate": 9.962991958081446e-07, "loss": 0.5900683403015137, "step": 7109 }, { "epoch": 1.7282450170150705, "grad_norm": 1.4997213129107438, "learning_rate": 9.945509654762653e-07, "loss": 0.6257412433624268, "step": 7110 }, { "epoch": 1.7284880894506562, "grad_norm": 1.6268126804458556, "learning_rate": 9.92804190053991e-07, "loss": 0.5178164839744568, "step": 7111 }, { "epoch": 1.728731161886242, "grad_norm": 1.7995441488037132, "learning_rate": 9.910588698235268e-07, "loss": 0.5541508197784424, "step": 7112 }, { "epoch": 1.728974234321828, "grad_norm": 1.6413026193741387, "learning_rate": 9.893150050668488e-07, "loss": 0.5980440974235535, "step": 7113 }, { "epoch": 1.7292173067574137, "grad_norm": 1.8725261201221048, "learning_rate": 9.875725960656878e-07, "loss": 0.6698215007781982, "step": 7114 }, { "epoch": 1.7294603791929997, "grad_norm": 1.5539712947898656, "learning_rate": 9.858316431015502e-07, "loss": 0.6907895803451538, "step": 7115 }, { "epoch": 1.7297034516285854, "grad_norm": 1.49029092076946, "learning_rate": 9.840921464556986e-07, "loss": 0.45723748207092285, "step": 7116 }, { "epoch": 1.7299465240641712, "grad_norm": 1.758857070036727, "learning_rate": 9.823541064091657e-07, "loss": 0.5686448812484741, "step": 7117 }, { "epoch": 1.730189596499757, "grad_norm": 1.720451426508153, "learning_rate": 9.806175232427496e-07, "loss": 0.5706985592842102, "step": 7118 }, { "epoch": 1.7304326689353426, "grad_norm": 1.5482880638951093, "learning_rate": 9.788823972370054e-07, "loss": 0.6068903207778931, "step": 7119 }, { "epoch": 1.7306757413709284, "grad_norm": 1.7757783306920156, "learning_rate": 9.771487286722637e-07, "loss": 0.5139985680580139, "step": 7120 }, { "epoch": 1.7309188138065144, "grad_norm": 1.7103353266078145, "learning_rate": 9.75416517828609e-07, "loss": 0.6612091660499573, "step": 7121 }, { "epoch": 1.7311618862421, "grad_norm": 1.8384074515205089, "learning_rate": 9.736857649858988e-07, "loss": 0.6577581167221069, "step": 7122 }, { "epoch": 1.731404958677686, "grad_norm": 1.4771713973968061, "learning_rate": 9.719564704237484e-07, "loss": 0.5059847235679626, "step": 7123 }, { "epoch": 1.7316480311132718, "grad_norm": 1.7452659240348258, "learning_rate": 9.702286344215439e-07, "loss": 0.828927755355835, "step": 7124 }, { "epoch": 1.7318911035488576, "grad_norm": 1.4904524038212577, "learning_rate": 9.68502257258428e-07, "loss": 0.5666127800941467, "step": 7125 }, { "epoch": 1.7321341759844433, "grad_norm": 1.4611468762990958, "learning_rate": 9.667773392133162e-07, "loss": 0.5733182430267334, "step": 7126 }, { "epoch": 1.732377248420029, "grad_norm": 1.5714545582609747, "learning_rate": 9.650538805648834e-07, "loss": 0.6534433364868164, "step": 7127 }, { "epoch": 1.732620320855615, "grad_norm": 1.4094925646300474, "learning_rate": 9.63331881591567e-07, "loss": 0.5860487222671509, "step": 7128 }, { "epoch": 1.7328633932912008, "grad_norm": 1.4680391879083983, "learning_rate": 9.616113425715756e-07, "loss": 0.47984662652015686, "step": 7129 }, { "epoch": 1.7331064657267867, "grad_norm": 1.5975482774398133, "learning_rate": 9.598922637828733e-07, "loss": 0.5206437706947327, "step": 7130 }, { "epoch": 1.7333495381623725, "grad_norm": 1.8047593619418452, "learning_rate": 9.581746455031948e-07, "loss": 0.5578868389129639, "step": 7131 }, { "epoch": 1.7335926105979582, "grad_norm": 1.638517029383283, "learning_rate": 9.564584880100346e-07, "loss": 0.524941623210907, "step": 7132 }, { "epoch": 1.733835683033544, "grad_norm": 1.8372512695712002, "learning_rate": 9.547437915806534e-07, "loss": 0.5750945806503296, "step": 7133 }, { "epoch": 1.7340787554691297, "grad_norm": 1.69320674729775, "learning_rate": 9.530305564920761e-07, "loss": 0.5331832766532898, "step": 7134 }, { "epoch": 1.7343218279047155, "grad_norm": 1.8478467759323565, "learning_rate": 9.513187830210935e-07, "loss": 0.5804538726806641, "step": 7135 }, { "epoch": 1.7345649003403014, "grad_norm": 1.4560630404308414, "learning_rate": 9.496084714442555e-07, "loss": 0.4709426164627075, "step": 7136 }, { "epoch": 1.7348079727758872, "grad_norm": 1.5002473559189193, "learning_rate": 9.478996220378744e-07, "loss": 0.42629408836364746, "step": 7137 }, { "epoch": 1.7350510452114731, "grad_norm": 1.5030348582447919, "learning_rate": 9.461922350780361e-07, "loss": 0.5962677001953125, "step": 7138 }, { "epoch": 1.7352941176470589, "grad_norm": 1.5538511382514553, "learning_rate": 9.444863108405789e-07, "loss": 0.7033505439758301, "step": 7139 }, { "epoch": 1.7355371900826446, "grad_norm": 1.6258692191062567, "learning_rate": 9.427818496011131e-07, "loss": 0.49157899618148804, "step": 7140 }, { "epoch": 1.7357802625182304, "grad_norm": 1.6273656607815417, "learning_rate": 9.410788516350078e-07, "loss": 0.7404162883758545, "step": 7141 }, { "epoch": 1.736023334953816, "grad_norm": 1.7625029224174833, "learning_rate": 9.39377317217397e-07, "loss": 0.4742138087749481, "step": 7142 }, { "epoch": 1.736266407389402, "grad_norm": 1.9653972722052389, "learning_rate": 9.376772466231798e-07, "loss": 0.6652418375015259, "step": 7143 }, { "epoch": 1.7365094798249878, "grad_norm": 2.0037080482113367, "learning_rate": 9.359786401270188e-07, "loss": 0.5516102313995361, "step": 7144 }, { "epoch": 1.7367525522605738, "grad_norm": 1.7096951558321376, "learning_rate": 9.342814980033376e-07, "loss": 0.49715667963027954, "step": 7145 }, { "epoch": 1.7369956246961595, "grad_norm": 1.5692392108542677, "learning_rate": 9.325858205263228e-07, "loss": 0.5047621130943298, "step": 7146 }, { "epoch": 1.7372386971317453, "grad_norm": 1.8112078480242104, "learning_rate": 9.308916079699282e-07, "loss": 0.6366746425628662, "step": 7147 }, { "epoch": 1.737481769567331, "grad_norm": 1.7085043544355074, "learning_rate": 9.291988606078661e-07, "loss": 0.6682502627372742, "step": 7148 }, { "epoch": 1.7377248420029168, "grad_norm": 1.5979339541839688, "learning_rate": 9.27507578713619e-07, "loss": 0.5354546308517456, "step": 7149 }, { "epoch": 1.7379679144385025, "grad_norm": 1.866195062707187, "learning_rate": 9.258177625604226e-07, "loss": 0.52567058801651, "step": 7150 }, { "epoch": 1.7382109868740885, "grad_norm": 1.9234966802076152, "learning_rate": 9.241294124212874e-07, "loss": 0.6524764895439148, "step": 7151 }, { "epoch": 1.7384540593096744, "grad_norm": 1.814680326797024, "learning_rate": 9.224425285689775e-07, "loss": 0.6778207421302795, "step": 7152 }, { "epoch": 1.7386971317452602, "grad_norm": 1.4454509218684872, "learning_rate": 9.207571112760272e-07, "loss": 0.524504542350769, "step": 7153 }, { "epoch": 1.738940204180846, "grad_norm": 1.8017404178823413, "learning_rate": 9.190731608147274e-07, "loss": 0.6399134993553162, "step": 7154 }, { "epoch": 1.7391832766164317, "grad_norm": 1.4755741879405693, "learning_rate": 9.173906774571351e-07, "loss": 0.5932737588882446, "step": 7155 }, { "epoch": 1.7394263490520174, "grad_norm": 1.5945074390409841, "learning_rate": 9.157096614750738e-07, "loss": 0.5863087177276611, "step": 7156 }, { "epoch": 1.7396694214876032, "grad_norm": 1.5046431014304538, "learning_rate": 9.140301131401208e-07, "loss": 0.45507481694221497, "step": 7157 }, { "epoch": 1.7399124939231891, "grad_norm": 1.8846608160524716, "learning_rate": 9.123520327236258e-07, "loss": 0.5521073341369629, "step": 7158 }, { "epoch": 1.7401555663587749, "grad_norm": 1.2272253166285183, "learning_rate": 9.106754204966972e-07, "loss": 0.4713262915611267, "step": 7159 }, { "epoch": 1.7403986387943609, "grad_norm": 1.7659028770728176, "learning_rate": 9.090002767302064e-07, "loss": 0.5143114328384399, "step": 7160 }, { "epoch": 1.7406417112299466, "grad_norm": 1.5353432319395974, "learning_rate": 9.073266016947868e-07, "loss": 0.49434608221054077, "step": 7161 }, { "epoch": 1.7408847836655323, "grad_norm": 1.687092658639806, "learning_rate": 9.056543956608377e-07, "loss": 0.5073480606079102, "step": 7162 }, { "epoch": 1.741127856101118, "grad_norm": 1.7658017112780586, "learning_rate": 9.039836588985162e-07, "loss": 0.6743518710136414, "step": 7163 }, { "epoch": 1.7413709285367038, "grad_norm": 1.519826262255739, "learning_rate": 9.023143916777433e-07, "loss": 0.45106616616249084, "step": 7164 }, { "epoch": 1.7416140009722896, "grad_norm": 1.6611109911164734, "learning_rate": 9.006465942682074e-07, "loss": 0.5563609600067139, "step": 7165 }, { "epoch": 1.7418570734078755, "grad_norm": 1.7507661140950277, "learning_rate": 8.98980266939351e-07, "loss": 0.6371118426322937, "step": 7166 }, { "epoch": 1.7421001458434615, "grad_norm": 1.5995980088418382, "learning_rate": 8.973154099603909e-07, "loss": 0.5661859512329102, "step": 7167 }, { "epoch": 1.7423432182790473, "grad_norm": 1.8470026149351777, "learning_rate": 8.95652023600293e-07, "loss": 0.6327462792396545, "step": 7168 }, { "epoch": 1.742586290714633, "grad_norm": 1.4947977053936705, "learning_rate": 8.939901081277969e-07, "loss": 0.36265504360198975, "step": 7169 }, { "epoch": 1.7428293631502187, "grad_norm": 1.582391250346749, "learning_rate": 8.923296638113955e-07, "loss": 0.6286135315895081, "step": 7170 }, { "epoch": 1.7430724355858045, "grad_norm": 1.8034740161189984, "learning_rate": 8.906706909193519e-07, "loss": 0.5623425245285034, "step": 7171 }, { "epoch": 1.7433155080213902, "grad_norm": 1.6174805798474357, "learning_rate": 8.890131897196852e-07, "loss": 0.5112342834472656, "step": 7172 }, { "epoch": 1.7435585804569762, "grad_norm": 1.4036173594841803, "learning_rate": 8.873571604801812e-07, "loss": 0.5407052636146545, "step": 7173 }, { "epoch": 1.743801652892562, "grad_norm": 1.6774058014990731, "learning_rate": 8.857026034683846e-07, "loss": 0.48834264278411865, "step": 7174 }, { "epoch": 1.744044725328148, "grad_norm": 2.0080117068307897, "learning_rate": 8.840495189515997e-07, "loss": 0.5172756314277649, "step": 7175 }, { "epoch": 1.7442877977637337, "grad_norm": 1.6485946334114454, "learning_rate": 8.823979071969058e-07, "loss": 0.5835664868354797, "step": 7176 }, { "epoch": 1.7445308701993194, "grad_norm": 1.81146429921415, "learning_rate": 8.807477684711286e-07, "loss": 0.5465087890625, "step": 7177 }, { "epoch": 1.7447739426349052, "grad_norm": 1.4865497594697528, "learning_rate": 8.790991030408669e-07, "loss": 0.5359375476837158, "step": 7178 }, { "epoch": 1.745017015070491, "grad_norm": 1.9658340190946173, "learning_rate": 8.774519111724722e-07, "loss": 0.45092013478279114, "step": 7179 }, { "epoch": 1.7452600875060769, "grad_norm": 1.7103073912887687, "learning_rate": 8.758061931320671e-07, "loss": 0.5632704496383667, "step": 7180 }, { "epoch": 1.7455031599416626, "grad_norm": 1.7603152238823596, "learning_rate": 8.741619491855291e-07, "loss": 0.5449767708778381, "step": 7181 }, { "epoch": 1.7457462323772486, "grad_norm": 1.763104436326813, "learning_rate": 8.725191795985033e-07, "loss": 0.6463754773139954, "step": 7182 }, { "epoch": 1.7459893048128343, "grad_norm": 1.731350858161757, "learning_rate": 8.708778846363896e-07, "loss": 0.44208401441574097, "step": 7183 }, { "epoch": 1.74623237724842, "grad_norm": 1.4311525729792995, "learning_rate": 8.692380645643561e-07, "loss": 0.4559135138988495, "step": 7184 }, { "epoch": 1.7464754496840058, "grad_norm": 1.6883753237828354, "learning_rate": 8.675997196473307e-07, "loss": 0.5405670404434204, "step": 7185 }, { "epoch": 1.7467185221195916, "grad_norm": 1.7650448770106937, "learning_rate": 8.659628501500006e-07, "loss": 0.5504032373428345, "step": 7186 }, { "epoch": 1.7469615945551773, "grad_norm": 1.6332527581481375, "learning_rate": 8.643274563368198e-07, "loss": 0.6874337792396545, "step": 7187 }, { "epoch": 1.7472046669907633, "grad_norm": 1.7917312762181627, "learning_rate": 8.626935384719948e-07, "loss": 0.5800569653511047, "step": 7188 }, { "epoch": 1.747447739426349, "grad_norm": 1.4616324151015132, "learning_rate": 8.610610968195065e-07, "loss": 0.5133660435676575, "step": 7189 }, { "epoch": 1.747690811861935, "grad_norm": 1.5680245716082883, "learning_rate": 8.594301316430831e-07, "loss": 0.475870281457901, "step": 7190 }, { "epoch": 1.7479338842975207, "grad_norm": 1.8229847486899822, "learning_rate": 8.578006432062258e-07, "loss": 0.6189819574356079, "step": 7191 }, { "epoch": 1.7481769567331065, "grad_norm": 1.954445390947735, "learning_rate": 8.561726317721941e-07, "loss": 0.5414847135543823, "step": 7192 }, { "epoch": 1.7484200291686922, "grad_norm": 1.7959492125211942, "learning_rate": 8.545460976040032e-07, "loss": 0.4211514890193939, "step": 7193 }, { "epoch": 1.748663101604278, "grad_norm": 1.5820410049037041, "learning_rate": 8.529210409644373e-07, "loss": 0.6498831510543823, "step": 7194 }, { "epoch": 1.748906174039864, "grad_norm": 1.8771253663744814, "learning_rate": 8.512974621160363e-07, "loss": 0.6424624919891357, "step": 7195 }, { "epoch": 1.7491492464754497, "grad_norm": 3.1758238594471653, "learning_rate": 8.496753613211073e-07, "loss": 0.5464578866958618, "step": 7196 }, { "epoch": 1.7493923189110356, "grad_norm": 1.4659228930580734, "learning_rate": 8.480547388417104e-07, "loss": 0.5341691374778748, "step": 7197 }, { "epoch": 1.7496353913466214, "grad_norm": 1.611057848209165, "learning_rate": 8.464355949396741e-07, "loss": 0.6062131524085999, "step": 7198 }, { "epoch": 1.7498784637822071, "grad_norm": 1.6605356472801984, "learning_rate": 8.448179298765846e-07, "loss": 0.604424238204956, "step": 7199 }, { "epoch": 1.7501215362177929, "grad_norm": 1.8471668069929963, "learning_rate": 8.432017439137896e-07, "loss": 0.5450854897499084, "step": 7200 }, { "epoch": 1.7503646086533786, "grad_norm": 1.8801048242933378, "learning_rate": 8.415870373124003e-07, "loss": 0.5727156400680542, "step": 7201 }, { "epoch": 1.7506076810889644, "grad_norm": 1.7631500388544035, "learning_rate": 8.399738103332833e-07, "loss": 0.6321247220039368, "step": 7202 }, { "epoch": 1.7508507535245503, "grad_norm": 1.6566698787483334, "learning_rate": 8.383620632370748e-07, "loss": 0.5835232734680176, "step": 7203 }, { "epoch": 1.751093825960136, "grad_norm": 1.648568927361829, "learning_rate": 8.367517962841598e-07, "loss": 0.5092148780822754, "step": 7204 }, { "epoch": 1.751336898395722, "grad_norm": 1.5892756524139227, "learning_rate": 8.35143009734698e-07, "loss": 0.41295644640922546, "step": 7205 }, { "epoch": 1.7515799708313078, "grad_norm": 1.4748500671258085, "learning_rate": 8.33535703848597e-07, "loss": 0.56064373254776, "step": 7206 }, { "epoch": 1.7518230432668935, "grad_norm": 1.9056967022790605, "learning_rate": 8.319298788855346e-07, "loss": 0.560275673866272, "step": 7207 }, { "epoch": 1.7520661157024793, "grad_norm": 1.6088669599753642, "learning_rate": 8.303255351049466e-07, "loss": 0.4395606517791748, "step": 7208 }, { "epoch": 1.752309188138065, "grad_norm": 1.9016043082292595, "learning_rate": 8.287226727660269e-07, "loss": 0.458577036857605, "step": 7209 }, { "epoch": 1.752552260573651, "grad_norm": 1.59052895047287, "learning_rate": 8.271212921277339e-07, "loss": 0.4112289845943451, "step": 7210 }, { "epoch": 1.7527953330092367, "grad_norm": 1.7145824164390255, "learning_rate": 8.255213934487827e-07, "loss": 0.6126878261566162, "step": 7211 }, { "epoch": 1.7530384054448227, "grad_norm": 1.7950697196312495, "learning_rate": 8.239229769876523e-07, "loss": 0.5741666555404663, "step": 7212 }, { "epoch": 1.7532814778804084, "grad_norm": 1.6497682906407152, "learning_rate": 8.223260430025804e-07, "loss": 0.5643469095230103, "step": 7213 }, { "epoch": 1.7535245503159942, "grad_norm": 1.7223211989046785, "learning_rate": 8.207305917515662e-07, "loss": 0.6091288924217224, "step": 7214 }, { "epoch": 1.75376762275158, "grad_norm": 1.5334247570759465, "learning_rate": 8.191366234923681e-07, "loss": 0.5090304017066956, "step": 7215 }, { "epoch": 1.7540106951871657, "grad_norm": 1.7684014650761053, "learning_rate": 8.175441384825067e-07, "loss": 0.4784495234489441, "step": 7216 }, { "epoch": 1.7542537676227514, "grad_norm": 1.6340224207245748, "learning_rate": 8.159531369792617e-07, "loss": 0.6834518909454346, "step": 7217 }, { "epoch": 1.7544968400583374, "grad_norm": 1.616006400173914, "learning_rate": 8.143636192396731e-07, "loss": 0.5588961839675903, "step": 7218 }, { "epoch": 1.7547399124939231, "grad_norm": 1.6985970112086277, "learning_rate": 8.127755855205421e-07, "loss": 0.6047674417495728, "step": 7219 }, { "epoch": 1.754982984929509, "grad_norm": 1.6568802585781823, "learning_rate": 8.11189036078428e-07, "loss": 0.5158240795135498, "step": 7220 }, { "epoch": 1.7552260573650948, "grad_norm": 1.503027489700795, "learning_rate": 8.096039711696546e-07, "loss": 0.5362980961799622, "step": 7221 }, { "epoch": 1.7554691298006806, "grad_norm": 1.9378612991955122, "learning_rate": 8.080203910503004e-07, "loss": 0.5471987724304199, "step": 7222 }, { "epoch": 1.7557122022362663, "grad_norm": 1.531742265367791, "learning_rate": 8.064382959762085e-07, "loss": 0.4829220175743103, "step": 7223 }, { "epoch": 1.755955274671852, "grad_norm": 1.7846706729437187, "learning_rate": 8.048576862029789e-07, "loss": 0.5967848300933838, "step": 7224 }, { "epoch": 1.756198347107438, "grad_norm": 1.8014946034864399, "learning_rate": 8.032785619859729e-07, "loss": 0.6688293218612671, "step": 7225 }, { "epoch": 1.7564414195430238, "grad_norm": 1.949516375378217, "learning_rate": 8.017009235803142e-07, "loss": 0.5423623323440552, "step": 7226 }, { "epoch": 1.7566844919786098, "grad_norm": 1.5834300751227255, "learning_rate": 8.001247712408811e-07, "loss": 0.5529879331588745, "step": 7227 }, { "epoch": 1.7569275644141955, "grad_norm": 1.572704694854394, "learning_rate": 7.985501052223187e-07, "loss": 0.6023896336555481, "step": 7228 }, { "epoch": 1.7571706368497813, "grad_norm": 1.6855660000714217, "learning_rate": 7.969769257790238e-07, "loss": 0.6863710880279541, "step": 7229 }, { "epoch": 1.757413709285367, "grad_norm": 1.8731846434879127, "learning_rate": 7.954052331651619e-07, "loss": 0.5379829406738281, "step": 7230 }, { "epoch": 1.7576567817209527, "grad_norm": 1.455536111650245, "learning_rate": 7.938350276346485e-07, "loss": 0.5111559629440308, "step": 7231 }, { "epoch": 1.7578998541565385, "grad_norm": 1.4107323855197738, "learning_rate": 7.922663094411676e-07, "loss": 0.5473719835281372, "step": 7232 }, { "epoch": 1.7581429265921245, "grad_norm": 1.7125131725792626, "learning_rate": 7.906990788381608e-07, "loss": 0.49573397636413574, "step": 7233 }, { "epoch": 1.7583859990277104, "grad_norm": 2.016888385942969, "learning_rate": 7.891333360788245e-07, "loss": 0.5781996250152588, "step": 7234 }, { "epoch": 1.7586290714632962, "grad_norm": 1.7933387467945572, "learning_rate": 7.875690814161208e-07, "loss": 0.6696301698684692, "step": 7235 }, { "epoch": 1.758872143898882, "grad_norm": 1.6116833911529285, "learning_rate": 7.860063151027674e-07, "loss": 0.6194499731063843, "step": 7236 }, { "epoch": 1.7591152163344677, "grad_norm": 1.7932260908075726, "learning_rate": 7.844450373912437e-07, "loss": 0.6350524425506592, "step": 7237 }, { "epoch": 1.7593582887700534, "grad_norm": 1.7861421857195108, "learning_rate": 7.828852485337867e-07, "loss": 0.5818303823471069, "step": 7238 }, { "epoch": 1.7596013612056391, "grad_norm": 1.506168012062453, "learning_rate": 7.81326948782396e-07, "loss": 0.5730060338973999, "step": 7239 }, { "epoch": 1.7598444336412251, "grad_norm": 1.7461551736361438, "learning_rate": 7.797701383888246e-07, "loss": 0.4858086407184601, "step": 7240 }, { "epoch": 1.7600875060768109, "grad_norm": 1.9008672759221767, "learning_rate": 7.782148176045956e-07, "loss": 0.621117353439331, "step": 7241 }, { "epoch": 1.7603305785123968, "grad_norm": 1.6194587193422862, "learning_rate": 7.766609866809793e-07, "loss": 0.487629771232605, "step": 7242 }, { "epoch": 1.7605736509479826, "grad_norm": 1.7312404572028264, "learning_rate": 7.751086458690138e-07, "loss": 0.5851833820343018, "step": 7243 }, { "epoch": 1.7608167233835683, "grad_norm": 1.6642618023382745, "learning_rate": 7.735577954194917e-07, "loss": 0.49739474058151245, "step": 7244 }, { "epoch": 1.761059795819154, "grad_norm": 1.572357838281862, "learning_rate": 7.720084355829661e-07, "loss": 0.49908167123794556, "step": 7245 }, { "epoch": 1.7613028682547398, "grad_norm": 1.570682273747554, "learning_rate": 7.70460566609752e-07, "loss": 0.6217117309570312, "step": 7246 }, { "epoch": 1.7615459406903256, "grad_norm": 1.6480731627200387, "learning_rate": 7.689141887499186e-07, "loss": 0.5697247982025146, "step": 7247 }, { "epoch": 1.7617890131259115, "grad_norm": 1.600986974938389, "learning_rate": 7.67369302253298e-07, "loss": 0.5079313516616821, "step": 7248 }, { "epoch": 1.7620320855614975, "grad_norm": 1.5035587191067172, "learning_rate": 7.658259073694796e-07, "loss": 0.5017232894897461, "step": 7249 }, { "epoch": 1.7622751579970832, "grad_norm": 1.429703061654759, "learning_rate": 7.642840043478161e-07, "loss": 0.4431387782096863, "step": 7250 }, { "epoch": 1.762518230432669, "grad_norm": 1.6903177664477702, "learning_rate": 7.627435934374105e-07, "loss": 0.561010479927063, "step": 7251 }, { "epoch": 1.7627613028682547, "grad_norm": 1.6143370736014788, "learning_rate": 7.612046748871327e-07, "loss": 0.5936762094497681, "step": 7252 }, { "epoch": 1.7630043753038405, "grad_norm": 1.6197054379551177, "learning_rate": 7.596672489456092e-07, "loss": 0.5573071241378784, "step": 7253 }, { "epoch": 1.7632474477394262, "grad_norm": 1.824599571582169, "learning_rate": 7.581313158612202e-07, "loss": 0.47226476669311523, "step": 7254 }, { "epoch": 1.7634905201750122, "grad_norm": 1.8479812114004455, "learning_rate": 7.565968758821141e-07, "loss": 0.6798070073127747, "step": 7255 }, { "epoch": 1.763733592610598, "grad_norm": 1.50289232870412, "learning_rate": 7.550639292561901e-07, "loss": 0.49947071075439453, "step": 7256 }, { "epoch": 1.763976665046184, "grad_norm": 1.4895435576613587, "learning_rate": 7.535324762311102e-07, "loss": 0.5844362378120422, "step": 7257 }, { "epoch": 1.7642197374817696, "grad_norm": 1.607410992846171, "learning_rate": 7.520025170542944e-07, "loss": 0.5754332542419434, "step": 7258 }, { "epoch": 1.7644628099173554, "grad_norm": 1.8286726274796499, "learning_rate": 7.504740519729237e-07, "loss": 0.5812749266624451, "step": 7259 }, { "epoch": 1.7647058823529411, "grad_norm": 1.6783466353626633, "learning_rate": 7.489470812339295e-07, "loss": 0.5783290863037109, "step": 7260 }, { "epoch": 1.7649489547885269, "grad_norm": 1.6685455377812808, "learning_rate": 7.474216050840122e-07, "loss": 0.574560284614563, "step": 7261 }, { "epoch": 1.7651920272241128, "grad_norm": 1.6380564393303965, "learning_rate": 7.458976237696247e-07, "loss": 0.579317033290863, "step": 7262 }, { "epoch": 1.7654350996596986, "grad_norm": 1.4841085335185191, "learning_rate": 7.443751375369768e-07, "loss": 0.5336765050888062, "step": 7263 }, { "epoch": 1.7656781720952845, "grad_norm": 1.6162517995145624, "learning_rate": 7.428541466320438e-07, "loss": 0.663861095905304, "step": 7264 }, { "epoch": 1.7659212445308703, "grad_norm": 1.650279799453657, "learning_rate": 7.413346513005492e-07, "loss": 0.46345382928848267, "step": 7265 }, { "epoch": 1.766164316966456, "grad_norm": 1.7686819971526577, "learning_rate": 7.398166517879879e-07, "loss": 0.6105637550354004, "step": 7266 }, { "epoch": 1.7664073894020418, "grad_norm": 1.5145922695759695, "learning_rate": 7.383001483396013e-07, "loss": 0.6025274991989136, "step": 7267 }, { "epoch": 1.7666504618376275, "grad_norm": 1.8832239145980878, "learning_rate": 7.367851412003979e-07, "loss": 0.5211626887321472, "step": 7268 }, { "epoch": 1.7668935342732133, "grad_norm": 1.621246464939436, "learning_rate": 7.352716306151353e-07, "loss": 0.4727970063686371, "step": 7269 }, { "epoch": 1.7671366067087992, "grad_norm": 1.8775287236312397, "learning_rate": 7.337596168283401e-07, "loss": 0.471781849861145, "step": 7270 }, { "epoch": 1.767379679144385, "grad_norm": 1.4787838967755211, "learning_rate": 7.322491000842858e-07, "loss": 0.6051086187362671, "step": 7271 }, { "epoch": 1.767622751579971, "grad_norm": 1.7760899555827152, "learning_rate": 7.307400806270148e-07, "loss": 0.6179298758506775, "step": 7272 }, { "epoch": 1.7678658240155567, "grad_norm": 1.740224882382948, "learning_rate": 7.292325587003179e-07, "loss": 0.5328035354614258, "step": 7273 }, { "epoch": 1.7681088964511424, "grad_norm": 1.5694431572668226, "learning_rate": 7.277265345477514e-07, "loss": 0.5680062770843506, "step": 7274 }, { "epoch": 1.7683519688867282, "grad_norm": 1.7950885599599584, "learning_rate": 7.262220084126281e-07, "loss": 0.6064876317977905, "step": 7275 }, { "epoch": 1.768595041322314, "grad_norm": 1.7035555114129637, "learning_rate": 7.247189805380128e-07, "loss": 0.4603910744190216, "step": 7276 }, { "epoch": 1.7688381137579, "grad_norm": 1.5460271406729547, "learning_rate": 7.232174511667378e-07, "loss": 0.4664996862411499, "step": 7277 }, { "epoch": 1.7690811861934856, "grad_norm": 1.663779206414999, "learning_rate": 7.217174205413846e-07, "loss": 0.38883382081985474, "step": 7278 }, { "epoch": 1.7693242586290716, "grad_norm": 1.6645520782230319, "learning_rate": 7.202188889042994e-07, "loss": 0.5264104008674622, "step": 7279 }, { "epoch": 1.7695673310646574, "grad_norm": 1.4729536148130347, "learning_rate": 7.187218564975795e-07, "loss": 0.7445130348205566, "step": 7280 }, { "epoch": 1.769810403500243, "grad_norm": 1.8012032337690709, "learning_rate": 7.172263235630871e-07, "loss": 0.49378088116645813, "step": 7281 }, { "epoch": 1.7700534759358288, "grad_norm": 1.4542387727218073, "learning_rate": 7.15732290342438e-07, "loss": 0.5658531188964844, "step": 7282 }, { "epoch": 1.7702965483714146, "grad_norm": 1.8319984332662829, "learning_rate": 7.142397570770054e-07, "loss": 0.5283742547035217, "step": 7283 }, { "epoch": 1.7705396208070003, "grad_norm": 1.6642716665687445, "learning_rate": 7.127487240079223e-07, "loss": 0.6320713758468628, "step": 7284 }, { "epoch": 1.7707826932425863, "grad_norm": 1.9495866282525407, "learning_rate": 7.112591913760769e-07, "loss": 0.48735564947128296, "step": 7285 }, { "epoch": 1.771025765678172, "grad_norm": 1.6070659132870961, "learning_rate": 7.09771159422118e-07, "loss": 0.6251550912857056, "step": 7286 }, { "epoch": 1.771268838113758, "grad_norm": 1.6420740193944776, "learning_rate": 7.082846283864476e-07, "loss": 0.5542525053024292, "step": 7287 }, { "epoch": 1.7715119105493438, "grad_norm": 1.7691621210328425, "learning_rate": 7.067995985092313e-07, "loss": 0.6286768913269043, "step": 7288 }, { "epoch": 1.7717549829849295, "grad_norm": 1.3220565074511283, "learning_rate": 7.053160700303863e-07, "loss": 0.5013065934181213, "step": 7289 }, { "epoch": 1.7719980554205153, "grad_norm": 1.822320148597228, "learning_rate": 7.038340431895896e-07, "loss": 0.6323823928833008, "step": 7290 }, { "epoch": 1.772241127856101, "grad_norm": 1.7797616494709692, "learning_rate": 7.023535182262786e-07, "loss": 0.7350116968154907, "step": 7291 }, { "epoch": 1.772484200291687, "grad_norm": 1.5641180572674267, "learning_rate": 7.00874495379641e-07, "loss": 0.6540883779525757, "step": 7292 }, { "epoch": 1.7727272727272727, "grad_norm": 1.5588091091255682, "learning_rate": 6.993969748886297e-07, "loss": 0.5593808889389038, "step": 7293 }, { "epoch": 1.7729703451628587, "grad_norm": 1.9662273684735734, "learning_rate": 6.979209569919488e-07, "loss": 0.6296613812446594, "step": 7294 }, { "epoch": 1.7732134175984444, "grad_norm": 1.434133948516556, "learning_rate": 6.964464419280636e-07, "loss": 0.3858981728553772, "step": 7295 }, { "epoch": 1.7734564900340302, "grad_norm": 1.482411779801419, "learning_rate": 6.949734299351918e-07, "loss": 0.5985245108604431, "step": 7296 }, { "epoch": 1.773699562469616, "grad_norm": 1.822273879486442, "learning_rate": 6.935019212513161e-07, "loss": 0.5091167092323303, "step": 7297 }, { "epoch": 1.7739426349052017, "grad_norm": 1.6279801551094522, "learning_rate": 6.920319161141675e-07, "loss": 0.4312141537666321, "step": 7298 }, { "epoch": 1.7741857073407874, "grad_norm": 1.7947829092051997, "learning_rate": 6.905634147612405e-07, "loss": 0.5507571697235107, "step": 7299 }, { "epoch": 1.7744287797763734, "grad_norm": 1.5488227010180946, "learning_rate": 6.890964174297854e-07, "loss": 0.5886832475662231, "step": 7300 }, { "epoch": 1.7746718522119591, "grad_norm": 1.7214285011696784, "learning_rate": 6.876309243568058e-07, "loss": 0.5852469801902771, "step": 7301 }, { "epoch": 1.774914924647545, "grad_norm": 1.739528571817681, "learning_rate": 6.861669357790679e-07, "loss": 0.6761884689331055, "step": 7302 }, { "epoch": 1.7751579970831308, "grad_norm": 1.50791239156401, "learning_rate": 6.847044519330892e-07, "loss": 0.6102312803268433, "step": 7303 }, { "epoch": 1.7754010695187166, "grad_norm": 1.7194913813063046, "learning_rate": 6.832434730551485e-07, "loss": 0.6358771324157715, "step": 7304 }, { "epoch": 1.7756441419543023, "grad_norm": 1.6774287266292567, "learning_rate": 6.817839993812791e-07, "loss": 0.49279600381851196, "step": 7305 }, { "epoch": 1.775887214389888, "grad_norm": 1.9572666618479757, "learning_rate": 6.803260311472715e-07, "loss": 0.6995576024055481, "step": 7306 }, { "epoch": 1.776130286825474, "grad_norm": 1.5590650765471994, "learning_rate": 6.788695685886759e-07, "loss": 0.3981032073497772, "step": 7307 }, { "epoch": 1.7763733592610598, "grad_norm": 1.9889606701203717, "learning_rate": 6.77414611940792e-07, "loss": 0.5354713201522827, "step": 7308 }, { "epoch": 1.7766164316966457, "grad_norm": 1.7851846178930693, "learning_rate": 6.759611614386863e-07, "loss": 0.596454381942749, "step": 7309 }, { "epoch": 1.7768595041322315, "grad_norm": 1.6240288019664273, "learning_rate": 6.745092173171718e-07, "loss": 0.6882486343383789, "step": 7310 }, { "epoch": 1.7771025765678172, "grad_norm": 1.6115095391252348, "learning_rate": 6.730587798108268e-07, "loss": 0.7030788660049438, "step": 7311 }, { "epoch": 1.777345649003403, "grad_norm": 1.5626285626233845, "learning_rate": 6.71609849153978e-07, "loss": 0.6057735681533813, "step": 7312 }, { "epoch": 1.7775887214389887, "grad_norm": 1.7890063352232872, "learning_rate": 6.701624255807171e-07, "loss": 0.5748729705810547, "step": 7313 }, { "epoch": 1.7778317938745745, "grad_norm": 1.9045529942714412, "learning_rate": 6.687165093248849e-07, "loss": 0.5480297803878784, "step": 7314 }, { "epoch": 1.7780748663101604, "grad_norm": 1.7639603115054006, "learning_rate": 6.672721006200833e-07, "loss": 0.615920901298523, "step": 7315 }, { "epoch": 1.7783179387457464, "grad_norm": 2.0744231501267114, "learning_rate": 6.658291996996702e-07, "loss": 0.4329345226287842, "step": 7316 }, { "epoch": 1.7785610111813321, "grad_norm": 1.7154241460190356, "learning_rate": 6.643878067967568e-07, "loss": 0.5661699771881104, "step": 7317 }, { "epoch": 1.7788040836169179, "grad_norm": 1.849084027862558, "learning_rate": 6.629479221442148e-07, "loss": 0.46595269441604614, "step": 7318 }, { "epoch": 1.7790471560525036, "grad_norm": 1.620594989646199, "learning_rate": 6.615095459746678e-07, "loss": 0.5535804629325867, "step": 7319 }, { "epoch": 1.7792902284880894, "grad_norm": 1.7301970553272163, "learning_rate": 6.600726785205025e-07, "loss": 0.522327184677124, "step": 7320 }, { "epoch": 1.7795333009236751, "grad_norm": 1.6954801515539224, "learning_rate": 6.586373200138518e-07, "loss": 0.560158371925354, "step": 7321 }, { "epoch": 1.779776373359261, "grad_norm": 1.4399262317050987, "learning_rate": 6.572034706866149e-07, "loss": 0.4925234913825989, "step": 7322 }, { "epoch": 1.7800194457948468, "grad_norm": 1.755563491352511, "learning_rate": 6.557711307704417e-07, "loss": 0.555733323097229, "step": 7323 }, { "epoch": 1.7802625182304328, "grad_norm": 1.9731711977561077, "learning_rate": 6.543403004967375e-07, "loss": 0.6705294251441956, "step": 7324 }, { "epoch": 1.7805055906660185, "grad_norm": 1.529005581422139, "learning_rate": 6.529109800966693e-07, "loss": 0.6792653799057007, "step": 7325 }, { "epoch": 1.7807486631016043, "grad_norm": 1.2377227501133363, "learning_rate": 6.514831698011515e-07, "loss": 0.45710867643356323, "step": 7326 }, { "epoch": 1.78099173553719, "grad_norm": 1.8093866498903006, "learning_rate": 6.500568698408649e-07, "loss": 0.6697413921356201, "step": 7327 }, { "epoch": 1.7812348079727758, "grad_norm": 1.5519774054704694, "learning_rate": 6.486320804462354e-07, "loss": 0.6033552289009094, "step": 7328 }, { "epoch": 1.7814778804083617, "grad_norm": 1.7286725332418709, "learning_rate": 6.472088018474543e-07, "loss": 0.50254225730896, "step": 7329 }, { "epoch": 1.7817209528439475, "grad_norm": 1.735091646073079, "learning_rate": 6.457870342744599e-07, "loss": 0.5522192716598511, "step": 7330 }, { "epoch": 1.7819640252795335, "grad_norm": 2.102787642750835, "learning_rate": 6.443667779569584e-07, "loss": 0.5916100740432739, "step": 7331 }, { "epoch": 1.7822070977151192, "grad_norm": 1.6867505871395583, "learning_rate": 6.42948033124401e-07, "loss": 0.6611053943634033, "step": 7332 }, { "epoch": 1.782450170150705, "grad_norm": 1.5991074263077576, "learning_rate": 6.415308000059961e-07, "loss": 0.5340003371238708, "step": 7333 }, { "epoch": 1.7826932425862907, "grad_norm": 1.8629112873392835, "learning_rate": 6.401150788307154e-07, "loss": 0.6271836757659912, "step": 7334 }, { "epoch": 1.7829363150218764, "grad_norm": 1.664680385386424, "learning_rate": 6.387008698272757e-07, "loss": 0.7532881498336792, "step": 7335 }, { "epoch": 1.7831793874574622, "grad_norm": 1.7007361020932887, "learning_rate": 6.372881732241587e-07, "loss": 0.6205991506576538, "step": 7336 }, { "epoch": 1.7834224598930482, "grad_norm": 1.5884629455038883, "learning_rate": 6.358769892495963e-07, "loss": 0.4121190309524536, "step": 7337 }, { "epoch": 1.783665532328634, "grad_norm": 1.6227213329768775, "learning_rate": 6.344673181315786e-07, "loss": 0.5545507669448853, "step": 7338 }, { "epoch": 1.7839086047642199, "grad_norm": 1.5916776902025922, "learning_rate": 6.330591600978476e-07, "loss": 0.6936690807342529, "step": 7339 }, { "epoch": 1.7841516771998056, "grad_norm": 1.7217484603393662, "learning_rate": 6.31652515375909e-07, "loss": 0.7720394730567932, "step": 7340 }, { "epoch": 1.7843947496353914, "grad_norm": 1.549727626860728, "learning_rate": 6.302473841930135e-07, "loss": 0.5499593019485474, "step": 7341 }, { "epoch": 1.784637822070977, "grad_norm": 1.4194561867715294, "learning_rate": 6.28843766776176e-07, "loss": 0.46858927607536316, "step": 7342 }, { "epoch": 1.7848808945065628, "grad_norm": 1.5701690986649408, "learning_rate": 6.274416633521618e-07, "loss": 0.48411238193511963, "step": 7343 }, { "epoch": 1.7851239669421488, "grad_norm": 1.6979015728182763, "learning_rate": 6.260410741474921e-07, "loss": 0.40548524260520935, "step": 7344 }, { "epoch": 1.7853670393777346, "grad_norm": 1.629072965887843, "learning_rate": 6.246419993884456e-07, "loss": 0.6741154193878174, "step": 7345 }, { "epoch": 1.7856101118133205, "grad_norm": 1.9420604840324032, "learning_rate": 6.232444393010551e-07, "loss": 0.5948950052261353, "step": 7346 }, { "epoch": 1.7858531842489063, "grad_norm": 1.6243115818107825, "learning_rate": 6.218483941111076e-07, "loss": 0.5760619640350342, "step": 7347 }, { "epoch": 1.786096256684492, "grad_norm": 1.725781496634483, "learning_rate": 6.204538640441471e-07, "loss": 0.5805312395095825, "step": 7348 }, { "epoch": 1.7863393291200778, "grad_norm": 1.8593058028583724, "learning_rate": 6.190608493254746e-07, "loss": 0.5251669883728027, "step": 7349 }, { "epoch": 1.7865824015556635, "grad_norm": 1.834051929394565, "learning_rate": 6.176693501801401e-07, "loss": 0.5961227416992188, "step": 7350 }, { "epoch": 1.7868254739912492, "grad_norm": 1.5894919723283376, "learning_rate": 6.162793668329548e-07, "loss": 0.4422723054885864, "step": 7351 }, { "epoch": 1.7870685464268352, "grad_norm": 1.732723539464437, "learning_rate": 6.148908995084835e-07, "loss": 0.6393437385559082, "step": 7352 }, { "epoch": 1.787311618862421, "grad_norm": 1.8431917202826997, "learning_rate": 6.135039484310423e-07, "loss": 0.7400705814361572, "step": 7353 }, { "epoch": 1.787554691298007, "grad_norm": 1.4257082054664814, "learning_rate": 6.121185138247088e-07, "loss": 0.4385988116264343, "step": 7354 }, { "epoch": 1.7877977637335927, "grad_norm": 1.4660752601017908, "learning_rate": 6.107345959133071e-07, "loss": 0.4860922694206238, "step": 7355 }, { "epoch": 1.7880408361691784, "grad_norm": 1.7314358843847184, "learning_rate": 6.093521949204272e-07, "loss": 0.6007486581802368, "step": 7356 }, { "epoch": 1.7882839086047642, "grad_norm": 1.7576431911633867, "learning_rate": 6.079713110694052e-07, "loss": 0.6040576100349426, "step": 7357 }, { "epoch": 1.78852698104035, "grad_norm": 1.6640158353192396, "learning_rate": 6.065919445833368e-07, "loss": 0.5457966923713684, "step": 7358 }, { "epoch": 1.7887700534759359, "grad_norm": 1.5062679537243746, "learning_rate": 6.052140956850672e-07, "loss": 0.5973365306854248, "step": 7359 }, { "epoch": 1.7890131259115216, "grad_norm": 1.4770368241052674, "learning_rate": 6.038377645972048e-07, "loss": 0.6983096599578857, "step": 7360 }, { "epoch": 1.7892561983471076, "grad_norm": 1.6561458458017966, "learning_rate": 6.024629515421054e-07, "loss": 0.5824168920516968, "step": 7361 }, { "epoch": 1.7894992707826933, "grad_norm": 1.8178304455104015, "learning_rate": 6.010896567418801e-07, "loss": 0.6575400829315186, "step": 7362 }, { "epoch": 1.789742343218279, "grad_norm": 1.5032951259937346, "learning_rate": 5.997178804183989e-07, "loss": 0.5055327415466309, "step": 7363 }, { "epoch": 1.7899854156538648, "grad_norm": 1.7870826454783006, "learning_rate": 5.983476227932849e-07, "loss": 0.5474482774734497, "step": 7364 }, { "epoch": 1.7902284880894506, "grad_norm": 1.6337428201993243, "learning_rate": 5.969788840879165e-07, "loss": 0.5744308233261108, "step": 7365 }, { "epoch": 1.7904715605250363, "grad_norm": 1.5427929758416075, "learning_rate": 5.956116645234222e-07, "loss": 0.46988803148269653, "step": 7366 }, { "epoch": 1.7907146329606223, "grad_norm": 1.6166900258768362, "learning_rate": 5.942459643206911e-07, "loss": 0.630953311920166, "step": 7367 }, { "epoch": 1.790957705396208, "grad_norm": 1.430647480075259, "learning_rate": 5.92881783700362e-07, "loss": 0.49116846919059753, "step": 7368 }, { "epoch": 1.791200777831794, "grad_norm": 1.664001356905571, "learning_rate": 5.91519122882832e-07, "loss": 0.44304782152175903, "step": 7369 }, { "epoch": 1.7914438502673797, "grad_norm": 1.5737356504422775, "learning_rate": 5.901579820882508e-07, "loss": 0.4552682042121887, "step": 7370 }, { "epoch": 1.7916869227029655, "grad_norm": 1.8135467735441306, "learning_rate": 5.887983615365201e-07, "loss": 0.4985540509223938, "step": 7371 }, { "epoch": 1.7919299951385512, "grad_norm": 1.5430158630960347, "learning_rate": 5.874402614473008e-07, "loss": 0.582465410232544, "step": 7372 }, { "epoch": 1.792173067574137, "grad_norm": 1.5770175093678962, "learning_rate": 5.860836820400062e-07, "loss": 0.5856212973594666, "step": 7373 }, { "epoch": 1.792416140009723, "grad_norm": 1.659288788837609, "learning_rate": 5.847286235338046e-07, "loss": 0.5750073194503784, "step": 7374 }, { "epoch": 1.7926592124453087, "grad_norm": 1.4675364419719241, "learning_rate": 5.83375086147614e-07, "loss": 0.44819173216819763, "step": 7375 }, { "epoch": 1.7929022848808946, "grad_norm": 1.9042353951503905, "learning_rate": 5.820230701001139e-07, "loss": 0.5330292582511902, "step": 7376 }, { "epoch": 1.7931453573164804, "grad_norm": 1.5987463762381227, "learning_rate": 5.806725756097321e-07, "loss": 0.6593700647354126, "step": 7377 }, { "epoch": 1.7933884297520661, "grad_norm": 1.6957234966363874, "learning_rate": 5.793236028946547e-07, "loss": 0.6167036294937134, "step": 7378 }, { "epoch": 1.7936315021876519, "grad_norm": 1.6883439192200518, "learning_rate": 5.779761521728178e-07, "loss": 0.4981204867362976, "step": 7379 }, { "epoch": 1.7938745746232376, "grad_norm": 1.7452866789824821, "learning_rate": 5.766302236619148e-07, "loss": 0.5107212066650391, "step": 7380 }, { "epoch": 1.7941176470588234, "grad_norm": 1.5447229543304462, "learning_rate": 5.752858175793951e-07, "loss": 0.476635217666626, "step": 7381 }, { "epoch": 1.7943607194944093, "grad_norm": 1.6355259096044068, "learning_rate": 5.739429341424552e-07, "loss": 0.6525803804397583, "step": 7382 }, { "epoch": 1.794603791929995, "grad_norm": 1.8870529653848411, "learning_rate": 5.726015735680535e-07, "loss": 0.5731471180915833, "step": 7383 }, { "epoch": 1.794846864365581, "grad_norm": 1.4880159424378832, "learning_rate": 5.712617360728944e-07, "loss": 0.6492248773574829, "step": 7384 }, { "epoch": 1.7950899368011668, "grad_norm": 1.9859568486406616, "learning_rate": 5.699234218734451e-07, "loss": 0.524214506149292, "step": 7385 }, { "epoch": 1.7953330092367525, "grad_norm": 1.6164784293232028, "learning_rate": 5.685866311859178e-07, "loss": 0.6090015172958374, "step": 7386 }, { "epoch": 1.7955760816723383, "grad_norm": 1.5285289438961733, "learning_rate": 5.672513642262867e-07, "loss": 0.5143767595291138, "step": 7387 }, { "epoch": 1.795819154107924, "grad_norm": 1.5489332907304931, "learning_rate": 5.659176212102713e-07, "loss": 0.5973137617111206, "step": 7388 }, { "epoch": 1.79606222654351, "grad_norm": 1.8000092051524221, "learning_rate": 5.645854023533537e-07, "loss": 0.551517128944397, "step": 7389 }, { "epoch": 1.7963052989790957, "grad_norm": 1.7920777158007886, "learning_rate": 5.632547078707639e-07, "loss": 0.5834879875183105, "step": 7390 }, { "epoch": 1.7965483714146817, "grad_norm": 1.7094953226664986, "learning_rate": 5.619255379774869e-07, "loss": 0.5209602117538452, "step": 7391 }, { "epoch": 1.7967914438502675, "grad_norm": 1.6838255965756834, "learning_rate": 5.605978928882638e-07, "loss": 0.5948209762573242, "step": 7392 }, { "epoch": 1.7970345162858532, "grad_norm": 1.4976933785115203, "learning_rate": 5.592717728175844e-07, "loss": 0.4350493550300598, "step": 7393 }, { "epoch": 1.797277588721439, "grad_norm": 1.5486969012971674, "learning_rate": 5.579471779796985e-07, "loss": 0.8534889817237854, "step": 7394 }, { "epoch": 1.7975206611570247, "grad_norm": 1.5350811829683981, "learning_rate": 5.566241085886027e-07, "loss": 0.6312271356582642, "step": 7395 }, { "epoch": 1.7977637335926104, "grad_norm": 1.9053673084987492, "learning_rate": 5.553025648580512e-07, "loss": 0.6435139775276184, "step": 7396 }, { "epoch": 1.7980068060281964, "grad_norm": 1.7596404177418326, "learning_rate": 5.539825470015536e-07, "loss": 0.5678051710128784, "step": 7397 }, { "epoch": 1.7982498784637824, "grad_norm": 1.4342564232761168, "learning_rate": 5.52664055232367e-07, "loss": 0.4861147701740265, "step": 7398 }, { "epoch": 1.7984929508993681, "grad_norm": 1.4780344474136247, "learning_rate": 5.513470897635087e-07, "loss": 0.5556927919387817, "step": 7399 }, { "epoch": 1.7987360233349539, "grad_norm": 1.5530270162845763, "learning_rate": 5.50031650807743e-07, "loss": 0.4918834865093231, "step": 7400 }, { "epoch": 1.7989790957705396, "grad_norm": 1.8841698054209752, "learning_rate": 5.487177385775933e-07, "loss": 0.41544103622436523, "step": 7401 }, { "epoch": 1.7992221682061253, "grad_norm": 1.9950334208948761, "learning_rate": 5.474053532853308e-07, "loss": 0.6478413939476013, "step": 7402 }, { "epoch": 1.799465240641711, "grad_norm": 1.6572870686840655, "learning_rate": 5.460944951429858e-07, "loss": 0.4437166452407837, "step": 7403 }, { "epoch": 1.799708313077297, "grad_norm": 1.4739824831456698, "learning_rate": 5.44785164362337e-07, "loss": 0.5329892635345459, "step": 7404 }, { "epoch": 1.7999513855128828, "grad_norm": 1.5675507761034515, "learning_rate": 5.434773611549182e-07, "loss": 0.6164488792419434, "step": 7405 }, { "epoch": 1.8001944579484688, "grad_norm": 1.6582745848667642, "learning_rate": 5.421710857320194e-07, "loss": 0.4713193476200104, "step": 7406 }, { "epoch": 1.8004375303840545, "grad_norm": 1.5124570354990317, "learning_rate": 5.408663383046775e-07, "loss": 0.5670373439788818, "step": 7407 }, { "epoch": 1.8006806028196403, "grad_norm": 1.8621951547995963, "learning_rate": 5.395631190836881e-07, "loss": 0.6518068909645081, "step": 7408 }, { "epoch": 1.800923675255226, "grad_norm": 1.7034024555170235, "learning_rate": 5.38261428279595e-07, "loss": 0.4666195511817932, "step": 7409 }, { "epoch": 1.8011667476908118, "grad_norm": 1.704849790547747, "learning_rate": 5.369612661027024e-07, "loss": 0.5408073663711548, "step": 7410 }, { "epoch": 1.8014098201263977, "grad_norm": 1.687427926875968, "learning_rate": 5.356626327630577e-07, "loss": 0.5299798250198364, "step": 7411 }, { "epoch": 1.8016528925619835, "grad_norm": 1.815567144030663, "learning_rate": 5.343655284704719e-07, "loss": 0.5253601670265198, "step": 7412 }, { "epoch": 1.8018959649975694, "grad_norm": 1.8041485597376972, "learning_rate": 5.330699534344986e-07, "loss": 0.6666984558105469, "step": 7413 }, { "epoch": 1.8021390374331552, "grad_norm": 1.8501646124184437, "learning_rate": 5.317759078644513e-07, "loss": 0.4260459840297699, "step": 7414 }, { "epoch": 1.802382109868741, "grad_norm": 1.7028976688607393, "learning_rate": 5.304833919693963e-07, "loss": 0.5395622849464417, "step": 7415 }, { "epoch": 1.8026251823043267, "grad_norm": 1.7891301610445103, "learning_rate": 5.291924059581477e-07, "loss": 0.6489711999893188, "step": 7416 }, { "epoch": 1.8028682547399124, "grad_norm": 1.8419628826705792, "learning_rate": 5.279029500392796e-07, "loss": 0.7376046180725098, "step": 7417 }, { "epoch": 1.8031113271754982, "grad_norm": 1.9731943282735456, "learning_rate": 5.26615024421111e-07, "loss": 0.47269290685653687, "step": 7418 }, { "epoch": 1.8033543996110841, "grad_norm": 1.8022911581285552, "learning_rate": 5.25328629311721e-07, "loss": 0.5840875506401062, "step": 7419 }, { "epoch": 1.8035974720466699, "grad_norm": 1.4166610625760006, "learning_rate": 5.240437649189345e-07, "loss": 0.599895715713501, "step": 7420 }, { "epoch": 1.8038405444822558, "grad_norm": 1.9088235837802847, "learning_rate": 5.227604314503343e-07, "loss": 0.646864652633667, "step": 7421 }, { "epoch": 1.8040836169178416, "grad_norm": 1.851272386389009, "learning_rate": 5.214786291132568e-07, "loss": 0.7098675966262817, "step": 7422 }, { "epoch": 1.8043266893534273, "grad_norm": 1.4870816523174686, "learning_rate": 5.201983581147841e-07, "loss": 0.4810367226600647, "step": 7423 }, { "epoch": 1.804569761789013, "grad_norm": 1.53443389123808, "learning_rate": 5.189196186617584e-07, "loss": 0.7913097143173218, "step": 7424 }, { "epoch": 1.8048128342245988, "grad_norm": 1.5024529524084296, "learning_rate": 5.176424109607692e-07, "loss": 0.6081711053848267, "step": 7425 }, { "epoch": 1.8050559066601848, "grad_norm": 1.5241103466383938, "learning_rate": 5.163667352181623e-07, "loss": 0.4531915783882141, "step": 7426 }, { "epoch": 1.8052989790957705, "grad_norm": 1.5684213384233792, "learning_rate": 5.150925916400329e-07, "loss": 0.5898356437683105, "step": 7427 }, { "epoch": 1.8055420515313565, "grad_norm": 1.924136782413449, "learning_rate": 5.13819980432233e-07, "loss": 0.5261459350585938, "step": 7428 }, { "epoch": 1.8057851239669422, "grad_norm": 1.3699454259782209, "learning_rate": 5.125489018003583e-07, "loss": 0.6277796030044556, "step": 7429 }, { "epoch": 1.806028196402528, "grad_norm": 2.071269838523825, "learning_rate": 5.112793559497697e-07, "loss": 0.4900228977203369, "step": 7430 }, { "epoch": 1.8062712688381137, "grad_norm": 1.9639901310450458, "learning_rate": 5.100113430855713e-07, "loss": 0.6294326186180115, "step": 7431 }, { "epoch": 1.8065143412736995, "grad_norm": 1.7709188850590623, "learning_rate": 5.08744863412618e-07, "loss": 0.5477208495140076, "step": 7432 }, { "epoch": 1.8067574137092852, "grad_norm": 1.479425575537803, "learning_rate": 5.07479917135525e-07, "loss": 0.6181275844573975, "step": 7433 }, { "epoch": 1.8070004861448712, "grad_norm": 1.8880488179787396, "learning_rate": 5.062165044586532e-07, "loss": 0.6744672656059265, "step": 7434 }, { "epoch": 1.807243558580457, "grad_norm": 1.531912478528342, "learning_rate": 5.049546255861204e-07, "loss": 0.5250054001808167, "step": 7435 }, { "epoch": 1.807486631016043, "grad_norm": 1.440433544978561, "learning_rate": 5.036942807217904e-07, "loss": 0.31015872955322266, "step": 7436 }, { "epoch": 1.8077297034516286, "grad_norm": 1.74480234292845, "learning_rate": 5.024354700692868e-07, "loss": 0.6297910213470459, "step": 7437 }, { "epoch": 1.8079727758872144, "grad_norm": 1.7028153951775622, "learning_rate": 5.011781938319793e-07, "loss": 0.4945845901966095, "step": 7438 }, { "epoch": 1.8082158483228001, "grad_norm": 1.6981755726445595, "learning_rate": 4.999224522129942e-07, "loss": 0.5454933643341064, "step": 7439 }, { "epoch": 1.8084589207583859, "grad_norm": 1.5682348914156234, "learning_rate": 4.986682454152059e-07, "loss": 0.5731264352798462, "step": 7440 }, { "epoch": 1.8087019931939718, "grad_norm": 1.627191760130281, "learning_rate": 4.974155736412412e-07, "loss": 0.6072379946708679, "step": 7441 }, { "epoch": 1.8089450656295576, "grad_norm": 1.4068248087839104, "learning_rate": 4.961644370934837e-07, "loss": 0.5712035894393921, "step": 7442 }, { "epoch": 1.8091881380651436, "grad_norm": 1.8629481169725315, "learning_rate": 4.949148359740619e-07, "loss": 0.5423089265823364, "step": 7443 }, { "epoch": 1.8094312105007293, "grad_norm": 1.7076005760088777, "learning_rate": 4.936667704848641e-07, "loss": 0.5068509578704834, "step": 7444 }, { "epoch": 1.809674282936315, "grad_norm": 1.906690417420025, "learning_rate": 4.924202408275203e-07, "loss": 0.8241488933563232, "step": 7445 }, { "epoch": 1.8099173553719008, "grad_norm": 1.4462021635485893, "learning_rate": 4.911752472034248e-07, "loss": 0.6610566973686218, "step": 7446 }, { "epoch": 1.8101604278074865, "grad_norm": 1.6478419717769495, "learning_rate": 4.899317898137123e-07, "loss": 0.5992140769958496, "step": 7447 }, { "epoch": 1.8104035002430723, "grad_norm": 1.538009699338915, "learning_rate": 4.886898688592789e-07, "loss": 0.500322699546814, "step": 7448 }, { "epoch": 1.8106465726786583, "grad_norm": 1.8143327453432485, "learning_rate": 4.874494845407629e-07, "loss": 0.5718967318534851, "step": 7449 }, { "epoch": 1.810889645114244, "grad_norm": 1.6342441160809986, "learning_rate": 4.86210637058564e-07, "loss": 0.5444517135620117, "step": 7450 }, { "epoch": 1.81113271754983, "grad_norm": 1.6108090574445464, "learning_rate": 4.849733266128254e-07, "loss": 0.5434185862541199, "step": 7451 }, { "epoch": 1.8113757899854157, "grad_norm": 1.895548818175176, "learning_rate": 4.837375534034472e-07, "loss": 0.5857287645339966, "step": 7452 }, { "epoch": 1.8116188624210015, "grad_norm": 1.6502408691395312, "learning_rate": 4.825033176300786e-07, "loss": 0.6285684108734131, "step": 7453 }, { "epoch": 1.8118619348565872, "grad_norm": 1.9336016346795812, "learning_rate": 4.812706194921202e-07, "loss": 0.6095008254051208, "step": 7454 }, { "epoch": 1.812105007292173, "grad_norm": 1.7302051464667059, "learning_rate": 4.800394591887292e-07, "loss": 0.5516211986541748, "step": 7455 }, { "epoch": 1.812348079727759, "grad_norm": 1.6015267740619794, "learning_rate": 4.788098369188054e-07, "loss": 0.46988242864608765, "step": 7456 }, { "epoch": 1.8125911521633447, "grad_norm": 1.7316652880175134, "learning_rate": 4.775817528810101e-07, "loss": 0.5765860676765442, "step": 7457 }, { "epoch": 1.8128342245989306, "grad_norm": 1.7710477325856253, "learning_rate": 4.7635520727374764e-07, "loss": 0.5475757718086243, "step": 7458 }, { "epoch": 1.8130772970345164, "grad_norm": 1.6946154793617716, "learning_rate": 4.751302002951785e-07, "loss": 0.6307452321052551, "step": 7459 }, { "epoch": 1.8133203694701021, "grad_norm": 1.8132421027459507, "learning_rate": 4.7390673214321336e-07, "loss": 0.5927107334136963, "step": 7460 }, { "epoch": 1.8135634419056879, "grad_norm": 1.5785671784218973, "learning_rate": 4.726848030155129e-07, "loss": 0.6389549374580383, "step": 7461 }, { "epoch": 1.8138065143412736, "grad_norm": 1.7174723428405034, "learning_rate": 4.7146441310949143e-07, "loss": 0.5749761462211609, "step": 7462 }, { "epoch": 1.8140495867768593, "grad_norm": 1.488241852752305, "learning_rate": 4.702455626223146e-07, "loss": 0.5838632583618164, "step": 7463 }, { "epoch": 1.8142926592124453, "grad_norm": 1.824058524270374, "learning_rate": 4.6902825175089817e-07, "loss": 0.5716566443443298, "step": 7464 }, { "epoch": 1.814535731648031, "grad_norm": 1.793618378306373, "learning_rate": 4.678124806919071e-07, "loss": 0.5039135813713074, "step": 7465 }, { "epoch": 1.814778804083617, "grad_norm": 1.7777784649809465, "learning_rate": 4.6659824964176426e-07, "loss": 0.6268917918205261, "step": 7466 }, { "epoch": 1.8150218765192028, "grad_norm": 1.7203435122443522, "learning_rate": 4.6538555879663496e-07, "loss": 0.6005805730819702, "step": 7467 }, { "epoch": 1.8152649489547885, "grad_norm": 1.8723316072066394, "learning_rate": 4.6417440835244354e-07, "loss": 0.696169376373291, "step": 7468 }, { "epoch": 1.8155080213903743, "grad_norm": 1.6412492631042506, "learning_rate": 4.6296479850486133e-07, "loss": 0.5161569118499756, "step": 7469 }, { "epoch": 1.81575109382596, "grad_norm": 1.586871882603368, "learning_rate": 4.6175672944930637e-07, "loss": 0.5494844913482666, "step": 7470 }, { "epoch": 1.815994166261546, "grad_norm": 1.7984939492844025, "learning_rate": 4.605502013809604e-07, "loss": 0.4944192171096802, "step": 7471 }, { "epoch": 1.8162372386971317, "grad_norm": 1.5432414368725582, "learning_rate": 4.593452144947452e-07, "loss": 0.48573607206344604, "step": 7472 }, { "epoch": 1.8164803111327177, "grad_norm": 1.6052572508845517, "learning_rate": 4.5814176898533734e-07, "loss": 0.5860370993614197, "step": 7473 }, { "epoch": 1.8167233835683034, "grad_norm": 1.3187723879972915, "learning_rate": 4.5693986504716237e-07, "loss": 0.4486370086669922, "step": 7474 }, { "epoch": 1.8169664560038892, "grad_norm": 1.756418280092285, "learning_rate": 4.557395028744016e-07, "loss": 0.46429938077926636, "step": 7475 }, { "epoch": 1.817209528439475, "grad_norm": 1.5970479548525187, "learning_rate": 4.545406826609811e-07, "loss": 0.5526498556137085, "step": 7476 }, { "epoch": 1.8174526008750607, "grad_norm": 2.1070039426316325, "learning_rate": 4.5334340460058354e-07, "loss": 0.7383882999420166, "step": 7477 }, { "epoch": 1.8176956733106464, "grad_norm": 1.590145684501453, "learning_rate": 4.5214766888663777e-07, "loss": 0.5472766160964966, "step": 7478 }, { "epoch": 1.8179387457462324, "grad_norm": 1.9911505823857807, "learning_rate": 4.509534757123257e-07, "loss": 0.5850259065628052, "step": 7479 }, { "epoch": 1.8181818181818183, "grad_norm": 1.5685238840829099, "learning_rate": 4.4976082527058097e-07, "loss": 0.5021121501922607, "step": 7480 }, { "epoch": 1.818424890617404, "grad_norm": 1.7009904864809846, "learning_rate": 4.485697177540849e-07, "loss": 0.6100231409072876, "step": 7481 }, { "epoch": 1.8186679630529898, "grad_norm": 1.6192303456559392, "learning_rate": 4.473801533552735e-07, "loss": 0.5680444836616516, "step": 7482 }, { "epoch": 1.8189110354885756, "grad_norm": 1.4746858774494889, "learning_rate": 4.4619213226632876e-07, "loss": 0.44396570324897766, "step": 7483 }, { "epoch": 1.8191541079241613, "grad_norm": 2.4157250032904427, "learning_rate": 4.4500565467918924e-07, "loss": 0.6591044664382935, "step": 7484 }, { "epoch": 1.819397180359747, "grad_norm": 1.8081223526821604, "learning_rate": 4.438207207855383e-07, "loss": 0.5125536918640137, "step": 7485 }, { "epoch": 1.819640252795333, "grad_norm": 1.4956639188067546, "learning_rate": 4.426373307768128e-07, "loss": 0.6328020095825195, "step": 7486 }, { "epoch": 1.8198833252309188, "grad_norm": 1.9400609644641575, "learning_rate": 4.4145548484420096e-07, "loss": 0.6381379961967468, "step": 7487 }, { "epoch": 1.8201263976665047, "grad_norm": 1.6804074565747846, "learning_rate": 4.4027518317863895e-07, "loss": 0.5199364423751831, "step": 7488 }, { "epoch": 1.8203694701020905, "grad_norm": 1.515334421213655, "learning_rate": 4.3909642597081745e-07, "loss": 0.5932647585868835, "step": 7489 }, { "epoch": 1.8206125425376762, "grad_norm": 1.7705707299961306, "learning_rate": 4.3791921341117097e-07, "loss": 0.5025992393493652, "step": 7490 }, { "epoch": 1.820855614973262, "grad_norm": 1.7707055501837161, "learning_rate": 4.3674354568989275e-07, "loss": 0.4459072947502136, "step": 7491 }, { "epoch": 1.8210986874088477, "grad_norm": 1.4940513137287035, "learning_rate": 4.3556942299691876e-07, "loss": 0.5146268606185913, "step": 7492 }, { "epoch": 1.8213417598444337, "grad_norm": 1.6083028418296605, "learning_rate": 4.343968455219416e-07, "loss": 0.5724704265594482, "step": 7493 }, { "epoch": 1.8215848322800194, "grad_norm": 1.6924324670500526, "learning_rate": 4.3322581345439985e-07, "loss": 0.5938864350318909, "step": 7494 }, { "epoch": 1.8218279047156054, "grad_norm": 1.48352626917137, "learning_rate": 4.320563269834832e-07, "loss": 0.5393657088279724, "step": 7495 }, { "epoch": 1.8220709771511912, "grad_norm": 1.4640623397447854, "learning_rate": 4.308883862981361e-07, "loss": 0.5500369071960449, "step": 7496 }, { "epoch": 1.822314049586777, "grad_norm": 1.6356526521885848, "learning_rate": 4.2972199158704654e-07, "loss": 0.6175419688224792, "step": 7497 }, { "epoch": 1.8225571220223626, "grad_norm": 1.4286489003493796, "learning_rate": 4.2855714303865703e-07, "loss": 0.5697623491287231, "step": 7498 }, { "epoch": 1.8228001944579484, "grad_norm": 1.7785000431086992, "learning_rate": 4.273938408411582e-07, "loss": 0.7035548090934753, "step": 7499 }, { "epoch": 1.8230432668935341, "grad_norm": 1.613193589884072, "learning_rate": 4.2623208518249306e-07, "loss": 0.5491641163825989, "step": 7500 }, { "epoch": 1.82328633932912, "grad_norm": 1.7480943270817981, "learning_rate": 4.250718762503514e-07, "loss": 0.5420243740081787, "step": 7501 }, { "epoch": 1.8235294117647058, "grad_norm": 1.7558663727252188, "learning_rate": 4.239132142321789e-07, "loss": 0.6741905212402344, "step": 7502 }, { "epoch": 1.8237724842002918, "grad_norm": 1.7913898603430427, "learning_rate": 4.227560993151636e-07, "loss": 0.6599386930465698, "step": 7503 }, { "epoch": 1.8240155566358776, "grad_norm": 1.629400369592614, "learning_rate": 4.2160053168624813e-07, "loss": 0.6282012462615967, "step": 7504 }, { "epoch": 1.8242586290714633, "grad_norm": 1.4587930192769112, "learning_rate": 4.204465115321288e-07, "loss": 0.5040055513381958, "step": 7505 }, { "epoch": 1.824501701507049, "grad_norm": 1.6625169705289504, "learning_rate": 4.192940390392419e-07, "loss": 0.6194158792495728, "step": 7506 }, { "epoch": 1.8247447739426348, "grad_norm": 1.9763142474336257, "learning_rate": 4.1814311439378417e-07, "loss": 0.6971737146377563, "step": 7507 }, { "epoch": 1.8249878463782208, "grad_norm": 1.5080937262704404, "learning_rate": 4.169937377816957e-07, "loss": 0.7170711755752563, "step": 7508 }, { "epoch": 1.8252309188138065, "grad_norm": 1.5156429799773308, "learning_rate": 4.15845909388668e-07, "loss": 0.5397158265113831, "step": 7509 }, { "epoch": 1.8254739912493925, "grad_norm": 1.6148455915238615, "learning_rate": 4.1469962940014266e-07, "loss": 0.5601998567581177, "step": 7510 }, { "epoch": 1.8257170636849782, "grad_norm": 2.018869554117644, "learning_rate": 4.1355489800131266e-07, "loss": 0.584208071231842, "step": 7511 }, { "epoch": 1.825960136120564, "grad_norm": 1.8593066309854742, "learning_rate": 4.1241171537711897e-07, "loss": 0.5028250217437744, "step": 7512 }, { "epoch": 1.8262032085561497, "grad_norm": 1.706544422675929, "learning_rate": 4.1127008171225167e-07, "loss": 0.7011351585388184, "step": 7513 }, { "epoch": 1.8264462809917354, "grad_norm": 1.5529677119633756, "learning_rate": 4.1012999719115434e-07, "loss": 0.552605926990509, "step": 7514 }, { "epoch": 1.8266893534273212, "grad_norm": 1.6362475354936667, "learning_rate": 4.08991461998014e-07, "loss": 0.4611266255378723, "step": 7515 }, { "epoch": 1.8269324258629072, "grad_norm": 1.5730847422678629, "learning_rate": 4.078544763167747e-07, "loss": 0.5074219703674316, "step": 7516 }, { "epoch": 1.827175498298493, "grad_norm": 1.5975909777543666, "learning_rate": 4.0671904033112386e-07, "loss": 0.632306694984436, "step": 7517 }, { "epoch": 1.8274185707340789, "grad_norm": 1.5302405278726479, "learning_rate": 4.055851542245037e-07, "loss": 0.5635461807250977, "step": 7518 }, { "epoch": 1.8276616431696646, "grad_norm": 1.8900911970504868, "learning_rate": 4.0445281818009885e-07, "loss": 0.5577338337898254, "step": 7519 }, { "epoch": 1.8279047156052504, "grad_norm": 1.504479829346006, "learning_rate": 4.0332203238085285e-07, "loss": 0.5660429000854492, "step": 7520 }, { "epoch": 1.828147788040836, "grad_norm": 1.6904984003085746, "learning_rate": 4.021927970094541e-07, "loss": 0.560404896736145, "step": 7521 }, { "epoch": 1.8283908604764219, "grad_norm": 1.6947054057634725, "learning_rate": 4.0106511224833665e-07, "loss": 0.568442702293396, "step": 7522 }, { "epoch": 1.8286339329120078, "grad_norm": 1.5202066190413202, "learning_rate": 3.999389782796925e-07, "loss": 0.5674595832824707, "step": 7523 }, { "epoch": 1.8288770053475936, "grad_norm": 1.6032825080101836, "learning_rate": 3.98814395285454e-07, "loss": 0.48983603715896606, "step": 7524 }, { "epoch": 1.8291200777831795, "grad_norm": 1.569609616632016, "learning_rate": 3.976913634473112e-07, "loss": 0.5677398443222046, "step": 7525 }, { "epoch": 1.8293631502187653, "grad_norm": 1.6291763629447094, "learning_rate": 3.9656988294669686e-07, "loss": 0.5144846439361572, "step": 7526 }, { "epoch": 1.829606222654351, "grad_norm": 1.7390059428428364, "learning_rate": 3.954499539647982e-07, "loss": 0.6189659237861633, "step": 7527 }, { "epoch": 1.8298492950899368, "grad_norm": 1.5349880328082002, "learning_rate": 3.943315766825473e-07, "loss": 0.5956740379333496, "step": 7528 }, { "epoch": 1.8300923675255225, "grad_norm": 1.6440156939575015, "learning_rate": 3.932147512806306e-07, "loss": 0.610299825668335, "step": 7529 }, { "epoch": 1.8303354399611083, "grad_norm": 1.8201385000807335, "learning_rate": 3.9209947793948043e-07, "loss": 0.5272360444068909, "step": 7530 }, { "epoch": 1.8305785123966942, "grad_norm": 1.466935563513372, "learning_rate": 3.909857568392783e-07, "loss": 0.520119845867157, "step": 7531 }, { "epoch": 1.83082158483228, "grad_norm": 1.608015356513015, "learning_rate": 3.898735881599558e-07, "loss": 0.5933508276939392, "step": 7532 }, { "epoch": 1.831064657267866, "grad_norm": 1.6945515190870333, "learning_rate": 3.8876297208119253e-07, "loss": 0.6299557089805603, "step": 7533 }, { "epoch": 1.8313077297034517, "grad_norm": 1.6618076096016303, "learning_rate": 3.876539087824205e-07, "loss": 0.5325095653533936, "step": 7534 }, { "epoch": 1.8315508021390374, "grad_norm": 1.8367470067011549, "learning_rate": 3.8654639844281637e-07, "loss": 0.6195364594459534, "step": 7535 }, { "epoch": 1.8317938745746232, "grad_norm": 1.7187732412669918, "learning_rate": 3.854404412413093e-07, "loss": 0.5978748798370361, "step": 7536 }, { "epoch": 1.832036947010209, "grad_norm": 2.043738670227984, "learning_rate": 3.8433603735657634e-07, "loss": 0.6708927154541016, "step": 7537 }, { "epoch": 1.8322800194457949, "grad_norm": 2.275857963170857, "learning_rate": 3.832331869670458e-07, "loss": 0.4499890208244324, "step": 7538 }, { "epoch": 1.8325230918813806, "grad_norm": 1.5894710878996012, "learning_rate": 3.821318902508897e-07, "loss": 0.5159859657287598, "step": 7539 }, { "epoch": 1.8327661643169666, "grad_norm": 1.654068220342335, "learning_rate": 3.810321473860323e-07, "loss": 0.6301312446594238, "step": 7540 }, { "epoch": 1.8330092367525523, "grad_norm": 1.954384773556245, "learning_rate": 3.799339585501494e-07, "loss": 0.6194727420806885, "step": 7541 }, { "epoch": 1.833252309188138, "grad_norm": 1.8167710719023098, "learning_rate": 3.7883732392065996e-07, "loss": 0.4922182559967041, "step": 7542 }, { "epoch": 1.8334953816237238, "grad_norm": 1.7563168865893783, "learning_rate": 3.77742243674738e-07, "loss": 0.5119845867156982, "step": 7543 }, { "epoch": 1.8337384540593096, "grad_norm": 1.5514456325263806, "learning_rate": 3.766487179892997e-07, "loss": 0.5373200178146362, "step": 7544 }, { "epoch": 1.8339815264948953, "grad_norm": 1.568157146084888, "learning_rate": 3.7555674704101817e-07, "loss": 0.6173033714294434, "step": 7545 }, { "epoch": 1.8342245989304813, "grad_norm": 1.4246836571108936, "learning_rate": 3.744663310063068e-07, "loss": 0.5282677412033081, "step": 7546 }, { "epoch": 1.8344676713660673, "grad_norm": 1.5653374454448579, "learning_rate": 3.7337747006133573e-07, "loss": 0.5431182384490967, "step": 7547 }, { "epoch": 1.834710743801653, "grad_norm": 1.692280174524952, "learning_rate": 3.722901643820176e-07, "loss": 0.37982386350631714, "step": 7548 }, { "epoch": 1.8349538162372387, "grad_norm": 2.0148900203405735, "learning_rate": 3.712044141440152e-07, "loss": 0.5549271702766418, "step": 7549 }, { "epoch": 1.8351968886728245, "grad_norm": 1.87107652489875, "learning_rate": 3.701202195227449e-07, "loss": 0.5350375175476074, "step": 7550 }, { "epoch": 1.8354399611084102, "grad_norm": 1.5815331167295013, "learning_rate": 3.6903758069336326e-07, "loss": 0.5374858379364014, "step": 7551 }, { "epoch": 1.835683033543996, "grad_norm": 1.8325793757693294, "learning_rate": 3.679564978307826e-07, "loss": 0.6362526416778564, "step": 7552 }, { "epoch": 1.835926105979582, "grad_norm": 1.7215542480782742, "learning_rate": 3.66876971109662e-07, "loss": 0.5922945737838745, "step": 7553 }, { "epoch": 1.8361691784151677, "grad_norm": 1.5772371347302003, "learning_rate": 3.657990007044088e-07, "loss": 0.6110559701919556, "step": 7554 }, { "epoch": 1.8364122508507537, "grad_norm": 1.6896619747169963, "learning_rate": 3.6472258678917574e-07, "loss": 0.617983877658844, "step": 7555 }, { "epoch": 1.8366553232863394, "grad_norm": 1.7337430600630512, "learning_rate": 3.6364772953786933e-07, "loss": 0.5913004875183105, "step": 7556 }, { "epoch": 1.8368983957219251, "grad_norm": 1.928850936280578, "learning_rate": 3.6257442912414175e-07, "loss": 0.660622239112854, "step": 7557 }, { "epoch": 1.837141468157511, "grad_norm": 1.691838862168106, "learning_rate": 3.6150268572139435e-07, "loss": 0.59464430809021, "step": 7558 }, { "epoch": 1.8373845405930966, "grad_norm": 1.6042428416758279, "learning_rate": 3.604324995027764e-07, "loss": 0.5242499113082886, "step": 7559 }, { "epoch": 1.8376276130286824, "grad_norm": 1.7338502565994827, "learning_rate": 3.5936387064118395e-07, "loss": 0.6828572750091553, "step": 7560 }, { "epoch": 1.8378706854642683, "grad_norm": 1.515993652115806, "learning_rate": 3.5829679930926677e-07, "loss": 0.529929518699646, "step": 7561 }, { "epoch": 1.8381137578998543, "grad_norm": 1.818567022251648, "learning_rate": 3.57231285679418e-07, "loss": 0.5135204792022705, "step": 7562 }, { "epoch": 1.83835683033544, "grad_norm": 1.8807450795621812, "learning_rate": 3.561673299237822e-07, "loss": 0.53022700548172, "step": 7563 }, { "epoch": 1.8385999027710258, "grad_norm": 1.7629335124821974, "learning_rate": 3.5510493221424745e-07, "loss": 0.5492967367172241, "step": 7564 }, { "epoch": 1.8388429752066116, "grad_norm": 1.4772606244165905, "learning_rate": 3.5404409272245757e-07, "loss": 0.7099740505218506, "step": 7565 }, { "epoch": 1.8390860476421973, "grad_norm": 1.5168241325873462, "learning_rate": 3.529848116197976e-07, "loss": 0.5181381106376648, "step": 7566 }, { "epoch": 1.839329120077783, "grad_norm": 1.5433723866717173, "learning_rate": 3.5192708907740513e-07, "loss": 0.5292133092880249, "step": 7567 }, { "epoch": 1.839572192513369, "grad_norm": 1.7836514502872705, "learning_rate": 3.508709252661635e-07, "loss": 0.6223948001861572, "step": 7568 }, { "epoch": 1.8398152649489548, "grad_norm": 1.6212282857962823, "learning_rate": 3.49816320356704e-07, "loss": 0.42907005548477173, "step": 7569 }, { "epoch": 1.8400583373845407, "grad_norm": 1.6680235409645223, "learning_rate": 3.487632745194125e-07, "loss": 0.548282265663147, "step": 7570 }, { "epoch": 1.8403014098201265, "grad_norm": 1.6126500760070202, "learning_rate": 3.4771178792441185e-07, "loss": 0.5329982042312622, "step": 7571 }, { "epoch": 1.8405444822557122, "grad_norm": 1.6741601405731112, "learning_rate": 3.4666186074158394e-07, "loss": 0.6297140717506409, "step": 7572 }, { "epoch": 1.840787554691298, "grad_norm": 1.8397176845104108, "learning_rate": 3.456134931405497e-07, "loss": 0.5818435549736023, "step": 7573 }, { "epoch": 1.8410306271268837, "grad_norm": 1.7382768728653852, "learning_rate": 3.4456668529068483e-07, "loss": 0.5444146990776062, "step": 7574 }, { "epoch": 1.8412736995624697, "grad_norm": 1.554000128349223, "learning_rate": 3.4352143736110843e-07, "loss": 0.6437186598777771, "step": 7575 }, { "epoch": 1.8415167719980554, "grad_norm": 1.4788293816431108, "learning_rate": 3.4247774952069214e-07, "loss": 0.5745055675506592, "step": 7576 }, { "epoch": 1.8417598444336414, "grad_norm": 1.9246100681100857, "learning_rate": 3.4143562193804994e-07, "loss": 0.5417137145996094, "step": 7577 }, { "epoch": 1.8420029168692271, "grad_norm": 1.6360289449740337, "learning_rate": 3.403950547815482e-07, "loss": 0.5881091356277466, "step": 7578 }, { "epoch": 1.8422459893048129, "grad_norm": 1.4910271695020203, "learning_rate": 3.393560482193003e-07, "loss": 0.5759551525115967, "step": 7579 }, { "epoch": 1.8424890617403986, "grad_norm": 1.8084864067143023, "learning_rate": 3.3831860241916536e-07, "loss": 0.5858041644096375, "step": 7580 }, { "epoch": 1.8427321341759844, "grad_norm": 1.8035059031146339, "learning_rate": 3.3728271754875365e-07, "loss": 0.7042722702026367, "step": 7581 }, { "epoch": 1.84297520661157, "grad_norm": 1.8994209354561011, "learning_rate": 3.3624839377541907e-07, "loss": 0.42041027545928955, "step": 7582 }, { "epoch": 1.843218279047156, "grad_norm": 1.580580973210285, "learning_rate": 3.3521563126626797e-07, "loss": 0.6193783283233643, "step": 7583 }, { "epoch": 1.8434613514827418, "grad_norm": 1.9977821998830854, "learning_rate": 3.341844301881514e-07, "loss": 0.574118971824646, "step": 7584 }, { "epoch": 1.8437044239183278, "grad_norm": 1.9262941265052074, "learning_rate": 3.331547907076682e-07, "loss": 0.679192066192627, "step": 7585 }, { "epoch": 1.8439474963539135, "grad_norm": 1.6075086300369406, "learning_rate": 3.321267129911687e-07, "loss": 0.5070498585700989, "step": 7586 }, { "epoch": 1.8441905687894993, "grad_norm": 1.66962260698861, "learning_rate": 3.311001972047434e-07, "loss": 0.5924850702285767, "step": 7587 }, { "epoch": 1.844433641225085, "grad_norm": 1.819384984653572, "learning_rate": 3.300752435142396e-07, "loss": 0.6132354736328125, "step": 7588 }, { "epoch": 1.8446767136606708, "grad_norm": 1.697307573381583, "learning_rate": 3.290518520852437e-07, "loss": 0.5507901906967163, "step": 7589 }, { "epoch": 1.8449197860962567, "grad_norm": 1.6665882741024642, "learning_rate": 3.280300230830957e-07, "loss": 0.4672454595565796, "step": 7590 }, { "epoch": 1.8451628585318425, "grad_norm": 1.5717251496903262, "learning_rate": 3.2700975667288025e-07, "loss": 0.43469083309173584, "step": 7591 }, { "epoch": 1.8454059309674284, "grad_norm": 1.7712790840052333, "learning_rate": 3.25991053019431e-07, "loss": 0.5243342518806458, "step": 7592 }, { "epoch": 1.8456490034030142, "grad_norm": 1.4192262897588637, "learning_rate": 3.2497391228732854e-07, "loss": 0.6284945011138916, "step": 7593 }, { "epoch": 1.8458920758386, "grad_norm": 1.6034310820564204, "learning_rate": 3.239583346408992e-07, "loss": 0.5137461423873901, "step": 7594 }, { "epoch": 1.8461351482741857, "grad_norm": 1.859641473595678, "learning_rate": 3.2294432024422173e-07, "loss": 0.664883017539978, "step": 7595 }, { "epoch": 1.8463782207097714, "grad_norm": 1.4141809952440523, "learning_rate": 3.219318692611151e-07, "loss": 0.5964332222938538, "step": 7596 }, { "epoch": 1.8466212931453572, "grad_norm": 1.558683879702525, "learning_rate": 3.20920981855154e-07, "loss": 0.6568869948387146, "step": 7597 }, { "epoch": 1.8468643655809431, "grad_norm": 1.5976898588242976, "learning_rate": 3.199116581896522e-07, "loss": 0.48106497526168823, "step": 7598 }, { "epoch": 1.8471074380165289, "grad_norm": 1.4572766833846205, "learning_rate": 3.189038984276782e-07, "loss": 0.6222624778747559, "step": 7599 }, { "epoch": 1.8473505104521148, "grad_norm": 1.584299126288542, "learning_rate": 3.178977027320418e-07, "loss": 0.43133237957954407, "step": 7600 }, { "epoch": 1.8475935828877006, "grad_norm": 1.6678217807764641, "learning_rate": 3.16893071265304e-07, "loss": 0.5290481448173523, "step": 7601 }, { "epoch": 1.8478366553232863, "grad_norm": 1.5703298278946354, "learning_rate": 3.158900041897728e-07, "loss": 0.6264729499816895, "step": 7602 }, { "epoch": 1.848079727758872, "grad_norm": 1.5096914987339687, "learning_rate": 3.148885016674996e-07, "loss": 0.674250602722168, "step": 7603 }, { "epoch": 1.8483228001944578, "grad_norm": 1.5995149468881178, "learning_rate": 3.1388856386028953e-07, "loss": 0.4319840669631958, "step": 7604 }, { "epoch": 1.8485658726300438, "grad_norm": 1.8349148343804476, "learning_rate": 3.1289019092968774e-07, "loss": 0.6415689587593079, "step": 7605 }, { "epoch": 1.8488089450656295, "grad_norm": 2.1571469114068327, "learning_rate": 3.118933830369941e-07, "loss": 0.7833950519561768, "step": 7606 }, { "epoch": 1.8490520175012155, "grad_norm": 1.64335343759362, "learning_rate": 3.108981403432487e-07, "loss": 0.5638118982315063, "step": 7607 }, { "epoch": 1.8492950899368013, "grad_norm": 1.5478086219136744, "learning_rate": 3.099044630092429e-07, "loss": 0.555737316608429, "step": 7608 }, { "epoch": 1.849538162372387, "grad_norm": 1.5060103748951243, "learning_rate": 3.089123511955139e-07, "loss": 0.5318240523338318, "step": 7609 }, { "epoch": 1.8497812348079727, "grad_norm": 1.6998550760557767, "learning_rate": 3.079218050623445e-07, "loss": 0.6860259771347046, "step": 7610 }, { "epoch": 1.8500243072435585, "grad_norm": 1.343274610853249, "learning_rate": 3.0693282476977e-07, "loss": 0.49555376172065735, "step": 7611 }, { "epoch": 1.8502673796791442, "grad_norm": 1.5656481367532626, "learning_rate": 3.05945410477565e-07, "loss": 0.6133030652999878, "step": 7612 }, { "epoch": 1.8505104521147302, "grad_norm": 1.8004477277981428, "learning_rate": 3.0495956234525836e-07, "loss": 0.4981146454811096, "step": 7613 }, { "epoch": 1.850753524550316, "grad_norm": 1.5268437265570987, "learning_rate": 3.0397528053211836e-07, "loss": 0.42781633138656616, "step": 7614 }, { "epoch": 1.850996596985902, "grad_norm": 1.6593696329867307, "learning_rate": 3.029925651971688e-07, "loss": 0.44886475801467896, "step": 7615 }, { "epoch": 1.8512396694214877, "grad_norm": 1.6992527559347208, "learning_rate": 3.0201141649917276e-07, "loss": 0.507283627986908, "step": 7616 }, { "epoch": 1.8514827418570734, "grad_norm": 1.5652396575032748, "learning_rate": 3.0103183459664564e-07, "loss": 0.6538492441177368, "step": 7617 }, { "epoch": 1.8517258142926591, "grad_norm": 1.8727339364624664, "learning_rate": 3.000538196478453e-07, "loss": 0.6012519598007202, "step": 7618 }, { "epoch": 1.851968886728245, "grad_norm": 1.7398141314216762, "learning_rate": 2.9907737181077977e-07, "loss": 0.617375373840332, "step": 7619 }, { "epoch": 1.8522119591638309, "grad_norm": 1.8207958844021677, "learning_rate": 2.98102491243204e-07, "loss": 0.6556087732315063, "step": 7620 }, { "epoch": 1.8524550315994166, "grad_norm": 1.613591489655944, "learning_rate": 2.9712917810261645e-07, "loss": 0.6740443706512451, "step": 7621 }, { "epoch": 1.8526981040350026, "grad_norm": 2.1104027502635927, "learning_rate": 2.961574325462668e-07, "loss": 0.7500443458557129, "step": 7622 }, { "epoch": 1.8529411764705883, "grad_norm": 1.4107652470531964, "learning_rate": 2.9518725473114627e-07, "loss": 0.5642383098602295, "step": 7623 }, { "epoch": 1.853184248906174, "grad_norm": 1.3309934550391338, "learning_rate": 2.942186448139983e-07, "loss": 0.48047181963920593, "step": 7624 }, { "epoch": 1.8534273213417598, "grad_norm": 1.6053272914925834, "learning_rate": 2.9325160295130773e-07, "loss": 0.5751594305038452, "step": 7625 }, { "epoch": 1.8536703937773455, "grad_norm": 1.816947457640657, "learning_rate": 2.9228612929931065e-07, "loss": 0.5854326486587524, "step": 7626 }, { "epoch": 1.8539134662129313, "grad_norm": 1.719514682404328, "learning_rate": 2.913222240139879e-07, "loss": 0.49307987093925476, "step": 7627 }, { "epoch": 1.8541565386485173, "grad_norm": 1.8673416653577142, "learning_rate": 2.90359887251066e-07, "loss": 0.6270163059234619, "step": 7628 }, { "epoch": 1.8543996110841032, "grad_norm": 1.557049573974724, "learning_rate": 2.893991191660195e-07, "loss": 0.5010210871696472, "step": 7629 }, { "epoch": 1.854642683519689, "grad_norm": 1.6754774236781966, "learning_rate": 2.884399199140686e-07, "loss": 0.5609187483787537, "step": 7630 }, { "epoch": 1.8548857559552747, "grad_norm": 1.5427977047540875, "learning_rate": 2.8748228965018055e-07, "loss": 0.5393847227096558, "step": 7631 }, { "epoch": 1.8551288283908605, "grad_norm": 1.7269672611044655, "learning_rate": 2.865262285290682e-07, "loss": 0.6978926062583923, "step": 7632 }, { "epoch": 1.8553719008264462, "grad_norm": 1.675139649910353, "learning_rate": 2.8557173670519244e-07, "loss": 0.42088282108306885, "step": 7633 }, { "epoch": 1.855614973262032, "grad_norm": 2.0974943740026815, "learning_rate": 2.8461881433275885e-07, "loss": 0.3861282765865326, "step": 7634 }, { "epoch": 1.855858045697618, "grad_norm": 1.7424457151246928, "learning_rate": 2.83667461565722e-07, "loss": 0.4818567633628845, "step": 7635 }, { "epoch": 1.8561011181332037, "grad_norm": 1.777570781896223, "learning_rate": 2.827176785577801e-07, "loss": 0.5782536268234253, "step": 7636 }, { "epoch": 1.8563441905687896, "grad_norm": 2.0379803209210827, "learning_rate": 2.817694654623804e-07, "loss": 0.6077359318733215, "step": 7637 }, { "epoch": 1.8565872630043754, "grad_norm": 1.7474823235357746, "learning_rate": 2.8082282243271474e-07, "loss": 0.5489153861999512, "step": 7638 }, { "epoch": 1.8568303354399611, "grad_norm": 1.8913818409715553, "learning_rate": 2.798777496217198e-07, "loss": 0.5932180285453796, "step": 7639 }, { "epoch": 1.8570734078755469, "grad_norm": 1.7070411842759146, "learning_rate": 2.789342471820822e-07, "loss": 0.46032536029815674, "step": 7640 }, { "epoch": 1.8573164803111326, "grad_norm": 1.8827031535575156, "learning_rate": 2.779923152662312e-07, "loss": 0.48617517948150635, "step": 7641 }, { "epoch": 1.8575595527467184, "grad_norm": 1.6483659252545784, "learning_rate": 2.770519540263461e-07, "loss": 0.7286633849143982, "step": 7642 }, { "epoch": 1.8578026251823043, "grad_norm": 1.8416126910244077, "learning_rate": 2.761131636143488e-07, "loss": 0.43000921607017517, "step": 7643 }, { "epoch": 1.8580456976178903, "grad_norm": 1.763217231115964, "learning_rate": 2.751759441819124e-07, "loss": 0.7323812246322632, "step": 7644 }, { "epoch": 1.858288770053476, "grad_norm": 1.6913227308968144, "learning_rate": 2.742402958804491e-07, "loss": 0.4581894874572754, "step": 7645 }, { "epoch": 1.8585318424890618, "grad_norm": 1.6365994277096876, "learning_rate": 2.733062188611246e-07, "loss": 0.5062367916107178, "step": 7646 }, { "epoch": 1.8587749149246475, "grad_norm": 2.076631270885127, "learning_rate": 2.7237371327484495e-07, "loss": 0.4908297061920166, "step": 7647 }, { "epoch": 1.8590179873602333, "grad_norm": 1.7728083390255525, "learning_rate": 2.7144277927226516e-07, "loss": 0.6067780256271362, "step": 7648 }, { "epoch": 1.859261059795819, "grad_norm": 1.6757392400794344, "learning_rate": 2.70513417003786e-07, "loss": 0.6111218333244324, "step": 7649 }, { "epoch": 1.859504132231405, "grad_norm": 1.9002164013312688, "learning_rate": 2.695856266195529e-07, "loss": 0.5603574514389038, "step": 7650 }, { "epoch": 1.8597472046669907, "grad_norm": 1.4513343263986371, "learning_rate": 2.6865940826945933e-07, "loss": 0.4702567458152771, "step": 7651 }, { "epoch": 1.8599902771025767, "grad_norm": 1.9079098747461063, "learning_rate": 2.6773476210314563e-07, "loss": 0.5221645832061768, "step": 7652 }, { "epoch": 1.8602333495381624, "grad_norm": 1.573463331877222, "learning_rate": 2.668116882699956e-07, "loss": 0.6210920810699463, "step": 7653 }, { "epoch": 1.8604764219737482, "grad_norm": 1.548947253914556, "learning_rate": 2.658901869191377e-07, "loss": 0.5999516844749451, "step": 7654 }, { "epoch": 1.860719494409334, "grad_norm": 1.9729347613468657, "learning_rate": 2.6497025819945287e-07, "loss": 0.5331133604049683, "step": 7655 }, { "epoch": 1.8609625668449197, "grad_norm": 1.6676914379778294, "learning_rate": 2.640519022595589e-07, "loss": 0.5248265266418457, "step": 7656 }, { "epoch": 1.8612056392805056, "grad_norm": 1.452883739150583, "learning_rate": 2.631351192478293e-07, "loss": 0.3608626127243042, "step": 7657 }, { "epoch": 1.8614487117160914, "grad_norm": 1.6863807700054523, "learning_rate": 2.622199093123745e-07, "loss": 0.516484260559082, "step": 7658 }, { "epoch": 1.8616917841516774, "grad_norm": 1.8060971236560892, "learning_rate": 2.6130627260105514e-07, "loss": 0.5631277561187744, "step": 7659 }, { "epoch": 1.861934856587263, "grad_norm": 1.5437249814165321, "learning_rate": 2.6039420926147975e-07, "loss": 0.5106554627418518, "step": 7660 }, { "epoch": 1.8621779290228488, "grad_norm": 1.635919504909455, "learning_rate": 2.5948371944099715e-07, "loss": 0.6281803250312805, "step": 7661 }, { "epoch": 1.8624210014584346, "grad_norm": 1.5312556445658119, "learning_rate": 2.585748032867075e-07, "loss": 0.5296651721000671, "step": 7662 }, { "epoch": 1.8626640738940203, "grad_norm": 1.74463714048543, "learning_rate": 2.5766746094545325e-07, "loss": 0.5519661903381348, "step": 7663 }, { "epoch": 1.862907146329606, "grad_norm": 1.4321355353843819, "learning_rate": 2.567616925638239e-07, "loss": 0.5056583881378174, "step": 7664 }, { "epoch": 1.863150218765192, "grad_norm": 1.8418902706390274, "learning_rate": 2.5585749828815234e-07, "loss": 0.5012210011482239, "step": 7665 }, { "epoch": 1.8633932912007778, "grad_norm": 1.9381415120556365, "learning_rate": 2.549548782645217e-07, "loss": 0.49517571926116943, "step": 7666 }, { "epoch": 1.8636363636363638, "grad_norm": 1.54892592644936, "learning_rate": 2.540538326387576e-07, "loss": 0.4581488370895386, "step": 7667 }, { "epoch": 1.8638794360719495, "grad_norm": 1.6798657853202552, "learning_rate": 2.5315436155643024e-07, "loss": 0.5512526035308838, "step": 7668 }, { "epoch": 1.8641225085075352, "grad_norm": 1.6380786911218068, "learning_rate": 2.5225646516285897e-07, "loss": 0.5947009325027466, "step": 7669 }, { "epoch": 1.864365580943121, "grad_norm": 1.5744034084197247, "learning_rate": 2.5136014360310655e-07, "loss": 0.675125241279602, "step": 7670 }, { "epoch": 1.8646086533787067, "grad_norm": 2.128957125259765, "learning_rate": 2.504653970219817e-07, "loss": 0.5653089284896851, "step": 7671 }, { "epoch": 1.8648517258142927, "grad_norm": 1.571656172365456, "learning_rate": 2.495722255640365e-07, "loss": 0.5003147721290588, "step": 7672 }, { "epoch": 1.8650947982498784, "grad_norm": 1.5716585147479627, "learning_rate": 2.486806293735744e-07, "loss": 0.4631611704826355, "step": 7673 }, { "epoch": 1.8653378706854644, "grad_norm": 1.5005994528442603, "learning_rate": 2.477906085946369e-07, "loss": 0.48364537954330444, "step": 7674 }, { "epoch": 1.8655809431210502, "grad_norm": 1.5668142995619767, "learning_rate": 2.469021633710167e-07, "loss": 0.5641829967498779, "step": 7675 }, { "epoch": 1.865824015556636, "grad_norm": 1.882769361347896, "learning_rate": 2.4601529384625013e-07, "loss": 0.7627846598625183, "step": 7676 }, { "epoch": 1.8660670879922217, "grad_norm": 1.7931270308118288, "learning_rate": 2.45130000163617e-07, "loss": 0.5080038905143738, "step": 7677 }, { "epoch": 1.8663101604278074, "grad_norm": 1.8787226712454361, "learning_rate": 2.4424628246614737e-07, "loss": 0.5471298694610596, "step": 7678 }, { "epoch": 1.8665532328633931, "grad_norm": 1.559063745086715, "learning_rate": 2.4336414089661144e-07, "loss": 0.4715222716331482, "step": 7679 }, { "epoch": 1.866796305298979, "grad_norm": 1.7258421770380685, "learning_rate": 2.4248357559752746e-07, "loss": 0.5153000354766846, "step": 7680 }, { "epoch": 1.8670393777345649, "grad_norm": 1.8942657364393254, "learning_rate": 2.4160458671115717e-07, "loss": 0.5489841103553772, "step": 7681 }, { "epoch": 1.8672824501701508, "grad_norm": 1.5439033433565839, "learning_rate": 2.4072717437951145e-07, "loss": 0.6382418870925903, "step": 7682 }, { "epoch": 1.8675255226057366, "grad_norm": 1.74516782061006, "learning_rate": 2.398513387443413e-07, "loss": 0.5548015236854553, "step": 7683 }, { "epoch": 1.8677685950413223, "grad_norm": 1.5624771393435408, "learning_rate": 2.3897707994714804e-07, "loss": 0.6808834075927734, "step": 7684 }, { "epoch": 1.868011667476908, "grad_norm": 1.804191824294957, "learning_rate": 2.381043981291753e-07, "loss": 0.5832809209823608, "step": 7685 }, { "epoch": 1.8682547399124938, "grad_norm": 1.9910220267579402, "learning_rate": 2.3723329343141034e-07, "loss": 0.7198724746704102, "step": 7686 }, { "epoch": 1.8684978123480798, "grad_norm": 2.0205152279969827, "learning_rate": 2.3636376599459165e-07, "loss": 0.5745752453804016, "step": 7687 }, { "epoch": 1.8687408847836655, "grad_norm": 1.6927093195541862, "learning_rate": 2.354958159591958e-07, "loss": 0.5604575872421265, "step": 7688 }, { "epoch": 1.8689839572192515, "grad_norm": 1.708872580690476, "learning_rate": 2.3462944346545058e-07, "loss": 0.4932689666748047, "step": 7689 }, { "epoch": 1.8692270296548372, "grad_norm": 1.7873684442641808, "learning_rate": 2.3376464865332294e-07, "loss": 0.6401560306549072, "step": 7690 }, { "epoch": 1.869470102090423, "grad_norm": 1.6804871945999325, "learning_rate": 2.3290143166252998e-07, "loss": 0.5183942317962646, "step": 7691 }, { "epoch": 1.8697131745260087, "grad_norm": 1.694217353938917, "learning_rate": 2.320397926325313e-07, "loss": 0.6567792892456055, "step": 7692 }, { "epoch": 1.8699562469615945, "grad_norm": 1.8420568956899195, "learning_rate": 2.3117973170253216e-07, "loss": 0.5634849071502686, "step": 7693 }, { "epoch": 1.8701993193971802, "grad_norm": 1.5140186388513848, "learning_rate": 2.3032124901148478e-07, "loss": 0.6496703624725342, "step": 7694 }, { "epoch": 1.8704423918327662, "grad_norm": 1.536079454854506, "learning_rate": 2.2946434469808265e-07, "loss": 0.5600416660308838, "step": 7695 }, { "epoch": 1.870685464268352, "grad_norm": 1.8313321995428458, "learning_rate": 2.2860901890076725e-07, "loss": 0.7283717393875122, "step": 7696 }, { "epoch": 1.8709285367039379, "grad_norm": 1.5630203863456096, "learning_rate": 2.2775527175772361e-07, "loss": 0.46134012937545776, "step": 7697 }, { "epoch": 1.8711716091395236, "grad_norm": 1.7085443150013453, "learning_rate": 2.2690310340688247e-07, "loss": 0.4966389536857605, "step": 7698 }, { "epoch": 1.8714146815751094, "grad_norm": 1.6543403540415662, "learning_rate": 2.260525139859171e-07, "loss": 0.3952391743659973, "step": 7699 }, { "epoch": 1.8716577540106951, "grad_norm": 1.8638116873075512, "learning_rate": 2.2520350363224975e-07, "loss": 0.6515939831733704, "step": 7700 }, { "epoch": 1.8719008264462809, "grad_norm": 1.9300656730050358, "learning_rate": 2.2435607248304624e-07, "loss": 0.6628179550170898, "step": 7701 }, { "epoch": 1.8721438988818668, "grad_norm": 1.621394035548756, "learning_rate": 2.2351022067521377e-07, "loss": 0.6213867664337158, "step": 7702 }, { "epoch": 1.8723869713174526, "grad_norm": 1.521877566302022, "learning_rate": 2.2266594834541078e-07, "loss": 0.5767359733581543, "step": 7703 }, { "epoch": 1.8726300437530385, "grad_norm": 1.6876860738776518, "learning_rate": 2.218232556300337e-07, "loss": 0.5481653213500977, "step": 7704 }, { "epoch": 1.8728731161886243, "grad_norm": 1.6507754468990632, "learning_rate": 2.209821426652292e-07, "loss": 0.5829246044158936, "step": 7705 }, { "epoch": 1.87311618862421, "grad_norm": 1.8618726561887557, "learning_rate": 2.2014260958688417e-07, "loss": 0.532573938369751, "step": 7706 }, { "epoch": 1.8733592610597958, "grad_norm": 1.6686956190676052, "learning_rate": 2.193046565306345e-07, "loss": 0.594386100769043, "step": 7707 }, { "epoch": 1.8736023334953815, "grad_norm": 1.7026633977785046, "learning_rate": 2.1846828363185745e-07, "loss": 0.587457001209259, "step": 7708 }, { "epoch": 1.8738454059309673, "grad_norm": 1.3782250045689735, "learning_rate": 2.176334910256772e-07, "loss": 0.48141148686408997, "step": 7709 }, { "epoch": 1.8740884783665532, "grad_norm": 1.4384083240013203, "learning_rate": 2.1680027884696363e-07, "loss": 0.49543464183807373, "step": 7710 }, { "epoch": 1.8743315508021392, "grad_norm": 1.781834173916383, "learning_rate": 2.1596864723032574e-07, "loss": 0.5184134244918823, "step": 7711 }, { "epoch": 1.874574623237725, "grad_norm": 1.8703111624833155, "learning_rate": 2.151385963101238e-07, "loss": 0.4947367310523987, "step": 7712 }, { "epoch": 1.8748176956733107, "grad_norm": 1.6103537917077926, "learning_rate": 2.1431012622045943e-07, "loss": 0.5448033809661865, "step": 7713 }, { "epoch": 1.8750607681088964, "grad_norm": 1.6442457370769294, "learning_rate": 2.1348323709517894e-07, "loss": 0.5889345407485962, "step": 7714 }, { "epoch": 1.8753038405444822, "grad_norm": 1.5859792338672367, "learning_rate": 2.126579290678721e-07, "loss": 0.5745453834533691, "step": 7715 }, { "epoch": 1.875546912980068, "grad_norm": 1.384898702280333, "learning_rate": 2.1183420227187667e-07, "loss": 0.5494492053985596, "step": 7716 }, { "epoch": 1.875789985415654, "grad_norm": 1.6308434365501787, "learning_rate": 2.1101205684027294e-07, "loss": 0.44019925594329834, "step": 7717 }, { "epoch": 1.8760330578512396, "grad_norm": 1.4074778366176872, "learning_rate": 2.1019149290588346e-07, "loss": 0.5263615250587463, "step": 7718 }, { "epoch": 1.8762761302868256, "grad_norm": 1.8790927115253038, "learning_rate": 2.0937251060128116e-07, "loss": 0.6416229605674744, "step": 7719 }, { "epoch": 1.8765192027224114, "grad_norm": 1.5335462247574465, "learning_rate": 2.085551100587768e-07, "loss": 0.48630982637405396, "step": 7720 }, { "epoch": 1.876762275157997, "grad_norm": 1.6726710643054479, "learning_rate": 2.0773929141043148e-07, "loss": 0.6325064301490784, "step": 7721 }, { "epoch": 1.8770053475935828, "grad_norm": 1.7148130501608014, "learning_rate": 2.0692505478804414e-07, "loss": 0.44983893632888794, "step": 7722 }, { "epoch": 1.8772484200291686, "grad_norm": 1.66168017484003, "learning_rate": 2.0611240032316515e-07, "loss": 0.6251232624053955, "step": 7723 }, { "epoch": 1.8774914924647546, "grad_norm": 1.56700458515074, "learning_rate": 2.0530132814708393e-07, "loss": 0.680910050868988, "step": 7724 }, { "epoch": 1.8777345649003403, "grad_norm": 1.6550895534310142, "learning_rate": 2.0449183839083675e-07, "loss": 0.6115360260009766, "step": 7725 }, { "epoch": 1.8779776373359263, "grad_norm": 1.7225328264512636, "learning_rate": 2.0368393118520457e-07, "loss": 0.41538721323013306, "step": 7726 }, { "epoch": 1.878220709771512, "grad_norm": 1.5050288973038919, "learning_rate": 2.0287760666071188e-07, "loss": 0.6037436723709106, "step": 7727 }, { "epoch": 1.8784637822070978, "grad_norm": 1.6972878211809936, "learning_rate": 2.020728649476278e-07, "loss": 0.5614669322967529, "step": 7728 }, { "epoch": 1.8787068546426835, "grad_norm": 1.8096459024627891, "learning_rate": 2.0126970617596386e-07, "loss": 0.5870437026023865, "step": 7729 }, { "epoch": 1.8789499270782692, "grad_norm": 1.7990381444042334, "learning_rate": 2.0046813047547963e-07, "loss": 0.6668080687522888, "step": 7730 }, { "epoch": 1.879192999513855, "grad_norm": 1.5035301688989298, "learning_rate": 1.9966813797567375e-07, "loss": 0.644027054309845, "step": 7731 }, { "epoch": 1.879436071949441, "grad_norm": 1.835239738139721, "learning_rate": 1.9886972880579502e-07, "loss": 0.5653098821640015, "step": 7732 }, { "epoch": 1.8796791443850267, "grad_norm": 1.5781635633564297, "learning_rate": 1.980729030948314e-07, "loss": 0.554958701133728, "step": 7733 }, { "epoch": 1.8799222168206127, "grad_norm": 1.5314587460454645, "learning_rate": 1.972776609715188e-07, "loss": 0.7691187858581543, "step": 7734 }, { "epoch": 1.8801652892561984, "grad_norm": 1.6505427554352774, "learning_rate": 1.964840025643333e-07, "loss": 0.6942070722579956, "step": 7735 }, { "epoch": 1.8804083616917842, "grad_norm": 1.7819614381012971, "learning_rate": 1.9569192800150127e-07, "loss": 0.7166361808776855, "step": 7736 }, { "epoch": 1.88065143412737, "grad_norm": 1.5930511847459636, "learning_rate": 1.949014374109859e-07, "loss": 0.4614207148551941, "step": 7737 }, { "epoch": 1.8808945065629556, "grad_norm": 1.6768510766616722, "learning_rate": 1.9411253092049831e-07, "loss": 0.5728522539138794, "step": 7738 }, { "epoch": 1.8811375789985416, "grad_norm": 1.5079716157282343, "learning_rate": 1.9332520865749438e-07, "loss": 0.5086087584495544, "step": 7739 }, { "epoch": 1.8813806514341274, "grad_norm": 1.7241335752214144, "learning_rate": 1.9253947074917122e-07, "loss": 0.5460541248321533, "step": 7740 }, { "epoch": 1.8816237238697133, "grad_norm": 1.750403036972762, "learning_rate": 1.91755317322474e-07, "loss": 0.7092289924621582, "step": 7741 }, { "epoch": 1.881866796305299, "grad_norm": 1.8260778613583062, "learning_rate": 1.9097274850408798e-07, "loss": 0.5998955368995667, "step": 7742 }, { "epoch": 1.8821098687408848, "grad_norm": 1.587878550309057, "learning_rate": 1.901917644204454e-07, "loss": 0.6296600103378296, "step": 7743 }, { "epoch": 1.8823529411764706, "grad_norm": 1.508249033595076, "learning_rate": 1.8941236519771978e-07, "loss": 0.47445589303970337, "step": 7744 }, { "epoch": 1.8825960136120563, "grad_norm": 1.7750723977032878, "learning_rate": 1.8863455096183148e-07, "loss": 0.5391896367073059, "step": 7745 }, { "epoch": 1.882839086047642, "grad_norm": 1.7219480724936047, "learning_rate": 1.8785832183844222e-07, "loss": 0.5763751268386841, "step": 7746 }, { "epoch": 1.883082158483228, "grad_norm": 1.5857757962409604, "learning_rate": 1.8708367795295834e-07, "loss": 0.4817984104156494, "step": 7747 }, { "epoch": 1.8833252309188138, "grad_norm": 1.875798995738305, "learning_rate": 1.8631061943053086e-07, "loss": 0.5656089782714844, "step": 7748 }, { "epoch": 1.8835683033543997, "grad_norm": 1.7160510007303187, "learning_rate": 1.8553914639605207e-07, "loss": 0.6206098794937134, "step": 7749 }, { "epoch": 1.8838113757899855, "grad_norm": 1.6672394608833947, "learning_rate": 1.8476925897416453e-07, "loss": 0.4756508469581604, "step": 7750 }, { "epoch": 1.8840544482255712, "grad_norm": 1.741812846336381, "learning_rate": 1.840009572892476e-07, "loss": 0.5547622442245483, "step": 7751 }, { "epoch": 1.884297520661157, "grad_norm": 1.64468286171616, "learning_rate": 1.8323424146542757e-07, "loss": 0.5648959875106812, "step": 7752 }, { "epoch": 1.8845405930967427, "grad_norm": 1.7080994031278744, "learning_rate": 1.8246911162657422e-07, "loss": 0.5864073038101196, "step": 7753 }, { "epoch": 1.8847836655323287, "grad_norm": 1.6065536331542818, "learning_rate": 1.817055678963009e-07, "loss": 0.6092144250869751, "step": 7754 }, { "epoch": 1.8850267379679144, "grad_norm": 1.748094650052903, "learning_rate": 1.809436103979656e-07, "loss": 0.5884865522384644, "step": 7755 }, { "epoch": 1.8852698104035004, "grad_norm": 1.828771488200622, "learning_rate": 1.8018323925466653e-07, "loss": 0.6063424348831177, "step": 7756 }, { "epoch": 1.8855128828390861, "grad_norm": 1.7018614869337632, "learning_rate": 1.7942445458925206e-07, "loss": 0.6643038988113403, "step": 7757 }, { "epoch": 1.8857559552746719, "grad_norm": 1.4824111257787085, "learning_rate": 1.786672565243075e-07, "loss": 0.450206458568573, "step": 7758 }, { "epoch": 1.8859990277102576, "grad_norm": 1.6574380828615938, "learning_rate": 1.779116451821683e-07, "loss": 0.5376932621002197, "step": 7759 }, { "epoch": 1.8862421001458434, "grad_norm": 1.6879902036649095, "learning_rate": 1.771576206849057e-07, "loss": 0.48125123977661133, "step": 7760 }, { "epoch": 1.8864851725814291, "grad_norm": 1.7625092299054879, "learning_rate": 1.764051831543434e-07, "loss": 0.6430345773696899, "step": 7761 }, { "epoch": 1.886728245017015, "grad_norm": 1.5862774081654076, "learning_rate": 1.756543327120408e-07, "loss": 0.46804726123809814, "step": 7762 }, { "epoch": 1.8869713174526008, "grad_norm": 1.582918687885871, "learning_rate": 1.7490506947930752e-07, "loss": 0.5242605209350586, "step": 7763 }, { "epoch": 1.8872143898881868, "grad_norm": 1.7880122488253145, "learning_rate": 1.7415739357719007e-07, "loss": 0.5216671228408813, "step": 7764 }, { "epoch": 1.8874574623237725, "grad_norm": 1.811299672235424, "learning_rate": 1.734113051264852e-07, "loss": 0.7931971549987793, "step": 7765 }, { "epoch": 1.8877005347593583, "grad_norm": 1.5477848411446626, "learning_rate": 1.7266680424772863e-07, "loss": 0.3835228979587555, "step": 7766 }, { "epoch": 1.887943607194944, "grad_norm": 1.8234353842061994, "learning_rate": 1.7192389106120088e-07, "loss": 0.6291341781616211, "step": 7767 }, { "epoch": 1.8881866796305298, "grad_norm": 1.729762248465934, "learning_rate": 1.7118256568692703e-07, "loss": 0.719878077507019, "step": 7768 }, { "epoch": 1.8884297520661157, "grad_norm": 1.5732111081655988, "learning_rate": 1.704428282446735e-07, "loss": 0.6668254137039185, "step": 7769 }, { "epoch": 1.8886728245017015, "grad_norm": 2.022804610474918, "learning_rate": 1.6970467885395248e-07, "loss": 0.5879684090614319, "step": 7770 }, { "epoch": 1.8889158969372875, "grad_norm": 1.5666763117987983, "learning_rate": 1.6896811763401854e-07, "loss": 0.4185545742511749, "step": 7771 }, { "epoch": 1.8891589693728732, "grad_norm": 1.4745366806537794, "learning_rate": 1.6823314470386875e-07, "loss": 0.577307403087616, "step": 7772 }, { "epoch": 1.889402041808459, "grad_norm": 1.636389817113941, "learning_rate": 1.674997601822448e-07, "loss": 0.503916323184967, "step": 7773 }, { "epoch": 1.8896451142440447, "grad_norm": 1.6569895801476806, "learning_rate": 1.6676796418763074e-07, "loss": 0.46335655450820923, "step": 7774 }, { "epoch": 1.8898881866796304, "grad_norm": 1.7142197573970595, "learning_rate": 1.6603775683825762e-07, "loss": 0.4234963059425354, "step": 7775 }, { "epoch": 1.8901312591152162, "grad_norm": 1.6586849313706309, "learning_rate": 1.6530913825209216e-07, "loss": 0.556725263595581, "step": 7776 }, { "epoch": 1.8903743315508021, "grad_norm": 1.6334175879397415, "learning_rate": 1.6458210854685353e-07, "loss": 0.4995235204696655, "step": 7777 }, { "epoch": 1.890617403986388, "grad_norm": 1.6381234727934055, "learning_rate": 1.6385666783999666e-07, "loss": 0.5359085202217102, "step": 7778 }, { "epoch": 1.8908604764219739, "grad_norm": 1.4855155362238048, "learning_rate": 1.6313281624872556e-07, "loss": 0.5429174900054932, "step": 7779 }, { "epoch": 1.8911035488575596, "grad_norm": 1.7709363106143399, "learning_rate": 1.6241055388998229e-07, "loss": 0.5327854156494141, "step": 7780 }, { "epoch": 1.8913466212931453, "grad_norm": 1.7283321175709523, "learning_rate": 1.616898808804568e-07, "loss": 0.5802199840545654, "step": 7781 }, { "epoch": 1.891589693728731, "grad_norm": 1.799674212416833, "learning_rate": 1.6097079733657817e-07, "loss": 0.5169596076011658, "step": 7782 }, { "epoch": 1.8918327661643168, "grad_norm": 1.828014766796504, "learning_rate": 1.6025330337452127e-07, "loss": 0.6990519762039185, "step": 7783 }, { "epoch": 1.8920758385999028, "grad_norm": 1.8288277048641817, "learning_rate": 1.595373991102045e-07, "loss": 0.632573664188385, "step": 7784 }, { "epoch": 1.8923189110354885, "grad_norm": 1.6421447196559875, "learning_rate": 1.5882308465928752e-07, "loss": 0.5634104609489441, "step": 7785 }, { "epoch": 1.8925619834710745, "grad_norm": 1.6206022798584048, "learning_rate": 1.5811036013717583e-07, "loss": 0.45638880133628845, "step": 7786 }, { "epoch": 1.8928050559066603, "grad_norm": 1.4840295121654832, "learning_rate": 1.57399225659014e-07, "loss": 0.5162194967269897, "step": 7787 }, { "epoch": 1.893048128342246, "grad_norm": 1.4726045879310365, "learning_rate": 1.5668968133969453e-07, "loss": 0.4275258183479309, "step": 7788 }, { "epoch": 1.8932912007778318, "grad_norm": 1.4332114315283864, "learning_rate": 1.5598172729384686e-07, "loss": 0.468000590801239, "step": 7789 }, { "epoch": 1.8935342732134175, "grad_norm": 1.5684396364714213, "learning_rate": 1.552753636358506e-07, "loss": 0.5879625082015991, "step": 7790 }, { "epoch": 1.8937773456490032, "grad_norm": 1.5073949422259991, "learning_rate": 1.5457059047982448e-07, "loss": 0.5579147338867188, "step": 7791 }, { "epoch": 1.8940204180845892, "grad_norm": 1.6279226582129087, "learning_rate": 1.5386740793962961e-07, "loss": 0.6585639715194702, "step": 7792 }, { "epoch": 1.8942634905201752, "grad_norm": 1.7303676874930554, "learning_rate": 1.5316581612887294e-07, "loss": 0.6431276798248291, "step": 7793 }, { "epoch": 1.894506562955761, "grad_norm": 1.6847373651606588, "learning_rate": 1.5246581516090042e-07, "loss": 0.5775979161262512, "step": 7794 }, { "epoch": 1.8947496353913467, "grad_norm": 1.5721827984007328, "learning_rate": 1.5176740514880606e-07, "loss": 0.5160274505615234, "step": 7795 }, { "epoch": 1.8949927078269324, "grad_norm": 1.4642179626057403, "learning_rate": 1.5107058620542293e-07, "loss": 0.4751996397972107, "step": 7796 }, { "epoch": 1.8952357802625182, "grad_norm": 1.6056622183555427, "learning_rate": 1.5037535844332762e-07, "loss": 0.6413856744766235, "step": 7797 }, { "epoch": 1.895478852698104, "grad_norm": 1.5536153307361475, "learning_rate": 1.4968172197484032e-07, "loss": 0.46943509578704834, "step": 7798 }, { "epoch": 1.8957219251336899, "grad_norm": 1.5096881903297619, "learning_rate": 1.4898967691202581e-07, "loss": 0.5496254563331604, "step": 7799 }, { "epoch": 1.8959649975692756, "grad_norm": 1.6331068786236957, "learning_rate": 1.48299223366688e-07, "loss": 0.46369320154190063, "step": 7800 }, { "epoch": 1.8962080700048616, "grad_norm": 1.6318754743926587, "learning_rate": 1.4761036145037766e-07, "loss": 0.6446744203567505, "step": 7801 }, { "epoch": 1.8964511424404473, "grad_norm": 1.8776272963357388, "learning_rate": 1.4692309127438463e-07, "loss": 0.6572991013526917, "step": 7802 }, { "epoch": 1.896694214876033, "grad_norm": 1.7088494170289616, "learning_rate": 1.4623741294974459e-07, "loss": 0.7837535738945007, "step": 7803 }, { "epoch": 1.8969372873116188, "grad_norm": 1.7351870070778705, "learning_rate": 1.4555332658723553e-07, "loss": 0.5903918743133545, "step": 7804 }, { "epoch": 1.8971803597472046, "grad_norm": 1.6027436780477549, "learning_rate": 1.4487083229737574e-07, "loss": 0.6797206401824951, "step": 7805 }, { "epoch": 1.8974234321827905, "grad_norm": 1.5468965749025458, "learning_rate": 1.4418993019043038e-07, "loss": 0.5265955924987793, "step": 7806 }, { "epoch": 1.8976665046183763, "grad_norm": 1.6211580107702281, "learning_rate": 1.435106203764025e-07, "loss": 0.4402262270450592, "step": 7807 }, { "epoch": 1.8979095770539622, "grad_norm": 1.735856219924877, "learning_rate": 1.4283290296504214e-07, "loss": 0.5074328184127808, "step": 7808 }, { "epoch": 1.898152649489548, "grad_norm": 1.7223118964828386, "learning_rate": 1.421567780658417e-07, "loss": 0.4367665648460388, "step": 7809 }, { "epoch": 1.8983957219251337, "grad_norm": 1.5315608191888657, "learning_rate": 1.4148224578803272e-07, "loss": 0.49211326241493225, "step": 7810 }, { "epoch": 1.8986387943607195, "grad_norm": 1.5514820319650748, "learning_rate": 1.4080930624059463e-07, "loss": 0.5359773635864258, "step": 7811 }, { "epoch": 1.8988818667963052, "grad_norm": 2.1603735112510942, "learning_rate": 1.4013795953224384e-07, "loss": 0.6409411430358887, "step": 7812 }, { "epoch": 1.899124939231891, "grad_norm": 1.7099231270664967, "learning_rate": 1.3946820577144359e-07, "loss": 0.568091630935669, "step": 7813 }, { "epoch": 1.899368011667477, "grad_norm": 1.670887717883516, "learning_rate": 1.3880004506639955e-07, "loss": 0.4993840456008911, "step": 7814 }, { "epoch": 1.8996110841030627, "grad_norm": 1.7712913351642694, "learning_rate": 1.3813347752505645e-07, "loss": 0.5239939093589783, "step": 7815 }, { "epoch": 1.8998541565386486, "grad_norm": 1.5353178353709316, "learning_rate": 1.3746850325510708e-07, "loss": 0.5640568733215332, "step": 7816 }, { "epoch": 1.9000972289742344, "grad_norm": 1.7480674782552088, "learning_rate": 1.3680512236398102e-07, "loss": 0.5832219123840332, "step": 7817 }, { "epoch": 1.9003403014098201, "grad_norm": 1.4709536074458518, "learning_rate": 1.361433349588559e-07, "loss": 0.47876977920532227, "step": 7818 }, { "epoch": 1.9005833738454059, "grad_norm": 1.5679366043470475, "learning_rate": 1.354831411466473e-07, "loss": 0.5298969149589539, "step": 7819 }, { "epoch": 1.9008264462809916, "grad_norm": 1.8078440529881061, "learning_rate": 1.3482454103401655e-07, "loss": 0.47520726919174194, "step": 7820 }, { "epoch": 1.9010695187165776, "grad_norm": 1.7664189696298562, "learning_rate": 1.341675347273652e-07, "loss": 0.7611651420593262, "step": 7821 }, { "epoch": 1.9013125911521633, "grad_norm": 1.5491737876719753, "learning_rate": 1.3351212233284062e-07, "loss": 0.5160998106002808, "step": 7822 }, { "epoch": 1.9015556635877493, "grad_norm": 1.659118693867665, "learning_rate": 1.3285830395632693e-07, "loss": 0.4854723811149597, "step": 7823 }, { "epoch": 1.901798736023335, "grad_norm": 1.6179366302136147, "learning_rate": 1.3220607970345746e-07, "loss": 0.6521256566047668, "step": 7824 }, { "epoch": 1.9020418084589208, "grad_norm": 1.815607783787043, "learning_rate": 1.3155544967960455e-07, "loss": 0.5420296788215637, "step": 7825 }, { "epoch": 1.9022848808945065, "grad_norm": 1.8448711837038727, "learning_rate": 1.309064139898808e-07, "loss": 0.5796738266944885, "step": 7826 }, { "epoch": 1.9025279533300923, "grad_norm": 1.7596957815836842, "learning_rate": 1.3025897273914568e-07, "loss": 0.5714402198791504, "step": 7827 }, { "epoch": 1.902771025765678, "grad_norm": 1.7481228147490098, "learning_rate": 1.2961312603199772e-07, "loss": 0.6441277861595154, "step": 7828 }, { "epoch": 1.903014098201264, "grad_norm": 1.8366263232671565, "learning_rate": 1.2896887397278125e-07, "loss": 0.5722419619560242, "step": 7829 }, { "epoch": 1.9032571706368497, "grad_norm": 1.773092891259718, "learning_rate": 1.2832621666557744e-07, "loss": 0.48379284143447876, "step": 7830 }, { "epoch": 1.9035002430724357, "grad_norm": 1.4984724802875389, "learning_rate": 1.2768515421421656e-07, "loss": 0.5761758089065552, "step": 7831 }, { "epoch": 1.9037433155080214, "grad_norm": 1.569547420261906, "learning_rate": 1.270456867222658e-07, "loss": 0.4363502264022827, "step": 7832 }, { "epoch": 1.9039863879436072, "grad_norm": 1.669778869527229, "learning_rate": 1.2640781429303916e-07, "loss": 0.5518699288368225, "step": 7833 }, { "epoch": 1.904229460379193, "grad_norm": 1.6801157273901037, "learning_rate": 1.2577153702958756e-07, "loss": 0.5483746528625488, "step": 7834 }, { "epoch": 1.9044725328147787, "grad_norm": 2.1255659075941007, "learning_rate": 1.2513685503470984e-07, "loss": 0.6079699397087097, "step": 7835 }, { "epoch": 1.9047156052503647, "grad_norm": 1.6478983845220228, "learning_rate": 1.245037684109418e-07, "loss": 0.5469602346420288, "step": 7836 }, { "epoch": 1.9049586776859504, "grad_norm": 1.9450054895433497, "learning_rate": 1.2387227726056606e-07, "loss": 0.6172827482223511, "step": 7837 }, { "epoch": 1.9052017501215364, "grad_norm": 1.4455029838565423, "learning_rate": 1.2324238168560543e-07, "loss": 0.4481564164161682, "step": 7838 }, { "epoch": 1.905444822557122, "grad_norm": 1.670739619596825, "learning_rate": 1.2261408178782518e-07, "loss": 0.470109224319458, "step": 7839 }, { "epoch": 1.9056878949927079, "grad_norm": 1.702943163540084, "learning_rate": 1.219873776687308e-07, "loss": 0.48267096281051636, "step": 7840 }, { "epoch": 1.9059309674282936, "grad_norm": 1.525282166575885, "learning_rate": 1.2136226942957352e-07, "loss": 0.4377221465110779, "step": 7841 }, { "epoch": 1.9061740398638793, "grad_norm": 1.7841617357149921, "learning_rate": 1.2073875717134587e-07, "loss": 0.49283134937286377, "step": 7842 }, { "epoch": 1.906417112299465, "grad_norm": 1.9778249855136145, "learning_rate": 1.201168409947806e-07, "loss": 0.46124714612960815, "step": 7843 }, { "epoch": 1.906660184735051, "grad_norm": 1.6503470053352602, "learning_rate": 1.1949652100035402e-07, "loss": 0.632723331451416, "step": 7844 }, { "epoch": 1.9069032571706368, "grad_norm": 1.686356237847561, "learning_rate": 1.1887779728828486e-07, "loss": 0.5600190162658691, "step": 7845 }, { "epoch": 1.9071463296062228, "grad_norm": 1.846963737317572, "learning_rate": 1.1826066995853092e-07, "loss": 0.6844348907470703, "step": 7846 }, { "epoch": 1.9073894020418085, "grad_norm": 1.7148274693027308, "learning_rate": 1.17645139110798e-07, "loss": 0.5553156137466431, "step": 7847 }, { "epoch": 1.9076324744773943, "grad_norm": 2.521827005403142, "learning_rate": 1.1703120484452657e-07, "loss": 0.6470756530761719, "step": 7848 }, { "epoch": 1.90787554691298, "grad_norm": 1.8947739508639267, "learning_rate": 1.1641886725890728e-07, "loss": 0.5665540099143982, "step": 7849 }, { "epoch": 1.9081186193485657, "grad_norm": 1.460583561896763, "learning_rate": 1.1580812645286654e-07, "loss": 0.5729918479919434, "step": 7850 }, { "epoch": 1.9083616917841517, "grad_norm": 1.7838063944214575, "learning_rate": 1.1519898252507545e-07, "loss": 0.5258395671844482, "step": 7851 }, { "epoch": 1.9086047642197375, "grad_norm": 1.898207872521561, "learning_rate": 1.1459143557394525e-07, "loss": 0.5530171394348145, "step": 7852 }, { "epoch": 1.9088478366553234, "grad_norm": 1.9266209667795218, "learning_rate": 1.1398548569763191e-07, "loss": 0.6208722591400146, "step": 7853 }, { "epoch": 1.9090909090909092, "grad_norm": 1.789866348211892, "learning_rate": 1.1338113299403263e-07, "loss": 0.5536078214645386, "step": 7854 }, { "epoch": 1.909333981526495, "grad_norm": 1.6359679677641825, "learning_rate": 1.127783775607838e-07, "loss": 0.606512725353241, "step": 7855 }, { "epoch": 1.9095770539620807, "grad_norm": 1.5982825134593037, "learning_rate": 1.1217721949526638e-07, "loss": 0.5810530185699463, "step": 7856 }, { "epoch": 1.9098201263976664, "grad_norm": 2.289162966271093, "learning_rate": 1.1157765889460381e-07, "loss": 0.5287162661552429, "step": 7857 }, { "epoch": 1.9100631988332522, "grad_norm": 1.715924899311407, "learning_rate": 1.1097969585566082e-07, "loss": 0.6001414060592651, "step": 7858 }, { "epoch": 1.9103062712688381, "grad_norm": 1.6034872089703154, "learning_rate": 1.1038333047504234e-07, "loss": 0.6067862510681152, "step": 7859 }, { "epoch": 1.9105493437044239, "grad_norm": 1.9912944522561649, "learning_rate": 1.0978856284909689e-07, "loss": 0.574019193649292, "step": 7860 }, { "epoch": 1.9107924161400098, "grad_norm": 1.6343278920875086, "learning_rate": 1.0919539307391314e-07, "loss": 0.4814596176147461, "step": 7861 }, { "epoch": 1.9110354885755956, "grad_norm": 1.5970615422250387, "learning_rate": 1.0860382124532554e-07, "loss": 0.6257460117340088, "step": 7862 }, { "epoch": 1.9112785610111813, "grad_norm": 1.7792251436354336, "learning_rate": 1.0801384745890542e-07, "loss": 0.6687425374984741, "step": 7863 }, { "epoch": 1.911521633446767, "grad_norm": 1.5906663110713586, "learning_rate": 1.0742547180996876e-07, "loss": 0.6984440684318542, "step": 7864 }, { "epoch": 1.9117647058823528, "grad_norm": 1.5423502948957797, "learning_rate": 1.0683869439357397e-07, "loss": 0.5978307723999023, "step": 7865 }, { "epoch": 1.9120077783179388, "grad_norm": 1.8759721186472622, "learning_rate": 1.0625351530451856e-07, "loss": 0.6617599725723267, "step": 7866 }, { "epoch": 1.9122508507535245, "grad_norm": 1.4428258467784194, "learning_rate": 1.0566993463734576e-07, "loss": 0.5909592509269714, "step": 7867 }, { "epoch": 1.9124939231891105, "grad_norm": 1.6415302225877084, "learning_rate": 1.0508795248633463e-07, "loss": 0.3719388246536255, "step": 7868 }, { "epoch": 1.9127369956246962, "grad_norm": 1.4786546353849803, "learning_rate": 1.0450756894551217e-07, "loss": 0.5602902173995972, "step": 7869 }, { "epoch": 1.912980068060282, "grad_norm": 1.5975772173570275, "learning_rate": 1.0392878410864337e-07, "loss": 0.5145024657249451, "step": 7870 }, { "epoch": 1.9132231404958677, "grad_norm": 1.4893200428318698, "learning_rate": 1.0335159806923679e-07, "loss": 0.6021307706832886, "step": 7871 }, { "epoch": 1.9134662129314535, "grad_norm": 1.6114893193772357, "learning_rate": 1.0277601092054113e-07, "loss": 0.5766277313232422, "step": 7872 }, { "epoch": 1.9137092853670392, "grad_norm": 1.8966773247936872, "learning_rate": 1.0220202275554758e-07, "loss": 0.6043760180473328, "step": 7873 }, { "epoch": 1.9139523578026252, "grad_norm": 1.6026709398286014, "learning_rate": 1.0162963366698975e-07, "loss": 0.6258494853973389, "step": 7874 }, { "epoch": 1.9141954302382111, "grad_norm": 1.8889082576865925, "learning_rate": 1.0105884374734032e-07, "loss": 0.5908513069152832, "step": 7875 }, { "epoch": 1.914438502673797, "grad_norm": 1.4680099086330185, "learning_rate": 1.0048965308881886e-07, "loss": 0.4749200940132141, "step": 7876 }, { "epoch": 1.9146815751093826, "grad_norm": 2.3943235111034107, "learning_rate": 9.99220617833796e-08, "loss": 0.536959171295166, "step": 7877 }, { "epoch": 1.9149246475449684, "grad_norm": 1.5944389380877824, "learning_rate": 9.935606992272362e-08, "loss": 0.5888774394989014, "step": 7878 }, { "epoch": 1.9151677199805541, "grad_norm": 1.5670496494716122, "learning_rate": 9.879167759829223e-08, "loss": 0.6311365365982056, "step": 7879 }, { "epoch": 1.9154107924161399, "grad_norm": 1.728939151631142, "learning_rate": 9.822888490126692e-08, "loss": 0.7119824886322021, "step": 7880 }, { "epoch": 1.9156538648517258, "grad_norm": 1.6611837519350625, "learning_rate": 9.766769192257275e-08, "loss": 0.6669738292694092, "step": 7881 }, { "epoch": 1.9158969372873116, "grad_norm": 1.492436436749434, "learning_rate": 9.710809875287386e-08, "loss": 0.45102983713150024, "step": 7882 }, { "epoch": 1.9161400097228976, "grad_norm": 1.613068526098166, "learning_rate": 9.655010548257903e-08, "loss": 0.661641001701355, "step": 7883 }, { "epoch": 1.9163830821584833, "grad_norm": 1.6554391954258298, "learning_rate": 9.599371220183618e-08, "loss": 0.5447478890419006, "step": 7884 }, { "epoch": 1.916626154594069, "grad_norm": 1.6948642647912686, "learning_rate": 9.54389190005367e-08, "loss": 0.5650343894958496, "step": 7885 }, { "epoch": 1.9168692270296548, "grad_norm": 1.8548249661396576, "learning_rate": 9.488572596831114e-08, "loss": 0.717132031917572, "step": 7886 }, { "epoch": 1.9171122994652405, "grad_norm": 1.5430237566523124, "learning_rate": 9.433413319453355e-08, "loss": 0.6439914107322693, "step": 7887 }, { "epoch": 1.9173553719008265, "grad_norm": 1.8407622667016448, "learning_rate": 9.378414076831598e-08, "loss": 0.635654091835022, "step": 7888 }, { "epoch": 1.9175984443364122, "grad_norm": 1.3836332041528028, "learning_rate": 9.323574877851849e-08, "loss": 0.46564117074012756, "step": 7889 }, { "epoch": 1.9178415167719982, "grad_norm": 1.8034400655804155, "learning_rate": 9.268895731373684e-08, "loss": 0.4144997000694275, "step": 7890 }, { "epoch": 1.918084589207584, "grad_norm": 1.7204518338456052, "learning_rate": 9.214376646230815e-08, "loss": 0.5827692747116089, "step": 7891 }, { "epoch": 1.9183276616431697, "grad_norm": 1.8915732089878712, "learning_rate": 9.160017631231643e-08, "loss": 0.4170801043510437, "step": 7892 }, { "epoch": 1.9185707340787554, "grad_norm": 1.4961069336740473, "learning_rate": 9.105818695158031e-08, "loss": 0.5425165891647339, "step": 7893 }, { "epoch": 1.9188138065143412, "grad_norm": 1.7091694509209534, "learning_rate": 9.051779846766529e-08, "loss": 0.614035964012146, "step": 7894 }, { "epoch": 1.919056878949927, "grad_norm": 1.5089394397862408, "learning_rate": 8.997901094787486e-08, "loss": 0.4266599416732788, "step": 7895 }, { "epoch": 1.919299951385513, "grad_norm": 1.6639535990039638, "learning_rate": 8.944182447925498e-08, "loss": 0.5825821161270142, "step": 7896 }, { "epoch": 1.9195430238210986, "grad_norm": 1.8820864880858443, "learning_rate": 8.890623914859176e-08, "loss": 0.6605106592178345, "step": 7897 }, { "epoch": 1.9197860962566846, "grad_norm": 1.5855911891040053, "learning_rate": 8.837225504241486e-08, "loss": 0.5082308650016785, "step": 7898 }, { "epoch": 1.9200291686922704, "grad_norm": 1.6293577149687402, "learning_rate": 8.783987224699531e-08, "loss": 0.5540370345115662, "step": 7899 }, { "epoch": 1.920272241127856, "grad_norm": 1.9079146201675807, "learning_rate": 8.730909084834094e-08, "loss": 0.5947761535644531, "step": 7900 }, { "epoch": 1.9205153135634419, "grad_norm": 1.569249720216967, "learning_rate": 8.677991093220761e-08, "loss": 0.5506346225738525, "step": 7901 }, { "epoch": 1.9207583859990276, "grad_norm": 1.7066180367289316, "learning_rate": 8.625233258408694e-08, "loss": 0.8269942998886108, "step": 7902 }, { "epoch": 1.9210014584346136, "grad_norm": 1.5235162204355908, "learning_rate": 8.572635588921519e-08, "loss": 0.556951105594635, "step": 7903 }, { "epoch": 1.9212445308701993, "grad_norm": 1.7617143263518384, "learning_rate": 8.52019809325666e-08, "loss": 0.5119898319244385, "step": 7904 }, { "epoch": 1.9214876033057853, "grad_norm": 1.8201741496910975, "learning_rate": 8.467920779886008e-08, "loss": 0.5009436011314392, "step": 7905 }, { "epoch": 1.921730675741371, "grad_norm": 1.6968266010879123, "learning_rate": 8.415803657255473e-08, "loss": 0.528529167175293, "step": 7906 }, { "epoch": 1.9219737481769568, "grad_norm": 1.686849934902172, "learning_rate": 8.363846733784875e-08, "loss": 0.6037752628326416, "step": 7907 }, { "epoch": 1.9222168206125425, "grad_norm": 1.7474163906815379, "learning_rate": 8.31205001786839e-08, "loss": 0.4522310495376587, "step": 7908 }, { "epoch": 1.9224598930481283, "grad_norm": 1.6493480044648348, "learning_rate": 8.26041351787421e-08, "loss": 0.5517383217811584, "step": 7909 }, { "epoch": 1.922702965483714, "grad_norm": 2.063337668063682, "learning_rate": 8.208937242144777e-08, "loss": 0.7731345891952515, "step": 7910 }, { "epoch": 1.9229460379193, "grad_norm": 1.8596174799415213, "learning_rate": 8.157621198996325e-08, "loss": 0.46835148334503174, "step": 7911 }, { "epoch": 1.9231891103548857, "grad_norm": 1.5433999529495586, "learning_rate": 8.106465396719664e-08, "loss": 0.4477365016937256, "step": 7912 }, { "epoch": 1.9234321827904717, "grad_norm": 1.7247524421356701, "learning_rate": 8.055469843579189e-08, "loss": 0.483159601688385, "step": 7913 }, { "epoch": 1.9236752552260574, "grad_norm": 1.763976191105011, "learning_rate": 8.004634547813972e-08, "loss": 0.532046914100647, "step": 7914 }, { "epoch": 1.9239183276616432, "grad_norm": 2.058754464949749, "learning_rate": 7.953959517636778e-08, "loss": 0.5159087181091309, "step": 7915 }, { "epoch": 1.924161400097229, "grad_norm": 1.7568770885736658, "learning_rate": 7.903444761234391e-08, "loss": 0.7910261154174805, "step": 7916 }, { "epoch": 1.9244044725328147, "grad_norm": 1.7895067210496105, "learning_rate": 7.853090286768284e-08, "loss": 0.533238410949707, "step": 7917 }, { "epoch": 1.9246475449684006, "grad_norm": 1.8835461445205601, "learning_rate": 7.802896102373392e-08, "loss": 0.5685431957244873, "step": 7918 }, { "epoch": 1.9248906174039864, "grad_norm": 1.690356350682088, "learning_rate": 7.75286221615923e-08, "loss": 0.5810141563415527, "step": 7919 }, { "epoch": 1.9251336898395723, "grad_norm": 1.9080657797574716, "learning_rate": 7.702988636208997e-08, "loss": 0.5106228590011597, "step": 7920 }, { "epoch": 1.925376762275158, "grad_norm": 1.8414683840774404, "learning_rate": 7.653275370580471e-08, "loss": 0.48370152711868286, "step": 7921 }, { "epoch": 1.9256198347107438, "grad_norm": 1.5913436278415014, "learning_rate": 7.603722427304893e-08, "loss": 0.44294774532318115, "step": 7922 }, { "epoch": 1.9258629071463296, "grad_norm": 1.6551859157115432, "learning_rate": 7.554329814388418e-08, "loss": 0.4312477707862854, "step": 7923 }, { "epoch": 1.9261059795819153, "grad_norm": 1.7320783178686114, "learning_rate": 7.505097539810546e-08, "loss": 0.5994080901145935, "step": 7924 }, { "epoch": 1.926349052017501, "grad_norm": 1.5652359597288825, "learning_rate": 7.456025611525253e-08, "loss": 0.6063407063484192, "step": 7925 }, { "epoch": 1.926592124453087, "grad_norm": 1.9464216127100558, "learning_rate": 7.407114037460639e-08, "loss": 0.7578713893890381, "step": 7926 }, { "epoch": 1.9268351968886728, "grad_norm": 1.5670955925776064, "learning_rate": 7.358362825518717e-08, "loss": 0.6088755130767822, "step": 7927 }, { "epoch": 1.9270782693242587, "grad_norm": 1.567783327969289, "learning_rate": 7.309771983575632e-08, "loss": 0.6491324305534363, "step": 7928 }, { "epoch": 1.9273213417598445, "grad_norm": 1.4280099155685106, "learning_rate": 7.261341519481768e-08, "loss": 0.541689932346344, "step": 7929 }, { "epoch": 1.9275644141954302, "grad_norm": 1.5988504713727918, "learning_rate": 7.213071441061425e-08, "loss": 0.550780177116394, "step": 7930 }, { "epoch": 1.927807486631016, "grad_norm": 1.5443476698131455, "learning_rate": 7.164961756113031e-08, "loss": 0.49613553285598755, "step": 7931 }, { "epoch": 1.9280505590666017, "grad_norm": 1.5519577483385147, "learning_rate": 7.117012472409257e-08, "loss": 0.4044298231601715, "step": 7932 }, { "epoch": 1.9282936315021877, "grad_norm": 1.4455761968510126, "learning_rate": 7.069223597696572e-08, "loss": 0.4667312502861023, "step": 7933 }, { "epoch": 1.9285367039377734, "grad_norm": 1.8267810331752599, "learning_rate": 7.021595139695914e-08, "loss": 0.5031038522720337, "step": 7934 }, { "epoch": 1.9287797763733594, "grad_norm": 1.523025815999097, "learning_rate": 6.974127106101792e-08, "loss": 0.5720036029815674, "step": 7935 }, { "epoch": 1.9290228488089451, "grad_norm": 1.6607201090707235, "learning_rate": 6.926819504583293e-08, "loss": 0.6124526262283325, "step": 7936 }, { "epoch": 1.929265921244531, "grad_norm": 1.7373578855207343, "learning_rate": 6.879672342783305e-08, "loss": 0.7157636880874634, "step": 7937 }, { "epoch": 1.9295089936801166, "grad_norm": 1.6116812655386006, "learning_rate": 6.832685628318847e-08, "loss": 0.48646029829978943, "step": 7938 }, { "epoch": 1.9297520661157024, "grad_norm": 1.6966978084137374, "learning_rate": 6.785859368781066e-08, "loss": 0.4770312011241913, "step": 7939 }, { "epoch": 1.9299951385512881, "grad_norm": 1.7096140771631787, "learning_rate": 6.739193571735137e-08, "loss": 0.5382134318351746, "step": 7940 }, { "epoch": 1.930238210986874, "grad_norm": 1.5127973095404645, "learning_rate": 6.692688244720357e-08, "loss": 0.5130734443664551, "step": 7941 }, { "epoch": 1.93048128342246, "grad_norm": 1.78586247869007, "learning_rate": 6.646343395250054e-08, "loss": 0.6499456167221069, "step": 7942 }, { "epoch": 1.9307243558580458, "grad_norm": 1.5166264730235963, "learning_rate": 6.600159030811682e-08, "loss": 0.4999987483024597, "step": 7943 }, { "epoch": 1.9309674282936315, "grad_norm": 1.6985889003290733, "learning_rate": 6.554135158866825e-08, "loss": 0.5581475496292114, "step": 7944 }, { "epoch": 1.9312105007292173, "grad_norm": 1.5616489765747004, "learning_rate": 6.508271786850762e-08, "loss": 0.577781081199646, "step": 7945 }, { "epoch": 1.931453573164803, "grad_norm": 1.6175005006361787, "learning_rate": 6.46256892217323e-08, "loss": 0.6217760443687439, "step": 7946 }, { "epoch": 1.9316966456003888, "grad_norm": 1.7058334083127669, "learning_rate": 6.417026572218099e-08, "loss": 0.5909137725830078, "step": 7947 }, { "epoch": 1.9319397180359748, "grad_norm": 1.5672193540099315, "learning_rate": 6.371644744343042e-08, "loss": 0.5671443343162537, "step": 7948 }, { "epoch": 1.9321827904715605, "grad_norm": 1.8264826327825745, "learning_rate": 6.326423445879748e-08, "loss": 0.614419162273407, "step": 7949 }, { "epoch": 1.9324258629071465, "grad_norm": 1.5461022446817498, "learning_rate": 6.281362684134373e-08, "loss": 0.5486583113670349, "step": 7950 }, { "epoch": 1.9326689353427322, "grad_norm": 1.7105509120170317, "learning_rate": 6.236462466386651e-08, "loss": 0.5263485908508301, "step": 7951 }, { "epoch": 1.932912007778318, "grad_norm": 2.022751441252122, "learning_rate": 6.191722799890776e-08, "loss": 0.5123140215873718, "step": 7952 }, { "epoch": 1.9331550802139037, "grad_norm": 1.796469284618645, "learning_rate": 6.147143691874746e-08, "loss": 0.5958017110824585, "step": 7953 }, { "epoch": 1.9333981526494894, "grad_norm": 1.6734380653286738, "learning_rate": 6.102725149540689e-08, "loss": 0.6394611597061157, "step": 7954 }, { "epoch": 1.9336412250850752, "grad_norm": 1.79479093520703, "learning_rate": 6.058467180064864e-08, "loss": 0.7065525650978088, "step": 7955 }, { "epoch": 1.9338842975206612, "grad_norm": 1.8560882862070975, "learning_rate": 6.014369790597552e-08, "loss": 0.5571230053901672, "step": 7956 }, { "epoch": 1.9341273699562471, "grad_norm": 1.7630482838216488, "learning_rate": 5.970432988263053e-08, "loss": 0.6030617952346802, "step": 7957 }, { "epoch": 1.9343704423918329, "grad_norm": 1.468318107458492, "learning_rate": 5.92665678015969e-08, "loss": 0.5067697763442993, "step": 7958 }, { "epoch": 1.9346135148274186, "grad_norm": 1.659198517953283, "learning_rate": 5.883041173360027e-08, "loss": 0.6058977246284485, "step": 7959 }, { "epoch": 1.9348565872630044, "grad_norm": 1.6390612699131113, "learning_rate": 5.839586174910317e-08, "loss": 0.5550916194915771, "step": 7960 }, { "epoch": 1.93509965969859, "grad_norm": 1.4096846332239537, "learning_rate": 5.796291791831388e-08, "loss": 0.48672211170196533, "step": 7961 }, { "epoch": 1.9353427321341758, "grad_norm": 1.7502068388488512, "learning_rate": 5.7531580311176446e-08, "loss": 0.6279679536819458, "step": 7962 }, { "epoch": 1.9355858045697618, "grad_norm": 1.6274823184212908, "learning_rate": 5.7101848997376255e-08, "loss": 0.536941647529602, "step": 7963 }, { "epoch": 1.9358288770053476, "grad_norm": 1.4605944917534976, "learning_rate": 5.66737240463433e-08, "loss": 0.5373420715332031, "step": 7964 }, { "epoch": 1.9360719494409335, "grad_norm": 1.6696257244590091, "learning_rate": 5.624720552724228e-08, "loss": 0.5443302989006042, "step": 7965 }, { "epoch": 1.9363150218765193, "grad_norm": 1.7896064194606252, "learning_rate": 5.5822293508982494e-08, "loss": 0.5848689675331116, "step": 7966 }, { "epoch": 1.936558094312105, "grad_norm": 1.3778762251798187, "learning_rate": 5.5398988060211266e-08, "loss": 0.4382008910179138, "step": 7967 }, { "epoch": 1.9368011667476908, "grad_norm": 1.6090872816260822, "learning_rate": 5.497728924931833e-08, "loss": 0.48486748337745667, "step": 7968 }, { "epoch": 1.9370442391832765, "grad_norm": 1.7224715706589186, "learning_rate": 5.455719714443253e-08, "loss": 0.4609367251396179, "step": 7969 }, { "epoch": 1.9372873116188625, "grad_norm": 1.7718586389968056, "learning_rate": 5.4138711813422895e-08, "loss": 0.6235551834106445, "step": 7970 }, { "epoch": 1.9375303840544482, "grad_norm": 1.60185572569191, "learning_rate": 5.372183332389869e-08, "loss": 0.5649920105934143, "step": 7971 }, { "epoch": 1.9377734564900342, "grad_norm": 1.7820953197621148, "learning_rate": 5.330656174321158e-08, "loss": 0.6266863942146301, "step": 7972 }, { "epoch": 1.93801652892562, "grad_norm": 1.885326396435473, "learning_rate": 5.289289713845347e-08, "loss": 0.5801906585693359, "step": 7973 }, { "epoch": 1.9382596013612057, "grad_norm": 1.613432603702149, "learning_rate": 5.248083957645311e-08, "loss": 0.5726218223571777, "step": 7974 }, { "epoch": 1.9385026737967914, "grad_norm": 1.6359592004588381, "learning_rate": 5.207038912378282e-08, "loss": 0.6533780097961426, "step": 7975 }, { "epoch": 1.9387457462323772, "grad_norm": 1.6273423570185526, "learning_rate": 5.166154584675509e-08, "loss": 0.489124596118927, "step": 7976 }, { "epoch": 1.938988818667963, "grad_norm": 1.6577385030407006, "learning_rate": 5.1254309811421544e-08, "loss": 0.5937591791152954, "step": 7977 }, { "epoch": 1.9392318911035489, "grad_norm": 1.7798223997567588, "learning_rate": 5.08486810835751e-08, "loss": 0.6749411821365356, "step": 7978 }, { "epoch": 1.9394749635391346, "grad_norm": 1.8167711663266874, "learning_rate": 5.0444659728747794e-08, "loss": 0.5938769578933716, "step": 7979 }, { "epoch": 1.9397180359747206, "grad_norm": 1.9934246298015534, "learning_rate": 5.0042245812215176e-08, "loss": 0.5795644521713257, "step": 7980 }, { "epoch": 1.9399611084103063, "grad_norm": 1.5001689728944123, "learning_rate": 4.964143939898747e-08, "loss": 0.4639715254306793, "step": 7981 }, { "epoch": 1.940204180845892, "grad_norm": 1.7239314943575188, "learning_rate": 4.924224055382176e-08, "loss": 0.6922030448913574, "step": 7982 }, { "epoch": 1.9404472532814778, "grad_norm": 1.7165015723221706, "learning_rate": 4.88446493412098e-08, "loss": 0.5267813205718994, "step": 7983 }, { "epoch": 1.9406903257170636, "grad_norm": 1.8494884742373676, "learning_rate": 4.8448665825386876e-08, "loss": 0.7038043737411499, "step": 7984 }, { "epoch": 1.9409333981526495, "grad_norm": 1.6786698131815128, "learning_rate": 4.805429007032625e-08, "loss": 0.458914577960968, "step": 7985 }, { "epoch": 1.9411764705882353, "grad_norm": 1.6377008269689217, "learning_rate": 4.7661522139745845e-08, "loss": 0.41276681423187256, "step": 7986 }, { "epoch": 1.9414195430238212, "grad_norm": 1.9247273784081467, "learning_rate": 4.727036209709712e-08, "loss": 0.4643521308898926, "step": 7987 }, { "epoch": 1.941662615459407, "grad_norm": 1.6727012668951746, "learning_rate": 4.688081000557843e-08, "loss": 0.5868631601333618, "step": 7988 }, { "epoch": 1.9419056878949927, "grad_norm": 1.563909869999048, "learning_rate": 4.6492865928123856e-08, "loss": 0.6089300513267517, "step": 7989 }, { "epoch": 1.9421487603305785, "grad_norm": 1.7235118685496313, "learning_rate": 4.6106529927408826e-08, "loss": 0.5506345629692078, "step": 7990 }, { "epoch": 1.9423918327661642, "grad_norm": 1.6761769021294368, "learning_rate": 4.5721802065851194e-08, "loss": 0.5011916756629944, "step": 7991 }, { "epoch": 1.94263490520175, "grad_norm": 1.898023137497349, "learning_rate": 4.533868240560457e-08, "loss": 0.5232019424438477, "step": 7992 }, { "epoch": 1.942877977637336, "grad_norm": 1.4028604289573157, "learning_rate": 4.4957171008568336e-08, "loss": 0.5309011936187744, "step": 7993 }, { "epoch": 1.9431210500729217, "grad_norm": 1.8622002630557832, "learning_rate": 4.45772679363754e-08, "loss": 0.5101293921470642, "step": 7994 }, { "epoch": 1.9433641225085077, "grad_norm": 1.3996524202111835, "learning_rate": 4.419897325040556e-08, "loss": 0.4267985224723816, "step": 7995 }, { "epoch": 1.9436071949440934, "grad_norm": 1.5667694791469615, "learning_rate": 4.3822287011774376e-08, "loss": 0.4599170982837677, "step": 7996 }, { "epoch": 1.9438502673796791, "grad_norm": 1.7639629605688272, "learning_rate": 4.344720928133761e-08, "loss": 0.68778395652771, "step": 7997 }, { "epoch": 1.9440933398152649, "grad_norm": 1.651582240830553, "learning_rate": 4.307374011969567e-08, "loss": 0.4134146571159363, "step": 7998 }, { "epoch": 1.9443364122508506, "grad_norm": 1.6705086625835153, "learning_rate": 4.270187958718253e-08, "loss": 0.6147234439849854, "step": 7999 }, { "epoch": 1.9445794846864366, "grad_norm": 1.4860096332602857, "learning_rate": 4.233162774387678e-08, "loss": 0.5465489029884338, "step": 8000 }, { "epoch": 1.9448225571220223, "grad_norm": 1.5730141992969198, "learning_rate": 4.1962984649596137e-08, "loss": 0.6280418038368225, "step": 8001 }, { "epoch": 1.9450656295576083, "grad_norm": 1.7044307079167818, "learning_rate": 4.159595036389852e-08, "loss": 0.4733526408672333, "step": 8002 }, { "epoch": 1.945308701993194, "grad_norm": 1.4526452076959524, "learning_rate": 4.123052494607982e-08, "loss": 0.6163961887359619, "step": 8003 }, { "epoch": 1.9455517744287798, "grad_norm": 1.6610906340986502, "learning_rate": 4.086670845517948e-08, "loss": 0.5681342482566833, "step": 8004 }, { "epoch": 1.9457948468643655, "grad_norm": 2.016579335650003, "learning_rate": 4.0504500949974936e-08, "loss": 0.6314210295677185, "step": 8005 }, { "epoch": 1.9460379192999513, "grad_norm": 1.6253244465184962, "learning_rate": 4.01439024889827e-08, "loss": 0.6224822998046875, "step": 8006 }, { "epoch": 1.946280991735537, "grad_norm": 1.8335522324971083, "learning_rate": 3.978491313046173e-08, "loss": 0.4971364736557007, "step": 8007 }, { "epoch": 1.946524064171123, "grad_norm": 1.8629280156713137, "learning_rate": 3.942753293241009e-08, "loss": 0.6026595830917358, "step": 8008 }, { "epoch": 1.9467671366067087, "grad_norm": 1.5399239385518186, "learning_rate": 3.9071761952566014e-08, "loss": 0.4900486469268799, "step": 8009 }, { "epoch": 1.9470102090422947, "grad_norm": 1.6364186560934466, "learning_rate": 3.871760024840576e-08, "loss": 0.5796183347702026, "step": 8010 }, { "epoch": 1.9472532814778805, "grad_norm": 1.8269435259680415, "learning_rate": 3.836504787714912e-08, "loss": 0.6999501585960388, "step": 8011 }, { "epoch": 1.9474963539134662, "grad_norm": 1.9516096926429396, "learning_rate": 3.8014104895752746e-08, "loss": 0.5851176381111145, "step": 8012 }, { "epoch": 1.947739426349052, "grad_norm": 1.6245514430595729, "learning_rate": 3.766477136091573e-08, "loss": 0.5548041462898254, "step": 8013 }, { "epoch": 1.9479824987846377, "grad_norm": 2.123470982063871, "learning_rate": 3.731704732907404e-08, "loss": 0.5671961307525635, "step": 8014 }, { "epoch": 1.9482255712202237, "grad_norm": 1.4614232024279667, "learning_rate": 3.697093285640829e-08, "loss": 0.4963279366493225, "step": 8015 }, { "epoch": 1.9484686436558094, "grad_norm": 1.7359046997167205, "learning_rate": 3.662642799883376e-08, "loss": 0.5257710814476013, "step": 8016 }, { "epoch": 1.9487117160913954, "grad_norm": 1.7110354587862144, "learning_rate": 3.628353281201036e-08, "loss": 0.663546621799469, "step": 8017 }, { "epoch": 1.9489547885269811, "grad_norm": 1.9155825711964847, "learning_rate": 3.594224735133378e-08, "loss": 0.5878391265869141, "step": 8018 }, { "epoch": 1.9491978609625669, "grad_norm": 1.634387708420559, "learning_rate": 3.560257167194325e-08, "loss": 0.5225180387496948, "step": 8019 }, { "epoch": 1.9494409333981526, "grad_norm": 1.872529648777762, "learning_rate": 3.526450582871599e-08, "loss": 0.6309431791305542, "step": 8020 }, { "epoch": 1.9496840058337384, "grad_norm": 1.8110298080330942, "learning_rate": 3.49280498762683e-08, "loss": 0.6860992908477783, "step": 8021 }, { "epoch": 1.949927078269324, "grad_norm": 1.4781396068686063, "learning_rate": 3.4593203868960036e-08, "loss": 0.6619086265563965, "step": 8022 }, { "epoch": 1.95017015070491, "grad_norm": 1.527817685488595, "learning_rate": 3.425996786088681e-08, "loss": 0.5759109258651733, "step": 8023 }, { "epoch": 1.950413223140496, "grad_norm": 1.6943377664583148, "learning_rate": 3.392834190588556e-08, "loss": 0.6422957181930542, "step": 8024 }, { "epoch": 1.9506562955760818, "grad_norm": 1.4357798362239498, "learning_rate": 3.3598326057534546e-08, "loss": 0.43303847312927246, "step": 8025 }, { "epoch": 1.9508993680116675, "grad_norm": 1.557783640659279, "learning_rate": 3.326992036915e-08, "loss": 0.5504913330078125, "step": 8026 }, { "epoch": 1.9511424404472533, "grad_norm": 1.4520461868917287, "learning_rate": 3.294312489378948e-08, "loss": 0.6208460330963135, "step": 8027 }, { "epoch": 1.951385512882839, "grad_norm": 1.5471599374677962, "learning_rate": 3.261793968424853e-08, "loss": 0.46562930941581726, "step": 8028 }, { "epoch": 1.9516285853184248, "grad_norm": 1.7164530919643382, "learning_rate": 3.2294364793064026e-08, "loss": 0.5011501908302307, "step": 8029 }, { "epoch": 1.9518716577540107, "grad_norm": 1.9686721350405991, "learning_rate": 3.1972400272513023e-08, "loss": 0.4784110188484192, "step": 8030 }, { "epoch": 1.9521147301895965, "grad_norm": 1.8589146259420957, "learning_rate": 3.1652046174611705e-08, "loss": 0.5602524280548096, "step": 8031 }, { "epoch": 1.9523578026251824, "grad_norm": 1.8144279690273548, "learning_rate": 3.133330255111533e-08, "loss": 0.7297102212905884, "step": 8032 }, { "epoch": 1.9526008750607682, "grad_norm": 1.571220246280546, "learning_rate": 3.101616945352048e-08, "loss": 0.6647676229476929, "step": 8033 }, { "epoch": 1.952843947496354, "grad_norm": 1.4637815958989284, "learning_rate": 3.070064693306174e-08, "loss": 0.6496282815933228, "step": 8034 }, { "epoch": 1.9530870199319397, "grad_norm": 1.8090019997460673, "learning_rate": 3.0386735040714985e-08, "loss": 0.40399229526519775, "step": 8035 }, { "epoch": 1.9533300923675254, "grad_norm": 1.54180135965365, "learning_rate": 3.0074433827196327e-08, "loss": 0.5491804480552673, "step": 8036 }, { "epoch": 1.9535731648031112, "grad_norm": 1.6219155550487192, "learning_rate": 2.976374334295984e-08, "loss": 0.5165129899978638, "step": 8037 }, { "epoch": 1.9538162372386971, "grad_norm": 1.5277366852648029, "learning_rate": 2.945466363820093e-08, "loss": 0.5446646213531494, "step": 8038 }, { "epoch": 1.954059309674283, "grad_norm": 1.5604814905085784, "learning_rate": 2.9147194762854103e-08, "loss": 0.43082207441329956, "step": 8039 }, { "epoch": 1.9543023821098688, "grad_norm": 1.9069869274029336, "learning_rate": 2.8841336766592953e-08, "loss": 0.5309962034225464, "step": 8040 }, { "epoch": 1.9545454545454546, "grad_norm": 1.6847126633840415, "learning_rate": 2.8537089698832398e-08, "loss": 0.4580988883972168, "step": 8041 }, { "epoch": 1.9547885269810403, "grad_norm": 1.5845108546610218, "learning_rate": 2.823445360872534e-08, "loss": 0.5015397071838379, "step": 8042 }, { "epoch": 1.955031599416626, "grad_norm": 1.5314022114459527, "learning_rate": 2.793342854516601e-08, "loss": 0.4176642894744873, "step": 8043 }, { "epoch": 1.9552746718522118, "grad_norm": 1.748061168886993, "learning_rate": 2.7634014556786605e-08, "loss": 0.6200051307678223, "step": 8044 }, { "epoch": 1.9555177442877978, "grad_norm": 1.701468228061779, "learning_rate": 2.7336211691961767e-08, "loss": 0.547723650932312, "step": 8045 }, { "epoch": 1.9557608167233835, "grad_norm": 1.8810229318442677, "learning_rate": 2.7040019998803012e-08, "loss": 0.5184592604637146, "step": 8046 }, { "epoch": 1.9560038891589695, "grad_norm": 1.5436329391764927, "learning_rate": 2.6745439525163173e-08, "loss": 0.47843310236930847, "step": 8047 }, { "epoch": 1.9562469615945552, "grad_norm": 1.66427396095495, "learning_rate": 2.6452470318634182e-08, "loss": 0.5432286262512207, "step": 8048 }, { "epoch": 1.956490034030141, "grad_norm": 1.7000256783004493, "learning_rate": 2.6161112426548174e-08, "loss": 0.5357005596160889, "step": 8049 }, { "epoch": 1.9567331064657267, "grad_norm": 1.7558337445802705, "learning_rate": 2.5871365895975276e-08, "loss": 0.607999861240387, "step": 8050 }, { "epoch": 1.9569761789013125, "grad_norm": 1.6115206081911637, "learning_rate": 2.558323077372804e-08, "loss": 0.5714758634567261, "step": 8051 }, { "epoch": 1.9572192513368984, "grad_norm": 1.8983912579052038, "learning_rate": 2.529670710635701e-08, "loss": 0.504132091999054, "step": 8052 }, { "epoch": 1.9574623237724842, "grad_norm": 1.699403123602557, "learning_rate": 2.5011794940150713e-08, "loss": 0.5047479271888733, "step": 8053 }, { "epoch": 1.9577053962080702, "grad_norm": 1.6702723979235696, "learning_rate": 2.4728494321142328e-08, "loss": 0.6837470531463623, "step": 8054 }, { "epoch": 1.957948468643656, "grad_norm": 1.6083531102425306, "learning_rate": 2.4446805295099686e-08, "loss": 0.5068359971046448, "step": 8055 }, { "epoch": 1.9581915410792416, "grad_norm": 1.7523678423173754, "learning_rate": 2.416672790753305e-08, "loss": 0.5186532139778137, "step": 8056 }, { "epoch": 1.9584346135148274, "grad_norm": 1.7097928354789924, "learning_rate": 2.388826220368956e-08, "loss": 0.6049820780754089, "step": 8057 }, { "epoch": 1.9586776859504131, "grad_norm": 1.5960969318581635, "learning_rate": 2.3611408228559896e-08, "loss": 0.6304708123207092, "step": 8058 }, { "epoch": 1.9589207583859989, "grad_norm": 1.798165468251264, "learning_rate": 2.333616602687161e-08, "loss": 0.5485708117485046, "step": 8059 }, { "epoch": 1.9591638308215849, "grad_norm": 1.7994804792268295, "learning_rate": 2.3062535643091356e-08, "loss": 0.5138549208641052, "step": 8060 }, { "epoch": 1.9594069032571706, "grad_norm": 1.7556042626262376, "learning_rate": 2.2790517121428212e-08, "loss": 0.5280382633209229, "step": 8061 }, { "epoch": 1.9596499756927566, "grad_norm": 1.7957160200975795, "learning_rate": 2.2520110505828142e-08, "loss": 0.4938809275627136, "step": 8062 }, { "epoch": 1.9598930481283423, "grad_norm": 1.4733509842707337, "learning_rate": 2.2251315839978416e-08, "loss": 0.5245448350906372, "step": 8063 }, { "epoch": 1.960136120563928, "grad_norm": 1.482497692871673, "learning_rate": 2.19841331673043e-08, "loss": 0.5479950904846191, "step": 8064 }, { "epoch": 1.9603791929995138, "grad_norm": 1.594562768399401, "learning_rate": 2.1718562530972377e-08, "loss": 0.4927760362625122, "step": 8065 }, { "epoch": 1.9606222654350995, "grad_norm": 1.6976482880889545, "learning_rate": 2.145460397388721e-08, "loss": 0.39044421911239624, "step": 8066 }, { "epoch": 1.9608653378706855, "grad_norm": 1.4871297988675842, "learning_rate": 2.1192257538694695e-08, "loss": 0.49693381786346436, "step": 8067 }, { "epoch": 1.9611084103062713, "grad_norm": 1.7005285556066914, "learning_rate": 2.0931523267777587e-08, "loss": 0.6872067451477051, "step": 8068 }, { "epoch": 1.9613514827418572, "grad_norm": 1.5287832409984519, "learning_rate": 2.0672401203261084e-08, "loss": 0.5290889739990234, "step": 8069 }, { "epoch": 1.961594555177443, "grad_norm": 1.6041965051283014, "learning_rate": 2.041489138700836e-08, "loss": 0.5769343376159668, "step": 8070 }, { "epoch": 1.9618376276130287, "grad_norm": 1.5352549754427842, "learning_rate": 2.01589938606217e-08, "loss": 0.5471999645233154, "step": 8071 }, { "epoch": 1.9620807000486145, "grad_norm": 1.608725980023436, "learning_rate": 1.9904708665444695e-08, "loss": 0.7289726734161377, "step": 8072 }, { "epoch": 1.9623237724842002, "grad_norm": 1.708038741406979, "learning_rate": 1.9652035842558927e-08, "loss": 0.5774322748184204, "step": 8073 }, { "epoch": 1.962566844919786, "grad_norm": 1.6305208521496481, "learning_rate": 1.9400975432785074e-08, "loss": 0.6361749768257141, "step": 8074 }, { "epoch": 1.962809917355372, "grad_norm": 1.6582307270021, "learning_rate": 1.915152747668514e-08, "loss": 0.5289106965065002, "step": 8075 }, { "epoch": 1.9630529897909577, "grad_norm": 1.484386448539513, "learning_rate": 1.8903692014560215e-08, "loss": 0.45020824670791626, "step": 8076 }, { "epoch": 1.9632960622265436, "grad_norm": 1.974376392020929, "learning_rate": 1.8657469086448275e-08, "loss": 0.5678034424781799, "step": 8077 }, { "epoch": 1.9635391346621294, "grad_norm": 1.5734569134477319, "learning_rate": 1.8412858732130835e-08, "loss": 0.6024196147918701, "step": 8078 }, { "epoch": 1.9637822070977151, "grad_norm": 1.6686520859841651, "learning_rate": 1.8169860991126276e-08, "loss": 0.644067645072937, "step": 8079 }, { "epoch": 1.9640252795333009, "grad_norm": 1.6127019757627623, "learning_rate": 1.7928475902692087e-08, "loss": 0.5179576277732849, "step": 8080 }, { "epoch": 1.9642683519688866, "grad_norm": 1.6537369692593222, "learning_rate": 1.7688703505827077e-08, "loss": 0.5802712440490723, "step": 8081 }, { "epoch": 1.9645114244044726, "grad_norm": 1.6638485132796517, "learning_rate": 1.745054383926914e-08, "loss": 0.6732298135757446, "step": 8082 }, { "epoch": 1.9647544968400583, "grad_norm": 1.8757297927069114, "learning_rate": 1.7213996941493062e-08, "loss": 0.6222912073135376, "step": 8083 }, { "epoch": 1.9649975692756443, "grad_norm": 1.4417767732289641, "learning_rate": 1.6979062850717153e-08, "loss": 0.665208101272583, "step": 8084 }, { "epoch": 1.96524064171123, "grad_norm": 1.5637509017466045, "learning_rate": 1.6745741604895503e-08, "loss": 0.6373528242111206, "step": 8085 }, { "epoch": 1.9654837141468158, "grad_norm": 1.6993070062695845, "learning_rate": 1.6514033241724627e-08, "loss": 0.6211100816726685, "step": 8086 }, { "epoch": 1.9657267865824015, "grad_norm": 1.7246966522057285, "learning_rate": 1.6283937798637907e-08, "loss": 0.6089982390403748, "step": 8087 }, { "epoch": 1.9659698590179873, "grad_norm": 1.6407176893189308, "learning_rate": 1.6055455312810055e-08, "loss": 0.40608176589012146, "step": 8088 }, { "epoch": 1.966212931453573, "grad_norm": 1.747468013357729, "learning_rate": 1.5828585821153764e-08, "loss": 0.5538586974143982, "step": 8089 }, { "epoch": 1.966456003889159, "grad_norm": 1.7758721050792554, "learning_rate": 1.560332936032305e-08, "loss": 0.5172138810157776, "step": 8090 }, { "epoch": 1.9666990763247447, "grad_norm": 1.6340267945399696, "learning_rate": 1.53796859667088e-08, "loss": 0.5685547590255737, "step": 8091 }, { "epoch": 1.9669421487603307, "grad_norm": 1.3946661000204428, "learning_rate": 1.5157655676442118e-08, "loss": 0.4844553470611572, "step": 8092 }, { "epoch": 1.9671852211959164, "grad_norm": 1.4286458109689997, "learning_rate": 1.4937238525395416e-08, "loss": 0.5016804933547974, "step": 8093 }, { "epoch": 1.9674282936315022, "grad_norm": 1.5907997476922708, "learning_rate": 1.4718434549179095e-08, "loss": 0.5435065031051636, "step": 8094 }, { "epoch": 1.967671366067088, "grad_norm": 1.4563904663288978, "learning_rate": 1.450124378314155e-08, "loss": 0.5788138508796692, "step": 8095 }, { "epoch": 1.9679144385026737, "grad_norm": 1.49362758390397, "learning_rate": 1.4285666262372488e-08, "loss": 0.5771435499191284, "step": 8096 }, { "epoch": 1.9681575109382596, "grad_norm": 1.6761438766349817, "learning_rate": 1.4071702021700717e-08, "loss": 0.6671679019927979, "step": 8097 }, { "epoch": 1.9684005833738454, "grad_norm": 1.6214067618027603, "learning_rate": 1.385935109569303e-08, "loss": 0.619156002998352, "step": 8098 }, { "epoch": 1.9686436558094313, "grad_norm": 1.7172324877211715, "learning_rate": 1.3648613518657538e-08, "loss": 0.5151993036270142, "step": 8099 }, { "epoch": 1.968886728245017, "grad_norm": 1.8148482477465298, "learning_rate": 1.3439489324640343e-08, "loss": 0.5225537419319153, "step": 8100 }, { "epoch": 1.9691298006806028, "grad_norm": 1.483435908507282, "learning_rate": 1.3231978547427749e-08, "loss": 0.4875636100769043, "step": 8101 }, { "epoch": 1.9693728731161886, "grad_norm": 1.7287778288746327, "learning_rate": 1.3026081220544051e-08, "loss": 0.5741934776306152, "step": 8102 }, { "epoch": 1.9696159455517743, "grad_norm": 1.4228475218536998, "learning_rate": 1.2821797377254863e-08, "loss": 0.49031782150268555, "step": 8103 }, { "epoch": 1.96985901798736, "grad_norm": 1.4002344563916875, "learning_rate": 1.2619127050562673e-08, "loss": 0.49544060230255127, "step": 8104 }, { "epoch": 1.970102090422946, "grad_norm": 1.7386985216792281, "learning_rate": 1.2418070273212401e-08, "loss": 0.40391069650650024, "step": 8105 }, { "epoch": 1.970345162858532, "grad_norm": 1.4997886668909146, "learning_rate": 1.221862707768584e-08, "loss": 0.4708350896835327, "step": 8106 }, { "epoch": 1.9705882352941178, "grad_norm": 1.7990350942824327, "learning_rate": 1.2020797496203885e-08, "loss": 0.6768019795417786, "step": 8107 }, { "epoch": 1.9708313077297035, "grad_norm": 1.4048983035767633, "learning_rate": 1.1824581560728742e-08, "loss": 0.5094609260559082, "step": 8108 }, { "epoch": 1.9710743801652892, "grad_norm": 1.5236724665668613, "learning_rate": 1.1629979302960614e-08, "loss": 0.43947410583496094, "step": 8109 }, { "epoch": 1.971317452600875, "grad_norm": 1.411499664038373, "learning_rate": 1.1436990754338795e-08, "loss": 0.46635138988494873, "step": 8110 }, { "epoch": 1.9715605250364607, "grad_norm": 1.654578634208634, "learning_rate": 1.1245615946041677e-08, "loss": 0.4965408444404602, "step": 8111 }, { "epoch": 1.9718035974720467, "grad_norm": 1.4855133807567658, "learning_rate": 1.1055854908990082e-08, "loss": 0.44376879930496216, "step": 8112 }, { "epoch": 1.9720466699076324, "grad_norm": 1.6812491833769823, "learning_rate": 1.086770767383838e-08, "loss": 0.5433126091957092, "step": 8113 }, { "epoch": 1.9722897423432184, "grad_norm": 1.8523126489031145, "learning_rate": 1.0681174270985584e-08, "loss": 0.45546701550483704, "step": 8114 }, { "epoch": 1.9725328147788042, "grad_norm": 1.7706084494751297, "learning_rate": 1.049625473056759e-08, "loss": 0.6078514456748962, "step": 8115 }, { "epoch": 1.97277588721439, "grad_norm": 1.6095698138234507, "learning_rate": 1.0312949082459388e-08, "loss": 0.577741265296936, "step": 8116 }, { "epoch": 1.9730189596499756, "grad_norm": 1.606872914473081, "learning_rate": 1.013125735627507e-08, "loss": 0.5845562219619751, "step": 8117 }, { "epoch": 1.9732620320855614, "grad_norm": 1.5453898248767979, "learning_rate": 9.95117958136893e-09, "loss": 0.531240701675415, "step": 8118 }, { "epoch": 1.9735051045211471, "grad_norm": 1.7372868493306861, "learning_rate": 9.772715786834363e-09, "loss": 0.648520827293396, "step": 8119 }, { "epoch": 1.973748176956733, "grad_norm": 1.7519688901974426, "learning_rate": 9.595866001502751e-09, "loss": 0.7140178680419922, "step": 8120 }, { "epoch": 1.973991249392319, "grad_norm": 1.5771985847178143, "learning_rate": 9.420630253947904e-09, "loss": 0.606143593788147, "step": 8121 }, { "epoch": 1.9742343218279048, "grad_norm": 1.5319960475076595, "learning_rate": 9.2470085724794e-09, "loss": 0.5062379837036133, "step": 8122 }, { "epoch": 1.9744773942634906, "grad_norm": 1.79404270946258, "learning_rate": 9.07500098514702e-09, "loss": 0.5483251810073853, "step": 8123 }, { "epoch": 1.9747204666990763, "grad_norm": 2.0007494526365295, "learning_rate": 8.90460751973965e-09, "loss": 0.653238832950592, "step": 8124 }, { "epoch": 1.974963539134662, "grad_norm": 1.9799508032022113, "learning_rate": 8.735828203787489e-09, "loss": 0.5481163263320923, "step": 8125 }, { "epoch": 1.9752066115702478, "grad_norm": 1.8355432269849097, "learning_rate": 8.568663064557614e-09, "loss": 0.6336281895637512, "step": 8126 }, { "epoch": 1.9754496840058338, "grad_norm": 1.6600173328732548, "learning_rate": 8.403112129056202e-09, "loss": 0.5268070101737976, "step": 8127 }, { "epoch": 1.9756927564414195, "grad_norm": 1.752091260775063, "learning_rate": 8.239175424030743e-09, "loss": 0.6086564064025879, "step": 8128 }, { "epoch": 1.9759358288770055, "grad_norm": 1.5270424717757485, "learning_rate": 8.07685297596561e-09, "loss": 0.39888665080070496, "step": 8129 }, { "epoch": 1.9761789013125912, "grad_norm": 1.7425026270128328, "learning_rate": 7.916144811086491e-09, "loss": 0.6250603199005127, "step": 8130 }, { "epoch": 1.976421973748177, "grad_norm": 1.618326214266157, "learning_rate": 7.75705095535595e-09, "loss": 0.6448434591293335, "step": 8131 }, { "epoch": 1.9766650461837627, "grad_norm": 1.6786491463231157, "learning_rate": 7.59957143447787e-09, "loss": 0.5650820136070251, "step": 8132 }, { "epoch": 1.9769081186193485, "grad_norm": 1.5631621327216574, "learning_rate": 7.443706273895235e-09, "loss": 0.4242585003376007, "step": 8133 }, { "epoch": 1.9771511910549344, "grad_norm": 2.0111829701009487, "learning_rate": 7.28945549878679e-09, "loss": 0.6832283735275269, "step": 8134 }, { "epoch": 1.9773942634905202, "grad_norm": 1.7508156229261909, "learning_rate": 7.136819134075934e-09, "loss": 0.6147920489311218, "step": 8135 }, { "epoch": 1.9776373359261061, "grad_norm": 1.5969751594632478, "learning_rate": 6.985797204420719e-09, "loss": 0.42715227603912354, "step": 8136 }, { "epoch": 1.9778804083616919, "grad_norm": 1.755314498773543, "learning_rate": 6.836389734220517e-09, "loss": 0.4406201243400574, "step": 8137 }, { "epoch": 1.9781234807972776, "grad_norm": 1.7683280350332422, "learning_rate": 6.688596747613796e-09, "loss": 0.5465303063392639, "step": 8138 }, { "epoch": 1.9783665532328634, "grad_norm": 1.7515101095552073, "learning_rate": 6.542418268477013e-09, "loss": 0.49667221307754517, "step": 8139 }, { "epoch": 1.9786096256684491, "grad_norm": 1.7265631019547638, "learning_rate": 6.397854320426833e-09, "loss": 0.6032174825668335, "step": 8140 }, { "epoch": 1.9788526981040349, "grad_norm": 1.633176403976278, "learning_rate": 6.254904926820127e-09, "loss": 0.5551379919052124, "step": 8141 }, { "epoch": 1.9790957705396208, "grad_norm": 1.7344437106728994, "learning_rate": 6.1135701107495335e-09, "loss": 0.4674392342567444, "step": 8142 }, { "epoch": 1.9793388429752066, "grad_norm": 1.7034971167413246, "learning_rate": 5.973849895051231e-09, "loss": 0.7117341756820679, "step": 8143 }, { "epoch": 1.9795819154107925, "grad_norm": 1.3952784643723328, "learning_rate": 5.835744302296054e-09, "loss": 0.5422139763832092, "step": 8144 }, { "epoch": 1.9798249878463783, "grad_norm": 1.8742562358452448, "learning_rate": 5.699253354797263e-09, "loss": 0.4605628252029419, "step": 8145 }, { "epoch": 1.980068060281964, "grad_norm": 1.769290438625458, "learning_rate": 5.564377074606109e-09, "loss": 0.47448861598968506, "step": 8146 }, { "epoch": 1.9803111327175498, "grad_norm": 1.6911613354730148, "learning_rate": 5.431115483512938e-09, "loss": 0.5412624478340149, "step": 8147 }, { "epoch": 1.9805542051531355, "grad_norm": 1.68090515104248, "learning_rate": 5.299468603047197e-09, "loss": 0.6275195479393005, "step": 8148 }, { "epoch": 1.9807972775887215, "grad_norm": 1.7666380925746032, "learning_rate": 5.169436454478538e-09, "loss": 0.5362234115600586, "step": 8149 }, { "epoch": 1.9810403500243072, "grad_norm": 1.7192893101672095, "learning_rate": 5.04101905881349e-09, "loss": 0.607162356376648, "step": 8150 }, { "epoch": 1.9812834224598932, "grad_norm": 1.7671559902178415, "learning_rate": 4.914216436798791e-09, "loss": 0.5754002928733826, "step": 8151 }, { "epoch": 1.981526494895479, "grad_norm": 2.063547901476875, "learning_rate": 4.789028608922497e-09, "loss": 0.619001567363739, "step": 8152 }, { "epoch": 1.9817695673310647, "grad_norm": 1.7040447806243755, "learning_rate": 4.66545559540732e-09, "loss": 0.5672964453697205, "step": 8153 }, { "epoch": 1.9820126397666504, "grad_norm": 1.3997033559542236, "learning_rate": 4.543497416220622e-09, "loss": 0.525580644607544, "step": 8154 }, { "epoch": 1.9822557122022362, "grad_norm": 1.3907867847814634, "learning_rate": 4.423154091062198e-09, "loss": 0.5107690691947937, "step": 8155 }, { "epoch": 1.982498784637822, "grad_norm": 1.6074509841437488, "learning_rate": 4.304425639377607e-09, "loss": 0.5281432271003723, "step": 8156 }, { "epoch": 1.9827418570734079, "grad_norm": 1.542505938894722, "learning_rate": 4.187312080347061e-09, "loss": 0.6328741908073425, "step": 8157 }, { "epoch": 1.9829849295089936, "grad_norm": 1.6114906809615257, "learning_rate": 4.071813432890981e-09, "loss": 0.4903757572174072, "step": 8158 }, { "epoch": 1.9832280019445796, "grad_norm": 1.6122204231379094, "learning_rate": 3.957929715669995e-09, "loss": 0.5874505043029785, "step": 8159 }, { "epoch": 1.9834710743801653, "grad_norm": 1.4704855193163247, "learning_rate": 3.845660947082719e-09, "loss": 0.5373573303222656, "step": 8160 }, { "epoch": 1.983714146815751, "grad_norm": 1.8327947562514566, "learning_rate": 3.735007145267977e-09, "loss": 0.5453875064849854, "step": 8161 }, { "epoch": 1.9839572192513368, "grad_norm": 1.8220788606608276, "learning_rate": 3.62596832810147e-09, "loss": 0.5328253507614136, "step": 8162 }, { "epoch": 1.9842002916869226, "grad_norm": 2.0115203656458136, "learning_rate": 3.518544513199107e-09, "loss": 0.534184992313385, "step": 8163 }, { "epoch": 1.9844433641225085, "grad_norm": 1.5529153286841983, "learning_rate": 3.412735717918114e-09, "loss": 0.6383777856826782, "step": 8164 }, { "epoch": 1.9846864365580943, "grad_norm": 1.7640031506130385, "learning_rate": 3.3085419593514857e-09, "loss": 0.47655659914016724, "step": 8165 }, { "epoch": 1.9849295089936803, "grad_norm": 1.665367726458886, "learning_rate": 3.205963254333533e-09, "loss": 0.66390061378479, "step": 8166 }, { "epoch": 1.985172581429266, "grad_norm": 1.4000309112092126, "learning_rate": 3.104999619435445e-09, "loss": 0.5635068416595459, "step": 8167 }, { "epoch": 1.9854156538648517, "grad_norm": 1.623858938030555, "learning_rate": 3.005651070968618e-09, "loss": 0.6595260500907898, "step": 8168 }, { "epoch": 1.9856587263004375, "grad_norm": 1.7962115625648474, "learning_rate": 2.9079176249857676e-09, "loss": 0.6058772802352905, "step": 8169 }, { "epoch": 1.9859017987360232, "grad_norm": 1.5517765601753102, "learning_rate": 2.811799297275375e-09, "loss": 0.5757821202278137, "step": 8170 }, { "epoch": 1.986144871171609, "grad_norm": 1.6861184915222933, "learning_rate": 2.7172961033650192e-09, "loss": 0.5037826895713806, "step": 8171 }, { "epoch": 1.986387943607195, "grad_norm": 1.5098413246517635, "learning_rate": 2.6244080585247077e-09, "loss": 0.5383747816085815, "step": 8172 }, { "epoch": 1.9866310160427807, "grad_norm": 1.768480255079785, "learning_rate": 2.533135177760215e-09, "loss": 0.7344633340835571, "step": 8173 }, { "epoch": 1.9868740884783667, "grad_norm": 1.8675134267075726, "learning_rate": 2.443477475817524e-09, "loss": 0.6530161499977112, "step": 8174 }, { "epoch": 1.9871171609139524, "grad_norm": 1.5611397650083907, "learning_rate": 2.3554349671817134e-09, "loss": 0.5963507890701294, "step": 8175 }, { "epoch": 1.9873602333495382, "grad_norm": 1.797298949226432, "learning_rate": 2.269007666075851e-09, "loss": 0.5046417713165283, "step": 8176 }, { "epoch": 1.987603305785124, "grad_norm": 1.7240970888556537, "learning_rate": 2.1841955864643215e-09, "loss": 0.4884135127067566, "step": 8177 }, { "epoch": 1.9878463782207096, "grad_norm": 1.6270439954212172, "learning_rate": 2.1009987420494982e-09, "loss": 0.5335084795951843, "step": 8178 }, { "epoch": 1.9880894506562956, "grad_norm": 1.6292056979056968, "learning_rate": 2.0194171462717406e-09, "loss": 0.538794219493866, "step": 8179 }, { "epoch": 1.9883325230918814, "grad_norm": 2.075796019253046, "learning_rate": 1.9394508123116162e-09, "loss": 0.5251164436340332, "step": 8180 }, { "epoch": 1.9885755955274673, "grad_norm": 1.5549736999774926, "learning_rate": 1.8610997530876806e-09, "loss": 0.43467527627944946, "step": 8181 }, { "epoch": 1.988818667963053, "grad_norm": 1.5989773582318236, "learning_rate": 1.7843639812598068e-09, "loss": 0.5421528816223145, "step": 8182 }, { "epoch": 1.9890617403986388, "grad_norm": 1.7101335462286777, "learning_rate": 1.7092435092236349e-09, "loss": 0.637461245059967, "step": 8183 }, { "epoch": 1.9893048128342246, "grad_norm": 1.5474445548525562, "learning_rate": 1.6357383491161226e-09, "loss": 0.6385377645492554, "step": 8184 }, { "epoch": 1.9895478852698103, "grad_norm": 1.4512982410256856, "learning_rate": 1.5638485128133264e-09, "loss": 0.5563337802886963, "step": 8185 }, { "epoch": 1.989790957705396, "grad_norm": 1.5044253436572812, "learning_rate": 1.4935740119292885e-09, "loss": 0.4418630599975586, "step": 8186 }, { "epoch": 1.990034030140982, "grad_norm": 1.7039025318716723, "learning_rate": 1.4249148578171501e-09, "loss": 0.6057864427566528, "step": 8187 }, { "epoch": 1.990277102576568, "grad_norm": 1.7700324770644402, "learning_rate": 1.3578710615691494e-09, "loss": 0.6730139255523682, "step": 8188 }, { "epoch": 1.9905201750121537, "grad_norm": 1.598314816694646, "learning_rate": 1.2924426340177321e-09, "loss": 0.4304819703102112, "step": 8189 }, { "epoch": 1.9907632474477395, "grad_norm": 1.5652465763513306, "learning_rate": 1.2286295857322218e-09, "loss": 0.6053767204284668, "step": 8190 }, { "epoch": 1.9910063198833252, "grad_norm": 1.5408351711589978, "learning_rate": 1.1664319270243695e-09, "loss": 0.5697376728057861, "step": 8191 }, { "epoch": 1.991249392318911, "grad_norm": 1.483058046207839, "learning_rate": 1.105849667940584e-09, "loss": 0.40884870290756226, "step": 8192 }, { "epoch": 1.9914924647544967, "grad_norm": 1.650410218786151, "learning_rate": 1.0468828182685908e-09, "loss": 0.5237031579017639, "step": 8193 }, { "epoch": 1.9917355371900827, "grad_norm": 1.7714399729398835, "learning_rate": 9.895313875363244e-10, "loss": 0.6936608552932739, "step": 8194 }, { "epoch": 1.9919786096256684, "grad_norm": 1.4516714358399125, "learning_rate": 9.337953850074854e-10, "loss": 0.47868192195892334, "step": 8195 }, { "epoch": 1.9922216820612544, "grad_norm": 1.7516190378989165, "learning_rate": 8.796748196893134e-10, "loss": 0.5334954261779785, "step": 8196 }, { "epoch": 1.9924647544968401, "grad_norm": 1.5635902164855715, "learning_rate": 8.271697003237045e-10, "loss": 0.5826839208602905, "step": 8197 }, { "epoch": 1.9927078269324259, "grad_norm": 1.6891993783939467, "learning_rate": 7.762800353938727e-10, "loss": 0.5630276203155518, "step": 8198 }, { "epoch": 1.9929508993680116, "grad_norm": 1.785408209467656, "learning_rate": 7.270058331210194e-10, "loss": 0.6313284635543823, "step": 8199 }, { "epoch": 1.9931939718035974, "grad_norm": 1.4995458709083294, "learning_rate": 6.793471014654441e-10, "loss": 0.3847610056400299, "step": 8200 }, { "epoch": 1.9934370442391833, "grad_norm": 1.5687482829291741, "learning_rate": 6.333038481287635e-10, "loss": 0.5632356405258179, "step": 8201 }, { "epoch": 1.993680116674769, "grad_norm": 1.714281355764881, "learning_rate": 5.888760805472516e-10, "loss": 0.6004670858383179, "step": 8202 }, { "epoch": 1.993923189110355, "grad_norm": 1.6440472587262902, "learning_rate": 5.460638059007207e-10, "loss": 0.520415186882019, "step": 8203 }, { "epoch": 1.9941662615459408, "grad_norm": 1.5090785403491935, "learning_rate": 5.048670311036396e-10, "loss": 0.6111814975738525, "step": 8204 }, { "epoch": 1.9944093339815265, "grad_norm": 1.5628773437959742, "learning_rate": 4.6528576281401616e-10, "loss": 0.5083954334259033, "step": 8205 }, { "epoch": 1.9946524064171123, "grad_norm": 1.749259060703111, "learning_rate": 4.2732000742562453e-10, "loss": 0.594918966293335, "step": 8206 }, { "epoch": 1.994895478852698, "grad_norm": 1.6518855732550424, "learning_rate": 3.9096977107133673e-10, "loss": 0.6767618656158447, "step": 8207 }, { "epoch": 1.9951385512882838, "grad_norm": 1.8064253966021258, "learning_rate": 3.562350596253428e-10, "loss": 0.46052372455596924, "step": 8208 }, { "epoch": 1.9953816237238697, "grad_norm": 1.8499060263213665, "learning_rate": 3.2311587869759964e-10, "loss": 0.5915631055831909, "step": 8209 }, { "epoch": 1.9956246961594555, "grad_norm": 1.6863970138753537, "learning_rate": 2.916122336404925e-10, "loss": 0.6333601474761963, "step": 8210 }, { "epoch": 1.9958677685950414, "grad_norm": 1.7122895071244688, "learning_rate": 2.6172412954328375e-10, "loss": 0.5709751844406128, "step": 8211 }, { "epoch": 1.9961108410306272, "grad_norm": 1.8730827438266653, "learning_rate": 2.3345157123433325e-10, "loss": 0.5645607709884644, "step": 8212 }, { "epoch": 1.996353913466213, "grad_norm": 1.8251318947448587, "learning_rate": 2.0679456327998838e-10, "loss": 0.5842437744140625, "step": 8213 }, { "epoch": 1.9965969859017987, "grad_norm": 1.827576494662562, "learning_rate": 1.8175310999013485e-10, "loss": 0.4908216893672943, "step": 8214 }, { "epoch": 1.9968400583373844, "grad_norm": 1.8989836786468686, "learning_rate": 1.5832721540709473e-10, "loss": 0.6466413736343384, "step": 8215 }, { "epoch": 1.9970831307729704, "grad_norm": 1.7861868839134962, "learning_rate": 1.3651688331783875e-10, "loss": 0.6034338474273682, "step": 8216 }, { "epoch": 1.9973262032085561, "grad_norm": 2.240353105299191, "learning_rate": 1.1632211724510456e-10, "loss": 0.6798490285873413, "step": 8217 }, { "epoch": 1.997569275644142, "grad_norm": 1.724133860093608, "learning_rate": 9.774292045183765e-11, "loss": 0.5309431552886963, "step": 8218 }, { "epoch": 1.9978123480797279, "grad_norm": 1.584897104002578, "learning_rate": 8.077929593897082e-11, "loss": 0.5851815938949585, "step": 8219 }, { "epoch": 1.9980554205153136, "grad_norm": 1.7962349167044902, "learning_rate": 6.543124644764476e-11, "loss": 0.4377000331878662, "step": 8220 }, { "epoch": 1.9982984929508993, "grad_norm": 1.6008380204061958, "learning_rate": 5.169877445809768e-11, "loss": 0.4701763987541199, "step": 8221 }, { "epoch": 1.998541565386485, "grad_norm": 1.3989729254271568, "learning_rate": 3.958188218855519e-11, "loss": 0.5565349459648132, "step": 8222 }, { "epoch": 1.9987846378220708, "grad_norm": 1.4417551089965497, "learning_rate": 2.9080571595230255e-11, "loss": 0.6663869023323059, "step": 8223 }, { "epoch": 1.9990277102576568, "grad_norm": 1.6854165748127108, "learning_rate": 2.019484437676411e-11, "loss": 0.48776480555534363, "step": 8224 }, { "epoch": 1.9992707826932425, "grad_norm": 1.5630775645509538, "learning_rate": 1.2924701966454678e-11, "loss": 0.49559369683265686, "step": 8225 }, { "epoch": 1.9995138551288285, "grad_norm": 1.5697860903962033, "learning_rate": 7.270145541138363e-12, "loss": 0.5073534250259399, "step": 8226 }, { "epoch": 1.9997569275644143, "grad_norm": 1.6978081847297783, "learning_rate": 3.2311760134184913e-12, "loss": 0.45585986971855164, "step": 8227 }, { "epoch": 2.0, "grad_norm": 1.8753498687905656, "learning_rate": 8.07794036106202e-13, "loss": 0.577706515789032, "step": 8228 }, { "epoch": 2.0, "step": 8228, "total_flos": 2518988633899008.0, "train_loss": 0.6358900075261316, "train_runtime": 72723.4305, "train_samples_per_second": 0.453, "train_steps_per_second": 0.113 } ], "logging_steps": 1, "max_steps": 8228, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2518988633899008.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }