{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 4048, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0004942542938341777, "grad_norm": 1.7827389996067007, "learning_rate": 0.0, "loss": 1.1816105842590332, "step": 1 }, { "epoch": 0.0009885085876683553, "grad_norm": 1.891128580111598, "learning_rate": 9.852216748768474e-08, "loss": 1.1496102809906006, "step": 2 }, { "epoch": 0.001482762881502533, "grad_norm": 1.8581340535316004, "learning_rate": 1.9704433497536947e-07, "loss": 1.1515967845916748, "step": 3 }, { "epoch": 0.0019770171753367106, "grad_norm": 1.708604556953044, "learning_rate": 2.955665024630542e-07, "loss": 1.1795943975448608, "step": 4 }, { "epoch": 0.0024712714691708885, "grad_norm": 1.8513528590958555, "learning_rate": 3.9408866995073894e-07, "loss": 1.2289564609527588, "step": 5 }, { "epoch": 0.002965525763005066, "grad_norm": 1.972324289049384, "learning_rate": 4.926108374384237e-07, "loss": 1.179269790649414, "step": 6 }, { "epoch": 0.003459780056839244, "grad_norm": 1.8334156798400192, "learning_rate": 5.911330049261084e-07, "loss": 1.199608564376831, "step": 7 }, { "epoch": 0.003954034350673421, "grad_norm": 1.6669436389627912, "learning_rate": 6.896551724137931e-07, "loss": 1.1643707752227783, "step": 8 }, { "epoch": 0.004448288644507599, "grad_norm": 1.8750060934609654, "learning_rate": 7.881773399014779e-07, "loss": 1.1264240741729736, "step": 9 }, { "epoch": 0.004942542938341777, "grad_norm": 1.9962482953672744, "learning_rate": 8.866995073891626e-07, "loss": 1.1717555522918701, "step": 10 }, { "epoch": 0.005436797232175955, "grad_norm": 1.895693583554434, "learning_rate": 9.852216748768474e-07, "loss": 1.1856712102890015, "step": 11 }, { "epoch": 0.005931051526010132, "grad_norm": 1.7765248738469863, "learning_rate": 1.0837438423645322e-06, "loss": 1.1258785724639893, "step": 12 }, { "epoch": 0.00642530581984431, "grad_norm": 1.8326605479421993, "learning_rate": 1.1822660098522167e-06, "loss": 1.1333656311035156, "step": 13 }, { "epoch": 0.006919560113678488, "grad_norm": 1.9142537067819894, "learning_rate": 1.2807881773399017e-06, "loss": 1.2281363010406494, "step": 14 }, { "epoch": 0.0074138144075126654, "grad_norm": 1.9232318367357442, "learning_rate": 1.3793103448275862e-06, "loss": 1.1910676956176758, "step": 15 }, { "epoch": 0.007908068701346842, "grad_norm": 2.5599273269087885, "learning_rate": 1.4778325123152712e-06, "loss": 1.2124552726745605, "step": 16 }, { "epoch": 0.008402322995181021, "grad_norm": 2.2109761155287133, "learning_rate": 1.5763546798029558e-06, "loss": 1.1993463039398193, "step": 17 }, { "epoch": 0.008896577289015198, "grad_norm": 2.1999117305307077, "learning_rate": 1.6748768472906405e-06, "loss": 1.1245683431625366, "step": 18 }, { "epoch": 0.009390831582849375, "grad_norm": 2.203478389299074, "learning_rate": 1.7733990147783253e-06, "loss": 1.1838568449020386, "step": 19 }, { "epoch": 0.009885085876683554, "grad_norm": 2.419107047950166, "learning_rate": 1.8719211822660098e-06, "loss": 1.081169843673706, "step": 20 }, { "epoch": 0.010379340170517731, "grad_norm": 2.559921706815215, "learning_rate": 1.970443349753695e-06, "loss": 1.1506569385528564, "step": 21 }, { "epoch": 0.01087359446435191, "grad_norm": 2.8697838151244977, "learning_rate": 2.0689655172413796e-06, "loss": 1.0841327905654907, "step": 22 }, { "epoch": 0.011367848758186087, "grad_norm": 2.8012936510978905, "learning_rate": 2.1674876847290643e-06, "loss": 1.1335525512695312, "step": 23 }, { "epoch": 0.011862103052020264, "grad_norm": 2.649521736906966, "learning_rate": 2.266009852216749e-06, "loss": 1.035188913345337, "step": 24 }, { "epoch": 0.012356357345854442, "grad_norm": 2.7385314170591166, "learning_rate": 2.3645320197044334e-06, "loss": 1.0640877485275269, "step": 25 }, { "epoch": 0.01285061163968862, "grad_norm": 2.5011806151261755, "learning_rate": 2.4630541871921186e-06, "loss": 1.0479273796081543, "step": 26 }, { "epoch": 0.013344865933522798, "grad_norm": 2.236670838822209, "learning_rate": 2.5615763546798034e-06, "loss": 1.0522505044937134, "step": 27 }, { "epoch": 0.013839120227356975, "grad_norm": 2.065544668093392, "learning_rate": 2.660098522167488e-06, "loss": 1.080836296081543, "step": 28 }, { "epoch": 0.014333374521191152, "grad_norm": 1.7478242928012908, "learning_rate": 2.7586206896551725e-06, "loss": 0.9712544679641724, "step": 29 }, { "epoch": 0.014827628815025331, "grad_norm": 1.5930614486695707, "learning_rate": 2.8571428571428573e-06, "loss": 1.0469061136245728, "step": 30 }, { "epoch": 0.015321883108859508, "grad_norm": 1.380137621152324, "learning_rate": 2.9556650246305424e-06, "loss": 0.9911116361618042, "step": 31 }, { "epoch": 0.015816137402693685, "grad_norm": 1.3167918112915387, "learning_rate": 3.054187192118227e-06, "loss": 0.9552959203720093, "step": 32 }, { "epoch": 0.016310391696527864, "grad_norm": 1.2266567383194062, "learning_rate": 3.1527093596059115e-06, "loss": 0.957429051399231, "step": 33 }, { "epoch": 0.016804645990362042, "grad_norm": 1.305011449405004, "learning_rate": 3.2512315270935963e-06, "loss": 1.0180628299713135, "step": 34 }, { "epoch": 0.017298900284196218, "grad_norm": 1.2347397961596738, "learning_rate": 3.349753694581281e-06, "loss": 0.9064415097236633, "step": 35 }, { "epoch": 0.017793154578030396, "grad_norm": 1.216758814553776, "learning_rate": 3.448275862068966e-06, "loss": 0.9718184471130371, "step": 36 }, { "epoch": 0.018287408871864575, "grad_norm": 1.065779121444896, "learning_rate": 3.5467980295566506e-06, "loss": 0.8831444978713989, "step": 37 }, { "epoch": 0.01878166316569875, "grad_norm": 1.0132491929086573, "learning_rate": 3.6453201970443354e-06, "loss": 0.9167139530181885, "step": 38 }, { "epoch": 0.01927591745953293, "grad_norm": 1.0431186403983612, "learning_rate": 3.7438423645320197e-06, "loss": 0.9322037696838379, "step": 39 }, { "epoch": 0.019770171753367108, "grad_norm": 1.0319066435292568, "learning_rate": 3.842364532019705e-06, "loss": 0.9189817905426025, "step": 40 }, { "epoch": 0.020264426047201287, "grad_norm": 1.1670657884595383, "learning_rate": 3.94088669950739e-06, "loss": 0.8480448126792908, "step": 41 }, { "epoch": 0.020758680341035462, "grad_norm": 0.9850175889441174, "learning_rate": 4.039408866995074e-06, "loss": 0.8907301425933838, "step": 42 }, { "epoch": 0.02125293463486964, "grad_norm": 1.0028387912933743, "learning_rate": 4.137931034482759e-06, "loss": 0.8674390316009521, "step": 43 }, { "epoch": 0.02174718892870382, "grad_norm": 0.9822966394815191, "learning_rate": 4.236453201970444e-06, "loss": 0.8674882054328918, "step": 44 }, { "epoch": 0.022241443222537995, "grad_norm": 0.9778327665239519, "learning_rate": 4.334975369458129e-06, "loss": 0.8542560338973999, "step": 45 }, { "epoch": 0.022735697516372173, "grad_norm": 0.8621828386281931, "learning_rate": 4.4334975369458135e-06, "loss": 0.772778332233429, "step": 46 }, { "epoch": 0.023229951810206352, "grad_norm": 0.8638093364937629, "learning_rate": 4.532019704433498e-06, "loss": 0.7481152415275574, "step": 47 }, { "epoch": 0.023724206104040527, "grad_norm": 0.8467972866728939, "learning_rate": 4.630541871921182e-06, "loss": 0.8373709917068481, "step": 48 }, { "epoch": 0.024218460397874706, "grad_norm": 0.8165134857986008, "learning_rate": 4.729064039408867e-06, "loss": 0.8163385391235352, "step": 49 }, { "epoch": 0.024712714691708885, "grad_norm": 0.833026336683437, "learning_rate": 4.8275862068965525e-06, "loss": 0.7444975972175598, "step": 50 }, { "epoch": 0.025206968985543064, "grad_norm": 0.858591041664589, "learning_rate": 4.926108374384237e-06, "loss": 0.7683243751525879, "step": 51 }, { "epoch": 0.02570122327937724, "grad_norm": 1.0127725906591662, "learning_rate": 5.024630541871922e-06, "loss": 0.806761622428894, "step": 52 }, { "epoch": 0.026195477573211418, "grad_norm": 0.8333649125881921, "learning_rate": 5.123152709359607e-06, "loss": 0.7312102913856506, "step": 53 }, { "epoch": 0.026689731867045596, "grad_norm": 0.9425883709792775, "learning_rate": 5.2216748768472915e-06, "loss": 0.7351999282836914, "step": 54 }, { "epoch": 0.02718398616087977, "grad_norm": 0.9039627787948463, "learning_rate": 5.320197044334976e-06, "loss": 0.7453763484954834, "step": 55 }, { "epoch": 0.02767824045471395, "grad_norm": 0.9324665454088699, "learning_rate": 5.41871921182266e-06, "loss": 0.7063292860984802, "step": 56 }, { "epoch": 0.02817249474854813, "grad_norm": 0.8343256198457882, "learning_rate": 5.517241379310345e-06, "loss": 0.7145994901657104, "step": 57 }, { "epoch": 0.028666749042382304, "grad_norm": 0.7157092163314197, "learning_rate": 5.61576354679803e-06, "loss": 0.687594473361969, "step": 58 }, { "epoch": 0.029161003336216483, "grad_norm": 0.7603582128739335, "learning_rate": 5.7142857142857145e-06, "loss": 0.6643895506858826, "step": 59 }, { "epoch": 0.029655257630050662, "grad_norm": 0.6925073694472516, "learning_rate": 5.812807881773399e-06, "loss": 0.6781614422798157, "step": 60 }, { "epoch": 0.030149511923884837, "grad_norm": 0.7169709854131228, "learning_rate": 5.911330049261085e-06, "loss": 0.6209158301353455, "step": 61 }, { "epoch": 0.030643766217719016, "grad_norm": 0.6749920715098945, "learning_rate": 6.00985221674877e-06, "loss": 0.6424679756164551, "step": 62 }, { "epoch": 0.031138020511553195, "grad_norm": 0.6435584468821339, "learning_rate": 6.108374384236454e-06, "loss": 0.6745971441268921, "step": 63 }, { "epoch": 0.03163227480538737, "grad_norm": 0.657544191989632, "learning_rate": 6.206896551724138e-06, "loss": 0.6520330905914307, "step": 64 }, { "epoch": 0.03212652909922155, "grad_norm": 0.6351335823908374, "learning_rate": 6.305418719211823e-06, "loss": 0.6790571212768555, "step": 65 }, { "epoch": 0.03262078339305573, "grad_norm": 0.6484215339353426, "learning_rate": 6.403940886699508e-06, "loss": 0.6491506099700928, "step": 66 }, { "epoch": 0.033115037686889906, "grad_norm": 0.617685895397393, "learning_rate": 6.502463054187193e-06, "loss": 0.6347313523292542, "step": 67 }, { "epoch": 0.033609291980724085, "grad_norm": 0.6638567270691007, "learning_rate": 6.600985221674877e-06, "loss": 0.6785881519317627, "step": 68 }, { "epoch": 0.034103546274558263, "grad_norm": 0.6459369268846485, "learning_rate": 6.699507389162562e-06, "loss": 0.6470085978507996, "step": 69 }, { "epoch": 0.034597800568392435, "grad_norm": 0.6364523697931875, "learning_rate": 6.798029556650246e-06, "loss": 0.6205961108207703, "step": 70 }, { "epoch": 0.035092054862226614, "grad_norm": 0.6434045969551643, "learning_rate": 6.896551724137932e-06, "loss": 0.6621580123901367, "step": 71 }, { "epoch": 0.03558630915606079, "grad_norm": 0.6281362500041567, "learning_rate": 6.995073891625616e-06, "loss": 0.6363088488578796, "step": 72 }, { "epoch": 0.03608056344989497, "grad_norm": 0.6023389614758552, "learning_rate": 7.093596059113301e-06, "loss": 0.6073004007339478, "step": 73 }, { "epoch": 0.03657481774372915, "grad_norm": 0.5962790573618366, "learning_rate": 7.192118226600986e-06, "loss": 0.6490880846977234, "step": 74 }, { "epoch": 0.03706907203756333, "grad_norm": 0.6425224117743127, "learning_rate": 7.290640394088671e-06, "loss": 0.6540624499320984, "step": 75 }, { "epoch": 0.0375633263313975, "grad_norm": 0.6885040620745063, "learning_rate": 7.3891625615763555e-06, "loss": 0.6237976551055908, "step": 76 }, { "epoch": 0.03805758062523168, "grad_norm": 0.6110947192931153, "learning_rate": 7.487684729064039e-06, "loss": 0.6121219992637634, "step": 77 }, { "epoch": 0.03855183491906586, "grad_norm": 0.6031847840211293, "learning_rate": 7.586206896551724e-06, "loss": 0.5785888433456421, "step": 78 }, { "epoch": 0.03904608921290004, "grad_norm": 0.645073431050071, "learning_rate": 7.68472906403941e-06, "loss": 0.6144810914993286, "step": 79 }, { "epoch": 0.039540343506734216, "grad_norm": 0.709404375816405, "learning_rate": 7.783251231527095e-06, "loss": 0.6522500514984131, "step": 80 }, { "epoch": 0.040034597800568394, "grad_norm": 0.6784602446095636, "learning_rate": 7.88177339901478e-06, "loss": 0.6126501560211182, "step": 81 }, { "epoch": 0.04052885209440257, "grad_norm": 0.6834338295248128, "learning_rate": 7.980295566502464e-06, "loss": 0.573388934135437, "step": 82 }, { "epoch": 0.041023106388236745, "grad_norm": 0.7128627750045655, "learning_rate": 8.078817733990149e-06, "loss": 0.6462322473526001, "step": 83 }, { "epoch": 0.041517360682070924, "grad_norm": 0.6985575396830678, "learning_rate": 8.177339901477834e-06, "loss": 0.6542905569076538, "step": 84 }, { "epoch": 0.0420116149759051, "grad_norm": 0.6800738258763197, "learning_rate": 8.275862068965518e-06, "loss": 0.6539976000785828, "step": 85 }, { "epoch": 0.04250586926973928, "grad_norm": 0.6805451756514653, "learning_rate": 8.374384236453203e-06, "loss": 0.6303049325942993, "step": 86 }, { "epoch": 0.04300012356357346, "grad_norm": 0.6262637687675628, "learning_rate": 8.472906403940888e-06, "loss": 0.5727078318595886, "step": 87 }, { "epoch": 0.04349437785740764, "grad_norm": 0.6392194157453778, "learning_rate": 8.571428571428571e-06, "loss": 0.6204914450645447, "step": 88 }, { "epoch": 0.04398863215124181, "grad_norm": 0.8144620373591464, "learning_rate": 8.669950738916257e-06, "loss": 0.633359432220459, "step": 89 }, { "epoch": 0.04448288644507599, "grad_norm": 0.6564252660453104, "learning_rate": 8.768472906403942e-06, "loss": 0.5737719535827637, "step": 90 }, { "epoch": 0.04497714073891017, "grad_norm": 0.704224097621618, "learning_rate": 8.866995073891627e-06, "loss": 0.6438707709312439, "step": 91 }, { "epoch": 0.04547139503274435, "grad_norm": 0.7123681566966987, "learning_rate": 8.965517241379312e-06, "loss": 0.6284823417663574, "step": 92 }, { "epoch": 0.045965649326578525, "grad_norm": 0.6879682376399587, "learning_rate": 9.064039408866996e-06, "loss": 0.6442058086395264, "step": 93 }, { "epoch": 0.046459903620412704, "grad_norm": 0.709934515039082, "learning_rate": 9.162561576354681e-06, "loss": 0.5821751356124878, "step": 94 }, { "epoch": 0.04695415791424688, "grad_norm": 1.530236961676562, "learning_rate": 9.261083743842364e-06, "loss": 0.546042263507843, "step": 95 }, { "epoch": 0.047448412208081055, "grad_norm": 0.6844457378175872, "learning_rate": 9.359605911330049e-06, "loss": 0.5743244886398315, "step": 96 }, { "epoch": 0.04794266650191523, "grad_norm": 0.6876016450255833, "learning_rate": 9.458128078817734e-06, "loss": 0.5775831341743469, "step": 97 }, { "epoch": 0.04843692079574941, "grad_norm": 0.6367125491834975, "learning_rate": 9.55665024630542e-06, "loss": 0.5632016658782959, "step": 98 }, { "epoch": 0.04893117508958359, "grad_norm": 0.635357516984843, "learning_rate": 9.655172413793105e-06, "loss": 0.5817564129829407, "step": 99 }, { "epoch": 0.04942542938341777, "grad_norm": 0.6380730461382318, "learning_rate": 9.75369458128079e-06, "loss": 0.5692225098609924, "step": 100 }, { "epoch": 0.04991968367725195, "grad_norm": 0.6016319910280624, "learning_rate": 9.852216748768475e-06, "loss": 0.5239434242248535, "step": 101 }, { "epoch": 0.05041393797108613, "grad_norm": 0.6757811368400487, "learning_rate": 9.95073891625616e-06, "loss": 0.543138861656189, "step": 102 }, { "epoch": 0.0509081922649203, "grad_norm": 0.6907500926239555, "learning_rate": 1.0049261083743844e-05, "loss": 0.5914052128791809, "step": 103 }, { "epoch": 0.05140244655875448, "grad_norm": 0.657964391130701, "learning_rate": 1.0147783251231529e-05, "loss": 0.5394442081451416, "step": 104 }, { "epoch": 0.051896700852588656, "grad_norm": 0.6411875370567456, "learning_rate": 1.0246305418719214e-05, "loss": 0.6157902479171753, "step": 105 }, { "epoch": 0.052390955146422835, "grad_norm": 0.738818036033501, "learning_rate": 1.0344827586206898e-05, "loss": 0.5863415598869324, "step": 106 }, { "epoch": 0.052885209440257014, "grad_norm": 0.7066380161278255, "learning_rate": 1.0443349753694583e-05, "loss": 0.5783145427703857, "step": 107 }, { "epoch": 0.05337946373409119, "grad_norm": 0.6486663261886427, "learning_rate": 1.0541871921182268e-05, "loss": 0.5761469006538391, "step": 108 }, { "epoch": 0.053873718027925364, "grad_norm": 0.7011826885785277, "learning_rate": 1.0640394088669953e-05, "loss": 0.5931205749511719, "step": 109 }, { "epoch": 0.05436797232175954, "grad_norm": 0.6624296231637669, "learning_rate": 1.0738916256157637e-05, "loss": 0.5429986119270325, "step": 110 }, { "epoch": 0.05486222661559372, "grad_norm": 0.758180242025479, "learning_rate": 1.083743842364532e-05, "loss": 0.5154455304145813, "step": 111 }, { "epoch": 0.0553564809094279, "grad_norm": 0.6631694030017043, "learning_rate": 1.0935960591133005e-05, "loss": 0.5465028285980225, "step": 112 }, { "epoch": 0.05585073520326208, "grad_norm": 0.7234030186547562, "learning_rate": 1.103448275862069e-05, "loss": 0.5973349213600159, "step": 113 }, { "epoch": 0.05634498949709626, "grad_norm": 0.8062494007312124, "learning_rate": 1.1133004926108375e-05, "loss": 0.6201578378677368, "step": 114 }, { "epoch": 0.05683924379093044, "grad_norm": 0.7754913697435033, "learning_rate": 1.123152709359606e-05, "loss": 0.5090143084526062, "step": 115 }, { "epoch": 0.05733349808476461, "grad_norm": 0.7128751966577052, "learning_rate": 1.1330049261083744e-05, "loss": 0.5275869369506836, "step": 116 }, { "epoch": 0.05782775237859879, "grad_norm": 0.6950533949454222, "learning_rate": 1.1428571428571429e-05, "loss": 0.571302056312561, "step": 117 }, { "epoch": 0.058322006672432966, "grad_norm": 0.727683614551879, "learning_rate": 1.1527093596059114e-05, "loss": 0.5920293927192688, "step": 118 }, { "epoch": 0.058816260966267145, "grad_norm": 0.7151674344713859, "learning_rate": 1.1625615763546799e-05, "loss": 0.5877068042755127, "step": 119 }, { "epoch": 0.059310515260101324, "grad_norm": 0.7467125629300125, "learning_rate": 1.1724137931034483e-05, "loss": 0.6140042543411255, "step": 120 }, { "epoch": 0.0598047695539355, "grad_norm": 0.7531213899377466, "learning_rate": 1.182266009852217e-05, "loss": 0.5642052292823792, "step": 121 }, { "epoch": 0.060299023847769674, "grad_norm": 0.7258097143889621, "learning_rate": 1.1921182266009855e-05, "loss": 0.5535261034965515, "step": 122 }, { "epoch": 0.06079327814160385, "grad_norm": 0.6906824437380253, "learning_rate": 1.201970443349754e-05, "loss": 0.5202849507331848, "step": 123 }, { "epoch": 0.06128753243543803, "grad_norm": 0.7290752273219125, "learning_rate": 1.2118226600985224e-05, "loss": 0.5626791715621948, "step": 124 }, { "epoch": 0.06178178672927221, "grad_norm": 0.6770400510110369, "learning_rate": 1.2216748768472909e-05, "loss": 0.5416101217269897, "step": 125 }, { "epoch": 0.06227604102310639, "grad_norm": 0.730080694043851, "learning_rate": 1.2315270935960592e-05, "loss": 0.5683388710021973, "step": 126 }, { "epoch": 0.06277029531694056, "grad_norm": 0.7617011668537459, "learning_rate": 1.2413793103448277e-05, "loss": 0.564468264579773, "step": 127 }, { "epoch": 0.06326454961077474, "grad_norm": 0.7085057216007719, "learning_rate": 1.2512315270935961e-05, "loss": 0.5419844388961792, "step": 128 }, { "epoch": 0.06375880390460892, "grad_norm": 0.7653624040034734, "learning_rate": 1.2610837438423646e-05, "loss": 0.51283860206604, "step": 129 }, { "epoch": 0.0642530581984431, "grad_norm": 0.8138449595397697, "learning_rate": 1.2709359605911331e-05, "loss": 0.5807296633720398, "step": 130 }, { "epoch": 0.06474731249227728, "grad_norm": 0.6723079879875923, "learning_rate": 1.2807881773399016e-05, "loss": 0.5277815461158752, "step": 131 }, { "epoch": 0.06524156678611145, "grad_norm": 0.6681532618442926, "learning_rate": 1.29064039408867e-05, "loss": 0.5044680833816528, "step": 132 }, { "epoch": 0.06573582107994563, "grad_norm": 0.753382083900827, "learning_rate": 1.3004926108374385e-05, "loss": 0.5412886738777161, "step": 133 }, { "epoch": 0.06623007537377981, "grad_norm": 0.7168767227212489, "learning_rate": 1.310344827586207e-05, "loss": 0.5314532518386841, "step": 134 }, { "epoch": 0.06672432966761399, "grad_norm": 0.8393067756176276, "learning_rate": 1.3201970443349755e-05, "loss": 0.5544138550758362, "step": 135 }, { "epoch": 0.06721858396144817, "grad_norm": 0.7720251101355328, "learning_rate": 1.330049261083744e-05, "loss": 0.5745705366134644, "step": 136 }, { "epoch": 0.06771283825528235, "grad_norm": 0.8433611027798503, "learning_rate": 1.3399014778325124e-05, "loss": 0.5361800789833069, "step": 137 }, { "epoch": 0.06820709254911653, "grad_norm": 0.7945865329579561, "learning_rate": 1.3497536945812807e-05, "loss": 0.5878221392631531, "step": 138 }, { "epoch": 0.06870134684295069, "grad_norm": 0.7847520309491554, "learning_rate": 1.3596059113300492e-05, "loss": 0.5952787399291992, "step": 139 }, { "epoch": 0.06919560113678487, "grad_norm": 0.7556944357281568, "learning_rate": 1.369458128078818e-05, "loss": 0.5334340929985046, "step": 140 }, { "epoch": 0.06968985543061905, "grad_norm": 0.7730405260844581, "learning_rate": 1.3793103448275863e-05, "loss": 0.5297533273696899, "step": 141 }, { "epoch": 0.07018410972445323, "grad_norm": 0.7838373123609123, "learning_rate": 1.3891625615763548e-05, "loss": 0.5388105511665344, "step": 142 }, { "epoch": 0.0706783640182874, "grad_norm": 0.6827867428906486, "learning_rate": 1.3990147783251233e-05, "loss": 0.484375536441803, "step": 143 }, { "epoch": 0.07117261831212159, "grad_norm": 0.7377838543831393, "learning_rate": 1.4088669950738918e-05, "loss": 0.5395358800888062, "step": 144 }, { "epoch": 0.07166687260595576, "grad_norm": 0.7024037339686016, "learning_rate": 1.4187192118226602e-05, "loss": 0.501459538936615, "step": 145 }, { "epoch": 0.07216112689978994, "grad_norm": 0.7544878056630825, "learning_rate": 1.4285714285714287e-05, "loss": 0.5390491485595703, "step": 146 }, { "epoch": 0.07265538119362412, "grad_norm": 0.7358581376182646, "learning_rate": 1.4384236453201972e-05, "loss": 0.505649745464325, "step": 147 }, { "epoch": 0.0731496354874583, "grad_norm": 0.791834759029257, "learning_rate": 1.4482758620689657e-05, "loss": 0.5155121684074402, "step": 148 }, { "epoch": 0.07364388978129248, "grad_norm": 0.9182625859668322, "learning_rate": 1.4581280788177341e-05, "loss": 0.5502114295959473, "step": 149 }, { "epoch": 0.07413814407512666, "grad_norm": 0.7705513444985356, "learning_rate": 1.4679802955665026e-05, "loss": 0.5243497490882874, "step": 150 }, { "epoch": 0.07463239836896084, "grad_norm": 0.7936247647794451, "learning_rate": 1.4778325123152711e-05, "loss": 0.529721736907959, "step": 151 }, { "epoch": 0.075126652662795, "grad_norm": 0.7493387955752852, "learning_rate": 1.4876847290640396e-05, "loss": 0.4721008241176605, "step": 152 }, { "epoch": 0.07562090695662918, "grad_norm": 0.8448372107109295, "learning_rate": 1.4975369458128079e-05, "loss": 0.46029576659202576, "step": 153 }, { "epoch": 0.07611516125046336, "grad_norm": 0.8666504632745452, "learning_rate": 1.5073891625615764e-05, "loss": 0.5151746273040771, "step": 154 }, { "epoch": 0.07660941554429754, "grad_norm": 0.8234378506914858, "learning_rate": 1.5172413793103448e-05, "loss": 0.4743254780769348, "step": 155 }, { "epoch": 0.07710366983813172, "grad_norm": 0.7901189046711773, "learning_rate": 1.5270935960591133e-05, "loss": 0.5167561769485474, "step": 156 }, { "epoch": 0.0775979241319659, "grad_norm": 0.7442599788530032, "learning_rate": 1.536945812807882e-05, "loss": 0.47482365369796753, "step": 157 }, { "epoch": 0.07809217842580007, "grad_norm": 0.7472930500337165, "learning_rate": 1.5467980295566506e-05, "loss": 0.5088409781455994, "step": 158 }, { "epoch": 0.07858643271963425, "grad_norm": 0.839637174922739, "learning_rate": 1.556650246305419e-05, "loss": 0.5264201164245605, "step": 159 }, { "epoch": 0.07908068701346843, "grad_norm": 0.8043048232381864, "learning_rate": 1.5665024630541875e-05, "loss": 0.5475984811782837, "step": 160 }, { "epoch": 0.07957494130730261, "grad_norm": 0.813963733997232, "learning_rate": 1.576354679802956e-05, "loss": 0.5652282238006592, "step": 161 }, { "epoch": 0.08006919560113679, "grad_norm": 0.8257458665080726, "learning_rate": 1.586206896551724e-05, "loss": 0.5179979801177979, "step": 162 }, { "epoch": 0.08056344989497097, "grad_norm": 0.7453513460678786, "learning_rate": 1.5960591133004928e-05, "loss": 0.4966253638267517, "step": 163 }, { "epoch": 0.08105770418880515, "grad_norm": 0.7400908854625781, "learning_rate": 1.605911330049261e-05, "loss": 0.5216315388679504, "step": 164 }, { "epoch": 0.08155195848263931, "grad_norm": 0.7974617542166776, "learning_rate": 1.6157635467980298e-05, "loss": 0.495576411485672, "step": 165 }, { "epoch": 0.08204621277647349, "grad_norm": 0.7828217496299378, "learning_rate": 1.625615763546798e-05, "loss": 0.5101697444915771, "step": 166 }, { "epoch": 0.08254046707030767, "grad_norm": 0.7891722656265441, "learning_rate": 1.6354679802955667e-05, "loss": 0.5438036918640137, "step": 167 }, { "epoch": 0.08303472136414185, "grad_norm": 0.8062908900423786, "learning_rate": 1.645320197044335e-05, "loss": 0.5043500661849976, "step": 168 }, { "epoch": 0.08352897565797603, "grad_norm": 0.8893145421032131, "learning_rate": 1.6551724137931037e-05, "loss": 0.5129355788230896, "step": 169 }, { "epoch": 0.0840232299518102, "grad_norm": 0.8344265538652059, "learning_rate": 1.665024630541872e-05, "loss": 0.48643916845321655, "step": 170 }, { "epoch": 0.08451748424564438, "grad_norm": 0.9138503767586129, "learning_rate": 1.6748768472906406e-05, "loss": 0.5300272703170776, "step": 171 }, { "epoch": 0.08501173853947856, "grad_norm": 0.9819214205489949, "learning_rate": 1.684729064039409e-05, "loss": 0.5321004390716553, "step": 172 }, { "epoch": 0.08550599283331274, "grad_norm": 0.9555025734347583, "learning_rate": 1.6945812807881776e-05, "loss": 0.5066401958465576, "step": 173 }, { "epoch": 0.08600024712714692, "grad_norm": 0.8139597552129452, "learning_rate": 1.704433497536946e-05, "loss": 0.48993563652038574, "step": 174 }, { "epoch": 0.0864945014209811, "grad_norm": 0.8921248257221488, "learning_rate": 1.7142857142857142e-05, "loss": 0.5468013882637024, "step": 175 }, { "epoch": 0.08698875571481528, "grad_norm": 0.8277628260630481, "learning_rate": 1.7241379310344828e-05, "loss": 0.5081865191459656, "step": 176 }, { "epoch": 0.08748301000864946, "grad_norm": 0.7727605442624492, "learning_rate": 1.7339901477832515e-05, "loss": 0.48374873399734497, "step": 177 }, { "epoch": 0.08797726430248362, "grad_norm": 0.7716185332367417, "learning_rate": 1.7438423645320198e-05, "loss": 0.4929465651512146, "step": 178 }, { "epoch": 0.0884715185963178, "grad_norm": 0.7369259534742475, "learning_rate": 1.7536945812807884e-05, "loss": 0.49666428565979004, "step": 179 }, { "epoch": 0.08896577289015198, "grad_norm": 0.9095846029993176, "learning_rate": 1.7635467980295567e-05, "loss": 0.5705476403236389, "step": 180 }, { "epoch": 0.08946002718398616, "grad_norm": 0.8153458294604309, "learning_rate": 1.7733990147783254e-05, "loss": 0.5466605424880981, "step": 181 }, { "epoch": 0.08995428147782034, "grad_norm": 0.7908211366510465, "learning_rate": 1.7832512315270937e-05, "loss": 0.47837337851524353, "step": 182 }, { "epoch": 0.09044853577165451, "grad_norm": 0.8050205335034676, "learning_rate": 1.7931034482758623e-05, "loss": 0.5370041131973267, "step": 183 }, { "epoch": 0.0909427900654887, "grad_norm": 0.8315453873696782, "learning_rate": 1.8029556650246306e-05, "loss": 0.540340006351471, "step": 184 }, { "epoch": 0.09143704435932287, "grad_norm": 0.7864886396514408, "learning_rate": 1.8128078817733993e-05, "loss": 0.5165396928787231, "step": 185 }, { "epoch": 0.09193129865315705, "grad_norm": 1.0212742677335798, "learning_rate": 1.8226600985221676e-05, "loss": 0.5391616821289062, "step": 186 }, { "epoch": 0.09242555294699123, "grad_norm": 0.8362655612683817, "learning_rate": 1.8325123152709362e-05, "loss": 0.472774475812912, "step": 187 }, { "epoch": 0.09291980724082541, "grad_norm": 0.7994913228950927, "learning_rate": 1.8423645320197045e-05, "loss": 0.5079161524772644, "step": 188 }, { "epoch": 0.09341406153465959, "grad_norm": 0.7908069143027292, "learning_rate": 1.852216748768473e-05, "loss": 0.4909520149230957, "step": 189 }, { "epoch": 0.09390831582849377, "grad_norm": 0.8204263481704893, "learning_rate": 1.8620689655172415e-05, "loss": 0.5214540362358093, "step": 190 }, { "epoch": 0.09440257012232794, "grad_norm": 1.0097310819423937, "learning_rate": 1.8719211822660098e-05, "loss": 0.4820341467857361, "step": 191 }, { "epoch": 0.09489682441616211, "grad_norm": 0.7986122947719724, "learning_rate": 1.8817733990147784e-05, "loss": 0.5094855427742004, "step": 192 }, { "epoch": 0.09539107870999629, "grad_norm": 0.8104059351445748, "learning_rate": 1.8916256157635468e-05, "loss": 0.47840312123298645, "step": 193 }, { "epoch": 0.09588533300383047, "grad_norm": 0.8556791067143968, "learning_rate": 1.9014778325123154e-05, "loss": 0.5368070602416992, "step": 194 }, { "epoch": 0.09637958729766465, "grad_norm": 0.8413108625552047, "learning_rate": 1.911330049261084e-05, "loss": 0.493880033493042, "step": 195 }, { "epoch": 0.09687384159149882, "grad_norm": 0.8344269563446816, "learning_rate": 1.9211822660098524e-05, "loss": 0.5052261352539062, "step": 196 }, { "epoch": 0.097368095885333, "grad_norm": 0.8488100596559239, "learning_rate": 1.931034482758621e-05, "loss": 0.4817495346069336, "step": 197 }, { "epoch": 0.09786235017916718, "grad_norm": 0.8835550004433761, "learning_rate": 1.9408866995073893e-05, "loss": 0.530259370803833, "step": 198 }, { "epoch": 0.09835660447300136, "grad_norm": 0.8634602606490965, "learning_rate": 1.950738916256158e-05, "loss": 0.4984540045261383, "step": 199 }, { "epoch": 0.09885085876683554, "grad_norm": 0.8655848178642821, "learning_rate": 1.9605911330049263e-05, "loss": 0.5472708940505981, "step": 200 }, { "epoch": 0.09934511306066972, "grad_norm": 0.8520053240792014, "learning_rate": 1.970443349753695e-05, "loss": 0.5394926071166992, "step": 201 }, { "epoch": 0.0998393673545039, "grad_norm": 0.9089636816290306, "learning_rate": 1.9802955665024632e-05, "loss": 0.5299160480499268, "step": 202 }, { "epoch": 0.10033362164833808, "grad_norm": 0.9396000630272938, "learning_rate": 1.990147783251232e-05, "loss": 0.506400465965271, "step": 203 }, { "epoch": 0.10082787594217225, "grad_norm": 0.7711226267847403, "learning_rate": 2e-05, "loss": 0.47956231236457825, "step": 204 }, { "epoch": 0.10132213023600642, "grad_norm": 0.8971065288988803, "learning_rate": 1.9999996662071442e-05, "loss": 0.48805660009384155, "step": 205 }, { "epoch": 0.1018163845298406, "grad_norm": 0.8419748393313904, "learning_rate": 1.9999986648287996e-05, "loss": 0.46014025807380676, "step": 206 }, { "epoch": 0.10231063882367478, "grad_norm": 0.8667704651728929, "learning_rate": 1.9999969958656345e-05, "loss": 0.4654610753059387, "step": 207 }, { "epoch": 0.10280489311750896, "grad_norm": 0.8143872307343123, "learning_rate": 1.999994659318763e-05, "loss": 0.47037336230278015, "step": 208 }, { "epoch": 0.10329914741134313, "grad_norm": 0.8238466130965688, "learning_rate": 1.999991655189745e-05, "loss": 0.4853154718875885, "step": 209 }, { "epoch": 0.10379340170517731, "grad_norm": 0.8233043672230826, "learning_rate": 1.9999879834805865e-05, "loss": 0.4918109178543091, "step": 210 }, { "epoch": 0.10428765599901149, "grad_norm": 0.787297319281164, "learning_rate": 1.999983644193738e-05, "loss": 0.5136955380439758, "step": 211 }, { "epoch": 0.10478191029284567, "grad_norm": 0.8895124065919626, "learning_rate": 1.9999786373320972e-05, "loss": 0.5145115852355957, "step": 212 }, { "epoch": 0.10527616458667985, "grad_norm": 0.8153315460424436, "learning_rate": 1.9999729628990058e-05, "loss": 0.4624764025211334, "step": 213 }, { "epoch": 0.10577041888051403, "grad_norm": 0.7949012412003572, "learning_rate": 1.9999666208982518e-05, "loss": 0.4599718749523163, "step": 214 }, { "epoch": 0.1062646731743482, "grad_norm": 0.8110981138692489, "learning_rate": 1.99995961133407e-05, "loss": 0.4642864465713501, "step": 215 }, { "epoch": 0.10675892746818239, "grad_norm": 0.7785663242974379, "learning_rate": 1.9999519342111392e-05, "loss": 0.4756677448749542, "step": 216 }, { "epoch": 0.10725318176201656, "grad_norm": 0.8781173550322721, "learning_rate": 1.9999435895345846e-05, "loss": 0.4982803463935852, "step": 217 }, { "epoch": 0.10774743605585073, "grad_norm": 0.905677346569408, "learning_rate": 1.999934577309977e-05, "loss": 0.5189295411109924, "step": 218 }, { "epoch": 0.10824169034968491, "grad_norm": 0.8870093356565885, "learning_rate": 1.999924897543333e-05, "loss": 0.5077873468399048, "step": 219 }, { "epoch": 0.10873594464351909, "grad_norm": 0.9164316488089079, "learning_rate": 1.9999145502411148e-05, "loss": 0.5510451793670654, "step": 220 }, { "epoch": 0.10923019893735327, "grad_norm": 0.850640343977404, "learning_rate": 1.9999035354102298e-05, "loss": 0.44604551792144775, "step": 221 }, { "epoch": 0.10972445323118744, "grad_norm": 0.7739778425864705, "learning_rate": 1.9998918530580315e-05, "loss": 0.42567160725593567, "step": 222 }, { "epoch": 0.11021870752502162, "grad_norm": 0.8699648367810445, "learning_rate": 1.9998795031923186e-05, "loss": 0.4622190594673157, "step": 223 }, { "epoch": 0.1107129618188558, "grad_norm": 0.8261943707290175, "learning_rate": 1.999866485821336e-05, "loss": 0.5023611783981323, "step": 224 }, { "epoch": 0.11120721611268998, "grad_norm": 0.769204860463621, "learning_rate": 1.9998528009537735e-05, "loss": 0.451701819896698, "step": 225 }, { "epoch": 0.11170147040652416, "grad_norm": 0.9053438794448195, "learning_rate": 1.9998384485987675e-05, "loss": 0.48493725061416626, "step": 226 }, { "epoch": 0.11219572470035834, "grad_norm": 0.7780216873284675, "learning_rate": 1.9998234287658996e-05, "loss": 0.45377853512763977, "step": 227 }, { "epoch": 0.11268997899419252, "grad_norm": 0.9129521331875277, "learning_rate": 1.9998077414651957e-05, "loss": 0.48963701725006104, "step": 228 }, { "epoch": 0.1131842332880267, "grad_norm": 0.8500208947168179, "learning_rate": 1.9997913867071296e-05, "loss": 0.47935402393341064, "step": 229 }, { "epoch": 0.11367848758186087, "grad_norm": 0.8984825507205957, "learning_rate": 1.999774364502619e-05, "loss": 0.46203523874282837, "step": 230 }, { "epoch": 0.11417274187569504, "grad_norm": 0.8695917880315948, "learning_rate": 1.9997566748630274e-05, "loss": 0.4411412179470062, "step": 231 }, { "epoch": 0.11466699616952922, "grad_norm": 0.9063292151670944, "learning_rate": 1.9997383178001646e-05, "loss": 0.44424787163734436, "step": 232 }, { "epoch": 0.1151612504633634, "grad_norm": 0.9239108187837685, "learning_rate": 1.9997192933262853e-05, "loss": 0.4862042963504791, "step": 233 }, { "epoch": 0.11565550475719757, "grad_norm": 0.9583721120887143, "learning_rate": 1.99969960145409e-05, "loss": 0.49599340558052063, "step": 234 }, { "epoch": 0.11614975905103175, "grad_norm": 0.8373453660412895, "learning_rate": 1.999679242196725e-05, "loss": 0.49702027440071106, "step": 235 }, { "epoch": 0.11664401334486593, "grad_norm": 0.9122480348696357, "learning_rate": 1.9996582155677813e-05, "loss": 0.520037829875946, "step": 236 }, { "epoch": 0.11713826763870011, "grad_norm": 0.8765545420336399, "learning_rate": 1.999636521581296e-05, "loss": 0.4571160674095154, "step": 237 }, { "epoch": 0.11763252193253429, "grad_norm": 0.8739431997449725, "learning_rate": 1.9996141602517526e-05, "loss": 0.45602840185165405, "step": 238 }, { "epoch": 0.11812677622636847, "grad_norm": 0.8737753030098584, "learning_rate": 1.999591131594078e-05, "loss": 0.4909728169441223, "step": 239 }, { "epoch": 0.11862103052020265, "grad_norm": 0.9637438681008479, "learning_rate": 1.9995674356236468e-05, "loss": 0.47716090083122253, "step": 240 }, { "epoch": 0.11911528481403683, "grad_norm": 0.8781513787464966, "learning_rate": 1.9995430723562774e-05, "loss": 0.4449527859687805, "step": 241 }, { "epoch": 0.119609539107871, "grad_norm": 0.9278951723441426, "learning_rate": 1.9995180418082347e-05, "loss": 0.49069035053253174, "step": 242 }, { "epoch": 0.12010379340170518, "grad_norm": 0.8082383806465664, "learning_rate": 1.9994923439962286e-05, "loss": 0.506738543510437, "step": 243 }, { "epoch": 0.12059804769553935, "grad_norm": 0.7256243644120642, "learning_rate": 1.9994659789374145e-05, "loss": 0.38516658544540405, "step": 244 }, { "epoch": 0.12109230198937353, "grad_norm": 1.120005864402108, "learning_rate": 1.9994389466493942e-05, "loss": 0.49539780616760254, "step": 245 }, { "epoch": 0.1215865562832077, "grad_norm": 0.8099291045850996, "learning_rate": 1.999411247150213e-05, "loss": 0.4400706887245178, "step": 246 }, { "epoch": 0.12208081057704188, "grad_norm": 1.033732324753182, "learning_rate": 1.9993828804583625e-05, "loss": 0.48815736174583435, "step": 247 }, { "epoch": 0.12257506487087606, "grad_norm": 0.8506340248073136, "learning_rate": 1.999353846592781e-05, "loss": 0.42744773626327515, "step": 248 }, { "epoch": 0.12306931916471024, "grad_norm": 0.8847437809130215, "learning_rate": 1.9993241455728505e-05, "loss": 0.4370969235897064, "step": 249 }, { "epoch": 0.12356357345854442, "grad_norm": 0.8643380888364789, "learning_rate": 1.9992937774183988e-05, "loss": 0.4803960621356964, "step": 250 }, { "epoch": 0.1240578277523786, "grad_norm": 0.8986867692232635, "learning_rate": 1.9992627421496994e-05, "loss": 0.4614640474319458, "step": 251 }, { "epoch": 0.12455208204621278, "grad_norm": 0.819634526245566, "learning_rate": 1.9992310397874715e-05, "loss": 0.46626490354537964, "step": 252 }, { "epoch": 0.12504633634004694, "grad_norm": 0.8614062439986471, "learning_rate": 1.9991986703528784e-05, "loss": 0.4812886416912079, "step": 253 }, { "epoch": 0.12554059063388112, "grad_norm": 0.782352455662906, "learning_rate": 1.99916563386753e-05, "loss": 0.45037686824798584, "step": 254 }, { "epoch": 0.1260348449277153, "grad_norm": 0.8735972282090627, "learning_rate": 1.9991319303534804e-05, "loss": 0.48492124676704407, "step": 255 }, { "epoch": 0.12652909922154948, "grad_norm": 0.9123971905878313, "learning_rate": 1.9990975598332304e-05, "loss": 0.48825496435165405, "step": 256 }, { "epoch": 0.12702335351538366, "grad_norm": 0.9350748088966393, "learning_rate": 1.9990625223297244e-05, "loss": 0.4836634695529938, "step": 257 }, { "epoch": 0.12751760780921784, "grad_norm": 0.8091067369882244, "learning_rate": 1.9990268178663538e-05, "loss": 0.4632943272590637, "step": 258 }, { "epoch": 0.12801186210305202, "grad_norm": 0.8933963237824735, "learning_rate": 1.9989904464669533e-05, "loss": 0.4601137042045593, "step": 259 }, { "epoch": 0.1285061163968862, "grad_norm": 0.956219889400008, "learning_rate": 1.998953408155805e-05, "loss": 0.4390139579772949, "step": 260 }, { "epoch": 0.12900037069072037, "grad_norm": 0.8209256250218969, "learning_rate": 1.9989157029576348e-05, "loss": 0.45749080181121826, "step": 261 }, { "epoch": 0.12949462498455455, "grad_norm": 0.8687280720196128, "learning_rate": 1.998877330897614e-05, "loss": 0.4490616023540497, "step": 262 }, { "epoch": 0.12998887927838873, "grad_norm": 0.8048623785766325, "learning_rate": 1.998838292001359e-05, "loss": 0.4819987714290619, "step": 263 }, { "epoch": 0.1304831335722229, "grad_norm": 0.8512266303867803, "learning_rate": 1.9987985862949325e-05, "loss": 0.4448384940624237, "step": 264 }, { "epoch": 0.1309773878660571, "grad_norm": 0.8699526878628875, "learning_rate": 1.9987582138048405e-05, "loss": 0.4574149549007416, "step": 265 }, { "epoch": 0.13147164215989127, "grad_norm": 0.8239086741829158, "learning_rate": 1.9987171745580353e-05, "loss": 0.4765186607837677, "step": 266 }, { "epoch": 0.13196589645372545, "grad_norm": 0.8859727328667625, "learning_rate": 1.998675468581915e-05, "loss": 0.4900081753730774, "step": 267 }, { "epoch": 0.13246015074755962, "grad_norm": 0.8200731674424109, "learning_rate": 1.9986330959043206e-05, "loss": 0.433933287858963, "step": 268 }, { "epoch": 0.1329544050413938, "grad_norm": 0.8424887851968712, "learning_rate": 1.9985900565535403e-05, "loss": 0.452491819858551, "step": 269 }, { "epoch": 0.13344865933522798, "grad_norm": 0.8454499255279871, "learning_rate": 1.9985463505583062e-05, "loss": 0.4583294987678528, "step": 270 }, { "epoch": 0.13394291362906216, "grad_norm": 0.7993545503780815, "learning_rate": 1.9985019779477958e-05, "loss": 0.43183961510658264, "step": 271 }, { "epoch": 0.13443716792289634, "grad_norm": 0.8548370246393396, "learning_rate": 1.998456938751632e-05, "loss": 0.48075324296951294, "step": 272 }, { "epoch": 0.13493142221673052, "grad_norm": 0.9002412472414919, "learning_rate": 1.9984112329998825e-05, "loss": 0.5131007432937622, "step": 273 }, { "epoch": 0.1354256765105647, "grad_norm": 0.9730858409317547, "learning_rate": 1.998364860723059e-05, "loss": 0.4841446876525879, "step": 274 }, { "epoch": 0.13591993080439888, "grad_norm": 0.845168898875427, "learning_rate": 1.9983178219521194e-05, "loss": 0.5001078248023987, "step": 275 }, { "epoch": 0.13641418509823305, "grad_norm": 0.9216453803321015, "learning_rate": 1.998270116718466e-05, "loss": 0.44851893186569214, "step": 276 }, { "epoch": 0.1369084393920672, "grad_norm": 0.8496437780068066, "learning_rate": 1.9982217450539464e-05, "loss": 0.4635714888572693, "step": 277 }, { "epoch": 0.13740269368590138, "grad_norm": 0.8697167139912243, "learning_rate": 1.9981727069908525e-05, "loss": 0.4171838164329529, "step": 278 }, { "epoch": 0.13789694797973556, "grad_norm": 0.9173222191020198, "learning_rate": 1.9981230025619216e-05, "loss": 0.4819942116737366, "step": 279 }, { "epoch": 0.13839120227356974, "grad_norm": 0.965585018194969, "learning_rate": 1.998072631800336e-05, "loss": 0.47878971695899963, "step": 280 }, { "epoch": 0.13888545656740392, "grad_norm": 0.8354999533998939, "learning_rate": 1.9980215947397217e-05, "loss": 0.4436519145965576, "step": 281 }, { "epoch": 0.1393797108612381, "grad_norm": 0.9615471937507843, "learning_rate": 1.9979698914141507e-05, "loss": 0.4633050262928009, "step": 282 }, { "epoch": 0.13987396515507228, "grad_norm": 0.8419828093645744, "learning_rate": 1.9979175218581397e-05, "loss": 0.4264826774597168, "step": 283 }, { "epoch": 0.14036821944890646, "grad_norm": 0.9397240311894202, "learning_rate": 1.9978644861066493e-05, "loss": 0.47763916850090027, "step": 284 }, { "epoch": 0.14086247374274063, "grad_norm": 0.9621046785661004, "learning_rate": 1.997810784195086e-05, "loss": 0.44895434379577637, "step": 285 }, { "epoch": 0.1413567280365748, "grad_norm": 0.9045420673708359, "learning_rate": 1.9977564161593e-05, "loss": 0.4287600517272949, "step": 286 }, { "epoch": 0.141850982330409, "grad_norm": 0.9070406248365095, "learning_rate": 1.997701382035587e-05, "loss": 0.44175297021865845, "step": 287 }, { "epoch": 0.14234523662424317, "grad_norm": 0.9409958894859969, "learning_rate": 1.9976456818606868e-05, "loss": 0.4393232464790344, "step": 288 }, { "epoch": 0.14283949091807735, "grad_norm": 0.9574764348211552, "learning_rate": 1.9975893156717836e-05, "loss": 0.4600023329257965, "step": 289 }, { "epoch": 0.14333374521191153, "grad_norm": 0.9582932704552442, "learning_rate": 1.9975322835065075e-05, "loss": 0.4819300174713135, "step": 290 }, { "epoch": 0.1438279995057457, "grad_norm": 0.8798665685233671, "learning_rate": 1.9974745854029318e-05, "loss": 0.4391498267650604, "step": 291 }, { "epoch": 0.14432225379957989, "grad_norm": 0.8278978827145046, "learning_rate": 1.9974162213995748e-05, "loss": 0.43435904383659363, "step": 292 }, { "epoch": 0.14481650809341406, "grad_norm": 0.8555919001416697, "learning_rate": 1.9973571915354e-05, "loss": 0.43575727939605713, "step": 293 }, { "epoch": 0.14531076238724824, "grad_norm": 0.847472972308698, "learning_rate": 1.9972974958498145e-05, "loss": 0.39998459815979004, "step": 294 }, { "epoch": 0.14580501668108242, "grad_norm": 0.9068432330089449, "learning_rate": 1.9972371343826705e-05, "loss": 0.4620361030101776, "step": 295 }, { "epoch": 0.1462992709749166, "grad_norm": 0.9496965104492539, "learning_rate": 1.9971761071742644e-05, "loss": 0.5172264575958252, "step": 296 }, { "epoch": 0.14679352526875078, "grad_norm": 0.9234160870013586, "learning_rate": 1.997114414265337e-05, "loss": 0.4685489535331726, "step": 297 }, { "epoch": 0.14728777956258496, "grad_norm": 0.8830728533856737, "learning_rate": 1.9970520556970735e-05, "loss": 0.4346499741077423, "step": 298 }, { "epoch": 0.14778203385641914, "grad_norm": 0.8462127222831192, "learning_rate": 1.996989031511104e-05, "loss": 0.4051141142845154, "step": 299 }, { "epoch": 0.14827628815025332, "grad_norm": 1.5751283315817302, "learning_rate": 1.996925341749502e-05, "loss": 0.4862591028213501, "step": 300 }, { "epoch": 0.1487705424440875, "grad_norm": 0.9475006076143342, "learning_rate": 1.996860986454787e-05, "loss": 0.44075754284858704, "step": 301 }, { "epoch": 0.14926479673792167, "grad_norm": 0.8707373783945862, "learning_rate": 1.99679596566992e-05, "loss": 0.44321805238723755, "step": 302 }, { "epoch": 0.14975905103175585, "grad_norm": 0.8195768056986794, "learning_rate": 1.996730279438309e-05, "loss": 0.4468157887458801, "step": 303 }, { "epoch": 0.15025330532559, "grad_norm": 0.9918503423974457, "learning_rate": 1.996663927803805e-05, "loss": 0.48698270320892334, "step": 304 }, { "epoch": 0.15074755961942418, "grad_norm": 0.9116215117394889, "learning_rate": 1.9965969108107032e-05, "loss": 0.41898253560066223, "step": 305 }, { "epoch": 0.15124181391325836, "grad_norm": 0.9221438157249551, "learning_rate": 1.9965292285037437e-05, "loss": 0.4827130436897278, "step": 306 }, { "epoch": 0.15173606820709254, "grad_norm": 0.8314057300557679, "learning_rate": 1.99646088092811e-05, "loss": 0.4219037592411041, "step": 307 }, { "epoch": 0.15223032250092672, "grad_norm": 0.8392045773293594, "learning_rate": 1.9963918681294298e-05, "loss": 0.4431123733520508, "step": 308 }, { "epoch": 0.1527245767947609, "grad_norm": 0.8500815118931239, "learning_rate": 1.996322190153775e-05, "loss": 0.4161941409111023, "step": 309 }, { "epoch": 0.15321883108859508, "grad_norm": 0.9107651666369411, "learning_rate": 1.9962518470476617e-05, "loss": 0.4774768650531769, "step": 310 }, { "epoch": 0.15371308538242925, "grad_norm": 0.8037347887475985, "learning_rate": 1.9961808388580503e-05, "loss": 0.4196036159992218, "step": 311 }, { "epoch": 0.15420733967626343, "grad_norm": 1.0067362464519019, "learning_rate": 1.996109165632344e-05, "loss": 0.44241398572921753, "step": 312 }, { "epoch": 0.1547015939700976, "grad_norm": 0.888150506782497, "learning_rate": 1.996036827418392e-05, "loss": 0.47662627696990967, "step": 313 }, { "epoch": 0.1551958482639318, "grad_norm": 0.8458159023673953, "learning_rate": 1.9959638242644855e-05, "loss": 0.4241487979888916, "step": 314 }, { "epoch": 0.15569010255776597, "grad_norm": 0.9355978957071136, "learning_rate": 1.9958901562193605e-05, "loss": 0.45686113834381104, "step": 315 }, { "epoch": 0.15618435685160015, "grad_norm": 0.944155507976385, "learning_rate": 1.9958158233321968e-05, "loss": 0.4154825806617737, "step": 316 }, { "epoch": 0.15667861114543433, "grad_norm": 0.9827195710672626, "learning_rate": 1.9957408256526176e-05, "loss": 0.4705435037612915, "step": 317 }, { "epoch": 0.1571728654392685, "grad_norm": 0.9880074034620054, "learning_rate": 1.9956651632306908e-05, "loss": 0.4367898404598236, "step": 318 }, { "epoch": 0.15766711973310268, "grad_norm": 0.9294773909083144, "learning_rate": 1.9955888361169272e-05, "loss": 0.4668901264667511, "step": 319 }, { "epoch": 0.15816137402693686, "grad_norm": 0.9543525396859661, "learning_rate": 1.995511844362282e-05, "loss": 0.46429356932640076, "step": 320 }, { "epoch": 0.15865562832077104, "grad_norm": 0.9206239653453478, "learning_rate": 1.9954341880181536e-05, "loss": 0.4582952857017517, "step": 321 }, { "epoch": 0.15914988261460522, "grad_norm": 0.9460762127599929, "learning_rate": 1.9953558671363843e-05, "loss": 0.45110762119293213, "step": 322 }, { "epoch": 0.1596441369084394, "grad_norm": 0.9441078381056233, "learning_rate": 1.99527688176926e-05, "loss": 0.4049065113067627, "step": 323 }, { "epoch": 0.16013839120227358, "grad_norm": 0.8033040053333058, "learning_rate": 1.9951972319695105e-05, "loss": 0.40884825587272644, "step": 324 }, { "epoch": 0.16063264549610776, "grad_norm": 0.902465277703788, "learning_rate": 1.9951169177903084e-05, "loss": 0.4416786730289459, "step": 325 }, { "epoch": 0.16112689978994194, "grad_norm": 0.8396124025463547, "learning_rate": 1.9950359392852704e-05, "loss": 0.4318765103816986, "step": 326 }, { "epoch": 0.16162115408377611, "grad_norm": 0.9197188335811614, "learning_rate": 1.9949542965084564e-05, "loss": 0.4415965974330902, "step": 327 }, { "epoch": 0.1621154083776103, "grad_norm": 0.9816748337776936, "learning_rate": 1.9948719895143703e-05, "loss": 0.4816298186779022, "step": 328 }, { "epoch": 0.16260966267144447, "grad_norm": 0.8960734361029558, "learning_rate": 1.9947890183579594e-05, "loss": 0.4329088032245636, "step": 329 }, { "epoch": 0.16310391696527862, "grad_norm": 0.9960918612087606, "learning_rate": 1.9947053830946134e-05, "loss": 0.43193015456199646, "step": 330 }, { "epoch": 0.1635981712591128, "grad_norm": 0.9310501291263382, "learning_rate": 1.994621083780166e-05, "loss": 0.48738086223602295, "step": 331 }, { "epoch": 0.16409242555294698, "grad_norm": 0.9523291617618251, "learning_rate": 1.9945361204708948e-05, "loss": 0.4707815647125244, "step": 332 }, { "epoch": 0.16458667984678116, "grad_norm": 0.8438149141988297, "learning_rate": 1.9944504932235198e-05, "loss": 0.4190637469291687, "step": 333 }, { "epoch": 0.16508093414061534, "grad_norm": 0.9348901251563362, "learning_rate": 1.9943642020952042e-05, "loss": 0.45955735445022583, "step": 334 }, { "epoch": 0.16557518843444952, "grad_norm": 0.9334033255095994, "learning_rate": 1.9942772471435555e-05, "loss": 0.4675702750682831, "step": 335 }, { "epoch": 0.1660694427282837, "grad_norm": 0.9694338385909206, "learning_rate": 1.9941896284266224e-05, "loss": 0.42571327090263367, "step": 336 }, { "epoch": 0.16656369702211787, "grad_norm": 0.827954024094364, "learning_rate": 1.994101346002899e-05, "loss": 0.4341443181037903, "step": 337 }, { "epoch": 0.16705795131595205, "grad_norm": 0.9227161087353433, "learning_rate": 1.9940123999313214e-05, "loss": 0.4473035931587219, "step": 338 }, { "epoch": 0.16755220560978623, "grad_norm": 0.9514215023205275, "learning_rate": 1.9939227902712676e-05, "loss": 0.4692152142524719, "step": 339 }, { "epoch": 0.1680464599036204, "grad_norm": 0.902462533797338, "learning_rate": 1.9938325170825607e-05, "loss": 0.4169067442417145, "step": 340 }, { "epoch": 0.1685407141974546, "grad_norm": 0.8958693793994358, "learning_rate": 1.9937415804254657e-05, "loss": 0.451092928647995, "step": 341 }, { "epoch": 0.16903496849128877, "grad_norm": 0.9439820250269497, "learning_rate": 1.99364998036069e-05, "loss": 0.39640212059020996, "step": 342 }, { "epoch": 0.16952922278512295, "grad_norm": 0.9953253959869931, "learning_rate": 1.9935577169493854e-05, "loss": 0.46396374702453613, "step": 343 }, { "epoch": 0.17002347707895712, "grad_norm": 0.940542166338043, "learning_rate": 1.9934647902531453e-05, "loss": 0.4343748390674591, "step": 344 }, { "epoch": 0.1705177313727913, "grad_norm": 0.8926095624124082, "learning_rate": 1.9933712003340056e-05, "loss": 0.4353589713573456, "step": 345 }, { "epoch": 0.17101198566662548, "grad_norm": 0.981244679678695, "learning_rate": 1.9932769472544464e-05, "loss": 0.4423677623271942, "step": 346 }, { "epoch": 0.17150623996045966, "grad_norm": 0.9632090771111401, "learning_rate": 1.9931820310773894e-05, "loss": 0.4382045865058899, "step": 347 }, { "epoch": 0.17200049425429384, "grad_norm": 0.9042153187184925, "learning_rate": 1.993086451866199e-05, "loss": 0.3966183066368103, "step": 348 }, { "epoch": 0.17249474854812802, "grad_norm": 0.9998736444681166, "learning_rate": 1.9929902096846833e-05, "loss": 0.48624011874198914, "step": 349 }, { "epoch": 0.1729890028419622, "grad_norm": 0.9399569652966117, "learning_rate": 1.9928933045970913e-05, "loss": 0.4442569315433502, "step": 350 }, { "epoch": 0.17348325713579638, "grad_norm": 0.9204808269523502, "learning_rate": 1.992795736668116e-05, "loss": 0.42499929666519165, "step": 351 }, { "epoch": 0.17397751142963055, "grad_norm": 0.9507435140290256, "learning_rate": 1.9926975059628923e-05, "loss": 0.4230741858482361, "step": 352 }, { "epoch": 0.17447176572346473, "grad_norm": 0.9092303670359448, "learning_rate": 1.9925986125469974e-05, "loss": 0.4273882806301117, "step": 353 }, { "epoch": 0.1749660200172989, "grad_norm": 0.9603670891238569, "learning_rate": 1.9924990564864513e-05, "loss": 0.45237618684768677, "step": 354 }, { "epoch": 0.1754602743111331, "grad_norm": 0.8737901526941092, "learning_rate": 1.9923988378477165e-05, "loss": 0.4115524888038635, "step": 355 }, { "epoch": 0.17595452860496724, "grad_norm": 0.8886450314145863, "learning_rate": 1.9922979566976968e-05, "loss": 0.4476633071899414, "step": 356 }, { "epoch": 0.17644878289880142, "grad_norm": 1.155944411883778, "learning_rate": 1.9921964131037398e-05, "loss": 0.44930100440979004, "step": 357 }, { "epoch": 0.1769430371926356, "grad_norm": 1.0356351975379994, "learning_rate": 1.9920942071336338e-05, "loss": 0.4714374244213104, "step": 358 }, { "epoch": 0.17743729148646978, "grad_norm": 0.9469405731486913, "learning_rate": 1.9919913388556105e-05, "loss": 0.47696003317832947, "step": 359 }, { "epoch": 0.17793154578030396, "grad_norm": 0.9021123492009391, "learning_rate": 1.9918878083383434e-05, "loss": 0.44937074184417725, "step": 360 }, { "epoch": 0.17842580007413814, "grad_norm": 0.9771832594876818, "learning_rate": 1.9917836156509472e-05, "loss": 0.44937658309936523, "step": 361 }, { "epoch": 0.17892005436797231, "grad_norm": 0.8240548100976023, "learning_rate": 1.9916787608629805e-05, "loss": 0.42068418860435486, "step": 362 }, { "epoch": 0.1794143086618065, "grad_norm": 0.9112160927316303, "learning_rate": 1.9915732440444428e-05, "loss": 0.3791036605834961, "step": 363 }, { "epoch": 0.17990856295564067, "grad_norm": 0.8982890263422821, "learning_rate": 1.991467065265775e-05, "loss": 0.401694118976593, "step": 364 }, { "epoch": 0.18040281724947485, "grad_norm": 0.9743587318559909, "learning_rate": 1.9913602245978602e-05, "loss": 0.44095057249069214, "step": 365 }, { "epoch": 0.18089707154330903, "grad_norm": 1.0125028049881057, "learning_rate": 1.9912527221120248e-05, "loss": 0.435880184173584, "step": 366 }, { "epoch": 0.1813913258371432, "grad_norm": 0.9329716691545672, "learning_rate": 1.991144557880035e-05, "loss": 0.4147350490093231, "step": 367 }, { "epoch": 0.1818855801309774, "grad_norm": 1.0077861725089856, "learning_rate": 1.9910357319741006e-05, "loss": 0.4191502630710602, "step": 368 }, { "epoch": 0.18237983442481157, "grad_norm": 0.9334667001994715, "learning_rate": 1.9909262444668715e-05, "loss": 0.41988956928253174, "step": 369 }, { "epoch": 0.18287408871864574, "grad_norm": 1.0279430559635638, "learning_rate": 1.99081609543144e-05, "loss": 0.47451251745224, "step": 370 }, { "epoch": 0.18336834301247992, "grad_norm": 0.9591522165165333, "learning_rate": 1.9907052849413408e-05, "loss": 0.44665899872779846, "step": 371 }, { "epoch": 0.1838625973063141, "grad_norm": 1.0147189696208934, "learning_rate": 1.990593813070548e-05, "loss": 0.40575331449508667, "step": 372 }, { "epoch": 0.18435685160014828, "grad_norm": 0.869456919545876, "learning_rate": 1.99048167989348e-05, "loss": 0.40580621361732483, "step": 373 }, { "epoch": 0.18485110589398246, "grad_norm": 0.9514367145479501, "learning_rate": 1.9903688854849948e-05, "loss": 0.461843878030777, "step": 374 }, { "epoch": 0.18534536018781664, "grad_norm": 0.9237949473924573, "learning_rate": 1.990255429920392e-05, "loss": 0.38992881774902344, "step": 375 }, { "epoch": 0.18583961448165082, "grad_norm": 0.8831901142276523, "learning_rate": 1.9901413132754133e-05, "loss": 0.4288073480129242, "step": 376 }, { "epoch": 0.186333868775485, "grad_norm": 0.9233387492673684, "learning_rate": 1.9900265356262418e-05, "loss": 0.4376278221607208, "step": 377 }, { "epoch": 0.18682812306931917, "grad_norm": 1.0362403856880367, "learning_rate": 1.9899110970495e-05, "loss": 0.4127569794654846, "step": 378 }, { "epoch": 0.18732237736315335, "grad_norm": 0.9507974239376735, "learning_rate": 1.9897949976222543e-05, "loss": 0.4221431016921997, "step": 379 }, { "epoch": 0.18781663165698753, "grad_norm": 0.9433678538632697, "learning_rate": 1.9896782374220108e-05, "loss": 0.3540682792663574, "step": 380 }, { "epoch": 0.1883108859508217, "grad_norm": 0.9261378158924178, "learning_rate": 1.9895608165267165e-05, "loss": 0.3746468424797058, "step": 381 }, { "epoch": 0.1888051402446559, "grad_norm": 0.885989840984364, "learning_rate": 1.9894427350147602e-05, "loss": 0.44986462593078613, "step": 382 }, { "epoch": 0.18929939453849004, "grad_norm": 0.990953109983041, "learning_rate": 1.9893239929649716e-05, "loss": 0.38902726769447327, "step": 383 }, { "epoch": 0.18979364883232422, "grad_norm": 0.9780134618767543, "learning_rate": 1.9892045904566212e-05, "loss": 0.43202030658721924, "step": 384 }, { "epoch": 0.1902879031261584, "grad_norm": 0.9892650612917288, "learning_rate": 1.9890845275694197e-05, "loss": 0.3984760344028473, "step": 385 }, { "epoch": 0.19078215741999258, "grad_norm": 0.9818585745680383, "learning_rate": 1.9889638043835203e-05, "loss": 0.41927874088287354, "step": 386 }, { "epoch": 0.19127641171382676, "grad_norm": 0.8767703705433573, "learning_rate": 1.9888424209795153e-05, "loss": 0.3809741735458374, "step": 387 }, { "epoch": 0.19177066600766093, "grad_norm": 0.9482820311569345, "learning_rate": 1.988720377438439e-05, "loss": 0.4237920045852661, "step": 388 }, { "epoch": 0.1922649203014951, "grad_norm": 1.0327070863618417, "learning_rate": 1.9885976738417662e-05, "loss": 0.4065277576446533, "step": 389 }, { "epoch": 0.1927591745953293, "grad_norm": 0.9237977569787911, "learning_rate": 1.9884743102714116e-05, "loss": 0.41154375672340393, "step": 390 }, { "epoch": 0.19325342888916347, "grad_norm": 1.2326124039761357, "learning_rate": 1.9883502868097304e-05, "loss": 0.46544453501701355, "step": 391 }, { "epoch": 0.19374768318299765, "grad_norm": 0.9587510645484782, "learning_rate": 1.9882256035395204e-05, "loss": 0.41279950737953186, "step": 392 }, { "epoch": 0.19424193747683183, "grad_norm": 0.861022204519604, "learning_rate": 1.988100260544017e-05, "loss": 0.40083667635917664, "step": 393 }, { "epoch": 0.194736191770666, "grad_norm": 0.8790820180214292, "learning_rate": 1.9879742579068976e-05, "loss": 0.40041595697402954, "step": 394 }, { "epoch": 0.19523044606450018, "grad_norm": 1.0258873082657662, "learning_rate": 1.9878475957122803e-05, "loss": 0.45317894220352173, "step": 395 }, { "epoch": 0.19572470035833436, "grad_norm": 0.9348755525455025, "learning_rate": 1.987720274044723e-05, "loss": 0.4163329005241394, "step": 396 }, { "epoch": 0.19621895465216854, "grad_norm": 0.9706842353465618, "learning_rate": 1.9875922929892235e-05, "loss": 0.4252028167247772, "step": 397 }, { "epoch": 0.19671320894600272, "grad_norm": 0.9127590943033566, "learning_rate": 1.9874636526312202e-05, "loss": 0.40558624267578125, "step": 398 }, { "epoch": 0.1972074632398369, "grad_norm": 0.9762994418484081, "learning_rate": 1.9873343530565913e-05, "loss": 0.4352114796638489, "step": 399 }, { "epoch": 0.19770171753367108, "grad_norm": 0.9123271316620398, "learning_rate": 1.9872043943516556e-05, "loss": 0.4076879024505615, "step": 400 }, { "epoch": 0.19819597182750526, "grad_norm": 0.9627661884342358, "learning_rate": 1.987073776603172e-05, "loss": 0.4406166672706604, "step": 401 }, { "epoch": 0.19869022612133944, "grad_norm": 0.8833048421451372, "learning_rate": 1.9869424998983386e-05, "loss": 0.3974360227584839, "step": 402 }, { "epoch": 0.19918448041517361, "grad_norm": 0.8808806866223299, "learning_rate": 1.9868105643247934e-05, "loss": 0.4297831058502197, "step": 403 }, { "epoch": 0.1996787347090078, "grad_norm": 0.9793340004481055, "learning_rate": 1.986677969970616e-05, "loss": 0.4214811623096466, "step": 404 }, { "epoch": 0.20017298900284197, "grad_norm": 0.8979387674277745, "learning_rate": 1.9865447169243234e-05, "loss": 0.37227538228034973, "step": 405 }, { "epoch": 0.20066724329667615, "grad_norm": 0.9492862396661451, "learning_rate": 1.986410805274874e-05, "loss": 0.4367320239543915, "step": 406 }, { "epoch": 0.20116149759051033, "grad_norm": 0.9753990450504955, "learning_rate": 1.9862762351116646e-05, "loss": 0.4327583909034729, "step": 407 }, { "epoch": 0.2016557518843445, "grad_norm": 0.9742332984468446, "learning_rate": 1.9861410065245332e-05, "loss": 0.45309939980506897, "step": 408 }, { "epoch": 0.20215000617817866, "grad_norm": 0.9433373475369933, "learning_rate": 1.986005119603756e-05, "loss": 0.39196106791496277, "step": 409 }, { "epoch": 0.20264426047201284, "grad_norm": 0.9834536288459345, "learning_rate": 1.985868574440049e-05, "loss": 0.4037923812866211, "step": 410 }, { "epoch": 0.20313851476584702, "grad_norm": 0.9331733674072598, "learning_rate": 1.9857313711245684e-05, "loss": 0.41214677691459656, "step": 411 }, { "epoch": 0.2036327690596812, "grad_norm": 0.9676344806099859, "learning_rate": 1.9855935097489087e-05, "loss": 0.4265231192111969, "step": 412 }, { "epoch": 0.20412702335351537, "grad_norm": 0.9398051984820485, "learning_rate": 1.9854549904051046e-05, "loss": 0.4245712161064148, "step": 413 }, { "epoch": 0.20462127764734955, "grad_norm": 1.0688359248893853, "learning_rate": 1.985315813185629e-05, "loss": 0.36296984553337097, "step": 414 }, { "epoch": 0.20511553194118373, "grad_norm": 0.8752111789079005, "learning_rate": 1.985175978183395e-05, "loss": 0.3982447683811188, "step": 415 }, { "epoch": 0.2056097862350179, "grad_norm": 0.9696106773901182, "learning_rate": 1.9850354854917543e-05, "loss": 0.4087941646575928, "step": 416 }, { "epoch": 0.2061040405288521, "grad_norm": 0.9068111697273192, "learning_rate": 1.9848943352044982e-05, "loss": 0.4147699177265167, "step": 417 }, { "epoch": 0.20659829482268627, "grad_norm": 0.9679150237458849, "learning_rate": 1.9847525274158562e-05, "loss": 0.42588335275650024, "step": 418 }, { "epoch": 0.20709254911652045, "grad_norm": 0.8455247598954041, "learning_rate": 1.9846100622204975e-05, "loss": 0.42607247829437256, "step": 419 }, { "epoch": 0.20758680341035463, "grad_norm": 0.8383230576354441, "learning_rate": 1.9844669397135292e-05, "loss": 0.3600303530693054, "step": 420 }, { "epoch": 0.2080810577041888, "grad_norm": 0.9989742736396935, "learning_rate": 1.9843231599904988e-05, "loss": 0.47888651490211487, "step": 421 }, { "epoch": 0.20857531199802298, "grad_norm": 0.9050077435994102, "learning_rate": 1.9841787231473906e-05, "loss": 0.3789903521537781, "step": 422 }, { "epoch": 0.20906956629185716, "grad_norm": 0.9737429395044322, "learning_rate": 1.9840336292806292e-05, "loss": 0.3682858943939209, "step": 423 }, { "epoch": 0.20956382058569134, "grad_norm": 0.9565489819657318, "learning_rate": 1.9838878784870772e-05, "loss": 0.42071375250816345, "step": 424 }, { "epoch": 0.21005807487952552, "grad_norm": 0.8997646005118014, "learning_rate": 1.9837414708640353e-05, "loss": 0.4258945882320404, "step": 425 }, { "epoch": 0.2105523291733597, "grad_norm": 0.8773247199262179, "learning_rate": 1.9835944065092433e-05, "loss": 0.42377644777297974, "step": 426 }, { "epoch": 0.21104658346719388, "grad_norm": 0.8695535067011908, "learning_rate": 1.9834466855208795e-05, "loss": 0.35860198736190796, "step": 427 }, { "epoch": 0.21154083776102806, "grad_norm": 0.8547283257189083, "learning_rate": 1.9832983079975606e-05, "loss": 0.3498537242412567, "step": 428 }, { "epoch": 0.21203509205486223, "grad_norm": 0.9645117506541977, "learning_rate": 1.9831492740383405e-05, "loss": 0.3779754042625427, "step": 429 }, { "epoch": 0.2125293463486964, "grad_norm": 0.9052431386511324, "learning_rate": 1.9829995837427124e-05, "loss": 0.3574570119380951, "step": 430 }, { "epoch": 0.2130236006425306, "grad_norm": 0.9528105437455127, "learning_rate": 1.982849237210608e-05, "loss": 0.40678369998931885, "step": 431 }, { "epoch": 0.21351785493636477, "grad_norm": 1.0383565017869998, "learning_rate": 1.9826982345423955e-05, "loss": 0.4392494261264801, "step": 432 }, { "epoch": 0.21401210923019895, "grad_norm": 0.9595788699726988, "learning_rate": 1.982546575838883e-05, "loss": 0.3858703374862671, "step": 433 }, { "epoch": 0.21450636352403313, "grad_norm": 1.022569300933342, "learning_rate": 1.9823942612013153e-05, "loss": 0.4427873492240906, "step": 434 }, { "epoch": 0.21500061781786728, "grad_norm": 1.0243841009335557, "learning_rate": 1.9822412907313756e-05, "loss": 0.40610629320144653, "step": 435 }, { "epoch": 0.21549487211170146, "grad_norm": 1.0647698522638835, "learning_rate": 1.9820876645311847e-05, "loss": 0.4181024432182312, "step": 436 }, { "epoch": 0.21598912640553564, "grad_norm": 0.9101041422869367, "learning_rate": 1.981933382703301e-05, "loss": 0.39591747522354126, "step": 437 }, { "epoch": 0.21648338069936982, "grad_norm": 1.0250837449595331, "learning_rate": 1.9817784453507215e-05, "loss": 0.4326947033405304, "step": 438 }, { "epoch": 0.216977634993204, "grad_norm": 1.0886150838818542, "learning_rate": 1.98162285257688e-05, "loss": 0.42645522952079773, "step": 439 }, { "epoch": 0.21747188928703817, "grad_norm": 0.978930417047399, "learning_rate": 1.9814666044856472e-05, "loss": 0.37372538447380066, "step": 440 }, { "epoch": 0.21796614358087235, "grad_norm": 1.0917263900138416, "learning_rate": 1.9813097011813328e-05, "loss": 0.44066423177719116, "step": 441 }, { "epoch": 0.21846039787470653, "grad_norm": 0.9730835844652884, "learning_rate": 1.9811521427686833e-05, "loss": 0.39892369508743286, "step": 442 }, { "epoch": 0.2189546521685407, "grad_norm": 1.003964491264553, "learning_rate": 1.980993929352882e-05, "loss": 0.43497514724731445, "step": 443 }, { "epoch": 0.2194489064623749, "grad_norm": 0.9716014988350979, "learning_rate": 1.9808350610395504e-05, "loss": 0.3810148239135742, "step": 444 }, { "epoch": 0.21994316075620907, "grad_norm": 1.0156931642150575, "learning_rate": 1.9806755379347465e-05, "loss": 0.3952462673187256, "step": 445 }, { "epoch": 0.22043741505004324, "grad_norm": 0.8774607433571091, "learning_rate": 1.9805153601449655e-05, "loss": 0.39168232679367065, "step": 446 }, { "epoch": 0.22093166934387742, "grad_norm": 0.8991272209071992, "learning_rate": 1.98035452777714e-05, "loss": 0.38572901487350464, "step": 447 }, { "epoch": 0.2214259236377116, "grad_norm": 0.9468757778036829, "learning_rate": 1.980193040938639e-05, "loss": 0.40514758229255676, "step": 448 }, { "epoch": 0.22192017793154578, "grad_norm": 0.9858758484436677, "learning_rate": 1.9800308997372696e-05, "loss": 0.4289678931236267, "step": 449 }, { "epoch": 0.22241443222537996, "grad_norm": 1.074259689420517, "learning_rate": 1.979868104281274e-05, "loss": 0.4082314670085907, "step": 450 }, { "epoch": 0.22290868651921414, "grad_norm": 0.8691392363656588, "learning_rate": 1.979704654679333e-05, "loss": 0.3819827735424042, "step": 451 }, { "epoch": 0.22340294081304832, "grad_norm": 0.9538480526249539, "learning_rate": 1.979540551040563e-05, "loss": 0.42063748836517334, "step": 452 }, { "epoch": 0.2238971951068825, "grad_norm": 0.9510560747426838, "learning_rate": 1.9793757934745166e-05, "loss": 0.41634586453437805, "step": 453 }, { "epoch": 0.22439144940071667, "grad_norm": 0.9597511417746731, "learning_rate": 1.979210382091184e-05, "loss": 0.4151400625705719, "step": 454 }, { "epoch": 0.22488570369455085, "grad_norm": 0.9461794779595009, "learning_rate": 1.9790443170009918e-05, "loss": 0.40609729290008545, "step": 455 }, { "epoch": 0.22537995798838503, "grad_norm": 0.9000627758052128, "learning_rate": 1.9788775983148022e-05, "loss": 0.38967129588127136, "step": 456 }, { "epoch": 0.2258742122822192, "grad_norm": 0.9437292574418441, "learning_rate": 1.978710226143915e-05, "loss": 0.3833470940589905, "step": 457 }, { "epoch": 0.2263684665760534, "grad_norm": 1.0849111028533656, "learning_rate": 1.978542200600064e-05, "loss": 0.42918887734413147, "step": 458 }, { "epoch": 0.22686272086988757, "grad_norm": 0.8891911900981012, "learning_rate": 1.978373521795422e-05, "loss": 0.3793666660785675, "step": 459 }, { "epoch": 0.22735697516372175, "grad_norm": 0.9329571379921634, "learning_rate": 1.978204189842596e-05, "loss": 0.3885256350040436, "step": 460 }, { "epoch": 0.22785122945755593, "grad_norm": 0.9612859575938862, "learning_rate": 1.97803420485463e-05, "loss": 0.4003330171108246, "step": 461 }, { "epoch": 0.22834548375139008, "grad_norm": 1.0153934251086247, "learning_rate": 1.9778635669450026e-05, "loss": 0.4050712585449219, "step": 462 }, { "epoch": 0.22883973804522426, "grad_norm": 0.9955917551783842, "learning_rate": 1.9776922762276304e-05, "loss": 0.4003967046737671, "step": 463 }, { "epoch": 0.22933399233905843, "grad_norm": 1.0625378898456048, "learning_rate": 1.9775203328168643e-05, "loss": 0.4506968855857849, "step": 464 }, { "epoch": 0.2298282466328926, "grad_norm": 0.9586656507624374, "learning_rate": 1.9773477368274906e-05, "loss": 0.3947281241416931, "step": 465 }, { "epoch": 0.2303225009267268, "grad_norm": 1.0193199601021392, "learning_rate": 1.9771744883747326e-05, "loss": 0.4166758954524994, "step": 466 }, { "epoch": 0.23081675522056097, "grad_norm": 0.9824293606770813, "learning_rate": 1.9770005875742484e-05, "loss": 0.40400344133377075, "step": 467 }, { "epoch": 0.23131100951439515, "grad_norm": 0.9404029827561814, "learning_rate": 1.9768260345421312e-05, "loss": 0.4143296480178833, "step": 468 }, { "epoch": 0.23180526380822933, "grad_norm": 1.0496759638208417, "learning_rate": 1.976650829394911e-05, "loss": 0.39128193259239197, "step": 469 }, { "epoch": 0.2322995181020635, "grad_norm": 1.033325283396431, "learning_rate": 1.9764749722495514e-05, "loss": 0.4305758476257324, "step": 470 }, { "epoch": 0.23279377239589769, "grad_norm": 0.9791981730439014, "learning_rate": 1.9762984632234523e-05, "loss": 0.41711747646331787, "step": 471 }, { "epoch": 0.23328802668973186, "grad_norm": 0.9590482451910926, "learning_rate": 1.976121302434449e-05, "loss": 0.43328845500946045, "step": 472 }, { "epoch": 0.23378228098356604, "grad_norm": 0.9134750069589276, "learning_rate": 1.975943490000811e-05, "loss": 0.38707420229911804, "step": 473 }, { "epoch": 0.23427653527740022, "grad_norm": 0.9896782154106246, "learning_rate": 1.9757650260412438e-05, "loss": 0.390054851770401, "step": 474 }, { "epoch": 0.2347707895712344, "grad_norm": 1.0430972668852745, "learning_rate": 1.9755859106748875e-05, "loss": 0.45697346329689026, "step": 475 }, { "epoch": 0.23526504386506858, "grad_norm": 0.950214634248398, "learning_rate": 1.9754061440213165e-05, "loss": 0.4381307363510132, "step": 476 }, { "epoch": 0.23575929815890276, "grad_norm": 0.9612066818802636, "learning_rate": 1.9752257262005403e-05, "loss": 0.4217841625213623, "step": 477 }, { "epoch": 0.23625355245273694, "grad_norm": 0.8699003234814695, "learning_rate": 1.9750446573330038e-05, "loss": 0.35968005657196045, "step": 478 }, { "epoch": 0.23674780674657112, "grad_norm": 0.8353290173002438, "learning_rate": 1.9748629375395856e-05, "loss": 0.3516439199447632, "step": 479 }, { "epoch": 0.2372420610404053, "grad_norm": 0.9683111499165196, "learning_rate": 1.9746805669415995e-05, "loss": 0.4078671634197235, "step": 480 }, { "epoch": 0.23773631533423947, "grad_norm": 0.967434671965903, "learning_rate": 1.9744975456607936e-05, "loss": 0.39654213190078735, "step": 481 }, { "epoch": 0.23823056962807365, "grad_norm": 0.9446129798331165, "learning_rate": 1.9743138738193498e-05, "loss": 0.41271698474884033, "step": 482 }, { "epoch": 0.23872482392190783, "grad_norm": 0.9563785743614732, "learning_rate": 1.974129551539885e-05, "loss": 0.3957251310348511, "step": 483 }, { "epoch": 0.239219078215742, "grad_norm": 1.0318067283466978, "learning_rate": 1.9739445789454506e-05, "loss": 0.39857393503189087, "step": 484 }, { "epoch": 0.2397133325095762, "grad_norm": 0.9625937520590958, "learning_rate": 1.973758956159531e-05, "loss": 0.4263526499271393, "step": 485 }, { "epoch": 0.24020758680341037, "grad_norm": 0.9782583924092142, "learning_rate": 1.9735726833060457e-05, "loss": 0.3849489688873291, "step": 486 }, { "epoch": 0.24070184109724455, "grad_norm": 0.9932149128826128, "learning_rate": 1.9733857605093476e-05, "loss": 0.431019127368927, "step": 487 }, { "epoch": 0.2411960953910787, "grad_norm": 0.9703866882534654, "learning_rate": 1.973198187894224e-05, "loss": 0.3740619421005249, "step": 488 }, { "epoch": 0.24169034968491288, "grad_norm": 0.9420951155788563, "learning_rate": 1.9730099655858953e-05, "loss": 0.361680269241333, "step": 489 }, { "epoch": 0.24218460397874705, "grad_norm": 1.0045147685747362, "learning_rate": 1.9728210937100162e-05, "loss": 0.41683071851730347, "step": 490 }, { "epoch": 0.24267885827258123, "grad_norm": 1.0255058564946795, "learning_rate": 1.9726315723926746e-05, "loss": 0.3898739516735077, "step": 491 }, { "epoch": 0.2431731125664154, "grad_norm": 0.992746780987763, "learning_rate": 1.9724414017603925e-05, "loss": 0.39339032769203186, "step": 492 }, { "epoch": 0.2436673668602496, "grad_norm": 0.9018262406248393, "learning_rate": 1.9722505819401255e-05, "loss": 0.401676744222641, "step": 493 }, { "epoch": 0.24416162115408377, "grad_norm": 0.956392375337736, "learning_rate": 1.9720591130592613e-05, "loss": 0.3814789056777954, "step": 494 }, { "epoch": 0.24465587544791795, "grad_norm": 1.0339059816881517, "learning_rate": 1.9718669952456226e-05, "loss": 0.3980346918106079, "step": 495 }, { "epoch": 0.24515012974175213, "grad_norm": 1.0852693818985448, "learning_rate": 1.971674228627464e-05, "loss": 0.4222795069217682, "step": 496 }, { "epoch": 0.2456443840355863, "grad_norm": 0.9629746856387489, "learning_rate": 1.971480813333474e-05, "loss": 0.3795197904109955, "step": 497 }, { "epoch": 0.24613863832942048, "grad_norm": 1.0428831707745134, "learning_rate": 1.971286749492774e-05, "loss": 0.3746161460876465, "step": 498 }, { "epoch": 0.24663289262325466, "grad_norm": 1.0211942338953277, "learning_rate": 1.9710920372349174e-05, "loss": 0.3552350699901581, "step": 499 }, { "epoch": 0.24712714691708884, "grad_norm": 0.913724645727759, "learning_rate": 1.9708966766898925e-05, "loss": 0.39690741896629333, "step": 500 }, { "epoch": 0.24762140121092302, "grad_norm": 1.0179277636972188, "learning_rate": 1.9707006679881186e-05, "loss": 0.39530014991760254, "step": 501 }, { "epoch": 0.2481156555047572, "grad_norm": 1.0722850381631455, "learning_rate": 1.9705040112604483e-05, "loss": 0.41228705644607544, "step": 502 }, { "epoch": 0.24860990979859138, "grad_norm": 0.9774177098582278, "learning_rate": 1.9703067066381668e-05, "loss": 0.4330476224422455, "step": 503 }, { "epoch": 0.24910416409242556, "grad_norm": 0.9849824106564479, "learning_rate": 1.970108754252992e-05, "loss": 0.38365668058395386, "step": 504 }, { "epoch": 0.24959841838625973, "grad_norm": 1.0789440281177851, "learning_rate": 1.969910154237074e-05, "loss": 0.4419581890106201, "step": 505 }, { "epoch": 0.2500926726800939, "grad_norm": 1.0828116066497757, "learning_rate": 1.9697109067229957e-05, "loss": 0.38741230964660645, "step": 506 }, { "epoch": 0.2505869269739281, "grad_norm": 0.9914523280251673, "learning_rate": 1.969511011843771e-05, "loss": 0.41751983761787415, "step": 507 }, { "epoch": 0.25108118126776224, "grad_norm": 0.9718169799013945, "learning_rate": 1.9693104697328477e-05, "loss": 0.40355241298675537, "step": 508 }, { "epoch": 0.25157543556159645, "grad_norm": 1.003225231520968, "learning_rate": 1.9691092805241046e-05, "loss": 0.3511045575141907, "step": 509 }, { "epoch": 0.2520696898554306, "grad_norm": 1.1208960250871327, "learning_rate": 1.9689074443518526e-05, "loss": 0.38917112350463867, "step": 510 }, { "epoch": 0.2525639441492648, "grad_norm": 0.9640213098912707, "learning_rate": 1.968704961350835e-05, "loss": 0.40256473422050476, "step": 511 }, { "epoch": 0.25305819844309896, "grad_norm": 0.8857886708710384, "learning_rate": 1.968501831656226e-05, "loss": 0.32350897789001465, "step": 512 }, { "epoch": 0.25355245273693316, "grad_norm": 1.0209548318094466, "learning_rate": 1.9682980554036322e-05, "loss": 0.36787012219429016, "step": 513 }, { "epoch": 0.2540467070307673, "grad_norm": 1.063374274844625, "learning_rate": 1.9680936327290924e-05, "loss": 0.4035605490207672, "step": 514 }, { "epoch": 0.2545409613246015, "grad_norm": 0.9437423188361623, "learning_rate": 1.9678885637690755e-05, "loss": 0.39402660727500916, "step": 515 }, { "epoch": 0.2550352156184357, "grad_norm": 1.1793476229973228, "learning_rate": 1.967682848660483e-05, "loss": 0.37553271651268005, "step": 516 }, { "epoch": 0.2555294699122699, "grad_norm": 1.047789732428987, "learning_rate": 1.9674764875406472e-05, "loss": 0.40148675441741943, "step": 517 }, { "epoch": 0.25602372420610403, "grad_norm": 1.1994265366678782, "learning_rate": 1.967269480547332e-05, "loss": 0.45255252718925476, "step": 518 }, { "epoch": 0.25651797849993824, "grad_norm": 1.0116666478277523, "learning_rate": 1.9670618278187318e-05, "loss": 0.4183574616909027, "step": 519 }, { "epoch": 0.2570122327937724, "grad_norm": 0.9518606397664687, "learning_rate": 1.9668535294934733e-05, "loss": 0.3950796127319336, "step": 520 }, { "epoch": 0.2575064870876066, "grad_norm": 0.9729673190351172, "learning_rate": 1.9666445857106132e-05, "loss": 0.4062424898147583, "step": 521 }, { "epoch": 0.25800074138144075, "grad_norm": 0.9474577180562711, "learning_rate": 1.966434996609639e-05, "loss": 0.4095906913280487, "step": 522 }, { "epoch": 0.25849499567527495, "grad_norm": 1.1739974412660419, "learning_rate": 1.96622476233047e-05, "loss": 0.42302393913269043, "step": 523 }, { "epoch": 0.2589892499691091, "grad_norm": 1.0746371790844444, "learning_rate": 1.966013883013455e-05, "loss": 0.43204039335250854, "step": 524 }, { "epoch": 0.2594835042629433, "grad_norm": 0.9744852361980706, "learning_rate": 1.9658023587993748e-05, "loss": 0.39941906929016113, "step": 525 }, { "epoch": 0.25997775855677746, "grad_norm": 0.9322675006976836, "learning_rate": 1.9655901898294397e-05, "loss": 0.37053728103637695, "step": 526 }, { "epoch": 0.26047201285061167, "grad_norm": 0.9500036404091089, "learning_rate": 1.96537737624529e-05, "loss": 0.4126317501068115, "step": 527 }, { "epoch": 0.2609662671444458, "grad_norm": 0.9592560956850021, "learning_rate": 1.9651639181889975e-05, "loss": 0.42397794127464294, "step": 528 }, { "epoch": 0.26146052143827997, "grad_norm": 1.09730750123291, "learning_rate": 1.964949815803064e-05, "loss": 0.3606872260570526, "step": 529 }, { "epoch": 0.2619547757321142, "grad_norm": 1.0256203362936218, "learning_rate": 1.9647350692304206e-05, "loss": 0.420923113822937, "step": 530 }, { "epoch": 0.2624490300259483, "grad_norm": 1.0242401280009386, "learning_rate": 1.9645196786144298e-05, "loss": 0.41700440645217896, "step": 531 }, { "epoch": 0.26294328431978253, "grad_norm": 0.9861507549209962, "learning_rate": 1.9643036440988825e-05, "loss": 0.3961814045906067, "step": 532 }, { "epoch": 0.2634375386136167, "grad_norm": 0.9400998714081333, "learning_rate": 1.9640869658280005e-05, "loss": 0.4025250971317291, "step": 533 }, { "epoch": 0.2639317929074509, "grad_norm": 1.0201682019086518, "learning_rate": 1.9638696439464357e-05, "loss": 0.38828611373901367, "step": 534 }, { "epoch": 0.26442604720128504, "grad_norm": 0.8944214314341241, "learning_rate": 1.963651678599268e-05, "loss": 0.3109109401702881, "step": 535 }, { "epoch": 0.26492030149511925, "grad_norm": 1.0758326810562073, "learning_rate": 1.963433069932009e-05, "loss": 0.41516438126564026, "step": 536 }, { "epoch": 0.2654145557889534, "grad_norm": 0.972035022615468, "learning_rate": 1.9632138180905982e-05, "loss": 0.3765295743942261, "step": 537 }, { "epoch": 0.2659088100827876, "grad_norm": 1.0590611315407708, "learning_rate": 1.9629939232214052e-05, "loss": 0.37631309032440186, "step": 538 }, { "epoch": 0.26640306437662176, "grad_norm": 0.9543257606304313, "learning_rate": 1.9627733854712286e-05, "loss": 0.3640018403530121, "step": 539 }, { "epoch": 0.26689731867045596, "grad_norm": 1.0213174253270256, "learning_rate": 1.9625522049872962e-05, "loss": 0.3971521854400635, "step": 540 }, { "epoch": 0.2673915729642901, "grad_norm": 1.0059131210770185, "learning_rate": 1.962330381917265e-05, "loss": 0.4218612313270569, "step": 541 }, { "epoch": 0.2678858272581243, "grad_norm": 1.0124871124462342, "learning_rate": 1.9621079164092203e-05, "loss": 0.38814622163772583, "step": 542 }, { "epoch": 0.26838008155195847, "grad_norm": 1.0310689772428585, "learning_rate": 1.961884808611678e-05, "loss": 0.3912709355354309, "step": 543 }, { "epoch": 0.2688743358457927, "grad_norm": 0.9919097213748044, "learning_rate": 1.9616610586735808e-05, "loss": 0.4007106423377991, "step": 544 }, { "epoch": 0.26936859013962683, "grad_norm": 0.9871985402956727, "learning_rate": 1.9614366667443016e-05, "loss": 0.37406057119369507, "step": 545 }, { "epoch": 0.26986284443346104, "grad_norm": 0.970768236440829, "learning_rate": 1.961211632973641e-05, "loss": 0.4187811613082886, "step": 546 }, { "epoch": 0.2703570987272952, "grad_norm": 1.049304525520643, "learning_rate": 1.960985957511828e-05, "loss": 0.44418057799339294, "step": 547 }, { "epoch": 0.2708513530211294, "grad_norm": 1.0048719478421346, "learning_rate": 1.9607596405095205e-05, "loss": 0.41016438603401184, "step": 548 }, { "epoch": 0.27134560731496354, "grad_norm": 1.2563417457062223, "learning_rate": 1.9605326821178047e-05, "loss": 0.39461439847946167, "step": 549 }, { "epoch": 0.27183986160879775, "grad_norm": 0.9443238609304102, "learning_rate": 1.960305082488195e-05, "loss": 0.4159786105155945, "step": 550 }, { "epoch": 0.2723341159026319, "grad_norm": 0.9387957037755528, "learning_rate": 1.960076841772633e-05, "loss": 0.3702941834926605, "step": 551 }, { "epoch": 0.2728283701964661, "grad_norm": 1.0745575617770338, "learning_rate": 1.9598479601234894e-05, "loss": 0.3482900559902191, "step": 552 }, { "epoch": 0.27332262449030026, "grad_norm": 1.1412061517783256, "learning_rate": 1.9596184376935618e-05, "loss": 0.40550655126571655, "step": 553 }, { "epoch": 0.2738168787841344, "grad_norm": 0.9446073244587436, "learning_rate": 1.9593882746360767e-05, "loss": 0.38604867458343506, "step": 554 }, { "epoch": 0.2743111330779686, "grad_norm": 0.9388567147005249, "learning_rate": 1.9591574711046876e-05, "loss": 0.36586758494377136, "step": 555 }, { "epoch": 0.27480538737180277, "grad_norm": 0.9730414125092071, "learning_rate": 1.958926027253475e-05, "loss": 0.37780559062957764, "step": 556 }, { "epoch": 0.275299641665637, "grad_norm": 0.9401659835761762, "learning_rate": 1.9586939432369486e-05, "loss": 0.3837544322013855, "step": 557 }, { "epoch": 0.2757938959594711, "grad_norm": 1.038905164013387, "learning_rate": 1.9584612192100433e-05, "loss": 0.39425861835479736, "step": 558 }, { "epoch": 0.27628815025330533, "grad_norm": 1.0791545750316935, "learning_rate": 1.958227855328123e-05, "loss": 0.4008832275867462, "step": 559 }, { "epoch": 0.2767824045471395, "grad_norm": 1.0509839705522974, "learning_rate": 1.957993851746978e-05, "loss": 0.42411595582962036, "step": 560 }, { "epoch": 0.2772766588409737, "grad_norm": 1.1626138880546706, "learning_rate": 1.9577592086228257e-05, "loss": 0.4028055965900421, "step": 561 }, { "epoch": 0.27777091313480784, "grad_norm": 0.9383996498843509, "learning_rate": 1.9575239261123102e-05, "loss": 0.3785157799720764, "step": 562 }, { "epoch": 0.27826516742864205, "grad_norm": 0.9289370196839293, "learning_rate": 1.9572880043725032e-05, "loss": 0.3726264536380768, "step": 563 }, { "epoch": 0.2787594217224762, "grad_norm": 0.9959287145902769, "learning_rate": 1.957051443560902e-05, "loss": 0.37261486053466797, "step": 564 }, { "epoch": 0.2792536760163104, "grad_norm": 0.9394373844868922, "learning_rate": 1.956814243835432e-05, "loss": 0.34781068563461304, "step": 565 }, { "epoch": 0.27974793031014455, "grad_norm": 0.9899407389551799, "learning_rate": 1.956576405354444e-05, "loss": 0.3828197121620178, "step": 566 }, { "epoch": 0.28024218460397876, "grad_norm": 0.9387592741594649, "learning_rate": 1.9563379282767156e-05, "loss": 0.3839726150035858, "step": 567 }, { "epoch": 0.2807364388978129, "grad_norm": 1.053498529947078, "learning_rate": 1.9560988127614507e-05, "loss": 0.3658025562763214, "step": 568 }, { "epoch": 0.2812306931916471, "grad_norm": 1.064206434015044, "learning_rate": 1.9558590589682795e-05, "loss": 0.400045782327652, "step": 569 }, { "epoch": 0.28172494748548127, "grad_norm": 0.9470530474737298, "learning_rate": 1.955618667057258e-05, "loss": 0.36586880683898926, "step": 570 }, { "epoch": 0.2822192017793155, "grad_norm": 1.0137760854012388, "learning_rate": 1.9553776371888684e-05, "loss": 0.3886389136314392, "step": 571 }, { "epoch": 0.2827134560731496, "grad_norm": 1.0159520278130145, "learning_rate": 1.955135969524019e-05, "loss": 0.37858110666275024, "step": 572 }, { "epoch": 0.28320771036698383, "grad_norm": 0.939134880585939, "learning_rate": 1.9548936642240435e-05, "loss": 0.3264877498149872, "step": 573 }, { "epoch": 0.283701964660818, "grad_norm": 1.1465399296789363, "learning_rate": 1.9546507214507017e-05, "loss": 0.3756924569606781, "step": 574 }, { "epoch": 0.2841962189546522, "grad_norm": 1.0922050133590595, "learning_rate": 1.9544071413661783e-05, "loss": 0.3773806691169739, "step": 575 }, { "epoch": 0.28469047324848634, "grad_norm": 1.0432958526312845, "learning_rate": 1.9541629241330842e-05, "loss": 0.37437382340431213, "step": 576 }, { "epoch": 0.28518472754232055, "grad_norm": 0.9730241652440514, "learning_rate": 1.9539180699144552e-05, "loss": 0.3835929036140442, "step": 577 }, { "epoch": 0.2856789818361547, "grad_norm": 1.2039096391780213, "learning_rate": 1.9536725788737528e-05, "loss": 0.39163681864738464, "step": 578 }, { "epoch": 0.2861732361299889, "grad_norm": 1.1007303408462066, "learning_rate": 1.953426451174863e-05, "loss": 0.39241698384284973, "step": 579 }, { "epoch": 0.28666749042382306, "grad_norm": 0.9748115984741068, "learning_rate": 1.953179686982097e-05, "loss": 0.32731348276138306, "step": 580 }, { "epoch": 0.2871617447176572, "grad_norm": 0.9649406632940735, "learning_rate": 1.9529322864601915e-05, "loss": 0.34735041856765747, "step": 581 }, { "epoch": 0.2876559990114914, "grad_norm": 1.0831552948058796, "learning_rate": 1.952684249774307e-05, "loss": 0.3795308470726013, "step": 582 }, { "epoch": 0.28815025330532557, "grad_norm": 1.0599543241474398, "learning_rate": 1.95243557709003e-05, "loss": 0.3546086549758911, "step": 583 }, { "epoch": 0.28864450759915977, "grad_norm": 0.9634030800835625, "learning_rate": 1.9521862685733703e-05, "loss": 0.35397839546203613, "step": 584 }, { "epoch": 0.2891387618929939, "grad_norm": 0.972134968680729, "learning_rate": 1.9519363243907627e-05, "loss": 0.350521981716156, "step": 585 }, { "epoch": 0.28963301618682813, "grad_norm": 1.0201322204570258, "learning_rate": 1.9516857447090663e-05, "loss": 0.380625456571579, "step": 586 }, { "epoch": 0.2901272704806623, "grad_norm": 0.9847688200101109, "learning_rate": 1.9514345296955647e-05, "loss": 0.40378236770629883, "step": 587 }, { "epoch": 0.2906215247744965, "grad_norm": 1.0122113576142937, "learning_rate": 1.9511826795179653e-05, "loss": 0.4050450325012207, "step": 588 }, { "epoch": 0.29111577906833064, "grad_norm": 1.048628562831542, "learning_rate": 1.9509301943444e-05, "loss": 0.3772329092025757, "step": 589 }, { "epoch": 0.29161003336216484, "grad_norm": 1.0803687765146506, "learning_rate": 1.9506770743434244e-05, "loss": 0.4079870581626892, "step": 590 }, { "epoch": 0.292104287655999, "grad_norm": 1.0069688403525805, "learning_rate": 1.950423319684017e-05, "loss": 0.4233503043651581, "step": 591 }, { "epoch": 0.2925985419498332, "grad_norm": 1.0403594154189246, "learning_rate": 1.9501689305355814e-05, "loss": 0.395530104637146, "step": 592 }, { "epoch": 0.29309279624366735, "grad_norm": 1.0468686113369423, "learning_rate": 1.949913907067944e-05, "loss": 0.4266175925731659, "step": 593 }, { "epoch": 0.29358705053750156, "grad_norm": 1.0371386643985676, "learning_rate": 1.949658249451355e-05, "loss": 0.4428660571575165, "step": 594 }, { "epoch": 0.2940813048313357, "grad_norm": 0.928511699803538, "learning_rate": 1.9494019578564874e-05, "loss": 0.36831945180892944, "step": 595 }, { "epoch": 0.2945755591251699, "grad_norm": 1.059362576098806, "learning_rate": 1.949145032454438e-05, "loss": 0.392259806394577, "step": 596 }, { "epoch": 0.29506981341900407, "grad_norm": 0.9638882642169329, "learning_rate": 1.948887473416727e-05, "loss": 0.43743032217025757, "step": 597 }, { "epoch": 0.2955640677128383, "grad_norm": 0.9566828851720006, "learning_rate": 1.9486292809152965e-05, "loss": 0.3725258409976959, "step": 598 }, { "epoch": 0.2960583220066724, "grad_norm": 0.9479087116485218, "learning_rate": 1.948370455122512e-05, "loss": 0.39507436752319336, "step": 599 }, { "epoch": 0.29655257630050663, "grad_norm": 1.0509892705512045, "learning_rate": 1.9481109962111623e-05, "loss": 0.40915870666503906, "step": 600 }, { "epoch": 0.2970468305943408, "grad_norm": 0.9528830591600533, "learning_rate": 1.947850904354459e-05, "loss": 0.3465006351470947, "step": 601 }, { "epoch": 0.297541084888175, "grad_norm": 1.0989483899383072, "learning_rate": 1.9475901797260346e-05, "loss": 0.4205567538738251, "step": 602 }, { "epoch": 0.29803533918200914, "grad_norm": 0.9986003576186586, "learning_rate": 1.9473288224999455e-05, "loss": 0.37682560086250305, "step": 603 }, { "epoch": 0.29852959347584335, "grad_norm": 1.0393693996744362, "learning_rate": 1.9470668328506705e-05, "loss": 0.3865458369255066, "step": 604 }, { "epoch": 0.2990238477696775, "grad_norm": 0.9426218637426483, "learning_rate": 1.9468042109531096e-05, "loss": 0.36366063356399536, "step": 605 }, { "epoch": 0.2995181020635117, "grad_norm": 0.9801320950707162, "learning_rate": 1.9465409569825857e-05, "loss": 0.3861471116542816, "step": 606 }, { "epoch": 0.30001235635734586, "grad_norm": 1.0257103381374684, "learning_rate": 1.9462770711148433e-05, "loss": 0.3499199151992798, "step": 607 }, { "epoch": 0.30050661065118, "grad_norm": 1.1030346241860873, "learning_rate": 1.946012553526049e-05, "loss": 0.3704417943954468, "step": 608 }, { "epoch": 0.3010008649450142, "grad_norm": 1.0751948386377395, "learning_rate": 1.9457474043927908e-05, "loss": 0.41278937458992004, "step": 609 }, { "epoch": 0.30149511923884836, "grad_norm": 1.0379271128545955, "learning_rate": 1.9454816238920787e-05, "loss": 0.36078256368637085, "step": 610 }, { "epoch": 0.30198937353268257, "grad_norm": 1.05890389444684, "learning_rate": 1.9452152122013434e-05, "loss": 0.3713051676750183, "step": 611 }, { "epoch": 0.3024836278265167, "grad_norm": 1.0547983951495754, "learning_rate": 1.9449481694984382e-05, "loss": 0.3919684886932373, "step": 612 }, { "epoch": 0.3029778821203509, "grad_norm": 1.1211767888578545, "learning_rate": 1.9446804959616364e-05, "loss": 0.4249044358730316, "step": 613 }, { "epoch": 0.3034721364141851, "grad_norm": 1.0386798112962086, "learning_rate": 1.9444121917696335e-05, "loss": 0.4033172130584717, "step": 614 }, { "epoch": 0.3039663907080193, "grad_norm": 1.020453301484689, "learning_rate": 1.9441432571015455e-05, "loss": 0.35740789771080017, "step": 615 }, { "epoch": 0.30446064500185344, "grad_norm": 1.0567402195641693, "learning_rate": 1.9438736921369093e-05, "loss": 0.41219189763069153, "step": 616 }, { "epoch": 0.30495489929568764, "grad_norm": 1.0356137182677312, "learning_rate": 1.9436034970556824e-05, "loss": 0.3751283884048462, "step": 617 }, { "epoch": 0.3054491535895218, "grad_norm": 1.0460808776118622, "learning_rate": 1.9433326720382433e-05, "loss": 0.40294593572616577, "step": 618 }, { "epoch": 0.305943407883356, "grad_norm": 1.0087358245362568, "learning_rate": 1.943061217265391e-05, "loss": 0.4163772463798523, "step": 619 }, { "epoch": 0.30643766217719015, "grad_norm": 1.007467123707354, "learning_rate": 1.9427891329183444e-05, "loss": 0.3796529769897461, "step": 620 }, { "epoch": 0.30693191647102436, "grad_norm": 1.0905533067383615, "learning_rate": 1.942516419178744e-05, "loss": 0.44097092747688293, "step": 621 }, { "epoch": 0.3074261707648585, "grad_norm": 0.9615172689674734, "learning_rate": 1.942243076228649e-05, "loss": 0.384232759475708, "step": 622 }, { "epoch": 0.3079204250586927, "grad_norm": 0.9038435200954008, "learning_rate": 1.941969104250539e-05, "loss": 0.3734084367752075, "step": 623 }, { "epoch": 0.30841467935252687, "grad_norm": 0.9414597847653995, "learning_rate": 1.9416945034273142e-05, "loss": 0.3532239496707916, "step": 624 }, { "epoch": 0.3089089336463611, "grad_norm": 1.0668895366566058, "learning_rate": 1.941419273942294e-05, "loss": 0.39430537819862366, "step": 625 }, { "epoch": 0.3094031879401952, "grad_norm": 1.0091341034087684, "learning_rate": 1.941143415979218e-05, "loss": 0.35790857672691345, "step": 626 }, { "epoch": 0.30989744223402943, "grad_norm": 1.0381854826035726, "learning_rate": 1.9408669297222446e-05, "loss": 0.3684060871601105, "step": 627 }, { "epoch": 0.3103916965278636, "grad_norm": 0.9553898295016832, "learning_rate": 1.9405898153559522e-05, "loss": 0.3425355553627014, "step": 628 }, { "epoch": 0.3108859508216978, "grad_norm": 0.9032294986887355, "learning_rate": 1.9403120730653387e-05, "loss": 0.3295109272003174, "step": 629 }, { "epoch": 0.31138020511553194, "grad_norm": 1.0576168899253493, "learning_rate": 1.940033703035821e-05, "loss": 0.37015989422798157, "step": 630 }, { "epoch": 0.31187445940936614, "grad_norm": 1.1361288169710941, "learning_rate": 1.939754705453234e-05, "loss": 0.40625980496406555, "step": 631 }, { "epoch": 0.3123687137032003, "grad_norm": 1.3354529260238757, "learning_rate": 1.939475080503833e-05, "loss": 0.42503830790519714, "step": 632 }, { "epoch": 0.31286296799703445, "grad_norm": 1.0863606838535078, "learning_rate": 1.939194828374292e-05, "loss": 0.36230289936065674, "step": 633 }, { "epoch": 0.31335722229086865, "grad_norm": 0.9800314584790245, "learning_rate": 1.938913949251703e-05, "loss": 0.4128720164299011, "step": 634 }, { "epoch": 0.3138514765847028, "grad_norm": 1.1018828002960295, "learning_rate": 1.938632443323577e-05, "loss": 0.39706575870513916, "step": 635 }, { "epoch": 0.314345730878537, "grad_norm": 1.0451325322820368, "learning_rate": 1.9383503107778434e-05, "loss": 0.38395214080810547, "step": 636 }, { "epoch": 0.31483998517237116, "grad_norm": 0.9669746428685202, "learning_rate": 1.9380675518028495e-05, "loss": 0.3629944324493408, "step": 637 }, { "epoch": 0.31533423946620537, "grad_norm": 1.0589959103814197, "learning_rate": 1.937784166587361e-05, "loss": 0.39474761486053467, "step": 638 }, { "epoch": 0.3158284937600395, "grad_norm": 1.085403264447479, "learning_rate": 1.9375001553205627e-05, "loss": 0.423098087310791, "step": 639 }, { "epoch": 0.3163227480538737, "grad_norm": 0.9239589256190138, "learning_rate": 1.937215518192056e-05, "loss": 0.3453904986381531, "step": 640 }, { "epoch": 0.3168170023477079, "grad_norm": 0.9432054956835023, "learning_rate": 1.9369302553918605e-05, "loss": 0.3659127354621887, "step": 641 }, { "epoch": 0.3173112566415421, "grad_norm": 1.060860081964917, "learning_rate": 1.9366443671104132e-05, "loss": 0.3613426089286804, "step": 642 }, { "epoch": 0.31780551093537623, "grad_norm": 0.9515218135636598, "learning_rate": 1.93635785353857e-05, "loss": 0.3556531071662903, "step": 643 }, { "epoch": 0.31829976522921044, "grad_norm": 0.9893630091198329, "learning_rate": 1.9360707148676022e-05, "loss": 0.3515596091747284, "step": 644 }, { "epoch": 0.3187940195230446, "grad_norm": 0.9802147109168395, "learning_rate": 1.9357829512892e-05, "loss": 0.36270469427108765, "step": 645 }, { "epoch": 0.3192882738168788, "grad_norm": 0.9936651325349853, "learning_rate": 1.9354945629954706e-05, "loss": 0.3617076277732849, "step": 646 }, { "epoch": 0.31978252811071295, "grad_norm": 1.0835943099678094, "learning_rate": 1.9352055501789376e-05, "loss": 0.3888331949710846, "step": 647 }, { "epoch": 0.32027678240454716, "grad_norm": 1.0454884563674065, "learning_rate": 1.9349159130325413e-05, "loss": 0.41199982166290283, "step": 648 }, { "epoch": 0.3207710366983813, "grad_norm": 1.0758693507529822, "learning_rate": 1.93462565174964e-05, "loss": 0.3878370225429535, "step": 649 }, { "epoch": 0.3212652909922155, "grad_norm": 1.0303850194409756, "learning_rate": 1.9343347665240077e-05, "loss": 0.380184531211853, "step": 650 }, { "epoch": 0.32175954528604966, "grad_norm": 1.143999159363527, "learning_rate": 1.9340432575498355e-05, "loss": 0.3746795356273651, "step": 651 }, { "epoch": 0.32225379957988387, "grad_norm": 1.0188863097829193, "learning_rate": 1.93375112502173e-05, "loss": 0.3700905442237854, "step": 652 }, { "epoch": 0.322748053873718, "grad_norm": 0.9032826115280742, "learning_rate": 1.9334583691347153e-05, "loss": 0.3331850469112396, "step": 653 }, { "epoch": 0.32324230816755223, "grad_norm": 0.949854268007892, "learning_rate": 1.933164990084231e-05, "loss": 0.3397464156150818, "step": 654 }, { "epoch": 0.3237365624613864, "grad_norm": 1.1199806793436613, "learning_rate": 1.9328709880661326e-05, "loss": 0.3837242126464844, "step": 655 }, { "epoch": 0.3242308167552206, "grad_norm": 1.0801449332087112, "learning_rate": 1.9325763632766916e-05, "loss": 0.38854193687438965, "step": 656 }, { "epoch": 0.32472507104905474, "grad_norm": 1.1330798719469783, "learning_rate": 1.9322811159125955e-05, "loss": 0.41792556643486023, "step": 657 }, { "epoch": 0.32521932534288894, "grad_norm": 0.9831880252943476, "learning_rate": 1.931985246170947e-05, "loss": 0.3968243896961212, "step": 658 }, { "epoch": 0.3257135796367231, "grad_norm": 1.0416971268065567, "learning_rate": 1.9316887542492645e-05, "loss": 0.41183531284332275, "step": 659 }, { "epoch": 0.32620783393055724, "grad_norm": 1.0367106782684, "learning_rate": 1.931391640345482e-05, "loss": 0.36057350039482117, "step": 660 }, { "epoch": 0.32670208822439145, "grad_norm": 1.0663955736026025, "learning_rate": 1.9310939046579482e-05, "loss": 0.36032363772392273, "step": 661 }, { "epoch": 0.3271963425182256, "grad_norm": 0.9657326304523917, "learning_rate": 1.9307955473854275e-05, "loss": 0.3682931363582611, "step": 662 }, { "epoch": 0.3276905968120598, "grad_norm": 1.004896861978755, "learning_rate": 1.9304965687270987e-05, "loss": 0.3829198181629181, "step": 663 }, { "epoch": 0.32818485110589396, "grad_norm": 1.0180253035605964, "learning_rate": 1.930196968882556e-05, "loss": 0.3901137709617615, "step": 664 }, { "epoch": 0.32867910539972817, "grad_norm": 0.9037607838463562, "learning_rate": 1.9298967480518077e-05, "loss": 0.34352344274520874, "step": 665 }, { "epoch": 0.3291733596935623, "grad_norm": 0.9918701152773953, "learning_rate": 1.9295959064352767e-05, "loss": 0.38822662830352783, "step": 666 }, { "epoch": 0.3296676139873965, "grad_norm": 0.9619347095581623, "learning_rate": 1.9292944442338013e-05, "loss": 0.3639586567878723, "step": 667 }, { "epoch": 0.3301618682812307, "grad_norm": 1.0248410702019595, "learning_rate": 1.9289923616486326e-05, "loss": 0.38537997007369995, "step": 668 }, { "epoch": 0.3306561225750649, "grad_norm": 0.9469693142742907, "learning_rate": 1.9286896588814373e-05, "loss": 0.3514263331890106, "step": 669 }, { "epoch": 0.33115037686889903, "grad_norm": 0.9776369401143131, "learning_rate": 1.928386336134295e-05, "loss": 0.3873803913593292, "step": 670 }, { "epoch": 0.33164463116273324, "grad_norm": 1.0063829461952047, "learning_rate": 1.9280823936096994e-05, "loss": 0.36644282937049866, "step": 671 }, { "epoch": 0.3321388854565674, "grad_norm": 0.8900960907324665, "learning_rate": 1.9277778315105587e-05, "loss": 0.34837427735328674, "step": 672 }, { "epoch": 0.3326331397504016, "grad_norm": 1.0946494998655654, "learning_rate": 1.927472650040194e-05, "loss": 0.3879021406173706, "step": 673 }, { "epoch": 0.33312739404423575, "grad_norm": 1.0256193203663788, "learning_rate": 1.9271668494023404e-05, "loss": 0.3753926753997803, "step": 674 }, { "epoch": 0.33362164833806995, "grad_norm": 1.1193381317991955, "learning_rate": 1.9268604298011454e-05, "loss": 0.35362815856933594, "step": 675 }, { "epoch": 0.3341159026319041, "grad_norm": 1.0612190451852097, "learning_rate": 1.926553391441171e-05, "loss": 0.3685564696788788, "step": 676 }, { "epoch": 0.3346101569257383, "grad_norm": 1.2837359031878948, "learning_rate": 1.926245734527391e-05, "loss": 0.42326927185058594, "step": 677 }, { "epoch": 0.33510441121957246, "grad_norm": 1.0247968871472715, "learning_rate": 1.925937459265193e-05, "loss": 0.35918861627578735, "step": 678 }, { "epoch": 0.33559866551340667, "grad_norm": 1.1358099673309532, "learning_rate": 1.9256285658603773e-05, "loss": 0.38703471422195435, "step": 679 }, { "epoch": 0.3360929198072408, "grad_norm": 1.0232813577835114, "learning_rate": 1.9253190545191567e-05, "loss": 0.3993009924888611, "step": 680 }, { "epoch": 0.336587174101075, "grad_norm": 1.178587285681796, "learning_rate": 1.9250089254481566e-05, "loss": 0.3998498320579529, "step": 681 }, { "epoch": 0.3370814283949092, "grad_norm": 1.0577657705862298, "learning_rate": 1.9246981788544145e-05, "loss": 0.37211501598358154, "step": 682 }, { "epoch": 0.3375756826887434, "grad_norm": 1.0126592857393306, "learning_rate": 1.9243868149453806e-05, "loss": 0.37204745411872864, "step": 683 }, { "epoch": 0.33806993698257753, "grad_norm": 0.9626025917248462, "learning_rate": 1.924074833928917e-05, "loss": 0.3784663677215576, "step": 684 }, { "epoch": 0.33856419127641174, "grad_norm": 1.0085796667337208, "learning_rate": 1.9237622360132975e-05, "loss": 0.4140951633453369, "step": 685 }, { "epoch": 0.3390584455702459, "grad_norm": 1.0251059918961796, "learning_rate": 1.9234490214072083e-05, "loss": 0.3723721504211426, "step": 686 }, { "epoch": 0.33955269986408004, "grad_norm": 1.0704762953012439, "learning_rate": 1.923135190319747e-05, "loss": 0.3714251220226288, "step": 687 }, { "epoch": 0.34004695415791425, "grad_norm": 2.286186750342226, "learning_rate": 1.9228207429604224e-05, "loss": 0.3551461696624756, "step": 688 }, { "epoch": 0.3405412084517484, "grad_norm": 1.0184392375158444, "learning_rate": 1.9225056795391554e-05, "loss": 0.3543378412723541, "step": 689 }, { "epoch": 0.3410354627455826, "grad_norm": 0.9670805241747071, "learning_rate": 1.922190000266278e-05, "loss": 0.3405894935131073, "step": 690 }, { "epoch": 0.34152971703941676, "grad_norm": 1.0375943311061684, "learning_rate": 1.9218737053525324e-05, "loss": 0.36478808522224426, "step": 691 }, { "epoch": 0.34202397133325096, "grad_norm": 1.036881907490894, "learning_rate": 1.9215567950090734e-05, "loss": 0.39778709411621094, "step": 692 }, { "epoch": 0.3425182256270851, "grad_norm": 0.9719804294561131, "learning_rate": 1.9212392694474654e-05, "loss": 0.3553788661956787, "step": 693 }, { "epoch": 0.3430124799209193, "grad_norm": 1.0265620111261864, "learning_rate": 1.920921128879684e-05, "loss": 0.3393115997314453, "step": 694 }, { "epoch": 0.3435067342147535, "grad_norm": 1.2003228723584403, "learning_rate": 1.9206023735181154e-05, "loss": 0.4240456819534302, "step": 695 }, { "epoch": 0.3440009885085877, "grad_norm": 1.0687040296992496, "learning_rate": 1.920283003575556e-05, "loss": 0.3451164960861206, "step": 696 }, { "epoch": 0.34449524280242183, "grad_norm": 1.0859108204006387, "learning_rate": 1.919963019265213e-05, "loss": 0.4328063726425171, "step": 697 }, { "epoch": 0.34498949709625604, "grad_norm": 0.9953984300461581, "learning_rate": 1.9196424208007026e-05, "loss": 0.35965877771377563, "step": 698 }, { "epoch": 0.3454837513900902, "grad_norm": 1.0276560460371096, "learning_rate": 1.9193212083960522e-05, "loss": 0.40995267033576965, "step": 699 }, { "epoch": 0.3459780056839244, "grad_norm": 1.047717179086883, "learning_rate": 1.9189993822656984e-05, "loss": 0.373586505651474, "step": 700 }, { "epoch": 0.34647225997775855, "grad_norm": 0.967832395747722, "learning_rate": 1.918676942624488e-05, "loss": 0.3651657998561859, "step": 701 }, { "epoch": 0.34696651427159275, "grad_norm": 0.9154206667420104, "learning_rate": 1.918353889687677e-05, "loss": 0.3333090543746948, "step": 702 }, { "epoch": 0.3474607685654269, "grad_norm": 1.109347895406641, "learning_rate": 1.9180302236709312e-05, "loss": 0.444000780582428, "step": 703 }, { "epoch": 0.3479550228592611, "grad_norm": 0.9543494832625998, "learning_rate": 1.917705944790325e-05, "loss": 0.34942537546157837, "step": 704 }, { "epoch": 0.34844927715309526, "grad_norm": 1.206317081042567, "learning_rate": 1.9173810532623425e-05, "loss": 0.4709789752960205, "step": 705 }, { "epoch": 0.34894353144692947, "grad_norm": 1.0126287373930702, "learning_rate": 1.917055549303877e-05, "loss": 0.3615723252296448, "step": 706 }, { "epoch": 0.3494377857407636, "grad_norm": 0.98553805717422, "learning_rate": 1.9167294331322293e-05, "loss": 0.366035133600235, "step": 707 }, { "epoch": 0.3499320400345978, "grad_norm": 1.085095649211616, "learning_rate": 1.9164027049651105e-05, "loss": 0.3916548490524292, "step": 708 }, { "epoch": 0.350426294328432, "grad_norm": 1.0423550617328055, "learning_rate": 1.91607536502064e-05, "loss": 0.3752925992012024, "step": 709 }, { "epoch": 0.3509205486222662, "grad_norm": 1.0859051595052658, "learning_rate": 1.9157474135173448e-05, "loss": 0.3471261262893677, "step": 710 }, { "epoch": 0.35141480291610033, "grad_norm": 1.0011428490015388, "learning_rate": 1.9154188506741605e-05, "loss": 0.36898115277290344, "step": 711 }, { "epoch": 0.3519090572099345, "grad_norm": 1.0150877470647623, "learning_rate": 1.9150896767104315e-05, "loss": 0.38236287236213684, "step": 712 }, { "epoch": 0.3524033115037687, "grad_norm": 1.0813644645593066, "learning_rate": 1.9147598918459096e-05, "loss": 0.39260241389274597, "step": 713 }, { "epoch": 0.35289756579760284, "grad_norm": 0.988095993083205, "learning_rate": 1.9144294963007542e-05, "loss": 0.3699083626270294, "step": 714 }, { "epoch": 0.35339182009143705, "grad_norm": 0.9649609380548236, "learning_rate": 1.914098490295532e-05, "loss": 0.37720543146133423, "step": 715 }, { "epoch": 0.3538860743852712, "grad_norm": 0.8834082509396699, "learning_rate": 1.9137668740512195e-05, "loss": 0.298441082239151, "step": 716 }, { "epoch": 0.3543803286791054, "grad_norm": 0.996165149875045, "learning_rate": 1.913434647789197e-05, "loss": 0.3867550194263458, "step": 717 }, { "epoch": 0.35487458297293956, "grad_norm": 0.9824732772890364, "learning_rate": 1.913101811731256e-05, "loss": 0.37111300230026245, "step": 718 }, { "epoch": 0.35536883726677376, "grad_norm": 0.9874274570055057, "learning_rate": 1.9127683660995916e-05, "loss": 0.3922812342643738, "step": 719 }, { "epoch": 0.3558630915606079, "grad_norm": 1.0744489462576237, "learning_rate": 1.9124343111168077e-05, "loss": 0.3878915309906006, "step": 720 }, { "epoch": 0.3563573458544421, "grad_norm": 0.9551023310729483, "learning_rate": 1.9120996470059153e-05, "loss": 0.34974879026412964, "step": 721 }, { "epoch": 0.35685160014827627, "grad_norm": 1.0403250728390605, "learning_rate": 1.9117643739903306e-05, "loss": 0.38341426849365234, "step": 722 }, { "epoch": 0.3573458544421105, "grad_norm": 0.9876921724558848, "learning_rate": 1.9114284922938772e-05, "loss": 0.32610252499580383, "step": 723 }, { "epoch": 0.35784010873594463, "grad_norm": 1.0486464385186933, "learning_rate": 1.9110920021407855e-05, "loss": 0.37203550338745117, "step": 724 }, { "epoch": 0.35833436302977884, "grad_norm": 1.0809240289061282, "learning_rate": 1.9107549037556906e-05, "loss": 0.2954786419868469, "step": 725 }, { "epoch": 0.358828617323613, "grad_norm": 0.9795897601711951, "learning_rate": 1.9104171973636353e-05, "loss": 0.33074450492858887, "step": 726 }, { "epoch": 0.3593228716174472, "grad_norm": 1.0341587070514209, "learning_rate": 1.9100788831900676e-05, "loss": 0.350687712430954, "step": 727 }, { "epoch": 0.35981712591128134, "grad_norm": 1.143909518582956, "learning_rate": 1.9097399614608406e-05, "loss": 0.3635619878768921, "step": 728 }, { "epoch": 0.36031138020511555, "grad_norm": 1.0607740871884148, "learning_rate": 1.909400432402214e-05, "loss": 0.36409544944763184, "step": 729 }, { "epoch": 0.3608056344989497, "grad_norm": 1.069313873032721, "learning_rate": 1.9090602962408523e-05, "loss": 0.4109501540660858, "step": 730 }, { "epoch": 0.3612998887927839, "grad_norm": 1.0147750628685799, "learning_rate": 1.908719553203826e-05, "loss": 0.337943971157074, "step": 731 }, { "epoch": 0.36179414308661806, "grad_norm": 1.0957860180414656, "learning_rate": 1.9083782035186097e-05, "loss": 0.36411553621292114, "step": 732 }, { "epoch": 0.36228839738045227, "grad_norm": 1.1570738944902594, "learning_rate": 1.908036247413084e-05, "loss": 0.3513786494731903, "step": 733 }, { "epoch": 0.3627826516742864, "grad_norm": 1.156885907892102, "learning_rate": 1.907693685115534e-05, "loss": 0.4017047584056854, "step": 734 }, { "epoch": 0.3632769059681206, "grad_norm": 1.0932284273900412, "learning_rate": 1.907350516854649e-05, "loss": 0.3780835270881653, "step": 735 }, { "epoch": 0.3637711602619548, "grad_norm": 1.10688269569213, "learning_rate": 1.9070067428595234e-05, "loss": 0.35562777519226074, "step": 736 }, { "epoch": 0.364265414555789, "grad_norm": 1.0784034928358046, "learning_rate": 1.9066623633596556e-05, "loss": 0.34880492091178894, "step": 737 }, { "epoch": 0.36475966884962313, "grad_norm": 1.1213824671894879, "learning_rate": 1.9063173785849488e-05, "loss": 0.3798677921295166, "step": 738 }, { "epoch": 0.3652539231434573, "grad_norm": 1.0300538330170659, "learning_rate": 1.9059717887657098e-05, "loss": 0.371119886636734, "step": 739 }, { "epoch": 0.3657481774372915, "grad_norm": 1.075537593372937, "learning_rate": 1.9056255941326497e-05, "loss": 0.3845891058444977, "step": 740 }, { "epoch": 0.36624243173112564, "grad_norm": 1.0460904589757556, "learning_rate": 1.9052787949168823e-05, "loss": 0.34627166390419006, "step": 741 }, { "epoch": 0.36673668602495985, "grad_norm": 1.0588032623720978, "learning_rate": 1.9049313913499266e-05, "loss": 0.3872081935405731, "step": 742 }, { "epoch": 0.367230940318794, "grad_norm": 1.0173727289332204, "learning_rate": 1.9045833836637038e-05, "loss": 0.40446269512176514, "step": 743 }, { "epoch": 0.3677251946126282, "grad_norm": 0.9672045860873493, "learning_rate": 1.904234772090539e-05, "loss": 0.3421085476875305, "step": 744 }, { "epoch": 0.36821944890646235, "grad_norm": 0.9886363928023795, "learning_rate": 1.90388555686316e-05, "loss": 0.3626730442047119, "step": 745 }, { "epoch": 0.36871370320029656, "grad_norm": 0.9308335236520315, "learning_rate": 1.9035357382146984e-05, "loss": 0.338506281375885, "step": 746 }, { "epoch": 0.3692079574941307, "grad_norm": 1.010277605498289, "learning_rate": 1.903185316378688e-05, "loss": 0.3709959089756012, "step": 747 }, { "epoch": 0.3697022117879649, "grad_norm": 1.0369282663858728, "learning_rate": 1.9028342915890655e-05, "loss": 0.3804059624671936, "step": 748 }, { "epoch": 0.37019646608179907, "grad_norm": 1.0305613800678137, "learning_rate": 1.9024826640801694e-05, "loss": 0.3416539132595062, "step": 749 }, { "epoch": 0.3706907203756333, "grad_norm": 1.0119233680399335, "learning_rate": 1.9021304340867418e-05, "loss": 0.3642072081565857, "step": 750 }, { "epoch": 0.3711849746694674, "grad_norm": 0.9749783281253589, "learning_rate": 1.9017776018439267e-05, "loss": 0.35957199335098267, "step": 751 }, { "epoch": 0.37167922896330163, "grad_norm": 1.1539382067501942, "learning_rate": 1.9014241675872692e-05, "loss": 0.38497287034988403, "step": 752 }, { "epoch": 0.3721734832571358, "grad_norm": 1.1731793747690833, "learning_rate": 1.9010701315527173e-05, "loss": 0.40713614225387573, "step": 753 }, { "epoch": 0.37266773755097, "grad_norm": 1.0417857344342851, "learning_rate": 1.9007154939766196e-05, "loss": 0.35115551948547363, "step": 754 }, { "epoch": 0.37316199184480414, "grad_norm": 0.958988647508799, "learning_rate": 1.9003602550957284e-05, "loss": 0.3478096127510071, "step": 755 }, { "epoch": 0.37365624613863835, "grad_norm": 1.040896998789985, "learning_rate": 1.9000044151471956e-05, "loss": 0.36460641026496887, "step": 756 }, { "epoch": 0.3741505004324725, "grad_norm": 1.1161707385765272, "learning_rate": 1.8996479743685745e-05, "loss": 0.38015758991241455, "step": 757 }, { "epoch": 0.3746447547263067, "grad_norm": 1.1039269634713542, "learning_rate": 1.8992909329978202e-05, "loss": 0.35270214080810547, "step": 758 }, { "epoch": 0.37513900902014086, "grad_norm": 1.0025131869881447, "learning_rate": 1.8989332912732884e-05, "loss": 0.3875473439693451, "step": 759 }, { "epoch": 0.37563326331397506, "grad_norm": 1.0209812095079043, "learning_rate": 1.8985750494337353e-05, "loss": 0.3281819820404053, "step": 760 }, { "epoch": 0.3761275176078092, "grad_norm": 1.2490133288735825, "learning_rate": 1.8982162077183182e-05, "loss": 0.4081311821937561, "step": 761 }, { "epoch": 0.3766217719016434, "grad_norm": 1.2134865751354402, "learning_rate": 1.897856766366595e-05, "loss": 0.3546852469444275, "step": 762 }, { "epoch": 0.37711602619547757, "grad_norm": 0.9620958606777789, "learning_rate": 1.8974967256185234e-05, "loss": 0.3177235424518585, "step": 763 }, { "epoch": 0.3776102804893118, "grad_norm": 1.0401218813843935, "learning_rate": 1.8971360857144616e-05, "loss": 0.3739625811576843, "step": 764 }, { "epoch": 0.37810453478314593, "grad_norm": 0.9714277368627854, "learning_rate": 1.8967748468951673e-05, "loss": 0.32039010524749756, "step": 765 }, { "epoch": 0.3785987890769801, "grad_norm": 1.0178844258047104, "learning_rate": 1.8964130094017986e-05, "loss": 0.3237234354019165, "step": 766 }, { "epoch": 0.3790930433708143, "grad_norm": 1.0589536664735313, "learning_rate": 1.896050573475913e-05, "loss": 0.33864307403564453, "step": 767 }, { "epoch": 0.37958729766464844, "grad_norm": 1.076259010215984, "learning_rate": 1.8956875393594675e-05, "loss": 0.40412086248397827, "step": 768 }, { "epoch": 0.38008155195848264, "grad_norm": 1.049114130745209, "learning_rate": 1.8953239072948185e-05, "loss": 0.37689530849456787, "step": 769 }, { "epoch": 0.3805758062523168, "grad_norm": 1.1429748380406861, "learning_rate": 1.8949596775247215e-05, "loss": 0.3632664680480957, "step": 770 }, { "epoch": 0.381070060546151, "grad_norm": 1.0707340379824546, "learning_rate": 1.8945948502923314e-05, "loss": 0.384027361869812, "step": 771 }, { "epoch": 0.38156431483998515, "grad_norm": 1.0884709757767692, "learning_rate": 1.8942294258412012e-05, "loss": 0.37623292207717896, "step": 772 }, { "epoch": 0.38205856913381936, "grad_norm": 0.9918916696644151, "learning_rate": 1.8938634044152837e-05, "loss": 0.3449557423591614, "step": 773 }, { "epoch": 0.3825528234276535, "grad_norm": 1.0216495444427651, "learning_rate": 1.8934967862589287e-05, "loss": 0.37977170944213867, "step": 774 }, { "epoch": 0.3830470777214877, "grad_norm": 1.035626875821766, "learning_rate": 1.893129571616886e-05, "loss": 0.3535463809967041, "step": 775 }, { "epoch": 0.38354133201532187, "grad_norm": 0.9784961361645077, "learning_rate": 1.8927617607343024e-05, "loss": 0.3107556104660034, "step": 776 }, { "epoch": 0.3840355863091561, "grad_norm": 0.9647734455274504, "learning_rate": 1.8923933538567238e-05, "loss": 0.33028605580329895, "step": 777 }, { "epoch": 0.3845298406029902, "grad_norm": 1.0880250729774004, "learning_rate": 1.8920243512300925e-05, "loss": 0.35947421193122864, "step": 778 }, { "epoch": 0.38502409489682443, "grad_norm": 1.1225656593555045, "learning_rate": 1.89165475310075e-05, "loss": 0.36262935400009155, "step": 779 }, { "epoch": 0.3855183491906586, "grad_norm": 0.9595574558826961, "learning_rate": 1.8912845597154344e-05, "loss": 0.3441828489303589, "step": 780 }, { "epoch": 0.3860126034844928, "grad_norm": 1.1060761912194574, "learning_rate": 1.8909137713212813e-05, "loss": 0.3748928904533386, "step": 781 }, { "epoch": 0.38650685777832694, "grad_norm": 1.0401989681427097, "learning_rate": 1.8905423881658248e-05, "loss": 0.3571966588497162, "step": 782 }, { "epoch": 0.38700111207216115, "grad_norm": 1.0661600684644588, "learning_rate": 1.8901704104969937e-05, "loss": 0.3937920331954956, "step": 783 }, { "epoch": 0.3874953663659953, "grad_norm": 1.036207969764135, "learning_rate": 1.8897978385631157e-05, "loss": 0.3641708493232727, "step": 784 }, { "epoch": 0.3879896206598295, "grad_norm": 1.0259735566777997, "learning_rate": 1.8894246726129143e-05, "loss": 0.33510833978652954, "step": 785 }, { "epoch": 0.38848387495366365, "grad_norm": 1.0496886995032506, "learning_rate": 1.88905091289551e-05, "loss": 0.3553236722946167, "step": 786 }, { "epoch": 0.38897812924749786, "grad_norm": 1.1065055000350301, "learning_rate": 1.8886765596604188e-05, "loss": 0.3802195191383362, "step": 787 }, { "epoch": 0.389472383541332, "grad_norm": 1.0233155379560877, "learning_rate": 1.8883016131575546e-05, "loss": 0.3672805726528168, "step": 788 }, { "epoch": 0.3899666378351662, "grad_norm": 1.1021600101810725, "learning_rate": 1.887926073637225e-05, "loss": 0.35715609788894653, "step": 789 }, { "epoch": 0.39046089212900037, "grad_norm": 1.0669470229074853, "learning_rate": 1.8875499413501362e-05, "loss": 0.3800659775733948, "step": 790 }, { "epoch": 0.3909551464228345, "grad_norm": 1.0110531011706714, "learning_rate": 1.8871732165473878e-05, "loss": 0.36886462569236755, "step": 791 }, { "epoch": 0.3914494007166687, "grad_norm": 1.1716485087298352, "learning_rate": 1.886795899480476e-05, "loss": 0.37373536825180054, "step": 792 }, { "epoch": 0.3919436550105029, "grad_norm": 1.1804493539453536, "learning_rate": 1.8864179904012932e-05, "loss": 0.4016551375389099, "step": 793 }, { "epoch": 0.3924379093043371, "grad_norm": 1.3227573763511704, "learning_rate": 1.886039489562125e-05, "loss": 0.35107535123825073, "step": 794 }, { "epoch": 0.39293216359817124, "grad_norm": 1.0690764214154878, "learning_rate": 1.8856603972156532e-05, "loss": 0.36280331015586853, "step": 795 }, { "epoch": 0.39342641789200544, "grad_norm": 1.0372650355149657, "learning_rate": 1.885280713614955e-05, "loss": 0.3417884111404419, "step": 796 }, { "epoch": 0.3939206721858396, "grad_norm": 1.0135638633522712, "learning_rate": 1.8849004390135017e-05, "loss": 0.3257544934749603, "step": 797 }, { "epoch": 0.3944149264796738, "grad_norm": 1.138312578356034, "learning_rate": 1.8845195736651588e-05, "loss": 0.3694860339164734, "step": 798 }, { "epoch": 0.39490918077350795, "grad_norm": 1.0432466517484986, "learning_rate": 1.8841381178241865e-05, "loss": 0.37279266119003296, "step": 799 }, { "epoch": 0.39540343506734216, "grad_norm": 1.023281980764518, "learning_rate": 1.88375607174524e-05, "loss": 0.38758352398872375, "step": 800 }, { "epoch": 0.3958976893611763, "grad_norm": 1.0321652923702807, "learning_rate": 1.883373435683367e-05, "loss": 0.34098950028419495, "step": 801 }, { "epoch": 0.3963919436550105, "grad_norm": 1.0256865325574602, "learning_rate": 1.8829902098940105e-05, "loss": 0.3278653621673584, "step": 802 }, { "epoch": 0.39688619794884467, "grad_norm": 1.1042531688452888, "learning_rate": 1.8826063946330065e-05, "loss": 0.3673133850097656, "step": 803 }, { "epoch": 0.39738045224267887, "grad_norm": 0.9510108180701087, "learning_rate": 1.882221990156584e-05, "loss": 0.37917453050613403, "step": 804 }, { "epoch": 0.397874706536513, "grad_norm": 0.9926574292369763, "learning_rate": 1.8818369967213662e-05, "loss": 0.33986327052116394, "step": 805 }, { "epoch": 0.39836896083034723, "grad_norm": 1.0256369099360807, "learning_rate": 1.8814514145843694e-05, "loss": 0.34402647614479065, "step": 806 }, { "epoch": 0.3988632151241814, "grad_norm": 1.0984836868071073, "learning_rate": 1.8810652440030026e-05, "loss": 0.32781803607940674, "step": 807 }, { "epoch": 0.3993574694180156, "grad_norm": 1.063630501097469, "learning_rate": 1.8806784852350678e-05, "loss": 0.35807961225509644, "step": 808 }, { "epoch": 0.39985172371184974, "grad_norm": 1.0130076092125457, "learning_rate": 1.8802911385387596e-05, "loss": 0.33577096462249756, "step": 809 }, { "epoch": 0.40034597800568394, "grad_norm": 1.205635135602797, "learning_rate": 1.8799032041726654e-05, "loss": 0.37786391377449036, "step": 810 }, { "epoch": 0.4008402322995181, "grad_norm": 1.0055899694647235, "learning_rate": 1.879514682395764e-05, "loss": 0.3237725496292114, "step": 811 }, { "epoch": 0.4013344865933523, "grad_norm": 1.0557641796624602, "learning_rate": 1.8791255734674275e-05, "loss": 0.29552844166755676, "step": 812 }, { "epoch": 0.40182874088718645, "grad_norm": 1.0675222800328668, "learning_rate": 1.8787358776474192e-05, "loss": 0.40317612886428833, "step": 813 }, { "epoch": 0.40232299518102066, "grad_norm": 1.0548023053217102, "learning_rate": 1.8783455951958948e-05, "loss": 0.33383694291114807, "step": 814 }, { "epoch": 0.4028172494748548, "grad_norm": 1.0255061005640398, "learning_rate": 1.8779547263734012e-05, "loss": 0.35020262002944946, "step": 815 }, { "epoch": 0.403311503768689, "grad_norm": 1.098709822155027, "learning_rate": 1.8775632714408765e-05, "loss": 0.3742774724960327, "step": 816 }, { "epoch": 0.40380575806252317, "grad_norm": 0.9986084839363315, "learning_rate": 1.8771712306596506e-05, "loss": 0.35037580132484436, "step": 817 }, { "epoch": 0.4043000123563573, "grad_norm": 1.078218018297503, "learning_rate": 1.8767786042914445e-05, "loss": 0.3416820168495178, "step": 818 }, { "epoch": 0.4047942666501915, "grad_norm": 1.0398523365943921, "learning_rate": 1.8763853925983695e-05, "loss": 0.33287927508354187, "step": 819 }, { "epoch": 0.4052885209440257, "grad_norm": 1.031774367057856, "learning_rate": 1.875991595842929e-05, "loss": 0.3493141531944275, "step": 820 }, { "epoch": 0.4057827752378599, "grad_norm": 1.1647269737420223, "learning_rate": 1.875597214288015e-05, "loss": 0.4184780418872833, "step": 821 }, { "epoch": 0.40627702953169403, "grad_norm": 1.0098974718957208, "learning_rate": 1.8752022481969116e-05, "loss": 0.33189794421195984, "step": 822 }, { "epoch": 0.40677128382552824, "grad_norm": 1.1012026040533913, "learning_rate": 1.8748066978332925e-05, "loss": 0.35339856147766113, "step": 823 }, { "epoch": 0.4072655381193624, "grad_norm": 0.990995886573267, "learning_rate": 1.874410563461221e-05, "loss": 0.3766328692436218, "step": 824 }, { "epoch": 0.4077597924131966, "grad_norm": 1.023451056136873, "learning_rate": 1.874013845345152e-05, "loss": 0.32575076818466187, "step": 825 }, { "epoch": 0.40825404670703075, "grad_norm": 0.9933822197860499, "learning_rate": 1.8736165437499273e-05, "loss": 0.3417864441871643, "step": 826 }, { "epoch": 0.40874830100086496, "grad_norm": 1.053854919420327, "learning_rate": 1.8732186589407807e-05, "loss": 0.3636544942855835, "step": 827 }, { "epoch": 0.4092425552946991, "grad_norm": 1.0398605740994966, "learning_rate": 1.872820191183334e-05, "loss": 0.38730406761169434, "step": 828 }, { "epoch": 0.4097368095885333, "grad_norm": 1.031894160648423, "learning_rate": 1.872421140743599e-05, "loss": 0.3593043088912964, "step": 829 }, { "epoch": 0.41023106388236746, "grad_norm": 1.046860972263581, "learning_rate": 1.872021507887976e-05, "loss": 0.39092978835105896, "step": 830 }, { "epoch": 0.41072531817620167, "grad_norm": 1.1607362555786684, "learning_rate": 1.8716212928832537e-05, "loss": 0.3745616674423218, "step": 831 }, { "epoch": 0.4112195724700358, "grad_norm": 1.1451994826740608, "learning_rate": 1.87122049599661e-05, "loss": 0.39571845531463623, "step": 832 }, { "epoch": 0.41171382676387, "grad_norm": 1.0987542615004384, "learning_rate": 1.8708191174956116e-05, "loss": 0.35459476709365845, "step": 833 }, { "epoch": 0.4122080810577042, "grad_norm": 1.1159636372579822, "learning_rate": 1.870417157648213e-05, "loss": 0.38937896490097046, "step": 834 }, { "epoch": 0.4127023353515384, "grad_norm": 1.002441779942121, "learning_rate": 1.8700146167227563e-05, "loss": 0.33595120906829834, "step": 835 }, { "epoch": 0.41319658964537254, "grad_norm": 0.9899088387295479, "learning_rate": 1.869611494987973e-05, "loss": 0.332889199256897, "step": 836 }, { "epoch": 0.41369084393920674, "grad_norm": 1.0005984941908395, "learning_rate": 1.8692077927129803e-05, "loss": 0.333438515663147, "step": 837 }, { "epoch": 0.4141850982330409, "grad_norm": 0.9672990037342486, "learning_rate": 1.868803510167285e-05, "loss": 0.30645743012428284, "step": 838 }, { "epoch": 0.4146793525268751, "grad_norm": 1.0166404987540014, "learning_rate": 1.86839864762078e-05, "loss": 0.3333967924118042, "step": 839 }, { "epoch": 0.41517360682070925, "grad_norm": 1.1324675944020866, "learning_rate": 1.867993205343746e-05, "loss": 0.36230576038360596, "step": 840 }, { "epoch": 0.41566786111454346, "grad_norm": 1.4565152055506116, "learning_rate": 1.8675871836068498e-05, "loss": 0.34191709756851196, "step": 841 }, { "epoch": 0.4161621154083776, "grad_norm": 1.1876819294674656, "learning_rate": 1.8671805826811462e-05, "loss": 0.3115188479423523, "step": 842 }, { "epoch": 0.4166563697022118, "grad_norm": 1.023080563524472, "learning_rate": 1.866773402838076e-05, "loss": 0.3725768029689789, "step": 843 }, { "epoch": 0.41715062399604597, "grad_norm": 1.1051799194693688, "learning_rate": 1.8663656443494673e-05, "loss": 0.376983642578125, "step": 844 }, { "epoch": 0.4176448782898801, "grad_norm": 1.0101343157113072, "learning_rate": 1.8659573074875327e-05, "loss": 0.31490784883499146, "step": 845 }, { "epoch": 0.4181391325837143, "grad_norm": 1.0250002510666845, "learning_rate": 1.8655483925248727e-05, "loss": 0.3533504605293274, "step": 846 }, { "epoch": 0.4186333868775485, "grad_norm": 1.090746715781531, "learning_rate": 1.8651388997344734e-05, "loss": 0.3282274305820465, "step": 847 }, { "epoch": 0.4191276411713827, "grad_norm": 1.1145704933282803, "learning_rate": 1.8647288293897055e-05, "loss": 0.32892414927482605, "step": 848 }, { "epoch": 0.41962189546521683, "grad_norm": 1.1451436882679205, "learning_rate": 1.864318181764327e-05, "loss": 0.40414246916770935, "step": 849 }, { "epoch": 0.42011614975905104, "grad_norm": 0.9874933781402742, "learning_rate": 1.8639069571324798e-05, "loss": 0.30335378646850586, "step": 850 }, { "epoch": 0.4206104040528852, "grad_norm": 1.0390790492756226, "learning_rate": 1.863495155768692e-05, "loss": 0.311710000038147, "step": 851 }, { "epoch": 0.4211046583467194, "grad_norm": 1.1685121542837038, "learning_rate": 1.8630827779478755e-05, "loss": 0.37345218658447266, "step": 852 }, { "epoch": 0.42159891264055355, "grad_norm": 1.118375459884757, "learning_rate": 1.8626698239453287e-05, "loss": 0.37286317348480225, "step": 853 }, { "epoch": 0.42209316693438775, "grad_norm": 1.061435107804804, "learning_rate": 1.8622562940367335e-05, "loss": 0.3706691861152649, "step": 854 }, { "epoch": 0.4225874212282219, "grad_norm": 1.045639661440086, "learning_rate": 1.8618421884981567e-05, "loss": 0.30183354020118713, "step": 855 }, { "epoch": 0.4230816755220561, "grad_norm": 0.9282918926966607, "learning_rate": 1.8614275076060486e-05, "loss": 0.32329827547073364, "step": 856 }, { "epoch": 0.42357592981589026, "grad_norm": 0.9823332197669685, "learning_rate": 1.861012251637245e-05, "loss": 0.39380010962486267, "step": 857 }, { "epoch": 0.42407018410972447, "grad_norm": 1.2258684110272524, "learning_rate": 1.8605964208689646e-05, "loss": 0.41745316982269287, "step": 858 }, { "epoch": 0.4245644384035586, "grad_norm": 1.0539643629085786, "learning_rate": 1.86018001557881e-05, "loss": 0.36751389503479004, "step": 859 }, { "epoch": 0.4250586926973928, "grad_norm": 1.052378043397748, "learning_rate": 1.8597630360447673e-05, "loss": 0.36876100301742554, "step": 860 }, { "epoch": 0.425552946991227, "grad_norm": 1.0649813734142937, "learning_rate": 1.8593454825452067e-05, "loss": 0.3473365306854248, "step": 861 }, { "epoch": 0.4260472012850612, "grad_norm": 1.0186749062796028, "learning_rate": 1.8589273553588802e-05, "loss": 0.3429828882217407, "step": 862 }, { "epoch": 0.42654145557889533, "grad_norm": 0.9471164855143414, "learning_rate": 1.8585086547649238e-05, "loss": 0.3424219787120819, "step": 863 }, { "epoch": 0.42703570987272954, "grad_norm": 1.002345729786534, "learning_rate": 1.8580893810428562e-05, "loss": 0.32187891006469727, "step": 864 }, { "epoch": 0.4275299641665637, "grad_norm": 0.997893238522563, "learning_rate": 1.8576695344725785e-05, "loss": 0.3116072416305542, "step": 865 }, { "epoch": 0.4280242184603979, "grad_norm": 0.9198063604105835, "learning_rate": 1.8572491153343742e-05, "loss": 0.32645124197006226, "step": 866 }, { "epoch": 0.42851847275423205, "grad_norm": 1.0827892730720303, "learning_rate": 1.8568281239089088e-05, "loss": 0.36861616373062134, "step": 867 }, { "epoch": 0.42901272704806626, "grad_norm": 1.05561333743087, "learning_rate": 1.8564065604772307e-05, "loss": 0.38477885723114014, "step": 868 }, { "epoch": 0.4295069813419004, "grad_norm": 1.1711610330815532, "learning_rate": 1.8559844253207694e-05, "loss": 0.352588951587677, "step": 869 }, { "epoch": 0.43000123563573456, "grad_norm": 1.1459489566657088, "learning_rate": 1.8555617187213362e-05, "loss": 0.43443864583969116, "step": 870 }, { "epoch": 0.43049548992956876, "grad_norm": 1.1608032541581428, "learning_rate": 1.8551384409611238e-05, "loss": 0.37355685234069824, "step": 871 }, { "epoch": 0.4309897442234029, "grad_norm": 1.120838755410591, "learning_rate": 1.854714592322707e-05, "loss": 0.3529026508331299, "step": 872 }, { "epoch": 0.4314839985172371, "grad_norm": 1.031744932760461, "learning_rate": 1.854290173089041e-05, "loss": 0.3278823494911194, "step": 873 }, { "epoch": 0.4319782528110713, "grad_norm": 1.045846838310407, "learning_rate": 1.8538651835434615e-05, "loss": 0.3677588999271393, "step": 874 }, { "epoch": 0.4324725071049055, "grad_norm": 0.9726822011565114, "learning_rate": 1.8534396239696852e-05, "loss": 0.34132176637649536, "step": 875 }, { "epoch": 0.43296676139873963, "grad_norm": 0.967842291132869, "learning_rate": 1.8530134946518106e-05, "loss": 0.3329963684082031, "step": 876 }, { "epoch": 0.43346101569257384, "grad_norm": 1.1447169522915757, "learning_rate": 1.852586795874315e-05, "loss": 0.38435080647468567, "step": 877 }, { "epoch": 0.433955269986408, "grad_norm": 1.076068410050275, "learning_rate": 1.8521595279220564e-05, "loss": 0.3737541735172272, "step": 878 }, { "epoch": 0.4344495242802422, "grad_norm": 1.0947429210573731, "learning_rate": 1.851731691080273e-05, "loss": 0.3676382303237915, "step": 879 }, { "epoch": 0.43494377857407635, "grad_norm": 0.9624268111771948, "learning_rate": 1.8513032856345825e-05, "loss": 0.317960262298584, "step": 880 }, { "epoch": 0.43543803286791055, "grad_norm": 1.040958800557315, "learning_rate": 1.8508743118709816e-05, "loss": 0.38857966661453247, "step": 881 }, { "epoch": 0.4359322871617447, "grad_norm": 1.0694529449199925, "learning_rate": 1.8504447700758482e-05, "loss": 0.33234506845474243, "step": 882 }, { "epoch": 0.4364265414555789, "grad_norm": 1.0262098516685678, "learning_rate": 1.8500146605359375e-05, "loss": 0.3380611538887024, "step": 883 }, { "epoch": 0.43692079574941306, "grad_norm": 1.032922511494617, "learning_rate": 1.8495839835383845e-05, "loss": 0.36386823654174805, "step": 884 }, { "epoch": 0.43741505004324727, "grad_norm": 1.0814661245803954, "learning_rate": 1.849152739370703e-05, "loss": 0.34711897373199463, "step": 885 }, { "epoch": 0.4379093043370814, "grad_norm": 1.1112439466083954, "learning_rate": 1.848720928320786e-05, "loss": 0.3861457109451294, "step": 886 }, { "epoch": 0.4384035586309156, "grad_norm": 1.0062524071684966, "learning_rate": 1.848288550676904e-05, "loss": 0.3387115001678467, "step": 887 }, { "epoch": 0.4388978129247498, "grad_norm": 1.119801920916648, "learning_rate": 1.847855606727706e-05, "loss": 0.3419748842716217, "step": 888 }, { "epoch": 0.439392067218584, "grad_norm": 1.1162084355940824, "learning_rate": 1.847422096762219e-05, "loss": 0.38184499740600586, "step": 889 }, { "epoch": 0.43988632151241813, "grad_norm": 1.1974191241625343, "learning_rate": 1.846988021069849e-05, "loss": 0.3845345973968506, "step": 890 }, { "epoch": 0.44038057580625234, "grad_norm": 1.035257767207683, "learning_rate": 1.8465533799403778e-05, "loss": 0.31854647397994995, "step": 891 }, { "epoch": 0.4408748301000865, "grad_norm": 1.2150547461116588, "learning_rate": 1.8461181736639658e-05, "loss": 0.3940027356147766, "step": 892 }, { "epoch": 0.4413690843939207, "grad_norm": 1.0827124100419134, "learning_rate": 1.8456824025311508e-05, "loss": 0.3580612540245056, "step": 893 }, { "epoch": 0.44186333868775485, "grad_norm": 1.0457692243819372, "learning_rate": 1.8452460668328474e-05, "loss": 0.3662642240524292, "step": 894 }, { "epoch": 0.44235759298158905, "grad_norm": 1.3135451040729966, "learning_rate": 1.8448091668603464e-05, "loss": 0.29031360149383545, "step": 895 }, { "epoch": 0.4428518472754232, "grad_norm": 1.2267380523250877, "learning_rate": 1.844371702905317e-05, "loss": 0.36141306161880493, "step": 896 }, { "epoch": 0.44334610156925736, "grad_norm": 0.9926258795727512, "learning_rate": 1.8439336752598027e-05, "loss": 0.35286253690719604, "step": 897 }, { "epoch": 0.44384035586309156, "grad_norm": 1.0509214985554662, "learning_rate": 1.8434950842162256e-05, "loss": 0.38967087864875793, "step": 898 }, { "epoch": 0.4443346101569257, "grad_norm": 1.1041873655686079, "learning_rate": 1.8430559300673824e-05, "loss": 0.4260423183441162, "step": 899 }, { "epoch": 0.4448288644507599, "grad_norm": 1.0004221402171782, "learning_rate": 1.8426162131064456e-05, "loss": 0.35336780548095703, "step": 900 }, { "epoch": 0.44532311874459407, "grad_norm": 1.0124996907215051, "learning_rate": 1.842175933626965e-05, "loss": 0.32953035831451416, "step": 901 }, { "epoch": 0.4458173730384283, "grad_norm": 1.1481125848953921, "learning_rate": 1.841735091922864e-05, "loss": 0.3495085537433624, "step": 902 }, { "epoch": 0.44631162733226243, "grad_norm": 1.0556558347257945, "learning_rate": 1.8412936882884426e-05, "loss": 0.3774382174015045, "step": 903 }, { "epoch": 0.44680588162609663, "grad_norm": 1.1488659780400408, "learning_rate": 1.8408517230183756e-05, "loss": 0.397183358669281, "step": 904 }, { "epoch": 0.4473001359199308, "grad_norm": 1.1226988100601583, "learning_rate": 1.840409196407713e-05, "loss": 0.4004632234573364, "step": 905 }, { "epoch": 0.447794390213765, "grad_norm": 0.9888048683742604, "learning_rate": 1.8399661087518784e-05, "loss": 0.3464478850364685, "step": 906 }, { "epoch": 0.44828864450759914, "grad_norm": 1.0618254470638813, "learning_rate": 1.839522460346671e-05, "loss": 0.38161879777908325, "step": 907 }, { "epoch": 0.44878289880143335, "grad_norm": 1.0021571541379897, "learning_rate": 1.839078251488265e-05, "loss": 0.3307412266731262, "step": 908 }, { "epoch": 0.4492771530952675, "grad_norm": 1.0558486391083746, "learning_rate": 1.838633482473207e-05, "loss": 0.3238945007324219, "step": 909 }, { "epoch": 0.4497714073891017, "grad_norm": 1.1763396472681338, "learning_rate": 1.8381881535984186e-05, "loss": 0.37863802909851074, "step": 910 }, { "epoch": 0.45026566168293586, "grad_norm": 1.187536001798055, "learning_rate": 1.8377422651611955e-05, "loss": 0.35920199751853943, "step": 911 }, { "epoch": 0.45075991597677006, "grad_norm": 1.1108046485108733, "learning_rate": 1.8372958174592054e-05, "loss": 0.3913283050060272, "step": 912 }, { "epoch": 0.4512541702706042, "grad_norm": 1.029447767687351, "learning_rate": 1.8368488107904916e-05, "loss": 0.32950836420059204, "step": 913 }, { "epoch": 0.4517484245644384, "grad_norm": 0.9275296283957708, "learning_rate": 1.8364012454534687e-05, "loss": 0.30557066202163696, "step": 914 }, { "epoch": 0.4522426788582726, "grad_norm": 1.0685283966213752, "learning_rate": 1.835953121746925e-05, "loss": 0.3280435800552368, "step": 915 }, { "epoch": 0.4527369331521068, "grad_norm": 1.0053118292301932, "learning_rate": 1.835504439970021e-05, "loss": 0.323611319065094, "step": 916 }, { "epoch": 0.45323118744594093, "grad_norm": 1.086332749113099, "learning_rate": 1.835055200422292e-05, "loss": 0.3794775605201721, "step": 917 }, { "epoch": 0.45372544173977514, "grad_norm": 1.1746257984153148, "learning_rate": 1.8346054034036418e-05, "loss": 0.3437816798686981, "step": 918 }, { "epoch": 0.4542196960336093, "grad_norm": 1.175593282348777, "learning_rate": 1.8341550492143497e-05, "loss": 0.40312957763671875, "step": 919 }, { "epoch": 0.4547139503274435, "grad_norm": 1.0344840643948632, "learning_rate": 1.833704138155065e-05, "loss": 0.33988016843795776, "step": 920 }, { "epoch": 0.45520820462127765, "grad_norm": 1.099362227926189, "learning_rate": 1.83325267052681e-05, "loss": 0.30893969535827637, "step": 921 }, { "epoch": 0.45570245891511185, "grad_norm": 1.1279932203915406, "learning_rate": 1.832800646630978e-05, "loss": 0.3351095914840698, "step": 922 }, { "epoch": 0.456196713208946, "grad_norm": 1.0211776718159757, "learning_rate": 1.8323480667693335e-05, "loss": 0.3235122561454773, "step": 923 }, { "epoch": 0.45669096750278015, "grad_norm": 1.0274671423740642, "learning_rate": 1.8318949312440126e-05, "loss": 0.3482256531715393, "step": 924 }, { "epoch": 0.45718522179661436, "grad_norm": 1.0223238909560575, "learning_rate": 1.831441240357522e-05, "loss": 0.3577580451965332, "step": 925 }, { "epoch": 0.4576794760904485, "grad_norm": 1.100617534966992, "learning_rate": 1.8309869944127386e-05, "loss": 0.34081172943115234, "step": 926 }, { "epoch": 0.4581737303842827, "grad_norm": 1.1911908757683491, "learning_rate": 1.8305321937129118e-05, "loss": 0.4041389524936676, "step": 927 }, { "epoch": 0.45866798467811687, "grad_norm": 0.9300326755373893, "learning_rate": 1.830076838561659e-05, "loss": 0.3014240562915802, "step": 928 }, { "epoch": 0.4591622389719511, "grad_norm": 1.0061666296037273, "learning_rate": 1.829620929262969e-05, "loss": 0.3105698823928833, "step": 929 }, { "epoch": 0.4596564932657852, "grad_norm": 1.035696211609358, "learning_rate": 1.8291644661212008e-05, "loss": 0.36114832758903503, "step": 930 }, { "epoch": 0.46015074755961943, "grad_norm": 1.0621844186259055, "learning_rate": 1.828707449441082e-05, "loss": 0.33738240599632263, "step": 931 }, { "epoch": 0.4606450018534536, "grad_norm": 1.0507412286541111, "learning_rate": 1.8282498795277108e-05, "loss": 0.3455100655555725, "step": 932 }, { "epoch": 0.4611392561472878, "grad_norm": 1.0635377650103532, "learning_rate": 1.8277917566865544e-05, "loss": 0.3622395992279053, "step": 933 }, { "epoch": 0.46163351044112194, "grad_norm": 1.1698746861585616, "learning_rate": 1.8273330812234488e-05, "loss": 0.36942192912101746, "step": 934 }, { "epoch": 0.46212776473495615, "grad_norm": 1.1083328377879573, "learning_rate": 1.8268738534445996e-05, "loss": 0.33603039383888245, "step": 935 }, { "epoch": 0.4626220190287903, "grad_norm": 1.0473328437100615, "learning_rate": 1.82641407365658e-05, "loss": 0.34806567430496216, "step": 936 }, { "epoch": 0.4631162733226245, "grad_norm": 1.0559884618945852, "learning_rate": 1.8259537421663333e-05, "loss": 0.35512328147888184, "step": 937 }, { "epoch": 0.46361052761645866, "grad_norm": 1.0108795008514326, "learning_rate": 1.8254928592811695e-05, "loss": 0.33349719643592834, "step": 938 }, { "epoch": 0.46410478191029286, "grad_norm": 1.2122442261111321, "learning_rate": 1.8250314253087677e-05, "loss": 0.3510274887084961, "step": 939 }, { "epoch": 0.464599036204127, "grad_norm": 1.2184941603930532, "learning_rate": 1.824569440557175e-05, "loss": 0.35831883549690247, "step": 940 }, { "epoch": 0.4650932904979612, "grad_norm": 1.1635496425287044, "learning_rate": 1.824106905334805e-05, "loss": 0.353208065032959, "step": 941 }, { "epoch": 0.46558754479179537, "grad_norm": 1.1400926219916139, "learning_rate": 1.8236438199504402e-05, "loss": 0.3335849642753601, "step": 942 }, { "epoch": 0.4660817990856296, "grad_norm": 1.0623049779098108, "learning_rate": 1.8231801847132294e-05, "loss": 0.346247136592865, "step": 943 }, { "epoch": 0.46657605337946373, "grad_norm": 1.0719060242361118, "learning_rate": 1.8227159999326895e-05, "loss": 0.35125380754470825, "step": 944 }, { "epoch": 0.46707030767329794, "grad_norm": 1.026675887024196, "learning_rate": 1.822251265918703e-05, "loss": 0.34262675046920776, "step": 945 }, { "epoch": 0.4675645619671321, "grad_norm": 1.0951735908349534, "learning_rate": 1.82178598298152e-05, "loss": 0.3437168598175049, "step": 946 }, { "epoch": 0.4680588162609663, "grad_norm": 1.2204880290084008, "learning_rate": 1.8213201514317565e-05, "loss": 0.35729774832725525, "step": 947 }, { "epoch": 0.46855307055480044, "grad_norm": 1.1062871199303559, "learning_rate": 1.8208537715803954e-05, "loss": 0.36507898569107056, "step": 948 }, { "epoch": 0.46904732484863465, "grad_norm": 1.0875432400928187, "learning_rate": 1.8203868437387847e-05, "loss": 0.363017737865448, "step": 949 }, { "epoch": 0.4695415791424688, "grad_norm": 1.0718622311605446, "learning_rate": 1.8199193682186388e-05, "loss": 0.3645821511745453, "step": 950 }, { "epoch": 0.47003583343630295, "grad_norm": 1.2195854283374437, "learning_rate": 1.8194513453320387e-05, "loss": 0.3054324686527252, "step": 951 }, { "epoch": 0.47053008773013716, "grad_norm": 1.0538248118306075, "learning_rate": 1.8189827753914282e-05, "loss": 0.35003694891929626, "step": 952 }, { "epoch": 0.4710243420239713, "grad_norm": 1.1789267282791076, "learning_rate": 1.8185136587096193e-05, "loss": 0.37834814190864563, "step": 953 }, { "epoch": 0.4715185963178055, "grad_norm": 1.0741971770420784, "learning_rate": 1.8180439955997867e-05, "loss": 0.3369285464286804, "step": 954 }, { "epoch": 0.47201285061163967, "grad_norm": 1.010532535770725, "learning_rate": 1.8175737863754706e-05, "loss": 0.3612895905971527, "step": 955 }, { "epoch": 0.4725071049054739, "grad_norm": 1.057430538694607, "learning_rate": 1.817103031350577e-05, "loss": 0.34393271803855896, "step": 956 }, { "epoch": 0.473001359199308, "grad_norm": 1.0983705860238564, "learning_rate": 1.8166317308393745e-05, "loss": 0.3824620544910431, "step": 957 }, { "epoch": 0.47349561349314223, "grad_norm": 1.0093831974265368, "learning_rate": 1.816159885156497e-05, "loss": 0.3092145621776581, "step": 958 }, { "epoch": 0.4739898677869764, "grad_norm": 0.9971938324913802, "learning_rate": 1.8156874946169414e-05, "loss": 0.3328183889389038, "step": 959 }, { "epoch": 0.4744841220808106, "grad_norm": 1.1071894513842127, "learning_rate": 1.815214559536069e-05, "loss": 0.3715244233608246, "step": 960 }, { "epoch": 0.47497837637464474, "grad_norm": 0.9615506144211561, "learning_rate": 1.814741080229605e-05, "loss": 0.31065690517425537, "step": 961 }, { "epoch": 0.47547263066847895, "grad_norm": 1.0443475280559777, "learning_rate": 1.814267057013637e-05, "loss": 0.3632475733757019, "step": 962 }, { "epoch": 0.4759668849623131, "grad_norm": 1.0447314581931118, "learning_rate": 1.813792490204616e-05, "loss": 0.3367992043495178, "step": 963 }, { "epoch": 0.4764611392561473, "grad_norm": 3.0902704784337263, "learning_rate": 1.813317380119356e-05, "loss": 0.37678295373916626, "step": 964 }, { "epoch": 0.47695539354998145, "grad_norm": 1.092515860835368, "learning_rate": 1.8128417270750342e-05, "loss": 0.31454166769981384, "step": 965 }, { "epoch": 0.47744964784381566, "grad_norm": 1.1351912635055343, "learning_rate": 1.81236553138919e-05, "loss": 0.38495004177093506, "step": 966 }, { "epoch": 0.4779439021376498, "grad_norm": 1.1935841314497264, "learning_rate": 1.8118887933797237e-05, "loss": 0.3867315948009491, "step": 967 }, { "epoch": 0.478438156431484, "grad_norm": 1.0520609240642282, "learning_rate": 1.8114115133648996e-05, "loss": 0.3453156650066376, "step": 968 }, { "epoch": 0.47893241072531817, "grad_norm": 1.0244115852831113, "learning_rate": 1.8109336916633426e-05, "loss": 0.34461456537246704, "step": 969 }, { "epoch": 0.4794266650191524, "grad_norm": 1.0814329785787762, "learning_rate": 1.8104553285940404e-05, "loss": 0.36489856243133545, "step": 970 }, { "epoch": 0.4799209193129865, "grad_norm": 1.0551232871498393, "learning_rate": 1.80997642447634e-05, "loss": 0.3596840500831604, "step": 971 }, { "epoch": 0.48041517360682073, "grad_norm": 1.1473167291229827, "learning_rate": 1.8094969796299527e-05, "loss": 0.3856956362724304, "step": 972 }, { "epoch": 0.4809094279006549, "grad_norm": 1.036679746340059, "learning_rate": 1.8090169943749477e-05, "loss": 0.3235170245170593, "step": 973 }, { "epoch": 0.4814036821944891, "grad_norm": 0.9980037023378185, "learning_rate": 1.8085364690317564e-05, "loss": 0.28033584356307983, "step": 974 }, { "epoch": 0.48189793648832324, "grad_norm": 1.0350899218465197, "learning_rate": 1.808055403921171e-05, "loss": 0.3279935419559479, "step": 975 }, { "epoch": 0.4823921907821574, "grad_norm": 1.1400322966724836, "learning_rate": 1.8075737993643442e-05, "loss": 0.36426058411598206, "step": 976 }, { "epoch": 0.4828864450759916, "grad_norm": 1.062964412180167, "learning_rate": 1.8070916556827876e-05, "loss": 0.3720256984233856, "step": 977 }, { "epoch": 0.48338069936982575, "grad_norm": 1.1101144076762623, "learning_rate": 1.8066089731983735e-05, "loss": 0.3299727439880371, "step": 978 }, { "epoch": 0.48387495366365996, "grad_norm": 1.1080862284860111, "learning_rate": 1.8061257522333338e-05, "loss": 0.3425888419151306, "step": 979 }, { "epoch": 0.4843692079574941, "grad_norm": 1.1899160965861721, "learning_rate": 1.80564199311026e-05, "loss": 0.34109392762184143, "step": 980 }, { "epoch": 0.4848634622513283, "grad_norm": 1.017538963669655, "learning_rate": 1.805157696152103e-05, "loss": 0.29130926728248596, "step": 981 }, { "epoch": 0.48535771654516247, "grad_norm": 1.1092378859222098, "learning_rate": 1.8046728616821726e-05, "loss": 0.36200815439224243, "step": 982 }, { "epoch": 0.48585197083899667, "grad_norm": 1.3150178990962822, "learning_rate": 1.8041874900241368e-05, "loss": 0.3343828320503235, "step": 983 }, { "epoch": 0.4863462251328308, "grad_norm": 0.9882024578614582, "learning_rate": 1.803701581502023e-05, "loss": 0.32942160964012146, "step": 984 }, { "epoch": 0.48684047942666503, "grad_norm": 0.9909863431121513, "learning_rate": 1.803215136440217e-05, "loss": 0.34390491247177124, "step": 985 }, { "epoch": 0.4873347337204992, "grad_norm": 1.1118778887065912, "learning_rate": 1.8027281551634622e-05, "loss": 0.37723374366760254, "step": 986 }, { "epoch": 0.4878289880143334, "grad_norm": 1.0469525424396737, "learning_rate": 1.802240637996861e-05, "loss": 0.3493693470954895, "step": 987 }, { "epoch": 0.48832324230816754, "grad_norm": 1.131021341780466, "learning_rate": 1.8017525852658723e-05, "loss": 0.3564317524433136, "step": 988 }, { "epoch": 0.48881749660200174, "grad_norm": 1.0435634175515676, "learning_rate": 1.8012639972963136e-05, "loss": 0.36572349071502686, "step": 989 }, { "epoch": 0.4893117508958359, "grad_norm": 1.0078714155476896, "learning_rate": 1.8007748744143586e-05, "loss": 0.31457674503326416, "step": 990 }, { "epoch": 0.4898060051896701, "grad_norm": 1.1126722971991523, "learning_rate": 1.8002852169465393e-05, "loss": 0.36191096901893616, "step": 991 }, { "epoch": 0.49030025948350425, "grad_norm": 1.0321930748215848, "learning_rate": 1.799795025219744e-05, "loss": 0.33284491300582886, "step": 992 }, { "epoch": 0.49079451377733846, "grad_norm": 1.0239281284644144, "learning_rate": 1.7993042995612172e-05, "loss": 0.3101437985897064, "step": 993 }, { "epoch": 0.4912887680711726, "grad_norm": 1.033291904553078, "learning_rate": 1.7988130402985608e-05, "loss": 0.3196948170661926, "step": 994 }, { "epoch": 0.4917830223650068, "grad_norm": 1.1489266069218314, "learning_rate": 1.7983212477597325e-05, "loss": 0.3757585883140564, "step": 995 }, { "epoch": 0.49227727665884097, "grad_norm": 1.1725728838471274, "learning_rate": 1.7978289222730454e-05, "loss": 0.3949659466743469, "step": 996 }, { "epoch": 0.4927715309526752, "grad_norm": 1.1279800728609437, "learning_rate": 1.79733606416717e-05, "loss": 0.3490184545516968, "step": 997 }, { "epoch": 0.4932657852465093, "grad_norm": 1.2158784468170585, "learning_rate": 1.7968426737711304e-05, "loss": 0.32302743196487427, "step": 998 }, { "epoch": 0.49376003954034353, "grad_norm": 1.1923748239050125, "learning_rate": 1.7963487514143073e-05, "loss": 0.4205089807510376, "step": 999 }, { "epoch": 0.4942542938341777, "grad_norm": 1.0997609009048648, "learning_rate": 1.7958542974264363e-05, "loss": 0.30787885189056396, "step": 1000 }, { "epoch": 0.4947485481280119, "grad_norm": 0.9527130505595168, "learning_rate": 1.7953593121376075e-05, "loss": 0.3174916207790375, "step": 1001 }, { "epoch": 0.49524280242184604, "grad_norm": 0.9736659707101099, "learning_rate": 1.7948637958782662e-05, "loss": 0.330039381980896, "step": 1002 }, { "epoch": 0.4957370567156802, "grad_norm": 1.0487288206783625, "learning_rate": 1.794367748979212e-05, "loss": 0.3362613320350647, "step": 1003 }, { "epoch": 0.4962313110095144, "grad_norm": 1.065682818958373, "learning_rate": 1.793871171771599e-05, "loss": 0.3479865789413452, "step": 1004 }, { "epoch": 0.49672556530334855, "grad_norm": 1.0920057715386207, "learning_rate": 1.7933740645869345e-05, "loss": 0.361303448677063, "step": 1005 }, { "epoch": 0.49721981959718276, "grad_norm": 1.07605927747069, "learning_rate": 1.79287642775708e-05, "loss": 0.32340794801712036, "step": 1006 }, { "epoch": 0.4977140738910169, "grad_norm": 1.086462795838887, "learning_rate": 1.792378261614252e-05, "loss": 0.3410148620605469, "step": 1007 }, { "epoch": 0.4982083281848511, "grad_norm": 1.0450045575623719, "learning_rate": 1.791879566491018e-05, "loss": 0.3332127034664154, "step": 1008 }, { "epoch": 0.49870258247868526, "grad_norm": 1.1673390171795246, "learning_rate": 1.7913803427202998e-05, "loss": 0.36532774567604065, "step": 1009 }, { "epoch": 0.49919683677251947, "grad_norm": 1.1838892890378474, "learning_rate": 1.7908805906353725e-05, "loss": 0.3721959888935089, "step": 1010 }, { "epoch": 0.4996910910663536, "grad_norm": 0.990806411218012, "learning_rate": 1.7903803105698627e-05, "loss": 0.3406672477722168, "step": 1011 }, { "epoch": 0.5001853453601878, "grad_norm": 1.0152890264941994, "learning_rate": 1.789879502857751e-05, "loss": 0.323926717042923, "step": 1012 }, { "epoch": 0.500679599654022, "grad_norm": 1.082078334287421, "learning_rate": 1.7893781678333694e-05, "loss": 0.36245018243789673, "step": 1013 }, { "epoch": 0.5011738539478562, "grad_norm": 1.1363612319173766, "learning_rate": 1.7888763058314016e-05, "loss": 0.36145877838134766, "step": 1014 }, { "epoch": 0.5016681082416904, "grad_norm": 0.9479821815236287, "learning_rate": 1.788373917186884e-05, "loss": 0.31398001313209534, "step": 1015 }, { "epoch": 0.5021623625355245, "grad_norm": 1.0634976007398544, "learning_rate": 1.7878710022352033e-05, "loss": 0.36732447147369385, "step": 1016 }, { "epoch": 0.5026566168293587, "grad_norm": 1.0888289854290114, "learning_rate": 1.787367561312099e-05, "loss": 0.3336929678916931, "step": 1017 }, { "epoch": 0.5031508711231929, "grad_norm": 1.081948070644993, "learning_rate": 1.786863594753661e-05, "loss": 0.33306068181991577, "step": 1018 }, { "epoch": 0.5036451254170271, "grad_norm": 1.1710814753085148, "learning_rate": 1.7863591028963297e-05, "loss": 0.32577213644981384, "step": 1019 }, { "epoch": 0.5041393797108612, "grad_norm": 1.0902819718302648, "learning_rate": 1.7858540860768974e-05, "loss": 0.33542972803115845, "step": 1020 }, { "epoch": 0.5046336340046954, "grad_norm": 1.1116685663765398, "learning_rate": 1.7853485446325055e-05, "loss": 0.3075249195098877, "step": 1021 }, { "epoch": 0.5051278882985296, "grad_norm": 1.135601263046101, "learning_rate": 1.7848424789006466e-05, "loss": 0.3473510146141052, "step": 1022 }, { "epoch": 0.5056221425923638, "grad_norm": 1.2152682076096186, "learning_rate": 1.784335889219163e-05, "loss": 0.3543929159641266, "step": 1023 }, { "epoch": 0.5061163968861979, "grad_norm": 1.026549045591816, "learning_rate": 1.783828775926246e-05, "loss": 0.3198593556880951, "step": 1024 }, { "epoch": 0.5066106511800321, "grad_norm": 1.07796975394457, "learning_rate": 1.783321139360438e-05, "loss": 0.34223973751068115, "step": 1025 }, { "epoch": 0.5071049054738663, "grad_norm": 1.2487195797385122, "learning_rate": 1.78281297986063e-05, "loss": 0.3895387351512909, "step": 1026 }, { "epoch": 0.5075991597677005, "grad_norm": 1.0333211037977794, "learning_rate": 1.782304297766061e-05, "loss": 0.35764580965042114, "step": 1027 }, { "epoch": 0.5080934140615346, "grad_norm": 0.9679048017438919, "learning_rate": 1.7817950934163213e-05, "loss": 0.30859488248825073, "step": 1028 }, { "epoch": 0.5085876683553688, "grad_norm": 1.0913185130679384, "learning_rate": 1.7812853671513472e-05, "loss": 0.3554389476776123, "step": 1029 }, { "epoch": 0.509081922649203, "grad_norm": 1.0101463789736986, "learning_rate": 1.7807751193114254e-05, "loss": 0.3528766632080078, "step": 1030 }, { "epoch": 0.5095761769430371, "grad_norm": 1.054067237260528, "learning_rate": 1.78026435023719e-05, "loss": 0.3645275831222534, "step": 1031 }, { "epoch": 0.5100704312368713, "grad_norm": 1.338540047449502, "learning_rate": 1.779753060269623e-05, "loss": 0.3137075901031494, "step": 1032 }, { "epoch": 0.5105646855307056, "grad_norm": 1.0928434325752037, "learning_rate": 1.7792412497500538e-05, "loss": 0.31993091106414795, "step": 1033 }, { "epoch": 0.5110589398245398, "grad_norm": 1.032718640643118, "learning_rate": 1.7787289190201606e-05, "loss": 0.3514295220375061, "step": 1034 }, { "epoch": 0.5115531941183739, "grad_norm": 0.9529992201270954, "learning_rate": 1.7782160684219677e-05, "loss": 0.3167670667171478, "step": 1035 }, { "epoch": 0.5120474484122081, "grad_norm": 1.1056391999630892, "learning_rate": 1.7777026982978473e-05, "loss": 0.3298097252845764, "step": 1036 }, { "epoch": 0.5125417027060423, "grad_norm": 1.008539858185866, "learning_rate": 1.777188808990517e-05, "loss": 0.3334948420524597, "step": 1037 }, { "epoch": 0.5130359569998765, "grad_norm": 1.1451382861648118, "learning_rate": 1.776674400843043e-05, "loss": 0.3705115020275116, "step": 1038 }, { "epoch": 0.5135302112937106, "grad_norm": 1.2062150323771585, "learning_rate": 1.7761594741988356e-05, "loss": 0.3586978614330292, "step": 1039 }, { "epoch": 0.5140244655875448, "grad_norm": 0.9949081741462515, "learning_rate": 1.7756440294016535e-05, "loss": 0.3105466663837433, "step": 1040 }, { "epoch": 0.514518719881379, "grad_norm": 1.240576049327348, "learning_rate": 1.7751280667956002e-05, "loss": 0.35213470458984375, "step": 1041 }, { "epoch": 0.5150129741752132, "grad_norm": 1.1494264660428748, "learning_rate": 1.7746115867251245e-05, "loss": 0.3830525875091553, "step": 1042 }, { "epoch": 0.5155072284690473, "grad_norm": 1.044917786849415, "learning_rate": 1.7740945895350215e-05, "loss": 0.34106165170669556, "step": 1043 }, { "epoch": 0.5160014827628815, "grad_norm": 0.9456529066854209, "learning_rate": 1.773577075570431e-05, "loss": 0.33408549427986145, "step": 1044 }, { "epoch": 0.5164957370567157, "grad_norm": 1.057634132461443, "learning_rate": 1.7730590451768375e-05, "loss": 0.32823115587234497, "step": 1045 }, { "epoch": 0.5169899913505499, "grad_norm": 0.9870247990943719, "learning_rate": 1.7725404987000716e-05, "loss": 0.2866591811180115, "step": 1046 }, { "epoch": 0.517484245644384, "grad_norm": 1.0669638645996897, "learning_rate": 1.772021436486307e-05, "loss": 0.34053099155426025, "step": 1047 }, { "epoch": 0.5179784999382182, "grad_norm": 1.0384310943814752, "learning_rate": 1.771501858882062e-05, "loss": 0.30379486083984375, "step": 1048 }, { "epoch": 0.5184727542320524, "grad_norm": 1.299899967945095, "learning_rate": 1.7709817662341998e-05, "loss": 0.37569302320480347, "step": 1049 }, { "epoch": 0.5189670085258866, "grad_norm": 1.0489606422309163, "learning_rate": 1.770461158889926e-05, "loss": 0.31770390272140503, "step": 1050 }, { "epoch": 0.5194612628197207, "grad_norm": 1.1640089464310481, "learning_rate": 1.769940037196791e-05, "loss": 0.34175002574920654, "step": 1051 }, { "epoch": 0.5199555171135549, "grad_norm": 1.0797819699416114, "learning_rate": 1.769418401502689e-05, "loss": 0.3634580671787262, "step": 1052 }, { "epoch": 0.5204497714073891, "grad_norm": 1.1990448584577926, "learning_rate": 1.7688962521558554e-05, "loss": 0.3631044030189514, "step": 1053 }, { "epoch": 0.5209440257012233, "grad_norm": 1.2482048374766477, "learning_rate": 1.7683735895048698e-05, "loss": 0.3402160704135895, "step": 1054 }, { "epoch": 0.5214382799950574, "grad_norm": 1.2190765212037056, "learning_rate": 1.7678504138986548e-05, "loss": 0.3895665407180786, "step": 1055 }, { "epoch": 0.5219325342888916, "grad_norm": 1.076846194861831, "learning_rate": 1.767326725686475e-05, "loss": 0.32207030057907104, "step": 1056 }, { "epoch": 0.5224267885827258, "grad_norm": 1.10282378456951, "learning_rate": 1.7668025252179363e-05, "loss": 0.33095866441726685, "step": 1057 }, { "epoch": 0.5229210428765599, "grad_norm": 1.1487800022178571, "learning_rate": 1.7662778128429883e-05, "loss": 0.33239442110061646, "step": 1058 }, { "epoch": 0.5234152971703941, "grad_norm": 0.9873637767970463, "learning_rate": 1.7657525889119212e-05, "loss": 0.27432021498680115, "step": 1059 }, { "epoch": 0.5239095514642284, "grad_norm": 1.0928994862368866, "learning_rate": 1.7652268537753672e-05, "loss": 0.3221333622932434, "step": 1060 }, { "epoch": 0.5244038057580626, "grad_norm": 1.114838100134283, "learning_rate": 1.764700607784299e-05, "loss": 0.3126341700553894, "step": 1061 }, { "epoch": 0.5248980600518967, "grad_norm": 1.0401864286303986, "learning_rate": 1.7641738512900315e-05, "loss": 0.33239883184432983, "step": 1062 }, { "epoch": 0.5253923143457309, "grad_norm": 0.9509614150111031, "learning_rate": 1.7636465846442197e-05, "loss": 0.30075010657310486, "step": 1063 }, { "epoch": 0.5258865686395651, "grad_norm": 1.0717488761603333, "learning_rate": 1.763118808198859e-05, "loss": 0.3577713370323181, "step": 1064 }, { "epoch": 0.5263808229333993, "grad_norm": 1.0802706273753335, "learning_rate": 1.7625905223062858e-05, "loss": 0.3483964204788208, "step": 1065 }, { "epoch": 0.5268750772272334, "grad_norm": 1.1651963376515642, "learning_rate": 1.762061727319176e-05, "loss": 0.3622454106807709, "step": 1066 }, { "epoch": 0.5273693315210676, "grad_norm": 1.0440643033385941, "learning_rate": 1.761532423590545e-05, "loss": 0.35156917572021484, "step": 1067 }, { "epoch": 0.5278635858149018, "grad_norm": 1.1589394381083906, "learning_rate": 1.7610026114737498e-05, "loss": 0.3413820266723633, "step": 1068 }, { "epoch": 0.528357840108736, "grad_norm": 1.1280561588615983, "learning_rate": 1.760472291322484e-05, "loss": 0.3707934021949768, "step": 1069 }, { "epoch": 0.5288520944025701, "grad_norm": 1.2170503232061094, "learning_rate": 1.7599414634907828e-05, "loss": 0.3472951054573059, "step": 1070 }, { "epoch": 0.5293463486964043, "grad_norm": 1.1676650140216285, "learning_rate": 1.7594101283330184e-05, "loss": 0.393882155418396, "step": 1071 }, { "epoch": 0.5298406029902385, "grad_norm": 0.9683606994511744, "learning_rate": 1.758878286203903e-05, "loss": 0.3094913065433502, "step": 1072 }, { "epoch": 0.5303348572840727, "grad_norm": 1.09347684867524, "learning_rate": 1.758345937458487e-05, "loss": 0.33904048800468445, "step": 1073 }, { "epoch": 0.5308291115779068, "grad_norm": 1.0218184375103434, "learning_rate": 1.7578130824521585e-05, "loss": 0.3218901753425598, "step": 1074 }, { "epoch": 0.531323365871741, "grad_norm": 0.95615697696865, "learning_rate": 1.7572797215406442e-05, "loss": 0.31584852933883667, "step": 1075 }, { "epoch": 0.5318176201655752, "grad_norm": 0.9682503945021611, "learning_rate": 1.756745855080008e-05, "loss": 0.3449877202510834, "step": 1076 }, { "epoch": 0.5323118744594094, "grad_norm": 1.084607183777355, "learning_rate": 1.756211483426651e-05, "loss": 0.3544886112213135, "step": 1077 }, { "epoch": 0.5328061287532435, "grad_norm": 1.1680618553038933, "learning_rate": 1.755676606937313e-05, "loss": 0.34360697865486145, "step": 1078 }, { "epoch": 0.5333003830470777, "grad_norm": 1.0514045755368502, "learning_rate": 1.7551412259690695e-05, "loss": 0.3214710056781769, "step": 1079 }, { "epoch": 0.5337946373409119, "grad_norm": 0.9951048830690797, "learning_rate": 1.754605340879333e-05, "loss": 0.33841896057128906, "step": 1080 }, { "epoch": 0.534288891634746, "grad_norm": 1.0536673015942455, "learning_rate": 1.7540689520258532e-05, "loss": 0.3134745657444, "step": 1081 }, { "epoch": 0.5347831459285802, "grad_norm": 1.1773503335041235, "learning_rate": 1.753532059766715e-05, "loss": 0.3469204306602478, "step": 1082 }, { "epoch": 0.5352774002224144, "grad_norm": 1.3802140663046265, "learning_rate": 1.752994664460341e-05, "loss": 0.39217621088027954, "step": 1083 }, { "epoch": 0.5357716545162486, "grad_norm": 1.148906185686213, "learning_rate": 1.7524567664654873e-05, "loss": 0.34482622146606445, "step": 1084 }, { "epoch": 0.5362659088100827, "grad_norm": 1.0089175831530743, "learning_rate": 1.751918366141248e-05, "loss": 0.308369517326355, "step": 1085 }, { "epoch": 0.5367601631039169, "grad_norm": 1.1441511379564429, "learning_rate": 1.751379463847051e-05, "loss": 0.3396676480770111, "step": 1086 }, { "epoch": 0.5372544173977511, "grad_norm": 1.0963418237920814, "learning_rate": 1.7508400599426596e-05, "loss": 0.3059370517730713, "step": 1087 }, { "epoch": 0.5377486716915854, "grad_norm": 0.993693807257297, "learning_rate": 1.7503001547881728e-05, "loss": 0.31689077615737915, "step": 1088 }, { "epoch": 0.5382429259854195, "grad_norm": 1.2996366258679217, "learning_rate": 1.749759748744023e-05, "loss": 0.37134337425231934, "step": 1089 }, { "epoch": 0.5387371802792537, "grad_norm": 1.0586799377490923, "learning_rate": 1.7492188421709775e-05, "loss": 0.30404967069625854, "step": 1090 }, { "epoch": 0.5392314345730879, "grad_norm": 1.1213884593031693, "learning_rate": 1.7486774354301382e-05, "loss": 0.34773269295692444, "step": 1091 }, { "epoch": 0.5397256888669221, "grad_norm": 1.135256212480744, "learning_rate": 1.7481355288829404e-05, "loss": 0.34448760747909546, "step": 1092 }, { "epoch": 0.5402199431607562, "grad_norm": 1.1111138178806874, "learning_rate": 1.7475931228911526e-05, "loss": 0.33557915687561035, "step": 1093 }, { "epoch": 0.5407141974545904, "grad_norm": 1.1277612406863344, "learning_rate": 1.7470502178168783e-05, "loss": 0.3216322362422943, "step": 1094 }, { "epoch": 0.5412084517484246, "grad_norm": 1.1416777218141756, "learning_rate": 1.7465068140225524e-05, "loss": 0.3175346255302429, "step": 1095 }, { "epoch": 0.5417027060422588, "grad_norm": 1.0466005920407673, "learning_rate": 1.7459629118709435e-05, "loss": 0.3150678277015686, "step": 1096 }, { "epoch": 0.5421969603360929, "grad_norm": 1.1080261557130098, "learning_rate": 1.7454185117251534e-05, "loss": 0.3372325897216797, "step": 1097 }, { "epoch": 0.5426912146299271, "grad_norm": 1.1607395393986693, "learning_rate": 1.7448736139486156e-05, "loss": 0.3460095524787903, "step": 1098 }, { "epoch": 0.5431854689237613, "grad_norm": 1.0960477562857334, "learning_rate": 1.7443282189050964e-05, "loss": 0.3465900421142578, "step": 1099 }, { "epoch": 0.5436797232175955, "grad_norm": 1.1271957826518202, "learning_rate": 1.7437823269586925e-05, "loss": 0.3707941174507141, "step": 1100 }, { "epoch": 0.5441739775114296, "grad_norm": 1.0732325510644303, "learning_rate": 1.7432359384738354e-05, "loss": 0.3317713141441345, "step": 1101 }, { "epoch": 0.5446682318052638, "grad_norm": 1.10075448775578, "learning_rate": 1.742689053815285e-05, "loss": 0.3391956090927124, "step": 1102 }, { "epoch": 0.545162486099098, "grad_norm": 1.483156522178114, "learning_rate": 1.742141673348134e-05, "loss": 0.3838513195514679, "step": 1103 }, { "epoch": 0.5456567403929322, "grad_norm": 1.2368776155357775, "learning_rate": 1.7415937974378057e-05, "loss": 0.4438849687576294, "step": 1104 }, { "epoch": 0.5461509946867663, "grad_norm": 1.1360365035496875, "learning_rate": 1.7410454264500542e-05, "loss": 0.35329896211624146, "step": 1105 }, { "epoch": 0.5466452489806005, "grad_norm": 0.9946710480219276, "learning_rate": 1.7404965607509646e-05, "loss": 0.3124481439590454, "step": 1106 }, { "epoch": 0.5471395032744347, "grad_norm": 1.1827285369169889, "learning_rate": 1.739947200706951e-05, "loss": 0.3595995008945465, "step": 1107 }, { "epoch": 0.5476337575682688, "grad_norm": 1.0771205850736374, "learning_rate": 1.7393973466847592e-05, "loss": 0.35914891958236694, "step": 1108 }, { "epoch": 0.548128011862103, "grad_norm": 1.0372075645038734, "learning_rate": 1.7388469990514636e-05, "loss": 0.34034737944602966, "step": 1109 }, { "epoch": 0.5486222661559372, "grad_norm": 0.9639792162761298, "learning_rate": 1.7382961581744677e-05, "loss": 0.3033643066883087, "step": 1110 }, { "epoch": 0.5491165204497714, "grad_norm": 1.0333536833038373, "learning_rate": 1.737744824421506e-05, "loss": 0.3239862322807312, "step": 1111 }, { "epoch": 0.5496107747436055, "grad_norm": 1.0992782883377998, "learning_rate": 1.7371929981606403e-05, "loss": 0.36473411321640015, "step": 1112 }, { "epoch": 0.5501050290374397, "grad_norm": 0.9808971248907185, "learning_rate": 1.7366406797602625e-05, "loss": 0.3129761517047882, "step": 1113 }, { "epoch": 0.550599283331274, "grad_norm": 1.0031500416462213, "learning_rate": 1.736087869589092e-05, "loss": 0.30224812030792236, "step": 1114 }, { "epoch": 0.5510935376251082, "grad_norm": 1.0008522519559948, "learning_rate": 1.7355345680161774e-05, "loss": 0.30045247077941895, "step": 1115 }, { "epoch": 0.5515877919189422, "grad_norm": 1.1079372723945795, "learning_rate": 1.7349807754108944e-05, "loss": 0.3356926739215851, "step": 1116 }, { "epoch": 0.5520820462127765, "grad_norm": 1.3704982317685879, "learning_rate": 1.7344264921429475e-05, "loss": 0.37749868631362915, "step": 1117 }, { "epoch": 0.5525763005066107, "grad_norm": 1.0400914273370205, "learning_rate": 1.733871718582368e-05, "loss": 0.331012099981308, "step": 1118 }, { "epoch": 0.5530705548004449, "grad_norm": 1.2654046748606915, "learning_rate": 1.7333164550995153e-05, "loss": 0.3557187020778656, "step": 1119 }, { "epoch": 0.553564809094279, "grad_norm": 1.151377810019934, "learning_rate": 1.7327607020650744e-05, "loss": 0.34102991223335266, "step": 1120 }, { "epoch": 0.5540590633881132, "grad_norm": 1.0397881413898085, "learning_rate": 1.7322044598500594e-05, "loss": 0.328019917011261, "step": 1121 }, { "epoch": 0.5545533176819474, "grad_norm": 1.0773058589187376, "learning_rate": 1.7316477288258085e-05, "loss": 0.33980751037597656, "step": 1122 }, { "epoch": 0.5550475719757816, "grad_norm": 1.1823119583137516, "learning_rate": 1.731090509363988e-05, "loss": 0.3460109233856201, "step": 1123 }, { "epoch": 0.5555418262696157, "grad_norm": 1.0727245460190564, "learning_rate": 1.730532801836589e-05, "loss": 0.3013002276420593, "step": 1124 }, { "epoch": 0.5560360805634499, "grad_norm": 1.191952525403325, "learning_rate": 1.72997460661593e-05, "loss": 0.36195772886276245, "step": 1125 }, { "epoch": 0.5565303348572841, "grad_norm": 1.1481571926267522, "learning_rate": 1.7294159240746532e-05, "loss": 0.3368675112724304, "step": 1126 }, { "epoch": 0.5570245891511183, "grad_norm": 1.0950064938478345, "learning_rate": 1.7288567545857283e-05, "loss": 0.36618539690971375, "step": 1127 }, { "epoch": 0.5575188434449524, "grad_norm": 1.0773610015009678, "learning_rate": 1.7282970985224477e-05, "loss": 0.3230215311050415, "step": 1128 }, { "epoch": 0.5580130977387866, "grad_norm": 1.1539889538468413, "learning_rate": 1.72773695625843e-05, "loss": 0.38779711723327637, "step": 1129 }, { "epoch": 0.5585073520326208, "grad_norm": 1.0853438524765577, "learning_rate": 1.7271763281676187e-05, "loss": 0.33910998702049255, "step": 1130 }, { "epoch": 0.559001606326455, "grad_norm": 1.1265909455665821, "learning_rate": 1.726615214624281e-05, "loss": 0.3526651859283447, "step": 1131 }, { "epoch": 0.5594958606202891, "grad_norm": 1.0899084132349224, "learning_rate": 1.7260536160030077e-05, "loss": 0.33794116973876953, "step": 1132 }, { "epoch": 0.5599901149141233, "grad_norm": 1.2383181058563666, "learning_rate": 1.7254915326787145e-05, "loss": 0.3294123411178589, "step": 1133 }, { "epoch": 0.5604843692079575, "grad_norm": 1.0381296685245769, "learning_rate": 1.7249289650266402e-05, "loss": 0.31193166971206665, "step": 1134 }, { "epoch": 0.5609786235017916, "grad_norm": 1.0273514183990056, "learning_rate": 1.7243659134223467e-05, "loss": 0.298290491104126, "step": 1135 }, { "epoch": 0.5614728777956258, "grad_norm": 1.0372406743131939, "learning_rate": 1.7238023782417194e-05, "loss": 0.3157176971435547, "step": 1136 }, { "epoch": 0.56196713208946, "grad_norm": 0.9703670449018593, "learning_rate": 1.7232383598609664e-05, "loss": 0.3152535855770111, "step": 1137 }, { "epoch": 0.5624613863832942, "grad_norm": 1.1457741905911056, "learning_rate": 1.722673858656618e-05, "loss": 0.35004952549934387, "step": 1138 }, { "epoch": 0.5629556406771283, "grad_norm": 1.2128755723830003, "learning_rate": 1.722108875005527e-05, "loss": 0.3531174957752228, "step": 1139 }, { "epoch": 0.5634498949709625, "grad_norm": 0.9896343114056704, "learning_rate": 1.7215434092848693e-05, "loss": 0.32532358169555664, "step": 1140 }, { "epoch": 0.5639441492647967, "grad_norm": 1.086973420033045, "learning_rate": 1.7209774618721408e-05, "loss": 0.3252495229244232, "step": 1141 }, { "epoch": 0.564438403558631, "grad_norm": 1.1232225314649664, "learning_rate": 1.7204110331451603e-05, "loss": 0.35428208112716675, "step": 1142 }, { "epoch": 0.564932657852465, "grad_norm": 1.165276028587328, "learning_rate": 1.7198441234820674e-05, "loss": 0.37419646978378296, "step": 1143 }, { "epoch": 0.5654269121462993, "grad_norm": 1.1206339776354848, "learning_rate": 1.7192767332613235e-05, "loss": 0.3342249095439911, "step": 1144 }, { "epoch": 0.5659211664401335, "grad_norm": 1.0700889667237288, "learning_rate": 1.7187088628617093e-05, "loss": 0.36827898025512695, "step": 1145 }, { "epoch": 0.5664154207339677, "grad_norm": 1.1884715403984119, "learning_rate": 1.7181405126623275e-05, "loss": 0.3560858964920044, "step": 1146 }, { "epoch": 0.5669096750278018, "grad_norm": 1.0578073497156413, "learning_rate": 1.7175716830426005e-05, "loss": 0.35333797335624695, "step": 1147 }, { "epoch": 0.567403929321636, "grad_norm": 1.0504095801617317, "learning_rate": 1.71700237438227e-05, "loss": 0.31053799390792847, "step": 1148 }, { "epoch": 0.5678981836154702, "grad_norm": 1.1443484208273471, "learning_rate": 1.7164325870613998e-05, "loss": 0.37123826146125793, "step": 1149 }, { "epoch": 0.5683924379093044, "grad_norm": 1.069054169156011, "learning_rate": 1.715862321460371e-05, "loss": 0.33981990814208984, "step": 1150 }, { "epoch": 0.5688866922031385, "grad_norm": 1.1295222791710222, "learning_rate": 1.7152915779598846e-05, "loss": 0.34938257932662964, "step": 1151 }, { "epoch": 0.5693809464969727, "grad_norm": 1.10704413276648, "learning_rate": 1.714720356940961e-05, "loss": 0.3069387376308441, "step": 1152 }, { "epoch": 0.5698752007908069, "grad_norm": 1.1206304490989205, "learning_rate": 1.7141486587849397e-05, "loss": 0.34879156947135925, "step": 1153 }, { "epoch": 0.5703694550846411, "grad_norm": 1.140159647567344, "learning_rate": 1.7135764838734773e-05, "loss": 0.3624545931816101, "step": 1154 }, { "epoch": 0.5708637093784752, "grad_norm": 1.0671159168894162, "learning_rate": 1.7130038325885502e-05, "loss": 0.3548320531845093, "step": 1155 }, { "epoch": 0.5713579636723094, "grad_norm": 1.0469806768045702, "learning_rate": 1.7124307053124518e-05, "loss": 0.3004404902458191, "step": 1156 }, { "epoch": 0.5718522179661436, "grad_norm": 1.1058227077648823, "learning_rate": 1.7118571024277943e-05, "loss": 0.31545472145080566, "step": 1157 }, { "epoch": 0.5723464722599778, "grad_norm": 1.100412587450837, "learning_rate": 1.711283024317506e-05, "loss": 0.3116477429866791, "step": 1158 }, { "epoch": 0.5728407265538119, "grad_norm": 1.1169526030822408, "learning_rate": 1.710708471364834e-05, "loss": 0.3472268581390381, "step": 1159 }, { "epoch": 0.5733349808476461, "grad_norm": 1.1641407854241053, "learning_rate": 1.7101334439533414e-05, "loss": 0.33334046602249146, "step": 1160 }, { "epoch": 0.5738292351414803, "grad_norm": 1.1720238639752558, "learning_rate": 1.7095579424669074e-05, "loss": 0.3462664783000946, "step": 1161 }, { "epoch": 0.5743234894353144, "grad_norm": 1.0854325044336006, "learning_rate": 1.7089819672897304e-05, "loss": 0.3241977393627167, "step": 1162 }, { "epoch": 0.5748177437291486, "grad_norm": 1.2501733360326688, "learning_rate": 1.7084055188063217e-05, "loss": 0.3194134533405304, "step": 1163 }, { "epoch": 0.5753119980229828, "grad_norm": 1.1336053472715226, "learning_rate": 1.7078285974015103e-05, "loss": 0.3644179701805115, "step": 1164 }, { "epoch": 0.575806252316817, "grad_norm": 1.1434067682408584, "learning_rate": 1.7072512034604412e-05, "loss": 0.36653730273246765, "step": 1165 }, { "epoch": 0.5763005066106511, "grad_norm": 1.1221051792069954, "learning_rate": 1.706673337368574e-05, "loss": 0.3435714840888977, "step": 1166 }, { "epoch": 0.5767947609044853, "grad_norm": 1.0603782757024258, "learning_rate": 1.706094999511684e-05, "loss": 0.36935871839523315, "step": 1167 }, { "epoch": 0.5772890151983195, "grad_norm": 0.9845968090919184, "learning_rate": 1.7055161902758607e-05, "loss": 0.29493796825408936, "step": 1168 }, { "epoch": 0.5777832694921538, "grad_norm": 1.0115254154804856, "learning_rate": 1.70493691004751e-05, "loss": 0.32378828525543213, "step": 1169 }, { "epoch": 0.5782775237859878, "grad_norm": 1.1123861652198228, "learning_rate": 1.70435715921335e-05, "loss": 0.3587600588798523, "step": 1170 }, { "epoch": 0.578771778079822, "grad_norm": 1.1091481408248292, "learning_rate": 1.703776938160415e-05, "loss": 0.31885826587677, "step": 1171 }, { "epoch": 0.5792660323736563, "grad_norm": 1.0414979222224348, "learning_rate": 1.7031962472760514e-05, "loss": 0.2950041890144348, "step": 1172 }, { "epoch": 0.5797602866674905, "grad_norm": 1.121100234384589, "learning_rate": 1.7026150869479208e-05, "loss": 0.36190298199653625, "step": 1173 }, { "epoch": 0.5802545409613246, "grad_norm": 1.067632760047313, "learning_rate": 1.7020334575639972e-05, "loss": 0.3402514159679413, "step": 1174 }, { "epoch": 0.5807487952551588, "grad_norm": 0.9679286148168113, "learning_rate": 1.7014513595125684e-05, "loss": 0.3131282925605774, "step": 1175 }, { "epoch": 0.581243049548993, "grad_norm": 1.056786860676952, "learning_rate": 1.7008687931822344e-05, "loss": 0.29499226808547974, "step": 1176 }, { "epoch": 0.5817373038428272, "grad_norm": 1.0712930292635054, "learning_rate": 1.700285758961908e-05, "loss": 0.36821871995925903, "step": 1177 }, { "epoch": 0.5822315581366613, "grad_norm": 1.2780126948070993, "learning_rate": 1.6997022572408152e-05, "loss": 0.31486836075782776, "step": 1178 }, { "epoch": 0.5827258124304955, "grad_norm": 1.0778384840117066, "learning_rate": 1.6991182884084928e-05, "loss": 0.3176078498363495, "step": 1179 }, { "epoch": 0.5832200667243297, "grad_norm": 1.294300282858588, "learning_rate": 1.69853385285479e-05, "loss": 0.4130980968475342, "step": 1180 }, { "epoch": 0.5837143210181639, "grad_norm": 1.103648457674251, "learning_rate": 1.697948950969868e-05, "loss": 0.3164641857147217, "step": 1181 }, { "epoch": 0.584208575311998, "grad_norm": 1.1707357674613739, "learning_rate": 1.697363583144199e-05, "loss": 0.36420726776123047, "step": 1182 }, { "epoch": 0.5847028296058322, "grad_norm": 1.1827091905189109, "learning_rate": 1.696777749768566e-05, "loss": 0.3279833197593689, "step": 1183 }, { "epoch": 0.5851970838996664, "grad_norm": 1.2462082843052198, "learning_rate": 1.696191451234063e-05, "loss": 0.311473548412323, "step": 1184 }, { "epoch": 0.5856913381935006, "grad_norm": 1.0514702517271486, "learning_rate": 1.6956046879320943e-05, "loss": 0.32284629344940186, "step": 1185 }, { "epoch": 0.5861855924873347, "grad_norm": 1.081683685343838, "learning_rate": 1.6950174602543753e-05, "loss": 0.3318635821342468, "step": 1186 }, { "epoch": 0.5866798467811689, "grad_norm": 1.10655975155716, "learning_rate": 1.6944297685929298e-05, "loss": 0.3268307149410248, "step": 1187 }, { "epoch": 0.5871741010750031, "grad_norm": 1.1757413336808826, "learning_rate": 1.6938416133400934e-05, "loss": 0.31885889172554016, "step": 1188 }, { "epoch": 0.5876683553688372, "grad_norm": 1.044019985672413, "learning_rate": 1.69325299488851e-05, "loss": 0.29273971915245056, "step": 1189 }, { "epoch": 0.5881626096626714, "grad_norm": 1.2128861059808687, "learning_rate": 1.692663913631132e-05, "loss": 0.3585188388824463, "step": 1190 }, { "epoch": 0.5886568639565056, "grad_norm": 1.152183266519285, "learning_rate": 1.6920743699612226e-05, "loss": 0.37145692110061646, "step": 1191 }, { "epoch": 0.5891511182503398, "grad_norm": 1.1211663085079848, "learning_rate": 1.691484364272352e-05, "loss": 0.34805262088775635, "step": 1192 }, { "epoch": 0.5896453725441739, "grad_norm": 1.1094913177494823, "learning_rate": 1.6908938969584002e-05, "loss": 0.3540152907371521, "step": 1193 }, { "epoch": 0.5901396268380081, "grad_norm": 1.1138288622940957, "learning_rate": 1.6903029684135545e-05, "loss": 0.35808512568473816, "step": 1194 }, { "epoch": 0.5906338811318423, "grad_norm": 1.2028693910668573, "learning_rate": 1.68971157903231e-05, "loss": 0.2881169021129608, "step": 1195 }, { "epoch": 0.5911281354256765, "grad_norm": 1.126509020875868, "learning_rate": 1.6891197292094704e-05, "loss": 0.33551955223083496, "step": 1196 }, { "epoch": 0.5916223897195106, "grad_norm": 1.0141998416691063, "learning_rate": 1.688527419340146e-05, "loss": 0.30721622705459595, "step": 1197 }, { "epoch": 0.5921166440133449, "grad_norm": 1.0876501850612135, "learning_rate": 1.687934649819754e-05, "loss": 0.3296341300010681, "step": 1198 }, { "epoch": 0.5926108983071791, "grad_norm": 1.1194456964334092, "learning_rate": 1.6873414210440194e-05, "loss": 0.3511606454849243, "step": 1199 }, { "epoch": 0.5931051526010133, "grad_norm": 1.0762712673108126, "learning_rate": 1.6867477334089728e-05, "loss": 0.34293919801712036, "step": 1200 }, { "epoch": 0.5935994068948474, "grad_norm": 0.9942852659141888, "learning_rate": 1.686153587310952e-05, "loss": 0.3334580659866333, "step": 1201 }, { "epoch": 0.5940936611886816, "grad_norm": 1.1354238373080972, "learning_rate": 1.6855589831466e-05, "loss": 0.3542851209640503, "step": 1202 }, { "epoch": 0.5945879154825158, "grad_norm": 1.0952906678959344, "learning_rate": 1.6849639213128667e-05, "loss": 0.30951520800590515, "step": 1203 }, { "epoch": 0.59508216977635, "grad_norm": 1.0716710567299268, "learning_rate": 1.6843684022070062e-05, "loss": 0.333478718996048, "step": 1204 }, { "epoch": 0.5955764240701841, "grad_norm": 1.0944556204789582, "learning_rate": 1.683772426226579e-05, "loss": 0.33562588691711426, "step": 1205 }, { "epoch": 0.5960706783640183, "grad_norm": 0.9136596878493712, "learning_rate": 1.6831759937694497e-05, "loss": 0.2626678943634033, "step": 1206 }, { "epoch": 0.5965649326578525, "grad_norm": 1.1138721974001247, "learning_rate": 1.6825791052337884e-05, "loss": 0.349543035030365, "step": 1207 }, { "epoch": 0.5970591869516867, "grad_norm": 1.0760285856821303, "learning_rate": 1.6819817610180696e-05, "loss": 0.3229057788848877, "step": 1208 }, { "epoch": 0.5975534412455208, "grad_norm": 1.0511960959262137, "learning_rate": 1.681383961521071e-05, "loss": 0.32023823261260986, "step": 1209 }, { "epoch": 0.598047695539355, "grad_norm": 1.0122201188951288, "learning_rate": 1.680785707141876e-05, "loss": 0.31556791067123413, "step": 1210 }, { "epoch": 0.5985419498331892, "grad_norm": 1.1858949236151264, "learning_rate": 1.68018699827987e-05, "loss": 0.33287158608436584, "step": 1211 }, { "epoch": 0.5990362041270234, "grad_norm": 1.0276520854994282, "learning_rate": 1.6795878353347427e-05, "loss": 0.28690433502197266, "step": 1212 }, { "epoch": 0.5995304584208575, "grad_norm": 1.1202382723881081, "learning_rate": 1.6789882187064862e-05, "loss": 0.3501484990119934, "step": 1213 }, { "epoch": 0.6000247127146917, "grad_norm": 1.15016872261832, "learning_rate": 1.678388148795397e-05, "loss": 0.3645259439945221, "step": 1214 }, { "epoch": 0.6005189670085259, "grad_norm": 1.0232559071014062, "learning_rate": 1.6777876260020726e-05, "loss": 0.3270183801651001, "step": 1215 }, { "epoch": 0.60101322130236, "grad_norm": 1.0680433488207848, "learning_rate": 1.6771866507274132e-05, "loss": 0.31767967343330383, "step": 1216 }, { "epoch": 0.6015074755961942, "grad_norm": 1.0642272352631703, "learning_rate": 1.6765852233726216e-05, "loss": 0.3170120120048523, "step": 1217 }, { "epoch": 0.6020017298900284, "grad_norm": 1.0689193394735252, "learning_rate": 1.6759833443392022e-05, "loss": 0.3270176351070404, "step": 1218 }, { "epoch": 0.6024959841838626, "grad_norm": 1.0053062396233938, "learning_rate": 1.6753810140289608e-05, "loss": 0.3229079246520996, "step": 1219 }, { "epoch": 0.6029902384776967, "grad_norm": 1.060220470914707, "learning_rate": 1.6747782328440044e-05, "loss": 0.3366449773311615, "step": 1220 }, { "epoch": 0.6034844927715309, "grad_norm": 1.2656940979343048, "learning_rate": 1.674175001186741e-05, "loss": 0.4027010500431061, "step": 1221 }, { "epoch": 0.6039787470653651, "grad_norm": 1.039989374871811, "learning_rate": 1.6735713194598798e-05, "loss": 0.31566083431243896, "step": 1222 }, { "epoch": 0.6044730013591993, "grad_norm": 1.1667815915058346, "learning_rate": 1.67296718806643e-05, "loss": 0.3361780047416687, "step": 1223 }, { "epoch": 0.6049672556530334, "grad_norm": 1.0628494144880791, "learning_rate": 1.6723626074097007e-05, "loss": 0.3197939693927765, "step": 1224 }, { "epoch": 0.6054615099468676, "grad_norm": 1.078571350485402, "learning_rate": 1.671757577893302e-05, "loss": 0.32977360486984253, "step": 1225 }, { "epoch": 0.6059557642407019, "grad_norm": 1.1192119082687915, "learning_rate": 1.671152099921142e-05, "loss": 0.3434401750564575, "step": 1226 }, { "epoch": 0.6064500185345361, "grad_norm": 1.0664877094913836, "learning_rate": 1.67054617389743e-05, "loss": 0.33856305480003357, "step": 1227 }, { "epoch": 0.6069442728283702, "grad_norm": 1.147959053573069, "learning_rate": 1.669939800226673e-05, "loss": 0.31594911217689514, "step": 1228 }, { "epoch": 0.6074385271222044, "grad_norm": 1.105417739927691, "learning_rate": 1.669332979313678e-05, "loss": 0.32347679138183594, "step": 1229 }, { "epoch": 0.6079327814160386, "grad_norm": 1.1057400329817928, "learning_rate": 1.6687257115635492e-05, "loss": 0.32733607292175293, "step": 1230 }, { "epoch": 0.6084270357098728, "grad_norm": 0.9869005136013326, "learning_rate": 1.6681179973816908e-05, "loss": 0.306827187538147, "step": 1231 }, { "epoch": 0.6089212900037069, "grad_norm": 1.068802395839477, "learning_rate": 1.667509837173803e-05, "loss": 0.3515884280204773, "step": 1232 }, { "epoch": 0.6094155442975411, "grad_norm": 1.0062662165973097, "learning_rate": 1.6669012313458862e-05, "loss": 0.28699082136154175, "step": 1233 }, { "epoch": 0.6099097985913753, "grad_norm": 1.0697164166178312, "learning_rate": 1.6662921803042356e-05, "loss": 0.30737537145614624, "step": 1234 }, { "epoch": 0.6104040528852095, "grad_norm": 1.0782793991023802, "learning_rate": 1.665682684455446e-05, "loss": 0.3193345069885254, "step": 1235 }, { "epoch": 0.6108983071790436, "grad_norm": 1.1629258901733988, "learning_rate": 1.6650727442064073e-05, "loss": 0.3326336741447449, "step": 1236 }, { "epoch": 0.6113925614728778, "grad_norm": 1.0950813589125916, "learning_rate": 1.6644623599643076e-05, "loss": 0.2967267632484436, "step": 1237 }, { "epoch": 0.611886815766712, "grad_norm": 1.104366364956542, "learning_rate": 1.66385153213663e-05, "loss": 0.3163914084434509, "step": 1238 }, { "epoch": 0.6123810700605461, "grad_norm": 1.1913476484695409, "learning_rate": 1.663240261131155e-05, "loss": 0.40281808376312256, "step": 1239 }, { "epoch": 0.6128753243543803, "grad_norm": 1.1744917859448287, "learning_rate": 1.6626285473559586e-05, "loss": 0.33946287631988525, "step": 1240 }, { "epoch": 0.6133695786482145, "grad_norm": 1.121011060895708, "learning_rate": 1.6620163912194114e-05, "loss": 0.3750913143157959, "step": 1241 }, { "epoch": 0.6138638329420487, "grad_norm": 1.1601773319994575, "learning_rate": 1.6614037931301804e-05, "loss": 0.32449400424957275, "step": 1242 }, { "epoch": 0.6143580872358828, "grad_norm": 1.146035054497973, "learning_rate": 1.6607907534972277e-05, "loss": 0.3484799861907959, "step": 1243 }, { "epoch": 0.614852341529717, "grad_norm": 1.0478699674323781, "learning_rate": 1.6601772727298095e-05, "loss": 0.2991127669811249, "step": 1244 }, { "epoch": 0.6153465958235512, "grad_norm": 1.0941316253076903, "learning_rate": 1.6595633512374768e-05, "loss": 0.339094340801239, "step": 1245 }, { "epoch": 0.6158408501173854, "grad_norm": 1.0756027047064132, "learning_rate": 1.6589489894300744e-05, "loss": 0.3147842288017273, "step": 1246 }, { "epoch": 0.6163351044112195, "grad_norm": 1.0944450465347566, "learning_rate": 1.6583341877177427e-05, "loss": 0.3036183714866638, "step": 1247 }, { "epoch": 0.6168293587050537, "grad_norm": 1.0983853525092009, "learning_rate": 1.657718946510913e-05, "loss": 0.32657095789909363, "step": 1248 }, { "epoch": 0.6173236129988879, "grad_norm": 1.0660730573251251, "learning_rate": 1.6571032662203126e-05, "loss": 0.3104664385318756, "step": 1249 }, { "epoch": 0.6178178672927221, "grad_norm": 1.0675015064613533, "learning_rate": 1.6564871472569604e-05, "loss": 0.30392807722091675, "step": 1250 }, { "epoch": 0.6183121215865562, "grad_norm": 1.080894190005694, "learning_rate": 1.655870590032169e-05, "loss": 0.3087356388568878, "step": 1251 }, { "epoch": 0.6188063758803904, "grad_norm": 1.0633256442775108, "learning_rate": 1.6552535949575427e-05, "loss": 0.3220480978488922, "step": 1252 }, { "epoch": 0.6193006301742247, "grad_norm": 1.0867949301055795, "learning_rate": 1.654636162444979e-05, "loss": 0.33925485610961914, "step": 1253 }, { "epoch": 0.6197948844680589, "grad_norm": 1.0651223448844926, "learning_rate": 1.6540182929066667e-05, "loss": 0.3704617917537689, "step": 1254 }, { "epoch": 0.620289138761893, "grad_norm": 1.1158405395395257, "learning_rate": 1.653399986755087e-05, "loss": 0.33745670318603516, "step": 1255 }, { "epoch": 0.6207833930557272, "grad_norm": 1.1397943957058634, "learning_rate": 1.6527812444030118e-05, "loss": 0.31651467084884644, "step": 1256 }, { "epoch": 0.6212776473495614, "grad_norm": 1.141112365152985, "learning_rate": 1.6521620662635053e-05, "loss": 0.360455185174942, "step": 1257 }, { "epoch": 0.6217719016433956, "grad_norm": 1.0000307812773819, "learning_rate": 1.6515424527499214e-05, "loss": 0.32819390296936035, "step": 1258 }, { "epoch": 0.6222661559372297, "grad_norm": 1.229539015248975, "learning_rate": 1.6509224042759053e-05, "loss": 0.38759690523147583, "step": 1259 }, { "epoch": 0.6227604102310639, "grad_norm": 1.127403937815861, "learning_rate": 1.6503019212553932e-05, "loss": 0.34250545501708984, "step": 1260 }, { "epoch": 0.6232546645248981, "grad_norm": 1.0060644367410545, "learning_rate": 1.6496810041026097e-05, "loss": 0.3120163679122925, "step": 1261 }, { "epoch": 0.6237489188187323, "grad_norm": 1.1050188267024101, "learning_rate": 1.649059653232071e-05, "loss": 0.35985836386680603, "step": 1262 }, { "epoch": 0.6242431731125664, "grad_norm": 1.0877426950647728, "learning_rate": 1.648437869058581e-05, "loss": 0.3551288843154907, "step": 1263 }, { "epoch": 0.6247374274064006, "grad_norm": 1.095568415742879, "learning_rate": 1.6478156519972354e-05, "loss": 0.33047816157341003, "step": 1264 }, { "epoch": 0.6252316817002348, "grad_norm": 1.0643242802432207, "learning_rate": 1.6471930024634164e-05, "loss": 0.32909417152404785, "step": 1265 }, { "epoch": 0.6257259359940689, "grad_norm": 1.07195158812182, "learning_rate": 1.6465699208727964e-05, "loss": 0.3726924657821655, "step": 1266 }, { "epoch": 0.6262201902879031, "grad_norm": 1.1316893144153, "learning_rate": 1.6459464076413355e-05, "loss": 0.3569204807281494, "step": 1267 }, { "epoch": 0.6267144445817373, "grad_norm": 1.0125649890138406, "learning_rate": 1.6453224631852825e-05, "loss": 0.33798107504844666, "step": 1268 }, { "epoch": 0.6272086988755715, "grad_norm": 1.1537944647220344, "learning_rate": 1.644698087921173e-05, "loss": 0.32891637086868286, "step": 1269 }, { "epoch": 0.6277029531694056, "grad_norm": 1.1246833616649612, "learning_rate": 1.644073282265832e-05, "loss": 0.31512969732284546, "step": 1270 }, { "epoch": 0.6281972074632398, "grad_norm": 1.1199823464164773, "learning_rate": 1.643448046636371e-05, "loss": 0.350041925907135, "step": 1271 }, { "epoch": 0.628691461757074, "grad_norm": 1.0925989435954497, "learning_rate": 1.642822381450187e-05, "loss": 0.3248854875564575, "step": 1272 }, { "epoch": 0.6291857160509082, "grad_norm": 1.0344569444697491, "learning_rate": 1.6421962871249662e-05, "loss": 0.3031661808490753, "step": 1273 }, { "epoch": 0.6296799703447423, "grad_norm": 1.0843035546126185, "learning_rate": 1.6415697640786802e-05, "loss": 0.2903754711151123, "step": 1274 }, { "epoch": 0.6301742246385765, "grad_norm": 1.0122518499053432, "learning_rate": 1.6409428127295864e-05, "loss": 0.300454318523407, "step": 1275 }, { "epoch": 0.6306684789324107, "grad_norm": 1.0842968830814483, "learning_rate": 1.6403154334962286e-05, "loss": 0.3430244028568268, "step": 1276 }, { "epoch": 0.6311627332262449, "grad_norm": 1.1383634793407482, "learning_rate": 1.6396876267974367e-05, "loss": 0.3728436827659607, "step": 1277 }, { "epoch": 0.631656987520079, "grad_norm": 1.103371729978927, "learning_rate": 1.639059393052325e-05, "loss": 0.3021183907985687, "step": 1278 }, { "epoch": 0.6321512418139132, "grad_norm": 1.0649900935701406, "learning_rate": 1.6384307326802934e-05, "loss": 0.3313615918159485, "step": 1279 }, { "epoch": 0.6326454961077475, "grad_norm": 1.0519110395000262, "learning_rate": 1.637801646101027e-05, "loss": 0.32833239436149597, "step": 1280 }, { "epoch": 0.6331397504015817, "grad_norm": 1.1672616485147485, "learning_rate": 1.6371721337344947e-05, "loss": 0.3575769066810608, "step": 1281 }, { "epoch": 0.6336340046954158, "grad_norm": 1.044512245658177, "learning_rate": 1.6365421960009502e-05, "loss": 0.33323729038238525, "step": 1282 }, { "epoch": 0.63412825898925, "grad_norm": 1.150185694461945, "learning_rate": 1.6359118333209307e-05, "loss": 0.3522900938987732, "step": 1283 }, { "epoch": 0.6346225132830842, "grad_norm": 1.2143932108960407, "learning_rate": 1.635281046115257e-05, "loss": 0.3350796699523926, "step": 1284 }, { "epoch": 0.6351167675769184, "grad_norm": 1.2071815938700088, "learning_rate": 1.6346498348050342e-05, "loss": 0.350632905960083, "step": 1285 }, { "epoch": 0.6356110218707525, "grad_norm": 1.0108749382306044, "learning_rate": 1.6340181998116494e-05, "loss": 0.2961253523826599, "step": 1286 }, { "epoch": 0.6361052761645867, "grad_norm": 1.3686468141070485, "learning_rate": 1.6333861415567736e-05, "loss": 0.35736170411109924, "step": 1287 }, { "epoch": 0.6365995304584209, "grad_norm": 1.1749750672779442, "learning_rate": 1.63275366046236e-05, "loss": 0.35654571652412415, "step": 1288 }, { "epoch": 0.6370937847522551, "grad_norm": 1.0658003578898634, "learning_rate": 1.6321207569506435e-05, "loss": 0.30518224835395813, "step": 1289 }, { "epoch": 0.6375880390460892, "grad_norm": 1.1007851387105425, "learning_rate": 1.6314874314441413e-05, "loss": 0.35099470615386963, "step": 1290 }, { "epoch": 0.6380822933399234, "grad_norm": 1.0971286067217327, "learning_rate": 1.6308536843656528e-05, "loss": 0.3577536344528198, "step": 1291 }, { "epoch": 0.6385765476337576, "grad_norm": 1.0395121014513669, "learning_rate": 1.6302195161382586e-05, "loss": 0.3141167163848877, "step": 1292 }, { "epoch": 0.6390708019275917, "grad_norm": 0.981608659730199, "learning_rate": 1.62958492718532e-05, "loss": 0.2920055389404297, "step": 1293 }, { "epoch": 0.6395650562214259, "grad_norm": 1.0875768517352407, "learning_rate": 1.6289499179304797e-05, "loss": 0.32826486229896545, "step": 1294 }, { "epoch": 0.6400593105152601, "grad_norm": 1.0051851075633542, "learning_rate": 1.628314488797661e-05, "loss": 0.3080480992794037, "step": 1295 }, { "epoch": 0.6405535648090943, "grad_norm": 1.006537470660458, "learning_rate": 1.627678640211067e-05, "loss": 0.304529070854187, "step": 1296 }, { "epoch": 0.6410478191029284, "grad_norm": 1.1108978139615113, "learning_rate": 1.627042372595181e-05, "loss": 0.34653496742248535, "step": 1297 }, { "epoch": 0.6415420733967626, "grad_norm": 0.9745027779333038, "learning_rate": 1.6264056863747667e-05, "loss": 0.2938673496246338, "step": 1298 }, { "epoch": 0.6420363276905968, "grad_norm": 1.1585281714148792, "learning_rate": 1.625768581974866e-05, "loss": 0.32350343465805054, "step": 1299 }, { "epoch": 0.642530581984431, "grad_norm": 1.0756982630474194, "learning_rate": 1.6251310598208015e-05, "loss": 0.3175384998321533, "step": 1300 }, { "epoch": 0.6430248362782651, "grad_norm": 1.1335110071944674, "learning_rate": 1.6244931203381734e-05, "loss": 0.32667648792266846, "step": 1301 }, { "epoch": 0.6435190905720993, "grad_norm": 0.9986052180267636, "learning_rate": 1.623854763952861e-05, "loss": 0.30110976099967957, "step": 1302 }, { "epoch": 0.6440133448659335, "grad_norm": 1.2219754266907614, "learning_rate": 1.6232159910910224e-05, "loss": 0.3508617579936981, "step": 1303 }, { "epoch": 0.6445075991597677, "grad_norm": 1.1027211796126624, "learning_rate": 1.622576802179092e-05, "loss": 0.34416183829307556, "step": 1304 }, { "epoch": 0.6450018534536018, "grad_norm": 1.1267200023483468, "learning_rate": 1.6219371976437847e-05, "loss": 0.3509306311607361, "step": 1305 }, { "epoch": 0.645496107747436, "grad_norm": 1.1746524244290708, "learning_rate": 1.6212971779120904e-05, "loss": 0.36186683177948, "step": 1306 }, { "epoch": 0.6459903620412702, "grad_norm": 1.128374133277422, "learning_rate": 1.6206567434112776e-05, "loss": 0.3123924732208252, "step": 1307 }, { "epoch": 0.6464846163351045, "grad_norm": 1.2141772034453755, "learning_rate": 1.6200158945688907e-05, "loss": 0.3691411018371582, "step": 1308 }, { "epoch": 0.6469788706289386, "grad_norm": 1.1011618758034853, "learning_rate": 1.6193746318127516e-05, "loss": 0.3136986792087555, "step": 1309 }, { "epoch": 0.6474731249227728, "grad_norm": 1.0883839992045683, "learning_rate": 1.6187329555709585e-05, "loss": 0.30374211072921753, "step": 1310 }, { "epoch": 0.647967379216607, "grad_norm": 1.207837369942263, "learning_rate": 1.618090866271884e-05, "loss": 0.3633323907852173, "step": 1311 }, { "epoch": 0.6484616335104412, "grad_norm": 1.056749654034174, "learning_rate": 1.6174483643441795e-05, "loss": 0.31395208835601807, "step": 1312 }, { "epoch": 0.6489558878042753, "grad_norm": 1.0312943002596973, "learning_rate": 1.6168054502167687e-05, "loss": 0.29258471727371216, "step": 1313 }, { "epoch": 0.6494501420981095, "grad_norm": 1.052844702612926, "learning_rate": 1.6161621243188528e-05, "loss": 0.3086007833480835, "step": 1314 }, { "epoch": 0.6499443963919437, "grad_norm": 1.1099907156572013, "learning_rate": 1.6155183870799063e-05, "loss": 0.3604614734649658, "step": 1315 }, { "epoch": 0.6504386506857779, "grad_norm": 1.230657559418624, "learning_rate": 1.614874238929679e-05, "loss": 0.3784678876399994, "step": 1316 }, { "epoch": 0.650932904979612, "grad_norm": 0.9692609071600233, "learning_rate": 1.6142296802981957e-05, "loss": 0.29009610414505005, "step": 1317 }, { "epoch": 0.6514271592734462, "grad_norm": 1.1385261282180998, "learning_rate": 1.6135847116157542e-05, "loss": 0.3667104244232178, "step": 1318 }, { "epoch": 0.6519214135672804, "grad_norm": 1.0454111919656257, "learning_rate": 1.6129393333129262e-05, "loss": 0.3100985884666443, "step": 1319 }, { "epoch": 0.6524156678611145, "grad_norm": 1.0967001531345488, "learning_rate": 1.612293545820557e-05, "loss": 0.34128522872924805, "step": 1320 }, { "epoch": 0.6529099221549487, "grad_norm": 1.016572733864691, "learning_rate": 1.611647349569765e-05, "loss": 0.3017216920852661, "step": 1321 }, { "epoch": 0.6534041764487829, "grad_norm": 1.0979244854260226, "learning_rate": 1.611000744991942e-05, "loss": 0.35060590505599976, "step": 1322 }, { "epoch": 0.6538984307426171, "grad_norm": 1.180855026456707, "learning_rate": 1.610353732518752e-05, "loss": 0.3766549825668335, "step": 1323 }, { "epoch": 0.6543926850364512, "grad_norm": 0.9954937284294141, "learning_rate": 1.609706312582131e-05, "loss": 0.2970678210258484, "step": 1324 }, { "epoch": 0.6548869393302854, "grad_norm": 1.2407304893003468, "learning_rate": 1.609058485614287e-05, "loss": 0.3345789909362793, "step": 1325 }, { "epoch": 0.6553811936241196, "grad_norm": 1.159801774337048, "learning_rate": 1.608410252047701e-05, "loss": 0.34838157892227173, "step": 1326 }, { "epoch": 0.6558754479179538, "grad_norm": 1.052743453114199, "learning_rate": 1.6077616123151232e-05, "loss": 0.27454087138175964, "step": 1327 }, { "epoch": 0.6563697022117879, "grad_norm": 1.1304513457691607, "learning_rate": 1.607112566849577e-05, "loss": 0.3372647762298584, "step": 1328 }, { "epoch": 0.6568639565056221, "grad_norm": 1.1678098502989476, "learning_rate": 1.606463116084356e-05, "loss": 0.34433993697166443, "step": 1329 }, { "epoch": 0.6573582107994563, "grad_norm": 1.0760327464429003, "learning_rate": 1.6058132604530242e-05, "loss": 0.3267759382724762, "step": 1330 }, { "epoch": 0.6578524650932905, "grad_norm": 1.044029067228307, "learning_rate": 1.6051630003894155e-05, "loss": 0.3022347390651703, "step": 1331 }, { "epoch": 0.6583467193871246, "grad_norm": 1.0701124312590375, "learning_rate": 1.604512336327634e-05, "loss": 0.32478266954421997, "step": 1332 }, { "epoch": 0.6588409736809588, "grad_norm": 1.1194211733981758, "learning_rate": 1.6038612687020548e-05, "loss": 0.32039204239845276, "step": 1333 }, { "epoch": 0.659335227974793, "grad_norm": 1.189072572166891, "learning_rate": 1.6032097979473203e-05, "loss": 0.3376410901546478, "step": 1334 }, { "epoch": 0.6598294822686273, "grad_norm": 1.0209465387535948, "learning_rate": 1.6025579244983443e-05, "loss": 0.28432029485702515, "step": 1335 }, { "epoch": 0.6603237365624613, "grad_norm": 1.1101085579973957, "learning_rate": 1.6019056487903067e-05, "loss": 0.3349001109600067, "step": 1336 }, { "epoch": 0.6608179908562956, "grad_norm": 1.016991018325495, "learning_rate": 1.601252971258658e-05, "loss": 0.27995598316192627, "step": 1337 }, { "epoch": 0.6613122451501298, "grad_norm": 1.0652875110729838, "learning_rate": 1.6005998923391172e-05, "loss": 0.28326892852783203, "step": 1338 }, { "epoch": 0.661806499443964, "grad_norm": 1.1089400050162956, "learning_rate": 1.5999464124676697e-05, "loss": 0.3139200806617737, "step": 1339 }, { "epoch": 0.6623007537377981, "grad_norm": 1.0857703956199403, "learning_rate": 1.5992925320805688e-05, "loss": 0.32395505905151367, "step": 1340 }, { "epoch": 0.6627950080316323, "grad_norm": 1.187400707476865, "learning_rate": 1.598638251614337e-05, "loss": 0.35880255699157715, "step": 1341 }, { "epoch": 0.6632892623254665, "grad_norm": 1.1264632686384342, "learning_rate": 1.5979835715057616e-05, "loss": 0.3696775436401367, "step": 1342 }, { "epoch": 0.6637835166193007, "grad_norm": 1.2084738763641774, "learning_rate": 1.597328492191898e-05, "loss": 0.38413193821907043, "step": 1343 }, { "epoch": 0.6642777709131348, "grad_norm": 2.0572947223290017, "learning_rate": 1.596673014110068e-05, "loss": 0.3564830720424652, "step": 1344 }, { "epoch": 0.664772025206969, "grad_norm": 1.0170026931569898, "learning_rate": 1.5960171376978587e-05, "loss": 0.30634552240371704, "step": 1345 }, { "epoch": 0.6652662795008032, "grad_norm": 1.0375692111937291, "learning_rate": 1.595360863393125e-05, "loss": 0.27113068103790283, "step": 1346 }, { "epoch": 0.6657605337946373, "grad_norm": 1.242773829739391, "learning_rate": 1.594704191633985e-05, "loss": 0.34015512466430664, "step": 1347 }, { "epoch": 0.6662547880884715, "grad_norm": 0.9724222230737607, "learning_rate": 1.594047122858824e-05, "loss": 0.2509229779243469, "step": 1348 }, { "epoch": 0.6667490423823057, "grad_norm": 1.0705371704599513, "learning_rate": 1.5933896575062922e-05, "loss": 0.35122111439704895, "step": 1349 }, { "epoch": 0.6672432966761399, "grad_norm": 1.0469402955634624, "learning_rate": 1.592731796015303e-05, "loss": 0.3656314015388489, "step": 1350 }, { "epoch": 0.667737550969974, "grad_norm": 1.0980190562444532, "learning_rate": 1.5920735388250363e-05, "loss": 0.3482551574707031, "step": 1351 }, { "epoch": 0.6682318052638082, "grad_norm": 0.9987728958846398, "learning_rate": 1.5914148863749344e-05, "loss": 0.2852175831794739, "step": 1352 }, { "epoch": 0.6687260595576424, "grad_norm": 1.1231968462948256, "learning_rate": 1.590755839104705e-05, "loss": 0.3435940742492676, "step": 1353 }, { "epoch": 0.6692203138514766, "grad_norm": 1.2334019463480403, "learning_rate": 1.590096397454318e-05, "loss": 0.34816527366638184, "step": 1354 }, { "epoch": 0.6697145681453107, "grad_norm": 1.4472355399081582, "learning_rate": 1.5894365618640077e-05, "loss": 0.3283170461654663, "step": 1355 }, { "epoch": 0.6702088224391449, "grad_norm": 1.1520168978191874, "learning_rate": 1.588776332774271e-05, "loss": 0.335905522108078, "step": 1356 }, { "epoch": 0.6707030767329791, "grad_norm": 1.1244736910598108, "learning_rate": 1.5881157106258666e-05, "loss": 0.3055316209793091, "step": 1357 }, { "epoch": 0.6711973310268133, "grad_norm": 1.050666765324263, "learning_rate": 1.5874546958598172e-05, "loss": 0.2873142659664154, "step": 1358 }, { "epoch": 0.6716915853206474, "grad_norm": 1.0218331884680711, "learning_rate": 1.586793288917406e-05, "loss": 0.29659712314605713, "step": 1359 }, { "epoch": 0.6721858396144816, "grad_norm": 1.0827802259474617, "learning_rate": 1.5861314902401802e-05, "loss": 0.33081990480422974, "step": 1360 }, { "epoch": 0.6726800939083158, "grad_norm": 1.2140107638410536, "learning_rate": 1.5854693002699457e-05, "loss": 0.3559015691280365, "step": 1361 }, { "epoch": 0.67317434820215, "grad_norm": 1.1424828520826207, "learning_rate": 1.584806719448772e-05, "loss": 0.3353438973426819, "step": 1362 }, { "epoch": 0.6736686024959841, "grad_norm": 1.0533009951881467, "learning_rate": 1.5841437482189882e-05, "loss": 0.3320685923099518, "step": 1363 }, { "epoch": 0.6741628567898184, "grad_norm": 1.0600254033440624, "learning_rate": 1.5834803870231846e-05, "loss": 0.3070179224014282, "step": 1364 }, { "epoch": 0.6746571110836526, "grad_norm": 1.0452219544938475, "learning_rate": 1.5828166363042115e-05, "loss": 0.28779780864715576, "step": 1365 }, { "epoch": 0.6751513653774868, "grad_norm": 0.9932658974656241, "learning_rate": 1.5821524965051793e-05, "loss": 0.2793114185333252, "step": 1366 }, { "epoch": 0.6756456196713209, "grad_norm": 1.117744874079583, "learning_rate": 1.5814879680694585e-05, "loss": 0.3586357831954956, "step": 1367 }, { "epoch": 0.6761398739651551, "grad_norm": 1.122494918770383, "learning_rate": 1.5808230514406786e-05, "loss": 0.35258832573890686, "step": 1368 }, { "epoch": 0.6766341282589893, "grad_norm": 1.0624893424167818, "learning_rate": 1.5801577470627286e-05, "loss": 0.2783607840538025, "step": 1369 }, { "epoch": 0.6771283825528235, "grad_norm": 1.217710803865883, "learning_rate": 1.579492055379756e-05, "loss": 0.3494858741760254, "step": 1370 }, { "epoch": 0.6776226368466576, "grad_norm": 1.1913846811426898, "learning_rate": 1.578825976836167e-05, "loss": 0.34512561559677124, "step": 1371 }, { "epoch": 0.6781168911404918, "grad_norm": 1.0303182849177774, "learning_rate": 1.5781595118766265e-05, "loss": 0.2923341989517212, "step": 1372 }, { "epoch": 0.678611145434326, "grad_norm": 1.0423481220482165, "learning_rate": 1.5774926609460566e-05, "loss": 0.3078833818435669, "step": 1373 }, { "epoch": 0.6791053997281601, "grad_norm": 1.0871141007271816, "learning_rate": 1.576825424489638e-05, "loss": 0.3147008419036865, "step": 1374 }, { "epoch": 0.6795996540219943, "grad_norm": 1.0340836184197277, "learning_rate": 1.576157802952807e-05, "loss": 0.2907789349555969, "step": 1375 }, { "epoch": 0.6800939083158285, "grad_norm": 1.1801114991913197, "learning_rate": 1.57548979678126e-05, "loss": 0.2941555976867676, "step": 1376 }, { "epoch": 0.6805881626096627, "grad_norm": 1.137398706652914, "learning_rate": 1.5748214064209473e-05, "loss": 0.3452342748641968, "step": 1377 }, { "epoch": 0.6810824169034968, "grad_norm": 0.9870368606552603, "learning_rate": 1.5741526323180765e-05, "loss": 0.31481361389160156, "step": 1378 }, { "epoch": 0.681576671197331, "grad_norm": 1.1734004344416635, "learning_rate": 1.573483474919112e-05, "loss": 0.3403349220752716, "step": 1379 }, { "epoch": 0.6820709254911652, "grad_norm": 1.3661262290783491, "learning_rate": 1.572813934670774e-05, "loss": 0.3283364176750183, "step": 1380 }, { "epoch": 0.6825651797849994, "grad_norm": 1.0790334315781973, "learning_rate": 1.5721440120200376e-05, "loss": 0.3294883966445923, "step": 1381 }, { "epoch": 0.6830594340788335, "grad_norm": 1.057215667272423, "learning_rate": 1.5714737074141338e-05, "loss": 0.3087981343269348, "step": 1382 }, { "epoch": 0.6835536883726677, "grad_norm": 0.9953380542206125, "learning_rate": 1.570803021300548e-05, "loss": 0.29511693120002747, "step": 1383 }, { "epoch": 0.6840479426665019, "grad_norm": 1.1147415286539601, "learning_rate": 1.570131954127021e-05, "loss": 0.3620823323726654, "step": 1384 }, { "epoch": 0.6845421969603361, "grad_norm": 1.2518358127130127, "learning_rate": 1.5694605063415477e-05, "loss": 0.3978300988674164, "step": 1385 }, { "epoch": 0.6850364512541702, "grad_norm": 1.2104388988265296, "learning_rate": 1.5687886783923773e-05, "loss": 0.35367661714553833, "step": 1386 }, { "epoch": 0.6855307055480044, "grad_norm": 1.158470270474232, "learning_rate": 1.5681164707280117e-05, "loss": 0.3313448131084442, "step": 1387 }, { "epoch": 0.6860249598418386, "grad_norm": 1.1312206183637163, "learning_rate": 1.5674438837972077e-05, "loss": 0.34115713834762573, "step": 1388 }, { "epoch": 0.6865192141356729, "grad_norm": 1.071906380475402, "learning_rate": 1.566770918048975e-05, "loss": 0.311326265335083, "step": 1389 }, { "epoch": 0.687013468429507, "grad_norm": 1.0496646406815568, "learning_rate": 1.5660975739325755e-05, "loss": 0.32622700929641724, "step": 1390 }, { "epoch": 0.6875077227233412, "grad_norm": 1.1530479303397307, "learning_rate": 1.565423851897524e-05, "loss": 0.36029747128486633, "step": 1391 }, { "epoch": 0.6880019770171754, "grad_norm": 0.9691306195768644, "learning_rate": 1.5647497523935883e-05, "loss": 0.2771177291870117, "step": 1392 }, { "epoch": 0.6884962313110096, "grad_norm": 1.1450942478438548, "learning_rate": 1.5640752758707868e-05, "loss": 0.3474002182483673, "step": 1393 }, { "epoch": 0.6889904856048437, "grad_norm": 1.09850595363495, "learning_rate": 1.563400422779391e-05, "loss": 0.28006255626678467, "step": 1394 }, { "epoch": 0.6894847398986779, "grad_norm": 1.0953635794573913, "learning_rate": 1.562725193569923e-05, "loss": 0.32151490449905396, "step": 1395 }, { "epoch": 0.6899789941925121, "grad_norm": 1.1995785901348681, "learning_rate": 1.5620495886931557e-05, "loss": 0.3081187903881073, "step": 1396 }, { "epoch": 0.6904732484863463, "grad_norm": 1.1390576796125735, "learning_rate": 1.561373608600114e-05, "loss": 0.3158992826938629, "step": 1397 }, { "epoch": 0.6909675027801804, "grad_norm": 1.1783652693752096, "learning_rate": 1.5606972537420723e-05, "loss": 0.33790335059165955, "step": 1398 }, { "epoch": 0.6914617570740146, "grad_norm": 1.1733705340509706, "learning_rate": 1.5600205245705553e-05, "loss": 0.3157292902469635, "step": 1399 }, { "epoch": 0.6919560113678488, "grad_norm": 1.1674234642263648, "learning_rate": 1.559343421537338e-05, "loss": 0.31090572476387024, "step": 1400 }, { "epoch": 0.6924502656616829, "grad_norm": 1.1604041250760992, "learning_rate": 1.5586659450944443e-05, "loss": 0.30499958992004395, "step": 1401 }, { "epoch": 0.6929445199555171, "grad_norm": 1.0713722972416724, "learning_rate": 1.5579880956941478e-05, "loss": 0.3036794662475586, "step": 1402 }, { "epoch": 0.6934387742493513, "grad_norm": 1.1543376848490539, "learning_rate": 1.5573098737889716e-05, "loss": 0.26514700055122375, "step": 1403 }, { "epoch": 0.6939330285431855, "grad_norm": 1.0755683699565965, "learning_rate": 1.5566312798316867e-05, "loss": 0.31947457790374756, "step": 1404 }, { "epoch": 0.6944272828370196, "grad_norm": 1.1317886658483896, "learning_rate": 1.5559523142753124e-05, "loss": 0.29387322068214417, "step": 1405 }, { "epoch": 0.6949215371308538, "grad_norm": 1.117372828260635, "learning_rate": 1.555272977573117e-05, "loss": 0.33459946513175964, "step": 1406 }, { "epoch": 0.695415791424688, "grad_norm": 1.2196871082649428, "learning_rate": 1.5545932701786154e-05, "loss": 0.31394320726394653, "step": 1407 }, { "epoch": 0.6959100457185222, "grad_norm": 1.0669033993360486, "learning_rate": 1.5539131925455713e-05, "loss": 0.2891885042190552, "step": 1408 }, { "epoch": 0.6964043000123563, "grad_norm": 1.2475463319045528, "learning_rate": 1.5532327451279938e-05, "loss": 0.33686599135398865, "step": 1409 }, { "epoch": 0.6968985543061905, "grad_norm": 1.0648029492831064, "learning_rate": 1.5525519283801405e-05, "loss": 0.31463146209716797, "step": 1410 }, { "epoch": 0.6973928086000247, "grad_norm": 1.226099759538899, "learning_rate": 1.5518707427565146e-05, "loss": 0.3598940372467041, "step": 1411 }, { "epoch": 0.6978870628938589, "grad_norm": 1.149083094787804, "learning_rate": 1.5511891887118665e-05, "loss": 0.32980066537857056, "step": 1412 }, { "epoch": 0.698381317187693, "grad_norm": 1.1872142618250514, "learning_rate": 1.5505072667011915e-05, "loss": 0.3264961242675781, "step": 1413 }, { "epoch": 0.6988755714815272, "grad_norm": 1.0604770012284015, "learning_rate": 1.549824977179731e-05, "loss": 0.3355519771575928, "step": 1414 }, { "epoch": 0.6993698257753614, "grad_norm": 1.0119765938601295, "learning_rate": 1.5491423206029717e-05, "loss": 0.27073174715042114, "step": 1415 }, { "epoch": 0.6998640800691956, "grad_norm": 1.1356545279602395, "learning_rate": 1.5484592974266456e-05, "loss": 0.32638323307037354, "step": 1416 }, { "epoch": 0.7003583343630297, "grad_norm": 1.192307972564017, "learning_rate": 1.5477759081067288e-05, "loss": 0.38844019174575806, "step": 1417 }, { "epoch": 0.700852588656864, "grad_norm": 1.1060104448967631, "learning_rate": 1.5470921530994426e-05, "loss": 0.3386498689651489, "step": 1418 }, { "epoch": 0.7013468429506982, "grad_norm": 1.113333245203903, "learning_rate": 1.5464080328612522e-05, "loss": 0.3304392993450165, "step": 1419 }, { "epoch": 0.7018410972445324, "grad_norm": 1.1024158772042199, "learning_rate": 1.545723547848866e-05, "loss": 0.314837247133255, "step": 1420 }, { "epoch": 0.7023353515383665, "grad_norm": 0.9888192419219921, "learning_rate": 1.5450386985192368e-05, "loss": 0.30135127902030945, "step": 1421 }, { "epoch": 0.7028296058322007, "grad_norm": 1.0640354824874358, "learning_rate": 1.5443534853295602e-05, "loss": 0.29176798462867737, "step": 1422 }, { "epoch": 0.7033238601260349, "grad_norm": 1.3021824252266967, "learning_rate": 1.5436679087372746e-05, "loss": 0.36438125371932983, "step": 1423 }, { "epoch": 0.703818114419869, "grad_norm": 1.1147780995478658, "learning_rate": 1.542981969200061e-05, "loss": 0.37140434980392456, "step": 1424 }, { "epoch": 0.7043123687137032, "grad_norm": 1.3176538326023695, "learning_rate": 1.542295667175843e-05, "loss": 0.36072903871536255, "step": 1425 }, { "epoch": 0.7048066230075374, "grad_norm": 1.1262882885574772, "learning_rate": 1.5416090031227868e-05, "loss": 0.3266327977180481, "step": 1426 }, { "epoch": 0.7053008773013716, "grad_norm": 1.0179565917308762, "learning_rate": 1.5409219774992978e-05, "loss": 0.3081423342227936, "step": 1427 }, { "epoch": 0.7057951315952057, "grad_norm": 1.3034313694807904, "learning_rate": 1.5402345907640262e-05, "loss": 0.3571197986602783, "step": 1428 }, { "epoch": 0.7062893858890399, "grad_norm": 1.1385888315844002, "learning_rate": 1.5395468433758604e-05, "loss": 0.32380104064941406, "step": 1429 }, { "epoch": 0.7067836401828741, "grad_norm": 1.0129718670355197, "learning_rate": 1.5388587357939313e-05, "loss": 0.33777546882629395, "step": 1430 }, { "epoch": 0.7072778944767083, "grad_norm": 1.0997780610685683, "learning_rate": 1.5381702684776093e-05, "loss": 0.31793370842933655, "step": 1431 }, { "epoch": 0.7077721487705424, "grad_norm": 1.065324744616134, "learning_rate": 1.537481441886506e-05, "loss": 0.3282355070114136, "step": 1432 }, { "epoch": 0.7082664030643766, "grad_norm": 1.1740655706878367, "learning_rate": 1.5367922564804716e-05, "loss": 0.3523057699203491, "step": 1433 }, { "epoch": 0.7087606573582108, "grad_norm": 1.1790295388685894, "learning_rate": 1.5361027127195964e-05, "loss": 0.36351460218429565, "step": 1434 }, { "epoch": 0.709254911652045, "grad_norm": 2.2339320260763373, "learning_rate": 1.5354128110642102e-05, "loss": 0.2936401963233948, "step": 1435 }, { "epoch": 0.7097491659458791, "grad_norm": 1.1080576186798932, "learning_rate": 1.5347225519748818e-05, "loss": 0.3178175091743469, "step": 1436 }, { "epoch": 0.7102434202397133, "grad_norm": 1.1375761171495609, "learning_rate": 1.5340319359124177e-05, "loss": 0.3098832666873932, "step": 1437 }, { "epoch": 0.7107376745335475, "grad_norm": 0.951807024133746, "learning_rate": 1.5333409633378633e-05, "loss": 0.2644941806793213, "step": 1438 }, { "epoch": 0.7112319288273817, "grad_norm": 1.1193499530101132, "learning_rate": 1.5326496347125027e-05, "loss": 0.3046286702156067, "step": 1439 }, { "epoch": 0.7117261831212158, "grad_norm": 1.1009971048909013, "learning_rate": 1.5319579504978567e-05, "loss": 0.33757925033569336, "step": 1440 }, { "epoch": 0.71222043741505, "grad_norm": 1.1415644120008137, "learning_rate": 1.5312659111556832e-05, "loss": 0.3470202684402466, "step": 1441 }, { "epoch": 0.7127146917088842, "grad_norm": 1.0829483976260892, "learning_rate": 1.5305735171479785e-05, "loss": 0.3310868740081787, "step": 1442 }, { "epoch": 0.7132089460027184, "grad_norm": 1.2738694792524405, "learning_rate": 1.529880768936975e-05, "loss": 0.31649407744407654, "step": 1443 }, { "epoch": 0.7137032002965525, "grad_norm": 1.0510301649062292, "learning_rate": 1.5291876669851408e-05, "loss": 0.2986135184764862, "step": 1444 }, { "epoch": 0.7141974545903867, "grad_norm": 1.1622525691797543, "learning_rate": 1.5284942117551817e-05, "loss": 0.3033408224582672, "step": 1445 }, { "epoch": 0.714691708884221, "grad_norm": 1.1648719329133883, "learning_rate": 1.5278004037100378e-05, "loss": 0.34231680631637573, "step": 1446 }, { "epoch": 0.7151859631780552, "grad_norm": 1.1347301204641653, "learning_rate": 1.5271062433128857e-05, "loss": 0.3273579478263855, "step": 1447 }, { "epoch": 0.7156802174718893, "grad_norm": 1.2307292916383785, "learning_rate": 1.5264117310271372e-05, "loss": 0.344064861536026, "step": 1448 }, { "epoch": 0.7161744717657235, "grad_norm": 1.0685505855741966, "learning_rate": 1.5257168673164384e-05, "loss": 0.3131038546562195, "step": 1449 }, { "epoch": 0.7166687260595577, "grad_norm": 1.1403948273488542, "learning_rate": 1.5250216526446708e-05, "loss": 0.32794755697250366, "step": 1450 }, { "epoch": 0.7171629803533918, "grad_norm": 1.2597097116316462, "learning_rate": 1.5243260874759494e-05, "loss": 0.3633842468261719, "step": 1451 }, { "epoch": 0.717657234647226, "grad_norm": 0.943013995379639, "learning_rate": 1.5236301722746235e-05, "loss": 0.24650251865386963, "step": 1452 }, { "epoch": 0.7181514889410602, "grad_norm": 1.1777840335640666, "learning_rate": 1.5229339075052769e-05, "loss": 0.34167230129241943, "step": 1453 }, { "epoch": 0.7186457432348944, "grad_norm": 1.0945051908887762, "learning_rate": 1.522237293632725e-05, "loss": 0.29454126954078674, "step": 1454 }, { "epoch": 0.7191399975287285, "grad_norm": 1.1517995676673816, "learning_rate": 1.5215403311220178e-05, "loss": 0.3709314465522766, "step": 1455 }, { "epoch": 0.7196342518225627, "grad_norm": 1.1421076533752808, "learning_rate": 1.5208430204384377e-05, "loss": 0.3543916642665863, "step": 1456 }, { "epoch": 0.7201285061163969, "grad_norm": 1.1924648010793302, "learning_rate": 1.5201453620474986e-05, "loss": 0.33827707171440125, "step": 1457 }, { "epoch": 0.7206227604102311, "grad_norm": 1.1616070041381745, "learning_rate": 1.5194473564149484e-05, "loss": 0.31289514899253845, "step": 1458 }, { "epoch": 0.7211170147040652, "grad_norm": 1.1655875507968474, "learning_rate": 1.5187490040067646e-05, "loss": 0.3345657289028168, "step": 1459 }, { "epoch": 0.7216112689978994, "grad_norm": 1.091971369166992, "learning_rate": 1.5180503052891578e-05, "loss": 0.3322404623031616, "step": 1460 }, { "epoch": 0.7221055232917336, "grad_norm": 1.0009476128919939, "learning_rate": 1.5173512607285692e-05, "loss": 0.31120461225509644, "step": 1461 }, { "epoch": 0.7225997775855678, "grad_norm": 1.140979323325151, "learning_rate": 1.5166518707916714e-05, "loss": 0.3388645648956299, "step": 1462 }, { "epoch": 0.7230940318794019, "grad_norm": 1.098469502784105, "learning_rate": 1.5159521359453661e-05, "loss": 0.3048557639122009, "step": 1463 }, { "epoch": 0.7235882861732361, "grad_norm": 1.0437743408474436, "learning_rate": 1.5152520566567873e-05, "loss": 0.32128047943115234, "step": 1464 }, { "epoch": 0.7240825404670703, "grad_norm": 1.0754519434907805, "learning_rate": 1.5145516333932973e-05, "loss": 0.3016900420188904, "step": 1465 }, { "epoch": 0.7245767947609045, "grad_norm": 0.9730419604339762, "learning_rate": 1.5138508666224892e-05, "loss": 0.27410340309143066, "step": 1466 }, { "epoch": 0.7250710490547386, "grad_norm": 1.1548137674896846, "learning_rate": 1.513149756812184e-05, "loss": 0.314311146736145, "step": 1467 }, { "epoch": 0.7255653033485728, "grad_norm": 1.0652992161056178, "learning_rate": 1.5124483044304339e-05, "loss": 0.300488144159317, "step": 1468 }, { "epoch": 0.726059557642407, "grad_norm": 1.0437811199768454, "learning_rate": 1.5117465099455173e-05, "loss": 0.2610424757003784, "step": 1469 }, { "epoch": 0.7265538119362412, "grad_norm": 1.0473843452456588, "learning_rate": 1.5110443738259425e-05, "loss": 0.2631368637084961, "step": 1470 }, { "epoch": 0.7270480662300753, "grad_norm": 1.1572872923696271, "learning_rate": 1.510341896540446e-05, "loss": 0.2894716262817383, "step": 1471 }, { "epoch": 0.7275423205239095, "grad_norm": 1.1539682565039295, "learning_rate": 1.5096390785579913e-05, "loss": 0.2859206199645996, "step": 1472 }, { "epoch": 0.7280365748177438, "grad_norm": 1.1861776477785995, "learning_rate": 1.5089359203477693e-05, "loss": 0.2966008484363556, "step": 1473 }, { "epoch": 0.728530829111578, "grad_norm": 1.0911088494470613, "learning_rate": 1.5082324223791988e-05, "loss": 0.3187675476074219, "step": 1474 }, { "epoch": 0.729025083405412, "grad_norm": 1.1920802680772398, "learning_rate": 1.507528585121925e-05, "loss": 0.32434171438217163, "step": 1475 }, { "epoch": 0.7295193376992463, "grad_norm": 1.233732485912319, "learning_rate": 1.5068244090458197e-05, "loss": 0.3518364429473877, "step": 1476 }, { "epoch": 0.7300135919930805, "grad_norm": 1.091189612496036, "learning_rate": 1.50611989462098e-05, "loss": 0.32294291257858276, "step": 1477 }, { "epoch": 0.7305078462869146, "grad_norm": 1.184027940449126, "learning_rate": 1.5054150423177307e-05, "loss": 0.3413415253162384, "step": 1478 }, { "epoch": 0.7310021005807488, "grad_norm": 1.1760745568840743, "learning_rate": 1.5047098526066207e-05, "loss": 0.3562566637992859, "step": 1479 }, { "epoch": 0.731496354874583, "grad_norm": 1.130494844464842, "learning_rate": 1.504004325958424e-05, "loss": 0.30018410086631775, "step": 1480 }, { "epoch": 0.7319906091684172, "grad_norm": 1.027268124102698, "learning_rate": 1.5032984628441409e-05, "loss": 0.2937701344490051, "step": 1481 }, { "epoch": 0.7324848634622513, "grad_norm": 1.131154387943882, "learning_rate": 1.5025922637349953e-05, "loss": 0.3268740773200989, "step": 1482 }, { "epoch": 0.7329791177560855, "grad_norm": 1.053089747814938, "learning_rate": 1.5018857291024356e-05, "loss": 0.3246314525604248, "step": 1483 }, { "epoch": 0.7334733720499197, "grad_norm": 1.033026683314433, "learning_rate": 1.501178859418134e-05, "loss": 0.276904433965683, "step": 1484 }, { "epoch": 0.7339676263437539, "grad_norm": 1.1901915790154476, "learning_rate": 1.5004716551539873e-05, "loss": 0.27665287256240845, "step": 1485 }, { "epoch": 0.734461880637588, "grad_norm": 1.065690181516995, "learning_rate": 1.4997641167821143e-05, "loss": 0.325985848903656, "step": 1486 }, { "epoch": 0.7349561349314222, "grad_norm": 1.2333398180696593, "learning_rate": 1.4990562447748573e-05, "loss": 0.2951817214488983, "step": 1487 }, { "epoch": 0.7354503892252564, "grad_norm": 1.0415622998394476, "learning_rate": 1.4983480396047822e-05, "loss": 0.2592772841453552, "step": 1488 }, { "epoch": 0.7359446435190906, "grad_norm": 1.0977128928049222, "learning_rate": 1.4976395017446767e-05, "loss": 0.3278253674507141, "step": 1489 }, { "epoch": 0.7364388978129247, "grad_norm": 2.4840016288238886, "learning_rate": 1.4969306316675497e-05, "loss": 0.32366445660591125, "step": 1490 }, { "epoch": 0.7369331521067589, "grad_norm": 1.065618785924185, "learning_rate": 1.4962214298466337e-05, "loss": 0.30544513463974, "step": 1491 }, { "epoch": 0.7374274064005931, "grad_norm": 1.1151764286390358, "learning_rate": 1.4955118967553812e-05, "loss": 0.3712898790836334, "step": 1492 }, { "epoch": 0.7379216606944273, "grad_norm": 1.072095940180716, "learning_rate": 1.4948020328674662e-05, "loss": 0.3006438612937927, "step": 1493 }, { "epoch": 0.7384159149882614, "grad_norm": 1.1145573413296936, "learning_rate": 1.494091838656784e-05, "loss": 0.3494953215122223, "step": 1494 }, { "epoch": 0.7389101692820956, "grad_norm": 1.091824613740768, "learning_rate": 1.4933813145974504e-05, "loss": 0.2698785662651062, "step": 1495 }, { "epoch": 0.7394044235759298, "grad_norm": 1.1072713673032075, "learning_rate": 1.4926704611638003e-05, "loss": 0.34775635600090027, "step": 1496 }, { "epoch": 0.739898677869764, "grad_norm": 1.1542085278706422, "learning_rate": 1.4919592788303898e-05, "loss": 0.328175812959671, "step": 1497 }, { "epoch": 0.7403929321635981, "grad_norm": 1.1735161292651393, "learning_rate": 1.491247768071994e-05, "loss": 0.3320178687572479, "step": 1498 }, { "epoch": 0.7408871864574323, "grad_norm": 1.8687355330582882, "learning_rate": 1.4905359293636074e-05, "loss": 0.308150053024292, "step": 1499 }, { "epoch": 0.7413814407512666, "grad_norm": 1.1422704685641505, "learning_rate": 1.489823763180443e-05, "loss": 0.3311570882797241, "step": 1500 }, { "epoch": 0.7418756950451008, "grad_norm": 1.2844910379105308, "learning_rate": 1.4891112699979334e-05, "loss": 0.36916327476501465, "step": 1501 }, { "epoch": 0.7423699493389349, "grad_norm": 1.0354244070195735, "learning_rate": 1.4883984502917286e-05, "loss": 0.28005337715148926, "step": 1502 }, { "epoch": 0.7428642036327691, "grad_norm": 1.2241818166146565, "learning_rate": 1.4876853045376962e-05, "loss": 0.3502781391143799, "step": 1503 }, { "epoch": 0.7433584579266033, "grad_norm": 1.2448349850537428, "learning_rate": 1.4869718332119232e-05, "loss": 0.32032880187034607, "step": 1504 }, { "epoch": 0.7438527122204374, "grad_norm": 1.1236679189592251, "learning_rate": 1.4862580367907118e-05, "loss": 0.3229472041130066, "step": 1505 }, { "epoch": 0.7443469665142716, "grad_norm": 1.087360074547477, "learning_rate": 1.4855439157505833e-05, "loss": 0.2725368142127991, "step": 1506 }, { "epoch": 0.7448412208081058, "grad_norm": 1.2509876854452482, "learning_rate": 1.4848294705682737e-05, "loss": 0.35358861088752747, "step": 1507 }, { "epoch": 0.74533547510194, "grad_norm": 1.0843196708603702, "learning_rate": 1.4841147017207376e-05, "loss": 0.299206018447876, "step": 1508 }, { "epoch": 0.7458297293957741, "grad_norm": 2.7618594064377384, "learning_rate": 1.4833996096851432e-05, "loss": 0.32004314661026, "step": 1509 }, { "epoch": 0.7463239836896083, "grad_norm": 1.1399779760270892, "learning_rate": 1.4826841949388767e-05, "loss": 0.32800590991973877, "step": 1510 }, { "epoch": 0.7468182379834425, "grad_norm": 1.112132363505793, "learning_rate": 1.4819684579595382e-05, "loss": 0.2916460335254669, "step": 1511 }, { "epoch": 0.7473124922772767, "grad_norm": 1.2041472096070427, "learning_rate": 1.4812523992249437e-05, "loss": 0.3276118338108063, "step": 1512 }, { "epoch": 0.7478067465711108, "grad_norm": 1.2310079375510266, "learning_rate": 1.4805360192131234e-05, "loss": 0.34718069434165955, "step": 1513 }, { "epoch": 0.748301000864945, "grad_norm": 1.0130113878676084, "learning_rate": 1.4798193184023233e-05, "loss": 0.2810167372226715, "step": 1514 }, { "epoch": 0.7487952551587792, "grad_norm": 1.1600230287701154, "learning_rate": 1.4791022972710017e-05, "loss": 0.3542296886444092, "step": 1515 }, { "epoch": 0.7492895094526134, "grad_norm": 1.0717623685966582, "learning_rate": 1.4783849562978319e-05, "loss": 0.27578431367874146, "step": 1516 }, { "epoch": 0.7497837637464475, "grad_norm": 1.2193919844014014, "learning_rate": 1.4776672959617006e-05, "loss": 0.32235798239707947, "step": 1517 }, { "epoch": 0.7502780180402817, "grad_norm": 1.073591922439447, "learning_rate": 1.4769493167417079e-05, "loss": 0.30588477849960327, "step": 1518 }, { "epoch": 0.7507722723341159, "grad_norm": 1.1259837125407774, "learning_rate": 1.4762310191171657e-05, "loss": 0.31242361664772034, "step": 1519 }, { "epoch": 0.7512665266279501, "grad_norm": 1.2265290610094162, "learning_rate": 1.4755124035675995e-05, "loss": 0.3679526150226593, "step": 1520 }, { "epoch": 0.7517607809217842, "grad_norm": 1.0185674037419847, "learning_rate": 1.4747934705727473e-05, "loss": 0.28588515520095825, "step": 1521 }, { "epoch": 0.7522550352156184, "grad_norm": 1.0624456882482982, "learning_rate": 1.4740742206125582e-05, "loss": 0.29861775040626526, "step": 1522 }, { "epoch": 0.7527492895094526, "grad_norm": 1.1245071890104912, "learning_rate": 1.4733546541671928e-05, "loss": 0.31373754143714905, "step": 1523 }, { "epoch": 0.7532435438032868, "grad_norm": 1.1569601569555032, "learning_rate": 1.472634771717024e-05, "loss": 0.3127061128616333, "step": 1524 }, { "epoch": 0.7537377980971209, "grad_norm": 1.0554556810771654, "learning_rate": 1.4719145737426346e-05, "loss": 0.33681541681289673, "step": 1525 }, { "epoch": 0.7542320523909551, "grad_norm": 1.1202634511050926, "learning_rate": 1.4711940607248182e-05, "loss": 0.30266639590263367, "step": 1526 }, { "epoch": 0.7547263066847893, "grad_norm": 1.0915134711866425, "learning_rate": 1.47047323314458e-05, "loss": 0.2988300323486328, "step": 1527 }, { "epoch": 0.7552205609786236, "grad_norm": 1.1041853232471737, "learning_rate": 1.4697520914831334e-05, "loss": 0.32679620385169983, "step": 1528 }, { "epoch": 0.7557148152724577, "grad_norm": 1.0049846597819565, "learning_rate": 1.4690306362219024e-05, "loss": 0.2935605049133301, "step": 1529 }, { "epoch": 0.7562090695662919, "grad_norm": 1.1114952379308272, "learning_rate": 1.4683088678425204e-05, "loss": 0.303417831659317, "step": 1530 }, { "epoch": 0.7567033238601261, "grad_norm": 1.0605597139601082, "learning_rate": 1.4675867868268295e-05, "loss": 0.30822527408599854, "step": 1531 }, { "epoch": 0.7571975781539602, "grad_norm": 1.0772522309630048, "learning_rate": 1.4668643936568807e-05, "loss": 0.3104674220085144, "step": 1532 }, { "epoch": 0.7576918324477944, "grad_norm": 1.0598818436947175, "learning_rate": 1.4661416888149333e-05, "loss": 0.27899307012557983, "step": 1533 }, { "epoch": 0.7581860867416286, "grad_norm": 1.1291791785743877, "learning_rate": 1.465418672783455e-05, "loss": 0.3285380005836487, "step": 1534 }, { "epoch": 0.7586803410354628, "grad_norm": 1.0773746767557166, "learning_rate": 1.4646953460451205e-05, "loss": 0.32028889656066895, "step": 1535 }, { "epoch": 0.7591745953292969, "grad_norm": 1.2647242329167074, "learning_rate": 1.4639717090828127e-05, "loss": 0.29870709776878357, "step": 1536 }, { "epoch": 0.7596688496231311, "grad_norm": 1.1833149129368068, "learning_rate": 1.4632477623796216e-05, "loss": 0.3556699752807617, "step": 1537 }, { "epoch": 0.7601631039169653, "grad_norm": 1.158341046754784, "learning_rate": 1.462523506418843e-05, "loss": 0.3433789014816284, "step": 1538 }, { "epoch": 0.7606573582107995, "grad_norm": 1.2526530715160118, "learning_rate": 1.4617989416839802e-05, "loss": 0.3146114945411682, "step": 1539 }, { "epoch": 0.7611516125046336, "grad_norm": 1.0254798742920868, "learning_rate": 1.4610740686587424e-05, "loss": 0.29029202461242676, "step": 1540 }, { "epoch": 0.7616458667984678, "grad_norm": 1.2367212561484746, "learning_rate": 1.4603488878270442e-05, "loss": 0.2976688742637634, "step": 1541 }, { "epoch": 0.762140121092302, "grad_norm": 1.0384933941129642, "learning_rate": 1.459623399673006e-05, "loss": 0.28604352474212646, "step": 1542 }, { "epoch": 0.7626343753861362, "grad_norm": 1.2070329997652125, "learning_rate": 1.4588976046809536e-05, "loss": 0.34977301955223083, "step": 1543 }, { "epoch": 0.7631286296799703, "grad_norm": 1.1108699937366455, "learning_rate": 1.458171503335417e-05, "loss": 0.31592974066734314, "step": 1544 }, { "epoch": 0.7636228839738045, "grad_norm": 1.224645404968216, "learning_rate": 1.4574450961211312e-05, "loss": 0.31539830565452576, "step": 1545 }, { "epoch": 0.7641171382676387, "grad_norm": 1.1914008033212045, "learning_rate": 1.4567183835230355e-05, "loss": 0.3100752532482147, "step": 1546 }, { "epoch": 0.7646113925614729, "grad_norm": 1.1973069016485758, "learning_rate": 1.4559913660262726e-05, "loss": 0.31005364656448364, "step": 1547 }, { "epoch": 0.765105646855307, "grad_norm": 1.102020410207535, "learning_rate": 1.4552640441161889e-05, "loss": 0.3050577640533447, "step": 1548 }, { "epoch": 0.7655999011491412, "grad_norm": 1.1151715417212549, "learning_rate": 1.4545364182783343e-05, "loss": 0.294721394777298, "step": 1549 }, { "epoch": 0.7660941554429754, "grad_norm": 1.0907882805879732, "learning_rate": 1.4538084889984616e-05, "loss": 0.2974075376987457, "step": 1550 }, { "epoch": 0.7665884097368096, "grad_norm": 1.218483256285566, "learning_rate": 1.4530802567625259e-05, "loss": 0.3247089385986328, "step": 1551 }, { "epoch": 0.7670826640306437, "grad_norm": 1.1751647980540385, "learning_rate": 1.4523517220566843e-05, "loss": 0.3219151198863983, "step": 1552 }, { "epoch": 0.7675769183244779, "grad_norm": 1.092743254793347, "learning_rate": 1.4516228853672962e-05, "loss": 0.30580246448516846, "step": 1553 }, { "epoch": 0.7680711726183121, "grad_norm": 1.0670412493946726, "learning_rate": 1.4508937471809233e-05, "loss": 0.2983207702636719, "step": 1554 }, { "epoch": 0.7685654269121464, "grad_norm": 1.1155603134808716, "learning_rate": 1.4501643079843266e-05, "loss": 0.3429039418697357, "step": 1555 }, { "epoch": 0.7690596812059804, "grad_norm": 1.0600298870014666, "learning_rate": 1.4494345682644704e-05, "loss": 0.3055192530155182, "step": 1556 }, { "epoch": 0.7695539354998147, "grad_norm": 1.0843598968647987, "learning_rate": 1.4487045285085178e-05, "loss": 0.2964102327823639, "step": 1557 }, { "epoch": 0.7700481897936489, "grad_norm": 1.0436581793993642, "learning_rate": 1.4479741892038335e-05, "loss": 0.3088444471359253, "step": 1558 }, { "epoch": 0.770542444087483, "grad_norm": 1.070280126063037, "learning_rate": 1.4472435508379808e-05, "loss": 0.28697890043258667, "step": 1559 }, { "epoch": 0.7710366983813172, "grad_norm": 1.1055317673748768, "learning_rate": 1.4465126138987242e-05, "loss": 0.3664681315422058, "step": 1560 }, { "epoch": 0.7715309526751514, "grad_norm": 1.1042702127280148, "learning_rate": 1.4457813788740263e-05, "loss": 0.3282932937145233, "step": 1561 }, { "epoch": 0.7720252069689856, "grad_norm": 1.4857133307558297, "learning_rate": 1.4450498462520495e-05, "loss": 0.27597576379776, "step": 1562 }, { "epoch": 0.7725194612628197, "grad_norm": 1.2214452597170176, "learning_rate": 1.4443180165211541e-05, "loss": 0.3553946614265442, "step": 1563 }, { "epoch": 0.7730137155566539, "grad_norm": 1.1827716129984904, "learning_rate": 1.4435858901698995e-05, "loss": 0.36224859952926636, "step": 1564 }, { "epoch": 0.7735079698504881, "grad_norm": 1.1341836510498036, "learning_rate": 1.4428534676870427e-05, "loss": 0.2940914034843445, "step": 1565 }, { "epoch": 0.7740022241443223, "grad_norm": 0.9563512574257287, "learning_rate": 1.4421207495615385e-05, "loss": 0.2717741131782532, "step": 1566 }, { "epoch": 0.7744964784381564, "grad_norm": 1.1327871067959112, "learning_rate": 1.441387736282539e-05, "loss": 0.32340431213378906, "step": 1567 }, { "epoch": 0.7749907327319906, "grad_norm": 1.1090264087970254, "learning_rate": 1.4406544283393935e-05, "loss": 0.3080120086669922, "step": 1568 }, { "epoch": 0.7754849870258248, "grad_norm": 1.4441577426158039, "learning_rate": 1.4399208262216475e-05, "loss": 0.3118380308151245, "step": 1569 }, { "epoch": 0.775979241319659, "grad_norm": 1.3307213271784917, "learning_rate": 1.439186930419044e-05, "loss": 0.3086084723472595, "step": 1570 }, { "epoch": 0.7764734956134931, "grad_norm": 1.1593176371811458, "learning_rate": 1.438452741421521e-05, "loss": 0.3233364522457123, "step": 1571 }, { "epoch": 0.7769677499073273, "grad_norm": 1.0623874748102813, "learning_rate": 1.4377182597192124e-05, "loss": 0.29029640555381775, "step": 1572 }, { "epoch": 0.7774620042011615, "grad_norm": 0.9791711244739897, "learning_rate": 1.4369834858024476e-05, "loss": 0.2888006567955017, "step": 1573 }, { "epoch": 0.7779562584949957, "grad_norm": 1.1118016172702438, "learning_rate": 1.4362484201617519e-05, "loss": 0.3260151743888855, "step": 1574 }, { "epoch": 0.7784505127888298, "grad_norm": 1.3306536044832058, "learning_rate": 1.4355130632878439e-05, "loss": 0.333207905292511, "step": 1575 }, { "epoch": 0.778944767082664, "grad_norm": 1.0844273121477916, "learning_rate": 1.4347774156716375e-05, "loss": 0.2577935457229614, "step": 1576 }, { "epoch": 0.7794390213764982, "grad_norm": 1.0777103823564191, "learning_rate": 1.434041477804241e-05, "loss": 0.29645979404449463, "step": 1577 }, { "epoch": 0.7799332756703324, "grad_norm": 1.1743796307407597, "learning_rate": 1.433305250176955e-05, "loss": 0.2973156273365021, "step": 1578 }, { "epoch": 0.7804275299641665, "grad_norm": 1.0277241805983874, "learning_rate": 1.4325687332812754e-05, "loss": 0.29159975051879883, "step": 1579 }, { "epoch": 0.7809217842580007, "grad_norm": 1.1751334806332727, "learning_rate": 1.4318319276088902e-05, "loss": 0.29718664288520813, "step": 1580 }, { "epoch": 0.781416038551835, "grad_norm": 1.316577919508971, "learning_rate": 1.4310948336516803e-05, "loss": 0.3262369632720947, "step": 1581 }, { "epoch": 0.781910292845669, "grad_norm": 1.182680350644687, "learning_rate": 1.4303574519017187e-05, "loss": 0.36491623520851135, "step": 1582 }, { "epoch": 0.7824045471395032, "grad_norm": 1.181580153295467, "learning_rate": 1.4296197828512716e-05, "loss": 0.3558582365512848, "step": 1583 }, { "epoch": 0.7828988014333375, "grad_norm": 0.9802630700834107, "learning_rate": 1.428881826992796e-05, "loss": 0.2745930552482605, "step": 1584 }, { "epoch": 0.7833930557271717, "grad_norm": 1.1668091765691224, "learning_rate": 1.4281435848189404e-05, "loss": 0.3239384889602661, "step": 1585 }, { "epoch": 0.7838873100210058, "grad_norm": 1.0164738185404556, "learning_rate": 1.4274050568225452e-05, "loss": 0.2708761692047119, "step": 1586 }, { "epoch": 0.78438156431484, "grad_norm": 1.2356501028179845, "learning_rate": 1.4266662434966412e-05, "loss": 0.3633013963699341, "step": 1587 }, { "epoch": 0.7848758186086742, "grad_norm": 1.2145151160613337, "learning_rate": 1.425927145334449e-05, "loss": 0.36411651968955994, "step": 1588 }, { "epoch": 0.7853700729025084, "grad_norm": 1.2093753197442545, "learning_rate": 1.4251877628293804e-05, "loss": 0.3120966851711273, "step": 1589 }, { "epoch": 0.7858643271963425, "grad_norm": 1.111474907013162, "learning_rate": 1.4244480964750365e-05, "loss": 0.32788634300231934, "step": 1590 }, { "epoch": 0.7863585814901767, "grad_norm": 1.1320230499507122, "learning_rate": 1.423708146765208e-05, "loss": 0.2919159233570099, "step": 1591 }, { "epoch": 0.7868528357840109, "grad_norm": 1.1271090926469096, "learning_rate": 1.4229679141938749e-05, "loss": 0.3135683834552765, "step": 1592 }, { "epoch": 0.7873470900778451, "grad_norm": 1.2447784007425877, "learning_rate": 1.4222273992552058e-05, "loss": 0.351981520652771, "step": 1593 }, { "epoch": 0.7878413443716792, "grad_norm": 1.1846979202846248, "learning_rate": 1.4214866024435576e-05, "loss": 0.3615785837173462, "step": 1594 }, { "epoch": 0.7883355986655134, "grad_norm": 1.1632616021817466, "learning_rate": 1.420745524253476e-05, "loss": 0.29399484395980835, "step": 1595 }, { "epoch": 0.7888298529593476, "grad_norm": 1.1714512606078011, "learning_rate": 1.420004165179694e-05, "loss": 0.30501872301101685, "step": 1596 }, { "epoch": 0.7893241072531818, "grad_norm": 1.1172632404953093, "learning_rate": 1.4192625257171331e-05, "loss": 0.33745667338371277, "step": 1597 }, { "epoch": 0.7898183615470159, "grad_norm": 0.994693525988225, "learning_rate": 1.4185206063609e-05, "loss": 0.2675662934780121, "step": 1598 }, { "epoch": 0.7903126158408501, "grad_norm": 1.022107075414073, "learning_rate": 1.41777840760629e-05, "loss": 0.295659601688385, "step": 1599 }, { "epoch": 0.7908068701346843, "grad_norm": 1.119079517603524, "learning_rate": 1.4170359299487848e-05, "loss": 0.3164275586605072, "step": 1600 }, { "epoch": 0.7913011244285185, "grad_norm": 1.0695885495482724, "learning_rate": 1.416293173884051e-05, "loss": 0.3039100766181946, "step": 1601 }, { "epoch": 0.7917953787223526, "grad_norm": 1.1080665801372258, "learning_rate": 1.4155501399079427e-05, "loss": 0.2994040846824646, "step": 1602 }, { "epoch": 0.7922896330161868, "grad_norm": 1.3291271745996591, "learning_rate": 1.4148068285164984e-05, "loss": 0.3129369616508484, "step": 1603 }, { "epoch": 0.792783887310021, "grad_norm": 1.084724718149673, "learning_rate": 1.4140632402059424e-05, "loss": 0.3223167657852173, "step": 1604 }, { "epoch": 0.7932781416038552, "grad_norm": 1.0882285752839331, "learning_rate": 1.4133193754726834e-05, "loss": 0.2734811305999756, "step": 1605 }, { "epoch": 0.7937723958976893, "grad_norm": 1.103029405529104, "learning_rate": 1.4125752348133148e-05, "loss": 0.27474087476730347, "step": 1606 }, { "epoch": 0.7942666501915235, "grad_norm": 1.0487344928171054, "learning_rate": 1.4118308187246145e-05, "loss": 0.2619907557964325, "step": 1607 }, { "epoch": 0.7947609044853577, "grad_norm": 1.1981880636142406, "learning_rate": 1.411086127703544e-05, "loss": 0.3176937699317932, "step": 1608 }, { "epoch": 0.7952551587791918, "grad_norm": 1.12323060393325, "learning_rate": 1.4103411622472483e-05, "loss": 0.28044235706329346, "step": 1609 }, { "epoch": 0.795749413073026, "grad_norm": 1.0360499320558048, "learning_rate": 1.409595922853056e-05, "loss": 0.27778196334838867, "step": 1610 }, { "epoch": 0.7962436673668603, "grad_norm": 1.2703211339383462, "learning_rate": 1.4088504100184777e-05, "loss": 0.3168628513813019, "step": 1611 }, { "epoch": 0.7967379216606945, "grad_norm": 1.1557608708585085, "learning_rate": 1.4081046242412075e-05, "loss": 0.30454084277153015, "step": 1612 }, { "epoch": 0.7972321759545286, "grad_norm": 1.169123128871501, "learning_rate": 1.4073585660191214e-05, "loss": 0.34019169211387634, "step": 1613 }, { "epoch": 0.7977264302483628, "grad_norm": 1.173717391982327, "learning_rate": 1.4066122358502772e-05, "loss": 0.3044774830341339, "step": 1614 }, { "epoch": 0.798220684542197, "grad_norm": 1.1570346377203322, "learning_rate": 1.4058656342329136e-05, "loss": 0.3181847333908081, "step": 1615 }, { "epoch": 0.7987149388360312, "grad_norm": 1.249158616205248, "learning_rate": 1.405118761665452e-05, "loss": 0.3400845229625702, "step": 1616 }, { "epoch": 0.7992091931298653, "grad_norm": 1.2103435711338524, "learning_rate": 1.4043716186464935e-05, "loss": 0.2845221161842346, "step": 1617 }, { "epoch": 0.7997034474236995, "grad_norm": 1.060854004382088, "learning_rate": 1.4036242056748202e-05, "loss": 0.27315276861190796, "step": 1618 }, { "epoch": 0.8001977017175337, "grad_norm": 1.2994888590220768, "learning_rate": 1.4028765232493942e-05, "loss": 0.3388780951499939, "step": 1619 }, { "epoch": 0.8006919560113679, "grad_norm": 1.282329812705599, "learning_rate": 1.4021285718693581e-05, "loss": 0.338635116815567, "step": 1620 }, { "epoch": 0.801186210305202, "grad_norm": 1.051985157077811, "learning_rate": 1.4013803520340328e-05, "loss": 0.26962924003601074, "step": 1621 }, { "epoch": 0.8016804645990362, "grad_norm": 1.119736165525956, "learning_rate": 1.4006318642429194e-05, "loss": 0.32106393575668335, "step": 1622 }, { "epoch": 0.8021747188928704, "grad_norm": 1.1215264874092639, "learning_rate": 1.399883108995698e-05, "loss": 0.33063358068466187, "step": 1623 }, { "epoch": 0.8026689731867046, "grad_norm": 1.2875541426354853, "learning_rate": 1.3991340867922266e-05, "loss": 0.31906163692474365, "step": 1624 }, { "epoch": 0.8031632274805387, "grad_norm": 1.0397829646035845, "learning_rate": 1.3983847981325415e-05, "loss": 0.2601381242275238, "step": 1625 }, { "epoch": 0.8036574817743729, "grad_norm": 1.1557585059548563, "learning_rate": 1.3976352435168577e-05, "loss": 0.3342537581920624, "step": 1626 }, { "epoch": 0.8041517360682071, "grad_norm": 1.2564737583224261, "learning_rate": 1.3968854234455669e-05, "loss": 0.3372059166431427, "step": 1627 }, { "epoch": 0.8046459903620413, "grad_norm": 1.1676806235835944, "learning_rate": 1.3961353384192377e-05, "loss": 0.31026744842529297, "step": 1628 }, { "epoch": 0.8051402446558754, "grad_norm": 1.0921501695742, "learning_rate": 1.3953849889386173e-05, "loss": 0.2867652177810669, "step": 1629 }, { "epoch": 0.8056344989497096, "grad_norm": 1.1055169200249502, "learning_rate": 1.3946343755046274e-05, "loss": 0.29169392585754395, "step": 1630 }, { "epoch": 0.8061287532435438, "grad_norm": 1.0753220774925722, "learning_rate": 1.393883498618367e-05, "loss": 0.2976510524749756, "step": 1631 }, { "epoch": 0.806623007537378, "grad_norm": 1.1387290098549956, "learning_rate": 1.3931323587811107e-05, "loss": 0.2900371551513672, "step": 1632 }, { "epoch": 0.8071172618312121, "grad_norm": 1.0560549112494348, "learning_rate": 1.3923809564943093e-05, "loss": 0.31660354137420654, "step": 1633 }, { "epoch": 0.8076115161250463, "grad_norm": 1.079892158607702, "learning_rate": 1.3916292922595875e-05, "loss": 0.3099827468395233, "step": 1634 }, { "epoch": 0.8081057704188805, "grad_norm": 1.1450154190444473, "learning_rate": 1.3908773665787459e-05, "loss": 0.34322571754455566, "step": 1635 }, { "epoch": 0.8086000247127146, "grad_norm": 1.0812992610334402, "learning_rate": 1.3901251799537592e-05, "loss": 0.2780989408493042, "step": 1636 }, { "epoch": 0.8090942790065488, "grad_norm": 1.1023331343203706, "learning_rate": 1.389372732886777e-05, "loss": 0.31049463152885437, "step": 1637 }, { "epoch": 0.809588533300383, "grad_norm": 1.0442513462466116, "learning_rate": 1.3886200258801213e-05, "loss": 0.29925107955932617, "step": 1638 }, { "epoch": 0.8100827875942173, "grad_norm": 1.1039524368767084, "learning_rate": 1.3878670594362893e-05, "loss": 0.31893983483314514, "step": 1639 }, { "epoch": 0.8105770418880514, "grad_norm": 1.1529184850949745, "learning_rate": 1.3871138340579502e-05, "loss": 0.31307080388069153, "step": 1640 }, { "epoch": 0.8110712961818856, "grad_norm": 1.068880489111062, "learning_rate": 1.3863603502479465e-05, "loss": 0.28198909759521484, "step": 1641 }, { "epoch": 0.8115655504757198, "grad_norm": 1.085975037148026, "learning_rate": 1.3856066085092936e-05, "loss": 0.28937461972236633, "step": 1642 }, { "epoch": 0.812059804769554, "grad_norm": 1.2145338549731968, "learning_rate": 1.3848526093451789e-05, "loss": 0.32332292199134827, "step": 1643 }, { "epoch": 0.8125540590633881, "grad_norm": 1.1174132019487801, "learning_rate": 1.3840983532589606e-05, "loss": 0.3059847056865692, "step": 1644 }, { "epoch": 0.8130483133572223, "grad_norm": 1.0961695985122493, "learning_rate": 1.3833438407541698e-05, "loss": 0.2939583957195282, "step": 1645 }, { "epoch": 0.8135425676510565, "grad_norm": 1.098340825845408, "learning_rate": 1.3825890723345082e-05, "loss": 0.3293933868408203, "step": 1646 }, { "epoch": 0.8140368219448907, "grad_norm": 1.230371641918686, "learning_rate": 1.3818340485038488e-05, "loss": 0.33373600244522095, "step": 1647 }, { "epoch": 0.8145310762387248, "grad_norm": 1.0438422749127716, "learning_rate": 1.3810787697662337e-05, "loss": 0.2716716527938843, "step": 1648 }, { "epoch": 0.815025330532559, "grad_norm": 1.0698510483790238, "learning_rate": 1.3803232366258774e-05, "loss": 0.26109835505485535, "step": 1649 }, { "epoch": 0.8155195848263932, "grad_norm": 1.1399890180248013, "learning_rate": 1.3795674495871627e-05, "loss": 0.3161536753177643, "step": 1650 }, { "epoch": 0.8160138391202274, "grad_norm": 1.0602857079391073, "learning_rate": 1.3788114091546414e-05, "loss": 0.3078432083129883, "step": 1651 }, { "epoch": 0.8165080934140615, "grad_norm": 1.2174185154702881, "learning_rate": 1.3780551158330364e-05, "loss": 0.31023627519607544, "step": 1652 }, { "epoch": 0.8170023477078957, "grad_norm": 1.2616374354619766, "learning_rate": 1.3772985701272374e-05, "loss": 0.3438849151134491, "step": 1653 }, { "epoch": 0.8174966020017299, "grad_norm": 1.178467386446937, "learning_rate": 1.376541772542304e-05, "loss": 0.31897789239883423, "step": 1654 }, { "epoch": 0.8179908562955641, "grad_norm": 1.1125307511503921, "learning_rate": 1.3757847235834636e-05, "loss": 0.3101171553134918, "step": 1655 }, { "epoch": 0.8184851105893982, "grad_norm": 1.0084998738545823, "learning_rate": 1.375027423756111e-05, "loss": 0.28926995396614075, "step": 1656 }, { "epoch": 0.8189793648832324, "grad_norm": 1.0711136459333532, "learning_rate": 1.3742698735658087e-05, "loss": 0.322610080242157, "step": 1657 }, { "epoch": 0.8194736191770666, "grad_norm": 1.272869487692088, "learning_rate": 1.3735120735182865e-05, "loss": 0.27430039644241333, "step": 1658 }, { "epoch": 0.8199678734709008, "grad_norm": 1.217768709412782, "learning_rate": 1.3727540241194408e-05, "loss": 0.3091571629047394, "step": 1659 }, { "epoch": 0.8204621277647349, "grad_norm": 1.1956758026057746, "learning_rate": 1.3719957258753347e-05, "loss": 0.3039378523826599, "step": 1660 }, { "epoch": 0.8209563820585691, "grad_norm": 1.0829759157920493, "learning_rate": 1.371237179292197e-05, "loss": 0.29711851477622986, "step": 1661 }, { "epoch": 0.8214506363524033, "grad_norm": 1.238383962420106, "learning_rate": 1.370478384876423e-05, "loss": 0.32411956787109375, "step": 1662 }, { "epoch": 0.8219448906462374, "grad_norm": 1.124344248809279, "learning_rate": 1.3697193431345725e-05, "loss": 0.2981719672679901, "step": 1663 }, { "epoch": 0.8224391449400716, "grad_norm": 1.1755056696925432, "learning_rate": 1.3689600545733713e-05, "loss": 0.32756730914115906, "step": 1664 }, { "epoch": 0.8229333992339058, "grad_norm": 1.2289352827455349, "learning_rate": 1.3682005196997094e-05, "loss": 0.3910979628562927, "step": 1665 }, { "epoch": 0.82342765352774, "grad_norm": 1.1421419999727185, "learning_rate": 1.3674407390206417e-05, "loss": 0.31716856360435486, "step": 1666 }, { "epoch": 0.8239219078215742, "grad_norm": 1.087053372594853, "learning_rate": 1.3666807130433865e-05, "loss": 0.31816208362579346, "step": 1667 }, { "epoch": 0.8244161621154084, "grad_norm": 1.075967107077939, "learning_rate": 1.3659204422753265e-05, "loss": 0.3008955121040344, "step": 1668 }, { "epoch": 0.8249104164092426, "grad_norm": 1.0356290376868373, "learning_rate": 1.3651599272240078e-05, "loss": 0.2957409918308258, "step": 1669 }, { "epoch": 0.8254046707030768, "grad_norm": 1.0989692049502364, "learning_rate": 1.364399168397139e-05, "loss": 0.33019471168518066, "step": 1670 }, { "epoch": 0.8258989249969109, "grad_norm": 1.1429937641963879, "learning_rate": 1.3636381663025917e-05, "loss": 0.3532376289367676, "step": 1671 }, { "epoch": 0.8263931792907451, "grad_norm": 1.1063068636148639, "learning_rate": 1.362876921448401e-05, "loss": 0.2980180084705353, "step": 1672 }, { "epoch": 0.8268874335845793, "grad_norm": 1.0418269417044947, "learning_rate": 1.362115434342762e-05, "loss": 0.27932479977607727, "step": 1673 }, { "epoch": 0.8273816878784135, "grad_norm": 1.1782768860255097, "learning_rate": 1.3613537054940331e-05, "loss": 0.2783966064453125, "step": 1674 }, { "epoch": 0.8278759421722476, "grad_norm": 1.1803795842967677, "learning_rate": 1.3605917354107336e-05, "loss": 0.2957308888435364, "step": 1675 }, { "epoch": 0.8283701964660818, "grad_norm": 1.180747940998609, "learning_rate": 1.3598295246015439e-05, "loss": 0.31640201807022095, "step": 1676 }, { "epoch": 0.828864450759916, "grad_norm": 1.0988354367735653, "learning_rate": 1.3590670735753047e-05, "loss": 0.2969709634780884, "step": 1677 }, { "epoch": 0.8293587050537502, "grad_norm": 1.1164468460017938, "learning_rate": 1.3583043828410177e-05, "loss": 0.34167301654815674, "step": 1678 }, { "epoch": 0.8298529593475843, "grad_norm": 1.0956930352290435, "learning_rate": 1.3575414529078443e-05, "loss": 0.28540804982185364, "step": 1679 }, { "epoch": 0.8303472136414185, "grad_norm": 1.1795719749617215, "learning_rate": 1.3567782842851054e-05, "loss": 0.2962091565132141, "step": 1680 }, { "epoch": 0.8308414679352527, "grad_norm": 1.1969039130243166, "learning_rate": 1.3560148774822816e-05, "loss": 0.3650284707546234, "step": 1681 }, { "epoch": 0.8313357222290869, "grad_norm": 1.1374534594887609, "learning_rate": 1.3552512330090126e-05, "loss": 0.3134267330169678, "step": 1682 }, { "epoch": 0.831829976522921, "grad_norm": 1.0921894303145987, "learning_rate": 1.3544873513750967e-05, "loss": 0.3020439147949219, "step": 1683 }, { "epoch": 0.8323242308167552, "grad_norm": 0.9393581319245673, "learning_rate": 1.3537232330904895e-05, "loss": 0.25083282589912415, "step": 1684 }, { "epoch": 0.8328184851105894, "grad_norm": 1.2024147558027563, "learning_rate": 1.3529588786653063e-05, "loss": 0.33875352144241333, "step": 1685 }, { "epoch": 0.8333127394044236, "grad_norm": 1.0620839528979684, "learning_rate": 1.3521942886098186e-05, "loss": 0.2717735171318054, "step": 1686 }, { "epoch": 0.8338069936982577, "grad_norm": 1.1255995988400895, "learning_rate": 1.3514294634344562e-05, "loss": 0.271842896938324, "step": 1687 }, { "epoch": 0.8343012479920919, "grad_norm": 1.3262220000473801, "learning_rate": 1.3506644036498054e-05, "loss": 0.29420506954193115, "step": 1688 }, { "epoch": 0.8347955022859261, "grad_norm": 1.338127401529371, "learning_rate": 1.349899109766609e-05, "loss": 0.3336431682109833, "step": 1689 }, { "epoch": 0.8352897565797602, "grad_norm": 1.0514224360912943, "learning_rate": 1.3491335822957665e-05, "loss": 0.2848295569419861, "step": 1690 }, { "epoch": 0.8357840108735944, "grad_norm": 1.1721842125626762, "learning_rate": 1.3483678217483327e-05, "loss": 0.3164542019367218, "step": 1691 }, { "epoch": 0.8362782651674286, "grad_norm": 1.1542823329984544, "learning_rate": 1.3476018286355189e-05, "loss": 0.3030688762664795, "step": 1692 }, { "epoch": 0.8367725194612629, "grad_norm": 1.3329503320081877, "learning_rate": 1.3468356034686912e-05, "loss": 0.30218198895454407, "step": 1693 }, { "epoch": 0.837266773755097, "grad_norm": 1.1429497129560076, "learning_rate": 1.3460691467593697e-05, "loss": 0.3327499032020569, "step": 1694 }, { "epoch": 0.8377610280489312, "grad_norm": 1.2198627663252626, "learning_rate": 1.3453024590192307e-05, "loss": 0.29298892617225647, "step": 1695 }, { "epoch": 0.8382552823427654, "grad_norm": 1.238368209416205, "learning_rate": 1.344535540760104e-05, "loss": 0.3096858859062195, "step": 1696 }, { "epoch": 0.8387495366365996, "grad_norm": 1.1297510733547198, "learning_rate": 1.3437683924939731e-05, "loss": 0.30680233240127563, "step": 1697 }, { "epoch": 0.8392437909304337, "grad_norm": 1.13902422944666, "learning_rate": 1.3430010147329752e-05, "loss": 0.3139989972114563, "step": 1698 }, { "epoch": 0.8397380452242679, "grad_norm": 1.132396621648215, "learning_rate": 1.3422334079894008e-05, "loss": 0.30418652296066284, "step": 1699 }, { "epoch": 0.8402322995181021, "grad_norm": 1.228592620621731, "learning_rate": 1.3414655727756931e-05, "loss": 0.31245100498199463, "step": 1700 }, { "epoch": 0.8407265538119363, "grad_norm": 1.1908375195801162, "learning_rate": 1.3406975096044477e-05, "loss": 0.3381880223751068, "step": 1701 }, { "epoch": 0.8412208081057704, "grad_norm": 1.2009611203254438, "learning_rate": 1.3399292189884135e-05, "loss": 0.3359968960285187, "step": 1702 }, { "epoch": 0.8417150623996046, "grad_norm": 1.2698512981575327, "learning_rate": 1.3391607014404891e-05, "loss": 0.3320350646972656, "step": 1703 }, { "epoch": 0.8422093166934388, "grad_norm": 1.1615181813433448, "learning_rate": 1.3383919574737267e-05, "loss": 0.32830795645713806, "step": 1704 }, { "epoch": 0.842703570987273, "grad_norm": 0.9808785682252426, "learning_rate": 1.3376229876013285e-05, "loss": 0.255840927362442, "step": 1705 }, { "epoch": 0.8431978252811071, "grad_norm": 1.0739012833500008, "learning_rate": 1.3368537923366476e-05, "loss": 0.3110755681991577, "step": 1706 }, { "epoch": 0.8436920795749413, "grad_norm": 1.0815136095330147, "learning_rate": 1.336084372193188e-05, "loss": 0.28063881397247314, "step": 1707 }, { "epoch": 0.8441863338687755, "grad_norm": 1.1539434345644544, "learning_rate": 1.3353147276846042e-05, "loss": 0.31297358870506287, "step": 1708 }, { "epoch": 0.8446805881626097, "grad_norm": 1.155638509555895, "learning_rate": 1.3345448593246986e-05, "loss": 0.30750149488449097, "step": 1709 }, { "epoch": 0.8451748424564438, "grad_norm": 1.0259778822912606, "learning_rate": 1.333774767627425e-05, "loss": 0.2665224075317383, "step": 1710 }, { "epoch": 0.845669096750278, "grad_norm": 1.0618832452009934, "learning_rate": 1.3330044531068858e-05, "loss": 0.28920280933380127, "step": 1711 }, { "epoch": 0.8461633510441122, "grad_norm": 1.0688762844449171, "learning_rate": 1.332233916277332e-05, "loss": 0.2678643465042114, "step": 1712 }, { "epoch": 0.8466576053379464, "grad_norm": 1.1389370638959122, "learning_rate": 1.3314631576531623e-05, "loss": 0.33682242035865784, "step": 1713 }, { "epoch": 0.8471518596317805, "grad_norm": 1.2088936099945806, "learning_rate": 1.330692177748925e-05, "loss": 0.36704546213150024, "step": 1714 }, { "epoch": 0.8476461139256147, "grad_norm": 1.0972613113130176, "learning_rate": 1.3299209770793144e-05, "loss": 0.3183630108833313, "step": 1715 }, { "epoch": 0.8481403682194489, "grad_norm": 1.0799352919589156, "learning_rate": 1.3291495561591736e-05, "loss": 0.27138596773147583, "step": 1716 }, { "epoch": 0.848634622513283, "grad_norm": 1.1332588592044, "learning_rate": 1.3283779155034925e-05, "loss": 0.30252328515052795, "step": 1717 }, { "epoch": 0.8491288768071172, "grad_norm": 1.1212549613542353, "learning_rate": 1.3276060556274067e-05, "loss": 0.29494598507881165, "step": 1718 }, { "epoch": 0.8496231311009514, "grad_norm": 1.01148770717553, "learning_rate": 1.3268339770461988e-05, "loss": 0.2822422981262207, "step": 1719 }, { "epoch": 0.8501173853947857, "grad_norm": 1.134036508201843, "learning_rate": 1.3260616802752979e-05, "loss": 0.3348005712032318, "step": 1720 }, { "epoch": 0.8506116396886197, "grad_norm": 1.171053745899539, "learning_rate": 1.3252891658302782e-05, "loss": 0.3146229088306427, "step": 1721 }, { "epoch": 0.851105893982454, "grad_norm": 1.1635384669674214, "learning_rate": 1.3245164342268592e-05, "loss": 0.34189414978027344, "step": 1722 }, { "epoch": 0.8516001482762882, "grad_norm": 1.0403207041973201, "learning_rate": 1.3237434859809055e-05, "loss": 0.2967323958873749, "step": 1723 }, { "epoch": 0.8520944025701224, "grad_norm": 1.1011411329678815, "learning_rate": 1.3229703216084262e-05, "loss": 0.329689085483551, "step": 1724 }, { "epoch": 0.8525886568639565, "grad_norm": 1.1910259713127598, "learning_rate": 1.3221969416255751e-05, "loss": 0.33041107654571533, "step": 1725 }, { "epoch": 0.8530829111577907, "grad_norm": 1.144468406694428, "learning_rate": 1.321423346548649e-05, "loss": 0.30197203159332275, "step": 1726 }, { "epoch": 0.8535771654516249, "grad_norm": 1.1709857904248526, "learning_rate": 1.3206495368940897e-05, "loss": 0.29060906171798706, "step": 1727 }, { "epoch": 0.8540714197454591, "grad_norm": 1.1769143322358042, "learning_rate": 1.3198755131784808e-05, "loss": 0.3119436502456665, "step": 1728 }, { "epoch": 0.8545656740392932, "grad_norm": 1.1825299188260439, "learning_rate": 1.31910127591855e-05, "loss": 0.35256415605545044, "step": 1729 }, { "epoch": 0.8550599283331274, "grad_norm": 1.169751710502227, "learning_rate": 1.3183268256311665e-05, "loss": 0.3093785345554352, "step": 1730 }, { "epoch": 0.8555541826269616, "grad_norm": 1.0555303314758304, "learning_rate": 1.317552162833343e-05, "loss": 0.2713086009025574, "step": 1731 }, { "epoch": 0.8560484369207958, "grad_norm": 1.1667835049569328, "learning_rate": 1.3167772880422325e-05, "loss": 0.3135699927806854, "step": 1732 }, { "epoch": 0.8565426912146299, "grad_norm": 1.2127716623193672, "learning_rate": 1.3160022017751308e-05, "loss": 0.3077283501625061, "step": 1733 }, { "epoch": 0.8570369455084641, "grad_norm": 1.0914461784602205, "learning_rate": 1.3152269045494744e-05, "loss": 0.2900918424129486, "step": 1734 }, { "epoch": 0.8575311998022983, "grad_norm": 1.1010374385853228, "learning_rate": 1.3144513968828406e-05, "loss": 0.30828869342803955, "step": 1735 }, { "epoch": 0.8580254540961325, "grad_norm": 1.2038482894608615, "learning_rate": 1.3136756792929469e-05, "loss": 0.32526400685310364, "step": 1736 }, { "epoch": 0.8585197083899666, "grad_norm": 1.2033734524328428, "learning_rate": 1.3128997522976518e-05, "loss": 0.35023608803749084, "step": 1737 }, { "epoch": 0.8590139626838008, "grad_norm": 1.0100870731750684, "learning_rate": 1.312123616414953e-05, "loss": 0.27287641167640686, "step": 1738 }, { "epoch": 0.859508216977635, "grad_norm": 1.1797907328737691, "learning_rate": 1.3113472721629871e-05, "loss": 0.346009761095047, "step": 1739 }, { "epoch": 0.8600024712714691, "grad_norm": 1.0724791595798373, "learning_rate": 1.3105707200600312e-05, "loss": 0.3297504186630249, "step": 1740 }, { "epoch": 0.8604967255653033, "grad_norm": 1.1244989642514696, "learning_rate": 1.3097939606245005e-05, "loss": 0.29835087060928345, "step": 1741 }, { "epoch": 0.8609909798591375, "grad_norm": 1.1715549927893771, "learning_rate": 1.3090169943749475e-05, "loss": 0.31466037034988403, "step": 1742 }, { "epoch": 0.8614852341529717, "grad_norm": 1.1357283105102867, "learning_rate": 1.3082398218300646e-05, "loss": 0.32722294330596924, "step": 1743 }, { "epoch": 0.8619794884468058, "grad_norm": 1.0679826147860039, "learning_rate": 1.3074624435086809e-05, "loss": 0.2603963613510132, "step": 1744 }, { "epoch": 0.86247374274064, "grad_norm": 1.3274641459704042, "learning_rate": 1.3066848599297633e-05, "loss": 0.3100607991218567, "step": 1745 }, { "epoch": 0.8629679970344742, "grad_norm": 1.0941880035602098, "learning_rate": 1.3059070716124145e-05, "loss": 0.2772334814071655, "step": 1746 }, { "epoch": 0.8634622513283084, "grad_norm": 1.0597299652706509, "learning_rate": 1.305129079075876e-05, "loss": 0.3097267746925354, "step": 1747 }, { "epoch": 0.8639565056221425, "grad_norm": 0.9828148026871187, "learning_rate": 1.304350882839524e-05, "loss": 0.24734097719192505, "step": 1748 }, { "epoch": 0.8644507599159768, "grad_norm": 1.1998311484351691, "learning_rate": 1.3035724834228713e-05, "loss": 0.32148587703704834, "step": 1749 }, { "epoch": 0.864945014209811, "grad_norm": 1.0741747588028856, "learning_rate": 1.3027938813455663e-05, "loss": 0.3037404417991638, "step": 1750 }, { "epoch": 0.8654392685036452, "grad_norm": 1.011225470292042, "learning_rate": 1.3020150771273925e-05, "loss": 0.30760154128074646, "step": 1751 }, { "epoch": 0.8659335227974793, "grad_norm": 1.2184140511087935, "learning_rate": 1.3012360712882681e-05, "loss": 0.3169519305229187, "step": 1752 }, { "epoch": 0.8664277770913135, "grad_norm": 1.1637013896789081, "learning_rate": 1.300456864348247e-05, "loss": 0.32497861981391907, "step": 1753 }, { "epoch": 0.8669220313851477, "grad_norm": 1.1775374726585146, "learning_rate": 1.2996774568275163e-05, "loss": 0.3318047821521759, "step": 1754 }, { "epoch": 0.8674162856789819, "grad_norm": 1.1616476146855954, "learning_rate": 1.298897849246397e-05, "loss": 0.32553863525390625, "step": 1755 }, { "epoch": 0.867910539972816, "grad_norm": 1.2503555115463478, "learning_rate": 1.2981180421253446e-05, "loss": 0.36457520723342896, "step": 1756 }, { "epoch": 0.8684047942666502, "grad_norm": 1.169094604095011, "learning_rate": 1.2973380359849466e-05, "loss": 0.3038361668586731, "step": 1757 }, { "epoch": 0.8688990485604844, "grad_norm": 1.1027242700855706, "learning_rate": 1.2965578313459246e-05, "loss": 0.3219846785068512, "step": 1758 }, { "epoch": 0.8693933028543186, "grad_norm": 1.1142620036406827, "learning_rate": 1.2957774287291311e-05, "loss": 0.3180781304836273, "step": 1759 }, { "epoch": 0.8698875571481527, "grad_norm": 1.0882143758359024, "learning_rate": 1.2949968286555527e-05, "loss": 0.27302947640419006, "step": 1760 }, { "epoch": 0.8703818114419869, "grad_norm": 1.1731384509972838, "learning_rate": 1.2942160316463066e-05, "loss": 0.31756314635276794, "step": 1761 }, { "epoch": 0.8708760657358211, "grad_norm": 1.1248252560155063, "learning_rate": 1.2934350382226412e-05, "loss": 0.2921680510044098, "step": 1762 }, { "epoch": 0.8713703200296553, "grad_norm": 1.2402414216219324, "learning_rate": 1.2926538489059373e-05, "loss": 0.36426985263824463, "step": 1763 }, { "epoch": 0.8718645743234894, "grad_norm": 1.1314972333554063, "learning_rate": 1.2918724642177054e-05, "loss": 0.31873831152915955, "step": 1764 }, { "epoch": 0.8723588286173236, "grad_norm": 1.0875738556359984, "learning_rate": 1.2910908846795867e-05, "loss": 0.30952733755111694, "step": 1765 }, { "epoch": 0.8728530829111578, "grad_norm": 1.149047421683754, "learning_rate": 1.2903091108133523e-05, "loss": 0.33339035511016846, "step": 1766 }, { "epoch": 0.8733473372049919, "grad_norm": 1.1272534729456736, "learning_rate": 1.2895271431409038e-05, "loss": 0.31531351804733276, "step": 1767 }, { "epoch": 0.8738415914988261, "grad_norm": 1.0832848102714157, "learning_rate": 1.2887449821842713e-05, "loss": 0.3016526401042938, "step": 1768 }, { "epoch": 0.8743358457926603, "grad_norm": 1.1203275148695215, "learning_rate": 1.2879626284656141e-05, "loss": 0.3364630341529846, "step": 1769 }, { "epoch": 0.8748301000864945, "grad_norm": 1.049317791331816, "learning_rate": 1.287180082507221e-05, "loss": 0.29755398631095886, "step": 1770 }, { "epoch": 0.8753243543803286, "grad_norm": 1.1616312337400467, "learning_rate": 1.286397344831508e-05, "loss": 0.2986103892326355, "step": 1771 }, { "epoch": 0.8758186086741628, "grad_norm": 1.0876002245947722, "learning_rate": 1.2856144159610197e-05, "loss": 0.31291434168815613, "step": 1772 }, { "epoch": 0.876312862967997, "grad_norm": 1.0995747953439883, "learning_rate": 1.2848312964184283e-05, "loss": 0.28285568952560425, "step": 1773 }, { "epoch": 0.8768071172618312, "grad_norm": 1.1726581514839194, "learning_rate": 1.2840479867265331e-05, "loss": 0.3319891095161438, "step": 1774 }, { "epoch": 0.8773013715556653, "grad_norm": 1.1459648615093148, "learning_rate": 1.2832644874082604e-05, "loss": 0.3265117406845093, "step": 1775 }, { "epoch": 0.8777956258494996, "grad_norm": 1.1247446577558389, "learning_rate": 1.2824807989866635e-05, "loss": 0.32061511278152466, "step": 1776 }, { "epoch": 0.8782898801433338, "grad_norm": 1.2889890707472673, "learning_rate": 1.2816969219849214e-05, "loss": 0.34278666973114014, "step": 1777 }, { "epoch": 0.878784134437168, "grad_norm": 1.0560963427574246, "learning_rate": 1.2809128569263387e-05, "loss": 0.28335195779800415, "step": 1778 }, { "epoch": 0.8792783887310021, "grad_norm": 1.261751311219404, "learning_rate": 1.2801286043343468e-05, "loss": 0.35037046670913696, "step": 1779 }, { "epoch": 0.8797726430248363, "grad_norm": 1.2110241423671546, "learning_rate": 1.2793441647325012e-05, "loss": 0.30058878660202026, "step": 1780 }, { "epoch": 0.8802668973186705, "grad_norm": 1.1234244113929972, "learning_rate": 1.2785595386444824e-05, "loss": 0.29526466131210327, "step": 1781 }, { "epoch": 0.8807611516125047, "grad_norm": 1.128737037655087, "learning_rate": 1.2777747265940956e-05, "loss": 0.3194332718849182, "step": 1782 }, { "epoch": 0.8812554059063388, "grad_norm": 1.1751557862784823, "learning_rate": 1.2769897291052709e-05, "loss": 0.33527326583862305, "step": 1783 }, { "epoch": 0.881749660200173, "grad_norm": 1.1396387575620477, "learning_rate": 1.2762045467020601e-05, "loss": 0.3277815580368042, "step": 1784 }, { "epoch": 0.8822439144940072, "grad_norm": 1.2472488401817894, "learning_rate": 1.2754191799086406e-05, "loss": 0.31030380725860596, "step": 1785 }, { "epoch": 0.8827381687878414, "grad_norm": 1.2316289072611675, "learning_rate": 1.274633629249312e-05, "loss": 0.34496408700942993, "step": 1786 }, { "epoch": 0.8832324230816755, "grad_norm": 1.1907817971144352, "learning_rate": 1.2738478952484964e-05, "loss": 0.31008201837539673, "step": 1787 }, { "epoch": 0.8837266773755097, "grad_norm": 1.1874449425538405, "learning_rate": 1.2730619784307388e-05, "loss": 0.35956043004989624, "step": 1788 }, { "epoch": 0.8842209316693439, "grad_norm": 1.075617061625236, "learning_rate": 1.272275879320706e-05, "loss": 0.2944573760032654, "step": 1789 }, { "epoch": 0.8847151859631781, "grad_norm": 1.0739187183942678, "learning_rate": 1.2714895984431863e-05, "loss": 0.2941366136074066, "step": 1790 }, { "epoch": 0.8852094402570122, "grad_norm": 1.3130838842625934, "learning_rate": 1.2707031363230901e-05, "loss": 0.34683144092559814, "step": 1791 }, { "epoch": 0.8857036945508464, "grad_norm": 1.1309361657268096, "learning_rate": 1.2699164934854475e-05, "loss": 0.3014514744281769, "step": 1792 }, { "epoch": 0.8861979488446806, "grad_norm": 1.1642635843186193, "learning_rate": 1.2691296704554112e-05, "loss": 0.2749955654144287, "step": 1793 }, { "epoch": 0.8866922031385147, "grad_norm": 1.2406193113190336, "learning_rate": 1.2683426677582518e-05, "loss": 0.3707960844039917, "step": 1794 }, { "epoch": 0.8871864574323489, "grad_norm": 1.098057655891237, "learning_rate": 1.2675554859193615e-05, "loss": 0.3122541606426239, "step": 1795 }, { "epoch": 0.8876807117261831, "grad_norm": 1.1564617646628, "learning_rate": 1.2667681254642521e-05, "loss": 0.3072753846645355, "step": 1796 }, { "epoch": 0.8881749660200173, "grad_norm": 1.1000251936377918, "learning_rate": 1.2659805869185534e-05, "loss": 0.27002331614494324, "step": 1797 }, { "epoch": 0.8886692203138514, "grad_norm": 1.0649961261949041, "learning_rate": 1.2651928708080155e-05, "loss": 0.2775167226791382, "step": 1798 }, { "epoch": 0.8891634746076856, "grad_norm": 1.0134446617324497, "learning_rate": 1.2644049776585061e-05, "loss": 0.30023425817489624, "step": 1799 }, { "epoch": 0.8896577289015198, "grad_norm": 1.0536326288458973, "learning_rate": 1.2636169079960116e-05, "loss": 0.29491451382637024, "step": 1800 }, { "epoch": 0.890151983195354, "grad_norm": 1.1393442237009457, "learning_rate": 1.2628286623466359e-05, "loss": 0.3069722652435303, "step": 1801 }, { "epoch": 0.8906462374891881, "grad_norm": 1.0432479678380786, "learning_rate": 1.2620402412366006e-05, "loss": 0.30594444274902344, "step": 1802 }, { "epoch": 0.8911404917830223, "grad_norm": 1.224543789313884, "learning_rate": 1.2612516451922442e-05, "loss": 0.278346985578537, "step": 1803 }, { "epoch": 0.8916347460768566, "grad_norm": 1.2157715105375322, "learning_rate": 1.2604628747400227e-05, "loss": 0.2985970973968506, "step": 1804 }, { "epoch": 0.8921290003706908, "grad_norm": 1.1226040901686842, "learning_rate": 1.259673930406507e-05, "loss": 0.31054627895355225, "step": 1805 }, { "epoch": 0.8926232546645249, "grad_norm": 1.1289150487077437, "learning_rate": 1.258884812718386e-05, "loss": 0.28903907537460327, "step": 1806 }, { "epoch": 0.8931175089583591, "grad_norm": 1.132448586420589, "learning_rate": 1.258095522202463e-05, "loss": 0.2937915027141571, "step": 1807 }, { "epoch": 0.8936117632521933, "grad_norm": 1.251676196860064, "learning_rate": 1.257306059385657e-05, "loss": 0.3038950562477112, "step": 1808 }, { "epoch": 0.8941060175460275, "grad_norm": 1.235431629529867, "learning_rate": 1.2565164247950023e-05, "loss": 0.3081057071685791, "step": 1809 }, { "epoch": 0.8946002718398616, "grad_norm": 1.1023391071403523, "learning_rate": 1.2557266189576478e-05, "loss": 0.2608702480792999, "step": 1810 }, { "epoch": 0.8950945261336958, "grad_norm": 1.1733196648921136, "learning_rate": 1.254936642400856e-05, "loss": 0.2829548120498657, "step": 1811 }, { "epoch": 0.89558878042753, "grad_norm": 1.1986500261226571, "learning_rate": 1.2541464956520045e-05, "loss": 0.3157985508441925, "step": 1812 }, { "epoch": 0.8960830347213642, "grad_norm": 1.1805347109907394, "learning_rate": 1.2533561792385837e-05, "loss": 0.2948974370956421, "step": 1813 }, { "epoch": 0.8965772890151983, "grad_norm": 1.1460952132203441, "learning_rate": 1.252565693688198e-05, "loss": 0.3011903166770935, "step": 1814 }, { "epoch": 0.8970715433090325, "grad_norm": 1.3055245186221631, "learning_rate": 1.2517750395285635e-05, "loss": 0.3570353388786316, "step": 1815 }, { "epoch": 0.8975657976028667, "grad_norm": 1.1337741379781219, "learning_rate": 1.2509842172875105e-05, "loss": 0.30166712403297424, "step": 1816 }, { "epoch": 0.8980600518967009, "grad_norm": 1.104423129790351, "learning_rate": 1.2501932274929797e-05, "loss": 0.3260636329650879, "step": 1817 }, { "epoch": 0.898554306190535, "grad_norm": 1.0975906386988825, "learning_rate": 1.2494020706730251e-05, "loss": 0.31647035479545593, "step": 1818 }, { "epoch": 0.8990485604843692, "grad_norm": 1.2099925292750648, "learning_rate": 1.2486107473558118e-05, "loss": 0.3059273064136505, "step": 1819 }, { "epoch": 0.8995428147782034, "grad_norm": 1.1238527206258473, "learning_rate": 1.247819258069616e-05, "loss": 0.31050577759742737, "step": 1820 }, { "epoch": 0.9000370690720375, "grad_norm": 1.167261413544568, "learning_rate": 1.2470276033428241e-05, "loss": 0.3199779689311981, "step": 1821 }, { "epoch": 0.9005313233658717, "grad_norm": 1.1634621252313533, "learning_rate": 1.2462357837039338e-05, "loss": 0.31346091628074646, "step": 1822 }, { "epoch": 0.9010255776597059, "grad_norm": 1.7712393639688087, "learning_rate": 1.245443799681553e-05, "loss": 0.31128326058387756, "step": 1823 }, { "epoch": 0.9015198319535401, "grad_norm": 1.0665988205220116, "learning_rate": 1.244651651804398e-05, "loss": 0.27540329098701477, "step": 1824 }, { "epoch": 0.9020140862473742, "grad_norm": 1.08908725997666, "learning_rate": 1.243859340601296e-05, "loss": 0.2613363265991211, "step": 1825 }, { "epoch": 0.9025083405412084, "grad_norm": 1.1499718586586674, "learning_rate": 1.2430668666011825e-05, "loss": 0.30530184507369995, "step": 1826 }, { "epoch": 0.9030025948350426, "grad_norm": 1.0907140946424856, "learning_rate": 1.2422742303331022e-05, "loss": 0.3223349153995514, "step": 1827 }, { "epoch": 0.9034968491288768, "grad_norm": 1.131086049145241, "learning_rate": 1.2414814323262067e-05, "loss": 0.32017287611961365, "step": 1828 }, { "epoch": 0.9039911034227109, "grad_norm": 1.2183101338845472, "learning_rate": 1.2406884731097582e-05, "loss": 0.2965891361236572, "step": 1829 }, { "epoch": 0.9044853577165451, "grad_norm": 1.535326476461108, "learning_rate": 1.2398953532131235e-05, "loss": 0.3517727851867676, "step": 1830 }, { "epoch": 0.9049796120103794, "grad_norm": 1.0055415215772612, "learning_rate": 1.2391020731657788e-05, "loss": 0.26107311248779297, "step": 1831 }, { "epoch": 0.9054738663042136, "grad_norm": 1.16405975535122, "learning_rate": 1.2383086334973065e-05, "loss": 0.31327998638153076, "step": 1832 }, { "epoch": 0.9059681205980477, "grad_norm": 1.1376729658041929, "learning_rate": 1.2375150347373956e-05, "loss": 0.2708127498626709, "step": 1833 }, { "epoch": 0.9064623748918819, "grad_norm": 1.2578266997569258, "learning_rate": 1.236721277415841e-05, "loss": 0.3264025151729584, "step": 1834 }, { "epoch": 0.9069566291857161, "grad_norm": 1.1552886471917594, "learning_rate": 1.2359273620625438e-05, "loss": 0.3226723074913025, "step": 1835 }, { "epoch": 0.9074508834795503, "grad_norm": 1.095230882373492, "learning_rate": 1.2351332892075109e-05, "loss": 0.2895771861076355, "step": 1836 }, { "epoch": 0.9079451377733844, "grad_norm": 1.149733162695983, "learning_rate": 1.234339059380854e-05, "loss": 0.3316076397895813, "step": 1837 }, { "epoch": 0.9084393920672186, "grad_norm": 1.10037368979265, "learning_rate": 1.2335446731127887e-05, "loss": 0.29858651757240295, "step": 1838 }, { "epoch": 0.9089336463610528, "grad_norm": 1.2759313559643695, "learning_rate": 1.2327501309336371e-05, "loss": 0.31340792775154114, "step": 1839 }, { "epoch": 0.909427900654887, "grad_norm": 1.038203202123546, "learning_rate": 1.2319554333738236e-05, "loss": 0.27344945073127747, "step": 1840 }, { "epoch": 0.9099221549487211, "grad_norm": 1.1811761633875792, "learning_rate": 1.2311605809638766e-05, "loss": 0.27349725365638733, "step": 1841 }, { "epoch": 0.9104164092425553, "grad_norm": 1.2931266398373575, "learning_rate": 1.2303655742344292e-05, "loss": 0.28933316469192505, "step": 1842 }, { "epoch": 0.9109106635363895, "grad_norm": 1.1360201134878805, "learning_rate": 1.2295704137162158e-05, "loss": 0.3315466344356537, "step": 1843 }, { "epoch": 0.9114049178302237, "grad_norm": 1.3735184410271417, "learning_rate": 1.2287750999400743e-05, "loss": 0.3227408528327942, "step": 1844 }, { "epoch": 0.9118991721240578, "grad_norm": 1.1237568254849295, "learning_rate": 1.2279796334369447e-05, "loss": 0.30476877093315125, "step": 1845 }, { "epoch": 0.912393426417892, "grad_norm": 1.1863082805694927, "learning_rate": 1.2271840147378697e-05, "loss": 0.29941046237945557, "step": 1846 }, { "epoch": 0.9128876807117262, "grad_norm": 1.040665730868043, "learning_rate": 1.2263882443739923e-05, "loss": 0.26635122299194336, "step": 1847 }, { "epoch": 0.9133819350055603, "grad_norm": 1.2009768589181191, "learning_rate": 1.2255923228765574e-05, "loss": 0.32384809851646423, "step": 1848 }, { "epoch": 0.9138761892993945, "grad_norm": 1.1005403546735195, "learning_rate": 1.2247962507769113e-05, "loss": 0.2830178141593933, "step": 1849 }, { "epoch": 0.9143704435932287, "grad_norm": 1.146384025635135, "learning_rate": 1.2240000286065003e-05, "loss": 0.32860931754112244, "step": 1850 }, { "epoch": 0.9148646978870629, "grad_norm": 1.1448106720128721, "learning_rate": 1.2232036568968703e-05, "loss": 0.2820647954940796, "step": 1851 }, { "epoch": 0.915358952180897, "grad_norm": 1.2180250787611469, "learning_rate": 1.2224071361796685e-05, "loss": 0.3368694484233856, "step": 1852 }, { "epoch": 0.9158532064747312, "grad_norm": 1.1414152376911786, "learning_rate": 1.2216104669866405e-05, "loss": 0.32594096660614014, "step": 1853 }, { "epoch": 0.9163474607685654, "grad_norm": 1.129839927585001, "learning_rate": 1.2208136498496307e-05, "loss": 0.3383556008338928, "step": 1854 }, { "epoch": 0.9168417150623996, "grad_norm": 1.1516691565096748, "learning_rate": 1.2200166853005837e-05, "loss": 0.2655363976955414, "step": 1855 }, { "epoch": 0.9173359693562337, "grad_norm": 1.067780593954706, "learning_rate": 1.2192195738715414e-05, "loss": 0.30512773990631104, "step": 1856 }, { "epoch": 0.917830223650068, "grad_norm": 1.3304322049937938, "learning_rate": 1.2184223160946433e-05, "loss": 0.34026995301246643, "step": 1857 }, { "epoch": 0.9183244779439022, "grad_norm": 1.2079696433735554, "learning_rate": 1.2176249125021281e-05, "loss": 0.29324328899383545, "step": 1858 }, { "epoch": 0.9188187322377364, "grad_norm": 1.454623275441196, "learning_rate": 1.2168273636263308e-05, "loss": 0.3114206790924072, "step": 1859 }, { "epoch": 0.9193129865315705, "grad_norm": 1.1301917440411622, "learning_rate": 1.2160296699996839e-05, "loss": 0.2829141914844513, "step": 1860 }, { "epoch": 0.9198072408254047, "grad_norm": 1.0721269081592821, "learning_rate": 1.2152318321547156e-05, "loss": 0.2735600769519806, "step": 1861 }, { "epoch": 0.9203014951192389, "grad_norm": 1.0465335380212768, "learning_rate": 1.2144338506240519e-05, "loss": 0.3160930573940277, "step": 1862 }, { "epoch": 0.9207957494130731, "grad_norm": 1.0735769631967078, "learning_rate": 1.2136357259404128e-05, "loss": 0.26677393913269043, "step": 1863 }, { "epoch": 0.9212900037069072, "grad_norm": 1.1305004585474958, "learning_rate": 1.2128374586366159e-05, "loss": 0.33033064007759094, "step": 1864 }, { "epoch": 0.9217842580007414, "grad_norm": 1.1210908469065626, "learning_rate": 1.2120390492455727e-05, "loss": 0.28271663188934326, "step": 1865 }, { "epoch": 0.9222785122945756, "grad_norm": 1.1196923913120616, "learning_rate": 1.21124049830029e-05, "loss": 0.3116013705730438, "step": 1866 }, { "epoch": 0.9227727665884098, "grad_norm": 1.1258678919425735, "learning_rate": 1.2104418063338686e-05, "loss": 0.30614158511161804, "step": 1867 }, { "epoch": 0.9232670208822439, "grad_norm": 1.2128311616527454, "learning_rate": 1.2096429738795041e-05, "loss": 0.34351983666419983, "step": 1868 }, { "epoch": 0.9237612751760781, "grad_norm": 1.2814647055659063, "learning_rate": 1.2088440014704858e-05, "loss": 0.31006965041160583, "step": 1869 }, { "epoch": 0.9242555294699123, "grad_norm": 1.093225958461299, "learning_rate": 1.2080448896401964e-05, "loss": 0.2671147584915161, "step": 1870 }, { "epoch": 0.9247497837637465, "grad_norm": 1.2298582810409653, "learning_rate": 1.207245638922111e-05, "loss": 0.29123416543006897, "step": 1871 }, { "epoch": 0.9252440380575806, "grad_norm": 1.1613532669189326, "learning_rate": 1.2064462498497984e-05, "loss": 0.31838539242744446, "step": 1872 }, { "epoch": 0.9257382923514148, "grad_norm": 1.1861407153761483, "learning_rate": 1.205646722956919e-05, "loss": 0.3158906102180481, "step": 1873 }, { "epoch": 0.926232546645249, "grad_norm": 1.2339017273841688, "learning_rate": 1.2048470587772257e-05, "loss": 0.3679552674293518, "step": 1874 }, { "epoch": 0.9267268009390831, "grad_norm": 1.1210108605660978, "learning_rate": 1.204047257844563e-05, "loss": 0.2891008257865906, "step": 1875 }, { "epoch": 0.9272210552329173, "grad_norm": 1.1110723692294957, "learning_rate": 1.2032473206928663e-05, "loss": 0.3207235634326935, "step": 1876 }, { "epoch": 0.9277153095267515, "grad_norm": 1.203189154519193, "learning_rate": 1.2024472478561624e-05, "loss": 0.2710658311843872, "step": 1877 }, { "epoch": 0.9282095638205857, "grad_norm": 1.1156076578026985, "learning_rate": 1.2016470398685685e-05, "loss": 0.2554836869239807, "step": 1878 }, { "epoch": 0.9287038181144198, "grad_norm": 1.079454168196498, "learning_rate": 1.2008466972642921e-05, "loss": 0.2822943329811096, "step": 1879 }, { "epoch": 0.929198072408254, "grad_norm": 1.2007950112208574, "learning_rate": 1.20004622057763e-05, "loss": 0.3447754681110382, "step": 1880 }, { "epoch": 0.9296923267020882, "grad_norm": 1.1885607345269107, "learning_rate": 1.1992456103429694e-05, "loss": 0.3009227514266968, "step": 1881 }, { "epoch": 0.9301865809959224, "grad_norm": 1.3491102685763696, "learning_rate": 1.1984448670947863e-05, "loss": 0.33154594898223877, "step": 1882 }, { "epoch": 0.9306808352897565, "grad_norm": 1.7075348805187878, "learning_rate": 1.1976439913676457e-05, "loss": 0.32905343174934387, "step": 1883 }, { "epoch": 0.9311750895835907, "grad_norm": 1.2010662669423082, "learning_rate": 1.1968429836962e-05, "loss": 0.34757447242736816, "step": 1884 }, { "epoch": 0.931669343877425, "grad_norm": 1.2626693752273819, "learning_rate": 1.1960418446151912e-05, "loss": 0.29980987310409546, "step": 1885 }, { "epoch": 0.9321635981712592, "grad_norm": 1.081439601568963, "learning_rate": 1.1952405746594477e-05, "loss": 0.3106808662414551, "step": 1886 }, { "epoch": 0.9326578524650933, "grad_norm": 1.2465315131717423, "learning_rate": 1.1944391743638863e-05, "loss": 0.3222411572933197, "step": 1887 }, { "epoch": 0.9331521067589275, "grad_norm": 1.117897007008322, "learning_rate": 1.1936376442635104e-05, "loss": 0.3365646302700043, "step": 1888 }, { "epoch": 0.9336463610527617, "grad_norm": 1.2223325106102665, "learning_rate": 1.1928359848934101e-05, "loss": 0.32500627636909485, "step": 1889 }, { "epoch": 0.9341406153465959, "grad_norm": 1.1692844365001853, "learning_rate": 1.1920341967887614e-05, "loss": 0.31395500898361206, "step": 1890 }, { "epoch": 0.93463486964043, "grad_norm": 1.084320264091655, "learning_rate": 1.1912322804848268e-05, "loss": 0.3060624301433563, "step": 1891 }, { "epoch": 0.9351291239342642, "grad_norm": 1.042165685734395, "learning_rate": 1.190430236516954e-05, "loss": 0.2644454836845398, "step": 1892 }, { "epoch": 0.9356233782280984, "grad_norm": 1.2086818065931575, "learning_rate": 1.1896280654205765e-05, "loss": 0.33404678106307983, "step": 1893 }, { "epoch": 0.9361176325219326, "grad_norm": 1.0362894963118763, "learning_rate": 1.1888257677312119e-05, "loss": 0.28557512164115906, "step": 1894 }, { "epoch": 0.9366118868157667, "grad_norm": 1.1281245501630466, "learning_rate": 1.1880233439844623e-05, "loss": 0.3332308530807495, "step": 1895 }, { "epoch": 0.9371061411096009, "grad_norm": 1.0648316720915905, "learning_rate": 1.1872207947160155e-05, "loss": 0.3274528384208679, "step": 1896 }, { "epoch": 0.9376003954034351, "grad_norm": 1.168900116977035, "learning_rate": 1.1864181204616404e-05, "loss": 0.297880083322525, "step": 1897 }, { "epoch": 0.9380946496972693, "grad_norm": 1.1118774536365064, "learning_rate": 1.1856153217571924e-05, "loss": 0.3404296040534973, "step": 1898 }, { "epoch": 0.9385889039911034, "grad_norm": 1.7308625403608067, "learning_rate": 1.1848123991386073e-05, "loss": 0.32343849539756775, "step": 1899 }, { "epoch": 0.9390831582849376, "grad_norm": 1.0296882466024648, "learning_rate": 1.1840093531419052e-05, "loss": 0.26679158210754395, "step": 1900 }, { "epoch": 0.9395774125787718, "grad_norm": 1.0019153721179144, "learning_rate": 1.1832061843031884e-05, "loss": 0.28106996417045593, "step": 1901 }, { "epoch": 0.9400716668726059, "grad_norm": 1.1236721425678955, "learning_rate": 1.1824028931586406e-05, "loss": 0.28356847167015076, "step": 1902 }, { "epoch": 0.9405659211664401, "grad_norm": 1.2443758247501144, "learning_rate": 1.1815994802445274e-05, "loss": 0.3256348669528961, "step": 1903 }, { "epoch": 0.9410601754602743, "grad_norm": 1.1514727386744015, "learning_rate": 1.1807959460971958e-05, "loss": 0.2781906723976135, "step": 1904 }, { "epoch": 0.9415544297541085, "grad_norm": 1.2599759308188183, "learning_rate": 1.1799922912530741e-05, "loss": 0.3129916787147522, "step": 1905 }, { "epoch": 0.9420486840479426, "grad_norm": 1.1353254220103308, "learning_rate": 1.1791885162486705e-05, "loss": 0.281986266374588, "step": 1906 }, { "epoch": 0.9425429383417768, "grad_norm": 1.2313275172087987, "learning_rate": 1.1783846216205734e-05, "loss": 0.33587342500686646, "step": 1907 }, { "epoch": 0.943037192635611, "grad_norm": 1.0408682927660702, "learning_rate": 1.1775806079054522e-05, "loss": 0.27715635299682617, "step": 1908 }, { "epoch": 0.9435314469294452, "grad_norm": 1.1581221243071849, "learning_rate": 1.1767764756400541e-05, "loss": 0.3190307915210724, "step": 1909 }, { "epoch": 0.9440257012232793, "grad_norm": 1.1962319364965919, "learning_rate": 1.175972225361207e-05, "loss": 0.29336807131767273, "step": 1910 }, { "epoch": 0.9445199555171135, "grad_norm": 1.1448708364637925, "learning_rate": 1.1751678576058164e-05, "loss": 0.3001596927642822, "step": 1911 }, { "epoch": 0.9450142098109477, "grad_norm": 1.0832545536390727, "learning_rate": 1.1743633729108672e-05, "loss": 0.26952457427978516, "step": 1912 }, { "epoch": 0.945508464104782, "grad_norm": 1.166519142960908, "learning_rate": 1.1735587718134212e-05, "loss": 0.3193609118461609, "step": 1913 }, { "epoch": 0.946002718398616, "grad_norm": 1.2095746348772163, "learning_rate": 1.172754054850619e-05, "loss": 0.2810664176940918, "step": 1914 }, { "epoch": 0.9464969726924503, "grad_norm": 1.1743627712454017, "learning_rate": 1.1719492225596783e-05, "loss": 0.28850311040878296, "step": 1915 }, { "epoch": 0.9469912269862845, "grad_norm": 1.1739524489187587, "learning_rate": 1.1711442754778936e-05, "loss": 0.32268932461738586, "step": 1916 }, { "epoch": 0.9474854812801187, "grad_norm": 1.2236575262685914, "learning_rate": 1.1703392141426356e-05, "loss": 0.3149149715900421, "step": 1917 }, { "epoch": 0.9479797355739528, "grad_norm": 1.0472038436966378, "learning_rate": 1.1695340390913526e-05, "loss": 0.2537482678890228, "step": 1918 }, { "epoch": 0.948473989867787, "grad_norm": 1.1232208833213926, "learning_rate": 1.168728750861567e-05, "loss": 0.2611936330795288, "step": 1919 }, { "epoch": 0.9489682441616212, "grad_norm": 1.0077623948815433, "learning_rate": 1.1679233499908781e-05, "loss": 0.263653427362442, "step": 1920 }, { "epoch": 0.9494624984554554, "grad_norm": 1.1707561168968341, "learning_rate": 1.1671178370169604e-05, "loss": 0.3122594952583313, "step": 1921 }, { "epoch": 0.9499567527492895, "grad_norm": 1.1924449722361925, "learning_rate": 1.1663122124775626e-05, "loss": 0.3101043701171875, "step": 1922 }, { "epoch": 0.9504510070431237, "grad_norm": 1.129901320884474, "learning_rate": 1.1655064769105077e-05, "loss": 0.295572966337204, "step": 1923 }, { "epoch": 0.9509452613369579, "grad_norm": 1.1537509505815167, "learning_rate": 1.1647006308536937e-05, "loss": 0.29732125997543335, "step": 1924 }, { "epoch": 0.951439515630792, "grad_norm": 1.1914038253365087, "learning_rate": 1.1638946748450922e-05, "loss": 0.32320737838745117, "step": 1925 }, { "epoch": 0.9519337699246262, "grad_norm": 1.2581984463314084, "learning_rate": 1.1630886094227471e-05, "loss": 0.3306753933429718, "step": 1926 }, { "epoch": 0.9524280242184604, "grad_norm": 1.0367245477692144, "learning_rate": 1.1622824351247767e-05, "loss": 0.2368355095386505, "step": 1927 }, { "epoch": 0.9529222785122946, "grad_norm": 1.2216253394681036, "learning_rate": 1.1614761524893715e-05, "loss": 0.28470784425735474, "step": 1928 }, { "epoch": 0.9534165328061287, "grad_norm": 1.1721810384499396, "learning_rate": 1.160669762054794e-05, "loss": 0.34468895196914673, "step": 1929 }, { "epoch": 0.9539107870999629, "grad_norm": 1.1277795177992218, "learning_rate": 1.1598632643593787e-05, "loss": 0.30562442541122437, "step": 1930 }, { "epoch": 0.9544050413937971, "grad_norm": 1.2141650113141733, "learning_rate": 1.159056659941533e-05, "loss": 0.2861478924751282, "step": 1931 }, { "epoch": 0.9548992956876313, "grad_norm": 1.0692532214940453, "learning_rate": 1.1582499493397332e-05, "loss": 0.32385969161987305, "step": 1932 }, { "epoch": 0.9553935499814654, "grad_norm": 1.173323189937386, "learning_rate": 1.1574431330925287e-05, "loss": 0.2935449481010437, "step": 1933 }, { "epoch": 0.9558878042752996, "grad_norm": 1.1041433205065538, "learning_rate": 1.156636211738538e-05, "loss": 0.29380083084106445, "step": 1934 }, { "epoch": 0.9563820585691338, "grad_norm": 1.1455066452691371, "learning_rate": 1.1558291858164503e-05, "loss": 0.2957204282283783, "step": 1935 }, { "epoch": 0.956876312862968, "grad_norm": 1.084977751415868, "learning_rate": 1.1550220558650246e-05, "loss": 0.26402851939201355, "step": 1936 }, { "epoch": 0.9573705671568021, "grad_norm": 1.1085858464768976, "learning_rate": 1.1542148224230897e-05, "loss": 0.29163527488708496, "step": 1937 }, { "epoch": 0.9578648214506363, "grad_norm": 1.2120558942254267, "learning_rate": 1.1534074860295426e-05, "loss": 0.302470326423645, "step": 1938 }, { "epoch": 0.9583590757444705, "grad_norm": 1.1861857419569999, "learning_rate": 1.15260004722335e-05, "loss": 0.25946593284606934, "step": 1939 }, { "epoch": 0.9588533300383048, "grad_norm": 1.1153985574382288, "learning_rate": 1.1517925065435457e-05, "loss": 0.2680559456348419, "step": 1940 }, { "epoch": 0.9593475843321388, "grad_norm": 1.2104349484077064, "learning_rate": 1.1509848645292334e-05, "loss": 0.2684473991394043, "step": 1941 }, { "epoch": 0.959841838625973, "grad_norm": 1.245187124369965, "learning_rate": 1.1501771217195827e-05, "loss": 0.2795519232749939, "step": 1942 }, { "epoch": 0.9603360929198073, "grad_norm": 1.2532047895072767, "learning_rate": 1.1493692786538313e-05, "loss": 0.35209575295448303, "step": 1943 }, { "epoch": 0.9608303472136415, "grad_norm": 1.176019791514668, "learning_rate": 1.1485613358712839e-05, "loss": 0.3058928847312927, "step": 1944 }, { "epoch": 0.9613246015074756, "grad_norm": 1.103375830615649, "learning_rate": 1.1477532939113112e-05, "loss": 0.2889159619808197, "step": 1945 }, { "epoch": 0.9618188558013098, "grad_norm": 1.175759039350938, "learning_rate": 1.1469451533133506e-05, "loss": 0.30782538652420044, "step": 1946 }, { "epoch": 0.962313110095144, "grad_norm": 1.1326992133409532, "learning_rate": 1.1461369146169052e-05, "loss": 0.3091726005077362, "step": 1947 }, { "epoch": 0.9628073643889782, "grad_norm": 1.2061917553730328, "learning_rate": 1.1453285783615438e-05, "loss": 0.3287050724029541, "step": 1948 }, { "epoch": 0.9633016186828123, "grad_norm": 1.1941959404182023, "learning_rate": 1.1445201450868998e-05, "loss": 0.31267625093460083, "step": 1949 }, { "epoch": 0.9637958729766465, "grad_norm": 1.1346278168962094, "learning_rate": 1.1437116153326719e-05, "loss": 0.30775952339172363, "step": 1950 }, { "epoch": 0.9642901272704807, "grad_norm": 1.292541938462464, "learning_rate": 1.142902989638623e-05, "loss": 0.3825497329235077, "step": 1951 }, { "epoch": 0.9647843815643148, "grad_norm": 1.0454710330230295, "learning_rate": 1.1420942685445801e-05, "loss": 0.2866062521934509, "step": 1952 }, { "epoch": 0.965278635858149, "grad_norm": 1.144633580750803, "learning_rate": 1.1412854525904335e-05, "loss": 0.27787062525749207, "step": 1953 }, { "epoch": 0.9657728901519832, "grad_norm": 1.1290436448297894, "learning_rate": 1.1404765423161381e-05, "loss": 0.302572101354599, "step": 1954 }, { "epoch": 0.9662671444458174, "grad_norm": 1.0781086639824042, "learning_rate": 1.1396675382617097e-05, "loss": 0.29608359932899475, "step": 1955 }, { "epoch": 0.9667613987396515, "grad_norm": 1.1646658995895742, "learning_rate": 1.1388584409672285e-05, "loss": 0.28057801723480225, "step": 1956 }, { "epoch": 0.9672556530334857, "grad_norm": 1.1188617227766138, "learning_rate": 1.1380492509728363e-05, "loss": 0.29628869891166687, "step": 1957 }, { "epoch": 0.9677499073273199, "grad_norm": 1.1207660926511307, "learning_rate": 1.1372399688187365e-05, "loss": 0.29254984855651855, "step": 1958 }, { "epoch": 0.9682441616211541, "grad_norm": 1.10665523309967, "learning_rate": 1.1364305950451946e-05, "loss": 0.32925280928611755, "step": 1959 }, { "epoch": 0.9687384159149882, "grad_norm": 1.108029328920716, "learning_rate": 1.1356211301925367e-05, "loss": 0.3072258234024048, "step": 1960 }, { "epoch": 0.9692326702088224, "grad_norm": 1.1133536367191044, "learning_rate": 1.1348115748011499e-05, "loss": 0.29737845063209534, "step": 1961 }, { "epoch": 0.9697269245026566, "grad_norm": 1.1169451234105505, "learning_rate": 1.1340019294114822e-05, "loss": 0.27369949221611023, "step": 1962 }, { "epoch": 0.9702211787964908, "grad_norm": 1.2861478922811351, "learning_rate": 1.1331921945640408e-05, "loss": 0.33116602897644043, "step": 1963 }, { "epoch": 0.9707154330903249, "grad_norm": 1.9398235156973715, "learning_rate": 1.1323823707993937e-05, "loss": 0.2620438039302826, "step": 1964 }, { "epoch": 0.9712096873841591, "grad_norm": 1.1505189829247824, "learning_rate": 1.1315724586581673e-05, "loss": 0.3187680244445801, "step": 1965 }, { "epoch": 0.9717039416779933, "grad_norm": 1.2391813787863328, "learning_rate": 1.1307624586810472e-05, "loss": 0.3675233721733093, "step": 1966 }, { "epoch": 0.9721981959718276, "grad_norm": 1.2521490817049854, "learning_rate": 1.1299523714087784e-05, "loss": 0.31064945459365845, "step": 1967 }, { "epoch": 0.9726924502656616, "grad_norm": 1.1166975993354054, "learning_rate": 1.1291421973821632e-05, "loss": 0.2941773235797882, "step": 1968 }, { "epoch": 0.9731867045594959, "grad_norm": 1.2565504643296834, "learning_rate": 1.128331937142062e-05, "loss": 0.3443846106529236, "step": 1969 }, { "epoch": 0.9736809588533301, "grad_norm": 1.1142268279429304, "learning_rate": 1.1275215912293933e-05, "loss": 0.2815151810646057, "step": 1970 }, { "epoch": 0.9741752131471643, "grad_norm": 1.1622346059327586, "learning_rate": 1.1267111601851327e-05, "loss": 0.2886476516723633, "step": 1971 }, { "epoch": 0.9746694674409984, "grad_norm": 1.0942194208380682, "learning_rate": 1.1259006445503116e-05, "loss": 0.2692835330963135, "step": 1972 }, { "epoch": 0.9751637217348326, "grad_norm": 1.1112683317978183, "learning_rate": 1.1250900448660192e-05, "loss": 0.2748587727546692, "step": 1973 }, { "epoch": 0.9756579760286668, "grad_norm": 1.192989589829818, "learning_rate": 1.1242793616734002e-05, "loss": 0.2963098883628845, "step": 1974 }, { "epoch": 0.976152230322501, "grad_norm": 1.1305326657315258, "learning_rate": 1.1234685955136552e-05, "loss": 0.28353193402290344, "step": 1975 }, { "epoch": 0.9766464846163351, "grad_norm": 1.1967273051238179, "learning_rate": 1.1226577469280397e-05, "loss": 0.3308493494987488, "step": 1976 }, { "epoch": 0.9771407389101693, "grad_norm": 1.096933031801606, "learning_rate": 1.1218468164578653e-05, "loss": 0.26923754811286926, "step": 1977 }, { "epoch": 0.9776349932040035, "grad_norm": 1.5091635403311783, "learning_rate": 1.1210358046444968e-05, "loss": 0.2730574905872345, "step": 1978 }, { "epoch": 0.9781292474978376, "grad_norm": 1.1338996219219686, "learning_rate": 1.1202247120293548e-05, "loss": 0.26464858651161194, "step": 1979 }, { "epoch": 0.9786235017916718, "grad_norm": 1.2694994457222093, "learning_rate": 1.1194135391539127e-05, "loss": 0.30095499753952026, "step": 1980 }, { "epoch": 0.979117756085506, "grad_norm": 1.3227283597348862, "learning_rate": 1.1186022865596983e-05, "loss": 0.3418167233467102, "step": 1981 }, { "epoch": 0.9796120103793402, "grad_norm": 1.2780598996117225, "learning_rate": 1.117790954788292e-05, "loss": 0.28735262155532837, "step": 1982 }, { "epoch": 0.9801062646731743, "grad_norm": 1.109707631385258, "learning_rate": 1.116979544381327e-05, "loss": 0.26816800236701965, "step": 1983 }, { "epoch": 0.9806005189670085, "grad_norm": 1.1873089360962268, "learning_rate": 1.1161680558804897e-05, "loss": 0.31004661321640015, "step": 1984 }, { "epoch": 0.9810947732608427, "grad_norm": 1.2669673078204273, "learning_rate": 1.1153564898275184e-05, "loss": 0.33103084564208984, "step": 1985 }, { "epoch": 0.9815890275546769, "grad_norm": 1.3375894512262838, "learning_rate": 1.1145448467642021e-05, "loss": 0.3804841637611389, "step": 1986 }, { "epoch": 0.982083281848511, "grad_norm": 1.2029739003434823, "learning_rate": 1.1137331272323834e-05, "loss": 0.31861352920532227, "step": 1987 }, { "epoch": 0.9825775361423452, "grad_norm": 1.1954996526655464, "learning_rate": 1.1129213317739539e-05, "loss": 0.3022298216819763, "step": 1988 }, { "epoch": 0.9830717904361794, "grad_norm": 1.3466664334904774, "learning_rate": 1.1121094609308564e-05, "loss": 0.38203683495521545, "step": 1989 }, { "epoch": 0.9835660447300136, "grad_norm": 1.215882197519198, "learning_rate": 1.1112975152450848e-05, "loss": 0.3105717897415161, "step": 1990 }, { "epoch": 0.9840602990238477, "grad_norm": 1.2066484647947713, "learning_rate": 1.1104854952586827e-05, "loss": 0.31930285692214966, "step": 1991 }, { "epoch": 0.9845545533176819, "grad_norm": 1.1639723195264664, "learning_rate": 1.1096734015137422e-05, "loss": 0.3167966902256012, "step": 1992 }, { "epoch": 0.9850488076115161, "grad_norm": 1.168704133231974, "learning_rate": 1.1088612345524059e-05, "loss": 0.2693050801753998, "step": 1993 }, { "epoch": 0.9855430619053503, "grad_norm": 1.0985586655404702, "learning_rate": 1.1080489949168651e-05, "loss": 0.27986466884613037, "step": 1994 }, { "epoch": 0.9860373161991844, "grad_norm": 1.1481757517161775, "learning_rate": 1.1072366831493589e-05, "loss": 0.26814526319503784, "step": 1995 }, { "epoch": 0.9865315704930187, "grad_norm": 1.146921609246337, "learning_rate": 1.1064242997921753e-05, "loss": 0.31393951177597046, "step": 1996 }, { "epoch": 0.9870258247868529, "grad_norm": 1.1375630444026625, "learning_rate": 1.1056118453876496e-05, "loss": 0.2958461344242096, "step": 1997 }, { "epoch": 0.9875200790806871, "grad_norm": 1.137037421352785, "learning_rate": 1.1047993204781652e-05, "loss": 0.29744619131088257, "step": 1998 }, { "epoch": 0.9880143333745212, "grad_norm": 1.1508003551512254, "learning_rate": 1.1039867256061516e-05, "loss": 0.29055094718933105, "step": 1999 }, { "epoch": 0.9885085876683554, "grad_norm": 1.1632161121950038, "learning_rate": 1.103174061314086e-05, "loss": 0.29961663484573364, "step": 2000 }, { "epoch": 0.9890028419621896, "grad_norm": 1.0841825843818378, "learning_rate": 1.102361328144491e-05, "loss": 0.34533610939979553, "step": 2001 }, { "epoch": 0.9894970962560238, "grad_norm": 1.1849596678411713, "learning_rate": 1.1015485266399362e-05, "loss": 0.2994460463523865, "step": 2002 }, { "epoch": 0.9899913505498579, "grad_norm": 1.2325420364808024, "learning_rate": 1.1007356573430357e-05, "loss": 0.34309566020965576, "step": 2003 }, { "epoch": 0.9904856048436921, "grad_norm": 1.2050309252665437, "learning_rate": 1.09992272079645e-05, "loss": 0.3049868643283844, "step": 2004 }, { "epoch": 0.9909798591375263, "grad_norm": 1.1759703775328856, "learning_rate": 1.0991097175428833e-05, "loss": 0.30586326122283936, "step": 2005 }, { "epoch": 0.9914741134313604, "grad_norm": 1.1997965130034223, "learning_rate": 1.0982966481250854e-05, "loss": 0.29740482568740845, "step": 2006 }, { "epoch": 0.9919683677251946, "grad_norm": 1.2400023524315222, "learning_rate": 1.0974835130858497e-05, "loss": 0.3218206465244293, "step": 2007 }, { "epoch": 0.9924626220190288, "grad_norm": 1.1309419286206777, "learning_rate": 1.0966703129680139e-05, "loss": 0.2747582495212555, "step": 2008 }, { "epoch": 0.992956876312863, "grad_norm": 1.2581670135770728, "learning_rate": 1.0958570483144578e-05, "loss": 0.33215245604515076, "step": 2009 }, { "epoch": 0.9934511306066971, "grad_norm": 1.2834058413633842, "learning_rate": 1.0950437196681061e-05, "loss": 0.3149756193161011, "step": 2010 }, { "epoch": 0.9939453849005313, "grad_norm": 1.1001136330607295, "learning_rate": 1.0942303275719253e-05, "loss": 0.2763513922691345, "step": 2011 }, { "epoch": 0.9944396391943655, "grad_norm": 1.0592905887432897, "learning_rate": 1.0934168725689239e-05, "loss": 0.2818325161933899, "step": 2012 }, { "epoch": 0.9949338934881997, "grad_norm": 1.1079515754649163, "learning_rate": 1.0926033552021533e-05, "loss": 0.2659858167171478, "step": 2013 }, { "epoch": 0.9954281477820338, "grad_norm": 1.1926210163358253, "learning_rate": 1.091789776014706e-05, "loss": 0.30891451239585876, "step": 2014 }, { "epoch": 0.995922402075868, "grad_norm": 1.2194298136031743, "learning_rate": 1.0909761355497156e-05, "loss": 0.33645111322402954, "step": 2015 }, { "epoch": 0.9964166563697022, "grad_norm": 1.1110546475920504, "learning_rate": 1.0901624343503571e-05, "loss": 0.3086194097995758, "step": 2016 }, { "epoch": 0.9969109106635364, "grad_norm": 1.0167201052564092, "learning_rate": 1.089348672959846e-05, "loss": 0.2614179253578186, "step": 2017 }, { "epoch": 0.9974051649573705, "grad_norm": 1.2224853324284848, "learning_rate": 1.088534851921437e-05, "loss": 0.3300556540489197, "step": 2018 }, { "epoch": 0.9978994192512047, "grad_norm": 1.1929848499106601, "learning_rate": 1.087720971778426e-05, "loss": 0.28443643450737, "step": 2019 }, { "epoch": 0.9983936735450389, "grad_norm": 1.052677422924197, "learning_rate": 1.0869070330741475e-05, "loss": 0.2805534601211548, "step": 2020 }, { "epoch": 0.9988879278388731, "grad_norm": 1.065568553175956, "learning_rate": 1.0860930363519758e-05, "loss": 0.28186699748039246, "step": 2021 }, { "epoch": 0.9993821821327072, "grad_norm": 1.2171160812601536, "learning_rate": 1.0852789821553228e-05, "loss": 0.3527688980102539, "step": 2022 }, { "epoch": 0.9998764364265414, "grad_norm": 1.2020406854373213, "learning_rate": 1.08446487102764e-05, "loss": 0.30708247423171997, "step": 2023 }, { "epoch": 1.0, "grad_norm": 2.286184440614986, "learning_rate": 1.083650703512416e-05, "loss": 0.3015655279159546, "step": 2024 }, { "epoch": 1.0004942542938342, "grad_norm": 1.2067651750081223, "learning_rate": 1.0828364801531777e-05, "loss": 0.29792484641075134, "step": 2025 }, { "epoch": 1.0009885085876684, "grad_norm": 1.1529758757862274, "learning_rate": 1.0820222014934887e-05, "loss": 0.27995994687080383, "step": 2026 }, { "epoch": 1.0014827628815026, "grad_norm": 1.115022133563525, "learning_rate": 1.0812078680769501e-05, "loss": 0.25797444581985474, "step": 2027 }, { "epoch": 1.0019770171753366, "grad_norm": 1.1202805963305373, "learning_rate": 1.0803934804471991e-05, "loss": 0.2834373116493225, "step": 2028 }, { "epoch": 1.0024712714691708, "grad_norm": 1.147731866533824, "learning_rate": 1.079579039147909e-05, "loss": 0.27055832743644714, "step": 2029 }, { "epoch": 1.002965525763005, "grad_norm": 1.1916483552600579, "learning_rate": 1.0787645447227897e-05, "loss": 0.30029311776161194, "step": 2030 }, { "epoch": 1.0034597800568392, "grad_norm": 1.1834514894044206, "learning_rate": 1.0779499977155858e-05, "loss": 0.2741442322731018, "step": 2031 }, { "epoch": 1.0039540343506734, "grad_norm": 1.1233171341295944, "learning_rate": 1.0771353986700767e-05, "loss": 0.27097994089126587, "step": 2032 }, { "epoch": 1.0044482886445076, "grad_norm": 1.1267943347727831, "learning_rate": 1.0763207481300781e-05, "loss": 0.2690125107765198, "step": 2033 }, { "epoch": 1.0049425429383418, "grad_norm": 1.1312636860673373, "learning_rate": 1.0755060466394383e-05, "loss": 0.29656079411506653, "step": 2034 }, { "epoch": 1.005436797232176, "grad_norm": 1.1729529368370135, "learning_rate": 1.0746912947420407e-05, "loss": 0.25291675329208374, "step": 2035 }, { "epoch": 1.00593105152601, "grad_norm": 1.410951786073956, "learning_rate": 1.0738764929818017e-05, "loss": 0.26391562819480896, "step": 2036 }, { "epoch": 1.0064253058198442, "grad_norm": 1.258204498994485, "learning_rate": 1.073061641902672e-05, "loss": 0.2850308418273926, "step": 2037 }, { "epoch": 1.0069195601136784, "grad_norm": 1.1368887973206072, "learning_rate": 1.0722467420486338e-05, "loss": 0.2529013454914093, "step": 2038 }, { "epoch": 1.0074138144075127, "grad_norm": 1.2420233139292696, "learning_rate": 1.0714317939637028e-05, "loss": 0.2577154040336609, "step": 2039 }, { "epoch": 1.0079080687013469, "grad_norm": 1.1996492314644527, "learning_rate": 1.0706167981919269e-05, "loss": 0.28677526116371155, "step": 2040 }, { "epoch": 1.008402322995181, "grad_norm": 1.210233649974949, "learning_rate": 1.0698017552773859e-05, "loss": 0.25146183371543884, "step": 2041 }, { "epoch": 1.0088965772890153, "grad_norm": 1.217205041102825, "learning_rate": 1.0689866657641899e-05, "loss": 0.29958251118659973, "step": 2042 }, { "epoch": 1.0093908315828495, "grad_norm": 1.2422486891064726, "learning_rate": 1.0681715301964817e-05, "loss": 0.28512266278266907, "step": 2043 }, { "epoch": 1.0098850858766835, "grad_norm": 1.3312817373132209, "learning_rate": 1.067356349118434e-05, "loss": 0.29768145084381104, "step": 2044 }, { "epoch": 1.0103793401705177, "grad_norm": 1.2397312600868813, "learning_rate": 1.0665411230742498e-05, "loss": 0.25144103169441223, "step": 2045 }, { "epoch": 1.0108735944643519, "grad_norm": 1.6026936131359757, "learning_rate": 1.0657258526081629e-05, "loss": 0.2673259973526001, "step": 2046 }, { "epoch": 1.011367848758186, "grad_norm": 1.2940971813114743, "learning_rate": 1.0649105382644359e-05, "loss": 0.2845848500728607, "step": 2047 }, { "epoch": 1.0118621030520203, "grad_norm": 1.0898574113835153, "learning_rate": 1.0640951805873607e-05, "loss": 0.2569392919540405, "step": 2048 }, { "epoch": 1.0123563573458545, "grad_norm": 1.2632947550014098, "learning_rate": 1.0632797801212591e-05, "loss": 0.250387966632843, "step": 2049 }, { "epoch": 1.0128506116396887, "grad_norm": 1.233630096360243, "learning_rate": 1.0624643374104804e-05, "loss": 0.28228282928466797, "step": 2050 }, { "epoch": 1.0133448659335227, "grad_norm": 1.0888042979148498, "learning_rate": 1.0616488529994024e-05, "loss": 0.24724754691123962, "step": 2051 }, { "epoch": 1.013839120227357, "grad_norm": 1.2576287774069197, "learning_rate": 1.0608333274324312e-05, "loss": 0.268532395362854, "step": 2052 }, { "epoch": 1.014333374521191, "grad_norm": 1.1578525571147846, "learning_rate": 1.0600177612539995e-05, "loss": 0.27454662322998047, "step": 2053 }, { "epoch": 1.0148276288150253, "grad_norm": 1.2050116136682636, "learning_rate": 1.0592021550085683e-05, "loss": 0.27497538924217224, "step": 2054 }, { "epoch": 1.0153218831088595, "grad_norm": 1.1358282649300115, "learning_rate": 1.0583865092406237e-05, "loss": 0.24480152130126953, "step": 2055 }, { "epoch": 1.0158161374026937, "grad_norm": 1.1352545460867702, "learning_rate": 1.0575708244946805e-05, "loss": 0.23754069209098816, "step": 2056 }, { "epoch": 1.016310391696528, "grad_norm": 1.150720407382798, "learning_rate": 1.056755101315277e-05, "loss": 0.24541275203227997, "step": 2057 }, { "epoch": 1.0168046459903621, "grad_norm": 1.2022551315194179, "learning_rate": 1.055939340246979e-05, "loss": 0.27724504470825195, "step": 2058 }, { "epoch": 1.0172989002841961, "grad_norm": 1.2400168112160508, "learning_rate": 1.0551235418343766e-05, "loss": 0.2869918942451477, "step": 2059 }, { "epoch": 1.0177931545780303, "grad_norm": 1.2299839323583324, "learning_rate": 1.0543077066220854e-05, "loss": 0.27153679728507996, "step": 2060 }, { "epoch": 1.0182874088718645, "grad_norm": 1.1366017541860491, "learning_rate": 1.0534918351547454e-05, "loss": 0.2611347436904907, "step": 2061 }, { "epoch": 1.0187816631656987, "grad_norm": 1.1317421431613228, "learning_rate": 1.0526759279770202e-05, "loss": 0.26649200916290283, "step": 2062 }, { "epoch": 1.019275917459533, "grad_norm": 1.0930466767865903, "learning_rate": 1.0518599856335983e-05, "loss": 0.25164204835891724, "step": 2063 }, { "epoch": 1.0197701717533671, "grad_norm": 1.2027289451385044, "learning_rate": 1.0510440086691911e-05, "loss": 0.288251131772995, "step": 2064 }, { "epoch": 1.0202644260472014, "grad_norm": 1.2837951062377317, "learning_rate": 1.0502279976285325e-05, "loss": 0.27177444100379944, "step": 2065 }, { "epoch": 1.0207586803410356, "grad_norm": 1.222948820556725, "learning_rate": 1.0494119530563812e-05, "loss": 0.2723502218723297, "step": 2066 }, { "epoch": 1.0212529346348695, "grad_norm": 1.214398839170698, "learning_rate": 1.0485958754975156e-05, "loss": 0.2704971432685852, "step": 2067 }, { "epoch": 1.0217471889287038, "grad_norm": 1.267114179641731, "learning_rate": 1.0477797654967376e-05, "loss": 0.30302050709724426, "step": 2068 }, { "epoch": 1.022241443222538, "grad_norm": 1.268227752862744, "learning_rate": 1.0469636235988711e-05, "loss": 0.26408523321151733, "step": 2069 }, { "epoch": 1.0227356975163722, "grad_norm": 1.2197627847133865, "learning_rate": 1.0461474503487606e-05, "loss": 0.2691786289215088, "step": 2070 }, { "epoch": 1.0232299518102064, "grad_norm": 1.2792531550605064, "learning_rate": 1.0453312462912714e-05, "loss": 0.2823137640953064, "step": 2071 }, { "epoch": 1.0237242061040406, "grad_norm": 1.2027503273852609, "learning_rate": 1.04451501197129e-05, "loss": 0.28837013244628906, "step": 2072 }, { "epoch": 1.0242184603978748, "grad_norm": 1.27109994402604, "learning_rate": 1.0436987479337229e-05, "loss": 0.2809562683105469, "step": 2073 }, { "epoch": 1.024712714691709, "grad_norm": 1.240431430170138, "learning_rate": 1.0428824547234956e-05, "loss": 0.2604525685310364, "step": 2074 }, { "epoch": 1.025206968985543, "grad_norm": 1.1799966275921325, "learning_rate": 1.0420661328855546e-05, "loss": 0.24755606055259705, "step": 2075 }, { "epoch": 1.0257012232793772, "grad_norm": 1.148092531592558, "learning_rate": 1.0412497829648642e-05, "loss": 0.2592730224132538, "step": 2076 }, { "epoch": 1.0261954775732114, "grad_norm": 1.2356689091758393, "learning_rate": 1.0404334055064083e-05, "loss": 0.2693594694137573, "step": 2077 }, { "epoch": 1.0266897318670456, "grad_norm": 1.2195187999450414, "learning_rate": 1.0396170010551881e-05, "loss": 0.2712753117084503, "step": 2078 }, { "epoch": 1.0271839861608798, "grad_norm": 1.1741285828383992, "learning_rate": 1.0388005701562245e-05, "loss": 0.2693077027797699, "step": 2079 }, { "epoch": 1.027678240454714, "grad_norm": 1.2670826968894364, "learning_rate": 1.0379841133545544e-05, "loss": 0.2791144847869873, "step": 2080 }, { "epoch": 1.0281724947485482, "grad_norm": 1.163594554813514, "learning_rate": 1.037167631195233e-05, "loss": 0.27496254444122314, "step": 2081 }, { "epoch": 1.0286667490423822, "grad_norm": 1.1305894692188725, "learning_rate": 1.0363511242233322e-05, "loss": 0.26037347316741943, "step": 2082 }, { "epoch": 1.0291610033362164, "grad_norm": 1.2085934995349474, "learning_rate": 1.0355345929839402e-05, "loss": 0.2610514760017395, "step": 2083 }, { "epoch": 1.0296552576300506, "grad_norm": 1.1531883738354434, "learning_rate": 1.0347180380221618e-05, "loss": 0.24750857055187225, "step": 2084 }, { "epoch": 1.0301495119238848, "grad_norm": 1.2017075670935908, "learning_rate": 1.0339014598831169e-05, "loss": 0.2835415303707123, "step": 2085 }, { "epoch": 1.030643766217719, "grad_norm": 1.2153811049556569, "learning_rate": 1.033084859111942e-05, "loss": 0.25762057304382324, "step": 2086 }, { "epoch": 1.0311380205115532, "grad_norm": 1.3245241554987517, "learning_rate": 1.032268236253788e-05, "loss": 0.2818237841129303, "step": 2087 }, { "epoch": 1.0316322748053874, "grad_norm": 1.2402911628462394, "learning_rate": 1.0314515918538202e-05, "loss": 0.27192944288253784, "step": 2088 }, { "epoch": 1.0321265290992216, "grad_norm": 1.1715597954552734, "learning_rate": 1.0306349264572195e-05, "loss": 0.3002319931983948, "step": 2089 }, { "epoch": 1.0326207833930556, "grad_norm": 1.221598051409306, "learning_rate": 1.0298182406091794e-05, "loss": 0.27106401324272156, "step": 2090 }, { "epoch": 1.0331150376868898, "grad_norm": 1.2123644146814079, "learning_rate": 1.0290015348549076e-05, "loss": 0.2740558385848999, "step": 2091 }, { "epoch": 1.033609291980724, "grad_norm": 1.2394453454529126, "learning_rate": 1.0281848097396261e-05, "loss": 0.2970008850097656, "step": 2092 }, { "epoch": 1.0341035462745582, "grad_norm": 1.2003549808286662, "learning_rate": 1.027368065808568e-05, "loss": 0.27684125304222107, "step": 2093 }, { "epoch": 1.0345978005683925, "grad_norm": 1.1371538472805924, "learning_rate": 1.0265513036069803e-05, "loss": 0.2732700705528259, "step": 2094 }, { "epoch": 1.0350920548622267, "grad_norm": 1.1448190493490698, "learning_rate": 1.0257345236801215e-05, "loss": 0.25189805030822754, "step": 2095 }, { "epoch": 1.0355863091560609, "grad_norm": 1.1221327830153236, "learning_rate": 1.0249177265732629e-05, "loss": 0.3177054524421692, "step": 2096 }, { "epoch": 1.036080563449895, "grad_norm": 1.0492479192600686, "learning_rate": 1.0241009128316854e-05, "loss": 0.23350921273231506, "step": 2097 }, { "epoch": 1.036574817743729, "grad_norm": 1.2565303796372052, "learning_rate": 1.0232840830006832e-05, "loss": 0.3011140525341034, "step": 2098 }, { "epoch": 1.0370690720375633, "grad_norm": 1.164329016307231, "learning_rate": 1.0224672376255598e-05, "loss": 0.2578561305999756, "step": 2099 }, { "epoch": 1.0375633263313975, "grad_norm": 1.1701632763887444, "learning_rate": 1.0216503772516297e-05, "loss": 0.2622804045677185, "step": 2100 }, { "epoch": 1.0380575806252317, "grad_norm": 1.219987069304434, "learning_rate": 1.0208335024242169e-05, "loss": 0.2662869691848755, "step": 2101 }, { "epoch": 1.0385518349190659, "grad_norm": 1.2303351498865798, "learning_rate": 1.0200166136886558e-05, "loss": 0.27084922790527344, "step": 2102 }, { "epoch": 1.0390460892129, "grad_norm": 1.2434849653646893, "learning_rate": 1.0191997115902891e-05, "loss": 0.26290780305862427, "step": 2103 }, { "epoch": 1.0395403435067343, "grad_norm": 1.192171896111284, "learning_rate": 1.0183827966744694e-05, "loss": 0.27367106080055237, "step": 2104 }, { "epoch": 1.0400345978005685, "grad_norm": 1.2706879657010888, "learning_rate": 1.0175658694865574e-05, "loss": 0.28507113456726074, "step": 2105 }, { "epoch": 1.0405288520944025, "grad_norm": 1.2299041683114893, "learning_rate": 1.0167489305719221e-05, "loss": 0.2533179521560669, "step": 2106 }, { "epoch": 1.0410231063882367, "grad_norm": 1.2546449586851505, "learning_rate": 1.0159319804759398e-05, "loss": 0.28755924105644226, "step": 2107 }, { "epoch": 1.041517360682071, "grad_norm": 1.1726176332749902, "learning_rate": 1.015115019743995e-05, "loss": 0.26722773909568787, "step": 2108 }, { "epoch": 1.042011614975905, "grad_norm": 1.3986075029095133, "learning_rate": 1.0142980489214788e-05, "loss": 0.3122308850288391, "step": 2109 }, { "epoch": 1.0425058692697393, "grad_norm": 1.1273960807987882, "learning_rate": 1.0134810685537899e-05, "loss": 0.22603261470794678, "step": 2110 }, { "epoch": 1.0430001235635735, "grad_norm": 1.1517998097919544, "learning_rate": 1.0126640791863316e-05, "loss": 0.2823299169540405, "step": 2111 }, { "epoch": 1.0434943778574077, "grad_norm": 1.3191906526904469, "learning_rate": 1.0118470813645156e-05, "loss": 0.30999040603637695, "step": 2112 }, { "epoch": 1.0439886321512417, "grad_norm": 1.1820148857556874, "learning_rate": 1.0110300756337569e-05, "loss": 0.266022264957428, "step": 2113 }, { "epoch": 1.044482886445076, "grad_norm": 1.6608098375974347, "learning_rate": 1.0102130625394776e-05, "loss": 0.2674095034599304, "step": 2114 }, { "epoch": 1.0449771407389101, "grad_norm": 1.2172826939531747, "learning_rate": 1.0093960426271037e-05, "loss": 0.30045652389526367, "step": 2115 }, { "epoch": 1.0454713950327443, "grad_norm": 1.1782919874699391, "learning_rate": 1.0085790164420659e-05, "loss": 0.28455668687820435, "step": 2116 }, { "epoch": 1.0459656493265785, "grad_norm": 1.1749948852757104, "learning_rate": 1.0077619845297992e-05, "loss": 0.2429066300392151, "step": 2117 }, { "epoch": 1.0464599036204127, "grad_norm": 1.1453766958637177, "learning_rate": 1.0069449474357427e-05, "loss": 0.2515121102333069, "step": 2118 }, { "epoch": 1.046954157914247, "grad_norm": 1.234414346344525, "learning_rate": 1.0061279057053385e-05, "loss": 0.30011802911758423, "step": 2119 }, { "epoch": 1.0474484122080812, "grad_norm": 1.1997300836338318, "learning_rate": 1.005310859884032e-05, "loss": 0.2577645480632782, "step": 2120 }, { "epoch": 1.0479426665019151, "grad_norm": 1.0391250618888572, "learning_rate": 1.0044938105172713e-05, "loss": 0.21476465463638306, "step": 2121 }, { "epoch": 1.0484369207957493, "grad_norm": 1.3902782329860977, "learning_rate": 1.0036767581505067e-05, "loss": 0.2587023079395294, "step": 2122 }, { "epoch": 1.0489311750895836, "grad_norm": 1.1311469001510768, "learning_rate": 1.0028597033291911e-05, "loss": 0.2537185251712799, "step": 2123 }, { "epoch": 1.0494254293834178, "grad_norm": 1.0410406857423857, "learning_rate": 1.0020426465987782e-05, "loss": 0.24486014246940613, "step": 2124 }, { "epoch": 1.049919683677252, "grad_norm": 1.4376390907817962, "learning_rate": 1.0012255885047241e-05, "loss": 0.2728436589241028, "step": 2125 }, { "epoch": 1.0504139379710862, "grad_norm": 1.3186765660198476, "learning_rate": 1.0004085295924843e-05, "loss": 0.30238842964172363, "step": 2126 }, { "epoch": 1.0509081922649204, "grad_norm": 1.2910923396564535, "learning_rate": 9.99591470407516e-06, "loss": 0.30347609519958496, "step": 2127 }, { "epoch": 1.0514024465587544, "grad_norm": 1.2188667375190219, "learning_rate": 9.987744114952764e-06, "loss": 0.2581411302089691, "step": 2128 }, { "epoch": 1.0518967008525886, "grad_norm": 1.2560629408792487, "learning_rate": 9.979573534012218e-06, "loss": 0.239881694316864, "step": 2129 }, { "epoch": 1.0523909551464228, "grad_norm": 1.2977893982324902, "learning_rate": 9.971402966708092e-06, "loss": 0.3058615028858185, "step": 2130 }, { "epoch": 1.052885209440257, "grad_norm": 1.2842102843103194, "learning_rate": 9.963232418494936e-06, "loss": 0.25285837054252625, "step": 2131 }, { "epoch": 1.0533794637340912, "grad_norm": 1.2217652802535364, "learning_rate": 9.955061894827294e-06, "loss": 0.27366510033607483, "step": 2132 }, { "epoch": 1.0538737180279254, "grad_norm": 1.1489983530266883, "learning_rate": 9.946891401159683e-06, "loss": 0.22268086671829224, "step": 2133 }, { "epoch": 1.0543679723217596, "grad_norm": 1.1461059074650484, "learning_rate": 9.938720942946616e-06, "loss": 0.2540682554244995, "step": 2134 }, { "epoch": 1.0548622266155938, "grad_norm": 1.2357731632052622, "learning_rate": 9.930550525642576e-06, "loss": 0.262179970741272, "step": 2135 }, { "epoch": 1.0553564809094278, "grad_norm": 1.2267299487839205, "learning_rate": 9.92238015470201e-06, "loss": 0.25471946597099304, "step": 2136 }, { "epoch": 1.055850735203262, "grad_norm": 1.162352058446371, "learning_rate": 9.914209835579344e-06, "loss": 0.2580556571483612, "step": 2137 }, { "epoch": 1.0563449894970962, "grad_norm": 1.261401071852413, "learning_rate": 9.906039573728964e-06, "loss": 0.29909616708755493, "step": 2138 }, { "epoch": 1.0568392437909304, "grad_norm": 1.2162562018595562, "learning_rate": 9.897869374605226e-06, "loss": 0.2828724980354309, "step": 2139 }, { "epoch": 1.0573334980847646, "grad_norm": 1.2076714268656592, "learning_rate": 9.889699243662433e-06, "loss": 0.26731711626052856, "step": 2140 }, { "epoch": 1.0578277523785988, "grad_norm": 1.2666827338430986, "learning_rate": 9.88152918635485e-06, "loss": 0.2912555932998657, "step": 2141 }, { "epoch": 1.058322006672433, "grad_norm": 1.1593053736993435, "learning_rate": 9.873359208136685e-06, "loss": 0.2335313856601715, "step": 2142 }, { "epoch": 1.0588162609662672, "grad_norm": 1.2934128795704303, "learning_rate": 9.865189314462105e-06, "loss": 0.2716987729072571, "step": 2143 }, { "epoch": 1.0593105152601012, "grad_norm": 1.3251488161911162, "learning_rate": 9.857019510785215e-06, "loss": 0.2919968068599701, "step": 2144 }, { "epoch": 1.0598047695539354, "grad_norm": 1.197230535187453, "learning_rate": 9.848849802560057e-06, "loss": 0.26279503107070923, "step": 2145 }, { "epoch": 1.0602990238477696, "grad_norm": 1.263871154668556, "learning_rate": 9.840680195240606e-06, "loss": 0.31622597575187683, "step": 2146 }, { "epoch": 1.0607932781416038, "grad_norm": 1.270948260835911, "learning_rate": 9.832510694280782e-06, "loss": 0.2399556040763855, "step": 2147 }, { "epoch": 1.061287532435438, "grad_norm": 1.2181574543701559, "learning_rate": 9.824341305134428e-06, "loss": 0.2650333046913147, "step": 2148 }, { "epoch": 1.0617817867292723, "grad_norm": 1.274348887888969, "learning_rate": 9.816172033255307e-06, "loss": 0.26629161834716797, "step": 2149 }, { "epoch": 1.0622760410231065, "grad_norm": 1.2611051957138737, "learning_rate": 9.808002884097109e-06, "loss": 0.28042545914649963, "step": 2150 }, { "epoch": 1.0627702953169407, "grad_norm": 1.1495131020915084, "learning_rate": 9.799833863113445e-06, "loss": 0.24374082684516907, "step": 2151 }, { "epoch": 1.0632645496107747, "grad_norm": 1.1048551979398207, "learning_rate": 9.791664975757835e-06, "loss": 0.23013898730278015, "step": 2152 }, { "epoch": 1.0637588039046089, "grad_norm": 1.4072884886903234, "learning_rate": 9.783496227483706e-06, "loss": 0.25313276052474976, "step": 2153 }, { "epoch": 1.064253058198443, "grad_norm": 1.248155174046862, "learning_rate": 9.775327623744403e-06, "loss": 0.2642362713813782, "step": 2154 }, { "epoch": 1.0647473124922773, "grad_norm": 1.1405325090848468, "learning_rate": 9.76715916999317e-06, "loss": 0.2417108118534088, "step": 2155 }, { "epoch": 1.0652415667861115, "grad_norm": 1.2556215450887547, "learning_rate": 9.758990871683148e-06, "loss": 0.25653502345085144, "step": 2156 }, { "epoch": 1.0657358210799457, "grad_norm": 1.22877547041534, "learning_rate": 9.750822734267378e-06, "loss": 0.247604638338089, "step": 2157 }, { "epoch": 1.06623007537378, "grad_norm": 1.2330600407976389, "learning_rate": 9.742654763198786e-06, "loss": 0.2675636112689972, "step": 2158 }, { "epoch": 1.0667243296676139, "grad_norm": 1.230290211943024, "learning_rate": 9.7344869639302e-06, "loss": 0.2570686340332031, "step": 2159 }, { "epoch": 1.067218583961448, "grad_norm": 1.4290278531414855, "learning_rate": 9.726319341914323e-06, "loss": 0.3046165704727173, "step": 2160 }, { "epoch": 1.0677128382552823, "grad_norm": 1.3759048148010737, "learning_rate": 9.718151902603744e-06, "loss": 0.24278515577316284, "step": 2161 }, { "epoch": 1.0682070925491165, "grad_norm": 1.235098490769484, "learning_rate": 9.709984651450924e-06, "loss": 0.2565615773200989, "step": 2162 }, { "epoch": 1.0687013468429507, "grad_norm": 1.3303607886608886, "learning_rate": 9.701817593908209e-06, "loss": 0.2672972083091736, "step": 2163 }, { "epoch": 1.069195601136785, "grad_norm": 1.1620974642583077, "learning_rate": 9.693650735427808e-06, "loss": 0.21376445889472961, "step": 2164 }, { "epoch": 1.0696898554306191, "grad_norm": 1.2628274098639385, "learning_rate": 9.685484081461802e-06, "loss": 0.27743393182754517, "step": 2165 }, { "epoch": 1.0701841097244533, "grad_norm": 1.3615817033316626, "learning_rate": 9.677317637462125e-06, "loss": 0.2747134566307068, "step": 2166 }, { "epoch": 1.0706783640182873, "grad_norm": 1.1533673233774355, "learning_rate": 9.669151408880581e-06, "loss": 0.2775312066078186, "step": 2167 }, { "epoch": 1.0711726183121215, "grad_norm": 1.392383813550365, "learning_rate": 9.660985401168833e-06, "loss": 0.2743167281150818, "step": 2168 }, { "epoch": 1.0716668726059557, "grad_norm": 1.1731022030570613, "learning_rate": 9.652819619778387e-06, "loss": 0.26030686497688293, "step": 2169 }, { "epoch": 1.07216112689979, "grad_norm": 1.2886350622041207, "learning_rate": 9.644654070160603e-06, "loss": 0.32307812571525574, "step": 2170 }, { "epoch": 1.0726553811936241, "grad_norm": 1.309807945595821, "learning_rate": 9.63648875776668e-06, "loss": 0.2773011028766632, "step": 2171 }, { "epoch": 1.0731496354874583, "grad_norm": 1.3767412291020849, "learning_rate": 9.628323688047672e-06, "loss": 0.27996528148651123, "step": 2172 }, { "epoch": 1.0736438897812925, "grad_norm": 1.176261909375135, "learning_rate": 9.620158866454459e-06, "loss": 0.28022176027297974, "step": 2173 }, { "epoch": 1.0741381440751268, "grad_norm": 1.1746327357052728, "learning_rate": 9.61199429843776e-06, "loss": 0.2688876986503601, "step": 2174 }, { "epoch": 1.0746323983689607, "grad_norm": 1.1454924799354713, "learning_rate": 9.60382998944812e-06, "loss": 0.23915211856365204, "step": 2175 }, { "epoch": 1.075126652662795, "grad_norm": 1.1770664027196904, "learning_rate": 9.59566594493592e-06, "loss": 0.2533806264400482, "step": 2176 }, { "epoch": 1.0756209069566292, "grad_norm": 1.2321355277799408, "learning_rate": 9.587502170351361e-06, "loss": 0.2887522876262665, "step": 2177 }, { "epoch": 1.0761151612504634, "grad_norm": 1.2169372388289537, "learning_rate": 9.579338671144459e-06, "loss": 0.2885408401489258, "step": 2178 }, { "epoch": 1.0766094155442976, "grad_norm": 1.2209492195717289, "learning_rate": 9.571175452765045e-06, "loss": 0.25656914710998535, "step": 2179 }, { "epoch": 1.0771036698381318, "grad_norm": 1.2669016448608037, "learning_rate": 9.563012520662773e-06, "loss": 0.2935143709182739, "step": 2180 }, { "epoch": 1.077597924131966, "grad_norm": 1.2902152081672096, "learning_rate": 9.554849880287103e-06, "loss": 0.26728200912475586, "step": 2181 }, { "epoch": 1.0780921784258002, "grad_norm": 1.4327778934971358, "learning_rate": 9.546687537087287e-06, "loss": 0.2558351159095764, "step": 2182 }, { "epoch": 1.0785864327196342, "grad_norm": 1.133861673349663, "learning_rate": 9.538525496512394e-06, "loss": 0.2517240047454834, "step": 2183 }, { "epoch": 1.0790806870134684, "grad_norm": 1.1033603168250732, "learning_rate": 9.53036376401129e-06, "loss": 0.23258647322654724, "step": 2184 }, { "epoch": 1.0795749413073026, "grad_norm": 1.2016172891455823, "learning_rate": 9.522202345032627e-06, "loss": 0.24100016057491302, "step": 2185 }, { "epoch": 1.0800691956011368, "grad_norm": 1.1844138198826075, "learning_rate": 9.51404124502485e-06, "loss": 0.27807697653770447, "step": 2186 }, { "epoch": 1.080563449894971, "grad_norm": 1.2045646158236256, "learning_rate": 9.50588046943619e-06, "loss": 0.26146867871284485, "step": 2187 }, { "epoch": 1.0810577041888052, "grad_norm": 1.3792610621050578, "learning_rate": 9.497720023714675e-06, "loss": 0.28570955991744995, "step": 2188 }, { "epoch": 1.0815519584826394, "grad_norm": 1.146591161630138, "learning_rate": 9.489559913308092e-06, "loss": 0.22583246231079102, "step": 2189 }, { "epoch": 1.0820462127764734, "grad_norm": 1.2292468406383597, "learning_rate": 9.48140014366402e-06, "loss": 0.27526232600212097, "step": 2190 }, { "epoch": 1.0825404670703076, "grad_norm": 1.287410242270342, "learning_rate": 9.473240720229803e-06, "loss": 0.2777514159679413, "step": 2191 }, { "epoch": 1.0830347213641418, "grad_norm": 1.217692620890676, "learning_rate": 9.465081648452549e-06, "loss": 0.25767001509666443, "step": 2192 }, { "epoch": 1.083528975657976, "grad_norm": 1.2401214064051047, "learning_rate": 9.456922933779148e-06, "loss": 0.24114865064620972, "step": 2193 }, { "epoch": 1.0840232299518102, "grad_norm": 1.3343620945353547, "learning_rate": 9.448764581656237e-06, "loss": 0.31198200583457947, "step": 2194 }, { "epoch": 1.0845174842456444, "grad_norm": 1.2865355942160217, "learning_rate": 9.440606597530213e-06, "loss": 0.2724478840827942, "step": 2195 }, { "epoch": 1.0850117385394786, "grad_norm": 1.2982367761916904, "learning_rate": 9.432448986847229e-06, "loss": 0.27796900272369385, "step": 2196 }, { "epoch": 1.0855059928333128, "grad_norm": 1.293883522594156, "learning_rate": 9.424291755053198e-06, "loss": 0.2877587676048279, "step": 2197 }, { "epoch": 1.0860002471271468, "grad_norm": 1.354561961211439, "learning_rate": 9.416134907593764e-06, "loss": 0.2898337244987488, "step": 2198 }, { "epoch": 1.086494501420981, "grad_norm": 1.2931825621227928, "learning_rate": 9.407978449914322e-06, "loss": 0.2544672191143036, "step": 2199 }, { "epoch": 1.0869887557148152, "grad_norm": 1.2905943399481439, "learning_rate": 9.399822387460005e-06, "loss": 0.28336071968078613, "step": 2200 }, { "epoch": 1.0874830100086494, "grad_norm": 1.2871287196611743, "learning_rate": 9.391666725675691e-06, "loss": 0.2862734794616699, "step": 2201 }, { "epoch": 1.0879772643024836, "grad_norm": 1.386969000020192, "learning_rate": 9.383511470005978e-06, "loss": 0.26331260800361633, "step": 2202 }, { "epoch": 1.0884715185963179, "grad_norm": 1.2750467510922643, "learning_rate": 9.375356625895201e-06, "loss": 0.30087417364120483, "step": 2203 }, { "epoch": 1.088965772890152, "grad_norm": 1.3434362766675538, "learning_rate": 9.36720219878741e-06, "loss": 0.2736594080924988, "step": 2204 }, { "epoch": 1.089460027183986, "grad_norm": 1.4852243291487657, "learning_rate": 9.359048194126395e-06, "loss": 0.2704418897628784, "step": 2205 }, { "epoch": 1.0899542814778203, "grad_norm": 1.2230094225693318, "learning_rate": 9.350894617355645e-06, "loss": 0.24540236592292786, "step": 2206 }, { "epoch": 1.0904485357716545, "grad_norm": 1.2299505503288506, "learning_rate": 9.342741473918375e-06, "loss": 0.26376527547836304, "step": 2207 }, { "epoch": 1.0909427900654887, "grad_norm": 1.0803859595224048, "learning_rate": 9.334588769257502e-06, "loss": 0.24062004685401917, "step": 2208 }, { "epoch": 1.0914370443593229, "grad_norm": 1.1443970874822365, "learning_rate": 9.326436508815662e-06, "loss": 0.24209418892860413, "step": 2209 }, { "epoch": 1.091931298653157, "grad_norm": 1.3414968412819865, "learning_rate": 9.318284698035188e-06, "loss": 0.2732285261154175, "step": 2210 }, { "epoch": 1.0924255529469913, "grad_norm": 1.2470429271312866, "learning_rate": 9.310133342358106e-06, "loss": 0.2684158980846405, "step": 2211 }, { "epoch": 1.0929198072408255, "grad_norm": 1.1035267199988392, "learning_rate": 9.301982447226145e-06, "loss": 0.22511601448059082, "step": 2212 }, { "epoch": 1.0934140615346597, "grad_norm": 1.165505029883992, "learning_rate": 9.293832018080731e-06, "loss": 0.2622867226600647, "step": 2213 }, { "epoch": 1.0939083158284937, "grad_norm": 1.2923685951682604, "learning_rate": 9.285682060362974e-06, "loss": 0.3030891418457031, "step": 2214 }, { "epoch": 1.094402570122328, "grad_norm": 1.2523210407583818, "learning_rate": 9.277532579513666e-06, "loss": 0.24928592145442963, "step": 2215 }, { "epoch": 1.094896824416162, "grad_norm": 1.2048717570746186, "learning_rate": 9.269383580973285e-06, "loss": 0.2588339149951935, "step": 2216 }, { "epoch": 1.0953910787099963, "grad_norm": 1.2427748942142012, "learning_rate": 9.261235070181983e-06, "loss": 0.2587873339653015, "step": 2217 }, { "epoch": 1.0958853330038305, "grad_norm": 1.3192410250632676, "learning_rate": 9.253087052579596e-06, "loss": 0.29420971870422363, "step": 2218 }, { "epoch": 1.0963795872976647, "grad_norm": 1.1714489078180652, "learning_rate": 9.244939533605619e-06, "loss": 0.25384342670440674, "step": 2219 }, { "epoch": 1.096873841591499, "grad_norm": 1.2208998726962157, "learning_rate": 9.236792518699224e-06, "loss": 0.23133251070976257, "step": 2220 }, { "epoch": 1.097368095885333, "grad_norm": 1.1919788928879418, "learning_rate": 9.228646013299233e-06, "loss": 0.26196008920669556, "step": 2221 }, { "epoch": 1.0978623501791671, "grad_norm": 1.345065700534229, "learning_rate": 9.220500022844144e-06, "loss": 0.2567690908908844, "step": 2222 }, { "epoch": 1.0983566044730013, "grad_norm": 1.1808254692787845, "learning_rate": 9.212354552772107e-06, "loss": 0.2555367350578308, "step": 2223 }, { "epoch": 1.0988508587668355, "grad_norm": 1.1544608952675586, "learning_rate": 9.204209608520913e-06, "loss": 0.24357245862483978, "step": 2224 }, { "epoch": 1.0993451130606697, "grad_norm": 1.3367524689374175, "learning_rate": 9.19606519552801e-06, "loss": 0.2792712450027466, "step": 2225 }, { "epoch": 1.099839367354504, "grad_norm": 1.3277136329189279, "learning_rate": 9.1879213192305e-06, "loss": 0.29090794920921326, "step": 2226 }, { "epoch": 1.1003336216483381, "grad_norm": 1.304360721279056, "learning_rate": 9.179777985065115e-06, "loss": 0.2777528762817383, "step": 2227 }, { "epoch": 1.1008278759421724, "grad_norm": 1.1781995191131436, "learning_rate": 9.171635198468227e-06, "loss": 0.263868123292923, "step": 2228 }, { "epoch": 1.1013221302360063, "grad_norm": 1.184942105326879, "learning_rate": 9.16349296487584e-06, "loss": 0.24118748307228088, "step": 2229 }, { "epoch": 1.1018163845298405, "grad_norm": 1.2411255946822906, "learning_rate": 9.155351289723603e-06, "loss": 0.2176896631717682, "step": 2230 }, { "epoch": 1.1023106388236747, "grad_norm": 1.3759218504425914, "learning_rate": 9.147210178446776e-06, "loss": 0.24727840721607208, "step": 2231 }, { "epoch": 1.102804893117509, "grad_norm": 1.287783002848043, "learning_rate": 9.139069636480247e-06, "loss": 0.2711295783519745, "step": 2232 }, { "epoch": 1.1032991474113432, "grad_norm": 1.2808604096079383, "learning_rate": 9.130929669258525e-06, "loss": 0.2987736165523529, "step": 2233 }, { "epoch": 1.1037934017051774, "grad_norm": 1.3771259989337001, "learning_rate": 9.122790282215743e-06, "loss": 0.2773835062980652, "step": 2234 }, { "epoch": 1.1042876559990116, "grad_norm": 1.2299830744412572, "learning_rate": 9.114651480785632e-06, "loss": 0.29417523741722107, "step": 2235 }, { "epoch": 1.1047819102928456, "grad_norm": 1.377692958442212, "learning_rate": 9.106513270401545e-06, "loss": 0.2642611265182495, "step": 2236 }, { "epoch": 1.1052761645866798, "grad_norm": 1.2764125735134089, "learning_rate": 9.098375656496434e-06, "loss": 0.2789427638053894, "step": 2237 }, { "epoch": 1.105770418880514, "grad_norm": 1.3238778744589295, "learning_rate": 9.090238644502845e-06, "loss": 0.3002237379550934, "step": 2238 }, { "epoch": 1.1062646731743482, "grad_norm": 1.1862434874371655, "learning_rate": 9.082102239852942e-06, "loss": 0.27620676159858704, "step": 2239 }, { "epoch": 1.1067589274681824, "grad_norm": 1.327009037228036, "learning_rate": 9.07396644797847e-06, "loss": 0.26718735694885254, "step": 2240 }, { "epoch": 1.1072531817620166, "grad_norm": 1.3581828145326202, "learning_rate": 9.065831274310763e-06, "loss": 0.27443817257881165, "step": 2241 }, { "epoch": 1.1077474360558508, "grad_norm": 1.2348189100714968, "learning_rate": 9.057696724280748e-06, "loss": 0.2536284923553467, "step": 2242 }, { "epoch": 1.108241690349685, "grad_norm": 1.274876240899672, "learning_rate": 9.049562803318942e-06, "loss": 0.2583077549934387, "step": 2243 }, { "epoch": 1.108735944643519, "grad_norm": 1.2591915779147578, "learning_rate": 9.041429516855427e-06, "loss": 0.2696278393268585, "step": 2244 }, { "epoch": 1.1092301989373532, "grad_norm": 1.4248240108913692, "learning_rate": 9.033296870319868e-06, "loss": 0.2966364622116089, "step": 2245 }, { "epoch": 1.1097244532311874, "grad_norm": 1.1050822330716321, "learning_rate": 9.025164869141503e-06, "loss": 0.22690679132938385, "step": 2246 }, { "epoch": 1.1102187075250216, "grad_norm": 1.192560579016723, "learning_rate": 9.017033518749147e-06, "loss": 0.2777915894985199, "step": 2247 }, { "epoch": 1.1107129618188558, "grad_norm": 1.3394858504136318, "learning_rate": 9.008902824571168e-06, "loss": 0.2890303134918213, "step": 2248 }, { "epoch": 1.11120721611269, "grad_norm": 1.0426463189164805, "learning_rate": 9.000772792035505e-06, "loss": 0.22669392824172974, "step": 2249 }, { "epoch": 1.1117014704065242, "grad_norm": 1.1970809485558533, "learning_rate": 8.992643426569643e-06, "loss": 0.26416563987731934, "step": 2250 }, { "epoch": 1.1121957247003584, "grad_norm": 1.1888202892832207, "learning_rate": 8.984514733600641e-06, "loss": 0.2745298147201538, "step": 2251 }, { "epoch": 1.1126899789941924, "grad_norm": 1.3798693264357922, "learning_rate": 8.97638671855509e-06, "loss": 0.31175684928894043, "step": 2252 }, { "epoch": 1.1131842332880266, "grad_norm": 1.1626887122886307, "learning_rate": 8.968259386859146e-06, "loss": 0.2632657289505005, "step": 2253 }, { "epoch": 1.1136784875818608, "grad_norm": 1.810662888324155, "learning_rate": 8.960132743938485e-06, "loss": 0.25820252299308777, "step": 2254 }, { "epoch": 1.114172741875695, "grad_norm": 1.061521514088085, "learning_rate": 8.95200679521835e-06, "loss": 0.24255456030368805, "step": 2255 }, { "epoch": 1.1146669961695292, "grad_norm": 1.2696759740581753, "learning_rate": 8.943881546123506e-06, "loss": 0.2973442077636719, "step": 2256 }, { "epoch": 1.1151612504633635, "grad_norm": 1.1336353694819978, "learning_rate": 8.935757002078252e-06, "loss": 0.23320606350898743, "step": 2257 }, { "epoch": 1.1156555047571977, "grad_norm": 1.275444057796017, "learning_rate": 8.927633168506415e-06, "loss": 0.2923268675804138, "step": 2258 }, { "epoch": 1.1161497590510319, "grad_norm": 1.25496425665649, "learning_rate": 8.91951005083135e-06, "loss": 0.25932425260543823, "step": 2259 }, { "epoch": 1.1166440133448658, "grad_norm": 1.2215943645090854, "learning_rate": 8.911387654475943e-06, "loss": 0.2631821036338806, "step": 2260 }, { "epoch": 1.1171382676387, "grad_norm": 1.226020936236602, "learning_rate": 8.903265984862581e-06, "loss": 0.24741420149803162, "step": 2261 }, { "epoch": 1.1176325219325343, "grad_norm": 1.165036984102613, "learning_rate": 8.895145047413178e-06, "loss": 0.2593516707420349, "step": 2262 }, { "epoch": 1.1181267762263685, "grad_norm": 1.2132388690590856, "learning_rate": 8.88702484754915e-06, "loss": 0.22109609842300415, "step": 2263 }, { "epoch": 1.1186210305202027, "grad_norm": 1.242512673005374, "learning_rate": 8.878905390691437e-06, "loss": 0.24363039433956146, "step": 2264 }, { "epoch": 1.1191152848140369, "grad_norm": 1.210365574835302, "learning_rate": 8.870786682260465e-06, "loss": 0.2507505714893341, "step": 2265 }, { "epoch": 1.119609539107871, "grad_norm": 1.3229609964254254, "learning_rate": 8.86266872767617e-06, "loss": 0.303046315908432, "step": 2266 }, { "epoch": 1.120103793401705, "grad_norm": 1.282548473383847, "learning_rate": 8.854551532357977e-06, "loss": 0.257943332195282, "step": 2267 }, { "epoch": 1.1205980476955393, "grad_norm": 1.2641740973335522, "learning_rate": 8.84643510172482e-06, "loss": 0.2697421610355377, "step": 2268 }, { "epoch": 1.1210923019893735, "grad_norm": 1.126371134669409, "learning_rate": 8.838319441195105e-06, "loss": 0.20090234279632568, "step": 2269 }, { "epoch": 1.1215865562832077, "grad_norm": 1.3584193930662543, "learning_rate": 8.830204556186736e-06, "loss": 0.2714189887046814, "step": 2270 }, { "epoch": 1.122080810577042, "grad_norm": 1.1168786328747864, "learning_rate": 8.822090452117084e-06, "loss": 0.23497477173805237, "step": 2271 }, { "epoch": 1.122575064870876, "grad_norm": 1.3047944688196833, "learning_rate": 8.81397713440302e-06, "loss": 0.2582445740699768, "step": 2272 }, { "epoch": 1.1230693191647103, "grad_norm": 1.2807794267280126, "learning_rate": 8.805864608460876e-06, "loss": 0.26494619250297546, "step": 2273 }, { "epoch": 1.1235635734585445, "grad_norm": 1.3251515621500554, "learning_rate": 8.797752879706455e-06, "loss": 0.2767868936061859, "step": 2274 }, { "epoch": 1.1240578277523785, "grad_norm": 1.5161646380346314, "learning_rate": 8.789641953555032e-06, "loss": 0.27696311473846436, "step": 2275 }, { "epoch": 1.1245520820462127, "grad_norm": 1.3659389136687503, "learning_rate": 8.78153183542135e-06, "loss": 0.27048689126968384, "step": 2276 }, { "epoch": 1.125046336340047, "grad_norm": 1.3893625373049876, "learning_rate": 8.773422530719606e-06, "loss": 0.2940211892127991, "step": 2277 }, { "epoch": 1.1255405906338811, "grad_norm": 1.310212206650707, "learning_rate": 8.765314044863453e-06, "loss": 0.24859851598739624, "step": 2278 }, { "epoch": 1.1260348449277153, "grad_norm": 1.3087530353150083, "learning_rate": 8.757206383265998e-06, "loss": 0.28879350423812866, "step": 2279 }, { "epoch": 1.1265290992215495, "grad_norm": 1.2514534154786532, "learning_rate": 8.74909955133981e-06, "loss": 0.24804209172725677, "step": 2280 }, { "epoch": 1.1270233535153837, "grad_norm": 1.3358056447173947, "learning_rate": 8.740993554496886e-06, "loss": 0.3199496567249298, "step": 2281 }, { "epoch": 1.1275176078092177, "grad_norm": 2.15705729620974, "learning_rate": 8.732888398148678e-06, "loss": 0.3098929524421692, "step": 2282 }, { "epoch": 1.128011862103052, "grad_norm": 1.2048730778866592, "learning_rate": 8.724784087706067e-06, "loss": 0.21280749142169952, "step": 2283 }, { "epoch": 1.1285061163968861, "grad_norm": 1.1819530781050969, "learning_rate": 8.716680628579382e-06, "loss": 0.25330856442451477, "step": 2284 }, { "epoch": 1.1290003706907203, "grad_norm": 1.2218083349938962, "learning_rate": 8.708578026178371e-06, "loss": 0.26141977310180664, "step": 2285 }, { "epoch": 1.1294946249845546, "grad_norm": 1.3085311775335164, "learning_rate": 8.700476285912219e-06, "loss": 0.2529010772705078, "step": 2286 }, { "epoch": 1.1299888792783888, "grad_norm": 1.4496496993285695, "learning_rate": 8.69237541318953e-06, "loss": 0.2662504315376282, "step": 2287 }, { "epoch": 1.130483133572223, "grad_norm": 1.2797233255982605, "learning_rate": 8.684275413418329e-06, "loss": 0.2724575996398926, "step": 2288 }, { "epoch": 1.1309773878660572, "grad_norm": 1.2524016016810007, "learning_rate": 8.676176292006065e-06, "loss": 0.2820962965488434, "step": 2289 }, { "epoch": 1.1314716421598914, "grad_norm": 1.2157522787611978, "learning_rate": 8.668078054359595e-06, "loss": 0.2594743072986603, "step": 2290 }, { "epoch": 1.1319658964537254, "grad_norm": 1.1017631552140204, "learning_rate": 8.659980705885183e-06, "loss": 0.25397709012031555, "step": 2291 }, { "epoch": 1.1324601507475596, "grad_norm": 1.3505914192645034, "learning_rate": 8.651884251988503e-06, "loss": 0.27261337637901306, "step": 2292 }, { "epoch": 1.1329544050413938, "grad_norm": 1.191460472235454, "learning_rate": 8.643788698074638e-06, "loss": 0.2726992070674896, "step": 2293 }, { "epoch": 1.133448659335228, "grad_norm": 1.2175895117879216, "learning_rate": 8.635694049548058e-06, "loss": 0.2792774438858032, "step": 2294 }, { "epoch": 1.1339429136290622, "grad_norm": 1.272860546351146, "learning_rate": 8.627600311812638e-06, "loss": 0.310885488986969, "step": 2295 }, { "epoch": 1.1344371679228964, "grad_norm": 1.2747295027163217, "learning_rate": 8.619507490271638e-06, "loss": 0.27060413360595703, "step": 2296 }, { "epoch": 1.1349314222167306, "grad_norm": 1.2507140444567972, "learning_rate": 8.611415590327718e-06, "loss": 0.27069440484046936, "step": 2297 }, { "epoch": 1.1354256765105646, "grad_norm": 1.2299186955801236, "learning_rate": 8.603324617382905e-06, "loss": 0.2790459990501404, "step": 2298 }, { "epoch": 1.1359199308043988, "grad_norm": 1.2813816772493964, "learning_rate": 8.595234576838624e-06, "loss": 0.27170947194099426, "step": 2299 }, { "epoch": 1.136414185098233, "grad_norm": 1.1903279302585759, "learning_rate": 8.587145474095665e-06, "loss": 0.25313863158226013, "step": 2300 }, { "epoch": 1.1369084393920672, "grad_norm": 1.2968469055543796, "learning_rate": 8.5790573145542e-06, "loss": 0.289467990398407, "step": 2301 }, { "epoch": 1.1374026936859014, "grad_norm": 1.3141096348522086, "learning_rate": 8.570970103613774e-06, "loss": 0.29796460270881653, "step": 2302 }, { "epoch": 1.1378969479797356, "grad_norm": 1.2855551342619271, "learning_rate": 8.562883846673286e-06, "loss": 0.27264270186424255, "step": 2303 }, { "epoch": 1.1383912022735698, "grad_norm": 1.2243974310235655, "learning_rate": 8.554798549131005e-06, "loss": 0.3099757134914398, "step": 2304 }, { "epoch": 1.138885456567404, "grad_norm": 1.2936181628424743, "learning_rate": 8.546714216384565e-06, "loss": 0.30002498626708984, "step": 2305 }, { "epoch": 1.139379710861238, "grad_norm": 1.7617864884936485, "learning_rate": 8.538630853830951e-06, "loss": 0.2428818643093109, "step": 2306 }, { "epoch": 1.1398739651550722, "grad_norm": 1.24686983002664, "learning_rate": 8.530548466866497e-06, "loss": 0.2601294219493866, "step": 2307 }, { "epoch": 1.1403682194489064, "grad_norm": 1.2066765531591284, "learning_rate": 8.522467060886888e-06, "loss": 0.23878628015518188, "step": 2308 }, { "epoch": 1.1408624737427406, "grad_norm": 1.345733709932402, "learning_rate": 8.514386641287163e-06, "loss": 0.2780643403530121, "step": 2309 }, { "epoch": 1.1413567280365748, "grad_norm": 1.2756115099724787, "learning_rate": 8.506307213461689e-06, "loss": 0.29834824800491333, "step": 2310 }, { "epoch": 1.141850982330409, "grad_norm": 1.3376095615389103, "learning_rate": 8.498228782804175e-06, "loss": 0.2733996510505676, "step": 2311 }, { "epoch": 1.1423452366242433, "grad_norm": 1.3063802509871558, "learning_rate": 8.490151354707669e-06, "loss": 0.2524843215942383, "step": 2312 }, { "epoch": 1.1428394909180772, "grad_norm": 1.2776723106689647, "learning_rate": 8.482074934564543e-06, "loss": 0.29077857732772827, "step": 2313 }, { "epoch": 1.1433337452119114, "grad_norm": 1.2114776729729342, "learning_rate": 8.473999527766503e-06, "loss": 0.25935155153274536, "step": 2314 }, { "epoch": 1.1438279995057457, "grad_norm": 1.3166365920869918, "learning_rate": 8.465925139704578e-06, "loss": 0.23595012724399567, "step": 2315 }, { "epoch": 1.1443222537995799, "grad_norm": 1.2268504419293456, "learning_rate": 8.457851775769108e-06, "loss": 0.25193360447883606, "step": 2316 }, { "epoch": 1.144816508093414, "grad_norm": 1.2847886622034916, "learning_rate": 8.449779441349755e-06, "loss": 0.26844412088394165, "step": 2317 }, { "epoch": 1.1453107623872483, "grad_norm": 1.2550831674884213, "learning_rate": 8.441708141835499e-06, "loss": 0.2507320046424866, "step": 2318 }, { "epoch": 1.1458050166810825, "grad_norm": 1.31186920690482, "learning_rate": 8.433637882614624e-06, "loss": 0.2756047248840332, "step": 2319 }, { "epoch": 1.1462992709749167, "grad_norm": 1.3818376930568548, "learning_rate": 8.425568669074717e-06, "loss": 0.3136482536792755, "step": 2320 }, { "epoch": 1.146793525268751, "grad_norm": 1.3094285230006764, "learning_rate": 8.417500506602668e-06, "loss": 0.25975438952445984, "step": 2321 }, { "epoch": 1.1472877795625849, "grad_norm": 1.3148310008881885, "learning_rate": 8.409433400584674e-06, "loss": 0.2524915039539337, "step": 2322 }, { "epoch": 1.147782033856419, "grad_norm": 1.316055955366049, "learning_rate": 8.401367356406214e-06, "loss": 0.2731180787086487, "step": 2323 }, { "epoch": 1.1482762881502533, "grad_norm": 1.4277670811350172, "learning_rate": 8.393302379452065e-06, "loss": 0.27752095460891724, "step": 2324 }, { "epoch": 1.1487705424440875, "grad_norm": 1.2586766809004215, "learning_rate": 8.385238475106287e-06, "loss": 0.269240140914917, "step": 2325 }, { "epoch": 1.1492647967379217, "grad_norm": 1.301058586916402, "learning_rate": 8.377175648752236e-06, "loss": 0.2668418288230896, "step": 2326 }, { "epoch": 1.149759051031756, "grad_norm": 1.2869179599070777, "learning_rate": 8.369113905772532e-06, "loss": 0.29276758432388306, "step": 2327 }, { "epoch": 1.15025330532559, "grad_norm": 1.240170388592341, "learning_rate": 8.361053251549083e-06, "loss": 0.26562872529029846, "step": 2328 }, { "epoch": 1.150747559619424, "grad_norm": 1.2907483203574122, "learning_rate": 8.352993691463063e-06, "loss": 0.257779061794281, "step": 2329 }, { "epoch": 1.1512418139132583, "grad_norm": 1.3761256870332743, "learning_rate": 8.344935230894926e-06, "loss": 0.2871868312358856, "step": 2330 }, { "epoch": 1.1517360682070925, "grad_norm": 1.2766304490065612, "learning_rate": 8.336877875224379e-06, "loss": 0.25191348791122437, "step": 2331 }, { "epoch": 1.1522303225009267, "grad_norm": 1.1532415542893881, "learning_rate": 8.3288216298304e-06, "loss": 0.27057239413261414, "step": 2332 }, { "epoch": 1.152724576794761, "grad_norm": 1.3903855220327628, "learning_rate": 8.32076650009122e-06, "loss": 0.31574326753616333, "step": 2333 }, { "epoch": 1.1532188310885951, "grad_norm": 1.5549371484345924, "learning_rate": 8.312712491384332e-06, "loss": 0.22503693401813507, "step": 2334 }, { "epoch": 1.1537130853824293, "grad_norm": 1.2363735263099107, "learning_rate": 8.304659609086478e-06, "loss": 0.25754863023757935, "step": 2335 }, { "epoch": 1.1542073396762635, "grad_norm": 1.1790773293013888, "learning_rate": 8.296607858573646e-06, "loss": 0.24367934465408325, "step": 2336 }, { "epoch": 1.1547015939700975, "grad_norm": 1.2792693896599328, "learning_rate": 8.288557245221068e-06, "loss": 0.28907084465026855, "step": 2337 }, { "epoch": 1.1551958482639317, "grad_norm": 1.1852966795691644, "learning_rate": 8.280507774403217e-06, "loss": 0.24526283144950867, "step": 2338 }, { "epoch": 1.155690102557766, "grad_norm": 1.1473094958169556, "learning_rate": 8.272459451493811e-06, "loss": 0.21968787908554077, "step": 2339 }, { "epoch": 1.1561843568516001, "grad_norm": 1.1613080622383485, "learning_rate": 8.264412281865791e-06, "loss": 0.23803061246871948, "step": 2340 }, { "epoch": 1.1566786111454344, "grad_norm": 1.2818425210270699, "learning_rate": 8.256366270891335e-06, "loss": 0.25715917348861694, "step": 2341 }, { "epoch": 1.1571728654392686, "grad_norm": 1.300939575113673, "learning_rate": 8.248321423941836e-06, "loss": 0.29443520307540894, "step": 2342 }, { "epoch": 1.1576671197331028, "grad_norm": 1.2224332053171705, "learning_rate": 8.240277746387934e-06, "loss": 0.24904949963092804, "step": 2343 }, { "epoch": 1.1581613740269368, "grad_norm": 1.2866663921835886, "learning_rate": 8.23223524359946e-06, "loss": 0.2594628632068634, "step": 2344 }, { "epoch": 1.158655628320771, "grad_norm": 1.2731058113968243, "learning_rate": 8.224193920945482e-06, "loss": 0.23853302001953125, "step": 2345 }, { "epoch": 1.1591498826146052, "grad_norm": 1.3394742959570003, "learning_rate": 8.216153783794266e-06, "loss": 0.25465112924575806, "step": 2346 }, { "epoch": 1.1596441369084394, "grad_norm": 1.3135301213887383, "learning_rate": 8.208114837513297e-06, "loss": 0.28038230538368225, "step": 2347 }, { "epoch": 1.1601383912022736, "grad_norm": 1.350685866794537, "learning_rate": 8.200077087469262e-06, "loss": 0.3144591450691223, "step": 2348 }, { "epoch": 1.1606326454961078, "grad_norm": 1.281224607522297, "learning_rate": 8.192040539028047e-06, "loss": 0.25782787799835205, "step": 2349 }, { "epoch": 1.161126899789942, "grad_norm": 1.5124699254380607, "learning_rate": 8.18400519755473e-06, "loss": 0.21928566694259644, "step": 2350 }, { "epoch": 1.1616211540837762, "grad_norm": 1.2617101773123074, "learning_rate": 8.175971068413598e-06, "loss": 0.2277221381664276, "step": 2351 }, { "epoch": 1.1621154083776104, "grad_norm": 1.3465952359588251, "learning_rate": 8.16793815696812e-06, "loss": 0.26971378922462463, "step": 2352 }, { "epoch": 1.1626096626714444, "grad_norm": 1.352802202139023, "learning_rate": 8.15990646858095e-06, "loss": 0.26448535919189453, "step": 2353 }, { "epoch": 1.1631039169652786, "grad_norm": 1.3091049684475664, "learning_rate": 8.151876008613927e-06, "loss": 0.26372095942497253, "step": 2354 }, { "epoch": 1.1635981712591128, "grad_norm": 1.3450938198850664, "learning_rate": 8.143846782428078e-06, "loss": 0.2594243288040161, "step": 2355 }, { "epoch": 1.164092425552947, "grad_norm": 1.2377171543356333, "learning_rate": 8.135818795383597e-06, "loss": 0.23994986712932587, "step": 2356 }, { "epoch": 1.1645866798467812, "grad_norm": 1.2983017697862052, "learning_rate": 8.12779205283985e-06, "loss": 0.2746032476425171, "step": 2357 }, { "epoch": 1.1650809341406154, "grad_norm": 1.3938993958898265, "learning_rate": 8.119766560155377e-06, "loss": 0.3323846161365509, "step": 2358 }, { "epoch": 1.1655751884344494, "grad_norm": 1.3890076094482564, "learning_rate": 8.111742322687886e-06, "loss": 0.28155508637428284, "step": 2359 }, { "epoch": 1.1660694427282836, "grad_norm": 1.361844276882708, "learning_rate": 8.103719345794237e-06, "loss": 0.2936748266220093, "step": 2360 }, { "epoch": 1.1665636970221178, "grad_norm": 1.2168650482731003, "learning_rate": 8.095697634830463e-06, "loss": 0.23575282096862793, "step": 2361 }, { "epoch": 1.167057951315952, "grad_norm": 1.277845029620416, "learning_rate": 8.087677195151737e-06, "loss": 0.24547496438026428, "step": 2362 }, { "epoch": 1.1675522056097862, "grad_norm": 1.3371291006512767, "learning_rate": 8.079658032112388e-06, "loss": 0.2936372458934784, "step": 2363 }, { "epoch": 1.1680464599036204, "grad_norm": 1.316297337509115, "learning_rate": 8.071640151065902e-06, "loss": 0.28602418303489685, "step": 2364 }, { "epoch": 1.1685407141974546, "grad_norm": 1.271542457187923, "learning_rate": 8.0636235573649e-06, "loss": 0.2742761969566345, "step": 2365 }, { "epoch": 1.1690349684912889, "grad_norm": 1.2379702024007857, "learning_rate": 8.05560825636114e-06, "loss": 0.2590268552303314, "step": 2366 }, { "epoch": 1.169529222785123, "grad_norm": 1.2195835846594238, "learning_rate": 8.047594253405525e-06, "loss": 0.26881399750709534, "step": 2367 }, { "epoch": 1.170023477078957, "grad_norm": 1.279205613064969, "learning_rate": 8.039581553848093e-06, "loss": 0.27069953083992004, "step": 2368 }, { "epoch": 1.1705177313727912, "grad_norm": 1.1650094541250327, "learning_rate": 8.031570163038005e-06, "loss": 0.27320611476898193, "step": 2369 }, { "epoch": 1.1710119856666255, "grad_norm": 1.289507742767465, "learning_rate": 8.023560086323548e-06, "loss": 0.26400327682495117, "step": 2370 }, { "epoch": 1.1715062399604597, "grad_norm": 1.1403608861276666, "learning_rate": 8.015551329052136e-06, "loss": 0.22287744283676147, "step": 2371 }, { "epoch": 1.1720004942542939, "grad_norm": 1.2409841787965832, "learning_rate": 8.007543896570309e-06, "loss": 0.28240424394607544, "step": 2372 }, { "epoch": 1.172494748548128, "grad_norm": 1.3414402473623117, "learning_rate": 7.999537794223702e-06, "loss": 0.27119147777557373, "step": 2373 }, { "epoch": 1.1729890028419623, "grad_norm": 1.376418134177551, "learning_rate": 7.991533027357085e-06, "loss": 0.2579900920391083, "step": 2374 }, { "epoch": 1.1734832571357963, "grad_norm": 1.197547817498857, "learning_rate": 7.983529601314317e-06, "loss": 0.25550374388694763, "step": 2375 }, { "epoch": 1.1739775114296305, "grad_norm": 1.119102387270249, "learning_rate": 7.97552752143838e-06, "loss": 0.21197429299354553, "step": 2376 }, { "epoch": 1.1744717657234647, "grad_norm": 1.245296460371477, "learning_rate": 7.96752679307134e-06, "loss": 0.28724029660224915, "step": 2377 }, { "epoch": 1.1749660200172989, "grad_norm": 1.119081251981291, "learning_rate": 7.959527421554375e-06, "loss": 0.24320468306541443, "step": 2378 }, { "epoch": 1.175460274311133, "grad_norm": 1.1094352642608503, "learning_rate": 7.951529412227745e-06, "loss": 0.22487501800060272, "step": 2379 }, { "epoch": 1.1759545286049673, "grad_norm": 1.1424975538486684, "learning_rate": 7.943532770430811e-06, "loss": 0.2754969894886017, "step": 2380 }, { "epoch": 1.1764487828988015, "grad_norm": 1.2424832323819373, "learning_rate": 7.93553750150202e-06, "loss": 0.2734825909137726, "step": 2381 }, { "epoch": 1.1769430371926357, "grad_norm": 1.3311172796502668, "learning_rate": 7.927543610778895e-06, "loss": 0.2803332209587097, "step": 2382 }, { "epoch": 1.1774372914864697, "grad_norm": 1.3572589379934268, "learning_rate": 7.919551103598037e-06, "loss": 0.2820316255092621, "step": 2383 }, { "epoch": 1.177931545780304, "grad_norm": 1.1984541262238777, "learning_rate": 7.911559985295142e-06, "loss": 0.26788315176963806, "step": 2384 }, { "epoch": 1.178425800074138, "grad_norm": 1.152974420484647, "learning_rate": 7.90357026120496e-06, "loss": 0.2562825083732605, "step": 2385 }, { "epoch": 1.1789200543679723, "grad_norm": 1.3733272776027918, "learning_rate": 7.895581936661316e-06, "loss": 0.28260675072669983, "step": 2386 }, { "epoch": 1.1794143086618065, "grad_norm": 1.2509507258139472, "learning_rate": 7.887595016997105e-06, "loss": 0.25887200236320496, "step": 2387 }, { "epoch": 1.1799085629556407, "grad_norm": 1.1852436756934879, "learning_rate": 7.879609507544274e-06, "loss": 0.2351648062467575, "step": 2388 }, { "epoch": 1.180402817249475, "grad_norm": 1.310528017980178, "learning_rate": 7.871625413633843e-06, "loss": 0.2958889305591583, "step": 2389 }, { "epoch": 1.180897071543309, "grad_norm": 1.260660594043313, "learning_rate": 7.863642740595873e-06, "loss": 0.29704710841178894, "step": 2390 }, { "epoch": 1.1813913258371431, "grad_norm": 1.1273593973839822, "learning_rate": 7.855661493759488e-06, "loss": 0.23283210396766663, "step": 2391 }, { "epoch": 1.1818855801309773, "grad_norm": 1.1497387573049556, "learning_rate": 7.847681678452846e-06, "loss": 0.22818870842456818, "step": 2392 }, { "epoch": 1.1823798344248115, "grad_norm": 1.2334848445567106, "learning_rate": 7.839703300003163e-06, "loss": 0.2345077246427536, "step": 2393 }, { "epoch": 1.1828740887186457, "grad_norm": 1.3979127898652413, "learning_rate": 7.831726363736694e-06, "loss": 0.31161409616470337, "step": 2394 }, { "epoch": 1.18336834301248, "grad_norm": 1.3157666615230723, "learning_rate": 7.823750874978724e-06, "loss": 0.2958439588546753, "step": 2395 }, { "epoch": 1.1838625973063142, "grad_norm": 1.1914805532137183, "learning_rate": 7.815776839053568e-06, "loss": 0.24895446002483368, "step": 2396 }, { "epoch": 1.1843568516001484, "grad_norm": 1.189611866561264, "learning_rate": 7.807804261284591e-06, "loss": 0.2691795825958252, "step": 2397 }, { "epoch": 1.1848511058939826, "grad_norm": 1.2282823509277643, "learning_rate": 7.799833146994165e-06, "loss": 0.26797783374786377, "step": 2398 }, { "epoch": 1.1853453601878166, "grad_norm": 1.2297499766268158, "learning_rate": 7.791863501503694e-06, "loss": 0.2665610611438751, "step": 2399 }, { "epoch": 1.1858396144816508, "grad_norm": 1.1290863581864232, "learning_rate": 7.783895330133596e-06, "loss": 0.24712792038917542, "step": 2400 }, { "epoch": 1.186333868775485, "grad_norm": 1.2300895404986125, "learning_rate": 7.775928638203316e-06, "loss": 0.24131645262241364, "step": 2401 }, { "epoch": 1.1868281230693192, "grad_norm": 1.2566198414342145, "learning_rate": 7.7679634310313e-06, "loss": 0.24233923852443695, "step": 2402 }, { "epoch": 1.1873223773631534, "grad_norm": 1.2397915401139883, "learning_rate": 7.759999713935002e-06, "loss": 0.24929150938987732, "step": 2403 }, { "epoch": 1.1878166316569876, "grad_norm": 1.2005274695814647, "learning_rate": 7.752037492230887e-06, "loss": 0.266767293214798, "step": 2404 }, { "epoch": 1.1883108859508218, "grad_norm": 1.2083997342227277, "learning_rate": 7.744076771234427e-06, "loss": 0.257263720035553, "step": 2405 }, { "epoch": 1.1888051402446558, "grad_norm": 1.3017758985808945, "learning_rate": 7.73611755626008e-06, "loss": 0.26949891448020935, "step": 2406 }, { "epoch": 1.18929939453849, "grad_norm": 1.3523825920294412, "learning_rate": 7.728159852621308e-06, "loss": 0.250274121761322, "step": 2407 }, { "epoch": 1.1897936488323242, "grad_norm": 1.6370645689880403, "learning_rate": 7.720203665630553e-06, "loss": 0.2442864030599594, "step": 2408 }, { "epoch": 1.1902879031261584, "grad_norm": 1.4258170868908235, "learning_rate": 7.71224900059926e-06, "loss": 0.273416131734848, "step": 2409 }, { "epoch": 1.1907821574199926, "grad_norm": 1.2547538223250059, "learning_rate": 7.704295862837845e-06, "loss": 0.2559645175933838, "step": 2410 }, { "epoch": 1.1912764117138268, "grad_norm": 1.3439078919148493, "learning_rate": 7.696344257655713e-06, "loss": 0.2793371379375458, "step": 2411 }, { "epoch": 1.191770666007661, "grad_norm": 1.1661216324600743, "learning_rate": 7.688394190361235e-06, "loss": 0.23739437758922577, "step": 2412 }, { "epoch": 1.1922649203014952, "grad_norm": 1.293132062594429, "learning_rate": 7.680445666261766e-06, "loss": 0.27027466893196106, "step": 2413 }, { "epoch": 1.1927591745953292, "grad_norm": 1.2887121644516222, "learning_rate": 7.672498690663632e-06, "loss": 0.2641778886318207, "step": 2414 }, { "epoch": 1.1932534288891634, "grad_norm": 1.235898023301149, "learning_rate": 7.664553268872116e-06, "loss": 0.25086820125579834, "step": 2415 }, { "epoch": 1.1937476831829976, "grad_norm": 1.6761712741491541, "learning_rate": 7.656609406191467e-06, "loss": 0.2871254086494446, "step": 2416 }, { "epoch": 1.1942419374768318, "grad_norm": 1.193500770631568, "learning_rate": 7.648667107924893e-06, "loss": 0.2657528221607208, "step": 2417 }, { "epoch": 1.194736191770666, "grad_norm": 1.3739698225148846, "learning_rate": 7.640726379374564e-06, "loss": 0.26942694187164307, "step": 2418 }, { "epoch": 1.1952304460645002, "grad_norm": 1.1561137180130854, "learning_rate": 7.632787225841593e-06, "loss": 0.23883840441703796, "step": 2419 }, { "epoch": 1.1957247003583344, "grad_norm": 1.215726770348901, "learning_rate": 7.624849652626049e-06, "loss": 0.24837304651737213, "step": 2420 }, { "epoch": 1.1962189546521684, "grad_norm": 1.194954932679119, "learning_rate": 7.616913665026936e-06, "loss": 0.2882450222969055, "step": 2421 }, { "epoch": 1.1967132089460026, "grad_norm": 1.4557191034476904, "learning_rate": 7.608979268342213e-06, "loss": 0.25877460837364197, "step": 2422 }, { "epoch": 1.1972074632398368, "grad_norm": 1.2343724838571453, "learning_rate": 7.601046467868767e-06, "loss": 0.26970750093460083, "step": 2423 }, { "epoch": 1.197701717533671, "grad_norm": 1.1598747816375319, "learning_rate": 7.593115268902423e-06, "loss": 0.23771706223487854, "step": 2424 }, { "epoch": 1.1981959718275053, "grad_norm": 1.1949187968831856, "learning_rate": 7.585185676737932e-06, "loss": 0.25420787930488586, "step": 2425 }, { "epoch": 1.1986902261213395, "grad_norm": 1.248194263596005, "learning_rate": 7.577257696668982e-06, "loss": 0.2551025152206421, "step": 2426 }, { "epoch": 1.1991844804151737, "grad_norm": 1.1913659485965633, "learning_rate": 7.569331333988177e-06, "loss": 0.2302972972393036, "step": 2427 }, { "epoch": 1.1996787347090079, "grad_norm": 1.340176223566515, "learning_rate": 7.561406593987045e-06, "loss": 0.25811445713043213, "step": 2428 }, { "epoch": 1.200172989002842, "grad_norm": 1.1946803554276415, "learning_rate": 7.5534834819560235e-06, "loss": 0.2550782561302185, "step": 2429 }, { "epoch": 1.200667243296676, "grad_norm": 1.3588122473637638, "learning_rate": 7.545562003184474e-06, "loss": 0.24825535714626312, "step": 2430 }, { "epoch": 1.2011614975905103, "grad_norm": 1.3105140055807547, "learning_rate": 7.537642162960664e-06, "loss": 0.29703712463378906, "step": 2431 }, { "epoch": 1.2016557518843445, "grad_norm": 1.2707072551305245, "learning_rate": 7.5297239665717625e-06, "loss": 0.26830747723579407, "step": 2432 }, { "epoch": 1.2021500061781787, "grad_norm": 1.2272388404108225, "learning_rate": 7.521807419303846e-06, "loss": 0.2428341656923294, "step": 2433 }, { "epoch": 1.202644260472013, "grad_norm": 1.3310573803274635, "learning_rate": 7.513892526441883e-06, "loss": 0.2843051552772522, "step": 2434 }, { "epoch": 1.203138514765847, "grad_norm": 1.297091941411815, "learning_rate": 7.50597929326975e-06, "loss": 0.2485228031873703, "step": 2435 }, { "epoch": 1.203632769059681, "grad_norm": 1.3716686006321661, "learning_rate": 7.498067725070206e-06, "loss": 0.25343626737594604, "step": 2436 }, { "epoch": 1.2041270233535153, "grad_norm": 1.3197919626781558, "learning_rate": 7.490157827124902e-06, "loss": 0.24906575679779053, "step": 2437 }, { "epoch": 1.2046212776473495, "grad_norm": 1.6398204697926184, "learning_rate": 7.4822496047143665e-06, "loss": 0.33576443791389465, "step": 2438 }, { "epoch": 1.2051155319411837, "grad_norm": 1.341601959864184, "learning_rate": 7.474343063118023e-06, "loss": 0.2755683362483978, "step": 2439 }, { "epoch": 1.205609786235018, "grad_norm": 1.259839098151577, "learning_rate": 7.466438207614165e-06, "loss": 0.2667745351791382, "step": 2440 }, { "epoch": 1.2061040405288521, "grad_norm": 1.3942381323272646, "learning_rate": 7.458535043479959e-06, "loss": 0.2970271408557892, "step": 2441 }, { "epoch": 1.2065982948226863, "grad_norm": 1.2934031608191798, "learning_rate": 7.450633575991442e-06, "loss": 0.2628048360347748, "step": 2442 }, { "epoch": 1.2070925491165205, "grad_norm": 1.3935428467061275, "learning_rate": 7.442733810423526e-06, "loss": 0.29923003911972046, "step": 2443 }, { "epoch": 1.2075868034103547, "grad_norm": 1.2121764987473183, "learning_rate": 7.4348357520499805e-06, "loss": 0.2486419975757599, "step": 2444 }, { "epoch": 1.2080810577041887, "grad_norm": 1.2651423288599317, "learning_rate": 7.4269394061434315e-06, "loss": 0.2711118459701538, "step": 2445 }, { "epoch": 1.208575311998023, "grad_norm": 1.2689988235231109, "learning_rate": 7.419044777975371e-06, "loss": 0.2568815052509308, "step": 2446 }, { "epoch": 1.2090695662918571, "grad_norm": 1.3357220203112758, "learning_rate": 7.411151872816143e-06, "loss": 0.2546462416648865, "step": 2447 }, { "epoch": 1.2095638205856913, "grad_norm": 1.1716595202066384, "learning_rate": 7.403260695934933e-06, "loss": 0.23455393314361572, "step": 2448 }, { "epoch": 1.2100580748795255, "grad_norm": 1.3263077198790523, "learning_rate": 7.395371252599779e-06, "loss": 0.2874235510826111, "step": 2449 }, { "epoch": 1.2105523291733598, "grad_norm": 1.2319732877340805, "learning_rate": 7.387483548077559e-06, "loss": 0.2462289184331894, "step": 2450 }, { "epoch": 1.211046583467194, "grad_norm": 1.381045021384348, "learning_rate": 7.379597587633998e-06, "loss": 0.29385364055633545, "step": 2451 }, { "epoch": 1.211540837761028, "grad_norm": 1.1902133906710186, "learning_rate": 7.371713376533642e-06, "loss": 0.25049760937690735, "step": 2452 }, { "epoch": 1.2120350920548622, "grad_norm": 1.267298470174844, "learning_rate": 7.363830920039887e-06, "loss": 0.2748974859714508, "step": 2453 }, { "epoch": 1.2125293463486964, "grad_norm": 1.2929931198793703, "learning_rate": 7.355950223414939e-06, "loss": 0.2707570791244507, "step": 2454 }, { "epoch": 1.2130236006425306, "grad_norm": 1.3328464163268134, "learning_rate": 7.3480712919198474e-06, "loss": 0.2864024043083191, "step": 2455 }, { "epoch": 1.2135178549363648, "grad_norm": 1.400259353784304, "learning_rate": 7.340194130814466e-06, "loss": 0.3181900680065155, "step": 2456 }, { "epoch": 1.214012109230199, "grad_norm": 1.2994892273470056, "learning_rate": 7.332318745357483e-06, "loss": 0.3022974729537964, "step": 2457 }, { "epoch": 1.2145063635240332, "grad_norm": 1.2350650698265369, "learning_rate": 7.324445140806387e-06, "loss": 0.2850461006164551, "step": 2458 }, { "epoch": 1.2150006178178674, "grad_norm": 1.0534315857750147, "learning_rate": 7.316573322417483e-06, "loss": 0.21958643198013306, "step": 2459 }, { "epoch": 1.2154948721117014, "grad_norm": 1.3531472648001939, "learning_rate": 7.3087032954458915e-06, "loss": 0.2517468333244324, "step": 2460 }, { "epoch": 1.2159891264055356, "grad_norm": 1.1714370722498957, "learning_rate": 7.300835065145526e-06, "loss": 0.26957637071609497, "step": 2461 }, { "epoch": 1.2164833806993698, "grad_norm": 1.2755586367674554, "learning_rate": 7.292968636769103e-06, "loss": 0.2699058949947357, "step": 2462 }, { "epoch": 1.216977634993204, "grad_norm": 1.2382912705778586, "learning_rate": 7.285104015568138e-06, "loss": 0.25076431035995483, "step": 2463 }, { "epoch": 1.2174718892870382, "grad_norm": 1.2104527847150177, "learning_rate": 7.277241206792944e-06, "loss": 0.24862724542617798, "step": 2464 }, { "epoch": 1.2179661435808724, "grad_norm": 1.3107261919810722, "learning_rate": 7.269380215692614e-06, "loss": 0.27427712082862854, "step": 2465 }, { "epoch": 1.2184603978747066, "grad_norm": 1.2946586839730188, "learning_rate": 7.261521047515041e-06, "loss": 0.24343061447143555, "step": 2466 }, { "epoch": 1.2189546521685406, "grad_norm": 1.1968860231182823, "learning_rate": 7.253663707506882e-06, "loss": 0.25482866168022156, "step": 2467 }, { "epoch": 1.2194489064623748, "grad_norm": 1.2806570256332481, "learning_rate": 7.2458082009135964e-06, "loss": 0.27699458599090576, "step": 2468 }, { "epoch": 1.219943160756209, "grad_norm": 1.3000686730507884, "learning_rate": 7.237954532979401e-06, "loss": 0.26576149463653564, "step": 2469 }, { "epoch": 1.2204374150500432, "grad_norm": 1.2984838025251157, "learning_rate": 7.230102708947298e-06, "loss": 0.287861168384552, "step": 2470 }, { "epoch": 1.2209316693438774, "grad_norm": 1.2911534198412806, "learning_rate": 7.2222527340590434e-06, "loss": 0.25484874844551086, "step": 2471 }, { "epoch": 1.2214259236377116, "grad_norm": 1.284847349415858, "learning_rate": 7.214404613555177e-06, "loss": 0.26371529698371887, "step": 2472 }, { "epoch": 1.2219201779315458, "grad_norm": 1.334957534550205, "learning_rate": 7.206558352674992e-06, "loss": 0.23692578077316284, "step": 2473 }, { "epoch": 1.22241443222538, "grad_norm": 1.2696744902236006, "learning_rate": 7.198713956656538e-06, "loss": 0.26369085907936096, "step": 2474 }, { "epoch": 1.2229086865192142, "grad_norm": 1.4374683516439322, "learning_rate": 7.1908714307366145e-06, "loss": 0.260580450296402, "step": 2475 }, { "epoch": 1.2234029408130482, "grad_norm": 1.280804641850837, "learning_rate": 7.1830307801507904e-06, "loss": 0.2693007290363312, "step": 2476 }, { "epoch": 1.2238971951068824, "grad_norm": 1.3429546136121409, "learning_rate": 7.1751920101333695e-06, "loss": 0.26629775762557983, "step": 2477 }, { "epoch": 1.2243914494007166, "grad_norm": 1.3999841706301799, "learning_rate": 7.167355125917399e-06, "loss": 0.2963234782218933, "step": 2478 }, { "epoch": 1.2248857036945509, "grad_norm": 1.2332551275962955, "learning_rate": 7.159520132734669e-06, "loss": 0.24415187537670135, "step": 2479 }, { "epoch": 1.225379957988385, "grad_norm": 1.3645078601677985, "learning_rate": 7.15168703581572e-06, "loss": 0.2941599190235138, "step": 2480 }, { "epoch": 1.2258742122822193, "grad_norm": 1.2551885597461083, "learning_rate": 7.1438558403898065e-06, "loss": 0.22807514667510986, "step": 2481 }, { "epoch": 1.2263684665760535, "grad_norm": 1.3774209397395383, "learning_rate": 7.136026551684923e-06, "loss": 0.28865426778793335, "step": 2482 }, { "epoch": 1.2268627208698875, "grad_norm": 1.3250195381886638, "learning_rate": 7.1281991749277945e-06, "loss": 0.3015780448913574, "step": 2483 }, { "epoch": 1.2273569751637217, "grad_norm": 1.30264219696165, "learning_rate": 7.12037371534386e-06, "loss": 0.2521517872810364, "step": 2484 }, { "epoch": 1.2278512294575559, "grad_norm": 1.520486974517902, "learning_rate": 7.1125501781572896e-06, "loss": 0.2904277443885803, "step": 2485 }, { "epoch": 1.22834548375139, "grad_norm": 1.2434155494713983, "learning_rate": 7.104728568590966e-06, "loss": 0.26172375679016113, "step": 2486 }, { "epoch": 1.2288397380452243, "grad_norm": 1.3588693705399504, "learning_rate": 7.096908891866483e-06, "loss": 0.23565448820590973, "step": 2487 }, { "epoch": 1.2293339923390585, "grad_norm": 1.276833588621656, "learning_rate": 7.0890911532041375e-06, "loss": 0.2550106644630432, "step": 2488 }, { "epoch": 1.2298282466328927, "grad_norm": 1.4167484141197517, "learning_rate": 7.08127535782295e-06, "loss": 0.3221823573112488, "step": 2489 }, { "epoch": 1.230322500926727, "grad_norm": 1.2657124525427264, "learning_rate": 7.073461510940631e-06, "loss": 0.26209163665771484, "step": 2490 }, { "epoch": 1.2308167552205609, "grad_norm": 1.3626305998908985, "learning_rate": 7.06564961777359e-06, "loss": 0.28635868430137634, "step": 2491 }, { "epoch": 1.231311009514395, "grad_norm": 1.417027138446056, "learning_rate": 7.0578396835369355e-06, "loss": 0.25630202889442444, "step": 2492 }, { "epoch": 1.2318052638082293, "grad_norm": 1.233621488661494, "learning_rate": 7.050031713444474e-06, "loss": 0.27345454692840576, "step": 2493 }, { "epoch": 1.2322995181020635, "grad_norm": 1.2592068756906736, "learning_rate": 7.042225712708692e-06, "loss": 0.2365841269493103, "step": 2494 }, { "epoch": 1.2327937723958977, "grad_norm": 1.730933189967813, "learning_rate": 7.03442168654076e-06, "loss": 0.2891104221343994, "step": 2495 }, { "epoch": 1.233288026689732, "grad_norm": 1.3811266669598459, "learning_rate": 7.026619640150534e-06, "loss": 0.2713435888290405, "step": 2496 }, { "epoch": 1.2337822809835661, "grad_norm": 1.3509192768016722, "learning_rate": 7.018819578746557e-06, "loss": 0.28552842140197754, "step": 2497 }, { "epoch": 1.2342765352774, "grad_norm": 1.377186562637688, "learning_rate": 7.011021507536031e-06, "loss": 0.2731080949306488, "step": 2498 }, { "epoch": 1.2347707895712343, "grad_norm": 1.1800591795719682, "learning_rate": 7.003225431724841e-06, "loss": 0.27373206615448, "step": 2499 }, { "epoch": 1.2352650438650685, "grad_norm": 1.3197536250384188, "learning_rate": 6.99543135651753e-06, "loss": 0.24507245421409607, "step": 2500 }, { "epoch": 1.2357592981589027, "grad_norm": 1.2680812543691635, "learning_rate": 6.9876392871173205e-06, "loss": 0.2653801739215851, "step": 2501 }, { "epoch": 1.236253552452737, "grad_norm": 1.115227060544212, "learning_rate": 6.979849228726079e-06, "loss": 0.1929643303155899, "step": 2502 }, { "epoch": 1.2367478067465711, "grad_norm": 1.330653204132735, "learning_rate": 6.972061186544341e-06, "loss": 0.2684918940067291, "step": 2503 }, { "epoch": 1.2372420610404053, "grad_norm": 1.2129572179563677, "learning_rate": 6.964275165771288e-06, "loss": 0.23158729076385498, "step": 2504 }, { "epoch": 1.2377363153342396, "grad_norm": 1.3192284190451669, "learning_rate": 6.95649117160476e-06, "loss": 0.24757611751556396, "step": 2505 }, { "epoch": 1.2382305696280738, "grad_norm": 1.328208985585749, "learning_rate": 6.9487092092412425e-06, "loss": 0.2651844620704651, "step": 2506 }, { "epoch": 1.2387248239219077, "grad_norm": 1.3550284074069674, "learning_rate": 6.940929283875859e-06, "loss": 0.26745620369911194, "step": 2507 }, { "epoch": 1.239219078215742, "grad_norm": 1.2361002758783033, "learning_rate": 6.933151400702374e-06, "loss": 0.22088846564292908, "step": 2508 }, { "epoch": 1.2397133325095762, "grad_norm": 1.2379679284464757, "learning_rate": 6.925375564913193e-06, "loss": 0.2662886381149292, "step": 2509 }, { "epoch": 1.2402075868034104, "grad_norm": 1.3634625495618726, "learning_rate": 6.917601781699357e-06, "loss": 0.2691834270954132, "step": 2510 }, { "epoch": 1.2407018410972446, "grad_norm": 1.1575744185130052, "learning_rate": 6.909830056250527e-06, "loss": 0.2110689878463745, "step": 2511 }, { "epoch": 1.2411960953910788, "grad_norm": 1.2961548823459923, "learning_rate": 6.902060393755001e-06, "loss": 0.29281991720199585, "step": 2512 }, { "epoch": 1.2416903496849128, "grad_norm": 1.2724295845366205, "learning_rate": 6.894292799399688e-06, "loss": 0.27409040927886963, "step": 2513 }, { "epoch": 1.242184603978747, "grad_norm": 1.304980332058365, "learning_rate": 6.886527278370131e-06, "loss": 0.29440224170684814, "step": 2514 }, { "epoch": 1.2426788582725812, "grad_norm": 1.1224782958445216, "learning_rate": 6.878763835850475e-06, "loss": 0.23107948899269104, "step": 2515 }, { "epoch": 1.2431731125664154, "grad_norm": 1.55997556893969, "learning_rate": 6.871002477023488e-06, "loss": 0.2682652473449707, "step": 2516 }, { "epoch": 1.2436673668602496, "grad_norm": 1.2329698948831815, "learning_rate": 6.863243207070534e-06, "loss": 0.2935982644557953, "step": 2517 }, { "epoch": 1.2441616211540838, "grad_norm": 1.4373018605291157, "learning_rate": 6.855486031171597e-06, "loss": 0.29027625918388367, "step": 2518 }, { "epoch": 1.244655875447918, "grad_norm": 1.2739101669235458, "learning_rate": 6.84773095450526e-06, "loss": 0.25107353925704956, "step": 2519 }, { "epoch": 1.2451501297417522, "grad_norm": 1.2325888755211254, "learning_rate": 6.839977982248697e-06, "loss": 0.279231995344162, "step": 2520 }, { "epoch": 1.2456443840355864, "grad_norm": 1.2006221660421637, "learning_rate": 6.832227119577677e-06, "loss": 0.2544802129268646, "step": 2521 }, { "epoch": 1.2461386383294204, "grad_norm": 1.397981415575177, "learning_rate": 6.824478371666573e-06, "loss": 0.24365633726119995, "step": 2522 }, { "epoch": 1.2466328926232546, "grad_norm": 1.1393524200353975, "learning_rate": 6.816731743688336e-06, "loss": 0.2673290967941284, "step": 2523 }, { "epoch": 1.2471271469170888, "grad_norm": 1.284093438519867, "learning_rate": 6.808987240814504e-06, "loss": 0.23896455764770508, "step": 2524 }, { "epoch": 1.247621401210923, "grad_norm": 1.200000168994301, "learning_rate": 6.801244868215192e-06, "loss": 0.23196406662464142, "step": 2525 }, { "epoch": 1.2481156555047572, "grad_norm": 1.2289321548733863, "learning_rate": 6.793504631059106e-06, "loss": 0.24249708652496338, "step": 2526 }, { "epoch": 1.2486099097985914, "grad_norm": 1.1511217069627229, "learning_rate": 6.785766534513514e-06, "loss": 0.2366780787706375, "step": 2527 }, { "epoch": 1.2491041640924256, "grad_norm": 1.291146988373714, "learning_rate": 6.778030583744254e-06, "loss": 0.2615105211734772, "step": 2528 }, { "epoch": 1.2495984183862596, "grad_norm": 1.4688230831159943, "learning_rate": 6.770296783915738e-06, "loss": 0.29761314392089844, "step": 2529 }, { "epoch": 1.2500926726800938, "grad_norm": 1.2928438568936322, "learning_rate": 6.762565140190948e-06, "loss": 0.25020867586135864, "step": 2530 }, { "epoch": 1.250586926973928, "grad_norm": 1.3858962507108388, "learning_rate": 6.754835657731409e-06, "loss": 0.2716590166091919, "step": 2531 }, { "epoch": 1.2510811812677622, "grad_norm": 1.4048062063243787, "learning_rate": 6.747108341697221e-06, "loss": 0.27042001485824585, "step": 2532 }, { "epoch": 1.2515754355615964, "grad_norm": 1.3297085932201778, "learning_rate": 6.739383197247023e-06, "loss": 0.2659035325050354, "step": 2533 }, { "epoch": 1.2520696898554307, "grad_norm": 1.3945414928963702, "learning_rate": 6.731660229538014e-06, "loss": 0.2803581655025482, "step": 2534 }, { "epoch": 1.2525639441492649, "grad_norm": 1.1484885760506975, "learning_rate": 6.723939443725938e-06, "loss": 0.24422097206115723, "step": 2535 }, { "epoch": 1.253058198443099, "grad_norm": 1.5676789145324774, "learning_rate": 6.71622084496508e-06, "loss": 0.30003631114959717, "step": 2536 }, { "epoch": 1.2535524527369333, "grad_norm": 1.3207189074013763, "learning_rate": 6.708504438408265e-06, "loss": 0.25745317339897156, "step": 2537 }, { "epoch": 1.2540467070307673, "grad_norm": 1.3298790802481242, "learning_rate": 6.700790229206856e-06, "loss": 0.27648618817329407, "step": 2538 }, { "epoch": 1.2545409613246015, "grad_norm": 1.2910375745243117, "learning_rate": 6.6930782225107536e-06, "loss": 0.2579975724220276, "step": 2539 }, { "epoch": 1.2550352156184357, "grad_norm": 1.3321333943034437, "learning_rate": 6.68536842346838e-06, "loss": 0.2806825637817383, "step": 2540 }, { "epoch": 1.2555294699122699, "grad_norm": 1.5211080365897773, "learning_rate": 6.677660837226685e-06, "loss": 0.2641657888889313, "step": 2541 }, { "epoch": 1.256023724206104, "grad_norm": 1.3170844434659201, "learning_rate": 6.669955468931142e-06, "loss": 0.25483542680740356, "step": 2542 }, { "epoch": 1.2565179784999383, "grad_norm": 1.438596032878092, "learning_rate": 6.662252323725751e-06, "loss": 0.264334112405777, "step": 2543 }, { "epoch": 1.2570122327937723, "grad_norm": 1.2825942587632855, "learning_rate": 6.654551406753017e-06, "loss": 0.2541567385196686, "step": 2544 }, { "epoch": 1.2575064870876065, "grad_norm": 1.3007868833040497, "learning_rate": 6.646852723153965e-06, "loss": 0.2695424258708954, "step": 2545 }, { "epoch": 1.2580007413814407, "grad_norm": 1.2114763710946868, "learning_rate": 6.63915627806812e-06, "loss": 0.2694344222545624, "step": 2546 }, { "epoch": 1.258494995675275, "grad_norm": 1.3203626104751756, "learning_rate": 6.631462076633527e-06, "loss": 0.2695961892604828, "step": 2547 }, { "epoch": 1.258989249969109, "grad_norm": 1.43655166025842, "learning_rate": 6.623770123986719e-06, "loss": 0.26878753304481506, "step": 2548 }, { "epoch": 1.2594835042629433, "grad_norm": 1.4117532208090406, "learning_rate": 6.616080425262738e-06, "loss": 0.27568501234054565, "step": 2549 }, { "epoch": 1.2599777585567775, "grad_norm": 1.4407785281346286, "learning_rate": 6.608392985595111e-06, "loss": 0.2991989254951477, "step": 2550 }, { "epoch": 1.2604720128506117, "grad_norm": 1.2938769852574108, "learning_rate": 6.600707810115869e-06, "loss": 0.21832239627838135, "step": 2551 }, { "epoch": 1.260966267144446, "grad_norm": 1.3528768023288296, "learning_rate": 6.593024903955525e-06, "loss": 0.2671685516834259, "step": 2552 }, { "epoch": 1.26146052143828, "grad_norm": 1.142061359022944, "learning_rate": 6.585344272243073e-06, "loss": 0.23399557173252106, "step": 2553 }, { "epoch": 1.2619547757321141, "grad_norm": 1.3000899404630435, "learning_rate": 6.577665920105996e-06, "loss": 0.2701990008354187, "step": 2554 }, { "epoch": 1.2624490300259483, "grad_norm": 1.216581780326655, "learning_rate": 6.56998985267025e-06, "loss": 0.2679189145565033, "step": 2555 }, { "epoch": 1.2629432843197825, "grad_norm": 1.3457541131318878, "learning_rate": 6.562316075060272e-06, "loss": 0.2597065567970276, "step": 2556 }, { "epoch": 1.2634375386136167, "grad_norm": 1.3732680167208262, "learning_rate": 6.554644592398962e-06, "loss": 0.2942010462284088, "step": 2557 }, { "epoch": 1.263931792907451, "grad_norm": 1.2654921757837638, "learning_rate": 6.546975409807696e-06, "loss": 0.2547098994255066, "step": 2558 }, { "epoch": 1.264426047201285, "grad_norm": 1.29416806058113, "learning_rate": 6.539308532406306e-06, "loss": 0.2779114246368408, "step": 2559 }, { "epoch": 1.2649203014951191, "grad_norm": 1.2525651200835928, "learning_rate": 6.531643965313093e-06, "loss": 0.22318917512893677, "step": 2560 }, { "epoch": 1.2654145557889533, "grad_norm": 1.2931765026229116, "learning_rate": 6.523981713644814e-06, "loss": 0.25439128279685974, "step": 2561 }, { "epoch": 1.2659088100827876, "grad_norm": 1.1946536852540512, "learning_rate": 6.516321782516677e-06, "loss": 0.2317974865436554, "step": 2562 }, { "epoch": 1.2664030643766218, "grad_norm": 1.3517228291780166, "learning_rate": 6.508664177042339e-06, "loss": 0.273223876953125, "step": 2563 }, { "epoch": 1.266897318670456, "grad_norm": 1.3767500694886763, "learning_rate": 6.501008902333912e-06, "loss": 0.28408509492874146, "step": 2564 }, { "epoch": 1.2673915729642902, "grad_norm": 1.4378995512233899, "learning_rate": 6.493355963501951e-06, "loss": 0.2702238857746124, "step": 2565 }, { "epoch": 1.2678858272581244, "grad_norm": 1.2819637354130675, "learning_rate": 6.485705365655441e-06, "loss": 0.2142164558172226, "step": 2566 }, { "epoch": 1.2683800815519586, "grad_norm": 1.4108385899794438, "learning_rate": 6.478057113901817e-06, "loss": 0.2654300928115845, "step": 2567 }, { "epoch": 1.2688743358457928, "grad_norm": 1.1724627648861543, "learning_rate": 6.470411213346941e-06, "loss": 0.24601367115974426, "step": 2568 }, { "epoch": 1.2693685901396268, "grad_norm": 1.36613316910106, "learning_rate": 6.462767669095109e-06, "loss": 0.26201942563056946, "step": 2569 }, { "epoch": 1.269862844433461, "grad_norm": 1.342399065083916, "learning_rate": 6.455126486249038e-06, "loss": 0.2839587926864624, "step": 2570 }, { "epoch": 1.2703570987272952, "grad_norm": 1.2538564056049797, "learning_rate": 6.447487669909873e-06, "loss": 0.21100708842277527, "step": 2571 }, { "epoch": 1.2708513530211294, "grad_norm": 1.1457223195177177, "learning_rate": 6.439851225177185e-06, "loss": 0.2181582748889923, "step": 2572 }, { "epoch": 1.2713456073149636, "grad_norm": 1.397761306307691, "learning_rate": 6.432217157148948e-06, "loss": 0.29196488857269287, "step": 2573 }, { "epoch": 1.2718398616087978, "grad_norm": 1.3664440708479575, "learning_rate": 6.424585470921563e-06, "loss": 0.2365931123495102, "step": 2574 }, { "epoch": 1.2723341159026318, "grad_norm": 1.3496940412150429, "learning_rate": 6.4169561715898255e-06, "loss": 0.2277393937110901, "step": 2575 }, { "epoch": 1.272828370196466, "grad_norm": 1.3624051718280268, "learning_rate": 6.409329264246956e-06, "loss": 0.25285032391548157, "step": 2576 }, { "epoch": 1.2733226244903002, "grad_norm": 1.2632390853508073, "learning_rate": 6.401704753984563e-06, "loss": 0.253650963306427, "step": 2577 }, { "epoch": 1.2738168787841344, "grad_norm": 1.264245223392645, "learning_rate": 6.394082645892668e-06, "loss": 0.22143784165382385, "step": 2578 }, { "epoch": 1.2743111330779686, "grad_norm": 1.3283739907286298, "learning_rate": 6.3864629450596696e-06, "loss": 0.27591395378112793, "step": 2579 }, { "epoch": 1.2748053873718028, "grad_norm": 1.6236594986793635, "learning_rate": 6.37884565657238e-06, "loss": 0.32865333557128906, "step": 2580 }, { "epoch": 1.275299641665637, "grad_norm": 1.2172019661301716, "learning_rate": 6.371230785515992e-06, "loss": 0.2743702530860901, "step": 2581 }, { "epoch": 1.2757938959594712, "grad_norm": 1.2586352823219396, "learning_rate": 6.3636183369740845e-06, "loss": 0.23967956006526947, "step": 2582 }, { "epoch": 1.2762881502533054, "grad_norm": 1.206746025741565, "learning_rate": 6.356008316028614e-06, "loss": 0.2474803626537323, "step": 2583 }, { "epoch": 1.2767824045471394, "grad_norm": 1.2591134604976273, "learning_rate": 6.348400727759925e-06, "loss": 0.2523267865180969, "step": 2584 }, { "epoch": 1.2772766588409736, "grad_norm": 1.3690385191668641, "learning_rate": 6.340795577246738e-06, "loss": 0.2549436092376709, "step": 2585 }, { "epoch": 1.2777709131348078, "grad_norm": 1.309885921175695, "learning_rate": 6.333192869566138e-06, "loss": 0.2602443993091583, "step": 2586 }, { "epoch": 1.278265167428642, "grad_norm": 1.248955873440961, "learning_rate": 6.325592609793588e-06, "loss": 0.22912462055683136, "step": 2587 }, { "epoch": 1.2787594217224763, "grad_norm": 1.3253843576578603, "learning_rate": 6.317994803002907e-06, "loss": 0.3004158139228821, "step": 2588 }, { "epoch": 1.2792536760163105, "grad_norm": 1.2054603629919527, "learning_rate": 6.310399454266289e-06, "loss": 0.25851407647132874, "step": 2589 }, { "epoch": 1.2797479303101444, "grad_norm": 1.2857681683589963, "learning_rate": 6.302806568654277e-06, "loss": 0.24637526273727417, "step": 2590 }, { "epoch": 1.2802421846039787, "grad_norm": 1.2976312908550238, "learning_rate": 6.295216151235774e-06, "loss": 0.26500213146209717, "step": 2591 }, { "epoch": 1.2807364388978129, "grad_norm": 1.2103490895138174, "learning_rate": 6.287628207078031e-06, "loss": 0.24276241660118103, "step": 2592 }, { "epoch": 1.281230693191647, "grad_norm": 2.3839558822188787, "learning_rate": 6.280042741246655e-06, "loss": 0.27117204666137695, "step": 2593 }, { "epoch": 1.2817249474854813, "grad_norm": 1.4461368742366545, "learning_rate": 6.272459758805596e-06, "loss": 0.29287856817245483, "step": 2594 }, { "epoch": 1.2822192017793155, "grad_norm": 1.4301387064569637, "learning_rate": 6.26487926481714e-06, "loss": 0.3065788149833679, "step": 2595 }, { "epoch": 1.2827134560731497, "grad_norm": 1.3198078410588965, "learning_rate": 6.257301264341915e-06, "loss": 0.2738455533981323, "step": 2596 }, { "epoch": 1.283207710366984, "grad_norm": 1.5398007848288653, "learning_rate": 6.2497257624388915e-06, "loss": 0.24216318130493164, "step": 2597 }, { "epoch": 1.283701964660818, "grad_norm": 1.2565420891983292, "learning_rate": 6.242152764165368e-06, "loss": 0.276785671710968, "step": 2598 }, { "epoch": 1.2841962189546523, "grad_norm": 1.2307015932000853, "learning_rate": 6.234582274576961e-06, "loss": 0.24999365210533142, "step": 2599 }, { "epoch": 1.2846904732484863, "grad_norm": 1.2824145770644522, "learning_rate": 6.227014298727627e-06, "loss": 0.27714112401008606, "step": 2600 }, { "epoch": 1.2851847275423205, "grad_norm": 1.2260344372038856, "learning_rate": 6.219448841669639e-06, "loss": 0.2422318160533905, "step": 2601 }, { "epoch": 1.2856789818361547, "grad_norm": 1.3255802725159413, "learning_rate": 6.21188590845359e-06, "loss": 0.26688697934150696, "step": 2602 }, { "epoch": 1.286173236129989, "grad_norm": 1.2753676961687272, "learning_rate": 6.204325504128379e-06, "loss": 0.256889671087265, "step": 2603 }, { "epoch": 1.2866674904238231, "grad_norm": 1.3013140965176258, "learning_rate": 6.196767633741225e-06, "loss": 0.27372461557388306, "step": 2604 }, { "epoch": 1.287161744717657, "grad_norm": 1.3064762941978003, "learning_rate": 6.189212302337663e-06, "loss": 0.25194403529167175, "step": 2605 }, { "epoch": 1.2876559990114913, "grad_norm": 1.2533511197404907, "learning_rate": 6.181659514961515e-06, "loss": 0.24381688237190247, "step": 2606 }, { "epoch": 1.2881502533053255, "grad_norm": 1.2987400887924563, "learning_rate": 6.17410927665492e-06, "loss": 0.255805104970932, "step": 2607 }, { "epoch": 1.2886445075991597, "grad_norm": 1.270289405479379, "learning_rate": 6.166561592458307e-06, "loss": 0.25070682168006897, "step": 2608 }, { "epoch": 1.289138761892994, "grad_norm": 1.1954868388063873, "learning_rate": 6.159016467410397e-06, "loss": 0.24080060422420502, "step": 2609 }, { "epoch": 1.2896330161868281, "grad_norm": 1.3524298235557053, "learning_rate": 6.151473906548215e-06, "loss": 0.28041762113571167, "step": 2610 }, { "epoch": 1.2901272704806623, "grad_norm": 1.3891353799265191, "learning_rate": 6.143933914907065e-06, "loss": 0.2624273896217346, "step": 2611 }, { "epoch": 1.2906215247744965, "grad_norm": 1.3838932352032651, "learning_rate": 6.136396497520536e-06, "loss": 0.2658112049102783, "step": 2612 }, { "epoch": 1.2911157790683307, "grad_norm": 1.3103712430992434, "learning_rate": 6.1288616594205e-06, "loss": 0.27714237570762634, "step": 2613 }, { "epoch": 1.291610033362165, "grad_norm": 1.2276105048536776, "learning_rate": 6.121329405637111e-06, "loss": 0.23253153264522552, "step": 2614 }, { "epoch": 1.292104287655999, "grad_norm": 1.2168125400378236, "learning_rate": 6.1137997411987915e-06, "loss": 0.2438409924507141, "step": 2615 }, { "epoch": 1.2925985419498331, "grad_norm": 1.3814066274151728, "learning_rate": 6.106272671132236e-06, "loss": 0.24013856053352356, "step": 2616 }, { "epoch": 1.2930927962436674, "grad_norm": 1.4362282063831207, "learning_rate": 6.098748200462408e-06, "loss": 0.2850446403026581, "step": 2617 }, { "epoch": 1.2935870505375016, "grad_norm": 1.3403873033762816, "learning_rate": 6.0912263342125445e-06, "loss": 0.22195187211036682, "step": 2618 }, { "epoch": 1.2940813048313358, "grad_norm": 1.3701004376420556, "learning_rate": 6.083707077404129e-06, "loss": 0.29266390204429626, "step": 2619 }, { "epoch": 1.29457555912517, "grad_norm": 1.2103981171479565, "learning_rate": 6.076190435056913e-06, "loss": 0.26741352677345276, "step": 2620 }, { "epoch": 1.295069813419004, "grad_norm": 1.259544042020202, "learning_rate": 6.068676412188892e-06, "loss": 0.26014602184295654, "step": 2621 }, { "epoch": 1.2955640677128382, "grad_norm": 1.2871395012144142, "learning_rate": 6.061165013816333e-06, "loss": 0.2561393976211548, "step": 2622 }, { "epoch": 1.2960583220066724, "grad_norm": 1.312678751233067, "learning_rate": 6.053656244953728e-06, "loss": 0.2952851951122284, "step": 2623 }, { "epoch": 1.2965525763005066, "grad_norm": 1.2817239432203538, "learning_rate": 6.046150110613831e-06, "loss": 0.2830423414707184, "step": 2624 }, { "epoch": 1.2970468305943408, "grad_norm": 1.2514529269380406, "learning_rate": 6.038646615807622e-06, "loss": 0.22306497395038605, "step": 2625 }, { "epoch": 1.297541084888175, "grad_norm": 1.3018072981213034, "learning_rate": 6.031145765544333e-06, "loss": 0.23291784524917603, "step": 2626 }, { "epoch": 1.2980353391820092, "grad_norm": 1.3763927806121403, "learning_rate": 6.023647564831425e-06, "loss": 0.2376563400030136, "step": 2627 }, { "epoch": 1.2985295934758434, "grad_norm": 1.3283544756021872, "learning_rate": 6.016152018674588e-06, "loss": 0.2873516380786896, "step": 2628 }, { "epoch": 1.2990238477696776, "grad_norm": 1.2475849952661122, "learning_rate": 6.00865913207774e-06, "loss": 0.2416999638080597, "step": 2629 }, { "epoch": 1.2995181020635118, "grad_norm": 1.2254304075146119, "learning_rate": 6.001168910043023e-06, "loss": 0.2627726197242737, "step": 2630 }, { "epoch": 1.3000123563573458, "grad_norm": 1.4025542210635493, "learning_rate": 5.993681357570809e-06, "loss": 0.25375279784202576, "step": 2631 }, { "epoch": 1.30050661065118, "grad_norm": 1.3348797401747288, "learning_rate": 5.986196479659676e-06, "loss": 0.2853030562400818, "step": 2632 }, { "epoch": 1.3010008649450142, "grad_norm": 1.3089867713489467, "learning_rate": 5.978714281306425e-06, "loss": 0.2626519501209259, "step": 2633 }, { "epoch": 1.3014951192388484, "grad_norm": 1.4566011034207051, "learning_rate": 5.971234767506057e-06, "loss": 0.2895713448524475, "step": 2634 }, { "epoch": 1.3019893735326826, "grad_norm": 1.2504104998957544, "learning_rate": 5.9637579432518e-06, "loss": 0.24617832899093628, "step": 2635 }, { "epoch": 1.3024836278265166, "grad_norm": 1.2199824881911456, "learning_rate": 5.956283813535066e-06, "loss": 0.25497785210609436, "step": 2636 }, { "epoch": 1.3029778821203508, "grad_norm": 1.3200409304272294, "learning_rate": 5.948812383345484e-06, "loss": 0.25832462310791016, "step": 2637 }, { "epoch": 1.303472136414185, "grad_norm": 1.214232538768618, "learning_rate": 5.941343657670866e-06, "loss": 0.24273909628391266, "step": 2638 }, { "epoch": 1.3039663907080192, "grad_norm": 1.2844572342866962, "learning_rate": 5.933877641497232e-06, "loss": 0.2668009400367737, "step": 2639 }, { "epoch": 1.3044606450018534, "grad_norm": 1.2388896928667246, "learning_rate": 5.92641433980879e-06, "loss": 0.2519373595714569, "step": 2640 }, { "epoch": 1.3049548992956876, "grad_norm": 1.3760811135868023, "learning_rate": 5.918953757587928e-06, "loss": 0.30091768503189087, "step": 2641 }, { "epoch": 1.3054491535895218, "grad_norm": 1.207587317973019, "learning_rate": 5.911495899815225e-06, "loss": 0.2504241466522217, "step": 2642 }, { "epoch": 1.305943407883356, "grad_norm": 1.1902656490822856, "learning_rate": 5.904040771469444e-06, "loss": 0.24741190671920776, "step": 2643 }, { "epoch": 1.3064376621771903, "grad_norm": 1.2559463008488698, "learning_rate": 5.896588377527519e-06, "loss": 0.2636350691318512, "step": 2644 }, { "epoch": 1.3069319164710245, "grad_norm": 1.1981836589630794, "learning_rate": 5.889138722964563e-06, "loss": 0.22512421011924744, "step": 2645 }, { "epoch": 1.3074261707648585, "grad_norm": 1.2451009493990417, "learning_rate": 5.8816918127538546e-06, "loss": 0.26447975635528564, "step": 2646 }, { "epoch": 1.3079204250586927, "grad_norm": 1.1839899877527418, "learning_rate": 5.874247651866853e-06, "loss": 0.22084996104240417, "step": 2647 }, { "epoch": 1.3084146793525269, "grad_norm": 1.2774730254159221, "learning_rate": 5.8668062452731715e-06, "loss": 0.24033552408218384, "step": 2648 }, { "epoch": 1.308908933646361, "grad_norm": 1.305578072115893, "learning_rate": 5.8593675979405795e-06, "loss": 0.24829509854316711, "step": 2649 }, { "epoch": 1.3094031879401953, "grad_norm": 1.3651670637998603, "learning_rate": 5.851931714835016e-06, "loss": 0.29011303186416626, "step": 2650 }, { "epoch": 1.3098974422340295, "grad_norm": 1.3474001783390817, "learning_rate": 5.8444986009205754e-06, "loss": 0.273196280002594, "step": 2651 }, { "epoch": 1.3103916965278635, "grad_norm": 1.4905929158728624, "learning_rate": 5.837068261159491e-06, "loss": 0.28843480348587036, "step": 2652 }, { "epoch": 1.3108859508216977, "grad_norm": 1.297768951304802, "learning_rate": 5.829640700512159e-06, "loss": 0.25919461250305176, "step": 2653 }, { "epoch": 1.3113802051155319, "grad_norm": 1.4662917967499176, "learning_rate": 5.822215923937105e-06, "loss": 0.24588480591773987, "step": 2654 }, { "epoch": 1.311874459409366, "grad_norm": 1.4533199098003418, "learning_rate": 5.814793936391001e-06, "loss": 0.26138943433761597, "step": 2655 }, { "epoch": 1.3123687137032003, "grad_norm": 1.3892499103405112, "learning_rate": 5.807374742828675e-06, "loss": 0.2740943729877472, "step": 2656 }, { "epoch": 1.3128629679970345, "grad_norm": 1.2689667055719156, "learning_rate": 5.7999583482030605e-06, "loss": 0.2307349294424057, "step": 2657 }, { "epoch": 1.3133572222908687, "grad_norm": 1.4143938245126972, "learning_rate": 5.792544757465242e-06, "loss": 0.28424161672592163, "step": 2658 }, { "epoch": 1.313851476584703, "grad_norm": 1.1640800807114133, "learning_rate": 5.785133975564426e-06, "loss": 0.2586106061935425, "step": 2659 }, { "epoch": 1.3143457308785371, "grad_norm": 1.3677717868907802, "learning_rate": 5.7777260074479455e-06, "loss": 0.23268333077430725, "step": 2660 }, { "epoch": 1.314839985172371, "grad_norm": 1.3455357811423037, "learning_rate": 5.770320858061254e-06, "loss": 0.22144779562950134, "step": 2661 }, { "epoch": 1.3153342394662053, "grad_norm": 1.1539678539958322, "learning_rate": 5.762918532347925e-06, "loss": 0.2450334131717682, "step": 2662 }, { "epoch": 1.3158284937600395, "grad_norm": 1.3018328480210146, "learning_rate": 5.7555190352496375e-06, "loss": 0.24483400583267212, "step": 2663 }, { "epoch": 1.3163227480538737, "grad_norm": 1.484825672376601, "learning_rate": 5.748122371706198e-06, "loss": 0.2590720057487488, "step": 2664 }, { "epoch": 1.316817002347708, "grad_norm": 1.5800261617865896, "learning_rate": 5.740728546655515e-06, "loss": 0.27116847038269043, "step": 2665 }, { "epoch": 1.3173112566415421, "grad_norm": 1.3133697295364004, "learning_rate": 5.733337565033595e-06, "loss": 0.2720273435115814, "step": 2666 }, { "epoch": 1.3178055109353761, "grad_norm": 1.223977156924706, "learning_rate": 5.7259494317745514e-06, "loss": 0.22150173783302307, "step": 2667 }, { "epoch": 1.3182997652292103, "grad_norm": 1.2771960781536442, "learning_rate": 5.718564151810597e-06, "loss": 0.27474984526634216, "step": 2668 }, { "epoch": 1.3187940195230445, "grad_norm": 1.3886425317966573, "learning_rate": 5.711181730072044e-06, "loss": 0.2547265291213989, "step": 2669 }, { "epoch": 1.3192882738168787, "grad_norm": 1.3822455459704068, "learning_rate": 5.703802171487286e-06, "loss": 0.2686036229133606, "step": 2670 }, { "epoch": 1.319782528110713, "grad_norm": 1.2588694556349689, "learning_rate": 5.696425480982814e-06, "loss": 0.2276458591222763, "step": 2671 }, { "epoch": 1.3202767824045472, "grad_norm": 1.3366284026803796, "learning_rate": 5.6890516634832e-06, "loss": 0.25005075335502625, "step": 2672 }, { "epoch": 1.3207710366983814, "grad_norm": 1.4092724528348008, "learning_rate": 5.681680723911104e-06, "loss": 0.25919869542121887, "step": 2673 }, { "epoch": 1.3212652909922156, "grad_norm": 1.3254224112633677, "learning_rate": 5.6743126671872505e-06, "loss": 0.2684757709503174, "step": 2674 }, { "epoch": 1.3217595452860498, "grad_norm": 1.2529305606098464, "learning_rate": 5.666947498230451e-06, "loss": 0.2554991543292999, "step": 2675 }, { "epoch": 1.322253799579884, "grad_norm": 1.3734571061597927, "learning_rate": 5.6595852219575975e-06, "loss": 0.27026665210723877, "step": 2676 }, { "epoch": 1.322748053873718, "grad_norm": 1.5029882994051502, "learning_rate": 5.652225843283629e-06, "loss": 0.3248092234134674, "step": 2677 }, { "epoch": 1.3232423081675522, "grad_norm": 1.3299866785479277, "learning_rate": 5.644869367121564e-06, "loss": 0.2554503083229065, "step": 2678 }, { "epoch": 1.3237365624613864, "grad_norm": 1.4099986774485116, "learning_rate": 5.637515798382488e-06, "loss": 0.25482693314552307, "step": 2679 }, { "epoch": 1.3242308167552206, "grad_norm": 1.268292092612611, "learning_rate": 5.630165141975523e-06, "loss": 0.24664446711540222, "step": 2680 }, { "epoch": 1.3247250710490548, "grad_norm": 1.3674712589344702, "learning_rate": 5.622817402807879e-06, "loss": 0.23855865001678467, "step": 2681 }, { "epoch": 1.325219325342889, "grad_norm": 1.28659959156705, "learning_rate": 5.615472585784796e-06, "loss": 0.2847699820995331, "step": 2682 }, { "epoch": 1.325713579636723, "grad_norm": 1.3902791844570088, "learning_rate": 5.608130695809564e-06, "loss": 0.2705647051334381, "step": 2683 }, { "epoch": 1.3262078339305572, "grad_norm": 1.3726972299660716, "learning_rate": 5.600791737783523e-06, "loss": 0.30135318636894226, "step": 2684 }, { "epoch": 1.3267020882243914, "grad_norm": 1.3006770767718296, "learning_rate": 5.593455716606069e-06, "loss": 0.261536180973053, "step": 2685 }, { "epoch": 1.3271963425182256, "grad_norm": 1.2249107195075626, "learning_rate": 5.586122637174614e-06, "loss": 0.24006187915802002, "step": 2686 }, { "epoch": 1.3276905968120598, "grad_norm": 1.2887498899635654, "learning_rate": 5.578792504384618e-06, "loss": 0.27928346395492554, "step": 2687 }, { "epoch": 1.328184851105894, "grad_norm": 1.1715759673643904, "learning_rate": 5.5714653231295745e-06, "loss": 0.24134980142116547, "step": 2688 }, { "epoch": 1.3286791053997282, "grad_norm": 1.2633540397916776, "learning_rate": 5.5641410983010055e-06, "loss": 0.27914801239967346, "step": 2689 }, { "epoch": 1.3291733596935624, "grad_norm": 1.6348254119913803, "learning_rate": 5.55681983478846e-06, "loss": 0.2735476493835449, "step": 2690 }, { "epoch": 1.3296676139873966, "grad_norm": 1.3384777828423575, "learning_rate": 5.549501537479511e-06, "loss": 0.24919739365577698, "step": 2691 }, { "epoch": 1.3301618682812306, "grad_norm": 1.430948519009228, "learning_rate": 5.542186211259737e-06, "loss": 0.25435787439346313, "step": 2692 }, { "epoch": 1.3306561225750648, "grad_norm": 1.2533415908145504, "learning_rate": 5.534873861012763e-06, "loss": 0.2502862811088562, "step": 2693 }, { "epoch": 1.331150376868899, "grad_norm": 1.5771700033159861, "learning_rate": 5.527564491620195e-06, "loss": 0.25752580165863037, "step": 2694 }, { "epoch": 1.3316446311627332, "grad_norm": 1.332305251527839, "learning_rate": 5.520258107961671e-06, "loss": 0.22301846742630005, "step": 2695 }, { "epoch": 1.3321388854565674, "grad_norm": 1.4890781870784164, "learning_rate": 5.512954714914825e-06, "loss": 0.24581964313983917, "step": 2696 }, { "epoch": 1.3326331397504017, "grad_norm": 1.3113609641171107, "learning_rate": 5.5056543173553e-06, "loss": 0.271970272064209, "step": 2697 }, { "epoch": 1.3331273940442356, "grad_norm": 1.2432947451070444, "learning_rate": 5.498356920156735e-06, "loss": 0.23041053116321564, "step": 2698 }, { "epoch": 1.3336216483380698, "grad_norm": 1.3239879393507852, "learning_rate": 5.491062528190775e-06, "loss": 0.2338491678237915, "step": 2699 }, { "epoch": 1.334115902631904, "grad_norm": 1.3971989589857847, "learning_rate": 5.483771146327037e-06, "loss": 0.2667239010334015, "step": 2700 }, { "epoch": 1.3346101569257383, "grad_norm": 1.1737606299055239, "learning_rate": 5.4764827794331586e-06, "loss": 0.24761441349983215, "step": 2701 }, { "epoch": 1.3351044112195725, "grad_norm": 1.2384835240862428, "learning_rate": 5.469197432374747e-06, "loss": 0.24087639153003693, "step": 2702 }, { "epoch": 1.3355986655134067, "grad_norm": 1.3287799587341789, "learning_rate": 5.461915110015386e-06, "loss": 0.26774898171424866, "step": 2703 }, { "epoch": 1.3360929198072409, "grad_norm": 1.432719946516567, "learning_rate": 5.454635817216658e-06, "loss": 0.2820417284965515, "step": 2704 }, { "epoch": 1.336587174101075, "grad_norm": 1.408646831955897, "learning_rate": 5.447359558838113e-06, "loss": 0.2891086935997009, "step": 2705 }, { "epoch": 1.3370814283949093, "grad_norm": 1.370327694474157, "learning_rate": 5.440086339737277e-06, "loss": 0.24551361799240112, "step": 2706 }, { "epoch": 1.3375756826887435, "grad_norm": 1.3889596017030068, "learning_rate": 5.432816164769648e-06, "loss": 0.2293522208929062, "step": 2707 }, { "epoch": 1.3380699369825775, "grad_norm": 1.255610549812546, "learning_rate": 5.425549038788693e-06, "loss": 0.22325105965137482, "step": 2708 }, { "epoch": 1.3385641912764117, "grad_norm": 1.3152207031427636, "learning_rate": 5.4182849666458315e-06, "loss": 0.2263861447572708, "step": 2709 }, { "epoch": 1.339058445570246, "grad_norm": 1.2663328789435477, "learning_rate": 5.411023953190466e-06, "loss": 0.26902303099632263, "step": 2710 }, { "epoch": 1.33955269986408, "grad_norm": 1.4136099878472004, "learning_rate": 5.403766003269944e-06, "loss": 0.26154825091362, "step": 2711 }, { "epoch": 1.3400469541579143, "grad_norm": 1.32960722740892, "learning_rate": 5.396511121729562e-06, "loss": 0.2878270745277405, "step": 2712 }, { "epoch": 1.3405412084517483, "grad_norm": 1.3136699200223048, "learning_rate": 5.389259313412581e-06, "loss": 0.26206687092781067, "step": 2713 }, { "epoch": 1.3410354627455825, "grad_norm": 1.4998302342686003, "learning_rate": 5.382010583160201e-06, "loss": 0.25612518191337585, "step": 2714 }, { "epoch": 1.3415297170394167, "grad_norm": 1.2688327982594605, "learning_rate": 5.374764935811574e-06, "loss": 0.25600868463516235, "step": 2715 }, { "epoch": 1.342023971333251, "grad_norm": 1.274882827976935, "learning_rate": 5.367522376203787e-06, "loss": 0.24837616086006165, "step": 2716 }, { "epoch": 1.3425182256270851, "grad_norm": 1.2814047275641038, "learning_rate": 5.360282909171875e-06, "loss": 0.23487885296344757, "step": 2717 }, { "epoch": 1.3430124799209193, "grad_norm": 1.2024219184737237, "learning_rate": 5.353046539548797e-06, "loss": 0.22786842286586761, "step": 2718 }, { "epoch": 1.3435067342147535, "grad_norm": 1.288373437821988, "learning_rate": 5.3458132721654564e-06, "loss": 0.2198137640953064, "step": 2719 }, { "epoch": 1.3440009885085877, "grad_norm": 1.157338464361865, "learning_rate": 5.338583111850671e-06, "loss": 0.20056495070457458, "step": 2720 }, { "epoch": 1.344495242802422, "grad_norm": 1.2341328448147324, "learning_rate": 5.331356063431195e-06, "loss": 0.21636295318603516, "step": 2721 }, { "epoch": 1.3449894970962561, "grad_norm": 1.2390666617057948, "learning_rate": 5.32413213173171e-06, "loss": 0.23933230340480804, "step": 2722 }, { "epoch": 1.3454837513900901, "grad_norm": 1.3024836233276083, "learning_rate": 5.316911321574799e-06, "loss": 0.2402106523513794, "step": 2723 }, { "epoch": 1.3459780056839243, "grad_norm": 1.252933113923405, "learning_rate": 5.309693637780979e-06, "loss": 0.22524669766426086, "step": 2724 }, { "epoch": 1.3464722599777585, "grad_norm": 1.3140972939485838, "learning_rate": 5.302479085168668e-06, "loss": 0.25381600856781006, "step": 2725 }, { "epoch": 1.3469665142715928, "grad_norm": 1.2857997911307526, "learning_rate": 5.295267668554202e-06, "loss": 0.2614738643169403, "step": 2726 }, { "epoch": 1.347460768565427, "grad_norm": 8.575818718402259, "learning_rate": 5.288059392751817e-06, "loss": 0.2701472043991089, "step": 2727 }, { "epoch": 1.3479550228592612, "grad_norm": 1.378318405059408, "learning_rate": 5.280854262573661e-06, "loss": 0.2788996696472168, "step": 2728 }, { "epoch": 1.3484492771530951, "grad_norm": 1.2759693341337726, "learning_rate": 5.273652282829764e-06, "loss": 0.2419927418231964, "step": 2729 }, { "epoch": 1.3489435314469294, "grad_norm": 1.4943656047554885, "learning_rate": 5.266453458328071e-06, "loss": 0.26454097032546997, "step": 2730 }, { "epoch": 1.3494377857407636, "grad_norm": 1.3109211241308218, "learning_rate": 5.259257793874421e-06, "loss": 0.24090510606765747, "step": 2731 }, { "epoch": 1.3499320400345978, "grad_norm": 1.3390086912520884, "learning_rate": 5.252065294272528e-06, "loss": 0.27343428134918213, "step": 2732 }, { "epoch": 1.350426294328432, "grad_norm": 1.3272957509132868, "learning_rate": 5.244875964324005e-06, "loss": 0.2623448967933655, "step": 2733 }, { "epoch": 1.3509205486222662, "grad_norm": 1.2273005978142049, "learning_rate": 5.237689808828346e-06, "loss": 0.22721052169799805, "step": 2734 }, { "epoch": 1.3514148029161004, "grad_norm": 1.4111267721919942, "learning_rate": 5.230506832582924e-06, "loss": 0.26385387778282166, "step": 2735 }, { "epoch": 1.3519090572099346, "grad_norm": 1.4309565613654673, "learning_rate": 5.223327040382995e-06, "loss": 0.2679533064365387, "step": 2736 }, { "epoch": 1.3524033115037688, "grad_norm": 1.285385576934023, "learning_rate": 5.2161504370216855e-06, "loss": 0.25042447447776794, "step": 2737 }, { "epoch": 1.3528975657976028, "grad_norm": 1.3420398780717075, "learning_rate": 5.2089770272899845e-06, "loss": 0.22735297679901123, "step": 2738 }, { "epoch": 1.353391820091437, "grad_norm": 1.2715261749804811, "learning_rate": 5.201806815976772e-06, "loss": 0.25517284870147705, "step": 2739 }, { "epoch": 1.3538860743852712, "grad_norm": 1.4834789867138143, "learning_rate": 5.194639807868767e-06, "loss": 0.2942652702331543, "step": 2740 }, { "epoch": 1.3543803286791054, "grad_norm": 1.2535180106339032, "learning_rate": 5.187476007750567e-06, "loss": 0.2605661153793335, "step": 2741 }, { "epoch": 1.3548745829729396, "grad_norm": 1.34702814682356, "learning_rate": 5.1803154204046215e-06, "loss": 0.22976648807525635, "step": 2742 }, { "epoch": 1.3553688372667738, "grad_norm": 1.2786328684416228, "learning_rate": 5.173158050611236e-06, "loss": 0.24301470816135406, "step": 2743 }, { "epoch": 1.3558630915606078, "grad_norm": 1.3509518199555386, "learning_rate": 5.166003903148568e-06, "loss": 0.2714199125766754, "step": 2744 }, { "epoch": 1.356357345854442, "grad_norm": 1.4130809131188478, "learning_rate": 5.15885298279263e-06, "loss": 0.27004045248031616, "step": 2745 }, { "epoch": 1.3568516001482762, "grad_norm": 1.1866112739948385, "learning_rate": 5.151705294317262e-06, "loss": 0.2062053680419922, "step": 2746 }, { "epoch": 1.3573458544421104, "grad_norm": 1.3476275860643891, "learning_rate": 5.144560842494168e-06, "loss": 0.2589803636074066, "step": 2747 }, { "epoch": 1.3578401087359446, "grad_norm": 1.4207662826517113, "learning_rate": 5.137419632092886e-06, "loss": 0.26469242572784424, "step": 2748 }, { "epoch": 1.3583343630297788, "grad_norm": 1.217607994018294, "learning_rate": 5.130281667880774e-06, "loss": 0.26241326332092285, "step": 2749 }, { "epoch": 1.358828617323613, "grad_norm": 1.375829317891462, "learning_rate": 5.123146954623038e-06, "loss": 0.2674810290336609, "step": 2750 }, { "epoch": 1.3593228716174472, "grad_norm": 1.3872924823998294, "learning_rate": 5.116015497082719e-06, "loss": 0.23186063766479492, "step": 2751 }, { "epoch": 1.3598171259112815, "grad_norm": 1.3207469475464653, "learning_rate": 5.108887300020669e-06, "loss": 0.2794165313243866, "step": 2752 }, { "epoch": 1.3603113802051157, "grad_norm": 1.2682065300683938, "learning_rate": 5.1017623681955705e-06, "loss": 0.25263023376464844, "step": 2753 }, { "epoch": 1.3608056344989496, "grad_norm": 1.385223404499901, "learning_rate": 5.0946407063639315e-06, "loss": 0.2503500282764435, "step": 2754 }, { "epoch": 1.3612998887927839, "grad_norm": 1.1490078969357793, "learning_rate": 5.087522319280061e-06, "loss": 0.21871569752693176, "step": 2755 }, { "epoch": 1.361794143086618, "grad_norm": 1.3919853358310244, "learning_rate": 5.080407211696103e-06, "loss": 0.2790142893791199, "step": 2756 }, { "epoch": 1.3622883973804523, "grad_norm": 1.3837841689522787, "learning_rate": 5.073295388362003e-06, "loss": 0.27197304368019104, "step": 2757 }, { "epoch": 1.3627826516742865, "grad_norm": 1.3248855835987599, "learning_rate": 5.066186854025502e-06, "loss": 0.2402152568101883, "step": 2758 }, { "epoch": 1.3632769059681207, "grad_norm": 1.3193984824612894, "learning_rate": 5.059081613432162e-06, "loss": 0.24418887495994568, "step": 2759 }, { "epoch": 1.3637711602619547, "grad_norm": 1.1840901033348532, "learning_rate": 5.05197967132534e-06, "loss": 0.2239491045475006, "step": 2760 }, { "epoch": 1.3642654145557889, "grad_norm": 1.3401183348354848, "learning_rate": 5.044881032446192e-06, "loss": 0.25177091360092163, "step": 2761 }, { "epoch": 1.364759668849623, "grad_norm": 1.2524679914953787, "learning_rate": 5.0377857015336655e-06, "loss": 0.25462138652801514, "step": 2762 }, { "epoch": 1.3652539231434573, "grad_norm": 1.154660335850044, "learning_rate": 5.0306936833245034e-06, "loss": 0.21030092239379883, "step": 2763 }, { "epoch": 1.3657481774372915, "grad_norm": 1.2778480955324765, "learning_rate": 5.0236049825532355e-06, "loss": 0.24033348262310028, "step": 2764 }, { "epoch": 1.3662424317311257, "grad_norm": 1.2874693424331807, "learning_rate": 5.016519603952177e-06, "loss": 0.20803815126419067, "step": 2765 }, { "epoch": 1.36673668602496, "grad_norm": 1.3360777408248645, "learning_rate": 5.00943755225143e-06, "loss": 0.21589599549770355, "step": 2766 }, { "epoch": 1.367230940318794, "grad_norm": 1.3112690340132882, "learning_rate": 5.00235883217886e-06, "loss": 0.2690975069999695, "step": 2767 }, { "epoch": 1.3677251946126283, "grad_norm": 1.395793399890879, "learning_rate": 4.995283448460131e-06, "loss": 0.2368423044681549, "step": 2768 }, { "epoch": 1.3682194489064623, "grad_norm": 1.428306560095472, "learning_rate": 4.988211405818661e-06, "loss": 0.2801262140274048, "step": 2769 }, { "epoch": 1.3687137032002965, "grad_norm": 1.4209027545437471, "learning_rate": 4.981142708975647e-06, "loss": 0.2777586877346039, "step": 2770 }, { "epoch": 1.3692079574941307, "grad_norm": 1.1921679323806382, "learning_rate": 4.97407736265005e-06, "loss": 0.2400980144739151, "step": 2771 }, { "epoch": 1.369702211787965, "grad_norm": 1.233538906022963, "learning_rate": 4.967015371558592e-06, "loss": 0.2513861358165741, "step": 2772 }, { "epoch": 1.3701964660817991, "grad_norm": 1.2944813845771217, "learning_rate": 4.959956740415761e-06, "loss": 0.2785816490650177, "step": 2773 }, { "epoch": 1.3706907203756333, "grad_norm": 1.456856079389265, "learning_rate": 4.9529014739338e-06, "loss": 0.29092347621917725, "step": 2774 }, { "epoch": 1.3711849746694673, "grad_norm": 1.3133832748237033, "learning_rate": 4.945849576822693e-06, "loss": 0.27067384123802185, "step": 2775 }, { "epoch": 1.3716792289633015, "grad_norm": 1.3000530351478699, "learning_rate": 4.938801053790199e-06, "loss": 0.21500205993652344, "step": 2776 }, { "epoch": 1.3721734832571357, "grad_norm": 1.2838621226635265, "learning_rate": 4.931755909541808e-06, "loss": 0.2422936111688614, "step": 2777 }, { "epoch": 1.37266773755097, "grad_norm": 1.3694112071584477, "learning_rate": 4.9247141487807515e-06, "loss": 0.2760060727596283, "step": 2778 }, { "epoch": 1.3731619918448041, "grad_norm": 1.39746625445185, "learning_rate": 4.917675776208013e-06, "loss": 0.22626326978206635, "step": 2779 }, { "epoch": 1.3736562461386383, "grad_norm": 1.34096746485375, "learning_rate": 4.910640796522308e-06, "loss": 0.23023411631584167, "step": 2780 }, { "epoch": 1.3741505004324726, "grad_norm": 1.29137003736815, "learning_rate": 4.903609214420088e-06, "loss": 0.22157053649425507, "step": 2781 }, { "epoch": 1.3746447547263068, "grad_norm": 1.1801851543310786, "learning_rate": 4.89658103459554e-06, "loss": 0.24125584959983826, "step": 2782 }, { "epoch": 1.375139009020141, "grad_norm": 1.3517508821088553, "learning_rate": 4.889556261740578e-06, "loss": 0.26294079422950745, "step": 2783 }, { "epoch": 1.3756332633139752, "grad_norm": 1.2726719724151299, "learning_rate": 4.882534900544829e-06, "loss": 0.25327497720718384, "step": 2784 }, { "epoch": 1.3761275176078092, "grad_norm": 1.2868199846308948, "learning_rate": 4.875516955695663e-06, "loss": 0.2716723084449768, "step": 2785 }, { "epoch": 1.3766217719016434, "grad_norm": 1.4619117882899046, "learning_rate": 4.8685024318781615e-06, "loss": 0.2889532446861267, "step": 2786 }, { "epoch": 1.3771160261954776, "grad_norm": 1.2622088454697893, "learning_rate": 4.861491333775114e-06, "loss": 0.23743030428886414, "step": 2787 }, { "epoch": 1.3776102804893118, "grad_norm": 1.2912517641324606, "learning_rate": 4.8544836660670305e-06, "loss": 0.27180567383766174, "step": 2788 }, { "epoch": 1.378104534783146, "grad_norm": 1.3376004646586275, "learning_rate": 4.847479433432131e-06, "loss": 0.2549944221973419, "step": 2789 }, { "epoch": 1.37859878907698, "grad_norm": 1.189305404121555, "learning_rate": 4.8404786405463414e-06, "loss": 0.24112319946289062, "step": 2790 }, { "epoch": 1.3790930433708142, "grad_norm": 1.1833978049698726, "learning_rate": 4.833481292083291e-06, "loss": 0.22865869104862213, "step": 2791 }, { "epoch": 1.3795872976646484, "grad_norm": 1.268697923498799, "learning_rate": 4.82648739271431e-06, "loss": 0.24851003289222717, "step": 2792 }, { "epoch": 1.3800815519584826, "grad_norm": 1.2931223721765053, "learning_rate": 4.819496947108424e-06, "loss": 0.251456081867218, "step": 2793 }, { "epoch": 1.3805758062523168, "grad_norm": 1.4758961733623657, "learning_rate": 4.81250995993236e-06, "loss": 0.31711041927337646, "step": 2794 }, { "epoch": 1.381070060546151, "grad_norm": 1.3291779254725478, "learning_rate": 4.805526435850523e-06, "loss": 0.2204340100288391, "step": 2795 }, { "epoch": 1.3815643148399852, "grad_norm": 1.2784619373678463, "learning_rate": 4.798546379525013e-06, "loss": 0.26289406418800354, "step": 2796 }, { "epoch": 1.3820585691338194, "grad_norm": 1.28320111492484, "learning_rate": 4.7915697956156284e-06, "loss": 0.24830611050128937, "step": 2797 }, { "epoch": 1.3825528234276536, "grad_norm": 1.2879657785107324, "learning_rate": 4.784596688779825e-06, "loss": 0.24792183935642242, "step": 2798 }, { "epoch": 1.3830470777214878, "grad_norm": 1.2696074389245717, "learning_rate": 4.777627063672753e-06, "loss": 0.2689560651779175, "step": 2799 }, { "epoch": 1.3835413320153218, "grad_norm": 1.3225545388421776, "learning_rate": 4.770660924947238e-06, "loss": 0.24323254823684692, "step": 2800 }, { "epoch": 1.384035586309156, "grad_norm": 1.4076671335254063, "learning_rate": 4.7636982772537645e-06, "loss": 0.24404528737068176, "step": 2801 }, { "epoch": 1.3845298406029902, "grad_norm": 1.203765816908177, "learning_rate": 4.7567391252405075e-06, "loss": 0.23512448370456696, "step": 2802 }, { "epoch": 1.3850240948968244, "grad_norm": 1.5018331188451308, "learning_rate": 4.749783473553297e-06, "loss": 0.26446110010147095, "step": 2803 }, { "epoch": 1.3855183491906586, "grad_norm": 1.408580468005289, "learning_rate": 4.742831326835618e-06, "loss": 0.24630968272686005, "step": 2804 }, { "epoch": 1.3860126034844928, "grad_norm": 1.3358261514200123, "learning_rate": 4.735882689728628e-06, "loss": 0.253492146730423, "step": 2805 }, { "epoch": 1.3865068577783268, "grad_norm": 1.3501776737603972, "learning_rate": 4.7289375668711444e-06, "loss": 0.271090567111969, "step": 2806 }, { "epoch": 1.387001112072161, "grad_norm": 1.278147407656648, "learning_rate": 4.721995962899625e-06, "loss": 0.24045832455158234, "step": 2807 }, { "epoch": 1.3874953663659952, "grad_norm": 1.3482420589650876, "learning_rate": 4.715057882448187e-06, "loss": 0.2525935471057892, "step": 2808 }, { "epoch": 1.3879896206598294, "grad_norm": 1.6416013674407632, "learning_rate": 4.708123330148593e-06, "loss": 0.30852392315864563, "step": 2809 }, { "epoch": 1.3884838749536637, "grad_norm": 1.4379358472073636, "learning_rate": 4.701192310630253e-06, "loss": 0.2770250737667084, "step": 2810 }, { "epoch": 1.3889781292474979, "grad_norm": 1.3872314722590495, "learning_rate": 4.6942648285202154e-06, "loss": 0.29135680198669434, "step": 2811 }, { "epoch": 1.389472383541332, "grad_norm": 1.3561535153102244, "learning_rate": 4.687340888443171e-06, "loss": 0.26933860778808594, "step": 2812 }, { "epoch": 1.3899666378351663, "grad_norm": 1.3589820356083573, "learning_rate": 4.680420495021436e-06, "loss": 0.26089105010032654, "step": 2813 }, { "epoch": 1.3904608921290005, "grad_norm": 1.446680212777315, "learning_rate": 4.673503652874977e-06, "loss": 0.26031410694122314, "step": 2814 }, { "epoch": 1.3909551464228345, "grad_norm": 1.4223445911905375, "learning_rate": 4.6665903666213685e-06, "loss": 0.2887076139450073, "step": 2815 }, { "epoch": 1.3914494007166687, "grad_norm": 1.4125652827001185, "learning_rate": 4.6596806408758275e-06, "loss": 0.2360706925392151, "step": 2816 }, { "epoch": 1.3919436550105029, "grad_norm": 1.2857689419175287, "learning_rate": 4.652774480251186e-06, "loss": 0.22275522351264954, "step": 2817 }, { "epoch": 1.392437909304337, "grad_norm": 1.4433288432295395, "learning_rate": 4.645871889357899e-06, "loss": 0.2425977736711502, "step": 2818 }, { "epoch": 1.3929321635981713, "grad_norm": 1.3257241152583827, "learning_rate": 4.638972872804038e-06, "loss": 0.25219830870628357, "step": 2819 }, { "epoch": 1.3934264178920055, "grad_norm": 1.3749035761313395, "learning_rate": 4.6320774351952916e-06, "loss": 0.28060346841812134, "step": 2820 }, { "epoch": 1.3939206721858395, "grad_norm": 1.2003147708990263, "learning_rate": 4.625185581134942e-06, "loss": 0.2395240217447281, "step": 2821 }, { "epoch": 1.3944149264796737, "grad_norm": 1.1704641579429333, "learning_rate": 4.618297315223906e-06, "loss": 0.23622646927833557, "step": 2822 }, { "epoch": 1.394909180773508, "grad_norm": 1.2829625624138312, "learning_rate": 4.611412642060692e-06, "loss": 0.2189474105834961, "step": 2823 }, { "epoch": 1.395403435067342, "grad_norm": 1.433264639271618, "learning_rate": 4.6045315662414e-06, "loss": 0.266002357006073, "step": 2824 }, { "epoch": 1.3958976893611763, "grad_norm": 1.3252437693414834, "learning_rate": 4.5976540923597425e-06, "loss": 0.2402176856994629, "step": 2825 }, { "epoch": 1.3963919436550105, "grad_norm": 1.359969321526994, "learning_rate": 4.5907802250070235e-06, "loss": 0.2493474781513214, "step": 2826 }, { "epoch": 1.3968861979488447, "grad_norm": 1.41117190363675, "learning_rate": 4.583909968772137e-06, "loss": 0.25716543197631836, "step": 2827 }, { "epoch": 1.397380452242679, "grad_norm": 1.2726969842984424, "learning_rate": 4.57704332824157e-06, "loss": 0.29470473527908325, "step": 2828 }, { "epoch": 1.3978747065365131, "grad_norm": 1.3349562969336177, "learning_rate": 4.570180307999394e-06, "loss": 0.28095656633377075, "step": 2829 }, { "epoch": 1.3983689608303473, "grad_norm": 1.3296802970374444, "learning_rate": 4.563320912627256e-06, "loss": 0.2351825088262558, "step": 2830 }, { "epoch": 1.3988632151241813, "grad_norm": 1.378245480597285, "learning_rate": 4.556465146704399e-06, "loss": 0.25859856605529785, "step": 2831 }, { "epoch": 1.3993574694180155, "grad_norm": 1.3122509634402246, "learning_rate": 4.549613014807637e-06, "loss": 0.2503181993961334, "step": 2832 }, { "epoch": 1.3998517237118497, "grad_norm": 1.4164889794081637, "learning_rate": 4.542764521511345e-06, "loss": 0.26368820667266846, "step": 2833 }, { "epoch": 1.400345978005684, "grad_norm": 1.2584462742908673, "learning_rate": 4.535919671387483e-06, "loss": 0.24077676236629486, "step": 2834 }, { "epoch": 1.4008402322995182, "grad_norm": 1.3906309875331755, "learning_rate": 4.529078469005577e-06, "loss": 0.27042093873023987, "step": 2835 }, { "epoch": 1.4013344865933524, "grad_norm": 1.3047899471845867, "learning_rate": 4.5222409189327155e-06, "loss": 0.2731306552886963, "step": 2836 }, { "epoch": 1.4018287408871863, "grad_norm": 1.293016022457822, "learning_rate": 4.515407025733548e-06, "loss": 0.2925037741661072, "step": 2837 }, { "epoch": 1.4023229951810205, "grad_norm": 1.3019226114538747, "learning_rate": 4.508576793970285e-06, "loss": 0.2927025556564331, "step": 2838 }, { "epoch": 1.4028172494748548, "grad_norm": 1.2637397509173496, "learning_rate": 4.5017502282026926e-06, "loss": 0.26285338401794434, "step": 2839 }, { "epoch": 1.403311503768689, "grad_norm": 1.3147900807622677, "learning_rate": 4.49492733298809e-06, "loss": 0.22698873281478882, "step": 2840 }, { "epoch": 1.4038057580625232, "grad_norm": 1.3171706155487821, "learning_rate": 4.488108112881339e-06, "loss": 0.24116170406341553, "step": 2841 }, { "epoch": 1.4043000123563574, "grad_norm": 1.57472275672956, "learning_rate": 4.481292572434852e-06, "loss": 0.3211704194545746, "step": 2842 }, { "epoch": 1.4047942666501916, "grad_norm": 1.3631722904804857, "learning_rate": 4.474480716198598e-06, "loss": 0.26634523272514343, "step": 2843 }, { "epoch": 1.4052885209440258, "grad_norm": 1.2801660794508798, "learning_rate": 4.467672548720066e-06, "loss": 0.24751242995262146, "step": 2844 }, { "epoch": 1.40578277523786, "grad_norm": 1.2023997182117507, "learning_rate": 4.4608680745442915e-06, "loss": 0.22031354904174805, "step": 2845 }, { "epoch": 1.406277029531694, "grad_norm": 1.4549549871552898, "learning_rate": 4.454067298213847e-06, "loss": 0.2474634051322937, "step": 2846 }, { "epoch": 1.4067712838255282, "grad_norm": 1.2925543429398942, "learning_rate": 4.4472702242688315e-06, "loss": 0.2494845986366272, "step": 2847 }, { "epoch": 1.4072655381193624, "grad_norm": 1.246615378915442, "learning_rate": 4.440476857246876e-06, "loss": 0.23150494694709778, "step": 2848 }, { "epoch": 1.4077597924131966, "grad_norm": 1.3473585855048795, "learning_rate": 4.433687201683138e-06, "loss": 0.2093413770198822, "step": 2849 }, { "epoch": 1.4082540467070308, "grad_norm": 1.4247715723132508, "learning_rate": 4.426901262110287e-06, "loss": 0.26741865277290344, "step": 2850 }, { "epoch": 1.408748301000865, "grad_norm": 1.3965732526570211, "learning_rate": 4.420119043058521e-06, "loss": 0.2599044740200043, "step": 2851 }, { "epoch": 1.409242555294699, "grad_norm": 1.37695062225065, "learning_rate": 4.413340549055562e-06, "loss": 0.26934683322906494, "step": 2852 }, { "epoch": 1.4097368095885332, "grad_norm": 1.247550824996485, "learning_rate": 4.4065657846266255e-06, "loss": 0.2609720528125763, "step": 2853 }, { "epoch": 1.4102310638823674, "grad_norm": 1.3034094501092508, "learning_rate": 4.39979475429445e-06, "loss": 0.23431813716888428, "step": 2854 }, { "epoch": 1.4107253181762016, "grad_norm": 1.5127417165274348, "learning_rate": 4.39302746257928e-06, "loss": 0.2791878581047058, "step": 2855 }, { "epoch": 1.4112195724700358, "grad_norm": 1.445393105302077, "learning_rate": 4.386263913998862e-06, "loss": 0.30482247471809387, "step": 2856 }, { "epoch": 1.41171382676387, "grad_norm": 1.517774336378155, "learning_rate": 4.379504113068445e-06, "loss": 0.24561305344104767, "step": 2857 }, { "epoch": 1.4122080810577042, "grad_norm": 1.2686201180133903, "learning_rate": 4.372748064300777e-06, "loss": 0.23973286151885986, "step": 2858 }, { "epoch": 1.4127023353515384, "grad_norm": 1.2884315615066577, "learning_rate": 4.365995772206092e-06, "loss": 0.26788556575775146, "step": 2859 }, { "epoch": 1.4131965896453726, "grad_norm": 1.2479985472864645, "learning_rate": 4.359247241292136e-06, "loss": 0.22432288527488708, "step": 2860 }, { "epoch": 1.4136908439392069, "grad_norm": 1.4071442664764462, "learning_rate": 4.352502476064121e-06, "loss": 0.282687783241272, "step": 2861 }, { "epoch": 1.4141850982330408, "grad_norm": 1.350175603929749, "learning_rate": 4.345761481024761e-06, "loss": 0.2516692578792572, "step": 2862 }, { "epoch": 1.414679352526875, "grad_norm": 1.3813903906983658, "learning_rate": 4.3390242606742465e-06, "loss": 0.2473583221435547, "step": 2863 }, { "epoch": 1.4151736068207093, "grad_norm": 1.365125849897862, "learning_rate": 4.33229081951025e-06, "loss": 0.24372908473014832, "step": 2864 }, { "epoch": 1.4156678611145435, "grad_norm": 1.935117633937839, "learning_rate": 4.325561162027922e-06, "loss": 0.2877897024154663, "step": 2865 }, { "epoch": 1.4161621154083777, "grad_norm": 1.3789670558806315, "learning_rate": 4.318835292719886e-06, "loss": 0.2554720342159271, "step": 2866 }, { "epoch": 1.4166563697022119, "grad_norm": 1.400243578908533, "learning_rate": 4.312113216076228e-06, "loss": 0.26695260405540466, "step": 2867 }, { "epoch": 1.4171506239960459, "grad_norm": 1.310264039945657, "learning_rate": 4.305394936584522e-06, "loss": 0.26983851194381714, "step": 2868 }, { "epoch": 1.41764487828988, "grad_norm": 1.4664847959785403, "learning_rate": 4.298680458729793e-06, "loss": 0.303170382976532, "step": 2869 }, { "epoch": 1.4181391325837143, "grad_norm": 1.2870012899484584, "learning_rate": 4.2919697869945234e-06, "loss": 0.23217584192752838, "step": 2870 }, { "epoch": 1.4186333868775485, "grad_norm": 1.3723703910904035, "learning_rate": 4.285262925858663e-06, "loss": 0.2895517349243164, "step": 2871 }, { "epoch": 1.4191276411713827, "grad_norm": 1.3083324921698822, "learning_rate": 4.278559879799628e-06, "loss": 0.24025630950927734, "step": 2872 }, { "epoch": 1.4196218954652169, "grad_norm": 1.2827271091784578, "learning_rate": 4.271860653292263e-06, "loss": 0.22810839116573334, "step": 2873 }, { "epoch": 1.420116149759051, "grad_norm": 1.3806208017840322, "learning_rate": 4.26516525080888e-06, "loss": 0.266724169254303, "step": 2874 }, { "epoch": 1.4206104040528853, "grad_norm": 1.225057219675358, "learning_rate": 4.25847367681924e-06, "loss": 0.22618745267391205, "step": 2875 }, { "epoch": 1.4211046583467195, "grad_norm": 1.2369737958102245, "learning_rate": 4.251785935790529e-06, "loss": 0.2239789217710495, "step": 2876 }, { "epoch": 1.4215989126405535, "grad_norm": 1.4266723106614325, "learning_rate": 4.245102032187399e-06, "loss": 0.21519358456134796, "step": 2877 }, { "epoch": 1.4220931669343877, "grad_norm": 1.3543349519259755, "learning_rate": 4.2384219704719284e-06, "loss": 0.31226712465286255, "step": 2878 }, { "epoch": 1.422587421228222, "grad_norm": 1.56763311196269, "learning_rate": 4.231745755103625e-06, "loss": 0.26814836263656616, "step": 2879 }, { "epoch": 1.423081675522056, "grad_norm": 1.340943129837897, "learning_rate": 4.225073390539436e-06, "loss": 0.2369621843099594, "step": 2880 }, { "epoch": 1.4235759298158903, "grad_norm": 1.4174455321042607, "learning_rate": 4.218404881233737e-06, "loss": 0.2556746304035187, "step": 2881 }, { "epoch": 1.4240701841097245, "grad_norm": 1.4008574237374047, "learning_rate": 4.2117402316383314e-06, "loss": 0.25875598192214966, "step": 2882 }, { "epoch": 1.4245644384035585, "grad_norm": 1.3837412182941131, "learning_rate": 4.205079446202443e-06, "loss": 0.26839762926101685, "step": 2883 }, { "epoch": 1.4250586926973927, "grad_norm": 1.3404796422391116, "learning_rate": 4.198422529372717e-06, "loss": 0.2764383554458618, "step": 2884 }, { "epoch": 1.425552946991227, "grad_norm": 1.6233600341280843, "learning_rate": 4.191769485593216e-06, "loss": 0.24517112970352173, "step": 2885 }, { "epoch": 1.4260472012850611, "grad_norm": 1.2960278491651354, "learning_rate": 4.18512031930542e-06, "loss": 0.21880990266799927, "step": 2886 }, { "epoch": 1.4265414555788953, "grad_norm": 1.25547495232964, "learning_rate": 4.178475034948212e-06, "loss": 0.24671246111392975, "step": 2887 }, { "epoch": 1.4270357098727295, "grad_norm": 1.3321806455697769, "learning_rate": 4.171833636957886e-06, "loss": 0.25473371148109436, "step": 2888 }, { "epoch": 1.4275299641665637, "grad_norm": 1.2832708163920512, "learning_rate": 4.1651961297681574e-06, "loss": 0.2675618529319763, "step": 2889 }, { "epoch": 1.428024218460398, "grad_norm": 1.361777795281808, "learning_rate": 4.15856251781012e-06, "loss": 0.24357986450195312, "step": 2890 }, { "epoch": 1.4285184727542322, "grad_norm": 1.360475333723739, "learning_rate": 4.1519328055122825e-06, "loss": 0.2668409049510956, "step": 2891 }, { "epoch": 1.4290127270480664, "grad_norm": 1.237397304360782, "learning_rate": 4.145306997300543e-06, "loss": 0.24507637321949005, "step": 2892 }, { "epoch": 1.4295069813419004, "grad_norm": 1.366253286129835, "learning_rate": 4.1386850975982e-06, "loss": 0.2791709899902344, "step": 2893 }, { "epoch": 1.4300012356357346, "grad_norm": 1.2339989570889298, "learning_rate": 4.132067110825939e-06, "loss": 0.24982133507728577, "step": 2894 }, { "epoch": 1.4304954899295688, "grad_norm": 1.4357848897595227, "learning_rate": 4.125453041401835e-06, "loss": 0.2814679741859436, "step": 2895 }, { "epoch": 1.430989744223403, "grad_norm": 1.2447298736764703, "learning_rate": 4.118842893741336e-06, "loss": 0.22699782252311707, "step": 2896 }, { "epoch": 1.4314839985172372, "grad_norm": 1.9366220135779266, "learning_rate": 4.112236672257294e-06, "loss": 0.23297230899333954, "step": 2897 }, { "epoch": 1.4319782528110712, "grad_norm": 1.4169021772429402, "learning_rate": 4.1056343813599265e-06, "loss": 0.26085159182548523, "step": 2898 }, { "epoch": 1.4324725071049054, "grad_norm": 1.2947699028454482, "learning_rate": 4.0990360254568216e-06, "loss": 0.27813559770584106, "step": 2899 }, { "epoch": 1.4329667613987396, "grad_norm": 1.4648322974961994, "learning_rate": 4.092441608952953e-06, "loss": 0.2821611762046814, "step": 2900 }, { "epoch": 1.4334610156925738, "grad_norm": 1.4262304528738896, "learning_rate": 4.085851136250657e-06, "loss": 0.25223150849342346, "step": 2901 }, { "epoch": 1.433955269986408, "grad_norm": 1.2236760469459784, "learning_rate": 4.079264611749639e-06, "loss": 0.225361630320549, "step": 2902 }, { "epoch": 1.4344495242802422, "grad_norm": 1.2980114377261416, "learning_rate": 4.07268203984697e-06, "loss": 0.2564583420753479, "step": 2903 }, { "epoch": 1.4349437785740764, "grad_norm": 1.618238680371033, "learning_rate": 4.066103424937083e-06, "loss": 0.2433827817440033, "step": 2904 }, { "epoch": 1.4354380328679106, "grad_norm": 1.326779755851318, "learning_rate": 4.059528771411758e-06, "loss": 0.26073208451271057, "step": 2905 }, { "epoch": 1.4359322871617448, "grad_norm": 1.381783420476221, "learning_rate": 4.052958083660153e-06, "loss": 0.2937609553337097, "step": 2906 }, { "epoch": 1.436426541455579, "grad_norm": 1.2248682484343931, "learning_rate": 4.046391366068756e-06, "loss": 0.22026552259922028, "step": 2907 }, { "epoch": 1.436920795749413, "grad_norm": 1.2471555303405935, "learning_rate": 4.039828623021415e-06, "loss": 0.21137471497058868, "step": 2908 }, { "epoch": 1.4374150500432472, "grad_norm": 1.316365476590171, "learning_rate": 4.033269858899324e-06, "loss": 0.23597699403762817, "step": 2909 }, { "epoch": 1.4379093043370814, "grad_norm": 1.3166979356724768, "learning_rate": 4.026715078081023e-06, "loss": 0.2667025923728943, "step": 2910 }, { "epoch": 1.4384035586309156, "grad_norm": 1.2942746954451143, "learning_rate": 4.020164284942387e-06, "loss": 0.2789616584777832, "step": 2911 }, { "epoch": 1.4388978129247498, "grad_norm": 1.2105601579452838, "learning_rate": 4.013617483856637e-06, "loss": 0.23176617920398712, "step": 2912 }, { "epoch": 1.439392067218584, "grad_norm": 1.3989428986083243, "learning_rate": 4.007074679194313e-06, "loss": 0.2814248204231262, "step": 2913 }, { "epoch": 1.439886321512418, "grad_norm": 1.7399518805726892, "learning_rate": 4.000535875323307e-06, "loss": 0.26201730966567993, "step": 2914 }, { "epoch": 1.4403805758062522, "grad_norm": 1.3752450122135709, "learning_rate": 3.994001076608833e-06, "loss": 0.22517681121826172, "step": 2915 }, { "epoch": 1.4408748301000864, "grad_norm": 1.2576751634156127, "learning_rate": 3.9874702874134205e-06, "loss": 0.25220564007759094, "step": 2916 }, { "epoch": 1.4413690843939206, "grad_norm": 1.3128506030513347, "learning_rate": 3.980943512096934e-06, "loss": 0.23441332578659058, "step": 2917 }, { "epoch": 1.4418633386877548, "grad_norm": 1.1616125895518352, "learning_rate": 3.9744207550165625e-06, "loss": 0.21659764647483826, "step": 2918 }, { "epoch": 1.442357592981589, "grad_norm": 1.3726974417027011, "learning_rate": 3.967902020526797e-06, "loss": 0.21888667345046997, "step": 2919 }, { "epoch": 1.4428518472754233, "grad_norm": 2.445936326011648, "learning_rate": 3.961387312979454e-06, "loss": 0.2771157920360565, "step": 2920 }, { "epoch": 1.4433461015692575, "grad_norm": 1.312047281106489, "learning_rate": 3.9548766367236605e-06, "loss": 0.21376901865005493, "step": 2921 }, { "epoch": 1.4438403558630917, "grad_norm": 1.4472763394283668, "learning_rate": 3.948369996105849e-06, "loss": 0.2888128161430359, "step": 2922 }, { "epoch": 1.4443346101569257, "grad_norm": 1.327788891714265, "learning_rate": 3.941867395469761e-06, "loss": 0.27809786796569824, "step": 2923 }, { "epoch": 1.4448288644507599, "grad_norm": 1.377899507369851, "learning_rate": 3.935368839156443e-06, "loss": 0.2573625445365906, "step": 2924 }, { "epoch": 1.445323118744594, "grad_norm": 1.5375959387987326, "learning_rate": 3.928874331504232e-06, "loss": 0.21472841501235962, "step": 2925 }, { "epoch": 1.4458173730384283, "grad_norm": 1.2616393731465387, "learning_rate": 3.922383876848771e-06, "loss": 0.23214091360569, "step": 2926 }, { "epoch": 1.4463116273322625, "grad_norm": 1.2717196020996628, "learning_rate": 3.915897479522995e-06, "loss": 0.23830139636993408, "step": 2927 }, { "epoch": 1.4468058816260967, "grad_norm": 1.306053937449173, "learning_rate": 3.909415143857132e-06, "loss": 0.2519805431365967, "step": 2928 }, { "epoch": 1.4473001359199307, "grad_norm": 1.3548983452054761, "learning_rate": 3.9029368741786935e-06, "loss": 0.2191445231437683, "step": 2929 }, { "epoch": 1.4477943902137649, "grad_norm": 1.2448486288410623, "learning_rate": 3.896462674812482e-06, "loss": 0.2267228364944458, "step": 2930 }, { "epoch": 1.448288644507599, "grad_norm": 1.3302096442776044, "learning_rate": 3.88999255008058e-06, "loss": 0.26456522941589355, "step": 2931 }, { "epoch": 1.4487828988014333, "grad_norm": 1.3729869343228434, "learning_rate": 3.883526504302353e-06, "loss": 0.25602713227272034, "step": 2932 }, { "epoch": 1.4492771530952675, "grad_norm": 1.9847312680384686, "learning_rate": 3.877064541794435e-06, "loss": 0.2545332610607147, "step": 2933 }, { "epoch": 1.4497714073891017, "grad_norm": 1.3785644388388194, "learning_rate": 3.87060666687074e-06, "loss": 0.2846388816833496, "step": 2934 }, { "epoch": 1.450265661682936, "grad_norm": 1.4353094721790403, "learning_rate": 3.864152883842461e-06, "loss": 0.2686496376991272, "step": 2935 }, { "epoch": 1.4507599159767701, "grad_norm": 1.2943779410551872, "learning_rate": 3.857703197018044e-06, "loss": 0.2712322473526001, "step": 2936 }, { "epoch": 1.4512541702706043, "grad_norm": 1.3542096863749147, "learning_rate": 3.851257610703209e-06, "loss": 0.23492589592933655, "step": 2937 }, { "epoch": 1.4517484245644385, "grad_norm": 1.2747230322582852, "learning_rate": 3.84481612920094e-06, "loss": 0.274332731962204, "step": 2938 }, { "epoch": 1.4522426788582725, "grad_norm": 1.4107112786506069, "learning_rate": 3.838378756811475e-06, "loss": 0.250995010137558, "step": 2939 }, { "epoch": 1.4527369331521067, "grad_norm": 1.3749429977256393, "learning_rate": 3.831945497832313e-06, "loss": 0.25221261382102966, "step": 2940 }, { "epoch": 1.453231187445941, "grad_norm": 1.4826415922959744, "learning_rate": 3.825516356558211e-06, "loss": 0.2549906075000763, "step": 2941 }, { "epoch": 1.4537254417397751, "grad_norm": 1.296751596925164, "learning_rate": 3.819091337281158e-06, "loss": 0.2369248867034912, "step": 2942 }, { "epoch": 1.4542196960336093, "grad_norm": 1.3057816538242708, "learning_rate": 3.8126704442904182e-06, "loss": 0.23681433498859406, "step": 2943 }, { "epoch": 1.4547139503274436, "grad_norm": 1.237019268284654, "learning_rate": 3.806253681872486e-06, "loss": 0.24966523051261902, "step": 2944 }, { "epoch": 1.4552082046212775, "grad_norm": 1.4768369352256168, "learning_rate": 3.7998410543110954e-06, "loss": 0.28130626678466797, "step": 2945 }, { "epoch": 1.4557024589151117, "grad_norm": 1.3443210173277784, "learning_rate": 3.7934325658872275e-06, "loss": 0.2725732922554016, "step": 2946 }, { "epoch": 1.456196713208946, "grad_norm": 1.3345618379823432, "learning_rate": 3.7870282208790976e-06, "loss": 0.23695361614227295, "step": 2947 }, { "epoch": 1.4566909675027802, "grad_norm": 1.3094683367768178, "learning_rate": 3.780628023562154e-06, "loss": 0.2556610405445099, "step": 2948 }, { "epoch": 1.4571852217966144, "grad_norm": 1.29841880424943, "learning_rate": 3.7742319782090786e-06, "loss": 0.26012274622917175, "step": 2949 }, { "epoch": 1.4576794760904486, "grad_norm": 1.4612114957138427, "learning_rate": 3.7678400890897827e-06, "loss": 0.23788896203041077, "step": 2950 }, { "epoch": 1.4581737303842828, "grad_norm": 1.4390155766896275, "learning_rate": 3.7614523604713894e-06, "loss": 0.2927572727203369, "step": 2951 }, { "epoch": 1.458667984678117, "grad_norm": 1.2435143086118214, "learning_rate": 3.75506879661827e-06, "loss": 0.2254970222711563, "step": 2952 }, { "epoch": 1.4591622389719512, "grad_norm": 1.2816222898303182, "learning_rate": 3.7486894017919883e-06, "loss": 0.216854065656662, "step": 2953 }, { "epoch": 1.4596564932657852, "grad_norm": 1.1833481657982283, "learning_rate": 3.7423141802513417e-06, "loss": 0.2505137026309967, "step": 2954 }, { "epoch": 1.4601507475596194, "grad_norm": 1.2187582021965486, "learning_rate": 3.735943136252337e-06, "loss": 0.19780108332633972, "step": 2955 }, { "epoch": 1.4606450018534536, "grad_norm": 1.482633837182769, "learning_rate": 3.7295762740481923e-06, "loss": 0.26869216561317444, "step": 2956 }, { "epoch": 1.4611392561472878, "grad_norm": 1.4121232274028632, "learning_rate": 3.7232135978893336e-06, "loss": 0.28265517950057983, "step": 2957 }, { "epoch": 1.461633510441122, "grad_norm": 1.268342410891318, "learning_rate": 3.7168551120233965e-06, "loss": 0.2381918877363205, "step": 2958 }, { "epoch": 1.4621277647349562, "grad_norm": 1.3343795310746396, "learning_rate": 3.710500820695203e-06, "loss": 0.27194735407829285, "step": 2959 }, { "epoch": 1.4626220190287902, "grad_norm": 1.419071318428777, "learning_rate": 3.7041507281468e-06, "loss": 0.2611599266529083, "step": 2960 }, { "epoch": 1.4631162733226244, "grad_norm": 1.3417831313824735, "learning_rate": 3.697804838617418e-06, "loss": 0.2970972955226898, "step": 2961 }, { "epoch": 1.4636105276164586, "grad_norm": 1.3986503652920064, "learning_rate": 3.6914631563434743e-06, "loss": 0.24313557147979736, "step": 2962 }, { "epoch": 1.4641047819102928, "grad_norm": 1.21693161859368, "learning_rate": 3.685125685558587e-06, "loss": 0.23243792355060577, "step": 2963 }, { "epoch": 1.464599036204127, "grad_norm": 1.384655578733909, "learning_rate": 3.6787924304935696e-06, "loss": 0.2850711941719055, "step": 2964 }, { "epoch": 1.4650932904979612, "grad_norm": 1.2938153090671698, "learning_rate": 3.6724633953764023e-06, "loss": 0.26217392086982727, "step": 2965 }, { "epoch": 1.4655875447917954, "grad_norm": 1.3004956100522334, "learning_rate": 3.666138584432264e-06, "loss": 0.24623268842697144, "step": 2966 }, { "epoch": 1.4660817990856296, "grad_norm": 1.2765502382143128, "learning_rate": 3.6598180018835063e-06, "loss": 0.25010040402412415, "step": 2967 }, { "epoch": 1.4665760533794638, "grad_norm": 1.2806642930208934, "learning_rate": 3.6535016519496603e-06, "loss": 0.24471378326416016, "step": 2968 }, { "epoch": 1.467070307673298, "grad_norm": 1.4411992818002375, "learning_rate": 3.6471895388474323e-06, "loss": 0.2845621109008789, "step": 2969 }, { "epoch": 1.467564561967132, "grad_norm": 1.394997312403621, "learning_rate": 3.640881666790699e-06, "loss": 0.26768919825553894, "step": 2970 }, { "epoch": 1.4680588162609662, "grad_norm": 1.3707198305280583, "learning_rate": 3.6345780399904983e-06, "loss": 0.27386170625686646, "step": 2971 }, { "epoch": 1.4685530705548004, "grad_norm": 1.2413908046529407, "learning_rate": 3.628278662655055e-06, "loss": 0.259655237197876, "step": 2972 }, { "epoch": 1.4690473248486347, "grad_norm": 1.2328404027424946, "learning_rate": 3.6219835389897305e-06, "loss": 0.2234620749950409, "step": 2973 }, { "epoch": 1.4695415791424689, "grad_norm": 1.2170225214049992, "learning_rate": 3.6156926731970664e-06, "loss": 0.25133174657821655, "step": 2974 }, { "epoch": 1.4700358334363028, "grad_norm": 1.4753631122763826, "learning_rate": 3.609406069476752e-06, "loss": 0.2856005132198334, "step": 2975 }, { "epoch": 1.470530087730137, "grad_norm": 1.352763052735898, "learning_rate": 3.603123732025635e-06, "loss": 0.23760217428207397, "step": 2976 }, { "epoch": 1.4710243420239713, "grad_norm": 1.315945468844056, "learning_rate": 3.596845665037715e-06, "loss": 0.2344968169927597, "step": 2977 }, { "epoch": 1.4715185963178055, "grad_norm": 1.3513242562279373, "learning_rate": 3.5905718727041415e-06, "loss": 0.23936885595321655, "step": 2978 }, { "epoch": 1.4720128506116397, "grad_norm": 1.2281537442777626, "learning_rate": 3.584302359213204e-06, "loss": 0.24542436003684998, "step": 2979 }, { "epoch": 1.4725071049054739, "grad_norm": 1.2816242991916544, "learning_rate": 3.578037128750338e-06, "loss": 0.24754226207733154, "step": 2980 }, { "epoch": 1.473001359199308, "grad_norm": 1.3406109779820896, "learning_rate": 3.5717761854981335e-06, "loss": 0.25167495012283325, "step": 2981 }, { "epoch": 1.4734956134931423, "grad_norm": 1.2820406301810907, "learning_rate": 3.565519533636296e-06, "loss": 0.21352116763591766, "step": 2982 }, { "epoch": 1.4739898677869765, "grad_norm": 1.5800404779419173, "learning_rate": 3.5592671773416798e-06, "loss": 0.24721838533878326, "step": 2983 }, { "epoch": 1.4744841220808107, "grad_norm": 1.209332122723965, "learning_rate": 3.5530191207882705e-06, "loss": 0.2098400741815567, "step": 2984 }, { "epoch": 1.4749783763746447, "grad_norm": 1.4059961620340085, "learning_rate": 3.5467753681471784e-06, "loss": 0.27138370275497437, "step": 2985 }, { "epoch": 1.475472630668479, "grad_norm": 1.456553871591733, "learning_rate": 3.5405359235866468e-06, "loss": 0.2675255537033081, "step": 2986 }, { "epoch": 1.475966884962313, "grad_norm": 1.3852192514849078, "learning_rate": 3.5343007912720397e-06, "loss": 0.2927984893321991, "step": 2987 }, { "epoch": 1.4764611392561473, "grad_norm": 1.4840757807353469, "learning_rate": 3.5280699753658354e-06, "loss": 0.2897256910800934, "step": 2988 }, { "epoch": 1.4769553935499815, "grad_norm": 1.3162511876956198, "learning_rate": 3.521843480027646e-06, "loss": 0.25903570652008057, "step": 2989 }, { "epoch": 1.4774496478438157, "grad_norm": 1.1815962199969574, "learning_rate": 3.515621309414191e-06, "loss": 0.2097684144973755, "step": 2990 }, { "epoch": 1.4779439021376497, "grad_norm": 1.368257943211956, "learning_rate": 3.5094034676792952e-06, "loss": 0.25807827711105347, "step": 2991 }, { "epoch": 1.478438156431484, "grad_norm": 1.3326288392160186, "learning_rate": 3.503189958973906e-06, "loss": 0.24161803722381592, "step": 2992 }, { "epoch": 1.4789324107253181, "grad_norm": 1.3735233821721475, "learning_rate": 3.4969807874460717e-06, "loss": 0.2612338364124298, "step": 2993 }, { "epoch": 1.4794266650191523, "grad_norm": 1.3484776453875857, "learning_rate": 3.490775957240947e-06, "loss": 0.2529192566871643, "step": 2994 }, { "epoch": 1.4799209193129865, "grad_norm": 1.376626480795096, "learning_rate": 3.4845754725007883e-06, "loss": 0.2616920471191406, "step": 2995 }, { "epoch": 1.4804151736068207, "grad_norm": 1.1709509708234012, "learning_rate": 3.4783793373649534e-06, "loss": 0.2372770607471466, "step": 2996 }, { "epoch": 1.480909427900655, "grad_norm": 1.6683733615888718, "learning_rate": 3.4721875559698826e-06, "loss": 0.2993369996547699, "step": 2997 }, { "epoch": 1.4814036821944891, "grad_norm": 1.444631738912031, "learning_rate": 3.4660001324491354e-06, "loss": 0.2703147530555725, "step": 2998 }, { "epoch": 1.4818979364883234, "grad_norm": 1.497851135078702, "learning_rate": 3.459817070933337e-06, "loss": 0.2909662425518036, "step": 2999 }, { "epoch": 1.4823921907821573, "grad_norm": 1.4957339087199897, "learning_rate": 3.4536383755502146e-06, "loss": 0.2620519697666168, "step": 3000 }, { "epoch": 1.4828864450759915, "grad_norm": 1.4607702963487426, "learning_rate": 3.447464050424576e-06, "loss": 0.2740327715873718, "step": 3001 }, { "epoch": 1.4833806993698258, "grad_norm": 1.4051737005514326, "learning_rate": 3.441294099678314e-06, "loss": 0.2597920000553131, "step": 3002 }, { "epoch": 1.48387495366366, "grad_norm": 1.2931150222772085, "learning_rate": 3.435128527430397e-06, "loss": 0.23138844966888428, "step": 3003 }, { "epoch": 1.4843692079574942, "grad_norm": 1.4678522965018421, "learning_rate": 3.428967337796879e-06, "loss": 0.26457998156547546, "step": 3004 }, { "epoch": 1.4848634622513284, "grad_norm": 1.3435199008351797, "learning_rate": 3.4228105348908703e-06, "loss": 0.22283414006233215, "step": 3005 }, { "epoch": 1.4853577165451624, "grad_norm": 1.404722725472706, "learning_rate": 3.416658122822576e-06, "loss": 0.26169392466545105, "step": 3006 }, { "epoch": 1.4858519708389966, "grad_norm": 1.3942121909077798, "learning_rate": 3.4105101056992574e-06, "loss": 0.22738765180110931, "step": 3007 }, { "epoch": 1.4863462251328308, "grad_norm": 1.640113120385147, "learning_rate": 3.404366487625237e-06, "loss": 0.24252702295780182, "step": 3008 }, { "epoch": 1.486840479426665, "grad_norm": 1.2658350422978366, "learning_rate": 3.398227272701905e-06, "loss": 0.2192659229040146, "step": 3009 }, { "epoch": 1.4873347337204992, "grad_norm": 1.3659525117305242, "learning_rate": 3.3920924650277253e-06, "loss": 0.23824100196361542, "step": 3010 }, { "epoch": 1.4878289880143334, "grad_norm": 1.304246601014088, "learning_rate": 3.3859620686981977e-06, "loss": 0.25558948516845703, "step": 3011 }, { "epoch": 1.4883232423081676, "grad_norm": 1.2977660969069507, "learning_rate": 3.3798360878058887e-06, "loss": 0.23521414399147034, "step": 3012 }, { "epoch": 1.4888174966020018, "grad_norm": 1.5059732923775448, "learning_rate": 3.373714526440417e-06, "loss": 0.26024043560028076, "step": 3013 }, { "epoch": 1.489311750895836, "grad_norm": 1.3966534942487767, "learning_rate": 3.3675973886884506e-06, "loss": 0.2676945626735687, "step": 3014 }, { "epoch": 1.4898060051896702, "grad_norm": 1.4302757106543351, "learning_rate": 3.361484678633701e-06, "loss": 0.29499778151512146, "step": 3015 }, { "epoch": 1.4903002594835042, "grad_norm": 1.2541194356509255, "learning_rate": 3.35537640035693e-06, "loss": 0.21667227149009705, "step": 3016 }, { "epoch": 1.4907945137773384, "grad_norm": 1.5055716214820787, "learning_rate": 3.3492725579359288e-06, "loss": 0.2852727770805359, "step": 3017 }, { "epoch": 1.4912887680711726, "grad_norm": 1.3110566349547437, "learning_rate": 3.343173155445546e-06, "loss": 0.22535362839698792, "step": 3018 }, { "epoch": 1.4917830223650068, "grad_norm": 1.3390943365322368, "learning_rate": 3.3370781969576473e-06, "loss": 0.23513402044773102, "step": 3019 }, { "epoch": 1.492277276658841, "grad_norm": 1.34171251218287, "learning_rate": 3.3309876865411426e-06, "loss": 0.2343328893184662, "step": 3020 }, { "epoch": 1.4927715309526752, "grad_norm": 1.4982279835949508, "learning_rate": 3.3249016282619696e-06, "loss": 0.309964656829834, "step": 3021 }, { "epoch": 1.4932657852465092, "grad_norm": 1.4104830526650916, "learning_rate": 3.318820026183095e-06, "loss": 0.2678214907646179, "step": 3022 }, { "epoch": 1.4937600395403434, "grad_norm": 1.3871314289257326, "learning_rate": 3.312742884364508e-06, "loss": 0.24117907881736755, "step": 3023 }, { "epoch": 1.4942542938341776, "grad_norm": 1.4966526123322192, "learning_rate": 3.306670206863225e-06, "loss": 0.23572009801864624, "step": 3024 }, { "epoch": 1.4947485481280118, "grad_norm": 1.1974970903692888, "learning_rate": 3.3006019977332728e-06, "loss": 0.20058652758598328, "step": 3025 }, { "epoch": 1.495242802421846, "grad_norm": 1.4552709446661256, "learning_rate": 3.2945382610257017e-06, "loss": 0.2433123141527176, "step": 3026 }, { "epoch": 1.4957370567156802, "grad_norm": 1.330592869585441, "learning_rate": 3.2884790007885834e-06, "loss": 0.2648032009601593, "step": 3027 }, { "epoch": 1.4962313110095145, "grad_norm": 1.4274009022113794, "learning_rate": 3.2824242210669853e-06, "loss": 0.23508986830711365, "step": 3028 }, { "epoch": 1.4967255653033487, "grad_norm": 1.337116326245031, "learning_rate": 3.2763739259029946e-06, "loss": 0.2340327799320221, "step": 3029 }, { "epoch": 1.4972198195971829, "grad_norm": 1.4724312525996526, "learning_rate": 3.2703281193357028e-06, "loss": 0.24071671068668365, "step": 3030 }, { "epoch": 1.4977140738910169, "grad_norm": 1.4191732736253682, "learning_rate": 3.264286805401203e-06, "loss": 0.26332271099090576, "step": 3031 }, { "epoch": 1.498208328184851, "grad_norm": 1.266600605298302, "learning_rate": 3.2582499881325904e-06, "loss": 0.21818014979362488, "step": 3032 }, { "epoch": 1.4987025824786853, "grad_norm": 1.3340246980776698, "learning_rate": 3.2522176715599606e-06, "loss": 0.26997917890548706, "step": 3033 }, { "epoch": 1.4991968367725195, "grad_norm": 1.4818331950802985, "learning_rate": 3.2461898597103935e-06, "loss": 0.21703608334064484, "step": 3034 }, { "epoch": 1.4996910910663537, "grad_norm": 1.287764216628678, "learning_rate": 3.240166556607979e-06, "loss": 0.24345526099205017, "step": 3035 }, { "epoch": 1.5001853453601877, "grad_norm": 1.2134455175661707, "learning_rate": 3.2341477662737877e-06, "loss": 0.2428402602672577, "step": 3036 }, { "epoch": 1.5006795996540219, "grad_norm": 1.389226279044202, "learning_rate": 3.228133492725872e-06, "loss": 0.234619602560997, "step": 3037 }, { "epoch": 1.501173853947856, "grad_norm": 1.3308420188359134, "learning_rate": 3.2221237399792784e-06, "loss": 0.27995944023132324, "step": 3038 }, { "epoch": 1.5016681082416903, "grad_norm": 1.283844133259085, "learning_rate": 3.2161185120460327e-06, "loss": 0.23708665370941162, "step": 3039 }, { "epoch": 1.5021623625355245, "grad_norm": 1.3268773172813266, "learning_rate": 3.2101178129351373e-06, "loss": 0.2541486620903015, "step": 3040 }, { "epoch": 1.5026566168293587, "grad_norm": 1.2735534589560005, "learning_rate": 3.204121646652576e-06, "loss": 0.2281494140625, "step": 3041 }, { "epoch": 1.503150871123193, "grad_norm": 1.4214183804465141, "learning_rate": 3.1981300172013006e-06, "loss": 0.24793995916843414, "step": 3042 }, { "epoch": 1.503645125417027, "grad_norm": 1.3820844339773122, "learning_rate": 3.19214292858124e-06, "loss": 0.25877612829208374, "step": 3043 }, { "epoch": 1.5041393797108613, "grad_norm": 1.2606638362034603, "learning_rate": 3.1861603847892907e-06, "loss": 0.23822908103466034, "step": 3044 }, { "epoch": 1.5046336340046955, "grad_norm": 1.3375723790086107, "learning_rate": 3.1801823898193075e-06, "loss": 0.2450297623872757, "step": 3045 }, { "epoch": 1.5051278882985297, "grad_norm": 1.291286771303469, "learning_rate": 3.1742089476621176e-06, "loss": 0.23657044768333435, "step": 3046 }, { "epoch": 1.505622142592364, "grad_norm": 1.330327819651038, "learning_rate": 3.1682400623055043e-06, "loss": 0.22040539979934692, "step": 3047 }, { "epoch": 1.506116396886198, "grad_norm": 1.2295078748580162, "learning_rate": 3.162275737734213e-06, "loss": 0.24671347439289093, "step": 3048 }, { "epoch": 1.5066106511800321, "grad_norm": 1.3193055288047242, "learning_rate": 3.156315977929939e-06, "loss": 0.2590971291065216, "step": 3049 }, { "epoch": 1.5071049054738663, "grad_norm": 1.3201796395435559, "learning_rate": 3.1503607868713383e-06, "loss": 0.2650923430919647, "step": 3050 }, { "epoch": 1.5075991597677005, "grad_norm": 1.3124240495866886, "learning_rate": 3.1444101685339987e-06, "loss": 0.22146420180797577, "step": 3051 }, { "epoch": 1.5080934140615345, "grad_norm": 1.3875424644692997, "learning_rate": 3.1384641268904804e-06, "loss": 0.26743125915527344, "step": 3052 }, { "epoch": 1.5085876683553687, "grad_norm": 1.4406215302595167, "learning_rate": 3.1325226659102746e-06, "loss": 0.24730908870697021, "step": 3053 }, { "epoch": 1.509081922649203, "grad_norm": 1.3933207280707873, "learning_rate": 3.1265857895598094e-06, "loss": 0.26301079988479614, "step": 3054 }, { "epoch": 1.5095761769430371, "grad_norm": 1.2589035946994764, "learning_rate": 3.1206535018024598e-06, "loss": 0.22815877199172974, "step": 3055 }, { "epoch": 1.5100704312368713, "grad_norm": 1.533757049437193, "learning_rate": 3.114725806598544e-06, "loss": 0.25178754329681396, "step": 3056 }, { "epoch": 1.5105646855307056, "grad_norm": 1.3661154596053653, "learning_rate": 3.1088027079052973e-06, "loss": 0.20269548892974854, "step": 3057 }, { "epoch": 1.5110589398245398, "grad_norm": 1.4014331356202114, "learning_rate": 3.1028842096769006e-06, "loss": 0.25972461700439453, "step": 3058 }, { "epoch": 1.511553194118374, "grad_norm": 1.3745096869790834, "learning_rate": 3.0969703158644583e-06, "loss": 0.23313641548156738, "step": 3059 }, { "epoch": 1.5120474484122082, "grad_norm": 1.2941298023610517, "learning_rate": 3.0910610304159993e-06, "loss": 0.2359476238489151, "step": 3060 }, { "epoch": 1.5125417027060424, "grad_norm": 1.3631605592123968, "learning_rate": 3.085156357276481e-06, "loss": 0.263039767742157, "step": 3061 }, { "epoch": 1.5130359569998766, "grad_norm": 1.4414947958352682, "learning_rate": 3.0792563003877795e-06, "loss": 0.2222701609134674, "step": 3062 }, { "epoch": 1.5135302112937106, "grad_norm": 1.5152386602086467, "learning_rate": 3.0733608636886815e-06, "loss": 0.2511240839958191, "step": 3063 }, { "epoch": 1.5140244655875448, "grad_norm": 1.3426863589238012, "learning_rate": 3.0674700511149057e-06, "loss": 0.26376873254776, "step": 3064 }, { "epoch": 1.514518719881379, "grad_norm": 1.50705834278763, "learning_rate": 3.0615838665990685e-06, "loss": 0.2883176803588867, "step": 3065 }, { "epoch": 1.5150129741752132, "grad_norm": 1.4534493774446482, "learning_rate": 3.055702314070703e-06, "loss": 0.2641439437866211, "step": 3066 }, { "epoch": 1.5155072284690472, "grad_norm": 1.2206107550113217, "learning_rate": 3.049825397456252e-06, "loss": 0.22250229120254517, "step": 3067 }, { "epoch": 1.5160014827628814, "grad_norm": 1.6917159383624243, "learning_rate": 3.0439531206790585e-06, "loss": 0.291684091091156, "step": 3068 }, { "epoch": 1.5164957370567156, "grad_norm": 1.2582948861406589, "learning_rate": 3.0380854876593725e-06, "loss": 0.22581104934215546, "step": 3069 }, { "epoch": 1.5169899913505498, "grad_norm": 1.3218689478609282, "learning_rate": 3.032222502314345e-06, "loss": 0.22701920568943024, "step": 3070 }, { "epoch": 1.517484245644384, "grad_norm": 1.4011754473371674, "learning_rate": 3.0263641685580134e-06, "loss": 0.27151840925216675, "step": 3071 }, { "epoch": 1.5179784999382182, "grad_norm": 1.4319870241234463, "learning_rate": 3.0205104903013183e-06, "loss": 0.25780510902404785, "step": 3072 }, { "epoch": 1.5184727542320524, "grad_norm": 1.232949136662072, "learning_rate": 3.014661471452103e-06, "loss": 0.23905009031295776, "step": 3073 }, { "epoch": 1.5189670085258866, "grad_norm": 1.296685135563547, "learning_rate": 3.0088171159150758e-06, "loss": 0.25984710454940796, "step": 3074 }, { "epoch": 1.5194612628197208, "grad_norm": 1.5925440917505933, "learning_rate": 3.0029774275918523e-06, "loss": 0.24934321641921997, "step": 3075 }, { "epoch": 1.519955517113555, "grad_norm": 1.3570253725800296, "learning_rate": 2.997142410380921e-06, "loss": 0.24181538820266724, "step": 3076 }, { "epoch": 1.5204497714073892, "grad_norm": 1.4224922399256614, "learning_rate": 2.9913120681776586e-06, "loss": 0.28867265582084656, "step": 3077 }, { "epoch": 1.5209440257012234, "grad_norm": 1.3689537883355085, "learning_rate": 2.9854864048743183e-06, "loss": 0.25082239508628845, "step": 3078 }, { "epoch": 1.5214382799950574, "grad_norm": 1.1809552467181543, "learning_rate": 2.979665424360031e-06, "loss": 0.21152186393737793, "step": 3079 }, { "epoch": 1.5219325342888916, "grad_norm": 1.3255328033562375, "learning_rate": 2.9738491305207926e-06, "loss": 0.22989922761917114, "step": 3080 }, { "epoch": 1.5224267885827258, "grad_norm": 1.4352789035320561, "learning_rate": 2.9680375272394855e-06, "loss": 0.21606113016605377, "step": 3081 }, { "epoch": 1.5229210428765598, "grad_norm": 1.2795767684328416, "learning_rate": 2.962230618395855e-06, "loss": 0.25060969591140747, "step": 3082 }, { "epoch": 1.523415297170394, "grad_norm": 1.4409246111783223, "learning_rate": 2.9564284078665016e-06, "loss": 0.2574993371963501, "step": 3083 }, { "epoch": 1.5239095514642282, "grad_norm": 1.3476850353049301, "learning_rate": 2.9506308995249035e-06, "loss": 0.2552590072154999, "step": 3084 }, { "epoch": 1.5244038057580624, "grad_norm": 1.4294064187721107, "learning_rate": 2.9448380972413936e-06, "loss": 0.2356393188238144, "step": 3085 }, { "epoch": 1.5248980600518967, "grad_norm": 1.2956637091449177, "learning_rate": 2.939050004883164e-06, "loss": 0.25111299753189087, "step": 3086 }, { "epoch": 1.5253923143457309, "grad_norm": 1.6187968050107684, "learning_rate": 2.933266626314263e-06, "loss": 0.2713226079940796, "step": 3087 }, { "epoch": 1.525886568639565, "grad_norm": 1.371480760416421, "learning_rate": 2.92748796539559e-06, "loss": 0.2493591606616974, "step": 3088 }, { "epoch": 1.5263808229333993, "grad_norm": 1.3919253891743593, "learning_rate": 2.9217140259848984e-06, "loss": 0.2377934455871582, "step": 3089 }, { "epoch": 1.5268750772272335, "grad_norm": 1.222188939870737, "learning_rate": 2.9159448119367896e-06, "loss": 0.23113523423671722, "step": 3090 }, { "epoch": 1.5273693315210677, "grad_norm": 1.3071786210451368, "learning_rate": 2.910180327102702e-06, "loss": 0.2212657630443573, "step": 3091 }, { "epoch": 1.527863585814902, "grad_norm": 1.4809706556535216, "learning_rate": 2.904420575330923e-06, "loss": 0.3317147195339203, "step": 3092 }, { "epoch": 1.528357840108736, "grad_norm": 1.222501836116789, "learning_rate": 2.8986655604665914e-06, "loss": 0.21677865087985992, "step": 3093 }, { "epoch": 1.52885209440257, "grad_norm": 1.4687657258901345, "learning_rate": 2.892915286351663e-06, "loss": 0.2719038724899292, "step": 3094 }, { "epoch": 1.5293463486964043, "grad_norm": 1.4800981330468082, "learning_rate": 2.887169756824941e-06, "loss": 0.2870655953884125, "step": 3095 }, { "epoch": 1.5298406029902385, "grad_norm": 1.6050530390151894, "learning_rate": 2.8814289757220636e-06, "loss": 0.27370864152908325, "step": 3096 }, { "epoch": 1.5303348572840727, "grad_norm": 1.2925821727625635, "learning_rate": 2.8756929468754834e-06, "loss": 0.24579623341560364, "step": 3097 }, { "epoch": 1.5308291115779067, "grad_norm": 1.5466324939604184, "learning_rate": 2.869961674114501e-06, "loss": 0.25092196464538574, "step": 3098 }, { "epoch": 1.531323365871741, "grad_norm": 1.539826368870157, "learning_rate": 2.864235161265232e-06, "loss": 0.29637211561203003, "step": 3099 }, { "epoch": 1.531817620165575, "grad_norm": 1.346232107313421, "learning_rate": 2.8585134121506086e-06, "loss": 0.24216854572296143, "step": 3100 }, { "epoch": 1.5323118744594093, "grad_norm": 1.264644352464564, "learning_rate": 2.8527964305903887e-06, "loss": 0.2050018608570099, "step": 3101 }, { "epoch": 1.5328061287532435, "grad_norm": 1.4429594327267479, "learning_rate": 2.8470842204011562e-06, "loss": 0.2323600798845291, "step": 3102 }, { "epoch": 1.5333003830470777, "grad_norm": 1.3588986581117766, "learning_rate": 2.8413767853962937e-06, "loss": 0.2582741379737854, "step": 3103 }, { "epoch": 1.533794637340912, "grad_norm": 1.2503142010331656, "learning_rate": 2.8356741293860034e-06, "loss": 0.2190069705247879, "step": 3104 }, { "epoch": 1.5342888916347461, "grad_norm": 1.2700906528895424, "learning_rate": 2.8299762561773004e-06, "loss": 0.2293972671031952, "step": 3105 }, { "epoch": 1.5347831459285803, "grad_norm": 1.4604730845156306, "learning_rate": 2.8242831695740004e-06, "loss": 0.28793102502822876, "step": 3106 }, { "epoch": 1.5352774002224145, "grad_norm": 1.3871033704581968, "learning_rate": 2.8185948733767276e-06, "loss": 0.25700464844703674, "step": 3107 }, { "epoch": 1.5357716545162488, "grad_norm": 1.6036334059609652, "learning_rate": 2.8129113713829115e-06, "loss": 0.2633448541164398, "step": 3108 }, { "epoch": 1.5362659088100827, "grad_norm": 1.2623866770143863, "learning_rate": 2.8072326673867667e-06, "loss": 0.2363145351409912, "step": 3109 }, { "epoch": 1.536760163103917, "grad_norm": 1.3073287831639788, "learning_rate": 2.8015587651793273e-06, "loss": 0.24324053525924683, "step": 3110 }, { "epoch": 1.5372544173977511, "grad_norm": 1.445888976457047, "learning_rate": 2.795889668548399e-06, "loss": 0.24139198660850525, "step": 3111 }, { "epoch": 1.5377486716915854, "grad_norm": 1.3070463104686283, "learning_rate": 2.790225381278595e-06, "loss": 0.2502334713935852, "step": 3112 }, { "epoch": 1.5382429259854193, "grad_norm": 1.3233606598015195, "learning_rate": 2.784565907151311e-06, "loss": 0.24635109305381775, "step": 3113 }, { "epoch": 1.5387371802792535, "grad_norm": 1.236974627125298, "learning_rate": 2.7789112499447312e-06, "loss": 0.2299586534500122, "step": 3114 }, { "epoch": 1.5392314345730878, "grad_norm": 1.232633224868461, "learning_rate": 2.7732614134338243e-06, "loss": 0.2296627312898636, "step": 3115 }, { "epoch": 1.539725688866922, "grad_norm": 1.3919487561893158, "learning_rate": 2.767616401390343e-06, "loss": 0.26127320528030396, "step": 3116 }, { "epoch": 1.5402199431607562, "grad_norm": 1.3612758454379796, "learning_rate": 2.761976217582808e-06, "loss": 0.24718445539474487, "step": 3117 }, { "epoch": 1.5407141974545904, "grad_norm": 1.3000063965271036, "learning_rate": 2.7563408657765345e-06, "loss": 0.22314362227916718, "step": 3118 }, { "epoch": 1.5412084517484246, "grad_norm": 1.2190954536725822, "learning_rate": 2.750710349733602e-06, "loss": 0.2288416028022766, "step": 3119 }, { "epoch": 1.5417027060422588, "grad_norm": 1.3774388084670495, "learning_rate": 2.7450846732128577e-06, "loss": 0.26181158423423767, "step": 3120 }, { "epoch": 1.542196960336093, "grad_norm": 1.2123920647911897, "learning_rate": 2.739463839969926e-06, "loss": 0.22397834062576294, "step": 3121 }, { "epoch": 1.5426912146299272, "grad_norm": 1.4361842348504215, "learning_rate": 2.7338478537571943e-06, "loss": 0.23633858561515808, "step": 3122 }, { "epoch": 1.5431854689237614, "grad_norm": 1.402092217147563, "learning_rate": 2.7282367183238143e-06, "loss": 0.26719149947166443, "step": 3123 }, { "epoch": 1.5436797232175956, "grad_norm": 1.5260713360749147, "learning_rate": 2.722630437415701e-06, "loss": 0.2882165014743805, "step": 3124 }, { "epoch": 1.5441739775114296, "grad_norm": 1.258294682394544, "learning_rate": 2.7170290147755285e-06, "loss": 0.2377905696630478, "step": 3125 }, { "epoch": 1.5446682318052638, "grad_norm": 1.3195147017546947, "learning_rate": 2.7114324541427193e-06, "loss": 0.2705368399620056, "step": 3126 }, { "epoch": 1.545162486099098, "grad_norm": 1.2857701503132921, "learning_rate": 2.7058407592534663e-06, "loss": 0.246593177318573, "step": 3127 }, { "epoch": 1.5456567403929322, "grad_norm": 1.33265619524068, "learning_rate": 2.700253933840705e-06, "loss": 0.2339816391468048, "step": 3128 }, { "epoch": 1.5461509946867662, "grad_norm": 1.3254997645322988, "learning_rate": 2.6946719816341127e-06, "loss": 0.2727898359298706, "step": 3129 }, { "epoch": 1.5466452489806004, "grad_norm": 1.483440007746236, "learning_rate": 2.6890949063601255e-06, "loss": 0.285343736410141, "step": 3130 }, { "epoch": 1.5471395032744346, "grad_norm": 1.4219498161281177, "learning_rate": 2.6835227117419184e-06, "loss": 0.25782397389411926, "step": 3131 }, { "epoch": 1.5476337575682688, "grad_norm": 1.4096561970820742, "learning_rate": 2.67795540149941e-06, "loss": 0.26677054166793823, "step": 3132 }, { "epoch": 1.548128011862103, "grad_norm": 1.375758748898483, "learning_rate": 2.6723929793492555e-06, "loss": 0.2696993052959442, "step": 3133 }, { "epoch": 1.5486222661559372, "grad_norm": 1.3214248540646165, "learning_rate": 2.66683544900485e-06, "loss": 0.2536013424396515, "step": 3134 }, { "epoch": 1.5491165204497714, "grad_norm": 1.352660590997614, "learning_rate": 2.661282814176319e-06, "loss": 0.2583885192871094, "step": 3135 }, { "epoch": 1.5496107747436056, "grad_norm": 1.3555750519784333, "learning_rate": 2.655735078570528e-06, "loss": 0.24341340363025665, "step": 3136 }, { "epoch": 1.5501050290374399, "grad_norm": 1.3694743065317843, "learning_rate": 2.650192245891059e-06, "loss": 0.2575637698173523, "step": 3137 }, { "epoch": 1.550599283331274, "grad_norm": 1.3743479794773286, "learning_rate": 2.644654319838227e-06, "loss": 0.24109753966331482, "step": 3138 }, { "epoch": 1.5510935376251083, "grad_norm": 1.2822421062589742, "learning_rate": 2.6391213041090822e-06, "loss": 0.246525377035141, "step": 3139 }, { "epoch": 1.5515877919189422, "grad_norm": 1.3144657839500415, "learning_rate": 2.6335932023973777e-06, "loss": 0.2589566111564636, "step": 3140 }, { "epoch": 1.5520820462127765, "grad_norm": 1.333811387247849, "learning_rate": 2.628070018393598e-06, "loss": 0.26198744773864746, "step": 3141 }, { "epoch": 1.5525763005066107, "grad_norm": 1.2808916237604833, "learning_rate": 2.622551755784942e-06, "loss": 0.22991782426834106, "step": 3142 }, { "epoch": 1.5530705548004449, "grad_norm": 1.242582313641482, "learning_rate": 2.6170384182553244e-06, "loss": 0.22211629152297974, "step": 3143 }, { "epoch": 1.5535648090942789, "grad_norm": 1.306994517774283, "learning_rate": 2.6115300094853666e-06, "loss": 0.2665289640426636, "step": 3144 }, { "epoch": 1.554059063388113, "grad_norm": 1.260713008188702, "learning_rate": 2.6060265331524114e-06, "loss": 0.20211085677146912, "step": 3145 }, { "epoch": 1.5545533176819473, "grad_norm": 1.3930467289400041, "learning_rate": 2.6005279929304918e-06, "loss": 0.24264919757843018, "step": 3146 }, { "epoch": 1.5550475719757815, "grad_norm": 1.316241217623005, "learning_rate": 2.595034392490354e-06, "loss": 0.2722601294517517, "step": 3147 }, { "epoch": 1.5555418262696157, "grad_norm": 1.3463437829147908, "learning_rate": 2.58954573549946e-06, "loss": 0.26061201095581055, "step": 3148 }, { "epoch": 1.5560360805634499, "grad_norm": 1.3701131034296847, "learning_rate": 2.5840620256219464e-06, "loss": 0.20620305836200714, "step": 3149 }, { "epoch": 1.556530334857284, "grad_norm": 1.3323948648350379, "learning_rate": 2.578583266518664e-06, "loss": 0.2424723207950592, "step": 3150 }, { "epoch": 1.5570245891511183, "grad_norm": 1.4286998078779003, "learning_rate": 2.573109461847153e-06, "loss": 0.248019739985466, "step": 3151 }, { "epoch": 1.5575188434449525, "grad_norm": 1.2753051030343154, "learning_rate": 2.5676406152616483e-06, "loss": 0.23162522912025452, "step": 3152 }, { "epoch": 1.5580130977387867, "grad_norm": 1.6072180292151754, "learning_rate": 2.562176730413074e-06, "loss": 0.20099176466464996, "step": 3153 }, { "epoch": 1.558507352032621, "grad_norm": 1.4868098360756863, "learning_rate": 2.5567178109490433e-06, "loss": 0.27957430481910706, "step": 3154 }, { "epoch": 1.5590016063264551, "grad_norm": 1.248830156095604, "learning_rate": 2.551263860513845e-06, "loss": 0.23941464722156525, "step": 3155 }, { "epoch": 1.559495860620289, "grad_norm": 1.4371594834198067, "learning_rate": 2.5458148827484695e-06, "loss": 0.24910275638103485, "step": 3156 }, { "epoch": 1.5599901149141233, "grad_norm": 1.325153365111165, "learning_rate": 2.540370881290568e-06, "loss": 0.26430344581604004, "step": 3157 }, { "epoch": 1.5604843692079575, "grad_norm": 1.419775898075986, "learning_rate": 2.534931859774481e-06, "loss": 0.2833614945411682, "step": 3158 }, { "epoch": 1.5609786235017915, "grad_norm": 1.2863995969426358, "learning_rate": 2.5294978218312215e-06, "loss": 0.24630708992481232, "step": 3159 }, { "epoch": 1.5614728777956257, "grad_norm": 1.398973984381973, "learning_rate": 2.524068771088476e-06, "loss": 0.2674857974052429, "step": 3160 }, { "epoch": 1.56196713208946, "grad_norm": 1.34356245737179, "learning_rate": 2.5186447111706005e-06, "loss": 0.23531441390514374, "step": 3161 }, { "epoch": 1.5624613863832941, "grad_norm": 1.2374731185400574, "learning_rate": 2.5132256456986236e-06, "loss": 0.2603223919868469, "step": 3162 }, { "epoch": 1.5629556406771283, "grad_norm": 1.302457785178724, "learning_rate": 2.5078115782902267e-06, "loss": 0.220007985830307, "step": 3163 }, { "epoch": 1.5634498949709625, "grad_norm": 1.36046018530454, "learning_rate": 2.502402512559773e-06, "loss": 0.22660651803016663, "step": 3164 }, { "epoch": 1.5639441492647967, "grad_norm": 1.4627286861974862, "learning_rate": 2.4969984521182766e-06, "loss": 0.26425695419311523, "step": 3165 }, { "epoch": 1.564438403558631, "grad_norm": 1.3019070428865334, "learning_rate": 2.4915994005734057e-06, "loss": 0.22870787978172302, "step": 3166 }, { "epoch": 1.5649326578524652, "grad_norm": 1.2622414815912377, "learning_rate": 2.48620536152949e-06, "loss": 0.25734084844589233, "step": 3167 }, { "epoch": 1.5654269121462994, "grad_norm": 1.2954820564672134, "learning_rate": 2.4808163385875226e-06, "loss": 0.24831843376159668, "step": 3168 }, { "epoch": 1.5659211664401336, "grad_norm": 1.3356720372460569, "learning_rate": 2.4754323353451284e-06, "loss": 0.2389685958623886, "step": 3169 }, { "epoch": 1.5664154207339678, "grad_norm": 1.33182477221405, "learning_rate": 2.4700533553965946e-06, "loss": 0.24750663340091705, "step": 3170 }, { "epoch": 1.5669096750278018, "grad_norm": 1.4158946259185428, "learning_rate": 2.4646794023328525e-06, "loss": 0.2689003348350525, "step": 3171 }, { "epoch": 1.567403929321636, "grad_norm": 1.32371836304635, "learning_rate": 2.45931047974147e-06, "loss": 0.2574145197868347, "step": 3172 }, { "epoch": 1.5678981836154702, "grad_norm": 1.5403400973166155, "learning_rate": 2.4539465912066706e-06, "loss": 0.2586211562156677, "step": 3173 }, { "epoch": 1.5683924379093044, "grad_norm": 1.340393455505496, "learning_rate": 2.4485877403093095e-06, "loss": 0.26383671164512634, "step": 3174 }, { "epoch": 1.5688866922031384, "grad_norm": 1.2806590186816509, "learning_rate": 2.4432339306268736e-06, "loss": 0.28196123242378235, "step": 3175 }, { "epoch": 1.5693809464969726, "grad_norm": 1.4692337066995136, "learning_rate": 2.4378851657334923e-06, "loss": 0.2736835181713104, "step": 3176 }, { "epoch": 1.5698752007908068, "grad_norm": 1.3442483287569258, "learning_rate": 2.4325414491999255e-06, "loss": 0.2316201627254486, "step": 3177 }, { "epoch": 1.570369455084641, "grad_norm": 1.363437265904272, "learning_rate": 2.427202784593562e-06, "loss": 0.23955810070037842, "step": 3178 }, { "epoch": 1.5708637093784752, "grad_norm": 1.4240865879172782, "learning_rate": 2.4218691754784162e-06, "loss": 0.263042151927948, "step": 3179 }, { "epoch": 1.5713579636723094, "grad_norm": 1.3283544396978941, "learning_rate": 2.4165406254151312e-06, "loss": 0.25570976734161377, "step": 3180 }, { "epoch": 1.5718522179661436, "grad_norm": 1.3508561425487733, "learning_rate": 2.4112171379609696e-06, "loss": 0.2503488063812256, "step": 3181 }, { "epoch": 1.5723464722599778, "grad_norm": 1.2731349274514334, "learning_rate": 2.40589871666982e-06, "loss": 0.21815824508666992, "step": 3182 }, { "epoch": 1.572840726553812, "grad_norm": 1.4354076907799536, "learning_rate": 2.400585365092177e-06, "loss": 0.23936739563941956, "step": 3183 }, { "epoch": 1.5733349808476462, "grad_norm": 1.2459112031686363, "learning_rate": 2.3952770867751595e-06, "loss": 0.2618086636066437, "step": 3184 }, { "epoch": 1.5738292351414804, "grad_norm": 1.477109441631464, "learning_rate": 2.3899738852625065e-06, "loss": 0.2852020263671875, "step": 3185 }, { "epoch": 1.5743234894353144, "grad_norm": 1.4364121007652697, "learning_rate": 2.3846757640945505e-06, "loss": 0.28860047459602356, "step": 3186 }, { "epoch": 1.5748177437291486, "grad_norm": 1.2738328733534112, "learning_rate": 2.3793827268082446e-06, "loss": 0.2397383451461792, "step": 3187 }, { "epoch": 1.5753119980229828, "grad_norm": 1.3548543446694599, "learning_rate": 2.374094776937145e-06, "loss": 0.25204962491989136, "step": 3188 }, { "epoch": 1.575806252316817, "grad_norm": 1.2908932541507008, "learning_rate": 2.368811918011411e-06, "loss": 0.21216189861297607, "step": 3189 }, { "epoch": 1.576300506610651, "grad_norm": 1.4719289728075926, "learning_rate": 2.363534153557805e-06, "loss": 0.2647620737552643, "step": 3190 }, { "epoch": 1.5767947609044852, "grad_norm": 1.4154428976481128, "learning_rate": 2.358261487099688e-06, "loss": 0.3079666793346405, "step": 3191 }, { "epoch": 1.5772890151983194, "grad_norm": 1.1847060614906242, "learning_rate": 2.352993922157013e-06, "loss": 0.22961711883544922, "step": 3192 }, { "epoch": 1.5777832694921536, "grad_norm": 1.5460794294977342, "learning_rate": 2.347731462246331e-06, "loss": 0.2657305598258972, "step": 3193 }, { "epoch": 1.5782775237859878, "grad_norm": 1.2622234684788671, "learning_rate": 2.3424741108807914e-06, "loss": 0.224237859249115, "step": 3194 }, { "epoch": 1.578771778079822, "grad_norm": 1.4036688905605132, "learning_rate": 2.337221871570121e-06, "loss": 0.26459985971450806, "step": 3195 }, { "epoch": 1.5792660323736563, "grad_norm": 1.4237290486306964, "learning_rate": 2.331974747820641e-06, "loss": 0.25391027331352234, "step": 3196 }, { "epoch": 1.5797602866674905, "grad_norm": 1.3683418214908574, "learning_rate": 2.326732743135256e-06, "loss": 0.25822141766548157, "step": 3197 }, { "epoch": 1.5802545409613247, "grad_norm": 1.3569651988075904, "learning_rate": 2.3214958610134554e-06, "loss": 0.25140073895454407, "step": 3198 }, { "epoch": 1.5807487952551589, "grad_norm": 1.280802230226295, "learning_rate": 2.3162641049513035e-06, "loss": 0.2550397515296936, "step": 3199 }, { "epoch": 1.581243049548993, "grad_norm": 1.3770416210337255, "learning_rate": 2.3110374784414526e-06, "loss": 0.2648996412754059, "step": 3200 }, { "epoch": 1.5817373038428273, "grad_norm": 1.285627272529884, "learning_rate": 2.3058159849731134e-06, "loss": 0.235626682639122, "step": 3201 }, { "epoch": 1.5822315581366613, "grad_norm": 1.354562155318599, "learning_rate": 2.3005996280320873e-06, "loss": 0.24930328130722046, "step": 3202 }, { "epoch": 1.5827258124304955, "grad_norm": 1.299026803187305, "learning_rate": 2.2953884111007428e-06, "loss": 0.23712117969989777, "step": 3203 }, { "epoch": 1.5832200667243297, "grad_norm": 1.4407443338733177, "learning_rate": 2.290182337658007e-06, "loss": 0.2504096031188965, "step": 3204 }, { "epoch": 1.583714321018164, "grad_norm": 1.345261370550347, "learning_rate": 2.2849814111793823e-06, "loss": 0.2218465358018875, "step": 3205 }, { "epoch": 1.5842085753119979, "grad_norm": 1.3818182639369938, "learning_rate": 2.279785635136933e-06, "loss": 0.2653011977672577, "step": 3206 }, { "epoch": 1.584702829605832, "grad_norm": 1.521658991035551, "learning_rate": 2.2745950129992853e-06, "loss": 0.27551597356796265, "step": 3207 }, { "epoch": 1.5851970838996663, "grad_norm": 1.2816405701256748, "learning_rate": 2.2694095482316247e-06, "loss": 0.21494519710540771, "step": 3208 }, { "epoch": 1.5856913381935005, "grad_norm": 1.2804333364342155, "learning_rate": 2.2642292442956925e-06, "loss": 0.2517405152320862, "step": 3209 }, { "epoch": 1.5861855924873347, "grad_norm": 1.365131298274178, "learning_rate": 2.259054104649786e-06, "loss": 0.25777050852775574, "step": 3210 }, { "epoch": 1.586679846781169, "grad_norm": 1.3722239172040558, "learning_rate": 2.2538841327487582e-06, "loss": 0.25914469361305237, "step": 3211 }, { "epoch": 1.5871741010750031, "grad_norm": 1.3924091851436682, "learning_rate": 2.2487193320440017e-06, "loss": 0.23877818882465363, "step": 3212 }, { "epoch": 1.5876683553688373, "grad_norm": 1.2757007530985867, "learning_rate": 2.2435597059834635e-06, "loss": 0.2226967066526413, "step": 3213 }, { "epoch": 1.5881626096626715, "grad_norm": 1.400079876174728, "learning_rate": 2.2384052580116465e-06, "loss": 0.28768399357795715, "step": 3214 }, { "epoch": 1.5886568639565057, "grad_norm": 1.3700126786923876, "learning_rate": 2.233255991569575e-06, "loss": 0.2563883662223816, "step": 3215 }, { "epoch": 1.58915111825034, "grad_norm": 1.3688176323163237, "learning_rate": 2.2281119100948322e-06, "loss": 0.2595394551753998, "step": 3216 }, { "epoch": 1.589645372544174, "grad_norm": 1.2924408591101029, "learning_rate": 2.2229730170215324e-06, "loss": 0.2354460060596466, "step": 3217 }, { "epoch": 1.5901396268380081, "grad_norm": 1.3015321221613778, "learning_rate": 2.2178393157803225e-06, "loss": 0.2397463619709015, "step": 3218 }, { "epoch": 1.5906338811318423, "grad_norm": 1.5213512082778142, "learning_rate": 2.212710809798393e-06, "loss": 0.3304588794708252, "step": 3219 }, { "epoch": 1.5911281354256765, "grad_norm": 1.2850321771259765, "learning_rate": 2.207587502499464e-06, "loss": 0.23891952633857727, "step": 3220 }, { "epoch": 1.5916223897195105, "grad_norm": 1.3962733600240735, "learning_rate": 2.2024693973037747e-06, "loss": 0.2544774115085602, "step": 3221 }, { "epoch": 1.5921166440133447, "grad_norm": 1.4193118785950918, "learning_rate": 2.1973564976281003e-06, "loss": 0.2620859444141388, "step": 3222 }, { "epoch": 1.592610898307179, "grad_norm": 1.2794541555838774, "learning_rate": 2.192248806885747e-06, "loss": 0.22541281580924988, "step": 3223 }, { "epoch": 1.5931051526010132, "grad_norm": 1.2886654382919192, "learning_rate": 2.187146328486529e-06, "loss": 0.23454351723194122, "step": 3224 }, { "epoch": 1.5935994068948474, "grad_norm": 1.3236984572567387, "learning_rate": 2.18204906583679e-06, "loss": 0.24848732352256775, "step": 3225 }, { "epoch": 1.5940936611886816, "grad_norm": 1.2049251354008288, "learning_rate": 2.176957022339389e-06, "loss": 0.21949590742588043, "step": 3226 }, { "epoch": 1.5945879154825158, "grad_norm": 1.3436660246382202, "learning_rate": 2.171870201393703e-06, "loss": 0.27300944924354553, "step": 3227 }, { "epoch": 1.59508216977635, "grad_norm": 1.3272049247129862, "learning_rate": 2.16678860639562e-06, "loss": 0.23850613832473755, "step": 3228 }, { "epoch": 1.5955764240701842, "grad_norm": 1.4611172116234663, "learning_rate": 2.1617122407375424e-06, "loss": 0.2728792428970337, "step": 3229 }, { "epoch": 1.5960706783640184, "grad_norm": 1.4623582491499052, "learning_rate": 2.1566411078083726e-06, "loss": 0.2321755588054657, "step": 3230 }, { "epoch": 1.5965649326578526, "grad_norm": 1.5111460470858884, "learning_rate": 2.1515752109935374e-06, "loss": 0.30118101835250854, "step": 3231 }, { "epoch": 1.5970591869516868, "grad_norm": 1.2041348970592753, "learning_rate": 2.1465145536749475e-06, "loss": 0.22317390143871307, "step": 3232 }, { "epoch": 1.5975534412455208, "grad_norm": 1.4530812438401597, "learning_rate": 2.141459139231029e-06, "loss": 0.2906285524368286, "step": 3233 }, { "epoch": 1.598047695539355, "grad_norm": 1.3996891865587815, "learning_rate": 2.136408971036704e-06, "loss": 0.24645069241523743, "step": 3234 }, { "epoch": 1.5985419498331892, "grad_norm": 1.4725365119055005, "learning_rate": 2.1313640524633927e-06, "loss": 0.26764121651649475, "step": 3235 }, { "epoch": 1.5990362041270234, "grad_norm": 1.4093554565168636, "learning_rate": 2.126324386879012e-06, "loss": 0.2811397910118103, "step": 3236 }, { "epoch": 1.5995304584208574, "grad_norm": 1.5104838755570678, "learning_rate": 2.121289977647971e-06, "loss": 0.254316508769989, "step": 3237 }, { "epoch": 1.6000247127146916, "grad_norm": 1.378432417546232, "learning_rate": 2.1162608281311636e-06, "loss": 0.2479352205991745, "step": 3238 }, { "epoch": 1.6005189670085258, "grad_norm": 1.200711868039053, "learning_rate": 2.1112369416859847e-06, "loss": 0.22767537832260132, "step": 3239 }, { "epoch": 1.60101322130236, "grad_norm": 1.3194506024522585, "learning_rate": 2.106218321666309e-06, "loss": 0.24286411702632904, "step": 3240 }, { "epoch": 1.6015074755961942, "grad_norm": 1.4143801874217299, "learning_rate": 2.1012049714224914e-06, "loss": 0.22960595786571503, "step": 3241 }, { "epoch": 1.6020017298900284, "grad_norm": 1.3710437918045983, "learning_rate": 2.0961968943013742e-06, "loss": 0.2448965162038803, "step": 3242 }, { "epoch": 1.6024959841838626, "grad_norm": 1.3544107087641921, "learning_rate": 2.0911940936462794e-06, "loss": 0.23486846685409546, "step": 3243 }, { "epoch": 1.6029902384776968, "grad_norm": 1.576868207611872, "learning_rate": 2.0861965727970045e-06, "loss": 0.2470572590827942, "step": 3244 }, { "epoch": 1.603484492771531, "grad_norm": 1.3371604672673962, "learning_rate": 2.0812043350898226e-06, "loss": 0.283765971660614, "step": 3245 }, { "epoch": 1.6039787470653653, "grad_norm": 1.3089763025714083, "learning_rate": 2.076217383857484e-06, "loss": 0.24943199753761292, "step": 3246 }, { "epoch": 1.6044730013591995, "grad_norm": 1.2872721191375163, "learning_rate": 2.0712357224291966e-06, "loss": 0.22150146961212158, "step": 3247 }, { "epoch": 1.6049672556530334, "grad_norm": 1.3204310548386595, "learning_rate": 2.0662593541306563e-06, "loss": 0.2610163390636444, "step": 3248 }, { "epoch": 1.6054615099468676, "grad_norm": 1.243779369506435, "learning_rate": 2.0612882822840154e-06, "loss": 0.22789397835731506, "step": 3249 }, { "epoch": 1.6059557642407019, "grad_norm": 1.3699765130937176, "learning_rate": 2.056322510207882e-06, "loss": 0.22956407070159912, "step": 3250 }, { "epoch": 1.606450018534536, "grad_norm": 1.3752485526796745, "learning_rate": 2.051362041217341e-06, "loss": 0.2579299509525299, "step": 3251 }, { "epoch": 1.60694427282837, "grad_norm": 1.449594870075983, "learning_rate": 2.046406878623929e-06, "loss": 0.24655218422412872, "step": 3252 }, { "epoch": 1.6074385271222043, "grad_norm": 1.8413073723455704, "learning_rate": 2.0414570257356415e-06, "loss": 0.2325882464647293, "step": 3253 }, { "epoch": 1.6079327814160385, "grad_norm": 1.3704743037638702, "learning_rate": 2.0365124858569294e-06, "loss": 0.2678581476211548, "step": 3254 }, { "epoch": 1.6084270357098727, "grad_norm": 1.3329052595945479, "learning_rate": 2.0315732622886976e-06, "loss": 0.23200136423110962, "step": 3255 }, { "epoch": 1.6089212900037069, "grad_norm": 1.3902434854443921, "learning_rate": 2.0266393583283015e-06, "loss": 0.24957536160945892, "step": 3256 }, { "epoch": 1.609415544297541, "grad_norm": 1.3655551679458238, "learning_rate": 2.0217107772695467e-06, "loss": 0.2506657540798187, "step": 3257 }, { "epoch": 1.6099097985913753, "grad_norm": 1.3749666602598227, "learning_rate": 2.0167875224026788e-06, "loss": 0.22255182266235352, "step": 3258 }, { "epoch": 1.6104040528852095, "grad_norm": 1.222166534445823, "learning_rate": 2.011869597014392e-06, "loss": 0.2489611655473709, "step": 3259 }, { "epoch": 1.6108983071790437, "grad_norm": 1.3283456479938487, "learning_rate": 2.0069570043878305e-06, "loss": 0.24808533489704132, "step": 3260 }, { "epoch": 1.611392561472878, "grad_norm": 1.3963788000978605, "learning_rate": 2.0020497478025635e-06, "loss": 0.24013441801071167, "step": 3261 }, { "epoch": 1.6118868157667121, "grad_norm": 1.4500389435817727, "learning_rate": 1.997147830534608e-06, "loss": 0.2813841998577118, "step": 3262 }, { "epoch": 1.612381070060546, "grad_norm": 1.4817798618081903, "learning_rate": 1.9922512558564154e-06, "loss": 0.23727375268936157, "step": 3263 }, { "epoch": 1.6128753243543803, "grad_norm": 1.3341145171932982, "learning_rate": 1.9873600270368664e-06, "loss": 0.2341655194759369, "step": 3264 }, { "epoch": 1.6133695786482145, "grad_norm": 1.4911949653625025, "learning_rate": 1.9824741473412768e-06, "loss": 0.32069963216781616, "step": 3265 }, { "epoch": 1.6138638329420487, "grad_norm": 1.4329702924332965, "learning_rate": 1.977593620031393e-06, "loss": 0.2414681762456894, "step": 3266 }, { "epoch": 1.6143580872358827, "grad_norm": 1.4261430753271709, "learning_rate": 1.9727184483653793e-06, "loss": 0.25517842173576355, "step": 3267 }, { "epoch": 1.614852341529717, "grad_norm": 1.4664579571771421, "learning_rate": 1.967848635597831e-06, "loss": 0.28264889121055603, "step": 3268 }, { "epoch": 1.6153465958235511, "grad_norm": 1.2722226456356633, "learning_rate": 1.962984184979774e-06, "loss": 0.24543075263500214, "step": 3269 }, { "epoch": 1.6158408501173853, "grad_norm": 1.3971489540759634, "learning_rate": 1.9581250997586366e-06, "loss": 0.2770763039588928, "step": 3270 }, { "epoch": 1.6163351044112195, "grad_norm": 1.362601366326608, "learning_rate": 1.953271383178278e-06, "loss": 0.2521423101425171, "step": 3271 }, { "epoch": 1.6168293587050537, "grad_norm": 1.262726405313237, "learning_rate": 1.9484230384789702e-06, "loss": 0.2402455359697342, "step": 3272 }, { "epoch": 1.617323612998888, "grad_norm": 1.4929089203163604, "learning_rate": 1.9435800688974005e-06, "loss": 0.2947021424770355, "step": 3273 }, { "epoch": 1.6178178672927221, "grad_norm": 1.2382587228414774, "learning_rate": 1.938742477666663e-06, "loss": 0.22238701581954956, "step": 3274 }, { "epoch": 1.6183121215865564, "grad_norm": 1.2835510888376274, "learning_rate": 1.933910268016269e-06, "loss": 0.25475019216537476, "step": 3275 }, { "epoch": 1.6188063758803906, "grad_norm": 1.2545564646453307, "learning_rate": 1.929083443172125e-06, "loss": 0.2316315472126007, "step": 3276 }, { "epoch": 1.6193006301742248, "grad_norm": 1.2739392933893041, "learning_rate": 1.9242620063565598e-06, "loss": 0.24977952241897583, "step": 3277 }, { "epoch": 1.619794884468059, "grad_norm": 1.3712132397422443, "learning_rate": 1.9194459607882887e-06, "loss": 0.24006152153015137, "step": 3278 }, { "epoch": 1.620289138761893, "grad_norm": 1.2866259343493134, "learning_rate": 1.9146353096824366e-06, "loss": 0.26050522923469543, "step": 3279 }, { "epoch": 1.6207833930557272, "grad_norm": 1.338449999730035, "learning_rate": 1.9098300562505266e-06, "loss": 0.2698773443698883, "step": 3280 }, { "epoch": 1.6212776473495614, "grad_norm": 1.480404977138073, "learning_rate": 1.9050302037004765e-06, "loss": 0.2627784013748169, "step": 3281 }, { "epoch": 1.6217719016433956, "grad_norm": 1.3335627547093958, "learning_rate": 1.900235755236599e-06, "loss": 0.24261148273944855, "step": 3282 }, { "epoch": 1.6222661559372296, "grad_norm": 1.347149973540751, "learning_rate": 1.8954467140596023e-06, "loss": 0.24689635634422302, "step": 3283 }, { "epoch": 1.6227604102310638, "grad_norm": 1.4586477344669697, "learning_rate": 1.890663083366574e-06, "loss": 0.2885867953300476, "step": 3284 }, { "epoch": 1.623254664524898, "grad_norm": 1.2981242679817548, "learning_rate": 1.8858848663510066e-06, "loss": 0.2624407112598419, "step": 3285 }, { "epoch": 1.6237489188187322, "grad_norm": 1.4544775837390882, "learning_rate": 1.881112066202767e-06, "loss": 0.27705928683280945, "step": 3286 }, { "epoch": 1.6242431731125664, "grad_norm": 1.4465119903360202, "learning_rate": 1.8763446861081058e-06, "loss": 0.26406094431877136, "step": 3287 }, { "epoch": 1.6247374274064006, "grad_norm": 1.3239739188563808, "learning_rate": 1.8715827292496557e-06, "loss": 0.26495790481567383, "step": 3288 }, { "epoch": 1.6252316817002348, "grad_norm": 1.419298583557058, "learning_rate": 1.8668261988064406e-06, "loss": 0.24995195865631104, "step": 3289 }, { "epoch": 1.625725935994069, "grad_norm": 1.4058286500391235, "learning_rate": 1.8620750979538437e-06, "loss": 0.23043034970760345, "step": 3290 }, { "epoch": 1.6262201902879032, "grad_norm": 1.3959905154788135, "learning_rate": 1.8573294298636334e-06, "loss": 0.2590731978416443, "step": 3291 }, { "epoch": 1.6267144445817374, "grad_norm": 1.3919450960931963, "learning_rate": 1.8525891977039557e-06, "loss": 0.24246811866760254, "step": 3292 }, { "epoch": 1.6272086988755716, "grad_norm": 1.2790623939923147, "learning_rate": 1.847854404639311e-06, "loss": 0.2386825680732727, "step": 3293 }, { "epoch": 1.6277029531694056, "grad_norm": 1.3168324939527787, "learning_rate": 1.843125053830588e-06, "loss": 0.2243885099887848, "step": 3294 }, { "epoch": 1.6281972074632398, "grad_norm": 1.264397606173487, "learning_rate": 1.838401148435035e-06, "loss": 0.24984796345233917, "step": 3295 }, { "epoch": 1.628691461757074, "grad_norm": 1.3015406971863621, "learning_rate": 1.8336826916062568e-06, "loss": 0.22784638404846191, "step": 3296 }, { "epoch": 1.6291857160509082, "grad_norm": 1.4178841831435534, "learning_rate": 1.828969686494232e-06, "loss": 0.24812597036361694, "step": 3297 }, { "epoch": 1.6296799703447422, "grad_norm": 1.2132930880582795, "learning_rate": 1.8242621362452939e-06, "loss": 0.234031543135643, "step": 3298 }, { "epoch": 1.6301742246385764, "grad_norm": 1.251471335677166, "learning_rate": 1.8195600440021377e-06, "loss": 0.22455371916294098, "step": 3299 }, { "epoch": 1.6306684789324106, "grad_norm": 1.2488121980955387, "learning_rate": 1.8148634129038113e-06, "loss": 0.22605910897254944, "step": 3300 }, { "epoch": 1.6311627332262448, "grad_norm": 1.3484568896035969, "learning_rate": 1.8101722460857184e-06, "loss": 0.2527684271335602, "step": 3301 }, { "epoch": 1.631656987520079, "grad_norm": 1.355365003110194, "learning_rate": 1.8054865466796167e-06, "loss": 0.24625766277313232, "step": 3302 }, { "epoch": 1.6321512418139132, "grad_norm": 1.7061568076136007, "learning_rate": 1.8008063178136125e-06, "loss": 0.31236231327056885, "step": 3303 }, { "epoch": 1.6326454961077475, "grad_norm": 1.3433390649211776, "learning_rate": 1.7961315626121566e-06, "loss": 0.21256005764007568, "step": 3304 }, { "epoch": 1.6331397504015817, "grad_norm": 1.5039173087965194, "learning_rate": 1.7914622841960482e-06, "loss": 0.25238949060440063, "step": 3305 }, { "epoch": 1.6336340046954159, "grad_norm": 1.3709723014330413, "learning_rate": 1.7867984856824382e-06, "loss": 0.29630619287490845, "step": 3306 }, { "epoch": 1.63412825898925, "grad_norm": 1.3891654533842075, "learning_rate": 1.782140170184804e-06, "loss": 0.26159363985061646, "step": 3307 }, { "epoch": 1.6346225132830843, "grad_norm": 1.2884457367333761, "learning_rate": 1.7774873408129733e-06, "loss": 0.22361448407173157, "step": 3308 }, { "epoch": 1.6351167675769185, "grad_norm": 1.410142665529872, "learning_rate": 1.7728400006731083e-06, "loss": 0.23890942335128784, "step": 3309 }, { "epoch": 1.6356110218707525, "grad_norm": 1.3147986477314286, "learning_rate": 1.7681981528677073e-06, "loss": 0.23067504167556763, "step": 3310 }, { "epoch": 1.6361052761645867, "grad_norm": 1.4202307068972662, "learning_rate": 1.7635618004956012e-06, "loss": 0.24790561199188232, "step": 3311 }, { "epoch": 1.6365995304584209, "grad_norm": 1.3183461895569366, "learning_rate": 1.7589309466519556e-06, "loss": 0.2590476870536804, "step": 3312 }, { "epoch": 1.637093784752255, "grad_norm": 1.4033807602679105, "learning_rate": 1.754305594428254e-06, "loss": 0.26833316683769226, "step": 3313 }, { "epoch": 1.637588039046089, "grad_norm": 1.2949590395956057, "learning_rate": 1.749685746912323e-06, "loss": 0.23390671610832214, "step": 3314 }, { "epoch": 1.6380822933399233, "grad_norm": 1.350070481785481, "learning_rate": 1.7450714071883079e-06, "loss": 0.2760172188282013, "step": 3315 }, { "epoch": 1.6385765476337575, "grad_norm": 1.387338184553767, "learning_rate": 1.7404625783366703e-06, "loss": 0.255672812461853, "step": 3316 }, { "epoch": 1.6390708019275917, "grad_norm": 1.4073549622144716, "learning_rate": 1.7358592634342008e-06, "loss": 0.26336947083473206, "step": 3317 }, { "epoch": 1.639565056221426, "grad_norm": 1.2609217918610456, "learning_rate": 1.7312614655540071e-06, "loss": 0.2308199107646942, "step": 3318 }, { "epoch": 1.64005931051526, "grad_norm": 1.335188741822115, "learning_rate": 1.7266691877655129e-06, "loss": 0.24762676656246185, "step": 3319 }, { "epoch": 1.6405535648090943, "grad_norm": 1.3287358421539026, "learning_rate": 1.7220824331344577e-06, "loss": 0.2175157219171524, "step": 3320 }, { "epoch": 1.6410478191029285, "grad_norm": 1.3635707435478155, "learning_rate": 1.7175012047228956e-06, "loss": 0.24319039285182953, "step": 3321 }, { "epoch": 1.6415420733967627, "grad_norm": 1.2272267263054326, "learning_rate": 1.7129255055891813e-06, "loss": 0.21708521246910095, "step": 3322 }, { "epoch": 1.642036327690597, "grad_norm": 1.4404881849035673, "learning_rate": 1.7083553387879969e-06, "loss": 0.28576910495758057, "step": 3323 }, { "epoch": 1.6425305819844311, "grad_norm": 1.3120467826579518, "learning_rate": 1.703790707370313e-06, "loss": 0.2664312720298767, "step": 3324 }, { "epoch": 1.6430248362782651, "grad_norm": 1.5950926505285568, "learning_rate": 1.6992316143834142e-06, "loss": 0.23930951952934265, "step": 3325 }, { "epoch": 1.6435190905720993, "grad_norm": 1.3985303284465023, "learning_rate": 1.694678062870886e-06, "loss": 0.2741955518722534, "step": 3326 }, { "epoch": 1.6440133448659335, "grad_norm": 1.2830935776841221, "learning_rate": 1.6901300558726142e-06, "loss": 0.25177690386772156, "step": 3327 }, { "epoch": 1.6445075991597677, "grad_norm": 1.4111945712412088, "learning_rate": 1.6855875964247837e-06, "loss": 0.26517611742019653, "step": 3328 }, { "epoch": 1.6450018534536017, "grad_norm": 1.227994601145186, "learning_rate": 1.6810506875598776e-06, "loss": 0.2294573187828064, "step": 3329 }, { "epoch": 1.645496107747436, "grad_norm": 1.3101987526620804, "learning_rate": 1.6765193323066653e-06, "loss": 0.23062998056411743, "step": 3330 }, { "epoch": 1.6459903620412701, "grad_norm": 1.4687005380243534, "learning_rate": 1.6719935336902205e-06, "loss": 0.3047422468662262, "step": 3331 }, { "epoch": 1.6464846163351043, "grad_norm": 1.4214345840675306, "learning_rate": 1.6674732947319017e-06, "loss": 0.2715694308280945, "step": 3332 }, { "epoch": 1.6469788706289386, "grad_norm": 1.3486732362780178, "learning_rate": 1.6629586184493519e-06, "loss": 0.20359721779823303, "step": 3333 }, { "epoch": 1.6474731249227728, "grad_norm": 1.256842666883273, "learning_rate": 1.6584495078565045e-06, "loss": 0.20083262026309967, "step": 3334 }, { "epoch": 1.647967379216607, "grad_norm": 1.2824441486710174, "learning_rate": 1.6539459659635848e-06, "loss": 0.2274707555770874, "step": 3335 }, { "epoch": 1.6484616335104412, "grad_norm": 1.4170790489583633, "learning_rate": 1.6494479957770847e-06, "loss": 0.2654137909412384, "step": 3336 }, { "epoch": 1.6489558878042754, "grad_norm": 1.2207871831065553, "learning_rate": 1.644955600299788e-06, "loss": 0.24672716856002808, "step": 3337 }, { "epoch": 1.6494501420981096, "grad_norm": 2.7206661248050494, "learning_rate": 1.640468782530753e-06, "loss": 0.21563802659511566, "step": 3338 }, { "epoch": 1.6499443963919438, "grad_norm": 1.2772497258385302, "learning_rate": 1.6359875454653151e-06, "loss": 0.22986169159412384, "step": 3339 }, { "epoch": 1.650438650685778, "grad_norm": 1.1914212857874291, "learning_rate": 1.6315118920950857e-06, "loss": 0.22981731593608856, "step": 3340 }, { "epoch": 1.650932904979612, "grad_norm": 1.423180347857553, "learning_rate": 1.6270418254079478e-06, "loss": 0.25922536849975586, "step": 3341 }, { "epoch": 1.6514271592734462, "grad_norm": 1.3808711162643625, "learning_rate": 1.6225773483880503e-06, "loss": 0.23273468017578125, "step": 3342 }, { "epoch": 1.6519214135672804, "grad_norm": 1.3019728240659525, "learning_rate": 1.6181184640158165e-06, "loss": 0.22988896071910858, "step": 3343 }, { "epoch": 1.6524156678611144, "grad_norm": 1.3674976753844925, "learning_rate": 1.6136651752679333e-06, "loss": 0.2628646790981293, "step": 3344 }, { "epoch": 1.6529099221549486, "grad_norm": 1.3498513177046836, "learning_rate": 1.6092174851173526e-06, "loss": 0.24670086801052094, "step": 3345 }, { "epoch": 1.6534041764487828, "grad_norm": 1.3175919767027275, "learning_rate": 1.6047753965332902e-06, "loss": 0.27845436334609985, "step": 3346 }, { "epoch": 1.653898430742617, "grad_norm": 1.30200656487082, "learning_rate": 1.6003389124812185e-06, "loss": 0.25297483801841736, "step": 3347 }, { "epoch": 1.6543926850364512, "grad_norm": 1.237195636484559, "learning_rate": 1.595908035922873e-06, "loss": 0.18876859545707703, "step": 3348 }, { "epoch": 1.6548869393302854, "grad_norm": 1.3417621492525376, "learning_rate": 1.591482769816246e-06, "loss": 0.23852673172950745, "step": 3349 }, { "epoch": 1.6553811936241196, "grad_norm": 1.3350614987774176, "learning_rate": 1.587063117115576e-06, "loss": 0.2569701373577118, "step": 3350 }, { "epoch": 1.6558754479179538, "grad_norm": 1.354350083762125, "learning_rate": 1.582649080771359e-06, "loss": 0.29305699467658997, "step": 3351 }, { "epoch": 1.656369702211788, "grad_norm": 1.42534989112271, "learning_rate": 1.5782406637303527e-06, "loss": 0.28942832350730896, "step": 3352 }, { "epoch": 1.6568639565056222, "grad_norm": 1.351062882636418, "learning_rate": 1.5738378689355439e-06, "loss": 0.27491068840026855, "step": 3353 }, { "epoch": 1.6573582107994564, "grad_norm": 1.4736732865815314, "learning_rate": 1.569440699326179e-06, "loss": 0.26730844378471375, "step": 3354 }, { "epoch": 1.6578524650932907, "grad_norm": 1.3194299490413177, "learning_rate": 1.5650491578377458e-06, "loss": 0.23610982298851013, "step": 3355 }, { "epoch": 1.6583467193871246, "grad_norm": 1.5894671595119023, "learning_rate": 1.5606632474019734e-06, "loss": 0.26817262172698975, "step": 3356 }, { "epoch": 1.6588409736809588, "grad_norm": 1.4847304906222882, "learning_rate": 1.556282970946833e-06, "loss": 0.2403341382741928, "step": 3357 }, { "epoch": 1.659335227974793, "grad_norm": 1.4109665373138245, "learning_rate": 1.5519083313965378e-06, "loss": 0.24433058500289917, "step": 3358 }, { "epoch": 1.6598294822686273, "grad_norm": 1.2685951523616033, "learning_rate": 1.5475393316715282e-06, "loss": 0.2526702582836151, "step": 3359 }, { "epoch": 1.6603237365624612, "grad_norm": 1.3373930264060108, "learning_rate": 1.543175974688491e-06, "loss": 0.24032334983348846, "step": 3360 }, { "epoch": 1.6608179908562954, "grad_norm": 1.3759465001084996, "learning_rate": 1.5388182633603433e-06, "loss": 0.27770349383354187, "step": 3361 }, { "epoch": 1.6613122451501297, "grad_norm": 1.5590715119269358, "learning_rate": 1.534466200596224e-06, "loss": 0.26002752780914307, "step": 3362 }, { "epoch": 1.6618064994439639, "grad_norm": 1.867324678142589, "learning_rate": 1.5301197893015129e-06, "loss": 0.2707037329673767, "step": 3363 }, { "epoch": 1.662300753737798, "grad_norm": 1.3300911116600942, "learning_rate": 1.52577903237781e-06, "loss": 0.27249252796173096, "step": 3364 }, { "epoch": 1.6627950080316323, "grad_norm": 1.341030721831506, "learning_rate": 1.5214439327229425e-06, "loss": 0.22495020925998688, "step": 3365 }, { "epoch": 1.6632892623254665, "grad_norm": 1.4580410293752506, "learning_rate": 1.5171144932309622e-06, "loss": 0.23561973869800568, "step": 3366 }, { "epoch": 1.6637835166193007, "grad_norm": 1.4580927261417298, "learning_rate": 1.512790716792143e-06, "loss": 0.2689869701862335, "step": 3367 }, { "epoch": 1.664277770913135, "grad_norm": 1.2734577307213573, "learning_rate": 1.5084726062929688e-06, "loss": 0.22249455749988556, "step": 3368 }, { "epoch": 1.664772025206969, "grad_norm": 1.444110335390912, "learning_rate": 1.5041601646161585e-06, "loss": 0.24586130678653717, "step": 3369 }, { "epoch": 1.6652662795008033, "grad_norm": 1.3250583547488792, "learning_rate": 1.499853394640629e-06, "loss": 0.2549409568309784, "step": 3370 }, { "epoch": 1.6657605337946373, "grad_norm": 1.4135792596464256, "learning_rate": 1.4955522992415206e-06, "loss": 0.2517774999141693, "step": 3371 }, { "epoch": 1.6662547880884715, "grad_norm": 1.6132674993246225, "learning_rate": 1.491256881290184e-06, "loss": 0.2627662420272827, "step": 3372 }, { "epoch": 1.6667490423823057, "grad_norm": 1.250156659660365, "learning_rate": 1.4869671436541788e-06, "loss": 0.25203272700309753, "step": 3373 }, { "epoch": 1.66724329667614, "grad_norm": 1.3035778741812132, "learning_rate": 1.482683089197271e-06, "loss": 0.2206164300441742, "step": 3374 }, { "epoch": 1.667737550969974, "grad_norm": 1.4034071560123977, "learning_rate": 1.4784047207794383e-06, "loss": 0.2551203966140747, "step": 3375 }, { "epoch": 1.668231805263808, "grad_norm": 1.4247468939554981, "learning_rate": 1.4741320412568505e-06, "loss": 0.2592264711856842, "step": 3376 }, { "epoch": 1.6687260595576423, "grad_norm": 1.3609833066581156, "learning_rate": 1.4698650534818936e-06, "loss": 0.25902658700942993, "step": 3377 }, { "epoch": 1.6692203138514765, "grad_norm": 1.5283083080675575, "learning_rate": 1.4656037603031491e-06, "loss": 0.2685459852218628, "step": 3378 }, { "epoch": 1.6697145681453107, "grad_norm": 1.2083368696295387, "learning_rate": 1.4613481645653914e-06, "loss": 0.21010839939117432, "step": 3379 }, { "epoch": 1.670208822439145, "grad_norm": 1.3019618254178054, "learning_rate": 1.4570982691095925e-06, "loss": 0.23318082094192505, "step": 3380 }, { "epoch": 1.6707030767329791, "grad_norm": 1.346937478273973, "learning_rate": 1.4528540767729315e-06, "loss": 0.25045326352119446, "step": 3381 }, { "epoch": 1.6711973310268133, "grad_norm": 1.5157571774504706, "learning_rate": 1.4486155903887623e-06, "loss": 0.2436288446187973, "step": 3382 }, { "epoch": 1.6716915853206475, "grad_norm": 1.2766580343897052, "learning_rate": 1.444382812786641e-06, "loss": 0.20454761385917664, "step": 3383 }, { "epoch": 1.6721858396144818, "grad_norm": 1.3207693230256567, "learning_rate": 1.4401557467923089e-06, "loss": 0.24906963109970093, "step": 3384 }, { "epoch": 1.672680093908316, "grad_norm": 1.3391460516330347, "learning_rate": 1.435934395227695e-06, "loss": 0.2552015483379364, "step": 3385 }, { "epoch": 1.6731743482021502, "grad_norm": 1.3523733680416914, "learning_rate": 1.4317187609109129e-06, "loss": 0.2393915057182312, "step": 3386 }, { "epoch": 1.6736686024959841, "grad_norm": 1.370539563215592, "learning_rate": 1.4275088466562625e-06, "loss": 0.2607477009296417, "step": 3387 }, { "epoch": 1.6741628567898184, "grad_norm": 1.3296614147148798, "learning_rate": 1.423304655274218e-06, "loss": 0.23722632229328156, "step": 3388 }, { "epoch": 1.6746571110836526, "grad_norm": 1.303256653854929, "learning_rate": 1.4191061895714398e-06, "loss": 0.2614964246749878, "step": 3389 }, { "epoch": 1.6751513653774868, "grad_norm": 1.476448410559568, "learning_rate": 1.4149134523507634e-06, "loss": 0.2727823555469513, "step": 3390 }, { "epoch": 1.6756456196713208, "grad_norm": 1.2739771939884463, "learning_rate": 1.4107264464112003e-06, "loss": 0.25176581740379333, "step": 3391 }, { "epoch": 1.676139873965155, "grad_norm": 1.3087240197668597, "learning_rate": 1.4065451745479352e-06, "loss": 0.21339070796966553, "step": 3392 }, { "epoch": 1.6766341282589892, "grad_norm": 1.449069234603101, "learning_rate": 1.4023696395523267e-06, "loss": 0.26540419459342957, "step": 3393 }, { "epoch": 1.6771283825528234, "grad_norm": 1.3788929945945605, "learning_rate": 1.3981998442119017e-06, "loss": 0.2621360421180725, "step": 3394 }, { "epoch": 1.6776226368466576, "grad_norm": 1.3149158272362809, "learning_rate": 1.3940357913103576e-06, "loss": 0.2578747570514679, "step": 3395 }, { "epoch": 1.6781168911404918, "grad_norm": 1.3223117210430684, "learning_rate": 1.3898774836275531e-06, "loss": 0.26105010509490967, "step": 3396 }, { "epoch": 1.678611145434326, "grad_norm": 1.277709690267506, "learning_rate": 1.3857249239395143e-06, "loss": 0.2221919298171997, "step": 3397 }, { "epoch": 1.6791053997281602, "grad_norm": 1.3742911888899896, "learning_rate": 1.3815781150184382e-06, "loss": 0.2498932033777237, "step": 3398 }, { "epoch": 1.6795996540219944, "grad_norm": 1.3631278461436225, "learning_rate": 1.377437059632668e-06, "loss": 0.29306796193122864, "step": 3399 }, { "epoch": 1.6800939083158286, "grad_norm": 1.41106483401144, "learning_rate": 1.3733017605467158e-06, "loss": 0.23804892599582672, "step": 3400 }, { "epoch": 1.6805881626096628, "grad_norm": 1.264388446305106, "learning_rate": 1.3691722205212465e-06, "loss": 0.18528425693511963, "step": 3401 }, { "epoch": 1.6810824169034968, "grad_norm": 1.434400904695952, "learning_rate": 1.365048442313085e-06, "loss": 0.257534921169281, "step": 3402 }, { "epoch": 1.681576671197331, "grad_norm": 1.390183210111369, "learning_rate": 1.3609304286752034e-06, "loss": 0.2519993782043457, "step": 3403 }, { "epoch": 1.6820709254911652, "grad_norm": 1.5041703905686798, "learning_rate": 1.3568181823567328e-06, "loss": 0.27830445766448975, "step": 3404 }, { "epoch": 1.6825651797849994, "grad_norm": 1.3496130761993563, "learning_rate": 1.3527117061029438e-06, "loss": 0.22532883286476135, "step": 3405 }, { "epoch": 1.6830594340788334, "grad_norm": 1.3484913124474047, "learning_rate": 1.3486110026552668e-06, "loss": 0.23230011761188507, "step": 3406 }, { "epoch": 1.6835536883726676, "grad_norm": 1.320791018685261, "learning_rate": 1.3445160747512743e-06, "loss": 0.24105653166770935, "step": 3407 }, { "epoch": 1.6840479426665018, "grad_norm": 1.5077644423875391, "learning_rate": 1.340426925124676e-06, "loss": 0.2946394681930542, "step": 3408 }, { "epoch": 1.684542196960336, "grad_norm": 1.403422513607122, "learning_rate": 1.3363435565053319e-06, "loss": 0.2682989239692688, "step": 3409 }, { "epoch": 1.6850364512541702, "grad_norm": 1.3363195283881322, "learning_rate": 1.332265971619241e-06, "loss": 0.2219456285238266, "step": 3410 }, { "epoch": 1.6855307055480044, "grad_norm": 1.2440577869208935, "learning_rate": 1.3281941731885396e-06, "loss": 0.22532151639461517, "step": 3411 }, { "epoch": 1.6860249598418386, "grad_norm": 1.3951142777226702, "learning_rate": 1.324128163931504e-06, "loss": 0.24166807532310486, "step": 3412 }, { "epoch": 1.6865192141356729, "grad_norm": 1.8803758040895027, "learning_rate": 1.3200679465625453e-06, "loss": 0.25514671206474304, "step": 3413 }, { "epoch": 1.687013468429507, "grad_norm": 1.4161288294493581, "learning_rate": 1.3160135237922011e-06, "loss": 0.263123482465744, "step": 3414 }, { "epoch": 1.6875077227233413, "grad_norm": 1.3692510048196695, "learning_rate": 1.3119648983271527e-06, "loss": 0.23763976991176605, "step": 3415 }, { "epoch": 1.6880019770171755, "grad_norm": 1.4514594135261416, "learning_rate": 1.3079220728701991e-06, "loss": 0.28645598888397217, "step": 3416 }, { "epoch": 1.6884962313110097, "grad_norm": 1.3145652794970974, "learning_rate": 1.303885050120275e-06, "loss": 0.2269624024629593, "step": 3417 }, { "epoch": 1.6889904856048437, "grad_norm": 1.2380861054344243, "learning_rate": 1.2998538327724386e-06, "loss": 0.23601466417312622, "step": 3418 }, { "epoch": 1.6894847398986779, "grad_norm": 1.4253359182592056, "learning_rate": 1.2958284235178743e-06, "loss": 0.2246169149875641, "step": 3419 }, { "epoch": 1.689978994192512, "grad_norm": 1.497489718348998, "learning_rate": 1.2918088250438865e-06, "loss": 0.26519715785980225, "step": 3420 }, { "epoch": 1.6904732484863463, "grad_norm": 1.443915314302877, "learning_rate": 1.2877950400339046e-06, "loss": 0.2590267062187195, "step": 3421 }, { "epoch": 1.6909675027801803, "grad_norm": 1.3941822393799335, "learning_rate": 1.2837870711674672e-06, "loss": 0.2535945773124695, "step": 3422 }, { "epoch": 1.6914617570740145, "grad_norm": 1.3833358145204437, "learning_rate": 1.279784921120244e-06, "loss": 0.21907874941825867, "step": 3423 }, { "epoch": 1.6919560113678487, "grad_norm": 1.3775789573220893, "learning_rate": 1.2757885925640124e-06, "loss": 0.23314553499221802, "step": 3424 }, { "epoch": 1.6924502656616829, "grad_norm": 1.2335650824399806, "learning_rate": 1.2717980881666615e-06, "loss": 0.2288433313369751, "step": 3425 }, { "epoch": 1.692944519955517, "grad_norm": 1.3218922014839134, "learning_rate": 1.2678134105921924e-06, "loss": 0.2285449206829071, "step": 3426 }, { "epoch": 1.6934387742493513, "grad_norm": 1.4061495134031399, "learning_rate": 1.2638345625007287e-06, "loss": 0.2898653447628021, "step": 3427 }, { "epoch": 1.6939330285431855, "grad_norm": 1.3140964049835469, "learning_rate": 1.2598615465484831e-06, "loss": 0.23574519157409668, "step": 3428 }, { "epoch": 1.6944272828370197, "grad_norm": 1.8163323929078987, "learning_rate": 1.2558943653877887e-06, "loss": 0.23385417461395264, "step": 3429 }, { "epoch": 1.694921537130854, "grad_norm": 1.4332956021988026, "learning_rate": 1.2519330216670766e-06, "loss": 0.2555482089519501, "step": 3430 }, { "epoch": 1.6954157914246881, "grad_norm": 1.3005186125236943, "learning_rate": 1.247977518030885e-06, "loss": 0.22221535444259644, "step": 3431 }, { "epoch": 1.6959100457185223, "grad_norm": 1.2645213358789251, "learning_rate": 1.2440278571198516e-06, "loss": 0.21753090620040894, "step": 3432 }, { "epoch": 1.6964043000123563, "grad_norm": 1.3199124302473737, "learning_rate": 1.240084041570716e-06, "loss": 0.2352944314479828, "step": 3433 }, { "epoch": 1.6968985543061905, "grad_norm": 1.3019158889354874, "learning_rate": 1.2361460740163045e-06, "loss": 0.22581814229488373, "step": 3434 }, { "epoch": 1.6973928086000247, "grad_norm": 1.5051457985045136, "learning_rate": 1.2322139570855596e-06, "loss": 0.28703421354293823, "step": 3435 }, { "epoch": 1.697887062893859, "grad_norm": 1.2466294121854475, "learning_rate": 1.2282876934034972e-06, "loss": 0.21528789401054382, "step": 3436 }, { "epoch": 1.698381317187693, "grad_norm": 1.3714652202926056, "learning_rate": 1.2243672855912393e-06, "loss": 0.2675422430038452, "step": 3437 }, { "epoch": 1.6988755714815271, "grad_norm": 1.4468798550658835, "learning_rate": 1.2204527362659913e-06, "loss": 0.26681527495384216, "step": 3438 }, { "epoch": 1.6993698257753613, "grad_norm": 1.6692863707132455, "learning_rate": 1.216544048041054e-06, "loss": 0.2436470091342926, "step": 3439 }, { "epoch": 1.6998640800691955, "grad_norm": 1.3471564011899657, "learning_rate": 1.212641223525809e-06, "loss": 0.25458425283432007, "step": 3440 }, { "epoch": 1.7003583343630297, "grad_norm": 1.5076141037655715, "learning_rate": 1.2087442653257286e-06, "loss": 0.24890559911727905, "step": 3441 }, { "epoch": 1.700852588656864, "grad_norm": 1.2935321774740525, "learning_rate": 1.2048531760423642e-06, "loss": 0.26031816005706787, "step": 3442 }, { "epoch": 1.7013468429506982, "grad_norm": 1.2852726465517723, "learning_rate": 1.200967958273349e-06, "loss": 0.22184975445270538, "step": 3443 }, { "epoch": 1.7018410972445324, "grad_norm": 1.4055101079653758, "learning_rate": 1.1970886146124073e-06, "loss": 0.2670953571796417, "step": 3444 }, { "epoch": 1.7023353515383666, "grad_norm": 1.4509425159233789, "learning_rate": 1.1932151476493247e-06, "loss": 0.27950525283813477, "step": 3445 }, { "epoch": 1.7028296058322008, "grad_norm": 1.177838308027136, "learning_rate": 1.1893475599699766e-06, "loss": 0.23257380723953247, "step": 3446 }, { "epoch": 1.703323860126035, "grad_norm": 1.33833163811184, "learning_rate": 1.1854858541563086e-06, "loss": 0.2586575746536255, "step": 3447 }, { "epoch": 1.703818114419869, "grad_norm": 1.4079485154063143, "learning_rate": 1.1816300327863406e-06, "loss": 0.2677457928657532, "step": 3448 }, { "epoch": 1.7043123687137032, "grad_norm": 1.565618455451115, "learning_rate": 1.1777800984341637e-06, "loss": 0.29866284132003784, "step": 3449 }, { "epoch": 1.7048066230075374, "grad_norm": 1.3858480302164131, "learning_rate": 1.1739360536699397e-06, "loss": 0.27279675006866455, "step": 3450 }, { "epoch": 1.7053008773013716, "grad_norm": 1.4265301971817403, "learning_rate": 1.1700979010598945e-06, "loss": 0.25695672631263733, "step": 3451 }, { "epoch": 1.7057951315952056, "grad_norm": 1.2548676263466874, "learning_rate": 1.1662656431663278e-06, "loss": 0.22578787803649902, "step": 3452 }, { "epoch": 1.7062893858890398, "grad_norm": 1.2884557931863843, "learning_rate": 1.1624392825476016e-06, "loss": 0.1946491301059723, "step": 3453 }, { "epoch": 1.706783640182874, "grad_norm": 1.7214838792794764, "learning_rate": 1.158618821758134e-06, "loss": 0.2099667191505432, "step": 3454 }, { "epoch": 1.7072778944767082, "grad_norm": 1.3956932051100446, "learning_rate": 1.1548042633484148e-06, "loss": 0.22660428285598755, "step": 3455 }, { "epoch": 1.7077721487705424, "grad_norm": 1.486801447510752, "learning_rate": 1.1509956098649855e-06, "loss": 0.27378255128860474, "step": 3456 }, { "epoch": 1.7082664030643766, "grad_norm": 1.3265929348116055, "learning_rate": 1.1471928638504504e-06, "loss": 0.2209164947271347, "step": 3457 }, { "epoch": 1.7087606573582108, "grad_norm": 1.4225246621575494, "learning_rate": 1.1433960278434687e-06, "loss": 0.24310322105884552, "step": 3458 }, { "epoch": 1.709254911652045, "grad_norm": 1.408175906725771, "learning_rate": 1.1396051043787526e-06, "loss": 0.23209068179130554, "step": 3459 }, { "epoch": 1.7097491659458792, "grad_norm": 1.3815567972930465, "learning_rate": 1.1358200959870703e-06, "loss": 0.2514454126358032, "step": 3460 }, { "epoch": 1.7102434202397134, "grad_norm": 1.4417631759146625, "learning_rate": 1.132041005195239e-06, "loss": 0.2580721378326416, "step": 3461 }, { "epoch": 1.7107376745335476, "grad_norm": 1.3709268368925525, "learning_rate": 1.1282678345261234e-06, "loss": 0.26388949155807495, "step": 3462 }, { "epoch": 1.7112319288273818, "grad_norm": 1.2783952905855267, "learning_rate": 1.1245005864986402e-06, "loss": 0.2194654643535614, "step": 3463 }, { "epoch": 1.7117261831212158, "grad_norm": 1.2633121407835717, "learning_rate": 1.1207392636277502e-06, "loss": 0.2048814296722412, "step": 3464 }, { "epoch": 1.71222043741505, "grad_norm": 1.33926020269927, "learning_rate": 1.1169838684244584e-06, "loss": 0.24165832996368408, "step": 3465 }, { "epoch": 1.7127146917088842, "grad_norm": 1.3906329052137327, "learning_rate": 1.1132344033958132e-06, "loss": 0.2484482377767563, "step": 3466 }, { "epoch": 1.7132089460027184, "grad_norm": 1.4564028814853938, "learning_rate": 1.1094908710449048e-06, "loss": 0.2406741827726364, "step": 3467 }, { "epoch": 1.7137032002965524, "grad_norm": 1.4018531611252434, "learning_rate": 1.1057532738708588e-06, "loss": 0.2417721152305603, "step": 3468 }, { "epoch": 1.7141974545903866, "grad_norm": 1.4560734194910743, "learning_rate": 1.1020216143688446e-06, "loss": 0.26304543018341064, "step": 3469 }, { "epoch": 1.7146917088842208, "grad_norm": 1.476031518585943, "learning_rate": 1.098295895030066e-06, "loss": 0.30013689398765564, "step": 3470 }, { "epoch": 1.715185963178055, "grad_norm": 1.3175345714713855, "learning_rate": 1.0945761183417569e-06, "loss": 0.21451817452907562, "step": 3471 }, { "epoch": 1.7156802174718893, "grad_norm": 1.3300365419760627, "learning_rate": 1.0908622867871854e-06, "loss": 0.235377699136734, "step": 3472 }, { "epoch": 1.7161744717657235, "grad_norm": 1.2866674867130445, "learning_rate": 1.0871544028456594e-06, "loss": 0.23560425639152527, "step": 3473 }, { "epoch": 1.7166687260595577, "grad_norm": 1.3385949926310057, "learning_rate": 1.083452468992503e-06, "loss": 0.2431229054927826, "step": 3474 }, { "epoch": 1.7171629803533919, "grad_norm": 1.2089508133597444, "learning_rate": 1.0797564876990762e-06, "loss": 0.211553692817688, "step": 3475 }, { "epoch": 1.717657234647226, "grad_norm": 1.3533177183735723, "learning_rate": 1.0760664614327643e-06, "loss": 0.23565953969955444, "step": 3476 }, { "epoch": 1.7181514889410603, "grad_norm": 1.328162178864468, "learning_rate": 1.0723823926569744e-06, "loss": 0.2052966058254242, "step": 3477 }, { "epoch": 1.7186457432348945, "grad_norm": 1.3067945675468369, "learning_rate": 1.06870428383114e-06, "loss": 0.24831204116344452, "step": 3478 }, { "epoch": 1.7191399975287285, "grad_norm": 1.273169118321956, "learning_rate": 1.0650321374107142e-06, "loss": 0.24706462025642395, "step": 3479 }, { "epoch": 1.7196342518225627, "grad_norm": 1.4211234189057285, "learning_rate": 1.0613659558471644e-06, "loss": 0.20845818519592285, "step": 3480 }, { "epoch": 1.720128506116397, "grad_norm": 1.2323642708024432, "learning_rate": 1.0577057415879887e-06, "loss": 0.21599797904491425, "step": 3481 }, { "epoch": 1.720622760410231, "grad_norm": 1.4618240857831881, "learning_rate": 1.054051497076689e-06, "loss": 0.2381049394607544, "step": 3482 }, { "epoch": 1.721117014704065, "grad_norm": 1.3155008449637104, "learning_rate": 1.0504032247527874e-06, "loss": 0.22402817010879517, "step": 3483 }, { "epoch": 1.7216112689978993, "grad_norm": 1.5409902580545625, "learning_rate": 1.0467609270518186e-06, "loss": 0.24406251311302185, "step": 3484 }, { "epoch": 1.7221055232917335, "grad_norm": 1.339222294791023, "learning_rate": 1.0431246064053291e-06, "loss": 0.24388936161994934, "step": 3485 }, { "epoch": 1.7225997775855677, "grad_norm": 1.3265412686691833, "learning_rate": 1.0394942652408735e-06, "loss": 0.26131671667099, "step": 3486 }, { "epoch": 1.723094031879402, "grad_norm": 1.3718768259485188, "learning_rate": 1.0358699059820188e-06, "loss": 0.247392475605011, "step": 3487 }, { "epoch": 1.7235882861732361, "grad_norm": 1.335920284358623, "learning_rate": 1.0322515310483316e-06, "loss": 0.22713768482208252, "step": 3488 }, { "epoch": 1.7240825404670703, "grad_norm": 1.3821197244420464, "learning_rate": 1.0286391428553854e-06, "loss": 0.2544357180595398, "step": 3489 }, { "epoch": 1.7245767947609045, "grad_norm": 1.260460911336476, "learning_rate": 1.0250327438147678e-06, "loss": 0.23186656832695007, "step": 3490 }, { "epoch": 1.7250710490547387, "grad_norm": 1.1804266448755296, "learning_rate": 1.0214323363340506e-06, "loss": 0.20387035608291626, "step": 3491 }, { "epoch": 1.725565303348573, "grad_norm": 1.4265943405789598, "learning_rate": 1.017837922816819e-06, "loss": 0.25391846895217896, "step": 3492 }, { "epoch": 1.7260595576424071, "grad_norm": 1.2603447890118837, "learning_rate": 1.014249505662649e-06, "loss": 0.23214812576770782, "step": 3493 }, { "epoch": 1.7265538119362414, "grad_norm": 1.5899981641866812, "learning_rate": 1.0106670872671187e-06, "loss": 0.31888365745544434, "step": 3494 }, { "epoch": 1.7270480662300753, "grad_norm": 1.2907611357867346, "learning_rate": 1.0070906700217998e-06, "loss": 0.23372362554073334, "step": 3495 }, { "epoch": 1.7275423205239095, "grad_norm": 1.2449017093435057, "learning_rate": 1.0035202563142577e-06, "loss": 0.20082907378673553, "step": 3496 }, { "epoch": 1.7280365748177438, "grad_norm": 1.3171397747083256, "learning_rate": 9.99955848528046e-07, "loss": 0.23895825445652008, "step": 3497 }, { "epoch": 1.728530829111578, "grad_norm": 1.4142591511055072, "learning_rate": 9.963974490427153e-07, "loss": 0.30089694261550903, "step": 3498 }, { "epoch": 1.729025083405412, "grad_norm": 1.4071492496267155, "learning_rate": 9.928450602338046e-07, "loss": 0.28134891390800476, "step": 3499 }, { "epoch": 1.7295193376992462, "grad_norm": 1.239666390023503, "learning_rate": 9.892986844728325e-07, "loss": 0.1947125792503357, "step": 3500 }, { "epoch": 1.7300135919930804, "grad_norm": 1.2560350647671819, "learning_rate": 9.857583241273116e-07, "loss": 0.252549409866333, "step": 3501 }, { "epoch": 1.7305078462869146, "grad_norm": 1.8080125735095465, "learning_rate": 9.82223981560736e-07, "loss": 0.28061211109161377, "step": 3502 }, { "epoch": 1.7310021005807488, "grad_norm": 1.3465400182463805, "learning_rate": 9.786956591325813e-07, "loss": 0.2492327094078064, "step": 3503 }, { "epoch": 1.731496354874583, "grad_norm": 1.3114105920039891, "learning_rate": 9.75173359198307e-07, "loss": 0.20470373332500458, "step": 3504 }, { "epoch": 1.7319906091684172, "grad_norm": 1.4582343704980485, "learning_rate": 9.716570841093476e-07, "loss": 0.24190351366996765, "step": 3505 }, { "epoch": 1.7324848634622514, "grad_norm": 1.3916465638756335, "learning_rate": 9.681468362131209e-07, "loss": 0.28784725069999695, "step": 3506 }, { "epoch": 1.7329791177560856, "grad_norm": 1.4872057430892556, "learning_rate": 9.646426178530176e-07, "loss": 0.2676560878753662, "step": 3507 }, { "epoch": 1.7334733720499198, "grad_norm": 1.4118374661566944, "learning_rate": 9.611444313684027e-07, "loss": 0.2493928223848343, "step": 3508 }, { "epoch": 1.733967626343754, "grad_norm": 1.272854491876895, "learning_rate": 9.57652279094613e-07, "loss": 0.23272472620010376, "step": 3509 }, { "epoch": 1.734461880637588, "grad_norm": 1.3295460481124186, "learning_rate": 9.541661633629662e-07, "loss": 0.23245804011821747, "step": 3510 }, { "epoch": 1.7349561349314222, "grad_norm": 1.318916212284511, "learning_rate": 9.506860865007373e-07, "loss": 0.22367024421691895, "step": 3511 }, { "epoch": 1.7354503892252564, "grad_norm": 1.312738075120818, "learning_rate": 9.472120508311788e-07, "loss": 0.22332677245140076, "step": 3512 }, { "epoch": 1.7359446435190906, "grad_norm": 1.3669711817276102, "learning_rate": 9.437440586735081e-07, "loss": 0.28051453828811646, "step": 3513 }, { "epoch": 1.7364388978129246, "grad_norm": 1.5089189064457602, "learning_rate": 9.402821123429017e-07, "loss": 0.24815741181373596, "step": 3514 }, { "epoch": 1.7369331521067588, "grad_norm": 1.338757796188803, "learning_rate": 9.368262141505114e-07, "loss": 0.24077603220939636, "step": 3515 }, { "epoch": 1.737427406400593, "grad_norm": 1.419717776508751, "learning_rate": 9.333763664034457e-07, "loss": 0.24596062302589417, "step": 3516 }, { "epoch": 1.7379216606944272, "grad_norm": 1.3347588363810814, "learning_rate": 9.299325714047702e-07, "loss": 0.22939634323120117, "step": 3517 }, { "epoch": 1.7384159149882614, "grad_norm": 1.446015117761441, "learning_rate": 9.264948314535116e-07, "loss": 0.24870653450489044, "step": 3518 }, { "epoch": 1.7389101692820956, "grad_norm": 1.2985600743859553, "learning_rate": 9.23063148844664e-07, "loss": 0.24589623510837555, "step": 3519 }, { "epoch": 1.7394044235759298, "grad_norm": 1.3138002527909343, "learning_rate": 9.196375258691615e-07, "loss": 0.24228474497795105, "step": 3520 }, { "epoch": 1.739898677869764, "grad_norm": 1.274631487561465, "learning_rate": 9.162179648139047e-07, "loss": 0.24371150135993958, "step": 3521 }, { "epoch": 1.7403929321635982, "grad_norm": 1.279720023026326, "learning_rate": 9.128044679617432e-07, "loss": 0.24775750935077667, "step": 3522 }, { "epoch": 1.7408871864574325, "grad_norm": 1.5257492514284694, "learning_rate": 9.093970375914784e-07, "loss": 0.2893243432044983, "step": 3523 }, { "epoch": 1.7413814407512667, "grad_norm": 1.300861064044251, "learning_rate": 9.059956759778632e-07, "loss": 0.24014830589294434, "step": 3524 }, { "epoch": 1.7418756950451009, "grad_norm": 1.399249837900177, "learning_rate": 9.026003853915977e-07, "loss": 0.21439003944396973, "step": 3525 }, { "epoch": 1.7423699493389349, "grad_norm": 1.3253623378225632, "learning_rate": 8.992111680993265e-07, "loss": 0.23376847803592682, "step": 3526 }, { "epoch": 1.742864203632769, "grad_norm": 1.3914877634645069, "learning_rate": 8.958280263636487e-07, "loss": 0.244795560836792, "step": 3527 }, { "epoch": 1.7433584579266033, "grad_norm": 1.3847661327530765, "learning_rate": 8.924509624430955e-07, "loss": 0.2513751685619354, "step": 3528 }, { "epoch": 1.7438527122204373, "grad_norm": 1.3808839230401615, "learning_rate": 8.890799785921478e-07, "loss": 0.2118893414735794, "step": 3529 }, { "epoch": 1.7443469665142715, "grad_norm": 1.4606627623109902, "learning_rate": 8.857150770612288e-07, "loss": 0.2834109365940094, "step": 3530 }, { "epoch": 1.7448412208081057, "grad_norm": 1.3959930901293698, "learning_rate": 8.823562600966962e-07, "loss": 0.2546151876449585, "step": 3531 }, { "epoch": 1.7453354751019399, "grad_norm": 1.3410984246991777, "learning_rate": 8.790035299408494e-07, "loss": 0.2654607594013214, "step": 3532 }, { "epoch": 1.745829729395774, "grad_norm": 1.4773453802832905, "learning_rate": 8.756568888319239e-07, "loss": 0.2720295786857605, "step": 3533 }, { "epoch": 1.7463239836896083, "grad_norm": 1.3341271298777078, "learning_rate": 8.723163390040856e-07, "loss": 0.22259725630283356, "step": 3534 }, { "epoch": 1.7468182379834425, "grad_norm": 1.3952830917524783, "learning_rate": 8.68981882687443e-07, "loss": 0.22918277978897095, "step": 3535 }, { "epoch": 1.7473124922772767, "grad_norm": 1.4553860122555766, "learning_rate": 8.656535221080297e-07, "loss": 0.24396009743213654, "step": 3536 }, { "epoch": 1.747806746571111, "grad_norm": 1.4530449395488945, "learning_rate": 8.623312594878097e-07, "loss": 0.2370900958776474, "step": 3537 }, { "epoch": 1.748301000864945, "grad_norm": 1.4353409191789361, "learning_rate": 8.590150970446798e-07, "loss": 0.2785671055316925, "step": 3538 }, { "epoch": 1.7487952551587793, "grad_norm": 1.3531168663907844, "learning_rate": 8.557050369924624e-07, "loss": 0.29365241527557373, "step": 3539 }, { "epoch": 1.7492895094526135, "grad_norm": 1.3579124483240532, "learning_rate": 8.524010815409068e-07, "loss": 0.24052876234054565, "step": 3540 }, { "epoch": 1.7497837637464475, "grad_norm": 1.5096531715278536, "learning_rate": 8.49103232895685e-07, "loss": 0.23938694596290588, "step": 3541 }, { "epoch": 1.7502780180402817, "grad_norm": 1.2842245856075563, "learning_rate": 8.458114932583961e-07, "loss": 0.2244144231081009, "step": 3542 }, { "epoch": 1.750772272334116, "grad_norm": 1.4659940645429403, "learning_rate": 8.425258648265544e-07, "loss": 0.25028878450393677, "step": 3543 }, { "epoch": 1.7512665266279501, "grad_norm": 1.414718407414415, "learning_rate": 8.39246349793602e-07, "loss": 0.23135274648666382, "step": 3544 }, { "epoch": 1.751760780921784, "grad_norm": 1.3004631081596045, "learning_rate": 8.359729503488967e-07, "loss": 0.23874548077583313, "step": 3545 }, { "epoch": 1.7522550352156183, "grad_norm": 1.4912661633646227, "learning_rate": 8.327056686777102e-07, "loss": 0.2780659794807434, "step": 3546 }, { "epoch": 1.7527492895094525, "grad_norm": 1.3424848463452685, "learning_rate": 8.294445069612356e-07, "loss": 0.213335320353508, "step": 3547 }, { "epoch": 1.7532435438032867, "grad_norm": 1.3764395925344186, "learning_rate": 8.261894673765757e-07, "loss": 0.23284730315208435, "step": 3548 }, { "epoch": 1.753737798097121, "grad_norm": 1.4152912967440003, "learning_rate": 8.229405520967504e-07, "loss": 0.25429633259773254, "step": 3549 }, { "epoch": 1.7542320523909551, "grad_norm": 1.42166486412748, "learning_rate": 8.196977632906877e-07, "loss": 0.2519379258155823, "step": 3550 }, { "epoch": 1.7547263066847893, "grad_norm": 1.3397514660513317, "learning_rate": 8.164611031232283e-07, "loss": 0.2510948181152344, "step": 3551 }, { "epoch": 1.7552205609786236, "grad_norm": 1.4391737307664527, "learning_rate": 8.132305737551193e-07, "loss": 0.27415433526039124, "step": 3552 }, { "epoch": 1.7557148152724578, "grad_norm": 1.4503824956137814, "learning_rate": 8.100061773430179e-07, "loss": 0.26723912358283997, "step": 3553 }, { "epoch": 1.756209069566292, "grad_norm": 1.3305646078685684, "learning_rate": 8.067879160394821e-07, "loss": 0.2710701823234558, "step": 3554 }, { "epoch": 1.7567033238601262, "grad_norm": 1.2981752509304552, "learning_rate": 8.035757919929765e-07, "loss": 0.23247234523296356, "step": 3555 }, { "epoch": 1.7571975781539602, "grad_norm": 1.3788336069912301, "learning_rate": 8.003698073478749e-07, "loss": 0.2514559328556061, "step": 3556 }, { "epoch": 1.7576918324477944, "grad_norm": 1.2669691261364102, "learning_rate": 7.971699642444419e-07, "loss": 0.23549199104309082, "step": 3557 }, { "epoch": 1.7581860867416286, "grad_norm": 1.326325870924157, "learning_rate": 7.939762648188476e-07, "loss": 0.24511446058750153, "step": 3558 }, { "epoch": 1.7586803410354628, "grad_norm": 1.244030857989509, "learning_rate": 7.907887112031609e-07, "loss": 0.18705075979232788, "step": 3559 }, { "epoch": 1.7591745953292968, "grad_norm": 1.3163815425830492, "learning_rate": 7.876073055253474e-07, "loss": 0.24297048151493073, "step": 3560 }, { "epoch": 1.759668849623131, "grad_norm": 1.3886968971610452, "learning_rate": 7.844320499092683e-07, "loss": 0.239119753241539, "step": 3561 }, { "epoch": 1.7601631039169652, "grad_norm": 1.3716161630664097, "learning_rate": 7.81262946474679e-07, "loss": 0.2430122196674347, "step": 3562 }, { "epoch": 1.7606573582107994, "grad_norm": 1.5018987096099226, "learning_rate": 7.78099997337225e-07, "loss": 0.2785049378871918, "step": 3563 }, { "epoch": 1.7611516125046336, "grad_norm": 1.324774124882076, "learning_rate": 7.749432046084471e-07, "loss": 0.2451494038105011, "step": 3564 }, { "epoch": 1.7616458667984678, "grad_norm": 1.2759037312949375, "learning_rate": 7.717925703957785e-07, "loss": 0.20071648061275482, "step": 3565 }, { "epoch": 1.762140121092302, "grad_norm": 1.265455917769001, "learning_rate": 7.686480968025333e-07, "loss": 0.22308245301246643, "step": 3566 }, { "epoch": 1.7626343753861362, "grad_norm": 1.4753453520092665, "learning_rate": 7.655097859279192e-07, "loss": 0.26082009077072144, "step": 3567 }, { "epoch": 1.7631286296799704, "grad_norm": 1.2035646972809244, "learning_rate": 7.623776398670268e-07, "loss": 0.21026611328125, "step": 3568 }, { "epoch": 1.7636228839738046, "grad_norm": 1.3616311603644673, "learning_rate": 7.592516607108324e-07, "loss": 0.23878465592861176, "step": 3569 }, { "epoch": 1.7641171382676388, "grad_norm": 1.4512524044419246, "learning_rate": 7.561318505461956e-07, "loss": 0.30288150906562805, "step": 3570 }, { "epoch": 1.764611392561473, "grad_norm": 1.3464088406966324, "learning_rate": 7.530182114558582e-07, "loss": 0.25749915838241577, "step": 3571 }, { "epoch": 1.765105646855307, "grad_norm": 1.4850779133681176, "learning_rate": 7.499107455184351e-07, "loss": 0.23799163103103638, "step": 3572 }, { "epoch": 1.7655999011491412, "grad_norm": 1.2970926183891958, "learning_rate": 7.46809454808436e-07, "loss": 0.2626670002937317, "step": 3573 }, { "epoch": 1.7660941554429754, "grad_norm": 1.4394447645143165, "learning_rate": 7.437143413962299e-07, "loss": 0.23273026943206787, "step": 3574 }, { "epoch": 1.7665884097368096, "grad_norm": 1.329151714167698, "learning_rate": 7.406254073480735e-07, "loss": 0.22592151165008545, "step": 3575 }, { "epoch": 1.7670826640306436, "grad_norm": 1.4000212660765223, "learning_rate": 7.375426547260944e-07, "loss": 0.2594859004020691, "step": 3576 }, { "epoch": 1.7675769183244778, "grad_norm": 1.2114788921542652, "learning_rate": 7.344660855882946e-07, "loss": 0.2161571979522705, "step": 3577 }, { "epoch": 1.768071172618312, "grad_norm": 1.2669666342048183, "learning_rate": 7.313957019885487e-07, "loss": 0.23052990436553955, "step": 3578 }, { "epoch": 1.7685654269121462, "grad_norm": 1.2921856609362714, "learning_rate": 7.283315059766005e-07, "loss": 0.2309163510799408, "step": 3579 }, { "epoch": 1.7690596812059804, "grad_norm": 1.3800150012724666, "learning_rate": 7.252734995980604e-07, "loss": 0.24543863534927368, "step": 3580 }, { "epoch": 1.7695539354998147, "grad_norm": 1.315509052214176, "learning_rate": 7.22221684894413e-07, "loss": 0.27616050839424133, "step": 3581 }, { "epoch": 1.7700481897936489, "grad_norm": 1.5849292816622715, "learning_rate": 7.191760639030077e-07, "loss": 0.2247719019651413, "step": 3582 }, { "epoch": 1.770542444087483, "grad_norm": 1.3600242028973613, "learning_rate": 7.161366386570545e-07, "loss": 0.28721702098846436, "step": 3583 }, { "epoch": 1.7710366983813173, "grad_norm": 1.3444976293289765, "learning_rate": 7.131034111856294e-07, "loss": 0.24191290140151978, "step": 3584 }, { "epoch": 1.7715309526751515, "grad_norm": 1.3549546462173616, "learning_rate": 7.100763835136748e-07, "loss": 0.24049970507621765, "step": 3585 }, { "epoch": 1.7720252069689857, "grad_norm": 1.4855378384649431, "learning_rate": 7.070555576619887e-07, "loss": 0.255404531955719, "step": 3586 }, { "epoch": 1.7725194612628197, "grad_norm": 1.3672964019576628, "learning_rate": 7.040409356472333e-07, "loss": 0.23041129112243652, "step": 3587 }, { "epoch": 1.7730137155566539, "grad_norm": 1.3790812567511086, "learning_rate": 7.010325194819278e-07, "loss": 0.2589847147464752, "step": 3588 }, { "epoch": 1.773507969850488, "grad_norm": 1.4114272066031652, "learning_rate": 6.980303111744424e-07, "loss": 0.2604563236236572, "step": 3589 }, { "epoch": 1.7740022241443223, "grad_norm": 1.3786249354000182, "learning_rate": 6.950343127290138e-07, "loss": 0.26831385493278503, "step": 3590 }, { "epoch": 1.7744964784381563, "grad_norm": 1.3398044201914234, "learning_rate": 6.920445261457276e-07, "loss": 0.20475032925605774, "step": 3591 }, { "epoch": 1.7749907327319905, "grad_norm": 1.669693479578031, "learning_rate": 6.890609534205206e-07, "loss": 0.32378682494163513, "step": 3592 }, { "epoch": 1.7754849870258247, "grad_norm": 1.4433175991642826, "learning_rate": 6.86083596545184e-07, "loss": 0.2526070177555084, "step": 3593 }, { "epoch": 1.775979241319659, "grad_norm": 1.3738645357999373, "learning_rate": 6.831124575073578e-07, "loss": 0.2467537820339203, "step": 3594 }, { "epoch": 1.776473495613493, "grad_norm": 1.4660741149631984, "learning_rate": 6.801475382905332e-07, "loss": 0.2857215404510498, "step": 3595 }, { "epoch": 1.7769677499073273, "grad_norm": 1.4443968381596262, "learning_rate": 6.771888408740479e-07, "loss": 0.23615087568759918, "step": 3596 }, { "epoch": 1.7774620042011615, "grad_norm": 1.451390021672748, "learning_rate": 6.742363672330854e-07, "loss": 0.2613365054130554, "step": 3597 }, { "epoch": 1.7779562584949957, "grad_norm": 1.465141872886975, "learning_rate": 6.712901193386756e-07, "loss": 0.2558417320251465, "step": 3598 }, { "epoch": 1.77845051278883, "grad_norm": 1.4467371641088191, "learning_rate": 6.683500991576919e-07, "loss": 0.2683117091655731, "step": 3599 }, { "epoch": 1.7789447670826641, "grad_norm": 1.4625204738144366, "learning_rate": 6.654163086528487e-07, "loss": 0.2546064555644989, "step": 3600 }, { "epoch": 1.7794390213764983, "grad_norm": 1.5872307428555623, "learning_rate": 6.624887497827004e-07, "loss": 0.2683906555175781, "step": 3601 }, { "epoch": 1.7799332756703325, "grad_norm": 1.363900663564542, "learning_rate": 6.595674245016492e-07, "loss": 0.23260846734046936, "step": 3602 }, { "epoch": 1.7804275299641665, "grad_norm": 1.3840728964244504, "learning_rate": 6.566523347599252e-07, "loss": 0.22884608805179596, "step": 3603 }, { "epoch": 1.7809217842580007, "grad_norm": 1.3583647776279095, "learning_rate": 6.537434825036027e-07, "loss": 0.24236485362052917, "step": 3604 }, { "epoch": 1.781416038551835, "grad_norm": 1.4869775379128283, "learning_rate": 6.508408696745893e-07, "loss": 0.29543957114219666, "step": 3605 }, { "epoch": 1.781910292845669, "grad_norm": 1.3626399619539873, "learning_rate": 6.479444982106276e-07, "loss": 0.24011383950710297, "step": 3606 }, { "epoch": 1.7824045471395031, "grad_norm": 1.3135116984072812, "learning_rate": 6.450543700452949e-07, "loss": 0.248407244682312, "step": 3607 }, { "epoch": 1.7828988014333373, "grad_norm": 1.4089475770026854, "learning_rate": 6.421704871080004e-07, "loss": 0.2405746728181839, "step": 3608 }, { "epoch": 1.7833930557271716, "grad_norm": 1.2522903384339197, "learning_rate": 6.392928513239804e-07, "loss": 0.24601790308952332, "step": 3609 }, { "epoch": 1.7838873100210058, "grad_norm": 1.2436557177887422, "learning_rate": 6.36421464614303e-07, "loss": 0.20030242204666138, "step": 3610 }, { "epoch": 1.78438156431484, "grad_norm": 1.3296983724782687, "learning_rate": 6.335563288958691e-07, "loss": 0.23858311772346497, "step": 3611 }, { "epoch": 1.7848758186086742, "grad_norm": 1.4392435044249465, "learning_rate": 6.306974460813986e-07, "loss": 0.2330242097377777, "step": 3612 }, { "epoch": 1.7853700729025084, "grad_norm": 1.445863340067418, "learning_rate": 6.278448180794416e-07, "loss": 0.25513261556625366, "step": 3613 }, { "epoch": 1.7858643271963426, "grad_norm": 1.3248647587522469, "learning_rate": 6.249984467943737e-07, "loss": 0.2298405021429062, "step": 3614 }, { "epoch": 1.7863585814901768, "grad_norm": 1.3090685428520892, "learning_rate": 6.221583341263893e-07, "loss": 0.22120623290538788, "step": 3615 }, { "epoch": 1.786852835784011, "grad_norm": 1.3392765156774626, "learning_rate": 6.193244819715072e-07, "loss": 0.26976969838142395, "step": 3616 }, { "epoch": 1.7873470900778452, "grad_norm": 1.3657180436845977, "learning_rate": 6.164968922215697e-07, "loss": 0.24354586005210876, "step": 3617 }, { "epoch": 1.7878413443716792, "grad_norm": 1.4254233164600292, "learning_rate": 6.136755667642302e-07, "loss": 0.2849498689174652, "step": 3618 }, { "epoch": 1.7883355986655134, "grad_norm": 1.2708453781613391, "learning_rate": 6.10860507482971e-07, "loss": 0.2431584596633911, "step": 3619 }, { "epoch": 1.7888298529593476, "grad_norm": 1.5031154285158648, "learning_rate": 6.080517162570809e-07, "loss": 0.2384781688451767, "step": 3620 }, { "epoch": 1.7893241072531818, "grad_norm": 1.45686854578023, "learning_rate": 6.052491949616712e-07, "loss": 0.23782339692115784, "step": 3621 }, { "epoch": 1.7898183615470158, "grad_norm": 1.342733882676876, "learning_rate": 6.024529454676631e-07, "loss": 0.23293447494506836, "step": 3622 }, { "epoch": 1.79031261584085, "grad_norm": 1.2930495337650696, "learning_rate": 5.996629696417955e-07, "loss": 0.21202662587165833, "step": 3623 }, { "epoch": 1.7908068701346842, "grad_norm": 1.5889243123202152, "learning_rate": 5.968792693466141e-07, "loss": 0.27971768379211426, "step": 3624 }, { "epoch": 1.7913011244285184, "grad_norm": 1.441999540970622, "learning_rate": 5.94101846440478e-07, "loss": 0.2433638721704483, "step": 3625 }, { "epoch": 1.7917953787223526, "grad_norm": 1.3682285780053611, "learning_rate": 5.91330702777555e-07, "loss": 0.21812602877616882, "step": 3626 }, { "epoch": 1.7922896330161868, "grad_norm": 1.924541384200403, "learning_rate": 5.88565840207822e-07, "loss": 0.2135028839111328, "step": 3627 }, { "epoch": 1.792783887310021, "grad_norm": 1.3226125497456243, "learning_rate": 5.858072605770626e-07, "loss": 0.23919226229190826, "step": 3628 }, { "epoch": 1.7932781416038552, "grad_norm": 1.3008122554752455, "learning_rate": 5.830549657268614e-07, "loss": 0.2495008111000061, "step": 3629 }, { "epoch": 1.7937723958976894, "grad_norm": 1.4679589100669386, "learning_rate": 5.80308957494613e-07, "loss": 0.2531805634498596, "step": 3630 }, { "epoch": 1.7942666501915236, "grad_norm": 1.2654762717037664, "learning_rate": 5.775692377135156e-07, "loss": 0.22644619643688202, "step": 3631 }, { "epoch": 1.7947609044853579, "grad_norm": 1.2567004368149646, "learning_rate": 5.748358082125638e-07, "loss": 0.2264411598443985, "step": 3632 }, { "epoch": 1.7952551587791918, "grad_norm": 1.3206987713043599, "learning_rate": 5.721086708165568e-07, "loss": 0.2663921117782593, "step": 3633 }, { "epoch": 1.795749413073026, "grad_norm": 1.35703763331278, "learning_rate": 5.693878273460951e-07, "loss": 0.2398051619529724, "step": 3634 }, { "epoch": 1.7962436673668603, "grad_norm": 1.4184943078470147, "learning_rate": 5.6667327961757e-07, "loss": 0.28781580924987793, "step": 3635 }, { "epoch": 1.7967379216606945, "grad_norm": 2.1761368991988084, "learning_rate": 5.639650294431787e-07, "loss": 0.2232055813074112, "step": 3636 }, { "epoch": 1.7972321759545284, "grad_norm": 1.402577073030083, "learning_rate": 5.612630786309103e-07, "loss": 0.23214340209960938, "step": 3637 }, { "epoch": 1.7977264302483627, "grad_norm": 1.2714718799747338, "learning_rate": 5.585674289845467e-07, "loss": 0.21598659455776215, "step": 3638 }, { "epoch": 1.7982206845421969, "grad_norm": 1.351029180109128, "learning_rate": 5.558780823036658e-07, "loss": 0.2760176956653595, "step": 3639 }, { "epoch": 1.798714938836031, "grad_norm": 1.3941723061811673, "learning_rate": 5.531950403836373e-07, "loss": 0.2641429901123047, "step": 3640 }, { "epoch": 1.7992091931298653, "grad_norm": 1.390874465362023, "learning_rate": 5.505183050156204e-07, "loss": 0.2407502382993698, "step": 3641 }, { "epoch": 1.7997034474236995, "grad_norm": 1.2164247841450622, "learning_rate": 5.478478779865682e-07, "loss": 0.19910940527915955, "step": 3642 }, { "epoch": 1.8001977017175337, "grad_norm": 1.4412656091937792, "learning_rate": 5.451837610792166e-07, "loss": 0.2716234624385834, "step": 3643 }, { "epoch": 1.800691956011368, "grad_norm": 1.3284477963142056, "learning_rate": 5.42525956072093e-07, "loss": 0.2784198224544525, "step": 3644 }, { "epoch": 1.801186210305202, "grad_norm": 1.3444314874013155, "learning_rate": 5.398744647395104e-07, "loss": 0.2277904599905014, "step": 3645 }, { "epoch": 1.8016804645990363, "grad_norm": 1.4299842617414134, "learning_rate": 5.372292888515684e-07, "loss": 0.26788002252578735, "step": 3646 }, { "epoch": 1.8021747188928705, "grad_norm": 1.3607541160674654, "learning_rate": 5.345904301741445e-07, "loss": 0.22452175617218018, "step": 3647 }, { "epoch": 1.8026689731867047, "grad_norm": 1.44450101040719, "learning_rate": 5.319578904689071e-07, "loss": 0.2337179332971573, "step": 3648 }, { "epoch": 1.8031632274805387, "grad_norm": 1.3116281040368842, "learning_rate": 5.293316714932983e-07, "loss": 0.2614130973815918, "step": 3649 }, { "epoch": 1.803657481774373, "grad_norm": 1.3142722561763884, "learning_rate": 5.267117750005468e-07, "loss": 0.2577320635318756, "step": 3650 }, { "epoch": 1.8041517360682071, "grad_norm": 1.231846526151871, "learning_rate": 5.24098202739658e-07, "loss": 0.2058672308921814, "step": 3651 }, { "epoch": 1.8046459903620413, "grad_norm": 1.3970882237865128, "learning_rate": 5.214909564554138e-07, "loss": 0.25223514437675476, "step": 3652 }, { "epoch": 1.8051402446558753, "grad_norm": 1.3683940041570406, "learning_rate": 5.188900378883765e-07, "loss": 0.25651872158050537, "step": 3653 }, { "epoch": 1.8056344989497095, "grad_norm": 1.3167902113360206, "learning_rate": 5.162954487748828e-07, "loss": 0.257855623960495, "step": 3654 }, { "epoch": 1.8061287532435437, "grad_norm": 1.3408137381423195, "learning_rate": 5.137071908470381e-07, "loss": 0.22942093014717102, "step": 3655 }, { "epoch": 1.806623007537378, "grad_norm": 1.3905585042591802, "learning_rate": 5.111252658327326e-07, "loss": 0.25629153847694397, "step": 3656 }, { "epoch": 1.8071172618312121, "grad_norm": 1.3417957205977868, "learning_rate": 5.085496754556207e-07, "loss": 0.23882299661636353, "step": 3657 }, { "epoch": 1.8076115161250463, "grad_norm": 1.3092883951034957, "learning_rate": 5.059804214351283e-07, "loss": 0.2323160469532013, "step": 3658 }, { "epoch": 1.8081057704188805, "grad_norm": 1.318607555394289, "learning_rate": 5.034175054864531e-07, "loss": 0.2080869972705841, "step": 3659 }, { "epoch": 1.8086000247127147, "grad_norm": 1.476319660825777, "learning_rate": 5.008609293205624e-07, "loss": 0.22439511120319366, "step": 3660 }, { "epoch": 1.809094279006549, "grad_norm": 1.3639928518895943, "learning_rate": 4.983106946441885e-07, "loss": 0.2527809739112854, "step": 3661 }, { "epoch": 1.8095885333003832, "grad_norm": 1.181172468164539, "learning_rate": 4.957668031598328e-07, "loss": 0.2149294763803482, "step": 3662 }, { "epoch": 1.8100827875942174, "grad_norm": 1.3244234520799762, "learning_rate": 4.932292565657615e-07, "loss": 0.2471565306186676, "step": 3663 }, { "epoch": 1.8105770418880514, "grad_norm": 1.328701941509414, "learning_rate": 4.906980565560004e-07, "loss": 0.25820282101631165, "step": 3664 }, { "epoch": 1.8110712961818856, "grad_norm": 1.4538113944792308, "learning_rate": 4.881732048203469e-07, "loss": 0.2815645933151245, "step": 3665 }, { "epoch": 1.8115655504757198, "grad_norm": 1.4078938194960222, "learning_rate": 4.856547030443559e-07, "loss": 0.23443330824375153, "step": 3666 }, { "epoch": 1.812059804769554, "grad_norm": 1.413689966723704, "learning_rate": 4.831425529093403e-07, "loss": 0.2452373206615448, "step": 3667 }, { "epoch": 1.812554059063388, "grad_norm": 1.2405057526282826, "learning_rate": 4.806367560923764e-07, "loss": 0.21815839409828186, "step": 3668 }, { "epoch": 1.8130483133572222, "grad_norm": 1.3418751770168684, "learning_rate": 4.781373142663003e-07, "loss": 0.23436316847801208, "step": 3669 }, { "epoch": 1.8135425676510564, "grad_norm": 1.277189547676361, "learning_rate": 4.75644229099701e-07, "loss": 0.18917132914066315, "step": 3670 }, { "epoch": 1.8140368219448906, "grad_norm": 1.3842801505047626, "learning_rate": 4.7315750225692905e-07, "loss": 0.24570351839065552, "step": 3671 }, { "epoch": 1.8145310762387248, "grad_norm": 1.2514343072057177, "learning_rate": 4.7067713539808543e-07, "loss": 0.23367956280708313, "step": 3672 }, { "epoch": 1.815025330532559, "grad_norm": 1.372723501995688, "learning_rate": 4.682031301790291e-07, "loss": 0.24563322961330414, "step": 3673 }, { "epoch": 1.8155195848263932, "grad_norm": 1.3552399849082646, "learning_rate": 4.6573548825137204e-07, "loss": 0.2425815761089325, "step": 3674 }, { "epoch": 1.8160138391202274, "grad_norm": 1.2732667032266225, "learning_rate": 4.632742112624744e-07, "loss": 0.2173803597688675, "step": 3675 }, { "epoch": 1.8165080934140616, "grad_norm": 1.4674070434763509, "learning_rate": 4.6081930085544734e-07, "loss": 0.2665477395057678, "step": 3676 }, { "epoch": 1.8170023477078958, "grad_norm": 1.2335396057121188, "learning_rate": 4.5837075866915994e-07, "loss": 0.23834756016731262, "step": 3677 }, { "epoch": 1.81749660200173, "grad_norm": 1.3614176095599289, "learning_rate": 4.55928586338219e-07, "loss": 0.2479294240474701, "step": 3678 }, { "epoch": 1.8179908562955642, "grad_norm": 1.370567608566195, "learning_rate": 4.5349278549298716e-07, "loss": 0.24136531352996826, "step": 3679 }, { "epoch": 1.8184851105893982, "grad_norm": 1.3881148070094378, "learning_rate": 4.510633577595669e-07, "loss": 0.24397623538970947, "step": 3680 }, { "epoch": 1.8189793648832324, "grad_norm": 1.3189259944629108, "learning_rate": 4.48640304759812e-07, "loss": 0.27078694105148315, "step": 3681 }, { "epoch": 1.8194736191770666, "grad_norm": 1.5222352072420349, "learning_rate": 4.4622362811131745e-07, "loss": 0.2544251084327698, "step": 3682 }, { "epoch": 1.8199678734709008, "grad_norm": 1.3696668102162666, "learning_rate": 4.4381332942742384e-07, "loss": 0.2528873682022095, "step": 3683 }, { "epoch": 1.8204621277647348, "grad_norm": 1.470119432024013, "learning_rate": 4.414094103172084e-07, "loss": 0.25487592816352844, "step": 3684 }, { "epoch": 1.820956382058569, "grad_norm": 1.3872878168023053, "learning_rate": 4.3901187238549414e-07, "loss": 0.22061187028884888, "step": 3685 }, { "epoch": 1.8214506363524032, "grad_norm": 1.355863796177502, "learning_rate": 4.366207172328452e-07, "loss": 0.2793615758419037, "step": 3686 }, { "epoch": 1.8219448906462374, "grad_norm": 1.2429295933181803, "learning_rate": 4.342359464555612e-07, "loss": 0.2323140949010849, "step": 3687 }, { "epoch": 1.8224391449400716, "grad_norm": 1.370663497944958, "learning_rate": 4.3185756164568104e-07, "loss": 0.2616409659385681, "step": 3688 }, { "epoch": 1.8229333992339058, "grad_norm": 1.3843956978002738, "learning_rate": 4.294855643909812e-07, "loss": 0.203874871134758, "step": 3689 }, { "epoch": 1.82342765352774, "grad_norm": 1.2289114807067458, "learning_rate": 4.271199562749717e-07, "loss": 0.2272878736257553, "step": 3690 }, { "epoch": 1.8239219078215743, "grad_norm": 1.338434972419624, "learning_rate": 4.247607388769004e-07, "loss": 0.23728047311306, "step": 3691 }, { "epoch": 1.8244161621154085, "grad_norm": 1.4750745226923418, "learning_rate": 4.2240791377174737e-07, "loss": 0.2570911943912506, "step": 3692 }, { "epoch": 1.8249104164092427, "grad_norm": 1.4969254471055817, "learning_rate": 4.200614825302207e-07, "loss": 0.24265727400779724, "step": 3693 }, { "epoch": 1.8254046707030769, "grad_norm": 1.405819385173928, "learning_rate": 4.177214467187707e-07, "loss": 0.24822816252708435, "step": 3694 }, { "epoch": 1.8258989249969109, "grad_norm": 1.3218266218091017, "learning_rate": 4.153878078995677e-07, "loss": 0.23382046818733215, "step": 3695 }, { "epoch": 1.826393179290745, "grad_norm": 1.4037010093048616, "learning_rate": 4.130605676305166e-07, "loss": 0.27590304613113403, "step": 3696 }, { "epoch": 1.8268874335845793, "grad_norm": 1.4161501438852775, "learning_rate": 4.1073972746525026e-07, "loss": 0.25702038407325745, "step": 3697 }, { "epoch": 1.8273816878784135, "grad_norm": 1.488627338365754, "learning_rate": 4.0842528895312707e-07, "loss": 0.28980135917663574, "step": 3698 }, { "epoch": 1.8278759421722475, "grad_norm": 1.5075437506896323, "learning_rate": 4.0611725363923435e-07, "loss": 0.22739271819591522, "step": 3699 }, { "epoch": 1.8283701964660817, "grad_norm": 1.4671495030162094, "learning_rate": 4.038156230643853e-07, "loss": 0.26396334171295166, "step": 3700 }, { "epoch": 1.8288644507599159, "grad_norm": 1.5855861974203058, "learning_rate": 4.015203987651106e-07, "loss": 0.25548964738845825, "step": 3701 }, { "epoch": 1.82935870505375, "grad_norm": 1.3315259515817186, "learning_rate": 3.992315822736725e-07, "loss": 0.22227105498313904, "step": 3702 }, { "epoch": 1.8298529593475843, "grad_norm": 1.445413897274288, "learning_rate": 3.969491751180543e-07, "loss": 0.30854254961013794, "step": 3703 }, { "epoch": 1.8303472136414185, "grad_norm": 1.4678349464130562, "learning_rate": 3.946731788219538e-07, "loss": 0.27471429109573364, "step": 3704 }, { "epoch": 1.8308414679352527, "grad_norm": 1.334822235698922, "learning_rate": 3.924035949047955e-07, "loss": 0.2317768633365631, "step": 3705 }, { "epoch": 1.831335722229087, "grad_norm": 1.4197098897896443, "learning_rate": 3.901404248817231e-07, "loss": 0.2450723946094513, "step": 3706 }, { "epoch": 1.8318299765229211, "grad_norm": 1.4676009490842072, "learning_rate": 3.878836702635935e-07, "loss": 0.2428039014339447, "step": 3707 }, { "epoch": 1.8323242308167553, "grad_norm": 1.4376208196933993, "learning_rate": 3.856333325569861e-07, "loss": 0.27869629859924316, "step": 3708 }, { "epoch": 1.8328184851105895, "grad_norm": 1.2808253694997749, "learning_rate": 3.8338941326419353e-07, "loss": 0.21661749482154846, "step": 3709 }, { "epoch": 1.8333127394044237, "grad_norm": 1.3452610575891626, "learning_rate": 3.8115191388322206e-07, "loss": 0.2655249834060669, "step": 3710 }, { "epoch": 1.8338069936982577, "grad_norm": 1.3643896556477109, "learning_rate": 3.7892083590779784e-07, "loss": 0.2281903475522995, "step": 3711 }, { "epoch": 1.834301247992092, "grad_norm": 1.492937654145658, "learning_rate": 3.7669618082735504e-07, "loss": 0.24545446038246155, "step": 3712 }, { "epoch": 1.8347955022859261, "grad_norm": 1.2788794377367898, "learning_rate": 3.7447795012704237e-07, "loss": 0.24749556183815002, "step": 3713 }, { "epoch": 1.8352897565797601, "grad_norm": 1.4606135919595513, "learning_rate": 3.722661452877163e-07, "loss": 0.26234689354896545, "step": 3714 }, { "epoch": 1.8357840108735943, "grad_norm": 1.3697239858165842, "learning_rate": 3.700607677859491e-07, "loss": 0.21348389983177185, "step": 3715 }, { "epoch": 1.8362782651674285, "grad_norm": 1.3198403259649356, "learning_rate": 3.6786181909401864e-07, "loss": 0.2527744770050049, "step": 3716 }, { "epoch": 1.8367725194612627, "grad_norm": 1.3153305717810528, "learning_rate": 3.6566930067991056e-07, "loss": 0.2175026535987854, "step": 3717 }, { "epoch": 1.837266773755097, "grad_norm": 1.3795015677920492, "learning_rate": 3.6348321400731967e-07, "loss": 0.2847272753715515, "step": 3718 }, { "epoch": 1.8377610280489312, "grad_norm": 1.4885049894439106, "learning_rate": 3.613035605356463e-07, "loss": 0.2549072504043579, "step": 3719 }, { "epoch": 1.8382552823427654, "grad_norm": 1.3444222427486383, "learning_rate": 3.591303417199965e-07, "loss": 0.24534013867378235, "step": 3720 }, { "epoch": 1.8387495366365996, "grad_norm": 1.461602538702394, "learning_rate": 3.5696355901117865e-07, "loss": 0.25336408615112305, "step": 3721 }, { "epoch": 1.8392437909304338, "grad_norm": 1.4932038589381658, "learning_rate": 3.548032138557056e-07, "loss": 0.2787632346153259, "step": 3722 }, { "epoch": 1.839738045224268, "grad_norm": 1.3687827308256, "learning_rate": 3.5264930769579595e-07, "loss": 0.22364875674247742, "step": 3723 }, { "epoch": 1.8402322995181022, "grad_norm": 1.509493433022075, "learning_rate": 3.5050184196936285e-07, "loss": 0.2526230216026306, "step": 3724 }, { "epoch": 1.8407265538119364, "grad_norm": 1.449998297788816, "learning_rate": 3.483608181100262e-07, "loss": 0.2412932962179184, "step": 3725 }, { "epoch": 1.8412208081057704, "grad_norm": 1.4100243345912178, "learning_rate": 3.462262375471026e-07, "loss": 0.28693705797195435, "step": 3726 }, { "epoch": 1.8417150623996046, "grad_norm": 1.4369299703462226, "learning_rate": 3.4409810170560667e-07, "loss": 0.2600281834602356, "step": 3727 }, { "epoch": 1.8422093166934388, "grad_norm": 1.3702328145360616, "learning_rate": 3.4197641200625185e-07, "loss": 0.24885150790214539, "step": 3728 }, { "epoch": 1.842703570987273, "grad_norm": 1.476451776245579, "learning_rate": 3.398611698654497e-07, "loss": 0.27185115218162537, "step": 3729 }, { "epoch": 1.843197825281107, "grad_norm": 1.6779196665373166, "learning_rate": 3.377523766953006e-07, "loss": 0.2999323010444641, "step": 3730 }, { "epoch": 1.8436920795749412, "grad_norm": 1.3755033406487114, "learning_rate": 3.356500339036106e-07, "loss": 0.22807806730270386, "step": 3731 }, { "epoch": 1.8441863338687754, "grad_norm": 1.4727836521575108, "learning_rate": 3.3355414289387155e-07, "loss": 0.23006726801395416, "step": 3732 }, { "epoch": 1.8446805881626096, "grad_norm": 1.4892072813513704, "learning_rate": 3.314647050652686e-07, "loss": 0.25261276960372925, "step": 3733 }, { "epoch": 1.8451748424564438, "grad_norm": 1.3741598151970273, "learning_rate": 3.293817218126827e-07, "loss": 0.2484148144721985, "step": 3734 }, { "epoch": 1.845669096750278, "grad_norm": 1.2679669997107472, "learning_rate": 3.273051945266836e-07, "loss": 0.2472834438085556, "step": 3735 }, { "epoch": 1.8461633510441122, "grad_norm": 1.16756829401485, "learning_rate": 3.2523512459352923e-07, "loss": 0.20510706305503845, "step": 3736 }, { "epoch": 1.8466576053379464, "grad_norm": 1.292644423038628, "learning_rate": 3.231715133951707e-07, "loss": 0.2331993281841278, "step": 3737 }, { "epoch": 1.8471518596317806, "grad_norm": 1.4584815860954135, "learning_rate": 3.211143623092461e-07, "loss": 0.2704228162765503, "step": 3738 }, { "epoch": 1.8476461139256148, "grad_norm": 1.4579018041488718, "learning_rate": 3.190636727090768e-07, "loss": 0.2514714002609253, "step": 3739 }, { "epoch": 1.848140368219449, "grad_norm": 1.258977256920419, "learning_rate": 3.170194459636777e-07, "loss": 0.2396089732646942, "step": 3740 }, { "epoch": 1.848634622513283, "grad_norm": 1.4139144003983488, "learning_rate": 3.149816834377428e-07, "loss": 0.266484797000885, "step": 3741 }, { "epoch": 1.8491288768071172, "grad_norm": 1.338105672337281, "learning_rate": 3.129503864916539e-07, "loss": 0.24549749493598938, "step": 3742 }, { "epoch": 1.8496231311009514, "grad_norm": 1.6902480251834826, "learning_rate": 3.1092555648147615e-07, "loss": 0.2659090757369995, "step": 3743 }, { "epoch": 1.8501173853947857, "grad_norm": 1.4018081288366548, "learning_rate": 3.0890719475895615e-07, "loss": 0.2756732702255249, "step": 3744 }, { "epoch": 1.8506116396886196, "grad_norm": 1.3509953718874834, "learning_rate": 3.068953026715238e-07, "loss": 0.2568710148334503, "step": 3745 }, { "epoch": 1.8511058939824538, "grad_norm": 1.3512798325752944, "learning_rate": 3.048898815622914e-07, "loss": 0.2255566120147705, "step": 3746 }, { "epoch": 1.851600148276288, "grad_norm": 1.309385732750396, "learning_rate": 3.028909327700458e-07, "loss": 0.2083941102027893, "step": 3747 }, { "epoch": 1.8520944025701223, "grad_norm": 1.2287507621351796, "learning_rate": 3.0089845762926063e-07, "loss": 0.20739290118217468, "step": 3748 }, { "epoch": 1.8525886568639565, "grad_norm": 1.2356251229389228, "learning_rate": 2.989124574700819e-07, "loss": 0.21835210919380188, "step": 3749 }, { "epoch": 1.8530829111577907, "grad_norm": 1.312598409351232, "learning_rate": 2.969329336183335e-07, "loss": 0.2170596569776535, "step": 3750 }, { "epoch": 1.8535771654516249, "grad_norm": 1.3990932569701935, "learning_rate": 2.949598873955184e-07, "loss": 0.23584111034870148, "step": 3751 }, { "epoch": 1.854071419745459, "grad_norm": 1.5531646127161125, "learning_rate": 2.9299332011881623e-07, "loss": 0.2690342664718628, "step": 3752 }, { "epoch": 1.8545656740392933, "grad_norm": 1.2634424740078676, "learning_rate": 2.9103323310107566e-07, "loss": 0.2499091923236847, "step": 3753 }, { "epoch": 1.8550599283331275, "grad_norm": 1.417744173198578, "learning_rate": 2.8907962765082567e-07, "loss": 0.23112377524375916, "step": 3754 }, { "epoch": 1.8555541826269617, "grad_norm": 1.375590332914505, "learning_rate": 2.8713250507226285e-07, "loss": 0.25203657150268555, "step": 3755 }, { "epoch": 1.856048436920796, "grad_norm": 1.4015552448571456, "learning_rate": 2.8519186666526086e-07, "loss": 0.2468508780002594, "step": 3756 }, { "epoch": 1.85654269121463, "grad_norm": 1.427563584784084, "learning_rate": 2.8325771372536e-07, "loss": 0.22745928168296814, "step": 3757 }, { "epoch": 1.857036945508464, "grad_norm": 1.2932963376428803, "learning_rate": 2.8133004754377525e-07, "loss": 0.23090660572052002, "step": 3758 }, { "epoch": 1.8575311998022983, "grad_norm": 1.420318152152914, "learning_rate": 2.7940886940738707e-07, "loss": 0.27513352036476135, "step": 3759 }, { "epoch": 1.8580254540961325, "grad_norm": 1.4517333399175874, "learning_rate": 2.774941805987474e-07, "loss": 0.25791019201278687, "step": 3760 }, { "epoch": 1.8585197083899665, "grad_norm": 1.523404531013776, "learning_rate": 2.75585982396076e-07, "loss": 0.2703961730003357, "step": 3761 }, { "epoch": 1.8590139626838007, "grad_norm": 1.4198437134006967, "learning_rate": 2.736842760732561e-07, "loss": 0.2557608485221863, "step": 3762 }, { "epoch": 1.859508216977635, "grad_norm": 1.4276231211370918, "learning_rate": 2.717890628998421e-07, "loss": 0.26276740431785583, "step": 3763 }, { "epoch": 1.8600024712714691, "grad_norm": 1.3830597360775128, "learning_rate": 2.699003441410508e-07, "loss": 0.3033446967601776, "step": 3764 }, { "epoch": 1.8604967255653033, "grad_norm": 1.3975518004533982, "learning_rate": 2.680181210577637e-07, "loss": 0.2513597905635834, "step": 3765 }, { "epoch": 1.8609909798591375, "grad_norm": 1.2527716887935596, "learning_rate": 2.661423949065267e-07, "loss": 0.22935059666633606, "step": 3766 }, { "epoch": 1.8614852341529717, "grad_norm": 1.5028347517247218, "learning_rate": 2.6427316693954596e-07, "loss": 0.2585369348526001, "step": 3767 }, { "epoch": 1.861979488446806, "grad_norm": 1.4129565265857094, "learning_rate": 2.6241043840469104e-07, "loss": 0.25701645016670227, "step": 3768 }, { "epoch": 1.8624737427406401, "grad_norm": 1.304405538262163, "learning_rate": 2.605542105454961e-07, "loss": 0.24622182548046112, "step": 3769 }, { "epoch": 1.8629679970344744, "grad_norm": 1.380891732165765, "learning_rate": 2.5870448460114994e-07, "loss": 0.2650758624076843, "step": 3770 }, { "epoch": 1.8634622513283086, "grad_norm": 1.4721649336836553, "learning_rate": 2.568612618065036e-07, "loss": 0.2364269644021988, "step": 3771 }, { "epoch": 1.8639565056221425, "grad_norm": 1.2217358212004363, "learning_rate": 2.5502454339206617e-07, "loss": 0.23226915299892426, "step": 3772 }, { "epoch": 1.8644507599159768, "grad_norm": 1.3407554644381927, "learning_rate": 2.5319433058400565e-07, "loss": 0.23077306151390076, "step": 3773 }, { "epoch": 1.864945014209811, "grad_norm": 1.289395146095016, "learning_rate": 2.5137062460414476e-07, "loss": 0.23707103729248047, "step": 3774 }, { "epoch": 1.8654392685036452, "grad_norm": 1.3571808886592325, "learning_rate": 2.4955342666996505e-07, "loss": 0.268571138381958, "step": 3775 }, { "epoch": 1.8659335227974791, "grad_norm": 1.4298616373621023, "learning_rate": 2.4774273799459847e-07, "loss": 0.21469517052173615, "step": 3776 }, { "epoch": 1.8664277770913134, "grad_norm": 1.302386517113681, "learning_rate": 2.45938559786838e-07, "loss": 0.2513999938964844, "step": 3777 }, { "epoch": 1.8669220313851476, "grad_norm": 1.2688339559395354, "learning_rate": 2.44140893251128e-07, "loss": 0.23660680651664734, "step": 3778 }, { "epoch": 1.8674162856789818, "grad_norm": 1.499995655954345, "learning_rate": 2.423497395875618e-07, "loss": 0.24594557285308838, "step": 3779 }, { "epoch": 1.867910539972816, "grad_norm": 1.4315211319459857, "learning_rate": 2.405650999918896e-07, "loss": 0.2725435793399811, "step": 3780 }, { "epoch": 1.8684047942666502, "grad_norm": 1.3565937935517103, "learning_rate": 2.3878697565551167e-07, "loss": 0.25718316435813904, "step": 3781 }, { "epoch": 1.8688990485604844, "grad_norm": 1.3523272274009415, "learning_rate": 2.3701536776547851e-07, "loss": 0.2546181082725525, "step": 3782 }, { "epoch": 1.8693933028543186, "grad_norm": 1.1875597307843324, "learning_rate": 2.3525027750448959e-07, "loss": 0.22146770358085632, "step": 3783 }, { "epoch": 1.8698875571481528, "grad_norm": 1.5616036933474096, "learning_rate": 2.3349170605089456e-07, "loss": 0.23873519897460938, "step": 3784 }, { "epoch": 1.870381811441987, "grad_norm": 1.3056198220614723, "learning_rate": 2.3173965457868875e-07, "loss": 0.2530808746814728, "step": 3785 }, { "epoch": 1.8708760657358212, "grad_norm": 1.5174642956273923, "learning_rate": 2.2999412425751987e-07, "loss": 0.21616236865520477, "step": 3786 }, { "epoch": 1.8713703200296554, "grad_norm": 1.3867713509711206, "learning_rate": 2.2825511625267583e-07, "loss": 0.21596969664096832, "step": 3787 }, { "epoch": 1.8718645743234894, "grad_norm": 1.4557650561795843, "learning_rate": 2.265226317250957e-07, "loss": 0.25873616337776184, "step": 3788 }, { "epoch": 1.8723588286173236, "grad_norm": 1.3108065941801126, "learning_rate": 2.247966718313599e-07, "loss": 0.21096865832805634, "step": 3789 }, { "epoch": 1.8728530829111578, "grad_norm": 1.374596799099242, "learning_rate": 2.230772377236956e-07, "loss": 0.2159111499786377, "step": 3790 }, { "epoch": 1.8733473372049918, "grad_norm": 1.3658642346441578, "learning_rate": 2.213643305499724e-07, "loss": 0.2264566719532013, "step": 3791 }, { "epoch": 1.873841591498826, "grad_norm": 1.2529368730648867, "learning_rate": 2.1965795145370338e-07, "loss": 0.216034397482872, "step": 3792 }, { "epoch": 1.8743358457926602, "grad_norm": 1.2144868387665828, "learning_rate": 2.1795810157404063e-07, "loss": 0.22257745265960693, "step": 3793 }, { "epoch": 1.8748301000864944, "grad_norm": 1.5075158608293073, "learning_rate": 2.1626478204578082e-07, "loss": 0.2569161653518677, "step": 3794 }, { "epoch": 1.8753243543803286, "grad_norm": 1.3028902539101006, "learning_rate": 2.1457799399936087e-07, "loss": 0.24172556400299072, "step": 3795 }, { "epoch": 1.8758186086741628, "grad_norm": 1.4100197142967315, "learning_rate": 2.128977385608555e-07, "loss": 0.25539106130599976, "step": 3796 }, { "epoch": 1.876312862967997, "grad_norm": 1.3564195764364628, "learning_rate": 2.1122401685197747e-07, "loss": 0.23766650259494781, "step": 3797 }, { "epoch": 1.8768071172618312, "grad_norm": 2.0847437292387516, "learning_rate": 2.095568299900841e-07, "loss": 0.24102288484573364, "step": 3798 }, { "epoch": 1.8773013715556655, "grad_norm": 1.4163898812472968, "learning_rate": 2.0789617908816063e-07, "loss": 0.25168395042419434, "step": 3799 }, { "epoch": 1.8777956258494997, "grad_norm": 1.2853968722580162, "learning_rate": 2.0624206525483582e-07, "loss": 0.23417149484157562, "step": 3800 }, { "epoch": 1.8782898801433339, "grad_norm": 1.4002834822702614, "learning_rate": 2.04594489594373e-07, "loss": 0.2875264883041382, "step": 3801 }, { "epoch": 1.878784134437168, "grad_norm": 1.3714454637927955, "learning_rate": 2.0295345320667014e-07, "loss": 0.24828693270683289, "step": 3802 }, { "epoch": 1.879278388731002, "grad_norm": 1.3521250596424406, "learning_rate": 2.013189571872587e-07, "loss": 0.23279064893722534, "step": 3803 }, { "epoch": 1.8797726430248363, "grad_norm": 1.1425181629308492, "learning_rate": 1.996910026273058e-07, "loss": 0.2099420577287674, "step": 3804 }, { "epoch": 1.8802668973186705, "grad_norm": 1.346362344532125, "learning_rate": 1.9806959061360985e-07, "loss": 0.25043174624443054, "step": 3805 }, { "epoch": 1.8807611516125047, "grad_norm": 1.3680517059526944, "learning_rate": 1.9645472222860286e-07, "loss": 0.2606011927127838, "step": 3806 }, { "epoch": 1.8812554059063387, "grad_norm": 1.2606250431650987, "learning_rate": 1.948463985503468e-07, "loss": 0.22487565875053406, "step": 3807 }, { "epoch": 1.8817496602001729, "grad_norm": 1.6823729371263936, "learning_rate": 1.9324462065253735e-07, "loss": 0.29611343145370483, "step": 3808 }, { "epoch": 1.882243914494007, "grad_norm": 1.282763458334529, "learning_rate": 1.9164938960449685e-07, "loss": 0.2301706224679947, "step": 3809 }, { "epoch": 1.8827381687878413, "grad_norm": 1.319243063789466, "learning_rate": 1.9006070647118015e-07, "loss": 0.2306794822216034, "step": 3810 }, { "epoch": 1.8832324230816755, "grad_norm": 1.4208055299495237, "learning_rate": 1.884785723131688e-07, "loss": 0.2588786482810974, "step": 3811 }, { "epoch": 1.8837266773755097, "grad_norm": 1.527285475263959, "learning_rate": 1.8690298818667463e-07, "loss": 0.2795346677303314, "step": 3812 }, { "epoch": 1.884220931669344, "grad_norm": 1.2499989201376016, "learning_rate": 1.853339551435318e-07, "loss": 0.2313271164894104, "step": 3813 }, { "epoch": 1.884715185963178, "grad_norm": 1.4803115521216077, "learning_rate": 1.8377147423120467e-07, "loss": 0.22814632952213287, "step": 3814 }, { "epoch": 1.8852094402570123, "grad_norm": 1.3259243101199787, "learning_rate": 1.822155464927866e-07, "loss": 0.2605836093425751, "step": 3815 }, { "epoch": 1.8857036945508465, "grad_norm": 1.3976508324913761, "learning_rate": 1.8066617296699007e-07, "loss": 0.23902952671051025, "step": 3816 }, { "epoch": 1.8861979488446807, "grad_norm": 1.290435692515394, "learning_rate": 1.7912335468815545e-07, "loss": 0.24895761907100677, "step": 3817 }, { "epoch": 1.8866922031385147, "grad_norm": 1.4446135232841222, "learning_rate": 1.7758709268624664e-07, "loss": 0.24108648300170898, "step": 3818 }, { "epoch": 1.887186457432349, "grad_norm": 1.4071508146495701, "learning_rate": 1.7605738798684767e-07, "loss": 0.2600073516368866, "step": 3819 }, { "epoch": 1.8876807117261831, "grad_norm": 1.3261487318829528, "learning_rate": 1.745342416111706e-07, "loss": 0.21564190089702606, "step": 3820 }, { "epoch": 1.8881749660200173, "grad_norm": 1.4577577895280622, "learning_rate": 1.7301765457604647e-07, "loss": 0.24080556631088257, "step": 3821 }, { "epoch": 1.8886692203138513, "grad_norm": 1.316642170468449, "learning_rate": 1.7150762789392316e-07, "loss": 0.22631056606769562, "step": 3822 }, { "epoch": 1.8891634746076855, "grad_norm": 1.4341533325292704, "learning_rate": 1.7000416257287654e-07, "loss": 0.26355087757110596, "step": 3823 }, { "epoch": 1.8896577289015197, "grad_norm": 1.387410149780388, "learning_rate": 1.685072596165982e-07, "loss": 0.248369500041008, "step": 3824 }, { "epoch": 1.890151983195354, "grad_norm": 1.4331472853704903, "learning_rate": 1.670169200243976e-07, "loss": 0.2789249122142792, "step": 3825 }, { "epoch": 1.8906462374891881, "grad_norm": 1.2052406993380367, "learning_rate": 1.6553314479120453e-07, "loss": 0.22493675351142883, "step": 3826 }, { "epoch": 1.8911404917830223, "grad_norm": 1.2074956449276386, "learning_rate": 1.6405593490756766e-07, "loss": 0.21274074912071228, "step": 3827 }, { "epoch": 1.8916347460768566, "grad_norm": 1.3986179942656674, "learning_rate": 1.6258529135964928e-07, "loss": 0.2591193914413452, "step": 3828 }, { "epoch": 1.8921290003706908, "grad_norm": 1.5077061888652343, "learning_rate": 1.6112121512923075e-07, "loss": 0.2791387140750885, "step": 3829 }, { "epoch": 1.892623254664525, "grad_norm": 1.449596307066075, "learning_rate": 1.5966370719371015e-07, "loss": 0.2840545177459717, "step": 3830 }, { "epoch": 1.8931175089583592, "grad_norm": 1.538114321399184, "learning_rate": 1.582127685260948e-07, "loss": 0.2563555836677551, "step": 3831 }, { "epoch": 1.8936117632521934, "grad_norm": 1.2897284655116197, "learning_rate": 1.5676840009501538e-07, "loss": 0.22912704944610596, "step": 3832 }, { "epoch": 1.8941060175460276, "grad_norm": 1.3733822665309192, "learning_rate": 1.5533060286470837e-07, "loss": 0.25490787625312805, "step": 3833 }, { "epoch": 1.8946002718398616, "grad_norm": 1.2282031018618578, "learning_rate": 1.5389937779502818e-07, "loss": 0.21826709806919098, "step": 3834 }, { "epoch": 1.8950945261336958, "grad_norm": 1.303626845787231, "learning_rate": 1.524747258414394e-07, "loss": 0.2292749583721161, "step": 3835 }, { "epoch": 1.89558878042753, "grad_norm": 1.3359905611934206, "learning_rate": 1.5105664795501908e-07, "loss": 0.24652332067489624, "step": 3836 }, { "epoch": 1.8960830347213642, "grad_norm": 1.3777956922677133, "learning_rate": 1.4964514508245652e-07, "loss": 0.25154706835746765, "step": 3837 }, { "epoch": 1.8965772890151982, "grad_norm": 1.3722697572324272, "learning_rate": 1.482402181660525e-07, "loss": 0.2414158582687378, "step": 3838 }, { "epoch": 1.8970715433090324, "grad_norm": 1.3960215733148371, "learning_rate": 1.4684186814371225e-07, "loss": 0.22421908378601074, "step": 3839 }, { "epoch": 1.8975657976028666, "grad_norm": 1.3337706977662172, "learning_rate": 1.4545009594895687e-07, "loss": 0.2506029009819031, "step": 3840 }, { "epoch": 1.8980600518967008, "grad_norm": 1.239516400526973, "learning_rate": 1.440649025109142e-07, "loss": 0.2011726200580597, "step": 3841 }, { "epoch": 1.898554306190535, "grad_norm": 1.5242598019660087, "learning_rate": 1.4268628875431677e-07, "loss": 0.27702796459198, "step": 3842 }, { "epoch": 1.8990485604843692, "grad_norm": 1.236260659855922, "learning_rate": 1.413142555995095e-07, "loss": 0.23884715139865875, "step": 3843 }, { "epoch": 1.8995428147782034, "grad_norm": 1.2385068593263413, "learning_rate": 1.3994880396244304e-07, "loss": 0.2191702425479889, "step": 3844 }, { "epoch": 1.9000370690720376, "grad_norm": 1.3532676134331167, "learning_rate": 1.385899347546704e-07, "loss": 0.25425833463668823, "step": 3845 }, { "epoch": 1.9005313233658718, "grad_norm": 1.3452712776781028, "learning_rate": 1.37237648883356e-07, "loss": 0.23355990648269653, "step": 3846 }, { "epoch": 1.901025577659706, "grad_norm": 1.200878562022238, "learning_rate": 1.3589194725126542e-07, "loss": 0.2079685628414154, "step": 3847 }, { "epoch": 1.9015198319535402, "grad_norm": 1.380798956497921, "learning_rate": 1.3455283075676895e-07, "loss": 0.25126928091049194, "step": 3848 }, { "epoch": 1.9020140862473742, "grad_norm": 1.3306751541769635, "learning_rate": 1.332203002938437e-07, "loss": 0.2608864903450012, "step": 3849 }, { "epoch": 1.9025083405412084, "grad_norm": 1.3536846944777874, "learning_rate": 1.3189435675206697e-07, "loss": 0.27048414945602417, "step": 3850 }, { "epoch": 1.9030025948350426, "grad_norm": 1.3873264194773522, "learning_rate": 1.3057500101661846e-07, "loss": 0.24350577592849731, "step": 3851 }, { "epoch": 1.9034968491288768, "grad_norm": 1.5060374095399143, "learning_rate": 1.2926223396828363e-07, "loss": 0.23283880949020386, "step": 3852 }, { "epoch": 1.9039911034227108, "grad_norm": 1.3722502195381412, "learning_rate": 1.2795605648344477e-07, "loss": 0.23332493007183075, "step": 3853 }, { "epoch": 1.904485357716545, "grad_norm": 1.2805992535782373, "learning_rate": 1.2665646943408882e-07, "loss": 0.19833901524543762, "step": 3854 }, { "epoch": 1.9049796120103792, "grad_norm": 1.316108497317141, "learning_rate": 1.2536347368780066e-07, "loss": 0.23650333285331726, "step": 3855 }, { "epoch": 1.9054738663042134, "grad_norm": 1.1749486485284195, "learning_rate": 1.240770701077665e-07, "loss": 0.20151859521865845, "step": 3856 }, { "epoch": 1.9059681205980477, "grad_norm": 1.4620220273758984, "learning_rate": 1.2279725955277044e-07, "loss": 0.32347559928894043, "step": 3857 }, { "epoch": 1.9064623748918819, "grad_norm": 1.2726582104041342, "learning_rate": 1.215240428771969e-07, "loss": 0.25937923789024353, "step": 3858 }, { "epoch": 1.906956629185716, "grad_norm": 1.6959402751075685, "learning_rate": 1.2025742093102477e-07, "loss": 0.2648822069168091, "step": 3859 }, { "epoch": 1.9074508834795503, "grad_norm": 1.4639245582336404, "learning_rate": 1.1899739455983327e-07, "loss": 0.27612054347991943, "step": 3860 }, { "epoch": 1.9079451377733845, "grad_norm": 1.32342317481008, "learning_rate": 1.1774396460480064e-07, "loss": 0.2204264998435974, "step": 3861 }, { "epoch": 1.9084393920672187, "grad_norm": 1.4448526349141402, "learning_rate": 1.164971319026964e-07, "loss": 0.2719968557357788, "step": 3862 }, { "epoch": 1.908933646361053, "grad_norm": 1.3288093626980793, "learning_rate": 1.1525689728588807e-07, "loss": 0.2308243364095688, "step": 3863 }, { "epoch": 1.909427900654887, "grad_norm": 1.405242953564276, "learning_rate": 1.1402326158234e-07, "loss": 0.23281638324260712, "step": 3864 }, { "epoch": 1.909922154948721, "grad_norm": 1.553800687505842, "learning_rate": 1.127962256156101e-07, "loss": 0.26273444294929504, "step": 3865 }, { "epoch": 1.9104164092425553, "grad_norm": 1.3311046226223713, "learning_rate": 1.1157579020484755e-07, "loss": 0.26783496141433716, "step": 3866 }, { "epoch": 1.9109106635363895, "grad_norm": 1.4482920311066827, "learning_rate": 1.1036195616480061e-07, "loss": 0.2575075626373291, "step": 3867 }, { "epoch": 1.9114049178302237, "grad_norm": 1.3313207733281058, "learning_rate": 1.0915472430580443e-07, "loss": 0.24802085757255554, "step": 3868 }, { "epoch": 1.9118991721240577, "grad_norm": 1.230518560175702, "learning_rate": 1.0795409543379099e-07, "loss": 0.22017821669578552, "step": 3869 }, { "epoch": 1.912393426417892, "grad_norm": 1.3804831257002024, "learning_rate": 1.0676007035028579e-07, "loss": 0.2525743246078491, "step": 3870 }, { "epoch": 1.912887680711726, "grad_norm": 1.5674388988470875, "learning_rate": 1.05572649852399e-07, "loss": 0.26704782247543335, "step": 3871 }, { "epoch": 1.9133819350055603, "grad_norm": 3.430480948746706, "learning_rate": 1.0439183473283654e-07, "loss": 0.25393134355545044, "step": 3872 }, { "epoch": 1.9138761892993945, "grad_norm": 1.4465108879454651, "learning_rate": 1.0321762577989448e-07, "loss": 0.27266988158226013, "step": 3873 }, { "epoch": 1.9143704435932287, "grad_norm": 1.366912603525092, "learning_rate": 1.0205002377745799e-07, "loss": 0.2694425582885742, "step": 3874 }, { "epoch": 1.914864697887063, "grad_norm": 1.394500016346508, "learning_rate": 1.0088902950500023e-07, "loss": 0.28820598125457764, "step": 3875 }, { "epoch": 1.9153589521808971, "grad_norm": 1.3050023577266547, "learning_rate": 9.973464373758679e-08, "loss": 0.2194051444530487, "step": 3876 }, { "epoch": 1.9158532064747313, "grad_norm": 1.3831603392475145, "learning_rate": 9.858686724586675e-08, "loss": 0.25639402866363525, "step": 3877 }, { "epoch": 1.9163474607685655, "grad_norm": 1.2744346736321277, "learning_rate": 9.744570079608051e-08, "loss": 0.23420584201812744, "step": 3878 }, { "epoch": 1.9168417150623998, "grad_norm": 1.38639151316596, "learning_rate": 9.631114515005425e-08, "loss": 0.2514578700065613, "step": 3879 }, { "epoch": 1.9173359693562337, "grad_norm": 1.296540814966686, "learning_rate": 9.518320106520096e-08, "loss": 0.2223532646894455, "step": 3880 }, { "epoch": 1.917830223650068, "grad_norm": 1.367450022954602, "learning_rate": 9.406186929451943e-08, "loss": 0.21725934743881226, "step": 3881 }, { "epoch": 1.9183244779439022, "grad_norm": 1.2939049219304557, "learning_rate": 9.294715058659531e-08, "loss": 0.2081519365310669, "step": 3882 }, { "epoch": 1.9188187322377364, "grad_norm": 1.4148048553245687, "learning_rate": 9.183904568559998e-08, "loss": 0.23683780431747437, "step": 3883 }, { "epoch": 1.9193129865315703, "grad_norm": 1.3217345576155297, "learning_rate": 9.073755533128725e-08, "loss": 0.26095467805862427, "step": 3884 }, { "epoch": 1.9198072408254045, "grad_norm": 1.253461281568054, "learning_rate": 8.964268025899558e-08, "loss": 0.24427568912506104, "step": 3885 }, { "epoch": 1.9203014951192388, "grad_norm": 1.3603609343742546, "learning_rate": 8.855442119964919e-08, "loss": 0.23549365997314453, "step": 3886 }, { "epoch": 1.920795749413073, "grad_norm": 1.4769071310965274, "learning_rate": 8.74727788797547e-08, "loss": 0.2645740807056427, "step": 3887 }, { "epoch": 1.9212900037069072, "grad_norm": 1.3315198325383535, "learning_rate": 8.639775402139894e-08, "loss": 0.22890612483024597, "step": 3888 }, { "epoch": 1.9217842580007414, "grad_norm": 1.4439303401955232, "learning_rate": 8.532934734225451e-08, "loss": 0.23417067527770996, "step": 3889 }, { "epoch": 1.9222785122945756, "grad_norm": 1.3482339584478593, "learning_rate": 8.42675595555753e-08, "loss": 0.26125872135162354, "step": 3890 }, { "epoch": 1.9227727665884098, "grad_norm": 1.4420298418522868, "learning_rate": 8.321239137019433e-08, "loss": 0.26559343934059143, "step": 3891 }, { "epoch": 1.923267020882244, "grad_norm": 1.188066329993037, "learning_rate": 8.216384349052809e-08, "loss": 0.2033136785030365, "step": 3892 }, { "epoch": 1.9237612751760782, "grad_norm": 1.975689815636208, "learning_rate": 8.112191661656999e-08, "loss": 0.2750868797302246, "step": 3893 }, { "epoch": 1.9242555294699124, "grad_norm": 1.366292176712638, "learning_rate": 8.008661144389807e-08, "loss": 0.2082993984222412, "step": 3894 }, { "epoch": 1.9247497837637466, "grad_norm": 1.4608755297303442, "learning_rate": 7.905792866366501e-08, "loss": 0.2495439350605011, "step": 3895 }, { "epoch": 1.9252440380575806, "grad_norm": 1.4141233844295813, "learning_rate": 7.803586896260707e-08, "loss": 0.25609591603279114, "step": 3896 }, { "epoch": 1.9257382923514148, "grad_norm": 1.5334004898395663, "learning_rate": 7.702043302303397e-08, "loss": 0.25372135639190674, "step": 3897 }, { "epoch": 1.926232546645249, "grad_norm": 1.3368221554281705, "learning_rate": 7.601162152283904e-08, "loss": 0.21882784366607666, "step": 3898 }, { "epoch": 1.926726800939083, "grad_norm": 1.5284992426615736, "learning_rate": 7.500943513548797e-08, "loss": 0.24513296782970428, "step": 3899 }, { "epoch": 1.9272210552329172, "grad_norm": 1.3036631509681367, "learning_rate": 7.401387453002673e-08, "loss": 0.23508042097091675, "step": 3900 }, { "epoch": 1.9277153095267514, "grad_norm": 1.2751462486235168, "learning_rate": 7.30249403710792e-08, "loss": 0.2288282811641693, "step": 3901 }, { "epoch": 1.9282095638205856, "grad_norm": 1.4342484579443016, "learning_rate": 7.204263331884175e-08, "loss": 0.24606133997440338, "step": 3902 }, { "epoch": 1.9287038181144198, "grad_norm": 1.3623815600739415, "learning_rate": 7.10669540290887e-08, "loss": 0.2710507810115814, "step": 3903 }, { "epoch": 1.929198072408254, "grad_norm": 1.3748292603956795, "learning_rate": 7.009790315317122e-08, "loss": 0.27333927154541016, "step": 3904 }, { "epoch": 1.9296923267020882, "grad_norm": 1.3028025790213729, "learning_rate": 6.913548133801074e-08, "loss": 0.27518531680107117, "step": 3905 }, { "epoch": 1.9301865809959224, "grad_norm": 1.3661226136758882, "learning_rate": 6.817968922610884e-08, "loss": 0.24289458990097046, "step": 3906 }, { "epoch": 1.9306808352897566, "grad_norm": 1.3726485965253954, "learning_rate": 6.723052745553848e-08, "loss": 0.225175678730011, "step": 3907 }, { "epoch": 1.9311750895835909, "grad_norm": 1.4678815751521954, "learning_rate": 6.628799665994612e-08, "loss": 0.2592085599899292, "step": 3908 }, { "epoch": 1.931669343877425, "grad_norm": 1.5719300045981148, "learning_rate": 6.535209746855064e-08, "loss": 0.2649756968021393, "step": 3909 }, { "epoch": 1.9321635981712593, "grad_norm": 1.3380899824561678, "learning_rate": 6.442283050614673e-08, "loss": 0.2318311631679535, "step": 3910 }, { "epoch": 1.9326578524650933, "grad_norm": 1.6231265342953554, "learning_rate": 6.350019639309923e-08, "loss": 0.252924382686615, "step": 3911 }, { "epoch": 1.9331521067589275, "grad_norm": 1.1670510769577984, "learning_rate": 6.258419574534547e-08, "loss": 0.1903652548789978, "step": 3912 }, { "epoch": 1.9336463610527617, "grad_norm": 1.270678601269557, "learning_rate": 6.167482917439404e-08, "loss": 0.22795221209526062, "step": 3913 }, { "epoch": 1.9341406153465959, "grad_norm": 1.5197318429157889, "learning_rate": 6.077209728732492e-08, "loss": 0.26521584391593933, "step": 3914 }, { "epoch": 1.9346348696404299, "grad_norm": 1.272486350308544, "learning_rate": 5.987600068679045e-08, "loss": 0.22152049839496613, "step": 3915 }, { "epoch": 1.935129123934264, "grad_norm": 1.2727416096160045, "learning_rate": 5.898653997100989e-08, "loss": 0.22663083672523499, "step": 3916 }, { "epoch": 1.9356233782280983, "grad_norm": 1.3553153320714941, "learning_rate": 5.8103715733776047e-08, "loss": 0.23720389604568481, "step": 3917 }, { "epoch": 1.9361176325219325, "grad_norm": 1.4063431471110097, "learning_rate": 5.722752856444858e-08, "loss": 0.24053935706615448, "step": 3918 }, { "epoch": 1.9366118868157667, "grad_norm": 1.3614412415474415, "learning_rate": 5.635797904795848e-08, "loss": 0.26565641164779663, "step": 3919 }, { "epoch": 1.9371061411096009, "grad_norm": 1.4288462330405298, "learning_rate": 5.5495067764804736e-08, "loss": 0.27181264758110046, "step": 3920 }, { "epoch": 1.937600395403435, "grad_norm": 1.3077579832623365, "learning_rate": 5.46387952910532e-08, "loss": 0.23340710997581482, "step": 3921 }, { "epoch": 1.9380946496972693, "grad_norm": 1.2207580518535108, "learning_rate": 5.378916219833996e-08, "loss": 0.19458985328674316, "step": 3922 }, { "epoch": 1.9385889039911035, "grad_norm": 1.4167428327318625, "learning_rate": 5.2946169053869066e-08, "loss": 0.22900202870368958, "step": 3923 }, { "epoch": 1.9390831582849377, "grad_norm": 1.528940034628332, "learning_rate": 5.210981642040924e-08, "loss": 0.30710160732269287, "step": 3924 }, { "epoch": 1.939577412578772, "grad_norm": 1.1725653667546314, "learning_rate": 5.12801048562972e-08, "loss": 0.1754809319972992, "step": 3925 }, { "epoch": 1.940071666872606, "grad_norm": 1.4924045607844934, "learning_rate": 5.045703491543763e-08, "loss": 0.28787121176719666, "step": 3926 }, { "epoch": 1.94056592116644, "grad_norm": 1.2741088738360473, "learning_rate": 4.96406071472999e-08, "loss": 0.2239963263273239, "step": 3927 }, { "epoch": 1.9410601754602743, "grad_norm": 1.3932338575101701, "learning_rate": 4.883082209692025e-08, "loss": 0.2121300995349884, "step": 3928 }, { "epoch": 1.9415544297541085, "grad_norm": 1.4253326667240858, "learning_rate": 4.802768030489735e-08, "loss": 0.23445773124694824, "step": 3929 }, { "epoch": 1.9420486840479425, "grad_norm": 1.3518230097115338, "learning_rate": 4.7231182307400095e-08, "loss": 0.2369021326303482, "step": 3930 }, { "epoch": 1.9425429383417767, "grad_norm": 1.4790931523959723, "learning_rate": 4.644132863615758e-08, "loss": 0.2764047086238861, "step": 3931 }, { "epoch": 1.943037192635611, "grad_norm": 1.3688921537083945, "learning_rate": 4.565811981846468e-08, "loss": 0.26021280884742737, "step": 3932 }, { "epoch": 1.9435314469294451, "grad_norm": 1.4010339478163996, "learning_rate": 4.488155637718095e-08, "loss": 0.26012706756591797, "step": 3933 }, { "epoch": 1.9440257012232793, "grad_norm": 1.4174814798438116, "learning_rate": 4.4111638830729444e-08, "loss": 0.22092604637145996, "step": 3934 }, { "epoch": 1.9445199555171135, "grad_norm": 1.3803129729570953, "learning_rate": 4.334836769309347e-08, "loss": 0.24200648069381714, "step": 3935 }, { "epoch": 1.9450142098109477, "grad_norm": 1.4185191605274636, "learning_rate": 4.2591743473826554e-08, "loss": 0.2545608580112457, "step": 3936 }, { "epoch": 1.945508464104782, "grad_norm": 1.4140513212071641, "learning_rate": 4.1841766678036854e-08, "loss": 0.24908477067947388, "step": 3937 }, { "epoch": 1.9460027183986162, "grad_norm": 1.3236421908105307, "learning_rate": 4.109843780639833e-08, "loss": 0.23568233847618103, "step": 3938 }, { "epoch": 1.9464969726924504, "grad_norm": 1.4613354363975228, "learning_rate": 4.0361757355147355e-08, "loss": 0.2230791449546814, "step": 3939 }, { "epoch": 1.9469912269862846, "grad_norm": 1.4124924138900457, "learning_rate": 3.963172581608166e-08, "loss": 0.2541523277759552, "step": 3940 }, { "epoch": 1.9474854812801188, "grad_norm": 1.320376312149322, "learning_rate": 3.8908343676559156e-08, "loss": 0.2466837763786316, "step": 3941 }, { "epoch": 1.9479797355739528, "grad_norm": 1.434845915637092, "learning_rate": 3.819161141950134e-08, "loss": 0.2700938880443573, "step": 3942 }, { "epoch": 1.948473989867787, "grad_norm": 1.2847188951445323, "learning_rate": 3.7481529523384355e-08, "loss": 0.2353779673576355, "step": 3943 }, { "epoch": 1.9489682441616212, "grad_norm": 1.4586624394757335, "learning_rate": 3.677809846225344e-08, "loss": 0.25708913803100586, "step": 3944 }, { "epoch": 1.9494624984554554, "grad_norm": 1.3816118361393621, "learning_rate": 3.6081318705705195e-08, "loss": 0.26113903522491455, "step": 3945 }, { "epoch": 1.9499567527492894, "grad_norm": 1.353452761923649, "learning_rate": 3.539119071890307e-08, "loss": 0.2561355531215668, "step": 3946 }, { "epoch": 1.9504510070431236, "grad_norm": 1.3679281740557483, "learning_rate": 3.470771496256409e-08, "loss": 0.24893885850906372, "step": 3947 }, { "epoch": 1.9509452613369578, "grad_norm": 1.3725754281016815, "learning_rate": 3.403089189296771e-08, "loss": 0.25399699807167053, "step": 3948 }, { "epoch": 1.951439515630792, "grad_norm": 1.4355008801200986, "learning_rate": 3.3360721961952505e-08, "loss": 0.2820609509944916, "step": 3949 }, { "epoch": 1.9519337699246262, "grad_norm": 1.222474026880474, "learning_rate": 3.269720561691281e-08, "loss": 0.22128066420555115, "step": 3950 }, { "epoch": 1.9524280242184604, "grad_norm": 1.2322544942302993, "learning_rate": 3.204034330080319e-08, "loss": 0.2132534235715866, "step": 3951 }, { "epoch": 1.9529222785122946, "grad_norm": 1.3290638165306805, "learning_rate": 3.1390135452135095e-08, "loss": 0.2308463454246521, "step": 3952 }, { "epoch": 1.9534165328061288, "grad_norm": 1.443895071093895, "learning_rate": 3.074658250497908e-08, "loss": 0.2756718397140503, "step": 3953 }, { "epoch": 1.953910787099963, "grad_norm": 1.404895730578394, "learning_rate": 3.010968488896149e-08, "loss": 0.24619412422180176, "step": 3954 }, { "epoch": 1.9544050413937972, "grad_norm": 1.3236097287021305, "learning_rate": 2.9479443029265532e-08, "loss": 0.2164454162120819, "step": 3955 }, { "epoch": 1.9548992956876314, "grad_norm": 1.4498814795200483, "learning_rate": 2.8855857346632432e-08, "loss": 0.2778991460800171, "step": 3956 }, { "epoch": 1.9553935499814654, "grad_norm": 1.3781229461817452, "learning_rate": 2.8238928257359188e-08, "loss": 0.22639301419258118, "step": 3957 }, { "epoch": 1.9558878042752996, "grad_norm": 1.3946690868287814, "learning_rate": 2.7628656173297463e-08, "loss": 0.2367630898952484, "step": 3958 }, { "epoch": 1.9563820585691338, "grad_norm": 1.233715623675162, "learning_rate": 2.702504150185692e-08, "loss": 0.2400333285331726, "step": 3959 }, { "epoch": 1.956876312862968, "grad_norm": 1.5197718598123784, "learning_rate": 2.6428084646001884e-08, "loss": 0.2384340763092041, "step": 3960 }, { "epoch": 1.957370567156802, "grad_norm": 1.2187445449938668, "learning_rate": 2.5837786004253572e-08, "loss": 0.20191673934459686, "step": 3961 }, { "epoch": 1.9578648214506362, "grad_norm": 1.3530782559852856, "learning_rate": 2.525414597068565e-08, "loss": 0.24700434505939484, "step": 3962 }, { "epoch": 1.9583590757444704, "grad_norm": 1.2586448244620927, "learning_rate": 2.4677164934928665e-08, "loss": 0.20032359659671783, "step": 3963 }, { "epoch": 1.9588533300383046, "grad_norm": 1.5212375132060378, "learning_rate": 2.4106843282165615e-08, "loss": 0.280154287815094, "step": 3964 }, { "epoch": 1.9593475843321388, "grad_norm": 1.46590896106962, "learning_rate": 2.3543181393135274e-08, "loss": 0.25518566370010376, "step": 3965 }, { "epoch": 1.959841838625973, "grad_norm": 1.2792917112791735, "learning_rate": 2.298617964413108e-08, "loss": 0.2246837019920349, "step": 3966 }, { "epoch": 1.9603360929198073, "grad_norm": 1.3954375167289552, "learning_rate": 2.2435838407000034e-08, "loss": 0.23355916142463684, "step": 3967 }, { "epoch": 1.9608303472136415, "grad_norm": 1.3615561015896285, "learning_rate": 2.1892158049140467e-08, "loss": 0.2449415922164917, "step": 3968 }, { "epoch": 1.9613246015074757, "grad_norm": 1.279518283780108, "learning_rate": 2.1355138933507602e-08, "loss": 0.2269652783870697, "step": 3969 }, { "epoch": 1.9618188558013099, "grad_norm": 1.4090731883758925, "learning_rate": 2.0824781418605776e-08, "loss": 0.26923638582229614, "step": 3970 }, { "epoch": 1.962313110095144, "grad_norm": 1.3838329777907195, "learning_rate": 2.0301085858493996e-08, "loss": 0.2631189823150635, "step": 3971 }, { "epoch": 1.9628073643889783, "grad_norm": 1.272147209216066, "learning_rate": 1.978405260278593e-08, "loss": 0.23281526565551758, "step": 3972 }, { "epoch": 1.9633016186828123, "grad_norm": 1.4504818525258278, "learning_rate": 1.9273681996644365e-08, "loss": 0.26399385929107666, "step": 3973 }, { "epoch": 1.9637958729766465, "grad_norm": 1.42867652212037, "learning_rate": 1.876997438078454e-08, "loss": 0.2641673684120178, "step": 3974 }, { "epoch": 1.9642901272704807, "grad_norm": 1.2944638856965318, "learning_rate": 1.8272930091476347e-08, "loss": 0.22440402209758759, "step": 3975 }, { "epoch": 1.9647843815643147, "grad_norm": 1.3585170311291963, "learning_rate": 1.778254946053881e-08, "loss": 0.2552195191383362, "step": 3976 }, { "epoch": 1.9652786358581489, "grad_norm": 1.3475063805104281, "learning_rate": 1.729883281534117e-08, "loss": 0.24455100297927856, "step": 3977 }, { "epoch": 1.965772890151983, "grad_norm": 1.4925946223112605, "learning_rate": 1.6821780478808448e-08, "loss": 0.2324603945016861, "step": 3978 }, { "epoch": 1.9662671444458173, "grad_norm": 1.295320797137711, "learning_rate": 1.6351392769412556e-08, "loss": 0.25488242506980896, "step": 3979 }, { "epoch": 1.9667613987396515, "grad_norm": 1.2382372998222446, "learning_rate": 1.5887670001177856e-08, "loss": 0.23511120676994324, "step": 3980 }, { "epoch": 1.9672556530334857, "grad_norm": 1.474081111410746, "learning_rate": 1.5430612483680052e-08, "loss": 0.2683457136154175, "step": 3981 }, { "epoch": 1.96774990732732, "grad_norm": 1.3558148882952648, "learning_rate": 1.4980220522041734e-08, "loss": 0.26627787947654724, "step": 3982 }, { "epoch": 1.9682441616211541, "grad_norm": 1.3779286197554192, "learning_rate": 1.4536494416940162e-08, "loss": 0.22931841015815735, "step": 3983 }, { "epoch": 1.9687384159149883, "grad_norm": 1.3860885624616435, "learning_rate": 1.4099434464600603e-08, "loss": 0.22918352484703064, "step": 3984 }, { "epoch": 1.9692326702088225, "grad_norm": 1.498852903518302, "learning_rate": 1.3669040956797442e-08, "loss": 0.2542854845523834, "step": 3985 }, { "epoch": 1.9697269245026567, "grad_norm": 1.278204707841908, "learning_rate": 1.3245314180854175e-08, "loss": 0.21581681072711945, "step": 3986 }, { "epoch": 1.970221178796491, "grad_norm": 1.4943144749429917, "learning_rate": 1.2828254419646746e-08, "loss": 0.2708613872528076, "step": 3987 }, { "epoch": 1.970715433090325, "grad_norm": 1.3377756042264306, "learning_rate": 1.2417861951597998e-08, "loss": 0.25348716974258423, "step": 3988 }, { "epoch": 1.9712096873841591, "grad_norm": 1.396109244896111, "learning_rate": 1.2014137050677665e-08, "loss": 0.24585089087486267, "step": 3989 }, { "epoch": 1.9717039416779933, "grad_norm": 1.3367696007925745, "learning_rate": 1.1617079986410152e-08, "loss": 0.26362112164497375, "step": 3990 }, { "epoch": 1.9721981959718276, "grad_norm": 1.815729582105598, "learning_rate": 1.1226691023862312e-08, "loss": 0.23288659751415253, "step": 3991 }, { "epoch": 1.9726924502656615, "grad_norm": 1.2305688182670602, "learning_rate": 1.0842970423654563e-08, "loss": 0.21604478359222412, "step": 3992 }, { "epoch": 1.9731867045594957, "grad_norm": 1.353706093653017, "learning_rate": 1.0465918441950885e-08, "loss": 0.21149985492229462, "step": 3993 }, { "epoch": 1.97368095885333, "grad_norm": 1.40480632228099, "learning_rate": 1.0095535330467698e-08, "loss": 0.26392504572868347, "step": 3994 }, { "epoch": 1.9741752131471642, "grad_norm": 1.246030064073758, "learning_rate": 9.731821336466107e-09, "loss": 0.22993823885917664, "step": 3995 }, { "epoch": 1.9746694674409984, "grad_norm": 1.247780565740116, "learning_rate": 9.374776702757438e-09, "loss": 0.2207789570093155, "step": 3996 }, { "epoch": 1.9751637217348326, "grad_norm": 1.3761642168404886, "learning_rate": 9.024401667698802e-09, "loss": 0.27149268984794617, "step": 3997 }, { "epoch": 1.9756579760286668, "grad_norm": 1.4326020240148696, "learning_rate": 8.680696465196425e-09, "loss": 0.269406795501709, "step": 3998 }, { "epoch": 1.976152230322501, "grad_norm": 1.4639077922370294, "learning_rate": 8.343661324703434e-09, "loss": 0.25354713201522827, "step": 3999 }, { "epoch": 1.9766464846163352, "grad_norm": 1.3417205540337154, "learning_rate": 8.013296471217624e-09, "loss": 0.22957751154899597, "step": 4000 }, { "epoch": 1.9771407389101694, "grad_norm": 1.4456474308400453, "learning_rate": 7.68960212528702e-09, "loss": 0.25355982780456543, "step": 4001 }, { "epoch": 1.9776349932040036, "grad_norm": 1.3644174652815564, "learning_rate": 7.372578503005434e-09, "loss": 0.2453315556049347, "step": 4002 }, { "epoch": 1.9781292474978376, "grad_norm": 1.4020681279841292, "learning_rate": 7.062225816013568e-09, "loss": 0.2274405062198639, "step": 4003 }, { "epoch": 1.9786235017916718, "grad_norm": 1.229857953320014, "learning_rate": 6.7585442714979136e-09, "loss": 0.22195965051651, "step": 4004 }, { "epoch": 1.979117756085506, "grad_norm": 1.266569965950015, "learning_rate": 6.461534072191855e-09, "loss": 0.18664966523647308, "step": 4005 }, { "epoch": 1.9796120103793402, "grad_norm": 1.3364190229696613, "learning_rate": 6.171195416375675e-09, "loss": 0.23385149240493774, "step": 4006 }, { "epoch": 1.9801062646731742, "grad_norm": 1.4261084517387066, "learning_rate": 5.887528497874328e-09, "loss": 0.3141595721244812, "step": 4007 }, { "epoch": 1.9806005189670084, "grad_norm": 1.1455773069888675, "learning_rate": 5.610533506060778e-09, "loss": 0.22392721474170685, "step": 4008 }, { "epoch": 1.9810947732608426, "grad_norm": 1.3684844463884664, "learning_rate": 5.34021062585377e-09, "loss": 0.24451547861099243, "step": 4009 }, { "epoch": 1.9815890275546768, "grad_norm": 1.4628440300051093, "learning_rate": 5.076560037714506e-09, "loss": 0.2916273772716522, "step": 4010 }, { "epoch": 1.982083281848511, "grad_norm": 1.4921825705668792, "learning_rate": 4.819581917654414e-09, "loss": 0.25200486183166504, "step": 4011 }, { "epoch": 1.9825775361423452, "grad_norm": 1.3319991237500093, "learning_rate": 4.569276437227377e-09, "loss": 0.24660873413085938, "step": 4012 }, { "epoch": 1.9830717904361794, "grad_norm": 1.286303231792404, "learning_rate": 4.325643763534171e-09, "loss": 0.22276514768600464, "step": 4013 }, { "epoch": 1.9835660447300136, "grad_norm": 1.556433924523847, "learning_rate": 4.088684059220249e-09, "loss": 0.28938305377960205, "step": 4014 }, { "epoch": 1.9840602990238478, "grad_norm": 1.3733276400200998, "learning_rate": 3.85839748247685e-09, "loss": 0.24640555679798126, "step": 4015 }, { "epoch": 1.984554553317682, "grad_norm": 1.3418256960556196, "learning_rate": 3.6347841870398858e-09, "loss": 0.24476927518844604, "step": 4016 }, { "epoch": 1.9850488076115163, "grad_norm": 1.4034327960934876, "learning_rate": 3.417844322189945e-09, "loss": 0.22534328699111938, "step": 4017 }, { "epoch": 1.9855430619053505, "grad_norm": 1.4375031314885967, "learning_rate": 3.2075780327534e-09, "loss": 0.25029847025871277, "step": 4018 }, { "epoch": 1.9860373161991844, "grad_norm": 1.376115670772505, "learning_rate": 3.0039854591012994e-09, "loss": 0.25584423542022705, "step": 4019 }, { "epoch": 1.9865315704930187, "grad_norm": 1.3971629958782856, "learning_rate": 2.8070667371493663e-09, "loss": 0.26148709654808044, "step": 4020 }, { "epoch": 1.9870258247868529, "grad_norm": 1.3375096203856904, "learning_rate": 2.6168219983557786e-09, "loss": 0.2510269284248352, "step": 4021 }, { "epoch": 1.987520079080687, "grad_norm": 1.2826838094230344, "learning_rate": 2.433251369727829e-09, "loss": 0.24044418334960938, "step": 4022 }, { "epoch": 1.988014333374521, "grad_norm": 1.3500084280255722, "learning_rate": 2.256354973813046e-09, "loss": 0.25793880224227905, "step": 4023 }, { "epoch": 1.9885085876683553, "grad_norm": 1.4052663822447213, "learning_rate": 2.086132928705853e-09, "loss": 0.23637095093727112, "step": 4024 }, { "epoch": 1.9890028419621895, "grad_norm": 1.4859565437270463, "learning_rate": 1.9225853480431267e-09, "loss": 0.29570624232292175, "step": 4025 }, { "epoch": 1.9894970962560237, "grad_norm": 1.423587654508064, "learning_rate": 1.7657123410075306e-09, "loss": 0.2697899341583252, "step": 4026 }, { "epoch": 1.9899913505498579, "grad_norm": 1.6022210661391376, "learning_rate": 1.615514012324182e-09, "loss": 0.28011834621429443, "step": 4027 }, { "epoch": 1.990485604843692, "grad_norm": 1.4192356954254337, "learning_rate": 1.4719904622650939e-09, "loss": 0.2770778238773346, "step": 4028 }, { "epoch": 1.9909798591375263, "grad_norm": 1.3392534385035628, "learning_rate": 1.335141786642513e-09, "loss": 0.2662060558795929, "step": 4029 }, { "epoch": 1.9914741134313605, "grad_norm": 1.2456909926822828, "learning_rate": 1.2049680768166928e-09, "loss": 0.21188628673553467, "step": 4030 }, { "epoch": 1.9919683677251947, "grad_norm": 1.4172757737685897, "learning_rate": 1.0814694196892294e-09, "loss": 0.2749127745628357, "step": 4031 }, { "epoch": 1.992462622019029, "grad_norm": 1.378123997738594, "learning_rate": 9.64645897704175e-10, "loss": 0.2271629124879837, "step": 4032 }, { "epoch": 1.9929568763128631, "grad_norm": 1.3952087957146098, "learning_rate": 8.544975888535867e-10, "loss": 0.2704155743122101, "step": 4033 }, { "epoch": 1.993451130606697, "grad_norm": 1.2966104091933182, "learning_rate": 7.51024566670866e-10, "loss": 0.23073506355285645, "step": 4034 }, { "epoch": 1.9939453849005313, "grad_norm": 1.3325329668608163, "learning_rate": 6.542269002307589e-10, "loss": 0.21597059071063995, "step": 4035 }, { "epoch": 1.9944396391943655, "grad_norm": 1.3812576049514713, "learning_rate": 5.641046541560169e-10, "loss": 0.21368899941444397, "step": 4036 }, { "epoch": 1.9949338934881997, "grad_norm": 1.4586669387690583, "learning_rate": 4.806578886107361e-10, "loss": 0.2594050168991089, "step": 4037 }, { "epoch": 1.9954281477820337, "grad_norm": 1.3017520645594045, "learning_rate": 4.03886659302577e-10, "loss": 0.21630799770355225, "step": 4038 }, { "epoch": 1.995922402075868, "grad_norm": 1.3597057268680945, "learning_rate": 3.337910174827652e-10, "loss": 0.25168266892433167, "step": 4039 }, { "epoch": 1.9964166563697021, "grad_norm": 1.7213418381035832, "learning_rate": 2.70371009946091e-10, "loss": 0.27963966131210327, "step": 4040 }, { "epoch": 1.9969109106635363, "grad_norm": 1.575376951155568, "learning_rate": 2.1362667903090938e-10, "loss": 0.2196345329284668, "step": 4041 }, { "epoch": 1.9974051649573705, "grad_norm": 1.3640521467532305, "learning_rate": 1.6355806262025043e-10, "loss": 0.22677919268608093, "step": 4042 }, { "epoch": 1.9978994192512047, "grad_norm": 1.3667805176029666, "learning_rate": 1.2016519413626804e-10, "loss": 0.23044565320014954, "step": 4043 }, { "epoch": 1.998393673545039, "grad_norm": 1.4131604362123111, "learning_rate": 8.344810255023206e-11, "loss": 0.2739316523075104, "step": 4044 }, { "epoch": 1.9988879278388731, "grad_norm": 1.4085870126547175, "learning_rate": 5.3406812372536196e-11, "loss": 0.2610301673412323, "step": 4045 }, { "epoch": 1.9993821821327074, "grad_norm": 1.2504694698952337, "learning_rate": 3.004134365824918e-11, "loss": 0.23652175068855286, "step": 4046 }, { "epoch": 1.9998764364265416, "grad_norm": 1.2951296197807791, "learning_rate": 1.3351712006004492e-11, "loss": 0.23983967304229736, "step": 4047 }, { "epoch": 2.0, "grad_norm": 2.5975889779248975, "learning_rate": 3.337928559110637e-12, "loss": 0.2640778720378876, "step": 4048 }, { "epoch": 2.0, "step": 4048, "total_flos": 3066106945339392.0, "train_loss": 0.3191354194832708, "train_runtime": 52842.7617, "train_samples_per_second": 1.225, "train_steps_per_second": 0.077 } ], "logging_steps": 1, "max_steps": 4048, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3066106945339392.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }