{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 14794, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 6.75949709341625e-05, "grad_norm": 59.110233306884766, "learning_rate": 0.0, "loss": 0.770263671875, "step": 1 }, { "epoch": 0.000135189941868325, "grad_norm": 55.2763557434082, "learning_rate": 5.859375e-08, "loss": 0.73876953125, "step": 2 }, { "epoch": 0.0002027849128024875, "grad_norm": 63.177162170410156, "learning_rate": 1.171875e-07, "loss": 0.7373046875, "step": 3 }, { "epoch": 0.00027037988373665, "grad_norm": 47.57095718383789, "learning_rate": 1.7578125e-07, "loss": 0.732177734375, "step": 4 }, { "epoch": 0.0003379748546708125, "grad_norm": 56.5859375, "learning_rate": 2.34375e-07, "loss": 0.7353515625, "step": 5 }, { "epoch": 0.000405569825604975, "grad_norm": 68.9694595336914, "learning_rate": 2.9296875000000003e-07, "loss": 0.740234375, "step": 6 }, { "epoch": 0.0004731647965391375, "grad_norm": 50.85795211791992, "learning_rate": 3.515625e-07, "loss": 0.732666015625, "step": 7 }, { "epoch": 0.0005407597674733, "grad_norm": 54.815216064453125, "learning_rate": 4.1015625e-07, "loss": 0.737060546875, "step": 8 }, { "epoch": 0.0006083547384074625, "grad_norm": 57.61214065551758, "learning_rate": 4.6875e-07, "loss": 0.732421875, "step": 9 }, { "epoch": 0.000675949709341625, "grad_norm": 48.82746124267578, "learning_rate": 5.2734375e-07, "loss": 0.72802734375, "step": 10 }, { "epoch": 0.0007435446802757875, "grad_norm": 61.77345657348633, "learning_rate": 5.859375000000001e-07, "loss": 0.736572265625, "step": 11 }, { "epoch": 0.00081113965120995, "grad_norm": 77.5992202758789, "learning_rate": 6.4453125e-07, "loss": 0.7431640625, "step": 12 }, { "epoch": 0.0008787346221441124, "grad_norm": 48.16891098022461, "learning_rate": 7.03125e-07, "loss": 0.72412109375, "step": 13 }, { "epoch": 0.000946329593078275, "grad_norm": 44.529136657714844, "learning_rate": 7.6171875e-07, "loss": 0.72314453125, "step": 14 }, { "epoch": 0.0010139245640124375, "grad_norm": 83.34319305419922, "learning_rate": 8.203125e-07, "loss": 0.741943359375, "step": 15 }, { "epoch": 0.0010815195349466, "grad_norm": 81.33229064941406, "learning_rate": 8.7890625e-07, "loss": 0.737548828125, "step": 16 }, { "epoch": 0.0011491145058807625, "grad_norm": 46.30405044555664, "learning_rate": 9.375e-07, "loss": 0.7216796875, "step": 17 }, { "epoch": 0.001216709476814925, "grad_norm": 71.16378021240234, "learning_rate": 9.9609375e-07, "loss": 0.724853515625, "step": 18 }, { "epoch": 0.0012843044477490874, "grad_norm": 55.97031021118164, "learning_rate": 1.0546875e-06, "loss": 0.72412109375, "step": 19 }, { "epoch": 0.00135189941868325, "grad_norm": 44.46846389770508, "learning_rate": 1.11328125e-06, "loss": 0.713623046875, "step": 20 }, { "epoch": 0.0014194943896174124, "grad_norm": 61.169166564941406, "learning_rate": 1.1718750000000001e-06, "loss": 0.71875, "step": 21 }, { "epoch": 0.001487089360551575, "grad_norm": 72.04226684570312, "learning_rate": 1.23046875e-06, "loss": 0.69970703125, "step": 22 }, { "epoch": 0.0015546843314857375, "grad_norm": 52.06661605834961, "learning_rate": 1.2890625e-06, "loss": 0.69873046875, "step": 23 }, { "epoch": 0.0016222793024199, "grad_norm": 56.76589584350586, "learning_rate": 1.34765625e-06, "loss": 0.69580078125, "step": 24 }, { "epoch": 0.0016898742733540625, "grad_norm": 43.34426498413086, "learning_rate": 1.40625e-06, "loss": 0.682373046875, "step": 25 }, { "epoch": 0.0017574692442882249, "grad_norm": 67.23594665527344, "learning_rate": 1.46484375e-06, "loss": 0.668701171875, "step": 26 }, { "epoch": 0.0018250642152223874, "grad_norm": 48.07478332519531, "learning_rate": 1.5234375e-06, "loss": 0.69189453125, "step": 27 }, { "epoch": 0.00189265918615655, "grad_norm": 27.090139389038086, "learning_rate": 1.5820312500000001e-06, "loss": 0.681884765625, "step": 28 }, { "epoch": 0.0019602541570907126, "grad_norm": 58.0966796875, "learning_rate": 1.640625e-06, "loss": 0.663818359375, "step": 29 }, { "epoch": 0.002027849128024875, "grad_norm": 35.283653259277344, "learning_rate": 1.69921875e-06, "loss": 0.65478515625, "step": 30 }, { "epoch": 0.0020954440989590373, "grad_norm": 24.81432342529297, "learning_rate": 1.7578125e-06, "loss": 0.64990234375, "step": 31 }, { "epoch": 0.0021630390698932, "grad_norm": 44.06401443481445, "learning_rate": 1.81640625e-06, "loss": 0.613525390625, "step": 32 }, { "epoch": 0.0022306340408273623, "grad_norm": 35.33702087402344, "learning_rate": 1.875e-06, "loss": 0.625732421875, "step": 33 }, { "epoch": 0.002298229011761525, "grad_norm": 46.12205505371094, "learning_rate": 1.93359375e-06, "loss": 0.5791015625, "step": 34 }, { "epoch": 0.0023658239826956874, "grad_norm": 36.45764923095703, "learning_rate": 1.9921875e-06, "loss": 0.598388671875, "step": 35 }, { "epoch": 0.00243341895362985, "grad_norm": 18.96347999572754, "learning_rate": 2.05078125e-06, "loss": 0.627685546875, "step": 36 }, { "epoch": 0.0025010139245640124, "grad_norm": 30.297109603881836, "learning_rate": 2.109375e-06, "loss": 0.593017578125, "step": 37 }, { "epoch": 0.0025686088954981747, "grad_norm": 44.58295822143555, "learning_rate": 2.16796875e-06, "loss": 0.5516357421875, "step": 38 }, { "epoch": 0.0026362038664323375, "grad_norm": 27.501136779785156, "learning_rate": 2.2265625e-06, "loss": 0.592529296875, "step": 39 }, { "epoch": 0.0027037988373665, "grad_norm": 4.408558368682861, "learning_rate": 2.28515625e-06, "loss": 0.642578125, "step": 40 }, { "epoch": 0.0027713938083006625, "grad_norm": 37.33297348022461, "learning_rate": 2.3437500000000002e-06, "loss": 0.5006103515625, "step": 41 }, { "epoch": 0.002838988779234825, "grad_norm": 12.072490692138672, "learning_rate": 2.40234375e-06, "loss": 0.5611572265625, "step": 42 }, { "epoch": 0.0029065837501689876, "grad_norm": 2.314673662185669, "learning_rate": 2.4609375e-06, "loss": 0.5975341796875, "step": 43 }, { "epoch": 0.00297417872110315, "grad_norm": 15.052709579467773, "learning_rate": 2.5195312500000003e-06, "loss": 0.672119140625, "step": 44 }, { "epoch": 0.0030417736920373122, "grad_norm": 11.710618019104004, "learning_rate": 2.578125e-06, "loss": 0.5283203125, "step": 45 }, { "epoch": 0.003109368662971475, "grad_norm": 23.748289108276367, "learning_rate": 2.63671875e-06, "loss": 0.458984375, "step": 46 }, { "epoch": 0.0031769636339056373, "grad_norm": 5.323421955108643, "learning_rate": 2.6953125e-06, "loss": 0.54931640625, "step": 47 }, { "epoch": 0.0032445586048398, "grad_norm": 5.181306838989258, "learning_rate": 2.75390625e-06, "loss": 0.543701171875, "step": 48 }, { "epoch": 0.0033121535757739623, "grad_norm": 10.080525398254395, "learning_rate": 2.8125e-06, "loss": 0.6258544921875, "step": 49 }, { "epoch": 0.003379748546708125, "grad_norm": 11.51768970489502, "learning_rate": 2.87109375e-06, "loss": 0.4954833984375, "step": 50 }, { "epoch": 0.0034473435176422874, "grad_norm": 3.995177984237671, "learning_rate": 2.9296875e-06, "loss": 0.5836181640625, "step": 51 }, { "epoch": 0.0035149384885764497, "grad_norm": 3.81083345413208, "learning_rate": 2.9882812500000002e-06, "loss": 0.5311279296875, "step": 52 }, { "epoch": 0.0035825334595106124, "grad_norm": 21.326133728027344, "learning_rate": 3.046875e-06, "loss": 0.418212890625, "step": 53 }, { "epoch": 0.0036501284304447748, "grad_norm": 12.19087028503418, "learning_rate": 3.10546875e-06, "loss": 0.6298828125, "step": 54 }, { "epoch": 0.0037177234013789375, "grad_norm": 7.981954097747803, "learning_rate": 3.1640625000000003e-06, "loss": 0.494140625, "step": 55 }, { "epoch": 0.0037853183723131, "grad_norm": 10.316963195800781, "learning_rate": 3.22265625e-06, "loss": 0.4703369140625, "step": 56 }, { "epoch": 0.0038529133432472626, "grad_norm": 13.306395530700684, "learning_rate": 3.28125e-06, "loss": 0.6246337890625, "step": 57 }, { "epoch": 0.003920508314181425, "grad_norm": 11.883159637451172, "learning_rate": 3.3398437500000003e-06, "loss": 0.4532470703125, "step": 58 }, { "epoch": 0.003988103285115587, "grad_norm": 19.342395782470703, "learning_rate": 3.3984375e-06, "loss": 0.66943359375, "step": 59 }, { "epoch": 0.00405569825604975, "grad_norm": 1.4705758094787598, "learning_rate": 3.45703125e-06, "loss": 0.531494140625, "step": 60 }, { "epoch": 0.004123293226983913, "grad_norm": 2.7160651683807373, "learning_rate": 3.515625e-06, "loss": 0.547607421875, "step": 61 }, { "epoch": 0.0041908881979180745, "grad_norm": 2.3139986991882324, "learning_rate": 3.57421875e-06, "loss": 0.5467529296875, "step": 62 }, { "epoch": 0.004258483168852237, "grad_norm": 15.224015235900879, "learning_rate": 3.6328125e-06, "loss": 0.4169921875, "step": 63 }, { "epoch": 0.0043260781397864, "grad_norm": 6.562361240386963, "learning_rate": 3.69140625e-06, "loss": 0.4998779296875, "step": 64 }, { "epoch": 0.004393673110720563, "grad_norm": 2.9516196250915527, "learning_rate": 3.75e-06, "loss": 0.5714111328125, "step": 65 }, { "epoch": 0.004461268081654725, "grad_norm": 8.7573881149292, "learning_rate": 3.8085937500000002e-06, "loss": 0.4798583984375, "step": 66 }, { "epoch": 0.004528863052588887, "grad_norm": 6.293442249298096, "learning_rate": 3.8671875e-06, "loss": 0.6048583984375, "step": 67 }, { "epoch": 0.00459645802352305, "grad_norm": 5.416831016540527, "learning_rate": 3.92578125e-06, "loss": 0.5299072265625, "step": 68 }, { "epoch": 0.004664052994457212, "grad_norm": 12.139426231384277, "learning_rate": 3.984375e-06, "loss": 0.4600830078125, "step": 69 }, { "epoch": 0.004731647965391375, "grad_norm": 12.069653511047363, "learning_rate": 4.0429687500000004e-06, "loss": 0.606689453125, "step": 70 }, { "epoch": 0.0047992429363255375, "grad_norm": 9.60067367553711, "learning_rate": 4.1015625e-06, "loss": 0.46258544921875, "step": 71 }, { "epoch": 0.0048668379072597, "grad_norm": 3.6210830211639404, "learning_rate": 4.16015625e-06, "loss": 0.4915771484375, "step": 72 }, { "epoch": 0.004934432878193862, "grad_norm": 9.080794334411621, "learning_rate": 4.21875e-06, "loss": 0.566162109375, "step": 73 }, { "epoch": 0.005002027849128025, "grad_norm": 20.794273376464844, "learning_rate": 4.27734375e-06, "loss": 0.6802978515625, "step": 74 }, { "epoch": 0.005069622820062188, "grad_norm": 9.691886901855469, "learning_rate": 4.3359375e-06, "loss": 0.64794921875, "step": 75 }, { "epoch": 0.0051372177909963495, "grad_norm": 31.95792579650879, "learning_rate": 4.3945312500000005e-06, "loss": 0.5830078125, "step": 76 }, { "epoch": 0.005204812761930512, "grad_norm": 34.214088439941406, "learning_rate": 4.453125e-06, "loss": 0.5328369140625, "step": 77 }, { "epoch": 0.005272407732864675, "grad_norm": 18.377038955688477, "learning_rate": 4.51171875e-06, "loss": 0.6337890625, "step": 78 }, { "epoch": 0.005340002703798838, "grad_norm": 42.3178825378418, "learning_rate": 4.5703125e-06, "loss": 0.523681640625, "step": 79 }, { "epoch": 0.005407597674733, "grad_norm": 18.89365005493164, "learning_rate": 4.62890625e-06, "loss": 0.585693359375, "step": 80 }, { "epoch": 0.005475192645667162, "grad_norm": 16.09290885925293, "learning_rate": 4.6875000000000004e-06, "loss": 0.526123046875, "step": 81 }, { "epoch": 0.005542787616601325, "grad_norm": 2.98407244682312, "learning_rate": 4.74609375e-06, "loss": 0.5926513671875, "step": 82 }, { "epoch": 0.005610382587535487, "grad_norm": 5.014310359954834, "learning_rate": 4.8046875e-06, "loss": 0.561767578125, "step": 83 }, { "epoch": 0.00567797755846965, "grad_norm": 1.7956238985061646, "learning_rate": 4.86328125e-06, "loss": 0.5369873046875, "step": 84 }, { "epoch": 0.0057455725294038125, "grad_norm": 5.99094295501709, "learning_rate": 4.921875e-06, "loss": 0.5452880859375, "step": 85 }, { "epoch": 0.005813167500337975, "grad_norm": 27.442594528198242, "learning_rate": 4.98046875e-06, "loss": 0.672607421875, "step": 86 }, { "epoch": 0.005880762471272137, "grad_norm": 25.15477180480957, "learning_rate": 5.0390625000000005e-06, "loss": 0.6951904296875, "step": 87 }, { "epoch": 0.0059483574422063, "grad_norm": 8.660077095031738, "learning_rate": 5.09765625e-06, "loss": 0.552734375, "step": 88 }, { "epoch": 0.006015952413140463, "grad_norm": 6.200240612030029, "learning_rate": 5.15625e-06, "loss": 0.5408935546875, "step": 89 }, { "epoch": 0.0060835473840746244, "grad_norm": 22.11834144592285, "learning_rate": 5.21484375e-06, "loss": 0.477294921875, "step": 90 }, { "epoch": 0.006151142355008787, "grad_norm": 27.746355056762695, "learning_rate": 5.2734375e-06, "loss": 0.474609375, "step": 91 }, { "epoch": 0.00621873732594295, "grad_norm": 15.14356517791748, "learning_rate": 5.3320312500000004e-06, "loss": 0.5789794921875, "step": 92 }, { "epoch": 0.006286332296877113, "grad_norm": 12.362418174743652, "learning_rate": 5.390625e-06, "loss": 0.586181640625, "step": 93 }, { "epoch": 0.0063539272678112746, "grad_norm": 12.820860862731934, "learning_rate": 5.44921875e-06, "loss": 0.5291748046875, "step": 94 }, { "epoch": 0.006421522238745437, "grad_norm": 5.996450424194336, "learning_rate": 5.5078125e-06, "loss": 0.549560546875, "step": 95 }, { "epoch": 0.0064891172096796, "grad_norm": 5.6679911613464355, "learning_rate": 5.56640625e-06, "loss": 0.4913330078125, "step": 96 }, { "epoch": 0.006556712180613762, "grad_norm": 12.101571083068848, "learning_rate": 5.625e-06, "loss": 0.439453125, "step": 97 }, { "epoch": 0.006624307151547925, "grad_norm": 10.831153869628906, "learning_rate": 5.6835937500000005e-06, "loss": 0.562744140625, "step": 98 }, { "epoch": 0.006691902122482087, "grad_norm": 10.59787368774414, "learning_rate": 5.7421875e-06, "loss": 0.32305908203125, "step": 99 }, { "epoch": 0.00675949709341625, "grad_norm": 15.515405654907227, "learning_rate": 5.80078125e-06, "loss": 0.6064453125, "step": 100 }, { "epoch": 0.006827092064350412, "grad_norm": 8.81550407409668, "learning_rate": 5.859375e-06, "loss": 0.4632568359375, "step": 101 }, { "epoch": 0.006894687035284575, "grad_norm": 5.732886791229248, "learning_rate": 5.91796875e-06, "loss": 0.4920654296875, "step": 102 }, { "epoch": 0.0069622820062187375, "grad_norm": 6.533648490905762, "learning_rate": 5.9765625000000004e-06, "loss": 0.600341796875, "step": 103 }, { "epoch": 0.007029876977152899, "grad_norm": 12.932906150817871, "learning_rate": 6.03515625e-06, "loss": 0.47900390625, "step": 104 }, { "epoch": 0.007097471948087062, "grad_norm": 21.536033630371094, "learning_rate": 6.09375e-06, "loss": 0.32470703125, "step": 105 }, { "epoch": 0.007165066919021225, "grad_norm": 9.352928161621094, "learning_rate": 6.15234375e-06, "loss": 0.624755859375, "step": 106 }, { "epoch": 0.007232661889955388, "grad_norm": 12.890649795532227, "learning_rate": 6.2109375e-06, "loss": 0.5042724609375, "step": 107 }, { "epoch": 0.0073002568608895495, "grad_norm": 37.52421569824219, "learning_rate": 6.26953125e-06, "loss": 0.623046875, "step": 108 }, { "epoch": 0.007367851831823712, "grad_norm": 17.197898864746094, "learning_rate": 6.3281250000000005e-06, "loss": 0.5870361328125, "step": 109 }, { "epoch": 0.007435446802757875, "grad_norm": 5.445653438568115, "learning_rate": 6.38671875e-06, "loss": 0.569091796875, "step": 110 }, { "epoch": 0.007503041773692038, "grad_norm": 3.685796022415161, "learning_rate": 6.4453125e-06, "loss": 0.5445556640625, "step": 111 }, { "epoch": 0.0075706367446262, "grad_norm": 16.067686080932617, "learning_rate": 6.50390625e-06, "loss": 0.6121826171875, "step": 112 }, { "epoch": 0.007638231715560362, "grad_norm": 3.9148123264312744, "learning_rate": 6.5625e-06, "loss": 0.5330810546875, "step": 113 }, { "epoch": 0.007705826686494525, "grad_norm": 15.87633991241455, "learning_rate": 6.6210937500000004e-06, "loss": 0.4434814453125, "step": 114 }, { "epoch": 0.007773421657428687, "grad_norm": 16.056718826293945, "learning_rate": 6.679687500000001e-06, "loss": 0.5347900390625, "step": 115 }, { "epoch": 0.00784101662836285, "grad_norm": 9.026244163513184, "learning_rate": 6.73828125e-06, "loss": 0.5242919921875, "step": 116 }, { "epoch": 0.007908611599297012, "grad_norm": 17.7529354095459, "learning_rate": 6.796875e-06, "loss": 0.378662109375, "step": 117 }, { "epoch": 0.007976206570231174, "grad_norm": 10.605545997619629, "learning_rate": 6.85546875e-06, "loss": 0.5941162109375, "step": 118 }, { "epoch": 0.008043801541165338, "grad_norm": 11.230229377746582, "learning_rate": 6.9140625e-06, "loss": 0.31884765625, "step": 119 }, { "epoch": 0.0081113965120995, "grad_norm": 18.77997398376465, "learning_rate": 6.9726562500000005e-06, "loss": 0.5755615234375, "step": 120 }, { "epoch": 0.008178991483033662, "grad_norm": 15.785585403442383, "learning_rate": 7.03125e-06, "loss": 0.5506591796875, "step": 121 }, { "epoch": 0.008246586453967825, "grad_norm": 14.699843406677246, "learning_rate": 7.08984375e-06, "loss": 0.5396728515625, "step": 122 }, { "epoch": 0.008314181424901987, "grad_norm": 8.191292762756348, "learning_rate": 7.1484375e-06, "loss": 0.58837890625, "step": 123 }, { "epoch": 0.008381776395836149, "grad_norm": 16.5272274017334, "learning_rate": 7.20703125e-06, "loss": 0.42529296875, "step": 124 }, { "epoch": 0.008449371366770313, "grad_norm": 8.090787887573242, "learning_rate": 7.265625e-06, "loss": 0.5516357421875, "step": 125 }, { "epoch": 0.008516966337704475, "grad_norm": 15.129013061523438, "learning_rate": 7.3242187500000006e-06, "loss": 0.5426025390625, "step": 126 }, { "epoch": 0.008584561308638636, "grad_norm": 8.916191101074219, "learning_rate": 7.3828125e-06, "loss": 0.43798828125, "step": 127 }, { "epoch": 0.0086521562795728, "grad_norm": 6.416059494018555, "learning_rate": 7.44140625e-06, "loss": 0.5238037109375, "step": 128 }, { "epoch": 0.008719751250506962, "grad_norm": 17.497074127197266, "learning_rate": 7.5e-06, "loss": 0.4027099609375, "step": 129 }, { "epoch": 0.008787346221441126, "grad_norm": 9.624481201171875, "learning_rate": 7.55859375e-06, "loss": 0.4791259765625, "step": 130 }, { "epoch": 0.008854941192375287, "grad_norm": 26.93037223815918, "learning_rate": 7.6171875000000005e-06, "loss": 0.4688720703125, "step": 131 }, { "epoch": 0.00892253616330945, "grad_norm": 10.554718017578125, "learning_rate": 7.67578125e-06, "loss": 0.339111328125, "step": 132 }, { "epoch": 0.008990131134243613, "grad_norm": 15.841261863708496, "learning_rate": 7.734375e-06, "loss": 0.47979736328125, "step": 133 }, { "epoch": 0.009057726105177775, "grad_norm": 11.680856704711914, "learning_rate": 7.792968750000001e-06, "loss": 0.4505615234375, "step": 134 }, { "epoch": 0.009125321076111937, "grad_norm": 14.094694137573242, "learning_rate": 7.8515625e-06, "loss": 0.3704833984375, "step": 135 }, { "epoch": 0.0091929160470461, "grad_norm": 13.134392738342285, "learning_rate": 7.91015625e-06, "loss": 0.535888671875, "step": 136 }, { "epoch": 0.009260511017980262, "grad_norm": 14.410898208618164, "learning_rate": 7.96875e-06, "loss": 0.470947265625, "step": 137 }, { "epoch": 0.009328105988914424, "grad_norm": 6.6609368324279785, "learning_rate": 8.02734375e-06, "loss": 0.38543701171875, "step": 138 }, { "epoch": 0.009395700959848588, "grad_norm": 6.0970563888549805, "learning_rate": 8.085937500000001e-06, "loss": 0.5787353515625, "step": 139 }, { "epoch": 0.00946329593078275, "grad_norm": 12.146893501281738, "learning_rate": 8.14453125e-06, "loss": 0.3890380859375, "step": 140 }, { "epoch": 0.009530890901716911, "grad_norm": 18.612384796142578, "learning_rate": 8.203125e-06, "loss": 0.42529296875, "step": 141 }, { "epoch": 0.009598485872651075, "grad_norm": 5.246151447296143, "learning_rate": 8.26171875e-06, "loss": 0.556640625, "step": 142 }, { "epoch": 0.009666080843585237, "grad_norm": 18.75251007080078, "learning_rate": 8.3203125e-06, "loss": 0.4786376953125, "step": 143 }, { "epoch": 0.0097336758145194, "grad_norm": 12.250019073486328, "learning_rate": 8.37890625e-06, "loss": 0.547607421875, "step": 144 }, { "epoch": 0.009801270785453562, "grad_norm": 12.071134567260742, "learning_rate": 8.4375e-06, "loss": 0.5264892578125, "step": 145 }, { "epoch": 0.009868865756387724, "grad_norm": 5.622722625732422, "learning_rate": 8.49609375e-06, "loss": 0.511474609375, "step": 146 }, { "epoch": 0.009936460727321888, "grad_norm": 19.010726928710938, "learning_rate": 8.5546875e-06, "loss": 0.323516845703125, "step": 147 }, { "epoch": 0.01000405569825605, "grad_norm": 9.66331672668457, "learning_rate": 8.61328125e-06, "loss": 0.4525146484375, "step": 148 }, { "epoch": 0.010071650669190212, "grad_norm": 25.01964569091797, "learning_rate": 8.671875e-06, "loss": 0.26788330078125, "step": 149 }, { "epoch": 0.010139245640124375, "grad_norm": 9.379332542419434, "learning_rate": 8.73046875e-06, "loss": 0.552734375, "step": 150 }, { "epoch": 0.010206840611058537, "grad_norm": 16.098974227905273, "learning_rate": 8.789062500000001e-06, "loss": 0.5101318359375, "step": 151 }, { "epoch": 0.010274435581992699, "grad_norm": 8.779474258422852, "learning_rate": 8.84765625e-06, "loss": 0.5179443359375, "step": 152 }, { "epoch": 0.010342030552926863, "grad_norm": 7.032350540161133, "learning_rate": 8.90625e-06, "loss": 0.5472412109375, "step": 153 }, { "epoch": 0.010409625523861024, "grad_norm": 10.77122688293457, "learning_rate": 8.96484375e-06, "loss": 0.3970947265625, "step": 154 }, { "epoch": 0.010477220494795188, "grad_norm": 12.253539085388184, "learning_rate": 9.0234375e-06, "loss": 0.521240234375, "step": 155 }, { "epoch": 0.01054481546572935, "grad_norm": 8.965794563293457, "learning_rate": 9.082031250000001e-06, "loss": 0.5462646484375, "step": 156 }, { "epoch": 0.010612410436663512, "grad_norm": 5.89331579208374, "learning_rate": 9.140625e-06, "loss": 0.39404296875, "step": 157 }, { "epoch": 0.010680005407597675, "grad_norm": 15.297825813293457, "learning_rate": 9.19921875e-06, "loss": 0.4677734375, "step": 158 }, { "epoch": 0.010747600378531837, "grad_norm": 15.448439598083496, "learning_rate": 9.2578125e-06, "loss": 0.50518798828125, "step": 159 }, { "epoch": 0.010815195349466, "grad_norm": 6.384533882141113, "learning_rate": 9.31640625e-06, "loss": 0.42169189453125, "step": 160 }, { "epoch": 0.010882790320400163, "grad_norm": 21.78298568725586, "learning_rate": 9.375000000000001e-06, "loss": 0.53009033203125, "step": 161 }, { "epoch": 0.010950385291334325, "grad_norm": 6.944581031799316, "learning_rate": 9.43359375e-06, "loss": 0.477294921875, "step": 162 }, { "epoch": 0.011017980262268487, "grad_norm": 10.08979320526123, "learning_rate": 9.4921875e-06, "loss": 0.39788818359375, "step": 163 }, { "epoch": 0.01108557523320265, "grad_norm": 21.679485321044922, "learning_rate": 9.55078125e-06, "loss": 0.4835205078125, "step": 164 }, { "epoch": 0.011153170204136812, "grad_norm": 18.410783767700195, "learning_rate": 9.609375e-06, "loss": 0.4981689453125, "step": 165 }, { "epoch": 0.011220765175070974, "grad_norm": 11.533369064331055, "learning_rate": 9.66796875e-06, "loss": 0.3372344970703125, "step": 166 }, { "epoch": 0.011288360146005138, "grad_norm": 19.489755630493164, "learning_rate": 9.7265625e-06, "loss": 0.53125, "step": 167 }, { "epoch": 0.0113559551169393, "grad_norm": 8.79713249206543, "learning_rate": 9.78515625e-06, "loss": 0.5023193359375, "step": 168 }, { "epoch": 0.011423550087873463, "grad_norm": 13.85281753540039, "learning_rate": 9.84375e-06, "loss": 0.3602294921875, "step": 169 }, { "epoch": 0.011491145058807625, "grad_norm": 8.22226619720459, "learning_rate": 9.90234375e-06, "loss": 0.29510498046875, "step": 170 }, { "epoch": 0.011558740029741787, "grad_norm": 10.677448272705078, "learning_rate": 9.9609375e-06, "loss": 0.3702392578125, "step": 171 }, { "epoch": 0.01162633500067595, "grad_norm": 19.591066360473633, "learning_rate": 1.001953125e-05, "loss": 0.44134521484375, "step": 172 }, { "epoch": 0.011693929971610112, "grad_norm": 12.61638355255127, "learning_rate": 1.0078125000000001e-05, "loss": 0.4560546875, "step": 173 }, { "epoch": 0.011761524942544274, "grad_norm": 18.47815704345703, "learning_rate": 1.013671875e-05, "loss": 0.36322021484375, "step": 174 }, { "epoch": 0.011829119913478438, "grad_norm": 13.737299919128418, "learning_rate": 1.01953125e-05, "loss": 0.5040283203125, "step": 175 }, { "epoch": 0.0118967148844126, "grad_norm": 22.247827529907227, "learning_rate": 1.025390625e-05, "loss": 0.5322265625, "step": 176 }, { "epoch": 0.011964309855346762, "grad_norm": 9.354398727416992, "learning_rate": 1.03125e-05, "loss": 0.36102294921875, "step": 177 }, { "epoch": 0.012031904826280925, "grad_norm": 35.42775344848633, "learning_rate": 1.0371093750000001e-05, "loss": 0.449462890625, "step": 178 }, { "epoch": 0.012099499797215087, "grad_norm": 19.184255599975586, "learning_rate": 1.04296875e-05, "loss": 0.49810791015625, "step": 179 }, { "epoch": 0.012167094768149249, "grad_norm": 20.15510368347168, "learning_rate": 1.048828125e-05, "loss": 0.42431640625, "step": 180 }, { "epoch": 0.012234689739083413, "grad_norm": 11.98323917388916, "learning_rate": 1.0546875e-05, "loss": 0.4527587890625, "step": 181 }, { "epoch": 0.012302284710017574, "grad_norm": 13.19327163696289, "learning_rate": 1.060546875e-05, "loss": 0.4378662109375, "step": 182 }, { "epoch": 0.012369879680951738, "grad_norm": 9.188831329345703, "learning_rate": 1.0664062500000001e-05, "loss": 0.309906005859375, "step": 183 }, { "epoch": 0.0124374746518859, "grad_norm": 16.454498291015625, "learning_rate": 1.072265625e-05, "loss": 0.47540283203125, "step": 184 }, { "epoch": 0.012505069622820062, "grad_norm": 10.68767261505127, "learning_rate": 1.078125e-05, "loss": 0.46514892578125, "step": 185 }, { "epoch": 0.012572664593754225, "grad_norm": 31.68410873413086, "learning_rate": 1.083984375e-05, "loss": 0.363128662109375, "step": 186 }, { "epoch": 0.012640259564688387, "grad_norm": 21.055166244506836, "learning_rate": 1.08984375e-05, "loss": 0.4691162109375, "step": 187 }, { "epoch": 0.012707854535622549, "grad_norm": 29.60625648498535, "learning_rate": 1.095703125e-05, "loss": 0.20355224609375, "step": 188 }, { "epoch": 0.012775449506556713, "grad_norm": 9.87784194946289, "learning_rate": 1.1015625e-05, "loss": 0.4561767578125, "step": 189 }, { "epoch": 0.012843044477490875, "grad_norm": 21.59063148498535, "learning_rate": 1.1074218750000001e-05, "loss": 0.33062744140625, "step": 190 }, { "epoch": 0.012910639448425036, "grad_norm": 12.074749946594238, "learning_rate": 1.11328125e-05, "loss": 0.35791015625, "step": 191 }, { "epoch": 0.0129782344193592, "grad_norm": 9.166199684143066, "learning_rate": 1.119140625e-05, "loss": 0.328887939453125, "step": 192 }, { "epoch": 0.013045829390293362, "grad_norm": 11.027286529541016, "learning_rate": 1.125e-05, "loss": 0.31610107421875, "step": 193 }, { "epoch": 0.013113424361227524, "grad_norm": 10.467141151428223, "learning_rate": 1.130859375e-05, "loss": 0.52294921875, "step": 194 }, { "epoch": 0.013181019332161687, "grad_norm": 13.471571922302246, "learning_rate": 1.1367187500000001e-05, "loss": 0.479736328125, "step": 195 }, { "epoch": 0.01324861430309585, "grad_norm": 41.910221099853516, "learning_rate": 1.142578125e-05, "loss": 0.45672607421875, "step": 196 }, { "epoch": 0.013316209274030013, "grad_norm": 27.690279006958008, "learning_rate": 1.1484375e-05, "loss": 0.422119140625, "step": 197 }, { "epoch": 0.013383804244964175, "grad_norm": 14.014603614807129, "learning_rate": 1.154296875e-05, "loss": 0.43377685546875, "step": 198 }, { "epoch": 0.013451399215898337, "grad_norm": 16.531034469604492, "learning_rate": 1.16015625e-05, "loss": 0.5006103515625, "step": 199 }, { "epoch": 0.0135189941868325, "grad_norm": 22.304424285888672, "learning_rate": 1.1660156250000001e-05, "loss": 0.35516357421875, "step": 200 }, { "epoch": 0.013586589157766662, "grad_norm": 8.153950691223145, "learning_rate": 1.171875e-05, "loss": 0.43017578125, "step": 201 }, { "epoch": 0.013654184128700824, "grad_norm": 10.384392738342285, "learning_rate": 1.177734375e-05, "loss": 0.36053466796875, "step": 202 }, { "epoch": 0.013721779099634988, "grad_norm": 16.322481155395508, "learning_rate": 1.18359375e-05, "loss": 0.22589111328125, "step": 203 }, { "epoch": 0.01378937407056915, "grad_norm": 22.328792572021484, "learning_rate": 1.189453125e-05, "loss": 0.35650634765625, "step": 204 }, { "epoch": 0.013856969041503311, "grad_norm": 14.692055702209473, "learning_rate": 1.1953125000000001e-05, "loss": 0.49267578125, "step": 205 }, { "epoch": 0.013924564012437475, "grad_norm": 6.7802581787109375, "learning_rate": 1.201171875e-05, "loss": 0.509033203125, "step": 206 }, { "epoch": 0.013992158983371637, "grad_norm": 5.527858734130859, "learning_rate": 1.20703125e-05, "loss": 0.40740966796875, "step": 207 }, { "epoch": 0.014059753954305799, "grad_norm": 24.50481414794922, "learning_rate": 1.212890625e-05, "loss": 0.52215576171875, "step": 208 }, { "epoch": 0.014127348925239962, "grad_norm": 36.30490493774414, "learning_rate": 1.21875e-05, "loss": 0.588836669921875, "step": 209 }, { "epoch": 0.014194943896174124, "grad_norm": 12.675297737121582, "learning_rate": 1.224609375e-05, "loss": 0.35418701171875, "step": 210 }, { "epoch": 0.014262538867108288, "grad_norm": 8.961122512817383, "learning_rate": 1.23046875e-05, "loss": 0.25482177734375, "step": 211 }, { "epoch": 0.01433013383804245, "grad_norm": 18.651443481445312, "learning_rate": 1.2363281250000001e-05, "loss": 0.392822265625, "step": 212 }, { "epoch": 0.014397728808976612, "grad_norm": 16.555866241455078, "learning_rate": 1.2421875e-05, "loss": 0.5245361328125, "step": 213 }, { "epoch": 0.014465323779910775, "grad_norm": 4.250061511993408, "learning_rate": 1.248046875e-05, "loss": 0.3795166015625, "step": 214 }, { "epoch": 0.014532918750844937, "grad_norm": 9.845860481262207, "learning_rate": 1.25390625e-05, "loss": 0.3023681640625, "step": 215 }, { "epoch": 0.014600513721779099, "grad_norm": 8.749181747436523, "learning_rate": 1.259765625e-05, "loss": 0.246124267578125, "step": 216 }, { "epoch": 0.014668108692713263, "grad_norm": 18.12505340576172, "learning_rate": 1.2656250000000001e-05, "loss": 0.36737060546875, "step": 217 }, { "epoch": 0.014735703663647425, "grad_norm": 25.810827255249023, "learning_rate": 1.271484375e-05, "loss": 0.33795166015625, "step": 218 }, { "epoch": 0.014803298634581586, "grad_norm": 8.88468074798584, "learning_rate": 1.27734375e-05, "loss": 0.3916015625, "step": 219 }, { "epoch": 0.01487089360551575, "grad_norm": 7.683413982391357, "learning_rate": 1.283203125e-05, "loss": 0.53662109375, "step": 220 }, { "epoch": 0.014938488576449912, "grad_norm": 13.124102592468262, "learning_rate": 1.2890625e-05, "loss": 0.34661865234375, "step": 221 }, { "epoch": 0.015006083547384075, "grad_norm": 9.105427742004395, "learning_rate": 1.2949218750000001e-05, "loss": 0.28204345703125, "step": 222 }, { "epoch": 0.015073678518318237, "grad_norm": 17.732311248779297, "learning_rate": 1.30078125e-05, "loss": 0.38995361328125, "step": 223 }, { "epoch": 0.0151412734892524, "grad_norm": 6.7243971824646, "learning_rate": 1.306640625e-05, "loss": 0.4027099609375, "step": 224 }, { "epoch": 0.015208868460186563, "grad_norm": 8.136382102966309, "learning_rate": 1.3125e-05, "loss": 0.305572509765625, "step": 225 }, { "epoch": 0.015276463431120725, "grad_norm": 30.52610206604004, "learning_rate": 1.318359375e-05, "loss": 0.3807373046875, "step": 226 }, { "epoch": 0.015344058402054887, "grad_norm": 22.44892120361328, "learning_rate": 1.3242187500000001e-05, "loss": 0.45721435546875, "step": 227 }, { "epoch": 0.01541165337298905, "grad_norm": 22.489412307739258, "learning_rate": 1.330078125e-05, "loss": 0.32330322265625, "step": 228 }, { "epoch": 0.015479248343923212, "grad_norm": 25.134788513183594, "learning_rate": 1.3359375000000001e-05, "loss": 0.396728515625, "step": 229 }, { "epoch": 0.015546843314857374, "grad_norm": 44.69144821166992, "learning_rate": 1.341796875e-05, "loss": 0.47216796875, "step": 230 }, { "epoch": 0.015614438285791538, "grad_norm": 40.86886978149414, "learning_rate": 1.34765625e-05, "loss": 0.4725341796875, "step": 231 }, { "epoch": 0.0156820332567257, "grad_norm": 24.085290908813477, "learning_rate": 1.353515625e-05, "loss": 0.2926025390625, "step": 232 }, { "epoch": 0.01574962822765986, "grad_norm": 15.653055191040039, "learning_rate": 1.359375e-05, "loss": 0.444091796875, "step": 233 }, { "epoch": 0.015817223198594025, "grad_norm": 11.8851900100708, "learning_rate": 1.3652343750000001e-05, "loss": 0.35479736328125, "step": 234 }, { "epoch": 0.01588481816952819, "grad_norm": 18.053234100341797, "learning_rate": 1.37109375e-05, "loss": 0.378570556640625, "step": 235 }, { "epoch": 0.01595241314046235, "grad_norm": 13.784278869628906, "learning_rate": 1.376953125e-05, "loss": 0.3389892578125, "step": 236 }, { "epoch": 0.016020008111396512, "grad_norm": 5.122940540313721, "learning_rate": 1.3828125e-05, "loss": 0.28369140625, "step": 237 }, { "epoch": 0.016087603082330676, "grad_norm": 32.74162673950195, "learning_rate": 1.388671875e-05, "loss": 0.36199951171875, "step": 238 }, { "epoch": 0.016155198053264836, "grad_norm": 59.31987762451172, "learning_rate": 1.3945312500000001e-05, "loss": 0.4249267578125, "step": 239 }, { "epoch": 0.016222793024199, "grad_norm": 38.34546661376953, "learning_rate": 1.400390625e-05, "loss": 0.42413330078125, "step": 240 }, { "epoch": 0.016290387995133163, "grad_norm": 37.92845916748047, "learning_rate": 1.40625e-05, "loss": 0.4285888671875, "step": 241 }, { "epoch": 0.016357982966067323, "grad_norm": 9.306253433227539, "learning_rate": 1.412109375e-05, "loss": 0.332550048828125, "step": 242 }, { "epoch": 0.016425577937001487, "grad_norm": 10.505171775817871, "learning_rate": 1.41796875e-05, "loss": 0.42755126953125, "step": 243 }, { "epoch": 0.01649317290793565, "grad_norm": 8.981510162353516, "learning_rate": 1.4238281250000001e-05, "loss": 0.40362548828125, "step": 244 }, { "epoch": 0.01656076787886981, "grad_norm": 10.409797668457031, "learning_rate": 1.4296875e-05, "loss": 0.43280029296875, "step": 245 }, { "epoch": 0.016628362849803974, "grad_norm": 9.640511512756348, "learning_rate": 1.435546875e-05, "loss": 0.29412841796875, "step": 246 }, { "epoch": 0.016695957820738138, "grad_norm": 8.43283748626709, "learning_rate": 1.44140625e-05, "loss": 0.4049072265625, "step": 247 }, { "epoch": 0.016763552791672298, "grad_norm": 7.976908206939697, "learning_rate": 1.447265625e-05, "loss": 0.40087890625, "step": 248 }, { "epoch": 0.016831147762606462, "grad_norm": 13.340276718139648, "learning_rate": 1.453125e-05, "loss": 0.373687744140625, "step": 249 }, { "epoch": 0.016898742733540625, "grad_norm": 25.665653228759766, "learning_rate": 1.458984375e-05, "loss": 0.49200439453125, "step": 250 }, { "epoch": 0.016966337704474786, "grad_norm": 8.838981628417969, "learning_rate": 1.4648437500000001e-05, "loss": 0.36224365234375, "step": 251 }, { "epoch": 0.01703393267540895, "grad_norm": 22.96874237060547, "learning_rate": 1.470703125e-05, "loss": 0.3992919921875, "step": 252 }, { "epoch": 0.017101527646343113, "grad_norm": 7.071909427642822, "learning_rate": 1.4765625e-05, "loss": 0.438232421875, "step": 253 }, { "epoch": 0.017169122617277273, "grad_norm": 19.137914657592773, "learning_rate": 1.482421875e-05, "loss": 0.49810791015625, "step": 254 }, { "epoch": 0.017236717588211437, "grad_norm": 9.561112403869629, "learning_rate": 1.48828125e-05, "loss": 0.3275604248046875, "step": 255 }, { "epoch": 0.0173043125591456, "grad_norm": 7.83955717086792, "learning_rate": 1.4941406250000001e-05, "loss": 0.44384765625, "step": 256 }, { "epoch": 0.017371907530079764, "grad_norm": 7.671054840087891, "learning_rate": 1.5e-05, "loss": 0.3953857421875, "step": 257 }, { "epoch": 0.017439502501013924, "grad_norm": 11.55572509765625, "learning_rate": 1.505859375e-05, "loss": 0.358856201171875, "step": 258 }, { "epoch": 0.017507097471948087, "grad_norm": 7.398098468780518, "learning_rate": 1.51171875e-05, "loss": 0.3385009765625, "step": 259 }, { "epoch": 0.01757469244288225, "grad_norm": 6.948439598083496, "learning_rate": 1.517578125e-05, "loss": 0.334442138671875, "step": 260 }, { "epoch": 0.01764228741381641, "grad_norm": 7.050668239593506, "learning_rate": 1.5234375000000001e-05, "loss": 0.281982421875, "step": 261 }, { "epoch": 0.017709882384750575, "grad_norm": 6.308709621429443, "learning_rate": 1.529296875e-05, "loss": 0.207672119140625, "step": 262 }, { "epoch": 0.01777747735568474, "grad_norm": 8.364511489868164, "learning_rate": 1.53515625e-05, "loss": 0.475830078125, "step": 263 }, { "epoch": 0.0178450723266189, "grad_norm": 17.60248374938965, "learning_rate": 1.541015625e-05, "loss": 0.3079833984375, "step": 264 }, { "epoch": 0.017912667297553062, "grad_norm": 26.041372299194336, "learning_rate": 1.546875e-05, "loss": 0.44970703125, "step": 265 }, { "epoch": 0.017980262268487226, "grad_norm": 24.87964630126953, "learning_rate": 1.552734375e-05, "loss": 0.4595947265625, "step": 266 }, { "epoch": 0.018047857239421386, "grad_norm": 6.218098163604736, "learning_rate": 1.5585937500000002e-05, "loss": 0.42486572265625, "step": 267 }, { "epoch": 0.01811545221035555, "grad_norm": 4.311285495758057, "learning_rate": 1.564453125e-05, "loss": 0.35174560546875, "step": 268 }, { "epoch": 0.018183047181289713, "grad_norm": 8.270331382751465, "learning_rate": 1.5703125e-05, "loss": 0.3988037109375, "step": 269 }, { "epoch": 0.018250642152223873, "grad_norm": 3.836292266845703, "learning_rate": 1.576171875e-05, "loss": 0.28204345703125, "step": 270 }, { "epoch": 0.018318237123158037, "grad_norm": 12.531079292297363, "learning_rate": 1.58203125e-05, "loss": 0.4267578125, "step": 271 }, { "epoch": 0.0183858320940922, "grad_norm": 4.595803260803223, "learning_rate": 1.587890625e-05, "loss": 0.3702392578125, "step": 272 }, { "epoch": 0.01845342706502636, "grad_norm": 12.017866134643555, "learning_rate": 1.59375e-05, "loss": 0.4993896484375, "step": 273 }, { "epoch": 0.018521022035960524, "grad_norm": 19.029865264892578, "learning_rate": 1.599609375e-05, "loss": 0.3809814453125, "step": 274 }, { "epoch": 0.018588617006894688, "grad_norm": 3.7881884574890137, "learning_rate": 1.60546875e-05, "loss": 0.21551513671875, "step": 275 }, { "epoch": 0.018656211977828848, "grad_norm": 10.876836776733398, "learning_rate": 1.611328125e-05, "loss": 0.34210205078125, "step": 276 }, { "epoch": 0.01872380694876301, "grad_norm": 11.110776901245117, "learning_rate": 1.6171875000000002e-05, "loss": 0.31689453125, "step": 277 }, { "epoch": 0.018791401919697175, "grad_norm": 7.078802585601807, "learning_rate": 1.623046875e-05, "loss": 0.32379150390625, "step": 278 }, { "epoch": 0.018858996890631335, "grad_norm": 5.330185413360596, "learning_rate": 1.62890625e-05, "loss": 0.333831787109375, "step": 279 }, { "epoch": 0.0189265918615655, "grad_norm": 16.214954376220703, "learning_rate": 1.634765625e-05, "loss": 0.4488525390625, "step": 280 }, { "epoch": 0.018994186832499663, "grad_norm": 9.890885353088379, "learning_rate": 1.640625e-05, "loss": 0.25664520263671875, "step": 281 }, { "epoch": 0.019061781803433823, "grad_norm": 8.896258354187012, "learning_rate": 1.646484375e-05, "loss": 0.280303955078125, "step": 282 }, { "epoch": 0.019129376774367986, "grad_norm": 11.9606351852417, "learning_rate": 1.65234375e-05, "loss": 0.3812255859375, "step": 283 }, { "epoch": 0.01919697174530215, "grad_norm": 6.294325351715088, "learning_rate": 1.6582031250000002e-05, "loss": 0.437255859375, "step": 284 }, { "epoch": 0.019264566716236314, "grad_norm": 5.923013687133789, "learning_rate": 1.6640625e-05, "loss": 0.4114990234375, "step": 285 }, { "epoch": 0.019332161687170474, "grad_norm": 12.417588233947754, "learning_rate": 1.669921875e-05, "loss": 0.412841796875, "step": 286 }, { "epoch": 0.019399756658104637, "grad_norm": 16.09050178527832, "learning_rate": 1.67578125e-05, "loss": 0.23504638671875, "step": 287 }, { "epoch": 0.0194673516290388, "grad_norm": 7.653351783752441, "learning_rate": 1.681640625e-05, "loss": 0.3399658203125, "step": 288 }, { "epoch": 0.01953494659997296, "grad_norm": 17.556743621826172, "learning_rate": 1.6875e-05, "loss": 0.3402099609375, "step": 289 }, { "epoch": 0.019602541570907125, "grad_norm": 11.151601791381836, "learning_rate": 1.693359375e-05, "loss": 0.336578369140625, "step": 290 }, { "epoch": 0.01967013654184129, "grad_norm": 13.320305824279785, "learning_rate": 1.69921875e-05, "loss": 0.42724609375, "step": 291 }, { "epoch": 0.01973773151277545, "grad_norm": 6.987049579620361, "learning_rate": 1.705078125e-05, "loss": 0.254150390625, "step": 292 }, { "epoch": 0.019805326483709612, "grad_norm": 27.1398983001709, "learning_rate": 1.7109375e-05, "loss": 0.423583984375, "step": 293 }, { "epoch": 0.019872921454643776, "grad_norm": 15.715489387512207, "learning_rate": 1.7167968750000002e-05, "loss": 0.41607666015625, "step": 294 }, { "epoch": 0.019940516425577936, "grad_norm": 3.283562660217285, "learning_rate": 1.72265625e-05, "loss": 0.32196044921875, "step": 295 }, { "epoch": 0.0200081113965121, "grad_norm": 5.203441619873047, "learning_rate": 1.728515625e-05, "loss": 0.4068603515625, "step": 296 }, { "epoch": 0.020075706367446263, "grad_norm": 12.20215129852295, "learning_rate": 1.734375e-05, "loss": 0.4058837890625, "step": 297 }, { "epoch": 0.020143301338380423, "grad_norm": 28.008119583129883, "learning_rate": 1.740234375e-05, "loss": 0.5225830078125, "step": 298 }, { "epoch": 0.020210896309314587, "grad_norm": 13.312891006469727, "learning_rate": 1.74609375e-05, "loss": 0.30718994140625, "step": 299 }, { "epoch": 0.02027849128024875, "grad_norm": 3.826904296875, "learning_rate": 1.751953125e-05, "loss": 0.26593017578125, "step": 300 }, { "epoch": 0.02034608625118291, "grad_norm": 5.828481197357178, "learning_rate": 1.7578125000000002e-05, "loss": 0.3548583984375, "step": 301 }, { "epoch": 0.020413681222117074, "grad_norm": 14.180804252624512, "learning_rate": 1.763671875e-05, "loss": 0.3154296875, "step": 302 }, { "epoch": 0.020481276193051238, "grad_norm": 16.05838394165039, "learning_rate": 1.76953125e-05, "loss": 0.24835205078125, "step": 303 }, { "epoch": 0.020548871163985398, "grad_norm": 15.28410530090332, "learning_rate": 1.775390625e-05, "loss": 0.312744140625, "step": 304 }, { "epoch": 0.02061646613491956, "grad_norm": 12.303977012634277, "learning_rate": 1.78125e-05, "loss": 0.42730712890625, "step": 305 }, { "epoch": 0.020684061105853725, "grad_norm": 20.792097091674805, "learning_rate": 1.787109375e-05, "loss": 0.4832763671875, "step": 306 }, { "epoch": 0.020751656076787885, "grad_norm": 10.639427185058594, "learning_rate": 1.79296875e-05, "loss": 0.32818603515625, "step": 307 }, { "epoch": 0.02081925104772205, "grad_norm": 14.682500839233398, "learning_rate": 1.798828125e-05, "loss": 0.337615966796875, "step": 308 }, { "epoch": 0.020886846018656213, "grad_norm": 8.945476531982422, "learning_rate": 1.8046875e-05, "loss": 0.39422607421875, "step": 309 }, { "epoch": 0.020954440989590376, "grad_norm": 5.966092586517334, "learning_rate": 1.810546875e-05, "loss": 0.304931640625, "step": 310 }, { "epoch": 0.021022035960524536, "grad_norm": 10.636371612548828, "learning_rate": 1.8164062500000002e-05, "loss": 0.29595947265625, "step": 311 }, { "epoch": 0.0210896309314587, "grad_norm": 16.103071212768555, "learning_rate": 1.822265625e-05, "loss": 0.33160400390625, "step": 312 }, { "epoch": 0.021157225902392864, "grad_norm": 7.8978729248046875, "learning_rate": 1.828125e-05, "loss": 0.37005615234375, "step": 313 }, { "epoch": 0.021224820873327024, "grad_norm": 19.1347599029541, "learning_rate": 1.833984375e-05, "loss": 0.49188232421875, "step": 314 }, { "epoch": 0.021292415844261187, "grad_norm": 12.828756332397461, "learning_rate": 1.83984375e-05, "loss": 0.37054443359375, "step": 315 }, { "epoch": 0.02136001081519535, "grad_norm": 13.509920120239258, "learning_rate": 1.845703125e-05, "loss": 0.40582275390625, "step": 316 }, { "epoch": 0.02142760578612951, "grad_norm": 6.091602802276611, "learning_rate": 1.8515625e-05, "loss": 0.1622314453125, "step": 317 }, { "epoch": 0.021495200757063675, "grad_norm": 4.584897041320801, "learning_rate": 1.8574218750000002e-05, "loss": 0.40350341796875, "step": 318 }, { "epoch": 0.02156279572799784, "grad_norm": 9.266302108764648, "learning_rate": 1.86328125e-05, "loss": 0.3055419921875, "step": 319 }, { "epoch": 0.021630390698932, "grad_norm": 7.083864688873291, "learning_rate": 1.869140625e-05, "loss": 0.267181396484375, "step": 320 }, { "epoch": 0.021697985669866162, "grad_norm": 4.969736099243164, "learning_rate": 1.8750000000000002e-05, "loss": 0.2015380859375, "step": 321 }, { "epoch": 0.021765580640800326, "grad_norm": 7.681707382202148, "learning_rate": 1.880859375e-05, "loss": 0.36871337890625, "step": 322 }, { "epoch": 0.021833175611734486, "grad_norm": 7.5607476234436035, "learning_rate": 1.88671875e-05, "loss": 0.279327392578125, "step": 323 }, { "epoch": 0.02190077058266865, "grad_norm": 9.576064109802246, "learning_rate": 1.892578125e-05, "loss": 0.35400390625, "step": 324 }, { "epoch": 0.021968365553602813, "grad_norm": 9.994389533996582, "learning_rate": 1.8984375e-05, "loss": 0.30560302734375, "step": 325 }, { "epoch": 0.022035960524536973, "grad_norm": 13.851361274719238, "learning_rate": 1.904296875e-05, "loss": 0.41937255859375, "step": 326 }, { "epoch": 0.022103555495471137, "grad_norm": 12.864707946777344, "learning_rate": 1.91015625e-05, "loss": 0.40936279296875, "step": 327 }, { "epoch": 0.0221711504664053, "grad_norm": 8.652629852294922, "learning_rate": 1.9160156250000002e-05, "loss": 0.31097412109375, "step": 328 }, { "epoch": 0.02223874543733946, "grad_norm": 7.963138580322266, "learning_rate": 1.921875e-05, "loss": 0.4039306640625, "step": 329 }, { "epoch": 0.022306340408273624, "grad_norm": 7.995489120483398, "learning_rate": 1.927734375e-05, "loss": 0.3956298828125, "step": 330 }, { "epoch": 0.022373935379207788, "grad_norm": 6.008086204528809, "learning_rate": 1.93359375e-05, "loss": 0.45721435546875, "step": 331 }, { "epoch": 0.022441530350141948, "grad_norm": 16.22348403930664, "learning_rate": 1.939453125e-05, "loss": 0.3892822265625, "step": 332 }, { "epoch": 0.02250912532107611, "grad_norm": 12.896903038024902, "learning_rate": 1.9453125e-05, "loss": 0.23980712890625, "step": 333 }, { "epoch": 0.022576720292010275, "grad_norm": 8.125163078308105, "learning_rate": 1.951171875e-05, "loss": 0.4288330078125, "step": 334 }, { "epoch": 0.022644315262944435, "grad_norm": 14.871481895446777, "learning_rate": 1.95703125e-05, "loss": 0.41937255859375, "step": 335 }, { "epoch": 0.0227119102338786, "grad_norm": 8.436949729919434, "learning_rate": 1.962890625e-05, "loss": 0.46136474609375, "step": 336 }, { "epoch": 0.022779505204812762, "grad_norm": 3.9900710582733154, "learning_rate": 1.96875e-05, "loss": 0.252166748046875, "step": 337 }, { "epoch": 0.022847100175746926, "grad_norm": 7.742371559143066, "learning_rate": 1.9746093750000002e-05, "loss": 0.3543701171875, "step": 338 }, { "epoch": 0.022914695146681086, "grad_norm": 8.010601997375488, "learning_rate": 1.98046875e-05, "loss": 0.39935302734375, "step": 339 }, { "epoch": 0.02298229011761525, "grad_norm": 8.640402793884277, "learning_rate": 1.986328125e-05, "loss": 0.2415771484375, "step": 340 }, { "epoch": 0.023049885088549413, "grad_norm": 6.684320449829102, "learning_rate": 1.9921875e-05, "loss": 0.302001953125, "step": 341 }, { "epoch": 0.023117480059483574, "grad_norm": 3.19362211227417, "learning_rate": 1.998046875e-05, "loss": 0.22418212890625, "step": 342 }, { "epoch": 0.023185075030417737, "grad_norm": 24.919361114501953, "learning_rate": 2.00390625e-05, "loss": 0.490478515625, "step": 343 }, { "epoch": 0.0232526700013519, "grad_norm": 8.026551246643066, "learning_rate": 2.009765625e-05, "loss": 0.3114013671875, "step": 344 }, { "epoch": 0.02332026497228606, "grad_norm": 14.626218795776367, "learning_rate": 2.0156250000000002e-05, "loss": 0.44287109375, "step": 345 }, { "epoch": 0.023387859943220225, "grad_norm": 4.580677032470703, "learning_rate": 2.021484375e-05, "loss": 0.309051513671875, "step": 346 }, { "epoch": 0.023455454914154388, "grad_norm": 9.842231750488281, "learning_rate": 2.02734375e-05, "loss": 0.37548828125, "step": 347 }, { "epoch": 0.02352304988508855, "grad_norm": 13.839217185974121, "learning_rate": 2.033203125e-05, "loss": 0.23187255859375, "step": 348 }, { "epoch": 0.023590644856022712, "grad_norm": 4.114726543426514, "learning_rate": 2.0390625e-05, "loss": 0.4212646484375, "step": 349 }, { "epoch": 0.023658239826956876, "grad_norm": 6.680792331695557, "learning_rate": 2.044921875e-05, "loss": 0.35089111328125, "step": 350 }, { "epoch": 0.023725834797891036, "grad_norm": 2.630173921585083, "learning_rate": 2.05078125e-05, "loss": 0.33172607421875, "step": 351 }, { "epoch": 0.0237934297688252, "grad_norm": 11.128410339355469, "learning_rate": 2.056640625e-05, "loss": 0.476318359375, "step": 352 }, { "epoch": 0.023861024739759363, "grad_norm": 3.5069386959075928, "learning_rate": 2.0625e-05, "loss": 0.38311767578125, "step": 353 }, { "epoch": 0.023928619710693523, "grad_norm": 6.943353652954102, "learning_rate": 2.068359375e-05, "loss": 0.38214111328125, "step": 354 }, { "epoch": 0.023996214681627687, "grad_norm": 10.29513168334961, "learning_rate": 2.0742187500000002e-05, "loss": 0.423828125, "step": 355 }, { "epoch": 0.02406380965256185, "grad_norm": 9.959600448608398, "learning_rate": 2.080078125e-05, "loss": 0.40185546875, "step": 356 }, { "epoch": 0.02413140462349601, "grad_norm": 4.269263744354248, "learning_rate": 2.0859375e-05, "loss": 0.36865234375, "step": 357 }, { "epoch": 0.024198999594430174, "grad_norm": 11.501352310180664, "learning_rate": 2.091796875e-05, "loss": 0.304718017578125, "step": 358 }, { "epoch": 0.024266594565364338, "grad_norm": 7.833051681518555, "learning_rate": 2.09765625e-05, "loss": 0.36956787109375, "step": 359 }, { "epoch": 0.024334189536298498, "grad_norm": 12.18415641784668, "learning_rate": 2.103515625e-05, "loss": 0.37457275390625, "step": 360 }, { "epoch": 0.02440178450723266, "grad_norm": 4.586023330688477, "learning_rate": 2.109375e-05, "loss": 0.2010498046875, "step": 361 }, { "epoch": 0.024469379478166825, "grad_norm": 8.991268157958984, "learning_rate": 2.1152343750000002e-05, "loss": 0.2979736328125, "step": 362 }, { "epoch": 0.024536974449100985, "grad_norm": 4.671360969543457, "learning_rate": 2.12109375e-05, "loss": 0.306396484375, "step": 363 }, { "epoch": 0.02460456942003515, "grad_norm": 15.905271530151367, "learning_rate": 2.126953125e-05, "loss": 0.37732696533203125, "step": 364 }, { "epoch": 0.024672164390969312, "grad_norm": 21.70458221435547, "learning_rate": 2.1328125000000002e-05, "loss": 0.52911376953125, "step": 365 }, { "epoch": 0.024739759361903476, "grad_norm": 23.326099395751953, "learning_rate": 2.138671875e-05, "loss": 0.460906982421875, "step": 366 }, { "epoch": 0.024807354332837636, "grad_norm": 4.944180488586426, "learning_rate": 2.14453125e-05, "loss": 0.189544677734375, "step": 367 }, { "epoch": 0.0248749493037718, "grad_norm": 4.011873722076416, "learning_rate": 2.150390625e-05, "loss": 0.330169677734375, "step": 368 }, { "epoch": 0.024942544274705963, "grad_norm": 8.498035430908203, "learning_rate": 2.15625e-05, "loss": 0.38153076171875, "step": 369 }, { "epoch": 0.025010139245640124, "grad_norm": 5.019863128662109, "learning_rate": 2.162109375e-05, "loss": 0.4268798828125, "step": 370 }, { "epoch": 0.025077734216574287, "grad_norm": 4.76334810256958, "learning_rate": 2.16796875e-05, "loss": 0.3951416015625, "step": 371 }, { "epoch": 0.02514532918750845, "grad_norm": 8.548203468322754, "learning_rate": 2.1738281250000002e-05, "loss": 0.47723388671875, "step": 372 }, { "epoch": 0.02521292415844261, "grad_norm": 12.202250480651855, "learning_rate": 2.1796875e-05, "loss": 0.4210205078125, "step": 373 }, { "epoch": 0.025280519129376774, "grad_norm": 6.1733317375183105, "learning_rate": 2.185546875e-05, "loss": 0.330291748046875, "step": 374 }, { "epoch": 0.025348114100310938, "grad_norm": 2.858281135559082, "learning_rate": 2.19140625e-05, "loss": 0.3331298828125, "step": 375 }, { "epoch": 0.025415709071245098, "grad_norm": 3.021341323852539, "learning_rate": 2.197265625e-05, "loss": 0.25213623046875, "step": 376 }, { "epoch": 0.025483304042179262, "grad_norm": 6.584684371948242, "learning_rate": 2.203125e-05, "loss": 0.474853515625, "step": 377 }, { "epoch": 0.025550899013113425, "grad_norm": 6.565762042999268, "learning_rate": 2.208984375e-05, "loss": 0.347900390625, "step": 378 }, { "epoch": 0.025618493984047586, "grad_norm": 8.539937019348145, "learning_rate": 2.2148437500000002e-05, "loss": 0.156097412109375, "step": 379 }, { "epoch": 0.02568608895498175, "grad_norm": 3.570974588394165, "learning_rate": 2.220703125e-05, "loss": 0.394775390625, "step": 380 }, { "epoch": 0.025753683925915913, "grad_norm": 2.808516263961792, "learning_rate": 2.2265625e-05, "loss": 0.333984375, "step": 381 }, { "epoch": 0.025821278896850073, "grad_norm": 13.46990966796875, "learning_rate": 2.2324218750000002e-05, "loss": 0.334564208984375, "step": 382 }, { "epoch": 0.025888873867784237, "grad_norm": 14.196720123291016, "learning_rate": 2.23828125e-05, "loss": 0.25933837890625, "step": 383 }, { "epoch": 0.0259564688387184, "grad_norm": 9.008776664733887, "learning_rate": 2.244140625e-05, "loss": 0.487060546875, "step": 384 }, { "epoch": 0.02602406380965256, "grad_norm": 4.959026336669922, "learning_rate": 2.25e-05, "loss": 0.4044189453125, "step": 385 }, { "epoch": 0.026091658780586724, "grad_norm": 5.3002119064331055, "learning_rate": 2.255859375e-05, "loss": 0.3712158203125, "step": 386 }, { "epoch": 0.026159253751520888, "grad_norm": 5.234410285949707, "learning_rate": 2.26171875e-05, "loss": 0.435302734375, "step": 387 }, { "epoch": 0.026226848722455048, "grad_norm": 6.971327304840088, "learning_rate": 2.267578125e-05, "loss": 0.18670654296875, "step": 388 }, { "epoch": 0.02629444369338921, "grad_norm": 2.5746238231658936, "learning_rate": 2.2734375000000002e-05, "loss": 0.12677001953125, "step": 389 }, { "epoch": 0.026362038664323375, "grad_norm": 24.637500762939453, "learning_rate": 2.279296875e-05, "loss": 0.50579833984375, "step": 390 }, { "epoch": 0.02642963363525754, "grad_norm": 18.96111488342285, "learning_rate": 2.28515625e-05, "loss": 0.44525146484375, "step": 391 }, { "epoch": 0.0264972286061917, "grad_norm": 11.986050605773926, "learning_rate": 2.291015625e-05, "loss": 0.209686279296875, "step": 392 }, { "epoch": 0.026564823577125862, "grad_norm": 8.006890296936035, "learning_rate": 2.296875e-05, "loss": 0.317169189453125, "step": 393 }, { "epoch": 0.026632418548060026, "grad_norm": 8.278057098388672, "learning_rate": 2.302734375e-05, "loss": 0.359375, "step": 394 }, { "epoch": 0.026700013518994186, "grad_norm": 12.564659118652344, "learning_rate": 2.30859375e-05, "loss": 0.3480224609375, "step": 395 }, { "epoch": 0.02676760848992835, "grad_norm": 13.506868362426758, "learning_rate": 2.3144531250000002e-05, "loss": 0.3861083984375, "step": 396 }, { "epoch": 0.026835203460862513, "grad_norm": 5.085142612457275, "learning_rate": 2.3203125e-05, "loss": 0.201690673828125, "step": 397 }, { "epoch": 0.026902798431796673, "grad_norm": 5.772094249725342, "learning_rate": 2.326171875e-05, "loss": 0.4508056640625, "step": 398 }, { "epoch": 0.026970393402730837, "grad_norm": 9.741066932678223, "learning_rate": 2.3320312500000002e-05, "loss": 0.3671875, "step": 399 }, { "epoch": 0.027037988373665, "grad_norm": 4.6745429039001465, "learning_rate": 2.337890625e-05, "loss": 0.36529541015625, "step": 400 }, { "epoch": 0.02710558334459916, "grad_norm": 2.949317693710327, "learning_rate": 2.34375e-05, "loss": 0.246307373046875, "step": 401 }, { "epoch": 0.027173178315533324, "grad_norm": 8.098626136779785, "learning_rate": 2.349609375e-05, "loss": 0.32440185546875, "step": 402 }, { "epoch": 0.027240773286467488, "grad_norm": 4.718655109405518, "learning_rate": 2.35546875e-05, "loss": 0.33172607421875, "step": 403 }, { "epoch": 0.027308368257401648, "grad_norm": 17.741758346557617, "learning_rate": 2.361328125e-05, "loss": 0.479248046875, "step": 404 }, { "epoch": 0.027375963228335812, "grad_norm": 5.94247579574585, "learning_rate": 2.3671875e-05, "loss": 0.381103515625, "step": 405 }, { "epoch": 0.027443558199269975, "grad_norm": 6.330992698669434, "learning_rate": 2.3730468750000002e-05, "loss": 0.40203857421875, "step": 406 }, { "epoch": 0.027511153170204136, "grad_norm": 3.6445422172546387, "learning_rate": 2.37890625e-05, "loss": 0.1115570068359375, "step": 407 }, { "epoch": 0.0275787481411383, "grad_norm": 17.496801376342773, "learning_rate": 2.384765625e-05, "loss": 0.46124267578125, "step": 408 }, { "epoch": 0.027646343112072463, "grad_norm": 25.54990577697754, "learning_rate": 2.3906250000000002e-05, "loss": 0.5157470703125, "step": 409 }, { "epoch": 0.027713938083006623, "grad_norm": 11.593282699584961, "learning_rate": 2.396484375e-05, "loss": 0.2220001220703125, "step": 410 }, { "epoch": 0.027781533053940786, "grad_norm": 15.209245681762695, "learning_rate": 2.40234375e-05, "loss": 0.3624267578125, "step": 411 }, { "epoch": 0.02784912802487495, "grad_norm": 4.095560073852539, "learning_rate": 2.408203125e-05, "loss": 0.210906982421875, "step": 412 }, { "epoch": 0.02791672299580911, "grad_norm": 6.161952495574951, "learning_rate": 2.4140625e-05, "loss": 0.1975860595703125, "step": 413 }, { "epoch": 0.027984317966743274, "grad_norm": 10.036432266235352, "learning_rate": 2.419921875e-05, "loss": 0.315673828125, "step": 414 }, { "epoch": 0.028051912937677437, "grad_norm": 16.73401641845703, "learning_rate": 2.42578125e-05, "loss": 0.31842041015625, "step": 415 }, { "epoch": 0.028119507908611598, "grad_norm": 11.085611343383789, "learning_rate": 2.4316406250000002e-05, "loss": 0.3255615234375, "step": 416 }, { "epoch": 0.02818710287954576, "grad_norm": 22.547876358032227, "learning_rate": 2.4375e-05, "loss": 0.39947509765625, "step": 417 }, { "epoch": 0.028254697850479925, "grad_norm": 13.38790225982666, "learning_rate": 2.443359375e-05, "loss": 0.41796875, "step": 418 }, { "epoch": 0.02832229282141409, "grad_norm": 6.051294326782227, "learning_rate": 2.44921875e-05, "loss": 0.35784912109375, "step": 419 }, { "epoch": 0.02838988779234825, "grad_norm": 6.584441184997559, "learning_rate": 2.455078125e-05, "loss": 0.4058837890625, "step": 420 }, { "epoch": 0.028457482763282412, "grad_norm": 3.4778871536254883, "learning_rate": 2.4609375e-05, "loss": 0.27215576171875, "step": 421 }, { "epoch": 0.028525077734216576, "grad_norm": 3.500516653060913, "learning_rate": 2.466796875e-05, "loss": 0.3768310546875, "step": 422 }, { "epoch": 0.028592672705150736, "grad_norm": 14.064346313476562, "learning_rate": 2.4726562500000002e-05, "loss": 0.4697265625, "step": 423 }, { "epoch": 0.0286602676760849, "grad_norm": 4.143049240112305, "learning_rate": 2.478515625e-05, "loss": 0.2752685546875, "step": 424 }, { "epoch": 0.028727862647019063, "grad_norm": 3.570446252822876, "learning_rate": 2.484375e-05, "loss": 0.266387939453125, "step": 425 }, { "epoch": 0.028795457617953223, "grad_norm": 3.9236817359924316, "learning_rate": 2.4902343750000002e-05, "loss": 0.2886962890625, "step": 426 }, { "epoch": 0.028863052588887387, "grad_norm": 3.8900468349456787, "learning_rate": 2.49609375e-05, "loss": 0.3961181640625, "step": 427 }, { "epoch": 0.02893064755982155, "grad_norm": 8.518715858459473, "learning_rate": 2.501953125e-05, "loss": 0.17435455322265625, "step": 428 }, { "epoch": 0.02899824253075571, "grad_norm": 14.21646785736084, "learning_rate": 2.5078125e-05, "loss": 0.4107666015625, "step": 429 }, { "epoch": 0.029065837501689874, "grad_norm": 2.56670880317688, "learning_rate": 2.513671875e-05, "loss": 0.4224853515625, "step": 430 }, { "epoch": 0.029133432472624038, "grad_norm": 9.146775245666504, "learning_rate": 2.51953125e-05, "loss": 0.3985595703125, "step": 431 }, { "epoch": 0.029201027443558198, "grad_norm": 4.262503147125244, "learning_rate": 2.525390625e-05, "loss": 0.25933837890625, "step": 432 }, { "epoch": 0.02926862241449236, "grad_norm": 2.930330514907837, "learning_rate": 2.5312500000000002e-05, "loss": 0.43060302734375, "step": 433 }, { "epoch": 0.029336217385426525, "grad_norm": 5.680633544921875, "learning_rate": 2.537109375e-05, "loss": 0.36151123046875, "step": 434 }, { "epoch": 0.029403812356360685, "grad_norm": 4.1382575035095215, "learning_rate": 2.54296875e-05, "loss": 0.34991455078125, "step": 435 }, { "epoch": 0.02947140732729485, "grad_norm": 2.5157015323638916, "learning_rate": 2.548828125e-05, "loss": 0.364288330078125, "step": 436 }, { "epoch": 0.029539002298229013, "grad_norm": 17.210371017456055, "learning_rate": 2.5546875e-05, "loss": 0.43927001953125, "step": 437 }, { "epoch": 0.029606597269163173, "grad_norm": 7.271857738494873, "learning_rate": 2.560546875e-05, "loss": 0.365478515625, "step": 438 }, { "epoch": 0.029674192240097336, "grad_norm": 13.226239204406738, "learning_rate": 2.56640625e-05, "loss": 0.4927978515625, "step": 439 }, { "epoch": 0.0297417872110315, "grad_norm": 8.247701644897461, "learning_rate": 2.5722656250000002e-05, "loss": 0.441650390625, "step": 440 }, { "epoch": 0.02980938218196566, "grad_norm": 6.142560958862305, "learning_rate": 2.578125e-05, "loss": 0.3355712890625, "step": 441 }, { "epoch": 0.029876977152899824, "grad_norm": 4.405035972595215, "learning_rate": 2.583984375e-05, "loss": 0.36810302734375, "step": 442 }, { "epoch": 0.029944572123833987, "grad_norm": 13.606734275817871, "learning_rate": 2.5898437500000002e-05, "loss": 0.2867431640625, "step": 443 }, { "epoch": 0.03001216709476815, "grad_norm": 11.466470718383789, "learning_rate": 2.595703125e-05, "loss": 0.39105224609375, "step": 444 }, { "epoch": 0.03007976206570231, "grad_norm": 2.422365427017212, "learning_rate": 2.6015625e-05, "loss": 0.29901123046875, "step": 445 }, { "epoch": 0.030147357036636475, "grad_norm": 4.885528564453125, "learning_rate": 2.607421875e-05, "loss": 0.3665771484375, "step": 446 }, { "epoch": 0.03021495200757064, "grad_norm": 9.529008865356445, "learning_rate": 2.61328125e-05, "loss": 0.44207763671875, "step": 447 }, { "epoch": 0.0302825469785048, "grad_norm": 7.798081398010254, "learning_rate": 2.619140625e-05, "loss": 0.40057373046875, "step": 448 }, { "epoch": 0.030350141949438962, "grad_norm": 9.272089004516602, "learning_rate": 2.625e-05, "loss": 0.33935546875, "step": 449 }, { "epoch": 0.030417736920373126, "grad_norm": 5.007686138153076, "learning_rate": 2.6308593750000002e-05, "loss": 0.3994140625, "step": 450 }, { "epoch": 0.030485331891307286, "grad_norm": 4.868362903594971, "learning_rate": 2.63671875e-05, "loss": 0.41357421875, "step": 451 }, { "epoch": 0.03055292686224145, "grad_norm": 11.309981346130371, "learning_rate": 2.642578125e-05, "loss": 0.292633056640625, "step": 452 }, { "epoch": 0.030620521833175613, "grad_norm": 6.94182825088501, "learning_rate": 2.6484375000000002e-05, "loss": 0.3660888671875, "step": 453 }, { "epoch": 0.030688116804109773, "grad_norm": 2.6658709049224854, "learning_rate": 2.654296875e-05, "loss": 0.39654541015625, "step": 454 }, { "epoch": 0.030755711775043937, "grad_norm": 5.055711269378662, "learning_rate": 2.66015625e-05, "loss": 0.4534912109375, "step": 455 }, { "epoch": 0.0308233067459781, "grad_norm": 3.9186575412750244, "learning_rate": 2.666015625e-05, "loss": 0.29193115234375, "step": 456 }, { "epoch": 0.03089090171691226, "grad_norm": 2.5627639293670654, "learning_rate": 2.6718750000000002e-05, "loss": 0.254241943359375, "step": 457 }, { "epoch": 0.030958496687846424, "grad_norm": 12.532496452331543, "learning_rate": 2.677734375e-05, "loss": 0.24310302734375, "step": 458 }, { "epoch": 0.031026091658780588, "grad_norm": 4.925905704498291, "learning_rate": 2.68359375e-05, "loss": 0.286224365234375, "step": 459 }, { "epoch": 0.031093686629714748, "grad_norm": 5.966923236846924, "learning_rate": 2.6894531250000002e-05, "loss": 0.3790283203125, "step": 460 }, { "epoch": 0.03116128160064891, "grad_norm": 10.044654846191406, "learning_rate": 2.6953125e-05, "loss": 0.362579345703125, "step": 461 }, { "epoch": 0.031228876571583075, "grad_norm": 3.4280588626861572, "learning_rate": 2.701171875e-05, "loss": 0.3734130859375, "step": 462 }, { "epoch": 0.03129647154251724, "grad_norm": 6.255305767059326, "learning_rate": 2.70703125e-05, "loss": 0.4239501953125, "step": 463 }, { "epoch": 0.0313640665134514, "grad_norm": 3.754990339279175, "learning_rate": 2.712890625e-05, "loss": 0.35736083984375, "step": 464 }, { "epoch": 0.03143166148438556, "grad_norm": 2.341087818145752, "learning_rate": 2.71875e-05, "loss": 0.3848876953125, "step": 465 }, { "epoch": 0.03149925645531972, "grad_norm": 5.349051475524902, "learning_rate": 2.724609375e-05, "loss": 0.37298583984375, "step": 466 }, { "epoch": 0.031566851426253886, "grad_norm": 3.8415236473083496, "learning_rate": 2.7304687500000002e-05, "loss": 0.28985595703125, "step": 467 }, { "epoch": 0.03163444639718805, "grad_norm": 4.87544584274292, "learning_rate": 2.736328125e-05, "loss": 0.3218994140625, "step": 468 }, { "epoch": 0.031702041368122214, "grad_norm": 2.2449734210968018, "learning_rate": 2.7421875e-05, "loss": 0.29010009765625, "step": 469 }, { "epoch": 0.03176963633905638, "grad_norm": 2.799497365951538, "learning_rate": 2.7480468750000002e-05, "loss": 0.3228759765625, "step": 470 }, { "epoch": 0.031837231309990534, "grad_norm": 5.4263529777526855, "learning_rate": 2.75390625e-05, "loss": 0.31768798828125, "step": 471 }, { "epoch": 0.0319048262809247, "grad_norm": 3.4334893226623535, "learning_rate": 2.759765625e-05, "loss": 0.30535888671875, "step": 472 }, { "epoch": 0.03197242125185886, "grad_norm": 2.6932406425476074, "learning_rate": 2.765625e-05, "loss": 0.276336669921875, "step": 473 }, { "epoch": 0.032040016222793025, "grad_norm": 12.669774055480957, "learning_rate": 2.7714843750000002e-05, "loss": 0.39874267578125, "step": 474 }, { "epoch": 0.03210761119372719, "grad_norm": 3.101942300796509, "learning_rate": 2.77734375e-05, "loss": 0.309173583984375, "step": 475 }, { "epoch": 0.03217520616466135, "grad_norm": 2.9684765338897705, "learning_rate": 2.783203125e-05, "loss": 0.28106689453125, "step": 476 }, { "epoch": 0.03224280113559551, "grad_norm": 13.248656272888184, "learning_rate": 2.7890625000000002e-05, "loss": 0.435302734375, "step": 477 }, { "epoch": 0.03231039610652967, "grad_norm": 6.103296756744385, "learning_rate": 2.794921875e-05, "loss": 0.40869140625, "step": 478 }, { "epoch": 0.032377991077463836, "grad_norm": 3.4566497802734375, "learning_rate": 2.80078125e-05, "loss": 0.2940673828125, "step": 479 }, { "epoch": 0.032445586048398, "grad_norm": 5.833952903747559, "learning_rate": 2.806640625e-05, "loss": 0.30517578125, "step": 480 }, { "epoch": 0.03251318101933216, "grad_norm": 8.346158981323242, "learning_rate": 2.8125e-05, "loss": 0.415771484375, "step": 481 }, { "epoch": 0.03258077599026633, "grad_norm": 2.4394288063049316, "learning_rate": 2.818359375e-05, "loss": 0.137420654296875, "step": 482 }, { "epoch": 0.03264837096120048, "grad_norm": 5.202364444732666, "learning_rate": 2.82421875e-05, "loss": 0.31781005859375, "step": 483 }, { "epoch": 0.03271596593213465, "grad_norm": 9.809089660644531, "learning_rate": 2.8300781250000002e-05, "loss": 0.35797119140625, "step": 484 }, { "epoch": 0.03278356090306881, "grad_norm": 12.435544967651367, "learning_rate": 2.8359375e-05, "loss": 0.42919921875, "step": 485 }, { "epoch": 0.032851155874002974, "grad_norm": 5.345820426940918, "learning_rate": 2.841796875e-05, "loss": 0.2982177734375, "step": 486 }, { "epoch": 0.03291875084493714, "grad_norm": 2.1415748596191406, "learning_rate": 2.8476562500000002e-05, "loss": 0.34600830078125, "step": 487 }, { "epoch": 0.0329863458158713, "grad_norm": 6.448680400848389, "learning_rate": 2.853515625e-05, "loss": 0.33074951171875, "step": 488 }, { "epoch": 0.033053940786805465, "grad_norm": 5.247228622436523, "learning_rate": 2.859375e-05, "loss": 0.4593505859375, "step": 489 }, { "epoch": 0.03312153575773962, "grad_norm": 3.8276522159576416, "learning_rate": 2.865234375e-05, "loss": 0.2916259765625, "step": 490 }, { "epoch": 0.033189130728673785, "grad_norm": 4.432213306427002, "learning_rate": 2.87109375e-05, "loss": 0.2291717529296875, "step": 491 }, { "epoch": 0.03325672569960795, "grad_norm": 3.3278746604919434, "learning_rate": 2.876953125e-05, "loss": 0.2747344970703125, "step": 492 }, { "epoch": 0.03332432067054211, "grad_norm": 8.241415023803711, "learning_rate": 2.8828125e-05, "loss": 0.2777099609375, "step": 493 }, { "epoch": 0.033391915641476276, "grad_norm": 11.786637306213379, "learning_rate": 2.8886718750000002e-05, "loss": 0.320220947265625, "step": 494 }, { "epoch": 0.03345951061241044, "grad_norm": 11.904084205627441, "learning_rate": 2.89453125e-05, "loss": 0.3919677734375, "step": 495 }, { "epoch": 0.033527105583344596, "grad_norm": 6.9499287605285645, "learning_rate": 2.900390625e-05, "loss": 0.304412841796875, "step": 496 }, { "epoch": 0.03359470055427876, "grad_norm": 2.9850666522979736, "learning_rate": 2.90625e-05, "loss": 0.284698486328125, "step": 497 }, { "epoch": 0.033662295525212924, "grad_norm": 8.892131805419922, "learning_rate": 2.912109375e-05, "loss": 0.4295654296875, "step": 498 }, { "epoch": 0.03372989049614709, "grad_norm": 5.092553615570068, "learning_rate": 2.91796875e-05, "loss": 0.35784912109375, "step": 499 }, { "epoch": 0.03379748546708125, "grad_norm": 4.726744651794434, "learning_rate": 2.923828125e-05, "loss": 0.187225341796875, "step": 500 }, { "epoch": 0.033865080438015414, "grad_norm": 2.706753730773926, "learning_rate": 2.9296875000000002e-05, "loss": 0.28741455078125, "step": 501 }, { "epoch": 0.03393267540894957, "grad_norm": 6.744409084320068, "learning_rate": 2.935546875e-05, "loss": 0.31524658203125, "step": 502 }, { "epoch": 0.034000270379883735, "grad_norm": 17.818105697631836, "learning_rate": 2.94140625e-05, "loss": 0.3685760498046875, "step": 503 }, { "epoch": 0.0340678653508179, "grad_norm": 7.740932464599609, "learning_rate": 2.9472656250000002e-05, "loss": 0.290557861328125, "step": 504 }, { "epoch": 0.03413546032175206, "grad_norm": 15.512068748474121, "learning_rate": 2.953125e-05, "loss": 0.35546875, "step": 505 }, { "epoch": 0.034203055292686226, "grad_norm": 7.935123920440674, "learning_rate": 2.958984375e-05, "loss": 0.32373046875, "step": 506 }, { "epoch": 0.03427065026362039, "grad_norm": 3.2280499935150146, "learning_rate": 2.96484375e-05, "loss": 0.241668701171875, "step": 507 }, { "epoch": 0.034338245234554546, "grad_norm": 1.9198172092437744, "learning_rate": 2.970703125e-05, "loss": 0.326202392578125, "step": 508 }, { "epoch": 0.03440584020548871, "grad_norm": 9.281710624694824, "learning_rate": 2.9765625e-05, "loss": 0.36627197265625, "step": 509 }, { "epoch": 0.03447343517642287, "grad_norm": 2.1194095611572266, "learning_rate": 2.982421875e-05, "loss": 0.281494140625, "step": 510 }, { "epoch": 0.03454103014735704, "grad_norm": 19.003400802612305, "learning_rate": 2.9882812500000002e-05, "loss": 0.433837890625, "step": 511 }, { "epoch": 0.0346086251182912, "grad_norm": 12.424899101257324, "learning_rate": 2.994140625e-05, "loss": 0.3115234375, "step": 512 }, { "epoch": 0.034676220089225364, "grad_norm": 10.93581771850586, "learning_rate": 3e-05, "loss": 0.4068603515625, "step": 513 }, { "epoch": 0.03474381506015953, "grad_norm": 4.4638190269470215, "learning_rate": 2.9999999637103358e-05, "loss": 0.299652099609375, "step": 514 }, { "epoch": 0.034811410031093684, "grad_norm": 3.5980618000030518, "learning_rate": 2.999999854841345e-05, "loss": 0.3424072265625, "step": 515 }, { "epoch": 0.03487900500202785, "grad_norm": 7.546493053436279, "learning_rate": 2.9999996733930325e-05, "loss": 0.38592529296875, "step": 516 }, { "epoch": 0.03494659997296201, "grad_norm": 11.899807929992676, "learning_rate": 2.999999419365408e-05, "loss": 0.331146240234375, "step": 517 }, { "epoch": 0.035014194943896175, "grad_norm": 11.275507926940918, "learning_rate": 2.999999092758483e-05, "loss": 0.39947509765625, "step": 518 }, { "epoch": 0.03508178991483034, "grad_norm": 2.206550359725952, "learning_rate": 2.9999986935722734e-05, "loss": 0.359130859375, "step": 519 }, { "epoch": 0.0351493848857645, "grad_norm": 3.4530696868896484, "learning_rate": 2.9999982218067983e-05, "loss": 0.2608642578125, "step": 520 }, { "epoch": 0.03521697985669866, "grad_norm": 3.4354255199432373, "learning_rate": 2.9999976774620814e-05, "loss": 0.38787841796875, "step": 521 }, { "epoch": 0.03528457482763282, "grad_norm": 3.2784194946289062, "learning_rate": 2.999997060538148e-05, "loss": 0.4366455078125, "step": 522 }, { "epoch": 0.035352169798566986, "grad_norm": 3.30827260017395, "learning_rate": 2.999996371035029e-05, "loss": 0.295684814453125, "step": 523 }, { "epoch": 0.03541976476950115, "grad_norm": 6.53570556640625, "learning_rate": 2.9999956089527564e-05, "loss": 0.37469482421875, "step": 524 }, { "epoch": 0.03548735974043531, "grad_norm": 1.7448616027832031, "learning_rate": 2.999994774291368e-05, "loss": 0.31988525390625, "step": 525 }, { "epoch": 0.03555495471136948, "grad_norm": 2.038508176803589, "learning_rate": 2.999993867050905e-05, "loss": 0.2918701171875, "step": 526 }, { "epoch": 0.035622549682303634, "grad_norm": 3.0199995040893555, "learning_rate": 2.9999928872314093e-05, "loss": 0.186065673828125, "step": 527 }, { "epoch": 0.0356901446532378, "grad_norm": 4.227959632873535, "learning_rate": 2.99999183483293e-05, "loss": 0.389312744140625, "step": 528 }, { "epoch": 0.03575773962417196, "grad_norm": 5.443289279937744, "learning_rate": 2.999990709855517e-05, "loss": 0.36431884765625, "step": 529 }, { "epoch": 0.035825334595106124, "grad_norm": 2.9951696395874023, "learning_rate": 2.9999895122992257e-05, "loss": 0.38897705078125, "step": 530 }, { "epoch": 0.03589292956604029, "grad_norm": 5.804780960083008, "learning_rate": 2.9999882421641124e-05, "loss": 0.31671142578125, "step": 531 }, { "epoch": 0.03596052453697445, "grad_norm": 2.101958990097046, "learning_rate": 2.9999868994502403e-05, "loss": 0.37744140625, "step": 532 }, { "epoch": 0.03602811950790861, "grad_norm": 4.496527671813965, "learning_rate": 2.999985484157674e-05, "loss": 0.40142822265625, "step": 533 }, { "epoch": 0.03609571447884277, "grad_norm": 6.317284107208252, "learning_rate": 2.999983996286481e-05, "loss": 0.33758544921875, "step": 534 }, { "epoch": 0.036163309449776936, "grad_norm": 2.894611120223999, "learning_rate": 2.9999824358367344e-05, "loss": 0.3934326171875, "step": 535 }, { "epoch": 0.0362309044207111, "grad_norm": 5.110440254211426, "learning_rate": 2.999980802808509e-05, "loss": 0.43792724609375, "step": 536 }, { "epoch": 0.03629849939164526, "grad_norm": 4.375242233276367, "learning_rate": 2.9999790972018845e-05, "loss": 0.394378662109375, "step": 537 }, { "epoch": 0.036366094362579426, "grad_norm": 8.274523735046387, "learning_rate": 2.9999773190169423e-05, "loss": 0.298370361328125, "step": 538 }, { "epoch": 0.03643368933351359, "grad_norm": 3.5953969955444336, "learning_rate": 2.9999754682537697e-05, "loss": 0.35516357421875, "step": 539 }, { "epoch": 0.03650128430444775, "grad_norm": 3.01220965385437, "learning_rate": 2.9999735449124556e-05, "loss": 0.41400146484375, "step": 540 }, { "epoch": 0.03656887927538191, "grad_norm": 5.924288749694824, "learning_rate": 2.999971548993093e-05, "loss": 0.196044921875, "step": 541 }, { "epoch": 0.036636474246316074, "grad_norm": 7.973647117614746, "learning_rate": 2.999969480495779e-05, "loss": 0.4259033203125, "step": 542 }, { "epoch": 0.03670406921725024, "grad_norm": 1.9203258752822876, "learning_rate": 2.999967339420613e-05, "loss": 0.3948974609375, "step": 543 }, { "epoch": 0.0367716641881844, "grad_norm": 3.788862466812134, "learning_rate": 2.9999651257676987e-05, "loss": 0.2816162109375, "step": 544 }, { "epoch": 0.036839259159118565, "grad_norm": 3.534571409225464, "learning_rate": 2.999962839537144e-05, "loss": 0.361724853515625, "step": 545 }, { "epoch": 0.03690685413005272, "grad_norm": 10.060121536254883, "learning_rate": 2.9999604807290582e-05, "loss": 0.39111328125, "step": 546 }, { "epoch": 0.036974449100986885, "grad_norm": 3.1327807903289795, "learning_rate": 2.999958049343557e-05, "loss": 0.2177734375, "step": 547 }, { "epoch": 0.03704204407192105, "grad_norm": 1.7718701362609863, "learning_rate": 2.999955545380757e-05, "loss": 0.22357177734375, "step": 548 }, { "epoch": 0.03710963904285521, "grad_norm": 7.633596420288086, "learning_rate": 2.9999529688407792e-05, "loss": 0.37152099609375, "step": 549 }, { "epoch": 0.037177234013789376, "grad_norm": 2.356869697570801, "learning_rate": 2.999950319723749e-05, "loss": 0.3543701171875, "step": 550 }, { "epoch": 0.03724482898472354, "grad_norm": 13.197010040283203, "learning_rate": 2.9999475980297943e-05, "loss": 0.4085693359375, "step": 551 }, { "epoch": 0.037312423955657696, "grad_norm": 11.541019439697266, "learning_rate": 2.9999448037590466e-05, "loss": 0.291229248046875, "step": 552 }, { "epoch": 0.03738001892659186, "grad_norm": 8.716489791870117, "learning_rate": 2.9999419369116414e-05, "loss": 0.23077392578125, "step": 553 }, { "epoch": 0.03744761389752602, "grad_norm": 9.644001007080078, "learning_rate": 2.999938997487717e-05, "loss": 0.324127197265625, "step": 554 }, { "epoch": 0.03751520886846019, "grad_norm": 6.329170227050781, "learning_rate": 2.999935985487416e-05, "loss": 0.3779296875, "step": 555 }, { "epoch": 0.03758280383939435, "grad_norm": 2.739719867706299, "learning_rate": 2.999932900910884e-05, "loss": 0.280029296875, "step": 556 }, { "epoch": 0.037650398810328514, "grad_norm": 9.308096885681152, "learning_rate": 2.9999297437582704e-05, "loss": 0.4281005859375, "step": 557 }, { "epoch": 0.03771799378126267, "grad_norm": 10.382850646972656, "learning_rate": 2.9999265140297276e-05, "loss": 0.4627685546875, "step": 558 }, { "epoch": 0.037785588752196834, "grad_norm": 6.472853660583496, "learning_rate": 2.999923211725412e-05, "loss": 0.4361572265625, "step": 559 }, { "epoch": 0.037853183723131, "grad_norm": 15.294325828552246, "learning_rate": 2.999919836845484e-05, "loss": 0.36859130859375, "step": 560 }, { "epoch": 0.03792077869406516, "grad_norm": 5.0728678703308105, "learning_rate": 2.9999163893901057e-05, "loss": 0.3812255859375, "step": 561 }, { "epoch": 0.037988373664999325, "grad_norm": 4.293437480926514, "learning_rate": 2.9999128693594456e-05, "loss": 0.34765625, "step": 562 }, { "epoch": 0.03805596863593349, "grad_norm": 5.321987628936768, "learning_rate": 2.999909276753672e-05, "loss": 0.32623291015625, "step": 563 }, { "epoch": 0.038123563606867646, "grad_norm": 2.2757415771484375, "learning_rate": 2.9999056115729605e-05, "loss": 0.2691192626953125, "step": 564 }, { "epoch": 0.03819115857780181, "grad_norm": 8.157017707824707, "learning_rate": 2.9999018738174873e-05, "loss": 0.3110809326171875, "step": 565 }, { "epoch": 0.03825875354873597, "grad_norm": 10.322105407714844, "learning_rate": 2.999898063487434e-05, "loss": 0.45782470703125, "step": 566 }, { "epoch": 0.038326348519670136, "grad_norm": 1.8668588399887085, "learning_rate": 2.9998941805829836e-05, "loss": 0.196990966796875, "step": 567 }, { "epoch": 0.0383939434906043, "grad_norm": 3.949869394302368, "learning_rate": 2.9998902251043263e-05, "loss": 0.3184814453125, "step": 568 }, { "epoch": 0.038461538461538464, "grad_norm": 2.3387792110443115, "learning_rate": 2.999886197051652e-05, "loss": 0.242156982421875, "step": 569 }, { "epoch": 0.03852913343247263, "grad_norm": 9.278301239013672, "learning_rate": 2.999882096425155e-05, "loss": 0.39801025390625, "step": 570 }, { "epoch": 0.038596728403406784, "grad_norm": 12.146195411682129, "learning_rate": 2.999877923225035e-05, "loss": 0.3961181640625, "step": 571 }, { "epoch": 0.03866432337434095, "grad_norm": 8.609543800354004, "learning_rate": 2.9998736774514933e-05, "loss": 0.261505126953125, "step": 572 }, { "epoch": 0.03873191834527511, "grad_norm": 8.362074851989746, "learning_rate": 2.9998693591047358e-05, "loss": 0.28125, "step": 573 }, { "epoch": 0.038799513316209275, "grad_norm": 6.281574726104736, "learning_rate": 2.9998649681849707e-05, "loss": 0.29119873046875, "step": 574 }, { "epoch": 0.03886710828714344, "grad_norm": 4.739378452301025, "learning_rate": 2.999860504692411e-05, "loss": 0.29443359375, "step": 575 }, { "epoch": 0.0389347032580776, "grad_norm": 11.210235595703125, "learning_rate": 2.999855968627273e-05, "loss": 0.405120849609375, "step": 576 }, { "epoch": 0.03900229822901176, "grad_norm": 15.196562767028809, "learning_rate": 2.9998513599897752e-05, "loss": 0.480712890625, "step": 577 }, { "epoch": 0.03906989319994592, "grad_norm": 4.795595645904541, "learning_rate": 2.999846678780141e-05, "loss": 0.165557861328125, "step": 578 }, { "epoch": 0.039137488170880086, "grad_norm": 13.647446632385254, "learning_rate": 2.9998419249985975e-05, "loss": 0.39324951171875, "step": 579 }, { "epoch": 0.03920508314181425, "grad_norm": 11.784943580627441, "learning_rate": 2.999837098645374e-05, "loss": 0.42138671875, "step": 580 }, { "epoch": 0.03927267811274841, "grad_norm": 4.525176525115967, "learning_rate": 2.9998321997207045e-05, "loss": 0.2355499267578125, "step": 581 }, { "epoch": 0.03934027308368258, "grad_norm": 5.435191631317139, "learning_rate": 2.9998272282248255e-05, "loss": 0.4295654296875, "step": 582 }, { "epoch": 0.03940786805461673, "grad_norm": 5.422408103942871, "learning_rate": 2.999822184157978e-05, "loss": 0.2930908203125, "step": 583 }, { "epoch": 0.0394754630255509, "grad_norm": 5.444070816040039, "learning_rate": 2.9998170675204062e-05, "loss": 0.299530029296875, "step": 584 }, { "epoch": 0.03954305799648506, "grad_norm": 6.023238182067871, "learning_rate": 2.9998118783123572e-05, "loss": 0.34906005859375, "step": 585 }, { "epoch": 0.039610652967419224, "grad_norm": 4.358989715576172, "learning_rate": 2.999806616534082e-05, "loss": 0.29541015625, "step": 586 }, { "epoch": 0.03967824793835339, "grad_norm": 1.7932883501052856, "learning_rate": 2.9998012821858358e-05, "loss": 0.328826904296875, "step": 587 }, { "epoch": 0.03974584290928755, "grad_norm": 3.7754623889923096, "learning_rate": 2.9997958752678767e-05, "loss": 0.2215576171875, "step": 588 }, { "epoch": 0.03981343788022171, "grad_norm": 5.251053333282471, "learning_rate": 2.9997903957804654e-05, "loss": 0.418701171875, "step": 589 }, { "epoch": 0.03988103285115587, "grad_norm": 1.6806747913360596, "learning_rate": 2.999784843723868e-05, "loss": 0.2515869140625, "step": 590 }, { "epoch": 0.039948627822090035, "grad_norm": 3.9885716438293457, "learning_rate": 2.9997792190983525e-05, "loss": 0.365234375, "step": 591 }, { "epoch": 0.0400162227930242, "grad_norm": 3.356930732727051, "learning_rate": 2.9997735219041922e-05, "loss": 0.244049072265625, "step": 592 }, { "epoch": 0.04008381776395836, "grad_norm": 4.514139175415039, "learning_rate": 2.9997677521416612e-05, "loss": 0.368560791015625, "step": 593 }, { "epoch": 0.040151412734892526, "grad_norm": 3.413482666015625, "learning_rate": 2.9997619098110394e-05, "loss": 0.39202880859375, "step": 594 }, { "epoch": 0.04021900770582669, "grad_norm": 4.274008750915527, "learning_rate": 2.9997559949126093e-05, "loss": 0.373291015625, "step": 595 }, { "epoch": 0.040286602676760846, "grad_norm": 2.4582133293151855, "learning_rate": 2.999750007446658e-05, "loss": 0.41107177734375, "step": 596 }, { "epoch": 0.04035419764769501, "grad_norm": 3.7623283863067627, "learning_rate": 2.9997439474134738e-05, "loss": 0.2841796875, "step": 597 }, { "epoch": 0.040421792618629174, "grad_norm": 1.8817347288131714, "learning_rate": 2.999737814813351e-05, "loss": 0.249969482421875, "step": 598 }, { "epoch": 0.04048938758956334, "grad_norm": 4.245645523071289, "learning_rate": 2.999731609646586e-05, "loss": 0.274688720703125, "step": 599 }, { "epoch": 0.0405569825604975, "grad_norm": 1.3283932209014893, "learning_rate": 2.999725331913479e-05, "loss": 0.341064453125, "step": 600 }, { "epoch": 0.040624577531431665, "grad_norm": 1.6760720014572144, "learning_rate": 2.9997189816143334e-05, "loss": 0.158477783203125, "step": 601 }, { "epoch": 0.04069217250236582, "grad_norm": 3.3977084159851074, "learning_rate": 2.999712558749457e-05, "loss": 0.4161376953125, "step": 602 }, { "epoch": 0.040759767473299985, "grad_norm": 5.955160617828369, "learning_rate": 2.9997060633191604e-05, "loss": 0.37835693359375, "step": 603 }, { "epoch": 0.04082736244423415, "grad_norm": 6.414957523345947, "learning_rate": 2.9996994953237578e-05, "loss": 0.41668701171875, "step": 604 }, { "epoch": 0.04089495741516831, "grad_norm": 4.293428421020508, "learning_rate": 2.9996928547635675e-05, "loss": 0.35223388671875, "step": 605 }, { "epoch": 0.040962552386102476, "grad_norm": 3.104825973510742, "learning_rate": 2.9996861416389102e-05, "loss": 0.308868408203125, "step": 606 }, { "epoch": 0.04103014735703664, "grad_norm": 3.2158091068267822, "learning_rate": 2.9996793559501106e-05, "loss": 0.33673095703125, "step": 607 }, { "epoch": 0.041097742327970796, "grad_norm": 6.520728588104248, "learning_rate": 2.9996724976974973e-05, "loss": 0.338134765625, "step": 608 }, { "epoch": 0.04116533729890496, "grad_norm": 2.897291660308838, "learning_rate": 2.9996655668814027e-05, "loss": 0.30340576171875, "step": 609 }, { "epoch": 0.04123293226983912, "grad_norm": 1.5819004774093628, "learning_rate": 2.9996585635021613e-05, "loss": 0.21893310546875, "step": 610 }, { "epoch": 0.04130052724077329, "grad_norm": 3.0707056522369385, "learning_rate": 2.9996514875601124e-05, "loss": 0.3094482421875, "step": 611 }, { "epoch": 0.04136812221170745, "grad_norm": 1.8866679668426514, "learning_rate": 2.9996443390555986e-05, "loss": 0.4013671875, "step": 612 }, { "epoch": 0.041435717182641614, "grad_norm": 2.2104105949401855, "learning_rate": 2.9996371179889652e-05, "loss": 0.3160400390625, "step": 613 }, { "epoch": 0.04150331215357577, "grad_norm": 3.780994415283203, "learning_rate": 2.9996298243605616e-05, "loss": 0.3109130859375, "step": 614 }, { "epoch": 0.041570907124509934, "grad_norm": 6.177525520324707, "learning_rate": 2.9996224581707413e-05, "loss": 0.27923583984375, "step": 615 }, { "epoch": 0.0416385020954441, "grad_norm": 6.063457489013672, "learning_rate": 2.9996150194198606e-05, "loss": 0.33197021484375, "step": 616 }, { "epoch": 0.04170609706637826, "grad_norm": 9.01041316986084, "learning_rate": 2.9996075081082792e-05, "loss": 0.33404541015625, "step": 617 }, { "epoch": 0.041773692037312425, "grad_norm": 2.79317045211792, "learning_rate": 2.99959992423636e-05, "loss": 0.307159423828125, "step": 618 }, { "epoch": 0.04184128700824659, "grad_norm": 2.599630355834961, "learning_rate": 2.9995922678044713e-05, "loss": 0.274627685546875, "step": 619 }, { "epoch": 0.04190888197918075, "grad_norm": 2.196805238723755, "learning_rate": 2.9995845388129827e-05, "loss": 0.32733154296875, "step": 620 }, { "epoch": 0.04197647695011491, "grad_norm": 9.51321792602539, "learning_rate": 2.999576737262268e-05, "loss": 0.41064453125, "step": 621 }, { "epoch": 0.04204407192104907, "grad_norm": 9.702874183654785, "learning_rate": 2.999568863152705e-05, "loss": 0.42547607421875, "step": 622 }, { "epoch": 0.042111666891983236, "grad_norm": 7.770937919616699, "learning_rate": 2.999560916484675e-05, "loss": 0.3333740234375, "step": 623 }, { "epoch": 0.0421792618629174, "grad_norm": 2.7705812454223633, "learning_rate": 2.9995528972585623e-05, "loss": 0.37530517578125, "step": 624 }, { "epoch": 0.042246856833851563, "grad_norm": 8.285850524902344, "learning_rate": 2.9995448054747543e-05, "loss": 0.4097900390625, "step": 625 }, { "epoch": 0.04231445180478573, "grad_norm": 2.7662277221679688, "learning_rate": 2.9995366411336432e-05, "loss": 0.34844970703125, "step": 626 }, { "epoch": 0.042382046775719884, "grad_norm": 2.931051731109619, "learning_rate": 2.999528404235624e-05, "loss": 0.33465576171875, "step": 627 }, { "epoch": 0.04244964174665405, "grad_norm": 3.1624903678894043, "learning_rate": 2.999520094781095e-05, "loss": 0.2385711669921875, "step": 628 }, { "epoch": 0.04251723671758821, "grad_norm": 3.4555795192718506, "learning_rate": 2.9995117127704582e-05, "loss": 0.344970703125, "step": 629 }, { "epoch": 0.042584831688522375, "grad_norm": 5.271152019500732, "learning_rate": 2.99950325820412e-05, "loss": 0.26873779296875, "step": 630 }, { "epoch": 0.04265242665945654, "grad_norm": 3.147442102432251, "learning_rate": 2.9994947310824884e-05, "loss": 0.255218505859375, "step": 631 }, { "epoch": 0.0427200216303907, "grad_norm": 3.7303199768066406, "learning_rate": 2.9994861314059766e-05, "loss": 0.35711669921875, "step": 632 }, { "epoch": 0.04278761660132486, "grad_norm": 2.220818519592285, "learning_rate": 2.9994774591750002e-05, "loss": 0.390228271484375, "step": 633 }, { "epoch": 0.04285521157225902, "grad_norm": 3.413996934890747, "learning_rate": 2.99946871438998e-05, "loss": 0.31585693359375, "step": 634 }, { "epoch": 0.042922806543193186, "grad_norm": 7.459842681884766, "learning_rate": 2.9994598970513377e-05, "loss": 0.28997802734375, "step": 635 }, { "epoch": 0.04299040151412735, "grad_norm": 1.7083368301391602, "learning_rate": 2.9994510071595002e-05, "loss": 0.28802490234375, "step": 636 }, { "epoch": 0.04305799648506151, "grad_norm": 8.368088722229004, "learning_rate": 2.9994420447148988e-05, "loss": 0.36627197265625, "step": 637 }, { "epoch": 0.04312559145599568, "grad_norm": 9.816370964050293, "learning_rate": 2.9994330097179657e-05, "loss": 0.3992919921875, "step": 638 }, { "epoch": 0.04319318642692983, "grad_norm": 2.649733066558838, "learning_rate": 2.9994239021691394e-05, "loss": 0.26605224609375, "step": 639 }, { "epoch": 0.043260781397864, "grad_norm": 1.6909621953964233, "learning_rate": 2.9994147220688593e-05, "loss": 0.297119140625, "step": 640 }, { "epoch": 0.04332837636879816, "grad_norm": 1.656394362449646, "learning_rate": 2.9994054694175703e-05, "loss": 0.3394775390625, "step": 641 }, { "epoch": 0.043395971339732324, "grad_norm": 11.179489135742188, "learning_rate": 2.9993961442157203e-05, "loss": 0.45379638671875, "step": 642 }, { "epoch": 0.04346356631066649, "grad_norm": 8.409074783325195, "learning_rate": 2.9993867464637596e-05, "loss": 0.31585693359375, "step": 643 }, { "epoch": 0.04353116128160065, "grad_norm": 1.81592857837677, "learning_rate": 2.9993772761621442e-05, "loss": 0.108367919921875, "step": 644 }, { "epoch": 0.04359875625253481, "grad_norm": 6.303939342498779, "learning_rate": 2.9993677333113312e-05, "loss": 0.367523193359375, "step": 645 }, { "epoch": 0.04366635122346897, "grad_norm": 1.8387033939361572, "learning_rate": 2.9993581179117827e-05, "loss": 0.28436279296875, "step": 646 }, { "epoch": 0.043733946194403135, "grad_norm": 3.3486578464508057, "learning_rate": 2.9993484299639646e-05, "loss": 0.37689208984375, "step": 647 }, { "epoch": 0.0438015411653373, "grad_norm": 4.033984184265137, "learning_rate": 2.9993386694683445e-05, "loss": 0.288604736328125, "step": 648 }, { "epoch": 0.04386913613627146, "grad_norm": 4.414268493652344, "learning_rate": 2.9993288364253956e-05, "loss": 0.2622528076171875, "step": 649 }, { "epoch": 0.043936731107205626, "grad_norm": 7.3878092765808105, "learning_rate": 2.9993189308355933e-05, "loss": 0.33489990234375, "step": 650 }, { "epoch": 0.04400432607813979, "grad_norm": 3.649796962738037, "learning_rate": 2.999308952699417e-05, "loss": 0.182525634765625, "step": 651 }, { "epoch": 0.044071921049073946, "grad_norm": 3.2199721336364746, "learning_rate": 2.9992989020173495e-05, "loss": 0.33660888671875, "step": 652 }, { "epoch": 0.04413951602000811, "grad_norm": 1.41452157497406, "learning_rate": 2.9992887787898772e-05, "loss": 0.2186126708984375, "step": 653 }, { "epoch": 0.044207110990942274, "grad_norm": 3.6435706615448, "learning_rate": 2.9992785830174895e-05, "loss": 0.300445556640625, "step": 654 }, { "epoch": 0.04427470596187644, "grad_norm": 1.6919419765472412, "learning_rate": 2.99926831470068e-05, "loss": 0.36688232421875, "step": 655 }, { "epoch": 0.0443423009328106, "grad_norm": 5.958005905151367, "learning_rate": 2.999257973839946e-05, "loss": 0.324188232421875, "step": 656 }, { "epoch": 0.044409895903744764, "grad_norm": 1.2449015378952026, "learning_rate": 2.9992475604357872e-05, "loss": 0.28143310546875, "step": 657 }, { "epoch": 0.04447749087467892, "grad_norm": 4.055692672729492, "learning_rate": 2.9992370744887078e-05, "loss": 0.44122314453125, "step": 658 }, { "epoch": 0.044545085845613085, "grad_norm": 1.2502351999282837, "learning_rate": 2.999226515999215e-05, "loss": 0.13826751708984375, "step": 659 }, { "epoch": 0.04461268081654725, "grad_norm": 7.195521354675293, "learning_rate": 2.9992158849678196e-05, "loss": 0.361328125, "step": 660 }, { "epoch": 0.04468027578748141, "grad_norm": 3.762009620666504, "learning_rate": 2.9992051813950364e-05, "loss": 0.32568359375, "step": 661 }, { "epoch": 0.044747870758415575, "grad_norm": 2.6173343658447266, "learning_rate": 2.9991944052813833e-05, "loss": 0.2894287109375, "step": 662 }, { "epoch": 0.04481546572934974, "grad_norm": 3.9859189987182617, "learning_rate": 2.9991835566273806e-05, "loss": 0.26519775390625, "step": 663 }, { "epoch": 0.044883060700283896, "grad_norm": 10.335921287536621, "learning_rate": 2.9991726354335546e-05, "loss": 0.38671875, "step": 664 }, { "epoch": 0.04495065567121806, "grad_norm": 7.756373405456543, "learning_rate": 2.9991616417004335e-05, "loss": 0.30718994140625, "step": 665 }, { "epoch": 0.04501825064215222, "grad_norm": 3.2057292461395264, "learning_rate": 2.999150575428549e-05, "loss": 0.3096923828125, "step": 666 }, { "epoch": 0.04508584561308639, "grad_norm": 5.1893205642700195, "learning_rate": 2.9991394366184363e-05, "loss": 0.33355712890625, "step": 667 }, { "epoch": 0.04515344058402055, "grad_norm": 3.9643044471740723, "learning_rate": 2.9991282252706347e-05, "loss": 0.243560791015625, "step": 668 }, { "epoch": 0.045221035554954714, "grad_norm": 1.6137727499008179, "learning_rate": 2.9991169413856866e-05, "loss": 0.2861785888671875, "step": 669 }, { "epoch": 0.04528863052588887, "grad_norm": 2.890599012374878, "learning_rate": 2.999105584964138e-05, "loss": 0.31060791015625, "step": 670 }, { "epoch": 0.045356225496823034, "grad_norm": 7.19579553604126, "learning_rate": 2.999094156006538e-05, "loss": 0.350494384765625, "step": 671 }, { "epoch": 0.0454238204677572, "grad_norm": 6.896668910980225, "learning_rate": 2.9990826545134405e-05, "loss": 0.40252685546875, "step": 672 }, { "epoch": 0.04549141543869136, "grad_norm": 3.5856878757476807, "learning_rate": 2.999071080485401e-05, "loss": 0.28662109375, "step": 673 }, { "epoch": 0.045559010409625525, "grad_norm": 2.1571669578552246, "learning_rate": 2.9990594339229805e-05, "loss": 0.241058349609375, "step": 674 }, { "epoch": 0.04562660538055969, "grad_norm": 2.353957414627075, "learning_rate": 2.9990477148267416e-05, "loss": 0.35162353515625, "step": 675 }, { "epoch": 0.04569420035149385, "grad_norm": 12.26346492767334, "learning_rate": 2.9990359231972517e-05, "loss": 0.34820556640625, "step": 676 }, { "epoch": 0.04576179532242801, "grad_norm": 2.443422555923462, "learning_rate": 2.9990240590350816e-05, "loss": 0.39617919921875, "step": 677 }, { "epoch": 0.04582939029336217, "grad_norm": 6.350247859954834, "learning_rate": 2.9990121223408054e-05, "loss": 0.33782958984375, "step": 678 }, { "epoch": 0.045896985264296336, "grad_norm": 4.275381088256836, "learning_rate": 2.9990001131150003e-05, "loss": 0.22564697265625, "step": 679 }, { "epoch": 0.0459645802352305, "grad_norm": 1.7868928909301758, "learning_rate": 2.9989880313582477e-05, "loss": 0.28558349609375, "step": 680 }, { "epoch": 0.04603217520616466, "grad_norm": 1.5183287858963013, "learning_rate": 2.998975877071132e-05, "loss": 0.132843017578125, "step": 681 }, { "epoch": 0.04609977017709883, "grad_norm": 2.395551919937134, "learning_rate": 2.998963650254241e-05, "loss": 0.358154296875, "step": 682 }, { "epoch": 0.046167365148032984, "grad_norm": 2.4511592388153076, "learning_rate": 2.9989513509081672e-05, "loss": 0.246063232421875, "step": 683 }, { "epoch": 0.04623496011896715, "grad_norm": 9.920998573303223, "learning_rate": 2.9989389790335043e-05, "loss": 0.351348876953125, "step": 684 }, { "epoch": 0.04630255508990131, "grad_norm": 5.201322078704834, "learning_rate": 2.9989265346308528e-05, "loss": 0.43328857421875, "step": 685 }, { "epoch": 0.046370150060835474, "grad_norm": 1.734148383140564, "learning_rate": 2.9989140177008134e-05, "loss": 0.28485107421875, "step": 686 }, { "epoch": 0.04643774503176964, "grad_norm": 9.075897216796875, "learning_rate": 2.998901428243992e-05, "loss": 0.2762451171875, "step": 687 }, { "epoch": 0.0465053400027038, "grad_norm": 6.7082343101501465, "learning_rate": 2.9988887662609983e-05, "loss": 0.3670654296875, "step": 688 }, { "epoch": 0.04657293497363796, "grad_norm": 3.525778293609619, "learning_rate": 2.9988760317524443e-05, "loss": 0.2389068603515625, "step": 689 }, { "epoch": 0.04664052994457212, "grad_norm": 2.164926290512085, "learning_rate": 2.9988632247189466e-05, "loss": 0.354736328125, "step": 690 }, { "epoch": 0.046708124915506286, "grad_norm": 2.1567137241363525, "learning_rate": 2.998850345161125e-05, "loss": 0.31256103515625, "step": 691 }, { "epoch": 0.04677571988644045, "grad_norm": 4.5897321701049805, "learning_rate": 2.9988373930796017e-05, "loss": 0.180145263671875, "step": 692 }, { "epoch": 0.04684331485737461, "grad_norm": 2.9866771697998047, "learning_rate": 2.9988243684750047e-05, "loss": 0.278900146484375, "step": 693 }, { "epoch": 0.046910909828308776, "grad_norm": 2.5879502296447754, "learning_rate": 2.9988112713479637e-05, "loss": 0.337249755859375, "step": 694 }, { "epoch": 0.04697850479924293, "grad_norm": 1.3674193620681763, "learning_rate": 2.9987981016991125e-05, "loss": 0.169769287109375, "step": 695 }, { "epoch": 0.0470460997701771, "grad_norm": 6.926414489746094, "learning_rate": 2.998784859529088e-05, "loss": 0.44171142578125, "step": 696 }, { "epoch": 0.04711369474111126, "grad_norm": 2.3379220962524414, "learning_rate": 2.998771544838531e-05, "loss": 0.2448272705078125, "step": 697 }, { "epoch": 0.047181289712045424, "grad_norm": 1.6970956325531006, "learning_rate": 2.998758157628086e-05, "loss": 0.32965087890625, "step": 698 }, { "epoch": 0.04724888468297959, "grad_norm": 2.141385316848755, "learning_rate": 2.9987446978984007e-05, "loss": 0.244171142578125, "step": 699 }, { "epoch": 0.04731647965391375, "grad_norm": 1.2030174732208252, "learning_rate": 2.998731165650126e-05, "loss": 0.205474853515625, "step": 700 }, { "epoch": 0.047384074624847915, "grad_norm": 2.7460455894470215, "learning_rate": 2.9987175608839176e-05, "loss": 0.270294189453125, "step": 701 }, { "epoch": 0.04745166959578207, "grad_norm": 5.224522590637207, "learning_rate": 2.9987038836004328e-05, "loss": 0.33038330078125, "step": 702 }, { "epoch": 0.047519264566716235, "grad_norm": 1.6953397989273071, "learning_rate": 2.998690133800334e-05, "loss": 0.28863525390625, "step": 703 }, { "epoch": 0.0475868595376504, "grad_norm": 2.917630910873413, "learning_rate": 2.998676311484286e-05, "loss": 0.317657470703125, "step": 704 }, { "epoch": 0.04765445450858456, "grad_norm": 4.959555149078369, "learning_rate": 2.9986624166529582e-05, "loss": 0.40625, "step": 705 }, { "epoch": 0.047722049479518726, "grad_norm": 2.0106351375579834, "learning_rate": 2.9986484493070226e-05, "loss": 0.20159912109375, "step": 706 }, { "epoch": 0.04778964445045289, "grad_norm": 3.7473864555358887, "learning_rate": 2.9986344094471545e-05, "loss": 0.437255859375, "step": 707 }, { "epoch": 0.047857239421387046, "grad_norm": 1.6424260139465332, "learning_rate": 2.9986202970740344e-05, "loss": 0.3460693359375, "step": 708 }, { "epoch": 0.04792483439232121, "grad_norm": 1.2753783464431763, "learning_rate": 2.998606112188344e-05, "loss": 0.200653076171875, "step": 709 }, { "epoch": 0.04799242936325537, "grad_norm": 3.813270092010498, "learning_rate": 2.9985918547907703e-05, "loss": 0.278961181640625, "step": 710 }, { "epoch": 0.04806002433418954, "grad_norm": 3.3665783405303955, "learning_rate": 2.9985775248820033e-05, "loss": 0.244110107421875, "step": 711 }, { "epoch": 0.0481276193051237, "grad_norm": 3.272381544113159, "learning_rate": 2.9985631224627355e-05, "loss": 0.3875732421875, "step": 712 }, { "epoch": 0.048195214276057864, "grad_norm": 3.75441575050354, "learning_rate": 2.998548647533665e-05, "loss": 0.41802978515625, "step": 713 }, { "epoch": 0.04826280924699202, "grad_norm": 2.062497138977051, "learning_rate": 2.9985341000954916e-05, "loss": 0.39208984375, "step": 714 }, { "epoch": 0.048330404217926184, "grad_norm": 3.5306129455566406, "learning_rate": 2.9985194801489188e-05, "loss": 0.18878173828125, "step": 715 }, { "epoch": 0.04839799918886035, "grad_norm": 1.5405710935592651, "learning_rate": 2.9985047876946543e-05, "loss": 0.239837646484375, "step": 716 }, { "epoch": 0.04846559415979451, "grad_norm": 3.3869118690490723, "learning_rate": 2.9984900227334093e-05, "loss": 0.2515869140625, "step": 717 }, { "epoch": 0.048533189130728675, "grad_norm": 2.493736982345581, "learning_rate": 2.998475185265898e-05, "loss": 0.246124267578125, "step": 718 }, { "epoch": 0.04860078410166284, "grad_norm": 2.795839309692383, "learning_rate": 2.998460275292838e-05, "loss": 0.1910400390625, "step": 719 }, { "epoch": 0.048668379072596996, "grad_norm": 6.247623920440674, "learning_rate": 2.9984452928149514e-05, "loss": 0.3372802734375, "step": 720 }, { "epoch": 0.04873597404353116, "grad_norm": 4.927814960479736, "learning_rate": 2.9984302378329623e-05, "loss": 0.35406494140625, "step": 721 }, { "epoch": 0.04880356901446532, "grad_norm": 2.7275893688201904, "learning_rate": 2.9984151103476004e-05, "loss": 0.341094970703125, "step": 722 }, { "epoch": 0.048871163985399486, "grad_norm": 4.855454444885254, "learning_rate": 2.9983999103595965e-05, "loss": 0.3004150390625, "step": 723 }, { "epoch": 0.04893875895633365, "grad_norm": 2.0081264972686768, "learning_rate": 2.9983846378696865e-05, "loss": 0.36639404296875, "step": 724 }, { "epoch": 0.049006353927267814, "grad_norm": 7.263977527618408, "learning_rate": 2.9983692928786094e-05, "loss": 0.3599853515625, "step": 725 }, { "epoch": 0.04907394889820197, "grad_norm": 6.577898979187012, "learning_rate": 2.998353875387108e-05, "loss": 0.2435150146484375, "step": 726 }, { "epoch": 0.049141543869136134, "grad_norm": 9.249200820922852, "learning_rate": 2.9983383853959273e-05, "loss": 0.36761474609375, "step": 727 }, { "epoch": 0.0492091388400703, "grad_norm": 2.0822739601135254, "learning_rate": 2.9983228229058178e-05, "loss": 0.39813232421875, "step": 728 }, { "epoch": 0.04927673381100446, "grad_norm": 3.4739866256713867, "learning_rate": 2.9983071879175322e-05, "loss": 0.26763916015625, "step": 729 }, { "epoch": 0.049344328781938625, "grad_norm": 1.339114785194397, "learning_rate": 2.9982914804318267e-05, "loss": 0.298858642578125, "step": 730 }, { "epoch": 0.04941192375287279, "grad_norm": 2.4553608894348145, "learning_rate": 2.9982757004494617e-05, "loss": 0.271636962890625, "step": 731 }, { "epoch": 0.04947951872380695, "grad_norm": 1.8756306171417236, "learning_rate": 2.998259847971201e-05, "loss": 0.1602935791015625, "step": 732 }, { "epoch": 0.04954711369474111, "grad_norm": 2.670952796936035, "learning_rate": 2.998243922997811e-05, "loss": 0.361083984375, "step": 733 }, { "epoch": 0.04961470866567527, "grad_norm": 4.416046142578125, "learning_rate": 2.9982279255300628e-05, "loss": 0.289581298828125, "step": 734 }, { "epoch": 0.049682303636609436, "grad_norm": 2.4436538219451904, "learning_rate": 2.9982118555687296e-05, "loss": 0.39739990234375, "step": 735 }, { "epoch": 0.0497498986075436, "grad_norm": 1.6335698366165161, "learning_rate": 2.9981957131145895e-05, "loss": 0.3624267578125, "step": 736 }, { "epoch": 0.04981749357847776, "grad_norm": 3.9389877319335938, "learning_rate": 2.9981794981684245e-05, "loss": 0.283416748046875, "step": 737 }, { "epoch": 0.04988508854941193, "grad_norm": 1.9418436288833618, "learning_rate": 2.9981632107310176e-05, "loss": 0.38818359375, "step": 738 }, { "epoch": 0.04995268352034608, "grad_norm": 3.4246134757995605, "learning_rate": 2.998146850803158e-05, "loss": 0.36962890625, "step": 739 }, { "epoch": 0.05002027849128025, "grad_norm": 1.769803762435913, "learning_rate": 2.9981304183856366e-05, "loss": 0.40087890625, "step": 740 }, { "epoch": 0.05008787346221441, "grad_norm": 5.70375394821167, "learning_rate": 2.9981139134792486e-05, "loss": 0.36456298828125, "step": 741 }, { "epoch": 0.050155468433148574, "grad_norm": 4.583150863647461, "learning_rate": 2.998097336084793e-05, "loss": 0.35455322265625, "step": 742 }, { "epoch": 0.05022306340408274, "grad_norm": 1.8378101587295532, "learning_rate": 2.998080686203072e-05, "loss": 0.258880615234375, "step": 743 }, { "epoch": 0.0502906583750169, "grad_norm": 1.5218851566314697, "learning_rate": 2.9980639638348904e-05, "loss": 0.24139404296875, "step": 744 }, { "epoch": 0.05035825334595106, "grad_norm": 1.3494409322738647, "learning_rate": 2.9980471689810584e-05, "loss": 0.303131103515625, "step": 745 }, { "epoch": 0.05042584831688522, "grad_norm": 1.7442303895950317, "learning_rate": 2.9980303016423877e-05, "loss": 0.37225341796875, "step": 746 }, { "epoch": 0.050493443287819385, "grad_norm": 3.6849381923675537, "learning_rate": 2.998013361819695e-05, "loss": 0.19366455078125, "step": 747 }, { "epoch": 0.05056103825875355, "grad_norm": 0.8809331059455872, "learning_rate": 2.9979963495137998e-05, "loss": 0.1688690185546875, "step": 748 }, { "epoch": 0.05062863322968771, "grad_norm": 4.009644031524658, "learning_rate": 2.9979792647255255e-05, "loss": 0.3333740234375, "step": 749 }, { "epoch": 0.050696228200621876, "grad_norm": 7.227642059326172, "learning_rate": 2.9979621074556983e-05, "loss": 0.3985595703125, "step": 750 }, { "epoch": 0.05076382317155603, "grad_norm": 4.393807411193848, "learning_rate": 2.9979448777051483e-05, "loss": 0.32513427734375, "step": 751 }, { "epoch": 0.050831418142490196, "grad_norm": 3.002303123474121, "learning_rate": 2.99792757547471e-05, "loss": 0.355987548828125, "step": 752 }, { "epoch": 0.05089901311342436, "grad_norm": 9.90242862701416, "learning_rate": 2.9979102007652202e-05, "loss": 0.373291015625, "step": 753 }, { "epoch": 0.050966608084358524, "grad_norm": 3.345227003097534, "learning_rate": 2.997892753577519e-05, "loss": 0.26043701171875, "step": 754 }, { "epoch": 0.05103420305529269, "grad_norm": 3.5864081382751465, "learning_rate": 2.9978752339124515e-05, "loss": 0.3472900390625, "step": 755 }, { "epoch": 0.05110179802622685, "grad_norm": 3.2125210762023926, "learning_rate": 2.997857641770865e-05, "loss": 0.302459716796875, "step": 756 }, { "epoch": 0.051169392997161015, "grad_norm": 3.4236900806427, "learning_rate": 2.9978399771536106e-05, "loss": 0.2095947265625, "step": 757 }, { "epoch": 0.05123698796809517, "grad_norm": 6.3299665451049805, "learning_rate": 2.9978222400615427e-05, "loss": 0.4158935546875, "step": 758 }, { "epoch": 0.051304582939029335, "grad_norm": 2.2383668422698975, "learning_rate": 2.9978044304955202e-05, "loss": 0.36785888671875, "step": 759 }, { "epoch": 0.0513721779099635, "grad_norm": 1.762969970703125, "learning_rate": 2.9977865484564046e-05, "loss": 0.3031005859375, "step": 760 }, { "epoch": 0.05143977288089766, "grad_norm": 3.560178279876709, "learning_rate": 2.9977685939450617e-05, "loss": 0.21905517578125, "step": 761 }, { "epoch": 0.051507367851831826, "grad_norm": 3.144047260284424, "learning_rate": 2.997750566962359e-05, "loss": 0.229095458984375, "step": 762 }, { "epoch": 0.05157496282276599, "grad_norm": 3.0306947231292725, "learning_rate": 2.9977324675091697e-05, "loss": 0.28704833984375, "step": 763 }, { "epoch": 0.051642557793700146, "grad_norm": 4.192714691162109, "learning_rate": 2.9977142955863697e-05, "loss": 0.25909423828125, "step": 764 }, { "epoch": 0.05171015276463431, "grad_norm": 1.3782496452331543, "learning_rate": 2.9976960511948375e-05, "loss": 0.1639404296875, "step": 765 }, { "epoch": 0.05177774773556847, "grad_norm": 1.6379649639129639, "learning_rate": 2.997677734335456e-05, "loss": 0.29864501953125, "step": 766 }, { "epoch": 0.05184534270650264, "grad_norm": 3.2249200344085693, "learning_rate": 2.9976593450091124e-05, "loss": 0.38323974609375, "step": 767 }, { "epoch": 0.0519129376774368, "grad_norm": 3.1866350173950195, "learning_rate": 2.997640883216696e-05, "loss": 0.343017578125, "step": 768 }, { "epoch": 0.051980532648370964, "grad_norm": 5.139822959899902, "learning_rate": 2.997622348959099e-05, "loss": 0.250762939453125, "step": 769 }, { "epoch": 0.05204812761930512, "grad_norm": 3.376508951187134, "learning_rate": 2.9976037422372197e-05, "loss": 0.3070068359375, "step": 770 }, { "epoch": 0.052115722590239284, "grad_norm": 2.190156936645508, "learning_rate": 2.9975850630519578e-05, "loss": 0.28472900390625, "step": 771 }, { "epoch": 0.05218331756117345, "grad_norm": 2.458467721939087, "learning_rate": 2.997566311404217e-05, "loss": 0.3394775390625, "step": 772 }, { "epoch": 0.05225091253210761, "grad_norm": 5.161783695220947, "learning_rate": 2.997547487294905e-05, "loss": 0.36285400390625, "step": 773 }, { "epoch": 0.052318507503041775, "grad_norm": 4.438571929931641, "learning_rate": 2.9975285907249326e-05, "loss": 0.315185546875, "step": 774 }, { "epoch": 0.05238610247397594, "grad_norm": 6.696635723114014, "learning_rate": 2.9975096216952137e-05, "loss": 0.353363037109375, "step": 775 }, { "epoch": 0.052453697444910095, "grad_norm": 2.7085466384887695, "learning_rate": 2.9974905802066662e-05, "loss": 0.143798828125, "step": 776 }, { "epoch": 0.05252129241584426, "grad_norm": 1.4384299516677856, "learning_rate": 2.997471466260212e-05, "loss": 0.270111083984375, "step": 777 }, { "epoch": 0.05258888738677842, "grad_norm": 3.842447280883789, "learning_rate": 2.9974522798567754e-05, "loss": 0.38623046875, "step": 778 }, { "epoch": 0.052656482357712586, "grad_norm": 5.641157150268555, "learning_rate": 2.9974330209972848e-05, "loss": 0.40106201171875, "step": 779 }, { "epoch": 0.05272407732864675, "grad_norm": 2.8420889377593994, "learning_rate": 2.9974136896826724e-05, "loss": 0.283294677734375, "step": 780 }, { "epoch": 0.05279167229958091, "grad_norm": 3.765244722366333, "learning_rate": 2.9973942859138735e-05, "loss": 0.267364501953125, "step": 781 }, { "epoch": 0.05285926727051508, "grad_norm": 2.9467592239379883, "learning_rate": 2.9973748096918264e-05, "loss": 0.36334228515625, "step": 782 }, { "epoch": 0.052926862241449234, "grad_norm": 1.991815447807312, "learning_rate": 2.9973552610174745e-05, "loss": 0.311492919921875, "step": 783 }, { "epoch": 0.0529944572123834, "grad_norm": 4.060635089874268, "learning_rate": 2.9973356398917627e-05, "loss": 0.26397705078125, "step": 784 }, { "epoch": 0.05306205218331756, "grad_norm": 5.189950466156006, "learning_rate": 2.997315946315641e-05, "loss": 0.32366943359375, "step": 785 }, { "epoch": 0.053129647154251725, "grad_norm": 4.974122524261475, "learning_rate": 2.997296180290062e-05, "loss": 0.32720947265625, "step": 786 }, { "epoch": 0.05319724212518589, "grad_norm": 5.548740386962891, "learning_rate": 2.997276341815982e-05, "loss": 0.433837890625, "step": 787 }, { "epoch": 0.05326483709612005, "grad_norm": 2.3499326705932617, "learning_rate": 2.997256430894362e-05, "loss": 0.34906005859375, "step": 788 }, { "epoch": 0.05333243206705421, "grad_norm": 3.247675657272339, "learning_rate": 2.9972364475261634e-05, "loss": 0.3560791015625, "step": 789 }, { "epoch": 0.05340002703798837, "grad_norm": 4.971584796905518, "learning_rate": 2.997216391712355e-05, "loss": 0.30877685546875, "step": 790 }, { "epoch": 0.053467622008922536, "grad_norm": 2.5570290088653564, "learning_rate": 2.9971962634539062e-05, "loss": 0.23095703125, "step": 791 }, { "epoch": 0.0535352169798567, "grad_norm": 5.327788352966309, "learning_rate": 2.9971760627517915e-05, "loss": 0.291839599609375, "step": 792 }, { "epoch": 0.05360281195079086, "grad_norm": 1.7586942911148071, "learning_rate": 2.997155789606988e-05, "loss": 0.1733551025390625, "step": 793 }, { "epoch": 0.053670406921725027, "grad_norm": 3.7866835594177246, "learning_rate": 2.9971354440204764e-05, "loss": 0.34246826171875, "step": 794 }, { "epoch": 0.05373800189265918, "grad_norm": 2.8031516075134277, "learning_rate": 2.9971150259932414e-05, "loss": 0.341217041015625, "step": 795 }, { "epoch": 0.05380559686359335, "grad_norm": 2.8548574447631836, "learning_rate": 2.997094535526271e-05, "loss": 0.303375244140625, "step": 796 }, { "epoch": 0.05387319183452751, "grad_norm": 4.095166206359863, "learning_rate": 2.997073972620557e-05, "loss": 0.29193115234375, "step": 797 }, { "epoch": 0.053940786805461674, "grad_norm": 4.790628910064697, "learning_rate": 2.9970533372770934e-05, "loss": 0.2523345947265625, "step": 798 }, { "epoch": 0.05400838177639584, "grad_norm": 3.026719570159912, "learning_rate": 2.99703262949688e-05, "loss": 0.38226318359375, "step": 799 }, { "epoch": 0.05407597674733, "grad_norm": 4.035066604614258, "learning_rate": 2.9970118492809175e-05, "loss": 0.2395477294921875, "step": 800 }, { "epoch": 0.05414357171826416, "grad_norm": 2.0494909286499023, "learning_rate": 2.996990996630212e-05, "loss": 0.39227294921875, "step": 801 }, { "epoch": 0.05421116668919832, "grad_norm": 7.262905120849609, "learning_rate": 2.9969700715457728e-05, "loss": 0.37451171875, "step": 802 }, { "epoch": 0.054278761660132485, "grad_norm": 12.468764305114746, "learning_rate": 2.9969490740286116e-05, "loss": 0.43096923828125, "step": 803 }, { "epoch": 0.05434635663106665, "grad_norm": 7.786237716674805, "learning_rate": 2.996928004079745e-05, "loss": 0.33245849609375, "step": 804 }, { "epoch": 0.05441395160200081, "grad_norm": 5.121335983276367, "learning_rate": 2.996906861700192e-05, "loss": 0.33551025390625, "step": 805 }, { "epoch": 0.054481546572934976, "grad_norm": 2.3729193210601807, "learning_rate": 2.996885646890976e-05, "loss": 0.28668212890625, "step": 806 }, { "epoch": 0.05454914154386914, "grad_norm": 5.328919410705566, "learning_rate": 2.9968643596531234e-05, "loss": 0.41259765625, "step": 807 }, { "epoch": 0.054616736514803296, "grad_norm": 5.391028881072998, "learning_rate": 2.996842999987664e-05, "loss": 0.2573089599609375, "step": 808 }, { "epoch": 0.05468433148573746, "grad_norm": 7.539277076721191, "learning_rate": 2.9968215678956315e-05, "loss": 0.2900238037109375, "step": 809 }, { "epoch": 0.054751926456671623, "grad_norm": 2.623403787612915, "learning_rate": 2.996800063378063e-05, "loss": 0.30828857421875, "step": 810 }, { "epoch": 0.05481952142760579, "grad_norm": 3.3035402297973633, "learning_rate": 2.996778486435999e-05, "loss": 0.226165771484375, "step": 811 }, { "epoch": 0.05488711639853995, "grad_norm": 1.428754210472107, "learning_rate": 2.9967568370704837e-05, "loss": 0.254730224609375, "step": 812 }, { "epoch": 0.054954711369474114, "grad_norm": 6.188496112823486, "learning_rate": 2.9967351152825638e-05, "loss": 0.3953857421875, "step": 813 }, { "epoch": 0.05502230634040827, "grad_norm": 2.3987700939178467, "learning_rate": 2.9967133210732917e-05, "loss": 0.329742431640625, "step": 814 }, { "epoch": 0.055089901311342435, "grad_norm": 2.4076578617095947, "learning_rate": 2.9966914544437204e-05, "loss": 0.2652587890625, "step": 815 }, { "epoch": 0.0551574962822766, "grad_norm": 4.8656110763549805, "learning_rate": 2.9966695153949087e-05, "loss": 0.34466552734375, "step": 816 }, { "epoch": 0.05522509125321076, "grad_norm": 5.187920570373535, "learning_rate": 2.9966475039279188e-05, "loss": 0.2838134765625, "step": 817 }, { "epoch": 0.055292686224144925, "grad_norm": 1.98343026638031, "learning_rate": 2.996625420043815e-05, "loss": 0.199188232421875, "step": 818 }, { "epoch": 0.05536028119507909, "grad_norm": 2.4995334148406982, "learning_rate": 2.996603263743666e-05, "loss": 0.28033447265625, "step": 819 }, { "epoch": 0.055427876166013246, "grad_norm": 1.6128146648406982, "learning_rate": 2.996581035028543e-05, "loss": 0.2250213623046875, "step": 820 }, { "epoch": 0.05549547113694741, "grad_norm": 5.757226943969727, "learning_rate": 2.996558733899523e-05, "loss": 0.280120849609375, "step": 821 }, { "epoch": 0.05556306610788157, "grad_norm": 4.782168388366699, "learning_rate": 2.9965363603576845e-05, "loss": 0.2791748046875, "step": 822 }, { "epoch": 0.05563066107881574, "grad_norm": 7.713655471801758, "learning_rate": 2.99651391440411e-05, "loss": 0.353302001953125, "step": 823 }, { "epoch": 0.0556982560497499, "grad_norm": 9.95822811126709, "learning_rate": 2.9964913960398855e-05, "loss": 0.374969482421875, "step": 824 }, { "epoch": 0.055765851020684064, "grad_norm": 1.426123023033142, "learning_rate": 2.996468805266101e-05, "loss": 0.232513427734375, "step": 825 }, { "epoch": 0.05583344599161822, "grad_norm": 5.797057151794434, "learning_rate": 2.996446142083849e-05, "loss": 0.32135009765625, "step": 826 }, { "epoch": 0.055901040962552384, "grad_norm": 1.3943299055099487, "learning_rate": 2.996423406494226e-05, "loss": 0.17303466796875, "step": 827 }, { "epoch": 0.05596863593348655, "grad_norm": 3.153569459915161, "learning_rate": 2.996400598498333e-05, "loss": 0.2965087890625, "step": 828 }, { "epoch": 0.05603623090442071, "grad_norm": 1.6870073080062866, "learning_rate": 2.996377718097273e-05, "loss": 0.249420166015625, "step": 829 }, { "epoch": 0.056103825875354875, "grad_norm": 4.544062614440918, "learning_rate": 2.9963547652921528e-05, "loss": 0.24139404296875, "step": 830 }, { "epoch": 0.05617142084628904, "grad_norm": 3.0175693035125732, "learning_rate": 2.9963317400840834e-05, "loss": 0.314361572265625, "step": 831 }, { "epoch": 0.056239015817223195, "grad_norm": 1.6352065801620483, "learning_rate": 2.9963086424741788e-05, "loss": 0.36016845703125, "step": 832 }, { "epoch": 0.05630661078815736, "grad_norm": 3.4052696228027344, "learning_rate": 2.9962854724635566e-05, "loss": 0.322113037109375, "step": 833 }, { "epoch": 0.05637420575909152, "grad_norm": 3.8233277797698975, "learning_rate": 2.9962622300533382e-05, "loss": 0.2173614501953125, "step": 834 }, { "epoch": 0.056441800730025686, "grad_norm": 4.850222587585449, "learning_rate": 2.996238915244648e-05, "loss": 0.2659912109375, "step": 835 }, { "epoch": 0.05650939570095985, "grad_norm": 3.802846908569336, "learning_rate": 2.9962155280386133e-05, "loss": 0.259368896484375, "step": 836 }, { "epoch": 0.05657699067189401, "grad_norm": 3.1887271404266357, "learning_rate": 2.996192068436367e-05, "loss": 0.31378173828125, "step": 837 }, { "epoch": 0.05664458564282818, "grad_norm": 3.3354976177215576, "learning_rate": 2.9961685364390444e-05, "loss": 0.4130859375, "step": 838 }, { "epoch": 0.056712180613762334, "grad_norm": 3.1272690296173096, "learning_rate": 2.9961449320477828e-05, "loss": 0.280487060546875, "step": 839 }, { "epoch": 0.0567797755846965, "grad_norm": 2.7350034713745117, "learning_rate": 2.996121255263725e-05, "loss": 0.272430419921875, "step": 840 }, { "epoch": 0.05684737055563066, "grad_norm": 2.606489896774292, "learning_rate": 2.9960975060880163e-05, "loss": 0.377685546875, "step": 841 }, { "epoch": 0.056914965526564824, "grad_norm": 1.8772708177566528, "learning_rate": 2.9960736845218065e-05, "loss": 0.295440673828125, "step": 842 }, { "epoch": 0.05698256049749899, "grad_norm": 1.6580592393875122, "learning_rate": 2.9960497905662478e-05, "loss": 0.36126708984375, "step": 843 }, { "epoch": 0.05705015546843315, "grad_norm": 1.6257785558700562, "learning_rate": 2.9960258242224962e-05, "loss": 0.30645751953125, "step": 844 }, { "epoch": 0.05711775043936731, "grad_norm": 1.3077576160430908, "learning_rate": 2.9960017854917117e-05, "loss": 0.1852874755859375, "step": 845 }, { "epoch": 0.05718534541030147, "grad_norm": 2.2852628231048584, "learning_rate": 2.9959776743750573e-05, "loss": 0.3416748046875, "step": 846 }, { "epoch": 0.057252940381235636, "grad_norm": 1.7011138200759888, "learning_rate": 2.9959534908736997e-05, "loss": 0.245269775390625, "step": 847 }, { "epoch": 0.0573205353521698, "grad_norm": 1.6295061111450195, "learning_rate": 2.995929234988809e-05, "loss": 0.319580078125, "step": 848 }, { "epoch": 0.05738813032310396, "grad_norm": 2.2991864681243896, "learning_rate": 2.9959049067215584e-05, "loss": 0.396240234375, "step": 849 }, { "epoch": 0.057455725294038126, "grad_norm": 4.797669410705566, "learning_rate": 2.9958805060731258e-05, "loss": 0.29254150390625, "step": 850 }, { "epoch": 0.05752332026497228, "grad_norm": 2.288398265838623, "learning_rate": 2.9958560330446918e-05, "loss": 0.31011962890625, "step": 851 }, { "epoch": 0.05759091523590645, "grad_norm": 5.4767560958862305, "learning_rate": 2.9958314876374398e-05, "loss": 0.315765380859375, "step": 852 }, { "epoch": 0.05765851020684061, "grad_norm": 2.974195718765259, "learning_rate": 2.9958068698525584e-05, "loss": 0.2952880859375, "step": 853 }, { "epoch": 0.057726105177774774, "grad_norm": 5.057097434997559, "learning_rate": 2.995782179691238e-05, "loss": 0.31512451171875, "step": 854 }, { "epoch": 0.05779370014870894, "grad_norm": 6.807876110076904, "learning_rate": 2.9957574171546738e-05, "loss": 0.39044189453125, "step": 855 }, { "epoch": 0.0578612951196431, "grad_norm": 1.9689792394638062, "learning_rate": 2.9957325822440633e-05, "loss": 0.254547119140625, "step": 856 }, { "epoch": 0.05792889009057726, "grad_norm": 1.115057349205017, "learning_rate": 2.995707674960609e-05, "loss": 0.17144775390625, "step": 857 }, { "epoch": 0.05799648506151142, "grad_norm": 3.3478596210479736, "learning_rate": 2.995682695305516e-05, "loss": 0.3272705078125, "step": 858 }, { "epoch": 0.058064080032445585, "grad_norm": 2.6253552436828613, "learning_rate": 2.995657643279992e-05, "loss": 0.1768035888671875, "step": 859 }, { "epoch": 0.05813167500337975, "grad_norm": 1.4529532194137573, "learning_rate": 2.99563251888525e-05, "loss": 0.267730712890625, "step": 860 }, { "epoch": 0.05819926997431391, "grad_norm": 1.2592791318893433, "learning_rate": 2.9956073221225055e-05, "loss": 0.1373748779296875, "step": 861 }, { "epoch": 0.058266864945248076, "grad_norm": 4.839780330657959, "learning_rate": 2.995582052992978e-05, "loss": 0.3729248046875, "step": 862 }, { "epoch": 0.05833445991618224, "grad_norm": 4.488484859466553, "learning_rate": 2.9955567114978893e-05, "loss": 0.37255859375, "step": 863 }, { "epoch": 0.058402054887116396, "grad_norm": 3.8040850162506104, "learning_rate": 2.9955312976384664e-05, "loss": 0.332550048828125, "step": 864 }, { "epoch": 0.05846964985805056, "grad_norm": 2.5485610961914062, "learning_rate": 2.995505811415939e-05, "loss": 0.36572265625, "step": 865 }, { "epoch": 0.05853724482898472, "grad_norm": 1.564318060874939, "learning_rate": 2.9954802528315393e-05, "loss": 0.17453765869140625, "step": 866 }, { "epoch": 0.05860483979991889, "grad_norm": 2.2771975994110107, "learning_rate": 2.9954546218865052e-05, "loss": 0.32171630859375, "step": 867 }, { "epoch": 0.05867243477085305, "grad_norm": 7.0426812171936035, "learning_rate": 2.9954289185820762e-05, "loss": 0.38397216796875, "step": 868 }, { "epoch": 0.058740029741787214, "grad_norm": 4.5034356117248535, "learning_rate": 2.9954031429194965e-05, "loss": 0.260040283203125, "step": 869 }, { "epoch": 0.05880762471272137, "grad_norm": 3.503807544708252, "learning_rate": 2.9953772949000123e-05, "loss": 0.3931884765625, "step": 870 }, { "epoch": 0.058875219683655534, "grad_norm": 2.3488516807556152, "learning_rate": 2.9953513745248755e-05, "loss": 0.36541748046875, "step": 871 }, { "epoch": 0.0589428146545897, "grad_norm": 1.8354783058166504, "learning_rate": 2.9953253817953393e-05, "loss": 0.26662445068359375, "step": 872 }, { "epoch": 0.05901040962552386, "grad_norm": 1.9425480365753174, "learning_rate": 2.995299316712662e-05, "loss": 0.37921142578125, "step": 873 }, { "epoch": 0.059078004596458025, "grad_norm": 1.5669938325881958, "learning_rate": 2.9952731792781046e-05, "loss": 0.0907745361328125, "step": 874 }, { "epoch": 0.05914559956739219, "grad_norm": 2.7378456592559814, "learning_rate": 2.9952469694929317e-05, "loss": 0.1733551025390625, "step": 875 }, { "epoch": 0.059213194538326346, "grad_norm": 2.9947075843811035, "learning_rate": 2.9952206873584117e-05, "loss": 0.341064453125, "step": 876 }, { "epoch": 0.05928078950926051, "grad_norm": 2.111846685409546, "learning_rate": 2.995194332875816e-05, "loss": 0.3856201171875, "step": 877 }, { "epoch": 0.05934838448019467, "grad_norm": 2.067922830581665, "learning_rate": 2.9951679060464203e-05, "loss": 0.251556396484375, "step": 878 }, { "epoch": 0.059415979451128836, "grad_norm": 1.791167140007019, "learning_rate": 2.9951414068715027e-05, "loss": 0.304229736328125, "step": 879 }, { "epoch": 0.059483574422063, "grad_norm": 8.286972999572754, "learning_rate": 2.9951148353523454e-05, "loss": 0.3227081298828125, "step": 880 }, { "epoch": 0.059551169392997164, "grad_norm": 2.942737102508545, "learning_rate": 2.9950881914902346e-05, "loss": 0.26824951171875, "step": 881 }, { "epoch": 0.05961876436393132, "grad_norm": 3.149707078933716, "learning_rate": 2.9950614752864594e-05, "loss": 0.3057861328125, "step": 882 }, { "epoch": 0.059686359334865484, "grad_norm": 2.6078782081604004, "learning_rate": 2.9950346867423124e-05, "loss": 0.142120361328125, "step": 883 }, { "epoch": 0.05975395430579965, "grad_norm": 3.249537706375122, "learning_rate": 2.9950078258590895e-05, "loss": 0.27886962890625, "step": 884 }, { "epoch": 0.05982154927673381, "grad_norm": 3.181380271911621, "learning_rate": 2.9949808926380908e-05, "loss": 0.293243408203125, "step": 885 }, { "epoch": 0.059889144247667975, "grad_norm": 2.719086170196533, "learning_rate": 2.994953887080619e-05, "loss": 0.246063232421875, "step": 886 }, { "epoch": 0.05995673921860214, "grad_norm": 2.3149704933166504, "learning_rate": 2.994926809187981e-05, "loss": 0.3394775390625, "step": 887 }, { "epoch": 0.0600243341895363, "grad_norm": 2.890497922897339, "learning_rate": 2.9948996589614874e-05, "loss": 0.37158203125, "step": 888 }, { "epoch": 0.06009192916047046, "grad_norm": 5.057929992675781, "learning_rate": 2.9948724364024517e-05, "loss": 0.31170654296875, "step": 889 }, { "epoch": 0.06015952413140462, "grad_norm": 2.9149937629699707, "learning_rate": 2.994845141512191e-05, "loss": 0.39642333984375, "step": 890 }, { "epoch": 0.060227119102338786, "grad_norm": 4.556676387786865, "learning_rate": 2.994817774292026e-05, "loss": 0.3074951171875, "step": 891 }, { "epoch": 0.06029471407327295, "grad_norm": 2.030900239944458, "learning_rate": 2.994790334743281e-05, "loss": 0.26263427734375, "step": 892 }, { "epoch": 0.06036230904420711, "grad_norm": 1.9723351001739502, "learning_rate": 2.9947628228672832e-05, "loss": 0.3106689453125, "step": 893 }, { "epoch": 0.06042990401514128, "grad_norm": 2.3255183696746826, "learning_rate": 2.9947352386653646e-05, "loss": 0.362579345703125, "step": 894 }, { "epoch": 0.06049749898607543, "grad_norm": 3.477151870727539, "learning_rate": 2.9947075821388593e-05, "loss": 0.31304931640625, "step": 895 }, { "epoch": 0.0605650939570096, "grad_norm": 3.6387672424316406, "learning_rate": 2.9946798532891057e-05, "loss": 0.38250732421875, "step": 896 }, { "epoch": 0.06063268892794376, "grad_norm": 4.063328266143799, "learning_rate": 2.9946520521174456e-05, "loss": 0.31658935546875, "step": 897 }, { "epoch": 0.060700283898877924, "grad_norm": 6.078335762023926, "learning_rate": 2.9946241786252238e-05, "loss": 0.332000732421875, "step": 898 }, { "epoch": 0.06076787886981209, "grad_norm": 2.726874589920044, "learning_rate": 2.9945962328137898e-05, "loss": 0.33062744140625, "step": 899 }, { "epoch": 0.06083547384074625, "grad_norm": 1.8024686574935913, "learning_rate": 2.994568214684495e-05, "loss": 0.28802490234375, "step": 900 }, { "epoch": 0.06090306881168041, "grad_norm": 1.9122287034988403, "learning_rate": 2.9945401242386954e-05, "loss": 0.23046875, "step": 901 }, { "epoch": 0.06097066378261457, "grad_norm": 2.624959945678711, "learning_rate": 2.99451196147775e-05, "loss": 0.4217529296875, "step": 902 }, { "epoch": 0.061038258753548735, "grad_norm": 1.2107248306274414, "learning_rate": 2.994483726403022e-05, "loss": 0.19207763671875, "step": 903 }, { "epoch": 0.0611058537244829, "grad_norm": 5.547976970672607, "learning_rate": 2.9944554190158763e-05, "loss": 0.34613037109375, "step": 904 }, { "epoch": 0.06117344869541706, "grad_norm": 4.608335494995117, "learning_rate": 2.9944270393176843e-05, "loss": 0.3980712890625, "step": 905 }, { "epoch": 0.061241043666351226, "grad_norm": 1.863264799118042, "learning_rate": 2.9943985873098183e-05, "loss": 0.293701171875, "step": 906 }, { "epoch": 0.06130863863728538, "grad_norm": 3.799455165863037, "learning_rate": 2.994370062993655e-05, "loss": 0.253662109375, "step": 907 }, { "epoch": 0.061376233608219546, "grad_norm": 1.280482292175293, "learning_rate": 2.9943414663705744e-05, "loss": 0.2471923828125, "step": 908 }, { "epoch": 0.06144382857915371, "grad_norm": 3.260305643081665, "learning_rate": 2.994312797441961e-05, "loss": 0.303802490234375, "step": 909 }, { "epoch": 0.061511423550087874, "grad_norm": 2.9149258136749268, "learning_rate": 2.9942840562092013e-05, "loss": 0.221221923828125, "step": 910 }, { "epoch": 0.06157901852102204, "grad_norm": 1.2820130586624146, "learning_rate": 2.9942552426736855e-05, "loss": 0.222015380859375, "step": 911 }, { "epoch": 0.0616466134919562, "grad_norm": 2.234856605529785, "learning_rate": 2.994226356836809e-05, "loss": 0.2340850830078125, "step": 912 }, { "epoch": 0.06171420846289036, "grad_norm": 2.295471668243408, "learning_rate": 2.9941973986999685e-05, "loss": 0.4373779296875, "step": 913 }, { "epoch": 0.06178180343382452, "grad_norm": 1.5515000820159912, "learning_rate": 2.9941683682645657e-05, "loss": 0.27716064453125, "step": 914 }, { "epoch": 0.061849398404758685, "grad_norm": 3.777188539505005, "learning_rate": 2.9941392655320053e-05, "loss": 0.3128662109375, "step": 915 }, { "epoch": 0.06191699337569285, "grad_norm": 3.551015615463257, "learning_rate": 2.9941100905036954e-05, "loss": 0.34979248046875, "step": 916 }, { "epoch": 0.06198458834662701, "grad_norm": 3.4485108852386475, "learning_rate": 2.994080843181047e-05, "loss": 0.35565185546875, "step": 917 }, { "epoch": 0.062052183317561176, "grad_norm": 2.127260684967041, "learning_rate": 2.994051523565476e-05, "loss": 0.26220703125, "step": 918 }, { "epoch": 0.06211977828849534, "grad_norm": 4.317677021026611, "learning_rate": 2.9940221316584015e-05, "loss": 0.328948974609375, "step": 919 }, { "epoch": 0.062187373259429496, "grad_norm": 4.280078887939453, "learning_rate": 2.9939926674612437e-05, "loss": 0.3084716796875, "step": 920 }, { "epoch": 0.06225496823036366, "grad_norm": 2.1302154064178467, "learning_rate": 2.9939631309754306e-05, "loss": 0.251373291015625, "step": 921 }, { "epoch": 0.06232256320129782, "grad_norm": 2.8730247020721436, "learning_rate": 2.9939335222023902e-05, "loss": 0.30096435546875, "step": 922 }, { "epoch": 0.06239015817223199, "grad_norm": 3.415881395339966, "learning_rate": 2.9939038411435554e-05, "loss": 0.294891357421875, "step": 923 }, { "epoch": 0.06245775314316615, "grad_norm": 1.558887004852295, "learning_rate": 2.993874087800362e-05, "loss": 0.21783447265625, "step": 924 }, { "epoch": 0.06252534811410031, "grad_norm": 2.578927755355835, "learning_rate": 2.9938442621742505e-05, "loss": 0.3634033203125, "step": 925 }, { "epoch": 0.06259294308503448, "grad_norm": 1.0661625862121582, "learning_rate": 2.993814364266663e-05, "loss": 0.1586456298828125, "step": 926 }, { "epoch": 0.06266053805596863, "grad_norm": 2.411041736602783, "learning_rate": 2.9937843940790466e-05, "loss": 0.3658447265625, "step": 927 }, { "epoch": 0.0627281330269028, "grad_norm": 3.7546865940093994, "learning_rate": 2.993754351612852e-05, "loss": 0.43035888671875, "step": 928 }, { "epoch": 0.06279572799783696, "grad_norm": 2.1344566345214844, "learning_rate": 2.9937242368695316e-05, "loss": 0.27264404296875, "step": 929 }, { "epoch": 0.06286332296877112, "grad_norm": 2.3802075386047363, "learning_rate": 2.9936940498505436e-05, "loss": 0.28668212890625, "step": 930 }, { "epoch": 0.06293091793970529, "grad_norm": 1.743034839630127, "learning_rate": 2.9936637905573483e-05, "loss": 0.3106842041015625, "step": 931 }, { "epoch": 0.06299851291063945, "grad_norm": 1.9433679580688477, "learning_rate": 2.9936334589914097e-05, "loss": 0.23468017578125, "step": 932 }, { "epoch": 0.06306610788157362, "grad_norm": 7.249530792236328, "learning_rate": 2.9936030551541958e-05, "loss": 0.293914794921875, "step": 933 }, { "epoch": 0.06313370285250777, "grad_norm": 4.302403926849365, "learning_rate": 2.993572579047177e-05, "loss": 0.240936279296875, "step": 934 }, { "epoch": 0.06320129782344193, "grad_norm": 7.399057865142822, "learning_rate": 2.9935420306718287e-05, "loss": 0.2833251953125, "step": 935 }, { "epoch": 0.0632688927943761, "grad_norm": 5.609090805053711, "learning_rate": 2.9935114100296286e-05, "loss": 0.4122314453125, "step": 936 }, { "epoch": 0.06333648776531026, "grad_norm": 2.542248010635376, "learning_rate": 2.9934807171220584e-05, "loss": 0.2447357177734375, "step": 937 }, { "epoch": 0.06340408273624443, "grad_norm": 2.0573158264160156, "learning_rate": 2.9934499519506035e-05, "loss": 0.252899169921875, "step": 938 }, { "epoch": 0.06347167770717858, "grad_norm": 2.7631328105926514, "learning_rate": 2.9934191145167523e-05, "loss": 0.1488800048828125, "step": 939 }, { "epoch": 0.06353927267811275, "grad_norm": 4.228944778442383, "learning_rate": 2.9933882048219965e-05, "loss": 0.260772705078125, "step": 940 }, { "epoch": 0.06360686764904691, "grad_norm": 4.314843654632568, "learning_rate": 2.9933572228678324e-05, "loss": 0.282562255859375, "step": 941 }, { "epoch": 0.06367446261998107, "grad_norm": 12.160064697265625, "learning_rate": 2.9933261686557585e-05, "loss": 0.44195556640625, "step": 942 }, { "epoch": 0.06374205759091524, "grad_norm": 4.205821514129639, "learning_rate": 2.993295042187278e-05, "loss": 0.28680419921875, "step": 943 }, { "epoch": 0.0638096525618494, "grad_norm": 1.7683963775634766, "learning_rate": 2.9932638434638964e-05, "loss": 0.2503204345703125, "step": 944 }, { "epoch": 0.06387724753278357, "grad_norm": 2.2436740398406982, "learning_rate": 2.9932325724871236e-05, "loss": 0.3355712890625, "step": 945 }, { "epoch": 0.06394484250371772, "grad_norm": 2.3213655948638916, "learning_rate": 2.9932012292584726e-05, "loss": 0.40673828125, "step": 946 }, { "epoch": 0.06401243747465189, "grad_norm": 7.6259446144104, "learning_rate": 2.9931698137794603e-05, "loss": 0.38836669921875, "step": 947 }, { "epoch": 0.06408003244558605, "grad_norm": 7.237544059753418, "learning_rate": 2.993138326051606e-05, "loss": 0.36932373046875, "step": 948 }, { "epoch": 0.0641476274165202, "grad_norm": 4.069585800170898, "learning_rate": 2.993106766076434e-05, "loss": 0.33380126953125, "step": 949 }, { "epoch": 0.06421522238745438, "grad_norm": 1.5175923109054565, "learning_rate": 2.993075133855471e-05, "loss": 0.1320037841796875, "step": 950 }, { "epoch": 0.06428281735838853, "grad_norm": 2.6431100368499756, "learning_rate": 2.993043429390248e-05, "loss": 0.35101318359375, "step": 951 }, { "epoch": 0.0643504123293227, "grad_norm": 2.7325398921966553, "learning_rate": 2.9930116526822987e-05, "loss": 0.35076904296875, "step": 952 }, { "epoch": 0.06441800730025686, "grad_norm": 4.458263397216797, "learning_rate": 2.9929798037331602e-05, "loss": 0.3323974609375, "step": 953 }, { "epoch": 0.06448560227119102, "grad_norm": 7.5874738693237305, "learning_rate": 2.9929478825443743e-05, "loss": 0.41070556640625, "step": 954 }, { "epoch": 0.06455319724212519, "grad_norm": 2.6418001651763916, "learning_rate": 2.9929158891174856e-05, "loss": 0.32989501953125, "step": 955 }, { "epoch": 0.06462079221305934, "grad_norm": 1.7865201234817505, "learning_rate": 2.9928838234540416e-05, "loss": 0.34210205078125, "step": 956 }, { "epoch": 0.06468838718399351, "grad_norm": 5.803066253662109, "learning_rate": 2.9928516855555942e-05, "loss": 0.4161376953125, "step": 957 }, { "epoch": 0.06475598215492767, "grad_norm": 2.52872371673584, "learning_rate": 2.992819475423698e-05, "loss": 0.2250823974609375, "step": 958 }, { "epoch": 0.06482357712586184, "grad_norm": 2.169461488723755, "learning_rate": 2.9927871930599123e-05, "loss": 0.31817626953125, "step": 959 }, { "epoch": 0.064891172096796, "grad_norm": 2.1494691371917725, "learning_rate": 2.9927548384657986e-05, "loss": 0.20597076416015625, "step": 960 }, { "epoch": 0.06495876706773016, "grad_norm": 3.500654935836792, "learning_rate": 2.992722411642922e-05, "loss": 0.2696533203125, "step": 961 }, { "epoch": 0.06502636203866433, "grad_norm": 1.9903008937835693, "learning_rate": 2.9926899125928524e-05, "loss": 0.34954833984375, "step": 962 }, { "epoch": 0.06509395700959848, "grad_norm": 2.913426637649536, "learning_rate": 2.9926573413171618e-05, "loss": 0.36083984375, "step": 963 }, { "epoch": 0.06516155198053265, "grad_norm": 2.46018123626709, "learning_rate": 2.992624697817426e-05, "loss": 0.35345458984375, "step": 964 }, { "epoch": 0.06522914695146681, "grad_norm": 4.719215393066406, "learning_rate": 2.992591982095225e-05, "loss": 0.34759521484375, "step": 965 }, { "epoch": 0.06529674192240097, "grad_norm": 3.4444632530212402, "learning_rate": 2.992559194152142e-05, "loss": 0.3489990234375, "step": 966 }, { "epoch": 0.06536433689333514, "grad_norm": 2.1192281246185303, "learning_rate": 2.9925263339897623e-05, "loss": 0.1793365478515625, "step": 967 }, { "epoch": 0.0654319318642693, "grad_norm": 1.8266404867172241, "learning_rate": 2.9924934016096775e-05, "loss": 0.159423828125, "step": 968 }, { "epoch": 0.06549952683520346, "grad_norm": 1.6912132501602173, "learning_rate": 2.9924603970134793e-05, "loss": 0.24908447265625, "step": 969 }, { "epoch": 0.06556712180613762, "grad_norm": 4.40051794052124, "learning_rate": 2.9924273202027665e-05, "loss": 0.24359130859375, "step": 970 }, { "epoch": 0.06563471677707179, "grad_norm": 1.0698490142822266, "learning_rate": 2.9923941711791376e-05, "loss": 0.229278564453125, "step": 971 }, { "epoch": 0.06570231174800595, "grad_norm": 6.669766426086426, "learning_rate": 2.9923609499441983e-05, "loss": 0.34429931640625, "step": 972 }, { "epoch": 0.0657699067189401, "grad_norm": 3.3933262825012207, "learning_rate": 2.9923276564995553e-05, "loss": 0.37310791015625, "step": 973 }, { "epoch": 0.06583750168987428, "grad_norm": 2.252594232559204, "learning_rate": 2.9922942908468195e-05, "loss": 0.19866943359375, "step": 974 }, { "epoch": 0.06590509666080843, "grad_norm": 1.8300126791000366, "learning_rate": 2.9922608529876052e-05, "loss": 0.241851806640625, "step": 975 }, { "epoch": 0.0659726916317426, "grad_norm": 2.8313872814178467, "learning_rate": 2.992227342923531e-05, "loss": 0.2354736328125, "step": 976 }, { "epoch": 0.06604028660267676, "grad_norm": 3.4829695224761963, "learning_rate": 2.9921937606562177e-05, "loss": 0.241790771484375, "step": 977 }, { "epoch": 0.06610788157361093, "grad_norm": 4.388527870178223, "learning_rate": 2.99216010618729e-05, "loss": 0.312255859375, "step": 978 }, { "epoch": 0.06617547654454509, "grad_norm": 2.094036102294922, "learning_rate": 2.9921263795183773e-05, "loss": 0.2449493408203125, "step": 979 }, { "epoch": 0.06624307151547924, "grad_norm": 1.8872393369674683, "learning_rate": 2.992092580651111e-05, "loss": 0.318878173828125, "step": 980 }, { "epoch": 0.06631066648641341, "grad_norm": 2.617851972579956, "learning_rate": 2.9920587095871262e-05, "loss": 0.2256927490234375, "step": 981 }, { "epoch": 0.06637826145734757, "grad_norm": 2.1570188999176025, "learning_rate": 2.9920247663280615e-05, "loss": 0.31201171875, "step": 982 }, { "epoch": 0.06644585642828174, "grad_norm": 1.6266995668411255, "learning_rate": 2.9919907508755605e-05, "loss": 0.199615478515625, "step": 983 }, { "epoch": 0.0665134513992159, "grad_norm": 2.233506202697754, "learning_rate": 2.991956663231268e-05, "loss": 0.328369140625, "step": 984 }, { "epoch": 0.06658104637015005, "grad_norm": 4.697340965270996, "learning_rate": 2.9919225033968344e-05, "loss": 0.384063720703125, "step": 985 }, { "epoch": 0.06664864134108422, "grad_norm": 3.1352462768554688, "learning_rate": 2.9918882713739113e-05, "loss": 0.319793701171875, "step": 986 }, { "epoch": 0.06671623631201838, "grad_norm": 4.53118896484375, "learning_rate": 2.9918539671641553e-05, "loss": 0.3638916015625, "step": 987 }, { "epoch": 0.06678383128295255, "grad_norm": 5.398708343505859, "learning_rate": 2.991819590769227e-05, "loss": 0.3759765625, "step": 988 }, { "epoch": 0.06685142625388671, "grad_norm": 3.3948183059692383, "learning_rate": 2.99178514219079e-05, "loss": 0.262054443359375, "step": 989 }, { "epoch": 0.06691902122482088, "grad_norm": 3.0235471725463867, "learning_rate": 2.9917506214305098e-05, "loss": 0.27734375, "step": 990 }, { "epoch": 0.06698661619575504, "grad_norm": 1.8846871852874756, "learning_rate": 2.9917160284900575e-05, "loss": 0.315460205078125, "step": 991 }, { "epoch": 0.06705421116668919, "grad_norm": 5.768362522125244, "learning_rate": 2.991681363371107e-05, "loss": 0.1666259765625, "step": 992 }, { "epoch": 0.06712180613762336, "grad_norm": 2.4053125381469727, "learning_rate": 2.9916466260753358e-05, "loss": 0.2337188720703125, "step": 993 }, { "epoch": 0.06718940110855752, "grad_norm": 0.700771152973175, "learning_rate": 2.991611816604424e-05, "loss": 0.119140625, "step": 994 }, { "epoch": 0.06725699607949169, "grad_norm": 4.065103530883789, "learning_rate": 2.9915769349600565e-05, "loss": 0.2557373046875, "step": 995 }, { "epoch": 0.06732459105042585, "grad_norm": 6.0973124504089355, "learning_rate": 2.9915419811439207e-05, "loss": 0.3353271484375, "step": 996 }, { "epoch": 0.06739218602136002, "grad_norm": 0.9965757727622986, "learning_rate": 2.991506955157708e-05, "loss": 0.17120361328125, "step": 997 }, { "epoch": 0.06745978099229417, "grad_norm": 2.5820212364196777, "learning_rate": 2.991471857003113e-05, "loss": 0.18888092041015625, "step": 998 }, { "epoch": 0.06752737596322833, "grad_norm": 1.5595487356185913, "learning_rate": 2.991436686681835e-05, "loss": 0.244842529296875, "step": 999 }, { "epoch": 0.0675949709341625, "grad_norm": 2.3681833744049072, "learning_rate": 2.991401444195574e-05, "loss": 0.31610107421875, "step": 1000 }, { "epoch": 0.06766256590509666, "grad_norm": 5.5928168296813965, "learning_rate": 2.9913661295460367e-05, "loss": 0.36175537109375, "step": 1001 }, { "epoch": 0.06773016087603083, "grad_norm": 2.8246119022369385, "learning_rate": 2.9913307427349314e-05, "loss": 0.36407470703125, "step": 1002 }, { "epoch": 0.06779775584696499, "grad_norm": 7.157655715942383, "learning_rate": 2.99129528376397e-05, "loss": 0.23211669921875, "step": 1003 }, { "epoch": 0.06786535081789914, "grad_norm": 1.4569354057312012, "learning_rate": 2.9912597526348686e-05, "loss": 0.239288330078125, "step": 1004 }, { "epoch": 0.06793294578883331, "grad_norm": 3.5440545082092285, "learning_rate": 2.9912241493493467e-05, "loss": 0.3516845703125, "step": 1005 }, { "epoch": 0.06800054075976747, "grad_norm": 6.389584064483643, "learning_rate": 2.991188473909126e-05, "loss": 0.3079833984375, "step": 1006 }, { "epoch": 0.06806813573070164, "grad_norm": 1.8989577293395996, "learning_rate": 2.9911527263159337e-05, "loss": 0.300018310546875, "step": 1007 }, { "epoch": 0.0681357307016358, "grad_norm": 3.0918173789978027, "learning_rate": 2.9911169065714992e-05, "loss": 0.3714599609375, "step": 1008 }, { "epoch": 0.06820332567256997, "grad_norm": 10.402778625488281, "learning_rate": 2.9910810146775555e-05, "loss": 0.402587890625, "step": 1009 }, { "epoch": 0.06827092064350412, "grad_norm": 3.082749128341675, "learning_rate": 2.991045050635839e-05, "loss": 0.31591796875, "step": 1010 }, { "epoch": 0.06833851561443828, "grad_norm": 0.8481048345565796, "learning_rate": 2.991009014448091e-05, "loss": 0.154632568359375, "step": 1011 }, { "epoch": 0.06840611058537245, "grad_norm": 3.4355549812316895, "learning_rate": 2.990972906116054e-05, "loss": 0.294281005859375, "step": 1012 }, { "epoch": 0.06847370555630661, "grad_norm": 3.679360866546631, "learning_rate": 2.9909367256414754e-05, "loss": 0.37200927734375, "step": 1013 }, { "epoch": 0.06854130052724078, "grad_norm": 2.2166855335235596, "learning_rate": 2.9909004730261062e-05, "loss": 0.239593505859375, "step": 1014 }, { "epoch": 0.06860889549817493, "grad_norm": 1.6607887744903564, "learning_rate": 2.9908641482717e-05, "loss": 0.171661376953125, "step": 1015 }, { "epoch": 0.06867649046910909, "grad_norm": 3.882861614227295, "learning_rate": 2.990827751380015e-05, "loss": 0.2148284912109375, "step": 1016 }, { "epoch": 0.06874408544004326, "grad_norm": 3.4247052669525146, "learning_rate": 2.990791282352812e-05, "loss": 0.3504638671875, "step": 1017 }, { "epoch": 0.06881168041097742, "grad_norm": 2.0026166439056396, "learning_rate": 2.9907547411918556e-05, "loss": 0.295196533203125, "step": 1018 }, { "epoch": 0.06887927538191159, "grad_norm": 2.626009225845337, "learning_rate": 2.9907181278989138e-05, "loss": 0.337158203125, "step": 1019 }, { "epoch": 0.06894687035284575, "grad_norm": 4.688887119293213, "learning_rate": 2.9906814424757585e-05, "loss": 0.36962890625, "step": 1020 }, { "epoch": 0.06901446532377992, "grad_norm": 5.1586222648620605, "learning_rate": 2.9906446849241648e-05, "loss": 0.33087158203125, "step": 1021 }, { "epoch": 0.06908206029471407, "grad_norm": 6.544078350067139, "learning_rate": 2.99060785524591e-05, "loss": 0.2911376953125, "step": 1022 }, { "epoch": 0.06914965526564823, "grad_norm": 3.1055679321289062, "learning_rate": 2.9905709534427783e-05, "loss": 0.272735595703125, "step": 1023 }, { "epoch": 0.0692172502365824, "grad_norm": 4.182290554046631, "learning_rate": 2.9905339795165535e-05, "loss": 0.29638671875, "step": 1024 }, { "epoch": 0.06928484520751656, "grad_norm": 3.02264666557312, "learning_rate": 2.9904969334690254e-05, "loss": 0.1474761962890625, "step": 1025 }, { "epoch": 0.06935244017845073, "grad_norm": 2.575258731842041, "learning_rate": 2.9904598153019866e-05, "loss": 0.38092041015625, "step": 1026 }, { "epoch": 0.06942003514938488, "grad_norm": 2.7298059463500977, "learning_rate": 2.9904226250172325e-05, "loss": 0.32293701171875, "step": 1027 }, { "epoch": 0.06948763012031905, "grad_norm": 2.478376865386963, "learning_rate": 2.990385362616563e-05, "loss": 0.1909027099609375, "step": 1028 }, { "epoch": 0.06955522509125321, "grad_norm": 2.5867536067962646, "learning_rate": 2.9903480281017815e-05, "loss": 0.2615203857421875, "step": 1029 }, { "epoch": 0.06962282006218737, "grad_norm": 3.725236177444458, "learning_rate": 2.9903106214746936e-05, "loss": 0.2230224609375, "step": 1030 }, { "epoch": 0.06969041503312154, "grad_norm": 1.3618581295013428, "learning_rate": 2.9902731427371096e-05, "loss": 0.264892578125, "step": 1031 }, { "epoch": 0.0697580100040557, "grad_norm": 2.0469281673431396, "learning_rate": 2.990235591890843e-05, "loss": 0.31524658203125, "step": 1032 }, { "epoch": 0.06982560497498987, "grad_norm": 2.4992752075195312, "learning_rate": 2.9901979689377112e-05, "loss": 0.36810302734375, "step": 1033 }, { "epoch": 0.06989319994592402, "grad_norm": 1.9635385274887085, "learning_rate": 2.990160273879534e-05, "loss": 0.28875732421875, "step": 1034 }, { "epoch": 0.06996079491685818, "grad_norm": 5.934496879577637, "learning_rate": 2.9901225067181357e-05, "loss": 0.369873046875, "step": 1035 }, { "epoch": 0.07002838988779235, "grad_norm": 2.331488847732544, "learning_rate": 2.9900846674553433e-05, "loss": 0.3790283203125, "step": 1036 }, { "epoch": 0.0700959848587265, "grad_norm": 1.1660947799682617, "learning_rate": 2.990046756092988e-05, "loss": 0.228057861328125, "step": 1037 }, { "epoch": 0.07016357982966068, "grad_norm": 1.737026572227478, "learning_rate": 2.9900087726329044e-05, "loss": 0.235260009765625, "step": 1038 }, { "epoch": 0.07023117480059483, "grad_norm": 2.3932085037231445, "learning_rate": 2.98997071707693e-05, "loss": 0.267974853515625, "step": 1039 }, { "epoch": 0.070298769771529, "grad_norm": 3.4914515018463135, "learning_rate": 2.989932589426906e-05, "loss": 0.390045166015625, "step": 1040 }, { "epoch": 0.07036636474246316, "grad_norm": 2.949894428253174, "learning_rate": 2.9898943896846776e-05, "loss": 0.267364501953125, "step": 1041 }, { "epoch": 0.07043395971339732, "grad_norm": 3.4665064811706543, "learning_rate": 2.989856117852093e-05, "loss": 0.32476806640625, "step": 1042 }, { "epoch": 0.07050155468433149, "grad_norm": 3.133594512939453, "learning_rate": 2.989817773931005e-05, "loss": 0.3291015625, "step": 1043 }, { "epoch": 0.07056914965526564, "grad_norm": 2.6318612098693848, "learning_rate": 2.989779357923267e-05, "loss": 0.301025390625, "step": 1044 }, { "epoch": 0.07063674462619982, "grad_norm": 6.266139030456543, "learning_rate": 2.989740869830739e-05, "loss": 0.32818603515625, "step": 1045 }, { "epoch": 0.07070433959713397, "grad_norm": 4.594328880310059, "learning_rate": 2.9897023096552837e-05, "loss": 0.28057861328125, "step": 1046 }, { "epoch": 0.07077193456806813, "grad_norm": 3.5969324111938477, "learning_rate": 2.9896636773987658e-05, "loss": 0.266510009765625, "step": 1047 }, { "epoch": 0.0708395295390023, "grad_norm": 2.998670816421509, "learning_rate": 2.9896249730630546e-05, "loss": 0.25128173828125, "step": 1048 }, { "epoch": 0.07090712450993646, "grad_norm": 2.366304874420166, "learning_rate": 2.9895861966500242e-05, "loss": 0.38372802734375, "step": 1049 }, { "epoch": 0.07097471948087063, "grad_norm": 4.328437805175781, "learning_rate": 2.9895473481615495e-05, "loss": 0.35968017578125, "step": 1050 }, { "epoch": 0.07104231445180478, "grad_norm": 4.277622222900391, "learning_rate": 2.989508427599511e-05, "loss": 0.355712890625, "step": 1051 }, { "epoch": 0.07110990942273895, "grad_norm": 5.585047721862793, "learning_rate": 2.9894694349657915e-05, "loss": 0.37823486328125, "step": 1052 }, { "epoch": 0.07117750439367311, "grad_norm": 3.1371397972106934, "learning_rate": 2.9894303702622775e-05, "loss": 0.2686767578125, "step": 1053 }, { "epoch": 0.07124509936460727, "grad_norm": 3.4903974533081055, "learning_rate": 2.98939123349086e-05, "loss": 0.236785888671875, "step": 1054 }, { "epoch": 0.07131269433554144, "grad_norm": 1.9179551601409912, "learning_rate": 2.989352024653432e-05, "loss": 0.3751220703125, "step": 1055 }, { "epoch": 0.0713802893064756, "grad_norm": 2.122344493865967, "learning_rate": 2.989312743751891e-05, "loss": 0.36749267578125, "step": 1056 }, { "epoch": 0.07144788427740976, "grad_norm": 6.154302597045898, "learning_rate": 2.9892733907881375e-05, "loss": 0.31817626953125, "step": 1057 }, { "epoch": 0.07151547924834392, "grad_norm": 7.448545455932617, "learning_rate": 2.9892339657640753e-05, "loss": 0.34039306640625, "step": 1058 }, { "epoch": 0.07158307421927809, "grad_norm": 3.9243950843811035, "learning_rate": 2.9891944686816124e-05, "loss": 0.3621826171875, "step": 1059 }, { "epoch": 0.07165066919021225, "grad_norm": 5.02978515625, "learning_rate": 2.9891548995426606e-05, "loss": 0.2222900390625, "step": 1060 }, { "epoch": 0.0717182641611464, "grad_norm": 1.8380616903305054, "learning_rate": 2.9891152583491332e-05, "loss": 0.37451171875, "step": 1061 }, { "epoch": 0.07178585913208058, "grad_norm": 1.8317123651504517, "learning_rate": 2.9890755451029488e-05, "loss": 0.308319091796875, "step": 1062 }, { "epoch": 0.07185345410301473, "grad_norm": 1.1169795989990234, "learning_rate": 2.9890357598060298e-05, "loss": 0.170166015625, "step": 1063 }, { "epoch": 0.0719210490739489, "grad_norm": 1.586380958557129, "learning_rate": 2.9889959024602998e-05, "loss": 0.276641845703125, "step": 1064 }, { "epoch": 0.07198864404488306, "grad_norm": 12.195116996765137, "learning_rate": 2.9889559730676882e-05, "loss": 0.4573974609375, "step": 1065 }, { "epoch": 0.07205623901581722, "grad_norm": 8.945344924926758, "learning_rate": 2.9889159716301272e-05, "loss": 0.4173583984375, "step": 1066 }, { "epoch": 0.07212383398675139, "grad_norm": 3.9901020526885986, "learning_rate": 2.9888758981495517e-05, "loss": 0.3348388671875, "step": 1067 }, { "epoch": 0.07219142895768554, "grad_norm": 4.318349838256836, "learning_rate": 2.9888357526279008e-05, "loss": 0.237640380859375, "step": 1068 }, { "epoch": 0.07225902392861971, "grad_norm": 4.357712745666504, "learning_rate": 2.988795535067118e-05, "loss": 0.2816162109375, "step": 1069 }, { "epoch": 0.07232661889955387, "grad_norm": 1.7170931100845337, "learning_rate": 2.988755245469148e-05, "loss": 0.26251220703125, "step": 1070 }, { "epoch": 0.07239421387048804, "grad_norm": 6.27475118637085, "learning_rate": 2.9887148838359406e-05, "loss": 0.35491943359375, "step": 1071 }, { "epoch": 0.0724618088414222, "grad_norm": 6.175956726074219, "learning_rate": 2.9886744501694494e-05, "loss": 0.324676513671875, "step": 1072 }, { "epoch": 0.07252940381235636, "grad_norm": 7.450769424438477, "learning_rate": 2.98863394447163e-05, "loss": 0.340576171875, "step": 1073 }, { "epoch": 0.07259699878329053, "grad_norm": 2.634697675704956, "learning_rate": 2.9885933667444424e-05, "loss": 0.26239013671875, "step": 1074 }, { "epoch": 0.07266459375422468, "grad_norm": 2.316862106323242, "learning_rate": 2.9885527169898506e-05, "loss": 0.3392333984375, "step": 1075 }, { "epoch": 0.07273218872515885, "grad_norm": 1.6571762561798096, "learning_rate": 2.9885119952098214e-05, "loss": 0.3521728515625, "step": 1076 }, { "epoch": 0.07279978369609301, "grad_norm": 2.490111827850342, "learning_rate": 2.9884712014063246e-05, "loss": 0.282257080078125, "step": 1077 }, { "epoch": 0.07286737866702718, "grad_norm": 1.2095770835876465, "learning_rate": 2.9884303355813343e-05, "loss": 0.2471923828125, "step": 1078 }, { "epoch": 0.07293497363796134, "grad_norm": 2.171082019805908, "learning_rate": 2.988389397736828e-05, "loss": 0.31329345703125, "step": 1079 }, { "epoch": 0.0730025686088955, "grad_norm": 2.336338520050049, "learning_rate": 2.9883483878747863e-05, "loss": 0.32470703125, "step": 1080 }, { "epoch": 0.07307016357982966, "grad_norm": 4.306347846984863, "learning_rate": 2.988307305997194e-05, "loss": 0.269256591796875, "step": 1081 }, { "epoch": 0.07313775855076382, "grad_norm": 1.4368302822113037, "learning_rate": 2.9882661521060382e-05, "loss": 0.2548828125, "step": 1082 }, { "epoch": 0.07320535352169799, "grad_norm": 1.2577300071716309, "learning_rate": 2.988224926203311e-05, "loss": 0.22540283203125, "step": 1083 }, { "epoch": 0.07327294849263215, "grad_norm": 1.4476953744888306, "learning_rate": 2.9881836282910062e-05, "loss": 0.2225799560546875, "step": 1084 }, { "epoch": 0.0733405434635663, "grad_norm": 1.7728365659713745, "learning_rate": 2.988142258371123e-05, "loss": 0.28985595703125, "step": 1085 }, { "epoch": 0.07340813843450048, "grad_norm": 4.387607097625732, "learning_rate": 2.9881008164456623e-05, "loss": 0.374786376953125, "step": 1086 }, { "epoch": 0.07347573340543463, "grad_norm": 2.997205972671509, "learning_rate": 2.9880593025166298e-05, "loss": 0.351837158203125, "step": 1087 }, { "epoch": 0.0735433283763688, "grad_norm": 2.430786371231079, "learning_rate": 2.9880177165860343e-05, "loss": 0.3521728515625, "step": 1088 }, { "epoch": 0.07361092334730296, "grad_norm": 2.04024600982666, "learning_rate": 2.9879760586558876e-05, "loss": 0.3221435546875, "step": 1089 }, { "epoch": 0.07367851831823713, "grad_norm": 1.3609308004379272, "learning_rate": 2.9879343287282054e-05, "loss": 0.18939208984375, "step": 1090 }, { "epoch": 0.07374611328917129, "grad_norm": 3.053154468536377, "learning_rate": 2.9878925268050072e-05, "loss": 0.132354736328125, "step": 1091 }, { "epoch": 0.07381370826010544, "grad_norm": 3.7855610847473145, "learning_rate": 2.9878506528883152e-05, "loss": 0.305908203125, "step": 1092 }, { "epoch": 0.07388130323103961, "grad_norm": 3.1246254444122314, "learning_rate": 2.987808706980156e-05, "loss": 0.271514892578125, "step": 1093 }, { "epoch": 0.07394889820197377, "grad_norm": 2.40809965133667, "learning_rate": 2.987766689082559e-05, "loss": 0.34820556640625, "step": 1094 }, { "epoch": 0.07401649317290794, "grad_norm": 2.303943395614624, "learning_rate": 2.9877245991975574e-05, "loss": 0.32501220703125, "step": 1095 }, { "epoch": 0.0740840881438421, "grad_norm": 5.474114418029785, "learning_rate": 2.9876824373271872e-05, "loss": 0.372589111328125, "step": 1096 }, { "epoch": 0.07415168311477625, "grad_norm": 6.439183712005615, "learning_rate": 2.9876402034734893e-05, "loss": 0.372955322265625, "step": 1097 }, { "epoch": 0.07421927808571042, "grad_norm": 3.7954611778259277, "learning_rate": 2.987597897638507e-05, "loss": 0.1778717041015625, "step": 1098 }, { "epoch": 0.07428687305664458, "grad_norm": 3.435333490371704, "learning_rate": 2.9875555198242867e-05, "loss": 0.34869384765625, "step": 1099 }, { "epoch": 0.07435446802757875, "grad_norm": 2.829503297805786, "learning_rate": 2.9875130700328796e-05, "loss": 0.3402099609375, "step": 1100 }, { "epoch": 0.07442206299851291, "grad_norm": 3.8426132202148438, "learning_rate": 2.987470548266339e-05, "loss": 0.31427001953125, "step": 1101 }, { "epoch": 0.07448965796944708, "grad_norm": 2.5931308269500732, "learning_rate": 2.9874279545267233e-05, "loss": 0.28228759765625, "step": 1102 }, { "epoch": 0.07455725294038124, "grad_norm": 4.25966739654541, "learning_rate": 2.9873852888160924e-05, "loss": 0.30010986328125, "step": 1103 }, { "epoch": 0.07462484791131539, "grad_norm": 3.144287109375, "learning_rate": 2.9873425511365116e-05, "loss": 0.36865234375, "step": 1104 }, { "epoch": 0.07469244288224956, "grad_norm": 1.3509451150894165, "learning_rate": 2.9872997414900487e-05, "loss": 0.3253173828125, "step": 1105 }, { "epoch": 0.07476003785318372, "grad_norm": 2.3140506744384766, "learning_rate": 2.9872568598787748e-05, "loss": 0.36907958984375, "step": 1106 }, { "epoch": 0.07482763282411789, "grad_norm": 2.2917487621307373, "learning_rate": 2.9872139063047645e-05, "loss": 0.3370361328125, "step": 1107 }, { "epoch": 0.07489522779505205, "grad_norm": 4.340874671936035, "learning_rate": 2.9871708807700968e-05, "loss": 0.2311248779296875, "step": 1108 }, { "epoch": 0.07496282276598622, "grad_norm": 5.0908050537109375, "learning_rate": 2.9871277832768533e-05, "loss": 0.377777099609375, "step": 1109 }, { "epoch": 0.07503041773692037, "grad_norm": 1.256335973739624, "learning_rate": 2.987084613827119e-05, "loss": 0.1681060791015625, "step": 1110 }, { "epoch": 0.07509801270785453, "grad_norm": 2.313063859939575, "learning_rate": 2.9870413724229836e-05, "loss": 0.292022705078125, "step": 1111 }, { "epoch": 0.0751656076787887, "grad_norm": 2.920444965362549, "learning_rate": 2.9869980590665384e-05, "loss": 0.294921875, "step": 1112 }, { "epoch": 0.07523320264972286, "grad_norm": 2.966792583465576, "learning_rate": 2.9869546737598796e-05, "loss": 0.27020263671875, "step": 1113 }, { "epoch": 0.07530079762065703, "grad_norm": 1.1114238500595093, "learning_rate": 2.9869112165051063e-05, "loss": 0.0947418212890625, "step": 1114 }, { "epoch": 0.07536839259159119, "grad_norm": 1.6693717241287231, "learning_rate": 2.9868676873043217e-05, "loss": 0.22137451171875, "step": 1115 }, { "epoch": 0.07543598756252534, "grad_norm": 2.1763205528259277, "learning_rate": 2.9868240861596313e-05, "loss": 0.3629150390625, "step": 1116 }, { "epoch": 0.07550358253345951, "grad_norm": 3.2987990379333496, "learning_rate": 2.986780413073145e-05, "loss": 0.231719970703125, "step": 1117 }, { "epoch": 0.07557117750439367, "grad_norm": 2.2529027462005615, "learning_rate": 2.9867366680469768e-05, "loss": 0.288238525390625, "step": 1118 }, { "epoch": 0.07563877247532784, "grad_norm": 1.0937050580978394, "learning_rate": 2.986692851083242e-05, "loss": 0.13729095458984375, "step": 1119 }, { "epoch": 0.075706367446262, "grad_norm": 3.2072627544403076, "learning_rate": 2.986648962184062e-05, "loss": 0.42425537109375, "step": 1120 }, { "epoch": 0.07577396241719617, "grad_norm": 3.957418203353882, "learning_rate": 2.986605001351559e-05, "loss": 0.343780517578125, "step": 1121 }, { "epoch": 0.07584155738813032, "grad_norm": 1.1564360857009888, "learning_rate": 2.986560968587862e-05, "loss": 0.31781005859375, "step": 1122 }, { "epoch": 0.07590915235906448, "grad_norm": 2.5053606033325195, "learning_rate": 2.9865168638951e-05, "loss": 0.25775909423828125, "step": 1123 }, { "epoch": 0.07597674732999865, "grad_norm": 3.4028425216674805, "learning_rate": 2.9864726872754075e-05, "loss": 0.39300537109375, "step": 1124 }, { "epoch": 0.07604434230093281, "grad_norm": 2.65828275680542, "learning_rate": 2.986428438730922e-05, "loss": 0.32666015625, "step": 1125 }, { "epoch": 0.07611193727186698, "grad_norm": 2.4403321743011475, "learning_rate": 2.9863841182637846e-05, "loss": 0.3421630859375, "step": 1126 }, { "epoch": 0.07617953224280113, "grad_norm": 1.7528375387191772, "learning_rate": 2.98633972587614e-05, "loss": 0.337890625, "step": 1127 }, { "epoch": 0.07624712721373529, "grad_norm": 2.216543674468994, "learning_rate": 2.9862952615701365e-05, "loss": 0.26434326171875, "step": 1128 }, { "epoch": 0.07631472218466946, "grad_norm": 1.3554085493087769, "learning_rate": 2.9862507253479244e-05, "loss": 0.2855224609375, "step": 1129 }, { "epoch": 0.07638231715560362, "grad_norm": 1.7038875818252563, "learning_rate": 2.9862061172116593e-05, "loss": 0.276123046875, "step": 1130 }, { "epoch": 0.07644991212653779, "grad_norm": 2.3540360927581787, "learning_rate": 2.9861614371635e-05, "loss": 0.33465576171875, "step": 1131 }, { "epoch": 0.07651750709747195, "grad_norm": 4.135478973388672, "learning_rate": 2.986116685205608e-05, "loss": 0.27752685546875, "step": 1132 }, { "epoch": 0.07658510206840612, "grad_norm": 1.604796290397644, "learning_rate": 2.9860718613401487e-05, "loss": 0.244720458984375, "step": 1133 }, { "epoch": 0.07665269703934027, "grad_norm": 3.0656042098999023, "learning_rate": 2.9860269655692912e-05, "loss": 0.37109375, "step": 1134 }, { "epoch": 0.07672029201027443, "grad_norm": 2.2606868743896484, "learning_rate": 2.985981997895207e-05, "loss": 0.3192138671875, "step": 1135 }, { "epoch": 0.0767878869812086, "grad_norm": 2.7403602600097656, "learning_rate": 2.985936958320073e-05, "loss": 0.214447021484375, "step": 1136 }, { "epoch": 0.07685548195214276, "grad_norm": 3.859208822250366, "learning_rate": 2.9858918468460678e-05, "loss": 0.299835205078125, "step": 1137 }, { "epoch": 0.07692307692307693, "grad_norm": 2.707573175430298, "learning_rate": 2.9858466634753744e-05, "loss": 0.31591796875, "step": 1138 }, { "epoch": 0.07699067189401108, "grad_norm": 2.1711339950561523, "learning_rate": 2.985801408210179e-05, "loss": 0.202301025390625, "step": 1139 }, { "epoch": 0.07705826686494525, "grad_norm": 3.4871160984039307, "learning_rate": 2.9857560810526712e-05, "loss": 0.36846923828125, "step": 1140 }, { "epoch": 0.07712586183587941, "grad_norm": 3.4007513523101807, "learning_rate": 2.9857106820050447e-05, "loss": 0.3626708984375, "step": 1141 }, { "epoch": 0.07719345680681357, "grad_norm": 2.1759941577911377, "learning_rate": 2.985665211069496e-05, "loss": 0.30364990234375, "step": 1142 }, { "epoch": 0.07726105177774774, "grad_norm": 4.267189979553223, "learning_rate": 2.985619668248225e-05, "loss": 0.25189208984375, "step": 1143 }, { "epoch": 0.0773286467486819, "grad_norm": 1.9259662628173828, "learning_rate": 2.985574053543435e-05, "loss": 0.2661590576171875, "step": 1144 }, { "epoch": 0.07739624171961607, "grad_norm": 4.729660987854004, "learning_rate": 2.985528366957334e-05, "loss": 0.24530029296875, "step": 1145 }, { "epoch": 0.07746383669055022, "grad_norm": 2.2523531913757324, "learning_rate": 2.9854826084921326e-05, "loss": 0.228118896484375, "step": 1146 }, { "epoch": 0.07753143166148438, "grad_norm": 3.8669514656066895, "learning_rate": 2.9854367781500437e-05, "loss": 0.30908203125, "step": 1147 }, { "epoch": 0.07759902663241855, "grad_norm": 1.1907306909561157, "learning_rate": 2.985390875933286e-05, "loss": 0.232666015625, "step": 1148 }, { "epoch": 0.0776666216033527, "grad_norm": 6.965292930603027, "learning_rate": 2.9853449018440807e-05, "loss": 0.41278076171875, "step": 1149 }, { "epoch": 0.07773421657428688, "grad_norm": 2.856097459793091, "learning_rate": 2.985298855884651e-05, "loss": 0.266265869140625, "step": 1150 }, { "epoch": 0.07780181154522103, "grad_norm": 1.244384765625, "learning_rate": 2.985252738057226e-05, "loss": 0.28643798828125, "step": 1151 }, { "epoch": 0.0778694065161552, "grad_norm": 1.6307542324066162, "learning_rate": 2.9852065483640366e-05, "loss": 0.2467803955078125, "step": 1152 }, { "epoch": 0.07793700148708936, "grad_norm": 3.5743794441223145, "learning_rate": 2.9851602868073187e-05, "loss": 0.327362060546875, "step": 1153 }, { "epoch": 0.07800459645802352, "grad_norm": 2.1480095386505127, "learning_rate": 2.9851139533893093e-05, "loss": 0.28143310546875, "step": 1154 }, { "epoch": 0.07807219142895769, "grad_norm": 3.548755407333374, "learning_rate": 2.9850675481122514e-05, "loss": 0.3560791015625, "step": 1155 }, { "epoch": 0.07813978639989184, "grad_norm": 2.41436505317688, "learning_rate": 2.9850210709783898e-05, "loss": 0.211395263671875, "step": 1156 }, { "epoch": 0.07820738137082602, "grad_norm": 3.728111982345581, "learning_rate": 2.9849745219899734e-05, "loss": 0.2907867431640625, "step": 1157 }, { "epoch": 0.07827497634176017, "grad_norm": 2.977987051010132, "learning_rate": 2.9849279011492548e-05, "loss": 0.324493408203125, "step": 1158 }, { "epoch": 0.07834257131269434, "grad_norm": 1.856919527053833, "learning_rate": 2.9848812084584897e-05, "loss": 0.30810546875, "step": 1159 }, { "epoch": 0.0784101662836285, "grad_norm": 2.5305986404418945, "learning_rate": 2.9848344439199374e-05, "loss": 0.363250732421875, "step": 1160 }, { "epoch": 0.07847776125456266, "grad_norm": 2.8955230712890625, "learning_rate": 2.9847876075358604e-05, "loss": 0.2618408203125, "step": 1161 }, { "epoch": 0.07854535622549683, "grad_norm": 2.8423051834106445, "learning_rate": 2.9847406993085254e-05, "loss": 0.2427825927734375, "step": 1162 }, { "epoch": 0.07861295119643098, "grad_norm": 1.985661506652832, "learning_rate": 2.9846937192402018e-05, "loss": 0.32244873046875, "step": 1163 }, { "epoch": 0.07868054616736515, "grad_norm": 3.0666582584381104, "learning_rate": 2.984646667333163e-05, "loss": 0.302825927734375, "step": 1164 }, { "epoch": 0.07874814113829931, "grad_norm": 2.43558406829834, "learning_rate": 2.984599543589685e-05, "loss": 0.3739013671875, "step": 1165 }, { "epoch": 0.07881573610923347, "grad_norm": 3.679013729095459, "learning_rate": 2.9845523480120487e-05, "loss": 0.31219482421875, "step": 1166 }, { "epoch": 0.07888333108016764, "grad_norm": 2.3743929862976074, "learning_rate": 2.984505080602538e-05, "loss": 0.3135986328125, "step": 1167 }, { "epoch": 0.0789509260511018, "grad_norm": 4.159559726715088, "learning_rate": 2.984457741363439e-05, "loss": 0.32550048828125, "step": 1168 }, { "epoch": 0.07901852102203596, "grad_norm": 1.4296106100082397, "learning_rate": 2.984410330297043e-05, "loss": 0.306884765625, "step": 1169 }, { "epoch": 0.07908611599297012, "grad_norm": 3.712912082672119, "learning_rate": 2.9843628474056436e-05, "loss": 0.40264892578125, "step": 1170 }, { "epoch": 0.07915371096390429, "grad_norm": 4.759713649749756, "learning_rate": 2.9843152926915382e-05, "loss": 0.291412353515625, "step": 1171 }, { "epoch": 0.07922130593483845, "grad_norm": 1.7838685512542725, "learning_rate": 2.984267666157028e-05, "loss": 0.2998046875, "step": 1172 }, { "epoch": 0.0792889009057726, "grad_norm": 1.4828332662582397, "learning_rate": 2.984219967804418e-05, "loss": 0.16546630859375, "step": 1173 }, { "epoch": 0.07935649587670678, "grad_norm": 1.2378944158554077, "learning_rate": 2.9841721976360154e-05, "loss": 0.27264404296875, "step": 1174 }, { "epoch": 0.07942409084764093, "grad_norm": 4.229269981384277, "learning_rate": 2.9841243556541323e-05, "loss": 0.389892578125, "step": 1175 }, { "epoch": 0.0794916858185751, "grad_norm": 1.2904409170150757, "learning_rate": 2.9840764418610827e-05, "loss": 0.266021728515625, "step": 1176 }, { "epoch": 0.07955928078950926, "grad_norm": 2.1461493968963623, "learning_rate": 2.9840284562591863e-05, "loss": 0.24285888671875, "step": 1177 }, { "epoch": 0.07962687576044342, "grad_norm": 1.760016918182373, "learning_rate": 2.9839803988507636e-05, "loss": 0.2256622314453125, "step": 1178 }, { "epoch": 0.07969447073137759, "grad_norm": 2.949282169342041, "learning_rate": 2.9839322696381403e-05, "loss": 0.271484375, "step": 1179 }, { "epoch": 0.07976206570231174, "grad_norm": 2.6704537868499756, "learning_rate": 2.983884068623645e-05, "loss": 0.2701416015625, "step": 1180 }, { "epoch": 0.07982966067324591, "grad_norm": 4.132626533508301, "learning_rate": 2.983835795809611e-05, "loss": 0.33013916015625, "step": 1181 }, { "epoch": 0.07989725564418007, "grad_norm": 2.219311237335205, "learning_rate": 2.983787451198373e-05, "loss": 0.1142730712890625, "step": 1182 }, { "epoch": 0.07996485061511424, "grad_norm": 1.9325724840164185, "learning_rate": 2.9837390347922702e-05, "loss": 0.178924560546875, "step": 1183 }, { "epoch": 0.0800324455860484, "grad_norm": 1.5322840213775635, "learning_rate": 2.9836905465936462e-05, "loss": 0.194091796875, "step": 1184 }, { "epoch": 0.08010004055698255, "grad_norm": 3.8369479179382324, "learning_rate": 2.983641986604846e-05, "loss": 0.323699951171875, "step": 1185 }, { "epoch": 0.08016763552791673, "grad_norm": 3.953718423843384, "learning_rate": 2.9835933548282204e-05, "loss": 0.21221923828125, "step": 1186 }, { "epoch": 0.08023523049885088, "grad_norm": 2.087977170944214, "learning_rate": 2.9835446512661218e-05, "loss": 0.327728271484375, "step": 1187 }, { "epoch": 0.08030282546978505, "grad_norm": 3.596010446548462, "learning_rate": 2.9834958759209068e-05, "loss": 0.2580108642578125, "step": 1188 }, { "epoch": 0.08037042044071921, "grad_norm": 2.5647804737091064, "learning_rate": 2.9834470287949357e-05, "loss": 0.24755859375, "step": 1189 }, { "epoch": 0.08043801541165338, "grad_norm": 1.3093675374984741, "learning_rate": 2.9833981098905717e-05, "loss": 0.21929931640625, "step": 1190 }, { "epoch": 0.08050561038258754, "grad_norm": 2.0917508602142334, "learning_rate": 2.9833491192101818e-05, "loss": 0.2591552734375, "step": 1191 }, { "epoch": 0.08057320535352169, "grad_norm": 2.2559404373168945, "learning_rate": 2.983300056756137e-05, "loss": 0.2691650390625, "step": 1192 }, { "epoch": 0.08064080032445586, "grad_norm": 6.880148887634277, "learning_rate": 2.9832509225308105e-05, "loss": 0.25335693359375, "step": 1193 }, { "epoch": 0.08070839529539002, "grad_norm": 5.045110702514648, "learning_rate": 2.9832017165365807e-05, "loss": 0.34344482421875, "step": 1194 }, { "epoch": 0.08077599026632419, "grad_norm": 5.049576759338379, "learning_rate": 2.9831524387758276e-05, "loss": 0.274322509765625, "step": 1195 }, { "epoch": 0.08084358523725835, "grad_norm": 1.5652190446853638, "learning_rate": 2.983103089250936e-05, "loss": 0.207122802734375, "step": 1196 }, { "epoch": 0.0809111802081925, "grad_norm": 4.963166236877441, "learning_rate": 2.9830536679642937e-05, "loss": 0.3565673828125, "step": 1197 }, { "epoch": 0.08097877517912667, "grad_norm": 0.9143381118774414, "learning_rate": 2.9830041749182918e-05, "loss": 0.134613037109375, "step": 1198 }, { "epoch": 0.08104637015006083, "grad_norm": 1.908163070678711, "learning_rate": 2.9829546101153253e-05, "loss": 0.1456451416015625, "step": 1199 }, { "epoch": 0.081113965120995, "grad_norm": 4.269106388092041, "learning_rate": 2.9829049735577922e-05, "loss": 0.320526123046875, "step": 1200 }, { "epoch": 0.08118156009192916, "grad_norm": 5.866423606872559, "learning_rate": 2.9828552652480944e-05, "loss": 0.3819580078125, "step": 1201 }, { "epoch": 0.08124915506286333, "grad_norm": 2.1799535751342773, "learning_rate": 2.9828054851886372e-05, "loss": 0.30780029296875, "step": 1202 }, { "epoch": 0.08131675003379749, "grad_norm": 1.6128097772598267, "learning_rate": 2.9827556333818292e-05, "loss": 0.283477783203125, "step": 1203 }, { "epoch": 0.08138434500473164, "grad_norm": 1.702624797821045, "learning_rate": 2.982705709830082e-05, "loss": 0.1577606201171875, "step": 1204 }, { "epoch": 0.08145193997566581, "grad_norm": 0.5608531832695007, "learning_rate": 2.9826557145358124e-05, "loss": 0.086334228515625, "step": 1205 }, { "epoch": 0.08151953494659997, "grad_norm": 1.8937017917633057, "learning_rate": 2.9826056475014385e-05, "loss": 0.26458740234375, "step": 1206 }, { "epoch": 0.08158712991753414, "grad_norm": 1.2416366338729858, "learning_rate": 2.982555508729383e-05, "loss": 0.223876953125, "step": 1207 }, { "epoch": 0.0816547248884683, "grad_norm": 3.506467580795288, "learning_rate": 2.9825052982220722e-05, "loss": 0.30877685546875, "step": 1208 }, { "epoch": 0.08172231985940245, "grad_norm": 1.8260536193847656, "learning_rate": 2.9824550159819358e-05, "loss": 0.338134765625, "step": 1209 }, { "epoch": 0.08178991483033662, "grad_norm": 2.2151594161987305, "learning_rate": 2.9824046620114064e-05, "loss": 0.27691650390625, "step": 1210 }, { "epoch": 0.08185750980127078, "grad_norm": 3.8751707077026367, "learning_rate": 2.9823542363129203e-05, "loss": 0.23516845703125, "step": 1211 }, { "epoch": 0.08192510477220495, "grad_norm": 2.6207821369171143, "learning_rate": 2.982303738888917e-05, "loss": 0.280181884765625, "step": 1212 }, { "epoch": 0.08199269974313911, "grad_norm": 2.510761260986328, "learning_rate": 2.9822531697418414e-05, "loss": 0.317626953125, "step": 1213 }, { "epoch": 0.08206029471407328, "grad_norm": 2.5462582111358643, "learning_rate": 2.9822025288741387e-05, "loss": 0.28082275390625, "step": 1214 }, { "epoch": 0.08212788968500744, "grad_norm": 1.7188509702682495, "learning_rate": 2.98215181628826e-05, "loss": 0.25274658203125, "step": 1215 }, { "epoch": 0.08219548465594159, "grad_norm": 1.6121203899383545, "learning_rate": 2.9821010319866595e-05, "loss": 0.213592529296875, "step": 1216 }, { "epoch": 0.08226307962687576, "grad_norm": 4.609282493591309, "learning_rate": 2.9820501759717938e-05, "loss": 0.26385498046875, "step": 1217 }, { "epoch": 0.08233067459780992, "grad_norm": 2.4032223224639893, "learning_rate": 2.9819992482461236e-05, "loss": 0.30438232421875, "step": 1218 }, { "epoch": 0.08239826956874409, "grad_norm": 1.5306034088134766, "learning_rate": 2.9819482488121136e-05, "loss": 0.191741943359375, "step": 1219 }, { "epoch": 0.08246586453967825, "grad_norm": 5.384128570556641, "learning_rate": 2.9818971776722312e-05, "loss": 0.2960205078125, "step": 1220 }, { "epoch": 0.08253345951061242, "grad_norm": 2.6659486293792725, "learning_rate": 2.981846034828947e-05, "loss": 0.2799072265625, "step": 1221 }, { "epoch": 0.08260105448154657, "grad_norm": 7.628111362457275, "learning_rate": 2.9817948202847368e-05, "loss": 0.38818359375, "step": 1222 }, { "epoch": 0.08266864945248073, "grad_norm": 1.287540078163147, "learning_rate": 2.9817435340420776e-05, "loss": 0.302459716796875, "step": 1223 }, { "epoch": 0.0827362444234149, "grad_norm": 4.182216644287109, "learning_rate": 2.9816921761034515e-05, "loss": 0.261474609375, "step": 1224 }, { "epoch": 0.08280383939434906, "grad_norm": 3.248418092727661, "learning_rate": 2.9816407464713436e-05, "loss": 0.338043212890625, "step": 1225 }, { "epoch": 0.08287143436528323, "grad_norm": 4.724811553955078, "learning_rate": 2.9815892451482417e-05, "loss": 0.36151123046875, "step": 1226 }, { "epoch": 0.08293902933621738, "grad_norm": 1.9300119876861572, "learning_rate": 2.9815376721366384e-05, "loss": 0.210357666015625, "step": 1227 }, { "epoch": 0.08300662430715154, "grad_norm": 3.3431057929992676, "learning_rate": 2.981486027439029e-05, "loss": 0.338409423828125, "step": 1228 }, { "epoch": 0.08307421927808571, "grad_norm": 2.5774495601654053, "learning_rate": 2.9814343110579122e-05, "loss": 0.2911376953125, "step": 1229 }, { "epoch": 0.08314181424901987, "grad_norm": 1.3957399129867554, "learning_rate": 2.981382522995791e-05, "loss": 0.145721435546875, "step": 1230 }, { "epoch": 0.08320940921995404, "grad_norm": 2.446136474609375, "learning_rate": 2.98133066325517e-05, "loss": 0.25982666015625, "step": 1231 }, { "epoch": 0.0832770041908882, "grad_norm": 3.0706446170806885, "learning_rate": 2.9812787318385595e-05, "loss": 0.3623046875, "step": 1232 }, { "epoch": 0.08334459916182237, "grad_norm": 4.136013031005859, "learning_rate": 2.981226728748472e-05, "loss": 0.27374267578125, "step": 1233 }, { "epoch": 0.08341219413275652, "grad_norm": 1.5590192079544067, "learning_rate": 2.981174653987424e-05, "loss": 0.1515960693359375, "step": 1234 }, { "epoch": 0.08347978910369068, "grad_norm": 3.672222137451172, "learning_rate": 2.9811225075579346e-05, "loss": 0.2943267822265625, "step": 1235 }, { "epoch": 0.08354738407462485, "grad_norm": 7.857468128204346, "learning_rate": 2.9810702894625273e-05, "loss": 0.350250244140625, "step": 1236 }, { "epoch": 0.08361497904555901, "grad_norm": 3.491150379180908, "learning_rate": 2.9810179997037285e-05, "loss": 0.236175537109375, "step": 1237 }, { "epoch": 0.08368257401649318, "grad_norm": 5.366442680358887, "learning_rate": 2.9809656382840688e-05, "loss": 0.328643798828125, "step": 1238 }, { "epoch": 0.08375016898742733, "grad_norm": 3.7318224906921387, "learning_rate": 2.9809132052060813e-05, "loss": 0.33905029296875, "step": 1239 }, { "epoch": 0.0838177639583615, "grad_norm": 1.8351070880889893, "learning_rate": 2.9808607004723036e-05, "loss": 0.25457763671875, "step": 1240 }, { "epoch": 0.08388535892929566, "grad_norm": 3.222823143005371, "learning_rate": 2.9808081240852756e-05, "loss": 0.264129638671875, "step": 1241 }, { "epoch": 0.08395295390022982, "grad_norm": 4.95627498626709, "learning_rate": 2.9807554760475416e-05, "loss": 0.301300048828125, "step": 1242 }, { "epoch": 0.08402054887116399, "grad_norm": 3.166508197784424, "learning_rate": 2.980702756361649e-05, "loss": 0.21490478515625, "step": 1243 }, { "epoch": 0.08408814384209815, "grad_norm": 1.712730884552002, "learning_rate": 2.9806499650301482e-05, "loss": 0.263397216796875, "step": 1244 }, { "epoch": 0.08415573881303232, "grad_norm": 3.6822924613952637, "learning_rate": 2.9805971020555944e-05, "loss": 0.36962890625, "step": 1245 }, { "epoch": 0.08422333378396647, "grad_norm": 1.9094791412353516, "learning_rate": 2.980544167440545e-05, "loss": 0.307861328125, "step": 1246 }, { "epoch": 0.08429092875490063, "grad_norm": 2.4445173740386963, "learning_rate": 2.9804911611875615e-05, "loss": 0.323883056640625, "step": 1247 }, { "epoch": 0.0843585237258348, "grad_norm": 2.4963643550872803, "learning_rate": 2.9804380832992083e-05, "loss": 0.34844970703125, "step": 1248 }, { "epoch": 0.08442611869676896, "grad_norm": 4.518589973449707, "learning_rate": 2.9803849337780542e-05, "loss": 0.24420166015625, "step": 1249 }, { "epoch": 0.08449371366770313, "grad_norm": 1.7477840185165405, "learning_rate": 2.9803317126266706e-05, "loss": 0.2479248046875, "step": 1250 }, { "epoch": 0.08456130863863728, "grad_norm": 3.991312265396118, "learning_rate": 2.9802784198476325e-05, "loss": 0.350311279296875, "step": 1251 }, { "epoch": 0.08462890360957145, "grad_norm": 2.1604225635528564, "learning_rate": 2.9802250554435184e-05, "loss": 0.32843017578125, "step": 1252 }, { "epoch": 0.08469649858050561, "grad_norm": 3.5693342685699463, "learning_rate": 2.9801716194169113e-05, "loss": 0.298004150390625, "step": 1253 }, { "epoch": 0.08476409355143977, "grad_norm": 2.403864860534668, "learning_rate": 2.9801181117703956e-05, "loss": 0.317138671875, "step": 1254 }, { "epoch": 0.08483168852237394, "grad_norm": 1.863322377204895, "learning_rate": 2.9800645325065614e-05, "loss": 0.2232666015625, "step": 1255 }, { "epoch": 0.0848992834933081, "grad_norm": 2.650296926498413, "learning_rate": 2.980010881628e-05, "loss": 0.238922119140625, "step": 1256 }, { "epoch": 0.08496687846424227, "grad_norm": 2.507373094558716, "learning_rate": 2.979957159137309e-05, "loss": 0.1805877685546875, "step": 1257 }, { "epoch": 0.08503447343517642, "grad_norm": 1.5181699991226196, "learning_rate": 2.979903365037086e-05, "loss": 0.250274658203125, "step": 1258 }, { "epoch": 0.08510206840611058, "grad_norm": 4.645081043243408, "learning_rate": 2.9798494993299352e-05, "loss": 0.3228759765625, "step": 1259 }, { "epoch": 0.08516966337704475, "grad_norm": 4.105543613433838, "learning_rate": 2.9797955620184627e-05, "loss": 0.31494140625, "step": 1260 }, { "epoch": 0.0852372583479789, "grad_norm": 2.6716036796569824, "learning_rate": 2.9797415531052776e-05, "loss": 0.213104248046875, "step": 1261 }, { "epoch": 0.08530485331891308, "grad_norm": 3.2193443775177, "learning_rate": 2.9796874725929945e-05, "loss": 0.3177490234375, "step": 1262 }, { "epoch": 0.08537244828984723, "grad_norm": 5.522236347198486, "learning_rate": 2.979633320484229e-05, "loss": 0.27618408203125, "step": 1263 }, { "epoch": 0.0854400432607814, "grad_norm": 5.671538352966309, "learning_rate": 2.9795790967816013e-05, "loss": 0.293304443359375, "step": 1264 }, { "epoch": 0.08550763823171556, "grad_norm": 3.437330961227417, "learning_rate": 2.979524801487736e-05, "loss": 0.292144775390625, "step": 1265 }, { "epoch": 0.08557523320264972, "grad_norm": 2.734093189239502, "learning_rate": 2.97947043460526e-05, "loss": 0.1553955078125, "step": 1266 }, { "epoch": 0.08564282817358389, "grad_norm": 3.461568832397461, "learning_rate": 2.979415996136803e-05, "loss": 0.26422119140625, "step": 1267 }, { "epoch": 0.08571042314451804, "grad_norm": 1.589607834815979, "learning_rate": 2.979361486085e-05, "loss": 0.26007080078125, "step": 1268 }, { "epoch": 0.08577801811545221, "grad_norm": 2.3920505046844482, "learning_rate": 2.979306904452488e-05, "loss": 0.290985107421875, "step": 1269 }, { "epoch": 0.08584561308638637, "grad_norm": 3.6101977825164795, "learning_rate": 2.979252251241909e-05, "loss": 0.33819580078125, "step": 1270 }, { "epoch": 0.08591320805732054, "grad_norm": 2.884575605392456, "learning_rate": 2.9791975264559063e-05, "loss": 0.36700439453125, "step": 1271 }, { "epoch": 0.0859808030282547, "grad_norm": 3.245651960372925, "learning_rate": 2.979142730097128e-05, "loss": 0.2901611328125, "step": 1272 }, { "epoch": 0.08604839799918886, "grad_norm": 1.985909104347229, "learning_rate": 2.979087862168226e-05, "loss": 0.18621826171875, "step": 1273 }, { "epoch": 0.08611599297012303, "grad_norm": 5.508145332336426, "learning_rate": 2.9790329226718544e-05, "loss": 0.357818603515625, "step": 1274 }, { "epoch": 0.08618358794105718, "grad_norm": 3.337095022201538, "learning_rate": 2.978977911610673e-05, "loss": 0.3409423828125, "step": 1275 }, { "epoch": 0.08625118291199135, "grad_norm": 2.4407296180725098, "learning_rate": 2.9789228289873417e-05, "loss": 0.271026611328125, "step": 1276 }, { "epoch": 0.08631877788292551, "grad_norm": 2.4311084747314453, "learning_rate": 2.9788676748045268e-05, "loss": 0.2723388671875, "step": 1277 }, { "epoch": 0.08638637285385967, "grad_norm": 2.112851142883301, "learning_rate": 2.9788124490648967e-05, "loss": 0.307403564453125, "step": 1278 }, { "epoch": 0.08645396782479384, "grad_norm": 3.2641947269439697, "learning_rate": 2.978757151771124e-05, "loss": 0.29095458984375, "step": 1279 }, { "epoch": 0.086521562795728, "grad_norm": 1.4926362037658691, "learning_rate": 2.9787017829258836e-05, "loss": 0.2706298828125, "step": 1280 }, { "epoch": 0.08658915776666216, "grad_norm": 2.9494073390960693, "learning_rate": 2.9786463425318552e-05, "loss": 0.254150390625, "step": 1281 }, { "epoch": 0.08665675273759632, "grad_norm": 1.6178410053253174, "learning_rate": 2.9785908305917212e-05, "loss": 0.276336669921875, "step": 1282 }, { "epoch": 0.08672434770853049, "grad_norm": 4.159379005432129, "learning_rate": 2.9785352471081676e-05, "loss": 0.2862548828125, "step": 1283 }, { "epoch": 0.08679194267946465, "grad_norm": 2.8223671913146973, "learning_rate": 2.9784795920838837e-05, "loss": 0.2659912109375, "step": 1284 }, { "epoch": 0.0868595376503988, "grad_norm": 3.4617607593536377, "learning_rate": 2.9784238655215627e-05, "loss": 0.2489471435546875, "step": 1285 }, { "epoch": 0.08692713262133298, "grad_norm": 1.8310226202011108, "learning_rate": 2.978368067423901e-05, "loss": 0.28082275390625, "step": 1286 }, { "epoch": 0.08699472759226713, "grad_norm": 2.374998092651367, "learning_rate": 2.978312197793598e-05, "loss": 0.2890625, "step": 1287 }, { "epoch": 0.0870623225632013, "grad_norm": 2.0604023933410645, "learning_rate": 2.9782562566333575e-05, "loss": 0.248016357421875, "step": 1288 }, { "epoch": 0.08712991753413546, "grad_norm": 2.349299907684326, "learning_rate": 2.978200243945886e-05, "loss": 0.3271484375, "step": 1289 }, { "epoch": 0.08719751250506962, "grad_norm": 2.364994764328003, "learning_rate": 2.9781441597338942e-05, "loss": 0.34649658203125, "step": 1290 }, { "epoch": 0.08726510747600379, "grad_norm": 1.4262628555297852, "learning_rate": 2.978088004000095e-05, "loss": 0.2818603515625, "step": 1291 }, { "epoch": 0.08733270244693794, "grad_norm": 2.5649778842926025, "learning_rate": 2.9780317767472064e-05, "loss": 0.35430908203125, "step": 1292 }, { "epoch": 0.08740029741787211, "grad_norm": 1.9316438436508179, "learning_rate": 2.9779754779779487e-05, "loss": 0.27398681640625, "step": 1293 }, { "epoch": 0.08746789238880627, "grad_norm": 4.190128326416016, "learning_rate": 2.9779191076950456e-05, "loss": 0.270416259765625, "step": 1294 }, { "epoch": 0.08753548735974044, "grad_norm": 4.184516429901123, "learning_rate": 2.9778626659012254e-05, "loss": 0.40631103515625, "step": 1295 }, { "epoch": 0.0876030823306746, "grad_norm": 1.3656257390975952, "learning_rate": 2.9778061525992182e-05, "loss": 0.249267578125, "step": 1296 }, { "epoch": 0.08767067730160875, "grad_norm": 1.6415821313858032, "learning_rate": 2.9777495677917592e-05, "loss": 0.1088409423828125, "step": 1297 }, { "epoch": 0.08773827227254292, "grad_norm": 3.2956271171569824, "learning_rate": 2.9776929114815864e-05, "loss": 0.3800048828125, "step": 1298 }, { "epoch": 0.08780586724347708, "grad_norm": 2.566476345062256, "learning_rate": 2.9776361836714404e-05, "loss": 0.33807373046875, "step": 1299 }, { "epoch": 0.08787346221441125, "grad_norm": 2.5349643230438232, "learning_rate": 2.977579384364067e-05, "loss": 0.229156494140625, "step": 1300 }, { "epoch": 0.08794105718534541, "grad_norm": 0.767652153968811, "learning_rate": 2.9775225135622136e-05, "loss": 0.1861572265625, "step": 1301 }, { "epoch": 0.08800865215627958, "grad_norm": 1.5497061014175415, "learning_rate": 2.9774655712686324e-05, "loss": 0.36077880859375, "step": 1302 }, { "epoch": 0.08807624712721374, "grad_norm": 1.9150209426879883, "learning_rate": 2.9774085574860786e-05, "loss": 0.141815185546875, "step": 1303 }, { "epoch": 0.08814384209814789, "grad_norm": 1.382738709449768, "learning_rate": 2.977351472217311e-05, "loss": 0.314605712890625, "step": 1304 }, { "epoch": 0.08821143706908206, "grad_norm": 2.540975332260132, "learning_rate": 2.9772943154650918e-05, "loss": 0.29290771484375, "step": 1305 }, { "epoch": 0.08827903204001622, "grad_norm": 1.0054810047149658, "learning_rate": 2.977237087232186e-05, "loss": 0.2314453125, "step": 1306 }, { "epoch": 0.08834662701095039, "grad_norm": 1.1441068649291992, "learning_rate": 2.9771797875213635e-05, "loss": 0.1996307373046875, "step": 1307 }, { "epoch": 0.08841422198188455, "grad_norm": 2.6542844772338867, "learning_rate": 2.9771224163353963e-05, "loss": 0.38604736328125, "step": 1308 }, { "epoch": 0.0884818169528187, "grad_norm": 2.7624168395996094, "learning_rate": 2.9770649736770603e-05, "loss": 0.206573486328125, "step": 1309 }, { "epoch": 0.08854941192375287, "grad_norm": 1.978450059890747, "learning_rate": 2.977007459549135e-05, "loss": 0.28948974609375, "step": 1310 }, { "epoch": 0.08861700689468703, "grad_norm": 1.9739916324615479, "learning_rate": 2.9769498739544037e-05, "loss": 0.2808837890625, "step": 1311 }, { "epoch": 0.0886846018656212, "grad_norm": 1.8646303415298462, "learning_rate": 2.9768922168956523e-05, "loss": 0.1806640625, "step": 1312 }, { "epoch": 0.08875219683655536, "grad_norm": 2.3403003215789795, "learning_rate": 2.9768344883756706e-05, "loss": 0.2528839111328125, "step": 1313 }, { "epoch": 0.08881979180748953, "grad_norm": 2.9221675395965576, "learning_rate": 2.9767766883972525e-05, "loss": 0.3118896484375, "step": 1314 }, { "epoch": 0.08888738677842369, "grad_norm": 1.5028064250946045, "learning_rate": 2.9767188169631937e-05, "loss": 0.231201171875, "step": 1315 }, { "epoch": 0.08895498174935784, "grad_norm": 2.382035255432129, "learning_rate": 2.976660874076295e-05, "loss": 0.239410400390625, "step": 1316 }, { "epoch": 0.08902257672029201, "grad_norm": 0.8940204977989197, "learning_rate": 2.9766028597393604e-05, "loss": 0.1221466064453125, "step": 1317 }, { "epoch": 0.08909017169122617, "grad_norm": 1.5572103261947632, "learning_rate": 2.976544773955196e-05, "loss": 0.36083984375, "step": 1318 }, { "epoch": 0.08915776666216034, "grad_norm": 1.1751738786697388, "learning_rate": 2.976486616726613e-05, "loss": 0.20770263671875, "step": 1319 }, { "epoch": 0.0892253616330945, "grad_norm": 2.465233564376831, "learning_rate": 2.976428388056426e-05, "loss": 0.326141357421875, "step": 1320 }, { "epoch": 0.08929295660402867, "grad_norm": 2.581268072128296, "learning_rate": 2.976370087947451e-05, "loss": 0.2100830078125, "step": 1321 }, { "epoch": 0.08936055157496282, "grad_norm": 4.497420310974121, "learning_rate": 2.97631171640251e-05, "loss": 0.2691650390625, "step": 1322 }, { "epoch": 0.08942814654589698, "grad_norm": 1.4153389930725098, "learning_rate": 2.9762532734244272e-05, "loss": 0.210205078125, "step": 1323 }, { "epoch": 0.08949574151683115, "grad_norm": 2.8274011611938477, "learning_rate": 2.9761947590160298e-05, "loss": 0.30908203125, "step": 1324 }, { "epoch": 0.08956333648776531, "grad_norm": 1.9243011474609375, "learning_rate": 2.97613617318015e-05, "loss": 0.34295654296875, "step": 1325 }, { "epoch": 0.08963093145869948, "grad_norm": 2.1652278900146484, "learning_rate": 2.9760775159196224e-05, "loss": 0.31884765625, "step": 1326 }, { "epoch": 0.08969852642963363, "grad_norm": 1.5023813247680664, "learning_rate": 2.9760187872372847e-05, "loss": 0.203704833984375, "step": 1327 }, { "epoch": 0.08976612140056779, "grad_norm": 2.2813687324523926, "learning_rate": 2.9759599871359787e-05, "loss": 0.2853240966796875, "step": 1328 }, { "epoch": 0.08983371637150196, "grad_norm": 3.9689245223999023, "learning_rate": 2.9759011156185497e-05, "loss": 0.265625, "step": 1329 }, { "epoch": 0.08990131134243612, "grad_norm": 1.7313703298568726, "learning_rate": 2.975842172687846e-05, "loss": 0.302337646484375, "step": 1330 }, { "epoch": 0.08996890631337029, "grad_norm": 1.2855379581451416, "learning_rate": 2.97578315834672e-05, "loss": 0.26373291015625, "step": 1331 }, { "epoch": 0.09003650128430445, "grad_norm": 3.389946222305298, "learning_rate": 2.975724072598027e-05, "loss": 0.3682861328125, "step": 1332 }, { "epoch": 0.09010409625523862, "grad_norm": 1.6537845134735107, "learning_rate": 2.975664915444626e-05, "loss": 0.271026611328125, "step": 1333 }, { "epoch": 0.09017169122617277, "grad_norm": 2.200878858566284, "learning_rate": 2.975605686889379e-05, "loss": 0.267852783203125, "step": 1334 }, { "epoch": 0.09023928619710693, "grad_norm": 4.407474040985107, "learning_rate": 2.9755463869351526e-05, "loss": 0.1910400390625, "step": 1335 }, { "epoch": 0.0903068811680411, "grad_norm": 1.166464924812317, "learning_rate": 2.9754870155848157e-05, "loss": 0.1305999755859375, "step": 1336 }, { "epoch": 0.09037447613897526, "grad_norm": 1.775310754776001, "learning_rate": 2.9754275728412406e-05, "loss": 0.36517333984375, "step": 1337 }, { "epoch": 0.09044207110990943, "grad_norm": 2.0435755252838135, "learning_rate": 2.975368058707304e-05, "loss": 0.2921142578125, "step": 1338 }, { "epoch": 0.09050966608084358, "grad_norm": 1.7368993759155273, "learning_rate": 2.975308473185886e-05, "loss": 0.13330078125, "step": 1339 }, { "epoch": 0.09057726105177774, "grad_norm": 2.3274688720703125, "learning_rate": 2.975248816279869e-05, "loss": 0.3021240234375, "step": 1340 }, { "epoch": 0.09064485602271191, "grad_norm": 3.0587236881256104, "learning_rate": 2.9751890879921395e-05, "loss": 0.13311004638671875, "step": 1341 }, { "epoch": 0.09071245099364607, "grad_norm": 1.5396029949188232, "learning_rate": 2.9751292883255883e-05, "loss": 0.320281982421875, "step": 1342 }, { "epoch": 0.09078004596458024, "grad_norm": 2.8449954986572266, "learning_rate": 2.975069417283108e-05, "loss": 0.253265380859375, "step": 1343 }, { "epoch": 0.0908476409355144, "grad_norm": 0.9403058886528015, "learning_rate": 2.975009474867596e-05, "loss": 0.225830078125, "step": 1344 }, { "epoch": 0.09091523590644857, "grad_norm": 1.5178022384643555, "learning_rate": 2.9749494610819528e-05, "loss": 0.11773681640625, "step": 1345 }, { "epoch": 0.09098283087738272, "grad_norm": 1.148185133934021, "learning_rate": 2.9748893759290817e-05, "loss": 0.23712158203125, "step": 1346 }, { "epoch": 0.09105042584831688, "grad_norm": 1.473791480064392, "learning_rate": 2.974829219411891e-05, "loss": 0.20196533203125, "step": 1347 }, { "epoch": 0.09111802081925105, "grad_norm": 1.9655165672302246, "learning_rate": 2.9747689915332903e-05, "loss": 0.268218994140625, "step": 1348 }, { "epoch": 0.0911856157901852, "grad_norm": 2.406048059463501, "learning_rate": 2.9747086922961943e-05, "loss": 0.312530517578125, "step": 1349 }, { "epoch": 0.09125321076111938, "grad_norm": 3.087674617767334, "learning_rate": 2.974648321703521e-05, "loss": 0.26617431640625, "step": 1350 }, { "epoch": 0.09132080573205353, "grad_norm": 4.311401844024658, "learning_rate": 2.9745878797581906e-05, "loss": 0.33892822265625, "step": 1351 }, { "epoch": 0.0913884007029877, "grad_norm": 3.792572259902954, "learning_rate": 2.9745273664631284e-05, "loss": 0.2333984375, "step": 1352 }, { "epoch": 0.09145599567392186, "grad_norm": 2.8375892639160156, "learning_rate": 2.9744667818212624e-05, "loss": 0.21321868896484375, "step": 1353 }, { "epoch": 0.09152359064485602, "grad_norm": 7.3776469230651855, "learning_rate": 2.9744061258355236e-05, "loss": 0.436981201171875, "step": 1354 }, { "epoch": 0.09159118561579019, "grad_norm": 2.5318362712860107, "learning_rate": 2.9743453985088478e-05, "loss": 0.2767333984375, "step": 1355 }, { "epoch": 0.09165878058672434, "grad_norm": 2.5458433628082275, "learning_rate": 2.9742845998441722e-05, "loss": 0.35772705078125, "step": 1356 }, { "epoch": 0.09172637555765852, "grad_norm": 5.6714935302734375, "learning_rate": 2.9742237298444398e-05, "loss": 0.41455078125, "step": 1357 }, { "epoch": 0.09179397052859267, "grad_norm": 5.694380283355713, "learning_rate": 2.9741627885125944e-05, "loss": 0.269256591796875, "step": 1358 }, { "epoch": 0.09186156549952683, "grad_norm": 4.108724117279053, "learning_rate": 2.974101775851586e-05, "loss": 0.27374267578125, "step": 1359 }, { "epoch": 0.091929160470461, "grad_norm": 5.0373053550720215, "learning_rate": 2.9740406918643664e-05, "loss": 0.3206787109375, "step": 1360 }, { "epoch": 0.09199675544139516, "grad_norm": 6.326249122619629, "learning_rate": 2.9739795365538914e-05, "loss": 0.35455322265625, "step": 1361 }, { "epoch": 0.09206435041232933, "grad_norm": 2.146930456161499, "learning_rate": 2.9739183099231192e-05, "loss": 0.4134521484375, "step": 1362 }, { "epoch": 0.09213194538326348, "grad_norm": 3.3237805366516113, "learning_rate": 2.973857011975013e-05, "loss": 0.2930908203125, "step": 1363 }, { "epoch": 0.09219954035419765, "grad_norm": 1.5417044162750244, "learning_rate": 2.9737956427125395e-05, "loss": 0.241302490234375, "step": 1364 }, { "epoch": 0.09226713532513181, "grad_norm": 1.0258005857467651, "learning_rate": 2.973734202138667e-05, "loss": 0.188873291015625, "step": 1365 }, { "epoch": 0.09233473029606597, "grad_norm": 4.839184761047363, "learning_rate": 2.9736726902563686e-05, "loss": 0.32269287109375, "step": 1366 }, { "epoch": 0.09240232526700014, "grad_norm": 4.443154811859131, "learning_rate": 2.973611107068621e-05, "loss": 0.33941650390625, "step": 1367 }, { "epoch": 0.0924699202379343, "grad_norm": 4.761115074157715, "learning_rate": 2.9735494525784034e-05, "loss": 0.29193115234375, "step": 1368 }, { "epoch": 0.09253751520886846, "grad_norm": 4.463099002838135, "learning_rate": 2.9734877267886996e-05, "loss": 0.3282470703125, "step": 1369 }, { "epoch": 0.09260511017980262, "grad_norm": 3.431645631790161, "learning_rate": 2.9734259297024963e-05, "loss": 0.342529296875, "step": 1370 }, { "epoch": 0.09267270515073678, "grad_norm": 1.365813136100769, "learning_rate": 2.9733640613227827e-05, "loss": 0.255615234375, "step": 1371 }, { "epoch": 0.09274030012167095, "grad_norm": 1.5167032480239868, "learning_rate": 2.9733021216525537e-05, "loss": 0.3157958984375, "step": 1372 }, { "epoch": 0.0928078950926051, "grad_norm": 2.0550310611724854, "learning_rate": 2.9732401106948053e-05, "loss": 0.317352294921875, "step": 1373 }, { "epoch": 0.09287549006353928, "grad_norm": 1.9142835140228271, "learning_rate": 2.9731780284525385e-05, "loss": 0.253662109375, "step": 1374 }, { "epoch": 0.09294308503447343, "grad_norm": 4.662818908691406, "learning_rate": 2.9731158749287574e-05, "loss": 0.26849365234375, "step": 1375 }, { "epoch": 0.0930106800054076, "grad_norm": 1.9648661613464355, "learning_rate": 2.9730536501264684e-05, "loss": 0.35540771484375, "step": 1376 }, { "epoch": 0.09307827497634176, "grad_norm": 2.3279635906219482, "learning_rate": 2.9729913540486835e-05, "loss": 0.2838134765625, "step": 1377 }, { "epoch": 0.09314586994727592, "grad_norm": 2.574065923690796, "learning_rate": 2.972928986698416e-05, "loss": 0.324737548828125, "step": 1378 }, { "epoch": 0.09321346491821009, "grad_norm": 1.613362431526184, "learning_rate": 2.9728665480786848e-05, "loss": 0.37457275390625, "step": 1379 }, { "epoch": 0.09328105988914424, "grad_norm": 2.396097421646118, "learning_rate": 2.97280403819251e-05, "loss": 0.37255859375, "step": 1380 }, { "epoch": 0.09334865486007841, "grad_norm": 1.3603302240371704, "learning_rate": 2.9727414570429162e-05, "loss": 0.10836029052734375, "step": 1381 }, { "epoch": 0.09341624983101257, "grad_norm": 1.657780647277832, "learning_rate": 2.9726788046329323e-05, "loss": 0.3084716796875, "step": 1382 }, { "epoch": 0.09348384480194674, "grad_norm": 1.5966973304748535, "learning_rate": 2.972616080965589e-05, "loss": 0.33740234375, "step": 1383 }, { "epoch": 0.0935514397728809, "grad_norm": 1.0570944547653198, "learning_rate": 2.972553286043922e-05, "loss": 0.212799072265625, "step": 1384 }, { "epoch": 0.09361903474381505, "grad_norm": 1.303904414176941, "learning_rate": 2.972490419870969e-05, "loss": 0.21539306640625, "step": 1385 }, { "epoch": 0.09368662971474923, "grad_norm": 2.3358864784240723, "learning_rate": 2.9724274824497722e-05, "loss": 0.149383544921875, "step": 1386 }, { "epoch": 0.09375422468568338, "grad_norm": 1.7000372409820557, "learning_rate": 2.972364473783377e-05, "loss": 0.239532470703125, "step": 1387 }, { "epoch": 0.09382181965661755, "grad_norm": 3.221896171569824, "learning_rate": 2.972301393874832e-05, "loss": 0.3846435546875, "step": 1388 }, { "epoch": 0.09388941462755171, "grad_norm": 3.0556483268737793, "learning_rate": 2.9722382427271895e-05, "loss": 0.349365234375, "step": 1389 }, { "epoch": 0.09395700959848587, "grad_norm": 0.9315555691719055, "learning_rate": 2.9721750203435044e-05, "loss": 0.1621551513671875, "step": 1390 }, { "epoch": 0.09402460456942004, "grad_norm": 2.0184850692749023, "learning_rate": 2.9721117267268374e-05, "loss": 0.227569580078125, "step": 1391 }, { "epoch": 0.0940921995403542, "grad_norm": 1.6451077461242676, "learning_rate": 2.9720483618802496e-05, "loss": 0.244842529296875, "step": 1392 }, { "epoch": 0.09415979451128836, "grad_norm": 0.9487246870994568, "learning_rate": 2.9719849258068076e-05, "loss": 0.131439208984375, "step": 1393 }, { "epoch": 0.09422738948222252, "grad_norm": 1.783403754234314, "learning_rate": 2.9719214185095805e-05, "loss": 0.37567138671875, "step": 1394 }, { "epoch": 0.09429498445315669, "grad_norm": 1.8391531705856323, "learning_rate": 2.9718578399916415e-05, "loss": 0.337860107421875, "step": 1395 }, { "epoch": 0.09436257942409085, "grad_norm": 2.638192653656006, "learning_rate": 2.9717941902560667e-05, "loss": 0.343017578125, "step": 1396 }, { "epoch": 0.094430174395025, "grad_norm": 2.2230746746063232, "learning_rate": 2.9717304693059364e-05, "loss": 0.2857666015625, "step": 1397 }, { "epoch": 0.09449776936595917, "grad_norm": 2.133082866668701, "learning_rate": 2.971666677144333e-05, "loss": 0.245819091796875, "step": 1398 }, { "epoch": 0.09456536433689333, "grad_norm": 1.178558111190796, "learning_rate": 2.9716028137743442e-05, "loss": 0.269927978515625, "step": 1399 }, { "epoch": 0.0946329593078275, "grad_norm": 2.0937085151672363, "learning_rate": 2.971538879199059e-05, "loss": 0.30706787109375, "step": 1400 }, { "epoch": 0.09470055427876166, "grad_norm": 1.9111181497573853, "learning_rate": 2.9714748734215714e-05, "loss": 0.32476806640625, "step": 1401 }, { "epoch": 0.09476814924969583, "grad_norm": 2.473444700241089, "learning_rate": 2.9714107964449782e-05, "loss": 0.274658203125, "step": 1402 }, { "epoch": 0.09483574422062999, "grad_norm": 3.0832016468048096, "learning_rate": 2.9713466482723802e-05, "loss": 0.38525390625, "step": 1403 }, { "epoch": 0.09490333919156414, "grad_norm": 2.142582416534424, "learning_rate": 2.9712824289068814e-05, "loss": 0.3016357421875, "step": 1404 }, { "epoch": 0.09497093416249831, "grad_norm": 2.9867208003997803, "learning_rate": 2.9712181383515886e-05, "loss": 0.303436279296875, "step": 1405 }, { "epoch": 0.09503852913343247, "grad_norm": 3.14310359954834, "learning_rate": 2.9711537766096133e-05, "loss": 0.327392578125, "step": 1406 }, { "epoch": 0.09510612410436664, "grad_norm": 4.519619464874268, "learning_rate": 2.971089343684069e-05, "loss": 0.2822265625, "step": 1407 }, { "epoch": 0.0951737190753008, "grad_norm": 2.7214884757995605, "learning_rate": 2.9710248395780732e-05, "loss": 0.34368896484375, "step": 1408 }, { "epoch": 0.09524131404623495, "grad_norm": 2.0934112071990967, "learning_rate": 2.970960264294748e-05, "loss": 0.253662109375, "step": 1409 }, { "epoch": 0.09530890901716912, "grad_norm": 5.1465067863464355, "learning_rate": 2.970895617837217e-05, "loss": 0.32861328125, "step": 1410 }, { "epoch": 0.09537650398810328, "grad_norm": 2.908221960067749, "learning_rate": 2.9708309002086092e-05, "loss": 0.30987548828125, "step": 1411 }, { "epoch": 0.09544409895903745, "grad_norm": 3.354797601699829, "learning_rate": 2.9707661114120553e-05, "loss": 0.3060760498046875, "step": 1412 }, { "epoch": 0.09551169392997161, "grad_norm": 1.3339813947677612, "learning_rate": 2.97070125145069e-05, "loss": 0.20684814453125, "step": 1413 }, { "epoch": 0.09557928890090578, "grad_norm": 4.969995975494385, "learning_rate": 2.970636320327652e-05, "loss": 0.37567138671875, "step": 1414 }, { "epoch": 0.09564688387183994, "grad_norm": 2.3638370037078857, "learning_rate": 2.9705713180460835e-05, "loss": 0.2105712890625, "step": 1415 }, { "epoch": 0.09571447884277409, "grad_norm": 1.574220061302185, "learning_rate": 2.9705062446091288e-05, "loss": 0.35552978515625, "step": 1416 }, { "epoch": 0.09578207381370826, "grad_norm": 1.4166442155838013, "learning_rate": 2.970441100019937e-05, "loss": 0.2652587890625, "step": 1417 }, { "epoch": 0.09584966878464242, "grad_norm": 3.4280052185058594, "learning_rate": 2.9703758842816604e-05, "loss": 0.33477783203125, "step": 1418 }, { "epoch": 0.09591726375557659, "grad_norm": 5.841675758361816, "learning_rate": 2.9703105973974543e-05, "loss": 0.3463134765625, "step": 1419 }, { "epoch": 0.09598485872651075, "grad_norm": 3.386040687561035, "learning_rate": 2.9702452393704777e-05, "loss": 0.24053955078125, "step": 1420 }, { "epoch": 0.0960524536974449, "grad_norm": 2.5822389125823975, "learning_rate": 2.970179810203893e-05, "loss": 0.35943603515625, "step": 1421 }, { "epoch": 0.09612004866837907, "grad_norm": 2.0088322162628174, "learning_rate": 2.970114309900866e-05, "loss": 0.268798828125, "step": 1422 }, { "epoch": 0.09618764363931323, "grad_norm": 6.6840410232543945, "learning_rate": 2.9700487384645667e-05, "loss": 0.376312255859375, "step": 1423 }, { "epoch": 0.0962552386102474, "grad_norm": 3.0199294090270996, "learning_rate": 2.9699830958981672e-05, "loss": 0.32025146484375, "step": 1424 }, { "epoch": 0.09632283358118156, "grad_norm": 4.270359039306641, "learning_rate": 2.969917382204843e-05, "loss": 0.3243408203125, "step": 1425 }, { "epoch": 0.09639042855211573, "grad_norm": 1.5337227582931519, "learning_rate": 2.9698515973877752e-05, "loss": 0.260040283203125, "step": 1426 }, { "epoch": 0.09645802352304989, "grad_norm": 1.1690140962600708, "learning_rate": 2.9697857414501463e-05, "loss": 0.24822998046875, "step": 1427 }, { "epoch": 0.09652561849398404, "grad_norm": 1.0478949546813965, "learning_rate": 2.9697198143951425e-05, "loss": 0.18463134765625, "step": 1428 }, { "epoch": 0.09659321346491821, "grad_norm": 2.382619619369507, "learning_rate": 2.969653816225954e-05, "loss": 0.342315673828125, "step": 1429 }, { "epoch": 0.09666080843585237, "grad_norm": 1.5388233661651611, "learning_rate": 2.969587746945774e-05, "loss": 0.269500732421875, "step": 1430 }, { "epoch": 0.09672840340678654, "grad_norm": 2.286345958709717, "learning_rate": 2.9695216065577996e-05, "loss": 0.294281005859375, "step": 1431 }, { "epoch": 0.0967959983777207, "grad_norm": 2.3686935901641846, "learning_rate": 2.9694553950652317e-05, "loss": 0.203704833984375, "step": 1432 }, { "epoch": 0.09686359334865487, "grad_norm": 1.4312090873718262, "learning_rate": 2.9693891124712726e-05, "loss": 0.243988037109375, "step": 1433 }, { "epoch": 0.09693118831958902, "grad_norm": 1.630202054977417, "learning_rate": 2.96932275877913e-05, "loss": 0.269989013671875, "step": 1434 }, { "epoch": 0.09699878329052318, "grad_norm": 1.6655956506729126, "learning_rate": 2.9692563339920152e-05, "loss": 0.3258056640625, "step": 1435 }, { "epoch": 0.09706637826145735, "grad_norm": 1.3231350183486938, "learning_rate": 2.9691898381131417e-05, "loss": 0.218048095703125, "step": 1436 }, { "epoch": 0.09713397323239151, "grad_norm": 1.1816771030426025, "learning_rate": 2.9691232711457273e-05, "loss": 0.10687255859375, "step": 1437 }, { "epoch": 0.09720156820332568, "grad_norm": 2.2172980308532715, "learning_rate": 2.969056633092992e-05, "loss": 0.322265625, "step": 1438 }, { "epoch": 0.09726916317425983, "grad_norm": 1.5714248418807983, "learning_rate": 2.968989923958161e-05, "loss": 0.290435791015625, "step": 1439 }, { "epoch": 0.09733675814519399, "grad_norm": 1.2940678596496582, "learning_rate": 2.9689231437444624e-05, "loss": 0.143524169921875, "step": 1440 }, { "epoch": 0.09740435311612816, "grad_norm": 1.3292138576507568, "learning_rate": 2.9688562924551266e-05, "loss": 0.1902923583984375, "step": 1441 }, { "epoch": 0.09747194808706232, "grad_norm": 2.866523027420044, "learning_rate": 2.968789370093389e-05, "loss": 0.1907501220703125, "step": 1442 }, { "epoch": 0.09753954305799649, "grad_norm": 1.6680091619491577, "learning_rate": 2.968722376662487e-05, "loss": 0.235626220703125, "step": 1443 }, { "epoch": 0.09760713802893065, "grad_norm": 3.820631742477417, "learning_rate": 2.9686553121656624e-05, "loss": 0.351318359375, "step": 1444 }, { "epoch": 0.09767473299986482, "grad_norm": 1.5913323163986206, "learning_rate": 2.968588176606161e-05, "loss": 0.2374267578125, "step": 1445 }, { "epoch": 0.09774232797079897, "grad_norm": 3.1754040718078613, "learning_rate": 2.9685209699872302e-05, "loss": 0.29901123046875, "step": 1446 }, { "epoch": 0.09780992294173313, "grad_norm": 2.3455162048339844, "learning_rate": 2.968453692312122e-05, "loss": 0.29052734375, "step": 1447 }, { "epoch": 0.0978775179126673, "grad_norm": 1.9295132160186768, "learning_rate": 2.9683863435840923e-05, "loss": 0.30145263671875, "step": 1448 }, { "epoch": 0.09794511288360146, "grad_norm": 3.566922903060913, "learning_rate": 2.968318923806399e-05, "loss": 0.25311279296875, "step": 1449 }, { "epoch": 0.09801270785453563, "grad_norm": 1.5800526142120361, "learning_rate": 2.9682514329823053e-05, "loss": 0.25677490234375, "step": 1450 }, { "epoch": 0.09808030282546978, "grad_norm": 5.604096412658691, "learning_rate": 2.9681838711150758e-05, "loss": 0.38311767578125, "step": 1451 }, { "epoch": 0.09814789779640394, "grad_norm": 1.0388859510421753, "learning_rate": 2.9681162382079804e-05, "loss": 0.1293792724609375, "step": 1452 }, { "epoch": 0.09821549276733811, "grad_norm": 1.463148593902588, "learning_rate": 2.9680485342642913e-05, "loss": 0.2438201904296875, "step": 1453 }, { "epoch": 0.09828308773827227, "grad_norm": 1.5040289163589478, "learning_rate": 2.9679807592872847e-05, "loss": 0.260467529296875, "step": 1454 }, { "epoch": 0.09835068270920644, "grad_norm": 2.760204792022705, "learning_rate": 2.9679129132802394e-05, "loss": 0.2307891845703125, "step": 1455 }, { "epoch": 0.0984182776801406, "grad_norm": 1.4651379585266113, "learning_rate": 2.9678449962464386e-05, "loss": 0.2232208251953125, "step": 1456 }, { "epoch": 0.09848587265107477, "grad_norm": 1.9546699523925781, "learning_rate": 2.967777008189168e-05, "loss": 0.3507080078125, "step": 1457 }, { "epoch": 0.09855346762200892, "grad_norm": 2.4538941383361816, "learning_rate": 2.9677089491117185e-05, "loss": 0.352264404296875, "step": 1458 }, { "epoch": 0.09862106259294308, "grad_norm": 2.478516101837158, "learning_rate": 2.967640819017382e-05, "loss": 0.2630615234375, "step": 1459 }, { "epoch": 0.09868865756387725, "grad_norm": 2.682832717895508, "learning_rate": 2.9675726179094555e-05, "loss": 0.306243896484375, "step": 1460 }, { "epoch": 0.0987562525348114, "grad_norm": 2.227734088897705, "learning_rate": 2.967504345791239e-05, "loss": 0.34613037109375, "step": 1461 }, { "epoch": 0.09882384750574558, "grad_norm": 1.1182780265808105, "learning_rate": 2.967436002666036e-05, "loss": 0.19549560546875, "step": 1462 }, { "epoch": 0.09889144247667973, "grad_norm": 4.100792407989502, "learning_rate": 2.9673675885371534e-05, "loss": 0.34747314453125, "step": 1463 }, { "epoch": 0.0989590374476139, "grad_norm": 1.2973765134811401, "learning_rate": 2.9672991034079013e-05, "loss": 0.283660888671875, "step": 1464 }, { "epoch": 0.09902663241854806, "grad_norm": 1.4997172355651855, "learning_rate": 2.9672305472815933e-05, "loss": 0.3330078125, "step": 1465 }, { "epoch": 0.09909422738948222, "grad_norm": 3.1027064323425293, "learning_rate": 2.9671619201615472e-05, "loss": 0.2974853515625, "step": 1466 }, { "epoch": 0.09916182236041639, "grad_norm": 2.554253578186035, "learning_rate": 2.967093222051083e-05, "loss": 0.2503662109375, "step": 1467 }, { "epoch": 0.09922941733135054, "grad_norm": 2.6665496826171875, "learning_rate": 2.967024452953525e-05, "loss": 0.3328857421875, "step": 1468 }, { "epoch": 0.09929701230228472, "grad_norm": 1.0742108821868896, "learning_rate": 2.966955612872201e-05, "loss": 0.1740570068359375, "step": 1469 }, { "epoch": 0.09936460727321887, "grad_norm": 1.182721495628357, "learning_rate": 2.9668867018104407e-05, "loss": 0.1920928955078125, "step": 1470 }, { "epoch": 0.09943220224415303, "grad_norm": 2.1731836795806885, "learning_rate": 2.96681771977158e-05, "loss": 0.35137939453125, "step": 1471 }, { "epoch": 0.0994997972150872, "grad_norm": 1.913387417793274, "learning_rate": 2.9667486667589558e-05, "loss": 0.2752685546875, "step": 1472 }, { "epoch": 0.09956739218602136, "grad_norm": 3.639646530151367, "learning_rate": 2.9666795427759097e-05, "loss": 0.282073974609375, "step": 1473 }, { "epoch": 0.09963498715695553, "grad_norm": 2.536520004272461, "learning_rate": 2.966610347825786e-05, "loss": 0.33416748046875, "step": 1474 }, { "epoch": 0.09970258212788968, "grad_norm": 2.887521743774414, "learning_rate": 2.966541081911933e-05, "loss": 0.236053466796875, "step": 1475 }, { "epoch": 0.09977017709882385, "grad_norm": 2.786384105682373, "learning_rate": 2.966471745037702e-05, "loss": 0.3072509765625, "step": 1476 }, { "epoch": 0.09983777206975801, "grad_norm": 1.7161353826522827, "learning_rate": 2.966402337206448e-05, "loss": 0.30523681640625, "step": 1477 }, { "epoch": 0.09990536704069217, "grad_norm": 1.671047329902649, "learning_rate": 2.9663328584215294e-05, "loss": 0.27178955078125, "step": 1478 }, { "epoch": 0.09997296201162634, "grad_norm": 2.751190662384033, "learning_rate": 2.9662633086863083e-05, "loss": 0.34613037109375, "step": 1479 }, { "epoch": 0.1000405569825605, "grad_norm": 3.584350347518921, "learning_rate": 2.96619368800415e-05, "loss": 0.3712158203125, "step": 1480 }, { "epoch": 0.10010815195349466, "grad_norm": 4.0109710693359375, "learning_rate": 2.966123996378422e-05, "loss": 0.2960205078125, "step": 1481 }, { "epoch": 0.10017574692442882, "grad_norm": 4.6199517250061035, "learning_rate": 2.966054233812498e-05, "loss": 0.37554931640625, "step": 1482 }, { "epoch": 0.10024334189536299, "grad_norm": 2.0339174270629883, "learning_rate": 2.965984400309753e-05, "loss": 0.254730224609375, "step": 1483 }, { "epoch": 0.10031093686629715, "grad_norm": 2.7995681762695312, "learning_rate": 2.9659144958735653e-05, "loss": 0.291290283203125, "step": 1484 }, { "epoch": 0.1003785318372313, "grad_norm": 2.6493043899536133, "learning_rate": 2.9658445205073185e-05, "loss": 0.35284423828125, "step": 1485 }, { "epoch": 0.10044612680816548, "grad_norm": 2.297612190246582, "learning_rate": 2.9657744742143972e-05, "loss": 0.31768798828125, "step": 1486 }, { "epoch": 0.10051372177909963, "grad_norm": 2.0967540740966797, "learning_rate": 2.9657043569981916e-05, "loss": 0.34820556640625, "step": 1487 }, { "epoch": 0.1005813167500338, "grad_norm": 1.3494675159454346, "learning_rate": 2.9656341688620942e-05, "loss": 0.22161865234375, "step": 1488 }, { "epoch": 0.10064891172096796, "grad_norm": 1.4680203199386597, "learning_rate": 2.9655639098095012e-05, "loss": 0.26214599609375, "step": 1489 }, { "epoch": 0.10071650669190212, "grad_norm": 2.4452903270721436, "learning_rate": 2.9654935798438117e-05, "loss": 0.3485107421875, "step": 1490 }, { "epoch": 0.10078410166283629, "grad_norm": 3.152545928955078, "learning_rate": 2.965423178968429e-05, "loss": 0.261688232421875, "step": 1491 }, { "epoch": 0.10085169663377044, "grad_norm": 4.602622985839844, "learning_rate": 2.965352707186759e-05, "loss": 0.330596923828125, "step": 1492 }, { "epoch": 0.10091929160470461, "grad_norm": 3.641401529312134, "learning_rate": 2.9652821645022128e-05, "loss": 0.27178955078125, "step": 1493 }, { "epoch": 0.10098688657563877, "grad_norm": 1.663392424583435, "learning_rate": 2.9652115509182033e-05, "loss": 0.286102294921875, "step": 1494 }, { "epoch": 0.10105448154657294, "grad_norm": 2.78033709526062, "learning_rate": 2.965140866438146e-05, "loss": 0.3023681640625, "step": 1495 }, { "epoch": 0.1011220765175071, "grad_norm": 1.8394386768341064, "learning_rate": 2.9650701110654626e-05, "loss": 0.25244140625, "step": 1496 }, { "epoch": 0.10118967148844125, "grad_norm": 2.92082142829895, "learning_rate": 2.9649992848035757e-05, "loss": 0.3045654296875, "step": 1497 }, { "epoch": 0.10125726645937543, "grad_norm": 1.927323579788208, "learning_rate": 2.9649283876559128e-05, "loss": 0.280609130859375, "step": 1498 }, { "epoch": 0.10132486143030958, "grad_norm": 1.8263404369354248, "learning_rate": 2.9648574196259047e-05, "loss": 0.23480224609375, "step": 1499 }, { "epoch": 0.10139245640124375, "grad_norm": 1.507420539855957, "learning_rate": 2.964786380716984e-05, "loss": 0.26690673828125, "step": 1500 }, { "epoch": 0.10146005137217791, "grad_norm": 1.5142865180969238, "learning_rate": 2.964715270932589e-05, "loss": 0.283050537109375, "step": 1501 }, { "epoch": 0.10152764634311207, "grad_norm": 1.170462727546692, "learning_rate": 2.9646440902761607e-05, "loss": 0.2294921875, "step": 1502 }, { "epoch": 0.10159524131404624, "grad_norm": 2.7186992168426514, "learning_rate": 2.9645728387511424e-05, "loss": 0.3543701171875, "step": 1503 }, { "epoch": 0.10166283628498039, "grad_norm": 2.9254510402679443, "learning_rate": 2.964501516360982e-05, "loss": 0.27777099609375, "step": 1504 }, { "epoch": 0.10173043125591456, "grad_norm": 1.5807162523269653, "learning_rate": 2.9644301231091306e-05, "loss": 0.3111572265625, "step": 1505 }, { "epoch": 0.10179802622684872, "grad_norm": 3.0349864959716797, "learning_rate": 2.964358658999043e-05, "loss": 0.328369140625, "step": 1506 }, { "epoch": 0.10186562119778289, "grad_norm": 2.475132703781128, "learning_rate": 2.9642871240341765e-05, "loss": 0.34423828125, "step": 1507 }, { "epoch": 0.10193321616871705, "grad_norm": 1.3964269161224365, "learning_rate": 2.9642155182179927e-05, "loss": 0.2415313720703125, "step": 1508 }, { "epoch": 0.1020008111396512, "grad_norm": 1.7026355266571045, "learning_rate": 2.964143841553956e-05, "loss": 0.212493896484375, "step": 1509 }, { "epoch": 0.10206840611058537, "grad_norm": 2.0803041458129883, "learning_rate": 2.964072094045535e-05, "loss": 0.21483612060546875, "step": 1510 }, { "epoch": 0.10213600108151953, "grad_norm": 1.7061494588851929, "learning_rate": 2.964000275696201e-05, "loss": 0.1638641357421875, "step": 1511 }, { "epoch": 0.1022035960524537, "grad_norm": 3.75416898727417, "learning_rate": 2.9639283865094298e-05, "loss": 0.3538818359375, "step": 1512 }, { "epoch": 0.10227119102338786, "grad_norm": 2.0426673889160156, "learning_rate": 2.9638564264886982e-05, "loss": 0.264251708984375, "step": 1513 }, { "epoch": 0.10233878599432203, "grad_norm": 0.8659037351608276, "learning_rate": 2.96378439563749e-05, "loss": 0.0671844482421875, "step": 1514 }, { "epoch": 0.10240638096525619, "grad_norm": 1.325165033340454, "learning_rate": 2.9637122939592887e-05, "loss": 0.256195068359375, "step": 1515 }, { "epoch": 0.10247397593619034, "grad_norm": 2.0253350734710693, "learning_rate": 2.9636401214575844e-05, "loss": 0.324066162109375, "step": 1516 }, { "epoch": 0.10254157090712451, "grad_norm": 1.8178542852401733, "learning_rate": 2.963567878135869e-05, "loss": 0.2857666015625, "step": 1517 }, { "epoch": 0.10260916587805867, "grad_norm": 2.033752918243408, "learning_rate": 2.9634955639976374e-05, "loss": 0.29864501953125, "step": 1518 }, { "epoch": 0.10267676084899284, "grad_norm": 3.5922768115997314, "learning_rate": 2.963423179046389e-05, "loss": 0.27276611328125, "step": 1519 }, { "epoch": 0.102744355819927, "grad_norm": 4.016036510467529, "learning_rate": 2.9633507232856265e-05, "loss": 0.252197265625, "step": 1520 }, { "epoch": 0.10281195079086115, "grad_norm": 2.273132801055908, "learning_rate": 2.9632781967188552e-05, "loss": 0.32672119140625, "step": 1521 }, { "epoch": 0.10287954576179532, "grad_norm": 1.0549384355545044, "learning_rate": 2.9632055993495848e-05, "loss": 0.211029052734375, "step": 1522 }, { "epoch": 0.10294714073272948, "grad_norm": 1.1877259016036987, "learning_rate": 2.963132931181328e-05, "loss": 0.1954345703125, "step": 1523 }, { "epoch": 0.10301473570366365, "grad_norm": 1.2774566411972046, "learning_rate": 2.963060192217601e-05, "loss": 0.30047607421875, "step": 1524 }, { "epoch": 0.10308233067459781, "grad_norm": 3.317519426345825, "learning_rate": 2.962987382461923e-05, "loss": 0.269134521484375, "step": 1525 }, { "epoch": 0.10314992564553198, "grad_norm": 2.3038265705108643, "learning_rate": 2.9629145019178175e-05, "loss": 0.2733154296875, "step": 1526 }, { "epoch": 0.10321752061646614, "grad_norm": 2.478764533996582, "learning_rate": 2.9628415505888104e-05, "loss": 0.30572509765625, "step": 1527 }, { "epoch": 0.10328511558740029, "grad_norm": 1.5422990322113037, "learning_rate": 2.962768528478432e-05, "loss": 0.17279052734375, "step": 1528 }, { "epoch": 0.10335271055833446, "grad_norm": 1.7052894830703735, "learning_rate": 2.962695435590215e-05, "loss": 0.36468505859375, "step": 1529 }, { "epoch": 0.10342030552926862, "grad_norm": 2.786677837371826, "learning_rate": 2.9626222719276965e-05, "loss": 0.35479736328125, "step": 1530 }, { "epoch": 0.10348790050020279, "grad_norm": 3.1357650756835938, "learning_rate": 2.9625490374944168e-05, "loss": 0.342498779296875, "step": 1531 }, { "epoch": 0.10355549547113695, "grad_norm": 3.71750807762146, "learning_rate": 2.962475732293919e-05, "loss": 0.279571533203125, "step": 1532 }, { "epoch": 0.1036230904420711, "grad_norm": 1.301236867904663, "learning_rate": 2.9624023563297502e-05, "loss": 0.11944580078125, "step": 1533 }, { "epoch": 0.10369068541300527, "grad_norm": 1.389388918876648, "learning_rate": 2.962328909605461e-05, "loss": 0.155853271484375, "step": 1534 }, { "epoch": 0.10375828038393943, "grad_norm": 2.5480878353118896, "learning_rate": 2.962255392124605e-05, "loss": 0.2982177734375, "step": 1535 }, { "epoch": 0.1038258753548736, "grad_norm": 2.018001079559326, "learning_rate": 2.962181803890739e-05, "loss": 0.346343994140625, "step": 1536 }, { "epoch": 0.10389347032580776, "grad_norm": 3.7003352642059326, "learning_rate": 2.9621081449074247e-05, "loss": 0.32342529296875, "step": 1537 }, { "epoch": 0.10396106529674193, "grad_norm": 6.020237922668457, "learning_rate": 2.962034415178225e-05, "loss": 0.3011474609375, "step": 1538 }, { "epoch": 0.10402866026767608, "grad_norm": 6.340640544891357, "learning_rate": 2.9619606147067085e-05, "loss": 0.375244140625, "step": 1539 }, { "epoch": 0.10409625523861024, "grad_norm": 1.7104945182800293, "learning_rate": 2.9618867434964454e-05, "loss": 0.273956298828125, "step": 1540 }, { "epoch": 0.10416385020954441, "grad_norm": 1.7517911195755005, "learning_rate": 2.9618128015510105e-05, "loss": 0.1663055419921875, "step": 1541 }, { "epoch": 0.10423144518047857, "grad_norm": 2.8733017444610596, "learning_rate": 2.961738788873981e-05, "loss": 0.29400634765625, "step": 1542 }, { "epoch": 0.10429904015141274, "grad_norm": 2.2636349201202393, "learning_rate": 2.961664705468939e-05, "loss": 0.258514404296875, "step": 1543 }, { "epoch": 0.1043666351223469, "grad_norm": 3.6276495456695557, "learning_rate": 2.961590551339468e-05, "loss": 0.293670654296875, "step": 1544 }, { "epoch": 0.10443423009328107, "grad_norm": 1.6906450986862183, "learning_rate": 2.961516326489157e-05, "loss": 0.238433837890625, "step": 1545 }, { "epoch": 0.10450182506421522, "grad_norm": 1.692208170890808, "learning_rate": 2.9614420309215968e-05, "loss": 0.1533966064453125, "step": 1546 }, { "epoch": 0.10456942003514938, "grad_norm": 1.4868698120117188, "learning_rate": 2.9613676646403824e-05, "loss": 0.27899169921875, "step": 1547 }, { "epoch": 0.10463701500608355, "grad_norm": 1.0948781967163086, "learning_rate": 2.9612932276491124e-05, "loss": 0.26416015625, "step": 1548 }, { "epoch": 0.1047046099770177, "grad_norm": 1.8015056848526, "learning_rate": 2.9612187199513883e-05, "loss": 0.27850341796875, "step": 1549 }, { "epoch": 0.10477220494795188, "grad_norm": 1.4450711011886597, "learning_rate": 2.9611441415508154e-05, "loss": 0.199127197265625, "step": 1550 }, { "epoch": 0.10483979991888603, "grad_norm": 2.7106051445007324, "learning_rate": 2.961069492451002e-05, "loss": 0.31854248046875, "step": 1551 }, { "epoch": 0.10490739488982019, "grad_norm": 1.735499620437622, "learning_rate": 2.9609947726555603e-05, "loss": 0.2330322265625, "step": 1552 }, { "epoch": 0.10497498986075436, "grad_norm": 1.0091592073440552, "learning_rate": 2.960919982168106e-05, "loss": 0.1566009521484375, "step": 1553 }, { "epoch": 0.10504258483168852, "grad_norm": 3.695739269256592, "learning_rate": 2.9608451209922576e-05, "loss": 0.21368408203125, "step": 1554 }, { "epoch": 0.10511017980262269, "grad_norm": 5.144921779632568, "learning_rate": 2.960770189131637e-05, "loss": 0.259124755859375, "step": 1555 }, { "epoch": 0.10517777477355685, "grad_norm": 1.833191156387329, "learning_rate": 2.9606951865898704e-05, "loss": 0.2087249755859375, "step": 1556 }, { "epoch": 0.10524536974449102, "grad_norm": 2.993055582046509, "learning_rate": 2.9606201133705866e-05, "loss": 0.1676483154296875, "step": 1557 }, { "epoch": 0.10531296471542517, "grad_norm": 3.429743766784668, "learning_rate": 2.9605449694774184e-05, "loss": 0.368438720703125, "step": 1558 }, { "epoch": 0.10538055968635933, "grad_norm": 1.2703837156295776, "learning_rate": 2.9604697549140015e-05, "loss": 0.253509521484375, "step": 1559 }, { "epoch": 0.1054481546572935, "grad_norm": 0.8361709117889404, "learning_rate": 2.9603944696839745e-05, "loss": 0.157928466796875, "step": 1560 }, { "epoch": 0.10551574962822766, "grad_norm": 3.004551649093628, "learning_rate": 2.9603191137909822e-05, "loss": 0.36279296875, "step": 1561 }, { "epoch": 0.10558334459916183, "grad_norm": 1.8974932432174683, "learning_rate": 2.9602436872386688e-05, "loss": 0.237030029296875, "step": 1562 }, { "epoch": 0.10565093957009598, "grad_norm": 2.891677141189575, "learning_rate": 2.9601681900306847e-05, "loss": 0.2808837890625, "step": 1563 }, { "epoch": 0.10571853454103015, "grad_norm": 1.7400617599487305, "learning_rate": 2.9600926221706833e-05, "loss": 0.240203857421875, "step": 1564 }, { "epoch": 0.10578612951196431, "grad_norm": 3.6680426597595215, "learning_rate": 2.9600169836623204e-05, "loss": 0.31591796875, "step": 1565 }, { "epoch": 0.10585372448289847, "grad_norm": 1.7640172243118286, "learning_rate": 2.959941274509256e-05, "loss": 0.39019775390625, "step": 1566 }, { "epoch": 0.10592131945383264, "grad_norm": 4.9715142250061035, "learning_rate": 2.9598654947151536e-05, "loss": 0.3145751953125, "step": 1567 }, { "epoch": 0.1059889144247668, "grad_norm": 2.241635799407959, "learning_rate": 2.9597896442836793e-05, "loss": 0.207977294921875, "step": 1568 }, { "epoch": 0.10605650939570097, "grad_norm": 5.4190545082092285, "learning_rate": 2.9597137232185042e-05, "loss": 0.3697509765625, "step": 1569 }, { "epoch": 0.10612410436663512, "grad_norm": 2.536675214767456, "learning_rate": 2.959637731523301e-05, "loss": 0.2476043701171875, "step": 1570 }, { "epoch": 0.10619169933756928, "grad_norm": 3.7266323566436768, "learning_rate": 2.959561669201747e-05, "loss": 0.27191162109375, "step": 1571 }, { "epoch": 0.10625929430850345, "grad_norm": 3.320629835128784, "learning_rate": 2.959485536257523e-05, "loss": 0.30828857421875, "step": 1572 }, { "epoch": 0.1063268892794376, "grad_norm": 2.24912428855896, "learning_rate": 2.959409332694312e-05, "loss": 0.293670654296875, "step": 1573 }, { "epoch": 0.10639448425037178, "grad_norm": 1.039711833000183, "learning_rate": 2.9593330585158013e-05, "loss": 0.203582763671875, "step": 1574 }, { "epoch": 0.10646207922130593, "grad_norm": 5.008669376373291, "learning_rate": 2.959256713725682e-05, "loss": 0.3927001953125, "step": 1575 }, { "epoch": 0.1065296741922401, "grad_norm": 2.0162911415100098, "learning_rate": 2.9591802983276478e-05, "loss": 0.312896728515625, "step": 1576 }, { "epoch": 0.10659726916317426, "grad_norm": 1.1525897979736328, "learning_rate": 2.959103812325396e-05, "loss": 0.2357025146484375, "step": 1577 }, { "epoch": 0.10666486413410842, "grad_norm": 3.893089771270752, "learning_rate": 2.959027255722628e-05, "loss": 0.2685546875, "step": 1578 }, { "epoch": 0.10673245910504259, "grad_norm": 1.2157186269760132, "learning_rate": 2.958950628523048e-05, "loss": 0.3548583984375, "step": 1579 }, { "epoch": 0.10680005407597674, "grad_norm": 2.8958330154418945, "learning_rate": 2.9588739307303628e-05, "loss": 0.216949462890625, "step": 1580 }, { "epoch": 0.10686764904691091, "grad_norm": 3.4824776649475098, "learning_rate": 2.9587971623482842e-05, "loss": 0.364013671875, "step": 1581 }, { "epoch": 0.10693524401784507, "grad_norm": 1.6720448732376099, "learning_rate": 2.958720323380527e-05, "loss": 0.338897705078125, "step": 1582 }, { "epoch": 0.10700283898877923, "grad_norm": 3.000746965408325, "learning_rate": 2.958643413830809e-05, "loss": 0.289459228515625, "step": 1583 }, { "epoch": 0.1070704339597134, "grad_norm": 2.0668859481811523, "learning_rate": 2.9585664337028516e-05, "loss": 0.33001708984375, "step": 1584 }, { "epoch": 0.10713802893064756, "grad_norm": 1.9164484739303589, "learning_rate": 2.9584893830003793e-05, "loss": 0.277740478515625, "step": 1585 }, { "epoch": 0.10720562390158173, "grad_norm": 1.8718996047973633, "learning_rate": 2.9584122617271202e-05, "loss": 0.24884414672851562, "step": 1586 }, { "epoch": 0.10727321887251588, "grad_norm": 1.2798113822937012, "learning_rate": 2.958335069886806e-05, "loss": 0.290374755859375, "step": 1587 }, { "epoch": 0.10734081384345005, "grad_norm": 1.8444634675979614, "learning_rate": 2.9582578074831722e-05, "loss": 0.24078369140625, "step": 1588 }, { "epoch": 0.10740840881438421, "grad_norm": 2.7116165161132812, "learning_rate": 2.9581804745199566e-05, "loss": 0.322296142578125, "step": 1589 }, { "epoch": 0.10747600378531837, "grad_norm": 0.8139696717262268, "learning_rate": 2.9581030710009012e-05, "loss": 0.15781021118164062, "step": 1590 }, { "epoch": 0.10754359875625254, "grad_norm": 1.216410517692566, "learning_rate": 2.9580255969297517e-05, "loss": 0.287994384765625, "step": 1591 }, { "epoch": 0.1076111937271867, "grad_norm": 1.3666167259216309, "learning_rate": 2.9579480523102563e-05, "loss": 0.34393310546875, "step": 1592 }, { "epoch": 0.10767878869812086, "grad_norm": 3.827526330947876, "learning_rate": 2.957870437146167e-05, "loss": 0.2692413330078125, "step": 1593 }, { "epoch": 0.10774638366905502, "grad_norm": 2.672813653945923, "learning_rate": 2.9577927514412403e-05, "loss": 0.2434234619140625, "step": 1594 }, { "epoch": 0.10781397863998919, "grad_norm": 2.574552536010742, "learning_rate": 2.9577149951992336e-05, "loss": 0.3369140625, "step": 1595 }, { "epoch": 0.10788157361092335, "grad_norm": 3.5210187435150146, "learning_rate": 2.9576371684239104e-05, "loss": 0.3212890625, "step": 1596 }, { "epoch": 0.1079491685818575, "grad_norm": 1.3075281381607056, "learning_rate": 2.957559271119036e-05, "loss": 0.229736328125, "step": 1597 }, { "epoch": 0.10801676355279168, "grad_norm": 1.7842262983322144, "learning_rate": 2.9574813032883793e-05, "loss": 0.1976470947265625, "step": 1598 }, { "epoch": 0.10808435852372583, "grad_norm": 2.133096218109131, "learning_rate": 2.9574032649357134e-05, "loss": 0.19610595703125, "step": 1599 }, { "epoch": 0.10815195349466, "grad_norm": 2.1439085006713867, "learning_rate": 2.957325156064814e-05, "loss": 0.359619140625, "step": 1600 }, { "epoch": 0.10821954846559416, "grad_norm": 2.6513071060180664, "learning_rate": 2.9572469766794604e-05, "loss": 0.300628662109375, "step": 1601 }, { "epoch": 0.10828714343652832, "grad_norm": 1.9624567031860352, "learning_rate": 2.957168726783436e-05, "loss": 0.221771240234375, "step": 1602 }, { "epoch": 0.10835473840746249, "grad_norm": 2.564096689224243, "learning_rate": 2.9570904063805263e-05, "loss": 0.3175048828125, "step": 1603 }, { "epoch": 0.10842233337839664, "grad_norm": 1.4562222957611084, "learning_rate": 2.9570120154745216e-05, "loss": 0.200103759765625, "step": 1604 }, { "epoch": 0.10848992834933081, "grad_norm": 1.485682487487793, "learning_rate": 2.956933554069214e-05, "loss": 0.303558349609375, "step": 1605 }, { "epoch": 0.10855752332026497, "grad_norm": 1.8518922328948975, "learning_rate": 2.9568550221684003e-05, "loss": 0.27874755859375, "step": 1606 }, { "epoch": 0.10862511829119914, "grad_norm": 2.4140126705169678, "learning_rate": 2.9567764197758808e-05, "loss": 0.234130859375, "step": 1607 }, { "epoch": 0.1086927132621333, "grad_norm": 0.745851993560791, "learning_rate": 2.9566977468954584e-05, "loss": 0.164520263671875, "step": 1608 }, { "epoch": 0.10876030823306745, "grad_norm": 2.3850574493408203, "learning_rate": 2.9566190035309403e-05, "loss": 0.316925048828125, "step": 1609 }, { "epoch": 0.10882790320400162, "grad_norm": 1.4907782077789307, "learning_rate": 2.956540189686136e-05, "loss": 0.259735107421875, "step": 1610 }, { "epoch": 0.10889549817493578, "grad_norm": 2.3319015502929688, "learning_rate": 2.956461305364859e-05, "loss": 0.30572509765625, "step": 1611 }, { "epoch": 0.10896309314586995, "grad_norm": 1.3846973180770874, "learning_rate": 2.9563823505709267e-05, "loss": 0.20135498046875, "step": 1612 }, { "epoch": 0.10903068811680411, "grad_norm": 0.9740503430366516, "learning_rate": 2.956303325308159e-05, "loss": 0.238433837890625, "step": 1613 }, { "epoch": 0.10909828308773828, "grad_norm": 1.7270947694778442, "learning_rate": 2.9562242295803795e-05, "loss": 0.235992431640625, "step": 1614 }, { "epoch": 0.10916587805867244, "grad_norm": 1.6141417026519775, "learning_rate": 2.956145063391416e-05, "loss": 0.21337127685546875, "step": 1615 }, { "epoch": 0.10923347302960659, "grad_norm": 2.0047225952148438, "learning_rate": 2.9560658267450987e-05, "loss": 0.28179931640625, "step": 1616 }, { "epoch": 0.10930106800054076, "grad_norm": 2.5707967281341553, "learning_rate": 2.955986519645261e-05, "loss": 0.285980224609375, "step": 1617 }, { "epoch": 0.10936866297147492, "grad_norm": 3.1708407402038574, "learning_rate": 2.9559071420957415e-05, "loss": 0.347198486328125, "step": 1618 }, { "epoch": 0.10943625794240909, "grad_norm": 2.269791603088379, "learning_rate": 2.9558276941003798e-05, "loss": 0.31939697265625, "step": 1619 }, { "epoch": 0.10950385291334325, "grad_norm": 1.5792938470840454, "learning_rate": 2.955748175663021e-05, "loss": 0.3306884765625, "step": 1620 }, { "epoch": 0.1095714478842774, "grad_norm": 1.558756709098816, "learning_rate": 2.9556685867875118e-05, "loss": 0.2974853515625, "step": 1621 }, { "epoch": 0.10963904285521157, "grad_norm": 2.6623916625976562, "learning_rate": 2.955588927477704e-05, "loss": 0.29986572265625, "step": 1622 }, { "epoch": 0.10970663782614573, "grad_norm": 1.7567728757858276, "learning_rate": 2.9555091977374512e-05, "loss": 0.2202301025390625, "step": 1623 }, { "epoch": 0.1097742327970799, "grad_norm": 2.5662412643432617, "learning_rate": 2.9554293975706122e-05, "loss": 0.2209320068359375, "step": 1624 }, { "epoch": 0.10984182776801406, "grad_norm": 3.253676414489746, "learning_rate": 2.9553495269810474e-05, "loss": 0.361083984375, "step": 1625 }, { "epoch": 0.10990942273894823, "grad_norm": 1.90677011013031, "learning_rate": 2.955269585972622e-05, "loss": 0.325653076171875, "step": 1626 }, { "epoch": 0.10997701770988239, "grad_norm": 2.447054386138916, "learning_rate": 2.9551895745492037e-05, "loss": 0.234222412109375, "step": 1627 }, { "epoch": 0.11004461268081654, "grad_norm": 2.4288837909698486, "learning_rate": 2.955109492714664e-05, "loss": 0.202972412109375, "step": 1628 }, { "epoch": 0.11011220765175071, "grad_norm": 1.497337818145752, "learning_rate": 2.9550293404728776e-05, "loss": 0.2141571044921875, "step": 1629 }, { "epoch": 0.11017980262268487, "grad_norm": 1.0523393154144287, "learning_rate": 2.9549491178277234e-05, "loss": 0.221466064453125, "step": 1630 }, { "epoch": 0.11024739759361904, "grad_norm": 3.1710174083709717, "learning_rate": 2.9548688247830823e-05, "loss": 0.3001708984375, "step": 1631 }, { "epoch": 0.1103149925645532, "grad_norm": 0.9901062250137329, "learning_rate": 2.95478846134284e-05, "loss": 0.1284332275390625, "step": 1632 }, { "epoch": 0.11038258753548735, "grad_norm": 3.992720127105713, "learning_rate": 2.954708027510885e-05, "loss": 0.235595703125, "step": 1633 }, { "epoch": 0.11045018250642152, "grad_norm": 7.939337730407715, "learning_rate": 2.9546275232911082e-05, "loss": 0.30902099609375, "step": 1634 }, { "epoch": 0.11051777747735568, "grad_norm": 2.484947443008423, "learning_rate": 2.9545469486874063e-05, "loss": 0.306640625, "step": 1635 }, { "epoch": 0.11058537244828985, "grad_norm": 2.840237855911255, "learning_rate": 2.9544663037036767e-05, "loss": 0.24029541015625, "step": 1636 }, { "epoch": 0.11065296741922401, "grad_norm": 2.5865962505340576, "learning_rate": 2.9543855883438224e-05, "loss": 0.3387451171875, "step": 1637 }, { "epoch": 0.11072056239015818, "grad_norm": 2.8886055946350098, "learning_rate": 2.9543048026117485e-05, "loss": 0.24420166015625, "step": 1638 }, { "epoch": 0.11078815736109233, "grad_norm": 2.9897663593292236, "learning_rate": 2.9542239465113638e-05, "loss": 0.32244873046875, "step": 1639 }, { "epoch": 0.11085575233202649, "grad_norm": 3.468518018722534, "learning_rate": 2.954143020046581e-05, "loss": 0.267425537109375, "step": 1640 }, { "epoch": 0.11092334730296066, "grad_norm": 2.9573209285736084, "learning_rate": 2.954062023221316e-05, "loss": 0.26971435546875, "step": 1641 }, { "epoch": 0.11099094227389482, "grad_norm": 4.829457759857178, "learning_rate": 2.9539809560394873e-05, "loss": 0.373748779296875, "step": 1642 }, { "epoch": 0.11105853724482899, "grad_norm": 3.3233067989349365, "learning_rate": 2.9538998185050177e-05, "loss": 0.28399658203125, "step": 1643 }, { "epoch": 0.11112613221576315, "grad_norm": 1.7831658124923706, "learning_rate": 2.9538186106218334e-05, "loss": 0.346923828125, "step": 1644 }, { "epoch": 0.11119372718669732, "grad_norm": 1.5680464506149292, "learning_rate": 2.9537373323938636e-05, "loss": 0.27630615234375, "step": 1645 }, { "epoch": 0.11126132215763147, "grad_norm": 1.434263825416565, "learning_rate": 2.9536559838250407e-05, "loss": 0.26971435546875, "step": 1646 }, { "epoch": 0.11132891712856563, "grad_norm": 2.231123685836792, "learning_rate": 2.953574564919301e-05, "loss": 0.31695556640625, "step": 1647 }, { "epoch": 0.1113965120994998, "grad_norm": 2.0201051235198975, "learning_rate": 2.9534930756805845e-05, "loss": 0.232696533203125, "step": 1648 }, { "epoch": 0.11146410707043396, "grad_norm": 1.271832823753357, "learning_rate": 2.9534115161128337e-05, "loss": 0.1876678466796875, "step": 1649 }, { "epoch": 0.11153170204136813, "grad_norm": 1.2781187295913696, "learning_rate": 2.9533298862199948e-05, "loss": 0.191009521484375, "step": 1650 }, { "epoch": 0.11159929701230228, "grad_norm": 1.5031222105026245, "learning_rate": 2.953248186006018e-05, "loss": 0.310821533203125, "step": 1651 }, { "epoch": 0.11166689198323644, "grad_norm": 3.039323329925537, "learning_rate": 2.9531664154748564e-05, "loss": 0.37274169921875, "step": 1652 }, { "epoch": 0.11173448695417061, "grad_norm": 1.742970585823059, "learning_rate": 2.9530845746304665e-05, "loss": 0.17774200439453125, "step": 1653 }, { "epoch": 0.11180208192510477, "grad_norm": 2.736684560775757, "learning_rate": 2.9530026634768084e-05, "loss": 0.304229736328125, "step": 1654 }, { "epoch": 0.11186967689603894, "grad_norm": 1.8113863468170166, "learning_rate": 2.952920682017845e-05, "loss": 0.33331298828125, "step": 1655 }, { "epoch": 0.1119372718669731, "grad_norm": 1.732547640800476, "learning_rate": 2.9528386302575434e-05, "loss": 0.281951904296875, "step": 1656 }, { "epoch": 0.11200486683790727, "grad_norm": 1.8821979761123657, "learning_rate": 2.9527565081998738e-05, "loss": 0.28094482421875, "step": 1657 }, { "epoch": 0.11207246180884142, "grad_norm": 2.2543227672576904, "learning_rate": 2.95267431584881e-05, "loss": 0.29534912109375, "step": 1658 }, { "epoch": 0.11214005677977558, "grad_norm": 1.733359694480896, "learning_rate": 2.9525920532083285e-05, "loss": 0.2196044921875, "step": 1659 }, { "epoch": 0.11220765175070975, "grad_norm": 2.123647689819336, "learning_rate": 2.95250972028241e-05, "loss": 0.24993896484375, "step": 1660 }, { "epoch": 0.1122752467216439, "grad_norm": 1.757737398147583, "learning_rate": 2.952427317075038e-05, "loss": 0.2322235107421875, "step": 1661 }, { "epoch": 0.11234284169257808, "grad_norm": 2.316551923751831, "learning_rate": 2.9523448435902e-05, "loss": 0.2861328125, "step": 1662 }, { "epoch": 0.11241043666351223, "grad_norm": 3.952395439147949, "learning_rate": 2.9522622998318864e-05, "loss": 0.2904052734375, "step": 1663 }, { "epoch": 0.11247803163444639, "grad_norm": 1.897273302078247, "learning_rate": 2.952179685804091e-05, "loss": 0.2860107421875, "step": 1664 }, { "epoch": 0.11254562660538056, "grad_norm": 4.241904258728027, "learning_rate": 2.952097001510812e-05, "loss": 0.377197265625, "step": 1665 }, { "epoch": 0.11261322157631472, "grad_norm": 2.436563730239868, "learning_rate": 2.952014246956049e-05, "loss": 0.2879638671875, "step": 1666 }, { "epoch": 0.11268081654724889, "grad_norm": 2.4117655754089355, "learning_rate": 2.9519314221438067e-05, "loss": 0.251373291015625, "step": 1667 }, { "epoch": 0.11274841151818304, "grad_norm": 1.463629961013794, "learning_rate": 2.951848527078093e-05, "loss": 0.365234375, "step": 1668 }, { "epoch": 0.11281600648911722, "grad_norm": 1.5567110776901245, "learning_rate": 2.9517655617629185e-05, "loss": 0.291259765625, "step": 1669 }, { "epoch": 0.11288360146005137, "grad_norm": 1.9539967775344849, "learning_rate": 2.9516825262022974e-05, "loss": 0.23968505859375, "step": 1670 }, { "epoch": 0.11295119643098553, "grad_norm": 2.5923821926116943, "learning_rate": 2.9515994204002485e-05, "loss": 0.265594482421875, "step": 1671 }, { "epoch": 0.1130187914019197, "grad_norm": 0.5483148097991943, "learning_rate": 2.9515162443607917e-05, "loss": 0.1032867431640625, "step": 1672 }, { "epoch": 0.11308638637285386, "grad_norm": 2.9141650199890137, "learning_rate": 2.951432998087952e-05, "loss": 0.37127685546875, "step": 1673 }, { "epoch": 0.11315398134378803, "grad_norm": 3.836245059967041, "learning_rate": 2.951349681585758e-05, "loss": 0.28021240234375, "step": 1674 }, { "epoch": 0.11322157631472218, "grad_norm": 1.9563997983932495, "learning_rate": 2.95126629485824e-05, "loss": 0.323577880859375, "step": 1675 }, { "epoch": 0.11328917128565635, "grad_norm": 2.095400810241699, "learning_rate": 2.951182837909434e-05, "loss": 0.33642578125, "step": 1676 }, { "epoch": 0.11335676625659051, "grad_norm": 3.7827017307281494, "learning_rate": 2.951099310743377e-05, "loss": 0.23785400390625, "step": 1677 }, { "epoch": 0.11342436122752467, "grad_norm": 3.9898436069488525, "learning_rate": 2.9510157133641114e-05, "loss": 0.35308837890625, "step": 1678 }, { "epoch": 0.11349195619845884, "grad_norm": 1.9271924495697021, "learning_rate": 2.9509320457756812e-05, "loss": 0.28131103515625, "step": 1679 }, { "epoch": 0.113559551169393, "grad_norm": 1.8462233543395996, "learning_rate": 2.950848307982136e-05, "loss": 0.2431640625, "step": 1680 }, { "epoch": 0.11362714614032716, "grad_norm": 1.396069884300232, "learning_rate": 2.9507644999875264e-05, "loss": 0.337738037109375, "step": 1681 }, { "epoch": 0.11369474111126132, "grad_norm": 1.069383144378662, "learning_rate": 2.9506806217959087e-05, "loss": 0.1290130615234375, "step": 1682 }, { "epoch": 0.11376233608219548, "grad_norm": 2.8148820400238037, "learning_rate": 2.95059667341134e-05, "loss": 0.320556640625, "step": 1683 }, { "epoch": 0.11382993105312965, "grad_norm": 2.0191049575805664, "learning_rate": 2.9505126548378838e-05, "loss": 0.3128662109375, "step": 1684 }, { "epoch": 0.1138975260240638, "grad_norm": 3.0249502658843994, "learning_rate": 2.9504285660796047e-05, "loss": 0.27862548828125, "step": 1685 }, { "epoch": 0.11396512099499798, "grad_norm": 1.6678391695022583, "learning_rate": 2.950344407140571e-05, "loss": 0.254425048828125, "step": 1686 }, { "epoch": 0.11403271596593213, "grad_norm": 3.777322292327881, "learning_rate": 2.9502601780248557e-05, "loss": 0.26190185546875, "step": 1687 }, { "epoch": 0.1141003109368663, "grad_norm": 4.621082305908203, "learning_rate": 2.9501758787365335e-05, "loss": 0.3232421875, "step": 1688 }, { "epoch": 0.11416790590780046, "grad_norm": 2.0307886600494385, "learning_rate": 2.9500915092796842e-05, "loss": 0.2908935546875, "step": 1689 }, { "epoch": 0.11423550087873462, "grad_norm": 3.572092294692993, "learning_rate": 2.9500070696583897e-05, "loss": 0.31072998046875, "step": 1690 }, { "epoch": 0.11430309584966879, "grad_norm": 2.922708749771118, "learning_rate": 2.9499225598767352e-05, "loss": 0.357421875, "step": 1691 }, { "epoch": 0.11437069082060294, "grad_norm": 2.055295467376709, "learning_rate": 2.9498379799388104e-05, "loss": 0.32147216796875, "step": 1692 }, { "epoch": 0.11443828579153711, "grad_norm": 2.1311283111572266, "learning_rate": 2.949753329848708e-05, "loss": 0.2217864990234375, "step": 1693 }, { "epoch": 0.11450588076247127, "grad_norm": 1.4851735830307007, "learning_rate": 2.9496686096105232e-05, "loss": 0.29913330078125, "step": 1694 }, { "epoch": 0.11457347573340544, "grad_norm": 1.5922025442123413, "learning_rate": 2.949583819228356e-05, "loss": 0.1230621337890625, "step": 1695 }, { "epoch": 0.1146410707043396, "grad_norm": 2.037130832672119, "learning_rate": 2.9494989587063087e-05, "loss": 0.340301513671875, "step": 1696 }, { "epoch": 0.11470866567527375, "grad_norm": 1.9630711078643799, "learning_rate": 2.9494140280484873e-05, "loss": 0.32037353515625, "step": 1697 }, { "epoch": 0.11477626064620793, "grad_norm": 1.7476041316986084, "learning_rate": 2.9493290272590016e-05, "loss": 0.274322509765625, "step": 1698 }, { "epoch": 0.11484385561714208, "grad_norm": 2.9294497966766357, "learning_rate": 2.949243956341964e-05, "loss": 0.2684326171875, "step": 1699 }, { "epoch": 0.11491145058807625, "grad_norm": 1.7929178476333618, "learning_rate": 2.9491588153014914e-05, "loss": 0.34014892578125, "step": 1700 }, { "epoch": 0.11497904555901041, "grad_norm": 1.8484500646591187, "learning_rate": 2.9490736041417025e-05, "loss": 0.2407684326171875, "step": 1701 }, { "epoch": 0.11504664052994457, "grad_norm": 1.9483742713928223, "learning_rate": 2.9489883228667214e-05, "loss": 0.34149169921875, "step": 1702 }, { "epoch": 0.11511423550087874, "grad_norm": 2.594968318939209, "learning_rate": 2.948902971480674e-05, "loss": 0.2396240234375, "step": 1703 }, { "epoch": 0.1151818304718129, "grad_norm": 1.6622583866119385, "learning_rate": 2.9488175499876902e-05, "loss": 0.23455810546875, "step": 1704 }, { "epoch": 0.11524942544274706, "grad_norm": 1.916985273361206, "learning_rate": 2.9487320583919035e-05, "loss": 0.3148193359375, "step": 1705 }, { "epoch": 0.11531702041368122, "grad_norm": 0.977919340133667, "learning_rate": 2.94864649669745e-05, "loss": 0.23504638671875, "step": 1706 }, { "epoch": 0.11538461538461539, "grad_norm": 1.4280067682266235, "learning_rate": 2.9485608649084698e-05, "loss": 0.1947021484375, "step": 1707 }, { "epoch": 0.11545221035554955, "grad_norm": 0.7403329610824585, "learning_rate": 2.9484751630291065e-05, "loss": 0.155792236328125, "step": 1708 }, { "epoch": 0.1155198053264837, "grad_norm": 2.954456090927124, "learning_rate": 2.948389391063507e-05, "loss": 0.3380126953125, "step": 1709 }, { "epoch": 0.11558740029741787, "grad_norm": 0.9275293350219727, "learning_rate": 2.9483035490158215e-05, "loss": 0.167633056640625, "step": 1710 }, { "epoch": 0.11565499526835203, "grad_norm": 1.709473729133606, "learning_rate": 2.9482176368902027e-05, "loss": 0.341552734375, "step": 1711 }, { "epoch": 0.1157225902392862, "grad_norm": 1.5400729179382324, "learning_rate": 2.9481316546908092e-05, "loss": 0.2035064697265625, "step": 1712 }, { "epoch": 0.11579018521022036, "grad_norm": 3.316409111022949, "learning_rate": 2.9480456024218e-05, "loss": 0.20851898193359375, "step": 1713 }, { "epoch": 0.11585778018115452, "grad_norm": 2.9477696418762207, "learning_rate": 2.947959480087339e-05, "loss": 0.3719482421875, "step": 1714 }, { "epoch": 0.11592537515208869, "grad_norm": 1.6157358884811401, "learning_rate": 2.9478732876915935e-05, "loss": 0.16644287109375, "step": 1715 }, { "epoch": 0.11599297012302284, "grad_norm": 4.092916965484619, "learning_rate": 2.9477870252387343e-05, "loss": 0.2638092041015625, "step": 1716 }, { "epoch": 0.11606056509395701, "grad_norm": 1.6492339372634888, "learning_rate": 2.9477006927329355e-05, "loss": 0.258575439453125, "step": 1717 }, { "epoch": 0.11612816006489117, "grad_norm": 1.4031609296798706, "learning_rate": 2.947614290178374e-05, "loss": 0.2443695068359375, "step": 1718 }, { "epoch": 0.11619575503582534, "grad_norm": 1.202541708946228, "learning_rate": 2.94752781757923e-05, "loss": 0.1947021484375, "step": 1719 }, { "epoch": 0.1162633500067595, "grad_norm": 1.876601219177246, "learning_rate": 2.9474412749396882e-05, "loss": 0.3111572265625, "step": 1720 }, { "epoch": 0.11633094497769365, "grad_norm": 1.1094812154769897, "learning_rate": 2.9473546622639365e-05, "loss": 0.118743896484375, "step": 1721 }, { "epoch": 0.11639853994862782, "grad_norm": 2.086254596710205, "learning_rate": 2.9472679795561648e-05, "loss": 0.295501708984375, "step": 1722 }, { "epoch": 0.11646613491956198, "grad_norm": 3.8202760219573975, "learning_rate": 2.9471812268205675e-05, "loss": 0.274688720703125, "step": 1723 }, { "epoch": 0.11653372989049615, "grad_norm": 1.9873970746994019, "learning_rate": 2.9470944040613434e-05, "loss": 0.160247802734375, "step": 1724 }, { "epoch": 0.11660132486143031, "grad_norm": 5.356479644775391, "learning_rate": 2.947007511282692e-05, "loss": 0.36126708984375, "step": 1725 }, { "epoch": 0.11666891983236448, "grad_norm": 1.2704097032546997, "learning_rate": 2.9469205484888184e-05, "loss": 0.195770263671875, "step": 1726 }, { "epoch": 0.11673651480329864, "grad_norm": 2.592560291290283, "learning_rate": 2.9468335156839307e-05, "loss": 0.289154052734375, "step": 1727 }, { "epoch": 0.11680410977423279, "grad_norm": 4.292648792266846, "learning_rate": 2.9467464128722394e-05, "loss": 0.32568359375, "step": 1728 }, { "epoch": 0.11687170474516696, "grad_norm": 1.2374430894851685, "learning_rate": 2.94665924005796e-05, "loss": 0.31982421875, "step": 1729 }, { "epoch": 0.11693929971610112, "grad_norm": 1.6384292840957642, "learning_rate": 2.946571997245309e-05, "loss": 0.36541748046875, "step": 1730 }, { "epoch": 0.11700689468703529, "grad_norm": 2.7003536224365234, "learning_rate": 2.946484684438509e-05, "loss": 0.3197021484375, "step": 1731 }, { "epoch": 0.11707448965796945, "grad_norm": 3.5385141372680664, "learning_rate": 2.9463973016417844e-05, "loss": 0.37139892578125, "step": 1732 }, { "epoch": 0.1171420846289036, "grad_norm": 2.1116065979003906, "learning_rate": 2.9463098488593632e-05, "loss": 0.32879638671875, "step": 1733 }, { "epoch": 0.11720967959983777, "grad_norm": 5.167695999145508, "learning_rate": 2.9462223260954775e-05, "loss": 0.33489990234375, "step": 1734 }, { "epoch": 0.11727727457077193, "grad_norm": 5.6526055335998535, "learning_rate": 2.9461347333543615e-05, "loss": 0.31304931640625, "step": 1735 }, { "epoch": 0.1173448695417061, "grad_norm": 5.2746806144714355, "learning_rate": 2.9460470706402532e-05, "loss": 0.30908203125, "step": 1736 }, { "epoch": 0.11741246451264026, "grad_norm": 3.6227807998657227, "learning_rate": 2.9459593379573952e-05, "loss": 0.1625213623046875, "step": 1737 }, { "epoch": 0.11748005948357443, "grad_norm": 1.7911057472229004, "learning_rate": 2.9458715353100318e-05, "loss": 0.268707275390625, "step": 1738 }, { "epoch": 0.11754765445450858, "grad_norm": 4.505753517150879, "learning_rate": 2.9457836627024118e-05, "loss": 0.2750244140625, "step": 1739 }, { "epoch": 0.11761524942544274, "grad_norm": 2.02694034576416, "learning_rate": 2.9456957201387872e-05, "loss": 0.1631011962890625, "step": 1740 }, { "epoch": 0.11768284439637691, "grad_norm": 2.2217345237731934, "learning_rate": 2.945607707623413e-05, "loss": 0.271697998046875, "step": 1741 }, { "epoch": 0.11775043936731107, "grad_norm": 1.372554063796997, "learning_rate": 2.9455196251605477e-05, "loss": 0.23980712890625, "step": 1742 }, { "epoch": 0.11781803433824524, "grad_norm": 3.0971455574035645, "learning_rate": 2.9454314727544532e-05, "loss": 0.32867431640625, "step": 1743 }, { "epoch": 0.1178856293091794, "grad_norm": 3.1225380897521973, "learning_rate": 2.9453432504093955e-05, "loss": 0.345428466796875, "step": 1744 }, { "epoch": 0.11795322428011355, "grad_norm": 5.000158309936523, "learning_rate": 2.945254958129642e-05, "loss": 0.281890869140625, "step": 1745 }, { "epoch": 0.11802081925104772, "grad_norm": 5.925994396209717, "learning_rate": 2.9451665959194664e-05, "loss": 0.368743896484375, "step": 1746 }, { "epoch": 0.11808841422198188, "grad_norm": 5.321624755859375, "learning_rate": 2.9450781637831433e-05, "loss": 0.344207763671875, "step": 1747 }, { "epoch": 0.11815600919291605, "grad_norm": 2.8496382236480713, "learning_rate": 2.9449896617249514e-05, "loss": 0.30438232421875, "step": 1748 }, { "epoch": 0.11822360416385021, "grad_norm": 3.5043890476226807, "learning_rate": 2.944901089749174e-05, "loss": 0.187774658203125, "step": 1749 }, { "epoch": 0.11829119913478438, "grad_norm": 1.2958731651306152, "learning_rate": 2.944812447860096e-05, "loss": 0.30364990234375, "step": 1750 }, { "epoch": 0.11835879410571853, "grad_norm": 1.7263426780700684, "learning_rate": 2.9447237360620062e-05, "loss": 0.3046875, "step": 1751 }, { "epoch": 0.11842638907665269, "grad_norm": 1.456263780593872, "learning_rate": 2.9446349543591976e-05, "loss": 0.273284912109375, "step": 1752 }, { "epoch": 0.11849398404758686, "grad_norm": 1.5471032857894897, "learning_rate": 2.9445461027559658e-05, "loss": 0.3277587890625, "step": 1753 }, { "epoch": 0.11856157901852102, "grad_norm": 3.535762071609497, "learning_rate": 2.94445718125661e-05, "loss": 0.2293853759765625, "step": 1754 }, { "epoch": 0.11862917398945519, "grad_norm": 2.3958537578582764, "learning_rate": 2.9443681898654332e-05, "loss": 0.26739501953125, "step": 1755 }, { "epoch": 0.11869676896038935, "grad_norm": 1.952271819114685, "learning_rate": 2.9442791285867407e-05, "loss": 0.26763916015625, "step": 1756 }, { "epoch": 0.11876436393132352, "grad_norm": 2.2296037673950195, "learning_rate": 2.9441899974248413e-05, "loss": 0.31036376953125, "step": 1757 }, { "epoch": 0.11883195890225767, "grad_norm": 1.823161244392395, "learning_rate": 2.9441007963840496e-05, "loss": 0.25576019287109375, "step": 1758 }, { "epoch": 0.11889955387319183, "grad_norm": 3.051870107650757, "learning_rate": 2.94401152546868e-05, "loss": 0.32977294921875, "step": 1759 }, { "epoch": 0.118967148844126, "grad_norm": 1.537503957748413, "learning_rate": 2.943922184683053e-05, "loss": 0.07908248901367188, "step": 1760 }, { "epoch": 0.11903474381506016, "grad_norm": 2.5986928939819336, "learning_rate": 2.9438327740314907e-05, "loss": 0.271331787109375, "step": 1761 }, { "epoch": 0.11910233878599433, "grad_norm": 5.428248882293701, "learning_rate": 2.94374329351832e-05, "loss": 0.42144775390625, "step": 1762 }, { "epoch": 0.11916993375692848, "grad_norm": 1.0197168588638306, "learning_rate": 2.9436537431478695e-05, "loss": 0.166748046875, "step": 1763 }, { "epoch": 0.11923752872786264, "grad_norm": 2.6087584495544434, "learning_rate": 2.943564122924474e-05, "loss": 0.31884765625, "step": 1764 }, { "epoch": 0.11930512369879681, "grad_norm": 2.0019478797912598, "learning_rate": 2.9434744328524678e-05, "loss": 0.26885986328125, "step": 1765 }, { "epoch": 0.11937271866973097, "grad_norm": 2.68440580368042, "learning_rate": 2.9433846729361923e-05, "loss": 0.251953125, "step": 1766 }, { "epoch": 0.11944031364066514, "grad_norm": 3.1600122451782227, "learning_rate": 2.94329484317999e-05, "loss": 0.3233642578125, "step": 1767 }, { "epoch": 0.1195079086115993, "grad_norm": 1.9451719522476196, "learning_rate": 2.943204943588207e-05, "loss": 0.324462890625, "step": 1768 }, { "epoch": 0.11957550358253347, "grad_norm": 2.199049949645996, "learning_rate": 2.9431149741651943e-05, "loss": 0.291473388671875, "step": 1769 }, { "epoch": 0.11964309855346762, "grad_norm": 2.709794282913208, "learning_rate": 2.9430249349153034e-05, "loss": 0.395477294921875, "step": 1770 }, { "epoch": 0.11971069352440178, "grad_norm": 2.8845200538635254, "learning_rate": 2.9429348258428933e-05, "loss": 0.273040771484375, "step": 1771 }, { "epoch": 0.11977828849533595, "grad_norm": 1.6682707071304321, "learning_rate": 2.9428446469523224e-05, "loss": 0.341278076171875, "step": 1772 }, { "epoch": 0.1198458834662701, "grad_norm": 1.8954583406448364, "learning_rate": 2.942754398247954e-05, "loss": 0.378662109375, "step": 1773 }, { "epoch": 0.11991347843720428, "grad_norm": 1.4241095781326294, "learning_rate": 2.942664079734156e-05, "loss": 0.1826019287109375, "step": 1774 }, { "epoch": 0.11998107340813843, "grad_norm": 2.1696228981018066, "learning_rate": 2.9425736914152977e-05, "loss": 0.1922149658203125, "step": 1775 }, { "epoch": 0.1200486683790726, "grad_norm": 1.7950561046600342, "learning_rate": 2.9424832332957532e-05, "loss": 0.205230712890625, "step": 1776 }, { "epoch": 0.12011626335000676, "grad_norm": 2.2546207904815674, "learning_rate": 2.942392705379899e-05, "loss": 0.25445556640625, "step": 1777 }, { "epoch": 0.12018385832094092, "grad_norm": 1.9404627084732056, "learning_rate": 2.9423021076721155e-05, "loss": 0.3018798828125, "step": 1778 }, { "epoch": 0.12025145329187509, "grad_norm": 2.2488152980804443, "learning_rate": 2.942211440176787e-05, "loss": 0.1650390625, "step": 1779 }, { "epoch": 0.12031904826280924, "grad_norm": 1.9979944229125977, "learning_rate": 2.9421207028982992e-05, "loss": 0.32269287109375, "step": 1780 }, { "epoch": 0.12038664323374342, "grad_norm": 1.5565404891967773, "learning_rate": 2.942029895841044e-05, "loss": 0.248291015625, "step": 1781 }, { "epoch": 0.12045423820467757, "grad_norm": 0.7017682194709778, "learning_rate": 2.941939019009414e-05, "loss": 0.139495849609375, "step": 1782 }, { "epoch": 0.12052183317561173, "grad_norm": 2.791139602661133, "learning_rate": 2.941848072407807e-05, "loss": 0.28521728515625, "step": 1783 }, { "epoch": 0.1205894281465459, "grad_norm": 1.3425577878952026, "learning_rate": 2.941757056040624e-05, "loss": 0.2006988525390625, "step": 1784 }, { "epoch": 0.12065702311748006, "grad_norm": 2.318425178527832, "learning_rate": 2.9416659699122682e-05, "loss": 0.25384521484375, "step": 1785 }, { "epoch": 0.12072461808841423, "grad_norm": 2.4183998107910156, "learning_rate": 2.9415748140271466e-05, "loss": 0.35009765625, "step": 1786 }, { "epoch": 0.12079221305934838, "grad_norm": 1.3257008790969849, "learning_rate": 2.9414835883896713e-05, "loss": 0.14251708984375, "step": 1787 }, { "epoch": 0.12085980803028255, "grad_norm": 1.8008090257644653, "learning_rate": 2.941392293004255e-05, "loss": 0.250396728515625, "step": 1788 }, { "epoch": 0.12092740300121671, "grad_norm": 2.956587553024292, "learning_rate": 2.9413009278753158e-05, "loss": 0.32720947265625, "step": 1789 }, { "epoch": 0.12099499797215087, "grad_norm": 1.8624800443649292, "learning_rate": 2.941209493007274e-05, "loss": 0.35369873046875, "step": 1790 }, { "epoch": 0.12106259294308504, "grad_norm": 3.1683218479156494, "learning_rate": 2.9411179884045546e-05, "loss": 0.2308349609375, "step": 1791 }, { "epoch": 0.1211301879140192, "grad_norm": 2.23612380027771, "learning_rate": 2.9410264140715846e-05, "loss": 0.178192138671875, "step": 1792 }, { "epoch": 0.12119778288495336, "grad_norm": 2.2208738327026367, "learning_rate": 2.940934770012795e-05, "loss": 0.3076171875, "step": 1793 }, { "epoch": 0.12126537785588752, "grad_norm": 1.5809036493301392, "learning_rate": 2.94084305623262e-05, "loss": 0.29278564453125, "step": 1794 }, { "epoch": 0.12133297282682168, "grad_norm": 1.4229655265808105, "learning_rate": 2.940751272735498e-05, "loss": 0.208038330078125, "step": 1795 }, { "epoch": 0.12140056779775585, "grad_norm": 1.2423715591430664, "learning_rate": 2.940659419525869e-05, "loss": 0.28955078125, "step": 1796 }, { "epoch": 0.12146816276869, "grad_norm": 1.569250464439392, "learning_rate": 2.940567496608178e-05, "loss": 0.2015533447265625, "step": 1797 }, { "epoch": 0.12153575773962418, "grad_norm": 1.785110592842102, "learning_rate": 2.9404755039868726e-05, "loss": 0.298919677734375, "step": 1798 }, { "epoch": 0.12160335271055833, "grad_norm": 1.2481131553649902, "learning_rate": 2.9403834416664042e-05, "loss": 0.228515625, "step": 1799 }, { "epoch": 0.1216709476814925, "grad_norm": 2.464128017425537, "learning_rate": 2.9402913096512274e-05, "loss": 0.32916259765625, "step": 1800 }, { "epoch": 0.12173854265242666, "grad_norm": 1.1627646684646606, "learning_rate": 2.9401991079457995e-05, "loss": 0.262481689453125, "step": 1801 }, { "epoch": 0.12180613762336082, "grad_norm": 1.2707009315490723, "learning_rate": 2.9401068365545828e-05, "loss": 0.1571197509765625, "step": 1802 }, { "epoch": 0.12187373259429499, "grad_norm": 1.690596103668213, "learning_rate": 2.9400144954820414e-05, "loss": 0.217498779296875, "step": 1803 }, { "epoch": 0.12194132756522914, "grad_norm": 1.834126591682434, "learning_rate": 2.9399220847326427e-05, "loss": 0.3646240234375, "step": 1804 }, { "epoch": 0.12200892253616331, "grad_norm": 3.8047494888305664, "learning_rate": 2.9398296043108595e-05, "loss": 0.349639892578125, "step": 1805 }, { "epoch": 0.12207651750709747, "grad_norm": 1.643298864364624, "learning_rate": 2.9397370542211653e-05, "loss": 0.347412109375, "step": 1806 }, { "epoch": 0.12214411247803164, "grad_norm": 1.7815444469451904, "learning_rate": 2.9396444344680392e-05, "loss": 0.21615982055664062, "step": 1807 }, { "epoch": 0.1222117074489658, "grad_norm": 1.7837022542953491, "learning_rate": 2.939551745055962e-05, "loss": 0.266571044921875, "step": 1808 }, { "epoch": 0.12227930241989995, "grad_norm": 2.5779054164886475, "learning_rate": 2.939458985989419e-05, "loss": 0.295196533203125, "step": 1809 }, { "epoch": 0.12234689739083413, "grad_norm": 2.882974863052368, "learning_rate": 2.9393661572728987e-05, "loss": 0.300201416015625, "step": 1810 }, { "epoch": 0.12241449236176828, "grad_norm": 2.091916799545288, "learning_rate": 2.9392732589108916e-05, "loss": 0.299560546875, "step": 1811 }, { "epoch": 0.12248208733270245, "grad_norm": 0.6761160492897034, "learning_rate": 2.9391802909078945e-05, "loss": 0.09084320068359375, "step": 1812 }, { "epoch": 0.12254968230363661, "grad_norm": 2.781646966934204, "learning_rate": 2.939087253268404e-05, "loss": 0.28033447265625, "step": 1813 }, { "epoch": 0.12261727727457077, "grad_norm": 1.8239221572875977, "learning_rate": 2.9389941459969228e-05, "loss": 0.254791259765625, "step": 1814 }, { "epoch": 0.12268487224550494, "grad_norm": 1.3270915746688843, "learning_rate": 2.9389009690979562e-05, "loss": 0.17144775390625, "step": 1815 }, { "epoch": 0.12275246721643909, "grad_norm": 1.819892168045044, "learning_rate": 2.9388077225760116e-05, "loss": 0.3326416015625, "step": 1816 }, { "epoch": 0.12282006218737326, "grad_norm": 4.78952693939209, "learning_rate": 2.938714406435602e-05, "loss": 0.3411407470703125, "step": 1817 }, { "epoch": 0.12288765715830742, "grad_norm": 2.1668052673339844, "learning_rate": 2.938621020681242e-05, "loss": 0.30914306640625, "step": 1818 }, { "epoch": 0.12295525212924159, "grad_norm": 4.407168388366699, "learning_rate": 2.93852756531745e-05, "loss": 0.2843780517578125, "step": 1819 }, { "epoch": 0.12302284710017575, "grad_norm": 4.652416229248047, "learning_rate": 2.938434040348749e-05, "loss": 0.32427978515625, "step": 1820 }, { "epoch": 0.1230904420711099, "grad_norm": 2.359720468521118, "learning_rate": 2.938340445779663e-05, "loss": 0.300048828125, "step": 1821 }, { "epoch": 0.12315803704204407, "grad_norm": 3.2856569290161133, "learning_rate": 2.9382467816147217e-05, "loss": 0.284393310546875, "step": 1822 }, { "epoch": 0.12322563201297823, "grad_norm": 1.639600396156311, "learning_rate": 2.9381530478584564e-05, "loss": 0.2500457763671875, "step": 1823 }, { "epoch": 0.1232932269839124, "grad_norm": 1.9299628734588623, "learning_rate": 2.938059244515403e-05, "loss": 0.3277587890625, "step": 1824 }, { "epoch": 0.12336082195484656, "grad_norm": 1.7249298095703125, "learning_rate": 2.9379653715901003e-05, "loss": 0.340606689453125, "step": 1825 }, { "epoch": 0.12342841692578072, "grad_norm": 2.8745474815368652, "learning_rate": 2.93787142908709e-05, "loss": 0.3740234375, "step": 1826 }, { "epoch": 0.12349601189671489, "grad_norm": 2.1438188552856445, "learning_rate": 2.937777417010918e-05, "loss": 0.18466949462890625, "step": 1827 }, { "epoch": 0.12356360686764904, "grad_norm": 1.8917938470840454, "learning_rate": 2.9376833353661334e-05, "loss": 0.303619384765625, "step": 1828 }, { "epoch": 0.12363120183858321, "grad_norm": 3.092658519744873, "learning_rate": 2.937589184157288e-05, "loss": 0.28424072265625, "step": 1829 }, { "epoch": 0.12369879680951737, "grad_norm": 2.7979493141174316, "learning_rate": 2.937494963388938e-05, "loss": 0.185699462890625, "step": 1830 }, { "epoch": 0.12376639178045154, "grad_norm": 1.4406728744506836, "learning_rate": 2.9374006730656414e-05, "loss": 0.29766845703125, "step": 1831 }, { "epoch": 0.1238339867513857, "grad_norm": 2.015841245651245, "learning_rate": 2.9373063131919615e-05, "loss": 0.295654296875, "step": 1832 }, { "epoch": 0.12390158172231985, "grad_norm": 1.4475332498550415, "learning_rate": 2.9372118837724633e-05, "loss": 0.24611663818359375, "step": 1833 }, { "epoch": 0.12396917669325402, "grad_norm": 1.6065267324447632, "learning_rate": 2.9371173848117167e-05, "loss": 0.27532958984375, "step": 1834 }, { "epoch": 0.12403677166418818, "grad_norm": 1.956578016281128, "learning_rate": 2.9370228163142935e-05, "loss": 0.2843017578125, "step": 1835 }, { "epoch": 0.12410436663512235, "grad_norm": 1.189755916595459, "learning_rate": 2.9369281782847696e-05, "loss": 0.252197265625, "step": 1836 }, { "epoch": 0.12417196160605651, "grad_norm": 1.3399018049240112, "learning_rate": 2.9368334707277245e-05, "loss": 0.31097412109375, "step": 1837 }, { "epoch": 0.12423955657699068, "grad_norm": 1.4650981426239014, "learning_rate": 2.93673869364774e-05, "loss": 0.300872802734375, "step": 1838 }, { "epoch": 0.12430715154792484, "grad_norm": 1.589064121246338, "learning_rate": 2.936643847049403e-05, "loss": 0.228271484375, "step": 1839 }, { "epoch": 0.12437474651885899, "grad_norm": 0.9247000813484192, "learning_rate": 2.936548930937302e-05, "loss": 0.21734619140625, "step": 1840 }, { "epoch": 0.12444234148979316, "grad_norm": 2.204660177230835, "learning_rate": 2.93645394531603e-05, "loss": 0.327667236328125, "step": 1841 }, { "epoch": 0.12450993646072732, "grad_norm": 1.9088503122329712, "learning_rate": 2.9363588901901832e-05, "loss": 0.248748779296875, "step": 1842 }, { "epoch": 0.12457753143166149, "grad_norm": 1.2027288675308228, "learning_rate": 2.9362637655643603e-05, "loss": 0.1649017333984375, "step": 1843 }, { "epoch": 0.12464512640259565, "grad_norm": 2.724165916442871, "learning_rate": 2.9361685714431646e-05, "loss": 0.294677734375, "step": 1844 }, { "epoch": 0.1247127213735298, "grad_norm": 1.7342392206192017, "learning_rate": 2.936073307831202e-05, "loss": 0.2108154296875, "step": 1845 }, { "epoch": 0.12478031634446397, "grad_norm": 0.8328324556350708, "learning_rate": 2.9359779747330815e-05, "loss": 0.1581878662109375, "step": 1846 }, { "epoch": 0.12484791131539813, "grad_norm": 0.7593821883201599, "learning_rate": 2.9358825721534167e-05, "loss": 0.12587738037109375, "step": 1847 }, { "epoch": 0.1249155062863323, "grad_norm": 1.204154372215271, "learning_rate": 2.9357871000968236e-05, "loss": 0.247711181640625, "step": 1848 }, { "epoch": 0.12498310125726646, "grad_norm": 1.2949291467666626, "learning_rate": 2.935691558567921e-05, "loss": 0.2151336669921875, "step": 1849 }, { "epoch": 0.12505069622820061, "grad_norm": 1.3613406419754028, "learning_rate": 2.9355959475713327e-05, "loss": 0.22900390625, "step": 1850 }, { "epoch": 0.12511829119913478, "grad_norm": 1.37935209274292, "learning_rate": 2.9355002671116847e-05, "loss": 0.15631103515625, "step": 1851 }, { "epoch": 0.12518588617006896, "grad_norm": 3.8516314029693604, "learning_rate": 2.9354045171936064e-05, "loss": 0.2777252197265625, "step": 1852 }, { "epoch": 0.1252534811410031, "grad_norm": 5.487703800201416, "learning_rate": 2.9353086978217307e-05, "loss": 0.33941650390625, "step": 1853 }, { "epoch": 0.12532107611193727, "grad_norm": 2.1289329528808594, "learning_rate": 2.935212809000694e-05, "loss": 0.32281494140625, "step": 1854 }, { "epoch": 0.12538867108287144, "grad_norm": 1.3094414472579956, "learning_rate": 2.9351168507351363e-05, "loss": 0.224853515625, "step": 1855 }, { "epoch": 0.1254562660538056, "grad_norm": 1.382772445678711, "learning_rate": 2.9350208230297004e-05, "loss": 0.2236785888671875, "step": 1856 }, { "epoch": 0.12552386102473975, "grad_norm": 1.3961602449417114, "learning_rate": 2.9349247258890327e-05, "loss": 0.3148193359375, "step": 1857 }, { "epoch": 0.12559145599567392, "grad_norm": 1.4140214920043945, "learning_rate": 2.9348285593177834e-05, "loss": 0.2354736328125, "step": 1858 }, { "epoch": 0.1256590509666081, "grad_norm": 1.2623016834259033, "learning_rate": 2.9347323233206052e-05, "loss": 0.32049560546875, "step": 1859 }, { "epoch": 0.12572664593754224, "grad_norm": 2.758671760559082, "learning_rate": 2.9346360179021546e-05, "loss": 0.32940673828125, "step": 1860 }, { "epoch": 0.1257942409084764, "grad_norm": 1.830437183380127, "learning_rate": 2.9345396430670918e-05, "loss": 0.24578857421875, "step": 1861 }, { "epoch": 0.12586183587941058, "grad_norm": 2.514981508255005, "learning_rate": 2.934443198820079e-05, "loss": 0.194671630859375, "step": 1862 }, { "epoch": 0.12592943085034475, "grad_norm": 2.9480814933776855, "learning_rate": 2.9343466851657844e-05, "loss": 0.281646728515625, "step": 1863 }, { "epoch": 0.1259970258212789, "grad_norm": 2.3436245918273926, "learning_rate": 2.9342501021088764e-05, "loss": 0.373687744140625, "step": 1864 }, { "epoch": 0.12606462079221306, "grad_norm": 1.0912870168685913, "learning_rate": 2.9341534496540295e-05, "loss": 0.22308349609375, "step": 1865 }, { "epoch": 0.12613221576314723, "grad_norm": 3.0860984325408936, "learning_rate": 2.9340567278059193e-05, "loss": 0.325592041015625, "step": 1866 }, { "epoch": 0.12619981073408137, "grad_norm": 1.3687242269515991, "learning_rate": 2.9339599365692265e-05, "loss": 0.31256103515625, "step": 1867 }, { "epoch": 0.12626740570501555, "grad_norm": 1.4618998765945435, "learning_rate": 2.9338630759486345e-05, "loss": 0.3360595703125, "step": 1868 }, { "epoch": 0.12633500067594972, "grad_norm": 2.3789145946502686, "learning_rate": 2.9337661459488298e-05, "loss": 0.340789794921875, "step": 1869 }, { "epoch": 0.12640259564688386, "grad_norm": 1.7082536220550537, "learning_rate": 2.9336691465745022e-05, "loss": 0.34033203125, "step": 1870 }, { "epoch": 0.12647019061781803, "grad_norm": 1.4146870374679565, "learning_rate": 2.9335720778303453e-05, "loss": 0.31219482421875, "step": 1871 }, { "epoch": 0.1265377855887522, "grad_norm": 0.95700603723526, "learning_rate": 2.933474939721056e-05, "loss": 0.1246490478515625, "step": 1872 }, { "epoch": 0.12660538055968637, "grad_norm": 2.510868787765503, "learning_rate": 2.933377732251335e-05, "loss": 0.26092529296875, "step": 1873 }, { "epoch": 0.1266729755306205, "grad_norm": 1.1470750570297241, "learning_rate": 2.9332804554258847e-05, "loss": 0.25958251953125, "step": 1874 }, { "epoch": 0.12674057050155468, "grad_norm": 2.7454025745391846, "learning_rate": 2.9331831092494125e-05, "loss": 0.23150634765625, "step": 1875 }, { "epoch": 0.12680816547248885, "grad_norm": 1.043816089630127, "learning_rate": 2.9330856937266287e-05, "loss": 0.17583465576171875, "step": 1876 }, { "epoch": 0.126875760443423, "grad_norm": 1.7231159210205078, "learning_rate": 2.9329882088622468e-05, "loss": 0.2930908203125, "step": 1877 }, { "epoch": 0.12694335541435717, "grad_norm": 1.6596908569335938, "learning_rate": 2.9328906546609835e-05, "loss": 0.28277587890625, "step": 1878 }, { "epoch": 0.12701095038529134, "grad_norm": 1.280869722366333, "learning_rate": 2.93279303112756e-05, "loss": 0.284454345703125, "step": 1879 }, { "epoch": 0.1270785453562255, "grad_norm": 4.011403560638428, "learning_rate": 2.9326953382666985e-05, "loss": 0.32122802734375, "step": 1880 }, { "epoch": 0.12714614032715965, "grad_norm": 2.1128342151641846, "learning_rate": 2.932597576083127e-05, "loss": 0.27880859375, "step": 1881 }, { "epoch": 0.12721373529809382, "grad_norm": 1.3682283163070679, "learning_rate": 2.932499744581575e-05, "loss": 0.2877197265625, "step": 1882 }, { "epoch": 0.127281330269028, "grad_norm": 3.6626029014587402, "learning_rate": 2.9324018437667773e-05, "loss": 0.35089111328125, "step": 1883 }, { "epoch": 0.12734892523996214, "grad_norm": 2.115060329437256, "learning_rate": 2.93230387364347e-05, "loss": 0.3419189453125, "step": 1884 }, { "epoch": 0.1274165202108963, "grad_norm": 1.9510365724563599, "learning_rate": 2.9322058342163943e-05, "loss": 0.2412109375, "step": 1885 }, { "epoch": 0.12748411518183048, "grad_norm": 2.034024715423584, "learning_rate": 2.9321077254902937e-05, "loss": 0.354461669921875, "step": 1886 }, { "epoch": 0.12755171015276465, "grad_norm": 2.8772506713867188, "learning_rate": 2.9320095474699145e-05, "loss": 0.232635498046875, "step": 1887 }, { "epoch": 0.1276193051236988, "grad_norm": 1.5514729022979736, "learning_rate": 2.9319113001600083e-05, "loss": 0.344085693359375, "step": 1888 }, { "epoch": 0.12768690009463296, "grad_norm": 2.147146224975586, "learning_rate": 2.9318129835653286e-05, "loss": 0.1698760986328125, "step": 1889 }, { "epoch": 0.12775449506556713, "grad_norm": 1.5936743021011353, "learning_rate": 2.9317145976906318e-05, "loss": 0.205078125, "step": 1890 }, { "epoch": 0.12782209003650127, "grad_norm": 1.106304407119751, "learning_rate": 2.9316161425406793e-05, "loss": 0.128875732421875, "step": 1891 }, { "epoch": 0.12788968500743544, "grad_norm": 5.471755504608154, "learning_rate": 2.9315176181202348e-05, "loss": 0.370758056640625, "step": 1892 }, { "epoch": 0.12795727997836961, "grad_norm": 3.1379270553588867, "learning_rate": 2.931419024434065e-05, "loss": 0.291168212890625, "step": 1893 }, { "epoch": 0.12802487494930379, "grad_norm": 3.0621554851531982, "learning_rate": 2.9313203614869416e-05, "loss": 0.3267822265625, "step": 1894 }, { "epoch": 0.12809246992023793, "grad_norm": 1.4742306470870972, "learning_rate": 2.9312216292836374e-05, "loss": 0.351318359375, "step": 1895 }, { "epoch": 0.1281600648911721, "grad_norm": 3.0940122604370117, "learning_rate": 2.9311228278289302e-05, "loss": 0.3125, "step": 1896 }, { "epoch": 0.12822765986210627, "grad_norm": 2.0748393535614014, "learning_rate": 2.9310239571276007e-05, "loss": 0.346588134765625, "step": 1897 }, { "epoch": 0.1282952548330404, "grad_norm": 1.8040688037872314, "learning_rate": 2.9309250171844324e-05, "loss": 0.27410888671875, "step": 1898 }, { "epoch": 0.12836284980397458, "grad_norm": 0.6333792805671692, "learning_rate": 2.930826008004213e-05, "loss": 0.114715576171875, "step": 1899 }, { "epoch": 0.12843044477490875, "grad_norm": 1.9958078861236572, "learning_rate": 2.9307269295917328e-05, "loss": 0.335845947265625, "step": 1900 }, { "epoch": 0.1284980397458429, "grad_norm": 0.8666455745697021, "learning_rate": 2.930627781951787e-05, "loss": 0.1717376708984375, "step": 1901 }, { "epoch": 0.12856563471677707, "grad_norm": 1.2630212306976318, "learning_rate": 2.930528565089172e-05, "loss": 0.1937255859375, "step": 1902 }, { "epoch": 0.12863322968771124, "grad_norm": 2.1951215267181396, "learning_rate": 2.930429279008688e-05, "loss": 0.32275390625, "step": 1903 }, { "epoch": 0.1287008246586454, "grad_norm": 4.189158916473389, "learning_rate": 2.9303299237151402e-05, "loss": 0.37933349609375, "step": 1904 }, { "epoch": 0.12876841962957955, "grad_norm": 1.9613910913467407, "learning_rate": 2.9302304992133354e-05, "loss": 0.31488037109375, "step": 1905 }, { "epoch": 0.12883601460051372, "grad_norm": 2.688852071762085, "learning_rate": 2.9301310055080842e-05, "loss": 0.246734619140625, "step": 1906 }, { "epoch": 0.1289036095714479, "grad_norm": 1.5233365297317505, "learning_rate": 2.9300314426042014e-05, "loss": 0.199951171875, "step": 1907 }, { "epoch": 0.12897120454238203, "grad_norm": 1.6662235260009766, "learning_rate": 2.9299318105065044e-05, "loss": 0.262237548828125, "step": 1908 }, { "epoch": 0.1290387995133162, "grad_norm": 1.1816880702972412, "learning_rate": 2.9298321092198135e-05, "loss": 0.284423828125, "step": 1909 }, { "epoch": 0.12910639448425038, "grad_norm": 1.3231470584869385, "learning_rate": 2.929732338748953e-05, "loss": 0.30572509765625, "step": 1910 }, { "epoch": 0.12917398945518455, "grad_norm": 1.5222097635269165, "learning_rate": 2.9296324990987506e-05, "loss": 0.23931884765625, "step": 1911 }, { "epoch": 0.1292415844261187, "grad_norm": 2.1172194480895996, "learning_rate": 2.9295325902740372e-05, "loss": 0.26934814453125, "step": 1912 }, { "epoch": 0.12930917939705286, "grad_norm": 2.5577142238616943, "learning_rate": 2.9294326122796473e-05, "loss": 0.26593017578125, "step": 1913 }, { "epoch": 0.12937677436798703, "grad_norm": 2.2284209728240967, "learning_rate": 2.9293325651204176e-05, "loss": 0.264404296875, "step": 1914 }, { "epoch": 0.12944436933892117, "grad_norm": 2.292384624481201, "learning_rate": 2.9292324488011893e-05, "loss": 0.20196533203125, "step": 1915 }, { "epoch": 0.12951196430985534, "grad_norm": 1.927016258239746, "learning_rate": 2.9291322633268075e-05, "loss": 0.24041748046875, "step": 1916 }, { "epoch": 0.1295795592807895, "grad_norm": 2.033663034439087, "learning_rate": 2.9290320087021185e-05, "loss": 0.324462890625, "step": 1917 }, { "epoch": 0.12964715425172368, "grad_norm": 2.160675287246704, "learning_rate": 2.928931684931974e-05, "loss": 0.276275634765625, "step": 1918 }, { "epoch": 0.12971474922265783, "grad_norm": 1.8701974153518677, "learning_rate": 2.9288312920212282e-05, "loss": 0.3868408203125, "step": 1919 }, { "epoch": 0.129782344193592, "grad_norm": 1.990841031074524, "learning_rate": 2.9287308299747388e-05, "loss": 0.1800537109375, "step": 1920 }, { "epoch": 0.12984993916452617, "grad_norm": 1.6182200908660889, "learning_rate": 2.9286302987973663e-05, "loss": 0.271484375, "step": 1921 }, { "epoch": 0.1299175341354603, "grad_norm": 1.2411061525344849, "learning_rate": 2.928529698493976e-05, "loss": 0.2300567626953125, "step": 1922 }, { "epoch": 0.12998512910639448, "grad_norm": 1.9075475931167603, "learning_rate": 2.928429029069434e-05, "loss": 0.325531005859375, "step": 1923 }, { "epoch": 0.13005272407732865, "grad_norm": 2.472940444946289, "learning_rate": 2.9283282905286133e-05, "loss": 0.322265625, "step": 1924 }, { "epoch": 0.13012031904826282, "grad_norm": 1.166175127029419, "learning_rate": 2.9282274828763868e-05, "loss": 0.2059478759765625, "step": 1925 }, { "epoch": 0.13018791401919697, "grad_norm": 0.9945905208587646, "learning_rate": 2.928126606117633e-05, "loss": 0.2310791015625, "step": 1926 }, { "epoch": 0.13025550899013114, "grad_norm": 1.6184033155441284, "learning_rate": 2.9280256602572318e-05, "loss": 0.270751953125, "step": 1927 }, { "epoch": 0.1303231039610653, "grad_norm": 1.8597556352615356, "learning_rate": 2.9279246453000686e-05, "loss": 0.235260009765625, "step": 1928 }, { "epoch": 0.13039069893199945, "grad_norm": 1.7760124206542969, "learning_rate": 2.9278235612510317e-05, "loss": 0.34393310546875, "step": 1929 }, { "epoch": 0.13045829390293362, "grad_norm": 1.9911977052688599, "learning_rate": 2.9277224081150104e-05, "loss": 0.30059814453125, "step": 1930 }, { "epoch": 0.1305258888738678, "grad_norm": 1.6752359867095947, "learning_rate": 2.9276211858969003e-05, "loss": 0.28741455078125, "step": 1931 }, { "epoch": 0.13059348384480193, "grad_norm": 1.1985070705413818, "learning_rate": 2.9275198946015993e-05, "loss": 0.205169677734375, "step": 1932 }, { "epoch": 0.1306610788157361, "grad_norm": 3.237945556640625, "learning_rate": 2.9274185342340076e-05, "loss": 0.24791717529296875, "step": 1933 }, { "epoch": 0.13072867378667027, "grad_norm": 2.3569085597991943, "learning_rate": 2.9273171047990305e-05, "loss": 0.2840576171875, "step": 1934 }, { "epoch": 0.13079626875760444, "grad_norm": 1.6818615198135376, "learning_rate": 2.927215606301576e-05, "loss": 0.1982421875, "step": 1935 }, { "epoch": 0.1308638637285386, "grad_norm": 2.6954727172851562, "learning_rate": 2.927114038746554e-05, "loss": 0.343994140625, "step": 1936 }, { "epoch": 0.13093145869947276, "grad_norm": 2.3156535625457764, "learning_rate": 2.9270124021388797e-05, "loss": 0.2168426513671875, "step": 1937 }, { "epoch": 0.13099905367040693, "grad_norm": 1.4595592021942139, "learning_rate": 2.9269106964834712e-05, "loss": 0.283447265625, "step": 1938 }, { "epoch": 0.13106664864134107, "grad_norm": 2.32763934135437, "learning_rate": 2.926808921785249e-05, "loss": 0.31640625, "step": 1939 }, { "epoch": 0.13113424361227524, "grad_norm": 2.487952947616577, "learning_rate": 2.926707078049138e-05, "loss": 0.31683349609375, "step": 1940 }, { "epoch": 0.1312018385832094, "grad_norm": 1.407626986503601, "learning_rate": 2.9266051652800666e-05, "loss": 0.25238037109375, "step": 1941 }, { "epoch": 0.13126943355414358, "grad_norm": 1.54346764087677, "learning_rate": 2.9265031834829645e-05, "loss": 0.333984375, "step": 1942 }, { "epoch": 0.13133702852507773, "grad_norm": 1.9313918352127075, "learning_rate": 2.9264011326627675e-05, "loss": 0.249603271484375, "step": 1943 }, { "epoch": 0.1314046234960119, "grad_norm": 2.4060535430908203, "learning_rate": 2.926299012824413e-05, "loss": 0.37518310546875, "step": 1944 }, { "epoch": 0.13147221846694607, "grad_norm": 1.6056585311889648, "learning_rate": 2.9261968239728418e-05, "loss": 0.331146240234375, "step": 1945 }, { "epoch": 0.1315398134378802, "grad_norm": 1.2601265907287598, "learning_rate": 2.926094566112999e-05, "loss": 0.2208099365234375, "step": 1946 }, { "epoch": 0.13160740840881438, "grad_norm": 2.06046986579895, "learning_rate": 2.9259922392498328e-05, "loss": 0.285614013671875, "step": 1947 }, { "epoch": 0.13167500337974855, "grad_norm": 2.293104648590088, "learning_rate": 2.9258898433882934e-05, "loss": 0.32452392578125, "step": 1948 }, { "epoch": 0.13174259835068272, "grad_norm": 1.0786758661270142, "learning_rate": 2.9257873785333362e-05, "loss": 0.21441650390625, "step": 1949 }, { "epoch": 0.13181019332161686, "grad_norm": 1.8932831287384033, "learning_rate": 2.9256848446899186e-05, "loss": 0.30657958984375, "step": 1950 }, { "epoch": 0.13187778829255103, "grad_norm": 1.723069667816162, "learning_rate": 2.9255822418630017e-05, "loss": 0.229278564453125, "step": 1951 }, { "epoch": 0.1319453832634852, "grad_norm": 2.908053159713745, "learning_rate": 2.9254795700575505e-05, "loss": 0.37115478515625, "step": 1952 }, { "epoch": 0.13201297823441935, "grad_norm": 1.6887019872665405, "learning_rate": 2.9253768292785327e-05, "loss": 0.34661865234375, "step": 1953 }, { "epoch": 0.13208057320535352, "grad_norm": 1.2983099222183228, "learning_rate": 2.92527401953092e-05, "loss": 0.2256927490234375, "step": 1954 }, { "epoch": 0.1321481681762877, "grad_norm": 1.3612072467803955, "learning_rate": 2.9251711408196863e-05, "loss": 0.368408203125, "step": 1955 }, { "epoch": 0.13221576314722186, "grad_norm": 1.8023213148117065, "learning_rate": 2.9250681931498096e-05, "loss": 0.3369140625, "step": 1956 }, { "epoch": 0.132283358118156, "grad_norm": 1.739606261253357, "learning_rate": 2.9249651765262716e-05, "loss": 0.299072265625, "step": 1957 }, { "epoch": 0.13235095308909017, "grad_norm": 1.3616477251052856, "learning_rate": 2.9248620909540565e-05, "loss": 0.1980133056640625, "step": 1958 }, { "epoch": 0.13241854806002434, "grad_norm": 1.478213906288147, "learning_rate": 2.9247589364381523e-05, "loss": 0.2066650390625, "step": 1959 }, { "epoch": 0.1324861430309585, "grad_norm": 1.1819273233413696, "learning_rate": 2.92465571298355e-05, "loss": 0.3160858154296875, "step": 1960 }, { "epoch": 0.13255373800189266, "grad_norm": 1.4291495084762573, "learning_rate": 2.9245524205952445e-05, "loss": 0.07489776611328125, "step": 1961 }, { "epoch": 0.13262133297282683, "grad_norm": 2.173473834991455, "learning_rate": 2.9244490592782344e-05, "loss": 0.2127685546875, "step": 1962 }, { "epoch": 0.13268892794376097, "grad_norm": 3.5363948345184326, "learning_rate": 2.9243456290375195e-05, "loss": 0.36572265625, "step": 1963 }, { "epoch": 0.13275652291469514, "grad_norm": 2.4565534591674805, "learning_rate": 2.9242421298781054e-05, "loss": 0.216522216796875, "step": 1964 }, { "epoch": 0.1328241178856293, "grad_norm": 2.1167171001434326, "learning_rate": 2.9241385618049997e-05, "loss": 0.257781982421875, "step": 1965 }, { "epoch": 0.13289171285656348, "grad_norm": 2.4384989738464355, "learning_rate": 2.924034924823214e-05, "loss": 0.37152099609375, "step": 1966 }, { "epoch": 0.13295930782749762, "grad_norm": 2.0020081996917725, "learning_rate": 2.923931218937762e-05, "loss": 0.2628631591796875, "step": 1967 }, { "epoch": 0.1330269027984318, "grad_norm": 3.365429162979126, "learning_rate": 2.9238274441536632e-05, "loss": 0.3343658447265625, "step": 1968 }, { "epoch": 0.13309449776936597, "grad_norm": 1.6860690116882324, "learning_rate": 2.9237236004759373e-05, "loss": 0.27044677734375, "step": 1969 }, { "epoch": 0.1331620927403001, "grad_norm": 5.960832118988037, "learning_rate": 2.9236196879096098e-05, "loss": 0.36932373046875, "step": 1970 }, { "epoch": 0.13322968771123428, "grad_norm": 1.5849645137786865, "learning_rate": 2.9235157064597087e-05, "loss": 0.306793212890625, "step": 1971 }, { "epoch": 0.13329728268216845, "grad_norm": 1.7251280546188354, "learning_rate": 2.9234116561312646e-05, "loss": 0.311553955078125, "step": 1972 }, { "epoch": 0.13336487765310262, "grad_norm": 5.223647594451904, "learning_rate": 2.9233075369293126e-05, "loss": 0.42620849609375, "step": 1973 }, { "epoch": 0.13343247262403676, "grad_norm": 1.3278086185455322, "learning_rate": 2.9232033488588903e-05, "loss": 0.237518310546875, "step": 1974 }, { "epoch": 0.13350006759497093, "grad_norm": 2.0240421295166016, "learning_rate": 2.9230990919250395e-05, "loss": 0.26837158203125, "step": 1975 }, { "epoch": 0.1335676625659051, "grad_norm": 1.9255291223526, "learning_rate": 2.9229947661328043e-05, "loss": 0.29931640625, "step": 1976 }, { "epoch": 0.13363525753683925, "grad_norm": 3.9750354290008545, "learning_rate": 2.9228903714872327e-05, "loss": 0.328399658203125, "step": 1977 }, { "epoch": 0.13370285250777342, "grad_norm": 1.1635829210281372, "learning_rate": 2.9227859079933764e-05, "loss": 0.27276611328125, "step": 1978 }, { "epoch": 0.1337704474787076, "grad_norm": 2.337601900100708, "learning_rate": 2.9226813756562895e-05, "loss": 0.37249755859375, "step": 1979 }, { "epoch": 0.13383804244964176, "grad_norm": 3.9568450450897217, "learning_rate": 2.9225767744810298e-05, "loss": 0.29168701171875, "step": 1980 }, { "epoch": 0.1339056374205759, "grad_norm": 2.14996075630188, "learning_rate": 2.922472104472659e-05, "loss": 0.3153076171875, "step": 1981 }, { "epoch": 0.13397323239151007, "grad_norm": 1.9887337684631348, "learning_rate": 2.922367365636242e-05, "loss": 0.25640869140625, "step": 1982 }, { "epoch": 0.13404082736244424, "grad_norm": 1.2660444974899292, "learning_rate": 2.9222625579768457e-05, "loss": 0.318359375, "step": 1983 }, { "epoch": 0.13410842233337839, "grad_norm": 3.8871328830718994, "learning_rate": 2.922157681499542e-05, "loss": 0.29095458984375, "step": 1984 }, { "epoch": 0.13417601730431256, "grad_norm": 2.002466917037964, "learning_rate": 2.9220527362094055e-05, "loss": 0.277679443359375, "step": 1985 }, { "epoch": 0.13424361227524673, "grad_norm": 1.3334307670593262, "learning_rate": 2.921947722111514e-05, "loss": 0.2548828125, "step": 1986 }, { "epoch": 0.1343112072461809, "grad_norm": 1.7595202922821045, "learning_rate": 2.9218426392109484e-05, "loss": 0.35205078125, "step": 1987 }, { "epoch": 0.13437880221711504, "grad_norm": 2.4163928031921387, "learning_rate": 2.921737487512794e-05, "loss": 0.165771484375, "step": 1988 }, { "epoch": 0.1344463971880492, "grad_norm": 2.4210286140441895, "learning_rate": 2.9216322670221376e-05, "loss": 0.3299560546875, "step": 1989 }, { "epoch": 0.13451399215898338, "grad_norm": 1.1716192960739136, "learning_rate": 2.9215269777440717e-05, "loss": 0.223724365234375, "step": 1990 }, { "epoch": 0.13458158712991752, "grad_norm": 1.6463793516159058, "learning_rate": 2.92142161968369e-05, "loss": 0.34710693359375, "step": 1991 }, { "epoch": 0.1346491821008517, "grad_norm": 1.1618897914886475, "learning_rate": 2.9213161928460908e-05, "loss": 0.268524169921875, "step": 1992 }, { "epoch": 0.13471677707178586, "grad_norm": 2.662612199783325, "learning_rate": 2.9212106972363748e-05, "loss": 0.287872314453125, "step": 1993 }, { "epoch": 0.13478437204272004, "grad_norm": 3.4384818077087402, "learning_rate": 2.9211051328596472e-05, "loss": 0.31689453125, "step": 1994 }, { "epoch": 0.13485196701365418, "grad_norm": 1.1554499864578247, "learning_rate": 2.920999499721015e-05, "loss": 0.2399139404296875, "step": 1995 }, { "epoch": 0.13491956198458835, "grad_norm": 0.9358417391777039, "learning_rate": 2.9208937978255907e-05, "loss": 0.182708740234375, "step": 1996 }, { "epoch": 0.13498715695552252, "grad_norm": 1.918327808380127, "learning_rate": 2.9207880271784874e-05, "loss": 0.317138671875, "step": 1997 }, { "epoch": 0.13505475192645666, "grad_norm": 1.184692621231079, "learning_rate": 2.9206821877848237e-05, "loss": 0.2436370849609375, "step": 1998 }, { "epoch": 0.13512234689739083, "grad_norm": 1.331689476966858, "learning_rate": 2.920576279649721e-05, "loss": 0.266510009765625, "step": 1999 }, { "epoch": 0.135189941868325, "grad_norm": 2.885580539703369, "learning_rate": 2.9204703027783026e-05, "loss": 0.310028076171875, "step": 2000 }, { "epoch": 0.13525753683925915, "grad_norm": 1.3850419521331787, "learning_rate": 2.9203642571756978e-05, "loss": 0.167022705078125, "step": 2001 }, { "epoch": 0.13532513181019332, "grad_norm": 4.8086419105529785, "learning_rate": 2.9202581428470368e-05, "loss": 0.36248779296875, "step": 2002 }, { "epoch": 0.1353927267811275, "grad_norm": 1.0901061296463013, "learning_rate": 2.9201519597974544e-05, "loss": 0.216400146484375, "step": 2003 }, { "epoch": 0.13546032175206166, "grad_norm": 1.0664007663726807, "learning_rate": 2.9200457080320884e-05, "loss": 0.178131103515625, "step": 2004 }, { "epoch": 0.1355279167229958, "grad_norm": 0.8923791646957397, "learning_rate": 2.9199393875560798e-05, "loss": 0.1157684326171875, "step": 2005 }, { "epoch": 0.13559551169392997, "grad_norm": 1.524048089981079, "learning_rate": 2.9198329983745733e-05, "loss": 0.25360107421875, "step": 2006 }, { "epoch": 0.13566310666486414, "grad_norm": 1.241594910621643, "learning_rate": 2.9197265404927163e-05, "loss": 0.223876953125, "step": 2007 }, { "epoch": 0.13573070163579828, "grad_norm": 5.743877410888672, "learning_rate": 2.9196200139156604e-05, "loss": 0.34967041015625, "step": 2008 }, { "epoch": 0.13579829660673245, "grad_norm": 1.0887269973754883, "learning_rate": 2.9195134186485593e-05, "loss": 0.12429046630859375, "step": 2009 }, { "epoch": 0.13586589157766663, "grad_norm": 2.332225799560547, "learning_rate": 2.9194067546965717e-05, "loss": 0.3494873046875, "step": 2010 }, { "epoch": 0.1359334865486008, "grad_norm": 1.549676537513733, "learning_rate": 2.9193000220648576e-05, "loss": 0.267852783203125, "step": 2011 }, { "epoch": 0.13600108151953494, "grad_norm": 1.893729329109192, "learning_rate": 2.9191932207585822e-05, "loss": 0.2427978515625, "step": 2012 }, { "epoch": 0.1360686764904691, "grad_norm": 2.111773729324341, "learning_rate": 2.9190863507829124e-05, "loss": 0.250762939453125, "step": 2013 }, { "epoch": 0.13613627146140328, "grad_norm": 2.6139743328094482, "learning_rate": 2.9189794121430206e-05, "loss": 0.292816162109375, "step": 2014 }, { "epoch": 0.13620386643233742, "grad_norm": 1.3561451435089111, "learning_rate": 2.9188724048440794e-05, "loss": 0.21109771728515625, "step": 2015 }, { "epoch": 0.1362714614032716, "grad_norm": 2.084684133529663, "learning_rate": 2.9187653288912676e-05, "loss": 0.295928955078125, "step": 2016 }, { "epoch": 0.13633905637420576, "grad_norm": 1.1549692153930664, "learning_rate": 2.9186581842897665e-05, "loss": 0.2254486083984375, "step": 2017 }, { "epoch": 0.13640665134513993, "grad_norm": 1.4449076652526855, "learning_rate": 2.9185509710447595e-05, "loss": 0.23089599609375, "step": 2018 }, { "epoch": 0.13647424631607408, "grad_norm": 2.5144765377044678, "learning_rate": 2.918443689161435e-05, "loss": 0.24859619140625, "step": 2019 }, { "epoch": 0.13654184128700825, "grad_norm": 4.521453380584717, "learning_rate": 2.9183363386449832e-05, "loss": 0.3505859375, "step": 2020 }, { "epoch": 0.13660943625794242, "grad_norm": 1.90945565700531, "learning_rate": 2.9182289195005985e-05, "loss": 0.24822998046875, "step": 2021 }, { "epoch": 0.13667703122887656, "grad_norm": 3.4589109420776367, "learning_rate": 2.9181214317334798e-05, "loss": 0.3077392578125, "step": 2022 }, { "epoch": 0.13674462619981073, "grad_norm": 1.7048181295394897, "learning_rate": 2.9180138753488264e-05, "loss": 0.35052490234375, "step": 2023 }, { "epoch": 0.1368122211707449, "grad_norm": 2.1671645641326904, "learning_rate": 2.9179062503518433e-05, "loss": 0.219085693359375, "step": 2024 }, { "epoch": 0.13687981614167907, "grad_norm": 0.8027602434158325, "learning_rate": 2.9177985567477377e-05, "loss": 0.23248291015625, "step": 2025 }, { "epoch": 0.13694741111261322, "grad_norm": 3.8208181858062744, "learning_rate": 2.9176907945417207e-05, "loss": 0.29083251953125, "step": 2026 }, { "epoch": 0.13701500608354739, "grad_norm": 0.7266286015510559, "learning_rate": 2.917582963739007e-05, "loss": 0.194854736328125, "step": 2027 }, { "epoch": 0.13708260105448156, "grad_norm": 2.139864921569824, "learning_rate": 2.9174750643448135e-05, "loss": 0.35821533203125, "step": 2028 }, { "epoch": 0.1371501960254157, "grad_norm": 4.6616740226745605, "learning_rate": 2.917367096364361e-05, "loss": 0.2716064453125, "step": 2029 }, { "epoch": 0.13721779099634987, "grad_norm": 2.589271068572998, "learning_rate": 2.9172590598028742e-05, "loss": 0.30816650390625, "step": 2030 }, { "epoch": 0.13728538596728404, "grad_norm": 1.613204836845398, "learning_rate": 2.9171509546655797e-05, "loss": 0.2577972412109375, "step": 2031 }, { "epoch": 0.13735298093821818, "grad_norm": 1.6223090887069702, "learning_rate": 2.917042780957709e-05, "loss": 0.25164794921875, "step": 2032 }, { "epoch": 0.13742057590915235, "grad_norm": 2.0261778831481934, "learning_rate": 2.9169345386844963e-05, "loss": 0.27679443359375, "step": 2033 }, { "epoch": 0.13748817088008652, "grad_norm": 1.3256275653839111, "learning_rate": 2.9168262278511788e-05, "loss": 0.316925048828125, "step": 2034 }, { "epoch": 0.1375557658510207, "grad_norm": 2.2283835411071777, "learning_rate": 2.9167178484629976e-05, "loss": 0.31854248046875, "step": 2035 }, { "epoch": 0.13762336082195484, "grad_norm": 1.0831290483474731, "learning_rate": 2.916609400525196e-05, "loss": 0.2108154296875, "step": 2036 }, { "epoch": 0.137690955792889, "grad_norm": 1.7441436052322388, "learning_rate": 2.9165008840430215e-05, "loss": 0.338134765625, "step": 2037 }, { "epoch": 0.13775855076382318, "grad_norm": 1.617406964302063, "learning_rate": 2.9163922990217254e-05, "loss": 0.314239501953125, "step": 2038 }, { "epoch": 0.13782614573475732, "grad_norm": 1.1126232147216797, "learning_rate": 2.916283645466562e-05, "loss": 0.230926513671875, "step": 2039 }, { "epoch": 0.1378937407056915, "grad_norm": 1.3359092473983765, "learning_rate": 2.9161749233827873e-05, "loss": 0.33740234375, "step": 2040 }, { "epoch": 0.13796133567662566, "grad_norm": 1.4877535104751587, "learning_rate": 2.9160661327756628e-05, "loss": 0.36370849609375, "step": 2041 }, { "epoch": 0.13802893064755983, "grad_norm": 2.6850733757019043, "learning_rate": 2.9159572736504524e-05, "loss": 0.2113037109375, "step": 2042 }, { "epoch": 0.13809652561849398, "grad_norm": 1.5990064144134521, "learning_rate": 2.9158483460124234e-05, "loss": 0.3173828125, "step": 2043 }, { "epoch": 0.13816412058942815, "grad_norm": 2.769710063934326, "learning_rate": 2.9157393498668463e-05, "loss": 0.2232666015625, "step": 2044 }, { "epoch": 0.13823171556036232, "grad_norm": 1.4507474899291992, "learning_rate": 2.9156302852189954e-05, "loss": 0.139892578125, "step": 2045 }, { "epoch": 0.13829931053129646, "grad_norm": 1.527876853942871, "learning_rate": 2.9155211520741472e-05, "loss": 0.30023193359375, "step": 2046 }, { "epoch": 0.13836690550223063, "grad_norm": 1.857602596282959, "learning_rate": 2.915411950437583e-05, "loss": 0.192230224609375, "step": 2047 }, { "epoch": 0.1384345004731648, "grad_norm": 1.763621211051941, "learning_rate": 2.9153026803145858e-05, "loss": 0.27410888671875, "step": 2048 }, { "epoch": 0.13850209544409897, "grad_norm": 2.5489132404327393, "learning_rate": 2.9151933417104436e-05, "loss": 0.353240966796875, "step": 2049 }, { "epoch": 0.13856969041503311, "grad_norm": 0.8851385116577148, "learning_rate": 2.9150839346304466e-05, "loss": 0.17299652099609375, "step": 2050 }, { "epoch": 0.13863728538596728, "grad_norm": 1.3056516647338867, "learning_rate": 2.914974459079888e-05, "loss": 0.258453369140625, "step": 2051 }, { "epoch": 0.13870488035690146, "grad_norm": 2.1382017135620117, "learning_rate": 2.914864915064066e-05, "loss": 0.2523193359375, "step": 2052 }, { "epoch": 0.1387724753278356, "grad_norm": 1.4661445617675781, "learning_rate": 2.9147553025882803e-05, "loss": 0.19171142578125, "step": 2053 }, { "epoch": 0.13884007029876977, "grad_norm": 1.3959718942642212, "learning_rate": 2.9146456216578348e-05, "loss": 0.29327392578125, "step": 2054 }, { "epoch": 0.13890766526970394, "grad_norm": 3.0459187030792236, "learning_rate": 2.9145358722780365e-05, "loss": 0.28741455078125, "step": 2055 }, { "epoch": 0.1389752602406381, "grad_norm": 0.8877031803131104, "learning_rate": 2.9144260544541956e-05, "loss": 0.17850494384765625, "step": 2056 }, { "epoch": 0.13904285521157225, "grad_norm": 1.7598780393600464, "learning_rate": 2.9143161681916264e-05, "loss": 0.3568115234375, "step": 2057 }, { "epoch": 0.13911045018250642, "grad_norm": 1.4440569877624512, "learning_rate": 2.914206213495645e-05, "loss": 0.287689208984375, "step": 2058 }, { "epoch": 0.1391780451534406, "grad_norm": 1.563433051109314, "learning_rate": 2.9140961903715726e-05, "loss": 0.28656005859375, "step": 2059 }, { "epoch": 0.13924564012437474, "grad_norm": 1.5265223979949951, "learning_rate": 2.913986098824732e-05, "loss": 0.328399658203125, "step": 2060 }, { "epoch": 0.1393132350953089, "grad_norm": 1.1824935674667358, "learning_rate": 2.9138759388604507e-05, "loss": 0.273834228515625, "step": 2061 }, { "epoch": 0.13938083006624308, "grad_norm": 1.5663001537322998, "learning_rate": 2.913765710484058e-05, "loss": 0.28118896484375, "step": 2062 }, { "epoch": 0.13944842503717722, "grad_norm": 1.2611912488937378, "learning_rate": 2.913655413700889e-05, "loss": 0.29388427734375, "step": 2063 }, { "epoch": 0.1395160200081114, "grad_norm": 1.2574821710586548, "learning_rate": 2.9135450485162792e-05, "loss": 0.25616455078125, "step": 2064 }, { "epoch": 0.13958361497904556, "grad_norm": 1.2047691345214844, "learning_rate": 2.9134346149355696e-05, "loss": 0.273681640625, "step": 2065 }, { "epoch": 0.13965120994997973, "grad_norm": 1.9076591730117798, "learning_rate": 2.9133241129641028e-05, "loss": 0.29852294921875, "step": 2066 }, { "epoch": 0.13971880492091387, "grad_norm": 1.2524629831314087, "learning_rate": 2.913213542607226e-05, "loss": 0.25860595703125, "step": 2067 }, { "epoch": 0.13978639989184805, "grad_norm": 1.6712802648544312, "learning_rate": 2.91310290387029e-05, "loss": 0.1788787841796875, "step": 2068 }, { "epoch": 0.13985399486278222, "grad_norm": 2.834725856781006, "learning_rate": 2.9129921967586465e-05, "loss": 0.31658935546875, "step": 2069 }, { "epoch": 0.13992158983371636, "grad_norm": 3.5051820278167725, "learning_rate": 2.912881421277654e-05, "loss": 0.36346435546875, "step": 2070 }, { "epoch": 0.13998918480465053, "grad_norm": 1.4552335739135742, "learning_rate": 2.9127705774326715e-05, "loss": 0.25494384765625, "step": 2071 }, { "epoch": 0.1400567797755847, "grad_norm": 1.8083834648132324, "learning_rate": 2.9126596652290626e-05, "loss": 0.33575439453125, "step": 2072 }, { "epoch": 0.14012437474651887, "grad_norm": 1.5479202270507812, "learning_rate": 2.912548684672194e-05, "loss": 0.33056640625, "step": 2073 }, { "epoch": 0.140191969717453, "grad_norm": 1.936621069908142, "learning_rate": 2.912437635767435e-05, "loss": 0.31219482421875, "step": 2074 }, { "epoch": 0.14025956468838718, "grad_norm": 0.9784016609191895, "learning_rate": 2.9123265185201596e-05, "loss": 0.16485595703125, "step": 2075 }, { "epoch": 0.14032715965932135, "grad_norm": 0.9355167150497437, "learning_rate": 2.912215332935744e-05, "loss": 0.215484619140625, "step": 2076 }, { "epoch": 0.1403947546302555, "grad_norm": 2.056608200073242, "learning_rate": 2.9121040790195685e-05, "loss": 0.303955078125, "step": 2077 }, { "epoch": 0.14046234960118967, "grad_norm": 1.361748456954956, "learning_rate": 2.9119927567770158e-05, "loss": 0.241241455078125, "step": 2078 }, { "epoch": 0.14052994457212384, "grad_norm": 2.773627758026123, "learning_rate": 2.911881366213472e-05, "loss": 0.2757568359375, "step": 2079 }, { "epoch": 0.140597539543058, "grad_norm": 1.288805603981018, "learning_rate": 2.9117699073343273e-05, "loss": 0.27789306640625, "step": 2080 }, { "epoch": 0.14066513451399215, "grad_norm": 2.5839333534240723, "learning_rate": 2.9116583801449756e-05, "loss": 0.2747955322265625, "step": 2081 }, { "epoch": 0.14073272948492632, "grad_norm": 1.3624582290649414, "learning_rate": 2.911546784650811e-05, "loss": 0.2640380859375, "step": 2082 }, { "epoch": 0.1408003244558605, "grad_norm": 1.5055227279663086, "learning_rate": 2.911435120857236e-05, "loss": 0.28363037109375, "step": 2083 }, { "epoch": 0.14086791942679464, "grad_norm": 1.1011086702346802, "learning_rate": 2.9113233887696517e-05, "loss": 0.2190399169921875, "step": 2084 }, { "epoch": 0.1409355143977288, "grad_norm": 1.5705037117004395, "learning_rate": 2.911211588393465e-05, "loss": 0.32666015625, "step": 2085 }, { "epoch": 0.14100310936866298, "grad_norm": 1.578267216682434, "learning_rate": 2.9110997197340853e-05, "loss": 0.209686279296875, "step": 2086 }, { "epoch": 0.14107070433959715, "grad_norm": 1.219725251197815, "learning_rate": 2.910987782796926e-05, "loss": 0.2213134765625, "step": 2087 }, { "epoch": 0.1411382993105313, "grad_norm": 1.0036636590957642, "learning_rate": 2.9108757775874025e-05, "loss": 0.269287109375, "step": 2088 }, { "epoch": 0.14120589428146546, "grad_norm": 1.314876914024353, "learning_rate": 2.910763704110935e-05, "loss": 0.2110595703125, "step": 2089 }, { "epoch": 0.14127348925239963, "grad_norm": 1.3585588932037354, "learning_rate": 2.910651562372946e-05, "loss": 0.1741943359375, "step": 2090 }, { "epoch": 0.14134108422333377, "grad_norm": 3.036386013031006, "learning_rate": 2.9105393523788615e-05, "loss": 0.2657623291015625, "step": 2091 }, { "epoch": 0.14140867919426794, "grad_norm": 2.1019186973571777, "learning_rate": 2.910427074134111e-05, "loss": 0.2664794921875, "step": 2092 }, { "epoch": 0.14147627416520211, "grad_norm": 0.6886388063430786, "learning_rate": 2.9103147276441275e-05, "loss": 0.155792236328125, "step": 2093 }, { "epoch": 0.14154386913613626, "grad_norm": 0.9456132650375366, "learning_rate": 2.9102023129143468e-05, "loss": 0.12322235107421875, "step": 2094 }, { "epoch": 0.14161146410707043, "grad_norm": 1.036095142364502, "learning_rate": 2.9100898299502084e-05, "loss": 0.180908203125, "step": 2095 }, { "epoch": 0.1416790590780046, "grad_norm": 2.1411826610565186, "learning_rate": 2.9099772787571545e-05, "loss": 0.282684326171875, "step": 2096 }, { "epoch": 0.14174665404893877, "grad_norm": 1.1170692443847656, "learning_rate": 2.9098646593406314e-05, "loss": 0.200775146484375, "step": 2097 }, { "epoch": 0.1418142490198729, "grad_norm": 2.0935628414154053, "learning_rate": 2.909751971706088e-05, "loss": 0.2308807373046875, "step": 2098 }, { "epoch": 0.14188184399080708, "grad_norm": 2.099045991897583, "learning_rate": 2.9096392158589775e-05, "loss": 0.28082275390625, "step": 2099 }, { "epoch": 0.14194943896174125, "grad_norm": 0.9815037250518799, "learning_rate": 2.909526391804755e-05, "loss": 0.12744140625, "step": 2100 }, { "epoch": 0.1420170339326754, "grad_norm": 1.4195021390914917, "learning_rate": 2.9094134995488803e-05, "loss": 0.241302490234375, "step": 2101 }, { "epoch": 0.14208462890360957, "grad_norm": 1.5496110916137695, "learning_rate": 2.9093005390968153e-05, "loss": 0.2560577392578125, "step": 2102 }, { "epoch": 0.14215222387454374, "grad_norm": 1.2329819202423096, "learning_rate": 2.9091875104540254e-05, "loss": 0.2362060546875, "step": 2103 }, { "epoch": 0.1422198188454779, "grad_norm": 2.448594570159912, "learning_rate": 2.9090744136259804e-05, "loss": 0.3458251953125, "step": 2104 }, { "epoch": 0.14228741381641205, "grad_norm": 4.358417510986328, "learning_rate": 2.9089612486181526e-05, "loss": 0.3712158203125, "step": 2105 }, { "epoch": 0.14235500878734622, "grad_norm": 2.105664014816284, "learning_rate": 2.908848015436017e-05, "loss": 0.31793212890625, "step": 2106 }, { "epoch": 0.1424226037582804, "grad_norm": 1.3492971658706665, "learning_rate": 2.908734714085053e-05, "loss": 0.279449462890625, "step": 2107 }, { "epoch": 0.14249019872921453, "grad_norm": 1.5664077997207642, "learning_rate": 2.9086213445707427e-05, "loss": 0.35858154296875, "step": 2108 }, { "epoch": 0.1425577937001487, "grad_norm": 1.2637969255447388, "learning_rate": 2.9085079068985714e-05, "loss": 0.277923583984375, "step": 2109 }, { "epoch": 0.14262538867108288, "grad_norm": 1.9057117700576782, "learning_rate": 2.9083944010740288e-05, "loss": 0.277496337890625, "step": 2110 }, { "epoch": 0.14269298364201705, "grad_norm": 3.5216310024261475, "learning_rate": 2.9082808271026058e-05, "loss": 0.3499755859375, "step": 2111 }, { "epoch": 0.1427605786129512, "grad_norm": 0.8827781081199646, "learning_rate": 2.9081671849897982e-05, "loss": 0.1935577392578125, "step": 2112 }, { "epoch": 0.14282817358388536, "grad_norm": 3.0778872966766357, "learning_rate": 2.9080534747411055e-05, "loss": 0.222808837890625, "step": 2113 }, { "epoch": 0.14289576855481953, "grad_norm": 0.9339617490768433, "learning_rate": 2.9079396963620287e-05, "loss": 0.1308135986328125, "step": 2114 }, { "epoch": 0.14296336352575367, "grad_norm": 1.7682325839996338, "learning_rate": 2.9078258498580738e-05, "loss": 0.266815185546875, "step": 2115 }, { "epoch": 0.14303095849668784, "grad_norm": 1.1139745712280273, "learning_rate": 2.9077119352347487e-05, "loss": 0.280181884765625, "step": 2116 }, { "epoch": 0.14309855346762201, "grad_norm": 1.9964321851730347, "learning_rate": 2.907597952497566e-05, "loss": 0.353271484375, "step": 2117 }, { "epoch": 0.14316614843855618, "grad_norm": 1.9767851829528809, "learning_rate": 2.9074839016520407e-05, "loss": 0.1438751220703125, "step": 2118 }, { "epoch": 0.14323374340949033, "grad_norm": 2.815854787826538, "learning_rate": 2.907369782703691e-05, "loss": 0.339111328125, "step": 2119 }, { "epoch": 0.1433013383804245, "grad_norm": 1.5970664024353027, "learning_rate": 2.907255595658039e-05, "loss": 0.240753173828125, "step": 2120 }, { "epoch": 0.14336893335135867, "grad_norm": 1.1866239309310913, "learning_rate": 2.907141340520609e-05, "loss": 0.167205810546875, "step": 2121 }, { "epoch": 0.1434365283222928, "grad_norm": 1.8187439441680908, "learning_rate": 2.9070270172969308e-05, "loss": 0.161590576171875, "step": 2122 }, { "epoch": 0.14350412329322698, "grad_norm": 1.6305011510849, "learning_rate": 2.9069126259925352e-05, "loss": 0.287994384765625, "step": 2123 }, { "epoch": 0.14357171826416115, "grad_norm": 1.4663249254226685, "learning_rate": 2.9067981666129568e-05, "loss": 0.211578369140625, "step": 2124 }, { "epoch": 0.14363931323509532, "grad_norm": 1.514835000038147, "learning_rate": 2.9066836391637348e-05, "loss": 0.30364990234375, "step": 2125 }, { "epoch": 0.14370690820602947, "grad_norm": 1.7870867252349854, "learning_rate": 2.9065690436504102e-05, "loss": 0.2667236328125, "step": 2126 }, { "epoch": 0.14377450317696364, "grad_norm": 1.5614773035049438, "learning_rate": 2.906454380078528e-05, "loss": 0.2144622802734375, "step": 2127 }, { "epoch": 0.1438420981478978, "grad_norm": 2.3160276412963867, "learning_rate": 2.906339648453636e-05, "loss": 0.296142578125, "step": 2128 }, { "epoch": 0.14390969311883195, "grad_norm": 1.8221968412399292, "learning_rate": 2.9062248487812856e-05, "loss": 0.344390869140625, "step": 2129 }, { "epoch": 0.14397728808976612, "grad_norm": 2.1352791786193848, "learning_rate": 2.9061099810670327e-05, "loss": 0.3255615234375, "step": 2130 }, { "epoch": 0.1440448830607003, "grad_norm": 1.031078815460205, "learning_rate": 2.9059950453164337e-05, "loss": 0.19158935546875, "step": 2131 }, { "epoch": 0.14411247803163443, "grad_norm": 1.3418465852737427, "learning_rate": 2.9058800415350507e-05, "loss": 0.26751708984375, "step": 2132 }, { "epoch": 0.1441800730025686, "grad_norm": 3.4698657989501953, "learning_rate": 2.9057649697284483e-05, "loss": 0.36773681640625, "step": 2133 }, { "epoch": 0.14424766797350277, "grad_norm": 1.3821390867233276, "learning_rate": 2.9056498299021943e-05, "loss": 0.28790283203125, "step": 2134 }, { "epoch": 0.14431526294443695, "grad_norm": 1.3451918363571167, "learning_rate": 2.90553462206186e-05, "loss": 0.2025299072265625, "step": 2135 }, { "epoch": 0.1443828579153711, "grad_norm": 0.9766769409179688, "learning_rate": 2.9054193462130193e-05, "loss": 0.213409423828125, "step": 2136 }, { "epoch": 0.14445045288630526, "grad_norm": 1.4353309869766235, "learning_rate": 2.9053040023612508e-05, "loss": 0.36370849609375, "step": 2137 }, { "epoch": 0.14451804785723943, "grad_norm": 2.2277181148529053, "learning_rate": 2.9051885905121353e-05, "loss": 0.250732421875, "step": 2138 }, { "epoch": 0.14458564282817357, "grad_norm": 1.8889626264572144, "learning_rate": 2.905073110671257e-05, "loss": 0.18123626708984375, "step": 2139 }, { "epoch": 0.14465323779910774, "grad_norm": 1.5754324197769165, "learning_rate": 2.904957562844203e-05, "loss": 0.199554443359375, "step": 2140 }, { "epoch": 0.1447208327700419, "grad_norm": 1.8684943914413452, "learning_rate": 2.9048419470365656e-05, "loss": 0.283843994140625, "step": 2141 }, { "epoch": 0.14478842774097608, "grad_norm": 2.0236964225769043, "learning_rate": 2.9047262632539375e-05, "loss": 0.30072021484375, "step": 2142 }, { "epoch": 0.14485602271191023, "grad_norm": 1.8934524059295654, "learning_rate": 2.9046105115019173e-05, "loss": 0.264739990234375, "step": 2143 }, { "epoch": 0.1449236176828444, "grad_norm": 2.384333848953247, "learning_rate": 2.904494691786105e-05, "loss": 0.298919677734375, "step": 2144 }, { "epoch": 0.14499121265377857, "grad_norm": 1.2681987285614014, "learning_rate": 2.9043788041121053e-05, "loss": 0.1666259765625, "step": 2145 }, { "epoch": 0.1450588076247127, "grad_norm": 1.0048147439956665, "learning_rate": 2.904262848485525e-05, "loss": 0.12705230712890625, "step": 2146 }, { "epoch": 0.14512640259564688, "grad_norm": 1.8962209224700928, "learning_rate": 2.9041468249119758e-05, "loss": 0.31329345703125, "step": 2147 }, { "epoch": 0.14519399756658105, "grad_norm": 1.621358871459961, "learning_rate": 2.9040307333970705e-05, "loss": 0.235931396484375, "step": 2148 }, { "epoch": 0.14526159253751522, "grad_norm": 1.2473360300064087, "learning_rate": 2.9039145739464267e-05, "loss": 0.2330322265625, "step": 2149 }, { "epoch": 0.14532918750844936, "grad_norm": 1.2128852605819702, "learning_rate": 2.903798346565665e-05, "loss": 0.195281982421875, "step": 2150 }, { "epoch": 0.14539678247938354, "grad_norm": 2.1932153701782227, "learning_rate": 2.903682051260409e-05, "loss": 0.2506103515625, "step": 2151 }, { "epoch": 0.1454643774503177, "grad_norm": 2.1231372356414795, "learning_rate": 2.903565688036286e-05, "loss": 0.213714599609375, "step": 2152 }, { "epoch": 0.14553197242125185, "grad_norm": 1.8741570711135864, "learning_rate": 2.9034492568989262e-05, "loss": 0.2730712890625, "step": 2153 }, { "epoch": 0.14559956739218602, "grad_norm": 2.102813482284546, "learning_rate": 2.903332757853964e-05, "loss": 0.22565460205078125, "step": 2154 }, { "epoch": 0.1456671623631202, "grad_norm": 3.00120210647583, "learning_rate": 2.903216190907035e-05, "loss": 0.252227783203125, "step": 2155 }, { "epoch": 0.14573475733405436, "grad_norm": 1.6539567708969116, "learning_rate": 2.903099556063781e-05, "loss": 0.28057861328125, "step": 2156 }, { "epoch": 0.1458023523049885, "grad_norm": 1.6178377866744995, "learning_rate": 2.9029828533298443e-05, "loss": 0.32952880859375, "step": 2157 }, { "epoch": 0.14586994727592267, "grad_norm": 1.712018609046936, "learning_rate": 2.902866082710872e-05, "loss": 0.250701904296875, "step": 2158 }, { "epoch": 0.14593754224685684, "grad_norm": 1.5232062339782715, "learning_rate": 2.902749244212514e-05, "loss": 0.248321533203125, "step": 2159 }, { "epoch": 0.146005137217791, "grad_norm": 1.3573507070541382, "learning_rate": 2.9026323378404244e-05, "loss": 0.323974609375, "step": 2160 }, { "epoch": 0.14607273218872516, "grad_norm": 1.3259507417678833, "learning_rate": 2.9025153636002596e-05, "loss": 0.26214599609375, "step": 2161 }, { "epoch": 0.14614032715965933, "grad_norm": 2.6463749408721924, "learning_rate": 2.902398321497679e-05, "loss": 0.2921142578125, "step": 2162 }, { "epoch": 0.14620792213059347, "grad_norm": 2.6131436824798584, "learning_rate": 2.9022812115383464e-05, "loss": 0.30670166015625, "step": 2163 }, { "epoch": 0.14627551710152764, "grad_norm": 1.5997308492660522, "learning_rate": 2.9021640337279276e-05, "loss": 0.15369415283203125, "step": 2164 }, { "epoch": 0.1463431120724618, "grad_norm": 1.8220804929733276, "learning_rate": 2.9020467880720932e-05, "loss": 0.314483642578125, "step": 2165 }, { "epoch": 0.14641070704339598, "grad_norm": 1.9294462203979492, "learning_rate": 2.901929474576516e-05, "loss": 0.2016143798828125, "step": 2166 }, { "epoch": 0.14647830201433013, "grad_norm": 1.1931945085525513, "learning_rate": 2.9018120932468723e-05, "loss": 0.2324981689453125, "step": 2167 }, { "epoch": 0.1465458969852643, "grad_norm": 1.868310570716858, "learning_rate": 2.9016946440888417e-05, "loss": 0.195770263671875, "step": 2168 }, { "epoch": 0.14661349195619847, "grad_norm": 4.036201477050781, "learning_rate": 2.9015771271081075e-05, "loss": 0.337158203125, "step": 2169 }, { "epoch": 0.1466810869271326, "grad_norm": 0.5832833051681519, "learning_rate": 2.9014595423103553e-05, "loss": 0.13564300537109375, "step": 2170 }, { "epoch": 0.14674868189806678, "grad_norm": 3.0235214233398438, "learning_rate": 2.9013418897012748e-05, "loss": 0.3419189453125, "step": 2171 }, { "epoch": 0.14681627686900095, "grad_norm": 1.3879507780075073, "learning_rate": 2.9012241692865587e-05, "loss": 0.272857666015625, "step": 2172 }, { "epoch": 0.14688387183993512, "grad_norm": 1.5183584690093994, "learning_rate": 2.9011063810719035e-05, "loss": 0.296905517578125, "step": 2173 }, { "epoch": 0.14695146681086926, "grad_norm": 3.0657637119293213, "learning_rate": 2.900988525063008e-05, "loss": 0.23846435546875, "step": 2174 }, { "epoch": 0.14701906178180343, "grad_norm": 2.1484432220458984, "learning_rate": 2.9008706012655752e-05, "loss": 0.2479248046875, "step": 2175 }, { "epoch": 0.1470866567527376, "grad_norm": 2.540661573410034, "learning_rate": 2.9007526096853106e-05, "loss": 0.34381103515625, "step": 2176 }, { "epoch": 0.14715425172367175, "grad_norm": 3.073059320449829, "learning_rate": 2.9006345503279235e-05, "loss": 0.29742431640625, "step": 2177 }, { "epoch": 0.14722184669460592, "grad_norm": 1.2974597215652466, "learning_rate": 2.9005164231991265e-05, "loss": 0.3009033203125, "step": 2178 }, { "epoch": 0.1472894416655401, "grad_norm": 1.4776568412780762, "learning_rate": 2.9003982283046352e-05, "loss": 0.3023681640625, "step": 2179 }, { "epoch": 0.14735703663647426, "grad_norm": 1.0423928499221802, "learning_rate": 2.9002799656501685e-05, "loss": 0.185882568359375, "step": 2180 }, { "epoch": 0.1474246316074084, "grad_norm": 1.8321083784103394, "learning_rate": 2.900161635241449e-05, "loss": 0.2616119384765625, "step": 2181 }, { "epoch": 0.14749222657834257, "grad_norm": 2.493705987930298, "learning_rate": 2.900043237084202e-05, "loss": 0.2867431640625, "step": 2182 }, { "epoch": 0.14755982154927674, "grad_norm": 0.7264971733093262, "learning_rate": 2.8999247711841566e-05, "loss": 0.114501953125, "step": 2183 }, { "epoch": 0.14762741652021089, "grad_norm": 2.27451229095459, "learning_rate": 2.8998062375470446e-05, "loss": 0.345550537109375, "step": 2184 }, { "epoch": 0.14769501149114506, "grad_norm": 2.8008384704589844, "learning_rate": 2.8996876361786018e-05, "loss": 0.30242919921875, "step": 2185 }, { "epoch": 0.14776260646207923, "grad_norm": 1.1254945993423462, "learning_rate": 2.899568967084566e-05, "loss": 0.196929931640625, "step": 2186 }, { "epoch": 0.1478302014330134, "grad_norm": 2.926666498184204, "learning_rate": 2.89945023027068e-05, "loss": 0.33746337890625, "step": 2187 }, { "epoch": 0.14789779640394754, "grad_norm": 1.5109779834747314, "learning_rate": 2.8993314257426893e-05, "loss": 0.292266845703125, "step": 2188 }, { "epoch": 0.1479653913748817, "grad_norm": 2.5540826320648193, "learning_rate": 2.899212553506341e-05, "loss": 0.28411865234375, "step": 2189 }, { "epoch": 0.14803298634581588, "grad_norm": 2.4035799503326416, "learning_rate": 2.8990936135673884e-05, "loss": 0.323089599609375, "step": 2190 }, { "epoch": 0.14810058131675002, "grad_norm": 2.507201910018921, "learning_rate": 2.8989746059315858e-05, "loss": 0.2957763671875, "step": 2191 }, { "epoch": 0.1481681762876842, "grad_norm": 1.0952142477035522, "learning_rate": 2.898855530604691e-05, "loss": 0.261016845703125, "step": 2192 }, { "epoch": 0.14823577125861837, "grad_norm": 1.4321258068084717, "learning_rate": 2.898736387592467e-05, "loss": 0.287353515625, "step": 2193 }, { "epoch": 0.1483033662295525, "grad_norm": 1.6117726564407349, "learning_rate": 2.8986171769006777e-05, "loss": 0.28607177734375, "step": 2194 }, { "epoch": 0.14837096120048668, "grad_norm": 2.574770212173462, "learning_rate": 2.8984978985350915e-05, "loss": 0.23175048828125, "step": 2195 }, { "epoch": 0.14843855617142085, "grad_norm": 1.7011648416519165, "learning_rate": 2.8983785525014796e-05, "loss": 0.250823974609375, "step": 2196 }, { "epoch": 0.14850615114235502, "grad_norm": 1.4853920936584473, "learning_rate": 2.8982591388056172e-05, "loss": 0.237579345703125, "step": 2197 }, { "epoch": 0.14857374611328916, "grad_norm": 2.8817083835601807, "learning_rate": 2.8981396574532818e-05, "loss": 0.35595703125, "step": 2198 }, { "epoch": 0.14864134108422333, "grad_norm": 1.2251864671707153, "learning_rate": 2.8980201084502548e-05, "loss": 0.1767578125, "step": 2199 }, { "epoch": 0.1487089360551575, "grad_norm": 1.115188479423523, "learning_rate": 2.8979004918023208e-05, "loss": 0.207672119140625, "step": 2200 }, { "epoch": 0.14877653102609165, "grad_norm": 4.32085657119751, "learning_rate": 2.8977808075152677e-05, "loss": 0.361724853515625, "step": 2201 }, { "epoch": 0.14884412599702582, "grad_norm": 2.388529062271118, "learning_rate": 2.8976610555948862e-05, "loss": 0.246795654296875, "step": 2202 }, { "epoch": 0.14891172096796, "grad_norm": 1.647191047668457, "learning_rate": 2.897541236046971e-05, "loss": 0.2053070068359375, "step": 2203 }, { "epoch": 0.14897931593889416, "grad_norm": 1.721611499786377, "learning_rate": 2.8974213488773196e-05, "loss": 0.18304443359375, "step": 2204 }, { "epoch": 0.1490469109098283, "grad_norm": 1.2463127374649048, "learning_rate": 2.8973013940917328e-05, "loss": 0.28875732421875, "step": 2205 }, { "epoch": 0.14911450588076247, "grad_norm": 3.179091215133667, "learning_rate": 2.897181371696015e-05, "loss": 0.2669677734375, "step": 2206 }, { "epoch": 0.14918210085169664, "grad_norm": 1.2058929204940796, "learning_rate": 2.8970612816959733e-05, "loss": 0.201141357421875, "step": 2207 }, { "epoch": 0.14924969582263078, "grad_norm": 1.3526713848114014, "learning_rate": 2.896941124097419e-05, "loss": 0.23504638671875, "step": 2208 }, { "epoch": 0.14931729079356496, "grad_norm": 2.6128265857696533, "learning_rate": 2.8968208989061646e-05, "loss": 0.27935791015625, "step": 2209 }, { "epoch": 0.14938488576449913, "grad_norm": 2.604567527770996, "learning_rate": 2.896700606128029e-05, "loss": 0.2850341796875, "step": 2210 }, { "epoch": 0.1494524807354333, "grad_norm": 0.9136465787887573, "learning_rate": 2.8965802457688322e-05, "loss": 0.1985321044921875, "step": 2211 }, { "epoch": 0.14952007570636744, "grad_norm": 3.2833662033081055, "learning_rate": 2.8964598178343978e-05, "loss": 0.2750244140625, "step": 2212 }, { "epoch": 0.1495876706773016, "grad_norm": 2.393143892288208, "learning_rate": 2.8963393223305527e-05, "loss": 0.2803955078125, "step": 2213 }, { "epoch": 0.14965526564823578, "grad_norm": 1.415356993675232, "learning_rate": 2.8962187592631277e-05, "loss": 0.342529296875, "step": 2214 }, { "epoch": 0.14972286061916992, "grad_norm": 0.8463639616966248, "learning_rate": 2.8960981286379563e-05, "loss": 0.14873504638671875, "step": 2215 }, { "epoch": 0.1497904555901041, "grad_norm": 0.637097954750061, "learning_rate": 2.895977430460875e-05, "loss": 0.1132049560546875, "step": 2216 }, { "epoch": 0.14985805056103826, "grad_norm": 1.968226432800293, "learning_rate": 2.895856664737724e-05, "loss": 0.28621673583984375, "step": 2217 }, { "epoch": 0.14992564553197243, "grad_norm": 1.5436882972717285, "learning_rate": 2.8957358314743467e-05, "loss": 0.235748291015625, "step": 2218 }, { "epoch": 0.14999324050290658, "grad_norm": 1.6424728631973267, "learning_rate": 2.8956149306765904e-05, "loss": 0.258056640625, "step": 2219 }, { "epoch": 0.15006083547384075, "grad_norm": 2.0841498374938965, "learning_rate": 2.895493962350304e-05, "loss": 0.315673828125, "step": 2220 }, { "epoch": 0.15012843044477492, "grad_norm": 2.1888604164123535, "learning_rate": 2.8953729265013414e-05, "loss": 0.3138427734375, "step": 2221 }, { "epoch": 0.15019602541570906, "grad_norm": 0.9973850846290588, "learning_rate": 2.895251823135559e-05, "loss": 0.182159423828125, "step": 2222 }, { "epoch": 0.15026362038664323, "grad_norm": 1.3317509889602661, "learning_rate": 2.8951306522588163e-05, "loss": 0.1895294189453125, "step": 2223 }, { "epoch": 0.1503312153575774, "grad_norm": 1.7294918298721313, "learning_rate": 2.8950094138769763e-05, "loss": 0.2672119140625, "step": 2224 }, { "epoch": 0.15039881032851155, "grad_norm": 2.0094592571258545, "learning_rate": 2.8948881079959054e-05, "loss": 0.240997314453125, "step": 2225 }, { "epoch": 0.15046640529944572, "grad_norm": 1.9162694215774536, "learning_rate": 2.8947667346214736e-05, "loss": 0.3278961181640625, "step": 2226 }, { "epoch": 0.1505340002703799, "grad_norm": 1.093528389930725, "learning_rate": 2.8946452937595528e-05, "loss": 0.274810791015625, "step": 2227 }, { "epoch": 0.15060159524131406, "grad_norm": 1.6692355871200562, "learning_rate": 2.8945237854160194e-05, "loss": 0.36572265625, "step": 2228 }, { "epoch": 0.1506691902122482, "grad_norm": 1.6186716556549072, "learning_rate": 2.894402209596753e-05, "loss": 0.283782958984375, "step": 2229 }, { "epoch": 0.15073678518318237, "grad_norm": 1.6782114505767822, "learning_rate": 2.8942805663076362e-05, "loss": 0.283447265625, "step": 2230 }, { "epoch": 0.15080438015411654, "grad_norm": 1.8077222108840942, "learning_rate": 2.8941588555545544e-05, "loss": 0.2630615234375, "step": 2231 }, { "epoch": 0.15087197512505068, "grad_norm": 1.168641209602356, "learning_rate": 2.894037077343397e-05, "loss": 0.260284423828125, "step": 2232 }, { "epoch": 0.15093957009598485, "grad_norm": 1.7437427043914795, "learning_rate": 2.8939152316800562e-05, "loss": 0.2639617919921875, "step": 2233 }, { "epoch": 0.15100716506691902, "grad_norm": 1.0653531551361084, "learning_rate": 2.8937933185704282e-05, "loss": 0.1814422607421875, "step": 2234 }, { "epoch": 0.1510747600378532, "grad_norm": 2.4773495197296143, "learning_rate": 2.8936713380204114e-05, "loss": 0.2772064208984375, "step": 2235 }, { "epoch": 0.15114235500878734, "grad_norm": 0.7743692994117737, "learning_rate": 2.893549290035908e-05, "loss": 0.129791259765625, "step": 2236 }, { "epoch": 0.1512099499797215, "grad_norm": 1.829299807548523, "learning_rate": 2.893427174622824e-05, "loss": 0.3316650390625, "step": 2237 }, { "epoch": 0.15127754495065568, "grad_norm": 0.9709466099739075, "learning_rate": 2.8933049917870668e-05, "loss": 0.1446380615234375, "step": 2238 }, { "epoch": 0.15134513992158982, "grad_norm": 1.6231571435928345, "learning_rate": 2.8931827415345496e-05, "loss": 0.1544647216796875, "step": 2239 }, { "epoch": 0.151412734892524, "grad_norm": 2.2725727558135986, "learning_rate": 2.8930604238711877e-05, "loss": 0.2849273681640625, "step": 2240 }, { "epoch": 0.15148032986345816, "grad_norm": 1.4283143281936646, "learning_rate": 2.8929380388028988e-05, "loss": 0.238128662109375, "step": 2241 }, { "epoch": 0.15154792483439233, "grad_norm": 4.44516134262085, "learning_rate": 2.8928155863356047e-05, "loss": 0.358154296875, "step": 2242 }, { "epoch": 0.15161551980532648, "grad_norm": 0.8131169676780701, "learning_rate": 2.8926930664752312e-05, "loss": 0.13128662109375, "step": 2243 }, { "epoch": 0.15168311477626065, "grad_norm": 1.1914641857147217, "learning_rate": 2.892570479227706e-05, "loss": 0.1151580810546875, "step": 2244 }, { "epoch": 0.15175070974719482, "grad_norm": 1.4321248531341553, "learning_rate": 2.8924478245989603e-05, "loss": 0.27655029296875, "step": 2245 }, { "epoch": 0.15181830471812896, "grad_norm": 2.076435089111328, "learning_rate": 2.89232510259493e-05, "loss": 0.230804443359375, "step": 2246 }, { "epoch": 0.15188589968906313, "grad_norm": 1.1533681154251099, "learning_rate": 2.892202313221552e-05, "loss": 0.181640625, "step": 2247 }, { "epoch": 0.1519534946599973, "grad_norm": 1.7312424182891846, "learning_rate": 2.892079456484768e-05, "loss": 0.2626953125, "step": 2248 }, { "epoch": 0.15202108963093147, "grad_norm": 1.3302180767059326, "learning_rate": 2.8919565323905227e-05, "loss": 0.288055419921875, "step": 2249 }, { "epoch": 0.15208868460186561, "grad_norm": 0.9010581970214844, "learning_rate": 2.8918335409447642e-05, "loss": 0.157928466796875, "step": 2250 }, { "epoch": 0.15215627957279979, "grad_norm": 1.2349194288253784, "learning_rate": 2.891710482153443e-05, "loss": 0.1960601806640625, "step": 2251 }, { "epoch": 0.15222387454373396, "grad_norm": 2.1057541370391846, "learning_rate": 2.8915873560225136e-05, "loss": 0.2624664306640625, "step": 2252 }, { "epoch": 0.1522914695146681, "grad_norm": 1.2648755311965942, "learning_rate": 2.8914641625579342e-05, "loss": 0.258941650390625, "step": 2253 }, { "epoch": 0.15235906448560227, "grad_norm": 1.387248158454895, "learning_rate": 2.8913409017656644e-05, "loss": 0.29217529296875, "step": 2254 }, { "epoch": 0.15242665945653644, "grad_norm": 1.3200386762619019, "learning_rate": 2.89121757365167e-05, "loss": 0.289276123046875, "step": 2255 }, { "epoch": 0.15249425442747058, "grad_norm": 2.5832552909851074, "learning_rate": 2.8910941782219172e-05, "loss": 0.212738037109375, "step": 2256 }, { "epoch": 0.15256184939840475, "grad_norm": 1.2032569646835327, "learning_rate": 2.8909707154823767e-05, "loss": 0.1899871826171875, "step": 2257 }, { "epoch": 0.15262944436933892, "grad_norm": 7.112155914306641, "learning_rate": 2.8908471854390225e-05, "loss": 0.41845703125, "step": 2258 }, { "epoch": 0.1526970393402731, "grad_norm": 1.7854200601577759, "learning_rate": 2.8907235880978324e-05, "loss": 0.245758056640625, "step": 2259 }, { "epoch": 0.15276463431120724, "grad_norm": 2.674954891204834, "learning_rate": 2.890599923464786e-05, "loss": 0.29541015625, "step": 2260 }, { "epoch": 0.1528322292821414, "grad_norm": 4.19438362121582, "learning_rate": 2.8904761915458672e-05, "loss": 0.3128662109375, "step": 2261 }, { "epoch": 0.15289982425307558, "grad_norm": 1.3036625385284424, "learning_rate": 2.890352392347063e-05, "loss": 0.266143798828125, "step": 2262 }, { "epoch": 0.15296741922400972, "grad_norm": 1.1764562129974365, "learning_rate": 2.8902285258743634e-05, "loss": 0.319091796875, "step": 2263 }, { "epoch": 0.1530350141949439, "grad_norm": 2.4969334602355957, "learning_rate": 2.8901045921337625e-05, "loss": 0.27789306640625, "step": 2264 }, { "epoch": 0.15310260916587806, "grad_norm": 3.0102970600128174, "learning_rate": 2.889980591131256e-05, "loss": 0.35491943359375, "step": 2265 }, { "epoch": 0.15317020413681223, "grad_norm": 2.184673547744751, "learning_rate": 2.8898565228728443e-05, "loss": 0.209442138671875, "step": 2266 }, { "epoch": 0.15323779910774638, "grad_norm": 4.000074863433838, "learning_rate": 2.889732387364531e-05, "loss": 0.2515869140625, "step": 2267 }, { "epoch": 0.15330539407868055, "grad_norm": 5.430281162261963, "learning_rate": 2.889608184612322e-05, "loss": 0.371337890625, "step": 2268 }, { "epoch": 0.15337298904961472, "grad_norm": 2.5089147090911865, "learning_rate": 2.889483914622227e-05, "loss": 0.3133544921875, "step": 2269 }, { "epoch": 0.15344058402054886, "grad_norm": 1.340229868888855, "learning_rate": 2.889359577400259e-05, "loss": 0.16986083984375, "step": 2270 }, { "epoch": 0.15350817899148303, "grad_norm": 2.304551601409912, "learning_rate": 2.8892351729524345e-05, "loss": 0.267303466796875, "step": 2271 }, { "epoch": 0.1535757739624172, "grad_norm": 2.9129836559295654, "learning_rate": 2.8891107012847726e-05, "loss": 0.304840087890625, "step": 2272 }, { "epoch": 0.15364336893335137, "grad_norm": 1.3623467683792114, "learning_rate": 2.8889861624032966e-05, "loss": 0.295166015625, "step": 2273 }, { "epoch": 0.1537109639042855, "grad_norm": 1.6671591997146606, "learning_rate": 2.8888615563140318e-05, "loss": 0.242340087890625, "step": 2274 }, { "epoch": 0.15377855887521968, "grad_norm": 2.2940585613250732, "learning_rate": 2.8887368830230073e-05, "loss": 0.131134033203125, "step": 2275 }, { "epoch": 0.15384615384615385, "grad_norm": 2.7096309661865234, "learning_rate": 2.8886121425362564e-05, "loss": 0.287628173828125, "step": 2276 }, { "epoch": 0.153913748817088, "grad_norm": 1.6638896465301514, "learning_rate": 2.8884873348598146e-05, "loss": 0.30755615234375, "step": 2277 }, { "epoch": 0.15398134378802217, "grad_norm": 1.0653537511825562, "learning_rate": 2.88836245999972e-05, "loss": 0.20697021484375, "step": 2278 }, { "epoch": 0.15404893875895634, "grad_norm": 2.5923123359680176, "learning_rate": 2.8882375179620155e-05, "loss": 0.34259033203125, "step": 2279 }, { "epoch": 0.1541165337298905, "grad_norm": 1.8530354499816895, "learning_rate": 2.8881125087527467e-05, "loss": 0.289398193359375, "step": 2280 }, { "epoch": 0.15418412870082465, "grad_norm": 1.3865050077438354, "learning_rate": 2.8879874323779625e-05, "loss": 0.2600860595703125, "step": 2281 }, { "epoch": 0.15425172367175882, "grad_norm": 2.49190354347229, "learning_rate": 2.887862288843714e-05, "loss": 0.28118896484375, "step": 2282 }, { "epoch": 0.154319318642693, "grad_norm": 1.1204255819320679, "learning_rate": 2.8877370781560574e-05, "loss": 0.222686767578125, "step": 2283 }, { "epoch": 0.15438691361362714, "grad_norm": 2.5192742347717285, "learning_rate": 2.8876118003210504e-05, "loss": 0.1983642578125, "step": 2284 }, { "epoch": 0.1544545085845613, "grad_norm": 0.9616654515266418, "learning_rate": 2.8874864553447552e-05, "loss": 0.14593505859375, "step": 2285 }, { "epoch": 0.15452210355549548, "grad_norm": 3.2163262367248535, "learning_rate": 2.8873610432332364e-05, "loss": 0.180267333984375, "step": 2286 }, { "epoch": 0.15458969852642965, "grad_norm": 0.6530656218528748, "learning_rate": 2.8872355639925628e-05, "loss": 0.08648681640625, "step": 2287 }, { "epoch": 0.1546572934973638, "grad_norm": 1.6128863096237183, "learning_rate": 2.887110017628805e-05, "loss": 0.1978759765625, "step": 2288 }, { "epoch": 0.15472488846829796, "grad_norm": 1.659340739250183, "learning_rate": 2.8869844041480382e-05, "loss": 0.259185791015625, "step": 2289 }, { "epoch": 0.15479248343923213, "grad_norm": 3.2485876083374023, "learning_rate": 2.8868587235563408e-05, "loss": 0.23883056640625, "step": 2290 }, { "epoch": 0.15486007841016627, "grad_norm": 5.296931743621826, "learning_rate": 2.8867329758597934e-05, "loss": 0.39093017578125, "step": 2291 }, { "epoch": 0.15492767338110044, "grad_norm": 0.7200137376785278, "learning_rate": 2.8866071610644807e-05, "loss": 0.117523193359375, "step": 2292 }, { "epoch": 0.15499526835203462, "grad_norm": 1.9284332990646362, "learning_rate": 2.88648127917649e-05, "loss": 0.23076248168945312, "step": 2293 }, { "epoch": 0.15506286332296876, "grad_norm": 2.644216299057007, "learning_rate": 2.886355330201913e-05, "loss": 0.231414794921875, "step": 2294 }, { "epoch": 0.15513045829390293, "grad_norm": 2.267794370651245, "learning_rate": 2.886229314146843e-05, "loss": 0.1675262451171875, "step": 2295 }, { "epoch": 0.1551980532648371, "grad_norm": 1.7116438150405884, "learning_rate": 2.8861032310173783e-05, "loss": 0.32171630859375, "step": 2296 }, { "epoch": 0.15526564823577127, "grad_norm": 1.259519100189209, "learning_rate": 2.885977080819619e-05, "loss": 0.224365234375, "step": 2297 }, { "epoch": 0.1553332432067054, "grad_norm": 1.0646758079528809, "learning_rate": 2.8858508635596695e-05, "loss": 0.228973388671875, "step": 2298 }, { "epoch": 0.15540083817763958, "grad_norm": 1.3145701885223389, "learning_rate": 2.8857245792436363e-05, "loss": 0.32574462890625, "step": 2299 }, { "epoch": 0.15546843314857375, "grad_norm": 1.5658135414123535, "learning_rate": 2.8855982278776304e-05, "loss": 0.330841064453125, "step": 2300 }, { "epoch": 0.1555360281195079, "grad_norm": 1.5632798671722412, "learning_rate": 2.8854718094677656e-05, "loss": 0.2265625, "step": 2301 }, { "epoch": 0.15560362309044207, "grad_norm": 0.8707678318023682, "learning_rate": 2.8853453240201582e-05, "loss": 0.19879150390625, "step": 2302 }, { "epoch": 0.15567121806137624, "grad_norm": 1.37408447265625, "learning_rate": 2.8852187715409285e-05, "loss": 0.263275146484375, "step": 2303 }, { "epoch": 0.1557388130323104, "grad_norm": 1.268423318862915, "learning_rate": 2.8850921520362002e-05, "loss": 0.268280029296875, "step": 2304 }, { "epoch": 0.15580640800324455, "grad_norm": 1.5567563772201538, "learning_rate": 2.8849654655121e-05, "loss": 0.179290771484375, "step": 2305 }, { "epoch": 0.15587400297417872, "grad_norm": 3.111487627029419, "learning_rate": 2.8848387119747573e-05, "loss": 0.339996337890625, "step": 2306 }, { "epoch": 0.1559415979451129, "grad_norm": 1.108720302581787, "learning_rate": 2.8847118914303057e-05, "loss": 0.24072265625, "step": 2307 }, { "epoch": 0.15600919291604703, "grad_norm": 0.8329055905342102, "learning_rate": 2.884585003884881e-05, "loss": 0.2098846435546875, "step": 2308 }, { "epoch": 0.1560767878869812, "grad_norm": 3.947798728942871, "learning_rate": 2.8844580493446237e-05, "loss": 0.3323974609375, "step": 2309 }, { "epoch": 0.15614438285791538, "grad_norm": 1.5903234481811523, "learning_rate": 2.8843310278156756e-05, "loss": 0.20574951171875, "step": 2310 }, { "epoch": 0.15621197782884955, "grad_norm": 2.5156283378601074, "learning_rate": 2.8842039393041837e-05, "loss": 0.224639892578125, "step": 2311 }, { "epoch": 0.1562795727997837, "grad_norm": 2.8731749057769775, "learning_rate": 2.8840767838162967e-05, "loss": 0.28240966796875, "step": 2312 }, { "epoch": 0.15634716777071786, "grad_norm": 1.0109318494796753, "learning_rate": 2.8839495613581675e-05, "loss": 0.1711578369140625, "step": 2313 }, { "epoch": 0.15641476274165203, "grad_norm": 1.719666600227356, "learning_rate": 2.883822271935952e-05, "loss": 0.27215576171875, "step": 2314 }, { "epoch": 0.15648235771258617, "grad_norm": 0.936730682849884, "learning_rate": 2.8836949155558088e-05, "loss": 0.145904541015625, "step": 2315 }, { "epoch": 0.15654995268352034, "grad_norm": 2.215204954147339, "learning_rate": 2.8835674922239005e-05, "loss": 0.336395263671875, "step": 2316 }, { "epoch": 0.15661754765445451, "grad_norm": 1.9454907178878784, "learning_rate": 2.883440001946393e-05, "loss": 0.33984375, "step": 2317 }, { "epoch": 0.15668514262538868, "grad_norm": 1.7935394048690796, "learning_rate": 2.8833124447294544e-05, "loss": 0.1943359375, "step": 2318 }, { "epoch": 0.15675273759632283, "grad_norm": 1.1733583211898804, "learning_rate": 2.883184820579257e-05, "loss": 0.2117462158203125, "step": 2319 }, { "epoch": 0.156820332567257, "grad_norm": 2.1826419830322266, "learning_rate": 2.883057129501976e-05, "loss": 0.190643310546875, "step": 2320 }, { "epoch": 0.15688792753819117, "grad_norm": 1.9201531410217285, "learning_rate": 2.8829293715037902e-05, "loss": 0.251312255859375, "step": 2321 }, { "epoch": 0.1569555225091253, "grad_norm": 1.604422926902771, "learning_rate": 2.8828015465908806e-05, "loss": 0.274810791015625, "step": 2322 }, { "epoch": 0.15702311748005948, "grad_norm": 1.6769803762435913, "learning_rate": 2.882673654769433e-05, "loss": 0.2916259765625, "step": 2323 }, { "epoch": 0.15709071245099365, "grad_norm": 2.0106146335601807, "learning_rate": 2.882545696045635e-05, "loss": 0.35150146484375, "step": 2324 }, { "epoch": 0.1571583074219278, "grad_norm": 2.149214506149292, "learning_rate": 2.8824176704256784e-05, "loss": 0.30023193359375, "step": 2325 }, { "epoch": 0.15722590239286197, "grad_norm": 1.1936001777648926, "learning_rate": 2.8822895779157576e-05, "loss": 0.249908447265625, "step": 2326 }, { "epoch": 0.15729349736379614, "grad_norm": 3.57281494140625, "learning_rate": 2.882161418522071e-05, "loss": 0.26800537109375, "step": 2327 }, { "epoch": 0.1573610923347303, "grad_norm": 1.3917039632797241, "learning_rate": 2.8820331922508188e-05, "loss": 0.29168701171875, "step": 2328 }, { "epoch": 0.15742868730566445, "grad_norm": 1.0621790885925293, "learning_rate": 2.8819048991082068e-05, "loss": 0.1666259765625, "step": 2329 }, { "epoch": 0.15749628227659862, "grad_norm": 1.638824224472046, "learning_rate": 2.8817765391004413e-05, "loss": 0.31988525390625, "step": 2330 }, { "epoch": 0.1575638772475328, "grad_norm": 2.2537882328033447, "learning_rate": 2.8816481122337333e-05, "loss": 0.270843505859375, "step": 2331 }, { "epoch": 0.15763147221846693, "grad_norm": 2.532970905303955, "learning_rate": 2.881519618514298e-05, "loss": 0.31365966796875, "step": 2332 }, { "epoch": 0.1576990671894011, "grad_norm": 1.2341798543930054, "learning_rate": 2.8813910579483516e-05, "loss": 0.285430908203125, "step": 2333 }, { "epoch": 0.15776666216033527, "grad_norm": 1.6774728298187256, "learning_rate": 2.8812624305421153e-05, "loss": 0.29345703125, "step": 2334 }, { "epoch": 0.15783425713126945, "grad_norm": 1.4897150993347168, "learning_rate": 2.8811337363018122e-05, "loss": 0.3148193359375, "step": 2335 }, { "epoch": 0.1579018521022036, "grad_norm": 0.8381147384643555, "learning_rate": 2.88100497523367e-05, "loss": 0.24961090087890625, "step": 2336 }, { "epoch": 0.15796944707313776, "grad_norm": 1.5538722276687622, "learning_rate": 2.880876147343919e-05, "loss": 0.35540771484375, "step": 2337 }, { "epoch": 0.15803704204407193, "grad_norm": 1.8006818294525146, "learning_rate": 2.8807472526387915e-05, "loss": 0.165252685546875, "step": 2338 }, { "epoch": 0.15810463701500607, "grad_norm": 1.3507176637649536, "learning_rate": 2.880618291124526e-05, "loss": 0.26708984375, "step": 2339 }, { "epoch": 0.15817223198594024, "grad_norm": 1.4186183214187622, "learning_rate": 2.8804892628073617e-05, "loss": 0.2154541015625, "step": 2340 }, { "epoch": 0.1582398269568744, "grad_norm": 1.6299318075180054, "learning_rate": 2.8803601676935415e-05, "loss": 0.28961181640625, "step": 2341 }, { "epoch": 0.15830742192780858, "grad_norm": 0.7499101758003235, "learning_rate": 2.8802310057893117e-05, "loss": 0.11554718017578125, "step": 2342 }, { "epoch": 0.15837501689874273, "grad_norm": 0.6463841199874878, "learning_rate": 2.8801017771009224e-05, "loss": 0.1272735595703125, "step": 2343 }, { "epoch": 0.1584426118696769, "grad_norm": 1.2399812936782837, "learning_rate": 2.8799724816346262e-05, "loss": 0.17779541015625, "step": 2344 }, { "epoch": 0.15851020684061107, "grad_norm": 2.3942408561706543, "learning_rate": 2.8798431193966798e-05, "loss": 0.205535888671875, "step": 2345 }, { "epoch": 0.1585778018115452, "grad_norm": 1.3177895545959473, "learning_rate": 2.879713690393342e-05, "loss": 0.20496368408203125, "step": 2346 }, { "epoch": 0.15864539678247938, "grad_norm": 1.6683990955352783, "learning_rate": 2.879584194630875e-05, "loss": 0.27484130859375, "step": 2347 }, { "epoch": 0.15871299175341355, "grad_norm": 1.3801000118255615, "learning_rate": 2.8794546321155462e-05, "loss": 0.238433837890625, "step": 2348 }, { "epoch": 0.15878058672434772, "grad_norm": 2.5947823524475098, "learning_rate": 2.879325002853623e-05, "loss": 0.3125, "step": 2349 }, { "epoch": 0.15884818169528186, "grad_norm": 1.1709965467453003, "learning_rate": 2.8791953068513777e-05, "loss": 0.36248779296875, "step": 2350 }, { "epoch": 0.15891577666621604, "grad_norm": 1.9921298027038574, "learning_rate": 2.879065544115087e-05, "loss": 0.32330322265625, "step": 2351 }, { "epoch": 0.1589833716371502, "grad_norm": 1.4020391702651978, "learning_rate": 2.8789357146510288e-05, "loss": 0.29425048828125, "step": 2352 }, { "epoch": 0.15905096660808435, "grad_norm": 2.616241455078125, "learning_rate": 2.8788058184654848e-05, "loss": 0.37261962890625, "step": 2353 }, { "epoch": 0.15911856157901852, "grad_norm": 1.4677739143371582, "learning_rate": 2.878675855564741e-05, "loss": 0.2215118408203125, "step": 2354 }, { "epoch": 0.1591861565499527, "grad_norm": 1.3612079620361328, "learning_rate": 2.8785458259550854e-05, "loss": 0.272369384765625, "step": 2355 }, { "epoch": 0.15925375152088683, "grad_norm": 1.255822777748108, "learning_rate": 2.8784157296428098e-05, "loss": 0.1721343994140625, "step": 2356 }, { "epoch": 0.159321346491821, "grad_norm": 1.751420021057129, "learning_rate": 2.878285566634209e-05, "loss": 0.2708740234375, "step": 2357 }, { "epoch": 0.15938894146275517, "grad_norm": 2.255753517150879, "learning_rate": 2.8781553369355805e-05, "loss": 0.27862548828125, "step": 2358 }, { "epoch": 0.15945653643368934, "grad_norm": 0.9850307106971741, "learning_rate": 2.8780250405532266e-05, "loss": 0.206573486328125, "step": 2359 }, { "epoch": 0.1595241314046235, "grad_norm": 1.4532309770584106, "learning_rate": 2.877894677493451e-05, "loss": 0.300567626953125, "step": 2360 }, { "epoch": 0.15959172637555766, "grad_norm": 1.171374797821045, "learning_rate": 2.8777642477625625e-05, "loss": 0.2183837890625, "step": 2361 }, { "epoch": 0.15965932134649183, "grad_norm": 1.6481759548187256, "learning_rate": 2.877633751366871e-05, "loss": 0.273651123046875, "step": 2362 }, { "epoch": 0.15972691631742597, "grad_norm": 1.6317707300186157, "learning_rate": 2.8775031883126908e-05, "loss": 0.36553955078125, "step": 2363 }, { "epoch": 0.15979451128836014, "grad_norm": 1.1382564306259155, "learning_rate": 2.87737255860634e-05, "loss": 0.25787353515625, "step": 2364 }, { "epoch": 0.1598621062592943, "grad_norm": 1.1568188667297363, "learning_rate": 2.8772418622541395e-05, "loss": 0.20709228515625, "step": 2365 }, { "epoch": 0.15992970123022848, "grad_norm": 1.7264008522033691, "learning_rate": 2.8771110992624125e-05, "loss": 0.341552734375, "step": 2366 }, { "epoch": 0.15999729620116263, "grad_norm": 1.9395453929901123, "learning_rate": 2.8769802696374865e-05, "loss": 0.2978515625, "step": 2367 }, { "epoch": 0.1600648911720968, "grad_norm": 0.8725050091743469, "learning_rate": 2.8768493733856916e-05, "loss": 0.2169036865234375, "step": 2368 }, { "epoch": 0.16013248614303097, "grad_norm": 2.289071559906006, "learning_rate": 2.8767184105133612e-05, "loss": 0.36639404296875, "step": 2369 }, { "epoch": 0.1602000811139651, "grad_norm": 1.8848931789398193, "learning_rate": 2.8765873810268327e-05, "loss": 0.30712890625, "step": 2370 }, { "epoch": 0.16026767608489928, "grad_norm": 1.2038094997406006, "learning_rate": 2.8764562849324454e-05, "loss": 0.15521240234375, "step": 2371 }, { "epoch": 0.16033527105583345, "grad_norm": 1.2555843591690063, "learning_rate": 2.8763251222365433e-05, "loss": 0.151947021484375, "step": 2372 }, { "epoch": 0.16040286602676762, "grad_norm": 0.6475080251693726, "learning_rate": 2.8761938929454722e-05, "loss": 0.13995361328125, "step": 2373 }, { "epoch": 0.16047046099770176, "grad_norm": 1.5381789207458496, "learning_rate": 2.8760625970655826e-05, "loss": 0.3157958984375, "step": 2374 }, { "epoch": 0.16053805596863593, "grad_norm": 2.9891669750213623, "learning_rate": 2.875931234603227e-05, "loss": 0.3157958984375, "step": 2375 }, { "epoch": 0.1606056509395701, "grad_norm": 2.6151230335235596, "learning_rate": 2.8757998055647608e-05, "loss": 0.30572509765625, "step": 2376 }, { "epoch": 0.16067324591050425, "grad_norm": 3.9859461784362793, "learning_rate": 2.8756683099565445e-05, "loss": 0.35858154296875, "step": 2377 }, { "epoch": 0.16074084088143842, "grad_norm": 4.252753734588623, "learning_rate": 2.8755367477849398e-05, "loss": 0.36639404296875, "step": 2378 }, { "epoch": 0.1608084358523726, "grad_norm": 3.0224690437316895, "learning_rate": 2.875405119056313e-05, "loss": 0.3555908203125, "step": 2379 }, { "epoch": 0.16087603082330676, "grad_norm": 1.1092948913574219, "learning_rate": 2.8752734237770327e-05, "loss": 0.240753173828125, "step": 2380 }, { "epoch": 0.1609436257942409, "grad_norm": 0.43315544724464417, "learning_rate": 2.875141661953472e-05, "loss": 0.11446380615234375, "step": 2381 }, { "epoch": 0.16101122076517507, "grad_norm": 1.4222193956375122, "learning_rate": 2.8750098335920055e-05, "loss": 0.2724609375, "step": 2382 }, { "epoch": 0.16107881573610924, "grad_norm": 2.3954458236694336, "learning_rate": 2.8748779386990123e-05, "loss": 0.34478759765625, "step": 2383 }, { "epoch": 0.16114641070704339, "grad_norm": 3.1659812927246094, "learning_rate": 2.8747459772808738e-05, "loss": 0.2768402099609375, "step": 2384 }, { "epoch": 0.16121400567797756, "grad_norm": 0.5941571593284607, "learning_rate": 2.8746139493439757e-05, "loss": 0.1046905517578125, "step": 2385 }, { "epoch": 0.16128160064891173, "grad_norm": 0.8781968951225281, "learning_rate": 2.8744818548947063e-05, "loss": 0.1134185791015625, "step": 2386 }, { "epoch": 0.16134919561984587, "grad_norm": 1.318603515625, "learning_rate": 2.8743496939394568e-05, "loss": 0.30657958984375, "step": 2387 }, { "epoch": 0.16141679059078004, "grad_norm": 2.6009228229522705, "learning_rate": 2.874217466484622e-05, "loss": 0.35101318359375, "step": 2388 }, { "epoch": 0.1614843855617142, "grad_norm": 2.9638757705688477, "learning_rate": 2.8740851725366003e-05, "loss": 0.41851806640625, "step": 2389 }, { "epoch": 0.16155198053264838, "grad_norm": 1.866127371788025, "learning_rate": 2.8739528121017923e-05, "loss": 0.326141357421875, "step": 2390 }, { "epoch": 0.16161957550358252, "grad_norm": 1.1069928407669067, "learning_rate": 2.8738203851866026e-05, "loss": 0.1776580810546875, "step": 2391 }, { "epoch": 0.1616871704745167, "grad_norm": 2.16621470451355, "learning_rate": 2.8736878917974395e-05, "loss": 0.309478759765625, "step": 2392 }, { "epoch": 0.16175476544545087, "grad_norm": 1.7563056945800781, "learning_rate": 2.8735553319407127e-05, "loss": 0.3450927734375, "step": 2393 }, { "epoch": 0.161822360416385, "grad_norm": 1.7216485738754272, "learning_rate": 2.8734227056228372e-05, "loss": 0.241790771484375, "step": 2394 }, { "epoch": 0.16188995538731918, "grad_norm": 1.7547086477279663, "learning_rate": 2.8732900128502302e-05, "loss": 0.34429931640625, "step": 2395 }, { "epoch": 0.16195755035825335, "grad_norm": 0.996578574180603, "learning_rate": 2.8731572536293115e-05, "loss": 0.1833343505859375, "step": 2396 }, { "epoch": 0.16202514532918752, "grad_norm": 0.7680031061172485, "learning_rate": 2.8730244279665056e-05, "loss": 0.143646240234375, "step": 2397 }, { "epoch": 0.16209274030012166, "grad_norm": 0.882755696773529, "learning_rate": 2.8728915358682393e-05, "loss": 0.221160888671875, "step": 2398 }, { "epoch": 0.16216033527105583, "grad_norm": 1.3856743574142456, "learning_rate": 2.8727585773409424e-05, "loss": 0.2919921875, "step": 2399 }, { "epoch": 0.16222793024199, "grad_norm": 1.1514867544174194, "learning_rate": 2.872625552391048e-05, "loss": 0.262420654296875, "step": 2400 }, { "epoch": 0.16229552521292415, "grad_norm": 2.318035364151001, "learning_rate": 2.8724924610249938e-05, "loss": 0.30096435546875, "step": 2401 }, { "epoch": 0.16236312018385832, "grad_norm": 1.7913702726364136, "learning_rate": 2.8723593032492185e-05, "loss": 0.27801513671875, "step": 2402 }, { "epoch": 0.1624307151547925, "grad_norm": 1.7037606239318848, "learning_rate": 2.8722260790701656e-05, "loss": 0.23388671875, "step": 2403 }, { "epoch": 0.16249831012572666, "grad_norm": 1.9783551692962646, "learning_rate": 2.872092788494281e-05, "loss": 0.265838623046875, "step": 2404 }, { "epoch": 0.1625659050966608, "grad_norm": 2.006737470626831, "learning_rate": 2.8719594315280143e-05, "loss": 0.30572509765625, "step": 2405 }, { "epoch": 0.16263350006759497, "grad_norm": 1.951184868812561, "learning_rate": 2.8718260081778188e-05, "loss": 0.316650390625, "step": 2406 }, { "epoch": 0.16270109503852914, "grad_norm": 1.4141063690185547, "learning_rate": 2.8716925184501494e-05, "loss": 0.291900634765625, "step": 2407 }, { "epoch": 0.16276869000946328, "grad_norm": 1.0873935222625732, "learning_rate": 2.8715589623514654e-05, "loss": 0.204925537109375, "step": 2408 }, { "epoch": 0.16283628498039746, "grad_norm": 1.3144463300704956, "learning_rate": 2.8714253398882294e-05, "loss": 0.29278564453125, "step": 2409 }, { "epoch": 0.16290387995133163, "grad_norm": 1.3565987348556519, "learning_rate": 2.871291651066906e-05, "loss": 0.194366455078125, "step": 2410 }, { "epoch": 0.1629714749222658, "grad_norm": 1.7140454053878784, "learning_rate": 2.871157895893965e-05, "loss": 0.296875, "step": 2411 }, { "epoch": 0.16303906989319994, "grad_norm": 0.9079301357269287, "learning_rate": 2.871024074375878e-05, "loss": 0.230926513671875, "step": 2412 }, { "epoch": 0.1631066648641341, "grad_norm": 2.2566397190093994, "learning_rate": 2.8708901865191197e-05, "loss": 0.2911376953125, "step": 2413 }, { "epoch": 0.16317425983506828, "grad_norm": 1.8820050954818726, "learning_rate": 2.8707562323301694e-05, "loss": 0.2872314453125, "step": 2414 }, { "epoch": 0.16324185480600242, "grad_norm": 1.4968050718307495, "learning_rate": 2.8706222118155072e-05, "loss": 0.276519775390625, "step": 2415 }, { "epoch": 0.1633094497769366, "grad_norm": 1.8666696548461914, "learning_rate": 2.8704881249816188e-05, "loss": 0.17285537719726562, "step": 2416 }, { "epoch": 0.16337704474787076, "grad_norm": 1.0638775825500488, "learning_rate": 2.8703539718349917e-05, "loss": 0.18149566650390625, "step": 2417 }, { "epoch": 0.1634446397188049, "grad_norm": 2.0240800380706787, "learning_rate": 2.8702197523821176e-05, "loss": 0.354583740234375, "step": 2418 }, { "epoch": 0.16351223468973908, "grad_norm": 0.9783815741539001, "learning_rate": 2.8700854666294906e-05, "loss": 0.212921142578125, "step": 2419 }, { "epoch": 0.16357982966067325, "grad_norm": 1.6357780694961548, "learning_rate": 2.869951114583608e-05, "loss": 0.2713470458984375, "step": 2420 }, { "epoch": 0.16364742463160742, "grad_norm": 3.746232748031616, "learning_rate": 2.8698166962509707e-05, "loss": 0.3712158203125, "step": 2421 }, { "epoch": 0.16371501960254156, "grad_norm": 2.009188413619995, "learning_rate": 2.8696822116380836e-05, "loss": 0.22344970703125, "step": 2422 }, { "epoch": 0.16378261457347573, "grad_norm": 1.8121521472930908, "learning_rate": 2.8695476607514525e-05, "loss": 0.163116455078125, "step": 2423 }, { "epoch": 0.1638502095444099, "grad_norm": 3.4651026725769043, "learning_rate": 2.8694130435975883e-05, "loss": 0.3322296142578125, "step": 2424 }, { "epoch": 0.16391780451534405, "grad_norm": 3.6188430786132812, "learning_rate": 2.8692783601830056e-05, "loss": 0.28179931640625, "step": 2425 }, { "epoch": 0.16398539948627822, "grad_norm": 1.4766193628311157, "learning_rate": 2.8691436105142196e-05, "loss": 0.30230712890625, "step": 2426 }, { "epoch": 0.1640529944572124, "grad_norm": 1.3833073377609253, "learning_rate": 2.869008794597751e-05, "loss": 0.30914306640625, "step": 2427 }, { "epoch": 0.16412058942814656, "grad_norm": 2.1616456508636475, "learning_rate": 2.8688739124401237e-05, "loss": 0.2563323974609375, "step": 2428 }, { "epoch": 0.1641881843990807, "grad_norm": 1.6428205966949463, "learning_rate": 2.8687389640478633e-05, "loss": 0.305694580078125, "step": 2429 }, { "epoch": 0.16425577937001487, "grad_norm": 1.5236215591430664, "learning_rate": 2.8686039494275e-05, "loss": 0.2061309814453125, "step": 2430 }, { "epoch": 0.16432337434094904, "grad_norm": 1.4808319807052612, "learning_rate": 2.8684688685855663e-05, "loss": 0.1647186279296875, "step": 2431 }, { "epoch": 0.16439096931188318, "grad_norm": 4.245735168457031, "learning_rate": 2.868333721528598e-05, "loss": 0.33489990234375, "step": 2432 }, { "epoch": 0.16445856428281735, "grad_norm": 6.672616004943848, "learning_rate": 2.868198508263135e-05, "loss": 0.376220703125, "step": 2433 }, { "epoch": 0.16452615925375152, "grad_norm": 1.3824142217636108, "learning_rate": 2.868063228795719e-05, "loss": 0.24609375, "step": 2434 }, { "epoch": 0.1645937542246857, "grad_norm": 1.5659115314483643, "learning_rate": 2.8679278831328965e-05, "loss": 0.23577880859375, "step": 2435 }, { "epoch": 0.16466134919561984, "grad_norm": 1.7233589887619019, "learning_rate": 2.867792471281216e-05, "loss": 0.35614013671875, "step": 2436 }, { "epoch": 0.164728944166554, "grad_norm": 1.2198045253753662, "learning_rate": 2.8676569932472298e-05, "loss": 0.3052978515625, "step": 2437 }, { "epoch": 0.16479653913748818, "grad_norm": 2.7834813594818115, "learning_rate": 2.867521449037492e-05, "loss": 0.197021484375, "step": 2438 }, { "epoch": 0.16486413410842232, "grad_norm": 2.0257997512817383, "learning_rate": 2.8673858386585626e-05, "loss": 0.213470458984375, "step": 2439 }, { "epoch": 0.1649317290793565, "grad_norm": 1.1052913665771484, "learning_rate": 2.8672501621170026e-05, "loss": 0.33026123046875, "step": 2440 }, { "epoch": 0.16499932405029066, "grad_norm": 1.2368849515914917, "learning_rate": 2.8671144194193766e-05, "loss": 0.293212890625, "step": 2441 }, { "epoch": 0.16506691902122483, "grad_norm": 2.1667776107788086, "learning_rate": 2.8669786105722535e-05, "loss": 0.2428131103515625, "step": 2442 }, { "epoch": 0.16513451399215898, "grad_norm": 1.0164164304733276, "learning_rate": 2.8668427355822036e-05, "loss": 0.211944580078125, "step": 2443 }, { "epoch": 0.16520210896309315, "grad_norm": 1.2916438579559326, "learning_rate": 2.8667067944558022e-05, "loss": 0.196685791015625, "step": 2444 }, { "epoch": 0.16526970393402732, "grad_norm": 1.4224748611450195, "learning_rate": 2.8665707871996263e-05, "loss": 0.244720458984375, "step": 2445 }, { "epoch": 0.16533729890496146, "grad_norm": 1.3975344896316528, "learning_rate": 2.866434713820257e-05, "loss": 0.319091796875, "step": 2446 }, { "epoch": 0.16540489387589563, "grad_norm": 2.069849729537964, "learning_rate": 2.8662985743242794e-05, "loss": 0.1681365966796875, "step": 2447 }, { "epoch": 0.1654724888468298, "grad_norm": 1.4019418954849243, "learning_rate": 2.8661623687182787e-05, "loss": 0.23577880859375, "step": 2448 }, { "epoch": 0.16554008381776397, "grad_norm": 0.7292040586471558, "learning_rate": 2.8660260970088475e-05, "loss": 0.18603515625, "step": 2449 }, { "epoch": 0.16560767878869811, "grad_norm": 1.8671659231185913, "learning_rate": 2.8658897592025777e-05, "loss": 0.3172607421875, "step": 2450 }, { "epoch": 0.16567527375963229, "grad_norm": 1.046726107597351, "learning_rate": 2.8657533553060674e-05, "loss": 0.161346435546875, "step": 2451 }, { "epoch": 0.16574286873056646, "grad_norm": 1.6103988885879517, "learning_rate": 2.8656168853259162e-05, "loss": 0.2053375244140625, "step": 2452 }, { "epoch": 0.1658104637015006, "grad_norm": 1.6350903511047363, "learning_rate": 2.8654803492687275e-05, "loss": 0.262481689453125, "step": 2453 }, { "epoch": 0.16587805867243477, "grad_norm": 1.3958717584609985, "learning_rate": 2.865343747141107e-05, "loss": 0.24169921875, "step": 2454 }, { "epoch": 0.16594565364336894, "grad_norm": 1.8495205640792847, "learning_rate": 2.865207078949666e-05, "loss": 0.2896728515625, "step": 2455 }, { "epoch": 0.16601324861430308, "grad_norm": 2.622389316558838, "learning_rate": 2.8650703447010157e-05, "loss": 0.24578857421875, "step": 2456 }, { "epoch": 0.16608084358523725, "grad_norm": 1.4263508319854736, "learning_rate": 2.8649335444017733e-05, "loss": 0.2628173828125, "step": 2457 }, { "epoch": 0.16614843855617142, "grad_norm": 2.00179123878479, "learning_rate": 2.8647966780585575e-05, "loss": 0.34454345703125, "step": 2458 }, { "epoch": 0.1662160335271056, "grad_norm": 1.8444013595581055, "learning_rate": 2.8646597456779908e-05, "loss": 0.32855224609375, "step": 2459 }, { "epoch": 0.16628362849803974, "grad_norm": 1.698491096496582, "learning_rate": 2.8645227472666985e-05, "loss": 0.30828857421875, "step": 2460 }, { "epoch": 0.1663512234689739, "grad_norm": 1.4780837297439575, "learning_rate": 2.86438568283131e-05, "loss": 0.24951171875, "step": 2461 }, { "epoch": 0.16641881843990808, "grad_norm": 1.0272929668426514, "learning_rate": 2.8642485523784573e-05, "loss": 0.123565673828125, "step": 2462 }, { "epoch": 0.16648641341084222, "grad_norm": 3.02689266204834, "learning_rate": 2.8641113559147755e-05, "loss": 0.31939697265625, "step": 2463 }, { "epoch": 0.1665540083817764, "grad_norm": 1.2338659763336182, "learning_rate": 2.863974093446903e-05, "loss": 0.23309326171875, "step": 2464 }, { "epoch": 0.16662160335271056, "grad_norm": 1.5396618843078613, "learning_rate": 2.863836764981481e-05, "loss": 0.314208984375, "step": 2465 }, { "epoch": 0.16668919832364473, "grad_norm": 1.3925306797027588, "learning_rate": 2.8636993705251553e-05, "loss": 0.287200927734375, "step": 2466 }, { "epoch": 0.16675679329457888, "grad_norm": 0.9567561745643616, "learning_rate": 2.8635619100845726e-05, "loss": 0.201629638671875, "step": 2467 }, { "epoch": 0.16682438826551305, "grad_norm": 1.4108198881149292, "learning_rate": 2.8634243836663853e-05, "loss": 0.265411376953125, "step": 2468 }, { "epoch": 0.16689198323644722, "grad_norm": 0.8704434633255005, "learning_rate": 2.8632867912772473e-05, "loss": 0.19298553466796875, "step": 2469 }, { "epoch": 0.16695957820738136, "grad_norm": 2.073040723800659, "learning_rate": 2.863149132923816e-05, "loss": 0.21795654296875, "step": 2470 }, { "epoch": 0.16702717317831553, "grad_norm": 2.196323871612549, "learning_rate": 2.8630114086127524e-05, "loss": 0.252197265625, "step": 2471 }, { "epoch": 0.1670947681492497, "grad_norm": 1.0663251876831055, "learning_rate": 2.86287361835072e-05, "loss": 0.2667236328125, "step": 2472 }, { "epoch": 0.16716236312018387, "grad_norm": 0.8467109203338623, "learning_rate": 2.862735762144387e-05, "loss": 0.19382858276367188, "step": 2473 }, { "epoch": 0.16722995809111801, "grad_norm": 1.2323365211486816, "learning_rate": 2.8625978400004228e-05, "loss": 0.235198974609375, "step": 2474 }, { "epoch": 0.16729755306205218, "grad_norm": 1.1257092952728271, "learning_rate": 2.862459851925501e-05, "loss": 0.275146484375, "step": 2475 }, { "epoch": 0.16736514803298636, "grad_norm": 1.3752928972244263, "learning_rate": 2.8623217979262987e-05, "loss": 0.2716064453125, "step": 2476 }, { "epoch": 0.1674327430039205, "grad_norm": 1.9930915832519531, "learning_rate": 2.8621836780094955e-05, "loss": 0.36944580078125, "step": 2477 }, { "epoch": 0.16750033797485467, "grad_norm": 1.2759202718734741, "learning_rate": 2.8620454921817743e-05, "loss": 0.3179931640625, "step": 2478 }, { "epoch": 0.16756793294578884, "grad_norm": 1.7158607244491577, "learning_rate": 2.8619072404498224e-05, "loss": 0.3306884765625, "step": 2479 }, { "epoch": 0.167635527916723, "grad_norm": 2.7735462188720703, "learning_rate": 2.861768922820328e-05, "loss": 0.2919921875, "step": 2480 }, { "epoch": 0.16770312288765715, "grad_norm": 1.038447618484497, "learning_rate": 2.8616305392999848e-05, "loss": 0.22662353515625, "step": 2481 }, { "epoch": 0.16777071785859132, "grad_norm": 1.1479698419570923, "learning_rate": 2.861492089895488e-05, "loss": 0.290283203125, "step": 2482 }, { "epoch": 0.1678383128295255, "grad_norm": 1.5081456899642944, "learning_rate": 2.8613535746135367e-05, "loss": 0.36376953125, "step": 2483 }, { "epoch": 0.16790590780045964, "grad_norm": 1.6530537605285645, "learning_rate": 2.8612149934608335e-05, "loss": 0.197845458984375, "step": 2484 }, { "epoch": 0.1679735027713938, "grad_norm": 3.0896124839782715, "learning_rate": 2.8610763464440836e-05, "loss": 0.30377197265625, "step": 2485 }, { "epoch": 0.16804109774232798, "grad_norm": 0.7772501707077026, "learning_rate": 2.860937633569996e-05, "loss": 0.11272430419921875, "step": 2486 }, { "epoch": 0.16810869271326212, "grad_norm": 1.612212896347046, "learning_rate": 2.860798854845282e-05, "loss": 0.23797607421875, "step": 2487 }, { "epoch": 0.1681762876841963, "grad_norm": 1.552062749862671, "learning_rate": 2.8606600102766562e-05, "loss": 0.2665252685546875, "step": 2488 }, { "epoch": 0.16824388265513046, "grad_norm": 2.324479103088379, "learning_rate": 2.860521099870837e-05, "loss": 0.27978515625, "step": 2489 }, { "epoch": 0.16831147762606463, "grad_norm": 2.2096269130706787, "learning_rate": 2.8603821236345465e-05, "loss": 0.215545654296875, "step": 2490 }, { "epoch": 0.16837907259699877, "grad_norm": 2.9721083641052246, "learning_rate": 2.8602430815745086e-05, "loss": 0.244873046875, "step": 2491 }, { "epoch": 0.16844666756793295, "grad_norm": 1.8214890956878662, "learning_rate": 2.8601039736974517e-05, "loss": 0.2470245361328125, "step": 2492 }, { "epoch": 0.16851426253886712, "grad_norm": 1.745406985282898, "learning_rate": 2.8599648000101052e-05, "loss": 0.13370513916015625, "step": 2493 }, { "epoch": 0.16858185750980126, "grad_norm": 3.0424180030822754, "learning_rate": 2.8598255605192048e-05, "loss": 0.299713134765625, "step": 2494 }, { "epoch": 0.16864945248073543, "grad_norm": 1.5249433517456055, "learning_rate": 2.859686255231487e-05, "loss": 0.1757965087890625, "step": 2495 }, { "epoch": 0.1687170474516696, "grad_norm": 1.3973685503005981, "learning_rate": 2.8595468841536923e-05, "loss": 0.1578216552734375, "step": 2496 }, { "epoch": 0.16878464242260377, "grad_norm": 2.186661958694458, "learning_rate": 2.8594074472925647e-05, "loss": 0.32672119140625, "step": 2497 }, { "epoch": 0.1688522373935379, "grad_norm": 1.349412202835083, "learning_rate": 2.8592679446548506e-05, "loss": 0.27423095703125, "step": 2498 }, { "epoch": 0.16891983236447208, "grad_norm": 3.0484187602996826, "learning_rate": 2.8591283762473e-05, "loss": 0.36328125, "step": 2499 }, { "epoch": 0.16898742733540625, "grad_norm": 1.0799168348312378, "learning_rate": 2.858988742076666e-05, "loss": 0.211761474609375, "step": 2500 }, { "epoch": 0.1690550223063404, "grad_norm": 1.166839838027954, "learning_rate": 2.8588490421497054e-05, "loss": 0.18791961669921875, "step": 2501 }, { "epoch": 0.16912261727727457, "grad_norm": 2.3965137004852295, "learning_rate": 2.8587092764731778e-05, "loss": 0.39404296875, "step": 2502 }, { "epoch": 0.16919021224820874, "grad_norm": 2.700092077255249, "learning_rate": 2.8585694450538458e-05, "loss": 0.330963134765625, "step": 2503 }, { "epoch": 0.1692578072191429, "grad_norm": 1.4662421941757202, "learning_rate": 2.8584295478984748e-05, "loss": 0.217315673828125, "step": 2504 }, { "epoch": 0.16932540219007705, "grad_norm": 2.275418281555176, "learning_rate": 2.8582895850138343e-05, "loss": 0.2884521484375, "step": 2505 }, { "epoch": 0.16939299716101122, "grad_norm": 1.7000895738601685, "learning_rate": 2.8581495564066965e-05, "loss": 0.269744873046875, "step": 2506 }, { "epoch": 0.1694605921319454, "grad_norm": 1.8166635036468506, "learning_rate": 2.8580094620838373e-05, "loss": 0.22308349609375, "step": 2507 }, { "epoch": 0.16952818710287954, "grad_norm": 1.044893741607666, "learning_rate": 2.8578693020520348e-05, "loss": 0.240020751953125, "step": 2508 }, { "epoch": 0.1695957820738137, "grad_norm": 1.170410394668579, "learning_rate": 2.8577290763180708e-05, "loss": 0.21393585205078125, "step": 2509 }, { "epoch": 0.16966337704474788, "grad_norm": 1.4714323282241821, "learning_rate": 2.8575887848887305e-05, "loss": 0.25030517578125, "step": 2510 }, { "epoch": 0.16973097201568205, "grad_norm": 2.0935356616973877, "learning_rate": 2.857448427770802e-05, "loss": 0.29534912109375, "step": 2511 }, { "epoch": 0.1697985669866162, "grad_norm": 1.2040860652923584, "learning_rate": 2.857308004971077e-05, "loss": 0.259124755859375, "step": 2512 }, { "epoch": 0.16986616195755036, "grad_norm": 2.036588191986084, "learning_rate": 2.8571675164963493e-05, "loss": 0.348388671875, "step": 2513 }, { "epoch": 0.16993375692848453, "grad_norm": 1.7410913705825806, "learning_rate": 2.857026962353417e-05, "loss": 0.3427734375, "step": 2514 }, { "epoch": 0.17000135189941867, "grad_norm": 0.9728231430053711, "learning_rate": 2.8568863425490815e-05, "loss": 0.183441162109375, "step": 2515 }, { "epoch": 0.17006894687035284, "grad_norm": 1.180462121963501, "learning_rate": 2.8567456570901457e-05, "loss": 0.2436676025390625, "step": 2516 }, { "epoch": 0.17013654184128701, "grad_norm": 0.9091588854789734, "learning_rate": 2.8566049059834182e-05, "loss": 0.173492431640625, "step": 2517 }, { "epoch": 0.17020413681222116, "grad_norm": 1.9061939716339111, "learning_rate": 2.8564640892357084e-05, "loss": 0.233367919921875, "step": 2518 }, { "epoch": 0.17027173178315533, "grad_norm": 3.0631699562072754, "learning_rate": 2.85632320685383e-05, "loss": 0.2587890625, "step": 2519 }, { "epoch": 0.1703393267540895, "grad_norm": 1.9056099653244019, "learning_rate": 2.8561822588446e-05, "loss": 0.3177490234375, "step": 2520 }, { "epoch": 0.17040692172502367, "grad_norm": 1.2856106758117676, "learning_rate": 2.8560412452148387e-05, "loss": 0.1469573974609375, "step": 2521 }, { "epoch": 0.1704745166959578, "grad_norm": 1.3967623710632324, "learning_rate": 2.8559001659713686e-05, "loss": 0.30218505859375, "step": 2522 }, { "epoch": 0.17054211166689198, "grad_norm": 2.220950126647949, "learning_rate": 2.8557590211210157e-05, "loss": 0.26226806640625, "step": 2523 }, { "epoch": 0.17060970663782615, "grad_norm": 2.5987837314605713, "learning_rate": 2.855617810670611e-05, "loss": 0.288604736328125, "step": 2524 }, { "epoch": 0.1706773016087603, "grad_norm": 1.0502543449401855, "learning_rate": 2.8554765346269855e-05, "loss": 0.1646728515625, "step": 2525 }, { "epoch": 0.17074489657969447, "grad_norm": 1.4721875190734863, "learning_rate": 2.8553351929969758e-05, "loss": 0.2452392578125, "step": 2526 }, { "epoch": 0.17081249155062864, "grad_norm": 1.6285121440887451, "learning_rate": 2.8551937857874205e-05, "loss": 0.2587890625, "step": 2527 }, { "epoch": 0.1708800865215628, "grad_norm": 1.1418979167938232, "learning_rate": 2.8550523130051623e-05, "loss": 0.17350006103515625, "step": 2528 }, { "epoch": 0.17094768149249695, "grad_norm": 2.828144073486328, "learning_rate": 2.854910774657046e-05, "loss": 0.3480224609375, "step": 2529 }, { "epoch": 0.17101527646343112, "grad_norm": 1.185849666595459, "learning_rate": 2.8547691707499204e-05, "loss": 0.212890625, "step": 2530 }, { "epoch": 0.1710828714343653, "grad_norm": 2.691563367843628, "learning_rate": 2.854627501290637e-05, "loss": 0.32086181640625, "step": 2531 }, { "epoch": 0.17115046640529943, "grad_norm": 1.8496415615081787, "learning_rate": 2.854485766286051e-05, "loss": 0.30792236328125, "step": 2532 }, { "epoch": 0.1712180613762336, "grad_norm": 2.299084186553955, "learning_rate": 2.8543439657430197e-05, "loss": 0.242706298828125, "step": 2533 }, { "epoch": 0.17128565634716778, "grad_norm": 1.2916280031204224, "learning_rate": 2.8542020996684052e-05, "loss": 0.184356689453125, "step": 2534 }, { "epoch": 0.17135325131810195, "grad_norm": 1.423941731452942, "learning_rate": 2.854060168069071e-05, "loss": 0.208343505859375, "step": 2535 }, { "epoch": 0.1714208462890361, "grad_norm": 1.3379894495010376, "learning_rate": 2.8539181709518857e-05, "loss": 0.3260498046875, "step": 2536 }, { "epoch": 0.17148844125997026, "grad_norm": 2.507503032684326, "learning_rate": 2.853776108323719e-05, "loss": 0.338134765625, "step": 2537 }, { "epoch": 0.17155603623090443, "grad_norm": 1.197853684425354, "learning_rate": 2.8536339801914453e-05, "loss": 0.196136474609375, "step": 2538 }, { "epoch": 0.17162363120183857, "grad_norm": 0.5841957330703735, "learning_rate": 2.853491786561941e-05, "loss": 0.1035308837890625, "step": 2539 }, { "epoch": 0.17169122617277274, "grad_norm": 1.6923221349716187, "learning_rate": 2.853349527442087e-05, "loss": 0.1510162353515625, "step": 2540 }, { "epoch": 0.1717588211437069, "grad_norm": 3.0226454734802246, "learning_rate": 2.853207202838767e-05, "loss": 0.33770751953125, "step": 2541 }, { "epoch": 0.17182641611464108, "grad_norm": 3.3754420280456543, "learning_rate": 2.8530648127588667e-05, "loss": 0.28753662109375, "step": 2542 }, { "epoch": 0.17189401108557523, "grad_norm": 3.0014281272888184, "learning_rate": 2.852922357209276e-05, "loss": 0.2711181640625, "step": 2543 }, { "epoch": 0.1719616060565094, "grad_norm": 0.9988149404525757, "learning_rate": 2.8527798361968877e-05, "loss": 0.249847412109375, "step": 2544 }, { "epoch": 0.17202920102744357, "grad_norm": 1.644179105758667, "learning_rate": 2.8526372497285988e-05, "loss": 0.322509765625, "step": 2545 }, { "epoch": 0.1720967959983777, "grad_norm": 0.9515352249145508, "learning_rate": 2.8524945978113073e-05, "loss": 0.26165771484375, "step": 2546 }, { "epoch": 0.17216439096931188, "grad_norm": 1.5665391683578491, "learning_rate": 2.852351880451916e-05, "loss": 0.2560272216796875, "step": 2547 }, { "epoch": 0.17223198594024605, "grad_norm": 2.615316867828369, "learning_rate": 2.8522090976573306e-05, "loss": 0.2737579345703125, "step": 2548 }, { "epoch": 0.1722995809111802, "grad_norm": 1.2307016849517822, "learning_rate": 2.8520662494344602e-05, "loss": 0.15511322021484375, "step": 2549 }, { "epoch": 0.17236717588211437, "grad_norm": 2.3848280906677246, "learning_rate": 2.8519233357902157e-05, "loss": 0.2900390625, "step": 2550 }, { "epoch": 0.17243477085304854, "grad_norm": 1.5495461225509644, "learning_rate": 2.8517803567315127e-05, "loss": 0.136627197265625, "step": 2551 }, { "epoch": 0.1725023658239827, "grad_norm": 2.821817636489868, "learning_rate": 2.8516373122652695e-05, "loss": 0.279541015625, "step": 2552 }, { "epoch": 0.17256996079491685, "grad_norm": 1.467230200767517, "learning_rate": 2.8514942023984075e-05, "loss": 0.27728271484375, "step": 2553 }, { "epoch": 0.17263755576585102, "grad_norm": 1.9813827276229858, "learning_rate": 2.851351027137851e-05, "loss": 0.365234375, "step": 2554 }, { "epoch": 0.1727051507367852, "grad_norm": 1.2386987209320068, "learning_rate": 2.8512077864905283e-05, "loss": 0.21436309814453125, "step": 2555 }, { "epoch": 0.17277274570771933, "grad_norm": 1.1011552810668945, "learning_rate": 2.851064480463369e-05, "loss": 0.0811309814453125, "step": 2556 }, { "epoch": 0.1728403406786535, "grad_norm": 1.1465325355529785, "learning_rate": 2.8509211090633086e-05, "loss": 0.228546142578125, "step": 2557 }, { "epoch": 0.17290793564958767, "grad_norm": 3.409461259841919, "learning_rate": 2.850777672297284e-05, "loss": 0.2923736572265625, "step": 2558 }, { "epoch": 0.17297553062052184, "grad_norm": 1.3769842386245728, "learning_rate": 2.8506341701722342e-05, "loss": 0.242706298828125, "step": 2559 }, { "epoch": 0.173043125591456, "grad_norm": 1.9611834287643433, "learning_rate": 2.8504906026951042e-05, "loss": 0.26885986328125, "step": 2560 }, { "epoch": 0.17311072056239016, "grad_norm": 0.9022801518440247, "learning_rate": 2.8503469698728403e-05, "loss": 0.204498291015625, "step": 2561 }, { "epoch": 0.17317831553332433, "grad_norm": 1.8234471082687378, "learning_rate": 2.850203271712392e-05, "loss": 0.28936767578125, "step": 2562 }, { "epoch": 0.17324591050425847, "grad_norm": 2.745314836502075, "learning_rate": 2.8500595082207132e-05, "loss": 0.35357666015625, "step": 2563 }, { "epoch": 0.17331350547519264, "grad_norm": 2.637407064437866, "learning_rate": 2.849915679404759e-05, "loss": 0.2939605712890625, "step": 2564 }, { "epoch": 0.1733811004461268, "grad_norm": 2.2310659885406494, "learning_rate": 2.8497717852714895e-05, "loss": 0.2928466796875, "step": 2565 }, { "epoch": 0.17344869541706098, "grad_norm": 1.641907811164856, "learning_rate": 2.8496278258278665e-05, "loss": 0.32110595703125, "step": 2566 }, { "epoch": 0.17351629038799513, "grad_norm": 1.5862902402877808, "learning_rate": 2.849483801080856e-05, "loss": 0.28729248046875, "step": 2567 }, { "epoch": 0.1735838853589293, "grad_norm": 2.039144515991211, "learning_rate": 2.8493397110374274e-05, "loss": 0.283905029296875, "step": 2568 }, { "epoch": 0.17365148032986347, "grad_norm": 1.5499389171600342, "learning_rate": 2.8491955557045517e-05, "loss": 0.2706298828125, "step": 2569 }, { "epoch": 0.1737190753007976, "grad_norm": 2.294490337371826, "learning_rate": 2.8490513350892045e-05, "loss": 0.31597900390625, "step": 2570 }, { "epoch": 0.17378667027173178, "grad_norm": 4.081048965454102, "learning_rate": 2.8489070491983644e-05, "loss": 0.2772216796875, "step": 2571 }, { "epoch": 0.17385426524266595, "grad_norm": 2.078111171722412, "learning_rate": 2.848762698039012e-05, "loss": 0.311737060546875, "step": 2572 }, { "epoch": 0.17392186021360012, "grad_norm": 1.2573002576828003, "learning_rate": 2.8486182816181326e-05, "loss": 0.2000885009765625, "step": 2573 }, { "epoch": 0.17398945518453426, "grad_norm": 2.09712290763855, "learning_rate": 2.8484737999427137e-05, "loss": 0.33856201171875, "step": 2574 }, { "epoch": 0.17405705015546843, "grad_norm": 1.0651724338531494, "learning_rate": 2.8483292530197468e-05, "loss": 0.226715087890625, "step": 2575 }, { "epoch": 0.1741246451264026, "grad_norm": 1.1406214237213135, "learning_rate": 2.8481846408562247e-05, "loss": 0.1267852783203125, "step": 2576 }, { "epoch": 0.17419224009733675, "grad_norm": 1.8470818996429443, "learning_rate": 2.848039963459146e-05, "loss": 0.2082061767578125, "step": 2577 }, { "epoch": 0.17425983506827092, "grad_norm": 1.1543645858764648, "learning_rate": 2.84789522083551e-05, "loss": 0.2247314453125, "step": 2578 }, { "epoch": 0.1743274300392051, "grad_norm": 1.7316771745681763, "learning_rate": 2.8477504129923213e-05, "loss": 0.29888916015625, "step": 2579 }, { "epoch": 0.17439502501013923, "grad_norm": 1.6967699527740479, "learning_rate": 2.8476055399365857e-05, "loss": 0.326446533203125, "step": 2580 }, { "epoch": 0.1744626199810734, "grad_norm": 1.7224756479263306, "learning_rate": 2.8474606016753136e-05, "loss": 0.24688720703125, "step": 2581 }, { "epoch": 0.17453021495200757, "grad_norm": 2.4482054710388184, "learning_rate": 2.8473155982155174e-05, "loss": 0.32672119140625, "step": 2582 }, { "epoch": 0.17459780992294174, "grad_norm": 1.0455777645111084, "learning_rate": 2.847170529564214e-05, "loss": 0.194732666015625, "step": 2583 }, { "epoch": 0.1746654048938759, "grad_norm": 1.3599021434783936, "learning_rate": 2.8470253957284225e-05, "loss": 0.3214111328125, "step": 2584 }, { "epoch": 0.17473299986481006, "grad_norm": 0.789287805557251, "learning_rate": 2.846880196715165e-05, "loss": 0.149383544921875, "step": 2585 }, { "epoch": 0.17480059483574423, "grad_norm": 5.327151775360107, "learning_rate": 2.8467349325314677e-05, "loss": 0.40850830078125, "step": 2586 }, { "epoch": 0.17486818980667837, "grad_norm": 3.375697135925293, "learning_rate": 2.8465896031843587e-05, "loss": 0.29302978515625, "step": 2587 }, { "epoch": 0.17493578477761254, "grad_norm": 1.3126840591430664, "learning_rate": 2.8464442086808707e-05, "loss": 0.324066162109375, "step": 2588 }, { "epoch": 0.1750033797485467, "grad_norm": 0.5437417030334473, "learning_rate": 2.8462987490280383e-05, "loss": 0.1122894287109375, "step": 2589 }, { "epoch": 0.17507097471948088, "grad_norm": 2.792102575302124, "learning_rate": 2.8461532242329e-05, "loss": 0.3189697265625, "step": 2590 }, { "epoch": 0.17513856969041502, "grad_norm": 0.8625727295875549, "learning_rate": 2.8460076343024972e-05, "loss": 0.1809539794921875, "step": 2591 }, { "epoch": 0.1752061646613492, "grad_norm": 1.4192122220993042, "learning_rate": 2.8458619792438744e-05, "loss": 0.231903076171875, "step": 2592 }, { "epoch": 0.17527375963228337, "grad_norm": 1.6566749811172485, "learning_rate": 2.845716259064079e-05, "loss": 0.28265380859375, "step": 2593 }, { "epoch": 0.1753413546032175, "grad_norm": 1.510857343673706, "learning_rate": 2.845570473770162e-05, "loss": 0.26824951171875, "step": 2594 }, { "epoch": 0.17540894957415168, "grad_norm": 1.4555307626724243, "learning_rate": 2.8454246233691774e-05, "loss": 0.32952880859375, "step": 2595 }, { "epoch": 0.17547654454508585, "grad_norm": 1.6056619882583618, "learning_rate": 2.8452787078681823e-05, "loss": 0.236419677734375, "step": 2596 }, { "epoch": 0.17554413951602002, "grad_norm": 1.5662977695465088, "learning_rate": 2.8451327272742377e-05, "loss": 0.2437896728515625, "step": 2597 }, { "epoch": 0.17561173448695416, "grad_norm": 3.5785765647888184, "learning_rate": 2.844986681594406e-05, "loss": 0.32501220703125, "step": 2598 }, { "epoch": 0.17567932945788833, "grad_norm": 2.3012845516204834, "learning_rate": 2.8448405708357544e-05, "loss": 0.3034820556640625, "step": 2599 }, { "epoch": 0.1757469244288225, "grad_norm": 0.9213380217552185, "learning_rate": 2.8446943950053524e-05, "loss": 0.171875, "step": 2600 }, { "epoch": 0.17581451939975665, "grad_norm": 1.2629406452178955, "learning_rate": 2.844548154110273e-05, "loss": 0.27734375, "step": 2601 }, { "epoch": 0.17588211437069082, "grad_norm": 2.324599504470825, "learning_rate": 2.8444018481575923e-05, "loss": 0.2587890625, "step": 2602 }, { "epoch": 0.175949709341625, "grad_norm": 1.1800737380981445, "learning_rate": 2.8442554771543896e-05, "loss": 0.23089599609375, "step": 2603 }, { "epoch": 0.17601730431255916, "grad_norm": 1.8377186059951782, "learning_rate": 2.844109041107747e-05, "loss": 0.279144287109375, "step": 2604 }, { "epoch": 0.1760848992834933, "grad_norm": 1.9342190027236938, "learning_rate": 2.8439625400247502e-05, "loss": 0.25177001953125, "step": 2605 }, { "epoch": 0.17615249425442747, "grad_norm": 1.175121545791626, "learning_rate": 2.8438159739124873e-05, "loss": 0.2562255859375, "step": 2606 }, { "epoch": 0.17622008922536164, "grad_norm": 1.5735985040664673, "learning_rate": 2.8436693427780508e-05, "loss": 0.337158203125, "step": 2607 }, { "epoch": 0.17628768419629579, "grad_norm": 3.132147789001465, "learning_rate": 2.8435226466285352e-05, "loss": 0.32666015625, "step": 2608 }, { "epoch": 0.17635527916722996, "grad_norm": 1.9005597829818726, "learning_rate": 2.8433758854710387e-05, "loss": 0.3128662109375, "step": 2609 }, { "epoch": 0.17642287413816413, "grad_norm": 1.5875272750854492, "learning_rate": 2.8432290593126627e-05, "loss": 0.33984375, "step": 2610 }, { "epoch": 0.1764904691090983, "grad_norm": 1.9028421640396118, "learning_rate": 2.8430821681605112e-05, "loss": 0.2755126953125, "step": 2611 }, { "epoch": 0.17655806408003244, "grad_norm": 1.5018794536590576, "learning_rate": 2.8429352120216914e-05, "loss": 0.26434326171875, "step": 2612 }, { "epoch": 0.1766256590509666, "grad_norm": 1.2546671628952026, "learning_rate": 2.8427881909033147e-05, "loss": 0.325653076171875, "step": 2613 }, { "epoch": 0.17669325402190078, "grad_norm": 2.2602996826171875, "learning_rate": 2.8426411048124952e-05, "loss": 0.326904296875, "step": 2614 }, { "epoch": 0.17676084899283492, "grad_norm": 1.2439472675323486, "learning_rate": 2.8424939537563483e-05, "loss": 0.23480224609375, "step": 2615 }, { "epoch": 0.1768284439637691, "grad_norm": 1.4353299140930176, "learning_rate": 2.842346737741996e-05, "loss": 0.25030517578125, "step": 2616 }, { "epoch": 0.17689603893470326, "grad_norm": 1.5215243101119995, "learning_rate": 2.84219945677656e-05, "loss": 0.22003173828125, "step": 2617 }, { "epoch": 0.1769636339056374, "grad_norm": 2.5436432361602783, "learning_rate": 2.842052110867167e-05, "loss": 0.24298095703125, "step": 2618 }, { "epoch": 0.17703122887657158, "grad_norm": 1.9660677909851074, "learning_rate": 2.8419047000209473e-05, "loss": 0.2869873046875, "step": 2619 }, { "epoch": 0.17709882384750575, "grad_norm": 1.6911677122116089, "learning_rate": 2.8417572242450327e-05, "loss": 0.36602783203125, "step": 2620 }, { "epoch": 0.17716641881843992, "grad_norm": 1.671105980873108, "learning_rate": 2.8416096835465594e-05, "loss": 0.1782379150390625, "step": 2621 }, { "epoch": 0.17723401378937406, "grad_norm": 3.228189706802368, "learning_rate": 2.841462077932666e-05, "loss": 0.283477783203125, "step": 2622 }, { "epoch": 0.17730160876030823, "grad_norm": 1.6189496517181396, "learning_rate": 2.8413144074104952e-05, "loss": 0.25518798828125, "step": 2623 }, { "epoch": 0.1773692037312424, "grad_norm": 3.1591646671295166, "learning_rate": 2.8411666719871917e-05, "loss": 0.306396484375, "step": 2624 }, { "epoch": 0.17743679870217655, "grad_norm": 1.1351054906845093, "learning_rate": 2.8410188716699037e-05, "loss": 0.23688507080078125, "step": 2625 }, { "epoch": 0.17750439367311072, "grad_norm": 1.4222602844238281, "learning_rate": 2.8408710064657835e-05, "loss": 0.28857421875, "step": 2626 }, { "epoch": 0.1775719886440449, "grad_norm": 1.1006460189819336, "learning_rate": 2.8407230763819848e-05, "loss": 0.22333526611328125, "step": 2627 }, { "epoch": 0.17763958361497906, "grad_norm": 1.4558234214782715, "learning_rate": 2.8405750814256657e-05, "loss": 0.252288818359375, "step": 2628 }, { "epoch": 0.1777071785859132, "grad_norm": 3.129554271697998, "learning_rate": 2.8404270216039873e-05, "loss": 0.29974365234375, "step": 2629 }, { "epoch": 0.17777477355684737, "grad_norm": 1.347793698310852, "learning_rate": 2.840278896924114e-05, "loss": 0.265960693359375, "step": 2630 }, { "epoch": 0.17784236852778154, "grad_norm": 1.497734546661377, "learning_rate": 2.8401307073932123e-05, "loss": 0.1966094970703125, "step": 2631 }, { "epoch": 0.17790996349871568, "grad_norm": 2.8622469902038574, "learning_rate": 2.8399824530184526e-05, "loss": 0.2801513671875, "step": 2632 }, { "epoch": 0.17797755846964985, "grad_norm": 0.9678397178649902, "learning_rate": 2.8398341338070085e-05, "loss": 0.236785888671875, "step": 2633 }, { "epoch": 0.17804515344058403, "grad_norm": 1.6030913591384888, "learning_rate": 2.8396857497660572e-05, "loss": 0.29083251953125, "step": 2634 }, { "epoch": 0.1781127484115182, "grad_norm": 1.2587107419967651, "learning_rate": 2.8395373009027777e-05, "loss": 0.205047607421875, "step": 2635 }, { "epoch": 0.17818034338245234, "grad_norm": 1.3417942523956299, "learning_rate": 2.8393887872243528e-05, "loss": 0.2841796875, "step": 2636 }, { "epoch": 0.1782479383533865, "grad_norm": 1.7724758386611938, "learning_rate": 2.839240208737969e-05, "loss": 0.275970458984375, "step": 2637 }, { "epoch": 0.17831553332432068, "grad_norm": 1.30728280544281, "learning_rate": 2.8390915654508153e-05, "loss": 0.1788330078125, "step": 2638 }, { "epoch": 0.17838312829525482, "grad_norm": 0.8855612874031067, "learning_rate": 2.838942857370084e-05, "loss": 0.10648775100708008, "step": 2639 }, { "epoch": 0.178450723266189, "grad_norm": 2.0286049842834473, "learning_rate": 2.8387940845029703e-05, "loss": 0.228485107421875, "step": 2640 }, { "epoch": 0.17851831823712316, "grad_norm": 1.1745320558547974, "learning_rate": 2.8386452468566726e-05, "loss": 0.17681884765625, "step": 2641 }, { "epoch": 0.17858591320805733, "grad_norm": 2.617760181427002, "learning_rate": 2.8384963444383936e-05, "loss": 0.2498779296875, "step": 2642 }, { "epoch": 0.17865350817899148, "grad_norm": 1.0442754030227661, "learning_rate": 2.8383473772553367e-05, "loss": 0.1485595703125, "step": 2643 }, { "epoch": 0.17872110314992565, "grad_norm": 2.026895523071289, "learning_rate": 2.838198345314711e-05, "loss": 0.223388671875, "step": 2644 }, { "epoch": 0.17878869812085982, "grad_norm": 2.4420506954193115, "learning_rate": 2.8380492486237276e-05, "loss": 0.2295684814453125, "step": 2645 }, { "epoch": 0.17885629309179396, "grad_norm": 1.4300518035888672, "learning_rate": 2.8379000871895997e-05, "loss": 0.173614501953125, "step": 2646 }, { "epoch": 0.17892388806272813, "grad_norm": 1.5688230991363525, "learning_rate": 2.8377508610195453e-05, "loss": 0.3040771484375, "step": 2647 }, { "epoch": 0.1789914830336623, "grad_norm": 1.8502466678619385, "learning_rate": 2.8376015701207855e-05, "loss": 0.3775634765625, "step": 2648 }, { "epoch": 0.17905907800459644, "grad_norm": 1.0999199151992798, "learning_rate": 2.8374522145005425e-05, "loss": 0.244964599609375, "step": 2649 }, { "epoch": 0.17912667297553062, "grad_norm": 1.4676495790481567, "learning_rate": 2.837302794166044e-05, "loss": 0.16546630859375, "step": 2650 }, { "epoch": 0.17919426794646479, "grad_norm": 1.8471699953079224, "learning_rate": 2.8371533091245203e-05, "loss": 0.25653076171875, "step": 2651 }, { "epoch": 0.17926186291739896, "grad_norm": 0.986691951751709, "learning_rate": 2.8370037593832033e-05, "loss": 0.1853179931640625, "step": 2652 }, { "epoch": 0.1793294578883331, "grad_norm": 1.7667301893234253, "learning_rate": 2.8368541449493294e-05, "loss": 0.35345458984375, "step": 2653 }, { "epoch": 0.17939705285926727, "grad_norm": 1.9408766031265259, "learning_rate": 2.8367044658301388e-05, "loss": 0.318603515625, "step": 2654 }, { "epoch": 0.17946464783020144, "grad_norm": 1.8233920335769653, "learning_rate": 2.8365547220328733e-05, "loss": 0.362274169921875, "step": 2655 }, { "epoch": 0.17953224280113558, "grad_norm": 1.8142215013504028, "learning_rate": 2.836404913564778e-05, "loss": 0.30133056640625, "step": 2656 }, { "epoch": 0.17959983777206975, "grad_norm": 1.4648311138153076, "learning_rate": 2.836255040433102e-05, "loss": 0.1917724609375, "step": 2657 }, { "epoch": 0.17966743274300392, "grad_norm": 1.2856284379959106, "learning_rate": 2.8361051026450973e-05, "loss": 0.2689399719238281, "step": 2658 }, { "epoch": 0.1797350277139381, "grad_norm": 0.9965981841087341, "learning_rate": 2.8359551002080185e-05, "loss": 0.225677490234375, "step": 2659 }, { "epoch": 0.17980262268487224, "grad_norm": 1.2609305381774902, "learning_rate": 2.8358050331291237e-05, "loss": 0.300750732421875, "step": 2660 }, { "epoch": 0.1798702176558064, "grad_norm": 1.5188961029052734, "learning_rate": 2.835654901415674e-05, "loss": 0.2613525390625, "step": 2661 }, { "epoch": 0.17993781262674058, "grad_norm": 2.062674045562744, "learning_rate": 2.835504705074934e-05, "loss": 0.33984375, "step": 2662 }, { "epoch": 0.18000540759767472, "grad_norm": 1.5419567823410034, "learning_rate": 2.8353544441141707e-05, "loss": 0.24871826171875, "step": 2663 }, { "epoch": 0.1800730025686089, "grad_norm": 1.3580684661865234, "learning_rate": 2.835204118540655e-05, "loss": 0.32000732421875, "step": 2664 }, { "epoch": 0.18014059753954306, "grad_norm": 1.223468542098999, "learning_rate": 2.8350537283616606e-05, "loss": 0.23406982421875, "step": 2665 }, { "epoch": 0.18020819251047723, "grad_norm": 0.9868462085723877, "learning_rate": 2.8349032735844643e-05, "loss": 0.2354736328125, "step": 2666 }, { "epoch": 0.18027578748141138, "grad_norm": 1.1561908721923828, "learning_rate": 2.8347527542163455e-05, "loss": 0.2109222412109375, "step": 2667 }, { "epoch": 0.18034338245234555, "grad_norm": 0.7441369891166687, "learning_rate": 2.8346021702645885e-05, "loss": 0.205718994140625, "step": 2668 }, { "epoch": 0.18041097742327972, "grad_norm": 1.7562810182571411, "learning_rate": 2.8344515217364782e-05, "loss": 0.28912353515625, "step": 2669 }, { "epoch": 0.18047857239421386, "grad_norm": 1.2666261196136475, "learning_rate": 2.8343008086393046e-05, "loss": 0.297332763671875, "step": 2670 }, { "epoch": 0.18054616736514803, "grad_norm": 1.819847583770752, "learning_rate": 2.8341500309803598e-05, "loss": 0.204345703125, "step": 2671 }, { "epoch": 0.1806137623360822, "grad_norm": 1.215306043624878, "learning_rate": 2.8339991887669392e-05, "loss": 0.16558837890625, "step": 2672 }, { "epoch": 0.18068135730701637, "grad_norm": 2.0358898639678955, "learning_rate": 2.8338482820063422e-05, "loss": 0.3471221923828125, "step": 2673 }, { "epoch": 0.18074895227795051, "grad_norm": 2.357722043991089, "learning_rate": 2.8336973107058703e-05, "loss": 0.217041015625, "step": 2674 }, { "epoch": 0.18081654724888468, "grad_norm": 1.8344281911849976, "learning_rate": 2.833546274872828e-05, "loss": 0.3131103515625, "step": 2675 }, { "epoch": 0.18088414221981886, "grad_norm": 1.0866711139678955, "learning_rate": 2.833395174514524e-05, "loss": 0.207977294921875, "step": 2676 }, { "epoch": 0.180951737190753, "grad_norm": 1.0148664712905884, "learning_rate": 2.833244009638269e-05, "loss": 0.242889404296875, "step": 2677 }, { "epoch": 0.18101933216168717, "grad_norm": 2.07664155960083, "learning_rate": 2.8330927802513774e-05, "loss": 0.20599365234375, "step": 2678 }, { "epoch": 0.18108692713262134, "grad_norm": 2.77012038230896, "learning_rate": 2.8329414863611667e-05, "loss": 0.3302001953125, "step": 2679 }, { "epoch": 0.18115452210355548, "grad_norm": 2.909193515777588, "learning_rate": 2.8327901279749575e-05, "loss": 0.2205657958984375, "step": 2680 }, { "epoch": 0.18122211707448965, "grad_norm": 4.411342144012451, "learning_rate": 2.8326387051000736e-05, "loss": 0.27520751953125, "step": 2681 }, { "epoch": 0.18128971204542382, "grad_norm": 1.7492170333862305, "learning_rate": 2.832487217743841e-05, "loss": 0.18084716796875, "step": 2682 }, { "epoch": 0.181357307016358, "grad_norm": 2.5704526901245117, "learning_rate": 2.8323356659135903e-05, "loss": 0.35418701171875, "step": 2683 }, { "epoch": 0.18142490198729214, "grad_norm": 2.6402995586395264, "learning_rate": 2.8321840496166547e-05, "loss": 0.2438201904296875, "step": 2684 }, { "epoch": 0.1814924969582263, "grad_norm": 1.4306623935699463, "learning_rate": 2.8320323688603698e-05, "loss": 0.2208709716796875, "step": 2685 }, { "epoch": 0.18156009192916048, "grad_norm": 2.0225372314453125, "learning_rate": 2.831880623652075e-05, "loss": 0.2925567626953125, "step": 2686 }, { "epoch": 0.18162768690009462, "grad_norm": 1.4650464057922363, "learning_rate": 2.831728813999113e-05, "loss": 0.236663818359375, "step": 2687 }, { "epoch": 0.1816952818710288, "grad_norm": 2.366351842880249, "learning_rate": 2.8315769399088287e-05, "loss": 0.2339935302734375, "step": 2688 }, { "epoch": 0.18176287684196296, "grad_norm": 2.4736413955688477, "learning_rate": 2.8314250013885713e-05, "loss": 0.29833984375, "step": 2689 }, { "epoch": 0.18183047181289713, "grad_norm": 1.1948097944259644, "learning_rate": 2.8312729984456925e-05, "loss": 0.1426239013671875, "step": 2690 }, { "epoch": 0.18189806678383127, "grad_norm": 2.047680139541626, "learning_rate": 2.8311209310875466e-05, "loss": 0.21551513671875, "step": 2691 }, { "epoch": 0.18196566175476545, "grad_norm": 0.6927260756492615, "learning_rate": 2.830968799321492e-05, "loss": 0.1488037109375, "step": 2692 }, { "epoch": 0.18203325672569962, "grad_norm": 2.137377977371216, "learning_rate": 2.8308166031548896e-05, "loss": 0.296722412109375, "step": 2693 }, { "epoch": 0.18210085169663376, "grad_norm": 1.6056857109069824, "learning_rate": 2.830664342595104e-05, "loss": 0.239044189453125, "step": 2694 }, { "epoch": 0.18216844666756793, "grad_norm": 1.0863072872161865, "learning_rate": 2.8305120176495018e-05, "loss": 0.25, "step": 2695 }, { "epoch": 0.1822360416385021, "grad_norm": 0.9802674651145935, "learning_rate": 2.830359628325454e-05, "loss": 0.1527862548828125, "step": 2696 }, { "epoch": 0.18230363660943627, "grad_norm": 1.1178748607635498, "learning_rate": 2.830207174630334e-05, "loss": 0.225494384765625, "step": 2697 }, { "epoch": 0.1823712315803704, "grad_norm": 1.6681503057479858, "learning_rate": 2.8300546565715183e-05, "loss": 0.31805419921875, "step": 2698 }, { "epoch": 0.18243882655130458, "grad_norm": 3.4373674392700195, "learning_rate": 2.8299020741563867e-05, "loss": 0.300506591796875, "step": 2699 }, { "epoch": 0.18250642152223875, "grad_norm": 1.3043099641799927, "learning_rate": 2.8297494273923227e-05, "loss": 0.322265625, "step": 2700 }, { "epoch": 0.1825740164931729, "grad_norm": 1.5111421346664429, "learning_rate": 2.8295967162867113e-05, "loss": 0.35943603515625, "step": 2701 }, { "epoch": 0.18264161146410707, "grad_norm": 1.4058334827423096, "learning_rate": 2.8294439408469423e-05, "loss": 0.211700439453125, "step": 2702 }, { "epoch": 0.18270920643504124, "grad_norm": 1.794021487236023, "learning_rate": 2.8292911010804076e-05, "loss": 0.2430419921875, "step": 2703 }, { "epoch": 0.1827768014059754, "grad_norm": 1.7631250619888306, "learning_rate": 2.8291381969945027e-05, "loss": 0.39093017578125, "step": 2704 }, { "epoch": 0.18284439637690955, "grad_norm": 2.4726409912109375, "learning_rate": 2.828985228596626e-05, "loss": 0.333526611328125, "step": 2705 }, { "epoch": 0.18291199134784372, "grad_norm": 1.5273058414459229, "learning_rate": 2.828832195894179e-05, "loss": 0.278839111328125, "step": 2706 }, { "epoch": 0.1829795863187779, "grad_norm": 2.7681944370269775, "learning_rate": 2.8286790988945665e-05, "loss": 0.4052734375, "step": 2707 }, { "epoch": 0.18304718128971204, "grad_norm": 1.2249606847763062, "learning_rate": 2.8285259376051962e-05, "loss": 0.259185791015625, "step": 2708 }, { "epoch": 0.1831147762606462, "grad_norm": 1.3639694452285767, "learning_rate": 2.828372712033479e-05, "loss": 0.193756103515625, "step": 2709 }, { "epoch": 0.18318237123158038, "grad_norm": 1.3127968311309814, "learning_rate": 2.8282194221868292e-05, "loss": 0.2808837890625, "step": 2710 }, { "epoch": 0.18324996620251452, "grad_norm": 1.5828065872192383, "learning_rate": 2.828066068072663e-05, "loss": 0.3192138671875, "step": 2711 }, { "epoch": 0.1833175611734487, "grad_norm": 2.2309346199035645, "learning_rate": 2.8279126496984018e-05, "loss": 0.267425537109375, "step": 2712 }, { "epoch": 0.18338515614438286, "grad_norm": 1.075560450553894, "learning_rate": 2.8277591670714688e-05, "loss": 0.19469833374023438, "step": 2713 }, { "epoch": 0.18345275111531703, "grad_norm": 1.6171141862869263, "learning_rate": 2.8276056201992894e-05, "loss": 0.243743896484375, "step": 2714 }, { "epoch": 0.18352034608625117, "grad_norm": 1.4523593187332153, "learning_rate": 2.8274520090892937e-05, "loss": 0.2440338134765625, "step": 2715 }, { "epoch": 0.18358794105718534, "grad_norm": 1.099446415901184, "learning_rate": 2.8272983337489146e-05, "loss": 0.161834716796875, "step": 2716 }, { "epoch": 0.18365553602811951, "grad_norm": 0.9473217725753784, "learning_rate": 2.827144594185588e-05, "loss": 0.215240478515625, "step": 2717 }, { "epoch": 0.18372313099905366, "grad_norm": 1.2196323871612549, "learning_rate": 2.8269907904067526e-05, "loss": 0.2666015625, "step": 2718 }, { "epoch": 0.18379072596998783, "grad_norm": 0.5594713091850281, "learning_rate": 2.8268369224198502e-05, "loss": 0.093170166015625, "step": 2719 }, { "epoch": 0.183858320940922, "grad_norm": 1.1427685022354126, "learning_rate": 2.8266829902323254e-05, "loss": 0.34722900390625, "step": 2720 }, { "epoch": 0.18392591591185617, "grad_norm": 1.0828856229782104, "learning_rate": 2.8265289938516275e-05, "loss": 0.190460205078125, "step": 2721 }, { "epoch": 0.1839935108827903, "grad_norm": 1.990659475326538, "learning_rate": 2.8263749332852072e-05, "loss": 0.221160888671875, "step": 2722 }, { "epoch": 0.18406110585372448, "grad_norm": 1.4091218709945679, "learning_rate": 2.826220808540519e-05, "loss": 0.3355712890625, "step": 2723 }, { "epoch": 0.18412870082465865, "grad_norm": 1.1238635778427124, "learning_rate": 2.8260666196250204e-05, "loss": 0.190032958984375, "step": 2724 }, { "epoch": 0.1841962957955928, "grad_norm": 1.4014790058135986, "learning_rate": 2.8259123665461723e-05, "loss": 0.25604248046875, "step": 2725 }, { "epoch": 0.18426389076652697, "grad_norm": 1.0375396013259888, "learning_rate": 2.8257580493114376e-05, "loss": 0.272216796875, "step": 2726 }, { "epoch": 0.18433148573746114, "grad_norm": 2.4469902515411377, "learning_rate": 2.8256036679282843e-05, "loss": 0.3310546875, "step": 2727 }, { "epoch": 0.1843990807083953, "grad_norm": 2.8637702465057373, "learning_rate": 2.8254492224041814e-05, "loss": 0.279754638671875, "step": 2728 }, { "epoch": 0.18446667567932945, "grad_norm": 1.117681860923767, "learning_rate": 2.825294712746602e-05, "loss": 0.1969146728515625, "step": 2729 }, { "epoch": 0.18453427065026362, "grad_norm": 1.6911966800689697, "learning_rate": 2.8251401389630227e-05, "loss": 0.26336669921875, "step": 2730 }, { "epoch": 0.1846018656211978, "grad_norm": 0.7493424415588379, "learning_rate": 2.8249855010609225e-05, "loss": 0.1141204833984375, "step": 2731 }, { "epoch": 0.18466946059213193, "grad_norm": 1.4139935970306396, "learning_rate": 2.824830799047784e-05, "loss": 0.19573974609375, "step": 2732 }, { "epoch": 0.1847370555630661, "grad_norm": 1.7755247354507446, "learning_rate": 2.824676032931092e-05, "loss": 0.30487060546875, "step": 2733 }, { "epoch": 0.18480465053400028, "grad_norm": 1.9821865558624268, "learning_rate": 2.824521202718336e-05, "loss": 0.252655029296875, "step": 2734 }, { "epoch": 0.18487224550493445, "grad_norm": 1.0365959405899048, "learning_rate": 2.824366308417006e-05, "loss": 0.21874237060546875, "step": 2735 }, { "epoch": 0.1849398404758686, "grad_norm": 1.437105655670166, "learning_rate": 2.8242113500345988e-05, "loss": 0.23016357421875, "step": 2736 }, { "epoch": 0.18500743544680276, "grad_norm": 1.2949438095092773, "learning_rate": 2.8240563275786112e-05, "loss": 0.25469970703125, "step": 2737 }, { "epoch": 0.18507503041773693, "grad_norm": 0.9668265581130981, "learning_rate": 2.8239012410565443e-05, "loss": 0.15821075439453125, "step": 2738 }, { "epoch": 0.18514262538867107, "grad_norm": 1.0358964204788208, "learning_rate": 2.8237460904759018e-05, "loss": 0.168975830078125, "step": 2739 }, { "epoch": 0.18521022035960524, "grad_norm": 2.094684600830078, "learning_rate": 2.8235908758441914e-05, "loss": 0.267425537109375, "step": 2740 }, { "epoch": 0.1852778153305394, "grad_norm": 1.8081986904144287, "learning_rate": 2.8234355971689226e-05, "loss": 0.303741455078125, "step": 2741 }, { "epoch": 0.18534541030147356, "grad_norm": 1.2445331811904907, "learning_rate": 2.8232802544576097e-05, "loss": 0.239593505859375, "step": 2742 }, { "epoch": 0.18541300527240773, "grad_norm": 1.2289214134216309, "learning_rate": 2.8231248477177688e-05, "loss": 0.20501708984375, "step": 2743 }, { "epoch": 0.1854806002433419, "grad_norm": 1.407996654510498, "learning_rate": 2.822969376956919e-05, "loss": 0.287872314453125, "step": 2744 }, { "epoch": 0.18554819521427607, "grad_norm": 3.259767770767212, "learning_rate": 2.822813842182584e-05, "loss": 0.2543487548828125, "step": 2745 }, { "epoch": 0.1856157901852102, "grad_norm": 2.7333011627197266, "learning_rate": 2.822658243402288e-05, "loss": 0.3266754150390625, "step": 2746 }, { "epoch": 0.18568338515614438, "grad_norm": 1.333486795425415, "learning_rate": 2.8225025806235612e-05, "loss": 0.286651611328125, "step": 2747 }, { "epoch": 0.18575098012707855, "grad_norm": 1.57785165309906, "learning_rate": 2.8223468538539344e-05, "loss": 0.2418670654296875, "step": 2748 }, { "epoch": 0.1858185750980127, "grad_norm": 1.4820277690887451, "learning_rate": 2.8221910631009434e-05, "loss": 0.24114990234375, "step": 2749 }, { "epoch": 0.18588617006894687, "grad_norm": 1.5490010976791382, "learning_rate": 2.822035208372127e-05, "loss": 0.34613037109375, "step": 2750 }, { "epoch": 0.18595376503988104, "grad_norm": 1.06184720993042, "learning_rate": 2.8218792896750247e-05, "loss": 0.20654296875, "step": 2751 }, { "epoch": 0.1860213600108152, "grad_norm": 2.792581081390381, "learning_rate": 2.8217233070171816e-05, "loss": 0.37396240234375, "step": 2752 }, { "epoch": 0.18608895498174935, "grad_norm": 1.3627921342849731, "learning_rate": 2.8215672604061453e-05, "loss": 0.260528564453125, "step": 2753 }, { "epoch": 0.18615654995268352, "grad_norm": 1.0203243494033813, "learning_rate": 2.821411149849467e-05, "loss": 0.126708984375, "step": 2754 }, { "epoch": 0.1862241449236177, "grad_norm": 0.9466544985771179, "learning_rate": 2.8212549753546983e-05, "loss": 0.212066650390625, "step": 2755 }, { "epoch": 0.18629173989455183, "grad_norm": 2.4925711154937744, "learning_rate": 2.8210987369293977e-05, "loss": 0.3203125, "step": 2756 }, { "epoch": 0.186359334865486, "grad_norm": 1.3054678440093994, "learning_rate": 2.8209424345811247e-05, "loss": 0.2880859375, "step": 2757 }, { "epoch": 0.18642692983642017, "grad_norm": 1.4190194606781006, "learning_rate": 2.8207860683174415e-05, "loss": 0.320068359375, "step": 2758 }, { "epoch": 0.18649452480735434, "grad_norm": 1.9017667770385742, "learning_rate": 2.8206296381459143e-05, "loss": 0.34112548828125, "step": 2759 }, { "epoch": 0.1865621197782885, "grad_norm": 1.1994727849960327, "learning_rate": 2.820473144074113e-05, "loss": 0.249786376953125, "step": 2760 }, { "epoch": 0.18662971474922266, "grad_norm": 1.9733375310897827, "learning_rate": 2.8203165861096078e-05, "loss": 0.294677734375, "step": 2761 }, { "epoch": 0.18669730972015683, "grad_norm": 1.3637768030166626, "learning_rate": 2.820159964259976e-05, "loss": 0.222259521484375, "step": 2762 }, { "epoch": 0.18676490469109097, "grad_norm": 0.9241870045661926, "learning_rate": 2.820003278532795e-05, "loss": 0.2187347412109375, "step": 2763 }, { "epoch": 0.18683249966202514, "grad_norm": 1.3493008613586426, "learning_rate": 2.8198465289356462e-05, "loss": 0.244842529296875, "step": 2764 }, { "epoch": 0.1869000946329593, "grad_norm": 1.475054144859314, "learning_rate": 2.819689715476115e-05, "loss": 0.306488037109375, "step": 2765 }, { "epoch": 0.18696768960389348, "grad_norm": 3.1583642959594727, "learning_rate": 2.8195328381617878e-05, "loss": 0.350830078125, "step": 2766 }, { "epoch": 0.18703528457482763, "grad_norm": 1.9174669981002808, "learning_rate": 2.8193758970002557e-05, "loss": 0.238006591796875, "step": 2767 }, { "epoch": 0.1871028795457618, "grad_norm": 1.687839388847351, "learning_rate": 2.819218891999113e-05, "loss": 0.183990478515625, "step": 2768 }, { "epoch": 0.18717047451669597, "grad_norm": 1.261143684387207, "learning_rate": 2.8190618231659557e-05, "loss": 0.270477294921875, "step": 2769 }, { "epoch": 0.1872380694876301, "grad_norm": 1.931889295578003, "learning_rate": 2.8189046905083845e-05, "loss": 0.23333740234375, "step": 2770 }, { "epoch": 0.18730566445856428, "grad_norm": 1.1847343444824219, "learning_rate": 2.818747494034002e-05, "loss": 0.1603546142578125, "step": 2771 }, { "epoch": 0.18737325942949845, "grad_norm": 1.6458240747451782, "learning_rate": 2.8185902337504146e-05, "loss": 0.33905029296875, "step": 2772 }, { "epoch": 0.18744085440043262, "grad_norm": 1.4683212041854858, "learning_rate": 2.8184329096652317e-05, "loss": 0.265228271484375, "step": 2773 }, { "epoch": 0.18750844937136676, "grad_norm": 1.378387689590454, "learning_rate": 2.818275521786065e-05, "loss": 0.261962890625, "step": 2774 }, { "epoch": 0.18757604434230093, "grad_norm": 2.3678555488586426, "learning_rate": 2.8181180701205308e-05, "loss": 0.216949462890625, "step": 2775 }, { "epoch": 0.1876436393132351, "grad_norm": 1.9137799739837646, "learning_rate": 2.817960554676247e-05, "loss": 0.319580078125, "step": 2776 }, { "epoch": 0.18771123428416925, "grad_norm": 3.6585469245910645, "learning_rate": 2.8178029754608347e-05, "loss": 0.3619384765625, "step": 2777 }, { "epoch": 0.18777882925510342, "grad_norm": 1.1239465475082397, "learning_rate": 2.8176453324819194e-05, "loss": 0.225860595703125, "step": 2778 }, { "epoch": 0.1878464242260376, "grad_norm": 1.1652804613113403, "learning_rate": 2.8174876257471285e-05, "loss": 0.1937408447265625, "step": 2779 }, { "epoch": 0.18791401919697173, "grad_norm": 1.7736414670944214, "learning_rate": 2.8173298552640932e-05, "loss": 0.2596893310546875, "step": 2780 }, { "epoch": 0.1879816141679059, "grad_norm": 1.873227834701538, "learning_rate": 2.817172021040447e-05, "loss": 0.277099609375, "step": 2781 }, { "epoch": 0.18804920913884007, "grad_norm": 2.502650499343872, "learning_rate": 2.8170141230838266e-05, "loss": 0.202728271484375, "step": 2782 }, { "epoch": 0.18811680410977424, "grad_norm": 1.7113851308822632, "learning_rate": 2.816856161401873e-05, "loss": 0.29083251953125, "step": 2783 }, { "epoch": 0.1881843990807084, "grad_norm": 1.2257741689682007, "learning_rate": 2.816698136002229e-05, "loss": 0.3402099609375, "step": 2784 }, { "epoch": 0.18825199405164256, "grad_norm": 1.7123080492019653, "learning_rate": 2.8165400468925405e-05, "loss": 0.26751708984375, "step": 2785 }, { "epoch": 0.18831958902257673, "grad_norm": 1.9138537645339966, "learning_rate": 2.8163818940804565e-05, "loss": 0.313720703125, "step": 2786 }, { "epoch": 0.18838718399351087, "grad_norm": 4.551046848297119, "learning_rate": 2.816223677573631e-05, "loss": 0.2900543212890625, "step": 2787 }, { "epoch": 0.18845477896444504, "grad_norm": 1.9249324798583984, "learning_rate": 2.8160653973797175e-05, "loss": 0.2423095703125, "step": 2788 }, { "epoch": 0.1885223739353792, "grad_norm": 6.6175360679626465, "learning_rate": 2.815907053506376e-05, "loss": 0.41278076171875, "step": 2789 }, { "epoch": 0.18858996890631338, "grad_norm": 2.4804937839508057, "learning_rate": 2.815748645961268e-05, "loss": 0.33123779296875, "step": 2790 }, { "epoch": 0.18865756387724752, "grad_norm": 4.05133056640625, "learning_rate": 2.8155901747520574e-05, "loss": 0.4071044921875, "step": 2791 }, { "epoch": 0.1887251588481817, "grad_norm": 2.098219633102417, "learning_rate": 2.8154316398864128e-05, "loss": 0.2857666015625, "step": 2792 }, { "epoch": 0.18879275381911587, "grad_norm": 2.3772220611572266, "learning_rate": 2.815273041372005e-05, "loss": 0.29449462890625, "step": 2793 }, { "epoch": 0.18886034879005, "grad_norm": 1.040909767150879, "learning_rate": 2.815114379216508e-05, "loss": 0.2467498779296875, "step": 2794 }, { "epoch": 0.18892794376098418, "grad_norm": 1.1324111223220825, "learning_rate": 2.814955653427598e-05, "loss": 0.23406982421875, "step": 2795 }, { "epoch": 0.18899553873191835, "grad_norm": 2.0982460975646973, "learning_rate": 2.814796864012957e-05, "loss": 0.2802734375, "step": 2796 }, { "epoch": 0.18906313370285252, "grad_norm": 2.354822874069214, "learning_rate": 2.814638010980266e-05, "loss": 0.26470947265625, "step": 2797 }, { "epoch": 0.18913072867378666, "grad_norm": 1.0570638179779053, "learning_rate": 2.8144790943372128e-05, "loss": 0.1538543701171875, "step": 2798 }, { "epoch": 0.18919832364472083, "grad_norm": 2.1589653491973877, "learning_rate": 2.8143201140914866e-05, "loss": 0.16855621337890625, "step": 2799 }, { "epoch": 0.189265918615655, "grad_norm": 1.4375503063201904, "learning_rate": 2.8141610702507797e-05, "loss": 0.337066650390625, "step": 2800 }, { "epoch": 0.18933351358658915, "grad_norm": 1.748233675956726, "learning_rate": 2.8140019628227868e-05, "loss": 0.32135009765625, "step": 2801 }, { "epoch": 0.18940110855752332, "grad_norm": 1.9764072895050049, "learning_rate": 2.813842791815208e-05, "loss": 0.2891845703125, "step": 2802 }, { "epoch": 0.1894687035284575, "grad_norm": 1.7201505899429321, "learning_rate": 2.813683557235744e-05, "loss": 0.2906341552734375, "step": 2803 }, { "epoch": 0.18953629849939166, "grad_norm": 1.8161051273345947, "learning_rate": 2.8135242590921e-05, "loss": 0.297088623046875, "step": 2804 }, { "epoch": 0.1896038934703258, "grad_norm": 1.8666085004806519, "learning_rate": 2.813364897391983e-05, "loss": 0.2451171875, "step": 2805 }, { "epoch": 0.18967148844125997, "grad_norm": 0.6169743537902832, "learning_rate": 2.8132054721431045e-05, "loss": 0.134307861328125, "step": 2806 }, { "epoch": 0.18973908341219414, "grad_norm": 1.724754810333252, "learning_rate": 2.8130459833531793e-05, "loss": 0.302276611328125, "step": 2807 }, { "epoch": 0.18980667838312829, "grad_norm": 1.1043100357055664, "learning_rate": 2.812886431029923e-05, "loss": 0.171356201171875, "step": 2808 }, { "epoch": 0.18987427335406246, "grad_norm": 2.085298538208008, "learning_rate": 2.812726815181057e-05, "loss": 0.3516845703125, "step": 2809 }, { "epoch": 0.18994186832499663, "grad_norm": 4.731112003326416, "learning_rate": 2.8125671358143035e-05, "loss": 0.385101318359375, "step": 2810 }, { "epoch": 0.19000946329593077, "grad_norm": 1.175297737121582, "learning_rate": 2.8124073929373898e-05, "loss": 0.3104248046875, "step": 2811 }, { "epoch": 0.19007705826686494, "grad_norm": 1.5565649271011353, "learning_rate": 2.8122475865580437e-05, "loss": 0.202056884765625, "step": 2812 }, { "epoch": 0.1901446532377991, "grad_norm": 1.922711730003357, "learning_rate": 2.8120877166839992e-05, "loss": 0.19549560546875, "step": 2813 }, { "epoch": 0.19021224820873328, "grad_norm": 1.06428861618042, "learning_rate": 2.811927783322991e-05, "loss": 0.23931884765625, "step": 2814 }, { "epoch": 0.19027984317966742, "grad_norm": 1.2230968475341797, "learning_rate": 2.811767786482758e-05, "loss": 0.292724609375, "step": 2815 }, { "epoch": 0.1903474381506016, "grad_norm": 1.1104307174682617, "learning_rate": 2.8116077261710413e-05, "loss": 0.211639404296875, "step": 2816 }, { "epoch": 0.19041503312153576, "grad_norm": 2.422739028930664, "learning_rate": 2.8114476023955865e-05, "loss": 0.2418212890625, "step": 2817 }, { "epoch": 0.1904826280924699, "grad_norm": 1.3348205089569092, "learning_rate": 2.8112874151641407e-05, "loss": 0.33807373046875, "step": 2818 }, { "epoch": 0.19055022306340408, "grad_norm": 1.6126484870910645, "learning_rate": 2.811127164484455e-05, "loss": 0.208892822265625, "step": 2819 }, { "epoch": 0.19061781803433825, "grad_norm": 1.5484740734100342, "learning_rate": 2.810966850364283e-05, "loss": 0.27392578125, "step": 2820 }, { "epoch": 0.19068541300527242, "grad_norm": 1.5552895069122314, "learning_rate": 2.8108064728113825e-05, "loss": 0.246185302734375, "step": 2821 }, { "epoch": 0.19075300797620656, "grad_norm": 1.149000644683838, "learning_rate": 2.8106460318335124e-05, "loss": 0.268157958984375, "step": 2822 }, { "epoch": 0.19082060294714073, "grad_norm": 1.7094223499298096, "learning_rate": 2.810485527438437e-05, "loss": 0.283599853515625, "step": 2823 }, { "epoch": 0.1908881979180749, "grad_norm": 0.930774986743927, "learning_rate": 2.8103249596339214e-05, "loss": 0.239349365234375, "step": 2824 }, { "epoch": 0.19095579288900905, "grad_norm": 1.3360321521759033, "learning_rate": 2.810164328427736e-05, "loss": 0.188232421875, "step": 2825 }, { "epoch": 0.19102338785994322, "grad_norm": 1.4355043172836304, "learning_rate": 2.810003633827652e-05, "loss": 0.299896240234375, "step": 2826 }, { "epoch": 0.1910909828308774, "grad_norm": 3.010958194732666, "learning_rate": 2.8098428758414458e-05, "loss": 0.250335693359375, "step": 2827 }, { "epoch": 0.19115857780181156, "grad_norm": 1.3491714000701904, "learning_rate": 2.8096820544768955e-05, "loss": 0.22711181640625, "step": 2828 }, { "epoch": 0.1912261727727457, "grad_norm": 1.6230229139328003, "learning_rate": 2.8095211697417823e-05, "loss": 0.26263427734375, "step": 2829 }, { "epoch": 0.19129376774367987, "grad_norm": 2.081747055053711, "learning_rate": 2.8093602216438914e-05, "loss": 0.2393646240234375, "step": 2830 }, { "epoch": 0.19136136271461404, "grad_norm": 1.3397456407546997, "learning_rate": 2.8091992101910094e-05, "loss": 0.2938232421875, "step": 2831 }, { "epoch": 0.19142895768554818, "grad_norm": 1.4430485963821411, "learning_rate": 2.809038135390929e-05, "loss": 0.296600341796875, "step": 2832 }, { "epoch": 0.19149655265648236, "grad_norm": 1.6086353063583374, "learning_rate": 2.808876997251442e-05, "loss": 0.211456298828125, "step": 2833 }, { "epoch": 0.19156414762741653, "grad_norm": 0.9377428889274597, "learning_rate": 2.808715795780346e-05, "loss": 0.300628662109375, "step": 2834 }, { "epoch": 0.1916317425983507, "grad_norm": 1.170734167098999, "learning_rate": 2.808554530985441e-05, "loss": 0.216583251953125, "step": 2835 }, { "epoch": 0.19169933756928484, "grad_norm": 0.9582401514053345, "learning_rate": 2.80839320287453e-05, "loss": 0.200653076171875, "step": 2836 }, { "epoch": 0.191766932540219, "grad_norm": 1.6416369676589966, "learning_rate": 2.8082318114554192e-05, "loss": 0.203460693359375, "step": 2837 }, { "epoch": 0.19183452751115318, "grad_norm": 0.6992636919021606, "learning_rate": 2.8080703567359177e-05, "loss": 0.191070556640625, "step": 2838 }, { "epoch": 0.19190212248208732, "grad_norm": 1.5367807149887085, "learning_rate": 2.8079088387238375e-05, "loss": 0.1590576171875, "step": 2839 }, { "epoch": 0.1919697174530215, "grad_norm": 1.4914624691009521, "learning_rate": 2.8077472574269934e-05, "loss": 0.298675537109375, "step": 2840 }, { "epoch": 0.19203731242395566, "grad_norm": 2.635840892791748, "learning_rate": 2.807585612853205e-05, "loss": 0.33819580078125, "step": 2841 }, { "epoch": 0.1921049073948898, "grad_norm": 1.062591552734375, "learning_rate": 2.8074239050102923e-05, "loss": 0.30731201171875, "step": 2842 }, { "epoch": 0.19217250236582398, "grad_norm": 1.9252125024795532, "learning_rate": 2.80726213390608e-05, "loss": 0.3651123046875, "step": 2843 }, { "epoch": 0.19224009733675815, "grad_norm": 1.0615248680114746, "learning_rate": 2.8071002995483967e-05, "loss": 0.20556640625, "step": 2844 }, { "epoch": 0.19230769230769232, "grad_norm": 3.2595443725585938, "learning_rate": 2.8069384019450715e-05, "loss": 0.289764404296875, "step": 2845 }, { "epoch": 0.19237528727862646, "grad_norm": 1.239891529083252, "learning_rate": 2.8067764411039388e-05, "loss": 0.306915283203125, "step": 2846 }, { "epoch": 0.19244288224956063, "grad_norm": 1.9607418775558472, "learning_rate": 2.8066144170328354e-05, "loss": 0.36346435546875, "step": 2847 }, { "epoch": 0.1925104772204948, "grad_norm": 2.4995737075805664, "learning_rate": 2.8064523297396004e-05, "loss": 0.25103759765625, "step": 2848 }, { "epoch": 0.19257807219142895, "grad_norm": 3.126345157623291, "learning_rate": 2.8062901792320773e-05, "loss": 0.27911376953125, "step": 2849 }, { "epoch": 0.19264566716236312, "grad_norm": 1.8414113521575928, "learning_rate": 2.8061279655181114e-05, "loss": 0.233917236328125, "step": 2850 }, { "epoch": 0.1927132621332973, "grad_norm": 2.296654224395752, "learning_rate": 2.805965688605552e-05, "loss": 0.310791015625, "step": 2851 }, { "epoch": 0.19278085710423146, "grad_norm": 0.8875904679298401, "learning_rate": 2.8058033485022502e-05, "loss": 0.19696044921875, "step": 2852 }, { "epoch": 0.1928484520751656, "grad_norm": 1.0379514694213867, "learning_rate": 2.8056409452160627e-05, "loss": 0.2784423828125, "step": 2853 }, { "epoch": 0.19291604704609977, "grad_norm": 1.5559189319610596, "learning_rate": 2.8054784787548457e-05, "loss": 0.277496337890625, "step": 2854 }, { "epoch": 0.19298364201703394, "grad_norm": 1.3892892599105835, "learning_rate": 2.8053159491264617e-05, "loss": 0.269500732421875, "step": 2855 }, { "epoch": 0.19305123698796808, "grad_norm": 1.0765652656555176, "learning_rate": 2.805153356338774e-05, "loss": 0.207122802734375, "step": 2856 }, { "epoch": 0.19311883195890225, "grad_norm": 2.3225181102752686, "learning_rate": 2.8049907003996504e-05, "loss": 0.4193115234375, "step": 2857 }, { "epoch": 0.19318642692983642, "grad_norm": 2.37443208694458, "learning_rate": 2.8048279813169613e-05, "loss": 0.245941162109375, "step": 2858 }, { "epoch": 0.1932540219007706, "grad_norm": 1.5857388973236084, "learning_rate": 2.8046651990985796e-05, "loss": 0.2235107421875, "step": 2859 }, { "epoch": 0.19332161687170474, "grad_norm": 1.4284439086914062, "learning_rate": 2.804502353752382e-05, "loss": 0.242431640625, "step": 2860 }, { "epoch": 0.1933892118426389, "grad_norm": 1.3990083932876587, "learning_rate": 2.8043394452862482e-05, "loss": 0.3135986328125, "step": 2861 }, { "epoch": 0.19345680681357308, "grad_norm": 1.2503798007965088, "learning_rate": 2.8041764737080605e-05, "loss": 0.2784423828125, "step": 2862 }, { "epoch": 0.19352440178450722, "grad_norm": 0.9278662204742432, "learning_rate": 2.804013439025704e-05, "loss": 0.169097900390625, "step": 2863 }, { "epoch": 0.1935919967554414, "grad_norm": 1.121964454650879, "learning_rate": 2.8038503412470677e-05, "loss": 0.210845947265625, "step": 2864 }, { "epoch": 0.19365959172637556, "grad_norm": 1.2347164154052734, "learning_rate": 2.8036871803800433e-05, "loss": 0.22503662109375, "step": 2865 }, { "epoch": 0.19372718669730973, "grad_norm": 1.5287137031555176, "learning_rate": 2.803523956432526e-05, "loss": 0.26995849609375, "step": 2866 }, { "epoch": 0.19379478166824388, "grad_norm": 0.8545823693275452, "learning_rate": 2.803360669412413e-05, "loss": 0.1662139892578125, "step": 2867 }, { "epoch": 0.19386237663917805, "grad_norm": 0.9173135757446289, "learning_rate": 2.803197319327605e-05, "loss": 0.236419677734375, "step": 2868 }, { "epoch": 0.19392997161011222, "grad_norm": 1.3930625915527344, "learning_rate": 2.8030339061860068e-05, "loss": 0.3359375, "step": 2869 }, { "epoch": 0.19399756658104636, "grad_norm": 1.298912525177002, "learning_rate": 2.802870429995524e-05, "loss": 0.26214599609375, "step": 2870 }, { "epoch": 0.19406516155198053, "grad_norm": 1.0623464584350586, "learning_rate": 2.802706890764068e-05, "loss": 0.250885009765625, "step": 2871 }, { "epoch": 0.1941327565229147, "grad_norm": 1.253537654876709, "learning_rate": 2.802543288499551e-05, "loss": 0.229888916015625, "step": 2872 }, { "epoch": 0.19420035149384884, "grad_norm": 1.3367869853973389, "learning_rate": 2.8023796232098886e-05, "loss": 0.2906494140625, "step": 2873 }, { "epoch": 0.19426794646478301, "grad_norm": 1.679614543914795, "learning_rate": 2.8022158949030015e-05, "loss": 0.253570556640625, "step": 2874 }, { "epoch": 0.19433554143571719, "grad_norm": 1.2300750017166138, "learning_rate": 2.8020521035868103e-05, "loss": 0.24627685546875, "step": 2875 }, { "epoch": 0.19440313640665136, "grad_norm": 1.625988483428955, "learning_rate": 2.8018882492692416e-05, "loss": 0.321746826171875, "step": 2876 }, { "epoch": 0.1944707313775855, "grad_norm": 2.8180267810821533, "learning_rate": 2.801724331958223e-05, "loss": 0.2299957275390625, "step": 2877 }, { "epoch": 0.19453832634851967, "grad_norm": 1.1409218311309814, "learning_rate": 2.8015603516616855e-05, "loss": 0.1917724609375, "step": 2878 }, { "epoch": 0.19460592131945384, "grad_norm": 1.8523468971252441, "learning_rate": 2.801396308387564e-05, "loss": 0.325103759765625, "step": 2879 }, { "epoch": 0.19467351629038798, "grad_norm": 1.97926926612854, "learning_rate": 2.801232202143796e-05, "loss": 0.284423828125, "step": 2880 }, { "epoch": 0.19474111126132215, "grad_norm": 1.5029120445251465, "learning_rate": 2.8010680329383213e-05, "loss": 0.30029296875, "step": 2881 }, { "epoch": 0.19480870623225632, "grad_norm": 1.934716820716858, "learning_rate": 2.8009038007790843e-05, "loss": 0.37249755859375, "step": 2882 }, { "epoch": 0.1948763012031905, "grad_norm": 0.9456402063369751, "learning_rate": 2.800739505674031e-05, "loss": 0.196807861328125, "step": 2883 }, { "epoch": 0.19494389617412464, "grad_norm": 1.4959841966629028, "learning_rate": 2.8005751476311114e-05, "loss": 0.30303955078125, "step": 2884 }, { "epoch": 0.1950114911450588, "grad_norm": 1.8658086061477661, "learning_rate": 2.8004107266582777e-05, "loss": 0.21490478515625, "step": 2885 }, { "epoch": 0.19507908611599298, "grad_norm": 1.1127384901046753, "learning_rate": 2.800246242763486e-05, "loss": 0.24139404296875, "step": 2886 }, { "epoch": 0.19514668108692712, "grad_norm": 1.3502641916275024, "learning_rate": 2.8000816959546954e-05, "loss": 0.2651214599609375, "step": 2887 }, { "epoch": 0.1952142760578613, "grad_norm": 1.3029311895370483, "learning_rate": 2.7999170862398663e-05, "loss": 0.24957275390625, "step": 2888 }, { "epoch": 0.19528187102879546, "grad_norm": 2.4410853385925293, "learning_rate": 2.7997524136269654e-05, "loss": 0.311126708984375, "step": 2889 }, { "epoch": 0.19534946599972963, "grad_norm": 1.3499302864074707, "learning_rate": 2.7995876781239594e-05, "loss": 0.263916015625, "step": 2890 }, { "epoch": 0.19541706097066378, "grad_norm": 1.3083633184432983, "learning_rate": 2.799422879738819e-05, "loss": 0.315948486328125, "step": 2891 }, { "epoch": 0.19548465594159795, "grad_norm": 1.7560955286026, "learning_rate": 2.7992580184795188e-05, "loss": 0.29168701171875, "step": 2892 }, { "epoch": 0.19555225091253212, "grad_norm": 1.6373333930969238, "learning_rate": 2.7990930943540363e-05, "loss": 0.243072509765625, "step": 2893 }, { "epoch": 0.19561984588346626, "grad_norm": 1.1796430349349976, "learning_rate": 2.7989281073703502e-05, "loss": 0.2197265625, "step": 2894 }, { "epoch": 0.19568744085440043, "grad_norm": 1.994292974472046, "learning_rate": 2.7987630575364448e-05, "loss": 0.308502197265625, "step": 2895 }, { "epoch": 0.1957550358253346, "grad_norm": 1.3839062452316284, "learning_rate": 2.798597944860305e-05, "loss": 0.238616943359375, "step": 2896 }, { "epoch": 0.19582263079626877, "grad_norm": 0.8862487077713013, "learning_rate": 2.798432769349922e-05, "loss": 0.21002197265625, "step": 2897 }, { "epoch": 0.1958902257672029, "grad_norm": 1.3569459915161133, "learning_rate": 2.7982675310132857e-05, "loss": 0.1681671142578125, "step": 2898 }, { "epoch": 0.19595782073813708, "grad_norm": 3.6075422763824463, "learning_rate": 2.798102229858393e-05, "loss": 0.29949951171875, "step": 2899 }, { "epoch": 0.19602541570907125, "grad_norm": 1.6687564849853516, "learning_rate": 2.7979368658932413e-05, "loss": 0.284759521484375, "step": 2900 }, { "epoch": 0.1960930106800054, "grad_norm": 1.590660810470581, "learning_rate": 2.797771439125832e-05, "loss": 0.3001708984375, "step": 2901 }, { "epoch": 0.19616060565093957, "grad_norm": 1.0789748430252075, "learning_rate": 2.79760594956417e-05, "loss": 0.20501708984375, "step": 2902 }, { "epoch": 0.19622820062187374, "grad_norm": 1.5931557416915894, "learning_rate": 2.7974403972162627e-05, "loss": 0.281280517578125, "step": 2903 }, { "epoch": 0.19629579559280788, "grad_norm": 1.69313645362854, "learning_rate": 2.7972747820901197e-05, "loss": 0.241485595703125, "step": 2904 }, { "epoch": 0.19636339056374205, "grad_norm": 2.3320095539093018, "learning_rate": 2.7971091041937553e-05, "loss": 0.33160400390625, "step": 2905 }, { "epoch": 0.19643098553467622, "grad_norm": 1.4148508310317993, "learning_rate": 2.7969433635351862e-05, "loss": 0.2587890625, "step": 2906 }, { "epoch": 0.1964985805056104, "grad_norm": 1.845271348953247, "learning_rate": 2.796777560122431e-05, "loss": 0.2491302490234375, "step": 2907 }, { "epoch": 0.19656617547654454, "grad_norm": 1.4444900751113892, "learning_rate": 2.7966116939635133e-05, "loss": 0.2438812255859375, "step": 2908 }, { "epoch": 0.1966337704474787, "grad_norm": 1.7057266235351562, "learning_rate": 2.7964457650664583e-05, "loss": 0.313507080078125, "step": 2909 }, { "epoch": 0.19670136541841288, "grad_norm": 1.5346174240112305, "learning_rate": 2.7962797734392942e-05, "loss": 0.302978515625, "step": 2910 }, { "epoch": 0.19676896038934702, "grad_norm": 1.2296056747436523, "learning_rate": 2.796113719090054e-05, "loss": 0.275970458984375, "step": 2911 }, { "epoch": 0.1968365553602812, "grad_norm": 2.360161066055298, "learning_rate": 2.795947602026771e-05, "loss": 0.3279266357421875, "step": 2912 }, { "epoch": 0.19690415033121536, "grad_norm": 0.7202319502830505, "learning_rate": 2.7957814222574834e-05, "loss": 0.1388092041015625, "step": 2913 }, { "epoch": 0.19697174530214953, "grad_norm": 1.0712244510650635, "learning_rate": 2.7956151797902327e-05, "loss": 0.25775146484375, "step": 2914 }, { "epoch": 0.19703934027308367, "grad_norm": 1.4559043645858765, "learning_rate": 2.795448874633062e-05, "loss": 0.322998046875, "step": 2915 }, { "epoch": 0.19710693524401784, "grad_norm": 2.0939903259277344, "learning_rate": 2.795282506794019e-05, "loss": 0.35302734375, "step": 2916 }, { "epoch": 0.19717453021495202, "grad_norm": 1.6670403480529785, "learning_rate": 2.7951160762811524e-05, "loss": 0.29071044921875, "step": 2917 }, { "epoch": 0.19724212518588616, "grad_norm": 1.5681145191192627, "learning_rate": 2.7949495831025156e-05, "loss": 0.30078125, "step": 2918 }, { "epoch": 0.19730972015682033, "grad_norm": 1.0949057340621948, "learning_rate": 2.7947830272661656e-05, "loss": 0.28558349609375, "step": 2919 }, { "epoch": 0.1973773151277545, "grad_norm": 1.8821858167648315, "learning_rate": 2.79461640878016e-05, "loss": 0.2860107421875, "step": 2920 }, { "epoch": 0.19744491009868867, "grad_norm": 1.3929002285003662, "learning_rate": 2.7944497276525613e-05, "loss": 0.36566162109375, "step": 2921 }, { "epoch": 0.1975125050696228, "grad_norm": 1.280765414237976, "learning_rate": 2.7942829838914352e-05, "loss": 0.1998443603515625, "step": 2922 }, { "epoch": 0.19758010004055698, "grad_norm": 3.12864351272583, "learning_rate": 2.794116177504849e-05, "loss": 0.26300048828125, "step": 2923 }, { "epoch": 0.19764769501149115, "grad_norm": 2.6581666469573975, "learning_rate": 2.7939493085008738e-05, "loss": 0.2904052734375, "step": 2924 }, { "epoch": 0.1977152899824253, "grad_norm": 1.5356477499008179, "learning_rate": 2.7937823768875847e-05, "loss": 0.259552001953125, "step": 2925 }, { "epoch": 0.19778288495335947, "grad_norm": 2.8755226135253906, "learning_rate": 2.7936153826730577e-05, "loss": 0.32232666015625, "step": 2926 }, { "epoch": 0.19785047992429364, "grad_norm": 2.6104462146759033, "learning_rate": 2.793448325865374e-05, "loss": 0.30975341796875, "step": 2927 }, { "epoch": 0.1979180748952278, "grad_norm": 1.4875153303146362, "learning_rate": 2.793281206472616e-05, "loss": 0.22296142578125, "step": 2928 }, { "epoch": 0.19798566986616195, "grad_norm": 1.8983863592147827, "learning_rate": 2.7931140245028705e-05, "loss": 0.2994384765625, "step": 2929 }, { "epoch": 0.19805326483709612, "grad_norm": 0.9148238301277161, "learning_rate": 2.792946779964227e-05, "loss": 0.2860107421875, "step": 2930 }, { "epoch": 0.1981208598080303, "grad_norm": 1.6010668277740479, "learning_rate": 2.7927794728647774e-05, "loss": 0.3173828125, "step": 2931 }, { "epoch": 0.19818845477896443, "grad_norm": 2.3079915046691895, "learning_rate": 2.7926121032126172e-05, "loss": 0.33154296875, "step": 2932 }, { "epoch": 0.1982560497498986, "grad_norm": 0.9108121991157532, "learning_rate": 2.7924446710158446e-05, "loss": 0.206207275390625, "step": 2933 }, { "epoch": 0.19832364472083278, "grad_norm": 2.425408124923706, "learning_rate": 2.7922771762825618e-05, "loss": 0.34814453125, "step": 2934 }, { "epoch": 0.19839123969176695, "grad_norm": 1.7505278587341309, "learning_rate": 2.7921096190208718e-05, "loss": 0.303466796875, "step": 2935 }, { "epoch": 0.1984588346627011, "grad_norm": 3.6242244243621826, "learning_rate": 2.7919419992388833e-05, "loss": 0.263519287109375, "step": 2936 }, { "epoch": 0.19852642963363526, "grad_norm": 0.9193993806838989, "learning_rate": 2.7917743169447064e-05, "loss": 0.1417236328125, "step": 2937 }, { "epoch": 0.19859402460456943, "grad_norm": 1.5893617868423462, "learning_rate": 2.7916065721464544e-05, "loss": 0.3087158203125, "step": 2938 }, { "epoch": 0.19866161957550357, "grad_norm": 1.4491357803344727, "learning_rate": 2.7914387648522444e-05, "loss": 0.21563720703125, "step": 2939 }, { "epoch": 0.19872921454643774, "grad_norm": 1.6337674856185913, "learning_rate": 2.791270895070195e-05, "loss": 0.309478759765625, "step": 2940 }, { "epoch": 0.19879680951737191, "grad_norm": 2.141087770462036, "learning_rate": 2.7911029628084295e-05, "loss": 0.3155517578125, "step": 2941 }, { "epoch": 0.19886440448830606, "grad_norm": 1.8838398456573486, "learning_rate": 2.790934968075074e-05, "loss": 0.33209228515625, "step": 2942 }, { "epoch": 0.19893199945924023, "grad_norm": 0.9492110013961792, "learning_rate": 2.790766910878256e-05, "loss": 0.227142333984375, "step": 2943 }, { "epoch": 0.1989995944301744, "grad_norm": 1.7455350160598755, "learning_rate": 2.7905987912261075e-05, "loss": 0.2911376953125, "step": 2944 }, { "epoch": 0.19906718940110857, "grad_norm": 1.8235026597976685, "learning_rate": 2.7904306091267637e-05, "loss": 0.286285400390625, "step": 2945 }, { "epoch": 0.1991347843720427, "grad_norm": 1.1231770515441895, "learning_rate": 2.7902623645883618e-05, "loss": 0.17118072509765625, "step": 2946 }, { "epoch": 0.19920237934297688, "grad_norm": 0.9220330119132996, "learning_rate": 2.7900940576190427e-05, "loss": 0.1476593017578125, "step": 2947 }, { "epoch": 0.19926997431391105, "grad_norm": 1.6142783164978027, "learning_rate": 2.7899256882269503e-05, "loss": 0.254180908203125, "step": 2948 }, { "epoch": 0.1993375692848452, "grad_norm": 1.3029451370239258, "learning_rate": 2.7897572564202306e-05, "loss": 0.29376220703125, "step": 2949 }, { "epoch": 0.19940516425577937, "grad_norm": 2.655841588973999, "learning_rate": 2.789588762207034e-05, "loss": 0.285125732421875, "step": 2950 }, { "epoch": 0.19947275922671354, "grad_norm": 2.3687424659729004, "learning_rate": 2.7894202055955132e-05, "loss": 0.36065673828125, "step": 2951 }, { "epoch": 0.1995403541976477, "grad_norm": 2.4359261989593506, "learning_rate": 2.7892515865938247e-05, "loss": 0.29052734375, "step": 2952 }, { "epoch": 0.19960794916858185, "grad_norm": 1.288620114326477, "learning_rate": 2.7890829052101264e-05, "loss": 0.21380615234375, "step": 2953 }, { "epoch": 0.19967554413951602, "grad_norm": 1.4935336112976074, "learning_rate": 2.78891416145258e-05, "loss": 0.186126708984375, "step": 2954 }, { "epoch": 0.1997431391104502, "grad_norm": 1.6516530513763428, "learning_rate": 2.788745355329351e-05, "loss": 0.24236679077148438, "step": 2955 }, { "epoch": 0.19981073408138433, "grad_norm": 1.4423432350158691, "learning_rate": 2.7885764868486067e-05, "loss": 0.26434326171875, "step": 2956 }, { "epoch": 0.1998783290523185, "grad_norm": 2.494877576828003, "learning_rate": 2.788407556018519e-05, "loss": 0.290771484375, "step": 2957 }, { "epoch": 0.19994592402325267, "grad_norm": 1.3878684043884277, "learning_rate": 2.788238562847261e-05, "loss": 0.2202911376953125, "step": 2958 }, { "epoch": 0.20001351899418685, "grad_norm": 2.3438827991485596, "learning_rate": 2.78806950734301e-05, "loss": 0.232696533203125, "step": 2959 }, { "epoch": 0.200081113965121, "grad_norm": 1.3675132989883423, "learning_rate": 2.787900389513946e-05, "loss": 0.335205078125, "step": 2960 }, { "epoch": 0.20014870893605516, "grad_norm": 1.551167607307434, "learning_rate": 2.7877312093682512e-05, "loss": 0.25189208984375, "step": 2961 }, { "epoch": 0.20021630390698933, "grad_norm": 1.047109603881836, "learning_rate": 2.7875619669141127e-05, "loss": 0.174285888671875, "step": 2962 }, { "epoch": 0.20028389887792347, "grad_norm": 1.1946715116500854, "learning_rate": 2.7873926621597187e-05, "loss": 0.2085418701171875, "step": 2963 }, { "epoch": 0.20035149384885764, "grad_norm": 1.2280445098876953, "learning_rate": 2.7872232951132614e-05, "loss": 0.30291748046875, "step": 2964 }, { "epoch": 0.2004190888197918, "grad_norm": 2.1950714588165283, "learning_rate": 2.7870538657829362e-05, "loss": 0.24639892578125, "step": 2965 }, { "epoch": 0.20048668379072598, "grad_norm": 1.206703543663025, "learning_rate": 2.786884374176941e-05, "loss": 0.28350830078125, "step": 2966 }, { "epoch": 0.20055427876166013, "grad_norm": 2.0691092014312744, "learning_rate": 2.786714820303476e-05, "loss": 0.20330810546875, "step": 2967 }, { "epoch": 0.2006218737325943, "grad_norm": 1.40524423122406, "learning_rate": 2.786545204170747e-05, "loss": 0.196807861328125, "step": 2968 }, { "epoch": 0.20068946870352847, "grad_norm": 1.4309337139129639, "learning_rate": 2.7863755257869592e-05, "loss": 0.288330078125, "step": 2969 }, { "epoch": 0.2007570636744626, "grad_norm": 1.663336992263794, "learning_rate": 2.7862057851603237e-05, "loss": 0.30047607421875, "step": 2970 }, { "epoch": 0.20082465864539678, "grad_norm": 1.3028206825256348, "learning_rate": 2.786035982299054e-05, "loss": 0.22235107421875, "step": 2971 }, { "epoch": 0.20089225361633095, "grad_norm": 1.6364004611968994, "learning_rate": 2.7858661172113654e-05, "loss": 0.213470458984375, "step": 2972 }, { "epoch": 0.2009598485872651, "grad_norm": 0.8476117849349976, "learning_rate": 2.785696189905477e-05, "loss": 0.16357421875, "step": 2973 }, { "epoch": 0.20102744355819926, "grad_norm": 1.2790766954421997, "learning_rate": 2.7855262003896118e-05, "loss": 0.2984619140625, "step": 2974 }, { "epoch": 0.20109503852913344, "grad_norm": 1.2136484384536743, "learning_rate": 2.785356148671994e-05, "loss": 0.1993408203125, "step": 2975 }, { "epoch": 0.2011626335000676, "grad_norm": 1.3380502462387085, "learning_rate": 2.7851860347608526e-05, "loss": 0.31365966796875, "step": 2976 }, { "epoch": 0.20123022847100175, "grad_norm": 1.572393536567688, "learning_rate": 2.7850158586644186e-05, "loss": 0.305694580078125, "step": 2977 }, { "epoch": 0.20129782344193592, "grad_norm": 1.4191819429397583, "learning_rate": 2.7848456203909252e-05, "loss": 0.296844482421875, "step": 2978 }, { "epoch": 0.2013654184128701, "grad_norm": 1.37727952003479, "learning_rate": 2.7846753199486106e-05, "loss": 0.30206298828125, "step": 2979 }, { "epoch": 0.20143301338380423, "grad_norm": 1.0746618509292603, "learning_rate": 2.784504957345715e-05, "loss": 0.228668212890625, "step": 2980 }, { "epoch": 0.2015006083547384, "grad_norm": 1.3032466173171997, "learning_rate": 2.784334532590481e-05, "loss": 0.20623779296875, "step": 2981 }, { "epoch": 0.20156820332567257, "grad_norm": 1.9822500944137573, "learning_rate": 2.7841640456911555e-05, "loss": 0.42645263671875, "step": 2982 }, { "epoch": 0.20163579829660674, "grad_norm": 1.112478494644165, "learning_rate": 2.7839934966559864e-05, "loss": 0.17559814453125, "step": 2983 }, { "epoch": 0.2017033932675409, "grad_norm": 0.7769802212715149, "learning_rate": 2.783822885493228e-05, "loss": 0.1598663330078125, "step": 2984 }, { "epoch": 0.20177098823847506, "grad_norm": 1.4630982875823975, "learning_rate": 2.783652212211134e-05, "loss": 0.3135986328125, "step": 2985 }, { "epoch": 0.20183858320940923, "grad_norm": 1.1081478595733643, "learning_rate": 2.7834814768179623e-05, "loss": 0.27447509765625, "step": 2986 }, { "epoch": 0.20190617818034337, "grad_norm": 1.5766047239303589, "learning_rate": 2.783310679321976e-05, "loss": 0.280517578125, "step": 2987 }, { "epoch": 0.20197377315127754, "grad_norm": 1.2169208526611328, "learning_rate": 2.7831398197314374e-05, "loss": 0.3311767578125, "step": 2988 }, { "epoch": 0.2020413681222117, "grad_norm": 2.0074872970581055, "learning_rate": 2.782968898054615e-05, "loss": 0.16112518310546875, "step": 2989 }, { "epoch": 0.20210896309314588, "grad_norm": 1.5451782941818237, "learning_rate": 2.782797914299778e-05, "loss": 0.287567138671875, "step": 2990 }, { "epoch": 0.20217655806408003, "grad_norm": 1.1551508903503418, "learning_rate": 2.782626868475201e-05, "loss": 0.28076171875, "step": 2991 }, { "epoch": 0.2022441530350142, "grad_norm": 1.6622004508972168, "learning_rate": 2.7824557605891595e-05, "loss": 0.266021728515625, "step": 2992 }, { "epoch": 0.20231174800594837, "grad_norm": 1.4370290040969849, "learning_rate": 2.7822845906499326e-05, "loss": 0.35919189453125, "step": 2993 }, { "epoch": 0.2023793429768825, "grad_norm": 1.3805313110351562, "learning_rate": 2.7821133586658025e-05, "loss": 0.21234130859375, "step": 2994 }, { "epoch": 0.20244693794781668, "grad_norm": 1.3549437522888184, "learning_rate": 2.781942064645055e-05, "loss": 0.262298583984375, "step": 2995 }, { "epoch": 0.20251453291875085, "grad_norm": 1.2861257791519165, "learning_rate": 2.781770708595978e-05, "loss": 0.20050048828125, "step": 2996 }, { "epoch": 0.20258212788968502, "grad_norm": 1.528303861618042, "learning_rate": 2.7815992905268628e-05, "loss": 0.27783203125, "step": 2997 }, { "epoch": 0.20264972286061916, "grad_norm": 1.2862571477890015, "learning_rate": 2.781427810446004e-05, "loss": 0.34368896484375, "step": 2998 }, { "epoch": 0.20271731783155333, "grad_norm": 0.8771893382072449, "learning_rate": 2.7812562683616986e-05, "loss": 0.1786651611328125, "step": 2999 }, { "epoch": 0.2027849128024875, "grad_norm": 1.6201391220092773, "learning_rate": 2.7810846642822466e-05, "loss": 0.3006591796875, "step": 3000 }, { "epoch": 0.20285250777342165, "grad_norm": 0.9605628848075867, "learning_rate": 2.7809129982159517e-05, "loss": 0.16497039794921875, "step": 3001 }, { "epoch": 0.20292010274435582, "grad_norm": 0.8553195595741272, "learning_rate": 2.78074127017112e-05, "loss": 0.1705474853515625, "step": 3002 }, { "epoch": 0.20298769771529, "grad_norm": 2.35498046875, "learning_rate": 2.7805694801560608e-05, "loss": 0.284637451171875, "step": 3003 }, { "epoch": 0.20305529268622413, "grad_norm": 1.2845224142074585, "learning_rate": 2.7803976281790864e-05, "loss": 0.27337646484375, "step": 3004 }, { "epoch": 0.2031228876571583, "grad_norm": 1.2923980951309204, "learning_rate": 2.780225714248512e-05, "loss": 0.3359375, "step": 3005 }, { "epoch": 0.20319048262809247, "grad_norm": 2.2007193565368652, "learning_rate": 2.7800537383726563e-05, "loss": 0.2745361328125, "step": 3006 }, { "epoch": 0.20325807759902664, "grad_norm": 1.1778916120529175, "learning_rate": 2.77988170055984e-05, "loss": 0.25872802734375, "step": 3007 }, { "epoch": 0.20332567256996079, "grad_norm": 1.3832025527954102, "learning_rate": 2.7797096008183874e-05, "loss": 0.1460113525390625, "step": 3008 }, { "epoch": 0.20339326754089496, "grad_norm": 2.0663907527923584, "learning_rate": 2.779537439156626e-05, "loss": 0.293670654296875, "step": 3009 }, { "epoch": 0.20346086251182913, "grad_norm": 1.6662791967391968, "learning_rate": 2.7793652155828858e-05, "loss": 0.2177734375, "step": 3010 }, { "epoch": 0.20352845748276327, "grad_norm": 2.264590263366699, "learning_rate": 2.7791929301054998e-05, "loss": 0.32733154296875, "step": 3011 }, { "epoch": 0.20359605245369744, "grad_norm": 1.6439130306243896, "learning_rate": 2.779020582732805e-05, "loss": 0.328704833984375, "step": 3012 }, { "epoch": 0.2036636474246316, "grad_norm": 1.7224677801132202, "learning_rate": 2.7788481734731403e-05, "loss": 0.284515380859375, "step": 3013 }, { "epoch": 0.20373124239556578, "grad_norm": 1.0323412418365479, "learning_rate": 2.778675702334848e-05, "loss": 0.1632843017578125, "step": 3014 }, { "epoch": 0.20379883736649992, "grad_norm": 0.8160326480865479, "learning_rate": 2.7785031693262732e-05, "loss": 0.158905029296875, "step": 3015 }, { "epoch": 0.2038664323374341, "grad_norm": 1.385682225227356, "learning_rate": 2.778330574455764e-05, "loss": 0.2354736328125, "step": 3016 }, { "epoch": 0.20393402730836827, "grad_norm": 1.3621443510055542, "learning_rate": 2.778157917731672e-05, "loss": 0.2681884765625, "step": 3017 }, { "epoch": 0.2040016222793024, "grad_norm": 1.070635199546814, "learning_rate": 2.777985199162351e-05, "loss": 0.13714599609375, "step": 3018 }, { "epoch": 0.20406921725023658, "grad_norm": 1.5796170234680176, "learning_rate": 2.777812418756158e-05, "loss": 0.27288818359375, "step": 3019 }, { "epoch": 0.20413681222117075, "grad_norm": 2.1795425415039062, "learning_rate": 2.777639576521454e-05, "loss": 0.20709228515625, "step": 3020 }, { "epoch": 0.20420440719210492, "grad_norm": 1.723026156425476, "learning_rate": 2.7774666724666015e-05, "loss": 0.22210693359375, "step": 3021 }, { "epoch": 0.20427200216303906, "grad_norm": 1.6226752996444702, "learning_rate": 2.777293706599967e-05, "loss": 0.2279510498046875, "step": 3022 }, { "epoch": 0.20433959713397323, "grad_norm": 1.1739999055862427, "learning_rate": 2.7771206789299198e-05, "loss": 0.1624603271484375, "step": 3023 }, { "epoch": 0.2044071921049074, "grad_norm": 0.8601580262184143, "learning_rate": 2.7769475894648315e-05, "loss": 0.17189788818359375, "step": 3024 }, { "epoch": 0.20447478707584155, "grad_norm": 1.6758430004119873, "learning_rate": 2.7767744382130775e-05, "loss": 0.33795166015625, "step": 3025 }, { "epoch": 0.20454238204677572, "grad_norm": 1.887425184249878, "learning_rate": 2.7766012251830364e-05, "loss": 0.2235565185546875, "step": 3026 }, { "epoch": 0.2046099770177099, "grad_norm": 3.356614828109741, "learning_rate": 2.7764279503830888e-05, "loss": 0.3309326171875, "step": 3027 }, { "epoch": 0.20467757198864406, "grad_norm": 1.0158472061157227, "learning_rate": 2.7762546138216187e-05, "loss": 0.25018310546875, "step": 3028 }, { "epoch": 0.2047451669595782, "grad_norm": 2.2599971294403076, "learning_rate": 2.7760812155070136e-05, "loss": 0.255401611328125, "step": 3029 }, { "epoch": 0.20481276193051237, "grad_norm": 1.314997673034668, "learning_rate": 2.7759077554476634e-05, "loss": 0.31427001953125, "step": 3030 }, { "epoch": 0.20488035690144654, "grad_norm": 1.5595966577529907, "learning_rate": 2.7757342336519615e-05, "loss": 0.2662353515625, "step": 3031 }, { "epoch": 0.20494795187238068, "grad_norm": 2.096590757369995, "learning_rate": 2.775560650128303e-05, "loss": 0.291168212890625, "step": 3032 }, { "epoch": 0.20501554684331486, "grad_norm": 1.6808075904846191, "learning_rate": 2.775387004885088e-05, "loss": 0.29473876953125, "step": 3033 }, { "epoch": 0.20508314181424903, "grad_norm": 1.579618215560913, "learning_rate": 2.775213297930718e-05, "loss": 0.27313232421875, "step": 3034 }, { "epoch": 0.20515073678518317, "grad_norm": 2.6905200481414795, "learning_rate": 2.7750395292735987e-05, "loss": 0.325836181640625, "step": 3035 }, { "epoch": 0.20521833175611734, "grad_norm": 1.4981231689453125, "learning_rate": 2.774865698922137e-05, "loss": 0.2037811279296875, "step": 3036 }, { "epoch": 0.2052859267270515, "grad_norm": 2.0175740718841553, "learning_rate": 2.7746918068847447e-05, "loss": 0.282958984375, "step": 3037 }, { "epoch": 0.20535352169798568, "grad_norm": 1.1687524318695068, "learning_rate": 2.7745178531698352e-05, "loss": 0.196075439453125, "step": 3038 }, { "epoch": 0.20542111666891982, "grad_norm": 1.8159571886062622, "learning_rate": 2.7743438377858267e-05, "loss": 0.186187744140625, "step": 3039 }, { "epoch": 0.205488711639854, "grad_norm": 1.5729869604110718, "learning_rate": 2.7741697607411376e-05, "loss": 0.1781005859375, "step": 3040 }, { "epoch": 0.20555630661078816, "grad_norm": 1.284554362297058, "learning_rate": 2.773995622044192e-05, "loss": 0.2268829345703125, "step": 3041 }, { "epoch": 0.2056239015817223, "grad_norm": 1.7305400371551514, "learning_rate": 2.7738214217034147e-05, "loss": 0.29388427734375, "step": 3042 }, { "epoch": 0.20569149655265648, "grad_norm": 1.7488229274749756, "learning_rate": 2.7736471597272355e-05, "loss": 0.302947998046875, "step": 3043 }, { "epoch": 0.20575909152359065, "grad_norm": 1.6515305042266846, "learning_rate": 2.7734728361240857e-05, "loss": 0.27935791015625, "step": 3044 }, { "epoch": 0.20582668649452482, "grad_norm": 2.045717239379883, "learning_rate": 2.773298450902401e-05, "loss": 0.3269195556640625, "step": 3045 }, { "epoch": 0.20589428146545896, "grad_norm": 1.6576989889144897, "learning_rate": 2.7731240040706183e-05, "loss": 0.208343505859375, "step": 3046 }, { "epoch": 0.20596187643639313, "grad_norm": 0.8053619861602783, "learning_rate": 2.7729494956371792e-05, "loss": 0.155120849609375, "step": 3047 }, { "epoch": 0.2060294714073273, "grad_norm": 2.041761875152588, "learning_rate": 2.7727749256105266e-05, "loss": 0.35748291015625, "step": 3048 }, { "epoch": 0.20609706637826145, "grad_norm": 1.073920488357544, "learning_rate": 2.7726002939991084e-05, "loss": 0.24615478515625, "step": 3049 }, { "epoch": 0.20616466134919562, "grad_norm": 1.3387781381607056, "learning_rate": 2.7724256008113733e-05, "loss": 0.2018585205078125, "step": 3050 }, { "epoch": 0.2062322563201298, "grad_norm": 1.232421636581421, "learning_rate": 2.7722508460557742e-05, "loss": 0.152496337890625, "step": 3051 }, { "epoch": 0.20629985129106396, "grad_norm": 1.0609955787658691, "learning_rate": 2.7720760297407675e-05, "loss": 0.2294921875, "step": 3052 }, { "epoch": 0.2063674462619981, "grad_norm": 1.4580504894256592, "learning_rate": 2.7719011518748117e-05, "loss": 0.257537841796875, "step": 3053 }, { "epoch": 0.20643504123293227, "grad_norm": 1.2768754959106445, "learning_rate": 2.771726212466368e-05, "loss": 0.2543792724609375, "step": 3054 }, { "epoch": 0.20650263620386644, "grad_norm": 0.9234589338302612, "learning_rate": 2.7715512115239014e-05, "loss": 0.225128173828125, "step": 3055 }, { "epoch": 0.20657023117480058, "grad_norm": 1.353737711906433, "learning_rate": 2.7713761490558798e-05, "loss": 0.169464111328125, "step": 3056 }, { "epoch": 0.20663782614573475, "grad_norm": 1.1098356246948242, "learning_rate": 2.7712010250707735e-05, "loss": 0.349945068359375, "step": 3057 }, { "epoch": 0.20670542111666892, "grad_norm": 2.4934797286987305, "learning_rate": 2.771025839577056e-05, "loss": 0.3560791015625, "step": 3058 }, { "epoch": 0.2067730160876031, "grad_norm": 1.1818865537643433, "learning_rate": 2.7708505925832034e-05, "loss": 0.26446533203125, "step": 3059 }, { "epoch": 0.20684061105853724, "grad_norm": 1.0885298252105713, "learning_rate": 2.7706752840976966e-05, "loss": 0.260498046875, "step": 3060 }, { "epoch": 0.2069082060294714, "grad_norm": 1.481937050819397, "learning_rate": 2.770499914129017e-05, "loss": 0.30255126953125, "step": 3061 }, { "epoch": 0.20697580100040558, "grad_norm": 1.2960050106048584, "learning_rate": 2.7703244826856504e-05, "loss": 0.304962158203125, "step": 3062 }, { "epoch": 0.20704339597133972, "grad_norm": 1.1241214275360107, "learning_rate": 2.7701489897760848e-05, "loss": 0.209625244140625, "step": 3063 }, { "epoch": 0.2071109909422739, "grad_norm": 1.3436243534088135, "learning_rate": 2.7699734354088125e-05, "loss": 0.205810546875, "step": 3064 }, { "epoch": 0.20717858591320806, "grad_norm": 1.5675525665283203, "learning_rate": 2.7697978195923276e-05, "loss": 0.253997802734375, "step": 3065 }, { "epoch": 0.2072461808841422, "grad_norm": 1.6100279092788696, "learning_rate": 2.7696221423351277e-05, "loss": 0.275604248046875, "step": 3066 }, { "epoch": 0.20731377585507638, "grad_norm": 1.4587658643722534, "learning_rate": 2.769446403645712e-05, "loss": 0.229156494140625, "step": 3067 }, { "epoch": 0.20738137082601055, "grad_norm": 1.1062474250793457, "learning_rate": 2.7692706035325854e-05, "loss": 0.2305908203125, "step": 3068 }, { "epoch": 0.20744896579694472, "grad_norm": 1.3262698650360107, "learning_rate": 2.7690947420042535e-05, "loss": 0.227569580078125, "step": 3069 }, { "epoch": 0.20751656076787886, "grad_norm": 1.7397228479385376, "learning_rate": 2.7689188190692253e-05, "loss": 0.243011474609375, "step": 3070 }, { "epoch": 0.20758415573881303, "grad_norm": 0.9434125423431396, "learning_rate": 2.768742834736013e-05, "loss": 0.17235565185546875, "step": 3071 }, { "epoch": 0.2076517507097472, "grad_norm": 1.1834591627120972, "learning_rate": 2.768566789013133e-05, "loss": 0.205535888671875, "step": 3072 }, { "epoch": 0.20771934568068134, "grad_norm": 2.5195610523223877, "learning_rate": 2.7683906819091022e-05, "loss": 0.290863037109375, "step": 3073 }, { "epoch": 0.20778694065161551, "grad_norm": 2.1656837463378906, "learning_rate": 2.7682145134324416e-05, "loss": 0.22888946533203125, "step": 3074 }, { "epoch": 0.20785453562254969, "grad_norm": 2.381791591644287, "learning_rate": 2.7680382835916766e-05, "loss": 0.306610107421875, "step": 3075 }, { "epoch": 0.20792213059348386, "grad_norm": 1.2014641761779785, "learning_rate": 2.7678619923953336e-05, "loss": 0.2587432861328125, "step": 3076 }, { "epoch": 0.207989725564418, "grad_norm": 1.3000106811523438, "learning_rate": 2.7676856398519422e-05, "loss": 0.13140869140625, "step": 3077 }, { "epoch": 0.20805732053535217, "grad_norm": 1.4041941165924072, "learning_rate": 2.767509225970036e-05, "loss": 0.2979736328125, "step": 3078 }, { "epoch": 0.20812491550628634, "grad_norm": 2.8119678497314453, "learning_rate": 2.767332750758151e-05, "loss": 0.358306884765625, "step": 3079 }, { "epoch": 0.20819251047722048, "grad_norm": 1.1649421453475952, "learning_rate": 2.767156214224826e-05, "loss": 0.1966552734375, "step": 3080 }, { "epoch": 0.20826010544815465, "grad_norm": 1.5086886882781982, "learning_rate": 2.766979616378603e-05, "loss": 0.30865478515625, "step": 3081 }, { "epoch": 0.20832770041908882, "grad_norm": 1.3136252164840698, "learning_rate": 2.766802957228027e-05, "loss": 0.2512664794921875, "step": 3082 }, { "epoch": 0.208395295390023, "grad_norm": 1.1101046800613403, "learning_rate": 2.7666262367816457e-05, "loss": 0.3143310546875, "step": 3083 }, { "epoch": 0.20846289036095714, "grad_norm": 3.317004442214966, "learning_rate": 2.7664494550480098e-05, "loss": 0.353546142578125, "step": 3084 }, { "epoch": 0.2085304853318913, "grad_norm": 2.3064746856689453, "learning_rate": 2.766272612035674e-05, "loss": 0.2490234375, "step": 3085 }, { "epoch": 0.20859808030282548, "grad_norm": 1.9107741117477417, "learning_rate": 2.766095707753194e-05, "loss": 0.27490234375, "step": 3086 }, { "epoch": 0.20866567527375962, "grad_norm": 3.6690523624420166, "learning_rate": 2.7659187422091295e-05, "loss": 0.32470703125, "step": 3087 }, { "epoch": 0.2087332702446938, "grad_norm": 1.3331172466278076, "learning_rate": 2.765741715412044e-05, "loss": 0.247406005859375, "step": 3088 }, { "epoch": 0.20880086521562796, "grad_norm": 1.119168758392334, "learning_rate": 2.7655646273705025e-05, "loss": 0.15613555908203125, "step": 3089 }, { "epoch": 0.20886846018656213, "grad_norm": 1.4291871786117554, "learning_rate": 2.765387478093074e-05, "loss": 0.31927490234375, "step": 3090 }, { "epoch": 0.20893605515749628, "grad_norm": 1.1600524187088013, "learning_rate": 2.76521026758833e-05, "loss": 0.248931884765625, "step": 3091 }, { "epoch": 0.20900365012843045, "grad_norm": 1.353184700012207, "learning_rate": 2.7650329958648452e-05, "loss": 0.2275390625, "step": 3092 }, { "epoch": 0.20907124509936462, "grad_norm": 1.1355549097061157, "learning_rate": 2.7648556629311962e-05, "loss": 0.2083282470703125, "step": 3093 }, { "epoch": 0.20913884007029876, "grad_norm": 1.6484895944595337, "learning_rate": 2.7646782687959647e-05, "loss": 0.2589111328125, "step": 3094 }, { "epoch": 0.20920643504123293, "grad_norm": 2.188560724258423, "learning_rate": 2.7645008134677336e-05, "loss": 0.2166748046875, "step": 3095 }, { "epoch": 0.2092740300121671, "grad_norm": 1.2228665351867676, "learning_rate": 2.764323296955089e-05, "loss": 0.236053466796875, "step": 3096 }, { "epoch": 0.20934162498310127, "grad_norm": 1.6993906497955322, "learning_rate": 2.764145719266621e-05, "loss": 0.234283447265625, "step": 3097 }, { "epoch": 0.2094092199540354, "grad_norm": 1.3735225200653076, "learning_rate": 2.763968080410921e-05, "loss": 0.31243896484375, "step": 3098 }, { "epoch": 0.20947681492496958, "grad_norm": 1.0370370149612427, "learning_rate": 2.7637903803965852e-05, "loss": 0.24853515625, "step": 3099 }, { "epoch": 0.20954440989590375, "grad_norm": 1.8058257102966309, "learning_rate": 2.7636126192322113e-05, "loss": 0.312652587890625, "step": 3100 }, { "epoch": 0.2096120048668379, "grad_norm": 1.118107795715332, "learning_rate": 2.7634347969264002e-05, "loss": 0.22174072265625, "step": 3101 }, { "epoch": 0.20967959983777207, "grad_norm": 2.3857369422912598, "learning_rate": 2.7632569134877568e-05, "loss": 0.365203857421875, "step": 3102 }, { "epoch": 0.20974719480870624, "grad_norm": 2.885838031768799, "learning_rate": 2.7630789689248873e-05, "loss": 0.29937744140625, "step": 3103 }, { "epoch": 0.20981478977964038, "grad_norm": 1.8370766639709473, "learning_rate": 2.762900963246403e-05, "loss": 0.2427520751953125, "step": 3104 }, { "epoch": 0.20988238475057455, "grad_norm": 2.051445960998535, "learning_rate": 2.7627228964609156e-05, "loss": 0.36083984375, "step": 3105 }, { "epoch": 0.20994997972150872, "grad_norm": 1.1339527368545532, "learning_rate": 2.7625447685770417e-05, "loss": 0.24786376953125, "step": 3106 }, { "epoch": 0.2100175746924429, "grad_norm": 0.9814838767051697, "learning_rate": 2.7623665796034004e-05, "loss": 0.2076873779296875, "step": 3107 }, { "epoch": 0.21008516966337704, "grad_norm": 1.0505998134613037, "learning_rate": 2.7621883295486137e-05, "loss": 0.195831298828125, "step": 3108 }, { "epoch": 0.2101527646343112, "grad_norm": 1.283959984779358, "learning_rate": 2.7620100184213054e-05, "loss": 0.261566162109375, "step": 3109 }, { "epoch": 0.21022035960524538, "grad_norm": 2.0259792804718018, "learning_rate": 2.761831646230105e-05, "loss": 0.245635986328125, "step": 3110 }, { "epoch": 0.21028795457617952, "grad_norm": 1.0596877336502075, "learning_rate": 2.7616532129836415e-05, "loss": 0.2213592529296875, "step": 3111 }, { "epoch": 0.2103555495471137, "grad_norm": 0.6845117211341858, "learning_rate": 2.7614747186905498e-05, "loss": 0.144561767578125, "step": 3112 }, { "epoch": 0.21042314451804786, "grad_norm": 1.3888213634490967, "learning_rate": 2.7612961633594663e-05, "loss": 0.2786865234375, "step": 3113 }, { "epoch": 0.21049073948898203, "grad_norm": 2.0557830333709717, "learning_rate": 2.76111754699903e-05, "loss": 0.3370361328125, "step": 3114 }, { "epoch": 0.21055833445991617, "grad_norm": 3.318871259689331, "learning_rate": 2.7609388696178843e-05, "loss": 0.31231689453125, "step": 3115 }, { "epoch": 0.21062592943085034, "grad_norm": 1.8951735496520996, "learning_rate": 2.7607601312246745e-05, "loss": 0.270294189453125, "step": 3116 }, { "epoch": 0.21069352440178452, "grad_norm": 0.9964572787284851, "learning_rate": 2.760581331828049e-05, "loss": 0.212188720703125, "step": 3117 }, { "epoch": 0.21076111937271866, "grad_norm": 1.6272512674331665, "learning_rate": 2.760402471436659e-05, "loss": 0.227386474609375, "step": 3118 }, { "epoch": 0.21082871434365283, "grad_norm": 1.4719548225402832, "learning_rate": 2.760223550059159e-05, "loss": 0.27197265625, "step": 3119 }, { "epoch": 0.210896309314587, "grad_norm": 1.415050745010376, "learning_rate": 2.760044567704206e-05, "loss": 0.3001708984375, "step": 3120 }, { "epoch": 0.21096390428552117, "grad_norm": 2.2457680702209473, "learning_rate": 2.7598655243804618e-05, "loss": 0.205322265625, "step": 3121 }, { "epoch": 0.2110314992564553, "grad_norm": 2.0299084186553955, "learning_rate": 2.7596864200965873e-05, "loss": 0.220245361328125, "step": 3122 }, { "epoch": 0.21109909422738948, "grad_norm": 0.7735278606414795, "learning_rate": 2.759507254861251e-05, "loss": 0.1708831787109375, "step": 3123 }, { "epoch": 0.21116668919832365, "grad_norm": 1.689052939414978, "learning_rate": 2.7593280286831205e-05, "loss": 0.26153564453125, "step": 3124 }, { "epoch": 0.2112342841692578, "grad_norm": 1.6848313808441162, "learning_rate": 2.759148741570868e-05, "loss": 0.229949951171875, "step": 3125 }, { "epoch": 0.21130187914019197, "grad_norm": 1.8624118566513062, "learning_rate": 2.758969393533169e-05, "loss": 0.241455078125, "step": 3126 }, { "epoch": 0.21136947411112614, "grad_norm": 1.5826648473739624, "learning_rate": 2.7587899845787014e-05, "loss": 0.27001953125, "step": 3127 }, { "epoch": 0.2114370690820603, "grad_norm": 1.7623766660690308, "learning_rate": 2.7586105147161458e-05, "loss": 0.23968505859375, "step": 3128 }, { "epoch": 0.21150466405299445, "grad_norm": 1.1988030672073364, "learning_rate": 2.7584309839541867e-05, "loss": 0.2850341796875, "step": 3129 }, { "epoch": 0.21157225902392862, "grad_norm": 1.5710123777389526, "learning_rate": 2.75825139230151e-05, "loss": 0.29266357421875, "step": 3130 }, { "epoch": 0.2116398539948628, "grad_norm": 1.406218409538269, "learning_rate": 2.7580717397668065e-05, "loss": 0.2568206787109375, "step": 3131 }, { "epoch": 0.21170744896579693, "grad_norm": 1.0251625776290894, "learning_rate": 2.7578920263587677e-05, "loss": 0.22088623046875, "step": 3132 }, { "epoch": 0.2117750439367311, "grad_norm": 2.14902400970459, "learning_rate": 2.7577122520860906e-05, "loss": 0.283111572265625, "step": 3133 }, { "epoch": 0.21184263890766528, "grad_norm": 0.9124493598937988, "learning_rate": 2.7575324169574725e-05, "loss": 0.22711181640625, "step": 3134 }, { "epoch": 0.21191023387859942, "grad_norm": 1.1833627223968506, "learning_rate": 2.757352520981616e-05, "loss": 0.3282470703125, "step": 3135 }, { "epoch": 0.2119778288495336, "grad_norm": 0.9867165684700012, "learning_rate": 2.757172564167225e-05, "loss": 0.15740966796875, "step": 3136 }, { "epoch": 0.21204542382046776, "grad_norm": 1.9519026279449463, "learning_rate": 2.7569925465230068e-05, "loss": 0.342681884765625, "step": 3137 }, { "epoch": 0.21211301879140193, "grad_norm": 1.1435661315917969, "learning_rate": 2.7568124680576726e-05, "loss": 0.11504364013671875, "step": 3138 }, { "epoch": 0.21218061376233607, "grad_norm": 1.6338599920272827, "learning_rate": 2.7566323287799346e-05, "loss": 0.30523681640625, "step": 3139 }, { "epoch": 0.21224820873327024, "grad_norm": 0.9880052804946899, "learning_rate": 2.75645212869851e-05, "loss": 0.2310791015625, "step": 3140 }, { "epoch": 0.21231580370420441, "grad_norm": 1.6710160970687866, "learning_rate": 2.7562718678221177e-05, "loss": 0.18341064453125, "step": 3141 }, { "epoch": 0.21238339867513856, "grad_norm": 0.8644048571586609, "learning_rate": 2.7560915461594792e-05, "loss": 0.2044677734375, "step": 3142 }, { "epoch": 0.21245099364607273, "grad_norm": 1.1434268951416016, "learning_rate": 2.7559111637193205e-05, "loss": 0.23046875, "step": 3143 }, { "epoch": 0.2125185886170069, "grad_norm": 0.8855318427085876, "learning_rate": 2.7557307205103692e-05, "loss": 0.16609954833984375, "step": 3144 }, { "epoch": 0.21258618358794107, "grad_norm": 1.1672061681747437, "learning_rate": 2.7555502165413567e-05, "loss": 0.26690673828125, "step": 3145 }, { "epoch": 0.2126537785588752, "grad_norm": 1.5974111557006836, "learning_rate": 2.7553696518210165e-05, "loss": 0.34783935546875, "step": 3146 }, { "epoch": 0.21272137352980938, "grad_norm": 1.0526032447814941, "learning_rate": 2.7551890263580853e-05, "loss": 0.29901123046875, "step": 3147 }, { "epoch": 0.21278896850074355, "grad_norm": 1.7965915203094482, "learning_rate": 2.7550083401613028e-05, "loss": 0.245941162109375, "step": 3148 }, { "epoch": 0.2128565634716777, "grad_norm": 1.2975925207138062, "learning_rate": 2.7548275932394122e-05, "loss": 0.30859375, "step": 3149 }, { "epoch": 0.21292415844261187, "grad_norm": 1.638766884803772, "learning_rate": 2.754646785601159e-05, "loss": 0.2374114990234375, "step": 3150 }, { "epoch": 0.21299175341354604, "grad_norm": 1.5525356531143188, "learning_rate": 2.7544659172552912e-05, "loss": 0.228179931640625, "step": 3151 }, { "epoch": 0.2130593483844802, "grad_norm": 1.6365574598312378, "learning_rate": 2.7542849882105618e-05, "loss": 0.291229248046875, "step": 3152 }, { "epoch": 0.21312694335541435, "grad_norm": 2.0093812942504883, "learning_rate": 2.7541039984757238e-05, "loss": 0.2777099609375, "step": 3153 }, { "epoch": 0.21319453832634852, "grad_norm": 1.1183929443359375, "learning_rate": 2.7539229480595356e-05, "loss": 0.203857421875, "step": 3154 }, { "epoch": 0.2132621332972827, "grad_norm": 1.4732486009597778, "learning_rate": 2.7537418369707568e-05, "loss": 0.21148681640625, "step": 3155 }, { "epoch": 0.21332972826821683, "grad_norm": 1.393689751625061, "learning_rate": 2.753560665218151e-05, "loss": 0.24945068359375, "step": 3156 }, { "epoch": 0.213397323239151, "grad_norm": 1.4718443155288696, "learning_rate": 2.7533794328104844e-05, "loss": 0.2205810546875, "step": 3157 }, { "epoch": 0.21346491821008517, "grad_norm": 0.9263085722923279, "learning_rate": 2.753198139756526e-05, "loss": 0.214202880859375, "step": 3158 }, { "epoch": 0.21353251318101935, "grad_norm": 0.9422867894172668, "learning_rate": 2.753016786065048e-05, "loss": 0.15244674682617188, "step": 3159 }, { "epoch": 0.2136001081519535, "grad_norm": 2.4992706775665283, "learning_rate": 2.7528353717448257e-05, "loss": 0.28485107421875, "step": 3160 }, { "epoch": 0.21366770312288766, "grad_norm": 1.153867244720459, "learning_rate": 2.752653896804637e-05, "loss": 0.26251220703125, "step": 3161 }, { "epoch": 0.21373529809382183, "grad_norm": 1.7744866609573364, "learning_rate": 2.7524723612532625e-05, "loss": 0.2811737060546875, "step": 3162 }, { "epoch": 0.21380289306475597, "grad_norm": 1.1896413564682007, "learning_rate": 2.752290765099486e-05, "loss": 0.31243896484375, "step": 3163 }, { "epoch": 0.21387048803569014, "grad_norm": 1.1176813840866089, "learning_rate": 2.7521091083520944e-05, "loss": 0.2525634765625, "step": 3164 }, { "epoch": 0.2139380830066243, "grad_norm": 1.7243151664733887, "learning_rate": 2.7519273910198775e-05, "loss": 0.2879638671875, "step": 3165 }, { "epoch": 0.21400567797755846, "grad_norm": 0.9016150832176208, "learning_rate": 2.7517456131116274e-05, "loss": 0.243988037109375, "step": 3166 }, { "epoch": 0.21407327294849263, "grad_norm": 1.5210812091827393, "learning_rate": 2.75156377463614e-05, "loss": 0.33660888671875, "step": 3167 }, { "epoch": 0.2141408679194268, "grad_norm": 0.9680485725402832, "learning_rate": 2.7513818756022144e-05, "loss": 0.276519775390625, "step": 3168 }, { "epoch": 0.21420846289036097, "grad_norm": 1.5494023561477661, "learning_rate": 2.7511999160186508e-05, "loss": 0.2781982421875, "step": 3169 }, { "epoch": 0.2142760578612951, "grad_norm": 0.9323601126670837, "learning_rate": 2.7510178958942543e-05, "loss": 0.21075439453125, "step": 3170 }, { "epoch": 0.21434365283222928, "grad_norm": 1.0807310342788696, "learning_rate": 2.7508358152378324e-05, "loss": 0.1979827880859375, "step": 3171 }, { "epoch": 0.21441124780316345, "grad_norm": 2.753734827041626, "learning_rate": 2.7506536740581943e-05, "loss": 0.238677978515625, "step": 3172 }, { "epoch": 0.2144788427740976, "grad_norm": 2.478907585144043, "learning_rate": 2.750471472364154e-05, "loss": 0.338165283203125, "step": 3173 }, { "epoch": 0.21454643774503177, "grad_norm": 1.5902025699615479, "learning_rate": 2.750289210164527e-05, "loss": 0.3563232421875, "step": 3174 }, { "epoch": 0.21461403271596594, "grad_norm": 0.9496876001358032, "learning_rate": 2.7501068874681328e-05, "loss": 0.22943115234375, "step": 3175 }, { "epoch": 0.2146816276869001, "grad_norm": 1.2325048446655273, "learning_rate": 2.749924504283793e-05, "loss": 0.26385498046875, "step": 3176 }, { "epoch": 0.21474922265783425, "grad_norm": 1.0696300268173218, "learning_rate": 2.749742060620333e-05, "loss": 0.3028564453125, "step": 3177 }, { "epoch": 0.21481681762876842, "grad_norm": 1.5189794301986694, "learning_rate": 2.7495595564865796e-05, "loss": 0.260772705078125, "step": 3178 }, { "epoch": 0.2148844125997026, "grad_norm": 1.1068780422210693, "learning_rate": 2.7493769918913633e-05, "loss": 0.22662353515625, "step": 3179 }, { "epoch": 0.21495200757063673, "grad_norm": 1.4226216077804565, "learning_rate": 2.749194366843519e-05, "loss": 0.3079833984375, "step": 3180 }, { "epoch": 0.2150196025415709, "grad_norm": 1.7201367616653442, "learning_rate": 2.749011681351883e-05, "loss": 0.257080078125, "step": 3181 }, { "epoch": 0.21508719751250507, "grad_norm": 1.1328202486038208, "learning_rate": 2.7488289354252935e-05, "loss": 0.202392578125, "step": 3182 }, { "epoch": 0.21515479248343924, "grad_norm": 1.3706669807434082, "learning_rate": 2.748646129072594e-05, "loss": 0.275482177734375, "step": 3183 }, { "epoch": 0.2152223874543734, "grad_norm": 0.6573779582977295, "learning_rate": 2.7484632623026294e-05, "loss": 0.097442626953125, "step": 3184 }, { "epoch": 0.21528998242530756, "grad_norm": 2.244718074798584, "learning_rate": 2.7482803351242488e-05, "loss": 0.27947998046875, "step": 3185 }, { "epoch": 0.21535757739624173, "grad_norm": 1.6907705068588257, "learning_rate": 2.7480973475463018e-05, "loss": 0.33428955078125, "step": 3186 }, { "epoch": 0.21542517236717587, "grad_norm": 1.1177743673324585, "learning_rate": 2.7479142995776436e-05, "loss": 0.240875244140625, "step": 3187 }, { "epoch": 0.21549276733811004, "grad_norm": 1.872591495513916, "learning_rate": 2.7477311912271312e-05, "loss": 0.278656005859375, "step": 3188 }, { "epoch": 0.2155603623090442, "grad_norm": 1.1538593769073486, "learning_rate": 2.7475480225036237e-05, "loss": 0.31890869140625, "step": 3189 }, { "epoch": 0.21562795727997838, "grad_norm": 0.8730750679969788, "learning_rate": 2.7473647934159843e-05, "loss": 0.184967041015625, "step": 3190 }, { "epoch": 0.21569555225091253, "grad_norm": 1.2812044620513916, "learning_rate": 2.7471815039730793e-05, "loss": 0.2418212890625, "step": 3191 }, { "epoch": 0.2157631472218467, "grad_norm": 1.4699276685714722, "learning_rate": 2.746998154183777e-05, "loss": 0.251251220703125, "step": 3192 }, { "epoch": 0.21583074219278087, "grad_norm": 1.1042951345443726, "learning_rate": 2.7468147440569487e-05, "loss": 0.232147216796875, "step": 3193 }, { "epoch": 0.215898337163715, "grad_norm": 1.2707401514053345, "learning_rate": 2.7466312736014698e-05, "loss": 0.314056396484375, "step": 3194 }, { "epoch": 0.21596593213464918, "grad_norm": 1.5514962673187256, "learning_rate": 2.7464477428262164e-05, "loss": 0.30438232421875, "step": 3195 }, { "epoch": 0.21603352710558335, "grad_norm": 1.3548353910446167, "learning_rate": 2.7462641517400703e-05, "loss": 0.273223876953125, "step": 3196 }, { "epoch": 0.2161011220765175, "grad_norm": 1.809556007385254, "learning_rate": 2.7460805003519137e-05, "loss": 0.27020263671875, "step": 3197 }, { "epoch": 0.21616871704745166, "grad_norm": 0.823651134967804, "learning_rate": 2.7458967886706336e-05, "loss": 0.11325454711914062, "step": 3198 }, { "epoch": 0.21623631201838583, "grad_norm": 1.8042936325073242, "learning_rate": 2.7457130167051184e-05, "loss": 0.2262115478515625, "step": 3199 }, { "epoch": 0.21630390698932, "grad_norm": 1.408479928970337, "learning_rate": 2.7455291844642607e-05, "loss": 0.170135498046875, "step": 3200 }, { "epoch": 0.21637150196025415, "grad_norm": 1.0530651807785034, "learning_rate": 2.7453452919569548e-05, "loss": 0.1170806884765625, "step": 3201 }, { "epoch": 0.21643909693118832, "grad_norm": 1.4255114793777466, "learning_rate": 2.745161339192099e-05, "loss": 0.2589111328125, "step": 3202 }, { "epoch": 0.2165066919021225, "grad_norm": 2.0011439323425293, "learning_rate": 2.7449773261785943e-05, "loss": 0.351318359375, "step": 3203 }, { "epoch": 0.21657428687305663, "grad_norm": 3.591648578643799, "learning_rate": 2.744793252925344e-05, "loss": 0.32275390625, "step": 3204 }, { "epoch": 0.2166418818439908, "grad_norm": 2.1597137451171875, "learning_rate": 2.744609119441255e-05, "loss": 0.350830078125, "step": 3205 }, { "epoch": 0.21670947681492497, "grad_norm": 1.315207600593567, "learning_rate": 2.7444249257352368e-05, "loss": 0.26861572265625, "step": 3206 }, { "epoch": 0.21677707178585914, "grad_norm": 1.6855652332305908, "learning_rate": 2.744240671816201e-05, "loss": 0.24542236328125, "step": 3207 }, { "epoch": 0.2168446667567933, "grad_norm": 1.558255672454834, "learning_rate": 2.744056357693064e-05, "loss": 0.2343292236328125, "step": 3208 }, { "epoch": 0.21691226172772746, "grad_norm": 1.053929090499878, "learning_rate": 2.743871983374744e-05, "loss": 0.27459716796875, "step": 3209 }, { "epoch": 0.21697985669866163, "grad_norm": 1.222177267074585, "learning_rate": 2.743687548870162e-05, "loss": 0.25421142578125, "step": 3210 }, { "epoch": 0.21704745166959577, "grad_norm": 0.8057919144630432, "learning_rate": 2.7435030541882412e-05, "loss": 0.19500732421875, "step": 3211 }, { "epoch": 0.21711504664052994, "grad_norm": 1.7515558004379272, "learning_rate": 2.74331849933791e-05, "loss": 0.31646728515625, "step": 3212 }, { "epoch": 0.2171826416114641, "grad_norm": 2.1505227088928223, "learning_rate": 2.7431338843280974e-05, "loss": 0.399658203125, "step": 3213 }, { "epoch": 0.21725023658239828, "grad_norm": 1.5683099031448364, "learning_rate": 2.7429492091677365e-05, "loss": 0.340667724609375, "step": 3214 }, { "epoch": 0.21731783155333242, "grad_norm": 0.9946038126945496, "learning_rate": 2.7427644738657635e-05, "loss": 0.1167755126953125, "step": 3215 }, { "epoch": 0.2173854265242666, "grad_norm": 1.7468148469924927, "learning_rate": 2.742579678431116e-05, "loss": 0.25933837890625, "step": 3216 }, { "epoch": 0.21745302149520077, "grad_norm": 1.774088978767395, "learning_rate": 2.742394822872736e-05, "loss": 0.34149169921875, "step": 3217 }, { "epoch": 0.2175206164661349, "grad_norm": 2.531818389892578, "learning_rate": 2.742209907199569e-05, "loss": 0.341796875, "step": 3218 }, { "epoch": 0.21758821143706908, "grad_norm": 2.0791213512420654, "learning_rate": 2.7420249314205608e-05, "loss": 0.240325927734375, "step": 3219 }, { "epoch": 0.21765580640800325, "grad_norm": 1.3034981489181519, "learning_rate": 2.7418398955446625e-05, "loss": 0.26348876953125, "step": 3220 }, { "epoch": 0.21772340137893742, "grad_norm": 0.9935275912284851, "learning_rate": 2.741654799580827e-05, "loss": 0.135955810546875, "step": 3221 }, { "epoch": 0.21779099634987156, "grad_norm": 1.140586495399475, "learning_rate": 2.7414696435380108e-05, "loss": 0.26715087890625, "step": 3222 }, { "epoch": 0.21785859132080573, "grad_norm": 1.285272479057312, "learning_rate": 2.7412844274251724e-05, "loss": 0.2783203125, "step": 3223 }, { "epoch": 0.2179261862917399, "grad_norm": 2.1172776222229004, "learning_rate": 2.741099151251274e-05, "loss": 0.30242919921875, "step": 3224 }, { "epoch": 0.21799378126267405, "grad_norm": 2.381802558898926, "learning_rate": 2.7409138150252805e-05, "loss": 0.308837890625, "step": 3225 }, { "epoch": 0.21806137623360822, "grad_norm": 1.7878936529159546, "learning_rate": 2.7407284187561593e-05, "loss": 0.246551513671875, "step": 3226 }, { "epoch": 0.2181289712045424, "grad_norm": 1.3764809370040894, "learning_rate": 2.7405429624528813e-05, "loss": 0.17291259765625, "step": 3227 }, { "epoch": 0.21819656617547656, "grad_norm": 2.27350115776062, "learning_rate": 2.74035744612442e-05, "loss": 0.36541748046875, "step": 3228 }, { "epoch": 0.2182641611464107, "grad_norm": 1.1778372526168823, "learning_rate": 2.7401718697797514e-05, "loss": 0.28753662109375, "step": 3229 }, { "epoch": 0.21833175611734487, "grad_norm": 0.9928640723228455, "learning_rate": 2.7399862334278555e-05, "loss": 0.281280517578125, "step": 3230 }, { "epoch": 0.21839935108827904, "grad_norm": 0.8485392332077026, "learning_rate": 2.739800537077714e-05, "loss": 0.2333221435546875, "step": 3231 }, { "epoch": 0.21846694605921319, "grad_norm": 3.4603610038757324, "learning_rate": 2.7396147807383127e-05, "loss": 0.328216552734375, "step": 3232 }, { "epoch": 0.21853454103014736, "grad_norm": 1.142613410949707, "learning_rate": 2.739428964418639e-05, "loss": 0.3157958984375, "step": 3233 }, { "epoch": 0.21860213600108153, "grad_norm": 1.481437087059021, "learning_rate": 2.739243088127684e-05, "loss": 0.320404052734375, "step": 3234 }, { "epoch": 0.21866973097201567, "grad_norm": 0.8842440843582153, "learning_rate": 2.7390571518744416e-05, "loss": 0.2030487060546875, "step": 3235 }, { "epoch": 0.21873732594294984, "grad_norm": 1.2192671298980713, "learning_rate": 2.7388711556679087e-05, "loss": 0.221405029296875, "step": 3236 }, { "epoch": 0.218804920913884, "grad_norm": 1.563705325126648, "learning_rate": 2.7386850995170848e-05, "loss": 0.290252685546875, "step": 3237 }, { "epoch": 0.21887251588481818, "grad_norm": 1.574195146560669, "learning_rate": 2.7384989834309722e-05, "loss": 0.25341796875, "step": 3238 }, { "epoch": 0.21894011085575232, "grad_norm": 1.6826961040496826, "learning_rate": 2.7383128074185767e-05, "loss": 0.259063720703125, "step": 3239 }, { "epoch": 0.2190077058266865, "grad_norm": 2.48868989944458, "learning_rate": 2.738126571488907e-05, "loss": 0.281707763671875, "step": 3240 }, { "epoch": 0.21907530079762066, "grad_norm": 1.0987482070922852, "learning_rate": 2.737940275650974e-05, "loss": 0.213836669921875, "step": 3241 }, { "epoch": 0.2191428957685548, "grad_norm": 1.1603307723999023, "learning_rate": 2.7377539199137914e-05, "loss": 0.26300048828125, "step": 3242 }, { "epoch": 0.21921049073948898, "grad_norm": 1.0040544271469116, "learning_rate": 2.7375675042863765e-05, "loss": 0.211212158203125, "step": 3243 }, { "epoch": 0.21927808571042315, "grad_norm": 1.3256596326828003, "learning_rate": 2.73738102877775e-05, "loss": 0.225341796875, "step": 3244 }, { "epoch": 0.21934568068135732, "grad_norm": 1.0128886699676514, "learning_rate": 2.7371944933969334e-05, "loss": 0.2392578125, "step": 3245 }, { "epoch": 0.21941327565229146, "grad_norm": 0.7815588116645813, "learning_rate": 2.737007898152954e-05, "loss": 0.18628692626953125, "step": 3246 }, { "epoch": 0.21948087062322563, "grad_norm": 3.4089503288269043, "learning_rate": 2.7368212430548388e-05, "loss": 0.37103271484375, "step": 3247 }, { "epoch": 0.2195484655941598, "grad_norm": 1.2589834928512573, "learning_rate": 2.7366345281116207e-05, "loss": 0.291473388671875, "step": 3248 }, { "epoch": 0.21961606056509395, "grad_norm": 0.9628674387931824, "learning_rate": 2.7364477533323337e-05, "loss": 0.175048828125, "step": 3249 }, { "epoch": 0.21968365553602812, "grad_norm": 1.2077562808990479, "learning_rate": 2.7362609187260146e-05, "loss": 0.2747802734375, "step": 3250 }, { "epoch": 0.2197512505069623, "grad_norm": 2.885791540145874, "learning_rate": 2.7360740243017042e-05, "loss": 0.320831298828125, "step": 3251 }, { "epoch": 0.21981884547789646, "grad_norm": 0.7932150363922119, "learning_rate": 2.735887070068445e-05, "loss": 0.16490936279296875, "step": 3252 }, { "epoch": 0.2198864404488306, "grad_norm": 1.4536898136138916, "learning_rate": 2.7357000560352843e-05, "loss": 0.3442840576171875, "step": 3253 }, { "epoch": 0.21995403541976477, "grad_norm": 2.169811964035034, "learning_rate": 2.7355129822112694e-05, "loss": 0.195892333984375, "step": 3254 }, { "epoch": 0.22002163039069894, "grad_norm": 1.7133922576904297, "learning_rate": 2.7353258486054532e-05, "loss": 0.276092529296875, "step": 3255 }, { "epoch": 0.22008922536163308, "grad_norm": 0.9156407117843628, "learning_rate": 2.73513865522689e-05, "loss": 0.2135009765625, "step": 3256 }, { "epoch": 0.22015682033256725, "grad_norm": 1.4472841024398804, "learning_rate": 2.7349514020846373e-05, "loss": 0.26373291015625, "step": 3257 }, { "epoch": 0.22022441530350143, "grad_norm": 0.8653554916381836, "learning_rate": 2.734764089187756e-05, "loss": 0.166595458984375, "step": 3258 }, { "epoch": 0.2202920102744356, "grad_norm": 1.1188312768936157, "learning_rate": 2.734576716545309e-05, "loss": 0.181732177734375, "step": 3259 }, { "epoch": 0.22035960524536974, "grad_norm": 0.8380237221717834, "learning_rate": 2.734389284166362e-05, "loss": 0.2157440185546875, "step": 3260 }, { "epoch": 0.2204272002163039, "grad_norm": 1.5406579971313477, "learning_rate": 2.734201792059986e-05, "loss": 0.352294921875, "step": 3261 }, { "epoch": 0.22049479518723808, "grad_norm": 0.7068056464195251, "learning_rate": 2.7340142402352513e-05, "loss": 0.0939788818359375, "step": 3262 }, { "epoch": 0.22056239015817222, "grad_norm": 1.5612214803695679, "learning_rate": 2.7338266287012336e-05, "loss": 0.302703857421875, "step": 3263 }, { "epoch": 0.2206299851291064, "grad_norm": 1.625972032546997, "learning_rate": 2.7336389574670104e-05, "loss": 0.214385986328125, "step": 3264 }, { "epoch": 0.22069758010004056, "grad_norm": 0.5009796619415283, "learning_rate": 2.7334512265416626e-05, "loss": 0.08985137939453125, "step": 3265 }, { "epoch": 0.2207651750709747, "grad_norm": 1.6779170036315918, "learning_rate": 2.7332634359342736e-05, "loss": 0.2437744140625, "step": 3266 }, { "epoch": 0.22083277004190888, "grad_norm": 1.0274059772491455, "learning_rate": 2.73307558565393e-05, "loss": 0.299957275390625, "step": 3267 }, { "epoch": 0.22090036501284305, "grad_norm": 1.7173848152160645, "learning_rate": 2.732887675709721e-05, "loss": 0.26702880859375, "step": 3268 }, { "epoch": 0.22096795998377722, "grad_norm": 2.4293272495269775, "learning_rate": 2.7326997061107392e-05, "loss": 0.34808349609375, "step": 3269 }, { "epoch": 0.22103555495471136, "grad_norm": 1.2929571866989136, "learning_rate": 2.7325116768660798e-05, "loss": 0.257598876953125, "step": 3270 }, { "epoch": 0.22110314992564553, "grad_norm": 1.9181175231933594, "learning_rate": 2.73232358798484e-05, "loss": 0.205413818359375, "step": 3271 }, { "epoch": 0.2211707448965797, "grad_norm": 1.3770378828048706, "learning_rate": 2.7321354394761216e-05, "loss": 0.29150390625, "step": 3272 }, { "epoch": 0.22123833986751384, "grad_norm": 1.1058740615844727, "learning_rate": 2.731947231349028e-05, "loss": 0.2578887939453125, "step": 3273 }, { "epoch": 0.22130593483844802, "grad_norm": 1.2841095924377441, "learning_rate": 2.731758963612666e-05, "loss": 0.319580078125, "step": 3274 }, { "epoch": 0.22137352980938219, "grad_norm": 0.881317138671875, "learning_rate": 2.7315706362761453e-05, "loss": 0.18243408203125, "step": 3275 }, { "epoch": 0.22144112478031636, "grad_norm": 2.575333833694458, "learning_rate": 2.7313822493485775e-05, "loss": 0.31707763671875, "step": 3276 }, { "epoch": 0.2215087197512505, "grad_norm": 1.2864187955856323, "learning_rate": 2.7311938028390792e-05, "loss": 0.219573974609375, "step": 3277 }, { "epoch": 0.22157631472218467, "grad_norm": 1.774976134300232, "learning_rate": 2.731005296756768e-05, "loss": 0.2869873046875, "step": 3278 }, { "epoch": 0.22164390969311884, "grad_norm": 2.0410208702087402, "learning_rate": 2.7308167311107646e-05, "loss": 0.269775390625, "step": 3279 }, { "epoch": 0.22171150466405298, "grad_norm": 1.236856460571289, "learning_rate": 2.730628105910194e-05, "loss": 0.236907958984375, "step": 3280 }, { "epoch": 0.22177909963498715, "grad_norm": 0.8423427939414978, "learning_rate": 2.730439421164182e-05, "loss": 0.1375885009765625, "step": 3281 }, { "epoch": 0.22184669460592132, "grad_norm": 1.3995662927627563, "learning_rate": 2.7302506768818587e-05, "loss": 0.244232177734375, "step": 3282 }, { "epoch": 0.2219142895768555, "grad_norm": 3.2684364318847656, "learning_rate": 2.7300618730723567e-05, "loss": 0.337982177734375, "step": 3283 }, { "epoch": 0.22198188454778964, "grad_norm": 2.2208704948425293, "learning_rate": 2.7298730097448124e-05, "loss": 0.264801025390625, "step": 3284 }, { "epoch": 0.2220494795187238, "grad_norm": 1.4770408868789673, "learning_rate": 2.729684086908363e-05, "loss": 0.1755523681640625, "step": 3285 }, { "epoch": 0.22211707448965798, "grad_norm": 2.7410500049591064, "learning_rate": 2.7294951045721495e-05, "loss": 0.307342529296875, "step": 3286 }, { "epoch": 0.22218466946059212, "grad_norm": 3.3704326152801514, "learning_rate": 2.7293060627453173e-05, "loss": 0.26422119140625, "step": 3287 }, { "epoch": 0.2222522644315263, "grad_norm": 1.1699254512786865, "learning_rate": 2.729116961437013e-05, "loss": 0.290740966796875, "step": 3288 }, { "epoch": 0.22231985940246046, "grad_norm": 1.410693645477295, "learning_rate": 2.7289278006563854e-05, "loss": 0.32330322265625, "step": 3289 }, { "epoch": 0.22238745437339463, "grad_norm": 2.036172389984131, "learning_rate": 2.728738580412589e-05, "loss": 0.244049072265625, "step": 3290 }, { "epoch": 0.22245504934432878, "grad_norm": 1.839213252067566, "learning_rate": 2.7285493007147785e-05, "loss": 0.291656494140625, "step": 3291 }, { "epoch": 0.22252264431526295, "grad_norm": 2.104982852935791, "learning_rate": 2.728359961572113e-05, "loss": 0.2333984375, "step": 3292 }, { "epoch": 0.22259023928619712, "grad_norm": 1.3048971891403198, "learning_rate": 2.728170562993753e-05, "loss": 0.208587646484375, "step": 3293 }, { "epoch": 0.22265783425713126, "grad_norm": 0.8337860703468323, "learning_rate": 2.727981104988863e-05, "loss": 0.188446044921875, "step": 3294 }, { "epoch": 0.22272542922806543, "grad_norm": 1.7624591588974, "learning_rate": 2.7277915875666106e-05, "loss": 0.3533935546875, "step": 3295 }, { "epoch": 0.2227930241989996, "grad_norm": 1.538195013999939, "learning_rate": 2.7276020107361656e-05, "loss": 0.2132415771484375, "step": 3296 }, { "epoch": 0.22286061916993374, "grad_norm": 1.5204249620437622, "learning_rate": 2.727412374506701e-05, "loss": 0.2547607421875, "step": 3297 }, { "epoch": 0.22292821414086791, "grad_norm": 2.1990976333618164, "learning_rate": 2.7272226788873924e-05, "loss": 0.31072998046875, "step": 3298 }, { "epoch": 0.22299580911180208, "grad_norm": 0.7856186628341675, "learning_rate": 2.727032923887419e-05, "loss": 0.1874542236328125, "step": 3299 }, { "epoch": 0.22306340408273626, "grad_norm": 1.542984127998352, "learning_rate": 2.7268431095159614e-05, "loss": 0.35528564453125, "step": 3300 }, { "epoch": 0.2231309990536704, "grad_norm": 1.558791995048523, "learning_rate": 2.7266532357822046e-05, "loss": 0.23577880859375, "step": 3301 }, { "epoch": 0.22319859402460457, "grad_norm": 1.1715894937515259, "learning_rate": 2.726463302695336e-05, "loss": 0.1822509765625, "step": 3302 }, { "epoch": 0.22326618899553874, "grad_norm": 1.7010014057159424, "learning_rate": 2.7262733102645458e-05, "loss": 0.25799560546875, "step": 3303 }, { "epoch": 0.22333378396647288, "grad_norm": 1.2103793621063232, "learning_rate": 2.726083258499026e-05, "loss": 0.2408599853515625, "step": 3304 }, { "epoch": 0.22340137893740705, "grad_norm": 1.2639861106872559, "learning_rate": 2.725893147407974e-05, "loss": 0.269012451171875, "step": 3305 }, { "epoch": 0.22346897390834122, "grad_norm": 1.3564953804016113, "learning_rate": 2.725702977000587e-05, "loss": 0.322998046875, "step": 3306 }, { "epoch": 0.2235365688792754, "grad_norm": 0.9896065592765808, "learning_rate": 2.725512747286068e-05, "loss": 0.19451904296875, "step": 3307 }, { "epoch": 0.22360416385020954, "grad_norm": 1.1502313613891602, "learning_rate": 2.7253224582736205e-05, "loss": 0.1954803466796875, "step": 3308 }, { "epoch": 0.2236717588211437, "grad_norm": 1.4133660793304443, "learning_rate": 2.7251321099724528e-05, "loss": 0.25701904296875, "step": 3309 }, { "epoch": 0.22373935379207788, "grad_norm": 1.0675420761108398, "learning_rate": 2.724941702391774e-05, "loss": 0.2126007080078125, "step": 3310 }, { "epoch": 0.22380694876301202, "grad_norm": 1.7265220880508423, "learning_rate": 2.7247512355407983e-05, "loss": 0.287628173828125, "step": 3311 }, { "epoch": 0.2238745437339462, "grad_norm": 1.0148515701293945, "learning_rate": 2.724560709428741e-05, "loss": 0.227783203125, "step": 3312 }, { "epoch": 0.22394213870488036, "grad_norm": 2.0233101844787598, "learning_rate": 2.724370124064821e-05, "loss": 0.35333251953125, "step": 3313 }, { "epoch": 0.22400973367581453, "grad_norm": 1.2745307683944702, "learning_rate": 2.7241794794582598e-05, "loss": 0.208831787109375, "step": 3314 }, { "epoch": 0.22407732864674867, "grad_norm": 2.2973289489746094, "learning_rate": 2.723988775618283e-05, "loss": 0.31695556640625, "step": 3315 }, { "epoch": 0.22414492361768285, "grad_norm": 1.9895339012145996, "learning_rate": 2.7237980125541173e-05, "loss": 0.28399658203125, "step": 3316 }, { "epoch": 0.22421251858861702, "grad_norm": 0.7079088687896729, "learning_rate": 2.7236071902749926e-05, "loss": 0.16925048828125, "step": 3317 }, { "epoch": 0.22428011355955116, "grad_norm": 1.6736643314361572, "learning_rate": 2.7234163087901427e-05, "loss": 0.3187255859375, "step": 3318 }, { "epoch": 0.22434770853048533, "grad_norm": 0.5310882329940796, "learning_rate": 2.7232253681088034e-05, "loss": 0.1523590087890625, "step": 3319 }, { "epoch": 0.2244153035014195, "grad_norm": 1.140982747077942, "learning_rate": 2.7230343682402136e-05, "loss": 0.280914306640625, "step": 3320 }, { "epoch": 0.22448289847235367, "grad_norm": 1.6581261157989502, "learning_rate": 2.7228433091936152e-05, "loss": 0.31195068359375, "step": 3321 }, { "epoch": 0.2245504934432878, "grad_norm": 1.0813766717910767, "learning_rate": 2.7226521909782525e-05, "loss": 0.2933349609375, "step": 3322 }, { "epoch": 0.22461808841422198, "grad_norm": 1.671522855758667, "learning_rate": 2.7224610136033733e-05, "loss": 0.233551025390625, "step": 3323 }, { "epoch": 0.22468568338515615, "grad_norm": 1.1631308794021606, "learning_rate": 2.722269777078228e-05, "loss": 0.173736572265625, "step": 3324 }, { "epoch": 0.2247532783560903, "grad_norm": 1.0614093542099, "learning_rate": 2.7220784814120694e-05, "loss": 0.228515625, "step": 3325 }, { "epoch": 0.22482087332702447, "grad_norm": 1.6691627502441406, "learning_rate": 2.7218871266141542e-05, "loss": 0.1918792724609375, "step": 3326 }, { "epoch": 0.22488846829795864, "grad_norm": 2.8310887813568115, "learning_rate": 2.7216957126937407e-05, "loss": 0.2752685546875, "step": 3327 }, { "epoch": 0.22495606326889278, "grad_norm": 1.359696626663208, "learning_rate": 2.7215042396600912e-05, "loss": 0.36236572265625, "step": 3328 }, { "epoch": 0.22502365823982695, "grad_norm": 1.0090181827545166, "learning_rate": 2.7213127075224697e-05, "loss": 0.26397705078125, "step": 3329 }, { "epoch": 0.22509125321076112, "grad_norm": 2.0902633666992188, "learning_rate": 2.7211211162901446e-05, "loss": 0.252532958984375, "step": 3330 }, { "epoch": 0.2251588481816953, "grad_norm": 1.8335434198379517, "learning_rate": 2.7209294659723856e-05, "loss": 0.296905517578125, "step": 3331 }, { "epoch": 0.22522644315262944, "grad_norm": 1.1441835165023804, "learning_rate": 2.7207377565784666e-05, "loss": 0.2311553955078125, "step": 3332 }, { "epoch": 0.2252940381235636, "grad_norm": 1.5021864175796509, "learning_rate": 2.720545988117663e-05, "loss": 0.2225341796875, "step": 3333 }, { "epoch": 0.22536163309449778, "grad_norm": 0.814173698425293, "learning_rate": 2.7203541605992538e-05, "loss": 0.1960296630859375, "step": 3334 }, { "epoch": 0.22542922806543192, "grad_norm": 1.683563470840454, "learning_rate": 2.7201622740325208e-05, "loss": 0.318756103515625, "step": 3335 }, { "epoch": 0.2254968230363661, "grad_norm": 3.3817079067230225, "learning_rate": 2.719970328426749e-05, "loss": 0.3367919921875, "step": 3336 }, { "epoch": 0.22556441800730026, "grad_norm": 1.7765153646469116, "learning_rate": 2.7197783237912262e-05, "loss": 0.2658843994140625, "step": 3337 }, { "epoch": 0.22563201297823443, "grad_norm": 1.6192010641098022, "learning_rate": 2.7195862601352424e-05, "loss": 0.2552642822265625, "step": 3338 }, { "epoch": 0.22569960794916857, "grad_norm": 1.9173059463500977, "learning_rate": 2.7193941374680904e-05, "loss": 0.373626708984375, "step": 3339 }, { "epoch": 0.22576720292010274, "grad_norm": 1.2563318014144897, "learning_rate": 2.719201955799067e-05, "loss": 0.24261474609375, "step": 3340 }, { "epoch": 0.22583479789103691, "grad_norm": 2.4808096885681152, "learning_rate": 2.719009715137471e-05, "loss": 0.351776123046875, "step": 3341 }, { "epoch": 0.22590239286197106, "grad_norm": 0.9515789151191711, "learning_rate": 2.718817415492603e-05, "loss": 0.15557861328125, "step": 3342 }, { "epoch": 0.22596998783290523, "grad_norm": 1.2842799425125122, "learning_rate": 2.7186250568737695e-05, "loss": 0.26654052734375, "step": 3343 }, { "epoch": 0.2260375828038394, "grad_norm": 1.2542613744735718, "learning_rate": 2.7184326392902773e-05, "loss": 0.240020751953125, "step": 3344 }, { "epoch": 0.22610517777477357, "grad_norm": 1.385640263557434, "learning_rate": 2.7182401627514366e-05, "loss": 0.33544921875, "step": 3345 }, { "epoch": 0.2261727727457077, "grad_norm": 1.6941064596176147, "learning_rate": 2.7180476272665604e-05, "loss": 0.1912994384765625, "step": 3346 }, { "epoch": 0.22624036771664188, "grad_norm": 0.9895366430282593, "learning_rate": 2.7178550328449657e-05, "loss": 0.318603515625, "step": 3347 }, { "epoch": 0.22630796268757605, "grad_norm": 1.4884469509124756, "learning_rate": 2.71766237949597e-05, "loss": 0.22833251953125, "step": 3348 }, { "epoch": 0.2263755576585102, "grad_norm": 1.0821261405944824, "learning_rate": 2.717469667228896e-05, "loss": 0.17464447021484375, "step": 3349 }, { "epoch": 0.22644315262944437, "grad_norm": 1.2931712865829468, "learning_rate": 2.7172768960530684e-05, "loss": 0.26422119140625, "step": 3350 }, { "epoch": 0.22651074760037854, "grad_norm": 2.34151291847229, "learning_rate": 2.7170840659778144e-05, "loss": 0.33624267578125, "step": 3351 }, { "epoch": 0.2265783425713127, "grad_norm": 1.4678897857666016, "learning_rate": 2.7168911770124636e-05, "loss": 0.221649169921875, "step": 3352 }, { "epoch": 0.22664593754224685, "grad_norm": 2.9073619842529297, "learning_rate": 2.7166982291663507e-05, "loss": 0.32855224609375, "step": 3353 }, { "epoch": 0.22671353251318102, "grad_norm": 0.6543315649032593, "learning_rate": 2.7165052224488106e-05, "loss": 0.16632080078125, "step": 3354 }, { "epoch": 0.2267811274841152, "grad_norm": 1.5242730379104614, "learning_rate": 2.7163121568691827e-05, "loss": 0.24273681640625, "step": 3355 }, { "epoch": 0.22684872245504933, "grad_norm": 1.6324464082717896, "learning_rate": 2.716119032436808e-05, "loss": 0.18798828125, "step": 3356 }, { "epoch": 0.2269163174259835, "grad_norm": 2.0178325176239014, "learning_rate": 2.7159258491610317e-05, "loss": 0.3035888671875, "step": 3357 }, { "epoch": 0.22698391239691768, "grad_norm": 1.6230632066726685, "learning_rate": 2.7157326070512013e-05, "loss": 0.34344482421875, "step": 3358 }, { "epoch": 0.22705150736785182, "grad_norm": 1.269508957862854, "learning_rate": 2.715539306116667e-05, "loss": 0.219451904296875, "step": 3359 }, { "epoch": 0.227119102338786, "grad_norm": 1.070399522781372, "learning_rate": 2.7153459463667812e-05, "loss": 0.280487060546875, "step": 3360 }, { "epoch": 0.22718669730972016, "grad_norm": 1.0858476161956787, "learning_rate": 2.7151525278109005e-05, "loss": 0.2343902587890625, "step": 3361 }, { "epoch": 0.22725429228065433, "grad_norm": 1.3103747367858887, "learning_rate": 2.714959050458384e-05, "loss": 0.262237548828125, "step": 3362 }, { "epoch": 0.22732188725158847, "grad_norm": 1.6308513879776, "learning_rate": 2.7147655143185923e-05, "loss": 0.274200439453125, "step": 3363 }, { "epoch": 0.22738948222252264, "grad_norm": 2.2752621173858643, "learning_rate": 2.7145719194008907e-05, "loss": 0.32562255859375, "step": 3364 }, { "epoch": 0.2274570771934568, "grad_norm": 1.9094611406326294, "learning_rate": 2.714378265714646e-05, "loss": 0.2084503173828125, "step": 3365 }, { "epoch": 0.22752467216439096, "grad_norm": 0.6791588664054871, "learning_rate": 2.714184553269229e-05, "loss": 0.11611175537109375, "step": 3366 }, { "epoch": 0.22759226713532513, "grad_norm": 1.8025519847869873, "learning_rate": 2.7139907820740123e-05, "loss": 0.340118408203125, "step": 3367 }, { "epoch": 0.2276598621062593, "grad_norm": 0.9220141768455505, "learning_rate": 2.7137969521383723e-05, "loss": 0.2389373779296875, "step": 3368 }, { "epoch": 0.22772745707719347, "grad_norm": 1.0638880729675293, "learning_rate": 2.7136030634716866e-05, "loss": 0.28472900390625, "step": 3369 }, { "epoch": 0.2277950520481276, "grad_norm": 1.6792047023773193, "learning_rate": 2.713409116083338e-05, "loss": 0.23028564453125, "step": 3370 }, { "epoch": 0.22786264701906178, "grad_norm": 1.2670124769210815, "learning_rate": 2.7132151099827095e-05, "loss": 0.2360076904296875, "step": 3371 }, { "epoch": 0.22793024198999595, "grad_norm": 0.9715664982795715, "learning_rate": 2.7130210451791894e-05, "loss": 0.2201080322265625, "step": 3372 }, { "epoch": 0.2279978369609301, "grad_norm": 1.1105990409851074, "learning_rate": 2.7128269216821672e-05, "loss": 0.2556610107421875, "step": 3373 }, { "epoch": 0.22806543193186427, "grad_norm": 1.2007803916931152, "learning_rate": 2.7126327395010368e-05, "loss": 0.245849609375, "step": 3374 }, { "epoch": 0.22813302690279844, "grad_norm": 0.9212018251419067, "learning_rate": 2.712438498645192e-05, "loss": 0.17961883544921875, "step": 3375 }, { "epoch": 0.2282006218737326, "grad_norm": 1.0428639650344849, "learning_rate": 2.712244199124034e-05, "loss": 0.23626708984375, "step": 3376 }, { "epoch": 0.22826821684466675, "grad_norm": 1.136283040046692, "learning_rate": 2.7120498409469615e-05, "loss": 0.33843994140625, "step": 3377 }, { "epoch": 0.22833581181560092, "grad_norm": 1.986993670463562, "learning_rate": 2.7118554241233807e-05, "loss": 0.300323486328125, "step": 3378 }, { "epoch": 0.2284034067865351, "grad_norm": 1.3807270526885986, "learning_rate": 2.711660948662698e-05, "loss": 0.28167724609375, "step": 3379 }, { "epoch": 0.22847100175746923, "grad_norm": 1.6595808267593384, "learning_rate": 2.7114664145743235e-05, "loss": 0.261383056640625, "step": 3380 }, { "epoch": 0.2285385967284034, "grad_norm": 1.9288262128829956, "learning_rate": 2.711271821867669e-05, "loss": 0.205169677734375, "step": 3381 }, { "epoch": 0.22860619169933757, "grad_norm": 2.230902671813965, "learning_rate": 2.7110771705521516e-05, "loss": 0.3248291015625, "step": 3382 }, { "epoch": 0.22867378667027174, "grad_norm": 1.1876411437988281, "learning_rate": 2.7108824606371892e-05, "loss": 0.31964111328125, "step": 3383 }, { "epoch": 0.2287413816412059, "grad_norm": 1.1058971881866455, "learning_rate": 2.7106876921322027e-05, "loss": 0.32763671875, "step": 3384 }, { "epoch": 0.22880897661214006, "grad_norm": 1.115002989768982, "learning_rate": 2.7104928650466167e-05, "loss": 0.237762451171875, "step": 3385 }, { "epoch": 0.22887657158307423, "grad_norm": 1.444206953048706, "learning_rate": 2.7102979793898574e-05, "loss": 0.28594970703125, "step": 3386 }, { "epoch": 0.22894416655400837, "grad_norm": 1.280499815940857, "learning_rate": 2.7101030351713557e-05, "loss": 0.198577880859375, "step": 3387 }, { "epoch": 0.22901176152494254, "grad_norm": 1.3545165061950684, "learning_rate": 2.7099080324005435e-05, "loss": 0.260589599609375, "step": 3388 }, { "epoch": 0.2290793564958767, "grad_norm": 1.099914789199829, "learning_rate": 2.709712971086856e-05, "loss": 0.246063232421875, "step": 3389 }, { "epoch": 0.22914695146681088, "grad_norm": 1.0154657363891602, "learning_rate": 2.709517851239732e-05, "loss": 0.2168121337890625, "step": 3390 }, { "epoch": 0.22921454643774503, "grad_norm": 0.8862037062644958, "learning_rate": 2.709322672868613e-05, "loss": 0.1937255859375, "step": 3391 }, { "epoch": 0.2292821414086792, "grad_norm": 1.1198428869247437, "learning_rate": 2.7091274359829413e-05, "loss": 0.2400970458984375, "step": 3392 }, { "epoch": 0.22934973637961337, "grad_norm": 0.8536851406097412, "learning_rate": 2.7089321405921654e-05, "loss": 0.248870849609375, "step": 3393 }, { "epoch": 0.2294173313505475, "grad_norm": 0.9394567012786865, "learning_rate": 2.7087367867057342e-05, "loss": 0.2250823974609375, "step": 3394 }, { "epoch": 0.22948492632148168, "grad_norm": 1.059890627861023, "learning_rate": 2.7085413743331004e-05, "loss": 0.1515350341796875, "step": 3395 }, { "epoch": 0.22955252129241585, "grad_norm": 0.7634454965591431, "learning_rate": 2.7083459034837188e-05, "loss": 0.177215576171875, "step": 3396 }, { "epoch": 0.22962011626335, "grad_norm": 0.7710686326026917, "learning_rate": 2.7081503741670478e-05, "loss": 0.1360931396484375, "step": 3397 }, { "epoch": 0.22968771123428416, "grad_norm": 1.4237518310546875, "learning_rate": 2.7079547863925484e-05, "loss": 0.32476806640625, "step": 3398 }, { "epoch": 0.22975530620521833, "grad_norm": 1.6588529348373413, "learning_rate": 2.7077591401696842e-05, "loss": 0.268310546875, "step": 3399 }, { "epoch": 0.2298229011761525, "grad_norm": 1.4369949102401733, "learning_rate": 2.7075634355079215e-05, "loss": 0.302978515625, "step": 3400 }, { "epoch": 0.22989049614708665, "grad_norm": 0.9799116849899292, "learning_rate": 2.7073676724167307e-05, "loss": 0.2214508056640625, "step": 3401 }, { "epoch": 0.22995809111802082, "grad_norm": 1.1134687662124634, "learning_rate": 2.707171850905583e-05, "loss": 0.183013916015625, "step": 3402 }, { "epoch": 0.230025686088955, "grad_norm": 1.4568495750427246, "learning_rate": 2.7069759709839537e-05, "loss": 0.26904296875, "step": 3403 }, { "epoch": 0.23009328105988913, "grad_norm": 1.8407421112060547, "learning_rate": 2.706780032661321e-05, "loss": 0.2493896484375, "step": 3404 }, { "epoch": 0.2301608760308233, "grad_norm": 1.4597402811050415, "learning_rate": 2.7065840359471656e-05, "loss": 0.273529052734375, "step": 3405 }, { "epoch": 0.23022847100175747, "grad_norm": 2.970048666000366, "learning_rate": 2.7063879808509705e-05, "loss": 0.284576416015625, "step": 3406 }, { "epoch": 0.23029606597269164, "grad_norm": 0.6527824997901917, "learning_rate": 2.7061918673822226e-05, "loss": 0.09185028076171875, "step": 3407 }, { "epoch": 0.2303636609436258, "grad_norm": 1.418837547302246, "learning_rate": 2.705995695550411e-05, "loss": 0.2484893798828125, "step": 3408 }, { "epoch": 0.23043125591455996, "grad_norm": 1.8032714128494263, "learning_rate": 2.7057994653650275e-05, "loss": 0.2620849609375, "step": 3409 }, { "epoch": 0.23049885088549413, "grad_norm": 2.1561949253082275, "learning_rate": 2.705603176835567e-05, "loss": 0.36669921875, "step": 3410 }, { "epoch": 0.23056644585642827, "grad_norm": 1.0367541313171387, "learning_rate": 2.7054068299715275e-05, "loss": 0.1648406982421875, "step": 3411 }, { "epoch": 0.23063404082736244, "grad_norm": 1.568848967552185, "learning_rate": 2.7052104247824092e-05, "loss": 0.38671875, "step": 3412 }, { "epoch": 0.2307016357982966, "grad_norm": 2.018282890319824, "learning_rate": 2.7050139612777153e-05, "loss": 0.29608154296875, "step": 3413 }, { "epoch": 0.23076923076923078, "grad_norm": 1.5961089134216309, "learning_rate": 2.704817439466952e-05, "loss": 0.212646484375, "step": 3414 }, { "epoch": 0.23083682574016492, "grad_norm": 2.2545325756073, "learning_rate": 2.7046208593596285e-05, "loss": 0.357635498046875, "step": 3415 }, { "epoch": 0.2309044207110991, "grad_norm": 2.0147016048431396, "learning_rate": 2.7044242209652563e-05, "loss": 0.265411376953125, "step": 3416 }, { "epoch": 0.23097201568203327, "grad_norm": 1.1539942026138306, "learning_rate": 2.70422752429335e-05, "loss": 0.193328857421875, "step": 3417 }, { "epoch": 0.2310396106529674, "grad_norm": 1.2089065313339233, "learning_rate": 2.7040307693534272e-05, "loss": 0.298980712890625, "step": 3418 }, { "epoch": 0.23110720562390158, "grad_norm": 3.3073208332061768, "learning_rate": 2.703833956155008e-05, "loss": 0.2714996337890625, "step": 3419 }, { "epoch": 0.23117480059483575, "grad_norm": 1.284355640411377, "learning_rate": 2.703637084707615e-05, "loss": 0.242767333984375, "step": 3420 }, { "epoch": 0.23124239556576992, "grad_norm": 0.7559162378311157, "learning_rate": 2.7034401550207753e-05, "loss": 0.138824462890625, "step": 3421 }, { "epoch": 0.23130999053670406, "grad_norm": 1.643440842628479, "learning_rate": 2.7032431671040162e-05, "loss": 0.29638671875, "step": 3422 }, { "epoch": 0.23137758550763823, "grad_norm": 2.978248119354248, "learning_rate": 2.7030461209668696e-05, "loss": 0.35101318359375, "step": 3423 }, { "epoch": 0.2314451804785724, "grad_norm": 2.673799753189087, "learning_rate": 2.7028490166188708e-05, "loss": 0.32757568359375, "step": 3424 }, { "epoch": 0.23151277544950655, "grad_norm": 1.2379698753356934, "learning_rate": 2.702651854069555e-05, "loss": 0.169586181640625, "step": 3425 }, { "epoch": 0.23158037042044072, "grad_norm": 1.2065207958221436, "learning_rate": 2.702454633328464e-05, "loss": 0.2222900390625, "step": 3426 }, { "epoch": 0.2316479653913749, "grad_norm": 2.7482104301452637, "learning_rate": 2.70225735440514e-05, "loss": 0.30059814453125, "step": 3427 }, { "epoch": 0.23171556036230903, "grad_norm": 1.8373998403549194, "learning_rate": 2.702060017309128e-05, "loss": 0.291229248046875, "step": 3428 }, { "epoch": 0.2317831553332432, "grad_norm": 1.665945053100586, "learning_rate": 2.7018626220499773e-05, "loss": 0.227020263671875, "step": 3429 }, { "epoch": 0.23185075030417737, "grad_norm": 2.1319868564605713, "learning_rate": 2.701665168637238e-05, "loss": 0.22161102294921875, "step": 3430 }, { "epoch": 0.23191834527511154, "grad_norm": 0.7551209926605225, "learning_rate": 2.7014676570804656e-05, "loss": 0.21630859375, "step": 3431 }, { "epoch": 0.23198594024604569, "grad_norm": 1.0369399785995483, "learning_rate": 2.7012700873892155e-05, "loss": 0.27923583984375, "step": 3432 }, { "epoch": 0.23205353521697986, "grad_norm": 2.0393707752227783, "learning_rate": 2.701072459573048e-05, "loss": 0.3564453125, "step": 3433 }, { "epoch": 0.23212113018791403, "grad_norm": 1.2053099870681763, "learning_rate": 2.7008747736415255e-05, "loss": 0.261138916015625, "step": 3434 }, { "epoch": 0.23218872515884817, "grad_norm": 1.099040150642395, "learning_rate": 2.7006770296042137e-05, "loss": 0.22412109375, "step": 3435 }, { "epoch": 0.23225632012978234, "grad_norm": 1.1303411722183228, "learning_rate": 2.70047922747068e-05, "loss": 0.258209228515625, "step": 3436 }, { "epoch": 0.2323239151007165, "grad_norm": 1.544222354888916, "learning_rate": 2.7002813672504957e-05, "loss": 0.261383056640625, "step": 3437 }, { "epoch": 0.23239151007165068, "grad_norm": 0.9215825200080872, "learning_rate": 2.700083448953234e-05, "loss": 0.23285675048828125, "step": 3438 }, { "epoch": 0.23245910504258482, "grad_norm": 1.0396106243133545, "learning_rate": 2.699885472588472e-05, "loss": 0.2040252685546875, "step": 3439 }, { "epoch": 0.232526700013519, "grad_norm": 2.0241053104400635, "learning_rate": 2.699687438165789e-05, "loss": 0.296051025390625, "step": 3440 }, { "epoch": 0.23259429498445316, "grad_norm": 1.344327449798584, "learning_rate": 2.699489345694767e-05, "loss": 0.244476318359375, "step": 3441 }, { "epoch": 0.2326618899553873, "grad_norm": 2.201876401901245, "learning_rate": 2.6992911951849907e-05, "loss": 0.3094329833984375, "step": 3442 }, { "epoch": 0.23272948492632148, "grad_norm": 1.3222150802612305, "learning_rate": 2.6990929866460483e-05, "loss": 0.1773223876953125, "step": 3443 }, { "epoch": 0.23279707989725565, "grad_norm": 1.3567371368408203, "learning_rate": 2.69889472008753e-05, "loss": 0.25408935546875, "step": 3444 }, { "epoch": 0.23286467486818982, "grad_norm": 1.5312150716781616, "learning_rate": 2.698696395519029e-05, "loss": 0.3031005859375, "step": 3445 }, { "epoch": 0.23293226983912396, "grad_norm": 1.7858011722564697, "learning_rate": 2.698498012950142e-05, "loss": 0.2535552978515625, "step": 3446 }, { "epoch": 0.23299986481005813, "grad_norm": 1.9381799697875977, "learning_rate": 2.698299572390468e-05, "loss": 0.26507568359375, "step": 3447 }, { "epoch": 0.2330674597809923, "grad_norm": 1.5142992734909058, "learning_rate": 2.6981010738496082e-05, "loss": 0.2550048828125, "step": 3448 }, { "epoch": 0.23313505475192645, "grad_norm": 1.2972476482391357, "learning_rate": 2.6979025173371675e-05, "loss": 0.3187255859375, "step": 3449 }, { "epoch": 0.23320264972286062, "grad_norm": 1.21878182888031, "learning_rate": 2.6977039028627538e-05, "loss": 0.2136688232421875, "step": 3450 }, { "epoch": 0.2332702446937948, "grad_norm": 1.7507561445236206, "learning_rate": 2.6975052304359765e-05, "loss": 0.30645751953125, "step": 3451 }, { "epoch": 0.23333783966472896, "grad_norm": 1.9012528657913208, "learning_rate": 2.697306500066449e-05, "loss": 0.22705078125, "step": 3452 }, { "epoch": 0.2334054346356631, "grad_norm": 1.8824923038482666, "learning_rate": 2.697107711763787e-05, "loss": 0.314605712890625, "step": 3453 }, { "epoch": 0.23347302960659727, "grad_norm": 1.092441439628601, "learning_rate": 2.696908865537609e-05, "loss": 0.19927597045898438, "step": 3454 }, { "epoch": 0.23354062457753144, "grad_norm": 1.3286141157150269, "learning_rate": 2.696709961397537e-05, "loss": 0.305511474609375, "step": 3455 }, { "epoch": 0.23360821954846558, "grad_norm": 1.8561899662017822, "learning_rate": 2.6965109993531946e-05, "loss": 0.25775146484375, "step": 3456 }, { "epoch": 0.23367581451939975, "grad_norm": 1.1024636030197144, "learning_rate": 2.6963119794142087e-05, "loss": 0.212127685546875, "step": 3457 }, { "epoch": 0.23374340949033393, "grad_norm": 1.7659342288970947, "learning_rate": 2.69611290159021e-05, "loss": 0.3079833984375, "step": 3458 }, { "epoch": 0.23381100446126807, "grad_norm": 1.876245141029358, "learning_rate": 2.69591376589083e-05, "loss": 0.34490966796875, "step": 3459 }, { "epoch": 0.23387859943220224, "grad_norm": 2.1713919639587402, "learning_rate": 2.6957145723257056e-05, "loss": 0.30828857421875, "step": 3460 }, { "epoch": 0.2339461944031364, "grad_norm": 1.4126609563827515, "learning_rate": 2.6955153209044733e-05, "loss": 0.281219482421875, "step": 3461 }, { "epoch": 0.23401378937407058, "grad_norm": 1.7615671157836914, "learning_rate": 2.6953160116367753e-05, "loss": 0.2777099609375, "step": 3462 }, { "epoch": 0.23408138434500472, "grad_norm": 1.3170580863952637, "learning_rate": 2.6951166445322548e-05, "loss": 0.242584228515625, "step": 3463 }, { "epoch": 0.2341489793159389, "grad_norm": 1.8380229473114014, "learning_rate": 2.6949172196005592e-05, "loss": 0.31982421875, "step": 3464 }, { "epoch": 0.23421657428687306, "grad_norm": 1.2909715175628662, "learning_rate": 2.6947177368513368e-05, "loss": 0.19696044921875, "step": 3465 }, { "epoch": 0.2342841692578072, "grad_norm": 1.536007046699524, "learning_rate": 2.694518196294241e-05, "loss": 0.2882080078125, "step": 3466 }, { "epoch": 0.23435176422874138, "grad_norm": 1.3257787227630615, "learning_rate": 2.694318597938926e-05, "loss": 0.32086181640625, "step": 3467 }, { "epoch": 0.23441935919967555, "grad_norm": 0.8909434080123901, "learning_rate": 2.6941189417950497e-05, "loss": 0.1706085205078125, "step": 3468 }, { "epoch": 0.23448695417060972, "grad_norm": 1.5413943529129028, "learning_rate": 2.693919227872273e-05, "loss": 0.1517333984375, "step": 3469 }, { "epoch": 0.23455454914154386, "grad_norm": 0.8515161871910095, "learning_rate": 2.693719456180259e-05, "loss": 0.1806640625, "step": 3470 }, { "epoch": 0.23462214411247803, "grad_norm": 0.9507498145103455, "learning_rate": 2.693519626728674e-05, "loss": 0.16231536865234375, "step": 3471 }, { "epoch": 0.2346897390834122, "grad_norm": 2.794602870941162, "learning_rate": 2.6933197395271872e-05, "loss": 0.307342529296875, "step": 3472 }, { "epoch": 0.23475733405434634, "grad_norm": 1.5889116525650024, "learning_rate": 2.69311979458547e-05, "loss": 0.3070068359375, "step": 3473 }, { "epoch": 0.23482492902528052, "grad_norm": 2.015622854232788, "learning_rate": 2.6929197919131976e-05, "loss": 0.24993896484375, "step": 3474 }, { "epoch": 0.23489252399621469, "grad_norm": 1.8461970090866089, "learning_rate": 2.692719731520047e-05, "loss": 0.251953125, "step": 3475 }, { "epoch": 0.23496011896714886, "grad_norm": 1.7514845132827759, "learning_rate": 2.6925196134156978e-05, "loss": 0.20548629760742188, "step": 3476 }, { "epoch": 0.235027713938083, "grad_norm": 1.7480696439743042, "learning_rate": 2.692319437609834e-05, "loss": 0.34912109375, "step": 3477 }, { "epoch": 0.23509530890901717, "grad_norm": 1.0135842561721802, "learning_rate": 2.6921192041121404e-05, "loss": 0.210296630859375, "step": 3478 }, { "epoch": 0.23516290387995134, "grad_norm": 1.193085789680481, "learning_rate": 2.6919189129323067e-05, "loss": 0.2014617919921875, "step": 3479 }, { "epoch": 0.23523049885088548, "grad_norm": 1.8699599504470825, "learning_rate": 2.6917185640800228e-05, "loss": 0.270751953125, "step": 3480 }, { "epoch": 0.23529809382181965, "grad_norm": 1.5747710466384888, "learning_rate": 2.691518157564984e-05, "loss": 0.290557861328125, "step": 3481 }, { "epoch": 0.23536568879275382, "grad_norm": 1.853691577911377, "learning_rate": 2.6913176933968864e-05, "loss": 0.34576416015625, "step": 3482 }, { "epoch": 0.235433283763688, "grad_norm": 1.1170618534088135, "learning_rate": 2.6911171715854305e-05, "loss": 0.1881103515625, "step": 3483 }, { "epoch": 0.23550087873462214, "grad_norm": 1.7076812982559204, "learning_rate": 2.6909165921403183e-05, "loss": 0.272705078125, "step": 3484 }, { "epoch": 0.2355684737055563, "grad_norm": 1.4832755327224731, "learning_rate": 2.6907159550712552e-05, "loss": 0.29376220703125, "step": 3485 }, { "epoch": 0.23563606867649048, "grad_norm": 2.060877561569214, "learning_rate": 2.690515260387949e-05, "loss": 0.26361083984375, "step": 3486 }, { "epoch": 0.23570366364742462, "grad_norm": 1.8087095022201538, "learning_rate": 2.6903145081001103e-05, "loss": 0.306365966796875, "step": 3487 }, { "epoch": 0.2357712586183588, "grad_norm": 1.1832833290100098, "learning_rate": 2.6901136982174537e-05, "loss": 0.2392578125, "step": 3488 }, { "epoch": 0.23583885358929296, "grad_norm": 0.8265711069107056, "learning_rate": 2.6899128307496953e-05, "loss": 0.197509765625, "step": 3489 }, { "epoch": 0.2359064485602271, "grad_norm": 1.5516010522842407, "learning_rate": 2.689711905706554e-05, "loss": 0.208526611328125, "step": 3490 }, { "epoch": 0.23597404353116128, "grad_norm": 2.6235506534576416, "learning_rate": 2.689510923097752e-05, "loss": 0.3665771484375, "step": 3491 }, { "epoch": 0.23604163850209545, "grad_norm": 1.5957077741622925, "learning_rate": 2.6893098829330134e-05, "loss": 0.174713134765625, "step": 3492 }, { "epoch": 0.23610923347302962, "grad_norm": 1.253859519958496, "learning_rate": 2.6891087852220674e-05, "loss": 0.205474853515625, "step": 3493 }, { "epoch": 0.23617682844396376, "grad_norm": 1.8432575464248657, "learning_rate": 2.688907629974643e-05, "loss": 0.29644775390625, "step": 3494 }, { "epoch": 0.23624442341489793, "grad_norm": 1.850447416305542, "learning_rate": 2.6887064172004738e-05, "loss": 0.3489990234375, "step": 3495 }, { "epoch": 0.2363120183858321, "grad_norm": 1.2575538158416748, "learning_rate": 2.688505146909296e-05, "loss": 0.225799560546875, "step": 3496 }, { "epoch": 0.23637961335676624, "grad_norm": 1.9274640083312988, "learning_rate": 2.6883038191108472e-05, "loss": 0.23968505859375, "step": 3497 }, { "epoch": 0.23644720832770041, "grad_norm": 1.0913910865783691, "learning_rate": 2.68810243381487e-05, "loss": 0.177825927734375, "step": 3498 }, { "epoch": 0.23651480329863458, "grad_norm": 0.9619971513748169, "learning_rate": 2.687900991031108e-05, "loss": 0.212432861328125, "step": 3499 }, { "epoch": 0.23658239826956876, "grad_norm": 2.2152481079101562, "learning_rate": 2.6876994907693094e-05, "loss": 0.32598876953125, "step": 3500 }, { "epoch": 0.2366499932405029, "grad_norm": 1.5308090448379517, "learning_rate": 2.6874979330392227e-05, "loss": 0.2340087890625, "step": 3501 }, { "epoch": 0.23671758821143707, "grad_norm": 1.2822972536087036, "learning_rate": 2.6872963178506007e-05, "loss": 0.30023193359375, "step": 3502 }, { "epoch": 0.23678518318237124, "grad_norm": 1.2665950059890747, "learning_rate": 2.6870946452131997e-05, "loss": 0.239715576171875, "step": 3503 }, { "epoch": 0.23685277815330538, "grad_norm": 1.4433417320251465, "learning_rate": 2.686892915136777e-05, "loss": 0.34521484375, "step": 3504 }, { "epoch": 0.23692037312423955, "grad_norm": 2.1772165298461914, "learning_rate": 2.6866911276310938e-05, "loss": 0.4163818359375, "step": 3505 }, { "epoch": 0.23698796809517372, "grad_norm": 0.876965343952179, "learning_rate": 2.686489282705914e-05, "loss": 0.228790283203125, "step": 3506 }, { "epoch": 0.2370555630661079, "grad_norm": 1.002884864807129, "learning_rate": 2.686287380371004e-05, "loss": 0.229766845703125, "step": 3507 }, { "epoch": 0.23712315803704204, "grad_norm": 1.2012683153152466, "learning_rate": 2.6860854206361332e-05, "loss": 0.15362548828125, "step": 3508 }, { "epoch": 0.2371907530079762, "grad_norm": 1.8792169094085693, "learning_rate": 2.6858834035110736e-05, "loss": 0.287200927734375, "step": 3509 }, { "epoch": 0.23725834797891038, "grad_norm": 1.2436552047729492, "learning_rate": 2.6856813290055996e-05, "loss": 0.21978759765625, "step": 3510 }, { "epoch": 0.23732594294984452, "grad_norm": 1.0822696685791016, "learning_rate": 2.6854791971294894e-05, "loss": 0.209564208984375, "step": 3511 }, { "epoch": 0.2373935379207787, "grad_norm": 1.2822622060775757, "learning_rate": 2.6852770078925235e-05, "loss": 0.34375, "step": 3512 }, { "epoch": 0.23746113289171286, "grad_norm": 1.9637064933776855, "learning_rate": 2.6850747613044845e-05, "loss": 0.35137939453125, "step": 3513 }, { "epoch": 0.23752872786264703, "grad_norm": 1.664739966392517, "learning_rate": 2.684872457375159e-05, "loss": 0.40625, "step": 3514 }, { "epoch": 0.23759632283358117, "grad_norm": 1.983620285987854, "learning_rate": 2.684670096114335e-05, "loss": 0.32562255859375, "step": 3515 }, { "epoch": 0.23766391780451535, "grad_norm": 0.7772923111915588, "learning_rate": 2.6844676775318047e-05, "loss": 0.20745849609375, "step": 3516 }, { "epoch": 0.23773151277544952, "grad_norm": 2.09496808052063, "learning_rate": 2.6842652016373616e-05, "loss": 0.326324462890625, "step": 3517 }, { "epoch": 0.23779910774638366, "grad_norm": 1.1971616744995117, "learning_rate": 2.6840626684408035e-05, "loss": 0.241485595703125, "step": 3518 }, { "epoch": 0.23786670271731783, "grad_norm": 1.3037203550338745, "learning_rate": 2.68386007795193e-05, "loss": 0.1244049072265625, "step": 3519 }, { "epoch": 0.237934297688252, "grad_norm": 1.1081171035766602, "learning_rate": 2.6836574301805434e-05, "loss": 0.21509552001953125, "step": 3520 }, { "epoch": 0.23800189265918614, "grad_norm": 1.5827295780181885, "learning_rate": 2.6834547251364495e-05, "loss": 0.236083984375, "step": 3521 }, { "epoch": 0.2380694876301203, "grad_norm": 1.0873076915740967, "learning_rate": 2.6832519628294556e-05, "loss": 0.1329498291015625, "step": 3522 }, { "epoch": 0.23813708260105448, "grad_norm": 2.8427910804748535, "learning_rate": 2.6830491432693736e-05, "loss": 0.305877685546875, "step": 3523 }, { "epoch": 0.23820467757198865, "grad_norm": 1.2109384536743164, "learning_rate": 2.6828462664660166e-05, "loss": 0.197723388671875, "step": 3524 }, { "epoch": 0.2382722725429228, "grad_norm": 2.1974034309387207, "learning_rate": 2.6826433324292013e-05, "loss": 0.3280029296875, "step": 3525 }, { "epoch": 0.23833986751385697, "grad_norm": 1.8497563600540161, "learning_rate": 2.6824403411687467e-05, "loss": 0.313323974609375, "step": 3526 }, { "epoch": 0.23840746248479114, "grad_norm": 1.5435749292373657, "learning_rate": 2.682237292694475e-05, "loss": 0.1988067626953125, "step": 3527 }, { "epoch": 0.23847505745572528, "grad_norm": 1.5944101810455322, "learning_rate": 2.6820341870162104e-05, "loss": 0.30865478515625, "step": 3528 }, { "epoch": 0.23854265242665945, "grad_norm": 1.1305278539657593, "learning_rate": 2.681831024143781e-05, "loss": 0.296142578125, "step": 3529 }, { "epoch": 0.23861024739759362, "grad_norm": 1.1813377141952515, "learning_rate": 2.6816278040870172e-05, "loss": 0.242095947265625, "step": 3530 }, { "epoch": 0.2386778423685278, "grad_norm": 3.1777455806732178, "learning_rate": 2.681424526855752e-05, "loss": 0.33868408203125, "step": 3531 }, { "epoch": 0.23874543733946194, "grad_norm": 1.9375139474868774, "learning_rate": 2.6812211924598204e-05, "loss": 0.23687744140625, "step": 3532 }, { "epoch": 0.2388130323103961, "grad_norm": 3.4993298053741455, "learning_rate": 2.681017800909062e-05, "loss": 0.29705810546875, "step": 3533 }, { "epoch": 0.23888062728133028, "grad_norm": 2.595562696456909, "learning_rate": 2.6808143522133178e-05, "loss": 0.302398681640625, "step": 3534 }, { "epoch": 0.23894822225226442, "grad_norm": 1.9472148418426514, "learning_rate": 2.6806108463824314e-05, "loss": 0.32037353515625, "step": 3535 }, { "epoch": 0.2390158172231986, "grad_norm": 1.100962519645691, "learning_rate": 2.6804072834262502e-05, "loss": 0.26434326171875, "step": 3536 }, { "epoch": 0.23908341219413276, "grad_norm": 1.2039238214492798, "learning_rate": 2.6802036633546235e-05, "loss": 0.258758544921875, "step": 3537 }, { "epoch": 0.23915100716506693, "grad_norm": 1.051504135131836, "learning_rate": 2.6799999861774046e-05, "loss": 0.200286865234375, "step": 3538 }, { "epoch": 0.23921860213600107, "grad_norm": 0.815258264541626, "learning_rate": 2.6797962519044476e-05, "loss": 0.2100830078125, "step": 3539 }, { "epoch": 0.23928619710693524, "grad_norm": 1.9972189664840698, "learning_rate": 2.6795924605456113e-05, "loss": 0.248382568359375, "step": 3540 }, { "epoch": 0.23935379207786942, "grad_norm": 1.0410363674163818, "learning_rate": 2.6793886121107554e-05, "loss": 0.21051025390625, "step": 3541 }, { "epoch": 0.23942138704880356, "grad_norm": 2.112724781036377, "learning_rate": 2.6791847066097442e-05, "loss": 0.279205322265625, "step": 3542 }, { "epoch": 0.23948898201973773, "grad_norm": 1.345870852470398, "learning_rate": 2.6789807440524436e-05, "loss": 0.24090576171875, "step": 3543 }, { "epoch": 0.2395565769906719, "grad_norm": 1.5187262296676636, "learning_rate": 2.678776724448723e-05, "loss": 0.3221435546875, "step": 3544 }, { "epoch": 0.23962417196160607, "grad_norm": 4.167532920837402, "learning_rate": 2.6785726478084533e-05, "loss": 0.32763671875, "step": 3545 }, { "epoch": 0.2396917669325402, "grad_norm": 1.1528520584106445, "learning_rate": 2.678368514141509e-05, "loss": 0.2276611328125, "step": 3546 }, { "epoch": 0.23975936190347438, "grad_norm": 1.6711620092391968, "learning_rate": 2.6781643234577683e-05, "loss": 0.25042724609375, "step": 3547 }, { "epoch": 0.23982695687440855, "grad_norm": 1.068264126777649, "learning_rate": 2.6779600757671107e-05, "loss": 0.294219970703125, "step": 3548 }, { "epoch": 0.2398945518453427, "grad_norm": 1.7684499025344849, "learning_rate": 2.677755771079419e-05, "loss": 0.400848388671875, "step": 3549 }, { "epoch": 0.23996214681627687, "grad_norm": 1.28861665725708, "learning_rate": 2.6775514094045787e-05, "loss": 0.262054443359375, "step": 3550 }, { "epoch": 0.24002974178721104, "grad_norm": 1.0891435146331787, "learning_rate": 2.6773469907524776e-05, "loss": 0.206756591796875, "step": 3551 }, { "epoch": 0.2400973367581452, "grad_norm": 1.7387319803237915, "learning_rate": 2.6771425151330077e-05, "loss": 0.3280029296875, "step": 3552 }, { "epoch": 0.24016493172907935, "grad_norm": 1.2826064825057983, "learning_rate": 2.6769379825560626e-05, "loss": 0.3099365234375, "step": 3553 }, { "epoch": 0.24023252670001352, "grad_norm": 1.306008219718933, "learning_rate": 2.6767333930315382e-05, "loss": 0.2081451416015625, "step": 3554 }, { "epoch": 0.2403001216709477, "grad_norm": 1.466477870941162, "learning_rate": 2.6765287465693348e-05, "loss": 0.247894287109375, "step": 3555 }, { "epoch": 0.24036771664188183, "grad_norm": 1.717570185661316, "learning_rate": 2.676324043179353e-05, "loss": 0.36248779296875, "step": 3556 }, { "epoch": 0.240435311612816, "grad_norm": 1.6119714975357056, "learning_rate": 2.676119282871499e-05, "loss": 0.29052734375, "step": 3557 }, { "epoch": 0.24050290658375018, "grad_norm": 1.4629124402999878, "learning_rate": 2.67591446565568e-05, "loss": 0.233734130859375, "step": 3558 }, { "epoch": 0.24057050155468432, "grad_norm": 1.3792736530303955, "learning_rate": 2.675709591541806e-05, "loss": 0.235748291015625, "step": 3559 }, { "epoch": 0.2406380965256185, "grad_norm": 1.3143140077590942, "learning_rate": 2.6755046605397903e-05, "loss": 0.3206787109375, "step": 3560 }, { "epoch": 0.24070569149655266, "grad_norm": 1.1717721223831177, "learning_rate": 2.675299672659549e-05, "loss": 0.1490936279296875, "step": 3561 }, { "epoch": 0.24077328646748683, "grad_norm": 1.7044802904129028, "learning_rate": 2.675094627911e-05, "loss": 0.2498779296875, "step": 3562 }, { "epoch": 0.24084088143842097, "grad_norm": 1.6678277254104614, "learning_rate": 2.6748895263040654e-05, "loss": 0.3280029296875, "step": 3563 }, { "epoch": 0.24090847640935514, "grad_norm": 1.7013124227523804, "learning_rate": 2.6746843678486686e-05, "loss": 0.326080322265625, "step": 3564 }, { "epoch": 0.24097607138028931, "grad_norm": 0.8213046193122864, "learning_rate": 2.6744791525547373e-05, "loss": 0.1752166748046875, "step": 3565 }, { "epoch": 0.24104366635122346, "grad_norm": 1.7189215421676636, "learning_rate": 2.6742738804322e-05, "loss": 0.27728271484375, "step": 3566 }, { "epoch": 0.24111126132215763, "grad_norm": 0.8857202529907227, "learning_rate": 2.6740685514909904e-05, "loss": 0.204864501953125, "step": 3567 }, { "epoch": 0.2411788562930918, "grad_norm": 1.5325119495391846, "learning_rate": 2.6738631657410422e-05, "loss": 0.294952392578125, "step": 3568 }, { "epoch": 0.24124645126402597, "grad_norm": 2.0655195713043213, "learning_rate": 2.673657723192294e-05, "loss": 0.3419189453125, "step": 3569 }, { "epoch": 0.2413140462349601, "grad_norm": 1.0614149570465088, "learning_rate": 2.673452223854686e-05, "loss": 0.2062530517578125, "step": 3570 }, { "epoch": 0.24138164120589428, "grad_norm": 0.8957163095474243, "learning_rate": 2.6732466677381625e-05, "loss": 0.131134033203125, "step": 3571 }, { "epoch": 0.24144923617682845, "grad_norm": 1.8476214408874512, "learning_rate": 2.6730410548526683e-05, "loss": 0.29290771484375, "step": 3572 }, { "epoch": 0.2415168311477626, "grad_norm": 1.0775045156478882, "learning_rate": 2.672835385208153e-05, "loss": 0.2140655517578125, "step": 3573 }, { "epoch": 0.24158442611869677, "grad_norm": 1.6332788467407227, "learning_rate": 2.6726296588145676e-05, "loss": 0.286285400390625, "step": 3574 }, { "epoch": 0.24165202108963094, "grad_norm": 2.049776077270508, "learning_rate": 2.6724238756818678e-05, "loss": 0.312164306640625, "step": 3575 }, { "epoch": 0.2417196160605651, "grad_norm": 1.3694099187850952, "learning_rate": 2.6722180358200088e-05, "loss": 0.242279052734375, "step": 3576 }, { "epoch": 0.24178721103149925, "grad_norm": 4.392256259918213, "learning_rate": 2.6720121392389516e-05, "loss": 0.3453369140625, "step": 3577 }, { "epoch": 0.24185480600243342, "grad_norm": 1.6120799779891968, "learning_rate": 2.6718061859486582e-05, "loss": 0.261627197265625, "step": 3578 }, { "epoch": 0.2419224009733676, "grad_norm": 1.478790044784546, "learning_rate": 2.6716001759590943e-05, "loss": 0.29461669921875, "step": 3579 }, { "epoch": 0.24198999594430173, "grad_norm": 0.9716073870658875, "learning_rate": 2.6713941092802275e-05, "loss": 0.169525146484375, "step": 3580 }, { "epoch": 0.2420575909152359, "grad_norm": 1.3925628662109375, "learning_rate": 2.6711879859220295e-05, "loss": 0.242156982421875, "step": 3581 }, { "epoch": 0.24212518588617007, "grad_norm": 2.8314969539642334, "learning_rate": 2.6709818058944727e-05, "loss": 0.3681640625, "step": 3582 }, { "epoch": 0.24219278085710425, "grad_norm": 1.7183579206466675, "learning_rate": 2.670775569207534e-05, "loss": 0.268157958984375, "step": 3583 }, { "epoch": 0.2422603758280384, "grad_norm": 1.7898586988449097, "learning_rate": 2.6705692758711926e-05, "loss": 0.299224853515625, "step": 3584 }, { "epoch": 0.24232797079897256, "grad_norm": 1.1738934516906738, "learning_rate": 2.6703629258954295e-05, "loss": 0.227203369140625, "step": 3585 }, { "epoch": 0.24239556576990673, "grad_norm": 1.6394169330596924, "learning_rate": 2.6701565192902297e-05, "loss": 0.311553955078125, "step": 3586 }, { "epoch": 0.24246316074084087, "grad_norm": 1.8212368488311768, "learning_rate": 2.6699500560655805e-05, "loss": 0.3218994140625, "step": 3587 }, { "epoch": 0.24253075571177504, "grad_norm": 0.6442865133285522, "learning_rate": 2.6697435362314716e-05, "loss": 0.106292724609375, "step": 3588 }, { "epoch": 0.2425983506827092, "grad_norm": 1.411645531654358, "learning_rate": 2.669536959797896e-05, "loss": 0.279327392578125, "step": 3589 }, { "epoch": 0.24266594565364336, "grad_norm": 0.8135839104652405, "learning_rate": 2.6693303267748492e-05, "loss": 0.11710357666015625, "step": 3590 }, { "epoch": 0.24273354062457753, "grad_norm": 1.4640215635299683, "learning_rate": 2.6691236371723287e-05, "loss": 0.216796875, "step": 3591 }, { "epoch": 0.2428011355955117, "grad_norm": 2.0964460372924805, "learning_rate": 2.6689168910003363e-05, "loss": 0.287384033203125, "step": 3592 }, { "epoch": 0.24286873056644587, "grad_norm": 1.7903577089309692, "learning_rate": 2.668710088268875e-05, "loss": 0.3341064453125, "step": 3593 }, { "epoch": 0.24293632553738, "grad_norm": 1.0866684913635254, "learning_rate": 2.6685032289879517e-05, "loss": 0.15191650390625, "step": 3594 }, { "epoch": 0.24300392050831418, "grad_norm": 1.4745910167694092, "learning_rate": 2.668296313167575e-05, "loss": 0.342803955078125, "step": 3595 }, { "epoch": 0.24307151547924835, "grad_norm": 0.4895786643028259, "learning_rate": 2.668089340817758e-05, "loss": 0.16729736328125, "step": 3596 }, { "epoch": 0.2431391104501825, "grad_norm": 0.9862501621246338, "learning_rate": 2.667882311948514e-05, "loss": 0.266387939453125, "step": 3597 }, { "epoch": 0.24320670542111666, "grad_norm": 1.2455569505691528, "learning_rate": 2.6676752265698603e-05, "loss": 0.287841796875, "step": 3598 }, { "epoch": 0.24327430039205084, "grad_norm": 0.9386094212532043, "learning_rate": 2.6674680846918177e-05, "loss": 0.22830963134765625, "step": 3599 }, { "epoch": 0.243341895362985, "grad_norm": 0.8639212846755981, "learning_rate": 2.667260886324409e-05, "loss": 0.17661285400390625, "step": 3600 }, { "epoch": 0.24340949033391915, "grad_norm": 0.9103549718856812, "learning_rate": 2.6670536314776593e-05, "loss": 0.26025390625, "step": 3601 }, { "epoch": 0.24347708530485332, "grad_norm": 1.3233593702316284, "learning_rate": 2.6668463201615972e-05, "loss": 0.28887939453125, "step": 3602 }, { "epoch": 0.2435446802757875, "grad_norm": 1.2511003017425537, "learning_rate": 2.6666389523862535e-05, "loss": 0.2312469482421875, "step": 3603 }, { "epoch": 0.24361227524672163, "grad_norm": 1.9724420309066772, "learning_rate": 2.666431528161662e-05, "loss": 0.195587158203125, "step": 3604 }, { "epoch": 0.2436798702176558, "grad_norm": 0.9887617826461792, "learning_rate": 2.6662240474978594e-05, "loss": 0.186859130859375, "step": 3605 }, { "epoch": 0.24374746518858997, "grad_norm": 1.547764778137207, "learning_rate": 2.6660165104048846e-05, "loss": 0.31903076171875, "step": 3606 }, { "epoch": 0.24381506015952414, "grad_norm": 1.5476975440979004, "learning_rate": 2.6658089168927794e-05, "loss": 0.32061767578125, "step": 3607 }, { "epoch": 0.2438826551304583, "grad_norm": 2.014260768890381, "learning_rate": 2.6656012669715893e-05, "loss": 0.3001708984375, "step": 3608 }, { "epoch": 0.24395025010139246, "grad_norm": 1.8134220838546753, "learning_rate": 2.6653935606513607e-05, "loss": 0.324981689453125, "step": 3609 }, { "epoch": 0.24401784507232663, "grad_norm": 2.469933271408081, "learning_rate": 2.665185797942144e-05, "loss": 0.28924560546875, "step": 3610 }, { "epoch": 0.24408544004326077, "grad_norm": 0.8137964606285095, "learning_rate": 2.6649779788539924e-05, "loss": 0.18671417236328125, "step": 3611 }, { "epoch": 0.24415303501419494, "grad_norm": 1.0877759456634521, "learning_rate": 2.664770103396961e-05, "loss": 0.220062255859375, "step": 3612 }, { "epoch": 0.2442206299851291, "grad_norm": 2.0429494380950928, "learning_rate": 2.6645621715811083e-05, "loss": 0.193206787109375, "step": 3613 }, { "epoch": 0.24428822495606328, "grad_norm": 1.453812599182129, "learning_rate": 2.664354183416496e-05, "loss": 0.30194091796875, "step": 3614 }, { "epoch": 0.24435581992699743, "grad_norm": 1.193833589553833, "learning_rate": 2.664146138913187e-05, "loss": 0.24993896484375, "step": 3615 }, { "epoch": 0.2444234148979316, "grad_norm": 2.160416603088379, "learning_rate": 2.663938038081248e-05, "loss": 0.3123779296875, "step": 3616 }, { "epoch": 0.24449100986886577, "grad_norm": 2.0732779502868652, "learning_rate": 2.6637298809307483e-05, "loss": 0.328033447265625, "step": 3617 }, { "epoch": 0.2445586048397999, "grad_norm": 1.4829214811325073, "learning_rate": 2.6635216674717595e-05, "loss": 0.335357666015625, "step": 3618 }, { "epoch": 0.24462619981073408, "grad_norm": 1.431273102760315, "learning_rate": 2.6633133977143572e-05, "loss": 0.24420166015625, "step": 3619 }, { "epoch": 0.24469379478166825, "grad_norm": 1.8065321445465088, "learning_rate": 2.6631050716686177e-05, "loss": 0.281280517578125, "step": 3620 }, { "epoch": 0.2447613897526024, "grad_norm": 2.0101258754730225, "learning_rate": 2.6628966893446215e-05, "loss": 0.19159698486328125, "step": 3621 }, { "epoch": 0.24482898472353656, "grad_norm": 1.110917329788208, "learning_rate": 2.662688250752452e-05, "loss": 0.2288970947265625, "step": 3622 }, { "epoch": 0.24489657969447073, "grad_norm": 1.2155734300613403, "learning_rate": 2.6624797559021936e-05, "loss": 0.288421630859375, "step": 3623 }, { "epoch": 0.2449641746654049, "grad_norm": 1.0282901525497437, "learning_rate": 2.6622712048039353e-05, "loss": 0.201873779296875, "step": 3624 }, { "epoch": 0.24503176963633905, "grad_norm": 0.863673746585846, "learning_rate": 2.6620625974677687e-05, "loss": 0.2276763916015625, "step": 3625 }, { "epoch": 0.24509936460727322, "grad_norm": 1.192478895187378, "learning_rate": 2.6618539339037862e-05, "loss": 0.189605712890625, "step": 3626 }, { "epoch": 0.2451669595782074, "grad_norm": 1.3641507625579834, "learning_rate": 2.661645214122085e-05, "loss": 0.2762451171875, "step": 3627 }, { "epoch": 0.24523455454914153, "grad_norm": 1.6423052549362183, "learning_rate": 2.661436438132764e-05, "loss": 0.2333984375, "step": 3628 }, { "epoch": 0.2453021495200757, "grad_norm": 1.535815954208374, "learning_rate": 2.6612276059459253e-05, "loss": 0.2547607421875, "step": 3629 }, { "epoch": 0.24536974449100987, "grad_norm": 1.8398953676223755, "learning_rate": 2.661018717571674e-05, "loss": 0.284271240234375, "step": 3630 }, { "epoch": 0.24543733946194404, "grad_norm": 3.22308611869812, "learning_rate": 2.6608097730201163e-05, "loss": 0.38653564453125, "step": 3631 }, { "epoch": 0.24550493443287819, "grad_norm": 1.1851935386657715, "learning_rate": 2.660600772301363e-05, "loss": 0.28485107421875, "step": 3632 }, { "epoch": 0.24557252940381236, "grad_norm": 1.6584151983261108, "learning_rate": 2.6603917154255265e-05, "loss": 0.2612762451171875, "step": 3633 }, { "epoch": 0.24564012437474653, "grad_norm": 1.1051645278930664, "learning_rate": 2.660182602402722e-05, "loss": 0.22705078125, "step": 3634 }, { "epoch": 0.24570771934568067, "grad_norm": 1.712791919708252, "learning_rate": 2.6599734332430686e-05, "loss": 0.35675048828125, "step": 3635 }, { "epoch": 0.24577531431661484, "grad_norm": 1.5101362466812134, "learning_rate": 2.659764207956687e-05, "loss": 0.222930908203125, "step": 3636 }, { "epoch": 0.245842909287549, "grad_norm": 1.3198274374008179, "learning_rate": 2.6595549265537003e-05, "loss": 0.290283203125, "step": 3637 }, { "epoch": 0.24591050425848318, "grad_norm": 2.1384224891662598, "learning_rate": 2.6593455890442348e-05, "loss": 0.33221435546875, "step": 3638 }, { "epoch": 0.24597809922941732, "grad_norm": 1.5498154163360596, "learning_rate": 2.6591361954384196e-05, "loss": 0.33892822265625, "step": 3639 }, { "epoch": 0.2460456942003515, "grad_norm": 1.901997447013855, "learning_rate": 2.6589267457463873e-05, "loss": 0.32171630859375, "step": 3640 }, { "epoch": 0.24611328917128567, "grad_norm": 1.8017548322677612, "learning_rate": 2.6587172399782714e-05, "loss": 0.31597900390625, "step": 3641 }, { "epoch": 0.2461808841422198, "grad_norm": 1.5250986814498901, "learning_rate": 2.6585076781442095e-05, "loss": 0.23663330078125, "step": 3642 }, { "epoch": 0.24624847911315398, "grad_norm": 1.3579127788543701, "learning_rate": 2.6582980602543414e-05, "loss": 0.24847412109375, "step": 3643 }, { "epoch": 0.24631607408408815, "grad_norm": 0.8184322714805603, "learning_rate": 2.6580883863188097e-05, "loss": 0.1631317138671875, "step": 3644 }, { "epoch": 0.24638366905502232, "grad_norm": 1.3439871072769165, "learning_rate": 2.65787865634776e-05, "loss": 0.147552490234375, "step": 3645 }, { "epoch": 0.24645126402595646, "grad_norm": 2.3727664947509766, "learning_rate": 2.65766887035134e-05, "loss": 0.276763916015625, "step": 3646 }, { "epoch": 0.24651885899689063, "grad_norm": 1.4676709175109863, "learning_rate": 2.6574590283397008e-05, "loss": 0.2026824951171875, "step": 3647 }, { "epoch": 0.2465864539678248, "grad_norm": 1.9045705795288086, "learning_rate": 2.657249130322995e-05, "loss": 0.2896728515625, "step": 3648 }, { "epoch": 0.24665404893875895, "grad_norm": 1.1860706806182861, "learning_rate": 2.6570391763113803e-05, "loss": 0.254119873046875, "step": 3649 }, { "epoch": 0.24672164390969312, "grad_norm": 1.3946397304534912, "learning_rate": 2.6568291663150144e-05, "loss": 0.252288818359375, "step": 3650 }, { "epoch": 0.2467892388806273, "grad_norm": 1.543946623802185, "learning_rate": 2.656619100344059e-05, "loss": 0.1957244873046875, "step": 3651 }, { "epoch": 0.24685683385156143, "grad_norm": 1.0385220050811768, "learning_rate": 2.6564089784086783e-05, "loss": 0.171875, "step": 3652 }, { "epoch": 0.2469244288224956, "grad_norm": 1.1278510093688965, "learning_rate": 2.6561988005190402e-05, "loss": 0.251129150390625, "step": 3653 }, { "epoch": 0.24699202379342977, "grad_norm": 1.9323567152023315, "learning_rate": 2.6559885666853137e-05, "loss": 0.300323486328125, "step": 3654 }, { "epoch": 0.24705961876436394, "grad_norm": 1.6444931030273438, "learning_rate": 2.655778276917671e-05, "loss": 0.31610107421875, "step": 3655 }, { "epoch": 0.24712721373529808, "grad_norm": 1.0873491764068604, "learning_rate": 2.655567931226288e-05, "loss": 0.251312255859375, "step": 3656 }, { "epoch": 0.24719480870623226, "grad_norm": 1.7328773736953735, "learning_rate": 2.655357529621342e-05, "loss": 0.3282470703125, "step": 3657 }, { "epoch": 0.24726240367716643, "grad_norm": 1.1243120431900024, "learning_rate": 2.6551470721130132e-05, "loss": 0.2378692626953125, "step": 3658 }, { "epoch": 0.24732999864810057, "grad_norm": 0.9518611431121826, "learning_rate": 2.6549365587114854e-05, "loss": 0.180877685546875, "step": 3659 }, { "epoch": 0.24739759361903474, "grad_norm": 1.4196633100509644, "learning_rate": 2.6547259894269447e-05, "loss": 0.2940673828125, "step": 3660 }, { "epoch": 0.2474651885899689, "grad_norm": 2.121685028076172, "learning_rate": 2.6545153642695796e-05, "loss": 0.238037109375, "step": 3661 }, { "epoch": 0.24753278356090308, "grad_norm": 1.7288874387741089, "learning_rate": 2.654304683249581e-05, "loss": 0.3021240234375, "step": 3662 }, { "epoch": 0.24760037853183722, "grad_norm": 1.4502878189086914, "learning_rate": 2.6540939463771432e-05, "loss": 0.266082763671875, "step": 3663 }, { "epoch": 0.2476679735027714, "grad_norm": 1.1472750902175903, "learning_rate": 2.6538831536624634e-05, "loss": 0.182525634765625, "step": 3664 }, { "epoch": 0.24773556847370556, "grad_norm": 1.7977627515792847, "learning_rate": 2.6536723051157404e-05, "loss": 0.3765869140625, "step": 3665 }, { "epoch": 0.2478031634446397, "grad_norm": 1.4345637559890747, "learning_rate": 2.6534614007471766e-05, "loss": 0.25177001953125, "step": 3666 }, { "epoch": 0.24787075841557388, "grad_norm": 3.294494152069092, "learning_rate": 2.6532504405669772e-05, "loss": 0.34954833984375, "step": 3667 }, { "epoch": 0.24793835338650805, "grad_norm": 1.0431928634643555, "learning_rate": 2.6530394245853494e-05, "loss": 0.248077392578125, "step": 3668 }, { "epoch": 0.24800594835744222, "grad_norm": 1.497066617012024, "learning_rate": 2.6528283528125034e-05, "loss": 0.2734375, "step": 3669 }, { "epoch": 0.24807354332837636, "grad_norm": 1.4143129587173462, "learning_rate": 2.6526172252586526e-05, "loss": 0.316375732421875, "step": 3670 }, { "epoch": 0.24814113829931053, "grad_norm": 1.2183573246002197, "learning_rate": 2.6524060419340123e-05, "loss": 0.208648681640625, "step": 3671 }, { "epoch": 0.2482087332702447, "grad_norm": 2.5234243869781494, "learning_rate": 2.6521948028488007e-05, "loss": 0.32379150390625, "step": 3672 }, { "epoch": 0.24827632824117885, "grad_norm": 1.3337970972061157, "learning_rate": 2.6519835080132395e-05, "loss": 0.25299072265625, "step": 3673 }, { "epoch": 0.24834392321211302, "grad_norm": 1.0422950983047485, "learning_rate": 2.6517721574375518e-05, "loss": 0.309326171875, "step": 3674 }, { "epoch": 0.2484115181830472, "grad_norm": 1.5639485120773315, "learning_rate": 2.651560751131964e-05, "loss": 0.30682373046875, "step": 3675 }, { "epoch": 0.24847911315398136, "grad_norm": 1.2365330457687378, "learning_rate": 2.6513492891067067e-05, "loss": 0.273345947265625, "step": 3676 }, { "epoch": 0.2485467081249155, "grad_norm": 1.2440310716629028, "learning_rate": 2.6511377713720097e-05, "loss": 0.263092041015625, "step": 3677 }, { "epoch": 0.24861430309584967, "grad_norm": 1.5261410474777222, "learning_rate": 2.6509261979381087e-05, "loss": 0.247161865234375, "step": 3678 }, { "epoch": 0.24868189806678384, "grad_norm": 1.7507144212722778, "learning_rate": 2.6507145688152408e-05, "loss": 0.284698486328125, "step": 3679 }, { "epoch": 0.24874949303771798, "grad_norm": 0.7857779860496521, "learning_rate": 2.6505028840136457e-05, "loss": 0.155059814453125, "step": 3680 }, { "epoch": 0.24881708800865215, "grad_norm": 1.0909959077835083, "learning_rate": 2.6502911435435664e-05, "loss": 0.24114990234375, "step": 3681 }, { "epoch": 0.24888468297958632, "grad_norm": 1.0517269372940063, "learning_rate": 2.6500793474152476e-05, "loss": 0.14892578125, "step": 3682 }, { "epoch": 0.24895227795052047, "grad_norm": 1.2826855182647705, "learning_rate": 2.649867495638938e-05, "loss": 0.25482177734375, "step": 3683 }, { "epoch": 0.24901987292145464, "grad_norm": 1.1505714654922485, "learning_rate": 2.6496555882248877e-05, "loss": 0.231109619140625, "step": 3684 }, { "epoch": 0.2490874678923888, "grad_norm": 1.321361780166626, "learning_rate": 2.6494436251833507e-05, "loss": 0.2679290771484375, "step": 3685 }, { "epoch": 0.24915506286332298, "grad_norm": 1.2855098247528076, "learning_rate": 2.649231606524583e-05, "loss": 0.234039306640625, "step": 3686 }, { "epoch": 0.24922265783425712, "grad_norm": 1.1625418663024902, "learning_rate": 2.649019532258843e-05, "loss": 0.241363525390625, "step": 3687 }, { "epoch": 0.2492902528051913, "grad_norm": 2.6769776344299316, "learning_rate": 2.648807402396392e-05, "loss": 0.29364013671875, "step": 3688 }, { "epoch": 0.24935784777612546, "grad_norm": 1.8689563274383545, "learning_rate": 2.6485952169474947e-05, "loss": 0.354736328125, "step": 3689 }, { "epoch": 0.2494254427470596, "grad_norm": 2.1815907955169678, "learning_rate": 2.648382975922418e-05, "loss": 0.253448486328125, "step": 3690 }, { "epoch": 0.24949303771799378, "grad_norm": 1.0246951580047607, "learning_rate": 2.648170679331431e-05, "loss": 0.22503662109375, "step": 3691 }, { "epoch": 0.24956063268892795, "grad_norm": 1.7407556772232056, "learning_rate": 2.6479583271848065e-05, "loss": 0.27850341796875, "step": 3692 }, { "epoch": 0.24962822765986212, "grad_norm": 1.2616276741027832, "learning_rate": 2.6477459194928187e-05, "loss": 0.2530975341796875, "step": 3693 }, { "epoch": 0.24969582263079626, "grad_norm": 1.1458213329315186, "learning_rate": 2.6475334562657458e-05, "loss": 0.27789306640625, "step": 3694 }, { "epoch": 0.24976341760173043, "grad_norm": 1.08085036277771, "learning_rate": 2.6473209375138675e-05, "loss": 0.28173828125, "step": 3695 }, { "epoch": 0.2498310125726646, "grad_norm": 1.3124606609344482, "learning_rate": 2.6471083632474675e-05, "loss": 0.297454833984375, "step": 3696 }, { "epoch": 0.24989860754359874, "grad_norm": 0.9285202026367188, "learning_rate": 2.6468957334768308e-05, "loss": 0.29010009765625, "step": 3697 }, { "epoch": 0.24996620251453291, "grad_norm": 0.649121105670929, "learning_rate": 2.646683048212246e-05, "loss": 0.138031005859375, "step": 3698 }, { "epoch": 0.25003379748546706, "grad_norm": 1.1684715747833252, "learning_rate": 2.6464703074640044e-05, "loss": 0.1504364013671875, "step": 3699 }, { "epoch": 0.25010139245640123, "grad_norm": 1.3620821237564087, "learning_rate": 2.6462575112423994e-05, "loss": 0.237457275390625, "step": 3700 }, { "epoch": 0.2501689874273354, "grad_norm": 2.341355800628662, "learning_rate": 2.6460446595577276e-05, "loss": 0.348663330078125, "step": 3701 }, { "epoch": 0.25023658239826957, "grad_norm": 0.6467495560646057, "learning_rate": 2.645831752420288e-05, "loss": 0.13330459594726562, "step": 3702 }, { "epoch": 0.25030417736920374, "grad_norm": 1.3055297136306763, "learning_rate": 2.6456187898403825e-05, "loss": 0.334564208984375, "step": 3703 }, { "epoch": 0.2503717723401379, "grad_norm": 2.012237071990967, "learning_rate": 2.645405771828315e-05, "loss": 0.285308837890625, "step": 3704 }, { "epoch": 0.2504393673110721, "grad_norm": 1.1758182048797607, "learning_rate": 2.6451926983943935e-05, "loss": 0.229034423828125, "step": 3705 }, { "epoch": 0.2505069622820062, "grad_norm": 0.9397285580635071, "learning_rate": 2.6449795695489268e-05, "loss": 0.30010986328125, "step": 3706 }, { "epoch": 0.25057455725294037, "grad_norm": 1.0176273584365845, "learning_rate": 2.6447663853022286e-05, "loss": 0.1544189453125, "step": 3707 }, { "epoch": 0.25064215222387454, "grad_norm": 2.091160297393799, "learning_rate": 2.6445531456646132e-05, "loss": 0.2874755859375, "step": 3708 }, { "epoch": 0.2507097471948087, "grad_norm": 0.8586620688438416, "learning_rate": 2.6443398506463987e-05, "loss": 0.282470703125, "step": 3709 }, { "epoch": 0.2507773421657429, "grad_norm": 1.3878626823425293, "learning_rate": 2.6441265002579057e-05, "loss": 0.2718505859375, "step": 3710 }, { "epoch": 0.25084493713667705, "grad_norm": 0.7655432224273682, "learning_rate": 2.6439130945094573e-05, "loss": 0.128326416015625, "step": 3711 }, { "epoch": 0.2509125321076112, "grad_norm": 1.225669264793396, "learning_rate": 2.6436996334113793e-05, "loss": 0.310211181640625, "step": 3712 }, { "epoch": 0.25098012707854533, "grad_norm": 1.2354438304901123, "learning_rate": 2.643486116974001e-05, "loss": 0.28753662109375, "step": 3713 }, { "epoch": 0.2510477220494795, "grad_norm": 1.48726224899292, "learning_rate": 2.6432725452076524e-05, "loss": 0.35675048828125, "step": 3714 }, { "epoch": 0.2511153170204137, "grad_norm": 1.2675819396972656, "learning_rate": 2.6430589181226687e-05, "loss": 0.256195068359375, "step": 3715 }, { "epoch": 0.25118291199134785, "grad_norm": 2.135026216506958, "learning_rate": 2.6428452357293857e-05, "loss": 0.3433837890625, "step": 3716 }, { "epoch": 0.251250506962282, "grad_norm": 1.0152952671051025, "learning_rate": 2.642631498038143e-05, "loss": 0.281768798828125, "step": 3717 }, { "epoch": 0.2513181019332162, "grad_norm": 1.0789284706115723, "learning_rate": 2.642417705059282e-05, "loss": 0.2574462890625, "step": 3718 }, { "epoch": 0.25138569690415036, "grad_norm": 0.6717824935913086, "learning_rate": 2.6422038568031485e-05, "loss": 0.09136962890625, "step": 3719 }, { "epoch": 0.2514532918750845, "grad_norm": 1.3439494371414185, "learning_rate": 2.641989953280089e-05, "loss": 0.2873382568359375, "step": 3720 }, { "epoch": 0.25152088684601864, "grad_norm": 0.9515331983566284, "learning_rate": 2.6417759945004533e-05, "loss": 0.227569580078125, "step": 3721 }, { "epoch": 0.2515884818169528, "grad_norm": 1.1523361206054688, "learning_rate": 2.6415619804745942e-05, "loss": 0.26849365234375, "step": 3722 }, { "epoch": 0.251656076787887, "grad_norm": 1.7930768728256226, "learning_rate": 2.6413479112128676e-05, "loss": 0.280853271484375, "step": 3723 }, { "epoch": 0.25172367175882115, "grad_norm": 1.0171555280685425, "learning_rate": 2.641133786725631e-05, "loss": 0.214080810546875, "step": 3724 }, { "epoch": 0.2517912667297553, "grad_norm": 2.705881357192993, "learning_rate": 2.6409196070232452e-05, "loss": 0.32037353515625, "step": 3725 }, { "epoch": 0.2518588617006895, "grad_norm": 1.4257162809371948, "learning_rate": 2.6407053721160736e-05, "loss": 0.31866455078125, "step": 3726 }, { "epoch": 0.2519264566716236, "grad_norm": 1.4759814739227295, "learning_rate": 2.6404910820144817e-05, "loss": 0.31195068359375, "step": 3727 }, { "epoch": 0.2519940516425578, "grad_norm": 4.0804829597473145, "learning_rate": 2.640276736728839e-05, "loss": 0.34521484375, "step": 3728 }, { "epoch": 0.25206164661349195, "grad_norm": 1.8194128274917603, "learning_rate": 2.640062336269516e-05, "loss": 0.2662353515625, "step": 3729 }, { "epoch": 0.2521292415844261, "grad_norm": 1.5951595306396484, "learning_rate": 2.6398478806468876e-05, "loss": 0.28070068359375, "step": 3730 }, { "epoch": 0.2521968365553603, "grad_norm": 1.217184066772461, "learning_rate": 2.6396333698713304e-05, "loss": 0.24835205078125, "step": 3731 }, { "epoch": 0.25226443152629446, "grad_norm": 0.9428315162658691, "learning_rate": 2.639418803953223e-05, "loss": 0.16675567626953125, "step": 3732 }, { "epoch": 0.2523320264972286, "grad_norm": 1.0266873836517334, "learning_rate": 2.639204182902948e-05, "loss": 0.17596435546875, "step": 3733 }, { "epoch": 0.25239962146816275, "grad_norm": 1.6420716047286987, "learning_rate": 2.63898950673089e-05, "loss": 0.2978515625, "step": 3734 }, { "epoch": 0.2524672164390969, "grad_norm": 1.2622990608215332, "learning_rate": 2.638774775447436e-05, "loss": 0.159393310546875, "step": 3735 }, { "epoch": 0.2525348114100311, "grad_norm": 0.6570505499839783, "learning_rate": 2.638559989062977e-05, "loss": 0.1238555908203125, "step": 3736 }, { "epoch": 0.25260240638096526, "grad_norm": 2.2207889556884766, "learning_rate": 2.638345147587905e-05, "loss": 0.26739501953125, "step": 3737 }, { "epoch": 0.25267000135189943, "grad_norm": 1.037335753440857, "learning_rate": 2.6381302510326153e-05, "loss": 0.213958740234375, "step": 3738 }, { "epoch": 0.2527375963228336, "grad_norm": 1.8003604412078857, "learning_rate": 2.6379152994075063e-05, "loss": 0.305389404296875, "step": 3739 }, { "epoch": 0.2528051912937677, "grad_norm": 1.018289566040039, "learning_rate": 2.637700292722978e-05, "loss": 0.1846923828125, "step": 3740 }, { "epoch": 0.2528727862647019, "grad_norm": 3.4371864795684814, "learning_rate": 2.637485230989435e-05, "loss": 0.3120574951171875, "step": 3741 }, { "epoch": 0.25294038123563606, "grad_norm": 1.5313737392425537, "learning_rate": 2.637270114217282e-05, "loss": 0.28167724609375, "step": 3742 }, { "epoch": 0.25300797620657023, "grad_norm": 1.1205180883407593, "learning_rate": 2.637054942416928e-05, "loss": 0.210601806640625, "step": 3743 }, { "epoch": 0.2530755711775044, "grad_norm": 1.265389084815979, "learning_rate": 2.636839715598785e-05, "loss": 0.265411376953125, "step": 3744 }, { "epoch": 0.25314316614843857, "grad_norm": 1.338660717010498, "learning_rate": 2.636624433773267e-05, "loss": 0.23876953125, "step": 3745 }, { "epoch": 0.25321076111937274, "grad_norm": 0.7510449886322021, "learning_rate": 2.6364090969507897e-05, "loss": 0.16717910766601562, "step": 3746 }, { "epoch": 0.25327835609030686, "grad_norm": 1.369105339050293, "learning_rate": 2.6361937051417736e-05, "loss": 0.234893798828125, "step": 3747 }, { "epoch": 0.253345951061241, "grad_norm": 0.879113495349884, "learning_rate": 2.6359782583566397e-05, "loss": 0.12115478515625, "step": 3748 }, { "epoch": 0.2534135460321752, "grad_norm": 2.8891243934631348, "learning_rate": 2.6357627566058133e-05, "loss": 0.305999755859375, "step": 3749 }, { "epoch": 0.25348114100310937, "grad_norm": 0.590092122554779, "learning_rate": 2.6355471998997217e-05, "loss": 0.1008148193359375, "step": 3750 }, { "epoch": 0.25354873597404354, "grad_norm": 1.4068353176116943, "learning_rate": 2.6353315882487942e-05, "loss": 0.22357177734375, "step": 3751 }, { "epoch": 0.2536163309449777, "grad_norm": 1.2373863458633423, "learning_rate": 2.635115921663464e-05, "loss": 0.297119140625, "step": 3752 }, { "epoch": 0.2536839259159119, "grad_norm": 1.447210431098938, "learning_rate": 2.634900200154166e-05, "loss": 0.34576416015625, "step": 3753 }, { "epoch": 0.253751520886846, "grad_norm": 1.865214228630066, "learning_rate": 2.6346844237313394e-05, "loss": 0.22802734375, "step": 3754 }, { "epoch": 0.25381911585778016, "grad_norm": 3.1017203330993652, "learning_rate": 2.6344685924054234e-05, "loss": 0.33477783203125, "step": 3755 }, { "epoch": 0.25388671082871433, "grad_norm": 1.2346806526184082, "learning_rate": 2.6342527061868612e-05, "loss": 0.26507568359375, "step": 3756 }, { "epoch": 0.2539543057996485, "grad_norm": 0.786862313747406, "learning_rate": 2.6340367650861e-05, "loss": 0.1277008056640625, "step": 3757 }, { "epoch": 0.2540219007705827, "grad_norm": 1.4790104627609253, "learning_rate": 2.633820769113587e-05, "loss": 0.200927734375, "step": 3758 }, { "epoch": 0.25408949574151685, "grad_norm": 1.168272614479065, "learning_rate": 2.6336047182797742e-05, "loss": 0.19769287109375, "step": 3759 }, { "epoch": 0.254157090712451, "grad_norm": 1.4689804315567017, "learning_rate": 2.6333886125951154e-05, "loss": 0.2772064208984375, "step": 3760 }, { "epoch": 0.25422468568338513, "grad_norm": 0.6725610494613647, "learning_rate": 2.6331724520700673e-05, "loss": 0.1319122314453125, "step": 3761 }, { "epoch": 0.2542922806543193, "grad_norm": 0.7369093298912048, "learning_rate": 2.6329562367150885e-05, "loss": 0.0887298583984375, "step": 3762 }, { "epoch": 0.2543598756252535, "grad_norm": 1.6599091291427612, "learning_rate": 2.6327399665406415e-05, "loss": 0.342529296875, "step": 3763 }, { "epoch": 0.25442747059618764, "grad_norm": 1.7344355583190918, "learning_rate": 2.6325236415571906e-05, "loss": 0.26025390625, "step": 3764 }, { "epoch": 0.2544950655671218, "grad_norm": 0.5235726833343506, "learning_rate": 2.632307261775202e-05, "loss": 0.10114860534667969, "step": 3765 }, { "epoch": 0.254562660538056, "grad_norm": 1.0236525535583496, "learning_rate": 2.632090827205147e-05, "loss": 0.2576904296875, "step": 3766 }, { "epoch": 0.25463025550899016, "grad_norm": 0.9666770100593567, "learning_rate": 2.6318743378574972e-05, "loss": 0.1839447021484375, "step": 3767 }, { "epoch": 0.25469785047992427, "grad_norm": 1.5264770984649658, "learning_rate": 2.6316577937427282e-05, "loss": 0.209625244140625, "step": 3768 }, { "epoch": 0.25476544545085844, "grad_norm": 1.3095886707305908, "learning_rate": 2.6314411948713168e-05, "loss": 0.27823638916015625, "step": 3769 }, { "epoch": 0.2548330404217926, "grad_norm": 1.740908145904541, "learning_rate": 2.6312245412537444e-05, "loss": 0.2442626953125, "step": 3770 }, { "epoch": 0.2549006353927268, "grad_norm": 1.1563048362731934, "learning_rate": 2.631007832900494e-05, "loss": 0.28631591796875, "step": 3771 }, { "epoch": 0.25496823036366095, "grad_norm": 0.8772183060646057, "learning_rate": 2.6307910698220507e-05, "loss": 0.1717529296875, "step": 3772 }, { "epoch": 0.2550358253345951, "grad_norm": 0.6868202686309814, "learning_rate": 2.630574252028903e-05, "loss": 0.060466766357421875, "step": 3773 }, { "epoch": 0.2551034203055293, "grad_norm": 1.6715302467346191, "learning_rate": 2.630357379531542e-05, "loss": 0.28350830078125, "step": 3774 }, { "epoch": 0.2551710152764634, "grad_norm": 1.2649953365325928, "learning_rate": 2.6301404523404616e-05, "loss": 0.2198333740234375, "step": 3775 }, { "epoch": 0.2552386102473976, "grad_norm": 1.3236355781555176, "learning_rate": 2.6299234704661573e-05, "loss": 0.16705322265625, "step": 3776 }, { "epoch": 0.25530620521833175, "grad_norm": 1.6112351417541504, "learning_rate": 2.6297064339191292e-05, "loss": 0.1831512451171875, "step": 3777 }, { "epoch": 0.2553738001892659, "grad_norm": 1.6208397150039673, "learning_rate": 2.629489342709878e-05, "loss": 0.29400634765625, "step": 3778 }, { "epoch": 0.2554413951602001, "grad_norm": 2.5234079360961914, "learning_rate": 2.6292721968489084e-05, "loss": 0.317535400390625, "step": 3779 }, { "epoch": 0.25550899013113426, "grad_norm": 1.5211089849472046, "learning_rate": 2.629054996346727e-05, "loss": 0.23004150390625, "step": 3780 }, { "epoch": 0.25557658510206843, "grad_norm": 1.9459612369537354, "learning_rate": 2.628837741213843e-05, "loss": 0.341217041015625, "step": 3781 }, { "epoch": 0.25564418007300255, "grad_norm": 2.765882968902588, "learning_rate": 2.628620431460769e-05, "loss": 0.298553466796875, "step": 3782 }, { "epoch": 0.2557117750439367, "grad_norm": 1.8963091373443604, "learning_rate": 2.6284030670980198e-05, "loss": 0.28973388671875, "step": 3783 }, { "epoch": 0.2557793700148709, "grad_norm": 1.504758358001709, "learning_rate": 2.628185648136113e-05, "loss": 0.2690887451171875, "step": 3784 }, { "epoch": 0.25584696498580506, "grad_norm": 0.7512816190719604, "learning_rate": 2.6279681745855685e-05, "loss": 0.174896240234375, "step": 3785 }, { "epoch": 0.25591455995673923, "grad_norm": 1.5992511510849, "learning_rate": 2.6277506464569088e-05, "loss": 0.1924591064453125, "step": 3786 }, { "epoch": 0.2559821549276734, "grad_norm": 1.6527429819107056, "learning_rate": 2.6275330637606593e-05, "loss": 0.293853759765625, "step": 3787 }, { "epoch": 0.25604974989860757, "grad_norm": 1.4572592973709106, "learning_rate": 2.627315426507348e-05, "loss": 0.3660888671875, "step": 3788 }, { "epoch": 0.2561173448695417, "grad_norm": 1.5848277807235718, "learning_rate": 2.627097734707506e-05, "loss": 0.243377685546875, "step": 3789 }, { "epoch": 0.25618493984047586, "grad_norm": 1.766072154045105, "learning_rate": 2.6268799883716662e-05, "loss": 0.3062744140625, "step": 3790 }, { "epoch": 0.25625253481141, "grad_norm": 1.5070254802703857, "learning_rate": 2.626662187510364e-05, "loss": 0.29608154296875, "step": 3791 }, { "epoch": 0.2563201297823442, "grad_norm": 1.2400368452072144, "learning_rate": 2.626444332134139e-05, "loss": 0.252655029296875, "step": 3792 }, { "epoch": 0.25638772475327837, "grad_norm": 2.0268590450286865, "learning_rate": 2.626226422253532e-05, "loss": 0.27532958984375, "step": 3793 }, { "epoch": 0.25645531972421254, "grad_norm": 1.7672597169876099, "learning_rate": 2.6260084578790863e-05, "loss": 0.3421630859375, "step": 3794 }, { "epoch": 0.2565229146951467, "grad_norm": 1.6681627035140991, "learning_rate": 2.6257904390213495e-05, "loss": 0.293701171875, "step": 3795 }, { "epoch": 0.2565905096660808, "grad_norm": 1.2847875356674194, "learning_rate": 2.6255723656908697e-05, "loss": 0.31463623046875, "step": 3796 }, { "epoch": 0.256658104637015, "grad_norm": 1.5369558334350586, "learning_rate": 2.6253542378981992e-05, "loss": 0.26983642578125, "step": 3797 }, { "epoch": 0.25672569960794916, "grad_norm": 1.1890661716461182, "learning_rate": 2.625136055653892e-05, "loss": 0.274200439453125, "step": 3798 }, { "epoch": 0.25679329457888334, "grad_norm": 1.1917954683303833, "learning_rate": 2.6249178189685052e-05, "loss": 0.372406005859375, "step": 3799 }, { "epoch": 0.2568608895498175, "grad_norm": 1.3804454803466797, "learning_rate": 2.6246995278525983e-05, "loss": 0.277496337890625, "step": 3800 }, { "epoch": 0.2569284845207517, "grad_norm": 1.516823649406433, "learning_rate": 2.6244811823167347e-05, "loss": 0.329193115234375, "step": 3801 }, { "epoch": 0.2569960794916858, "grad_norm": 1.7480475902557373, "learning_rate": 2.6242627823714773e-05, "loss": 0.265350341796875, "step": 3802 }, { "epoch": 0.25706367446261996, "grad_norm": 1.6207993030548096, "learning_rate": 2.6240443280273957e-05, "loss": 0.2655792236328125, "step": 3803 }, { "epoch": 0.25713126943355413, "grad_norm": 1.75936758518219, "learning_rate": 2.623825819295059e-05, "loss": 0.25762939453125, "step": 3804 }, { "epoch": 0.2571988644044883, "grad_norm": 0.45349252223968506, "learning_rate": 2.6236072561850396e-05, "loss": 0.123504638671875, "step": 3805 }, { "epoch": 0.2572664593754225, "grad_norm": 1.6165052652359009, "learning_rate": 2.6233886387079137e-05, "loss": 0.271484375, "step": 3806 }, { "epoch": 0.25733405434635664, "grad_norm": 0.9922869205474854, "learning_rate": 2.6231699668742594e-05, "loss": 0.21014404296875, "step": 3807 }, { "epoch": 0.2574016493172908, "grad_norm": 1.9269592761993408, "learning_rate": 2.622951240694657e-05, "loss": 0.3258056640625, "step": 3808 }, { "epoch": 0.25746924428822493, "grad_norm": 1.8939628601074219, "learning_rate": 2.62273246017969e-05, "loss": 0.309295654296875, "step": 3809 }, { "epoch": 0.2575368392591591, "grad_norm": 1.4533318281173706, "learning_rate": 2.6225136253399446e-05, "loss": 0.3359375, "step": 3810 }, { "epoch": 0.25760443423009327, "grad_norm": 1.422771692276001, "learning_rate": 2.622294736186009e-05, "loss": 0.316558837890625, "step": 3811 }, { "epoch": 0.25767202920102744, "grad_norm": 1.4851484298706055, "learning_rate": 2.6220757927284752e-05, "loss": 0.1217803955078125, "step": 3812 }, { "epoch": 0.2577396241719616, "grad_norm": 1.4038994312286377, "learning_rate": 2.6218567949779358e-05, "loss": 0.3035888671875, "step": 3813 }, { "epoch": 0.2578072191428958, "grad_norm": 0.9786273241043091, "learning_rate": 2.6216377429449877e-05, "loss": 0.212127685546875, "step": 3814 }, { "epoch": 0.25787481411382995, "grad_norm": 0.6903173923492432, "learning_rate": 2.6214186366402305e-05, "loss": 0.1241607666015625, "step": 3815 }, { "epoch": 0.25794240908476407, "grad_norm": 1.0231505632400513, "learning_rate": 2.6211994760742658e-05, "loss": 0.211822509765625, "step": 3816 }, { "epoch": 0.25801000405569824, "grad_norm": 0.9608123302459717, "learning_rate": 2.620980261257697e-05, "loss": 0.19044113159179688, "step": 3817 }, { "epoch": 0.2580775990266324, "grad_norm": 1.3489516973495483, "learning_rate": 2.620760992201133e-05, "loss": 0.18994140625, "step": 3818 }, { "epoch": 0.2581451939975666, "grad_norm": 0.9056466817855835, "learning_rate": 2.6205416689151815e-05, "loss": 0.205841064453125, "step": 3819 }, { "epoch": 0.25821278896850075, "grad_norm": 1.510553240776062, "learning_rate": 2.620322291410456e-05, "loss": 0.295440673828125, "step": 3820 }, { "epoch": 0.2582803839394349, "grad_norm": 1.399425745010376, "learning_rate": 2.6201028596975704e-05, "loss": 0.1753387451171875, "step": 3821 }, { "epoch": 0.2583479789103691, "grad_norm": 1.3258421421051025, "learning_rate": 2.6198833737871428e-05, "loss": 0.336639404296875, "step": 3822 }, { "epoch": 0.2584155738813032, "grad_norm": 0.8605675101280212, "learning_rate": 2.619663833689793e-05, "loss": 0.15683746337890625, "step": 3823 }, { "epoch": 0.2584831688522374, "grad_norm": 1.1515952348709106, "learning_rate": 2.6194442394161438e-05, "loss": 0.231903076171875, "step": 3824 }, { "epoch": 0.25855076382317155, "grad_norm": 0.8783408999443054, "learning_rate": 2.6192245909768208e-05, "loss": 0.188629150390625, "step": 3825 }, { "epoch": 0.2586183587941057, "grad_norm": 1.280924916267395, "learning_rate": 2.6190048883824514e-05, "loss": 0.21712493896484375, "step": 3826 }, { "epoch": 0.2586859537650399, "grad_norm": 1.4349812269210815, "learning_rate": 2.6187851316436665e-05, "loss": 0.225433349609375, "step": 3827 }, { "epoch": 0.25875354873597406, "grad_norm": 0.9974781274795532, "learning_rate": 2.6185653207710995e-05, "loss": 0.1981658935546875, "step": 3828 }, { "epoch": 0.25882114370690823, "grad_norm": 1.3600307703018188, "learning_rate": 2.6183454557753857e-05, "loss": 0.2333221435546875, "step": 3829 }, { "epoch": 0.25888873867784234, "grad_norm": 2.0679051876068115, "learning_rate": 2.6181255366671645e-05, "loss": 0.30645751953125, "step": 3830 }, { "epoch": 0.2589563336487765, "grad_norm": 1.5733096599578857, "learning_rate": 2.6179055634570757e-05, "loss": 0.28594970703125, "step": 3831 }, { "epoch": 0.2590239286197107, "grad_norm": 1.2697805166244507, "learning_rate": 2.6176855361557633e-05, "loss": 0.155853271484375, "step": 3832 }, { "epoch": 0.25909152359064486, "grad_norm": 1.0148369073867798, "learning_rate": 2.6174654547738744e-05, "loss": 0.21966552734375, "step": 3833 }, { "epoch": 0.259159118561579, "grad_norm": 2.700380563735962, "learning_rate": 2.617245319322057e-05, "loss": 0.301177978515625, "step": 3834 }, { "epoch": 0.2592267135325132, "grad_norm": 1.5081369876861572, "learning_rate": 2.6170251298109632e-05, "loss": 0.2960205078125, "step": 3835 }, { "epoch": 0.25929430850344737, "grad_norm": 1.9308432340621948, "learning_rate": 2.616804886251247e-05, "loss": 0.30027008056640625, "step": 3836 }, { "epoch": 0.2593619034743815, "grad_norm": 1.1443161964416504, "learning_rate": 2.616584588653565e-05, "loss": 0.2405853271484375, "step": 3837 }, { "epoch": 0.25942949844531565, "grad_norm": 1.2971527576446533, "learning_rate": 2.6163642370285765e-05, "loss": 0.3087158203125, "step": 3838 }, { "epoch": 0.2594970934162498, "grad_norm": 0.6472451090812683, "learning_rate": 2.6161438313869438e-05, "loss": 0.14910888671875, "step": 3839 }, { "epoch": 0.259564688387184, "grad_norm": 0.6377115249633789, "learning_rate": 2.615923371739331e-05, "loss": 0.146759033203125, "step": 3840 }, { "epoch": 0.25963228335811817, "grad_norm": 0.9032424092292786, "learning_rate": 2.615702858096406e-05, "loss": 0.1374664306640625, "step": 3841 }, { "epoch": 0.25969987832905234, "grad_norm": 1.8233815431594849, "learning_rate": 2.615482290468838e-05, "loss": 0.2424163818359375, "step": 3842 }, { "epoch": 0.2597674732999865, "grad_norm": 1.184457540512085, "learning_rate": 2.6152616688672997e-05, "loss": 0.3323974609375, "step": 3843 }, { "epoch": 0.2598350682709206, "grad_norm": 0.9892765283584595, "learning_rate": 2.615040993302466e-05, "loss": 0.11420440673828125, "step": 3844 }, { "epoch": 0.2599026632418548, "grad_norm": 1.1824017763137817, "learning_rate": 2.6148202637850148e-05, "loss": 0.2322540283203125, "step": 3845 }, { "epoch": 0.25997025821278896, "grad_norm": 1.9560890197753906, "learning_rate": 2.6145994803256262e-05, "loss": 0.322021484375, "step": 3846 }, { "epoch": 0.26003785318372313, "grad_norm": 1.5039496421813965, "learning_rate": 2.6143786429349834e-05, "loss": 0.322998046875, "step": 3847 }, { "epoch": 0.2601054481546573, "grad_norm": 1.6518511772155762, "learning_rate": 2.6141577516237712e-05, "loss": 0.26165771484375, "step": 3848 }, { "epoch": 0.2601730431255915, "grad_norm": 1.050162434577942, "learning_rate": 2.613936806402678e-05, "loss": 0.291259765625, "step": 3849 }, { "epoch": 0.26024063809652564, "grad_norm": 1.4117459058761597, "learning_rate": 2.6137158072823955e-05, "loss": 0.260772705078125, "step": 3850 }, { "epoch": 0.26030823306745976, "grad_norm": 1.3769819736480713, "learning_rate": 2.6134947542736152e-05, "loss": 0.2620849609375, "step": 3851 }, { "epoch": 0.26037582803839393, "grad_norm": 1.4213961362838745, "learning_rate": 2.6132736473870346e-05, "loss": 0.27301025390625, "step": 3852 }, { "epoch": 0.2604434230093281, "grad_norm": 1.2889622449874878, "learning_rate": 2.6130524866333513e-05, "loss": 0.233245849609375, "step": 3853 }, { "epoch": 0.26051101798026227, "grad_norm": 1.9848253726959229, "learning_rate": 2.6128312720232665e-05, "loss": 0.1979522705078125, "step": 3854 }, { "epoch": 0.26057861295119644, "grad_norm": 1.6992170810699463, "learning_rate": 2.6126100035674846e-05, "loss": 0.2214202880859375, "step": 3855 }, { "epoch": 0.2606462079221306, "grad_norm": 1.3126332759857178, "learning_rate": 2.6123886812767108e-05, "loss": 0.31292724609375, "step": 3856 }, { "epoch": 0.2607138028930648, "grad_norm": 1.8399382829666138, "learning_rate": 2.6121673051616552e-05, "loss": 0.249114990234375, "step": 3857 }, { "epoch": 0.2607813978639989, "grad_norm": 2.3283214569091797, "learning_rate": 2.6119458752330284e-05, "loss": 0.34521484375, "step": 3858 }, { "epoch": 0.26084899283493307, "grad_norm": 1.1574654579162598, "learning_rate": 2.6117243915015458e-05, "loss": 0.223358154296875, "step": 3859 }, { "epoch": 0.26091658780586724, "grad_norm": 1.2859910726547241, "learning_rate": 2.611502853977923e-05, "loss": 0.20587158203125, "step": 3860 }, { "epoch": 0.2609841827768014, "grad_norm": 2.0623281002044678, "learning_rate": 2.6112812626728796e-05, "loss": 0.247161865234375, "step": 3861 }, { "epoch": 0.2610517777477356, "grad_norm": 1.682814359664917, "learning_rate": 2.611059617597138e-05, "loss": 0.21539306640625, "step": 3862 }, { "epoch": 0.26111937271866975, "grad_norm": 0.809187650680542, "learning_rate": 2.6108379187614225e-05, "loss": 0.2142333984375, "step": 3863 }, { "epoch": 0.26118696768960387, "grad_norm": 2.258558988571167, "learning_rate": 2.61061616617646e-05, "loss": 0.261138916015625, "step": 3864 }, { "epoch": 0.26125456266053804, "grad_norm": 1.1285700798034668, "learning_rate": 2.6103943598529808e-05, "loss": 0.234893798828125, "step": 3865 }, { "epoch": 0.2613221576314722, "grad_norm": 1.6518770456314087, "learning_rate": 2.6101724998017167e-05, "loss": 0.3399658203125, "step": 3866 }, { "epoch": 0.2613897526024064, "grad_norm": 1.571745753288269, "learning_rate": 2.609950586033403e-05, "loss": 0.174652099609375, "step": 3867 }, { "epoch": 0.26145734757334055, "grad_norm": 1.345779538154602, "learning_rate": 2.609728618558778e-05, "loss": 0.3192138671875, "step": 3868 }, { "epoch": 0.2615249425442747, "grad_norm": 1.3183045387268066, "learning_rate": 2.60950659738858e-05, "loss": 0.29595947265625, "step": 3869 }, { "epoch": 0.2615925375152089, "grad_norm": 1.6894487142562866, "learning_rate": 2.6092845225335533e-05, "loss": 0.217620849609375, "step": 3870 }, { "epoch": 0.261660132486143, "grad_norm": 1.74361252784729, "learning_rate": 2.609062394004443e-05, "loss": 0.166717529296875, "step": 3871 }, { "epoch": 0.2617277274570772, "grad_norm": 1.3720191717147827, "learning_rate": 2.608840211811997e-05, "loss": 0.209564208984375, "step": 3872 }, { "epoch": 0.26179532242801135, "grad_norm": 1.0884307622909546, "learning_rate": 2.6086179759669654e-05, "loss": 0.2207489013671875, "step": 3873 }, { "epoch": 0.2618629173989455, "grad_norm": 1.192143201828003, "learning_rate": 2.608395686480102e-05, "loss": 0.253082275390625, "step": 3874 }, { "epoch": 0.2619305123698797, "grad_norm": 1.092092752456665, "learning_rate": 2.6081733433621622e-05, "loss": 0.236663818359375, "step": 3875 }, { "epoch": 0.26199810734081386, "grad_norm": 1.0422805547714233, "learning_rate": 2.607950946623904e-05, "loss": 0.20281982421875, "step": 3876 }, { "epoch": 0.26206570231174803, "grad_norm": 2.0282599925994873, "learning_rate": 2.6077284962760895e-05, "loss": 0.32891845703125, "step": 3877 }, { "epoch": 0.26213329728268214, "grad_norm": 2.156400680541992, "learning_rate": 2.6075059923294805e-05, "loss": 0.26837158203125, "step": 3878 }, { "epoch": 0.2622008922536163, "grad_norm": 1.308396339416504, "learning_rate": 2.6072834347948448e-05, "loss": 0.253326416015625, "step": 3879 }, { "epoch": 0.2622684872245505, "grad_norm": 1.3650685548782349, "learning_rate": 2.6070608236829503e-05, "loss": 0.160247802734375, "step": 3880 }, { "epoch": 0.26233608219548465, "grad_norm": 1.4691742658615112, "learning_rate": 2.6068381590045683e-05, "loss": 0.278350830078125, "step": 3881 }, { "epoch": 0.2624036771664188, "grad_norm": 0.7730748057365417, "learning_rate": 2.6066154407704725e-05, "loss": 0.2229156494140625, "step": 3882 }, { "epoch": 0.262471272137353, "grad_norm": 1.8600155115127563, "learning_rate": 2.60639266899144e-05, "loss": 0.32568359375, "step": 3883 }, { "epoch": 0.26253886710828717, "grad_norm": 0.8797237277030945, "learning_rate": 2.6061698436782496e-05, "loss": 0.06979751586914062, "step": 3884 }, { "epoch": 0.2626064620792213, "grad_norm": 3.1068315505981445, "learning_rate": 2.6059469648416832e-05, "loss": 0.21129608154296875, "step": 3885 }, { "epoch": 0.26267405705015545, "grad_norm": 1.3471145629882812, "learning_rate": 2.605724032492524e-05, "loss": 0.188995361328125, "step": 3886 }, { "epoch": 0.2627416520210896, "grad_norm": 1.391212821006775, "learning_rate": 2.605501046641561e-05, "loss": 0.25537109375, "step": 3887 }, { "epoch": 0.2628092469920238, "grad_norm": 2.8263752460479736, "learning_rate": 2.6052780072995813e-05, "loss": 0.2734375, "step": 3888 }, { "epoch": 0.26287684196295796, "grad_norm": 1.5266655683517456, "learning_rate": 2.6050549144773782e-05, "loss": 0.26708984375, "step": 3889 }, { "epoch": 0.26294443693389213, "grad_norm": 2.118602752685547, "learning_rate": 2.6048317681857463e-05, "loss": 0.2483978271484375, "step": 3890 }, { "epoch": 0.2630120319048263, "grad_norm": 0.823333203792572, "learning_rate": 2.6046085684354825e-05, "loss": 0.1766815185546875, "step": 3891 }, { "epoch": 0.2630796268757604, "grad_norm": 1.919985294342041, "learning_rate": 2.6043853152373863e-05, "loss": 0.26611328125, "step": 3892 }, { "epoch": 0.2631472218466946, "grad_norm": 1.3209614753723145, "learning_rate": 2.604162008602261e-05, "loss": 0.1977386474609375, "step": 3893 }, { "epoch": 0.26321481681762876, "grad_norm": 0.9957150220870972, "learning_rate": 2.6039386485409105e-05, "loss": 0.276580810546875, "step": 3894 }, { "epoch": 0.26328241178856293, "grad_norm": 1.46403169631958, "learning_rate": 2.6037152350641432e-05, "loss": 0.289642333984375, "step": 3895 }, { "epoch": 0.2633500067594971, "grad_norm": 1.5020122528076172, "learning_rate": 2.603491768182769e-05, "loss": 0.30059814453125, "step": 3896 }, { "epoch": 0.2634176017304313, "grad_norm": 1.3719881772994995, "learning_rate": 2.6032682479076004e-05, "loss": 0.2367706298828125, "step": 3897 }, { "epoch": 0.26348519670136544, "grad_norm": 2.0450103282928467, "learning_rate": 2.6030446742494526e-05, "loss": 0.2596435546875, "step": 3898 }, { "epoch": 0.26355279167229956, "grad_norm": 2.460827350616455, "learning_rate": 2.602821047219144e-05, "loss": 0.29034423828125, "step": 3899 }, { "epoch": 0.26362038664323373, "grad_norm": 0.9650810956954956, "learning_rate": 2.6025973668274945e-05, "loss": 0.29083251953125, "step": 3900 }, { "epoch": 0.2636879816141679, "grad_norm": 1.2265074253082275, "learning_rate": 2.6023736330853276e-05, "loss": 0.259124755859375, "step": 3901 }, { "epoch": 0.26375557658510207, "grad_norm": 1.716465711593628, "learning_rate": 2.6021498460034686e-05, "loss": 0.308868408203125, "step": 3902 }, { "epoch": 0.26382317155603624, "grad_norm": 1.49674654006958, "learning_rate": 2.601926005592746e-05, "loss": 0.3143310546875, "step": 3903 }, { "epoch": 0.2638907665269704, "grad_norm": 3.175382614135742, "learning_rate": 2.6017021118639903e-05, "loss": 0.32379150390625, "step": 3904 }, { "epoch": 0.2639583614979046, "grad_norm": 1.362658977508545, "learning_rate": 2.6014781648280347e-05, "loss": 0.27178955078125, "step": 3905 }, { "epoch": 0.2640259564688387, "grad_norm": 1.108538031578064, "learning_rate": 2.601254164495716e-05, "loss": 0.298797607421875, "step": 3906 }, { "epoch": 0.26409355143977287, "grad_norm": 1.106553316116333, "learning_rate": 2.6010301108778722e-05, "loss": 0.207489013671875, "step": 3907 }, { "epoch": 0.26416114641070704, "grad_norm": 1.0471519231796265, "learning_rate": 2.6008060039853442e-05, "loss": 0.3175048828125, "step": 3908 }, { "epoch": 0.2642287413816412, "grad_norm": 1.2953276634216309, "learning_rate": 2.600581843828976e-05, "loss": 0.266326904296875, "step": 3909 }, { "epoch": 0.2642963363525754, "grad_norm": 2.282897472381592, "learning_rate": 2.6003576304196136e-05, "loss": 0.2880859375, "step": 3910 }, { "epoch": 0.26436393132350955, "grad_norm": 1.6556620597839355, "learning_rate": 2.6001333637681058e-05, "loss": 0.32666015625, "step": 3911 }, { "epoch": 0.2644315262944437, "grad_norm": 2.3558716773986816, "learning_rate": 2.5999090438853048e-05, "loss": 0.31414794921875, "step": 3912 }, { "epoch": 0.26449912126537783, "grad_norm": 1.6103425025939941, "learning_rate": 2.5996846707820633e-05, "loss": 0.2232666015625, "step": 3913 }, { "epoch": 0.264566716236312, "grad_norm": 1.3081918954849243, "learning_rate": 2.5994602444692394e-05, "loss": 0.261138916015625, "step": 3914 }, { "epoch": 0.2646343112072462, "grad_norm": 1.5407830476760864, "learning_rate": 2.5992357649576907e-05, "loss": 0.30535888671875, "step": 3915 }, { "epoch": 0.26470190617818035, "grad_norm": 1.590173602104187, "learning_rate": 2.5990112322582798e-05, "loss": 0.3101806640625, "step": 3916 }, { "epoch": 0.2647695011491145, "grad_norm": 0.8509733080863953, "learning_rate": 2.598786646381871e-05, "loss": 0.19683837890625, "step": 3917 }, { "epoch": 0.2648370961200487, "grad_norm": 1.5258395671844482, "learning_rate": 2.5985620073393306e-05, "loss": 0.30230712890625, "step": 3918 }, { "epoch": 0.26490469109098286, "grad_norm": 1.2947884798049927, "learning_rate": 2.598337315141529e-05, "loss": 0.314697265625, "step": 3919 }, { "epoch": 0.264972286061917, "grad_norm": 1.670229434967041, "learning_rate": 2.598112569799337e-05, "loss": 0.1994781494140625, "step": 3920 }, { "epoch": 0.26503988103285114, "grad_norm": 1.5752832889556885, "learning_rate": 2.59788777132363e-05, "loss": 0.2176971435546875, "step": 3921 }, { "epoch": 0.2651074760037853, "grad_norm": 2.6048176288604736, "learning_rate": 2.597662919725285e-05, "loss": 0.30352783203125, "step": 3922 }, { "epoch": 0.2651750709747195, "grad_norm": 2.618272066116333, "learning_rate": 2.5974380150151815e-05, "loss": 0.23858642578125, "step": 3923 }, { "epoch": 0.26524266594565366, "grad_norm": 0.7470802068710327, "learning_rate": 2.5972130572042024e-05, "loss": 0.11102294921875, "step": 3924 }, { "epoch": 0.2653102609165878, "grad_norm": 2.7374892234802246, "learning_rate": 2.5969880463032314e-05, "loss": 0.3126220703125, "step": 3925 }, { "epoch": 0.26537785588752194, "grad_norm": 2.662242889404297, "learning_rate": 2.5967629823231573e-05, "loss": 0.318267822265625, "step": 3926 }, { "epoch": 0.2654454508584561, "grad_norm": 2.720616102218628, "learning_rate": 2.596537865274869e-05, "loss": 0.220306396484375, "step": 3927 }, { "epoch": 0.2655130458293903, "grad_norm": 1.7848236560821533, "learning_rate": 2.5963126951692596e-05, "loss": 0.32421875, "step": 3928 }, { "epoch": 0.26558064080032445, "grad_norm": 1.8550156354904175, "learning_rate": 2.596087472017224e-05, "loss": 0.272735595703125, "step": 3929 }, { "epoch": 0.2656482357712586, "grad_norm": 3.696051836013794, "learning_rate": 2.59586219582966e-05, "loss": 0.3328857421875, "step": 3930 }, { "epoch": 0.2657158307421928, "grad_norm": 0.8416050672531128, "learning_rate": 2.5956368666174683e-05, "loss": 0.2040557861328125, "step": 3931 }, { "epoch": 0.26578342571312696, "grad_norm": 1.974340796470642, "learning_rate": 2.5954114843915505e-05, "loss": 0.23907470703125, "step": 3932 }, { "epoch": 0.2658510206840611, "grad_norm": 1.4938243627548218, "learning_rate": 2.5951860491628133e-05, "loss": 0.14630889892578125, "step": 3933 }, { "epoch": 0.26591861565499525, "grad_norm": 2.1627421379089355, "learning_rate": 2.5949605609421642e-05, "loss": 0.220794677734375, "step": 3934 }, { "epoch": 0.2659862106259294, "grad_norm": 1.2531561851501465, "learning_rate": 2.5947350197405136e-05, "loss": 0.2586669921875, "step": 3935 }, { "epoch": 0.2660538055968636, "grad_norm": 0.8970455527305603, "learning_rate": 2.5945094255687744e-05, "loss": 0.2125244140625, "step": 3936 }, { "epoch": 0.26612140056779776, "grad_norm": 1.7321255207061768, "learning_rate": 2.594283778437863e-05, "loss": 0.227691650390625, "step": 3937 }, { "epoch": 0.26618899553873193, "grad_norm": 1.3324415683746338, "learning_rate": 2.594058078358697e-05, "loss": 0.3111572265625, "step": 3938 }, { "epoch": 0.2662565905096661, "grad_norm": 1.1723310947418213, "learning_rate": 2.5938323253421973e-05, "loss": 0.17644119262695312, "step": 3939 }, { "epoch": 0.2663241854806002, "grad_norm": 0.6421321630477905, "learning_rate": 2.5936065193992867e-05, "loss": 0.158905029296875, "step": 3940 }, { "epoch": 0.2663917804515344, "grad_norm": 1.9985830783843994, "learning_rate": 2.593380660540892e-05, "loss": 0.30670166015625, "step": 3941 }, { "epoch": 0.26645937542246856, "grad_norm": 1.5651181936264038, "learning_rate": 2.5931547487779416e-05, "loss": 0.253692626953125, "step": 3942 }, { "epoch": 0.26652697039340273, "grad_norm": 1.989238977432251, "learning_rate": 2.592928784121366e-05, "loss": 0.2233734130859375, "step": 3943 }, { "epoch": 0.2665945653643369, "grad_norm": 1.0129563808441162, "learning_rate": 2.5927027665820987e-05, "loss": 0.2952880859375, "step": 3944 }, { "epoch": 0.26666216033527107, "grad_norm": 1.3532942533493042, "learning_rate": 2.5924766961710763e-05, "loss": 0.29388427734375, "step": 3945 }, { "epoch": 0.26672975530620524, "grad_norm": 0.6919489502906799, "learning_rate": 2.5922505728992375e-05, "loss": 0.15474700927734375, "step": 3946 }, { "epoch": 0.26679735027713936, "grad_norm": 1.5068106651306152, "learning_rate": 2.5920243967775228e-05, "loss": 0.280029296875, "step": 3947 }, { "epoch": 0.2668649452480735, "grad_norm": 1.865309238433838, "learning_rate": 2.591798167816877e-05, "loss": 0.187835693359375, "step": 3948 }, { "epoch": 0.2669325402190077, "grad_norm": 0.9821410775184631, "learning_rate": 2.591571886028246e-05, "loss": 0.1431884765625, "step": 3949 }, { "epoch": 0.26700013518994187, "grad_norm": 1.0654748678207397, "learning_rate": 2.5913455514225783e-05, "loss": 0.2752685546875, "step": 3950 }, { "epoch": 0.26706773016087604, "grad_norm": 3.425424814224243, "learning_rate": 2.5911191640108262e-05, "loss": 0.35137939453125, "step": 3951 }, { "epoch": 0.2671353251318102, "grad_norm": 0.645140528678894, "learning_rate": 2.5908927238039435e-05, "loss": 0.16387176513671875, "step": 3952 }, { "epoch": 0.2672029201027444, "grad_norm": 3.105102062225342, "learning_rate": 2.5906662308128865e-05, "loss": 0.304840087890625, "step": 3953 }, { "epoch": 0.2672705150736785, "grad_norm": 0.4173853397369385, "learning_rate": 2.5904396850486146e-05, "loss": 0.117156982421875, "step": 3954 }, { "epoch": 0.26733811004461266, "grad_norm": 1.2119207382202148, "learning_rate": 2.590213086522089e-05, "loss": 0.244598388671875, "step": 3955 }, { "epoch": 0.26740570501554684, "grad_norm": 1.3568124771118164, "learning_rate": 2.5899864352442746e-05, "loss": 0.32611083984375, "step": 3956 }, { "epoch": 0.267473299986481, "grad_norm": 1.6989606618881226, "learning_rate": 2.5897597312261375e-05, "loss": 0.218994140625, "step": 3957 }, { "epoch": 0.2675408949574152, "grad_norm": 0.9982155561447144, "learning_rate": 2.589532974478648e-05, "loss": 0.2845458984375, "step": 3958 }, { "epoch": 0.26760848992834935, "grad_norm": 0.927295446395874, "learning_rate": 2.589306165012777e-05, "loss": 0.207733154296875, "step": 3959 }, { "epoch": 0.2676760848992835, "grad_norm": 2.085768938064575, "learning_rate": 2.5890793028394998e-05, "loss": 0.2686767578125, "step": 3960 }, { "epoch": 0.26774367987021763, "grad_norm": 1.0801210403442383, "learning_rate": 2.588852387969793e-05, "loss": 0.2505340576171875, "step": 3961 }, { "epoch": 0.2678112748411518, "grad_norm": 1.6801629066467285, "learning_rate": 2.588625420414636e-05, "loss": 0.290008544921875, "step": 3962 }, { "epoch": 0.267878869812086, "grad_norm": 1.8867506980895996, "learning_rate": 2.588398400185011e-05, "loss": 0.1837921142578125, "step": 3963 }, { "epoch": 0.26794646478302014, "grad_norm": 1.056657075881958, "learning_rate": 2.588171327291903e-05, "loss": 0.296478271484375, "step": 3964 }, { "epoch": 0.2680140597539543, "grad_norm": 1.4614437818527222, "learning_rate": 2.5879442017462987e-05, "loss": 0.35797119140625, "step": 3965 }, { "epoch": 0.2680816547248885, "grad_norm": 0.7490965127944946, "learning_rate": 2.587717023559188e-05, "loss": 0.1864013671875, "step": 3966 }, { "epoch": 0.26814924969582266, "grad_norm": 1.1166640520095825, "learning_rate": 2.5874897927415632e-05, "loss": 0.1619873046875, "step": 3967 }, { "epoch": 0.26821684466675677, "grad_norm": 1.8551725149154663, "learning_rate": 2.5872625093044196e-05, "loss": 0.29962158203125, "step": 3968 }, { "epoch": 0.26828443963769094, "grad_norm": 1.5826843976974487, "learning_rate": 2.587035173258754e-05, "loss": 0.245361328125, "step": 3969 }, { "epoch": 0.2683520346086251, "grad_norm": 2.253858804702759, "learning_rate": 2.5868077846155666e-05, "loss": 0.27862548828125, "step": 3970 }, { "epoch": 0.2684196295795593, "grad_norm": 0.8066667914390564, "learning_rate": 2.5865803433858597e-05, "loss": 0.1958160400390625, "step": 3971 }, { "epoch": 0.26848722455049345, "grad_norm": 1.7613449096679688, "learning_rate": 2.5863528495806382e-05, "loss": 0.208831787109375, "step": 3972 }, { "epoch": 0.2685548195214276, "grad_norm": 1.2567986249923706, "learning_rate": 2.5861253032109102e-05, "loss": 0.29150390625, "step": 3973 }, { "epoch": 0.2686224144923618, "grad_norm": 2.0337331295013428, "learning_rate": 2.585897704287685e-05, "loss": 0.2838134765625, "step": 3974 }, { "epoch": 0.2686900094632959, "grad_norm": 1.262011170387268, "learning_rate": 2.5856700528219764e-05, "loss": 0.170318603515625, "step": 3975 }, { "epoch": 0.2687576044342301, "grad_norm": 1.8107352256774902, "learning_rate": 2.5854423488247986e-05, "loss": 0.30450439453125, "step": 3976 }, { "epoch": 0.26882519940516425, "grad_norm": 2.1330959796905518, "learning_rate": 2.5852145923071697e-05, "loss": 0.224945068359375, "step": 3977 }, { "epoch": 0.2688927943760984, "grad_norm": 1.5759689807891846, "learning_rate": 2.58498678328011e-05, "loss": 0.251251220703125, "step": 3978 }, { "epoch": 0.2689603893470326, "grad_norm": 2.0918526649475098, "learning_rate": 2.5847589217546422e-05, "loss": 0.32244873046875, "step": 3979 }, { "epoch": 0.26902798431796676, "grad_norm": 0.9878791570663452, "learning_rate": 2.5845310077417916e-05, "loss": 0.1830291748046875, "step": 3980 }, { "epoch": 0.26909557928890093, "grad_norm": 1.9599086046218872, "learning_rate": 2.5843030412525864e-05, "loss": 0.37786865234375, "step": 3981 }, { "epoch": 0.26916317425983505, "grad_norm": 1.4023933410644531, "learning_rate": 2.5840750222980568e-05, "loss": 0.29937744140625, "step": 3982 }, { "epoch": 0.2692307692307692, "grad_norm": 2.9974701404571533, "learning_rate": 2.583846950889236e-05, "loss": 0.2657470703125, "step": 3983 }, { "epoch": 0.2692983642017034, "grad_norm": 1.3533737659454346, "learning_rate": 2.583618827037159e-05, "loss": 0.188629150390625, "step": 3984 }, { "epoch": 0.26936595917263756, "grad_norm": 1.5271682739257812, "learning_rate": 2.5833906507528644e-05, "loss": 0.30072021484375, "step": 3985 }, { "epoch": 0.26943355414357173, "grad_norm": 0.5324419140815735, "learning_rate": 2.583162422047393e-05, "loss": 0.08242034912109375, "step": 3986 }, { "epoch": 0.2695011491145059, "grad_norm": 1.2372733354568481, "learning_rate": 2.5829341409317866e-05, "loss": 0.2611083984375, "step": 3987 }, { "epoch": 0.26956874408544007, "grad_norm": 1.7486578226089478, "learning_rate": 2.5827058074170925e-05, "loss": 0.350982666015625, "step": 3988 }, { "epoch": 0.2696363390563742, "grad_norm": 1.2691657543182373, "learning_rate": 2.5824774215143576e-05, "loss": 0.24114990234375, "step": 3989 }, { "epoch": 0.26970393402730836, "grad_norm": 1.4002562761306763, "learning_rate": 2.582248983234634e-05, "loss": 0.30877685546875, "step": 3990 }, { "epoch": 0.2697715289982425, "grad_norm": 0.9437653422355652, "learning_rate": 2.582020492588973e-05, "loss": 0.232635498046875, "step": 3991 }, { "epoch": 0.2698391239691767, "grad_norm": 1.2237317562103271, "learning_rate": 2.5817919495884327e-05, "loss": 0.274017333984375, "step": 3992 }, { "epoch": 0.26990671894011087, "grad_norm": 1.8669755458831787, "learning_rate": 2.5815633542440697e-05, "loss": 0.3109130859375, "step": 3993 }, { "epoch": 0.26997431391104504, "grad_norm": 1.231164813041687, "learning_rate": 2.5813347065669456e-05, "loss": 0.31439208984375, "step": 3994 }, { "epoch": 0.27004190888197915, "grad_norm": 2.3058574199676514, "learning_rate": 2.5811060065681237e-05, "loss": 0.284820556640625, "step": 3995 }, { "epoch": 0.2701095038529133, "grad_norm": 1.3777823448181152, "learning_rate": 2.58087725425867e-05, "loss": 0.222747802734375, "step": 3996 }, { "epoch": 0.2701770988238475, "grad_norm": 0.653343141078949, "learning_rate": 2.580648449649652e-05, "loss": 0.137176513671875, "step": 3997 }, { "epoch": 0.27024469379478167, "grad_norm": 1.0017200708389282, "learning_rate": 2.5804195927521422e-05, "loss": 0.219879150390625, "step": 3998 }, { "epoch": 0.27031228876571584, "grad_norm": 1.8691012859344482, "learning_rate": 2.5801906835772136e-05, "loss": 0.31170654296875, "step": 3999 }, { "epoch": 0.27037988373665, "grad_norm": 1.1963673830032349, "learning_rate": 2.579961722135942e-05, "loss": 0.24530029296875, "step": 4000 }, { "epoch": 0.2704474787075842, "grad_norm": 1.358526349067688, "learning_rate": 2.5797327084394058e-05, "loss": 0.321441650390625, "step": 4001 }, { "epoch": 0.2705150736785183, "grad_norm": 1.0602128505706787, "learning_rate": 2.5795036424986863e-05, "loss": 0.2921142578125, "step": 4002 }, { "epoch": 0.27058266864945246, "grad_norm": 0.6932691335678101, "learning_rate": 2.5792745243248676e-05, "loss": 0.11412429809570312, "step": 4003 }, { "epoch": 0.27065026362038663, "grad_norm": 1.4557758569717407, "learning_rate": 2.5790453539290354e-05, "loss": 0.269927978515625, "step": 4004 }, { "epoch": 0.2707178585913208, "grad_norm": 1.615821123123169, "learning_rate": 2.5788161313222784e-05, "loss": 0.304931640625, "step": 4005 }, { "epoch": 0.270785453562255, "grad_norm": 1.4156490564346313, "learning_rate": 2.5785868565156878e-05, "loss": 0.3023681640625, "step": 4006 }, { "epoch": 0.27085304853318914, "grad_norm": 1.5294438600540161, "learning_rate": 2.5783575295203576e-05, "loss": 0.21124267578125, "step": 4007 }, { "epoch": 0.2709206435041233, "grad_norm": 1.0434136390686035, "learning_rate": 2.5781281503473837e-05, "loss": 0.29345703125, "step": 4008 }, { "epoch": 0.27098823847505743, "grad_norm": 1.233994483947754, "learning_rate": 2.577898719007865e-05, "loss": 0.312286376953125, "step": 4009 }, { "epoch": 0.2710558334459916, "grad_norm": 0.8922997713088989, "learning_rate": 2.577669235512903e-05, "loss": 0.2080230712890625, "step": 4010 }, { "epoch": 0.27112342841692577, "grad_norm": 1.4997869729995728, "learning_rate": 2.5774396998736017e-05, "loss": 0.2744140625, "step": 4011 }, { "epoch": 0.27119102338785994, "grad_norm": 1.8720752000808716, "learning_rate": 2.577210112101067e-05, "loss": 0.285125732421875, "step": 4012 }, { "epoch": 0.2712586183587941, "grad_norm": 1.5513995885849, "learning_rate": 2.5769804722064077e-05, "loss": 0.287689208984375, "step": 4013 }, { "epoch": 0.2713262133297283, "grad_norm": 1.6985670328140259, "learning_rate": 2.576750780200736e-05, "loss": 0.343017578125, "step": 4014 }, { "epoch": 0.27139380830066245, "grad_norm": 1.3084172010421753, "learning_rate": 2.576521036095165e-05, "loss": 0.24359130859375, "step": 4015 }, { "epoch": 0.27146140327159657, "grad_norm": 1.1499223709106445, "learning_rate": 2.5762912399008114e-05, "loss": 0.2435455322265625, "step": 4016 }, { "epoch": 0.27152899824253074, "grad_norm": 0.8993015885353088, "learning_rate": 2.5760613916287948e-05, "loss": 0.171112060546875, "step": 4017 }, { "epoch": 0.2715965932134649, "grad_norm": 1.0742912292480469, "learning_rate": 2.5758314912902355e-05, "loss": 0.29510498046875, "step": 4018 }, { "epoch": 0.2716641881843991, "grad_norm": 1.583391785621643, "learning_rate": 2.575601538896258e-05, "loss": 0.291351318359375, "step": 4019 }, { "epoch": 0.27173178315533325, "grad_norm": 2.2623393535614014, "learning_rate": 2.5753715344579893e-05, "loss": 0.272735595703125, "step": 4020 }, { "epoch": 0.2717993781262674, "grad_norm": 0.5750756859779358, "learning_rate": 2.5751414779865583e-05, "loss": 0.190948486328125, "step": 4021 }, { "epoch": 0.2718669730972016, "grad_norm": 1.9935840368270874, "learning_rate": 2.574911369493096e-05, "loss": 0.1900634765625, "step": 4022 }, { "epoch": 0.2719345680681357, "grad_norm": 0.6855616569519043, "learning_rate": 2.574681208988737e-05, "loss": 0.17724609375, "step": 4023 }, { "epoch": 0.2720021630390699, "grad_norm": 0.6322324275970459, "learning_rate": 2.5744509964846176e-05, "loss": 0.13764190673828125, "step": 4024 }, { "epoch": 0.27206975801000405, "grad_norm": 1.2312896251678467, "learning_rate": 2.574220731991877e-05, "loss": 0.22698974609375, "step": 4025 }, { "epoch": 0.2721373529809382, "grad_norm": 1.239598274230957, "learning_rate": 2.573990415521657e-05, "loss": 0.2659912109375, "step": 4026 }, { "epoch": 0.2722049479518724, "grad_norm": 1.6619764566421509, "learning_rate": 2.573760047085102e-05, "loss": 0.30865478515625, "step": 4027 }, { "epoch": 0.27227254292280656, "grad_norm": 1.429628849029541, "learning_rate": 2.5735296266933577e-05, "loss": 0.21197509765625, "step": 4028 }, { "epoch": 0.27234013789374073, "grad_norm": 1.1402968168258667, "learning_rate": 2.5732991543575745e-05, "loss": 0.298095703125, "step": 4029 }, { "epoch": 0.27240773286467485, "grad_norm": 1.8665274381637573, "learning_rate": 2.5730686300889026e-05, "loss": 0.20245361328125, "step": 4030 }, { "epoch": 0.272475327835609, "grad_norm": 2.61480712890625, "learning_rate": 2.5728380538984975e-05, "loss": 0.3065185546875, "step": 4031 }, { "epoch": 0.2725429228065432, "grad_norm": 1.197540044784546, "learning_rate": 2.5726074257975153e-05, "loss": 0.29486083984375, "step": 4032 }, { "epoch": 0.27261051777747736, "grad_norm": 1.3788623809814453, "learning_rate": 2.5723767457971154e-05, "loss": 0.2095947265625, "step": 4033 }, { "epoch": 0.2726781127484115, "grad_norm": 0.7091089487075806, "learning_rate": 2.5721460139084597e-05, "loss": 0.148284912109375, "step": 4034 }, { "epoch": 0.2727457077193457, "grad_norm": 2.207505464553833, "learning_rate": 2.571915230142712e-05, "loss": 0.279571533203125, "step": 4035 }, { "epoch": 0.27281330269027987, "grad_norm": 1.1794339418411255, "learning_rate": 2.5716843945110396e-05, "loss": 0.2806396484375, "step": 4036 }, { "epoch": 0.272880897661214, "grad_norm": 0.7492177486419678, "learning_rate": 2.571453507024611e-05, "loss": 0.159027099609375, "step": 4037 }, { "epoch": 0.27294849263214815, "grad_norm": 1.4133929014205933, "learning_rate": 2.5712225676945986e-05, "loss": 0.1951904296875, "step": 4038 }, { "epoch": 0.2730160876030823, "grad_norm": 1.4004511833190918, "learning_rate": 2.5709915765321766e-05, "loss": 0.30419921875, "step": 4039 }, { "epoch": 0.2730836825740165, "grad_norm": 2.002429246902466, "learning_rate": 2.5707605335485218e-05, "loss": 0.20953369140625, "step": 4040 }, { "epoch": 0.27315127754495067, "grad_norm": 1.3378794193267822, "learning_rate": 2.5705294387548134e-05, "loss": 0.27313232421875, "step": 4041 }, { "epoch": 0.27321887251588484, "grad_norm": 1.6012061834335327, "learning_rate": 2.5702982921622328e-05, "loss": 0.2879638671875, "step": 4042 }, { "epoch": 0.273286467486819, "grad_norm": 1.448624610900879, "learning_rate": 2.570067093781965e-05, "loss": 0.31640625, "step": 4043 }, { "epoch": 0.2733540624577531, "grad_norm": 2.0621557235717773, "learning_rate": 2.5698358436251966e-05, "loss": 0.33001708984375, "step": 4044 }, { "epoch": 0.2734216574286873, "grad_norm": 0.5932014584541321, "learning_rate": 2.5696045417031168e-05, "loss": 0.093597412109375, "step": 4045 }, { "epoch": 0.27348925239962146, "grad_norm": 2.7586803436279297, "learning_rate": 2.5693731880269178e-05, "loss": 0.29925537109375, "step": 4046 }, { "epoch": 0.27355684737055563, "grad_norm": 1.8486449718475342, "learning_rate": 2.569141782607793e-05, "loss": 0.2935791015625, "step": 4047 }, { "epoch": 0.2736244423414898, "grad_norm": 1.9621386528015137, "learning_rate": 2.5689103254569402e-05, "loss": 0.3089599609375, "step": 4048 }, { "epoch": 0.273692037312424, "grad_norm": 1.3274059295654297, "learning_rate": 2.5686788165855583e-05, "loss": 0.287109375, "step": 4049 }, { "epoch": 0.27375963228335815, "grad_norm": 1.657624363899231, "learning_rate": 2.568447256004849e-05, "loss": 0.337677001953125, "step": 4050 }, { "epoch": 0.27382722725429226, "grad_norm": 1.0885897874832153, "learning_rate": 2.568215643726017e-05, "loss": 0.28070068359375, "step": 4051 }, { "epoch": 0.27389482222522643, "grad_norm": 2.221271514892578, "learning_rate": 2.567983979760269e-05, "loss": 0.2139739990234375, "step": 4052 }, { "epoch": 0.2739624171961606, "grad_norm": 1.7517025470733643, "learning_rate": 2.5677522641188145e-05, "loss": 0.255462646484375, "step": 4053 }, { "epoch": 0.27403001216709477, "grad_norm": 0.7841348648071289, "learning_rate": 2.5675204968128652e-05, "loss": 0.15948486328125, "step": 4054 }, { "epoch": 0.27409760713802894, "grad_norm": 1.4883519411087036, "learning_rate": 2.5672886778536352e-05, "loss": 0.200714111328125, "step": 4055 }, { "epoch": 0.2741652021089631, "grad_norm": 0.8860437273979187, "learning_rate": 2.5670568072523415e-05, "loss": 0.29730224609375, "step": 4056 }, { "epoch": 0.27423279707989723, "grad_norm": 1.3476279973983765, "learning_rate": 2.5668248850202037e-05, "loss": 0.209014892578125, "step": 4057 }, { "epoch": 0.2743003920508314, "grad_norm": 0.8594805002212524, "learning_rate": 2.5665929111684435e-05, "loss": 0.22418212890625, "step": 4058 }, { "epoch": 0.27436798702176557, "grad_norm": 1.5091630220413208, "learning_rate": 2.566360885708285e-05, "loss": 0.2252197265625, "step": 4059 }, { "epoch": 0.27443558199269974, "grad_norm": 1.516618251800537, "learning_rate": 2.5661288086509553e-05, "loss": 0.269317626953125, "step": 4060 }, { "epoch": 0.2745031769636339, "grad_norm": 2.188713788986206, "learning_rate": 2.5658966800076835e-05, "loss": 0.2806396484375, "step": 4061 }, { "epoch": 0.2745707719345681, "grad_norm": 1.316258192062378, "learning_rate": 2.5656644997897018e-05, "loss": 0.24420166015625, "step": 4062 }, { "epoch": 0.27463836690550225, "grad_norm": 1.9658218622207642, "learning_rate": 2.5654322680082445e-05, "loss": 0.19095611572265625, "step": 4063 }, { "epoch": 0.27470596187643637, "grad_norm": 1.9035032987594604, "learning_rate": 2.5651999846745476e-05, "loss": 0.312408447265625, "step": 4064 }, { "epoch": 0.27477355684737054, "grad_norm": 0.9354791641235352, "learning_rate": 2.5649676497998517e-05, "loss": 0.20965576171875, "step": 4065 }, { "epoch": 0.2748411518183047, "grad_norm": 1.8485925197601318, "learning_rate": 2.564735263395398e-05, "loss": 0.309783935546875, "step": 4066 }, { "epoch": 0.2749087467892389, "grad_norm": 1.2656430006027222, "learning_rate": 2.56450282547243e-05, "loss": 0.2211151123046875, "step": 4067 }, { "epoch": 0.27497634176017305, "grad_norm": 1.3451224565505981, "learning_rate": 2.5642703360421954e-05, "loss": 0.255615234375, "step": 4068 }, { "epoch": 0.2750439367311072, "grad_norm": 1.4654462337493896, "learning_rate": 2.5640377951159436e-05, "loss": 0.29205322265625, "step": 4069 }, { "epoch": 0.2751115317020414, "grad_norm": 1.1494109630584717, "learning_rate": 2.5638052027049265e-05, "loss": 0.234222412109375, "step": 4070 }, { "epoch": 0.2751791266729755, "grad_norm": 1.3706110715866089, "learning_rate": 2.5635725588203972e-05, "loss": 0.246734619140625, "step": 4071 }, { "epoch": 0.2752467216439097, "grad_norm": 1.1478532552719116, "learning_rate": 2.5633398634736135e-05, "loss": 0.1909942626953125, "step": 4072 }, { "epoch": 0.27531431661484385, "grad_norm": 2.663619041442871, "learning_rate": 2.563107116675834e-05, "loss": 0.2781982421875, "step": 4073 }, { "epoch": 0.275381911585778, "grad_norm": 0.9660897254943848, "learning_rate": 2.5628743184383218e-05, "loss": 0.19197845458984375, "step": 4074 }, { "epoch": 0.2754495065567122, "grad_norm": 0.755815863609314, "learning_rate": 2.5626414687723395e-05, "loss": 0.18114471435546875, "step": 4075 }, { "epoch": 0.27551710152764636, "grad_norm": 0.9205124974250793, "learning_rate": 2.5624085676891543e-05, "loss": 0.207733154296875, "step": 4076 }, { "epoch": 0.27558469649858053, "grad_norm": 2.4617702960968018, "learning_rate": 2.5621756152000356e-05, "loss": 0.293060302734375, "step": 4077 }, { "epoch": 0.27565229146951464, "grad_norm": 0.6351050138473511, "learning_rate": 2.5619426113162553e-05, "loss": 0.135345458984375, "step": 4078 }, { "epoch": 0.2757198864404488, "grad_norm": 1.7480987310409546, "learning_rate": 2.5617095560490868e-05, "loss": 0.298614501953125, "step": 4079 }, { "epoch": 0.275787481411383, "grad_norm": 1.2849822044372559, "learning_rate": 2.5614764494098078e-05, "loss": 0.293212890625, "step": 4080 }, { "epoch": 0.27585507638231715, "grad_norm": 0.9603520631790161, "learning_rate": 2.5612432914096967e-05, "loss": 0.257293701171875, "step": 4081 }, { "epoch": 0.2759226713532513, "grad_norm": 1.341866135597229, "learning_rate": 2.5610100820600353e-05, "loss": 0.20880126953125, "step": 4082 }, { "epoch": 0.2759902663241855, "grad_norm": 0.7683804035186768, "learning_rate": 2.560776821372108e-05, "loss": 0.23150634765625, "step": 4083 }, { "epoch": 0.27605786129511967, "grad_norm": 1.6896942853927612, "learning_rate": 2.560543509357201e-05, "loss": 0.2069091796875, "step": 4084 }, { "epoch": 0.2761254562660538, "grad_norm": 0.972230076789856, "learning_rate": 2.5603101460266036e-05, "loss": 0.259521484375, "step": 4085 }, { "epoch": 0.27619305123698795, "grad_norm": 2.404749631881714, "learning_rate": 2.5600767313916076e-05, "loss": 0.27093505859375, "step": 4086 }, { "epoch": 0.2762606462079221, "grad_norm": 2.4900450706481934, "learning_rate": 2.5598432654635064e-05, "loss": 0.275634765625, "step": 4087 }, { "epoch": 0.2763282411788563, "grad_norm": 1.2805606126785278, "learning_rate": 2.559609748253597e-05, "loss": 0.319549560546875, "step": 4088 }, { "epoch": 0.27639583614979046, "grad_norm": 1.8490504026412964, "learning_rate": 2.5593761797731783e-05, "loss": 0.31573486328125, "step": 4089 }, { "epoch": 0.27646343112072463, "grad_norm": 0.9584227800369263, "learning_rate": 2.559142560033552e-05, "loss": 0.248443603515625, "step": 4090 }, { "epoch": 0.2765310260916588, "grad_norm": 1.2144638299942017, "learning_rate": 2.558908889046022e-05, "loss": 0.1654510498046875, "step": 4091 }, { "epoch": 0.2765986210625929, "grad_norm": 0.8161343336105347, "learning_rate": 2.5586751668218945e-05, "loss": 0.1737823486328125, "step": 4092 }, { "epoch": 0.2766662160335271, "grad_norm": 1.0487760305404663, "learning_rate": 2.5584413933724785e-05, "loss": 0.212158203125, "step": 4093 }, { "epoch": 0.27673381100446126, "grad_norm": 1.1686644554138184, "learning_rate": 2.5582075687090855e-05, "loss": 0.261566162109375, "step": 4094 }, { "epoch": 0.27680140597539543, "grad_norm": 1.5240938663482666, "learning_rate": 2.55797369284303e-05, "loss": 0.247039794921875, "step": 4095 }, { "epoch": 0.2768690009463296, "grad_norm": 1.714867353439331, "learning_rate": 2.5577397657856272e-05, "loss": 0.302825927734375, "step": 4096 }, { "epoch": 0.2769365959172638, "grad_norm": 0.6813313364982605, "learning_rate": 2.5575057875481966e-05, "loss": 0.18204498291015625, "step": 4097 }, { "epoch": 0.27700419088819794, "grad_norm": 1.4218876361846924, "learning_rate": 2.5572717581420593e-05, "loss": 0.2645263671875, "step": 4098 }, { "epoch": 0.27707178585913206, "grad_norm": 1.3130162954330444, "learning_rate": 2.5570376775785397e-05, "loss": 0.3330078125, "step": 4099 }, { "epoch": 0.27713938083006623, "grad_norm": 0.874297022819519, "learning_rate": 2.5568035458689634e-05, "loss": 0.20611572265625, "step": 4100 }, { "epoch": 0.2772069758010004, "grad_norm": 1.0961707830429077, "learning_rate": 2.556569363024659e-05, "loss": 0.30157470703125, "step": 4101 }, { "epoch": 0.27727457077193457, "grad_norm": 1.0772221088409424, "learning_rate": 2.5563351290569583e-05, "loss": 0.20416259765625, "step": 4102 }, { "epoch": 0.27734216574286874, "grad_norm": 1.6263096332550049, "learning_rate": 2.556100843977195e-05, "loss": 0.22265625, "step": 4103 }, { "epoch": 0.2774097607138029, "grad_norm": 1.3156182765960693, "learning_rate": 2.555866507796705e-05, "loss": 0.306121826171875, "step": 4104 }, { "epoch": 0.2774773556847371, "grad_norm": 3.03151535987854, "learning_rate": 2.5556321205268266e-05, "loss": 0.318115234375, "step": 4105 }, { "epoch": 0.2775449506556712, "grad_norm": 1.3020154237747192, "learning_rate": 2.5553976821789015e-05, "loss": 0.263671875, "step": 4106 }, { "epoch": 0.27761254562660537, "grad_norm": 3.5678067207336426, "learning_rate": 2.5551631927642734e-05, "loss": 0.340576171875, "step": 4107 }, { "epoch": 0.27768014059753954, "grad_norm": 1.2094008922576904, "learning_rate": 2.5549286522942876e-05, "loss": 0.24237060546875, "step": 4108 }, { "epoch": 0.2777477355684737, "grad_norm": 2.5876097679138184, "learning_rate": 2.554694060780294e-05, "loss": 0.319610595703125, "step": 4109 }, { "epoch": 0.2778153305394079, "grad_norm": 1.1645737886428833, "learning_rate": 2.5544594182336415e-05, "loss": 0.15911865234375, "step": 4110 }, { "epoch": 0.27788292551034205, "grad_norm": 1.015095591545105, "learning_rate": 2.5542247246656854e-05, "loss": 0.232177734375, "step": 4111 }, { "epoch": 0.2779505204812762, "grad_norm": 1.215226173400879, "learning_rate": 2.5539899800877807e-05, "loss": 0.241851806640625, "step": 4112 }, { "epoch": 0.27801811545221033, "grad_norm": 1.277246356010437, "learning_rate": 2.553755184511286e-05, "loss": 0.196014404296875, "step": 4113 }, { "epoch": 0.2780857104231445, "grad_norm": 1.5470058917999268, "learning_rate": 2.553520337947562e-05, "loss": 0.27581787109375, "step": 4114 }, { "epoch": 0.2781533053940787, "grad_norm": 1.4291925430297852, "learning_rate": 2.5532854404079725e-05, "loss": 0.3603515625, "step": 4115 }, { "epoch": 0.27822090036501285, "grad_norm": 1.3024364709854126, "learning_rate": 2.5530504919038835e-05, "loss": 0.220458984375, "step": 4116 }, { "epoch": 0.278288495335947, "grad_norm": 2.339974880218506, "learning_rate": 2.5528154924466627e-05, "loss": 0.18328857421875, "step": 4117 }, { "epoch": 0.2783560903068812, "grad_norm": 1.4364125728607178, "learning_rate": 2.552580442047681e-05, "loss": 0.2841796875, "step": 4118 }, { "epoch": 0.27842368527781536, "grad_norm": 2.1155152320861816, "learning_rate": 2.5523453407183112e-05, "loss": 0.3397216796875, "step": 4119 }, { "epoch": 0.2784912802487495, "grad_norm": 1.8006237745285034, "learning_rate": 2.5521101884699298e-05, "loss": 0.280029296875, "step": 4120 }, { "epoch": 0.27855887521968364, "grad_norm": 1.1567615270614624, "learning_rate": 2.5518749853139144e-05, "loss": 0.20159912109375, "step": 4121 }, { "epoch": 0.2786264701906178, "grad_norm": 1.310109257698059, "learning_rate": 2.5516397312616456e-05, "loss": 0.208892822265625, "step": 4122 }, { "epoch": 0.278694065161552, "grad_norm": 1.0853931903839111, "learning_rate": 2.5514044263245064e-05, "loss": 0.1825103759765625, "step": 4123 }, { "epoch": 0.27876166013248616, "grad_norm": 1.0616090297698975, "learning_rate": 2.5511690705138826e-05, "loss": 0.2343597412109375, "step": 4124 }, { "epoch": 0.2788292551034203, "grad_norm": 1.2691465616226196, "learning_rate": 2.5509336638411614e-05, "loss": 0.259521484375, "step": 4125 }, { "epoch": 0.27889685007435444, "grad_norm": 0.84090656042099, "learning_rate": 2.5506982063177347e-05, "loss": 0.20526123046875, "step": 4126 }, { "epoch": 0.2789644450452886, "grad_norm": 2.0635502338409424, "learning_rate": 2.550462697954994e-05, "loss": 0.33416748046875, "step": 4127 }, { "epoch": 0.2790320400162228, "grad_norm": 1.6916054487228394, "learning_rate": 2.5502271387643353e-05, "loss": 0.249542236328125, "step": 4128 }, { "epoch": 0.27909963498715695, "grad_norm": 0.9072046279907227, "learning_rate": 2.5499915287571566e-05, "loss": 0.2191162109375, "step": 4129 }, { "epoch": 0.2791672299580911, "grad_norm": 1.0490351915359497, "learning_rate": 2.549755867944858e-05, "loss": 0.187774658203125, "step": 4130 }, { "epoch": 0.2792348249290253, "grad_norm": 1.4503344297409058, "learning_rate": 2.549520156338842e-05, "loss": 0.212646484375, "step": 4131 }, { "epoch": 0.27930241989995946, "grad_norm": 2.0209615230560303, "learning_rate": 2.5492843939505142e-05, "loss": 0.316009521484375, "step": 4132 }, { "epoch": 0.2793700148708936, "grad_norm": 0.9346146583557129, "learning_rate": 2.5490485807912817e-05, "loss": 0.183319091796875, "step": 4133 }, { "epoch": 0.27943760984182775, "grad_norm": 0.8018685579299927, "learning_rate": 2.548812716872555e-05, "loss": 0.17920684814453125, "step": 4134 }, { "epoch": 0.2795052048127619, "grad_norm": 1.3826797008514404, "learning_rate": 2.548576802205746e-05, "loss": 0.29071044921875, "step": 4135 }, { "epoch": 0.2795727997836961, "grad_norm": 1.7726266384124756, "learning_rate": 2.548340836802271e-05, "loss": 0.225555419921875, "step": 4136 }, { "epoch": 0.27964039475463026, "grad_norm": 2.080097198486328, "learning_rate": 2.5481048206735464e-05, "loss": 0.277130126953125, "step": 4137 }, { "epoch": 0.27970798972556443, "grad_norm": 1.4872419834136963, "learning_rate": 2.5478687538309924e-05, "loss": 0.3114013671875, "step": 4138 }, { "epoch": 0.2797755846964986, "grad_norm": 1.2280820608139038, "learning_rate": 2.5476326362860315e-05, "loss": 0.2106170654296875, "step": 4139 }, { "epoch": 0.2798431796674327, "grad_norm": 1.4558192491531372, "learning_rate": 2.5473964680500888e-05, "loss": 0.2987060546875, "step": 4140 }, { "epoch": 0.2799107746383669, "grad_norm": 1.1932268142700195, "learning_rate": 2.547160249134591e-05, "loss": 0.261688232421875, "step": 4141 }, { "epoch": 0.27997836960930106, "grad_norm": 0.9712399244308472, "learning_rate": 2.546923979550968e-05, "loss": 0.17037200927734375, "step": 4142 }, { "epoch": 0.28004596458023523, "grad_norm": 1.4614527225494385, "learning_rate": 2.5466876593106526e-05, "loss": 0.240966796875, "step": 4143 }, { "epoch": 0.2801135595511694, "grad_norm": 2.1212081909179688, "learning_rate": 2.5464512884250788e-05, "loss": 0.2967529296875, "step": 4144 }, { "epoch": 0.28018115452210357, "grad_norm": 0.8770093321800232, "learning_rate": 2.546214866905684e-05, "loss": 0.144989013671875, "step": 4145 }, { "epoch": 0.28024874949303774, "grad_norm": 1.2426303625106812, "learning_rate": 2.5459783947639072e-05, "loss": 0.185333251953125, "step": 4146 }, { "epoch": 0.28031634446397186, "grad_norm": 0.989546000957489, "learning_rate": 2.5457418720111908e-05, "loss": 0.2060089111328125, "step": 4147 }, { "epoch": 0.280383939434906, "grad_norm": 1.3670322895050049, "learning_rate": 2.5455052986589796e-05, "loss": 0.223052978515625, "step": 4148 }, { "epoch": 0.2804515344058402, "grad_norm": 0.5583055019378662, "learning_rate": 2.54526867471872e-05, "loss": 0.099609375, "step": 4149 }, { "epoch": 0.28051912937677437, "grad_norm": 1.2432914972305298, "learning_rate": 2.5450320002018612e-05, "loss": 0.2515869140625, "step": 4150 }, { "epoch": 0.28058672434770854, "grad_norm": 0.9073882102966309, "learning_rate": 2.544795275119855e-05, "loss": 0.173980712890625, "step": 4151 }, { "epoch": 0.2806543193186427, "grad_norm": 2.0980024337768555, "learning_rate": 2.5445584994841564e-05, "loss": 0.311431884765625, "step": 4152 }, { "epoch": 0.2807219142895769, "grad_norm": 1.8757431507110596, "learning_rate": 2.5443216733062216e-05, "loss": 0.275299072265625, "step": 4153 }, { "epoch": 0.280789509260511, "grad_norm": 1.376432180404663, "learning_rate": 2.5440847965975097e-05, "loss": 0.221405029296875, "step": 4154 }, { "epoch": 0.28085710423144516, "grad_norm": 1.6754953861236572, "learning_rate": 2.543847869369482e-05, "loss": 0.23828125, "step": 4155 }, { "epoch": 0.28092469920237934, "grad_norm": 1.1388185024261475, "learning_rate": 2.5436108916336028e-05, "loss": 0.259735107421875, "step": 4156 }, { "epoch": 0.2809922941733135, "grad_norm": 1.7447032928466797, "learning_rate": 2.5433738634013388e-05, "loss": 0.2470703125, "step": 4157 }, { "epoch": 0.2810598891442477, "grad_norm": 0.7191370725631714, "learning_rate": 2.543136784684158e-05, "loss": 0.1082000732421875, "step": 4158 }, { "epoch": 0.28112748411518185, "grad_norm": 2.067939519882202, "learning_rate": 2.5428996554935332e-05, "loss": 0.309417724609375, "step": 4159 }, { "epoch": 0.281195079086116, "grad_norm": 1.9208598136901855, "learning_rate": 2.5426624758409364e-05, "loss": 0.2926025390625, "step": 4160 }, { "epoch": 0.28126267405705013, "grad_norm": 1.437238335609436, "learning_rate": 2.5424252457378454e-05, "loss": 0.229278564453125, "step": 4161 }, { "epoch": 0.2813302690279843, "grad_norm": 1.3220722675323486, "learning_rate": 2.542187965195738e-05, "loss": 0.19456863403320312, "step": 4162 }, { "epoch": 0.2813978639989185, "grad_norm": 0.9602077603340149, "learning_rate": 2.5419506342260958e-05, "loss": 0.18239593505859375, "step": 4163 }, { "epoch": 0.28146545896985264, "grad_norm": 0.9583789110183716, "learning_rate": 2.5417132528404016e-05, "loss": 0.19793701171875, "step": 4164 }, { "epoch": 0.2815330539407868, "grad_norm": 1.4056155681610107, "learning_rate": 2.5414758210501424e-05, "loss": 0.2880859375, "step": 4165 }, { "epoch": 0.281600648911721, "grad_norm": 1.8754310607910156, "learning_rate": 2.541238338866806e-05, "loss": 0.311065673828125, "step": 4166 }, { "epoch": 0.28166824388265516, "grad_norm": 1.2546873092651367, "learning_rate": 2.5410008063018832e-05, "loss": 0.238037109375, "step": 4167 }, { "epoch": 0.28173583885358927, "grad_norm": 0.9550098180770874, "learning_rate": 2.540763223366868e-05, "loss": 0.21405029296875, "step": 4168 }, { "epoch": 0.28180343382452344, "grad_norm": 1.2770793437957764, "learning_rate": 2.540525590073255e-05, "loss": 0.294647216796875, "step": 4169 }, { "epoch": 0.2818710287954576, "grad_norm": 1.7725963592529297, "learning_rate": 2.5402879064325433e-05, "loss": 0.32855224609375, "step": 4170 }, { "epoch": 0.2819386237663918, "grad_norm": 2.0927200317382812, "learning_rate": 2.5400501724562328e-05, "loss": 0.2364654541015625, "step": 4171 }, { "epoch": 0.28200621873732595, "grad_norm": 1.5431870222091675, "learning_rate": 2.5398123881558274e-05, "loss": 0.34356689453125, "step": 4172 }, { "epoch": 0.2820738137082601, "grad_norm": 1.8237015008926392, "learning_rate": 2.5395745535428318e-05, "loss": 0.26641845703125, "step": 4173 }, { "epoch": 0.2821414086791943, "grad_norm": 1.4166505336761475, "learning_rate": 2.539336668628754e-05, "loss": 0.26373291015625, "step": 4174 }, { "epoch": 0.2822090036501284, "grad_norm": 1.1965521574020386, "learning_rate": 2.5390987334251052e-05, "loss": 0.2557830810546875, "step": 4175 }, { "epoch": 0.2822765986210626, "grad_norm": 1.2040048837661743, "learning_rate": 2.538860747943398e-05, "loss": 0.223663330078125, "step": 4176 }, { "epoch": 0.28234419359199675, "grad_norm": 1.760035514831543, "learning_rate": 2.5386227121951465e-05, "loss": 0.252593994140625, "step": 4177 }, { "epoch": 0.2824117885629309, "grad_norm": 1.276045322418213, "learning_rate": 2.538384626191869e-05, "loss": 0.249542236328125, "step": 4178 }, { "epoch": 0.2824793835338651, "grad_norm": 1.5269322395324707, "learning_rate": 2.538146489945086e-05, "loss": 0.29486083984375, "step": 4179 }, { "epoch": 0.28254697850479926, "grad_norm": 1.2751728296279907, "learning_rate": 2.5379083034663196e-05, "loss": 0.2640380859375, "step": 4180 }, { "epoch": 0.28261457347573343, "grad_norm": 1.8680357933044434, "learning_rate": 2.5376700667670946e-05, "loss": 0.27044677734375, "step": 4181 }, { "epoch": 0.28268216844666755, "grad_norm": 1.2361503839492798, "learning_rate": 2.5374317798589383e-05, "loss": 0.25469970703125, "step": 4182 }, { "epoch": 0.2827497634176017, "grad_norm": 1.3067328929901123, "learning_rate": 2.5371934427533815e-05, "loss": 0.281219482421875, "step": 4183 }, { "epoch": 0.2828173583885359, "grad_norm": 0.7738693356513977, "learning_rate": 2.536955055461955e-05, "loss": 0.18572998046875, "step": 4184 }, { "epoch": 0.28288495335947006, "grad_norm": 0.6316758990287781, "learning_rate": 2.536716617996195e-05, "loss": 0.106170654296875, "step": 4185 }, { "epoch": 0.28295254833040423, "grad_norm": 1.0990813970565796, "learning_rate": 2.5364781303676368e-05, "loss": 0.1566314697265625, "step": 4186 }, { "epoch": 0.2830201433013384, "grad_norm": 1.3208048343658447, "learning_rate": 2.5362395925878216e-05, "loss": 0.24615478515625, "step": 4187 }, { "epoch": 0.2830877382722725, "grad_norm": 1.3538973331451416, "learning_rate": 2.5360010046682898e-05, "loss": 0.25897216796875, "step": 4188 }, { "epoch": 0.2831553332432067, "grad_norm": 1.6739083528518677, "learning_rate": 2.5357623666205876e-05, "loss": 0.34130859375, "step": 4189 }, { "epoch": 0.28322292821414086, "grad_norm": 1.6242693662643433, "learning_rate": 2.5355236784562603e-05, "loss": 0.1999053955078125, "step": 4190 }, { "epoch": 0.283290523185075, "grad_norm": 1.1473453044891357, "learning_rate": 2.5352849401868574e-05, "loss": 0.19287109375, "step": 4191 }, { "epoch": 0.2833581181560092, "grad_norm": 0.9613065123558044, "learning_rate": 2.5350461518239308e-05, "loss": 0.236602783203125, "step": 4192 }, { "epoch": 0.28342571312694337, "grad_norm": 0.9244411587715149, "learning_rate": 2.5348073133790343e-05, "loss": 0.22381591796875, "step": 4193 }, { "epoch": 0.28349330809787754, "grad_norm": 1.5715270042419434, "learning_rate": 2.534568424863725e-05, "loss": 0.256134033203125, "step": 4194 }, { "epoch": 0.28356090306881165, "grad_norm": 2.5895211696624756, "learning_rate": 2.5343294862895613e-05, "loss": 0.2650146484375, "step": 4195 }, { "epoch": 0.2836284980397458, "grad_norm": 1.5195094347000122, "learning_rate": 2.5340904976681044e-05, "loss": 0.27947998046875, "step": 4196 }, { "epoch": 0.28369609301068, "grad_norm": 1.2569301128387451, "learning_rate": 2.5338514590109185e-05, "loss": 0.14654541015625, "step": 4197 }, { "epoch": 0.28376368798161417, "grad_norm": 1.0749276876449585, "learning_rate": 2.5336123703295694e-05, "loss": 0.250274658203125, "step": 4198 }, { "epoch": 0.28383128295254834, "grad_norm": 1.210006833076477, "learning_rate": 2.533373231635626e-05, "loss": 0.2203216552734375, "step": 4199 }, { "epoch": 0.2838988779234825, "grad_norm": 2.4637928009033203, "learning_rate": 2.5331340429406594e-05, "loss": 0.347747802734375, "step": 4200 }, { "epoch": 0.2839664728944167, "grad_norm": 0.810436487197876, "learning_rate": 2.532894804256242e-05, "loss": 0.20629119873046875, "step": 4201 }, { "epoch": 0.2840340678653508, "grad_norm": 1.55299711227417, "learning_rate": 2.5326555155939514e-05, "loss": 0.29638671875, "step": 4202 }, { "epoch": 0.28410166283628496, "grad_norm": 1.1287901401519775, "learning_rate": 2.5324161769653645e-05, "loss": 0.22955322265625, "step": 4203 }, { "epoch": 0.28416925780721913, "grad_norm": 1.4165942668914795, "learning_rate": 2.5321767883820627e-05, "loss": 0.30731201171875, "step": 4204 }, { "epoch": 0.2842368527781533, "grad_norm": 0.9797916412353516, "learning_rate": 2.5319373498556288e-05, "loss": 0.22100830078125, "step": 4205 }, { "epoch": 0.2843044477490875, "grad_norm": 1.1575288772583008, "learning_rate": 2.531697861397648e-05, "loss": 0.2172698974609375, "step": 4206 }, { "epoch": 0.28437204272002164, "grad_norm": 1.8692718744277954, "learning_rate": 2.5314583230197084e-05, "loss": 0.30108642578125, "step": 4207 }, { "epoch": 0.2844396376909558, "grad_norm": 1.052398443222046, "learning_rate": 2.531218734733401e-05, "loss": 0.1886444091796875, "step": 4208 }, { "epoch": 0.28450723266188993, "grad_norm": 1.0448743104934692, "learning_rate": 2.530979096550318e-05, "loss": 0.222076416015625, "step": 4209 }, { "epoch": 0.2845748276328241, "grad_norm": 1.0395987033843994, "learning_rate": 2.530739408482055e-05, "loss": 0.24432373046875, "step": 4210 }, { "epoch": 0.28464242260375827, "grad_norm": 1.3503003120422363, "learning_rate": 2.530499670540209e-05, "loss": 0.31243896484375, "step": 4211 }, { "epoch": 0.28471001757469244, "grad_norm": 1.6134772300720215, "learning_rate": 2.5302598827363804e-05, "loss": 0.33587646484375, "step": 4212 }, { "epoch": 0.2847776125456266, "grad_norm": 1.7273812294006348, "learning_rate": 2.530020045082172e-05, "loss": 0.276519775390625, "step": 4213 }, { "epoch": 0.2848452075165608, "grad_norm": 2.0982680320739746, "learning_rate": 2.5297801575891873e-05, "loss": 0.1978759765625, "step": 4214 }, { "epoch": 0.28491280248749495, "grad_norm": 1.068755030632019, "learning_rate": 2.5295402202690353e-05, "loss": 0.194305419921875, "step": 4215 }, { "epoch": 0.28498039745842907, "grad_norm": 0.919834554195404, "learning_rate": 2.5293002331333246e-05, "loss": 0.19390869140625, "step": 4216 }, { "epoch": 0.28504799242936324, "grad_norm": 2.275256395339966, "learning_rate": 2.529060196193667e-05, "loss": 0.350341796875, "step": 4217 }, { "epoch": 0.2851155874002974, "grad_norm": 1.9941340684890747, "learning_rate": 2.5288201094616778e-05, "loss": 0.32623291015625, "step": 4218 }, { "epoch": 0.2851831823712316, "grad_norm": 0.5908975005149841, "learning_rate": 2.5285799729489736e-05, "loss": 0.120513916015625, "step": 4219 }, { "epoch": 0.28525077734216575, "grad_norm": 2.571432590484619, "learning_rate": 2.5283397866671736e-05, "loss": 0.3447265625, "step": 4220 }, { "epoch": 0.2853183723130999, "grad_norm": 1.9643484354019165, "learning_rate": 2.5280995506278993e-05, "loss": 0.27813720703125, "step": 4221 }, { "epoch": 0.2853859672840341, "grad_norm": 1.8188202381134033, "learning_rate": 2.5278592648427753e-05, "loss": 0.251556396484375, "step": 4222 }, { "epoch": 0.2854535622549682, "grad_norm": 1.4180219173431396, "learning_rate": 2.5276189293234275e-05, "loss": 0.304901123046875, "step": 4223 }, { "epoch": 0.2855211572259024, "grad_norm": 2.0677640438079834, "learning_rate": 2.527378544081486e-05, "loss": 0.2637939453125, "step": 4224 }, { "epoch": 0.28558875219683655, "grad_norm": 1.3292584419250488, "learning_rate": 2.5271381091285806e-05, "loss": 0.30535888671875, "step": 4225 }, { "epoch": 0.2856563471677707, "grad_norm": 1.8199799060821533, "learning_rate": 2.5268976244763464e-05, "loss": 0.3089599609375, "step": 4226 }, { "epoch": 0.2857239421387049, "grad_norm": 1.741274118423462, "learning_rate": 2.5266570901364183e-05, "loss": 0.312896728515625, "step": 4227 }, { "epoch": 0.28579153710963906, "grad_norm": 2.233792304992676, "learning_rate": 2.5264165061204357e-05, "loss": 0.3773193359375, "step": 4228 }, { "epoch": 0.28585913208057323, "grad_norm": 0.8524701595306396, "learning_rate": 2.5261758724400392e-05, "loss": 0.227325439453125, "step": 4229 }, { "epoch": 0.28592672705150735, "grad_norm": 1.7733674049377441, "learning_rate": 2.5259351891068727e-05, "loss": 0.22467041015625, "step": 4230 }, { "epoch": 0.2859943220224415, "grad_norm": 2.523482322692871, "learning_rate": 2.5256944561325813e-05, "loss": 0.278106689453125, "step": 4231 }, { "epoch": 0.2860619169933757, "grad_norm": 2.430913209915161, "learning_rate": 2.525453673528813e-05, "loss": 0.3353271484375, "step": 4232 }, { "epoch": 0.28612951196430986, "grad_norm": 1.588346004486084, "learning_rate": 2.5252128413072193e-05, "loss": 0.2806396484375, "step": 4233 }, { "epoch": 0.28619710693524403, "grad_norm": 3.4066100120544434, "learning_rate": 2.5249719594794523e-05, "loss": 0.208953857421875, "step": 4234 }, { "epoch": 0.2862647019061782, "grad_norm": 1.38900625705719, "learning_rate": 2.5247310280571677e-05, "loss": 0.218841552734375, "step": 4235 }, { "epoch": 0.28633229687711237, "grad_norm": 1.046634554862976, "learning_rate": 2.5244900470520232e-05, "loss": 0.2244110107421875, "step": 4236 }, { "epoch": 0.2863998918480465, "grad_norm": 1.1956807374954224, "learning_rate": 2.5242490164756796e-05, "loss": 0.2010955810546875, "step": 4237 }, { "epoch": 0.28646748681898065, "grad_norm": 1.1625010967254639, "learning_rate": 2.524007936339798e-05, "loss": 0.15533447265625, "step": 4238 }, { "epoch": 0.2865350817899148, "grad_norm": 0.9301044344902039, "learning_rate": 2.5237668066560445e-05, "loss": 0.209320068359375, "step": 4239 }, { "epoch": 0.286602676760849, "grad_norm": 0.704761803150177, "learning_rate": 2.523525627436086e-05, "loss": 0.13720703125, "step": 4240 }, { "epoch": 0.28667027173178317, "grad_norm": 1.3018391132354736, "learning_rate": 2.5232843986915927e-05, "loss": 0.255340576171875, "step": 4241 }, { "epoch": 0.28673786670271734, "grad_norm": 0.88404780626297, "learning_rate": 2.5230431204342365e-05, "loss": 0.1883544921875, "step": 4242 }, { "epoch": 0.2868054616736515, "grad_norm": 1.4962424039840698, "learning_rate": 2.5228017926756914e-05, "loss": 0.308837890625, "step": 4243 }, { "epoch": 0.2868730566445856, "grad_norm": 1.5865724086761475, "learning_rate": 2.5225604154276354e-05, "loss": 0.27899169921875, "step": 4244 }, { "epoch": 0.2869406516155198, "grad_norm": 0.8257021307945251, "learning_rate": 2.522318988701747e-05, "loss": 0.1208038330078125, "step": 4245 }, { "epoch": 0.28700824658645396, "grad_norm": 1.913069725036621, "learning_rate": 2.5220775125097082e-05, "loss": 0.32867431640625, "step": 4246 }, { "epoch": 0.28707584155738813, "grad_norm": 1.5252398252487183, "learning_rate": 2.521835986863203e-05, "loss": 0.277099609375, "step": 4247 }, { "epoch": 0.2871434365283223, "grad_norm": 2.2471020221710205, "learning_rate": 2.5215944117739183e-05, "loss": 0.30194091796875, "step": 4248 }, { "epoch": 0.2872110314992565, "grad_norm": 1.3940374851226807, "learning_rate": 2.5213527872535428e-05, "loss": 0.2667236328125, "step": 4249 }, { "epoch": 0.28727862647019065, "grad_norm": 1.922103762626648, "learning_rate": 2.5211111133137675e-05, "loss": 0.20562744140625, "step": 4250 }, { "epoch": 0.28734622144112476, "grad_norm": 1.1703858375549316, "learning_rate": 2.5208693899662864e-05, "loss": 0.256622314453125, "step": 4251 }, { "epoch": 0.28741381641205893, "grad_norm": 1.5520583391189575, "learning_rate": 2.5206276172227955e-05, "loss": 0.25830841064453125, "step": 4252 }, { "epoch": 0.2874814113829931, "grad_norm": 0.928700864315033, "learning_rate": 2.520385795094993e-05, "loss": 0.209197998046875, "step": 4253 }, { "epoch": 0.2875490063539273, "grad_norm": 1.0892398357391357, "learning_rate": 2.5201439235945804e-05, "loss": 0.23162841796875, "step": 4254 }, { "epoch": 0.28761660132486144, "grad_norm": 1.2167870998382568, "learning_rate": 2.5199020027332603e-05, "loss": 0.21205902099609375, "step": 4255 }, { "epoch": 0.2876841962957956, "grad_norm": 1.5436575412750244, "learning_rate": 2.5196600325227386e-05, "loss": 0.19775390625, "step": 4256 }, { "epoch": 0.28775179126672973, "grad_norm": 1.2813247442245483, "learning_rate": 2.5194180129747235e-05, "loss": 0.21160888671875, "step": 4257 }, { "epoch": 0.2878193862376639, "grad_norm": 1.7772512435913086, "learning_rate": 2.519175944100925e-05, "loss": 0.36187744140625, "step": 4258 }, { "epoch": 0.28788698120859807, "grad_norm": 1.1219372749328613, "learning_rate": 2.5189338259130564e-05, "loss": 0.27862548828125, "step": 4259 }, { "epoch": 0.28795457617953224, "grad_norm": 1.3812973499298096, "learning_rate": 2.518691658422833e-05, "loss": 0.2540283203125, "step": 4260 }, { "epoch": 0.2880221711504664, "grad_norm": 1.0485947132110596, "learning_rate": 2.518449441641971e-05, "loss": 0.28741455078125, "step": 4261 }, { "epoch": 0.2880897661214006, "grad_norm": 0.9771525263786316, "learning_rate": 2.5182071755821923e-05, "loss": 0.23895263671875, "step": 4262 }, { "epoch": 0.28815736109233475, "grad_norm": 0.9483869671821594, "learning_rate": 2.5179648602552174e-05, "loss": 0.135833740234375, "step": 4263 }, { "epoch": 0.28822495606326887, "grad_norm": 0.9239299297332764, "learning_rate": 2.5177224956727725e-05, "loss": 0.1607818603515625, "step": 4264 }, { "epoch": 0.28829255103420304, "grad_norm": 2.1408796310424805, "learning_rate": 2.517480081846584e-05, "loss": 0.29266357421875, "step": 4265 }, { "epoch": 0.2883601460051372, "grad_norm": 2.0519847869873047, "learning_rate": 2.5172376187883814e-05, "loss": 0.259918212890625, "step": 4266 }, { "epoch": 0.2884277409760714, "grad_norm": 1.0035470724105835, "learning_rate": 2.516995106509897e-05, "loss": 0.2008514404296875, "step": 4267 }, { "epoch": 0.28849533594700555, "grad_norm": 0.924946665763855, "learning_rate": 2.516752545022864e-05, "loss": 0.2318572998046875, "step": 4268 }, { "epoch": 0.2885629309179397, "grad_norm": 1.3687900304794312, "learning_rate": 2.51650993433902e-05, "loss": 0.220977783203125, "step": 4269 }, { "epoch": 0.2886305258888739, "grad_norm": 2.5705080032348633, "learning_rate": 2.516267274470104e-05, "loss": 0.297119140625, "step": 4270 }, { "epoch": 0.288698120859808, "grad_norm": 2.053277015686035, "learning_rate": 2.5160245654278572e-05, "loss": 0.281158447265625, "step": 4271 }, { "epoch": 0.2887657158307422, "grad_norm": 1.259202003479004, "learning_rate": 2.5157818072240236e-05, "loss": 0.250885009765625, "step": 4272 }, { "epoch": 0.28883331080167635, "grad_norm": 1.1320202350616455, "learning_rate": 2.5155389998703486e-05, "loss": 0.22613525390625, "step": 4273 }, { "epoch": 0.2889009057726105, "grad_norm": 1.2425212860107422, "learning_rate": 2.5152961433785818e-05, "loss": 0.23358154296875, "step": 4274 }, { "epoch": 0.2889685007435447, "grad_norm": 1.2263717651367188, "learning_rate": 2.515053237760473e-05, "loss": 0.28277587890625, "step": 4275 }, { "epoch": 0.28903609571447886, "grad_norm": 1.584636926651001, "learning_rate": 2.514810283027776e-05, "loss": 0.3013916015625, "step": 4276 }, { "epoch": 0.28910369068541303, "grad_norm": 2.258680820465088, "learning_rate": 2.5145672791922465e-05, "loss": 0.328643798828125, "step": 4277 }, { "epoch": 0.28917128565634714, "grad_norm": 1.155649185180664, "learning_rate": 2.5143242262656425e-05, "loss": 0.16265869140625, "step": 4278 }, { "epoch": 0.2892388806272813, "grad_norm": 1.846210241317749, "learning_rate": 2.5140811242597248e-05, "loss": 0.299896240234375, "step": 4279 }, { "epoch": 0.2893064755982155, "grad_norm": 1.4441653490066528, "learning_rate": 2.513837973186255e-05, "loss": 0.26654052734375, "step": 4280 }, { "epoch": 0.28937407056914966, "grad_norm": 1.6241244077682495, "learning_rate": 2.513594773057e-05, "loss": 0.2559814453125, "step": 4281 }, { "epoch": 0.2894416655400838, "grad_norm": 0.9827901124954224, "learning_rate": 2.513351523883726e-05, "loss": 0.173095703125, "step": 4282 }, { "epoch": 0.289509260511018, "grad_norm": 1.417718768119812, "learning_rate": 2.5131082256782034e-05, "loss": 0.240753173828125, "step": 4283 }, { "epoch": 0.28957685548195217, "grad_norm": 1.461925983428955, "learning_rate": 2.512864878452204e-05, "loss": 0.189300537109375, "step": 4284 }, { "epoch": 0.2896444504528863, "grad_norm": 0.8240128755569458, "learning_rate": 2.5126214822175032e-05, "loss": 0.1833038330078125, "step": 4285 }, { "epoch": 0.28971204542382045, "grad_norm": 0.9573001861572266, "learning_rate": 2.5123780369858776e-05, "loss": 0.224365234375, "step": 4286 }, { "epoch": 0.2897796403947546, "grad_norm": 1.530281901359558, "learning_rate": 2.5121345427691065e-05, "loss": 0.24432373046875, "step": 4287 }, { "epoch": 0.2898472353656888, "grad_norm": 1.6827585697174072, "learning_rate": 2.511890999578972e-05, "loss": 0.2979736328125, "step": 4288 }, { "epoch": 0.28991483033662296, "grad_norm": 2.018718719482422, "learning_rate": 2.511647407427258e-05, "loss": 0.226531982421875, "step": 4289 }, { "epoch": 0.28998242530755713, "grad_norm": 0.8748764991760254, "learning_rate": 2.5114037663257513e-05, "loss": 0.17536163330078125, "step": 4290 }, { "epoch": 0.2900500202784913, "grad_norm": 1.1285438537597656, "learning_rate": 2.5111600762862404e-05, "loss": 0.2728271484375, "step": 4291 }, { "epoch": 0.2901176152494254, "grad_norm": 1.6201871633529663, "learning_rate": 2.5109163373205165e-05, "loss": 0.2479248046875, "step": 4292 }, { "epoch": 0.2901852102203596, "grad_norm": 1.4082393646240234, "learning_rate": 2.5106725494403732e-05, "loss": 0.259063720703125, "step": 4293 }, { "epoch": 0.29025280519129376, "grad_norm": 1.3857084512710571, "learning_rate": 2.510428712657607e-05, "loss": 0.23834228515625, "step": 4294 }, { "epoch": 0.29032040016222793, "grad_norm": 1.380238652229309, "learning_rate": 2.5101848269840162e-05, "loss": 0.27606201171875, "step": 4295 }, { "epoch": 0.2903879951331621, "grad_norm": 1.0588321685791016, "learning_rate": 2.5099408924314005e-05, "loss": 0.257476806640625, "step": 4296 }, { "epoch": 0.2904555901040963, "grad_norm": 1.7564619779586792, "learning_rate": 2.509696909011564e-05, "loss": 0.35040283203125, "step": 4297 }, { "epoch": 0.29052318507503044, "grad_norm": 1.553100347518921, "learning_rate": 2.509452876736312e-05, "loss": 0.284881591796875, "step": 4298 }, { "epoch": 0.29059078004596456, "grad_norm": 0.9502072930335999, "learning_rate": 2.509208795617452e-05, "loss": 0.1664276123046875, "step": 4299 }, { "epoch": 0.29065837501689873, "grad_norm": 1.7882248163223267, "learning_rate": 2.5089646656667937e-05, "loss": 0.2550811767578125, "step": 4300 }, { "epoch": 0.2907259699878329, "grad_norm": 2.027172803878784, "learning_rate": 2.508720486896151e-05, "loss": 0.27679443359375, "step": 4301 }, { "epoch": 0.29079356495876707, "grad_norm": 1.2885664701461792, "learning_rate": 2.5084762593173375e-05, "loss": 0.24566650390625, "step": 4302 }, { "epoch": 0.29086115992970124, "grad_norm": 1.2830705642700195, "learning_rate": 2.508231982942171e-05, "loss": 0.33184814453125, "step": 4303 }, { "epoch": 0.2909287549006354, "grad_norm": 1.337526559829712, "learning_rate": 2.507987657782471e-05, "loss": 0.23382568359375, "step": 4304 }, { "epoch": 0.2909963498715696, "grad_norm": 1.3428261280059814, "learning_rate": 2.5077432838500594e-05, "loss": 0.302398681640625, "step": 4305 }, { "epoch": 0.2910639448425037, "grad_norm": 1.0610668659210205, "learning_rate": 2.5074988611567614e-05, "loss": 0.2126922607421875, "step": 4306 }, { "epoch": 0.29113153981343787, "grad_norm": 1.2446953058242798, "learning_rate": 2.507254389714402e-05, "loss": 0.25275421142578125, "step": 4307 }, { "epoch": 0.29119913478437204, "grad_norm": 0.9782027006149292, "learning_rate": 2.5070098695348117e-05, "loss": 0.19350051879882812, "step": 4308 }, { "epoch": 0.2912667297553062, "grad_norm": 0.62213134765625, "learning_rate": 2.5067653006298214e-05, "loss": 0.1125335693359375, "step": 4309 }, { "epoch": 0.2913343247262404, "grad_norm": 0.9172948002815247, "learning_rate": 2.5065206830112646e-05, "loss": 0.1840667724609375, "step": 4310 }, { "epoch": 0.29140191969717455, "grad_norm": 1.2871384620666504, "learning_rate": 2.5062760166909777e-05, "loss": 0.188446044921875, "step": 4311 }, { "epoch": 0.2914695146681087, "grad_norm": 1.727644443511963, "learning_rate": 2.5060313016807993e-05, "loss": 0.272491455078125, "step": 4312 }, { "epoch": 0.29153710963904284, "grad_norm": 1.6517910957336426, "learning_rate": 2.50578653799257e-05, "loss": 0.26654052734375, "step": 4313 }, { "epoch": 0.291604704609977, "grad_norm": 1.0919100046157837, "learning_rate": 2.505541725638133e-05, "loss": 0.283447265625, "step": 4314 }, { "epoch": 0.2916722995809112, "grad_norm": 1.2002170085906982, "learning_rate": 2.505296864629334e-05, "loss": 0.25958251953125, "step": 4315 }, { "epoch": 0.29173989455184535, "grad_norm": 0.7334715127944946, "learning_rate": 2.5050519549780204e-05, "loss": 0.126129150390625, "step": 4316 }, { "epoch": 0.2918074895227795, "grad_norm": 1.3066837787628174, "learning_rate": 2.5048069966960432e-05, "loss": 0.275970458984375, "step": 4317 }, { "epoch": 0.2918750844937137, "grad_norm": 1.9426475763320923, "learning_rate": 2.504561989795255e-05, "loss": 0.1966552734375, "step": 4318 }, { "epoch": 0.2919426794646478, "grad_norm": 2.048151969909668, "learning_rate": 2.5043169342875105e-05, "loss": 0.316253662109375, "step": 4319 }, { "epoch": 0.292010274435582, "grad_norm": 1.43778657913208, "learning_rate": 2.5040718301846664e-05, "loss": 0.216827392578125, "step": 4320 }, { "epoch": 0.29207786940651614, "grad_norm": 1.504973292350769, "learning_rate": 2.503826677498583e-05, "loss": 0.2213134765625, "step": 4321 }, { "epoch": 0.2921454643774503, "grad_norm": 1.305552363395691, "learning_rate": 2.5035814762411222e-05, "loss": 0.284088134765625, "step": 4322 }, { "epoch": 0.2922130593483845, "grad_norm": 1.151715636253357, "learning_rate": 2.5033362264241485e-05, "loss": 0.2333984375, "step": 4323 }, { "epoch": 0.29228065431931866, "grad_norm": 0.8045654296875, "learning_rate": 2.5030909280595286e-05, "loss": 0.14739990234375, "step": 4324 }, { "epoch": 0.2923482492902528, "grad_norm": 1.4956462383270264, "learning_rate": 2.5028455811591313e-05, "loss": 0.232177734375, "step": 4325 }, { "epoch": 0.29241584426118694, "grad_norm": 2.0639281272888184, "learning_rate": 2.502600185734828e-05, "loss": 0.2352294921875, "step": 4326 }, { "epoch": 0.2924834392321211, "grad_norm": 1.324652910232544, "learning_rate": 2.502354741798493e-05, "loss": 0.32037353515625, "step": 4327 }, { "epoch": 0.2925510342030553, "grad_norm": 0.7387433648109436, "learning_rate": 2.5021092493620015e-05, "loss": 0.10791015625, "step": 4328 }, { "epoch": 0.29261862917398945, "grad_norm": 1.4339927434921265, "learning_rate": 2.501863708437233e-05, "loss": 0.307373046875, "step": 4329 }, { "epoch": 0.2926862241449236, "grad_norm": 1.1767882108688354, "learning_rate": 2.5016181190360674e-05, "loss": 0.30859375, "step": 4330 }, { "epoch": 0.2927538191158578, "grad_norm": 0.9665172100067139, "learning_rate": 2.5013724811703887e-05, "loss": 0.160064697265625, "step": 4331 }, { "epoch": 0.29282141408679196, "grad_norm": 1.5298036336898804, "learning_rate": 2.501126794852081e-05, "loss": 0.1602325439453125, "step": 4332 }, { "epoch": 0.2928890090577261, "grad_norm": 0.5807461142539978, "learning_rate": 2.5008810600930337e-05, "loss": 0.1336517333984375, "step": 4333 }, { "epoch": 0.29295660402866025, "grad_norm": 1.4714291095733643, "learning_rate": 2.500635276905136e-05, "loss": 0.320068359375, "step": 4334 }, { "epoch": 0.2930241989995944, "grad_norm": 2.65104603767395, "learning_rate": 2.500389445300281e-05, "loss": 0.3179931640625, "step": 4335 }, { "epoch": 0.2930917939705286, "grad_norm": 1.3680784702301025, "learning_rate": 2.500143565290363e-05, "loss": 0.1978759765625, "step": 4336 }, { "epoch": 0.29315938894146276, "grad_norm": 1.1714229583740234, "learning_rate": 2.4998976368872796e-05, "loss": 0.304901123046875, "step": 4337 }, { "epoch": 0.29322698391239693, "grad_norm": 1.746074914932251, "learning_rate": 2.4996516601029308e-05, "loss": 0.24676513671875, "step": 4338 }, { "epoch": 0.2932945788833311, "grad_norm": 2.0833816528320312, "learning_rate": 2.4994056349492172e-05, "loss": 0.35516357421875, "step": 4339 }, { "epoch": 0.2933621738542652, "grad_norm": 1.2299656867980957, "learning_rate": 2.4991595614380436e-05, "loss": 0.24945068359375, "step": 4340 }, { "epoch": 0.2934297688251994, "grad_norm": 0.7300592064857483, "learning_rate": 2.498913439581317e-05, "loss": 0.20037841796875, "step": 4341 }, { "epoch": 0.29349736379613356, "grad_norm": 1.17644202709198, "learning_rate": 2.498667269390946e-05, "loss": 0.2512969970703125, "step": 4342 }, { "epoch": 0.29356495876706773, "grad_norm": 1.3396649360656738, "learning_rate": 2.498421050878842e-05, "loss": 0.2916259765625, "step": 4343 }, { "epoch": 0.2936325537380019, "grad_norm": 1.2784276008605957, "learning_rate": 2.4981747840569185e-05, "loss": 0.25506591796875, "step": 4344 }, { "epoch": 0.29370014870893607, "grad_norm": 1.071725845336914, "learning_rate": 2.4979284689370913e-05, "loss": 0.28631591796875, "step": 4345 }, { "epoch": 0.29376774367987024, "grad_norm": 1.812145471572876, "learning_rate": 2.4976821055312785e-05, "loss": 0.171600341796875, "step": 4346 }, { "epoch": 0.29383533865080436, "grad_norm": 2.3626186847686768, "learning_rate": 2.497435693851401e-05, "loss": 0.27783203125, "step": 4347 }, { "epoch": 0.2939029336217385, "grad_norm": 0.5456249713897705, "learning_rate": 2.4971892339093817e-05, "loss": 0.100006103515625, "step": 4348 }, { "epoch": 0.2939705285926727, "grad_norm": 2.8782544136047363, "learning_rate": 2.4969427257171457e-05, "loss": 0.2923583984375, "step": 4349 }, { "epoch": 0.29403812356360687, "grad_norm": 1.4274985790252686, "learning_rate": 2.4966961692866212e-05, "loss": 0.259552001953125, "step": 4350 }, { "epoch": 0.29410571853454104, "grad_norm": 1.8771681785583496, "learning_rate": 2.4964495646297368e-05, "loss": 0.3216552734375, "step": 4351 }, { "epoch": 0.2941733135054752, "grad_norm": 1.5928040742874146, "learning_rate": 2.4962029117584262e-05, "loss": 0.20343017578125, "step": 4352 }, { "epoch": 0.2942409084764094, "grad_norm": 1.3414289951324463, "learning_rate": 2.4959562106846237e-05, "loss": 0.33856201171875, "step": 4353 }, { "epoch": 0.2943085034473435, "grad_norm": 1.856278896331787, "learning_rate": 2.4957094614202657e-05, "loss": 0.2783203125, "step": 4354 }, { "epoch": 0.29437609841827767, "grad_norm": 1.580613136291504, "learning_rate": 2.495462663977291e-05, "loss": 0.2607421875, "step": 4355 }, { "epoch": 0.29444369338921184, "grad_norm": 0.9621272683143616, "learning_rate": 2.4952158183676428e-05, "loss": 0.181976318359375, "step": 4356 }, { "epoch": 0.294511288360146, "grad_norm": 2.1822080612182617, "learning_rate": 2.4949689246032635e-05, "loss": 0.34039306640625, "step": 4357 }, { "epoch": 0.2945788833310802, "grad_norm": 0.9102752804756165, "learning_rate": 2.4947219826961003e-05, "loss": 0.22356414794921875, "step": 4358 }, { "epoch": 0.29464647830201435, "grad_norm": 1.0169203281402588, "learning_rate": 2.4944749926581012e-05, "loss": 0.1655731201171875, "step": 4359 }, { "epoch": 0.2947140732729485, "grad_norm": 1.3204060792922974, "learning_rate": 2.4942279545012175e-05, "loss": 0.281494140625, "step": 4360 }, { "epoch": 0.29478166824388263, "grad_norm": 0.5253260135650635, "learning_rate": 2.4939808682374028e-05, "loss": 0.107452392578125, "step": 4361 }, { "epoch": 0.2948492632148168, "grad_norm": 1.4620575904846191, "learning_rate": 2.493733733878612e-05, "loss": 0.317962646484375, "step": 4362 }, { "epoch": 0.294916858185751, "grad_norm": 1.4912285804748535, "learning_rate": 2.493486551436803e-05, "loss": 0.32489013671875, "step": 4363 }, { "epoch": 0.29498445315668514, "grad_norm": 1.6651042699813843, "learning_rate": 2.493239320923936e-05, "loss": 0.23138427734375, "step": 4364 }, { "epoch": 0.2950520481276193, "grad_norm": 1.1442902088165283, "learning_rate": 2.492992042351974e-05, "loss": 0.2747802734375, "step": 4365 }, { "epoch": 0.2951196430985535, "grad_norm": 1.5000923871994019, "learning_rate": 2.4927447157328818e-05, "loss": 0.217254638671875, "step": 4366 }, { "epoch": 0.29518723806948766, "grad_norm": 1.6943758726119995, "learning_rate": 2.4924973410786262e-05, "loss": 0.350799560546875, "step": 4367 }, { "epoch": 0.29525483304042177, "grad_norm": 1.391280174255371, "learning_rate": 2.4922499184011766e-05, "loss": 0.22425079345703125, "step": 4368 }, { "epoch": 0.29532242801135594, "grad_norm": 1.1328281164169312, "learning_rate": 2.4920024477125058e-05, "loss": 0.176483154296875, "step": 4369 }, { "epoch": 0.2953900229822901, "grad_norm": 1.0857353210449219, "learning_rate": 2.491754929024587e-05, "loss": 0.16058349609375, "step": 4370 }, { "epoch": 0.2954576179532243, "grad_norm": 1.0926944017410278, "learning_rate": 2.4915073623493977e-05, "loss": 0.256378173828125, "step": 4371 }, { "epoch": 0.29552521292415845, "grad_norm": 2.463925361633301, "learning_rate": 2.4912597476989153e-05, "loss": 0.282501220703125, "step": 4372 }, { "epoch": 0.2955928078950926, "grad_norm": 1.5847960710525513, "learning_rate": 2.491012085085122e-05, "loss": 0.23760986328125, "step": 4373 }, { "epoch": 0.2956604028660268, "grad_norm": 1.1803737878799438, "learning_rate": 2.4907643745200013e-05, "loss": 0.229949951171875, "step": 4374 }, { "epoch": 0.2957279978369609, "grad_norm": 1.225609540939331, "learning_rate": 2.4905166160155384e-05, "loss": 0.18731689453125, "step": 4375 }, { "epoch": 0.2957955928078951, "grad_norm": 1.8127403259277344, "learning_rate": 2.490268809583722e-05, "loss": 0.235870361328125, "step": 4376 }, { "epoch": 0.29586318777882925, "grad_norm": 1.7788029909133911, "learning_rate": 2.4900209552365415e-05, "loss": 0.259033203125, "step": 4377 }, { "epoch": 0.2959307827497634, "grad_norm": 1.384898066520691, "learning_rate": 2.4897730529859908e-05, "loss": 0.2529296875, "step": 4378 }, { "epoch": 0.2959983777206976, "grad_norm": 1.266843318939209, "learning_rate": 2.4895251028440644e-05, "loss": 0.265472412109375, "step": 4379 }, { "epoch": 0.29606597269163176, "grad_norm": 1.1295191049575806, "learning_rate": 2.4892771048227597e-05, "loss": 0.1584320068359375, "step": 4380 }, { "epoch": 0.2961335676625659, "grad_norm": 1.9698113203048706, "learning_rate": 2.4890290589340768e-05, "loss": 0.2525634765625, "step": 4381 }, { "epoch": 0.29620116263350005, "grad_norm": 0.9958451390266418, "learning_rate": 2.488780965190017e-05, "loss": 0.1779632568359375, "step": 4382 }, { "epoch": 0.2962687576044342, "grad_norm": 1.0715396404266357, "learning_rate": 2.4885328236025843e-05, "loss": 0.2364501953125, "step": 4383 }, { "epoch": 0.2963363525753684, "grad_norm": 0.5826675891876221, "learning_rate": 2.488284634183787e-05, "loss": 0.15643310546875, "step": 4384 }, { "epoch": 0.29640394754630256, "grad_norm": 0.9034469723701477, "learning_rate": 2.4880363969456325e-05, "loss": 0.208587646484375, "step": 4385 }, { "epoch": 0.29647154251723673, "grad_norm": 1.1299790143966675, "learning_rate": 2.4877881119001328e-05, "loss": 0.1850109100341797, "step": 4386 }, { "epoch": 0.2965391374881709, "grad_norm": 2.1616086959838867, "learning_rate": 2.487539779059301e-05, "loss": 0.2227325439453125, "step": 4387 }, { "epoch": 0.296606732459105, "grad_norm": 1.0308234691619873, "learning_rate": 2.4872913984351536e-05, "loss": 0.24798583984375, "step": 4388 }, { "epoch": 0.2966743274300392, "grad_norm": 1.408361792564392, "learning_rate": 2.487042970039708e-05, "loss": 0.2596435546875, "step": 4389 }, { "epoch": 0.29674192240097336, "grad_norm": 1.6175470352172852, "learning_rate": 2.4867944938849855e-05, "loss": 0.274444580078125, "step": 4390 }, { "epoch": 0.2968095173719075, "grad_norm": 1.4076426029205322, "learning_rate": 2.4865459699830082e-05, "loss": 0.33782958984375, "step": 4391 }, { "epoch": 0.2968771123428417, "grad_norm": 0.7207695245742798, "learning_rate": 2.486297398345802e-05, "loss": 0.06878280639648438, "step": 4392 }, { "epoch": 0.29694470731377587, "grad_norm": 1.3579580783843994, "learning_rate": 2.4860487789853935e-05, "loss": 0.32269287109375, "step": 4393 }, { "epoch": 0.29701230228471004, "grad_norm": 1.8638805150985718, "learning_rate": 2.4858001119138136e-05, "loss": 0.2755126953125, "step": 4394 }, { "epoch": 0.29707989725564415, "grad_norm": 0.9856986403465271, "learning_rate": 2.4855513971430928e-05, "loss": 0.21795654296875, "step": 4395 }, { "epoch": 0.2971474922265783, "grad_norm": 1.0392485857009888, "learning_rate": 2.4853026346852672e-05, "loss": 0.186248779296875, "step": 4396 }, { "epoch": 0.2972150871975125, "grad_norm": 1.0607731342315674, "learning_rate": 2.485053824552372e-05, "loss": 0.308349609375, "step": 4397 }, { "epoch": 0.29728268216844667, "grad_norm": 2.077694892883301, "learning_rate": 2.484804966756447e-05, "loss": 0.310028076171875, "step": 4398 }, { "epoch": 0.29735027713938084, "grad_norm": 1.486333966255188, "learning_rate": 2.4845560613095334e-05, "loss": 0.265625, "step": 4399 }, { "epoch": 0.297417872110315, "grad_norm": 1.2207943201065063, "learning_rate": 2.4843071082236745e-05, "loss": 0.2123260498046875, "step": 4400 }, { "epoch": 0.2974854670812492, "grad_norm": 1.1310656070709229, "learning_rate": 2.4840581075109165e-05, "loss": 0.25616455078125, "step": 4401 }, { "epoch": 0.2975530620521833, "grad_norm": 1.1031244993209839, "learning_rate": 2.4838090591833072e-05, "loss": 0.217376708984375, "step": 4402 }, { "epoch": 0.29762065702311746, "grad_norm": 2.277552366256714, "learning_rate": 2.4835599632528972e-05, "loss": 0.280731201171875, "step": 4403 }, { "epoch": 0.29768825199405163, "grad_norm": 1.2765860557556152, "learning_rate": 2.4833108197317402e-05, "loss": 0.2229461669921875, "step": 4404 }, { "epoch": 0.2977558469649858, "grad_norm": 1.4296804666519165, "learning_rate": 2.48306162863189e-05, "loss": 0.324188232421875, "step": 4405 }, { "epoch": 0.29782344193592, "grad_norm": 1.5292679071426392, "learning_rate": 2.482812389965405e-05, "loss": 0.30572509765625, "step": 4406 }, { "epoch": 0.29789103690685415, "grad_norm": 1.1971486806869507, "learning_rate": 2.4825631037443444e-05, "loss": 0.18670654296875, "step": 4407 }, { "epoch": 0.2979586318777883, "grad_norm": 1.2868050336837769, "learning_rate": 2.4823137699807702e-05, "loss": 0.2451019287109375, "step": 4408 }, { "epoch": 0.29802622684872243, "grad_norm": 2.286688804626465, "learning_rate": 2.482064388686747e-05, "loss": 0.282928466796875, "step": 4409 }, { "epoch": 0.2980938218196566, "grad_norm": 2.6338417530059814, "learning_rate": 2.4818149598743412e-05, "loss": 0.298309326171875, "step": 4410 }, { "epoch": 0.29816141679059077, "grad_norm": 2.2112393379211426, "learning_rate": 2.4815654835556218e-05, "loss": 0.245208740234375, "step": 4411 }, { "epoch": 0.29822901176152494, "grad_norm": 2.9905312061309814, "learning_rate": 2.48131595974266e-05, "loss": 0.321044921875, "step": 4412 }, { "epoch": 0.2982966067324591, "grad_norm": 2.7398922443389893, "learning_rate": 2.4810663884475295e-05, "loss": 0.3262939453125, "step": 4413 }, { "epoch": 0.2983642017033933, "grad_norm": 1.5096315145492554, "learning_rate": 2.4808167696823062e-05, "loss": 0.1418609619140625, "step": 4414 }, { "epoch": 0.29843179667432745, "grad_norm": 1.3404799699783325, "learning_rate": 2.4805671034590673e-05, "loss": 0.257843017578125, "step": 4415 }, { "epoch": 0.29849939164526157, "grad_norm": 1.2140331268310547, "learning_rate": 2.4803173897898942e-05, "loss": 0.238922119140625, "step": 4416 }, { "epoch": 0.29856698661619574, "grad_norm": 1.0820460319519043, "learning_rate": 2.4800676286868693e-05, "loss": 0.25506591796875, "step": 4417 }, { "epoch": 0.2986345815871299, "grad_norm": 1.1031426191329956, "learning_rate": 2.479817820162077e-05, "loss": 0.201904296875, "step": 4418 }, { "epoch": 0.2987021765580641, "grad_norm": 1.5010586977005005, "learning_rate": 2.4795679642276057e-05, "loss": 0.26513671875, "step": 4419 }, { "epoch": 0.29876977152899825, "grad_norm": 2.3148717880249023, "learning_rate": 2.4793180608955442e-05, "loss": 0.2840576171875, "step": 4420 }, { "epoch": 0.2988373664999324, "grad_norm": 1.522922158241272, "learning_rate": 2.4790681101779846e-05, "loss": 0.21368408203125, "step": 4421 }, { "epoch": 0.2989049614708666, "grad_norm": 0.9733905792236328, "learning_rate": 2.4788181120870215e-05, "loss": 0.233154296875, "step": 4422 }, { "epoch": 0.2989725564418007, "grad_norm": 2.0761337280273438, "learning_rate": 2.478568066634751e-05, "loss": 0.294586181640625, "step": 4423 }, { "epoch": 0.2990401514127349, "grad_norm": 1.197981595993042, "learning_rate": 2.478317973833271e-05, "loss": 0.248779296875, "step": 4424 }, { "epoch": 0.29910774638366905, "grad_norm": 2.0082767009735107, "learning_rate": 2.4780678336946835e-05, "loss": 0.17468643188476562, "step": 4425 }, { "epoch": 0.2991753413546032, "grad_norm": 1.2177602052688599, "learning_rate": 2.477817646231092e-05, "loss": 0.29248046875, "step": 4426 }, { "epoch": 0.2992429363255374, "grad_norm": 1.7187477350234985, "learning_rate": 2.4775674114546017e-05, "loss": 0.22454833984375, "step": 4427 }, { "epoch": 0.29931053129647156, "grad_norm": 1.1463195085525513, "learning_rate": 2.4773171293773204e-05, "loss": 0.206512451171875, "step": 4428 }, { "epoch": 0.29937812626740573, "grad_norm": 1.2127639055252075, "learning_rate": 2.4770668000113586e-05, "loss": 0.161834716796875, "step": 4429 }, { "epoch": 0.29944572123833985, "grad_norm": 1.616171956062317, "learning_rate": 2.476816423368829e-05, "loss": 0.28961181640625, "step": 4430 }, { "epoch": 0.299513316209274, "grad_norm": 1.481168270111084, "learning_rate": 2.4765659994618457e-05, "loss": 0.2911376953125, "step": 4431 }, { "epoch": 0.2995809111802082, "grad_norm": 1.089643120765686, "learning_rate": 2.4763155283025262e-05, "loss": 0.209625244140625, "step": 4432 }, { "epoch": 0.29964850615114236, "grad_norm": 1.6399091482162476, "learning_rate": 2.47606500990299e-05, "loss": 0.214599609375, "step": 4433 }, { "epoch": 0.29971610112207653, "grad_norm": 1.4011627435684204, "learning_rate": 2.4758144442753588e-05, "loss": 0.317413330078125, "step": 4434 }, { "epoch": 0.2997836960930107, "grad_norm": 1.3132765293121338, "learning_rate": 2.475563831431756e-05, "loss": 0.303955078125, "step": 4435 }, { "epoch": 0.29985129106394487, "grad_norm": 1.8000085353851318, "learning_rate": 2.4753131713843078e-05, "loss": 0.2068634033203125, "step": 4436 }, { "epoch": 0.299918886034879, "grad_norm": 1.491798996925354, "learning_rate": 2.4750624641451433e-05, "loss": 0.32611083984375, "step": 4437 }, { "epoch": 0.29998648100581315, "grad_norm": 1.0269569158554077, "learning_rate": 2.4748117097263927e-05, "loss": 0.1923980712890625, "step": 4438 }, { "epoch": 0.3000540759767473, "grad_norm": 1.512279748916626, "learning_rate": 2.4745609081401893e-05, "loss": 0.2686614990234375, "step": 4439 }, { "epoch": 0.3001216709476815, "grad_norm": 1.008263111114502, "learning_rate": 2.4743100593986686e-05, "loss": 0.200225830078125, "step": 4440 }, { "epoch": 0.30018926591861567, "grad_norm": 1.0807796716690063, "learning_rate": 2.474059163513968e-05, "loss": 0.284912109375, "step": 4441 }, { "epoch": 0.30025686088954984, "grad_norm": 0.869511067867279, "learning_rate": 2.4738082204982275e-05, "loss": 0.219085693359375, "step": 4442 }, { "epoch": 0.300324455860484, "grad_norm": 1.1285854578018188, "learning_rate": 2.4735572303635888e-05, "loss": 0.2235107421875, "step": 4443 }, { "epoch": 0.3003920508314181, "grad_norm": 0.8795127868652344, "learning_rate": 2.4733061931221973e-05, "loss": 0.19898223876953125, "step": 4444 }, { "epoch": 0.3004596458023523, "grad_norm": 2.4064550399780273, "learning_rate": 2.473055108786199e-05, "loss": 0.30780029296875, "step": 4445 }, { "epoch": 0.30052724077328646, "grad_norm": 1.298919439315796, "learning_rate": 2.4728039773677435e-05, "loss": 0.2290191650390625, "step": 4446 }, { "epoch": 0.30059483574422063, "grad_norm": 1.5993475914001465, "learning_rate": 2.472552798878982e-05, "loss": 0.2026519775390625, "step": 4447 }, { "epoch": 0.3006624307151548, "grad_norm": 2.2409019470214844, "learning_rate": 2.4723015733320673e-05, "loss": 0.3033294677734375, "step": 4448 }, { "epoch": 0.300730025686089, "grad_norm": 1.3412259817123413, "learning_rate": 2.4720503007391555e-05, "loss": 0.26776123046875, "step": 4449 }, { "epoch": 0.3007976206570231, "grad_norm": 0.8890286684036255, "learning_rate": 2.471798981112406e-05, "loss": 0.24822998046875, "step": 4450 }, { "epoch": 0.30086521562795726, "grad_norm": 1.4984896183013916, "learning_rate": 2.471547614463977e-05, "loss": 0.245025634765625, "step": 4451 }, { "epoch": 0.30093281059889143, "grad_norm": 1.4307990074157715, "learning_rate": 2.4712962008060336e-05, "loss": 0.26922607421875, "step": 4452 }, { "epoch": 0.3010004055698256, "grad_norm": 1.6410763263702393, "learning_rate": 2.471044740150739e-05, "loss": 0.282958984375, "step": 4453 }, { "epoch": 0.3010680005407598, "grad_norm": 2.054853677749634, "learning_rate": 2.470793232510261e-05, "loss": 0.235626220703125, "step": 4454 }, { "epoch": 0.30113559551169394, "grad_norm": 1.3623101711273193, "learning_rate": 2.4705416778967697e-05, "loss": 0.205322265625, "step": 4455 }, { "epoch": 0.3012031904826281, "grad_norm": 1.2191481590270996, "learning_rate": 2.4702900763224353e-05, "loss": 0.33935546875, "step": 4456 }, { "epoch": 0.30127078545356223, "grad_norm": 1.4532119035720825, "learning_rate": 2.4700384277994337e-05, "loss": 0.3128662109375, "step": 4457 }, { "epoch": 0.3013383804244964, "grad_norm": 1.7889702320098877, "learning_rate": 2.4697867323399398e-05, "loss": 0.26007080078125, "step": 4458 }, { "epoch": 0.30140597539543057, "grad_norm": 1.342522382736206, "learning_rate": 2.469534989956133e-05, "loss": 0.1927490234375, "step": 4459 }, { "epoch": 0.30147357036636474, "grad_norm": 1.4034547805786133, "learning_rate": 2.469283200660194e-05, "loss": 0.244598388671875, "step": 4460 }, { "epoch": 0.3015411653372989, "grad_norm": 1.5257197618484497, "learning_rate": 2.4690313644643056e-05, "loss": 0.30084228515625, "step": 4461 }, { "epoch": 0.3016087603082331, "grad_norm": 1.2283257246017456, "learning_rate": 2.4687794813806536e-05, "loss": 0.207916259765625, "step": 4462 }, { "epoch": 0.30167635527916725, "grad_norm": 2.5075104236602783, "learning_rate": 2.4685275514214255e-05, "loss": 0.296478271484375, "step": 4463 }, { "epoch": 0.30174395025010137, "grad_norm": 1.5248459577560425, "learning_rate": 2.468275574598811e-05, "loss": 0.268096923828125, "step": 4464 }, { "epoch": 0.30181154522103554, "grad_norm": 1.6665234565734863, "learning_rate": 2.468023550925003e-05, "loss": 0.38995361328125, "step": 4465 }, { "epoch": 0.3018791401919697, "grad_norm": 1.1653680801391602, "learning_rate": 2.4677714804121954e-05, "loss": 0.240264892578125, "step": 4466 }, { "epoch": 0.3019467351629039, "grad_norm": 1.6972118616104126, "learning_rate": 2.4675193630725847e-05, "loss": 0.330291748046875, "step": 4467 }, { "epoch": 0.30201433013383805, "grad_norm": 1.1117188930511475, "learning_rate": 2.467267198918371e-05, "loss": 0.2813720703125, "step": 4468 }, { "epoch": 0.3020819251047722, "grad_norm": 1.255786418914795, "learning_rate": 2.4670149879617543e-05, "loss": 0.2017364501953125, "step": 4469 }, { "epoch": 0.3021495200757064, "grad_norm": 1.4608039855957031, "learning_rate": 2.4667627302149386e-05, "loss": 0.22894287109375, "step": 4470 }, { "epoch": 0.3022171150466405, "grad_norm": 0.8104979991912842, "learning_rate": 2.46651042569013e-05, "loss": 0.191558837890625, "step": 4471 }, { "epoch": 0.3022847100175747, "grad_norm": 1.1839336156845093, "learning_rate": 2.4662580743995365e-05, "loss": 0.24267578125, "step": 4472 }, { "epoch": 0.30235230498850885, "grad_norm": 1.195879578590393, "learning_rate": 2.466005676355368e-05, "loss": 0.2652587890625, "step": 4473 }, { "epoch": 0.302419899959443, "grad_norm": 2.35779070854187, "learning_rate": 2.4657532315698378e-05, "loss": 0.3319091796875, "step": 4474 }, { "epoch": 0.3024874949303772, "grad_norm": 1.2212482690811157, "learning_rate": 2.4655007400551597e-05, "loss": 0.274139404296875, "step": 4475 }, { "epoch": 0.30255508990131136, "grad_norm": 1.6541270017623901, "learning_rate": 2.4652482018235517e-05, "loss": 0.25323486328125, "step": 4476 }, { "epoch": 0.30262268487224553, "grad_norm": 1.421346664428711, "learning_rate": 2.464995616887233e-05, "loss": 0.211578369140625, "step": 4477 }, { "epoch": 0.30269027984317964, "grad_norm": 1.026313066482544, "learning_rate": 2.464742985258425e-05, "loss": 0.16046142578125, "step": 4478 }, { "epoch": 0.3027578748141138, "grad_norm": 1.0478304624557495, "learning_rate": 2.4644903069493514e-05, "loss": 0.2283935546875, "step": 4479 }, { "epoch": 0.302825469785048, "grad_norm": 1.2143875360488892, "learning_rate": 2.4642375819722387e-05, "loss": 0.3118896484375, "step": 4480 }, { "epoch": 0.30289306475598216, "grad_norm": 0.7998113632202148, "learning_rate": 2.463984810339316e-05, "loss": 0.129302978515625, "step": 4481 }, { "epoch": 0.3029606597269163, "grad_norm": 0.9574657678604126, "learning_rate": 2.4637319920628127e-05, "loss": 0.16306304931640625, "step": 4482 }, { "epoch": 0.3030282546978505, "grad_norm": 1.635585904121399, "learning_rate": 2.463479127154962e-05, "loss": 0.324493408203125, "step": 4483 }, { "epoch": 0.30309584966878467, "grad_norm": 1.1199383735656738, "learning_rate": 2.4632262156279995e-05, "loss": 0.23114013671875, "step": 4484 }, { "epoch": 0.3031634446397188, "grad_norm": 1.0009335279464722, "learning_rate": 2.4629732574941624e-05, "loss": 0.23590087890625, "step": 4485 }, { "epoch": 0.30323103961065295, "grad_norm": 0.7436074018478394, "learning_rate": 2.4627202527656904e-05, "loss": 0.180755615234375, "step": 4486 }, { "epoch": 0.3032986345815871, "grad_norm": 1.1698583364486694, "learning_rate": 2.4624672014548257e-05, "loss": 0.2091064453125, "step": 4487 }, { "epoch": 0.3033662295525213, "grad_norm": 1.3713114261627197, "learning_rate": 2.462214103573812e-05, "loss": 0.27459716796875, "step": 4488 }, { "epoch": 0.30343382452345546, "grad_norm": 1.2345142364501953, "learning_rate": 2.4619609591348963e-05, "loss": 0.212066650390625, "step": 4489 }, { "epoch": 0.30350141949438963, "grad_norm": 1.4179165363311768, "learning_rate": 2.461707768150327e-05, "loss": 0.28875732421875, "step": 4490 }, { "epoch": 0.3035690144653238, "grad_norm": 1.7142683267593384, "learning_rate": 2.461454530632355e-05, "loss": 0.298736572265625, "step": 4491 }, { "epoch": 0.3036366094362579, "grad_norm": 1.3177556991577148, "learning_rate": 2.4612012465932333e-05, "loss": 0.185760498046875, "step": 4492 }, { "epoch": 0.3037042044071921, "grad_norm": 1.232491135597229, "learning_rate": 2.4609479160452185e-05, "loss": 0.300750732421875, "step": 4493 }, { "epoch": 0.30377179937812626, "grad_norm": 1.408677339553833, "learning_rate": 2.4606945390005668e-05, "loss": 0.176239013671875, "step": 4494 }, { "epoch": 0.30383939434906043, "grad_norm": 1.4556612968444824, "learning_rate": 2.460441115471539e-05, "loss": 0.199951171875, "step": 4495 }, { "epoch": 0.3039069893199946, "grad_norm": 1.7835973501205444, "learning_rate": 2.460187645470397e-05, "loss": 0.33526611328125, "step": 4496 }, { "epoch": 0.3039745842909288, "grad_norm": 1.279930830001831, "learning_rate": 2.459934129009405e-05, "loss": 0.264739990234375, "step": 4497 }, { "epoch": 0.30404217926186294, "grad_norm": 0.8682933449745178, "learning_rate": 2.4596805661008305e-05, "loss": 0.19805908203125, "step": 4498 }, { "epoch": 0.30410977423279706, "grad_norm": 3.076378345489502, "learning_rate": 2.4594269567569423e-05, "loss": 0.3533172607421875, "step": 4499 }, { "epoch": 0.30417736920373123, "grad_norm": 0.6587358713150024, "learning_rate": 2.4591733009900108e-05, "loss": 0.159637451171875, "step": 4500 }, { "epoch": 0.3042449641746654, "grad_norm": 1.6731339693069458, "learning_rate": 2.45891959881231e-05, "loss": 0.261688232421875, "step": 4501 }, { "epoch": 0.30431255914559957, "grad_norm": 1.6831873655319214, "learning_rate": 2.4586658502361158e-05, "loss": 0.2724609375, "step": 4502 }, { "epoch": 0.30438015411653374, "grad_norm": 2.2498056888580322, "learning_rate": 2.4584120552737057e-05, "loss": 0.2750244140625, "step": 4503 }, { "epoch": 0.3044477490874679, "grad_norm": 1.586027979850769, "learning_rate": 2.45815821393736e-05, "loss": 0.32806396484375, "step": 4504 }, { "epoch": 0.3045153440584021, "grad_norm": 1.6120222806930542, "learning_rate": 2.457904326239362e-05, "loss": 0.254058837890625, "step": 4505 }, { "epoch": 0.3045829390293362, "grad_norm": 1.4937546253204346, "learning_rate": 2.4576503921919942e-05, "loss": 0.255767822265625, "step": 4506 }, { "epoch": 0.30465053400027037, "grad_norm": 1.3783072233200073, "learning_rate": 2.457396411807546e-05, "loss": 0.33612060546875, "step": 4507 }, { "epoch": 0.30471812897120454, "grad_norm": 1.7392706871032715, "learning_rate": 2.4571423850983048e-05, "loss": 0.2368011474609375, "step": 4508 }, { "epoch": 0.3047857239421387, "grad_norm": 1.7780989408493042, "learning_rate": 2.456888312076563e-05, "loss": 0.252960205078125, "step": 4509 }, { "epoch": 0.3048533189130729, "grad_norm": 1.3482084274291992, "learning_rate": 2.4566341927546134e-05, "loss": 0.2379608154296875, "step": 4510 }, { "epoch": 0.30492091388400705, "grad_norm": 1.1789809465408325, "learning_rate": 2.4563800271447524e-05, "loss": 0.1702880859375, "step": 4511 }, { "epoch": 0.30498850885494116, "grad_norm": 1.107798457145691, "learning_rate": 2.4561258152592782e-05, "loss": 0.330810546875, "step": 4512 }, { "epoch": 0.30505610382587534, "grad_norm": 0.9776098728179932, "learning_rate": 2.455871557110491e-05, "loss": 0.244598388671875, "step": 4513 }, { "epoch": 0.3051236987968095, "grad_norm": 1.6668288707733154, "learning_rate": 2.455617252710693e-05, "loss": 0.204833984375, "step": 4514 }, { "epoch": 0.3051912937677437, "grad_norm": 1.324773907661438, "learning_rate": 2.4553629020721896e-05, "loss": 0.252593994140625, "step": 4515 }, { "epoch": 0.30525888873867785, "grad_norm": 1.37460196018219, "learning_rate": 2.455108505207288e-05, "loss": 0.253570556640625, "step": 4516 }, { "epoch": 0.305326483709612, "grad_norm": 1.3937759399414062, "learning_rate": 2.454854062128297e-05, "loss": 0.22760009765625, "step": 4517 }, { "epoch": 0.3053940786805462, "grad_norm": 0.7502084374427795, "learning_rate": 2.4545995728475282e-05, "loss": 0.1096038818359375, "step": 4518 }, { "epoch": 0.3054616736514803, "grad_norm": 0.9929096102714539, "learning_rate": 2.4543450373772956e-05, "loss": 0.281097412109375, "step": 4519 }, { "epoch": 0.3055292686224145, "grad_norm": 1.0339847803115845, "learning_rate": 2.4540904557299146e-05, "loss": 0.23248291015625, "step": 4520 }, { "epoch": 0.30559686359334864, "grad_norm": 1.524471402168274, "learning_rate": 2.4538358279177042e-05, "loss": 0.3096923828125, "step": 4521 }, { "epoch": 0.3056644585642828, "grad_norm": 1.1745914220809937, "learning_rate": 2.453581153952985e-05, "loss": 0.2765045166015625, "step": 4522 }, { "epoch": 0.305732053535217, "grad_norm": 1.1433401107788086, "learning_rate": 2.453326433848079e-05, "loss": 0.2757568359375, "step": 4523 }, { "epoch": 0.30579964850615116, "grad_norm": 1.406829833984375, "learning_rate": 2.4530716676153113e-05, "loss": 0.20572662353515625, "step": 4524 }, { "epoch": 0.3058672434770853, "grad_norm": 1.1767011880874634, "learning_rate": 2.4528168552670095e-05, "loss": 0.26812744140625, "step": 4525 }, { "epoch": 0.30593483844801944, "grad_norm": 1.217828392982483, "learning_rate": 2.4525619968155023e-05, "loss": 0.252716064453125, "step": 4526 }, { "epoch": 0.3060024334189536, "grad_norm": 1.05997633934021, "learning_rate": 2.452307092273122e-05, "loss": 0.19915771484375, "step": 4527 }, { "epoch": 0.3060700283898878, "grad_norm": 2.1799163818359375, "learning_rate": 2.452052141652203e-05, "loss": 0.2635498046875, "step": 4528 }, { "epoch": 0.30613762336082195, "grad_norm": 2.350541114807129, "learning_rate": 2.45179714496508e-05, "loss": 0.41290283203125, "step": 4529 }, { "epoch": 0.3062052183317561, "grad_norm": 0.8470798134803772, "learning_rate": 2.4515421022240917e-05, "loss": 0.1711578369140625, "step": 4530 }, { "epoch": 0.3062728133026903, "grad_norm": 1.2088899612426758, "learning_rate": 2.4512870134415792e-05, "loss": 0.246002197265625, "step": 4531 }, { "epoch": 0.30634040827362446, "grad_norm": 1.5557036399841309, "learning_rate": 2.451031878629885e-05, "loss": 0.17034912109375, "step": 4532 }, { "epoch": 0.3064080032445586, "grad_norm": 1.1727211475372314, "learning_rate": 2.4507766978013538e-05, "loss": 0.1944122314453125, "step": 4533 }, { "epoch": 0.30647559821549275, "grad_norm": 1.6824314594268799, "learning_rate": 2.4505214709683338e-05, "loss": 0.34613037109375, "step": 4534 }, { "epoch": 0.3065431931864269, "grad_norm": 1.0299289226531982, "learning_rate": 2.450266198143173e-05, "loss": 0.215545654296875, "step": 4535 }, { "epoch": 0.3066107881573611, "grad_norm": 2.159263849258423, "learning_rate": 2.4500108793382244e-05, "loss": 0.246826171875, "step": 4536 }, { "epoch": 0.30667838312829526, "grad_norm": 1.7941601276397705, "learning_rate": 2.4497555145658416e-05, "loss": 0.2498626708984375, "step": 4537 }, { "epoch": 0.30674597809922943, "grad_norm": 1.3202959299087524, "learning_rate": 2.44950010383838e-05, "loss": 0.1625823974609375, "step": 4538 }, { "epoch": 0.3068135730701636, "grad_norm": 1.4300581216812134, "learning_rate": 2.4492446471681988e-05, "loss": 0.28070068359375, "step": 4539 }, { "epoch": 0.3068811680410977, "grad_norm": 1.131001591682434, "learning_rate": 2.4489891445676583e-05, "loss": 0.217193603515625, "step": 4540 }, { "epoch": 0.3069487630120319, "grad_norm": 0.9346611499786377, "learning_rate": 2.4487335960491216e-05, "loss": 0.2308349609375, "step": 4541 }, { "epoch": 0.30701635798296606, "grad_norm": 1.5865854024887085, "learning_rate": 2.4484780016249526e-05, "loss": 0.292724609375, "step": 4542 }, { "epoch": 0.30708395295390023, "grad_norm": 1.0036263465881348, "learning_rate": 2.4482223613075197e-05, "loss": 0.236328125, "step": 4543 }, { "epoch": 0.3071515479248344, "grad_norm": 1.4880167245864868, "learning_rate": 2.4479666751091923e-05, "loss": 0.315673828125, "step": 4544 }, { "epoch": 0.30721914289576857, "grad_norm": 1.6064841747283936, "learning_rate": 2.447710943042342e-05, "loss": 0.337432861328125, "step": 4545 }, { "epoch": 0.30728673786670274, "grad_norm": 0.8454347848892212, "learning_rate": 2.4474551651193418e-05, "loss": 0.16949462890625, "step": 4546 }, { "epoch": 0.30735433283763686, "grad_norm": 1.3371624946594238, "learning_rate": 2.447199341352569e-05, "loss": 0.279144287109375, "step": 4547 }, { "epoch": 0.307421927808571, "grad_norm": 1.1295908689498901, "learning_rate": 2.446943471754401e-05, "loss": 0.30731201171875, "step": 4548 }, { "epoch": 0.3074895227795052, "grad_norm": 1.3470324277877808, "learning_rate": 2.4466875563372194e-05, "loss": 0.207244873046875, "step": 4549 }, { "epoch": 0.30755711775043937, "grad_norm": 0.9635011553764343, "learning_rate": 2.4464315951134065e-05, "loss": 0.215545654296875, "step": 4550 }, { "epoch": 0.30762471272137354, "grad_norm": 1.541144609451294, "learning_rate": 2.4461755880953473e-05, "loss": 0.291107177734375, "step": 4551 }, { "epoch": 0.3076923076923077, "grad_norm": 1.491790533065796, "learning_rate": 2.4459195352954283e-05, "loss": 0.30914306640625, "step": 4552 }, { "epoch": 0.3077599026632419, "grad_norm": 1.2364614009857178, "learning_rate": 2.44566343672604e-05, "loss": 0.32965087890625, "step": 4553 }, { "epoch": 0.307827497634176, "grad_norm": 1.0960493087768555, "learning_rate": 2.4454072923995737e-05, "loss": 0.174652099609375, "step": 4554 }, { "epoch": 0.30789509260511017, "grad_norm": 2.1082029342651367, "learning_rate": 2.4451511023284232e-05, "loss": 0.36859130859375, "step": 4555 }, { "epoch": 0.30796268757604434, "grad_norm": 1.0892430543899536, "learning_rate": 2.444894866524984e-05, "loss": 0.22967529296875, "step": 4556 }, { "epoch": 0.3080302825469785, "grad_norm": 2.333608627319336, "learning_rate": 2.4446385850016556e-05, "loss": 0.335357666015625, "step": 4557 }, { "epoch": 0.3080978775179127, "grad_norm": 1.3915923833847046, "learning_rate": 2.4443822577708374e-05, "loss": 0.31549072265625, "step": 4558 }, { "epoch": 0.30816547248884685, "grad_norm": 0.7686880230903625, "learning_rate": 2.444125884844933e-05, "loss": 0.1719970703125, "step": 4559 }, { "epoch": 0.308233067459781, "grad_norm": 1.059994101524353, "learning_rate": 2.4438694662363458e-05, "loss": 0.2077789306640625, "step": 4560 }, { "epoch": 0.30830066243071513, "grad_norm": 2.1987037658691406, "learning_rate": 2.4436130019574847e-05, "loss": 0.328094482421875, "step": 4561 }, { "epoch": 0.3083682574016493, "grad_norm": 1.0296037197113037, "learning_rate": 2.4433564920207582e-05, "loss": 0.32122802734375, "step": 4562 }, { "epoch": 0.3084358523725835, "grad_norm": 1.5598384141921997, "learning_rate": 2.4430999364385782e-05, "loss": 0.24945068359375, "step": 4563 }, { "epoch": 0.30850344734351764, "grad_norm": 1.0583144426345825, "learning_rate": 2.4428433352233577e-05, "loss": 0.12786865234375, "step": 4564 }, { "epoch": 0.3085710423144518, "grad_norm": 1.8409326076507568, "learning_rate": 2.442586688387514e-05, "loss": 0.37835693359375, "step": 4565 }, { "epoch": 0.308638637285386, "grad_norm": 1.227338433265686, "learning_rate": 2.4423299959434637e-05, "loss": 0.23248291015625, "step": 4566 }, { "epoch": 0.30870623225632016, "grad_norm": 1.5793461799621582, "learning_rate": 2.442073257903628e-05, "loss": 0.3265380859375, "step": 4567 }, { "epoch": 0.30877382722725427, "grad_norm": 1.9134804010391235, "learning_rate": 2.4418164742804294e-05, "loss": 0.323974609375, "step": 4568 }, { "epoch": 0.30884142219818844, "grad_norm": 1.9300282001495361, "learning_rate": 2.441559645086293e-05, "loss": 0.310577392578125, "step": 4569 }, { "epoch": 0.3089090171691226, "grad_norm": 1.1220355033874512, "learning_rate": 2.441302770333645e-05, "loss": 0.2640380859375, "step": 4570 }, { "epoch": 0.3089766121400568, "grad_norm": 1.1775456666946411, "learning_rate": 2.441045850034915e-05, "loss": 0.28729248046875, "step": 4571 }, { "epoch": 0.30904420711099095, "grad_norm": 1.3873108625411987, "learning_rate": 2.440788884202535e-05, "loss": 0.193389892578125, "step": 4572 }, { "epoch": 0.3091118020819251, "grad_norm": 1.4849344491958618, "learning_rate": 2.440531872848938e-05, "loss": 0.274078369140625, "step": 4573 }, { "epoch": 0.3091793970528593, "grad_norm": 1.1714214086532593, "learning_rate": 2.4402748159865594e-05, "loss": 0.248779296875, "step": 4574 }, { "epoch": 0.3092469920237934, "grad_norm": 1.2879747152328491, "learning_rate": 2.4400177136278383e-05, "loss": 0.2748260498046875, "step": 4575 }, { "epoch": 0.3093145869947276, "grad_norm": 0.9134518504142761, "learning_rate": 2.4397605657852142e-05, "loss": 0.18798828125, "step": 4576 }, { "epoch": 0.30938218196566175, "grad_norm": 2.035532236099243, "learning_rate": 2.4395033724711293e-05, "loss": 0.3106689453125, "step": 4577 }, { "epoch": 0.3094497769365959, "grad_norm": 1.3626554012298584, "learning_rate": 2.4392461336980285e-05, "loss": 0.257049560546875, "step": 4578 }, { "epoch": 0.3095173719075301, "grad_norm": 1.1872062683105469, "learning_rate": 2.438988849478359e-05, "loss": 0.23638916015625, "step": 4579 }, { "epoch": 0.30958496687846426, "grad_norm": 2.062361717224121, "learning_rate": 2.438731519824569e-05, "loss": 0.2579345703125, "step": 4580 }, { "epoch": 0.3096525618493984, "grad_norm": 1.2148077487945557, "learning_rate": 2.4384741447491104e-05, "loss": 0.162261962890625, "step": 4581 }, { "epoch": 0.30972015682033255, "grad_norm": 1.7293404340744019, "learning_rate": 2.4382167242644366e-05, "loss": 0.322998046875, "step": 4582 }, { "epoch": 0.3097877517912667, "grad_norm": 1.9007090330123901, "learning_rate": 2.437959258383003e-05, "loss": 0.267364501953125, "step": 4583 }, { "epoch": 0.3098553467622009, "grad_norm": 0.898903489112854, "learning_rate": 2.437701747117267e-05, "loss": 0.223175048828125, "step": 4584 }, { "epoch": 0.30992294173313506, "grad_norm": 2.301231861114502, "learning_rate": 2.4374441904796888e-05, "loss": 0.355316162109375, "step": 4585 }, { "epoch": 0.30999053670406923, "grad_norm": 0.8635517954826355, "learning_rate": 2.4371865884827314e-05, "loss": 0.1824188232421875, "step": 4586 }, { "epoch": 0.3100581316750034, "grad_norm": 1.8103305101394653, "learning_rate": 2.4369289411388582e-05, "loss": 0.25543212890625, "step": 4587 }, { "epoch": 0.3101257266459375, "grad_norm": 1.777571678161621, "learning_rate": 2.4366712484605362e-05, "loss": 0.20635986328125, "step": 4588 }, { "epoch": 0.3101933216168717, "grad_norm": 0.8952486515045166, "learning_rate": 2.436413510460234e-05, "loss": 0.263671875, "step": 4589 }, { "epoch": 0.31026091658780586, "grad_norm": 1.479669451713562, "learning_rate": 2.4361557271504227e-05, "loss": 0.298736572265625, "step": 4590 }, { "epoch": 0.31032851155874003, "grad_norm": 1.2035943269729614, "learning_rate": 2.4358978985435757e-05, "loss": 0.1815643310546875, "step": 4591 }, { "epoch": 0.3103961065296742, "grad_norm": 0.9694859981536865, "learning_rate": 2.4356400246521675e-05, "loss": 0.22698974609375, "step": 4592 }, { "epoch": 0.31046370150060837, "grad_norm": 1.1063405275344849, "learning_rate": 2.435382105488677e-05, "loss": 0.319366455078125, "step": 4593 }, { "epoch": 0.31053129647154254, "grad_norm": 0.940929651260376, "learning_rate": 2.4351241410655827e-05, "loss": 0.0843658447265625, "step": 4594 }, { "epoch": 0.31059889144247665, "grad_norm": 2.1761765480041504, "learning_rate": 2.434866131395367e-05, "loss": 0.267333984375, "step": 4595 }, { "epoch": 0.3106664864134108, "grad_norm": 1.1134939193725586, "learning_rate": 2.4346080764905137e-05, "loss": 0.1785888671875, "step": 4596 }, { "epoch": 0.310734081384345, "grad_norm": 1.2885630130767822, "learning_rate": 2.43434997636351e-05, "loss": 0.290283203125, "step": 4597 }, { "epoch": 0.31080167635527917, "grad_norm": 1.7998379468917847, "learning_rate": 2.4340918310268435e-05, "loss": 0.25299072265625, "step": 4598 }, { "epoch": 0.31086927132621334, "grad_norm": 1.4570342302322388, "learning_rate": 2.433833640493005e-05, "loss": 0.31951904296875, "step": 4599 }, { "epoch": 0.3109368662971475, "grad_norm": 4.177323341369629, "learning_rate": 2.4335754047744875e-05, "loss": 0.3072509765625, "step": 4600 }, { "epoch": 0.3110044612680817, "grad_norm": 1.1914758682250977, "learning_rate": 2.4333171238837863e-05, "loss": 0.29949951171875, "step": 4601 }, { "epoch": 0.3110720562390158, "grad_norm": 1.8125063180923462, "learning_rate": 2.4330587978333986e-05, "loss": 0.2298583984375, "step": 4602 }, { "epoch": 0.31113965120994996, "grad_norm": 1.9669334888458252, "learning_rate": 2.4328004266358234e-05, "loss": 0.3045654296875, "step": 4603 }, { "epoch": 0.31120724618088413, "grad_norm": 1.1435749530792236, "learning_rate": 2.4325420103035626e-05, "loss": 0.302490234375, "step": 4604 }, { "epoch": 0.3112748411518183, "grad_norm": 0.9387901425361633, "learning_rate": 2.4322835488491198e-05, "loss": 0.141357421875, "step": 4605 }, { "epoch": 0.3113424361227525, "grad_norm": 1.0842442512512207, "learning_rate": 2.432025042285001e-05, "loss": 0.241363525390625, "step": 4606 }, { "epoch": 0.31141003109368665, "grad_norm": 1.443393349647522, "learning_rate": 2.4317664906237146e-05, "loss": 0.22711181640625, "step": 4607 }, { "epoch": 0.3114776260646208, "grad_norm": 1.2623103857040405, "learning_rate": 2.431507893877771e-05, "loss": 0.15185546875, "step": 4608 }, { "epoch": 0.31154522103555493, "grad_norm": 1.2033824920654297, "learning_rate": 2.431249252059682e-05, "loss": 0.30413818359375, "step": 4609 }, { "epoch": 0.3116128160064891, "grad_norm": 1.5868585109710693, "learning_rate": 2.430990565181963e-05, "loss": 0.29620361328125, "step": 4610 }, { "epoch": 0.3116804109774233, "grad_norm": 1.4913426637649536, "learning_rate": 2.430731833257131e-05, "loss": 0.2230987548828125, "step": 4611 }, { "epoch": 0.31174800594835744, "grad_norm": 1.1461551189422607, "learning_rate": 2.4304730562977047e-05, "loss": 0.17303466796875, "step": 4612 }, { "epoch": 0.3118156009192916, "grad_norm": 2.1470577716827393, "learning_rate": 2.4302142343162053e-05, "loss": 0.2811737060546875, "step": 4613 }, { "epoch": 0.3118831958902258, "grad_norm": 1.806748867034912, "learning_rate": 2.4299553673251563e-05, "loss": 0.267913818359375, "step": 4614 }, { "epoch": 0.31195079086115995, "grad_norm": 1.9596288204193115, "learning_rate": 2.4296964553370832e-05, "loss": 0.22607421875, "step": 4615 }, { "epoch": 0.31201838583209407, "grad_norm": 1.7964155673980713, "learning_rate": 2.429437498364514e-05, "loss": 0.33837890625, "step": 4616 }, { "epoch": 0.31208598080302824, "grad_norm": 1.2887110710144043, "learning_rate": 2.4291784964199775e-05, "loss": 0.33135986328125, "step": 4617 }, { "epoch": 0.3121535757739624, "grad_norm": 0.6990532279014587, "learning_rate": 2.428919449516008e-05, "loss": 0.1409149169921875, "step": 4618 }, { "epoch": 0.3122211707448966, "grad_norm": 1.2651711702346802, "learning_rate": 2.4286603576651388e-05, "loss": 0.274169921875, "step": 4619 }, { "epoch": 0.31228876571583075, "grad_norm": 1.1770187616348267, "learning_rate": 2.4284012208799055e-05, "loss": 0.30133056640625, "step": 4620 }, { "epoch": 0.3123563606867649, "grad_norm": 1.5750256776809692, "learning_rate": 2.4281420391728478e-05, "loss": 0.27923583984375, "step": 4621 }, { "epoch": 0.3124239556576991, "grad_norm": 1.3062790632247925, "learning_rate": 2.4278828125565067e-05, "loss": 0.2528076171875, "step": 4622 }, { "epoch": 0.3124915506286332, "grad_norm": 2.8071391582489014, "learning_rate": 2.427623541043424e-05, "loss": 0.30389404296875, "step": 4623 }, { "epoch": 0.3125591455995674, "grad_norm": 1.370281457901001, "learning_rate": 2.4273642246461457e-05, "loss": 0.1652679443359375, "step": 4624 }, { "epoch": 0.31262674057050155, "grad_norm": 0.9365429282188416, "learning_rate": 2.427104863377219e-05, "loss": 0.23651123046875, "step": 4625 }, { "epoch": 0.3126943355414357, "grad_norm": 0.9409623146057129, "learning_rate": 2.4268454572491934e-05, "loss": 0.25482177734375, "step": 4626 }, { "epoch": 0.3127619305123699, "grad_norm": 1.4293607473373413, "learning_rate": 2.4265860062746203e-05, "loss": 0.262664794921875, "step": 4627 }, { "epoch": 0.31282952548330406, "grad_norm": 0.9143204689025879, "learning_rate": 2.4263265104660542e-05, "loss": 0.186767578125, "step": 4628 }, { "epoch": 0.31289712045423823, "grad_norm": 1.700319528579712, "learning_rate": 2.4260669698360506e-05, "loss": 0.2099609375, "step": 4629 }, { "epoch": 0.31296471542517235, "grad_norm": 1.999149203300476, "learning_rate": 2.4258073843971686e-05, "loss": 0.33355712890625, "step": 4630 }, { "epoch": 0.3130323103961065, "grad_norm": 1.3091357946395874, "learning_rate": 2.425547754161967e-05, "loss": 0.274749755859375, "step": 4631 }, { "epoch": 0.3130999053670407, "grad_norm": 1.000131368637085, "learning_rate": 2.425288079143009e-05, "loss": 0.2793426513671875, "step": 4632 }, { "epoch": 0.31316750033797486, "grad_norm": 1.2308212518692017, "learning_rate": 2.42502835935286e-05, "loss": 0.230560302734375, "step": 4633 }, { "epoch": 0.31323509530890903, "grad_norm": 2.277972936630249, "learning_rate": 2.424768594804086e-05, "loss": 0.2349853515625, "step": 4634 }, { "epoch": 0.3133026902798432, "grad_norm": 1.6682016849517822, "learning_rate": 2.424508785509256e-05, "loss": 0.298431396484375, "step": 4635 }, { "epoch": 0.31337028525077737, "grad_norm": 0.8582949042320251, "learning_rate": 2.424248931480942e-05, "loss": 0.204010009765625, "step": 4636 }, { "epoch": 0.3134378802217115, "grad_norm": 1.3766560554504395, "learning_rate": 2.4239890327317167e-05, "loss": 0.20745849609375, "step": 4637 }, { "epoch": 0.31350547519264566, "grad_norm": 1.3918853998184204, "learning_rate": 2.4237290892741557e-05, "loss": 0.286407470703125, "step": 4638 }, { "epoch": 0.3135730701635798, "grad_norm": 1.2833878993988037, "learning_rate": 2.423469101120837e-05, "loss": 0.270660400390625, "step": 4639 }, { "epoch": 0.313640665134514, "grad_norm": 1.35824453830719, "learning_rate": 2.4232090682843397e-05, "loss": 0.17317962646484375, "step": 4640 }, { "epoch": 0.31370826010544817, "grad_norm": 1.1137781143188477, "learning_rate": 2.4229489907772466e-05, "loss": 0.1929779052734375, "step": 4641 }, { "epoch": 0.31377585507638234, "grad_norm": 1.6864324808120728, "learning_rate": 2.4226888686121414e-05, "loss": 0.195587158203125, "step": 4642 }, { "epoch": 0.31384345004731645, "grad_norm": 1.5545545816421509, "learning_rate": 2.4224287018016112e-05, "loss": 0.31719970703125, "step": 4643 }, { "epoch": 0.3139110450182506, "grad_norm": 2.051492214202881, "learning_rate": 2.4221684903582434e-05, "loss": 0.25872802734375, "step": 4644 }, { "epoch": 0.3139786399891848, "grad_norm": 1.9337760210037231, "learning_rate": 2.4219082342946294e-05, "loss": 0.31439208984375, "step": 4645 }, { "epoch": 0.31404623496011896, "grad_norm": 3.0998494625091553, "learning_rate": 2.4216479336233615e-05, "loss": 0.354766845703125, "step": 4646 }, { "epoch": 0.31411382993105313, "grad_norm": 1.782069206237793, "learning_rate": 2.4213875883570347e-05, "loss": 0.21990966796875, "step": 4647 }, { "epoch": 0.3141814249019873, "grad_norm": 1.6008585691452026, "learning_rate": 2.421127198508247e-05, "loss": 0.19818115234375, "step": 4648 }, { "epoch": 0.3142490198729215, "grad_norm": 1.2570509910583496, "learning_rate": 2.420866764089596e-05, "loss": 0.267333984375, "step": 4649 }, { "epoch": 0.3143166148438556, "grad_norm": 1.1769882440567017, "learning_rate": 2.4206062851136847e-05, "loss": 0.250762939453125, "step": 4650 }, { "epoch": 0.31438420981478976, "grad_norm": 1.61897873878479, "learning_rate": 2.420345761593116e-05, "loss": 0.303009033203125, "step": 4651 }, { "epoch": 0.31445180478572393, "grad_norm": 1.326577067375183, "learning_rate": 2.4200851935404962e-05, "loss": 0.291168212890625, "step": 4652 }, { "epoch": 0.3145193997566581, "grad_norm": 2.794539451599121, "learning_rate": 2.4198245809684325e-05, "loss": 0.2655181884765625, "step": 4653 }, { "epoch": 0.3145869947275923, "grad_norm": 1.1244663000106812, "learning_rate": 2.4195639238895352e-05, "loss": 0.2044677734375, "step": 4654 }, { "epoch": 0.31465458969852644, "grad_norm": 1.7506171464920044, "learning_rate": 2.4193032223164163e-05, "loss": 0.30975341796875, "step": 4655 }, { "epoch": 0.3147221846694606, "grad_norm": 1.0800464153289795, "learning_rate": 2.4190424762616912e-05, "loss": 0.2138671875, "step": 4656 }, { "epoch": 0.31478977964039473, "grad_norm": 1.4020122289657593, "learning_rate": 2.4187816857379752e-05, "loss": 0.26165771484375, "step": 4657 }, { "epoch": 0.3148573746113289, "grad_norm": 1.4712117910385132, "learning_rate": 2.4185208507578875e-05, "loss": 0.247467041015625, "step": 4658 }, { "epoch": 0.31492496958226307, "grad_norm": 1.1743520498275757, "learning_rate": 2.4182599713340484e-05, "loss": 0.251251220703125, "step": 4659 }, { "epoch": 0.31499256455319724, "grad_norm": 1.1115328073501587, "learning_rate": 2.417999047479082e-05, "loss": 0.19927978515625, "step": 4660 }, { "epoch": 0.3150601595241314, "grad_norm": 1.0245426893234253, "learning_rate": 2.417738079205612e-05, "loss": 0.214752197265625, "step": 4661 }, { "epoch": 0.3151277544950656, "grad_norm": 1.4575259685516357, "learning_rate": 2.4174770665262667e-05, "loss": 0.3177490234375, "step": 4662 }, { "epoch": 0.31519534946599975, "grad_norm": 1.4232326745986938, "learning_rate": 2.417216009453675e-05, "loss": 0.33892822265625, "step": 4663 }, { "epoch": 0.31526294443693387, "grad_norm": 1.922491192817688, "learning_rate": 2.4169549080004688e-05, "loss": 0.33197021484375, "step": 4664 }, { "epoch": 0.31533053940786804, "grad_norm": 1.4648089408874512, "learning_rate": 2.4166937621792818e-05, "loss": 0.263427734375, "step": 4665 }, { "epoch": 0.3153981343788022, "grad_norm": 1.6822474002838135, "learning_rate": 2.4164325720027492e-05, "loss": 0.31982421875, "step": 4666 }, { "epoch": 0.3154657293497364, "grad_norm": 0.9881739020347595, "learning_rate": 2.4161713374835103e-05, "loss": 0.116302490234375, "step": 4667 }, { "epoch": 0.31553332432067055, "grad_norm": 1.0847148895263672, "learning_rate": 2.4159100586342044e-05, "loss": 0.208251953125, "step": 4668 }, { "epoch": 0.3156009192916047, "grad_norm": 1.2816576957702637, "learning_rate": 2.4156487354674734e-05, "loss": 0.291015625, "step": 4669 }, { "epoch": 0.3156685142625389, "grad_norm": 0.6296977400779724, "learning_rate": 2.4153873679959624e-05, "loss": 0.1484375, "step": 4670 }, { "epoch": 0.315736109233473, "grad_norm": 1.4855660200119019, "learning_rate": 2.415125956232318e-05, "loss": 0.210479736328125, "step": 4671 }, { "epoch": 0.3158037042044072, "grad_norm": 0.86429762840271, "learning_rate": 2.4148645001891883e-05, "loss": 0.18408203125, "step": 4672 }, { "epoch": 0.31587129917534135, "grad_norm": 2.5732650756835938, "learning_rate": 2.414602999879225e-05, "loss": 0.3160400390625, "step": 4673 }, { "epoch": 0.3159388941462755, "grad_norm": 1.2246031761169434, "learning_rate": 2.4143414553150805e-05, "loss": 0.25531005859375, "step": 4674 }, { "epoch": 0.3160064891172097, "grad_norm": 1.3105015754699707, "learning_rate": 2.41407986650941e-05, "loss": 0.30657958984375, "step": 4675 }, { "epoch": 0.31607408408814386, "grad_norm": 1.3674832582473755, "learning_rate": 2.4138182334748715e-05, "loss": 0.2336578369140625, "step": 4676 }, { "epoch": 0.31614167905907803, "grad_norm": 3.010970115661621, "learning_rate": 2.4135565562241233e-05, "loss": 0.33551025390625, "step": 4677 }, { "epoch": 0.31620927403001214, "grad_norm": 1.7103252410888672, "learning_rate": 2.413294834769828e-05, "loss": 0.187042236328125, "step": 4678 }, { "epoch": 0.3162768690009463, "grad_norm": 1.5655287504196167, "learning_rate": 2.413033069124649e-05, "loss": 0.2032318115234375, "step": 4679 }, { "epoch": 0.3163444639718805, "grad_norm": 1.710871696472168, "learning_rate": 2.412771259301252e-05, "loss": 0.232696533203125, "step": 4680 }, { "epoch": 0.31641205894281466, "grad_norm": 1.1906461715698242, "learning_rate": 2.4125094053123044e-05, "loss": 0.25592041015625, "step": 4681 }, { "epoch": 0.3164796539137488, "grad_norm": 2.6803741455078125, "learning_rate": 2.412247507170477e-05, "loss": 0.34552001953125, "step": 4682 }, { "epoch": 0.316547248884683, "grad_norm": 2.5564043521881104, "learning_rate": 2.411985564888443e-05, "loss": 0.275115966796875, "step": 4683 }, { "epoch": 0.31661484385561717, "grad_norm": 1.510550856590271, "learning_rate": 2.4117235784788747e-05, "loss": 0.271331787109375, "step": 4684 }, { "epoch": 0.3166824388265513, "grad_norm": 1.0894397497177124, "learning_rate": 2.41146154795445e-05, "loss": 0.22308349609375, "step": 4685 }, { "epoch": 0.31675003379748545, "grad_norm": 2.459084987640381, "learning_rate": 2.411199473327847e-05, "loss": 0.250274658203125, "step": 4686 }, { "epoch": 0.3168176287684196, "grad_norm": 1.7447270154953003, "learning_rate": 2.4109373546117475e-05, "loss": 0.37451171875, "step": 4687 }, { "epoch": 0.3168852237393538, "grad_norm": 2.4422130584716797, "learning_rate": 2.4106751918188333e-05, "loss": 0.39154052734375, "step": 4688 }, { "epoch": 0.31695281871028796, "grad_norm": 1.632198691368103, "learning_rate": 2.4104129849617903e-05, "loss": 0.19207763671875, "step": 4689 }, { "epoch": 0.31702041368122214, "grad_norm": 1.1368303298950195, "learning_rate": 2.4101507340533047e-05, "loss": 0.17169952392578125, "step": 4690 }, { "epoch": 0.3170880086521563, "grad_norm": 1.2737733125686646, "learning_rate": 2.409888439106067e-05, "loss": 0.2889404296875, "step": 4691 }, { "epoch": 0.3171556036230904, "grad_norm": 1.018115520477295, "learning_rate": 2.4096261001327675e-05, "loss": 0.244964599609375, "step": 4692 }, { "epoch": 0.3172231985940246, "grad_norm": 0.6630430817604065, "learning_rate": 2.409363717146101e-05, "loss": 0.12725067138671875, "step": 4693 }, { "epoch": 0.31729079356495876, "grad_norm": 1.5736546516418457, "learning_rate": 2.4091012901587622e-05, "loss": 0.2525787353515625, "step": 4694 }, { "epoch": 0.31735838853589293, "grad_norm": 1.1321958303451538, "learning_rate": 2.4088388191834492e-05, "loss": 0.283447265625, "step": 4695 }, { "epoch": 0.3174259835068271, "grad_norm": 0.9958207607269287, "learning_rate": 2.4085763042328626e-05, "loss": 0.2256317138671875, "step": 4696 }, { "epoch": 0.3174935784777613, "grad_norm": 1.1340012550354004, "learning_rate": 2.4083137453197038e-05, "loss": 0.278594970703125, "step": 4697 }, { "epoch": 0.31756117344869544, "grad_norm": 2.3044400215148926, "learning_rate": 2.408051142456677e-05, "loss": 0.307647705078125, "step": 4698 }, { "epoch": 0.31762876841962956, "grad_norm": 1.2632086277008057, "learning_rate": 2.407788495656489e-05, "loss": 0.328826904296875, "step": 4699 }, { "epoch": 0.31769636339056373, "grad_norm": 1.0988348722457886, "learning_rate": 2.407525804931848e-05, "loss": 0.2074432373046875, "step": 4700 }, { "epoch": 0.3177639583614979, "grad_norm": 1.370559573173523, "learning_rate": 2.407263070295465e-05, "loss": 0.27239990234375, "step": 4701 }, { "epoch": 0.31783155333243207, "grad_norm": 1.262197732925415, "learning_rate": 2.4070002917600523e-05, "loss": 0.2974853515625, "step": 4702 }, { "epoch": 0.31789914830336624, "grad_norm": 2.1381051540374756, "learning_rate": 2.406737469338325e-05, "loss": 0.3179931640625, "step": 4703 }, { "epoch": 0.3179667432743004, "grad_norm": 1.3091164827346802, "learning_rate": 2.406474603043e-05, "loss": 0.2010498046875, "step": 4704 }, { "epoch": 0.3180343382452345, "grad_norm": 1.836483120918274, "learning_rate": 2.4062116928867963e-05, "loss": 0.310150146484375, "step": 4705 }, { "epoch": 0.3181019332161687, "grad_norm": 0.565262496471405, "learning_rate": 2.4059487388824352e-05, "loss": 0.1377410888671875, "step": 4706 }, { "epoch": 0.31816952818710287, "grad_norm": 1.425493597984314, "learning_rate": 2.4056857410426402e-05, "loss": 0.30059814453125, "step": 4707 }, { "epoch": 0.31823712315803704, "grad_norm": 1.1161760091781616, "learning_rate": 2.4054226993801363e-05, "loss": 0.26605224609375, "step": 4708 }, { "epoch": 0.3183047181289712, "grad_norm": 1.7851009368896484, "learning_rate": 2.4051596139076516e-05, "loss": 0.35247802734375, "step": 4709 }, { "epoch": 0.3183723130999054, "grad_norm": 1.2126970291137695, "learning_rate": 2.4048964846379154e-05, "loss": 0.22830963134765625, "step": 4710 }, { "epoch": 0.31843990807083955, "grad_norm": 1.1781535148620605, "learning_rate": 2.4046333115836605e-05, "loss": 0.3062744140625, "step": 4711 }, { "epoch": 0.31850750304177367, "grad_norm": 1.3739709854125977, "learning_rate": 2.40437009475762e-05, "loss": 0.309967041015625, "step": 4712 }, { "epoch": 0.31857509801270784, "grad_norm": 2.074258804321289, "learning_rate": 2.4041068341725297e-05, "loss": 0.3074951171875, "step": 4713 }, { "epoch": 0.318642692983642, "grad_norm": 0.8810672163963318, "learning_rate": 2.4038435298411282e-05, "loss": 0.242401123046875, "step": 4714 }, { "epoch": 0.3187102879545762, "grad_norm": 0.9417888522148132, "learning_rate": 2.4035801817761563e-05, "loss": 0.237701416015625, "step": 4715 }, { "epoch": 0.31877788292551035, "grad_norm": 1.9652317762374878, "learning_rate": 2.4033167899903553e-05, "loss": 0.32623291015625, "step": 4716 }, { "epoch": 0.3188454778964445, "grad_norm": 2.0483882427215576, "learning_rate": 2.4030533544964705e-05, "loss": 0.220306396484375, "step": 4717 }, { "epoch": 0.3189130728673787, "grad_norm": 1.817645788192749, "learning_rate": 2.4027898753072482e-05, "loss": 0.3001708984375, "step": 4718 }, { "epoch": 0.3189806678383128, "grad_norm": 0.9303759932518005, "learning_rate": 2.4025263524354378e-05, "loss": 0.1681976318359375, "step": 4719 }, { "epoch": 0.319048262809247, "grad_norm": 1.2277514934539795, "learning_rate": 2.4022627858937894e-05, "loss": 0.22283935546875, "step": 4720 }, { "epoch": 0.31911585778018114, "grad_norm": 1.1533955335617065, "learning_rate": 2.4019991756950562e-05, "loss": 0.267425537109375, "step": 4721 }, { "epoch": 0.3191834527511153, "grad_norm": 0.9847077131271362, "learning_rate": 2.401735521851994e-05, "loss": 0.20147705078125, "step": 4722 }, { "epoch": 0.3192510477220495, "grad_norm": 1.8596009016036987, "learning_rate": 2.401471824377359e-05, "loss": 0.27935791015625, "step": 4723 }, { "epoch": 0.31931864269298366, "grad_norm": 1.5293200016021729, "learning_rate": 2.401208083283911e-05, "loss": 0.2548828125, "step": 4724 }, { "epoch": 0.3193862376639178, "grad_norm": 1.4812568426132202, "learning_rate": 2.4009442985844113e-05, "loss": 0.261627197265625, "step": 4725 }, { "epoch": 0.31945383263485194, "grad_norm": 2.228630304336548, "learning_rate": 2.4006804702916237e-05, "loss": 0.28070068359375, "step": 4726 }, { "epoch": 0.3195214276057861, "grad_norm": 1.3351935148239136, "learning_rate": 2.4004165984183137e-05, "loss": 0.2467041015625, "step": 4727 }, { "epoch": 0.3195890225767203, "grad_norm": 1.1019749641418457, "learning_rate": 2.4001526829772488e-05, "loss": 0.230743408203125, "step": 4728 }, { "epoch": 0.31965661754765445, "grad_norm": 1.3201075792312622, "learning_rate": 2.399888723981199e-05, "loss": 0.2923583984375, "step": 4729 }, { "epoch": 0.3197242125185886, "grad_norm": 1.4847530126571655, "learning_rate": 2.3996247214429366e-05, "loss": 0.271820068359375, "step": 4730 }, { "epoch": 0.3197918074895228, "grad_norm": 1.0270346403121948, "learning_rate": 2.3993606753752356e-05, "loss": 0.1790924072265625, "step": 4731 }, { "epoch": 0.31985940246045697, "grad_norm": 1.4625906944274902, "learning_rate": 2.3990965857908717e-05, "loss": 0.34722900390625, "step": 4732 }, { "epoch": 0.3199269974313911, "grad_norm": 1.2917171716690063, "learning_rate": 2.398832452702624e-05, "loss": 0.162322998046875, "step": 4733 }, { "epoch": 0.31999459240232525, "grad_norm": 1.6315056085586548, "learning_rate": 2.398568276123272e-05, "loss": 0.3582763671875, "step": 4734 }, { "epoch": 0.3200621873732594, "grad_norm": 0.7425487041473389, "learning_rate": 2.398304056065599e-05, "loss": 0.1930389404296875, "step": 4735 }, { "epoch": 0.3201297823441936, "grad_norm": 1.5237693786621094, "learning_rate": 2.3980397925423888e-05, "loss": 0.166168212890625, "step": 4736 }, { "epoch": 0.32019737731512776, "grad_norm": 1.4304922819137573, "learning_rate": 2.3977754855664295e-05, "loss": 0.22589111328125, "step": 4737 }, { "epoch": 0.32026497228606193, "grad_norm": 1.4809038639068604, "learning_rate": 2.397511135150508e-05, "loss": 0.2568359375, "step": 4738 }, { "epoch": 0.3203325672569961, "grad_norm": 2.0080018043518066, "learning_rate": 2.397246741307417e-05, "loss": 0.1893157958984375, "step": 4739 }, { "epoch": 0.3204001622279302, "grad_norm": 0.9667589068412781, "learning_rate": 2.3969823040499486e-05, "loss": 0.155426025390625, "step": 4740 }, { "epoch": 0.3204677571988644, "grad_norm": 1.1111494302749634, "learning_rate": 2.3967178233908978e-05, "loss": 0.26153564453125, "step": 4741 }, { "epoch": 0.32053535216979856, "grad_norm": 0.9982537031173706, "learning_rate": 2.396453299343062e-05, "loss": 0.190032958984375, "step": 4742 }, { "epoch": 0.32060294714073273, "grad_norm": 0.9677668213844299, "learning_rate": 2.396188731919241e-05, "loss": 0.2164154052734375, "step": 4743 }, { "epoch": 0.3206705421116669, "grad_norm": 0.8579214215278625, "learning_rate": 2.3959241211322355e-05, "loss": 0.19622802734375, "step": 4744 }, { "epoch": 0.32073813708260107, "grad_norm": 1.2676678895950317, "learning_rate": 2.395659466994849e-05, "loss": 0.28326416015625, "step": 4745 }, { "epoch": 0.32080573205353524, "grad_norm": 1.680696725845337, "learning_rate": 2.395394769519888e-05, "loss": 0.325927734375, "step": 4746 }, { "epoch": 0.32087332702446936, "grad_norm": 1.7047165632247925, "learning_rate": 2.3951300287201597e-05, "loss": 0.320526123046875, "step": 4747 }, { "epoch": 0.3209409219954035, "grad_norm": 1.1805603504180908, "learning_rate": 2.3948652446084733e-05, "loss": 0.220703125, "step": 4748 }, { "epoch": 0.3210085169663377, "grad_norm": 1.132468819618225, "learning_rate": 2.3946004171976415e-05, "loss": 0.308837890625, "step": 4749 }, { "epoch": 0.32107611193727187, "grad_norm": 1.4454870223999023, "learning_rate": 2.3943355465004786e-05, "loss": 0.1868896484375, "step": 4750 }, { "epoch": 0.32114370690820604, "grad_norm": 1.0847187042236328, "learning_rate": 2.3940706325297995e-05, "loss": 0.2410888671875, "step": 4751 }, { "epoch": 0.3212113018791402, "grad_norm": 0.8587878346443176, "learning_rate": 2.3938056752984233e-05, "loss": 0.2103424072265625, "step": 4752 }, { "epoch": 0.3212788968500744, "grad_norm": 1.6405609846115112, "learning_rate": 2.3935406748191698e-05, "loss": 0.260833740234375, "step": 4753 }, { "epoch": 0.3213464918210085, "grad_norm": 1.8410989046096802, "learning_rate": 2.3932756311048617e-05, "loss": 0.249176025390625, "step": 4754 }, { "epoch": 0.32141408679194267, "grad_norm": 1.4322879314422607, "learning_rate": 2.3930105441683233e-05, "loss": 0.275665283203125, "step": 4755 }, { "epoch": 0.32148168176287684, "grad_norm": 1.286131739616394, "learning_rate": 2.3927454140223816e-05, "loss": 0.2591552734375, "step": 4756 }, { "epoch": 0.321549276733811, "grad_norm": 0.9978892207145691, "learning_rate": 2.3924802406798642e-05, "loss": 0.23553466796875, "step": 4757 }, { "epoch": 0.3216168717047452, "grad_norm": 1.2838021516799927, "learning_rate": 2.392215024153603e-05, "loss": 0.3106689453125, "step": 4758 }, { "epoch": 0.32168446667567935, "grad_norm": 1.4638572931289673, "learning_rate": 2.3919497644564302e-05, "loss": 0.283294677734375, "step": 4759 }, { "epoch": 0.3217520616466135, "grad_norm": 1.5145994424819946, "learning_rate": 2.3916844616011804e-05, "loss": 0.275909423828125, "step": 4760 }, { "epoch": 0.32181965661754763, "grad_norm": 1.7028156518936157, "learning_rate": 2.3914191156006915e-05, "loss": 0.3160400390625, "step": 4761 }, { "epoch": 0.3218872515884818, "grad_norm": 1.2603158950805664, "learning_rate": 2.3911537264678017e-05, "loss": 0.325836181640625, "step": 4762 }, { "epoch": 0.321954846559416, "grad_norm": 1.2282590866088867, "learning_rate": 2.390888294215353e-05, "loss": 0.293182373046875, "step": 4763 }, { "epoch": 0.32202244153035015, "grad_norm": 1.2654935121536255, "learning_rate": 2.3906228188561882e-05, "loss": 0.31341552734375, "step": 4764 }, { "epoch": 0.3220900365012843, "grad_norm": 2.669172763824463, "learning_rate": 2.3903573004031527e-05, "loss": 0.36798095703125, "step": 4765 }, { "epoch": 0.3221576314722185, "grad_norm": 0.8880817294120789, "learning_rate": 2.390091738869094e-05, "loss": 0.19537353515625, "step": 4766 }, { "epoch": 0.32222522644315266, "grad_norm": 0.8269767761230469, "learning_rate": 2.3898261342668613e-05, "loss": 0.152557373046875, "step": 4767 }, { "epoch": 0.32229282141408677, "grad_norm": 1.4165046215057373, "learning_rate": 2.3895604866093062e-05, "loss": 0.1950836181640625, "step": 4768 }, { "epoch": 0.32236041638502094, "grad_norm": 1.2009915113449097, "learning_rate": 2.389294795909283e-05, "loss": 0.24365234375, "step": 4769 }, { "epoch": 0.3224280113559551, "grad_norm": 1.2474193572998047, "learning_rate": 2.3890290621796468e-05, "loss": 0.22666168212890625, "step": 4770 }, { "epoch": 0.3224956063268893, "grad_norm": 1.5948008298873901, "learning_rate": 2.3887632854332564e-05, "loss": 0.243133544921875, "step": 4771 }, { "epoch": 0.32256320129782345, "grad_norm": 0.9447851181030273, "learning_rate": 2.3884974656829705e-05, "loss": 0.1506195068359375, "step": 4772 }, { "epoch": 0.3226307962687576, "grad_norm": 1.0971124172210693, "learning_rate": 2.388231602941652e-05, "loss": 0.170867919921875, "step": 4773 }, { "epoch": 0.32269839123969174, "grad_norm": 0.9763372540473938, "learning_rate": 2.3879656972221646e-05, "loss": 0.261810302734375, "step": 4774 }, { "epoch": 0.3227659862106259, "grad_norm": 1.2856762409210205, "learning_rate": 2.387699748537375e-05, "loss": 0.19399261474609375, "step": 4775 }, { "epoch": 0.3228335811815601, "grad_norm": 1.5698492527008057, "learning_rate": 2.3874337569001505e-05, "loss": 0.197540283203125, "step": 4776 }, { "epoch": 0.32290117615249425, "grad_norm": 1.8025459051132202, "learning_rate": 2.387167722323362e-05, "loss": 0.283447265625, "step": 4777 }, { "epoch": 0.3229687711234284, "grad_norm": 1.04888117313385, "learning_rate": 2.386901644819882e-05, "loss": 0.2657470703125, "step": 4778 }, { "epoch": 0.3230363660943626, "grad_norm": 2.2735893726348877, "learning_rate": 2.3866355244025844e-05, "loss": 0.3243408203125, "step": 4779 }, { "epoch": 0.32310396106529676, "grad_norm": 1.4199872016906738, "learning_rate": 2.386369361084347e-05, "loss": 0.27178955078125, "step": 4780 }, { "epoch": 0.3231715560362309, "grad_norm": 2.067110538482666, "learning_rate": 2.3861031548780472e-05, "loss": 0.3353271484375, "step": 4781 }, { "epoch": 0.32323915100716505, "grad_norm": 2.4299235343933105, "learning_rate": 2.385836905796566e-05, "loss": 0.3409423828125, "step": 4782 }, { "epoch": 0.3233067459780992, "grad_norm": 1.2464523315429688, "learning_rate": 2.3855706138527863e-05, "loss": 0.241729736328125, "step": 4783 }, { "epoch": 0.3233743409490334, "grad_norm": 1.6474555730819702, "learning_rate": 2.385304279059593e-05, "loss": 0.29888916015625, "step": 4784 }, { "epoch": 0.32344193591996756, "grad_norm": 1.0664818286895752, "learning_rate": 2.3850379014298737e-05, "loss": 0.24884033203125, "step": 4785 }, { "epoch": 0.32350953089090173, "grad_norm": 0.9655317664146423, "learning_rate": 2.3847714809765165e-05, "loss": 0.2256011962890625, "step": 4786 }, { "epoch": 0.3235771258618359, "grad_norm": 1.920580267906189, "learning_rate": 2.384505017712412e-05, "loss": 0.316619873046875, "step": 4787 }, { "epoch": 0.32364472083277, "grad_norm": 2.2662482261657715, "learning_rate": 2.3842385116504547e-05, "loss": 0.34954833984375, "step": 4788 }, { "epoch": 0.3237123158037042, "grad_norm": 1.9092004299163818, "learning_rate": 2.383971962803539e-05, "loss": 0.232940673828125, "step": 4789 }, { "epoch": 0.32377991077463836, "grad_norm": 1.2976553440093994, "learning_rate": 2.3837053711845626e-05, "loss": 0.229766845703125, "step": 4790 }, { "epoch": 0.32384750574557253, "grad_norm": 1.2789931297302246, "learning_rate": 2.3834387368064243e-05, "loss": 0.215789794921875, "step": 4791 }, { "epoch": 0.3239151007165067, "grad_norm": 1.4111361503601074, "learning_rate": 2.383172059682026e-05, "loss": 0.31243896484375, "step": 4792 }, { "epoch": 0.32398269568744087, "grad_norm": 2.354011297225952, "learning_rate": 2.3829053398242708e-05, "loss": 0.30389404296875, "step": 4793 }, { "epoch": 0.32405029065837504, "grad_norm": 2.2050864696502686, "learning_rate": 2.382638577246065e-05, "loss": 0.3037109375, "step": 4794 }, { "epoch": 0.32411788562930915, "grad_norm": 1.3389785289764404, "learning_rate": 2.3823717719603156e-05, "loss": 0.290008544921875, "step": 4795 }, { "epoch": 0.3241854806002433, "grad_norm": 1.8827221393585205, "learning_rate": 2.3821049239799325e-05, "loss": 0.292327880859375, "step": 4796 }, { "epoch": 0.3242530755711775, "grad_norm": 1.4066777229309082, "learning_rate": 2.3818380333178273e-05, "loss": 0.242095947265625, "step": 4797 }, { "epoch": 0.32432067054211167, "grad_norm": 2.0585808753967285, "learning_rate": 2.3815710999869143e-05, "loss": 0.337188720703125, "step": 4798 }, { "epoch": 0.32438826551304584, "grad_norm": 0.9636375308036804, "learning_rate": 2.3813041240001083e-05, "loss": 0.2268524169921875, "step": 4799 }, { "epoch": 0.32445586048398, "grad_norm": 1.4689099788665771, "learning_rate": 2.3810371053703287e-05, "loss": 0.30670166015625, "step": 4800 }, { "epoch": 0.3245234554549142, "grad_norm": 1.6544264554977417, "learning_rate": 2.3807700441104946e-05, "loss": 0.296417236328125, "step": 4801 }, { "epoch": 0.3245910504258483, "grad_norm": 1.0651764869689941, "learning_rate": 2.3805029402335282e-05, "loss": 0.214874267578125, "step": 4802 }, { "epoch": 0.32465864539678246, "grad_norm": 1.6106098890304565, "learning_rate": 2.3802357937523537e-05, "loss": 0.2498779296875, "step": 4803 }, { "epoch": 0.32472624036771663, "grad_norm": 0.9583492279052734, "learning_rate": 2.379968604679898e-05, "loss": 0.2161865234375, "step": 4804 }, { "epoch": 0.3247938353386508, "grad_norm": 1.5087108612060547, "learning_rate": 2.379701373029088e-05, "loss": 0.269989013671875, "step": 4805 }, { "epoch": 0.324861430309585, "grad_norm": 1.5594784021377563, "learning_rate": 2.379434098812855e-05, "loss": 0.167388916015625, "step": 4806 }, { "epoch": 0.32492902528051915, "grad_norm": 1.433100700378418, "learning_rate": 2.379166782044131e-05, "loss": 0.245574951171875, "step": 4807 }, { "epoch": 0.3249966202514533, "grad_norm": 1.2254499197006226, "learning_rate": 2.3788994227358513e-05, "loss": 0.27569580078125, "step": 4808 }, { "epoch": 0.32506421522238743, "grad_norm": 1.244966745376587, "learning_rate": 2.3786320209009514e-05, "loss": 0.2657470703125, "step": 4809 }, { "epoch": 0.3251318101933216, "grad_norm": 2.5224883556365967, "learning_rate": 2.3783645765523702e-05, "loss": 0.30364990234375, "step": 4810 }, { "epoch": 0.3251994051642558, "grad_norm": 1.1200075149536133, "learning_rate": 2.378097089703048e-05, "loss": 0.203399658203125, "step": 4811 }, { "epoch": 0.32526700013518994, "grad_norm": 2.1086692810058594, "learning_rate": 2.3778295603659275e-05, "loss": 0.31884765625, "step": 4812 }, { "epoch": 0.3253345951061241, "grad_norm": 1.436536431312561, "learning_rate": 2.377561988553954e-05, "loss": 0.319793701171875, "step": 4813 }, { "epoch": 0.3254021900770583, "grad_norm": 2.217508554458618, "learning_rate": 2.377294374280074e-05, "loss": 0.3701171875, "step": 4814 }, { "epoch": 0.32546978504799245, "grad_norm": 1.3008995056152344, "learning_rate": 2.3770267175572367e-05, "loss": 0.24395751953125, "step": 4815 }, { "epoch": 0.32553738001892657, "grad_norm": 1.0298750400543213, "learning_rate": 2.3767590183983923e-05, "loss": 0.22723388671875, "step": 4816 }, { "epoch": 0.32560497498986074, "grad_norm": 1.8134840726852417, "learning_rate": 2.376491276816494e-05, "loss": 0.189788818359375, "step": 4817 }, { "epoch": 0.3256725699607949, "grad_norm": 2.1115128993988037, "learning_rate": 2.3762234928244973e-05, "loss": 0.2648162841796875, "step": 4818 }, { "epoch": 0.3257401649317291, "grad_norm": 1.2360869646072388, "learning_rate": 2.375955666435358e-05, "loss": 0.222137451171875, "step": 4819 }, { "epoch": 0.32580775990266325, "grad_norm": 1.6978667974472046, "learning_rate": 2.3756877976620372e-05, "loss": 0.301513671875, "step": 4820 }, { "epoch": 0.3258753548735974, "grad_norm": 1.5950090885162354, "learning_rate": 2.3754198865174944e-05, "loss": 0.25653076171875, "step": 4821 }, { "epoch": 0.3259429498445316, "grad_norm": 1.7264209985733032, "learning_rate": 2.375151933014693e-05, "loss": 0.35150146484375, "step": 4822 }, { "epoch": 0.3260105448154657, "grad_norm": 2.1063907146453857, "learning_rate": 2.3748839371665987e-05, "loss": 0.23150634765625, "step": 4823 }, { "epoch": 0.3260781397863999, "grad_norm": 1.6843535900115967, "learning_rate": 2.374615898986179e-05, "loss": 0.2913818359375, "step": 4824 }, { "epoch": 0.32614573475733405, "grad_norm": 1.3091928958892822, "learning_rate": 2.3743478184864026e-05, "loss": 0.2410125732421875, "step": 4825 }, { "epoch": 0.3262133297282682, "grad_norm": 1.2361270189285278, "learning_rate": 2.374079695680241e-05, "loss": 0.2841796875, "step": 4826 }, { "epoch": 0.3262809246992024, "grad_norm": 0.5845895409584045, "learning_rate": 2.3738115305806687e-05, "loss": 0.1357269287109375, "step": 4827 }, { "epoch": 0.32634851967013656, "grad_norm": 1.1686328649520874, "learning_rate": 2.3735433232006597e-05, "loss": 0.21978759765625, "step": 4828 }, { "epoch": 0.32641611464107073, "grad_norm": 1.075465202331543, "learning_rate": 2.3732750735531926e-05, "loss": 0.246917724609375, "step": 4829 }, { "epoch": 0.32648370961200485, "grad_norm": 1.3348536491394043, "learning_rate": 2.3730067816512464e-05, "loss": 0.296630859375, "step": 4830 }, { "epoch": 0.326551304582939, "grad_norm": 1.7890057563781738, "learning_rate": 2.3727384475078028e-05, "loss": 0.273681640625, "step": 4831 }, { "epoch": 0.3266188995538732, "grad_norm": 1.390799880027771, "learning_rate": 2.3724700711358456e-05, "loss": 0.23419189453125, "step": 4832 }, { "epoch": 0.32668649452480736, "grad_norm": 1.2309232950210571, "learning_rate": 2.372201652548361e-05, "loss": 0.337127685546875, "step": 4833 }, { "epoch": 0.32675408949574153, "grad_norm": 3.1225175857543945, "learning_rate": 2.3719331917583358e-05, "loss": 0.36602783203125, "step": 4834 }, { "epoch": 0.3268216844666757, "grad_norm": 1.5357494354248047, "learning_rate": 2.3716646887787604e-05, "loss": 0.28253173828125, "step": 4835 }, { "epoch": 0.3268892794376098, "grad_norm": 1.6090292930603027, "learning_rate": 2.3713961436226264e-05, "loss": 0.338134765625, "step": 4836 }, { "epoch": 0.326956874408544, "grad_norm": 0.9840015769004822, "learning_rate": 2.371127556302928e-05, "loss": 0.2342376708984375, "step": 4837 }, { "epoch": 0.32702446937947816, "grad_norm": 1.2861151695251465, "learning_rate": 2.3708589268326612e-05, "loss": 0.30718994140625, "step": 4838 }, { "epoch": 0.3270920643504123, "grad_norm": 0.8735483884811401, "learning_rate": 2.3705902552248234e-05, "loss": 0.1071014404296875, "step": 4839 }, { "epoch": 0.3271596593213465, "grad_norm": 0.9955658316612244, "learning_rate": 2.3703215414924146e-05, "loss": 0.1864776611328125, "step": 4840 }, { "epoch": 0.32722725429228067, "grad_norm": 2.1812071800231934, "learning_rate": 2.3700527856484376e-05, "loss": 0.291717529296875, "step": 4841 }, { "epoch": 0.32729484926321484, "grad_norm": 0.7965133786201477, "learning_rate": 2.3697839877058963e-05, "loss": 0.16912841796875, "step": 4842 }, { "epoch": 0.32736244423414895, "grad_norm": 0.9505574107170105, "learning_rate": 2.369515147677796e-05, "loss": 0.204376220703125, "step": 4843 }, { "epoch": 0.3274300392050831, "grad_norm": 1.9248058795928955, "learning_rate": 2.3692462655771458e-05, "loss": 0.3665771484375, "step": 4844 }, { "epoch": 0.3274976341760173, "grad_norm": 0.9398323893547058, "learning_rate": 2.368977341416955e-05, "loss": 0.214599609375, "step": 4845 }, { "epoch": 0.32756522914695146, "grad_norm": 1.8755537271499634, "learning_rate": 2.3687083752102366e-05, "loss": 0.2806396484375, "step": 4846 }, { "epoch": 0.32763282411788563, "grad_norm": 0.8873105645179749, "learning_rate": 2.3684393669700044e-05, "loss": 0.26153564453125, "step": 4847 }, { "epoch": 0.3277004190888198, "grad_norm": 0.9374209642410278, "learning_rate": 2.368170316709275e-05, "loss": 0.220306396484375, "step": 4848 }, { "epoch": 0.327768014059754, "grad_norm": 1.2280478477478027, "learning_rate": 2.3679012244410667e-05, "loss": 0.3048095703125, "step": 4849 }, { "epoch": 0.3278356090306881, "grad_norm": 1.046607494354248, "learning_rate": 2.3676320901783995e-05, "loss": 0.21905517578125, "step": 4850 }, { "epoch": 0.32790320400162226, "grad_norm": 1.0833932161331177, "learning_rate": 2.3673629139342963e-05, "loss": 0.1769256591796875, "step": 4851 }, { "epoch": 0.32797079897255643, "grad_norm": 1.2740861177444458, "learning_rate": 2.367093695721781e-05, "loss": 0.2321319580078125, "step": 4852 }, { "epoch": 0.3280383939434906, "grad_norm": 2.4625561237335205, "learning_rate": 2.3668244355538805e-05, "loss": 0.25885009765625, "step": 4853 }, { "epoch": 0.3281059889144248, "grad_norm": 0.7058889865875244, "learning_rate": 2.366555133443623e-05, "loss": 0.074462890625, "step": 4854 }, { "epoch": 0.32817358388535894, "grad_norm": 1.1246004104614258, "learning_rate": 2.366285789404039e-05, "loss": 0.257049560546875, "step": 4855 }, { "epoch": 0.3282411788562931, "grad_norm": 0.6482918858528137, "learning_rate": 2.3660164034481613e-05, "loss": 0.12165069580078125, "step": 4856 }, { "epoch": 0.32830877382722723, "grad_norm": 0.9187455773353577, "learning_rate": 2.3657469755890244e-05, "loss": 0.25701904296875, "step": 4857 }, { "epoch": 0.3283763687981614, "grad_norm": 1.4158145189285278, "learning_rate": 2.3654775058396644e-05, "loss": 0.2291259765625, "step": 4858 }, { "epoch": 0.32844396376909557, "grad_norm": 0.9943570494651794, "learning_rate": 2.365207994213121e-05, "loss": 0.2283782958984375, "step": 4859 }, { "epoch": 0.32851155874002974, "grad_norm": 1.5176008939743042, "learning_rate": 2.3649384407224334e-05, "loss": 0.277374267578125, "step": 4860 }, { "epoch": 0.3285791537109639, "grad_norm": 0.9519460797309875, "learning_rate": 2.3646688453806455e-05, "loss": 0.255126953125, "step": 4861 }, { "epoch": 0.3286467486818981, "grad_norm": 1.6468851566314697, "learning_rate": 2.3643992082008013e-05, "loss": 0.297088623046875, "step": 4862 }, { "epoch": 0.32871434365283225, "grad_norm": 1.2466175556182861, "learning_rate": 2.364129529195948e-05, "loss": 0.181121826171875, "step": 4863 }, { "epoch": 0.32878193862376637, "grad_norm": 1.1104174852371216, "learning_rate": 2.3638598083791337e-05, "loss": 0.288848876953125, "step": 4864 }, { "epoch": 0.32884953359470054, "grad_norm": 1.047014832496643, "learning_rate": 2.36359004576341e-05, "loss": 0.2333984375, "step": 4865 }, { "epoch": 0.3289171285656347, "grad_norm": 1.478371500968933, "learning_rate": 2.363320241361829e-05, "loss": 0.272308349609375, "step": 4866 }, { "epoch": 0.3289847235365689, "grad_norm": 0.9799684286117554, "learning_rate": 2.363050395187446e-05, "loss": 0.2387542724609375, "step": 4867 }, { "epoch": 0.32905231850750305, "grad_norm": 1.2590959072113037, "learning_rate": 2.3627805072533174e-05, "loss": 0.294952392578125, "step": 4868 }, { "epoch": 0.3291199134784372, "grad_norm": 1.1380895376205444, "learning_rate": 2.362510577572503e-05, "loss": 0.33795166015625, "step": 4869 }, { "epoch": 0.3291875084493714, "grad_norm": 1.7131301164627075, "learning_rate": 2.362240606158062e-05, "loss": 0.2060546875, "step": 4870 }, { "epoch": 0.3292551034203055, "grad_norm": 2.280632495880127, "learning_rate": 2.3619705930230587e-05, "loss": 0.28125, "step": 4871 }, { "epoch": 0.3293226983912397, "grad_norm": 1.418947458267212, "learning_rate": 2.3617005381805574e-05, "loss": 0.34246826171875, "step": 4872 }, { "epoch": 0.32939029336217385, "grad_norm": 1.7941645383834839, "learning_rate": 2.3614304416436257e-05, "loss": 0.21920013427734375, "step": 4873 }, { "epoch": 0.329457888333108, "grad_norm": 2.299586296081543, "learning_rate": 2.361160303425332e-05, "loss": 0.2811279296875, "step": 4874 }, { "epoch": 0.3295254833040422, "grad_norm": 1.228960633277893, "learning_rate": 2.3608901235387464e-05, "loss": 0.263824462890625, "step": 4875 }, { "epoch": 0.32959307827497636, "grad_norm": 1.534368634223938, "learning_rate": 2.3606199019969437e-05, "loss": 0.28033447265625, "step": 4876 }, { "epoch": 0.32966067324591053, "grad_norm": 1.3485149145126343, "learning_rate": 2.3603496388129976e-05, "loss": 0.308349609375, "step": 4877 }, { "epoch": 0.32972826821684464, "grad_norm": 1.4291175603866577, "learning_rate": 2.3600793339999855e-05, "loss": 0.2422332763671875, "step": 4878 }, { "epoch": 0.3297958631877788, "grad_norm": 1.2064330577850342, "learning_rate": 2.3598089875709868e-05, "loss": 0.204833984375, "step": 4879 }, { "epoch": 0.329863458158713, "grad_norm": 0.8134979605674744, "learning_rate": 2.3595385995390822e-05, "loss": 0.184906005859375, "step": 4880 }, { "epoch": 0.32993105312964716, "grad_norm": 1.0594472885131836, "learning_rate": 2.3592681699173544e-05, "loss": 0.275634765625, "step": 4881 }, { "epoch": 0.3299986481005813, "grad_norm": 1.8056097030639648, "learning_rate": 2.358997698718889e-05, "loss": 0.25140380859375, "step": 4882 }, { "epoch": 0.3300662430715155, "grad_norm": 1.4793988466262817, "learning_rate": 2.3587271859567725e-05, "loss": 0.236602783203125, "step": 4883 }, { "epoch": 0.33013383804244967, "grad_norm": 1.1757802963256836, "learning_rate": 2.358456631644095e-05, "loss": 0.20831298828125, "step": 4884 }, { "epoch": 0.3302014330133838, "grad_norm": 1.0691356658935547, "learning_rate": 2.3581860357939467e-05, "loss": 0.1682586669921875, "step": 4885 }, { "epoch": 0.33026902798431795, "grad_norm": 2.15641713142395, "learning_rate": 2.3579153984194206e-05, "loss": 0.3236083984375, "step": 4886 }, { "epoch": 0.3303366229552521, "grad_norm": 2.48262357711792, "learning_rate": 2.3576447195336123e-05, "loss": 0.322174072265625, "step": 4887 }, { "epoch": 0.3304042179261863, "grad_norm": 1.5944678783416748, "learning_rate": 2.3573739991496196e-05, "loss": 0.3583984375, "step": 4888 }, { "epoch": 0.33047181289712046, "grad_norm": 0.9076573848724365, "learning_rate": 2.3571032372805403e-05, "loss": 0.1227264404296875, "step": 4889 }, { "epoch": 0.33053940786805464, "grad_norm": 1.850365400314331, "learning_rate": 2.3568324339394764e-05, "loss": 0.3150634765625, "step": 4890 }, { "epoch": 0.3306070028389888, "grad_norm": 2.066051483154297, "learning_rate": 2.3565615891395303e-05, "loss": 0.29852294921875, "step": 4891 }, { "epoch": 0.3306745978099229, "grad_norm": 1.5019291639328003, "learning_rate": 2.356290702893808e-05, "loss": 0.1710205078125, "step": 4892 }, { "epoch": 0.3307421927808571, "grad_norm": 1.0569672584533691, "learning_rate": 2.356019775215416e-05, "loss": 0.205810546875, "step": 4893 }, { "epoch": 0.33080978775179126, "grad_norm": 1.3327080011367798, "learning_rate": 2.355748806117464e-05, "loss": 0.2803955078125, "step": 4894 }, { "epoch": 0.33087738272272543, "grad_norm": 1.770578384399414, "learning_rate": 2.3554777956130625e-05, "loss": 0.334625244140625, "step": 4895 }, { "epoch": 0.3309449776936596, "grad_norm": 1.3727365732192993, "learning_rate": 2.3552067437153258e-05, "loss": 0.307891845703125, "step": 4896 }, { "epoch": 0.3310125726645938, "grad_norm": 2.0259153842926025, "learning_rate": 2.3549356504373678e-05, "loss": 0.279388427734375, "step": 4897 }, { "epoch": 0.33108016763552794, "grad_norm": 1.190627932548523, "learning_rate": 2.3546645157923065e-05, "loss": 0.1586761474609375, "step": 4898 }, { "epoch": 0.33114776260646206, "grad_norm": 1.0850017070770264, "learning_rate": 2.3543933397932603e-05, "loss": 0.27301025390625, "step": 4899 }, { "epoch": 0.33121535757739623, "grad_norm": 1.1423019170761108, "learning_rate": 2.3541221224533515e-05, "loss": 0.32525634765625, "step": 4900 }, { "epoch": 0.3312829525483304, "grad_norm": 0.9403648972511292, "learning_rate": 2.3538508637857026e-05, "loss": 0.22684478759765625, "step": 4901 }, { "epoch": 0.33135054751926457, "grad_norm": 1.6307556629180908, "learning_rate": 2.353579563803439e-05, "loss": 0.2769775390625, "step": 4902 }, { "epoch": 0.33141814249019874, "grad_norm": 1.270128846168518, "learning_rate": 2.3533082225196876e-05, "loss": 0.25091552734375, "step": 4903 }, { "epoch": 0.3314857374611329, "grad_norm": 1.0995287895202637, "learning_rate": 2.353036839947577e-05, "loss": 0.14159011840820312, "step": 4904 }, { "epoch": 0.331553332432067, "grad_norm": 1.580748200416565, "learning_rate": 2.3527654161002402e-05, "loss": 0.1846466064453125, "step": 4905 }, { "epoch": 0.3316209274030012, "grad_norm": 1.0109180212020874, "learning_rate": 2.3524939509908087e-05, "loss": 0.1492919921875, "step": 4906 }, { "epoch": 0.33168852237393537, "grad_norm": 1.1197748184204102, "learning_rate": 2.3522224446324183e-05, "loss": 0.3001708984375, "step": 4907 }, { "epoch": 0.33175611734486954, "grad_norm": 1.3731409311294556, "learning_rate": 2.3519508970382062e-05, "loss": 0.2282562255859375, "step": 4908 }, { "epoch": 0.3318237123158037, "grad_norm": 0.7014598846435547, "learning_rate": 2.351679308221312e-05, "loss": 0.1298370361328125, "step": 4909 }, { "epoch": 0.3318913072867379, "grad_norm": 0.9134876132011414, "learning_rate": 2.3514076781948755e-05, "loss": 0.2213134765625, "step": 4910 }, { "epoch": 0.33195890225767205, "grad_norm": 1.2337071895599365, "learning_rate": 2.351136006972041e-05, "loss": 0.2459716796875, "step": 4911 }, { "epoch": 0.33202649722860617, "grad_norm": 1.4825749397277832, "learning_rate": 2.350864294565953e-05, "loss": 0.284881591796875, "step": 4912 }, { "epoch": 0.33209409219954034, "grad_norm": 0.7612229585647583, "learning_rate": 2.3505925409897596e-05, "loss": 0.1114044189453125, "step": 4913 }, { "epoch": 0.3321616871704745, "grad_norm": 1.328326940536499, "learning_rate": 2.350320746256609e-05, "loss": 0.273345947265625, "step": 4914 }, { "epoch": 0.3322292821414087, "grad_norm": 1.1397649049758911, "learning_rate": 2.3500489103796528e-05, "loss": 0.13392257690429688, "step": 4915 }, { "epoch": 0.33229687711234285, "grad_norm": 1.9693645238876343, "learning_rate": 2.3497770333720432e-05, "loss": 0.270904541015625, "step": 4916 }, { "epoch": 0.332364472083277, "grad_norm": 0.7248533964157104, "learning_rate": 2.3495051152469367e-05, "loss": 0.09040069580078125, "step": 4917 }, { "epoch": 0.3324320670542112, "grad_norm": 3.49579119682312, "learning_rate": 2.3492331560174892e-05, "loss": 0.340057373046875, "step": 4918 }, { "epoch": 0.3324996620251453, "grad_norm": 2.141244888305664, "learning_rate": 2.348961155696861e-05, "loss": 0.2080535888671875, "step": 4919 }, { "epoch": 0.3325672569960795, "grad_norm": 1.4276013374328613, "learning_rate": 2.348689114298212e-05, "loss": 0.2774658203125, "step": 4920 }, { "epoch": 0.33263485196701364, "grad_norm": 0.8381689190864563, "learning_rate": 2.348417031834706e-05, "loss": 0.124664306640625, "step": 4921 }, { "epoch": 0.3327024469379478, "grad_norm": 2.270246744155884, "learning_rate": 2.348144908319507e-05, "loss": 0.27044677734375, "step": 4922 }, { "epoch": 0.332770041908882, "grad_norm": 2.4780213832855225, "learning_rate": 2.3478727437657838e-05, "loss": 0.3363037109375, "step": 4923 }, { "epoch": 0.33283763687981616, "grad_norm": 1.2423031330108643, "learning_rate": 2.3476005381867034e-05, "loss": 0.1546478271484375, "step": 4924 }, { "epoch": 0.3329052318507503, "grad_norm": 1.4334259033203125, "learning_rate": 2.3473282915954384e-05, "loss": 0.220458984375, "step": 4925 }, { "epoch": 0.33297282682168444, "grad_norm": 1.3043640851974487, "learning_rate": 2.3470560040051613e-05, "loss": 0.25579833984375, "step": 4926 }, { "epoch": 0.3330404217926186, "grad_norm": 1.3861474990844727, "learning_rate": 2.3467836754290467e-05, "loss": 0.255828857421875, "step": 4927 }, { "epoch": 0.3331080167635528, "grad_norm": 2.0594234466552734, "learning_rate": 2.3465113058802717e-05, "loss": 0.35400390625, "step": 4928 }, { "epoch": 0.33317561173448695, "grad_norm": 1.6377484798431396, "learning_rate": 2.3462388953720153e-05, "loss": 0.192626953125, "step": 4929 }, { "epoch": 0.3332432067054211, "grad_norm": 1.1476030349731445, "learning_rate": 2.345966443917459e-05, "loss": 0.26513671875, "step": 4930 }, { "epoch": 0.3333108016763553, "grad_norm": 1.1206945180892944, "learning_rate": 2.3456939515297842e-05, "loss": 0.28961181640625, "step": 4931 }, { "epoch": 0.33337839664728947, "grad_norm": 1.8141677379608154, "learning_rate": 2.3454214182221773e-05, "loss": 0.1858673095703125, "step": 4932 }, { "epoch": 0.3334459916182236, "grad_norm": 1.6733072996139526, "learning_rate": 2.3451488440078246e-05, "loss": 0.29254150390625, "step": 4933 }, { "epoch": 0.33351358658915775, "grad_norm": 1.2865350246429443, "learning_rate": 2.3448762288999145e-05, "loss": 0.32391357421875, "step": 4934 }, { "epoch": 0.3335811815600919, "grad_norm": 1.2704648971557617, "learning_rate": 2.3446035729116387e-05, "loss": 0.203216552734375, "step": 4935 }, { "epoch": 0.3336487765310261, "grad_norm": 1.7084476947784424, "learning_rate": 2.344330876056189e-05, "loss": 0.21044921875, "step": 4936 }, { "epoch": 0.33371637150196026, "grad_norm": 1.4941660165786743, "learning_rate": 2.3440581383467613e-05, "loss": 0.290191650390625, "step": 4937 }, { "epoch": 0.33378396647289443, "grad_norm": 0.9567864537239075, "learning_rate": 2.3437853597965514e-05, "loss": 0.16693115234375, "step": 4938 }, { "epoch": 0.3338515614438286, "grad_norm": 0.7561823725700378, "learning_rate": 2.3435125404187585e-05, "loss": 0.14907073974609375, "step": 4939 }, { "epoch": 0.3339191564147627, "grad_norm": 1.0697752237319946, "learning_rate": 2.3432396802265828e-05, "loss": 0.3114013671875, "step": 4940 }, { "epoch": 0.3339867513856969, "grad_norm": 1.0361686944961548, "learning_rate": 2.3429667792332278e-05, "loss": 0.24658203125, "step": 4941 }, { "epoch": 0.33405434635663106, "grad_norm": 1.1502000093460083, "learning_rate": 2.342693837451897e-05, "loss": 0.21026611328125, "step": 4942 }, { "epoch": 0.33412194132756523, "grad_norm": 1.7547396421432495, "learning_rate": 2.342420854895798e-05, "loss": 0.302764892578125, "step": 4943 }, { "epoch": 0.3341895362984994, "grad_norm": 1.6786892414093018, "learning_rate": 2.3421478315781393e-05, "loss": 0.3022918701171875, "step": 4944 }, { "epoch": 0.33425713126943357, "grad_norm": 1.2065534591674805, "learning_rate": 2.341874767512131e-05, "loss": 0.203033447265625, "step": 4945 }, { "epoch": 0.33432472624036774, "grad_norm": 1.6049847602844238, "learning_rate": 2.3416016627109863e-05, "loss": 0.26397705078125, "step": 4946 }, { "epoch": 0.33439232121130186, "grad_norm": 0.4587298631668091, "learning_rate": 2.3413285171879188e-05, "loss": 0.11284446716308594, "step": 4947 }, { "epoch": 0.33445991618223603, "grad_norm": 1.0056113004684448, "learning_rate": 2.3410553309561456e-05, "loss": 0.2006378173828125, "step": 4948 }, { "epoch": 0.3345275111531702, "grad_norm": 0.953926146030426, "learning_rate": 2.340782104028885e-05, "loss": 0.1472930908203125, "step": 4949 }, { "epoch": 0.33459510612410437, "grad_norm": 1.9665638208389282, "learning_rate": 2.3405088364193577e-05, "loss": 0.280487060546875, "step": 4950 }, { "epoch": 0.33466270109503854, "grad_norm": 2.2769598960876465, "learning_rate": 2.3402355281407854e-05, "loss": 0.288421630859375, "step": 4951 }, { "epoch": 0.3347302960659727, "grad_norm": 0.9835708737373352, "learning_rate": 2.339962179206393e-05, "loss": 0.2095947265625, "step": 4952 }, { "epoch": 0.3347978910369069, "grad_norm": 0.9985304474830627, "learning_rate": 2.339688789629407e-05, "loss": 0.207183837890625, "step": 4953 }, { "epoch": 0.334865486007841, "grad_norm": 0.724738359451294, "learning_rate": 2.339415359423055e-05, "loss": 0.12310791015625, "step": 4954 }, { "epoch": 0.33493308097877517, "grad_norm": 0.7232855558395386, "learning_rate": 2.3391418886005682e-05, "loss": 0.128448486328125, "step": 4955 }, { "epoch": 0.33500067594970934, "grad_norm": 1.1169649362564087, "learning_rate": 2.3388683771751775e-05, "loss": 0.18265533447265625, "step": 4956 }, { "epoch": 0.3350682709206435, "grad_norm": 1.29220712184906, "learning_rate": 2.3385948251601183e-05, "loss": 0.25848388671875, "step": 4957 }, { "epoch": 0.3351358658915777, "grad_norm": 1.6152775287628174, "learning_rate": 2.338321232568626e-05, "loss": 0.34429931640625, "step": 4958 }, { "epoch": 0.33520346086251185, "grad_norm": 1.395302414894104, "learning_rate": 2.3380475994139394e-05, "loss": 0.23956298828125, "step": 4959 }, { "epoch": 0.335271055833446, "grad_norm": 1.615907073020935, "learning_rate": 2.3377739257092983e-05, "loss": 0.279388427734375, "step": 4960 }, { "epoch": 0.33533865080438013, "grad_norm": 1.263611912727356, "learning_rate": 2.3375002114679446e-05, "loss": 0.1969757080078125, "step": 4961 }, { "epoch": 0.3354062457753143, "grad_norm": 1.6132733821868896, "learning_rate": 2.3372264567031222e-05, "loss": 0.309814453125, "step": 4962 }, { "epoch": 0.3354738407462485, "grad_norm": 1.4181779623031616, "learning_rate": 2.3369526614280767e-05, "loss": 0.237396240234375, "step": 4963 }, { "epoch": 0.33554143571718265, "grad_norm": 1.3555101156234741, "learning_rate": 2.336678825656057e-05, "loss": 0.1925048828125, "step": 4964 }, { "epoch": 0.3356090306881168, "grad_norm": 1.008127212524414, "learning_rate": 2.3364049494003123e-05, "loss": 0.181182861328125, "step": 4965 }, { "epoch": 0.335676625659051, "grad_norm": 1.1806143522262573, "learning_rate": 2.3361310326740944e-05, "loss": 0.270538330078125, "step": 4966 }, { "epoch": 0.3357442206299851, "grad_norm": 0.8218417763710022, "learning_rate": 2.3358570754906572e-05, "loss": 0.11859130859375, "step": 4967 }, { "epoch": 0.3358118156009193, "grad_norm": 2.532951593399048, "learning_rate": 2.335583077863257e-05, "loss": 0.305755615234375, "step": 4968 }, { "epoch": 0.33587941057185344, "grad_norm": 2.0120437145233154, "learning_rate": 2.3353090398051507e-05, "loss": 0.320343017578125, "step": 4969 }, { "epoch": 0.3359470055427876, "grad_norm": 1.0322321653366089, "learning_rate": 2.3350349613295987e-05, "loss": 0.259765625, "step": 4970 }, { "epoch": 0.3360146005137218, "grad_norm": 1.5836783647537231, "learning_rate": 2.334760842449862e-05, "loss": 0.30322265625, "step": 4971 }, { "epoch": 0.33608219548465595, "grad_norm": 1.6978248357772827, "learning_rate": 2.3344866831792047e-05, "loss": 0.31707763671875, "step": 4972 }, { "epoch": 0.3361497904555901, "grad_norm": 1.0086535215377808, "learning_rate": 2.3342124835308917e-05, "loss": 0.2413330078125, "step": 4973 }, { "epoch": 0.33621738542652424, "grad_norm": 0.9164353013038635, "learning_rate": 2.333938243518191e-05, "loss": 0.21728515625, "step": 4974 }, { "epoch": 0.3362849803974584, "grad_norm": 1.5872639417648315, "learning_rate": 2.333663963154372e-05, "loss": 0.33221435546875, "step": 4975 }, { "epoch": 0.3363525753683926, "grad_norm": 1.1238791942596436, "learning_rate": 2.3333896424527058e-05, "loss": 0.268524169921875, "step": 4976 }, { "epoch": 0.33642017033932675, "grad_norm": 1.3666330575942993, "learning_rate": 2.3331152814264658e-05, "loss": 0.248199462890625, "step": 4977 }, { "epoch": 0.3364877653102609, "grad_norm": 1.977193832397461, "learning_rate": 2.3328408800889273e-05, "loss": 0.2977294921875, "step": 4978 }, { "epoch": 0.3365553602811951, "grad_norm": 1.971431851387024, "learning_rate": 2.3325664384533677e-05, "loss": 0.24822998046875, "step": 4979 }, { "epoch": 0.33662295525212926, "grad_norm": 1.6769298315048218, "learning_rate": 2.3322919565330665e-05, "loss": 0.2989501953125, "step": 4980 }, { "epoch": 0.3366905502230634, "grad_norm": 1.0292843580245972, "learning_rate": 2.3320174343413042e-05, "loss": 0.170440673828125, "step": 4981 }, { "epoch": 0.33675814519399755, "grad_norm": 1.2334165573120117, "learning_rate": 2.3317428718913642e-05, "loss": 0.26861572265625, "step": 4982 }, { "epoch": 0.3368257401649317, "grad_norm": 1.7042677402496338, "learning_rate": 2.3314682691965314e-05, "loss": 0.283966064453125, "step": 4983 }, { "epoch": 0.3368933351358659, "grad_norm": 2.4097025394439697, "learning_rate": 2.3311936262700934e-05, "loss": 0.326080322265625, "step": 4984 }, { "epoch": 0.33696093010680006, "grad_norm": 0.8005099892616272, "learning_rate": 2.3309189431253378e-05, "loss": 0.18773651123046875, "step": 4985 }, { "epoch": 0.33702852507773423, "grad_norm": 1.8553308248519897, "learning_rate": 2.3306442197755572e-05, "loss": 0.234100341796875, "step": 4986 }, { "epoch": 0.3370961200486684, "grad_norm": 1.7357361316680908, "learning_rate": 2.330369456234043e-05, "loss": 0.2943115234375, "step": 4987 }, { "epoch": 0.3371637150196025, "grad_norm": 1.5978878736495972, "learning_rate": 2.3300946525140903e-05, "loss": 0.25921630859375, "step": 4988 }, { "epoch": 0.3372313099905367, "grad_norm": 1.707491397857666, "learning_rate": 2.3298198086289965e-05, "loss": 0.2847900390625, "step": 4989 }, { "epoch": 0.33729890496147086, "grad_norm": 0.7656126022338867, "learning_rate": 2.329544924592059e-05, "loss": 0.155181884765625, "step": 4990 }, { "epoch": 0.33736649993240503, "grad_norm": 1.8796885013580322, "learning_rate": 2.3292700004165798e-05, "loss": 0.191314697265625, "step": 4991 }, { "epoch": 0.3374340949033392, "grad_norm": 2.1940410137176514, "learning_rate": 2.3289950361158605e-05, "loss": 0.276214599609375, "step": 4992 }, { "epoch": 0.33750168987427337, "grad_norm": 2.0029940605163574, "learning_rate": 2.328720031703206e-05, "loss": 0.3780517578125, "step": 4993 }, { "epoch": 0.33756928484520754, "grad_norm": 1.6522982120513916, "learning_rate": 2.3284449871919227e-05, "loss": 0.319091796875, "step": 4994 }, { "epoch": 0.33763687981614166, "grad_norm": 2.8024978637695312, "learning_rate": 2.328169902595319e-05, "loss": 0.27947998046875, "step": 4995 }, { "epoch": 0.3377044747870758, "grad_norm": 1.1151318550109863, "learning_rate": 2.327894777926705e-05, "loss": 0.11016845703125, "step": 4996 }, { "epoch": 0.33777206975801, "grad_norm": 1.4554661512374878, "learning_rate": 2.3276196131993932e-05, "loss": 0.29583740234375, "step": 4997 }, { "epoch": 0.33783966472894417, "grad_norm": 1.3046742677688599, "learning_rate": 2.327344408426697e-05, "loss": 0.239105224609375, "step": 4998 }, { "epoch": 0.33790725969987834, "grad_norm": 2.159038782119751, "learning_rate": 2.3270691636219338e-05, "loss": 0.2349395751953125, "step": 4999 }, { "epoch": 0.3379748546708125, "grad_norm": 1.161941647529602, "learning_rate": 2.3267938787984206e-05, "loss": 0.240631103515625, "step": 5000 }, { "epoch": 0.3380424496417467, "grad_norm": 1.1413780450820923, "learning_rate": 2.326518553969478e-05, "loss": 0.2196807861328125, "step": 5001 }, { "epoch": 0.3381100446126808, "grad_norm": 1.216752052307129, "learning_rate": 2.326243189148427e-05, "loss": 0.231109619140625, "step": 5002 }, { "epoch": 0.33817763958361496, "grad_norm": 0.846778392791748, "learning_rate": 2.3259677843485933e-05, "loss": 0.18133544921875, "step": 5003 }, { "epoch": 0.33824523455454913, "grad_norm": 2.1609721183776855, "learning_rate": 2.3256923395833008e-05, "loss": 0.3514404296875, "step": 5004 }, { "epoch": 0.3383128295254833, "grad_norm": 1.3589974641799927, "learning_rate": 2.325416854865878e-05, "loss": 0.32037353515625, "step": 5005 }, { "epoch": 0.3383804244964175, "grad_norm": 1.4766756296157837, "learning_rate": 2.3251413302096545e-05, "loss": 0.253631591796875, "step": 5006 }, { "epoch": 0.33844801946735165, "grad_norm": 0.9964460134506226, "learning_rate": 2.3248657656279622e-05, "loss": 0.25634765625, "step": 5007 }, { "epoch": 0.3385156144382858, "grad_norm": 1.6567883491516113, "learning_rate": 2.3245901611341344e-05, "loss": 0.2747039794921875, "step": 5008 }, { "epoch": 0.33858320940921993, "grad_norm": 1.5109028816223145, "learning_rate": 2.3243145167415063e-05, "loss": 0.275634765625, "step": 5009 }, { "epoch": 0.3386508043801541, "grad_norm": 1.4074209928512573, "learning_rate": 2.3240388324634155e-05, "loss": 0.25445556640625, "step": 5010 }, { "epoch": 0.3387183993510883, "grad_norm": 1.0788124799728394, "learning_rate": 2.323763108313202e-05, "loss": 0.26904296875, "step": 5011 }, { "epoch": 0.33878599432202244, "grad_norm": 1.03591787815094, "learning_rate": 2.3234873443042056e-05, "loss": 0.1824951171875, "step": 5012 }, { "epoch": 0.3388535892929566, "grad_norm": 1.129499077796936, "learning_rate": 2.3232115404497706e-05, "loss": 0.26324462890625, "step": 5013 }, { "epoch": 0.3389211842638908, "grad_norm": 1.4131956100463867, "learning_rate": 2.3229356967632412e-05, "loss": 0.26251220703125, "step": 5014 }, { "epoch": 0.33898877923482496, "grad_norm": 1.4137991666793823, "learning_rate": 2.322659813257966e-05, "loss": 0.30548095703125, "step": 5015 }, { "epoch": 0.33905637420575907, "grad_norm": 1.6615304946899414, "learning_rate": 2.3223838899472923e-05, "loss": 0.2917938232421875, "step": 5016 }, { "epoch": 0.33912396917669324, "grad_norm": 1.1191768646240234, "learning_rate": 2.322107926844572e-05, "loss": 0.1641082763671875, "step": 5017 }, { "epoch": 0.3391915641476274, "grad_norm": 1.6363235712051392, "learning_rate": 2.321831923963157e-05, "loss": 0.249481201171875, "step": 5018 }, { "epoch": 0.3392591591185616, "grad_norm": 1.3045858144760132, "learning_rate": 2.3215558813164037e-05, "loss": 0.316162109375, "step": 5019 }, { "epoch": 0.33932675408949575, "grad_norm": 1.4797539710998535, "learning_rate": 2.3212797989176667e-05, "loss": 0.16156005859375, "step": 5020 }, { "epoch": 0.3393943490604299, "grad_norm": 1.2600150108337402, "learning_rate": 2.321003676780306e-05, "loss": 0.241455078125, "step": 5021 }, { "epoch": 0.3394619440313641, "grad_norm": 0.8866836428642273, "learning_rate": 2.3207275149176812e-05, "loss": 0.18138504028320312, "step": 5022 }, { "epoch": 0.3395295390022982, "grad_norm": 1.105270504951477, "learning_rate": 2.3204513133431555e-05, "loss": 0.2899169921875, "step": 5023 }, { "epoch": 0.3395971339732324, "grad_norm": 0.7403905987739563, "learning_rate": 2.3201750720700926e-05, "loss": 0.1763763427734375, "step": 5024 }, { "epoch": 0.33966472894416655, "grad_norm": 1.3186166286468506, "learning_rate": 2.3198987911118593e-05, "loss": 0.304962158203125, "step": 5025 }, { "epoch": 0.3397323239151007, "grad_norm": 1.6370978355407715, "learning_rate": 2.3196224704818242e-05, "loss": 0.294189453125, "step": 5026 }, { "epoch": 0.3397999188860349, "grad_norm": 1.781326413154602, "learning_rate": 2.319346110193356e-05, "loss": 0.335968017578125, "step": 5027 }, { "epoch": 0.33986751385696906, "grad_norm": 1.5366636514663696, "learning_rate": 2.319069710259828e-05, "loss": 0.21844482421875, "step": 5028 }, { "epoch": 0.3399351088279032, "grad_norm": 1.226432204246521, "learning_rate": 2.3187932706946135e-05, "loss": 0.2024078369140625, "step": 5029 }, { "epoch": 0.34000270379883735, "grad_norm": 0.8880615234375, "learning_rate": 2.3185167915110882e-05, "loss": 0.198150634765625, "step": 5030 }, { "epoch": 0.3400702987697715, "grad_norm": 1.2426449060440063, "learning_rate": 2.3182402727226306e-05, "loss": 0.252105712890625, "step": 5031 }, { "epoch": 0.3401378937407057, "grad_norm": 1.1604292392730713, "learning_rate": 2.3179637143426204e-05, "loss": 0.17974853515625, "step": 5032 }, { "epoch": 0.34020548871163986, "grad_norm": 2.0287694931030273, "learning_rate": 2.317687116384438e-05, "loss": 0.26336669921875, "step": 5033 }, { "epoch": 0.34027308368257403, "grad_norm": 1.7926292419433594, "learning_rate": 2.3174104788614684e-05, "loss": 0.3262939453125, "step": 5034 }, { "epoch": 0.3403406786535082, "grad_norm": 1.6531699895858765, "learning_rate": 2.317133801787096e-05, "loss": 0.24608612060546875, "step": 5035 }, { "epoch": 0.3404082736244423, "grad_norm": 1.7265517711639404, "learning_rate": 2.316857085174709e-05, "loss": 0.298675537109375, "step": 5036 }, { "epoch": 0.3404758685953765, "grad_norm": 4.431844234466553, "learning_rate": 2.316580329037696e-05, "loss": 0.3257904052734375, "step": 5037 }, { "epoch": 0.34054346356631066, "grad_norm": 3.082228422164917, "learning_rate": 2.3163035333894483e-05, "loss": 0.3103485107421875, "step": 5038 }, { "epoch": 0.3406110585372448, "grad_norm": 1.0972225666046143, "learning_rate": 2.3160266982433593e-05, "loss": 0.230438232421875, "step": 5039 }, { "epoch": 0.340678653508179, "grad_norm": 2.7093214988708496, "learning_rate": 2.3157498236128236e-05, "loss": 0.309600830078125, "step": 5040 }, { "epoch": 0.34074624847911317, "grad_norm": 2.321153163909912, "learning_rate": 2.3154729095112384e-05, "loss": 0.31488037109375, "step": 5041 }, { "epoch": 0.34081384345004734, "grad_norm": 1.1010609865188599, "learning_rate": 2.315195955952003e-05, "loss": 0.27923583984375, "step": 5042 }, { "epoch": 0.34088143842098145, "grad_norm": 1.0246235132217407, "learning_rate": 2.314918962948517e-05, "loss": 0.20635986328125, "step": 5043 }, { "epoch": 0.3409490333919156, "grad_norm": 1.735975980758667, "learning_rate": 2.3146419305141838e-05, "loss": 0.268218994140625, "step": 5044 }, { "epoch": 0.3410166283628498, "grad_norm": 1.9810298681259155, "learning_rate": 2.3143648586624077e-05, "loss": 0.297882080078125, "step": 5045 }, { "epoch": 0.34108422333378396, "grad_norm": 1.7452040910720825, "learning_rate": 2.3140877474065954e-05, "loss": 0.298919677734375, "step": 5046 }, { "epoch": 0.34115181830471814, "grad_norm": 1.7902683019638062, "learning_rate": 2.313810596760155e-05, "loss": 0.34051513671875, "step": 5047 }, { "epoch": 0.3412194132756523, "grad_norm": 0.7685219645500183, "learning_rate": 2.3135334067364972e-05, "loss": 0.17383575439453125, "step": 5048 }, { "epoch": 0.3412870082465865, "grad_norm": 1.8462592363357544, "learning_rate": 2.3132561773490338e-05, "loss": 0.2840576171875, "step": 5049 }, { "epoch": 0.3413546032175206, "grad_norm": 1.1477941274642944, "learning_rate": 2.3129789086111785e-05, "loss": 0.224853515625, "step": 5050 }, { "epoch": 0.34142219818845476, "grad_norm": 1.741380214691162, "learning_rate": 2.3127016005363478e-05, "loss": 0.30511474609375, "step": 5051 }, { "epoch": 0.34148979315938893, "grad_norm": 1.2192302942276, "learning_rate": 2.31242425313796e-05, "loss": 0.264373779296875, "step": 5052 }, { "epoch": 0.3415573881303231, "grad_norm": 1.7020680904388428, "learning_rate": 2.3121468664294345e-05, "loss": 0.2813720703125, "step": 5053 }, { "epoch": 0.3416249831012573, "grad_norm": 1.8274614810943604, "learning_rate": 2.3118694404241927e-05, "loss": 0.281158447265625, "step": 5054 }, { "epoch": 0.34169257807219144, "grad_norm": 1.3740001916885376, "learning_rate": 2.3115919751356583e-05, "loss": 0.267547607421875, "step": 5055 }, { "epoch": 0.3417601730431256, "grad_norm": 0.4584222137928009, "learning_rate": 2.311314470577257e-05, "loss": 0.10162353515625, "step": 5056 }, { "epoch": 0.34182776801405973, "grad_norm": 1.631006121635437, "learning_rate": 2.311036926762416e-05, "loss": 0.33056640625, "step": 5057 }, { "epoch": 0.3418953629849939, "grad_norm": 1.3764128684997559, "learning_rate": 2.3107593437045643e-05, "loss": 0.27734375, "step": 5058 }, { "epoch": 0.34196295795592807, "grad_norm": 1.9244492053985596, "learning_rate": 2.310481721417134e-05, "loss": 0.320526123046875, "step": 5059 }, { "epoch": 0.34203055292686224, "grad_norm": 1.3633743524551392, "learning_rate": 2.3102040599135578e-05, "loss": 0.262359619140625, "step": 5060 }, { "epoch": 0.3420981478977964, "grad_norm": 1.2101860046386719, "learning_rate": 2.3099263592072703e-05, "loss": 0.31585693359375, "step": 5061 }, { "epoch": 0.3421657428687306, "grad_norm": 0.98039710521698, "learning_rate": 2.3096486193117088e-05, "loss": 0.220184326171875, "step": 5062 }, { "epoch": 0.34223333783966475, "grad_norm": 1.8237046003341675, "learning_rate": 2.309370840240312e-05, "loss": 0.178680419921875, "step": 5063 }, { "epoch": 0.34230093281059887, "grad_norm": 1.6142135858535767, "learning_rate": 2.3090930220065202e-05, "loss": 0.2077178955078125, "step": 5064 }, { "epoch": 0.34236852778153304, "grad_norm": 1.181339979171753, "learning_rate": 2.3088151646237766e-05, "loss": 0.220703125, "step": 5065 }, { "epoch": 0.3424361227524672, "grad_norm": 1.3478128910064697, "learning_rate": 2.3085372681055257e-05, "loss": 0.27459716796875, "step": 5066 }, { "epoch": 0.3425037177234014, "grad_norm": 0.8602297902107239, "learning_rate": 2.3082593324652132e-05, "loss": 0.1362152099609375, "step": 5067 }, { "epoch": 0.34257131269433555, "grad_norm": 1.5474557876586914, "learning_rate": 2.307981357716287e-05, "loss": 0.247100830078125, "step": 5068 }, { "epoch": 0.3426389076652697, "grad_norm": 0.8414881825447083, "learning_rate": 2.307703343872199e-05, "loss": 0.1302490234375, "step": 5069 }, { "epoch": 0.3427065026362039, "grad_norm": 1.028010368347168, "learning_rate": 2.3074252909463994e-05, "loss": 0.222076416015625, "step": 5070 }, { "epoch": 0.342774097607138, "grad_norm": 1.7059129476547241, "learning_rate": 2.3071471989523437e-05, "loss": 0.187164306640625, "step": 5071 }, { "epoch": 0.3428416925780722, "grad_norm": 1.5178745985031128, "learning_rate": 2.3068690679034863e-05, "loss": 0.11870574951171875, "step": 5072 }, { "epoch": 0.34290928754900635, "grad_norm": 2.0239663124084473, "learning_rate": 2.3065908978132855e-05, "loss": 0.31640625, "step": 5073 }, { "epoch": 0.3429768825199405, "grad_norm": 1.6539113521575928, "learning_rate": 2.306312688695201e-05, "loss": 0.370452880859375, "step": 5074 }, { "epoch": 0.3430444774908747, "grad_norm": 1.7248936891555786, "learning_rate": 2.3060344405626942e-05, "loss": 0.269622802734375, "step": 5075 }, { "epoch": 0.34311207246180886, "grad_norm": 1.4928226470947266, "learning_rate": 2.3057561534292287e-05, "loss": 0.2132720947265625, "step": 5076 }, { "epoch": 0.34317966743274303, "grad_norm": 0.5794593691825867, "learning_rate": 2.3054778273082695e-05, "loss": 0.1479034423828125, "step": 5077 }, { "epoch": 0.34324726240367714, "grad_norm": 1.592466115951538, "learning_rate": 2.3051994622132838e-05, "loss": 0.215545654296875, "step": 5078 }, { "epoch": 0.3433148573746113, "grad_norm": 1.2168470621109009, "learning_rate": 2.304921058157741e-05, "loss": 0.2388916015625, "step": 5079 }, { "epoch": 0.3433824523455455, "grad_norm": 0.9901182651519775, "learning_rate": 2.304642615155111e-05, "loss": 0.290283203125, "step": 5080 }, { "epoch": 0.34345004731647966, "grad_norm": 1.7415786981582642, "learning_rate": 2.3043641332188673e-05, "loss": 0.313232421875, "step": 5081 }, { "epoch": 0.3435176422874138, "grad_norm": 1.4291939735412598, "learning_rate": 2.3040856123624846e-05, "loss": 0.277252197265625, "step": 5082 }, { "epoch": 0.343585237258348, "grad_norm": 1.119978666305542, "learning_rate": 2.30380705259944e-05, "loss": 0.29278564453125, "step": 5083 }, { "epoch": 0.34365283222928217, "grad_norm": 2.4498095512390137, "learning_rate": 2.3035284539432108e-05, "loss": 0.272369384765625, "step": 5084 }, { "epoch": 0.3437204272002163, "grad_norm": 1.4487801790237427, "learning_rate": 2.3032498164072777e-05, "loss": 0.25921630859375, "step": 5085 }, { "epoch": 0.34378802217115045, "grad_norm": 1.3543187379837036, "learning_rate": 2.3029711400051236e-05, "loss": 0.2877197265625, "step": 5086 }, { "epoch": 0.3438556171420846, "grad_norm": 1.5714802742004395, "learning_rate": 2.3026924247502323e-05, "loss": 0.3016357421875, "step": 5087 }, { "epoch": 0.3439232121130188, "grad_norm": 1.4739487171173096, "learning_rate": 2.3024136706560892e-05, "loss": 0.164459228515625, "step": 5088 }, { "epoch": 0.34399080708395297, "grad_norm": 1.17457115650177, "learning_rate": 2.3021348777361828e-05, "loss": 0.24249267578125, "step": 5089 }, { "epoch": 0.34405840205488714, "grad_norm": 0.5429266095161438, "learning_rate": 2.301856046004002e-05, "loss": 0.123199462890625, "step": 5090 }, { "epoch": 0.3441259970258213, "grad_norm": 1.2128345966339111, "learning_rate": 2.30157717547304e-05, "loss": 0.27508544921875, "step": 5091 }, { "epoch": 0.3441935919967554, "grad_norm": 1.0282015800476074, "learning_rate": 2.3012982661567886e-05, "loss": 0.1366729736328125, "step": 5092 }, { "epoch": 0.3442611869676896, "grad_norm": 1.224242091178894, "learning_rate": 2.3010193180687438e-05, "loss": 0.2884368896484375, "step": 5093 }, { "epoch": 0.34432878193862376, "grad_norm": 0.8643313646316528, "learning_rate": 2.3007403312224033e-05, "loss": 0.169525146484375, "step": 5094 }, { "epoch": 0.34439637690955793, "grad_norm": 1.5060484409332275, "learning_rate": 2.3004613056312653e-05, "loss": 0.1837005615234375, "step": 5095 }, { "epoch": 0.3444639718804921, "grad_norm": 0.8392376899719238, "learning_rate": 2.300182241308832e-05, "loss": 0.14471435546875, "step": 5096 }, { "epoch": 0.3445315668514263, "grad_norm": 1.4558312892913818, "learning_rate": 2.2999031382686053e-05, "loss": 0.26776123046875, "step": 5097 }, { "epoch": 0.3445991618223604, "grad_norm": 1.4816243648529053, "learning_rate": 2.2996239965240896e-05, "loss": 0.247100830078125, "step": 5098 }, { "epoch": 0.34466675679329456, "grad_norm": 1.5726609230041504, "learning_rate": 2.299344816088793e-05, "loss": 0.206085205078125, "step": 5099 }, { "epoch": 0.34473435176422873, "grad_norm": 2.558655023574829, "learning_rate": 2.2990655969762228e-05, "loss": 0.298370361328125, "step": 5100 }, { "epoch": 0.3448019467351629, "grad_norm": 1.8730391263961792, "learning_rate": 2.29878633919989e-05, "loss": 0.263824462890625, "step": 5101 }, { "epoch": 0.34486954170609707, "grad_norm": 1.0393577814102173, "learning_rate": 2.2985070427733064e-05, "loss": 0.315673828125, "step": 5102 }, { "epoch": 0.34493713667703124, "grad_norm": 1.81047523021698, "learning_rate": 2.298227707709986e-05, "loss": 0.2969970703125, "step": 5103 }, { "epoch": 0.3450047316479654, "grad_norm": 1.4116790294647217, "learning_rate": 2.2979483340234452e-05, "loss": 0.30560302734375, "step": 5104 }, { "epoch": 0.3450723266188995, "grad_norm": 1.162624478340149, "learning_rate": 2.297668921727201e-05, "loss": 0.27984619140625, "step": 5105 }, { "epoch": 0.3451399215898337, "grad_norm": 0.7284200191497803, "learning_rate": 2.2973894708347742e-05, "loss": 0.155670166015625, "step": 5106 }, { "epoch": 0.34520751656076787, "grad_norm": 2.7647056579589844, "learning_rate": 2.2971099813596855e-05, "loss": 0.403076171875, "step": 5107 }, { "epoch": 0.34527511153170204, "grad_norm": 1.0520316362380981, "learning_rate": 2.2968304533154595e-05, "loss": 0.2227783203125, "step": 5108 }, { "epoch": 0.3453427065026362, "grad_norm": 1.0011693239212036, "learning_rate": 2.29655088671562e-05, "loss": 0.208038330078125, "step": 5109 }, { "epoch": 0.3454103014735704, "grad_norm": 1.1532034873962402, "learning_rate": 2.2962712815736953e-05, "loss": 0.24652099609375, "step": 5110 }, { "epoch": 0.34547789644450455, "grad_norm": 1.9615360498428345, "learning_rate": 2.295991637903214e-05, "loss": 0.231536865234375, "step": 5111 }, { "epoch": 0.34554549141543867, "grad_norm": 2.2836296558380127, "learning_rate": 2.2957119557177073e-05, "loss": 0.180816650390625, "step": 5112 }, { "epoch": 0.34561308638637284, "grad_norm": 1.1394346952438354, "learning_rate": 2.2954322350307075e-05, "loss": 0.306640625, "step": 5113 }, { "epoch": 0.345680681357307, "grad_norm": 0.9592258930206299, "learning_rate": 2.295152475855749e-05, "loss": 0.1523590087890625, "step": 5114 }, { "epoch": 0.3457482763282412, "grad_norm": 1.594428539276123, "learning_rate": 2.2948726782063688e-05, "loss": 0.27056884765625, "step": 5115 }, { "epoch": 0.34581587129917535, "grad_norm": 1.5402770042419434, "learning_rate": 2.2945928420961056e-05, "loss": 0.311431884765625, "step": 5116 }, { "epoch": 0.3458834662701095, "grad_norm": 0.8305497169494629, "learning_rate": 2.294312967538499e-05, "loss": 0.219818115234375, "step": 5117 }, { "epoch": 0.3459510612410437, "grad_norm": 1.5316263437271118, "learning_rate": 2.2940330545470912e-05, "loss": 0.32454681396484375, "step": 5118 }, { "epoch": 0.3460186562119778, "grad_norm": 0.9156041741371155, "learning_rate": 2.2937531031354263e-05, "loss": 0.20501708984375, "step": 5119 }, { "epoch": 0.346086251182912, "grad_norm": 1.2635316848754883, "learning_rate": 2.29347311331705e-05, "loss": 0.261199951171875, "step": 5120 }, { "epoch": 0.34615384615384615, "grad_norm": 1.6484477519989014, "learning_rate": 2.2931930851055095e-05, "loss": 0.3138427734375, "step": 5121 }, { "epoch": 0.3462214411247803, "grad_norm": 1.6171222925186157, "learning_rate": 2.2929130185143553e-05, "loss": 0.225341796875, "step": 5122 }, { "epoch": 0.3462890360957145, "grad_norm": 1.0858025550842285, "learning_rate": 2.2926329135571378e-05, "loss": 0.256561279296875, "step": 5123 }, { "epoch": 0.34635663106664866, "grad_norm": 0.8549124002456665, "learning_rate": 2.292352770247411e-05, "loss": 0.16814422607421875, "step": 5124 }, { "epoch": 0.3464242260375828, "grad_norm": 2.1631531715393066, "learning_rate": 2.2920725885987292e-05, "loss": 0.3203582763671875, "step": 5125 }, { "epoch": 0.34649182100851694, "grad_norm": 1.146797776222229, "learning_rate": 2.29179236862465e-05, "loss": 0.270904541015625, "step": 5126 }, { "epoch": 0.3465594159794511, "grad_norm": 1.1596812009811401, "learning_rate": 2.2915121103387314e-05, "loss": 0.1893310546875, "step": 5127 }, { "epoch": 0.3466270109503853, "grad_norm": 2.177002191543579, "learning_rate": 2.291231813754535e-05, "loss": 0.1597137451171875, "step": 5128 }, { "epoch": 0.34669460592131945, "grad_norm": 1.8594295978546143, "learning_rate": 2.2909514788856224e-05, "loss": 0.230682373046875, "step": 5129 }, { "epoch": 0.3467622008922536, "grad_norm": 1.4508627653121948, "learning_rate": 2.2906711057455592e-05, "loss": 0.2218780517578125, "step": 5130 }, { "epoch": 0.3468297958631878, "grad_norm": 1.3413926362991333, "learning_rate": 2.29039069434791e-05, "loss": 0.217803955078125, "step": 5131 }, { "epoch": 0.34689739083412197, "grad_norm": 1.9677077531814575, "learning_rate": 2.2901102447062437e-05, "loss": 0.34368896484375, "step": 5132 }, { "epoch": 0.3469649858050561, "grad_norm": 1.4578675031661987, "learning_rate": 2.2898297568341305e-05, "loss": 0.1292724609375, "step": 5133 }, { "epoch": 0.34703258077599025, "grad_norm": 1.6648482084274292, "learning_rate": 2.2895492307451414e-05, "loss": 0.213592529296875, "step": 5134 }, { "epoch": 0.3471001757469244, "grad_norm": 2.541161298751831, "learning_rate": 2.2892686664528508e-05, "loss": 0.32098388671875, "step": 5135 }, { "epoch": 0.3471677707178586, "grad_norm": 1.2747108936309814, "learning_rate": 2.2889880639708333e-05, "loss": 0.229736328125, "step": 5136 }, { "epoch": 0.34723536568879276, "grad_norm": 0.8455001711845398, "learning_rate": 2.288707423312667e-05, "loss": 0.21083831787109375, "step": 5137 }, { "epoch": 0.34730296065972693, "grad_norm": 1.370202660560608, "learning_rate": 2.2884267444919302e-05, "loss": 0.1890869140625, "step": 5138 }, { "epoch": 0.3473705556306611, "grad_norm": 2.2337148189544678, "learning_rate": 2.2881460275222043e-05, "loss": 0.284637451171875, "step": 5139 }, { "epoch": 0.3474381506015952, "grad_norm": 1.7188035249710083, "learning_rate": 2.2878652724170727e-05, "loss": 0.30029296875, "step": 5140 }, { "epoch": 0.3475057455725294, "grad_norm": 1.405010461807251, "learning_rate": 2.287584479190119e-05, "loss": 0.3355712890625, "step": 5141 }, { "epoch": 0.34757334054346356, "grad_norm": 0.5810195803642273, "learning_rate": 2.2873036478549307e-05, "loss": 0.1107940673828125, "step": 5142 }, { "epoch": 0.34764093551439773, "grad_norm": 0.70528244972229, "learning_rate": 2.2870227784250954e-05, "loss": 0.11126708984375, "step": 5143 }, { "epoch": 0.3477085304853319, "grad_norm": 1.31449556350708, "learning_rate": 2.2867418709142035e-05, "loss": 0.30560302734375, "step": 5144 }, { "epoch": 0.34777612545626607, "grad_norm": 1.2113145589828491, "learning_rate": 2.2864609253358477e-05, "loss": 0.233642578125, "step": 5145 }, { "epoch": 0.34784372042720024, "grad_norm": 1.0956677198410034, "learning_rate": 2.286179941703621e-05, "loss": 0.2440185546875, "step": 5146 }, { "epoch": 0.34791131539813436, "grad_norm": 1.52614164352417, "learning_rate": 2.28589892003112e-05, "loss": 0.3511962890625, "step": 5147 }, { "epoch": 0.34797891036906853, "grad_norm": 1.184507131576538, "learning_rate": 2.2856178603319413e-05, "loss": 0.203125, "step": 5148 }, { "epoch": 0.3480465053400027, "grad_norm": 0.9163497090339661, "learning_rate": 2.2853367626196845e-05, "loss": 0.221649169921875, "step": 5149 }, { "epoch": 0.34811410031093687, "grad_norm": 0.6054525971412659, "learning_rate": 2.2850556269079522e-05, "loss": 0.0874786376953125, "step": 5150 }, { "epoch": 0.34818169528187104, "grad_norm": 1.1724480390548706, "learning_rate": 2.284774453210346e-05, "loss": 0.206695556640625, "step": 5151 }, { "epoch": 0.3482492902528052, "grad_norm": 0.7841187119483948, "learning_rate": 2.2844932415404706e-05, "loss": 0.1439056396484375, "step": 5152 }, { "epoch": 0.3483168852237394, "grad_norm": 1.1639374494552612, "learning_rate": 2.2842119919119342e-05, "loss": 0.276123046875, "step": 5153 }, { "epoch": 0.3483844801946735, "grad_norm": 0.6780982613563538, "learning_rate": 2.2839307043383444e-05, "loss": 0.1385650634765625, "step": 5154 }, { "epoch": 0.34845207516560767, "grad_norm": 1.798595905303955, "learning_rate": 2.2836493788333118e-05, "loss": 0.302947998046875, "step": 5155 }, { "epoch": 0.34851967013654184, "grad_norm": 1.7493067979812622, "learning_rate": 2.283368015410449e-05, "loss": 0.24078369140625, "step": 5156 }, { "epoch": 0.348587265107476, "grad_norm": 0.877576470375061, "learning_rate": 2.28308661408337e-05, "loss": 0.20098876953125, "step": 5157 }, { "epoch": 0.3486548600784102, "grad_norm": 1.4365713596343994, "learning_rate": 2.2828051748656903e-05, "loss": 0.313079833984375, "step": 5158 }, { "epoch": 0.34872245504934435, "grad_norm": 1.8607397079467773, "learning_rate": 2.2825236977710288e-05, "loss": 0.36285400390625, "step": 5159 }, { "epoch": 0.34879005002027846, "grad_norm": 1.3925474882125854, "learning_rate": 2.282242182813004e-05, "loss": 0.258544921875, "step": 5160 }, { "epoch": 0.34885764499121263, "grad_norm": 1.7030514478683472, "learning_rate": 2.2819606300052372e-05, "loss": 0.243072509765625, "step": 5161 }, { "epoch": 0.3489252399621468, "grad_norm": 1.5923396348953247, "learning_rate": 2.2816790393613525e-05, "loss": 0.2811279296875, "step": 5162 }, { "epoch": 0.348992834933081, "grad_norm": 1.5514366626739502, "learning_rate": 2.281397410894975e-05, "loss": 0.233642578125, "step": 5163 }, { "epoch": 0.34906042990401515, "grad_norm": 2.2240891456604004, "learning_rate": 2.2811157446197307e-05, "loss": 0.33795166015625, "step": 5164 }, { "epoch": 0.3491280248749493, "grad_norm": 1.0409584045410156, "learning_rate": 2.2808340405492486e-05, "loss": 0.1764678955078125, "step": 5165 }, { "epoch": 0.3491956198458835, "grad_norm": 0.591660737991333, "learning_rate": 2.2805522986971607e-05, "loss": 0.152587890625, "step": 5166 }, { "epoch": 0.3492632148168176, "grad_norm": 2.0580215454101562, "learning_rate": 2.2802705190770978e-05, "loss": 0.33349609375, "step": 5167 }, { "epoch": 0.3493308097877518, "grad_norm": 1.58573579788208, "learning_rate": 2.279988701702695e-05, "loss": 0.286529541015625, "step": 5168 }, { "epoch": 0.34939840475868594, "grad_norm": 1.4868816137313843, "learning_rate": 2.2797068465875878e-05, "loss": 0.33477783203125, "step": 5169 }, { "epoch": 0.3494659997296201, "grad_norm": 1.3400973081588745, "learning_rate": 2.279424953745415e-05, "loss": 0.1795806884765625, "step": 5170 }, { "epoch": 0.3495335947005543, "grad_norm": 1.1624009609222412, "learning_rate": 2.279143023189815e-05, "loss": 0.280792236328125, "step": 5171 }, { "epoch": 0.34960118967148845, "grad_norm": 1.5925441980361938, "learning_rate": 2.2788610549344307e-05, "loss": 0.303070068359375, "step": 5172 }, { "epoch": 0.3496687846424226, "grad_norm": 1.162558674812317, "learning_rate": 2.2785790489929045e-05, "loss": 0.31219482421875, "step": 5173 }, { "epoch": 0.34973637961335674, "grad_norm": 0.8465785384178162, "learning_rate": 2.278297005378882e-05, "loss": 0.219024658203125, "step": 5174 }, { "epoch": 0.3498039745842909, "grad_norm": 1.3381412029266357, "learning_rate": 2.2780149241060108e-05, "loss": 0.258636474609375, "step": 5175 }, { "epoch": 0.3498715695552251, "grad_norm": 1.1746394634246826, "learning_rate": 2.2777328051879385e-05, "loss": 0.2432861328125, "step": 5176 }, { "epoch": 0.34993916452615925, "grad_norm": 1.302897334098816, "learning_rate": 2.2774506486383164e-05, "loss": 0.222259521484375, "step": 5177 }, { "epoch": 0.3500067594970934, "grad_norm": 0.7643082737922668, "learning_rate": 2.277168454470797e-05, "loss": 0.156280517578125, "step": 5178 }, { "epoch": 0.3500743544680276, "grad_norm": 0.9649413228034973, "learning_rate": 2.276886222699035e-05, "loss": 0.2320556640625, "step": 5179 }, { "epoch": 0.35014194943896176, "grad_norm": 1.5746837854385376, "learning_rate": 2.2766039533366857e-05, "loss": 0.2374267578125, "step": 5180 }, { "epoch": 0.3502095444098959, "grad_norm": 1.8796666860580444, "learning_rate": 2.2763216463974077e-05, "loss": 0.237213134765625, "step": 5181 }, { "epoch": 0.35027713938083005, "grad_norm": 1.0586210489273071, "learning_rate": 2.2760393018948605e-05, "loss": 0.26165771484375, "step": 5182 }, { "epoch": 0.3503447343517642, "grad_norm": 0.9178964495658875, "learning_rate": 2.2757569198427065e-05, "loss": 0.1822662353515625, "step": 5183 }, { "epoch": 0.3504123293226984, "grad_norm": 1.1970981359481812, "learning_rate": 2.2754745002546073e-05, "loss": 0.28076171875, "step": 5184 }, { "epoch": 0.35047992429363256, "grad_norm": 1.3627129793167114, "learning_rate": 2.2751920431442293e-05, "loss": 0.291229248046875, "step": 5185 }, { "epoch": 0.35054751926456673, "grad_norm": 0.9002109169960022, "learning_rate": 2.2749095485252397e-05, "loss": 0.181549072265625, "step": 5186 }, { "epoch": 0.3506151142355009, "grad_norm": 1.2124477624893188, "learning_rate": 2.274627016411307e-05, "loss": 0.1945953369140625, "step": 5187 }, { "epoch": 0.350682709206435, "grad_norm": 0.9528355002403259, "learning_rate": 2.2743444468161016e-05, "loss": 0.221221923828125, "step": 5188 }, { "epoch": 0.3507503041773692, "grad_norm": 1.224505066871643, "learning_rate": 2.274061839753297e-05, "loss": 0.291717529296875, "step": 5189 }, { "epoch": 0.35081789914830336, "grad_norm": 1.0689449310302734, "learning_rate": 2.273779195236566e-05, "loss": 0.2850341796875, "step": 5190 }, { "epoch": 0.35088549411923753, "grad_norm": 1.2067722082138062, "learning_rate": 2.2734965132795856e-05, "loss": 0.274322509765625, "step": 5191 }, { "epoch": 0.3509530890901717, "grad_norm": 0.8394047617912292, "learning_rate": 2.2732137938960335e-05, "loss": 0.1815948486328125, "step": 5192 }, { "epoch": 0.35102068406110587, "grad_norm": 1.854333758354187, "learning_rate": 2.27293103709959e-05, "loss": 0.1819610595703125, "step": 5193 }, { "epoch": 0.35108827903204004, "grad_norm": 2.509110689163208, "learning_rate": 2.272648242903936e-05, "loss": 0.3687744140625, "step": 5194 }, { "epoch": 0.35115587400297416, "grad_norm": 1.1703635454177856, "learning_rate": 2.2723654113227544e-05, "loss": 0.2047576904296875, "step": 5195 }, { "epoch": 0.3512234689739083, "grad_norm": 1.6353808641433716, "learning_rate": 2.2720825423697315e-05, "loss": 0.361328125, "step": 5196 }, { "epoch": 0.3512910639448425, "grad_norm": 1.5498603582382202, "learning_rate": 2.2717996360585535e-05, "loss": 0.235504150390625, "step": 5197 }, { "epoch": 0.35135865891577667, "grad_norm": 0.565880298614502, "learning_rate": 2.271516692402909e-05, "loss": 0.10284423828125, "step": 5198 }, { "epoch": 0.35142625388671084, "grad_norm": 1.1747485399246216, "learning_rate": 2.271233711416489e-05, "loss": 0.202545166015625, "step": 5199 }, { "epoch": 0.351493848857645, "grad_norm": 1.6415865421295166, "learning_rate": 2.2709506931129858e-05, "loss": 0.26409912109375, "step": 5200 }, { "epoch": 0.3515614438285792, "grad_norm": 1.5217045545578003, "learning_rate": 2.2706676375060936e-05, "loss": 0.2796630859375, "step": 5201 }, { "epoch": 0.3516290387995133, "grad_norm": 1.242339849472046, "learning_rate": 2.2703845446095083e-05, "loss": 0.1622467041015625, "step": 5202 }, { "epoch": 0.35169663377044746, "grad_norm": 1.2111009359359741, "learning_rate": 2.270101414436928e-05, "loss": 0.37054443359375, "step": 5203 }, { "epoch": 0.35176422874138163, "grad_norm": 1.9244621992111206, "learning_rate": 2.2698182470020527e-05, "loss": 0.253875732421875, "step": 5204 }, { "epoch": 0.3518318237123158, "grad_norm": 1.2967126369476318, "learning_rate": 2.2695350423185822e-05, "loss": 0.318695068359375, "step": 5205 }, { "epoch": 0.35189941868325, "grad_norm": 1.374891757965088, "learning_rate": 2.269251800400221e-05, "loss": 0.26800537109375, "step": 5206 }, { "epoch": 0.35196701365418415, "grad_norm": 0.8885253667831421, "learning_rate": 2.2689685212606733e-05, "loss": 0.1929168701171875, "step": 5207 }, { "epoch": 0.3520346086251183, "grad_norm": 1.0050456523895264, "learning_rate": 2.2686852049136473e-05, "loss": 0.24981689453125, "step": 5208 }, { "epoch": 0.35210220359605243, "grad_norm": 1.2740464210510254, "learning_rate": 2.26840185137285e-05, "loss": 0.246826171875, "step": 5209 }, { "epoch": 0.3521697985669866, "grad_norm": 1.1506129503250122, "learning_rate": 2.268118460651993e-05, "loss": 0.26458740234375, "step": 5210 }, { "epoch": 0.3522373935379208, "grad_norm": 1.4439661502838135, "learning_rate": 2.267835032764787e-05, "loss": 0.27288818359375, "step": 5211 }, { "epoch": 0.35230498850885494, "grad_norm": 0.9402430057525635, "learning_rate": 2.267551567724948e-05, "loss": 0.226715087890625, "step": 5212 }, { "epoch": 0.3523725834797891, "grad_norm": 1.5898644924163818, "learning_rate": 2.2672680655461904e-05, "loss": 0.281890869140625, "step": 5213 }, { "epoch": 0.3524401784507233, "grad_norm": 2.425222158432007, "learning_rate": 2.2669845262422324e-05, "loss": 0.2363433837890625, "step": 5214 }, { "epoch": 0.35250777342165746, "grad_norm": 1.671258568763733, "learning_rate": 2.2667009498267927e-05, "loss": 0.2148895263671875, "step": 5215 }, { "epoch": 0.35257536839259157, "grad_norm": 0.9027411341667175, "learning_rate": 2.2664173363135935e-05, "loss": 0.2118072509765625, "step": 5216 }, { "epoch": 0.35264296336352574, "grad_norm": 0.776627779006958, "learning_rate": 2.2661336857163574e-05, "loss": 0.140625, "step": 5217 }, { "epoch": 0.3527105583344599, "grad_norm": 1.2718698978424072, "learning_rate": 2.2658499980488088e-05, "loss": 0.2718505859375, "step": 5218 }, { "epoch": 0.3527781533053941, "grad_norm": 3.081697702407837, "learning_rate": 2.2655662733246746e-05, "loss": 0.367156982421875, "step": 5219 }, { "epoch": 0.35284574827632825, "grad_norm": 1.3122670650482178, "learning_rate": 2.265282511557683e-05, "loss": 0.250244140625, "step": 5220 }, { "epoch": 0.3529133432472624, "grad_norm": 1.5787512063980103, "learning_rate": 2.264998712761564e-05, "loss": 0.27978515625, "step": 5221 }, { "epoch": 0.3529809382181966, "grad_norm": 1.1856293678283691, "learning_rate": 2.26471487695005e-05, "loss": 0.3436279296875, "step": 5222 }, { "epoch": 0.3530485331891307, "grad_norm": 0.8861820697784424, "learning_rate": 2.2644310041368744e-05, "loss": 0.184722900390625, "step": 5223 }, { "epoch": 0.3531161281600649, "grad_norm": 1.4750264883041382, "learning_rate": 2.264147094335773e-05, "loss": 0.233428955078125, "step": 5224 }, { "epoch": 0.35318372313099905, "grad_norm": 1.9109033346176147, "learning_rate": 2.263863147560483e-05, "loss": 0.308502197265625, "step": 5225 }, { "epoch": 0.3532513181019332, "grad_norm": 1.443045735359192, "learning_rate": 2.2635791638247437e-05, "loss": 0.228363037109375, "step": 5226 }, { "epoch": 0.3533189130728674, "grad_norm": 1.0325191020965576, "learning_rate": 2.2632951431422958e-05, "loss": 0.175750732421875, "step": 5227 }, { "epoch": 0.35338650804380156, "grad_norm": 0.9549221396446228, "learning_rate": 2.2630110855268817e-05, "loss": 0.14776611328125, "step": 5228 }, { "epoch": 0.3534541030147357, "grad_norm": 1.439482569694519, "learning_rate": 2.2627269909922465e-05, "loss": 0.248992919921875, "step": 5229 }, { "epoch": 0.35352169798566985, "grad_norm": 1.458413004875183, "learning_rate": 2.262442859552136e-05, "loss": 0.31903076171875, "step": 5230 }, { "epoch": 0.353589292956604, "grad_norm": 1.1972631216049194, "learning_rate": 2.262158691220298e-05, "loss": 0.231964111328125, "step": 5231 }, { "epoch": 0.3536568879275382, "grad_norm": 1.0277642011642456, "learning_rate": 2.261874486010483e-05, "loss": 0.157928466796875, "step": 5232 }, { "epoch": 0.35372448289847236, "grad_norm": 1.3763329982757568, "learning_rate": 2.2615902439364423e-05, "loss": 0.252197265625, "step": 5233 }, { "epoch": 0.35379207786940653, "grad_norm": 1.0787110328674316, "learning_rate": 2.2613059650119295e-05, "loss": 0.2203369140625, "step": 5234 }, { "epoch": 0.3538596728403407, "grad_norm": 1.0032271146774292, "learning_rate": 2.2610216492506995e-05, "loss": 0.17499542236328125, "step": 5235 }, { "epoch": 0.3539272678112748, "grad_norm": 1.599506139755249, "learning_rate": 2.2607372966665092e-05, "loss": 0.321136474609375, "step": 5236 }, { "epoch": 0.353994862782209, "grad_norm": 1.5076658725738525, "learning_rate": 2.2604529072731175e-05, "loss": 0.189788818359375, "step": 5237 }, { "epoch": 0.35406245775314316, "grad_norm": 0.8112252354621887, "learning_rate": 2.260168481084285e-05, "loss": 0.256317138671875, "step": 5238 }, { "epoch": 0.3541300527240773, "grad_norm": 1.108383297920227, "learning_rate": 2.2598840181137746e-05, "loss": 0.2484130859375, "step": 5239 }, { "epoch": 0.3541976476950115, "grad_norm": 1.9588981866836548, "learning_rate": 2.259599518375349e-05, "loss": 0.2446746826171875, "step": 5240 }, { "epoch": 0.35426524266594567, "grad_norm": 1.165114402770996, "learning_rate": 2.2593149818827746e-05, "loss": 0.17645263671875, "step": 5241 }, { "epoch": 0.35433283763687984, "grad_norm": 2.0993001461029053, "learning_rate": 2.25903040864982e-05, "loss": 0.265869140625, "step": 5242 }, { "epoch": 0.35440043260781395, "grad_norm": 2.0014259815216064, "learning_rate": 2.2587457986902535e-05, "loss": 0.318817138671875, "step": 5243 }, { "epoch": 0.3544680275787481, "grad_norm": 1.720720887184143, "learning_rate": 2.2584611520178465e-05, "loss": 0.37420654296875, "step": 5244 }, { "epoch": 0.3545356225496823, "grad_norm": 0.9151951670646667, "learning_rate": 2.2581764686463723e-05, "loss": 0.1954345703125, "step": 5245 }, { "epoch": 0.35460321752061646, "grad_norm": 1.4056265354156494, "learning_rate": 2.2578917485896057e-05, "loss": 0.21942138671875, "step": 5246 }, { "epoch": 0.35467081249155064, "grad_norm": 2.091222047805786, "learning_rate": 2.2576069918613226e-05, "loss": 0.27532958984375, "step": 5247 }, { "epoch": 0.3547384074624848, "grad_norm": 1.5360387563705444, "learning_rate": 2.257322198475302e-05, "loss": 0.1893768310546875, "step": 5248 }, { "epoch": 0.354806002433419, "grad_norm": 1.7453880310058594, "learning_rate": 2.2570373684453233e-05, "loss": 0.305755615234375, "step": 5249 }, { "epoch": 0.3548735974043531, "grad_norm": 1.189034342765808, "learning_rate": 2.2567525017851692e-05, "loss": 0.262664794921875, "step": 5250 }, { "epoch": 0.35494119237528726, "grad_norm": 1.6452065706253052, "learning_rate": 2.256467598508623e-05, "loss": 0.2593994140625, "step": 5251 }, { "epoch": 0.35500878734622143, "grad_norm": 0.4583859145641327, "learning_rate": 2.2561826586294697e-05, "loss": 0.084075927734375, "step": 5252 }, { "epoch": 0.3550763823171556, "grad_norm": 2.5022366046905518, "learning_rate": 2.255897682161497e-05, "loss": 0.292938232421875, "step": 5253 }, { "epoch": 0.3551439772880898, "grad_norm": 1.483505368232727, "learning_rate": 2.255612669118493e-05, "loss": 0.2686614990234375, "step": 5254 }, { "epoch": 0.35521157225902394, "grad_norm": 1.6033920049667358, "learning_rate": 2.2553276195142494e-05, "loss": 0.1605072021484375, "step": 5255 }, { "epoch": 0.3552791672299581, "grad_norm": 1.1285957098007202, "learning_rate": 2.2550425333625582e-05, "loss": 0.2889404296875, "step": 5256 }, { "epoch": 0.35534676220089223, "grad_norm": 1.4827401638031006, "learning_rate": 2.2547574106772135e-05, "loss": 0.262786865234375, "step": 5257 }, { "epoch": 0.3554143571718264, "grad_norm": 1.0970964431762695, "learning_rate": 2.2544722514720114e-05, "loss": 0.228424072265625, "step": 5258 }, { "epoch": 0.35548195214276057, "grad_norm": 0.8513506054878235, "learning_rate": 2.25418705576075e-05, "loss": 0.178680419921875, "step": 5259 }, { "epoch": 0.35554954711369474, "grad_norm": 0.7849383354187012, "learning_rate": 2.253901823557229e-05, "loss": 0.230804443359375, "step": 5260 }, { "epoch": 0.3556171420846289, "grad_norm": 1.2442429065704346, "learning_rate": 2.2536165548752484e-05, "loss": 0.195404052734375, "step": 5261 }, { "epoch": 0.3556847370555631, "grad_norm": 1.1340148448944092, "learning_rate": 2.2533312497286124e-05, "loss": 0.22552490234375, "step": 5262 }, { "epoch": 0.35575233202649725, "grad_norm": 1.0418925285339355, "learning_rate": 2.253045908131126e-05, "loss": 0.27947998046875, "step": 5263 }, { "epoch": 0.35581992699743137, "grad_norm": 1.240268349647522, "learning_rate": 2.252760530096596e-05, "loss": 0.2586669921875, "step": 5264 }, { "epoch": 0.35588752196836554, "grad_norm": 0.5444379448890686, "learning_rate": 2.2524751156388287e-05, "loss": 0.11426544189453125, "step": 5265 }, { "epoch": 0.3559551169392997, "grad_norm": 2.1319992542266846, "learning_rate": 2.252189664771637e-05, "loss": 0.300048828125, "step": 5266 }, { "epoch": 0.3560227119102339, "grad_norm": 1.4289653301239014, "learning_rate": 2.2519041775088308e-05, "loss": 0.2213134765625, "step": 5267 }, { "epoch": 0.35609030688116805, "grad_norm": 0.8858327865600586, "learning_rate": 2.2516186538642246e-05, "loss": 0.1534881591796875, "step": 5268 }, { "epoch": 0.3561579018521022, "grad_norm": 1.0957080125808716, "learning_rate": 2.2513330938516336e-05, "loss": 0.17649078369140625, "step": 5269 }, { "epoch": 0.3562254968230364, "grad_norm": 0.7711097002029419, "learning_rate": 2.251047497484875e-05, "loss": 0.12908172607421875, "step": 5270 }, { "epoch": 0.3562930917939705, "grad_norm": 1.7549726963043213, "learning_rate": 2.250761864777768e-05, "loss": 0.310211181640625, "step": 5271 }, { "epoch": 0.3563606867649047, "grad_norm": 1.4001387357711792, "learning_rate": 2.250476195744133e-05, "loss": 0.33935546875, "step": 5272 }, { "epoch": 0.35642828173583885, "grad_norm": 1.1349481344223022, "learning_rate": 2.2501904903977925e-05, "loss": 0.281280517578125, "step": 5273 }, { "epoch": 0.356495876706773, "grad_norm": 1.090407133102417, "learning_rate": 2.24990474875257e-05, "loss": 0.2393035888671875, "step": 5274 }, { "epoch": 0.3565634716777072, "grad_norm": 0.8942000865936279, "learning_rate": 2.2496189708222933e-05, "loss": 0.238128662109375, "step": 5275 }, { "epoch": 0.35663106664864136, "grad_norm": 1.336320400238037, "learning_rate": 2.2493331566207883e-05, "loss": 0.18375396728515625, "step": 5276 }, { "epoch": 0.35669866161957553, "grad_norm": 1.6236622333526611, "learning_rate": 2.2490473061618852e-05, "loss": 0.28363037109375, "step": 5277 }, { "epoch": 0.35676625659050965, "grad_norm": 1.8965840339660645, "learning_rate": 2.2487614194594154e-05, "loss": 0.23712158203125, "step": 5278 }, { "epoch": 0.3568338515614438, "grad_norm": 1.449453353881836, "learning_rate": 2.2484754965272116e-05, "loss": 0.269378662109375, "step": 5279 }, { "epoch": 0.356901446532378, "grad_norm": 0.786971390247345, "learning_rate": 2.248189537379108e-05, "loss": 0.17254638671875, "step": 5280 }, { "epoch": 0.35696904150331216, "grad_norm": 1.2138104438781738, "learning_rate": 2.2479035420289426e-05, "loss": 0.254119873046875, "step": 5281 }, { "epoch": 0.3570366364742463, "grad_norm": 0.9584962725639343, "learning_rate": 2.2476175104905517e-05, "loss": 0.215240478515625, "step": 5282 }, { "epoch": 0.3571042314451805, "grad_norm": 1.221838355064392, "learning_rate": 2.2473314427777766e-05, "loss": 0.243438720703125, "step": 5283 }, { "epoch": 0.35717182641611467, "grad_norm": 0.5858154296875, "learning_rate": 2.247045338904459e-05, "loss": 0.119232177734375, "step": 5284 }, { "epoch": 0.3572394213870488, "grad_norm": 1.9036072492599487, "learning_rate": 2.2467591988844416e-05, "loss": 0.2116241455078125, "step": 5285 }, { "epoch": 0.35730701635798295, "grad_norm": 1.120174527168274, "learning_rate": 2.24647302273157e-05, "loss": 0.11749267578125, "step": 5286 }, { "epoch": 0.3573746113289171, "grad_norm": 1.2418242692947388, "learning_rate": 2.246186810459692e-05, "loss": 0.32818603515625, "step": 5287 }, { "epoch": 0.3574422062998513, "grad_norm": 1.0170155763626099, "learning_rate": 2.2459005620826554e-05, "loss": 0.248138427734375, "step": 5288 }, { "epoch": 0.35750980127078547, "grad_norm": 1.1934007406234741, "learning_rate": 2.2456142776143108e-05, "loss": 0.275299072265625, "step": 5289 }, { "epoch": 0.35757739624171964, "grad_norm": 1.3074299097061157, "learning_rate": 2.24532795706851e-05, "loss": 0.32098388671875, "step": 5290 }, { "epoch": 0.35764499121265375, "grad_norm": 1.218510627746582, "learning_rate": 2.2450416004591083e-05, "loss": 0.2138824462890625, "step": 5291 }, { "epoch": 0.3577125861835879, "grad_norm": 1.297330617904663, "learning_rate": 2.2447552077999602e-05, "loss": 0.2698974609375, "step": 5292 }, { "epoch": 0.3577801811545221, "grad_norm": 0.8523848056793213, "learning_rate": 2.2444687791049236e-05, "loss": 0.218994140625, "step": 5293 }, { "epoch": 0.35784777612545626, "grad_norm": 1.7103843688964844, "learning_rate": 2.2441823143878577e-05, "loss": 0.2918701171875, "step": 5294 }, { "epoch": 0.35791537109639043, "grad_norm": 1.2505584955215454, "learning_rate": 2.2438958136626233e-05, "loss": 0.205108642578125, "step": 5295 }, { "epoch": 0.3579829660673246, "grad_norm": 1.2901232242584229, "learning_rate": 2.2436092769430836e-05, "loss": 0.214080810546875, "step": 5296 }, { "epoch": 0.3580505610382588, "grad_norm": 1.119590401649475, "learning_rate": 2.2433227042431022e-05, "loss": 0.235107421875, "step": 5297 }, { "epoch": 0.3581181560091929, "grad_norm": 1.7917768955230713, "learning_rate": 2.2430360955765457e-05, "loss": 0.240570068359375, "step": 5298 }, { "epoch": 0.35818575098012706, "grad_norm": 1.5236210823059082, "learning_rate": 2.2427494509572824e-05, "loss": 0.265594482421875, "step": 5299 }, { "epoch": 0.35825334595106123, "grad_norm": 1.353417158126831, "learning_rate": 2.2424627703991813e-05, "loss": 0.30810546875, "step": 5300 }, { "epoch": 0.3583209409219954, "grad_norm": 1.1169652938842773, "learning_rate": 2.242176053916114e-05, "loss": 0.325531005859375, "step": 5301 }, { "epoch": 0.35838853589292957, "grad_norm": 1.8008383512496948, "learning_rate": 2.241889301521954e-05, "loss": 0.30810546875, "step": 5302 }, { "epoch": 0.35845613086386374, "grad_norm": 1.012527585029602, "learning_rate": 2.2416025132305752e-05, "loss": 0.2010650634765625, "step": 5303 }, { "epoch": 0.3585237258347979, "grad_norm": 0.9812011122703552, "learning_rate": 2.2413156890558552e-05, "loss": 0.30364990234375, "step": 5304 }, { "epoch": 0.35859132080573203, "grad_norm": 1.4555803537368774, "learning_rate": 2.2410288290116718e-05, "loss": 0.28912353515625, "step": 5305 }, { "epoch": 0.3586589157766662, "grad_norm": 0.7758145332336426, "learning_rate": 2.2407419331119057e-05, "loss": 0.2387847900390625, "step": 5306 }, { "epoch": 0.35872651074760037, "grad_norm": 0.7639468908309937, "learning_rate": 2.2404550013704375e-05, "loss": 0.2113037109375, "step": 5307 }, { "epoch": 0.35879410571853454, "grad_norm": 1.295425295829773, "learning_rate": 2.240168033801152e-05, "loss": 0.22125244140625, "step": 5308 }, { "epoch": 0.3588617006894687, "grad_norm": 1.5469293594360352, "learning_rate": 2.239881030417934e-05, "loss": 0.3045196533203125, "step": 5309 }, { "epoch": 0.3589292956604029, "grad_norm": 1.1793301105499268, "learning_rate": 2.2395939912346706e-05, "loss": 0.28814697265625, "step": 5310 }, { "epoch": 0.35899689063133705, "grad_norm": 0.8046035766601562, "learning_rate": 2.23930691626525e-05, "loss": 0.1423187255859375, "step": 5311 }, { "epoch": 0.35906448560227117, "grad_norm": 0.7434158325195312, "learning_rate": 2.2390198055235634e-05, "loss": 0.135040283203125, "step": 5312 }, { "epoch": 0.35913208057320534, "grad_norm": 0.987087607383728, "learning_rate": 2.2387326590235027e-05, "loss": 0.19439697265625, "step": 5313 }, { "epoch": 0.3591996755441395, "grad_norm": 1.5203036069869995, "learning_rate": 2.2384454767789616e-05, "loss": 0.33673095703125, "step": 5314 }, { "epoch": 0.3592672705150737, "grad_norm": 1.5712586641311646, "learning_rate": 2.238158258803836e-05, "loss": 0.31719970703125, "step": 5315 }, { "epoch": 0.35933486548600785, "grad_norm": 1.9206451177597046, "learning_rate": 2.2378710051120234e-05, "loss": 0.28179931640625, "step": 5316 }, { "epoch": 0.359402460456942, "grad_norm": 1.0778346061706543, "learning_rate": 2.2375837157174225e-05, "loss": 0.298370361328125, "step": 5317 }, { "epoch": 0.3594700554278762, "grad_norm": 1.116188406944275, "learning_rate": 2.237296390633935e-05, "loss": 0.19025421142578125, "step": 5318 }, { "epoch": 0.3595376503988103, "grad_norm": 0.9587348699569702, "learning_rate": 2.237009029875463e-05, "loss": 0.19171142578125, "step": 5319 }, { "epoch": 0.3596052453697445, "grad_norm": 0.8576705455780029, "learning_rate": 2.2367216334559097e-05, "loss": 0.242431640625, "step": 5320 }, { "epoch": 0.35967284034067865, "grad_norm": 0.9798846244812012, "learning_rate": 2.236434201389183e-05, "loss": 0.2364501953125, "step": 5321 }, { "epoch": 0.3597404353116128, "grad_norm": 1.2397289276123047, "learning_rate": 2.2361467336891897e-05, "loss": 0.21771240234375, "step": 5322 }, { "epoch": 0.359808030282547, "grad_norm": 0.9487736225128174, "learning_rate": 2.2358592303698392e-05, "loss": 0.190338134765625, "step": 5323 }, { "epoch": 0.35987562525348116, "grad_norm": 1.0532069206237793, "learning_rate": 2.235571691445043e-05, "loss": 0.186614990234375, "step": 5324 }, { "epoch": 0.35994322022441533, "grad_norm": 1.399379014968872, "learning_rate": 2.235284116928714e-05, "loss": 0.2265625, "step": 5325 }, { "epoch": 0.36001081519534944, "grad_norm": 1.2697539329528809, "learning_rate": 2.2349965068347667e-05, "loss": 0.21722412109375, "step": 5326 }, { "epoch": 0.3600784101662836, "grad_norm": 0.6649462580680847, "learning_rate": 2.2347088611771178e-05, "loss": 0.11822509765625, "step": 5327 }, { "epoch": 0.3601460051372178, "grad_norm": 0.9233790636062622, "learning_rate": 2.2344211799696842e-05, "loss": 0.2381591796875, "step": 5328 }, { "epoch": 0.36021360010815195, "grad_norm": 0.7977750301361084, "learning_rate": 2.2341334632263873e-05, "loss": 0.1712188720703125, "step": 5329 }, { "epoch": 0.3602811950790861, "grad_norm": 0.7061092257499695, "learning_rate": 2.2338457109611476e-05, "loss": 0.162384033203125, "step": 5330 }, { "epoch": 0.3603487900500203, "grad_norm": 1.0739855766296387, "learning_rate": 2.233557923187889e-05, "loss": 0.191619873046875, "step": 5331 }, { "epoch": 0.36041638502095447, "grad_norm": 1.4547768831253052, "learning_rate": 2.2332700999205354e-05, "loss": 0.2968902587890625, "step": 5332 }, { "epoch": 0.3604839799918886, "grad_norm": 1.0595301389694214, "learning_rate": 2.232982241173015e-05, "loss": 0.254058837890625, "step": 5333 }, { "epoch": 0.36055157496282275, "grad_norm": 1.7024204730987549, "learning_rate": 2.232694346959255e-05, "loss": 0.25665283203125, "step": 5334 }, { "epoch": 0.3606191699337569, "grad_norm": 1.0687739849090576, "learning_rate": 2.2324064172931866e-05, "loss": 0.232391357421875, "step": 5335 }, { "epoch": 0.3606867649046911, "grad_norm": 1.3572235107421875, "learning_rate": 2.2321184521887402e-05, "loss": 0.250885009765625, "step": 5336 }, { "epoch": 0.36075435987562526, "grad_norm": 0.9640324115753174, "learning_rate": 2.2318304516598503e-05, "loss": 0.247283935546875, "step": 5337 }, { "epoch": 0.36082195484655943, "grad_norm": 1.4852629899978638, "learning_rate": 2.231542415720452e-05, "loss": 0.29144287109375, "step": 5338 }, { "epoch": 0.3608895498174936, "grad_norm": 1.1894850730895996, "learning_rate": 2.2312543443844822e-05, "loss": 0.205352783203125, "step": 5339 }, { "epoch": 0.3609571447884277, "grad_norm": 1.1456279754638672, "learning_rate": 2.2309662376658794e-05, "loss": 0.2352294921875, "step": 5340 }, { "epoch": 0.3610247397593619, "grad_norm": 1.1852692365646362, "learning_rate": 2.230678095578584e-05, "loss": 0.28515625, "step": 5341 }, { "epoch": 0.36109233473029606, "grad_norm": 0.9529224634170532, "learning_rate": 2.230389918136539e-05, "loss": 0.18450927734375, "step": 5342 }, { "epoch": 0.36115992970123023, "grad_norm": 1.1575473546981812, "learning_rate": 2.2301017053536872e-05, "loss": 0.25897216796875, "step": 5343 }, { "epoch": 0.3612275246721644, "grad_norm": 0.8164300322532654, "learning_rate": 2.2298134572439745e-05, "loss": 0.205902099609375, "step": 5344 }, { "epoch": 0.3612951196430986, "grad_norm": 1.4526110887527466, "learning_rate": 2.229525173821348e-05, "loss": 0.25921630859375, "step": 5345 }, { "epoch": 0.36136271461403274, "grad_norm": 1.3697632551193237, "learning_rate": 2.2292368550997567e-05, "loss": 0.216461181640625, "step": 5346 }, { "epoch": 0.36143030958496686, "grad_norm": 1.99887216091156, "learning_rate": 2.2289485010931515e-05, "loss": 0.29364013671875, "step": 5347 }, { "epoch": 0.36149790455590103, "grad_norm": 1.2319519519805908, "learning_rate": 2.228660111815484e-05, "loss": 0.311187744140625, "step": 5348 }, { "epoch": 0.3615654995268352, "grad_norm": 0.7934701442718506, "learning_rate": 2.2283716872807088e-05, "loss": 0.0898590087890625, "step": 5349 }, { "epoch": 0.36163309449776937, "grad_norm": 0.9223483800888062, "learning_rate": 2.228083227502782e-05, "loss": 0.2591552734375, "step": 5350 }, { "epoch": 0.36170068946870354, "grad_norm": 1.2381982803344727, "learning_rate": 2.2277947324956612e-05, "loss": 0.235504150390625, "step": 5351 }, { "epoch": 0.3617682844396377, "grad_norm": 1.5579098463058472, "learning_rate": 2.2275062022733045e-05, "loss": 0.30352783203125, "step": 5352 }, { "epoch": 0.3618358794105719, "grad_norm": 1.483966588973999, "learning_rate": 2.227217636849673e-05, "loss": 0.29888916015625, "step": 5353 }, { "epoch": 0.361903474381506, "grad_norm": 1.1800932884216309, "learning_rate": 2.2269290362387308e-05, "loss": 0.26800537109375, "step": 5354 }, { "epoch": 0.36197106935244017, "grad_norm": 1.3702868223190308, "learning_rate": 2.2266404004544405e-05, "loss": 0.2327880859375, "step": 5355 }, { "epoch": 0.36203866432337434, "grad_norm": 1.3919713497161865, "learning_rate": 2.226351729510769e-05, "loss": 0.238616943359375, "step": 5356 }, { "epoch": 0.3621062592943085, "grad_norm": 0.7835471630096436, "learning_rate": 2.2260630234216835e-05, "loss": 0.206146240234375, "step": 5357 }, { "epoch": 0.3621738542652427, "grad_norm": 2.014883518218994, "learning_rate": 2.2257742822011537e-05, "loss": 0.25592041015625, "step": 5358 }, { "epoch": 0.36224144923617685, "grad_norm": 2.2730562686920166, "learning_rate": 2.2254855058631505e-05, "loss": 0.38531494140625, "step": 5359 }, { "epoch": 0.36230904420711096, "grad_norm": 1.3494360446929932, "learning_rate": 2.2251966944216463e-05, "loss": 0.29052734375, "step": 5360 }, { "epoch": 0.36237663917804513, "grad_norm": 2.1837522983551025, "learning_rate": 2.2249078478906164e-05, "loss": 0.26849365234375, "step": 5361 }, { "epoch": 0.3624442341489793, "grad_norm": 1.324488878250122, "learning_rate": 2.2246189662840368e-05, "loss": 0.271087646484375, "step": 5362 }, { "epoch": 0.3625118291199135, "grad_norm": 1.4333148002624512, "learning_rate": 2.2243300496158854e-05, "loss": 0.1550750732421875, "step": 5363 }, { "epoch": 0.36257942409084765, "grad_norm": 2.2946574687957764, "learning_rate": 2.2240410979001418e-05, "loss": 0.26617431640625, "step": 5364 }, { "epoch": 0.3626470190617818, "grad_norm": 1.5568971633911133, "learning_rate": 2.223752111150787e-05, "loss": 0.242034912109375, "step": 5365 }, { "epoch": 0.362714614032716, "grad_norm": 1.4366588592529297, "learning_rate": 2.223463089381803e-05, "loss": 0.293701171875, "step": 5366 }, { "epoch": 0.3627822090036501, "grad_norm": 2.059823989868164, "learning_rate": 2.2231740326071766e-05, "loss": 0.322296142578125, "step": 5367 }, { "epoch": 0.3628498039745843, "grad_norm": 0.9692266583442688, "learning_rate": 2.2228849408408933e-05, "loss": 0.25970458984375, "step": 5368 }, { "epoch": 0.36291739894551844, "grad_norm": 0.8152551651000977, "learning_rate": 2.2225958140969405e-05, "loss": 0.194305419921875, "step": 5369 }, { "epoch": 0.3629849939164526, "grad_norm": 1.8116551637649536, "learning_rate": 2.2223066523893088e-05, "loss": 0.223297119140625, "step": 5370 }, { "epoch": 0.3630525888873868, "grad_norm": 1.2286298274993896, "learning_rate": 2.222017455731989e-05, "loss": 0.2646484375, "step": 5371 }, { "epoch": 0.36312018385832096, "grad_norm": 1.5519044399261475, "learning_rate": 2.221728224138975e-05, "loss": 0.29083251953125, "step": 5372 }, { "epoch": 0.3631877788292551, "grad_norm": 1.4272692203521729, "learning_rate": 2.2214389576242604e-05, "loss": 0.31280517578125, "step": 5373 }, { "epoch": 0.36325537380018924, "grad_norm": 1.9522452354431152, "learning_rate": 2.221149656201843e-05, "loss": 0.2803955078125, "step": 5374 }, { "epoch": 0.3633229687711234, "grad_norm": 1.1867642402648926, "learning_rate": 2.22086031988572e-05, "loss": 0.297943115234375, "step": 5375 }, { "epoch": 0.3633905637420576, "grad_norm": 1.2775835990905762, "learning_rate": 2.220570948689892e-05, "loss": 0.15903472900390625, "step": 5376 }, { "epoch": 0.36345815871299175, "grad_norm": 1.6596252918243408, "learning_rate": 2.2202815426283606e-05, "loss": 0.33367919921875, "step": 5377 }, { "epoch": 0.3635257536839259, "grad_norm": 1.410656213760376, "learning_rate": 2.2199921017151284e-05, "loss": 0.233734130859375, "step": 5378 }, { "epoch": 0.3635933486548601, "grad_norm": 1.717331051826477, "learning_rate": 2.2197026259642004e-05, "loss": 0.30731201171875, "step": 5379 }, { "epoch": 0.36366094362579426, "grad_norm": 1.1436179876327515, "learning_rate": 2.219413115389584e-05, "loss": 0.28509521484375, "step": 5380 }, { "epoch": 0.3637285385967284, "grad_norm": 1.2472403049468994, "learning_rate": 2.2191235700052877e-05, "loss": 0.22979736328125, "step": 5381 }, { "epoch": 0.36379613356766255, "grad_norm": 0.9761523008346558, "learning_rate": 2.21883398982532e-05, "loss": 0.182403564453125, "step": 5382 }, { "epoch": 0.3638637285385967, "grad_norm": 2.112032651901245, "learning_rate": 2.218544374863694e-05, "loss": 0.2107086181640625, "step": 5383 }, { "epoch": 0.3639313235095309, "grad_norm": 1.1185251474380493, "learning_rate": 2.218254725134422e-05, "loss": 0.283660888671875, "step": 5384 }, { "epoch": 0.36399891848046506, "grad_norm": 1.770470380783081, "learning_rate": 2.21796504065152e-05, "loss": 0.269927978515625, "step": 5385 }, { "epoch": 0.36406651345139923, "grad_norm": 1.7174427509307861, "learning_rate": 2.2176753214290048e-05, "loss": 0.2336883544921875, "step": 5386 }, { "epoch": 0.3641341084223334, "grad_norm": 1.1174681186676025, "learning_rate": 2.2173855674808935e-05, "loss": 0.17661285400390625, "step": 5387 }, { "epoch": 0.3642017033932675, "grad_norm": 0.8402306437492371, "learning_rate": 2.2170957788212076e-05, "loss": 0.217437744140625, "step": 5388 }, { "epoch": 0.3642692983642017, "grad_norm": 2.032864809036255, "learning_rate": 2.2168059554639684e-05, "loss": 0.3267822265625, "step": 5389 }, { "epoch": 0.36433689333513586, "grad_norm": 0.7292386293411255, "learning_rate": 2.2165160974231993e-05, "loss": 0.172515869140625, "step": 5390 }, { "epoch": 0.36440448830607003, "grad_norm": 2.3275372982025146, "learning_rate": 2.2162262047129248e-05, "loss": 0.2344970703125, "step": 5391 }, { "epoch": 0.3644720832770042, "grad_norm": 1.5757275819778442, "learning_rate": 2.2159362773471727e-05, "loss": 0.282684326171875, "step": 5392 }, { "epoch": 0.36453967824793837, "grad_norm": 1.205661416053772, "learning_rate": 2.215646315339972e-05, "loss": 0.2354736328125, "step": 5393 }, { "epoch": 0.36460727321887254, "grad_norm": 2.577275514602661, "learning_rate": 2.215356318705351e-05, "loss": 0.40057373046875, "step": 5394 }, { "epoch": 0.36467486818980666, "grad_norm": 0.7160412669181824, "learning_rate": 2.2150662874573428e-05, "loss": 0.131011962890625, "step": 5395 }, { "epoch": 0.3647424631607408, "grad_norm": 1.1796902418136597, "learning_rate": 2.2147762216099807e-05, "loss": 0.231292724609375, "step": 5396 }, { "epoch": 0.364810058131675, "grad_norm": 0.6702514290809631, "learning_rate": 2.2144861211772998e-05, "loss": 0.179473876953125, "step": 5397 }, { "epoch": 0.36487765310260917, "grad_norm": 0.9959484934806824, "learning_rate": 2.214195986173337e-05, "loss": 0.138397216796875, "step": 5398 }, { "epoch": 0.36494524807354334, "grad_norm": 1.3885960578918457, "learning_rate": 2.2139058166121306e-05, "loss": 0.260406494140625, "step": 5399 }, { "epoch": 0.3650128430444775, "grad_norm": 1.16453218460083, "learning_rate": 2.2136156125077218e-05, "loss": 0.29296875, "step": 5400 }, { "epoch": 0.3650804380154117, "grad_norm": 1.9014129638671875, "learning_rate": 2.213325373874151e-05, "loss": 0.34375, "step": 5401 }, { "epoch": 0.3651480329863458, "grad_norm": 1.060581922531128, "learning_rate": 2.213035100725463e-05, "loss": 0.22711181640625, "step": 5402 }, { "epoch": 0.36521562795727996, "grad_norm": 0.8432697057723999, "learning_rate": 2.2127447930757024e-05, "loss": 0.15501785278320312, "step": 5403 }, { "epoch": 0.36528322292821414, "grad_norm": 1.5540300607681274, "learning_rate": 2.212454450938916e-05, "loss": 0.2835693359375, "step": 5404 }, { "epoch": 0.3653508178991483, "grad_norm": 1.409952163696289, "learning_rate": 2.2121640743291528e-05, "loss": 0.19753265380859375, "step": 5405 }, { "epoch": 0.3654184128700825, "grad_norm": 1.3619402647018433, "learning_rate": 2.2118736632604626e-05, "loss": 0.288543701171875, "step": 5406 }, { "epoch": 0.36548600784101665, "grad_norm": 1.1072998046875, "learning_rate": 2.2115832177468974e-05, "loss": 0.2338409423828125, "step": 5407 }, { "epoch": 0.3655536028119508, "grad_norm": 2.685962200164795, "learning_rate": 2.211292737802511e-05, "loss": 0.3818359375, "step": 5408 }, { "epoch": 0.36562119778288493, "grad_norm": 0.9042295813560486, "learning_rate": 2.2110022234413587e-05, "loss": 0.18505859375, "step": 5409 }, { "epoch": 0.3656887927538191, "grad_norm": 1.667245864868164, "learning_rate": 2.2107116746774967e-05, "loss": 0.20391845703125, "step": 5410 }, { "epoch": 0.3657563877247533, "grad_norm": 1.7855472564697266, "learning_rate": 2.210421091524984e-05, "loss": 0.2611083984375, "step": 5411 }, { "epoch": 0.36582398269568744, "grad_norm": 1.7292057275772095, "learning_rate": 2.210130473997881e-05, "loss": 0.33782958984375, "step": 5412 }, { "epoch": 0.3658915776666216, "grad_norm": 1.0554676055908203, "learning_rate": 2.2098398221102494e-05, "loss": 0.2879638671875, "step": 5413 }, { "epoch": 0.3659591726375558, "grad_norm": 1.122771978378296, "learning_rate": 2.209549135876153e-05, "loss": 0.2857666015625, "step": 5414 }, { "epoch": 0.36602676760848996, "grad_norm": 1.2328566312789917, "learning_rate": 2.2092584153096566e-05, "loss": 0.268707275390625, "step": 5415 }, { "epoch": 0.36609436257942407, "grad_norm": 1.7605059146881104, "learning_rate": 2.2089676604248274e-05, "loss": 0.2432861328125, "step": 5416 }, { "epoch": 0.36616195755035824, "grad_norm": 1.444045066833496, "learning_rate": 2.2086768712357334e-05, "loss": 0.320587158203125, "step": 5417 }, { "epoch": 0.3662295525212924, "grad_norm": 1.678083896636963, "learning_rate": 2.2083860477564454e-05, "loss": 0.211395263671875, "step": 5418 }, { "epoch": 0.3662971474922266, "grad_norm": 2.3366217613220215, "learning_rate": 2.208095190001035e-05, "loss": 0.289215087890625, "step": 5419 }, { "epoch": 0.36636474246316075, "grad_norm": 1.090194582939148, "learning_rate": 2.2078042979835753e-05, "loss": 0.10996246337890625, "step": 5420 }, { "epoch": 0.3664323374340949, "grad_norm": 1.639325499534607, "learning_rate": 2.2075133717181426e-05, "loss": 0.241851806640625, "step": 5421 }, { "epoch": 0.36649993240502904, "grad_norm": 1.1330970525741577, "learning_rate": 2.2072224112188126e-05, "loss": 0.238006591796875, "step": 5422 }, { "epoch": 0.3665675273759632, "grad_norm": 1.2384718656539917, "learning_rate": 2.206931416499664e-05, "loss": 0.26654052734375, "step": 5423 }, { "epoch": 0.3666351223468974, "grad_norm": 1.1312544345855713, "learning_rate": 2.206640387574777e-05, "loss": 0.282501220703125, "step": 5424 }, { "epoch": 0.36670271731783155, "grad_norm": 1.3144690990447998, "learning_rate": 2.206349324458234e-05, "loss": 0.32958984375, "step": 5425 }, { "epoch": 0.3667703122887657, "grad_norm": 1.3372646570205688, "learning_rate": 2.206058227164118e-05, "loss": 0.27410888671875, "step": 5426 }, { "epoch": 0.3668379072596999, "grad_norm": 1.2590917348861694, "learning_rate": 2.2057670957065138e-05, "loss": 0.275115966796875, "step": 5427 }, { "epoch": 0.36690550223063406, "grad_norm": 1.0869237184524536, "learning_rate": 2.205475930099508e-05, "loss": 0.28326416015625, "step": 5428 }, { "epoch": 0.3669730972015682, "grad_norm": 0.7117536664009094, "learning_rate": 2.2051847303571902e-05, "loss": 0.1378936767578125, "step": 5429 }, { "epoch": 0.36704069217250235, "grad_norm": 1.6830027103424072, "learning_rate": 2.204893496493649e-05, "loss": 0.3294677734375, "step": 5430 }, { "epoch": 0.3671082871434365, "grad_norm": 1.4437719583511353, "learning_rate": 2.2046022285229772e-05, "loss": 0.264251708984375, "step": 5431 }, { "epoch": 0.3671758821143707, "grad_norm": 1.1226032972335815, "learning_rate": 2.2043109264592676e-05, "loss": 0.248687744140625, "step": 5432 }, { "epoch": 0.36724347708530486, "grad_norm": 1.4154890775680542, "learning_rate": 2.204019590316615e-05, "loss": 0.20050048828125, "step": 5433 }, { "epoch": 0.36731107205623903, "grad_norm": 2.857542037963867, "learning_rate": 2.2037282201091162e-05, "loss": 0.240753173828125, "step": 5434 }, { "epoch": 0.3673786670271732, "grad_norm": 2.190654754638672, "learning_rate": 2.2034368158508704e-05, "loss": 0.27459716796875, "step": 5435 }, { "epoch": 0.3674462619981073, "grad_norm": 1.0459994077682495, "learning_rate": 2.2031453775559764e-05, "loss": 0.2072296142578125, "step": 5436 }, { "epoch": 0.3675138569690415, "grad_norm": 1.2178173065185547, "learning_rate": 2.202853905238536e-05, "loss": 0.31646728515625, "step": 5437 }, { "epoch": 0.36758145193997566, "grad_norm": 1.1738260984420776, "learning_rate": 2.202562398912653e-05, "loss": 0.2474212646484375, "step": 5438 }, { "epoch": 0.3676490469109098, "grad_norm": 1.3881782293319702, "learning_rate": 2.2022708585924318e-05, "loss": 0.255767822265625, "step": 5439 }, { "epoch": 0.367716641881844, "grad_norm": 1.2271149158477783, "learning_rate": 2.2019792842919796e-05, "loss": 0.268463134765625, "step": 5440 }, { "epoch": 0.36778423685277817, "grad_norm": 1.2148200273513794, "learning_rate": 2.2016876760254036e-05, "loss": 0.199554443359375, "step": 5441 }, { "epoch": 0.36785183182371234, "grad_norm": 1.4404536485671997, "learning_rate": 2.2013960338068142e-05, "loss": 0.26324462890625, "step": 5442 }, { "epoch": 0.36791942679464645, "grad_norm": 1.1123536825180054, "learning_rate": 2.201104357650323e-05, "loss": 0.1633758544921875, "step": 5443 }, { "epoch": 0.3679870217655806, "grad_norm": 0.8232789039611816, "learning_rate": 2.200812647570042e-05, "loss": 0.156463623046875, "step": 5444 }, { "epoch": 0.3680546167365148, "grad_norm": 0.5151742100715637, "learning_rate": 2.200520903580087e-05, "loss": 0.13726043701171875, "step": 5445 }, { "epoch": 0.36812221170744897, "grad_norm": 1.9136372804641724, "learning_rate": 2.2002291256945747e-05, "loss": 0.349456787109375, "step": 5446 }, { "epoch": 0.36818980667838314, "grad_norm": 1.5828925371170044, "learning_rate": 2.1999373139276226e-05, "loss": 0.254608154296875, "step": 5447 }, { "epoch": 0.3682574016493173, "grad_norm": 1.6633321046829224, "learning_rate": 2.1996454682933503e-05, "loss": 0.28045654296875, "step": 5448 }, { "epoch": 0.3683249966202515, "grad_norm": 0.8950858116149902, "learning_rate": 2.1993535888058793e-05, "loss": 0.2244110107421875, "step": 5449 }, { "epoch": 0.3683925915911856, "grad_norm": 1.0392022132873535, "learning_rate": 2.199061675479332e-05, "loss": 0.1357879638671875, "step": 5450 }, { "epoch": 0.36846018656211976, "grad_norm": 1.25613534450531, "learning_rate": 2.198769728327834e-05, "loss": 0.22210693359375, "step": 5451 }, { "epoch": 0.36852778153305393, "grad_norm": 1.0026485919952393, "learning_rate": 2.1984777473655105e-05, "loss": 0.21710205078125, "step": 5452 }, { "epoch": 0.3685953765039881, "grad_norm": 2.587850332260132, "learning_rate": 2.19818573260649e-05, "loss": 0.274261474609375, "step": 5453 }, { "epoch": 0.3686629714749223, "grad_norm": 1.6400861740112305, "learning_rate": 2.1978936840649015e-05, "loss": 0.252166748046875, "step": 5454 }, { "epoch": 0.36873056644585644, "grad_norm": 1.3009603023529053, "learning_rate": 2.1976016017548766e-05, "loss": 0.269805908203125, "step": 5455 }, { "epoch": 0.3687981614167906, "grad_norm": 1.5942695140838623, "learning_rate": 2.1973094856905484e-05, "loss": 0.28692626953125, "step": 5456 }, { "epoch": 0.36886575638772473, "grad_norm": 0.7324731349945068, "learning_rate": 2.19701733588605e-05, "loss": 0.150177001953125, "step": 5457 }, { "epoch": 0.3689333513586589, "grad_norm": 1.299211025238037, "learning_rate": 2.1967251523555183e-05, "loss": 0.2376251220703125, "step": 5458 }, { "epoch": 0.36900094632959307, "grad_norm": 1.4521113634109497, "learning_rate": 2.1964329351130907e-05, "loss": 0.1933746337890625, "step": 5459 }, { "epoch": 0.36906854130052724, "grad_norm": 0.9351992607116699, "learning_rate": 2.1961406841729074e-05, "loss": 0.210968017578125, "step": 5460 }, { "epoch": 0.3691361362714614, "grad_norm": 1.2319926023483276, "learning_rate": 2.195848399549108e-05, "loss": 0.240020751953125, "step": 5461 }, { "epoch": 0.3692037312423956, "grad_norm": 1.2057301998138428, "learning_rate": 2.195556081255835e-05, "loss": 0.223846435546875, "step": 5462 }, { "epoch": 0.36927132621332975, "grad_norm": 0.7948645353317261, "learning_rate": 2.1952637293072344e-05, "loss": 0.201873779296875, "step": 5463 }, { "epoch": 0.36933892118426387, "grad_norm": 1.6675517559051514, "learning_rate": 2.1949713437174504e-05, "loss": 0.130645751953125, "step": 5464 }, { "epoch": 0.36940651615519804, "grad_norm": 1.57062828540802, "learning_rate": 2.1946789245006304e-05, "loss": 0.25115966796875, "step": 5465 }, { "epoch": 0.3694741111261322, "grad_norm": 1.013676643371582, "learning_rate": 2.1943864716709237e-05, "loss": 0.223846435546875, "step": 5466 }, { "epoch": 0.3695417060970664, "grad_norm": 1.2708176374435425, "learning_rate": 2.194093985242482e-05, "loss": 0.3209228515625, "step": 5467 }, { "epoch": 0.36960930106800055, "grad_norm": 1.7035223245620728, "learning_rate": 2.1938014652294564e-05, "loss": 0.30230712890625, "step": 5468 }, { "epoch": 0.3696768960389347, "grad_norm": 1.181093454360962, "learning_rate": 2.193508911646001e-05, "loss": 0.2642822265625, "step": 5469 }, { "epoch": 0.3697444910098689, "grad_norm": 1.250805377960205, "learning_rate": 2.1932163245062714e-05, "loss": 0.272705078125, "step": 5470 }, { "epoch": 0.369812085980803, "grad_norm": 1.384339451789856, "learning_rate": 2.1929237038244254e-05, "loss": 0.22998046875, "step": 5471 }, { "epoch": 0.3698796809517372, "grad_norm": 1.5182958841323853, "learning_rate": 2.1926310496146213e-05, "loss": 0.280029296875, "step": 5472 }, { "epoch": 0.36994727592267135, "grad_norm": 2.026085138320923, "learning_rate": 2.1923383618910194e-05, "loss": 0.29296875, "step": 5473 }, { "epoch": 0.3700148708936055, "grad_norm": 1.6153576374053955, "learning_rate": 2.1920456406677822e-05, "loss": 0.1965789794921875, "step": 5474 }, { "epoch": 0.3700824658645397, "grad_norm": 1.6846083402633667, "learning_rate": 2.1917528859590727e-05, "loss": 0.3330078125, "step": 5475 }, { "epoch": 0.37015006083547386, "grad_norm": 1.025289535522461, "learning_rate": 2.191460097779057e-05, "loss": 0.17104339599609375, "step": 5476 }, { "epoch": 0.37021765580640803, "grad_norm": 1.1824084520339966, "learning_rate": 2.191167276141902e-05, "loss": 0.277099609375, "step": 5477 }, { "epoch": 0.37028525077734215, "grad_norm": 1.2861868143081665, "learning_rate": 2.190874421061775e-05, "loss": 0.201507568359375, "step": 5478 }, { "epoch": 0.3703528457482763, "grad_norm": 1.1762624979019165, "learning_rate": 2.190581532552847e-05, "loss": 0.16387939453125, "step": 5479 }, { "epoch": 0.3704204407192105, "grad_norm": 0.8765829205513, "learning_rate": 2.1902886106292906e-05, "loss": 0.23590087890625, "step": 5480 }, { "epoch": 0.37048803569014466, "grad_norm": 1.8060243129730225, "learning_rate": 2.189995655305278e-05, "loss": 0.2108154296875, "step": 5481 }, { "epoch": 0.3705556306610788, "grad_norm": 1.0367863178253174, "learning_rate": 2.189702666594984e-05, "loss": 0.1689605712890625, "step": 5482 }, { "epoch": 0.370623225632013, "grad_norm": 0.5253709554672241, "learning_rate": 2.1894096445125863e-05, "loss": 0.1106109619140625, "step": 5483 }, { "epoch": 0.3706908206029471, "grad_norm": 1.6468615531921387, "learning_rate": 2.1891165890722628e-05, "loss": 0.288330078125, "step": 5484 }, { "epoch": 0.3707584155738813, "grad_norm": 1.0534415245056152, "learning_rate": 2.188823500288193e-05, "loss": 0.229522705078125, "step": 5485 }, { "epoch": 0.37082601054481545, "grad_norm": 1.6489861011505127, "learning_rate": 2.1885303781745586e-05, "loss": 0.2108306884765625, "step": 5486 }, { "epoch": 0.3708936055157496, "grad_norm": 2.0700571537017822, "learning_rate": 2.188237222745542e-05, "loss": 0.34161376953125, "step": 5487 }, { "epoch": 0.3709612004866838, "grad_norm": 1.9585169553756714, "learning_rate": 2.187944034015329e-05, "loss": 0.319244384765625, "step": 5488 }, { "epoch": 0.37102879545761797, "grad_norm": 2.0962743759155273, "learning_rate": 2.1876508119981053e-05, "loss": 0.215179443359375, "step": 5489 }, { "epoch": 0.37109639042855214, "grad_norm": 0.7886365652084351, "learning_rate": 2.1873575567080584e-05, "loss": 0.18359375, "step": 5490 }, { "epoch": 0.37116398539948625, "grad_norm": 0.922701358795166, "learning_rate": 2.1870642681593787e-05, "loss": 0.194915771484375, "step": 5491 }, { "epoch": 0.3712315803704204, "grad_norm": 1.6424329280853271, "learning_rate": 2.1867709463662568e-05, "loss": 0.249237060546875, "step": 5492 }, { "epoch": 0.3712991753413546, "grad_norm": 1.221276879310608, "learning_rate": 2.1864775913428857e-05, "loss": 0.2249755859375, "step": 5493 }, { "epoch": 0.37136677031228876, "grad_norm": 1.0276482105255127, "learning_rate": 2.1861842031034596e-05, "loss": 0.211090087890625, "step": 5494 }, { "epoch": 0.37143436528322293, "grad_norm": 1.1067534685134888, "learning_rate": 2.185890781662174e-05, "loss": 0.284912109375, "step": 5495 }, { "epoch": 0.3715019602541571, "grad_norm": 0.9508523344993591, "learning_rate": 2.185597327033227e-05, "loss": 0.2024078369140625, "step": 5496 }, { "epoch": 0.3715695552250913, "grad_norm": 1.2640489339828491, "learning_rate": 2.1853038392308178e-05, "loss": 0.26397705078125, "step": 5497 }, { "epoch": 0.3716371501960254, "grad_norm": 0.6891516447067261, "learning_rate": 2.1850103182691472e-05, "loss": 0.1904296875, "step": 5498 }, { "epoch": 0.37170474516695956, "grad_norm": 1.0241248607635498, "learning_rate": 2.184716764162417e-05, "loss": 0.1983642578125, "step": 5499 }, { "epoch": 0.37177234013789373, "grad_norm": 1.4821237325668335, "learning_rate": 2.184423176924832e-05, "loss": 0.31707763671875, "step": 5500 }, { "epoch": 0.3718399351088279, "grad_norm": 3.0344114303588867, "learning_rate": 2.184129556570597e-05, "loss": 0.34844970703125, "step": 5501 }, { "epoch": 0.37190753007976207, "grad_norm": 1.0644255876541138, "learning_rate": 2.1838359031139193e-05, "loss": 0.255859375, "step": 5502 }, { "epoch": 0.37197512505069624, "grad_norm": 1.2268134355545044, "learning_rate": 2.1835422165690085e-05, "loss": 0.29022216796875, "step": 5503 }, { "epoch": 0.3720427200216304, "grad_norm": 1.6299139261245728, "learning_rate": 2.1832484969500733e-05, "loss": 0.31243896484375, "step": 5504 }, { "epoch": 0.37211031499256453, "grad_norm": 0.9600623846054077, "learning_rate": 2.1829547442713276e-05, "loss": 0.23321533203125, "step": 5505 }, { "epoch": 0.3721779099634987, "grad_norm": 2.618520736694336, "learning_rate": 2.1826609585469837e-05, "loss": 0.2423095703125, "step": 5506 }, { "epoch": 0.37224550493443287, "grad_norm": 1.6079576015472412, "learning_rate": 2.1823671397912575e-05, "loss": 0.355621337890625, "step": 5507 }, { "epoch": 0.37231309990536704, "grad_norm": 1.2977997064590454, "learning_rate": 2.1820732880183652e-05, "loss": 0.2945556640625, "step": 5508 }, { "epoch": 0.3723806948763012, "grad_norm": 1.0528439283370972, "learning_rate": 2.1817794032425258e-05, "loss": 0.2305450439453125, "step": 5509 }, { "epoch": 0.3724482898472354, "grad_norm": 2.378997564315796, "learning_rate": 2.181485485477959e-05, "loss": 0.322021484375, "step": 5510 }, { "epoch": 0.37251588481816955, "grad_norm": 1.1820622682571411, "learning_rate": 2.1811915347388864e-05, "loss": 0.274169921875, "step": 5511 }, { "epoch": 0.37258347978910367, "grad_norm": 0.9150123596191406, "learning_rate": 2.1808975510395304e-05, "loss": 0.232879638671875, "step": 5512 }, { "epoch": 0.37265107476003784, "grad_norm": 1.2957805395126343, "learning_rate": 2.1806035343941164e-05, "loss": 0.202423095703125, "step": 5513 }, { "epoch": 0.372718669730972, "grad_norm": 0.7605858445167542, "learning_rate": 2.180309484816871e-05, "loss": 0.209320068359375, "step": 5514 }, { "epoch": 0.3727862647019062, "grad_norm": 2.3974642753601074, "learning_rate": 2.1800154023220225e-05, "loss": 0.2740020751953125, "step": 5515 }, { "epoch": 0.37285385967284035, "grad_norm": 1.8139419555664062, "learning_rate": 2.1797212869237992e-05, "loss": 0.279571533203125, "step": 5516 }, { "epoch": 0.3729214546437745, "grad_norm": 1.025307536125183, "learning_rate": 2.1794271386364334e-05, "loss": 0.32550048828125, "step": 5517 }, { "epoch": 0.3729890496147087, "grad_norm": 0.9649612903594971, "learning_rate": 2.179132957474157e-05, "loss": 0.28857421875, "step": 5518 }, { "epoch": 0.3730566445856428, "grad_norm": 0.46268290281295776, "learning_rate": 2.1788387434512046e-05, "loss": 0.1122894287109375, "step": 5519 }, { "epoch": 0.373124239556577, "grad_norm": 2.2082579135894775, "learning_rate": 2.178544496581812e-05, "loss": 0.3427734375, "step": 5520 }, { "epoch": 0.37319183452751115, "grad_norm": 1.3171204328536987, "learning_rate": 2.178250216880217e-05, "loss": 0.345184326171875, "step": 5521 }, { "epoch": 0.3732594294984453, "grad_norm": 1.3666919469833374, "learning_rate": 2.1779559043606586e-05, "loss": 0.278594970703125, "step": 5522 }, { "epoch": 0.3733270244693795, "grad_norm": 1.096764087677002, "learning_rate": 2.1776615590373777e-05, "loss": 0.217559814453125, "step": 5523 }, { "epoch": 0.37339461944031366, "grad_norm": 1.0636012554168701, "learning_rate": 2.1773671809246162e-05, "loss": 0.2333221435546875, "step": 5524 }, { "epoch": 0.37346221441124783, "grad_norm": 1.094571590423584, "learning_rate": 2.1770727700366172e-05, "loss": 0.21417236328125, "step": 5525 }, { "epoch": 0.37352980938218194, "grad_norm": 1.3066017627716064, "learning_rate": 2.1767783263876274e-05, "loss": 0.220306396484375, "step": 5526 }, { "epoch": 0.3735974043531161, "grad_norm": 1.0141831636428833, "learning_rate": 2.1764838499918933e-05, "loss": 0.17046356201171875, "step": 5527 }, { "epoch": 0.3736649993240503, "grad_norm": 2.1314218044281006, "learning_rate": 2.1761893408636636e-05, "loss": 0.278076171875, "step": 5528 }, { "epoch": 0.37373259429498445, "grad_norm": 1.058956265449524, "learning_rate": 2.175894799017188e-05, "loss": 0.30780029296875, "step": 5529 }, { "epoch": 0.3738001892659186, "grad_norm": 1.2804726362228394, "learning_rate": 2.1756002244667193e-05, "loss": 0.27056884765625, "step": 5530 }, { "epoch": 0.3738677842368528, "grad_norm": 1.5448373556137085, "learning_rate": 2.17530561722651e-05, "loss": 0.3067626953125, "step": 5531 }, { "epoch": 0.37393537920778697, "grad_norm": 0.988612174987793, "learning_rate": 2.1750109773108152e-05, "loss": 0.26812744140625, "step": 5532 }, { "epoch": 0.3740029741787211, "grad_norm": 1.1292235851287842, "learning_rate": 2.1747163047338916e-05, "loss": 0.2789459228515625, "step": 5533 }, { "epoch": 0.37407056914965525, "grad_norm": 1.4839564561843872, "learning_rate": 2.174421599509997e-05, "loss": 0.300689697265625, "step": 5534 }, { "epoch": 0.3741381641205894, "grad_norm": 1.165740728378296, "learning_rate": 2.1741268616533914e-05, "loss": 0.1913299560546875, "step": 5535 }, { "epoch": 0.3742057590915236, "grad_norm": 0.8342100381851196, "learning_rate": 2.1738320911783357e-05, "loss": 0.181671142578125, "step": 5536 }, { "epoch": 0.37427335406245776, "grad_norm": 1.158576488494873, "learning_rate": 2.173537288099093e-05, "loss": 0.234100341796875, "step": 5537 }, { "epoch": 0.37434094903339193, "grad_norm": 1.2241127490997314, "learning_rate": 2.1732424524299277e-05, "loss": 0.240478515625, "step": 5538 }, { "epoch": 0.3744085440043261, "grad_norm": 0.7954559326171875, "learning_rate": 2.1729475841851055e-05, "loss": 0.193878173828125, "step": 5539 }, { "epoch": 0.3744761389752602, "grad_norm": 1.679630160331726, "learning_rate": 2.1726526833788944e-05, "loss": 0.3021240234375, "step": 5540 }, { "epoch": 0.3745437339461944, "grad_norm": 0.9547535181045532, "learning_rate": 2.172357750025563e-05, "loss": 0.1745452880859375, "step": 5541 }, { "epoch": 0.37461132891712856, "grad_norm": 1.1959407329559326, "learning_rate": 2.1720627841393823e-05, "loss": 0.265350341796875, "step": 5542 }, { "epoch": 0.37467892388806273, "grad_norm": 1.0468461513519287, "learning_rate": 2.171767785734625e-05, "loss": 0.2564697265625, "step": 5543 }, { "epoch": 0.3747465188589969, "grad_norm": 0.8649726510047913, "learning_rate": 2.1714727548255642e-05, "loss": 0.2423095703125, "step": 5544 }, { "epoch": 0.3748141138299311, "grad_norm": 1.5916599035263062, "learning_rate": 2.171177691426476e-05, "loss": 0.2777099609375, "step": 5545 }, { "epoch": 0.37488170880086524, "grad_norm": 1.153434157371521, "learning_rate": 2.170882595551637e-05, "loss": 0.248260498046875, "step": 5546 }, { "epoch": 0.37494930377179936, "grad_norm": 0.941352128982544, "learning_rate": 2.1705874672153255e-05, "loss": 0.1690521240234375, "step": 5547 }, { "epoch": 0.37501689874273353, "grad_norm": 1.2456445693969727, "learning_rate": 2.1702923064318222e-05, "loss": 0.260894775390625, "step": 5548 }, { "epoch": 0.3750844937136677, "grad_norm": 0.8545340895652771, "learning_rate": 2.1699971132154087e-05, "loss": 0.197509765625, "step": 5549 }, { "epoch": 0.37515208868460187, "grad_norm": 1.1515556573867798, "learning_rate": 2.169701887580368e-05, "loss": 0.210205078125, "step": 5550 }, { "epoch": 0.37521968365553604, "grad_norm": 1.3273807764053345, "learning_rate": 2.1694066295409852e-05, "loss": 0.2241363525390625, "step": 5551 }, { "epoch": 0.3752872786264702, "grad_norm": 1.1253966093063354, "learning_rate": 2.1691113391115466e-05, "loss": 0.222320556640625, "step": 5552 }, { "epoch": 0.3753548735974043, "grad_norm": 1.7151353359222412, "learning_rate": 2.1688160163063404e-05, "loss": 0.3321533203125, "step": 5553 }, { "epoch": 0.3754224685683385, "grad_norm": 1.7171584367752075, "learning_rate": 2.1685206611396557e-05, "loss": 0.2735748291015625, "step": 5554 }, { "epoch": 0.37549006353927267, "grad_norm": 0.9536635279655457, "learning_rate": 2.1682252736257846e-05, "loss": 0.19921875, "step": 5555 }, { "epoch": 0.37555765851020684, "grad_norm": 1.3550385236740112, "learning_rate": 2.1679298537790185e-05, "loss": 0.1803436279296875, "step": 5556 }, { "epoch": 0.375625253481141, "grad_norm": 1.3684767484664917, "learning_rate": 2.1676344016136528e-05, "loss": 0.24349212646484375, "step": 5557 }, { "epoch": 0.3756928484520752, "grad_norm": 1.4838734865188599, "learning_rate": 2.1673389171439826e-05, "loss": 0.281982421875, "step": 5558 }, { "epoch": 0.37576044342300935, "grad_norm": 1.135989785194397, "learning_rate": 2.1670434003843054e-05, "loss": 0.16254425048828125, "step": 5559 }, { "epoch": 0.37582803839394346, "grad_norm": 2.106027841567993, "learning_rate": 2.16674785134892e-05, "loss": 0.32220458984375, "step": 5560 }, { "epoch": 0.37589563336487763, "grad_norm": 1.1800264120101929, "learning_rate": 2.166452270052127e-05, "loss": 0.2742919921875, "step": 5561 }, { "epoch": 0.3759632283358118, "grad_norm": 1.6233570575714111, "learning_rate": 2.166156656508229e-05, "loss": 0.225311279296875, "step": 5562 }, { "epoch": 0.376030823306746, "grad_norm": 1.7474315166473389, "learning_rate": 2.165861010731529e-05, "loss": 0.35626220703125, "step": 5563 }, { "epoch": 0.37609841827768015, "grad_norm": 2.4571774005889893, "learning_rate": 2.1655653327363324e-05, "loss": 0.2482452392578125, "step": 5564 }, { "epoch": 0.3761660132486143, "grad_norm": 1.3275868892669678, "learning_rate": 2.1652696225369456e-05, "loss": 0.2696533203125, "step": 5565 }, { "epoch": 0.3762336082195485, "grad_norm": 0.7650386095046997, "learning_rate": 2.1649738801476775e-05, "loss": 0.144683837890625, "step": 5566 }, { "epoch": 0.3763012031904826, "grad_norm": 1.3362772464752197, "learning_rate": 2.164678105582837e-05, "loss": 0.29644775390625, "step": 5567 }, { "epoch": 0.3763687981614168, "grad_norm": 0.9225835204124451, "learning_rate": 2.1643822988567373e-05, "loss": 0.205841064453125, "step": 5568 }, { "epoch": 0.37643639313235094, "grad_norm": 0.8237205743789673, "learning_rate": 2.1640864599836898e-05, "loss": 0.1309356689453125, "step": 5569 }, { "epoch": 0.3765039881032851, "grad_norm": 0.34882017970085144, "learning_rate": 2.1637905889780093e-05, "loss": 0.065338134765625, "step": 5570 }, { "epoch": 0.3765715830742193, "grad_norm": 1.279017686843872, "learning_rate": 2.1634946858540118e-05, "loss": 0.31121826171875, "step": 5571 }, { "epoch": 0.37663917804515346, "grad_norm": 2.220184326171875, "learning_rate": 2.1631987506260154e-05, "loss": 0.267913818359375, "step": 5572 }, { "epoch": 0.3767067730160876, "grad_norm": 0.9919989705085754, "learning_rate": 2.1629027833083388e-05, "loss": 0.26507568359375, "step": 5573 }, { "epoch": 0.37677436798702174, "grad_norm": 1.9656962156295776, "learning_rate": 2.1626067839153036e-05, "loss": 0.22440338134765625, "step": 5574 }, { "epoch": 0.3768419629579559, "grad_norm": 2.866243362426758, "learning_rate": 2.1623107524612308e-05, "loss": 0.311920166015625, "step": 5575 }, { "epoch": 0.3769095579288901, "grad_norm": 0.8682146072387695, "learning_rate": 2.162014688960445e-05, "loss": 0.1816558837890625, "step": 5576 }, { "epoch": 0.37697715289982425, "grad_norm": 0.9499367475509644, "learning_rate": 2.1617185934272722e-05, "loss": 0.1239471435546875, "step": 5577 }, { "epoch": 0.3770447478707584, "grad_norm": 1.5323119163513184, "learning_rate": 2.161422465876038e-05, "loss": 0.284637451171875, "step": 5578 }, { "epoch": 0.3771123428416926, "grad_norm": 1.860285997390747, "learning_rate": 2.161126306321072e-05, "loss": 0.326568603515625, "step": 5579 }, { "epoch": 0.37717993781262676, "grad_norm": 1.1792842149734497, "learning_rate": 2.1608301147767036e-05, "loss": 0.28338623046875, "step": 5580 }, { "epoch": 0.3772475327835609, "grad_norm": 1.8708724975585938, "learning_rate": 2.160533891257265e-05, "loss": 0.31781005859375, "step": 5581 }, { "epoch": 0.37731512775449505, "grad_norm": 1.0698634386062622, "learning_rate": 2.160237635777088e-05, "loss": 0.272674560546875, "step": 5582 }, { "epoch": 0.3773827227254292, "grad_norm": 1.0769968032836914, "learning_rate": 2.159941348350509e-05, "loss": 0.226898193359375, "step": 5583 }, { "epoch": 0.3774503176963634, "grad_norm": 1.1369398832321167, "learning_rate": 2.1596450289918628e-05, "loss": 0.26513671875, "step": 5584 }, { "epoch": 0.37751791266729756, "grad_norm": 1.2499091625213623, "learning_rate": 2.1593486777154885e-05, "loss": 0.1916961669921875, "step": 5585 }, { "epoch": 0.37758550763823173, "grad_norm": 1.5851356983184814, "learning_rate": 2.1590522945357246e-05, "loss": 0.28125, "step": 5586 }, { "epoch": 0.3776531026091659, "grad_norm": 2.352055788040161, "learning_rate": 2.1587558794669118e-05, "loss": 0.275909423828125, "step": 5587 }, { "epoch": 0.3777206975801, "grad_norm": 1.4245043992996216, "learning_rate": 2.1584594325233926e-05, "loss": 0.2900390625, "step": 5588 }, { "epoch": 0.3777882925510342, "grad_norm": 0.8932650685310364, "learning_rate": 2.1581629537195115e-05, "loss": 0.206329345703125, "step": 5589 }, { "epoch": 0.37785588752196836, "grad_norm": 0.8963485956192017, "learning_rate": 2.1578664430696134e-05, "loss": 0.19927978515625, "step": 5590 }, { "epoch": 0.37792348249290253, "grad_norm": 0.9614033102989197, "learning_rate": 2.157569900588046e-05, "loss": 0.188323974609375, "step": 5591 }, { "epoch": 0.3779910774638367, "grad_norm": 1.377840280532837, "learning_rate": 2.1572733262891567e-05, "loss": 0.28369140625, "step": 5592 }, { "epoch": 0.37805867243477087, "grad_norm": 1.2898542881011963, "learning_rate": 2.156976720187297e-05, "loss": 0.2457275390625, "step": 5593 }, { "epoch": 0.37812626740570504, "grad_norm": 1.2181456089019775, "learning_rate": 2.156680082296818e-05, "loss": 0.23101806640625, "step": 5594 }, { "epoch": 0.37819386237663916, "grad_norm": 1.025904655456543, "learning_rate": 2.1563834126320725e-05, "loss": 0.2188720703125, "step": 5595 }, { "epoch": 0.3782614573475733, "grad_norm": 0.8964037895202637, "learning_rate": 2.156086711207415e-05, "loss": 0.1655731201171875, "step": 5596 }, { "epoch": 0.3783290523185075, "grad_norm": 1.6261550188064575, "learning_rate": 2.1557899780372027e-05, "loss": 0.309417724609375, "step": 5597 }, { "epoch": 0.37839664728944167, "grad_norm": 1.3476598262786865, "learning_rate": 2.1554932131357927e-05, "loss": 0.31689453125, "step": 5598 }, { "epoch": 0.37846424226037584, "grad_norm": 1.288204312324524, "learning_rate": 2.155196416517545e-05, "loss": 0.29058837890625, "step": 5599 }, { "epoch": 0.37853183723131, "grad_norm": 1.4838353395462036, "learning_rate": 2.1548995881968197e-05, "loss": 0.29730224609375, "step": 5600 }, { "epoch": 0.3785994322022442, "grad_norm": 1.3167686462402344, "learning_rate": 2.15460272818798e-05, "loss": 0.33746337890625, "step": 5601 }, { "epoch": 0.3786670271731783, "grad_norm": 1.9508780241012573, "learning_rate": 2.154305836505389e-05, "loss": 0.2777099609375, "step": 5602 }, { "epoch": 0.37873462214411246, "grad_norm": 1.5923036336898804, "learning_rate": 2.1540089131634125e-05, "loss": 0.2491455078125, "step": 5603 }, { "epoch": 0.37880221711504664, "grad_norm": 1.083714246749878, "learning_rate": 2.1537119581764176e-05, "loss": 0.223876953125, "step": 5604 }, { "epoch": 0.3788698120859808, "grad_norm": 1.6398292779922485, "learning_rate": 2.1534149715587727e-05, "loss": 0.346527099609375, "step": 5605 }, { "epoch": 0.378937407056915, "grad_norm": 1.8052973747253418, "learning_rate": 2.153117953324848e-05, "loss": 0.31060791015625, "step": 5606 }, { "epoch": 0.37900500202784915, "grad_norm": 1.0243213176727295, "learning_rate": 2.1528209034890154e-05, "loss": 0.27056884765625, "step": 5607 }, { "epoch": 0.3790725969987833, "grad_norm": 0.7364709973335266, "learning_rate": 2.1525238220656473e-05, "loss": 0.1545867919921875, "step": 5608 }, { "epoch": 0.37914019196971743, "grad_norm": 1.2246215343475342, "learning_rate": 2.1522267090691186e-05, "loss": 0.273773193359375, "step": 5609 }, { "epoch": 0.3792077869406516, "grad_norm": 1.828107476234436, "learning_rate": 2.1519295645138055e-05, "loss": 0.40667724609375, "step": 5610 }, { "epoch": 0.3792753819115858, "grad_norm": 0.7830167412757874, "learning_rate": 2.1516323884140863e-05, "loss": 0.1566162109375, "step": 5611 }, { "epoch": 0.37934297688251994, "grad_norm": 1.215768575668335, "learning_rate": 2.1513351807843395e-05, "loss": 0.24725341796875, "step": 5612 }, { "epoch": 0.3794105718534541, "grad_norm": 0.9347618222236633, "learning_rate": 2.1510379416389453e-05, "loss": 0.172882080078125, "step": 5613 }, { "epoch": 0.3794781668243883, "grad_norm": 0.6187797784805298, "learning_rate": 2.1507406709922874e-05, "loss": 0.1391754150390625, "step": 5614 }, { "epoch": 0.3795457617953224, "grad_norm": 0.9207379817962646, "learning_rate": 2.1504433688587493e-05, "loss": 0.194976806640625, "step": 5615 }, { "epoch": 0.37961335676625657, "grad_norm": 1.294232726097107, "learning_rate": 2.1501460352527152e-05, "loss": 0.24993896484375, "step": 5616 }, { "epoch": 0.37968095173719074, "grad_norm": 1.7152478694915771, "learning_rate": 2.1498486701885726e-05, "loss": 0.325836181640625, "step": 5617 }, { "epoch": 0.3797485467081249, "grad_norm": 1.954431414604187, "learning_rate": 2.1495512736807107e-05, "loss": 0.2139739990234375, "step": 5618 }, { "epoch": 0.3798161416790591, "grad_norm": 1.70429265499115, "learning_rate": 2.149253845743518e-05, "loss": 0.3438720703125, "step": 5619 }, { "epoch": 0.37988373664999325, "grad_norm": 1.4920272827148438, "learning_rate": 2.1489563863913874e-05, "loss": 0.33074951171875, "step": 5620 }, { "epoch": 0.3799513316209274, "grad_norm": 0.587144136428833, "learning_rate": 2.14865889563871e-05, "loss": 0.095703125, "step": 5621 }, { "epoch": 0.38001892659186154, "grad_norm": 2.00384259223938, "learning_rate": 2.1483613734998818e-05, "loss": 0.3162841796875, "step": 5622 }, { "epoch": 0.3800865215627957, "grad_norm": 0.7174186706542969, "learning_rate": 2.1480638199892982e-05, "loss": 0.1739501953125, "step": 5623 }, { "epoch": 0.3801541165337299, "grad_norm": 1.7145930528640747, "learning_rate": 2.1477662351213567e-05, "loss": 0.273468017578125, "step": 5624 }, { "epoch": 0.38022171150466405, "grad_norm": 1.2359102964401245, "learning_rate": 2.1474686189104566e-05, "loss": 0.253631591796875, "step": 5625 }, { "epoch": 0.3802893064755982, "grad_norm": 1.6959179639816284, "learning_rate": 2.1471709713709976e-05, "loss": 0.2313690185546875, "step": 5626 }, { "epoch": 0.3803569014465324, "grad_norm": 0.6116636991500854, "learning_rate": 2.1468732925173825e-05, "loss": 0.11949920654296875, "step": 5627 }, { "epoch": 0.38042449641746656, "grad_norm": 0.9023776650428772, "learning_rate": 2.146575582364015e-05, "loss": 0.17453765869140625, "step": 5628 }, { "epoch": 0.3804920913884007, "grad_norm": 1.2265546321868896, "learning_rate": 2.1462778409252995e-05, "loss": 0.30389404296875, "step": 5629 }, { "epoch": 0.38055968635933485, "grad_norm": 1.8307263851165771, "learning_rate": 2.145980068215643e-05, "loss": 0.31195068359375, "step": 5630 }, { "epoch": 0.380627281330269, "grad_norm": 1.3051677942276, "learning_rate": 2.1456822642494532e-05, "loss": 0.27838134765625, "step": 5631 }, { "epoch": 0.3806948763012032, "grad_norm": 1.4299298524856567, "learning_rate": 2.1453844290411404e-05, "loss": 0.279022216796875, "step": 5632 }, { "epoch": 0.38076247127213736, "grad_norm": 0.8158823251724243, "learning_rate": 2.145086562605115e-05, "loss": 0.1745452880859375, "step": 5633 }, { "epoch": 0.38083006624307153, "grad_norm": 0.8106896877288818, "learning_rate": 2.1447886649557902e-05, "loss": 0.16754150390625, "step": 5634 }, { "epoch": 0.3808976612140057, "grad_norm": 2.4160099029541016, "learning_rate": 2.1444907361075794e-05, "loss": 0.265380859375, "step": 5635 }, { "epoch": 0.3809652561849398, "grad_norm": 0.8345010876655579, "learning_rate": 2.1441927760748994e-05, "loss": 0.197265625, "step": 5636 }, { "epoch": 0.381032851155874, "grad_norm": 1.0757277011871338, "learning_rate": 2.1438947848721664e-05, "loss": 0.21539306640625, "step": 5637 }, { "epoch": 0.38110044612680816, "grad_norm": 1.2761820554733276, "learning_rate": 2.1435967625137992e-05, "loss": 0.2183837890625, "step": 5638 }, { "epoch": 0.3811680410977423, "grad_norm": 0.8591886758804321, "learning_rate": 2.143298709014219e-05, "loss": 0.20391845703125, "step": 5639 }, { "epoch": 0.3812356360686765, "grad_norm": 0.8316231966018677, "learning_rate": 2.1430006243878458e-05, "loss": 0.1890411376953125, "step": 5640 }, { "epoch": 0.38130323103961067, "grad_norm": 1.1263155937194824, "learning_rate": 2.1427025086491037e-05, "loss": 0.277618408203125, "step": 5641 }, { "epoch": 0.38137082601054484, "grad_norm": 1.252273678779602, "learning_rate": 2.142404361812417e-05, "loss": 0.18145751953125, "step": 5642 }, { "epoch": 0.38143842098147895, "grad_norm": 0.9034684896469116, "learning_rate": 2.142106183892213e-05, "loss": 0.187469482421875, "step": 5643 }, { "epoch": 0.3815060159524131, "grad_norm": 1.1489125490188599, "learning_rate": 2.141807974902918e-05, "loss": 0.178070068359375, "step": 5644 }, { "epoch": 0.3815736109233473, "grad_norm": 0.9584541916847229, "learning_rate": 2.1415097348589622e-05, "loss": 0.222381591796875, "step": 5645 }, { "epoch": 0.38164120589428147, "grad_norm": 1.219994306564331, "learning_rate": 2.1412114637747755e-05, "loss": 0.326934814453125, "step": 5646 }, { "epoch": 0.38170880086521564, "grad_norm": 0.8211851716041565, "learning_rate": 2.1409131616647907e-05, "loss": 0.240936279296875, "step": 5647 }, { "epoch": 0.3817763958361498, "grad_norm": 1.0997045040130615, "learning_rate": 2.1406148285434416e-05, "loss": 0.245758056640625, "step": 5648 }, { "epoch": 0.381843990807084, "grad_norm": 0.9759642481803894, "learning_rate": 2.140316464425163e-05, "loss": 0.2305145263671875, "step": 5649 }, { "epoch": 0.3819115857780181, "grad_norm": 1.2548822164535522, "learning_rate": 2.1400180693243915e-05, "loss": 0.29132080078125, "step": 5650 }, { "epoch": 0.38197918074895226, "grad_norm": 0.8146642446517944, "learning_rate": 2.1397196432555662e-05, "loss": 0.187042236328125, "step": 5651 }, { "epoch": 0.38204677571988643, "grad_norm": 1.0716900825500488, "learning_rate": 2.1394211862331256e-05, "loss": 0.214935302734375, "step": 5652 }, { "epoch": 0.3821143706908206, "grad_norm": 2.855893135070801, "learning_rate": 2.139122698271512e-05, "loss": 0.2068023681640625, "step": 5653 }, { "epoch": 0.3821819656617548, "grad_norm": 1.150963306427002, "learning_rate": 2.138824179385167e-05, "loss": 0.26348876953125, "step": 5654 }, { "epoch": 0.38224956063268895, "grad_norm": 1.1785430908203125, "learning_rate": 2.138525629588536e-05, "loss": 0.219696044921875, "step": 5655 }, { "epoch": 0.3823171556036231, "grad_norm": 1.782388687133789, "learning_rate": 2.1382270488960633e-05, "loss": 0.239105224609375, "step": 5656 }, { "epoch": 0.38238475057455723, "grad_norm": 1.4598722457885742, "learning_rate": 2.1379284373221975e-05, "loss": 0.201416015625, "step": 5657 }, { "epoch": 0.3824523455454914, "grad_norm": 1.9043101072311401, "learning_rate": 2.1376297948813865e-05, "loss": 0.34747314453125, "step": 5658 }, { "epoch": 0.38251994051642557, "grad_norm": 1.7914695739746094, "learning_rate": 2.1373311215880805e-05, "loss": 0.274444580078125, "step": 5659 }, { "epoch": 0.38258753548735974, "grad_norm": 1.2583345174789429, "learning_rate": 2.1370324174567314e-05, "loss": 0.23040771484375, "step": 5660 }, { "epoch": 0.3826551304582939, "grad_norm": 1.4029529094696045, "learning_rate": 2.1367336825017927e-05, "loss": 0.1723785400390625, "step": 5661 }, { "epoch": 0.3827227254292281, "grad_norm": 1.3488932847976685, "learning_rate": 2.1364349167377185e-05, "loss": 0.2320556640625, "step": 5662 }, { "epoch": 0.38279032040016225, "grad_norm": 0.848965048789978, "learning_rate": 2.1361361201789647e-05, "loss": 0.19219970703125, "step": 5663 }, { "epoch": 0.38285791537109637, "grad_norm": 1.1339577436447144, "learning_rate": 2.1358372928399893e-05, "loss": 0.221343994140625, "step": 5664 }, { "epoch": 0.38292551034203054, "grad_norm": 0.7142412066459656, "learning_rate": 2.1355384347352516e-05, "loss": 0.1185150146484375, "step": 5665 }, { "epoch": 0.3829931053129647, "grad_norm": 1.4413435459136963, "learning_rate": 2.1352395458792125e-05, "loss": 0.31060791015625, "step": 5666 }, { "epoch": 0.3830607002838989, "grad_norm": 1.0998185873031616, "learning_rate": 2.1349406262863332e-05, "loss": 0.2740478515625, "step": 5667 }, { "epoch": 0.38312829525483305, "grad_norm": 0.9526340961456299, "learning_rate": 2.1346416759710776e-05, "loss": 0.208465576171875, "step": 5668 }, { "epoch": 0.3831958902257672, "grad_norm": 1.9015650749206543, "learning_rate": 2.1343426949479113e-05, "loss": 0.29144287109375, "step": 5669 }, { "epoch": 0.3832634851967014, "grad_norm": 2.2978744506835938, "learning_rate": 2.1340436832313004e-05, "loss": 0.26007080078125, "step": 5670 }, { "epoch": 0.3833310801676355, "grad_norm": 0.9399648308753967, "learning_rate": 2.1337446408357128e-05, "loss": 0.221282958984375, "step": 5671 }, { "epoch": 0.3833986751385697, "grad_norm": 2.3629722595214844, "learning_rate": 2.1334455677756185e-05, "loss": 0.33282470703125, "step": 5672 }, { "epoch": 0.38346627010950385, "grad_norm": 1.19808030128479, "learning_rate": 2.133146464065488e-05, "loss": 0.27447509765625, "step": 5673 }, { "epoch": 0.383533865080438, "grad_norm": 1.02042818069458, "learning_rate": 2.1328473297197942e-05, "loss": 0.17010498046875, "step": 5674 }, { "epoch": 0.3836014600513722, "grad_norm": 1.4239989519119263, "learning_rate": 2.132548164753011e-05, "loss": 0.2763671875, "step": 5675 }, { "epoch": 0.38366905502230636, "grad_norm": 1.5971328020095825, "learning_rate": 2.132248969179614e-05, "loss": 0.33087158203125, "step": 5676 }, { "epoch": 0.38373664999324053, "grad_norm": 1.2892345190048218, "learning_rate": 2.13194974301408e-05, "loss": 0.28704833984375, "step": 5677 }, { "epoch": 0.38380424496417465, "grad_norm": 0.44465842843055725, "learning_rate": 2.131650486270887e-05, "loss": 0.0837554931640625, "step": 5678 }, { "epoch": 0.3838718399351088, "grad_norm": 0.6553555130958557, "learning_rate": 2.1313511989645156e-05, "loss": 0.1418609619140625, "step": 5679 }, { "epoch": 0.383939434906043, "grad_norm": 1.393455147743225, "learning_rate": 2.1310518811094465e-05, "loss": 0.23223876953125, "step": 5680 }, { "epoch": 0.38400702987697716, "grad_norm": 1.4140198230743408, "learning_rate": 2.1307525327201632e-05, "loss": 0.2501220703125, "step": 5681 }, { "epoch": 0.38407462484791133, "grad_norm": 0.43152669072151184, "learning_rate": 2.1304531538111494e-05, "loss": 0.1202392578125, "step": 5682 }, { "epoch": 0.3841422198188455, "grad_norm": 1.2606500387191772, "learning_rate": 2.1301537443968918e-05, "loss": 0.2889404296875, "step": 5683 }, { "epoch": 0.3842098147897796, "grad_norm": 1.5868251323699951, "learning_rate": 2.1298543044918764e-05, "loss": 0.241058349609375, "step": 5684 }, { "epoch": 0.3842774097607138, "grad_norm": 1.0063648223876953, "learning_rate": 2.129554834110594e-05, "loss": 0.248687744140625, "step": 5685 }, { "epoch": 0.38434500473164795, "grad_norm": 1.2005650997161865, "learning_rate": 2.1292553332675326e-05, "loss": 0.235443115234375, "step": 5686 }, { "epoch": 0.3844125997025821, "grad_norm": 1.431349277496338, "learning_rate": 2.1289558019771852e-05, "loss": 0.26593017578125, "step": 5687 }, { "epoch": 0.3844801946735163, "grad_norm": 1.1750494241714478, "learning_rate": 2.128656240254044e-05, "loss": 0.22564697265625, "step": 5688 }, { "epoch": 0.38454778964445047, "grad_norm": 1.152000069618225, "learning_rate": 2.1283566481126052e-05, "loss": 0.2273406982421875, "step": 5689 }, { "epoch": 0.38461538461538464, "grad_norm": 0.6665958166122437, "learning_rate": 2.1280570255673638e-05, "loss": 0.1258087158203125, "step": 5690 }, { "epoch": 0.38468297958631875, "grad_norm": 0.960854709148407, "learning_rate": 2.1277573726328178e-05, "loss": 0.236297607421875, "step": 5691 }, { "epoch": 0.3847505745572529, "grad_norm": 1.1565042734146118, "learning_rate": 2.1274576893234656e-05, "loss": 0.219573974609375, "step": 5692 }, { "epoch": 0.3848181695281871, "grad_norm": 2.1912028789520264, "learning_rate": 2.1271579756538086e-05, "loss": 0.2918548583984375, "step": 5693 }, { "epoch": 0.38488576449912126, "grad_norm": 2.2960360050201416, "learning_rate": 2.1268582316383485e-05, "loss": 0.296142578125, "step": 5694 }, { "epoch": 0.38495335947005543, "grad_norm": 0.7905098795890808, "learning_rate": 2.126558457291589e-05, "loss": 0.1848602294921875, "step": 5695 }, { "epoch": 0.3850209544409896, "grad_norm": 1.7585264444351196, "learning_rate": 2.1262586526280344e-05, "loss": 0.227386474609375, "step": 5696 }, { "epoch": 0.3850885494119238, "grad_norm": 1.6756850481033325, "learning_rate": 2.1259588176621917e-05, "loss": 0.321380615234375, "step": 5697 }, { "epoch": 0.3851561443828579, "grad_norm": 0.8041818737983704, "learning_rate": 2.1256589524085687e-05, "loss": 0.185089111328125, "step": 5698 }, { "epoch": 0.38522373935379206, "grad_norm": 1.1917515993118286, "learning_rate": 2.1253590568816748e-05, "loss": 0.300323486328125, "step": 5699 }, { "epoch": 0.38529133432472623, "grad_norm": 1.0179553031921387, "learning_rate": 2.1250591310960203e-05, "loss": 0.2598876953125, "step": 5700 }, { "epoch": 0.3853589292956604, "grad_norm": 1.5881787538528442, "learning_rate": 2.1247591750661176e-05, "loss": 0.340423583984375, "step": 5701 }, { "epoch": 0.3854265242665946, "grad_norm": 1.4331930875778198, "learning_rate": 2.1244591888064814e-05, "loss": 0.2322998046875, "step": 5702 }, { "epoch": 0.38549411923752874, "grad_norm": 1.489493727684021, "learning_rate": 2.1241591723316256e-05, "loss": 0.32403564453125, "step": 5703 }, { "epoch": 0.3855617142084629, "grad_norm": 1.240029215812683, "learning_rate": 2.1238591256560678e-05, "loss": 0.247711181640625, "step": 5704 }, { "epoch": 0.38562930917939703, "grad_norm": 1.014534592628479, "learning_rate": 2.1235590487943253e-05, "loss": 0.2223663330078125, "step": 5705 }, { "epoch": 0.3856969041503312, "grad_norm": 1.390243649482727, "learning_rate": 2.1232589417609185e-05, "loss": 0.3092041015625, "step": 5706 }, { "epoch": 0.38576449912126537, "grad_norm": 1.2414681911468506, "learning_rate": 2.122958804570368e-05, "loss": 0.244537353515625, "step": 5707 }, { "epoch": 0.38583209409219954, "grad_norm": 1.5641647577285767, "learning_rate": 2.1226586372371966e-05, "loss": 0.32000732421875, "step": 5708 }, { "epoch": 0.3858996890631337, "grad_norm": 1.3671478033065796, "learning_rate": 2.122358439775928e-05, "loss": 0.226806640625, "step": 5709 }, { "epoch": 0.3859672840340679, "grad_norm": 1.27444589138031, "learning_rate": 2.1220582122010873e-05, "loss": 0.209869384765625, "step": 5710 }, { "epoch": 0.38603487900500205, "grad_norm": 3.290393114089966, "learning_rate": 2.1217579545272023e-05, "loss": 0.283172607421875, "step": 5711 }, { "epoch": 0.38610247397593617, "grad_norm": 1.5710359811782837, "learning_rate": 2.1214576667688005e-05, "loss": 0.24517822265625, "step": 5712 }, { "epoch": 0.38617006894687034, "grad_norm": 1.1140013933181763, "learning_rate": 2.1211573489404115e-05, "loss": 0.210906982421875, "step": 5713 }, { "epoch": 0.3862376639178045, "grad_norm": 1.2238998413085938, "learning_rate": 2.120857001056568e-05, "loss": 0.249755859375, "step": 5714 }, { "epoch": 0.3863052588887387, "grad_norm": 1.1183243989944458, "learning_rate": 2.120556623131801e-05, "loss": 0.18231201171875, "step": 5715 }, { "epoch": 0.38637285385967285, "grad_norm": 1.160252571105957, "learning_rate": 2.1202562151806456e-05, "loss": 0.2523040771484375, "step": 5716 }, { "epoch": 0.386440448830607, "grad_norm": 0.7892977595329285, "learning_rate": 2.119955777217637e-05, "loss": 0.146514892578125, "step": 5717 }, { "epoch": 0.3865080438015412, "grad_norm": 1.3761769533157349, "learning_rate": 2.1196553092573126e-05, "loss": 0.306884765625, "step": 5718 }, { "epoch": 0.3865756387724753, "grad_norm": 1.352781057357788, "learning_rate": 2.119354811314211e-05, "loss": 0.211669921875, "step": 5719 }, { "epoch": 0.3866432337434095, "grad_norm": 1.1539140939712524, "learning_rate": 2.119054283402872e-05, "loss": 0.277130126953125, "step": 5720 }, { "epoch": 0.38671082871434365, "grad_norm": 1.7767386436462402, "learning_rate": 2.118753725537836e-05, "loss": 0.3267822265625, "step": 5721 }, { "epoch": 0.3867784236852778, "grad_norm": 0.9272304177284241, "learning_rate": 2.1184531377336476e-05, "loss": 0.198577880859375, "step": 5722 }, { "epoch": 0.386846018656212, "grad_norm": 1.184188723564148, "learning_rate": 2.11815252000485e-05, "loss": 0.223785400390625, "step": 5723 }, { "epoch": 0.38691361362714616, "grad_norm": 0.8340111374855042, "learning_rate": 2.117851872365989e-05, "loss": 0.23675537109375, "step": 5724 }, { "epoch": 0.38698120859808033, "grad_norm": 0.8602021336555481, "learning_rate": 2.1175511948316127e-05, "loss": 0.220062255859375, "step": 5725 }, { "epoch": 0.38704880356901444, "grad_norm": 0.9258871674537659, "learning_rate": 2.1172504874162683e-05, "loss": 0.240325927734375, "step": 5726 }, { "epoch": 0.3871163985399486, "grad_norm": 0.5786352157592773, "learning_rate": 2.1169497501345072e-05, "loss": 0.11932373046875, "step": 5727 }, { "epoch": 0.3871839935108828, "grad_norm": 1.0379220247268677, "learning_rate": 2.1166489830008803e-05, "loss": 0.26422119140625, "step": 5728 }, { "epoch": 0.38725158848181696, "grad_norm": 1.9767649173736572, "learning_rate": 2.1163481860299407e-05, "loss": 0.259918212890625, "step": 5729 }, { "epoch": 0.3873191834527511, "grad_norm": 1.0753322839736938, "learning_rate": 2.1160473592362423e-05, "loss": 0.248779296875, "step": 5730 }, { "epoch": 0.3873867784236853, "grad_norm": 1.279022455215454, "learning_rate": 2.1157465026343422e-05, "loss": 0.235687255859375, "step": 5731 }, { "epoch": 0.38745437339461947, "grad_norm": 1.7413240671157837, "learning_rate": 2.115445616238797e-05, "loss": 0.2411651611328125, "step": 5732 }, { "epoch": 0.3875219683655536, "grad_norm": 1.4688290357589722, "learning_rate": 2.115144700064166e-05, "loss": 0.34698486328125, "step": 5733 }, { "epoch": 0.38758956333648775, "grad_norm": 1.916020393371582, "learning_rate": 2.1148437541250072e-05, "loss": 0.34576416015625, "step": 5734 }, { "epoch": 0.3876571583074219, "grad_norm": 1.4942106008529663, "learning_rate": 2.114542778435885e-05, "loss": 0.26226806640625, "step": 5735 }, { "epoch": 0.3877247532783561, "grad_norm": 1.05793297290802, "learning_rate": 2.1142417730113614e-05, "loss": 0.30743408203125, "step": 5736 }, { "epoch": 0.38779234824929026, "grad_norm": 1.4307239055633545, "learning_rate": 2.113940737866001e-05, "loss": 0.298583984375, "step": 5737 }, { "epoch": 0.38785994322022443, "grad_norm": 0.953210711479187, "learning_rate": 2.1136396730143687e-05, "loss": 0.233062744140625, "step": 5738 }, { "epoch": 0.3879275381911586, "grad_norm": 0.9986677765846252, "learning_rate": 2.113338578471034e-05, "loss": 0.19092559814453125, "step": 5739 }, { "epoch": 0.3879951331620927, "grad_norm": 1.0387065410614014, "learning_rate": 2.113037454250564e-05, "loss": 0.2249755859375, "step": 5740 }, { "epoch": 0.3880627281330269, "grad_norm": 1.3054169416427612, "learning_rate": 2.11273630036753e-05, "loss": 0.2698974609375, "step": 5741 }, { "epoch": 0.38813032310396106, "grad_norm": 1.3343628644943237, "learning_rate": 2.1124351168365027e-05, "loss": 0.24566650390625, "step": 5742 }, { "epoch": 0.38819791807489523, "grad_norm": 1.1759445667266846, "learning_rate": 2.1121339036720553e-05, "loss": 0.186492919921875, "step": 5743 }, { "epoch": 0.3882655130458294, "grad_norm": 1.1703462600708008, "learning_rate": 2.1118326608887637e-05, "loss": 0.16524505615234375, "step": 5744 }, { "epoch": 0.3883331080167636, "grad_norm": 1.3455508947372437, "learning_rate": 2.1115313885012027e-05, "loss": 0.271240234375, "step": 5745 }, { "epoch": 0.3884007029876977, "grad_norm": 0.9383049607276917, "learning_rate": 2.11123008652395e-05, "loss": 0.2122802734375, "step": 5746 }, { "epoch": 0.38846829795863186, "grad_norm": 1.3611503839492798, "learning_rate": 2.1109287549715847e-05, "loss": 0.2525634765625, "step": 5747 }, { "epoch": 0.38853589292956603, "grad_norm": 1.1111130714416504, "learning_rate": 2.110627393858687e-05, "loss": 0.30279541015625, "step": 5748 }, { "epoch": 0.3886034879005002, "grad_norm": 1.2393591403961182, "learning_rate": 2.1103260031998383e-05, "loss": 0.274810791015625, "step": 5749 }, { "epoch": 0.38867108287143437, "grad_norm": 0.9905017018318176, "learning_rate": 2.110024583009622e-05, "loss": 0.1827392578125, "step": 5750 }, { "epoch": 0.38873867784236854, "grad_norm": 1.758279800415039, "learning_rate": 2.1097231333026223e-05, "loss": 0.293212890625, "step": 5751 }, { "epoch": 0.3888062728133027, "grad_norm": 1.4187078475952148, "learning_rate": 2.109421654093426e-05, "loss": 0.2289581298828125, "step": 5752 }, { "epoch": 0.3888738677842368, "grad_norm": 2.250049591064453, "learning_rate": 2.1091201453966202e-05, "loss": 0.306060791015625, "step": 5753 }, { "epoch": 0.388941462755171, "grad_norm": 0.71647047996521, "learning_rate": 2.1088186072267937e-05, "loss": 0.1381988525390625, "step": 5754 }, { "epoch": 0.38900905772610517, "grad_norm": 1.4853546619415283, "learning_rate": 2.1085170395985366e-05, "loss": 0.237518310546875, "step": 5755 }, { "epoch": 0.38907665269703934, "grad_norm": 2.0824711322784424, "learning_rate": 2.1082154425264408e-05, "loss": 0.2802734375, "step": 5756 }, { "epoch": 0.3891442476679735, "grad_norm": 1.7084286212921143, "learning_rate": 2.1079138160250997e-05, "loss": 0.32489013671875, "step": 5757 }, { "epoch": 0.3892118426389077, "grad_norm": 1.120392084121704, "learning_rate": 2.1076121601091073e-05, "loss": 0.223114013671875, "step": 5758 }, { "epoch": 0.38927943760984185, "grad_norm": 1.9334635734558105, "learning_rate": 2.10731047479306e-05, "loss": 0.3139801025390625, "step": 5759 }, { "epoch": 0.38934703258077596, "grad_norm": 0.9201287627220154, "learning_rate": 2.107008760091555e-05, "loss": 0.26129150390625, "step": 5760 }, { "epoch": 0.38941462755171014, "grad_norm": 1.1593053340911865, "learning_rate": 2.1067070160191916e-05, "loss": 0.19062042236328125, "step": 5761 }, { "epoch": 0.3894822225226443, "grad_norm": 1.1701887845993042, "learning_rate": 2.1064052425905695e-05, "loss": 0.221466064453125, "step": 5762 }, { "epoch": 0.3895498174935785, "grad_norm": 1.5622334480285645, "learning_rate": 2.1061034398202904e-05, "loss": 0.2843017578125, "step": 5763 }, { "epoch": 0.38961741246451265, "grad_norm": 1.3066009283065796, "learning_rate": 2.1058016077229575e-05, "loss": 0.21736907958984375, "step": 5764 }, { "epoch": 0.3896850074354468, "grad_norm": 1.0345441102981567, "learning_rate": 2.1054997463131753e-05, "loss": 0.2951202392578125, "step": 5765 }, { "epoch": 0.389752602406381, "grad_norm": 1.6204262971878052, "learning_rate": 2.1051978556055504e-05, "loss": 0.266387939453125, "step": 5766 }, { "epoch": 0.3898201973773151, "grad_norm": 0.9861566424369812, "learning_rate": 2.104895935614689e-05, "loss": 0.28375244140625, "step": 5767 }, { "epoch": 0.3898877923482493, "grad_norm": 0.4064437448978424, "learning_rate": 2.104593986355201e-05, "loss": 0.09958648681640625, "step": 5768 }, { "epoch": 0.38995538731918344, "grad_norm": 1.0086561441421509, "learning_rate": 2.1042920078416958e-05, "loss": 0.196563720703125, "step": 5769 }, { "epoch": 0.3900229822901176, "grad_norm": 1.9275051355361938, "learning_rate": 2.103990000088785e-05, "loss": 0.26177978515625, "step": 5770 }, { "epoch": 0.3900905772610518, "grad_norm": 1.5424058437347412, "learning_rate": 2.103687963111082e-05, "loss": 0.26495361328125, "step": 5771 }, { "epoch": 0.39015817223198596, "grad_norm": 1.0718958377838135, "learning_rate": 2.1033858969232006e-05, "loss": 0.22772216796875, "step": 5772 }, { "epoch": 0.3902257672029201, "grad_norm": 1.5462892055511475, "learning_rate": 2.1030838015397574e-05, "loss": 0.272064208984375, "step": 5773 }, { "epoch": 0.39029336217385424, "grad_norm": 1.2144972085952759, "learning_rate": 2.1027816769753692e-05, "loss": 0.276275634765625, "step": 5774 }, { "epoch": 0.3903609571447884, "grad_norm": 1.0450055599212646, "learning_rate": 2.1024795232446554e-05, "loss": 0.2069244384765625, "step": 5775 }, { "epoch": 0.3904285521157226, "grad_norm": 1.339451551437378, "learning_rate": 2.102177340362235e-05, "loss": 0.261688232421875, "step": 5776 }, { "epoch": 0.39049614708665675, "grad_norm": 1.4074325561523438, "learning_rate": 2.10187512834273e-05, "loss": 0.23260498046875, "step": 5777 }, { "epoch": 0.3905637420575909, "grad_norm": 1.1756547689437866, "learning_rate": 2.1015728872007635e-05, "loss": 0.1851043701171875, "step": 5778 }, { "epoch": 0.3906313370285251, "grad_norm": 1.71336030960083, "learning_rate": 2.101270616950959e-05, "loss": 0.3065185546875, "step": 5779 }, { "epoch": 0.39069893199945926, "grad_norm": 1.094987392425537, "learning_rate": 2.1009683176079434e-05, "loss": 0.2965087890625, "step": 5780 }, { "epoch": 0.3907665269703934, "grad_norm": 2.947726249694824, "learning_rate": 2.100665989186343e-05, "loss": 0.346282958984375, "step": 5781 }, { "epoch": 0.39083412194132755, "grad_norm": 1.4158821105957031, "learning_rate": 2.1003636317007862e-05, "loss": 0.2734375, "step": 5782 }, { "epoch": 0.3909017169122617, "grad_norm": 1.024328589439392, "learning_rate": 2.100061245165904e-05, "loss": 0.0984344482421875, "step": 5783 }, { "epoch": 0.3909693118831959, "grad_norm": 1.7389954328536987, "learning_rate": 2.0997588295963264e-05, "loss": 0.23895263671875, "step": 5784 }, { "epoch": 0.39103690685413006, "grad_norm": 2.3730361461639404, "learning_rate": 2.099456385006687e-05, "loss": 0.23809814453125, "step": 5785 }, { "epoch": 0.39110450182506423, "grad_norm": 1.5097415447235107, "learning_rate": 2.0991539114116196e-05, "loss": 0.31878662109375, "step": 5786 }, { "epoch": 0.3911720967959984, "grad_norm": 1.4877023696899414, "learning_rate": 2.09885140882576e-05, "loss": 0.2545166015625, "step": 5787 }, { "epoch": 0.3912396917669325, "grad_norm": 1.0290242433547974, "learning_rate": 2.0985488772637452e-05, "loss": 0.227691650390625, "step": 5788 }, { "epoch": 0.3913072867378667, "grad_norm": 1.4172322750091553, "learning_rate": 2.098246316740213e-05, "loss": 0.2564697265625, "step": 5789 }, { "epoch": 0.39137488170880086, "grad_norm": 1.3242400884628296, "learning_rate": 2.0979437272698038e-05, "loss": 0.2435302734375, "step": 5790 }, { "epoch": 0.39144247667973503, "grad_norm": 1.6460797786712646, "learning_rate": 2.0976411088671584e-05, "loss": 0.262115478515625, "step": 5791 }, { "epoch": 0.3915100716506692, "grad_norm": 1.448303461074829, "learning_rate": 2.0973384615469197e-05, "loss": 0.23058319091796875, "step": 5792 }, { "epoch": 0.39157766662160337, "grad_norm": 2.2402443885803223, "learning_rate": 2.0970357853237312e-05, "loss": 0.326416015625, "step": 5793 }, { "epoch": 0.39164526159253754, "grad_norm": 1.427730917930603, "learning_rate": 2.0967330802122383e-05, "loss": 0.19091796875, "step": 5794 }, { "epoch": 0.39171285656347166, "grad_norm": 1.3884297609329224, "learning_rate": 2.096430346227088e-05, "loss": 0.28155517578125, "step": 5795 }, { "epoch": 0.3917804515344058, "grad_norm": 1.337040662765503, "learning_rate": 2.0961275833829286e-05, "loss": 0.2746734619140625, "step": 5796 }, { "epoch": 0.39184804650534, "grad_norm": 1.4264267683029175, "learning_rate": 2.0958247916944093e-05, "loss": 0.282501220703125, "step": 5797 }, { "epoch": 0.39191564147627417, "grad_norm": 1.5559886693954468, "learning_rate": 2.0955219711761814e-05, "loss": 0.2545928955078125, "step": 5798 }, { "epoch": 0.39198323644720834, "grad_norm": 0.691719651222229, "learning_rate": 2.0952191218428968e-05, "loss": 0.174560546875, "step": 5799 }, { "epoch": 0.3920508314181425, "grad_norm": 0.9861275553703308, "learning_rate": 2.0949162437092096e-05, "loss": 0.1753387451171875, "step": 5800 }, { "epoch": 0.3921184263890767, "grad_norm": 1.391747236251831, "learning_rate": 2.094613336789775e-05, "loss": 0.241790771484375, "step": 5801 }, { "epoch": 0.3921860213600108, "grad_norm": 1.2894173860549927, "learning_rate": 2.094310401099249e-05, "loss": 0.2818603515625, "step": 5802 }, { "epoch": 0.39225361633094497, "grad_norm": 1.1352617740631104, "learning_rate": 2.0940074366522898e-05, "loss": 0.190399169921875, "step": 5803 }, { "epoch": 0.39232121130187914, "grad_norm": 1.0538153648376465, "learning_rate": 2.0937044434635567e-05, "loss": 0.1347808837890625, "step": 5804 }, { "epoch": 0.3923888062728133, "grad_norm": 0.5818449258804321, "learning_rate": 2.0934014215477103e-05, "loss": 0.137451171875, "step": 5805 }, { "epoch": 0.3924564012437475, "grad_norm": 1.3307875394821167, "learning_rate": 2.093098370919413e-05, "loss": 0.230560302734375, "step": 5806 }, { "epoch": 0.39252399621468165, "grad_norm": 1.1182078123092651, "learning_rate": 2.0927952915933283e-05, "loss": 0.1561126708984375, "step": 5807 }, { "epoch": 0.39259159118561576, "grad_norm": 1.5084786415100098, "learning_rate": 2.0924921835841204e-05, "loss": 0.244171142578125, "step": 5808 }, { "epoch": 0.39265918615654993, "grad_norm": 1.8887773752212524, "learning_rate": 2.0921890469064562e-05, "loss": 0.2755584716796875, "step": 5809 }, { "epoch": 0.3927267811274841, "grad_norm": 1.0684758424758911, "learning_rate": 2.0918858815750027e-05, "loss": 0.134002685546875, "step": 5810 }, { "epoch": 0.3927943760984183, "grad_norm": 1.3623827695846558, "learning_rate": 2.09158268760443e-05, "loss": 0.227783203125, "step": 5811 }, { "epoch": 0.39286197106935244, "grad_norm": 0.9300353527069092, "learning_rate": 2.0912794650094075e-05, "loss": 0.1614227294921875, "step": 5812 }, { "epoch": 0.3929295660402866, "grad_norm": 1.214369297027588, "learning_rate": 2.0909762138046076e-05, "loss": 0.2705078125, "step": 5813 }, { "epoch": 0.3929971610112208, "grad_norm": 1.4194557666778564, "learning_rate": 2.0906729340047032e-05, "loss": 0.23870849609375, "step": 5814 }, { "epoch": 0.3930647559821549, "grad_norm": 1.5004287958145142, "learning_rate": 2.090369625624369e-05, "loss": 0.1333770751953125, "step": 5815 }, { "epoch": 0.39313235095308907, "grad_norm": 1.097313642501831, "learning_rate": 2.0900662886782805e-05, "loss": 0.2821502685546875, "step": 5816 }, { "epoch": 0.39319994592402324, "grad_norm": 1.217710018157959, "learning_rate": 2.0897629231811156e-05, "loss": 0.236083984375, "step": 5817 }, { "epoch": 0.3932675408949574, "grad_norm": 1.463098406791687, "learning_rate": 2.0894595291475524e-05, "loss": 0.2899169921875, "step": 5818 }, { "epoch": 0.3933351358658916, "grad_norm": 0.9174725413322449, "learning_rate": 2.0891561065922716e-05, "loss": 0.26141357421875, "step": 5819 }, { "epoch": 0.39340273083682575, "grad_norm": 1.9464737176895142, "learning_rate": 2.0888526555299546e-05, "loss": 0.315948486328125, "step": 5820 }, { "epoch": 0.3934703258077599, "grad_norm": 1.3959370851516724, "learning_rate": 2.088549175975284e-05, "loss": 0.231414794921875, "step": 5821 }, { "epoch": 0.39353792077869404, "grad_norm": 1.2683228254318237, "learning_rate": 2.088245667942944e-05, "loss": 0.25189208984375, "step": 5822 }, { "epoch": 0.3936055157496282, "grad_norm": 0.6485906839370728, "learning_rate": 2.0879421314476204e-05, "loss": 0.098663330078125, "step": 5823 }, { "epoch": 0.3936731107205624, "grad_norm": 1.354278802871704, "learning_rate": 2.087638566504e-05, "loss": 0.2713623046875, "step": 5824 }, { "epoch": 0.39374070569149655, "grad_norm": 1.2377675771713257, "learning_rate": 2.087334973126772e-05, "loss": 0.24200439453125, "step": 5825 }, { "epoch": 0.3938083006624307, "grad_norm": 1.370436429977417, "learning_rate": 2.0870313513306243e-05, "loss": 0.2780914306640625, "step": 5826 }, { "epoch": 0.3938758956333649, "grad_norm": 1.607777714729309, "learning_rate": 2.0867277011302496e-05, "loss": 0.221771240234375, "step": 5827 }, { "epoch": 0.39394349060429906, "grad_norm": 1.512631893157959, "learning_rate": 2.0864240225403404e-05, "loss": 0.286834716796875, "step": 5828 }, { "epoch": 0.3940110855752332, "grad_norm": 1.0556470155715942, "learning_rate": 2.0861203155755892e-05, "loss": 0.179595947265625, "step": 5829 }, { "epoch": 0.39407868054616735, "grad_norm": 1.564786434173584, "learning_rate": 2.0858165802506926e-05, "loss": 0.216949462890625, "step": 5830 }, { "epoch": 0.3941462755171015, "grad_norm": 1.5006812810897827, "learning_rate": 2.0855128165803467e-05, "loss": 0.25128173828125, "step": 5831 }, { "epoch": 0.3942138704880357, "grad_norm": 1.3551491498947144, "learning_rate": 2.085209024579249e-05, "loss": 0.244415283203125, "step": 5832 }, { "epoch": 0.39428146545896986, "grad_norm": 2.7692275047302246, "learning_rate": 2.0849052042621e-05, "loss": 0.320556640625, "step": 5833 }, { "epoch": 0.39434906042990403, "grad_norm": 2.524454116821289, "learning_rate": 2.0846013556435993e-05, "loss": 0.300567626953125, "step": 5834 }, { "epoch": 0.3944166554008382, "grad_norm": 1.004498839378357, "learning_rate": 2.084297478738449e-05, "loss": 0.18788909912109375, "step": 5835 }, { "epoch": 0.3944842503717723, "grad_norm": 1.1469576358795166, "learning_rate": 2.0839935735613538e-05, "loss": 0.201873779296875, "step": 5836 }, { "epoch": 0.3945518453427065, "grad_norm": 1.6050420999526978, "learning_rate": 2.0836896401270176e-05, "loss": 0.332763671875, "step": 5837 }, { "epoch": 0.39461944031364066, "grad_norm": 1.6652337312698364, "learning_rate": 2.0833856784501465e-05, "loss": 0.29193115234375, "step": 5838 }, { "epoch": 0.3946870352845748, "grad_norm": 1.2512989044189453, "learning_rate": 2.0830816885454478e-05, "loss": 0.22991943359375, "step": 5839 }, { "epoch": 0.394754630255509, "grad_norm": 0.5908961892127991, "learning_rate": 2.0827776704276313e-05, "loss": 0.1064300537109375, "step": 5840 }, { "epoch": 0.39482222522644317, "grad_norm": 1.1712195873260498, "learning_rate": 2.082473624111407e-05, "loss": 0.207916259765625, "step": 5841 }, { "epoch": 0.39488982019737734, "grad_norm": 1.3059967756271362, "learning_rate": 2.0821695496114862e-05, "loss": 0.29754638671875, "step": 5842 }, { "epoch": 0.39495741516831145, "grad_norm": 1.8098375797271729, "learning_rate": 2.081865446942582e-05, "loss": 0.27911376953125, "step": 5843 }, { "epoch": 0.3950250101392456, "grad_norm": 2.2020998001098633, "learning_rate": 2.0815613161194087e-05, "loss": 0.31048583984375, "step": 5844 }, { "epoch": 0.3950926051101798, "grad_norm": 1.5928210020065308, "learning_rate": 2.081257157156683e-05, "loss": 0.234375, "step": 5845 }, { "epoch": 0.39516020008111397, "grad_norm": 1.0276105403900146, "learning_rate": 2.0809529700691208e-05, "loss": 0.185302734375, "step": 5846 }, { "epoch": 0.39522779505204814, "grad_norm": 1.6506845951080322, "learning_rate": 2.0806487548714407e-05, "loss": 0.2943115234375, "step": 5847 }, { "epoch": 0.3952953900229823, "grad_norm": 1.599859595298767, "learning_rate": 2.080344511578363e-05, "loss": 0.215576171875, "step": 5848 }, { "epoch": 0.3953629849939165, "grad_norm": 1.5330452919006348, "learning_rate": 2.0800402402046093e-05, "loss": 0.260498046875, "step": 5849 }, { "epoch": 0.3954305799648506, "grad_norm": 1.807515263557434, "learning_rate": 2.0797359407649012e-05, "loss": 0.25030517578125, "step": 5850 }, { "epoch": 0.39549817493578476, "grad_norm": 1.7450380325317383, "learning_rate": 2.0794316132739623e-05, "loss": 0.278411865234375, "step": 5851 }, { "epoch": 0.39556576990671893, "grad_norm": 1.1707372665405273, "learning_rate": 2.079127257746519e-05, "loss": 0.178558349609375, "step": 5852 }, { "epoch": 0.3956333648776531, "grad_norm": 2.147299289703369, "learning_rate": 2.0788228741972976e-05, "loss": 0.2052764892578125, "step": 5853 }, { "epoch": 0.3957009598485873, "grad_norm": 2.2828543186187744, "learning_rate": 2.0785184626410255e-05, "loss": 0.2808837890625, "step": 5854 }, { "epoch": 0.39576855481952145, "grad_norm": 1.4360268115997314, "learning_rate": 2.078214023092433e-05, "loss": 0.1792449951171875, "step": 5855 }, { "epoch": 0.3958361497904556, "grad_norm": 1.0453864336013794, "learning_rate": 2.0779095555662492e-05, "loss": 0.2059326171875, "step": 5856 }, { "epoch": 0.39590374476138973, "grad_norm": 1.1288663148880005, "learning_rate": 2.077605060077208e-05, "loss": 0.245147705078125, "step": 5857 }, { "epoch": 0.3959713397323239, "grad_norm": 2.451571226119995, "learning_rate": 2.0773005366400415e-05, "loss": 0.3096923828125, "step": 5858 }, { "epoch": 0.39603893470325807, "grad_norm": 1.4802342653274536, "learning_rate": 2.076995985269485e-05, "loss": 0.246307373046875, "step": 5859 }, { "epoch": 0.39610652967419224, "grad_norm": 1.995661973953247, "learning_rate": 2.0766914059802746e-05, "loss": 0.3433837890625, "step": 5860 }, { "epoch": 0.3961741246451264, "grad_norm": 1.7727798223495483, "learning_rate": 2.0763867987871476e-05, "loss": 0.295196533203125, "step": 5861 }, { "epoch": 0.3962417196160606, "grad_norm": 2.857604742050171, "learning_rate": 2.0760821637048425e-05, "loss": 0.34039306640625, "step": 5862 }, { "epoch": 0.39630931458699475, "grad_norm": 1.4921867847442627, "learning_rate": 2.0757775007480996e-05, "loss": 0.189727783203125, "step": 5863 }, { "epoch": 0.39637690955792887, "grad_norm": 1.9415608644485474, "learning_rate": 2.0754728099316605e-05, "loss": 0.214447021484375, "step": 5864 }, { "epoch": 0.39644450452886304, "grad_norm": 1.55016028881073, "learning_rate": 2.075168091270268e-05, "loss": 0.29339599609375, "step": 5865 }, { "epoch": 0.3965120994997972, "grad_norm": 1.9314203262329102, "learning_rate": 2.0748633447786668e-05, "loss": 0.24859619140625, "step": 5866 }, { "epoch": 0.3965796944707314, "grad_norm": 1.4061917066574097, "learning_rate": 2.074558570471602e-05, "loss": 0.2183837890625, "step": 5867 }, { "epoch": 0.39664728944166555, "grad_norm": 1.1381397247314453, "learning_rate": 2.0742537683638196e-05, "loss": 0.234375, "step": 5868 }, { "epoch": 0.3967148844125997, "grad_norm": 0.9896961450576782, "learning_rate": 2.073948938470069e-05, "loss": 0.257904052734375, "step": 5869 }, { "epoch": 0.3967824793835339, "grad_norm": 1.2478930950164795, "learning_rate": 2.0736440808050996e-05, "loss": 0.249664306640625, "step": 5870 }, { "epoch": 0.396850074354468, "grad_norm": 0.8720347285270691, "learning_rate": 2.073339195383662e-05, "loss": 0.229705810546875, "step": 5871 }, { "epoch": 0.3969176693254022, "grad_norm": 1.9742730855941772, "learning_rate": 2.0730342822205085e-05, "loss": 0.31793212890625, "step": 5872 }, { "epoch": 0.39698526429633635, "grad_norm": 0.6226227879524231, "learning_rate": 2.072729341330393e-05, "loss": 0.1428375244140625, "step": 5873 }, { "epoch": 0.3970528592672705, "grad_norm": 0.9496071338653564, "learning_rate": 2.07242437272807e-05, "loss": 0.256134033203125, "step": 5874 }, { "epoch": 0.3971204542382047, "grad_norm": 2.4610724449157715, "learning_rate": 2.0721193764282963e-05, "loss": 0.350128173828125, "step": 5875 }, { "epoch": 0.39718804920913886, "grad_norm": 2.331315040588379, "learning_rate": 2.0718143524458286e-05, "loss": 0.34796142578125, "step": 5876 }, { "epoch": 0.397255644180073, "grad_norm": 0.9812520146369934, "learning_rate": 2.0715093007954268e-05, "loss": 0.14650344848632812, "step": 5877 }, { "epoch": 0.39732323915100715, "grad_norm": 1.5273487567901611, "learning_rate": 2.071204221491851e-05, "loss": 0.263763427734375, "step": 5878 }, { "epoch": 0.3973908341219413, "grad_norm": 1.2558845281600952, "learning_rate": 2.0708991145498627e-05, "loss": 0.17645263671875, "step": 5879 }, { "epoch": 0.3974584290928755, "grad_norm": 0.8713369965553284, "learning_rate": 2.0705939799842246e-05, "loss": 0.17291259765625, "step": 5880 }, { "epoch": 0.39752602406380966, "grad_norm": 1.1525986194610596, "learning_rate": 2.0702888178097007e-05, "loss": 0.21435546875, "step": 5881 }, { "epoch": 0.39759361903474383, "grad_norm": 0.9824119210243225, "learning_rate": 2.069983628041058e-05, "loss": 0.230560302734375, "step": 5882 }, { "epoch": 0.397661214005678, "grad_norm": 1.0437467098236084, "learning_rate": 2.0696784106930626e-05, "loss": 0.264984130859375, "step": 5883 }, { "epoch": 0.3977288089766121, "grad_norm": 1.489782691001892, "learning_rate": 2.069373165780483e-05, "loss": 0.272979736328125, "step": 5884 }, { "epoch": 0.3977964039475463, "grad_norm": 1.8287094831466675, "learning_rate": 2.0690678933180885e-05, "loss": 0.3275146484375, "step": 5885 }, { "epoch": 0.39786399891848045, "grad_norm": 1.2927144765853882, "learning_rate": 2.0687625933206505e-05, "loss": 0.1988067626953125, "step": 5886 }, { "epoch": 0.3979315938894146, "grad_norm": 1.1380165815353394, "learning_rate": 2.068457265802941e-05, "loss": 0.255645751953125, "step": 5887 }, { "epoch": 0.3979991888603488, "grad_norm": 1.0949515104293823, "learning_rate": 2.068151910779734e-05, "loss": 0.232635498046875, "step": 5888 }, { "epoch": 0.39806678383128297, "grad_norm": 1.5450419187545776, "learning_rate": 2.0678465282658038e-05, "loss": 0.3197021484375, "step": 5889 }, { "epoch": 0.39813437880221714, "grad_norm": 0.9208508133888245, "learning_rate": 2.0675411182759273e-05, "loss": 0.227783203125, "step": 5890 }, { "epoch": 0.39820197377315125, "grad_norm": 1.2894856929779053, "learning_rate": 2.067235680824882e-05, "loss": 0.31243896484375, "step": 5891 }, { "epoch": 0.3982695687440854, "grad_norm": 1.7217657566070557, "learning_rate": 2.0669302159274474e-05, "loss": 0.29443359375, "step": 5892 }, { "epoch": 0.3983371637150196, "grad_norm": 1.4496185779571533, "learning_rate": 2.0666247235984027e-05, "loss": 0.30169677734375, "step": 5893 }, { "epoch": 0.39840475868595376, "grad_norm": 1.0829894542694092, "learning_rate": 2.06631920385253e-05, "loss": 0.19384765625, "step": 5894 }, { "epoch": 0.39847235365688793, "grad_norm": 1.1429036855697632, "learning_rate": 2.0660136567046126e-05, "loss": 0.20819091796875, "step": 5895 }, { "epoch": 0.3985399486278221, "grad_norm": 1.2042162418365479, "learning_rate": 2.0657080821694347e-05, "loss": 0.19773483276367188, "step": 5896 }, { "epoch": 0.3986075435987563, "grad_norm": 1.2169972658157349, "learning_rate": 2.065402480261781e-05, "loss": 0.286285400390625, "step": 5897 }, { "epoch": 0.3986751385696904, "grad_norm": 0.7080713510513306, "learning_rate": 2.0650968509964397e-05, "loss": 0.137847900390625, "step": 5898 }, { "epoch": 0.39874273354062456, "grad_norm": 1.0271207094192505, "learning_rate": 2.0647911943881986e-05, "loss": 0.245361328125, "step": 5899 }, { "epoch": 0.39881032851155873, "grad_norm": 1.1290342807769775, "learning_rate": 2.0644855104518465e-05, "loss": 0.21893310546875, "step": 5900 }, { "epoch": 0.3988779234824929, "grad_norm": 1.1252005100250244, "learning_rate": 2.0641797992021753e-05, "loss": 0.317413330078125, "step": 5901 }, { "epoch": 0.3989455184534271, "grad_norm": 1.116822600364685, "learning_rate": 2.0638740606539764e-05, "loss": 0.206512451171875, "step": 5902 }, { "epoch": 0.39901311342436124, "grad_norm": 1.5972107648849487, "learning_rate": 2.0635682948220442e-05, "loss": 0.2935791015625, "step": 5903 }, { "epoch": 0.3990807083952954, "grad_norm": 0.8844590783119202, "learning_rate": 2.0632625017211728e-05, "loss": 0.22442626953125, "step": 5904 }, { "epoch": 0.39914830336622953, "grad_norm": 0.8565676808357239, "learning_rate": 2.062956681366159e-05, "loss": 0.2317962646484375, "step": 5905 }, { "epoch": 0.3992158983371637, "grad_norm": 1.2613928318023682, "learning_rate": 2.0626508337717994e-05, "loss": 0.261199951171875, "step": 5906 }, { "epoch": 0.39928349330809787, "grad_norm": 0.8774357438087463, "learning_rate": 2.062344958952894e-05, "loss": 0.2208251953125, "step": 5907 }, { "epoch": 0.39935108827903204, "grad_norm": 1.1115033626556396, "learning_rate": 2.0620390569242423e-05, "loss": 0.232574462890625, "step": 5908 }, { "epoch": 0.3994186832499662, "grad_norm": 0.6248216032981873, "learning_rate": 2.0617331277006453e-05, "loss": 0.151336669921875, "step": 5909 }, { "epoch": 0.3994862782209004, "grad_norm": 1.5433728694915771, "learning_rate": 2.0614271712969062e-05, "loss": 0.23012542724609375, "step": 5910 }, { "epoch": 0.39955387319183455, "grad_norm": 1.6223838329315186, "learning_rate": 2.0611211877278295e-05, "loss": 0.29278564453125, "step": 5911 }, { "epoch": 0.39962146816276867, "grad_norm": 1.242956519126892, "learning_rate": 2.06081517700822e-05, "loss": 0.275543212890625, "step": 5912 }, { "epoch": 0.39968906313370284, "grad_norm": 0.8176170587539673, "learning_rate": 2.0605091391528843e-05, "loss": 0.22210693359375, "step": 5913 }, { "epoch": 0.399756658104637, "grad_norm": 1.2262320518493652, "learning_rate": 2.060203074176631e-05, "loss": 0.196044921875, "step": 5914 }, { "epoch": 0.3998242530755712, "grad_norm": 0.8246135711669922, "learning_rate": 2.059896982094269e-05, "loss": 0.207489013671875, "step": 5915 }, { "epoch": 0.39989184804650535, "grad_norm": 0.9946831464767456, "learning_rate": 2.059590862920609e-05, "loss": 0.258697509765625, "step": 5916 }, { "epoch": 0.3999594430174395, "grad_norm": 2.006618022918701, "learning_rate": 2.059284716670463e-05, "loss": 0.301971435546875, "step": 5917 }, { "epoch": 0.4000270379883737, "grad_norm": 0.6776373386383057, "learning_rate": 2.0589785433586445e-05, "loss": 0.1875324249267578, "step": 5918 }, { "epoch": 0.4000946329593078, "grad_norm": 2.199218511581421, "learning_rate": 2.0586723429999678e-05, "loss": 0.2457122802734375, "step": 5919 }, { "epoch": 0.400162227930242, "grad_norm": 0.9963352680206299, "learning_rate": 2.0583661156092483e-05, "loss": 0.205322265625, "step": 5920 }, { "epoch": 0.40022982290117615, "grad_norm": 1.157022476196289, "learning_rate": 2.058059861201304e-05, "loss": 0.30584716796875, "step": 5921 }, { "epoch": 0.4002974178721103, "grad_norm": 0.9921290278434753, "learning_rate": 2.057753579790953e-05, "loss": 0.26068115234375, "step": 5922 }, { "epoch": 0.4003650128430445, "grad_norm": 0.8914539813995361, "learning_rate": 2.0574472713930154e-05, "loss": 0.285675048828125, "step": 5923 }, { "epoch": 0.40043260781397866, "grad_norm": 0.9839207530021667, "learning_rate": 2.0571409360223118e-05, "loss": 0.18182373046875, "step": 5924 }, { "epoch": 0.40050020278491283, "grad_norm": 0.7312842011451721, "learning_rate": 2.056834573693665e-05, "loss": 0.1775054931640625, "step": 5925 }, { "epoch": 0.40056779775584694, "grad_norm": 1.6615712642669678, "learning_rate": 2.0565281844218988e-05, "loss": 0.28948974609375, "step": 5926 }, { "epoch": 0.4006353927267811, "grad_norm": 1.423863172531128, "learning_rate": 2.0562217682218375e-05, "loss": 0.26715087890625, "step": 5927 }, { "epoch": 0.4007029876977153, "grad_norm": 1.181253433227539, "learning_rate": 2.0559153251083086e-05, "loss": 0.1925506591796875, "step": 5928 }, { "epoch": 0.40077058266864946, "grad_norm": 1.068377137184143, "learning_rate": 2.0556088550961385e-05, "loss": 0.2353515625, "step": 5929 }, { "epoch": 0.4008381776395836, "grad_norm": 1.8069850206375122, "learning_rate": 2.055302358200157e-05, "loss": 0.3062744140625, "step": 5930 }, { "epoch": 0.4009057726105178, "grad_norm": 1.6685532331466675, "learning_rate": 2.054995834435194e-05, "loss": 0.251220703125, "step": 5931 }, { "epoch": 0.40097336758145197, "grad_norm": 2.946928024291992, "learning_rate": 2.0546892838160812e-05, "loss": 0.391632080078125, "step": 5932 }, { "epoch": 0.4010409625523861, "grad_norm": 1.2858659029006958, "learning_rate": 2.0543827063576513e-05, "loss": 0.2760009765625, "step": 5933 }, { "epoch": 0.40110855752332025, "grad_norm": 1.670806646347046, "learning_rate": 2.054076102074738e-05, "loss": 0.302520751953125, "step": 5934 }, { "epoch": 0.4011761524942544, "grad_norm": 1.44851553440094, "learning_rate": 2.0537694709821774e-05, "loss": 0.224365234375, "step": 5935 }, { "epoch": 0.4012437474651886, "grad_norm": 1.8773692846298218, "learning_rate": 2.0534628130948056e-05, "loss": 0.281524658203125, "step": 5936 }, { "epoch": 0.40131134243612276, "grad_norm": 2.0701792240142822, "learning_rate": 2.0531561284274613e-05, "loss": 0.337158203125, "step": 5937 }, { "epoch": 0.40137893740705693, "grad_norm": 1.091261863708496, "learning_rate": 2.0528494169949834e-05, "loss": 0.175567626953125, "step": 5938 }, { "epoch": 0.40144653237799105, "grad_norm": 1.4599019289016724, "learning_rate": 2.0525426788122127e-05, "loss": 0.29510498046875, "step": 5939 }, { "epoch": 0.4015141273489252, "grad_norm": 1.4920724630355835, "learning_rate": 2.0522359138939905e-05, "loss": 0.24945068359375, "step": 5940 }, { "epoch": 0.4015817223198594, "grad_norm": 0.9614813923835754, "learning_rate": 2.051929122255161e-05, "loss": 0.28631591796875, "step": 5941 }, { "epoch": 0.40164931729079356, "grad_norm": 1.2663644552230835, "learning_rate": 2.051622303910568e-05, "loss": 0.353271484375, "step": 5942 }, { "epoch": 0.40171691226172773, "grad_norm": 2.700648069381714, "learning_rate": 2.0513154588750575e-05, "loss": 0.3812255859375, "step": 5943 }, { "epoch": 0.4017845072326619, "grad_norm": 0.9419270753860474, "learning_rate": 2.0510085871634763e-05, "loss": 0.201324462890625, "step": 5944 }, { "epoch": 0.4018521022035961, "grad_norm": 1.0807589292526245, "learning_rate": 2.050701688790673e-05, "loss": 0.2030487060546875, "step": 5945 }, { "epoch": 0.4019196971745302, "grad_norm": 0.9629626274108887, "learning_rate": 2.0503947637714972e-05, "loss": 0.1822967529296875, "step": 5946 }, { "epoch": 0.40198729214546436, "grad_norm": 1.4671984910964966, "learning_rate": 2.0500878121208e-05, "loss": 0.316619873046875, "step": 5947 }, { "epoch": 0.40205488711639853, "grad_norm": 0.6354324221611023, "learning_rate": 2.0497808338534333e-05, "loss": 0.1841888427734375, "step": 5948 }, { "epoch": 0.4021224820873327, "grad_norm": 0.9266681671142578, "learning_rate": 2.049473828984251e-05, "loss": 0.27099609375, "step": 5949 }, { "epoch": 0.40219007705826687, "grad_norm": 0.6970189809799194, "learning_rate": 2.0491667975281076e-05, "loss": 0.1353607177734375, "step": 5950 }, { "epoch": 0.40225767202920104, "grad_norm": 1.5256977081298828, "learning_rate": 2.0488597394998596e-05, "loss": 0.165771484375, "step": 5951 }, { "epoch": 0.4023252670001352, "grad_norm": 1.120995044708252, "learning_rate": 2.0485526549143637e-05, "loss": 0.18194580078125, "step": 5952 }, { "epoch": 0.4023928619710693, "grad_norm": 1.0803825855255127, "learning_rate": 2.0482455437864788e-05, "loss": 0.3287353515625, "step": 5953 }, { "epoch": 0.4024604569420035, "grad_norm": 0.8638955354690552, "learning_rate": 2.047938406131066e-05, "loss": 0.1697998046875, "step": 5954 }, { "epoch": 0.40252805191293767, "grad_norm": 0.9913591146469116, "learning_rate": 2.0476312419629845e-05, "loss": 0.20440673828125, "step": 5955 }, { "epoch": 0.40259564688387184, "grad_norm": 1.4062457084655762, "learning_rate": 2.047324051297098e-05, "loss": 0.2899169921875, "step": 5956 }, { "epoch": 0.402663241854806, "grad_norm": 1.9531978368759155, "learning_rate": 2.04701683414827e-05, "loss": 0.267822265625, "step": 5957 }, { "epoch": 0.4027308368257402, "grad_norm": 1.3000919818878174, "learning_rate": 2.046709590531366e-05, "loss": 0.283782958984375, "step": 5958 }, { "epoch": 0.40279843179667435, "grad_norm": 0.9651201367378235, "learning_rate": 2.0464023204612523e-05, "loss": 0.1431427001953125, "step": 5959 }, { "epoch": 0.40286602676760846, "grad_norm": 1.4423155784606934, "learning_rate": 2.0460950239527957e-05, "loss": 0.305572509765625, "step": 5960 }, { "epoch": 0.40293362173854264, "grad_norm": 0.6694448590278625, "learning_rate": 2.045787701020866e-05, "loss": 0.161346435546875, "step": 5961 }, { "epoch": 0.4030012167094768, "grad_norm": 1.4971789121627808, "learning_rate": 2.0454803516803334e-05, "loss": 0.2777099609375, "step": 5962 }, { "epoch": 0.403068811680411, "grad_norm": 1.1630988121032715, "learning_rate": 2.0451729759460686e-05, "loss": 0.178741455078125, "step": 5963 }, { "epoch": 0.40313640665134515, "grad_norm": 1.0414422750473022, "learning_rate": 2.0448655738329448e-05, "loss": 0.2129058837890625, "step": 5964 }, { "epoch": 0.4032040016222793, "grad_norm": 1.174791693687439, "learning_rate": 2.044558145355836e-05, "loss": 0.28076171875, "step": 5965 }, { "epoch": 0.4032715965932135, "grad_norm": 2.3374807834625244, "learning_rate": 2.0442506905296186e-05, "loss": 0.26348876953125, "step": 5966 }, { "epoch": 0.4033391915641476, "grad_norm": 1.0966997146606445, "learning_rate": 2.0439432093691673e-05, "loss": 0.2350006103515625, "step": 5967 }, { "epoch": 0.4034067865350818, "grad_norm": 1.2217282056808472, "learning_rate": 2.0436357018893607e-05, "loss": 0.2494659423828125, "step": 5968 }, { "epoch": 0.40347438150601594, "grad_norm": 1.2231875658035278, "learning_rate": 2.0433281681050784e-05, "loss": 0.243438720703125, "step": 5969 }, { "epoch": 0.4035419764769501, "grad_norm": 1.2632397413253784, "learning_rate": 2.0430206080312e-05, "loss": 0.22509765625, "step": 5970 }, { "epoch": 0.4036095714478843, "grad_norm": 1.2452598810195923, "learning_rate": 2.0427130216826077e-05, "loss": 0.2053070068359375, "step": 5971 }, { "epoch": 0.40367716641881846, "grad_norm": 1.2257744073867798, "learning_rate": 2.0424054090741844e-05, "loss": 0.242462158203125, "step": 5972 }, { "epoch": 0.4037447613897526, "grad_norm": 1.2635794878005981, "learning_rate": 2.042097770220814e-05, "loss": 0.28228759765625, "step": 5973 }, { "epoch": 0.40381235636068674, "grad_norm": 1.3988806009292603, "learning_rate": 2.0417901051373825e-05, "loss": 0.329345703125, "step": 5974 }, { "epoch": 0.4038799513316209, "grad_norm": 2.163639545440674, "learning_rate": 2.0414824138387764e-05, "loss": 0.245147705078125, "step": 5975 }, { "epoch": 0.4039475463025551, "grad_norm": 1.4176586866378784, "learning_rate": 2.0411746963398834e-05, "loss": 0.21533203125, "step": 5976 }, { "epoch": 0.40401514127348925, "grad_norm": 1.2125661373138428, "learning_rate": 2.0408669526555933e-05, "loss": 0.217254638671875, "step": 5977 }, { "epoch": 0.4040827362444234, "grad_norm": 1.1443219184875488, "learning_rate": 2.0405591828007962e-05, "loss": 0.245147705078125, "step": 5978 }, { "epoch": 0.4041503312153576, "grad_norm": 0.9303746819496155, "learning_rate": 2.0402513867903845e-05, "loss": 0.24365234375, "step": 5979 }, { "epoch": 0.40421792618629176, "grad_norm": 1.4943389892578125, "learning_rate": 2.0399435646392505e-05, "loss": 0.256866455078125, "step": 5980 }, { "epoch": 0.4042855211572259, "grad_norm": 1.4597715139389038, "learning_rate": 2.039635716362289e-05, "loss": 0.363616943359375, "step": 5981 }, { "epoch": 0.40435311612816005, "grad_norm": 1.6308304071426392, "learning_rate": 2.0393278419743958e-05, "loss": 0.28448486328125, "step": 5982 }, { "epoch": 0.4044207110990942, "grad_norm": 1.4857962131500244, "learning_rate": 2.0390199414904677e-05, "loss": 0.32342529296875, "step": 5983 }, { "epoch": 0.4044883060700284, "grad_norm": 0.6746436953544617, "learning_rate": 2.0387120149254024e-05, "loss": 0.135498046875, "step": 5984 }, { "epoch": 0.40455590104096256, "grad_norm": 1.0497652292251587, "learning_rate": 2.0384040622941e-05, "loss": 0.27655029296875, "step": 5985 }, { "epoch": 0.40462349601189673, "grad_norm": 1.1146459579467773, "learning_rate": 2.03809608361146e-05, "loss": 0.232696533203125, "step": 5986 }, { "epoch": 0.4046910909828309, "grad_norm": 1.1921144723892212, "learning_rate": 2.0377880788923853e-05, "loss": 0.1453704833984375, "step": 5987 }, { "epoch": 0.404758685953765, "grad_norm": 1.1128935813903809, "learning_rate": 2.0374800481517793e-05, "loss": 0.205810546875, "step": 5988 }, { "epoch": 0.4048262809246992, "grad_norm": 1.5580182075500488, "learning_rate": 2.037171991404546e-05, "loss": 0.3167724609375, "step": 5989 }, { "epoch": 0.40489387589563336, "grad_norm": 0.8223792910575867, "learning_rate": 2.036863908665591e-05, "loss": 0.17724609375, "step": 5990 }, { "epoch": 0.40496147086656753, "grad_norm": 1.1363844871520996, "learning_rate": 2.0365557999498213e-05, "loss": 0.2233734130859375, "step": 5991 }, { "epoch": 0.4050290658375017, "grad_norm": 2.481020212173462, "learning_rate": 2.0362476652721453e-05, "loss": 0.3297576904296875, "step": 5992 }, { "epoch": 0.40509666080843587, "grad_norm": 1.4347152709960938, "learning_rate": 2.0359395046474722e-05, "loss": 0.195648193359375, "step": 5993 }, { "epoch": 0.40516425577937004, "grad_norm": 0.8126206994056702, "learning_rate": 2.0356313180907128e-05, "loss": 0.1540679931640625, "step": 5994 }, { "epoch": 0.40523185075030416, "grad_norm": 1.1713322401046753, "learning_rate": 2.0353231056167793e-05, "loss": 0.26544189453125, "step": 5995 }, { "epoch": 0.4052994457212383, "grad_norm": 1.3852280378341675, "learning_rate": 2.035014867240585e-05, "loss": 0.249359130859375, "step": 5996 }, { "epoch": 0.4053670406921725, "grad_norm": 1.1702673435211182, "learning_rate": 2.0347066029770444e-05, "loss": 0.215972900390625, "step": 5997 }, { "epoch": 0.40543463566310667, "grad_norm": 1.3395602703094482, "learning_rate": 2.0343983128410724e-05, "loss": 0.2320556640625, "step": 5998 }, { "epoch": 0.40550223063404084, "grad_norm": 1.0046063661575317, "learning_rate": 2.0340899968475867e-05, "loss": 0.17643165588378906, "step": 5999 }, { "epoch": 0.405569825604975, "grad_norm": 1.9709566831588745, "learning_rate": 2.033781655011506e-05, "loss": 0.253387451171875, "step": 6000 }, { "epoch": 0.4056374205759092, "grad_norm": 1.2754327058792114, "learning_rate": 2.033473287347749e-05, "loss": 0.28887939453125, "step": 6001 }, { "epoch": 0.4057050155468433, "grad_norm": 1.1554101705551147, "learning_rate": 2.0331648938712366e-05, "loss": 0.25860595703125, "step": 6002 }, { "epoch": 0.40577261051777747, "grad_norm": 1.3863623142242432, "learning_rate": 2.032856474596891e-05, "loss": 0.26165771484375, "step": 6003 }, { "epoch": 0.40584020548871164, "grad_norm": 0.9559834599494934, "learning_rate": 2.0325480295396356e-05, "loss": 0.253997802734375, "step": 6004 }, { "epoch": 0.4059078004596458, "grad_norm": 1.7770380973815918, "learning_rate": 2.0322395587143947e-05, "loss": 0.22235107421875, "step": 6005 }, { "epoch": 0.40597539543058, "grad_norm": 1.8650041818618774, "learning_rate": 2.0319310621360935e-05, "loss": 0.35162353515625, "step": 6006 }, { "epoch": 0.40604299040151415, "grad_norm": 0.8019053936004639, "learning_rate": 2.0316225398196594e-05, "loss": 0.239898681640625, "step": 6007 }, { "epoch": 0.40611058537244826, "grad_norm": 1.346686601638794, "learning_rate": 2.0313139917800213e-05, "loss": 0.220977783203125, "step": 6008 }, { "epoch": 0.40617818034338243, "grad_norm": 1.1837198734283447, "learning_rate": 2.0310054180321078e-05, "loss": 0.28033447265625, "step": 6009 }, { "epoch": 0.4062457753143166, "grad_norm": 1.1868997812271118, "learning_rate": 2.0306968185908502e-05, "loss": 0.273101806640625, "step": 6010 }, { "epoch": 0.4063133702852508, "grad_norm": 1.4724235534667969, "learning_rate": 2.0303881934711793e-05, "loss": 0.28021240234375, "step": 6011 }, { "epoch": 0.40638096525618495, "grad_norm": 0.8930964469909668, "learning_rate": 2.03007954268803e-05, "loss": 0.26220703125, "step": 6012 }, { "epoch": 0.4064485602271191, "grad_norm": 0.9234926700592041, "learning_rate": 2.0297708662563353e-05, "loss": 0.1862945556640625, "step": 6013 }, { "epoch": 0.4065161551980533, "grad_norm": 1.388276219367981, "learning_rate": 2.029462164191032e-05, "loss": 0.337890625, "step": 6014 }, { "epoch": 0.4065837501689874, "grad_norm": 1.2772221565246582, "learning_rate": 2.0291534365070563e-05, "loss": 0.1872406005859375, "step": 6015 }, { "epoch": 0.40665134513992157, "grad_norm": 1.1789835691452026, "learning_rate": 2.0288446832193465e-05, "loss": 0.2686767578125, "step": 6016 }, { "epoch": 0.40671894011085574, "grad_norm": 1.227388858795166, "learning_rate": 2.028535904342842e-05, "loss": 0.25634765625, "step": 6017 }, { "epoch": 0.4067865350817899, "grad_norm": 0.678359866142273, "learning_rate": 2.028227099892484e-05, "loss": 0.1572113037109375, "step": 6018 }, { "epoch": 0.4068541300527241, "grad_norm": 1.4567344188690186, "learning_rate": 2.0279182698832127e-05, "loss": 0.3048553466796875, "step": 6019 }, { "epoch": 0.40692172502365825, "grad_norm": 1.8807300329208374, "learning_rate": 2.0276094143299734e-05, "loss": 0.3516845703125, "step": 6020 }, { "epoch": 0.4069893199945924, "grad_norm": 1.3058253526687622, "learning_rate": 2.0273005332477096e-05, "loss": 0.34991455078125, "step": 6021 }, { "epoch": 0.40705691496552654, "grad_norm": 0.9135449528694153, "learning_rate": 2.0269916266513666e-05, "loss": 0.238433837890625, "step": 6022 }, { "epoch": 0.4071245099364607, "grad_norm": 1.087295413017273, "learning_rate": 2.026682694555891e-05, "loss": 0.23504638671875, "step": 6023 }, { "epoch": 0.4071921049073949, "grad_norm": 1.6945534944534302, "learning_rate": 2.0263737369762318e-05, "loss": 0.286346435546875, "step": 6024 }, { "epoch": 0.40725969987832905, "grad_norm": 1.542893648147583, "learning_rate": 2.0260647539273374e-05, "loss": 0.193359375, "step": 6025 }, { "epoch": 0.4073272948492632, "grad_norm": 1.50922429561615, "learning_rate": 2.0257557454241584e-05, "loss": 0.258697509765625, "step": 6026 }, { "epoch": 0.4073948898201974, "grad_norm": 1.350438117980957, "learning_rate": 2.025446711481647e-05, "loss": 0.2628173828125, "step": 6027 }, { "epoch": 0.40746248479113156, "grad_norm": 0.819010853767395, "learning_rate": 2.025137652114756e-05, "loss": 0.1692047119140625, "step": 6028 }, { "epoch": 0.4075300797620657, "grad_norm": 1.3785632848739624, "learning_rate": 2.0248285673384396e-05, "loss": 0.202789306640625, "step": 6029 }, { "epoch": 0.40759767473299985, "grad_norm": 0.9196876287460327, "learning_rate": 2.0245194571676533e-05, "loss": 0.2078094482421875, "step": 6030 }, { "epoch": 0.407665269703934, "grad_norm": 2.2400145530700684, "learning_rate": 2.0242103216173537e-05, "loss": 0.311981201171875, "step": 6031 }, { "epoch": 0.4077328646748682, "grad_norm": 0.9573386311531067, "learning_rate": 2.0239011607024983e-05, "loss": 0.2403564453125, "step": 6032 }, { "epoch": 0.40780045964580236, "grad_norm": 0.8971439003944397, "learning_rate": 2.0235919744380475e-05, "loss": 0.169097900390625, "step": 6033 }, { "epoch": 0.40786805461673653, "grad_norm": 1.1884047985076904, "learning_rate": 2.0232827628389597e-05, "loss": 0.196563720703125, "step": 6034 }, { "epoch": 0.4079356495876707, "grad_norm": 1.788204312324524, "learning_rate": 2.0229735259201988e-05, "loss": 0.2332763671875, "step": 6035 }, { "epoch": 0.4080032445586048, "grad_norm": 0.7288891077041626, "learning_rate": 2.0226642636967254e-05, "loss": 0.1063079833984375, "step": 6036 }, { "epoch": 0.408070839529539, "grad_norm": 1.356158971786499, "learning_rate": 2.022354976183505e-05, "loss": 0.28033447265625, "step": 6037 }, { "epoch": 0.40813843450047316, "grad_norm": 1.544531226158142, "learning_rate": 2.0220456633955023e-05, "loss": 0.27001953125, "step": 6038 }, { "epoch": 0.40820602947140733, "grad_norm": 2.014091730117798, "learning_rate": 2.0217363253476838e-05, "loss": 0.27947998046875, "step": 6039 }, { "epoch": 0.4082736244423415, "grad_norm": 1.3758786916732788, "learning_rate": 2.021426962055017e-05, "loss": 0.205413818359375, "step": 6040 }, { "epoch": 0.40834121941327567, "grad_norm": 1.284875512123108, "learning_rate": 2.0211175735324713e-05, "loss": 0.21550369262695312, "step": 6041 }, { "epoch": 0.40840881438420984, "grad_norm": 0.6357570886611938, "learning_rate": 2.0208081597950165e-05, "loss": 0.11510848999023438, "step": 6042 }, { "epoch": 0.40847640935514395, "grad_norm": 1.2546378374099731, "learning_rate": 2.0204987208576244e-05, "loss": 0.21008682250976562, "step": 6043 }, { "epoch": 0.4085440043260781, "grad_norm": 1.225319743156433, "learning_rate": 2.0201892567352665e-05, "loss": 0.2219085693359375, "step": 6044 }, { "epoch": 0.4086115992970123, "grad_norm": 1.3650866746902466, "learning_rate": 2.0198797674429177e-05, "loss": 0.2017974853515625, "step": 6045 }, { "epoch": 0.40867919426794647, "grad_norm": 1.8902174234390259, "learning_rate": 2.019570252995553e-05, "loss": 0.2568359375, "step": 6046 }, { "epoch": 0.40874678923888064, "grad_norm": 2.1424167156219482, "learning_rate": 2.0192607134081483e-05, "loss": 0.2096405029296875, "step": 6047 }, { "epoch": 0.4088143842098148, "grad_norm": 1.2559232711791992, "learning_rate": 2.0189511486956806e-05, "loss": 0.1473388671875, "step": 6048 }, { "epoch": 0.408881979180749, "grad_norm": 0.89958256483078, "learning_rate": 2.018641558873129e-05, "loss": 0.16822052001953125, "step": 6049 }, { "epoch": 0.4089495741516831, "grad_norm": 1.76108717918396, "learning_rate": 2.0183319439554737e-05, "loss": 0.2552032470703125, "step": 6050 }, { "epoch": 0.40901716912261726, "grad_norm": 1.3135465383529663, "learning_rate": 2.0180223039576954e-05, "loss": 0.2747344970703125, "step": 6051 }, { "epoch": 0.40908476409355143, "grad_norm": 1.2246102094650269, "learning_rate": 2.017712638894776e-05, "loss": 0.20110321044921875, "step": 6052 }, { "epoch": 0.4091523590644856, "grad_norm": 1.643405795097351, "learning_rate": 2.0174029487817e-05, "loss": 0.29522705078125, "step": 6053 }, { "epoch": 0.4092199540354198, "grad_norm": 2.010756731033325, "learning_rate": 2.0170932336334518e-05, "loss": 0.1650238037109375, "step": 6054 }, { "epoch": 0.40928754900635395, "grad_norm": 1.2773776054382324, "learning_rate": 2.0167834934650167e-05, "loss": 0.310638427734375, "step": 6055 }, { "epoch": 0.4093551439772881, "grad_norm": 1.5733448266983032, "learning_rate": 2.0164737282913827e-05, "loss": 0.269256591796875, "step": 6056 }, { "epoch": 0.40942273894822223, "grad_norm": 1.6426455974578857, "learning_rate": 2.0161639381275378e-05, "loss": 0.35308837890625, "step": 6057 }, { "epoch": 0.4094903339191564, "grad_norm": 1.0892711877822876, "learning_rate": 2.0158541229884714e-05, "loss": 0.27313232421875, "step": 6058 }, { "epoch": 0.4095579288900906, "grad_norm": 1.0217634439468384, "learning_rate": 2.015544282889175e-05, "loss": 0.21138763427734375, "step": 6059 }, { "epoch": 0.40962552386102474, "grad_norm": 1.8067936897277832, "learning_rate": 2.0152344178446397e-05, "loss": 0.3056640625, "step": 6060 }, { "epoch": 0.4096931188319589, "grad_norm": 1.2984715700149536, "learning_rate": 2.0149245278698586e-05, "loss": 0.30889892578125, "step": 6061 }, { "epoch": 0.4097607138028931, "grad_norm": 0.8544002175331116, "learning_rate": 2.0146146129798268e-05, "loss": 0.23382568359375, "step": 6062 }, { "epoch": 0.40982830877382725, "grad_norm": 1.0862258672714233, "learning_rate": 2.01430467318954e-05, "loss": 0.13421630859375, "step": 6063 }, { "epoch": 0.40989590374476137, "grad_norm": 1.7156412601470947, "learning_rate": 2.0139947085139946e-05, "loss": 0.30450439453125, "step": 6064 }, { "epoch": 0.40996349871569554, "grad_norm": 0.9321296811103821, "learning_rate": 2.013684718968188e-05, "loss": 0.217010498046875, "step": 6065 }, { "epoch": 0.4100310936866297, "grad_norm": 1.705073356628418, "learning_rate": 2.0133747045671212e-05, "loss": 0.30120849609375, "step": 6066 }, { "epoch": 0.4100986886575639, "grad_norm": 0.9596408009529114, "learning_rate": 2.013064665325793e-05, "loss": 0.1818389892578125, "step": 6067 }, { "epoch": 0.41016628362849805, "grad_norm": 1.119802713394165, "learning_rate": 2.0127546012592055e-05, "loss": 0.26470947265625, "step": 6068 }, { "epoch": 0.4102338785994322, "grad_norm": 0.893680214881897, "learning_rate": 2.012444512382362e-05, "loss": 0.287200927734375, "step": 6069 }, { "epoch": 0.41030147357036634, "grad_norm": 0.7145435810089111, "learning_rate": 2.0121343987102657e-05, "loss": 0.146728515625, "step": 6070 }, { "epoch": 0.4103690685413005, "grad_norm": 1.586277723312378, "learning_rate": 2.011824260257923e-05, "loss": 0.3062744140625, "step": 6071 }, { "epoch": 0.4104366635122347, "grad_norm": 0.8921360373497009, "learning_rate": 2.0115140970403387e-05, "loss": 0.17048263549804688, "step": 6072 }, { "epoch": 0.41050425848316885, "grad_norm": 1.179292917251587, "learning_rate": 2.0112039090725214e-05, "loss": 0.24249267578125, "step": 6073 }, { "epoch": 0.410571853454103, "grad_norm": 1.1738319396972656, "learning_rate": 2.01089369636948e-05, "loss": 0.2135162353515625, "step": 6074 }, { "epoch": 0.4106394484250372, "grad_norm": 0.7937206625938416, "learning_rate": 2.010583458946225e-05, "loss": 0.2119903564453125, "step": 6075 }, { "epoch": 0.41070704339597136, "grad_norm": 1.1086674928665161, "learning_rate": 2.010273196817766e-05, "loss": 0.1820068359375, "step": 6076 }, { "epoch": 0.4107746383669055, "grad_norm": 1.2353638410568237, "learning_rate": 2.0099629099991168e-05, "loss": 0.1352996826171875, "step": 6077 }, { "epoch": 0.41084223333783965, "grad_norm": 1.6571075916290283, "learning_rate": 2.0096525985052905e-05, "loss": 0.26519775390625, "step": 6078 }, { "epoch": 0.4109098283087738, "grad_norm": 1.3557405471801758, "learning_rate": 2.009342262351302e-05, "loss": 0.214447021484375, "step": 6079 }, { "epoch": 0.410977423279708, "grad_norm": 1.6862713098526, "learning_rate": 2.0090319015521674e-05, "loss": 0.20318603515625, "step": 6080 }, { "epoch": 0.41104501825064216, "grad_norm": 1.1780375242233276, "learning_rate": 2.008721516122904e-05, "loss": 0.2545166015625, "step": 6081 }, { "epoch": 0.41111261322157633, "grad_norm": 1.13499116897583, "learning_rate": 2.0084111060785293e-05, "loss": 0.262725830078125, "step": 6082 }, { "epoch": 0.4111802081925105, "grad_norm": 0.8692522048950195, "learning_rate": 2.0081006714340645e-05, "loss": 0.22894287109375, "step": 6083 }, { "epoch": 0.4112478031634446, "grad_norm": 2.4413840770721436, "learning_rate": 2.0077902122045286e-05, "loss": 0.3190765380859375, "step": 6084 }, { "epoch": 0.4113153981343788, "grad_norm": 1.555323600769043, "learning_rate": 2.0074797284049444e-05, "loss": 0.29022216796875, "step": 6085 }, { "epoch": 0.41138299310531296, "grad_norm": 2.096398115158081, "learning_rate": 2.007169220050335e-05, "loss": 0.3133544921875, "step": 6086 }, { "epoch": 0.4114505880762471, "grad_norm": 1.1058900356292725, "learning_rate": 2.0068586871557246e-05, "loss": 0.243743896484375, "step": 6087 }, { "epoch": 0.4115181830471813, "grad_norm": 1.8782168626785278, "learning_rate": 2.0065481297361386e-05, "loss": 0.29006195068359375, "step": 6088 }, { "epoch": 0.41158577801811547, "grad_norm": 1.466001033782959, "learning_rate": 2.0062375478066044e-05, "loss": 0.31854248046875, "step": 6089 }, { "epoch": 0.41165337298904964, "grad_norm": 1.3942737579345703, "learning_rate": 2.0059269413821492e-05, "loss": 0.203369140625, "step": 6090 }, { "epoch": 0.41172096795998375, "grad_norm": 1.8220279216766357, "learning_rate": 2.005616310477802e-05, "loss": 0.31658935546875, "step": 6091 }, { "epoch": 0.4117885629309179, "grad_norm": 1.3777382373809814, "learning_rate": 2.0053056551085937e-05, "loss": 0.2479248046875, "step": 6092 }, { "epoch": 0.4118561579018521, "grad_norm": 1.359047293663025, "learning_rate": 2.0049949752895552e-05, "loss": 0.19793701171875, "step": 6093 }, { "epoch": 0.41192375287278626, "grad_norm": 0.7518420219421387, "learning_rate": 2.0046842710357192e-05, "loss": 0.16767120361328125, "step": 6094 }, { "epoch": 0.41199134784372043, "grad_norm": 3.9311883449554443, "learning_rate": 2.00437354236212e-05, "loss": 0.3199462890625, "step": 6095 }, { "epoch": 0.4120589428146546, "grad_norm": 1.5445666313171387, "learning_rate": 2.0040627892837914e-05, "loss": 0.31512451171875, "step": 6096 }, { "epoch": 0.4121265377855888, "grad_norm": 1.294189691543579, "learning_rate": 2.003752011815771e-05, "loss": 0.27099609375, "step": 6097 }, { "epoch": 0.4121941327565229, "grad_norm": 1.8518565893173218, "learning_rate": 2.003441209973095e-05, "loss": 0.27923583984375, "step": 6098 }, { "epoch": 0.41226172772745706, "grad_norm": 1.315676212310791, "learning_rate": 2.0031303837708027e-05, "loss": 0.191162109375, "step": 6099 }, { "epoch": 0.41232932269839123, "grad_norm": 1.6784570217132568, "learning_rate": 2.0028195332239335e-05, "loss": 0.3115234375, "step": 6100 }, { "epoch": 0.4123969176693254, "grad_norm": 1.4314665794372559, "learning_rate": 2.002508658347528e-05, "loss": 0.2008819580078125, "step": 6101 }, { "epoch": 0.4124645126402596, "grad_norm": 1.3248077630996704, "learning_rate": 2.0021977591566285e-05, "loss": 0.2630615234375, "step": 6102 }, { "epoch": 0.41253210761119374, "grad_norm": 2.915832042694092, "learning_rate": 2.0018868356662784e-05, "loss": 0.30340576171875, "step": 6103 }, { "epoch": 0.4125997025821279, "grad_norm": 1.0678033828735352, "learning_rate": 2.0015758878915217e-05, "loss": 0.2962646484375, "step": 6104 }, { "epoch": 0.41266729755306203, "grad_norm": 0.4963037669658661, "learning_rate": 2.001264915847405e-05, "loss": 0.1197052001953125, "step": 6105 }, { "epoch": 0.4127348925239962, "grad_norm": 1.6947393417358398, "learning_rate": 2.000953919548974e-05, "loss": 0.244140625, "step": 6106 }, { "epoch": 0.41280248749493037, "grad_norm": 0.5153563618659973, "learning_rate": 2.000642899011277e-05, "loss": 0.1214599609375, "step": 6107 }, { "epoch": 0.41287008246586454, "grad_norm": 1.1963406801223755, "learning_rate": 2.0003318542493633e-05, "loss": 0.2935791015625, "step": 6108 }, { "epoch": 0.4129376774367987, "grad_norm": 1.4038121700286865, "learning_rate": 2.0000207852782825e-05, "loss": 0.266693115234375, "step": 6109 }, { "epoch": 0.4130052724077329, "grad_norm": 0.830227255821228, "learning_rate": 1.9997096921130865e-05, "loss": 0.1963348388671875, "step": 6110 }, { "epoch": 0.41307286737866705, "grad_norm": 1.1655670404434204, "learning_rate": 1.999398574768828e-05, "loss": 0.2674560546875, "step": 6111 }, { "epoch": 0.41314046234960117, "grad_norm": 1.5806721448898315, "learning_rate": 1.999087433260561e-05, "loss": 0.17575836181640625, "step": 6112 }, { "epoch": 0.41320805732053534, "grad_norm": 2.2673325538635254, "learning_rate": 1.99877626760334e-05, "loss": 0.300201416015625, "step": 6113 }, { "epoch": 0.4132756522914695, "grad_norm": 1.9543160200119019, "learning_rate": 1.9984650778122214e-05, "loss": 0.23388671875, "step": 6114 }, { "epoch": 0.4133432472624037, "grad_norm": 1.7389976978302002, "learning_rate": 1.9981538639022624e-05, "loss": 0.2969970703125, "step": 6115 }, { "epoch": 0.41341084223333785, "grad_norm": 1.3675373792648315, "learning_rate": 1.997842625888521e-05, "loss": 0.253692626953125, "step": 6116 }, { "epoch": 0.413478437204272, "grad_norm": 1.4118887186050415, "learning_rate": 1.9975313637860577e-05, "loss": 0.160247802734375, "step": 6117 }, { "epoch": 0.4135460321752062, "grad_norm": 1.718183994293213, "learning_rate": 1.9972200776099337e-05, "loss": 0.2823944091796875, "step": 6118 }, { "epoch": 0.4136136271461403, "grad_norm": 1.4682506322860718, "learning_rate": 1.996908767375209e-05, "loss": 0.339569091796875, "step": 6119 }, { "epoch": 0.4136812221170745, "grad_norm": 1.9140069484710693, "learning_rate": 1.9965974330969485e-05, "loss": 0.25616455078125, "step": 6120 }, { "epoch": 0.41374881708800865, "grad_norm": 1.7593761682510376, "learning_rate": 1.9962860747902153e-05, "loss": 0.2567138671875, "step": 6121 }, { "epoch": 0.4138164120589428, "grad_norm": 1.2512421607971191, "learning_rate": 1.9959746924700763e-05, "loss": 0.19366455078125, "step": 6122 }, { "epoch": 0.413884007029877, "grad_norm": 1.7258222103118896, "learning_rate": 1.9956632861515966e-05, "loss": 0.2594757080078125, "step": 6123 }, { "epoch": 0.41395160200081116, "grad_norm": 2.439100980758667, "learning_rate": 1.9953518558498445e-05, "loss": 0.33306884765625, "step": 6124 }, { "epoch": 0.41401919697174533, "grad_norm": 0.8213671445846558, "learning_rate": 1.9950404015798895e-05, "loss": 0.209808349609375, "step": 6125 }, { "epoch": 0.41408679194267944, "grad_norm": 1.4890186786651611, "learning_rate": 1.994728923356801e-05, "loss": 0.25177001953125, "step": 6126 }, { "epoch": 0.4141543869136136, "grad_norm": 1.8810786008834839, "learning_rate": 1.994417421195651e-05, "loss": 0.2183837890625, "step": 6127 }, { "epoch": 0.4142219818845478, "grad_norm": 1.2622624635696411, "learning_rate": 1.9941058951115105e-05, "loss": 0.173583984375, "step": 6128 }, { "epoch": 0.41428957685548196, "grad_norm": 1.6981801986694336, "learning_rate": 1.9937943451194548e-05, "loss": 0.33087158203125, "step": 6129 }, { "epoch": 0.4143571718264161, "grad_norm": 2.871345281600952, "learning_rate": 1.9934827712345576e-05, "loss": 0.320220947265625, "step": 6130 }, { "epoch": 0.4144247667973503, "grad_norm": 1.0015010833740234, "learning_rate": 1.993171173471895e-05, "loss": 0.248016357421875, "step": 6131 }, { "epoch": 0.4144923617682844, "grad_norm": 2.2701501846313477, "learning_rate": 1.992859551846544e-05, "loss": 0.30169677734375, "step": 6132 }, { "epoch": 0.4145599567392186, "grad_norm": 1.2685784101486206, "learning_rate": 1.9925479063735824e-05, "loss": 0.14535140991210938, "step": 6133 }, { "epoch": 0.41462755171015275, "grad_norm": 1.2585299015045166, "learning_rate": 1.9922362370680903e-05, "loss": 0.2527313232421875, "step": 6134 }, { "epoch": 0.4146951466810869, "grad_norm": 1.6157476902008057, "learning_rate": 1.991924543945148e-05, "loss": 0.24237060546875, "step": 6135 }, { "epoch": 0.4147627416520211, "grad_norm": 1.2682546377182007, "learning_rate": 1.9916128270198368e-05, "loss": 0.2779998779296875, "step": 6136 }, { "epoch": 0.41483033662295526, "grad_norm": 1.1489125490188599, "learning_rate": 1.99130108630724e-05, "loss": 0.221588134765625, "step": 6137 }, { "epoch": 0.41489793159388944, "grad_norm": 1.4585201740264893, "learning_rate": 1.9909893218224406e-05, "loss": 0.3316650390625, "step": 6138 }, { "epoch": 0.41496552656482355, "grad_norm": 1.284668207168579, "learning_rate": 1.9906775335805252e-05, "loss": 0.1999359130859375, "step": 6139 }, { "epoch": 0.4150331215357577, "grad_norm": 1.1023552417755127, "learning_rate": 1.9903657215965788e-05, "loss": 0.210968017578125, "step": 6140 }, { "epoch": 0.4151007165066919, "grad_norm": 1.1189416646957397, "learning_rate": 1.9900538858856895e-05, "loss": 0.23931884765625, "step": 6141 }, { "epoch": 0.41516831147762606, "grad_norm": 1.4410343170166016, "learning_rate": 1.9897420264629456e-05, "loss": 0.34088134765625, "step": 6142 }, { "epoch": 0.41523590644856023, "grad_norm": 0.9174370765686035, "learning_rate": 1.989430143343437e-05, "loss": 0.137359619140625, "step": 6143 }, { "epoch": 0.4153035014194944, "grad_norm": 0.9830288887023926, "learning_rate": 1.989118236542253e-05, "loss": 0.07003402709960938, "step": 6144 }, { "epoch": 0.4153710963904286, "grad_norm": 1.0234497785568237, "learning_rate": 1.9888063060744882e-05, "loss": 0.181854248046875, "step": 6145 }, { "epoch": 0.4154386913613627, "grad_norm": 1.8389570713043213, "learning_rate": 1.988494351955234e-05, "loss": 0.273345947265625, "step": 6146 }, { "epoch": 0.41550628633229686, "grad_norm": 1.9089765548706055, "learning_rate": 1.9881823741995854e-05, "loss": 0.2991943359375, "step": 6147 }, { "epoch": 0.41557388130323103, "grad_norm": 1.1509456634521484, "learning_rate": 1.9878703728226376e-05, "loss": 0.256500244140625, "step": 6148 }, { "epoch": 0.4156414762741652, "grad_norm": 1.3037883043289185, "learning_rate": 1.9875583478394868e-05, "loss": 0.254791259765625, "step": 6149 }, { "epoch": 0.41570907124509937, "grad_norm": 1.155856728553772, "learning_rate": 1.987246299265231e-05, "loss": 0.280517578125, "step": 6150 }, { "epoch": 0.41577666621603354, "grad_norm": 1.3565418720245361, "learning_rate": 1.9869342271149695e-05, "loss": 0.293670654296875, "step": 6151 }, { "epoch": 0.4158442611869677, "grad_norm": 1.197918176651001, "learning_rate": 1.9866221314038018e-05, "loss": 0.1634063720703125, "step": 6152 }, { "epoch": 0.4159118561579018, "grad_norm": 1.4152250289916992, "learning_rate": 1.986310012146829e-05, "loss": 0.3154296875, "step": 6153 }, { "epoch": 0.415979451128836, "grad_norm": 1.2712191343307495, "learning_rate": 1.9859978693591532e-05, "loss": 0.19803619384765625, "step": 6154 }, { "epoch": 0.41604704609977017, "grad_norm": 2.0410523414611816, "learning_rate": 1.9856857030558786e-05, "loss": 0.26739501953125, "step": 6155 }, { "epoch": 0.41611464107070434, "grad_norm": 2.618598461151123, "learning_rate": 1.9853735132521088e-05, "loss": 0.2990875244140625, "step": 6156 }, { "epoch": 0.4161822360416385, "grad_norm": 1.7442162036895752, "learning_rate": 1.9850612999629503e-05, "loss": 0.23272705078125, "step": 6157 }, { "epoch": 0.4162498310125727, "grad_norm": 1.67733895778656, "learning_rate": 1.9847490632035093e-05, "loss": 0.24114990234375, "step": 6158 }, { "epoch": 0.41631742598350685, "grad_norm": 1.5184326171875, "learning_rate": 1.984436802988894e-05, "loss": 0.2700042724609375, "step": 6159 }, { "epoch": 0.41638502095444097, "grad_norm": 0.9232663512229919, "learning_rate": 1.9841245193342137e-05, "loss": 0.198883056640625, "step": 6160 }, { "epoch": 0.41645261592537514, "grad_norm": 1.0120350122451782, "learning_rate": 1.9838122122545782e-05, "loss": 0.183258056640625, "step": 6161 }, { "epoch": 0.4165202108963093, "grad_norm": 1.204054594039917, "learning_rate": 1.9834998817650987e-05, "loss": 0.24298095703125, "step": 6162 }, { "epoch": 0.4165878058672435, "grad_norm": 1.4425126314163208, "learning_rate": 1.9831875278808888e-05, "loss": 0.1478118896484375, "step": 6163 }, { "epoch": 0.41665540083817765, "grad_norm": 1.1256985664367676, "learning_rate": 1.9828751506170614e-05, "loss": 0.1551055908203125, "step": 6164 }, { "epoch": 0.4167229958091118, "grad_norm": 2.344109058380127, "learning_rate": 1.9825627499887308e-05, "loss": 0.299041748046875, "step": 6165 }, { "epoch": 0.416790590780046, "grad_norm": 1.539396047592163, "learning_rate": 1.9822503260110134e-05, "loss": 0.232147216796875, "step": 6166 }, { "epoch": 0.4168581857509801, "grad_norm": 0.7146884202957153, "learning_rate": 1.9819378786990263e-05, "loss": 0.176666259765625, "step": 6167 }, { "epoch": 0.4169257807219143, "grad_norm": 1.5129249095916748, "learning_rate": 1.9816254080678875e-05, "loss": 0.34259033203125, "step": 6168 }, { "epoch": 0.41699337569284844, "grad_norm": 1.2434512376785278, "learning_rate": 1.981312914132716e-05, "loss": 0.255157470703125, "step": 6169 }, { "epoch": 0.4170609706637826, "grad_norm": 2.214332342147827, "learning_rate": 1.9810003969086326e-05, "loss": 0.3245849609375, "step": 6170 }, { "epoch": 0.4171285656347168, "grad_norm": 0.8397699594497681, "learning_rate": 1.980687856410759e-05, "loss": 0.121856689453125, "step": 6171 }, { "epoch": 0.41719616060565096, "grad_norm": 0.7609497308731079, "learning_rate": 1.9803752926542172e-05, "loss": 0.144195556640625, "step": 6172 }, { "epoch": 0.4172637555765851, "grad_norm": 0.926816463470459, "learning_rate": 1.980062705654131e-05, "loss": 0.25299072265625, "step": 6173 }, { "epoch": 0.41733135054751924, "grad_norm": 1.7181446552276611, "learning_rate": 1.979750095425626e-05, "loss": 0.3486328125, "step": 6174 }, { "epoch": 0.4173989455184534, "grad_norm": 0.7924807667732239, "learning_rate": 1.979437461983828e-05, "loss": 0.167236328125, "step": 6175 }, { "epoch": 0.4174665404893876, "grad_norm": 1.3990947008132935, "learning_rate": 1.979124805343864e-05, "loss": 0.2038726806640625, "step": 6176 }, { "epoch": 0.41753413546032175, "grad_norm": 1.452174186706543, "learning_rate": 1.9788121255208623e-05, "loss": 0.2376708984375, "step": 6177 }, { "epoch": 0.4176017304312559, "grad_norm": 1.5855048894882202, "learning_rate": 1.978499422529952e-05, "loss": 0.25177001953125, "step": 6178 }, { "epoch": 0.4176693254021901, "grad_norm": 1.5634182691574097, "learning_rate": 1.978186696386264e-05, "loss": 0.224029541015625, "step": 6179 }, { "epoch": 0.41773692037312427, "grad_norm": 1.4585734605789185, "learning_rate": 1.9778739471049297e-05, "loss": 0.318359375, "step": 6180 }, { "epoch": 0.4178045153440584, "grad_norm": 1.1022707223892212, "learning_rate": 1.9775611747010822e-05, "loss": 0.20721435546875, "step": 6181 }, { "epoch": 0.41787211031499255, "grad_norm": 1.115992784500122, "learning_rate": 1.9772483791898547e-05, "loss": 0.2388916015625, "step": 6182 }, { "epoch": 0.4179397052859267, "grad_norm": 1.533170461654663, "learning_rate": 1.976935560586383e-05, "loss": 0.208587646484375, "step": 6183 }, { "epoch": 0.4180073002568609, "grad_norm": 2.0190136432647705, "learning_rate": 1.976622718905803e-05, "loss": 0.274871826171875, "step": 6184 }, { "epoch": 0.41807489522779506, "grad_norm": 1.7613736391067505, "learning_rate": 1.9763098541632516e-05, "loss": 0.2107696533203125, "step": 6185 }, { "epoch": 0.41814249019872923, "grad_norm": 1.6819629669189453, "learning_rate": 1.9759969663738672e-05, "loss": 0.28326416015625, "step": 6186 }, { "epoch": 0.4182100851696634, "grad_norm": 1.6829081773757935, "learning_rate": 1.975684055552789e-05, "loss": 0.226959228515625, "step": 6187 }, { "epoch": 0.4182776801405975, "grad_norm": 1.9290728569030762, "learning_rate": 1.9753711217151587e-05, "loss": 0.324951171875, "step": 6188 }, { "epoch": 0.4183452751115317, "grad_norm": 1.9412890672683716, "learning_rate": 1.975058164876117e-05, "loss": 0.26873779296875, "step": 6189 }, { "epoch": 0.41841287008246586, "grad_norm": 1.4452954530715942, "learning_rate": 1.9747451850508065e-05, "loss": 0.2977294921875, "step": 6190 }, { "epoch": 0.41848046505340003, "grad_norm": 0.9764143824577332, "learning_rate": 1.9744321822543725e-05, "loss": 0.2217254638671875, "step": 6191 }, { "epoch": 0.4185480600243342, "grad_norm": 1.3086576461791992, "learning_rate": 1.9741191565019584e-05, "loss": 0.262603759765625, "step": 6192 }, { "epoch": 0.41861565499526837, "grad_norm": 0.989229679107666, "learning_rate": 1.973806107808711e-05, "loss": 0.226837158203125, "step": 6193 }, { "epoch": 0.41868324996620254, "grad_norm": 1.056758165359497, "learning_rate": 1.9734930361897776e-05, "loss": 0.18108749389648438, "step": 6194 }, { "epoch": 0.41875084493713666, "grad_norm": 1.5795515775680542, "learning_rate": 1.9731799416603062e-05, "loss": 0.33074951171875, "step": 6195 }, { "epoch": 0.4188184399080708, "grad_norm": 1.4752593040466309, "learning_rate": 1.9728668242354472e-05, "loss": 0.2525177001953125, "step": 6196 }, { "epoch": 0.418886034879005, "grad_norm": 1.8315140008926392, "learning_rate": 1.9725536839303502e-05, "loss": 0.213897705078125, "step": 6197 }, { "epoch": 0.41895362984993917, "grad_norm": 0.986625075340271, "learning_rate": 1.972240520760167e-05, "loss": 0.298492431640625, "step": 6198 }, { "epoch": 0.41902122482087334, "grad_norm": 1.562512993812561, "learning_rate": 1.971927334740051e-05, "loss": 0.29559326171875, "step": 6199 }, { "epoch": 0.4190888197918075, "grad_norm": 1.0643670558929443, "learning_rate": 1.971614125885155e-05, "loss": 0.207000732421875, "step": 6200 }, { "epoch": 0.4191564147627416, "grad_norm": 1.0856797695159912, "learning_rate": 1.9713008942106352e-05, "loss": 0.2407989501953125, "step": 6201 }, { "epoch": 0.4192240097336758, "grad_norm": 1.0602482557296753, "learning_rate": 1.970987639731647e-05, "loss": 0.206573486328125, "step": 6202 }, { "epoch": 0.41929160470460997, "grad_norm": 0.9972699880599976, "learning_rate": 1.9706743624633476e-05, "loss": 0.217071533203125, "step": 6203 }, { "epoch": 0.41935919967554414, "grad_norm": 1.0020431280136108, "learning_rate": 1.9703610624208956e-05, "loss": 0.25555419921875, "step": 6204 }, { "epoch": 0.4194267946464783, "grad_norm": 0.7667901515960693, "learning_rate": 1.97004773961945e-05, "loss": 0.18193817138671875, "step": 6205 }, { "epoch": 0.4194943896174125, "grad_norm": 1.1732581853866577, "learning_rate": 1.969734394074172e-05, "loss": 0.217041015625, "step": 6206 }, { "epoch": 0.41956198458834665, "grad_norm": 0.4325636029243469, "learning_rate": 1.9694210258002227e-05, "loss": 0.07251739501953125, "step": 6207 }, { "epoch": 0.41962957955928076, "grad_norm": 0.936020016670227, "learning_rate": 1.9691076348127644e-05, "loss": 0.205230712890625, "step": 6208 }, { "epoch": 0.41969717453021493, "grad_norm": 0.4421731233596802, "learning_rate": 1.968794221126962e-05, "loss": 0.078643798828125, "step": 6209 }, { "epoch": 0.4197647695011491, "grad_norm": 2.3156821727752686, "learning_rate": 1.9684807847579796e-05, "loss": 0.273101806640625, "step": 6210 }, { "epoch": 0.4198323644720833, "grad_norm": 1.7198519706726074, "learning_rate": 1.968167325720983e-05, "loss": 0.299102783203125, "step": 6211 }, { "epoch": 0.41989995944301745, "grad_norm": 2.373079299926758, "learning_rate": 1.96785384403114e-05, "loss": 0.281402587890625, "step": 6212 }, { "epoch": 0.4199675544139516, "grad_norm": 1.2585595846176147, "learning_rate": 1.9675403397036187e-05, "loss": 0.22662353515625, "step": 6213 }, { "epoch": 0.4200351493848858, "grad_norm": 0.8700026273727417, "learning_rate": 1.967226812753588e-05, "loss": 0.1516571044921875, "step": 6214 }, { "epoch": 0.4201027443558199, "grad_norm": 1.2826523780822754, "learning_rate": 1.9669132631962183e-05, "loss": 0.23638916015625, "step": 6215 }, { "epoch": 0.42017033932675407, "grad_norm": 0.7383918166160583, "learning_rate": 1.966599691046681e-05, "loss": 0.1478424072265625, "step": 6216 }, { "epoch": 0.42023793429768824, "grad_norm": 1.2702152729034424, "learning_rate": 1.966286096320149e-05, "loss": 0.15406036376953125, "step": 6217 }, { "epoch": 0.4203055292686224, "grad_norm": 0.9600173234939575, "learning_rate": 1.9659724790317962e-05, "loss": 0.2256011962890625, "step": 6218 }, { "epoch": 0.4203731242395566, "grad_norm": 1.5732704401016235, "learning_rate": 1.965658839196797e-05, "loss": 0.31170654296875, "step": 6219 }, { "epoch": 0.42044071921049075, "grad_norm": 1.1478831768035889, "learning_rate": 1.9653451768303268e-05, "loss": 0.21710205078125, "step": 6220 }, { "epoch": 0.4205083141814249, "grad_norm": 1.2221521139144897, "learning_rate": 1.9650314919475632e-05, "loss": 0.306121826171875, "step": 6221 }, { "epoch": 0.42057590915235904, "grad_norm": 1.047965407371521, "learning_rate": 1.964717784563684e-05, "loss": 0.1796875, "step": 6222 }, { "epoch": 0.4206435041232932, "grad_norm": 1.4962557554244995, "learning_rate": 1.9644040546938688e-05, "loss": 0.190032958984375, "step": 6223 }, { "epoch": 0.4207110990942274, "grad_norm": 1.2900410890579224, "learning_rate": 1.964090302353297e-05, "loss": 0.2647705078125, "step": 6224 }, { "epoch": 0.42077869406516155, "grad_norm": 1.3902300596237183, "learning_rate": 1.96377652755715e-05, "loss": 0.274505615234375, "step": 6225 }, { "epoch": 0.4208462890360957, "grad_norm": 1.8137766122817993, "learning_rate": 1.963462730320611e-05, "loss": 0.214019775390625, "step": 6226 }, { "epoch": 0.4209138840070299, "grad_norm": 1.146294355392456, "learning_rate": 1.9631489106588624e-05, "loss": 0.260589599609375, "step": 6227 }, { "epoch": 0.42098147897796406, "grad_norm": 1.3007702827453613, "learning_rate": 1.962835068587089e-05, "loss": 0.2081146240234375, "step": 6228 }, { "epoch": 0.4210490739488982, "grad_norm": 1.3325881958007812, "learning_rate": 1.9625212041204767e-05, "loss": 0.23529815673828125, "step": 6229 }, { "epoch": 0.42111666891983235, "grad_norm": 1.1061841249465942, "learning_rate": 1.9622073172742126e-05, "loss": 0.27178955078125, "step": 6230 }, { "epoch": 0.4211842638907665, "grad_norm": 1.372031331062317, "learning_rate": 1.961893408063484e-05, "loss": 0.2517242431640625, "step": 6231 }, { "epoch": 0.4212518588617007, "grad_norm": 2.0751936435699463, "learning_rate": 1.9615794765034794e-05, "loss": 0.316650390625, "step": 6232 }, { "epoch": 0.42131945383263486, "grad_norm": 0.6928516626358032, "learning_rate": 1.9612655226093893e-05, "loss": 0.1070556640625, "step": 6233 }, { "epoch": 0.42138704880356903, "grad_norm": 1.2220906019210815, "learning_rate": 1.9609515463964052e-05, "loss": 0.223419189453125, "step": 6234 }, { "epoch": 0.4214546437745032, "grad_norm": 1.554107904434204, "learning_rate": 1.9606375478797185e-05, "loss": 0.2571563720703125, "step": 6235 }, { "epoch": 0.4215222387454373, "grad_norm": 1.532523274421692, "learning_rate": 1.960323527074522e-05, "loss": 0.3486328125, "step": 6236 }, { "epoch": 0.4215898337163715, "grad_norm": 0.8512517809867859, "learning_rate": 1.960009483996011e-05, "loss": 0.1598052978515625, "step": 6237 }, { "epoch": 0.42165742868730566, "grad_norm": 1.1776847839355469, "learning_rate": 1.9596954186593802e-05, "loss": 0.3297119140625, "step": 6238 }, { "epoch": 0.42172502365823983, "grad_norm": 1.2602934837341309, "learning_rate": 1.9593813310798263e-05, "loss": 0.24420166015625, "step": 6239 }, { "epoch": 0.421792618629174, "grad_norm": 0.9728042483329773, "learning_rate": 1.959067221272547e-05, "loss": 0.16448974609375, "step": 6240 }, { "epoch": 0.42186021360010817, "grad_norm": 1.6957838535308838, "learning_rate": 1.95875308925274e-05, "loss": 0.29150390625, "step": 6241 }, { "epoch": 0.42192780857104234, "grad_norm": 1.0787513256072998, "learning_rate": 1.9584389350356058e-05, "loss": 0.22784423828125, "step": 6242 }, { "epoch": 0.42199540354197645, "grad_norm": 1.2850899696350098, "learning_rate": 1.958124758636345e-05, "loss": 0.280517578125, "step": 6243 }, { "epoch": 0.4220629985129106, "grad_norm": 0.9093238711357117, "learning_rate": 1.9578105600701595e-05, "loss": 0.224822998046875, "step": 6244 }, { "epoch": 0.4221305934838448, "grad_norm": 0.9027424454689026, "learning_rate": 1.957496339352252e-05, "loss": 0.2002716064453125, "step": 6245 }, { "epoch": 0.42219818845477897, "grad_norm": 1.2999255657196045, "learning_rate": 1.9571820964978263e-05, "loss": 0.23095703125, "step": 6246 }, { "epoch": 0.42226578342571314, "grad_norm": 1.4737616777420044, "learning_rate": 1.9568678315220876e-05, "loss": 0.2952880859375, "step": 6247 }, { "epoch": 0.4223333783966473, "grad_norm": 1.5282201766967773, "learning_rate": 1.956553544440242e-05, "loss": 0.294769287109375, "step": 6248 }, { "epoch": 0.4224009733675815, "grad_norm": 1.2605537176132202, "learning_rate": 1.9562392352674967e-05, "loss": 0.246185302734375, "step": 6249 }, { "epoch": 0.4224685683385156, "grad_norm": 1.880733847618103, "learning_rate": 1.9559249040190597e-05, "loss": 0.2999725341796875, "step": 6250 }, { "epoch": 0.42253616330944976, "grad_norm": 1.3119261264801025, "learning_rate": 1.9556105507101408e-05, "loss": 0.2154541015625, "step": 6251 }, { "epoch": 0.42260375828038393, "grad_norm": 1.2853933572769165, "learning_rate": 1.9552961753559498e-05, "loss": 0.2102508544921875, "step": 6252 }, { "epoch": 0.4226713532513181, "grad_norm": 1.7578073740005493, "learning_rate": 1.9549817779716986e-05, "loss": 0.29266357421875, "step": 6253 }, { "epoch": 0.4227389482222523, "grad_norm": 1.5967637300491333, "learning_rate": 1.954667358572599e-05, "loss": 0.279571533203125, "step": 6254 }, { "epoch": 0.42280654319318645, "grad_norm": 1.403220295906067, "learning_rate": 1.9543529171738654e-05, "loss": 0.269073486328125, "step": 6255 }, { "epoch": 0.4228741381641206, "grad_norm": 1.9081462621688843, "learning_rate": 1.954038453790712e-05, "loss": 0.28240966796875, "step": 6256 }, { "epoch": 0.42294173313505473, "grad_norm": 0.9416032433509827, "learning_rate": 1.9537239684383546e-05, "loss": 0.27618408203125, "step": 6257 }, { "epoch": 0.4230093281059889, "grad_norm": 1.3854597806930542, "learning_rate": 1.9534094611320104e-05, "loss": 0.18658447265625, "step": 6258 }, { "epoch": 0.4230769230769231, "grad_norm": 2.9224095344543457, "learning_rate": 1.9530949318868962e-05, "loss": 0.3621826171875, "step": 6259 }, { "epoch": 0.42314451804785724, "grad_norm": 1.6633003950119019, "learning_rate": 1.9527803807182315e-05, "loss": 0.251708984375, "step": 6260 }, { "epoch": 0.4232121130187914, "grad_norm": 1.2062932252883911, "learning_rate": 1.952465807641236e-05, "loss": 0.279388427734375, "step": 6261 }, { "epoch": 0.4232797079897256, "grad_norm": 1.0628974437713623, "learning_rate": 1.952151212671131e-05, "loss": 0.274444580078125, "step": 6262 }, { "epoch": 0.4233473029606597, "grad_norm": 0.5383665561676025, "learning_rate": 1.9518365958231385e-05, "loss": 0.1323089599609375, "step": 6263 }, { "epoch": 0.42341489793159387, "grad_norm": 1.0115019083023071, "learning_rate": 1.9515219571124817e-05, "loss": 0.1961669921875, "step": 6264 }, { "epoch": 0.42348249290252804, "grad_norm": 1.233506202697754, "learning_rate": 1.9512072965543843e-05, "loss": 0.1771697998046875, "step": 6265 }, { "epoch": 0.4235500878734622, "grad_norm": 2.1797499656677246, "learning_rate": 1.9508926141640717e-05, "loss": 0.312835693359375, "step": 6266 }, { "epoch": 0.4236176828443964, "grad_norm": 1.6097733974456787, "learning_rate": 1.950577909956771e-05, "loss": 0.319091796875, "step": 6267 }, { "epoch": 0.42368527781533055, "grad_norm": 2.382859706878662, "learning_rate": 1.9502631839477085e-05, "loss": 0.284942626953125, "step": 6268 }, { "epoch": 0.4237528727862647, "grad_norm": 1.486810564994812, "learning_rate": 1.9499484361521134e-05, "loss": 0.360595703125, "step": 6269 }, { "epoch": 0.42382046775719884, "grad_norm": 1.311790943145752, "learning_rate": 1.9496336665852146e-05, "loss": 0.284942626953125, "step": 6270 }, { "epoch": 0.423888062728133, "grad_norm": 0.9485945701599121, "learning_rate": 1.9493188752622423e-05, "loss": 0.260528564453125, "step": 6271 }, { "epoch": 0.4239556576990672, "grad_norm": 1.3821227550506592, "learning_rate": 1.9490040621984293e-05, "loss": 0.290679931640625, "step": 6272 }, { "epoch": 0.42402325267000135, "grad_norm": 1.0914028882980347, "learning_rate": 1.9486892274090066e-05, "loss": 0.2254638671875, "step": 6273 }, { "epoch": 0.4240908476409355, "grad_norm": 1.0469906330108643, "learning_rate": 1.948374370909209e-05, "loss": 0.24017333984375, "step": 6274 }, { "epoch": 0.4241584426118697, "grad_norm": 0.5869485139846802, "learning_rate": 1.9480594927142713e-05, "loss": 0.10541534423828125, "step": 6275 }, { "epoch": 0.42422603758280386, "grad_norm": 1.5814464092254639, "learning_rate": 1.9477445928394284e-05, "loss": 0.33050537109375, "step": 6276 }, { "epoch": 0.424293632553738, "grad_norm": 2.1408071517944336, "learning_rate": 1.9474296712999182e-05, "loss": 0.251312255859375, "step": 6277 }, { "epoch": 0.42436122752467215, "grad_norm": 1.2544145584106445, "learning_rate": 1.9471147281109777e-05, "loss": 0.25225830078125, "step": 6278 }, { "epoch": 0.4244288224956063, "grad_norm": 1.4559412002563477, "learning_rate": 1.9467997632878457e-05, "loss": 0.27239990234375, "step": 6279 }, { "epoch": 0.4244964174665405, "grad_norm": 0.7540981769561768, "learning_rate": 1.9464847768457627e-05, "loss": 0.17681884765625, "step": 6280 }, { "epoch": 0.42456401243747466, "grad_norm": 1.0995256900787354, "learning_rate": 1.94616976879997e-05, "loss": 0.254852294921875, "step": 6281 }, { "epoch": 0.42463160740840883, "grad_norm": 1.5402241945266724, "learning_rate": 1.9458547391657083e-05, "loss": 0.258453369140625, "step": 6282 }, { "epoch": 0.424699202379343, "grad_norm": 1.339565396308899, "learning_rate": 1.9455396879582224e-05, "loss": 0.186920166015625, "step": 6283 }, { "epoch": 0.4247667973502771, "grad_norm": 1.9363422393798828, "learning_rate": 1.9452246151927555e-05, "loss": 0.255706787109375, "step": 6284 }, { "epoch": 0.4248343923212113, "grad_norm": 0.9271208047866821, "learning_rate": 1.9449095208845527e-05, "loss": 0.184295654296875, "step": 6285 }, { "epoch": 0.42490198729214546, "grad_norm": 1.618420124053955, "learning_rate": 1.9445944050488605e-05, "loss": 0.3338623046875, "step": 6286 }, { "epoch": 0.4249695822630796, "grad_norm": 1.7336621284484863, "learning_rate": 1.9442792677009255e-05, "loss": 0.32916259765625, "step": 6287 }, { "epoch": 0.4250371772340138, "grad_norm": 1.122049331665039, "learning_rate": 1.9439641088559976e-05, "loss": 0.261474609375, "step": 6288 }, { "epoch": 0.42510477220494797, "grad_norm": 1.4381781816482544, "learning_rate": 1.9436489285293246e-05, "loss": 0.252471923828125, "step": 6289 }, { "epoch": 0.42517236717588214, "grad_norm": 2.2425425052642822, "learning_rate": 1.9433337267361573e-05, "loss": 0.35186767578125, "step": 6290 }, { "epoch": 0.42523996214681625, "grad_norm": 0.7673970460891724, "learning_rate": 1.9430185034917474e-05, "loss": 0.198974609375, "step": 6291 }, { "epoch": 0.4253075571177504, "grad_norm": 1.1278537511825562, "learning_rate": 1.942703258811347e-05, "loss": 0.2375640869140625, "step": 6292 }, { "epoch": 0.4253751520886846, "grad_norm": 1.1150143146514893, "learning_rate": 1.9423879927102106e-05, "loss": 0.228363037109375, "step": 6293 }, { "epoch": 0.42544274705961876, "grad_norm": 1.2934448719024658, "learning_rate": 1.9420727052035913e-05, "loss": 0.227783203125, "step": 6294 }, { "epoch": 0.42551034203055293, "grad_norm": 1.008139967918396, "learning_rate": 1.9417573963067447e-05, "loss": 0.18914794921875, "step": 6295 }, { "epoch": 0.4255779370014871, "grad_norm": 1.3357460498809814, "learning_rate": 1.9414420660349288e-05, "loss": 0.29803466796875, "step": 6296 }, { "epoch": 0.4256455319724213, "grad_norm": 1.0283905267715454, "learning_rate": 1.9411267144034004e-05, "loss": 0.230194091796875, "step": 6297 }, { "epoch": 0.4257131269433554, "grad_norm": 1.5491877794265747, "learning_rate": 1.940811341427418e-05, "loss": 0.290924072265625, "step": 6298 }, { "epoch": 0.42578072191428956, "grad_norm": 0.482286661863327, "learning_rate": 1.940495947122242e-05, "loss": 0.09409332275390625, "step": 6299 }, { "epoch": 0.42584831688522373, "grad_norm": 1.0531991720199585, "learning_rate": 1.9401805315031318e-05, "loss": 0.263153076171875, "step": 6300 }, { "epoch": 0.4259159118561579, "grad_norm": 1.734246850013733, "learning_rate": 1.9398650945853504e-05, "loss": 0.257965087890625, "step": 6301 }, { "epoch": 0.4259835068270921, "grad_norm": 0.8398504853248596, "learning_rate": 1.9395496363841607e-05, "loss": 0.225738525390625, "step": 6302 }, { "epoch": 0.42605110179802624, "grad_norm": 0.9578942656517029, "learning_rate": 1.9392341569148254e-05, "loss": 0.212982177734375, "step": 6303 }, { "epoch": 0.4261186967689604, "grad_norm": 1.5874947309494019, "learning_rate": 1.9389186561926105e-05, "loss": 0.302093505859375, "step": 6304 }, { "epoch": 0.42618629173989453, "grad_norm": 1.2617064714431763, "learning_rate": 1.9386031342327812e-05, "loss": 0.233367919921875, "step": 6305 }, { "epoch": 0.4262538867108287, "grad_norm": 1.1507683992385864, "learning_rate": 1.938287591050605e-05, "loss": 0.265472412109375, "step": 6306 }, { "epoch": 0.42632148168176287, "grad_norm": 1.8611353635787964, "learning_rate": 1.937972026661349e-05, "loss": 0.27410888671875, "step": 6307 }, { "epoch": 0.42638907665269704, "grad_norm": 1.168299674987793, "learning_rate": 1.9376564410802826e-05, "loss": 0.221954345703125, "step": 6308 }, { "epoch": 0.4264566716236312, "grad_norm": 1.08597993850708, "learning_rate": 1.937340834322676e-05, "loss": 0.271209716796875, "step": 6309 }, { "epoch": 0.4265242665945654, "grad_norm": 1.1833981275558472, "learning_rate": 1.9370252064038e-05, "loss": 0.146820068359375, "step": 6310 }, { "epoch": 0.42659186156549955, "grad_norm": 1.308039903640747, "learning_rate": 1.936709557338927e-05, "loss": 0.29486083984375, "step": 6311 }, { "epoch": 0.42665945653643367, "grad_norm": 0.8360847234725952, "learning_rate": 1.936393887143329e-05, "loss": 0.185089111328125, "step": 6312 }, { "epoch": 0.42672705150736784, "grad_norm": 1.065929889678955, "learning_rate": 1.9360781958322815e-05, "loss": 0.22998046875, "step": 6313 }, { "epoch": 0.426794646478302, "grad_norm": 1.2489792108535767, "learning_rate": 1.9357624834210584e-05, "loss": 0.20123291015625, "step": 6314 }, { "epoch": 0.4268622414492362, "grad_norm": 1.190634846687317, "learning_rate": 1.935446749924937e-05, "loss": 0.252960205078125, "step": 6315 }, { "epoch": 0.42692983642017035, "grad_norm": 1.26352059841156, "learning_rate": 1.935130995359194e-05, "loss": 0.235260009765625, "step": 6316 }, { "epoch": 0.4269974313911045, "grad_norm": 0.9916363954544067, "learning_rate": 1.9348152197391065e-05, "loss": 0.22998046875, "step": 6317 }, { "epoch": 0.4270650263620387, "grad_norm": 1.6475605964660645, "learning_rate": 1.934499423079955e-05, "loss": 0.30859375, "step": 6318 }, { "epoch": 0.4271326213329728, "grad_norm": 1.105616569519043, "learning_rate": 1.9341836053970195e-05, "loss": 0.26446533203125, "step": 6319 }, { "epoch": 0.427200216303907, "grad_norm": 1.1985605955123901, "learning_rate": 1.9338677667055806e-05, "loss": 0.2308349609375, "step": 6320 }, { "epoch": 0.42726781127484115, "grad_norm": 1.171537160873413, "learning_rate": 1.9335519070209213e-05, "loss": 0.254425048828125, "step": 6321 }, { "epoch": 0.4273354062457753, "grad_norm": 1.258973240852356, "learning_rate": 1.933236026358324e-05, "loss": 0.2840576171875, "step": 6322 }, { "epoch": 0.4274030012167095, "grad_norm": 1.0967555046081543, "learning_rate": 1.9329201247330745e-05, "loss": 0.233978271484375, "step": 6323 }, { "epoch": 0.42747059618764366, "grad_norm": 1.2485054731369019, "learning_rate": 1.932604202160456e-05, "loss": 0.26226806640625, "step": 6324 }, { "epoch": 0.42753819115857783, "grad_norm": 1.089268445968628, "learning_rate": 1.932288258655756e-05, "loss": 0.223419189453125, "step": 6325 }, { "epoch": 0.42760578612951194, "grad_norm": 2.844587564468384, "learning_rate": 1.931972294234262e-05, "loss": 0.294647216796875, "step": 6326 }, { "epoch": 0.4276733811004461, "grad_norm": 0.8673184514045715, "learning_rate": 1.9316563089112622e-05, "loss": 0.216888427734375, "step": 6327 }, { "epoch": 0.4277409760713803, "grad_norm": 1.33928382396698, "learning_rate": 1.9313403027020458e-05, "loss": 0.260009765625, "step": 6328 }, { "epoch": 0.42780857104231446, "grad_norm": 1.8897751569747925, "learning_rate": 1.9310242756219027e-05, "loss": 0.298583984375, "step": 6329 }, { "epoch": 0.4278761660132486, "grad_norm": 1.5188343524932861, "learning_rate": 1.9307082276861244e-05, "loss": 0.2108001708984375, "step": 6330 }, { "epoch": 0.4279437609841828, "grad_norm": 1.0074950456619263, "learning_rate": 1.9303921589100038e-05, "loss": 0.170135498046875, "step": 6331 }, { "epoch": 0.4280113559551169, "grad_norm": 0.8036895990371704, "learning_rate": 1.9300760693088338e-05, "loss": 0.215667724609375, "step": 6332 }, { "epoch": 0.4280789509260511, "grad_norm": 0.8971067667007446, "learning_rate": 1.9297599588979095e-05, "loss": 0.1563720703125, "step": 6333 }, { "epoch": 0.42814654589698525, "grad_norm": 0.87752765417099, "learning_rate": 1.9294438276925252e-05, "loss": 0.183197021484375, "step": 6334 }, { "epoch": 0.4282141408679194, "grad_norm": 0.645721435546875, "learning_rate": 1.929127675707978e-05, "loss": 0.1754302978515625, "step": 6335 }, { "epoch": 0.4282817358388536, "grad_norm": 1.0360689163208008, "learning_rate": 1.928811502959565e-05, "loss": 0.212646484375, "step": 6336 }, { "epoch": 0.42834933080978776, "grad_norm": 1.9580779075622559, "learning_rate": 1.928495309462585e-05, "loss": 0.2863006591796875, "step": 6337 }, { "epoch": 0.42841692578072194, "grad_norm": 1.0078359842300415, "learning_rate": 1.9281790952323366e-05, "loss": 0.202392578125, "step": 6338 }, { "epoch": 0.42848452075165605, "grad_norm": 2.1014468669891357, "learning_rate": 1.9278628602841214e-05, "loss": 0.2568359375, "step": 6339 }, { "epoch": 0.4285521157225902, "grad_norm": 1.2136566638946533, "learning_rate": 1.9275466046332403e-05, "loss": 0.2366943359375, "step": 6340 }, { "epoch": 0.4286197106935244, "grad_norm": 1.0182130336761475, "learning_rate": 1.927230328294995e-05, "loss": 0.197174072265625, "step": 6341 }, { "epoch": 0.42868730566445856, "grad_norm": 1.4570573568344116, "learning_rate": 1.9269140312846897e-05, "loss": 0.3033447265625, "step": 6342 }, { "epoch": 0.42875490063539273, "grad_norm": 0.9928702712059021, "learning_rate": 1.9265977136176286e-05, "loss": 0.19361114501953125, "step": 6343 }, { "epoch": 0.4288224956063269, "grad_norm": 2.060253381729126, "learning_rate": 1.9262813753091173e-05, "loss": 0.30462646484375, "step": 6344 }, { "epoch": 0.4288900905772611, "grad_norm": 0.636197030544281, "learning_rate": 1.925965016374462e-05, "loss": 0.1491851806640625, "step": 6345 }, { "epoch": 0.4289576855481952, "grad_norm": 1.2073485851287842, "learning_rate": 1.9256486368289698e-05, "loss": 0.1993255615234375, "step": 6346 }, { "epoch": 0.42902528051912936, "grad_norm": 1.2964359521865845, "learning_rate": 1.9253322366879495e-05, "loss": 0.181793212890625, "step": 6347 }, { "epoch": 0.42909287549006353, "grad_norm": 1.3650363683700562, "learning_rate": 1.925015815966711e-05, "loss": 0.2717933654785156, "step": 6348 }, { "epoch": 0.4291604704609977, "grad_norm": 0.6393421292304993, "learning_rate": 1.924699374680564e-05, "loss": 0.10861968994140625, "step": 6349 }, { "epoch": 0.42922806543193187, "grad_norm": 3.092336416244507, "learning_rate": 1.9243829128448196e-05, "loss": 0.287109375, "step": 6350 }, { "epoch": 0.42929566040286604, "grad_norm": 1.7980836629867554, "learning_rate": 1.9240664304747914e-05, "loss": 0.3558349609375, "step": 6351 }, { "epoch": 0.4293632553738002, "grad_norm": 1.4774116277694702, "learning_rate": 1.9237499275857917e-05, "loss": 0.238311767578125, "step": 6352 }, { "epoch": 0.4294308503447343, "grad_norm": 1.2221511602401733, "learning_rate": 1.9234334041931352e-05, "loss": 0.264739990234375, "step": 6353 }, { "epoch": 0.4294984453156685, "grad_norm": 1.3203867673873901, "learning_rate": 1.9231168603121373e-05, "loss": 0.2621917724609375, "step": 6354 }, { "epoch": 0.42956604028660267, "grad_norm": 1.8758361339569092, "learning_rate": 1.922800295958114e-05, "loss": 0.33197021484375, "step": 6355 }, { "epoch": 0.42963363525753684, "grad_norm": 0.7582902908325195, "learning_rate": 1.9224837111463834e-05, "loss": 0.1261138916015625, "step": 6356 }, { "epoch": 0.429701230228471, "grad_norm": 0.880903422832489, "learning_rate": 1.9221671058922637e-05, "loss": 0.212738037109375, "step": 6357 }, { "epoch": 0.4297688251994052, "grad_norm": 0.7664320468902588, "learning_rate": 1.9218504802110734e-05, "loss": 0.1714019775390625, "step": 6358 }, { "epoch": 0.42983642017033935, "grad_norm": 0.8299577832221985, "learning_rate": 1.921533834118134e-05, "loss": 0.1760711669921875, "step": 6359 }, { "epoch": 0.42990401514127347, "grad_norm": 0.8324858546257019, "learning_rate": 1.9212171676287662e-05, "loss": 0.1781463623046875, "step": 6360 }, { "epoch": 0.42997161011220764, "grad_norm": 1.1705842018127441, "learning_rate": 1.920900480758292e-05, "loss": 0.206756591796875, "step": 6361 }, { "epoch": 0.4300392050831418, "grad_norm": 2.497544050216675, "learning_rate": 1.920583773522035e-05, "loss": 0.29888916015625, "step": 6362 }, { "epoch": 0.430106800054076, "grad_norm": 0.9050761461257935, "learning_rate": 1.9202670459353197e-05, "loss": 0.212188720703125, "step": 6363 }, { "epoch": 0.43017439502501015, "grad_norm": 2.090236186981201, "learning_rate": 1.919950298013471e-05, "loss": 0.330291748046875, "step": 6364 }, { "epoch": 0.4302419899959443, "grad_norm": 0.8948322534561157, "learning_rate": 1.9196335297718153e-05, "loss": 0.19903564453125, "step": 6365 }, { "epoch": 0.4303095849668785, "grad_norm": 1.0213615894317627, "learning_rate": 1.919316741225679e-05, "loss": 0.2129669189453125, "step": 6366 }, { "epoch": 0.4303771799378126, "grad_norm": 0.9021698236465454, "learning_rate": 1.918999932390392e-05, "loss": 0.23321533203125, "step": 6367 }, { "epoch": 0.4304447749087468, "grad_norm": 0.951430082321167, "learning_rate": 1.9186831032812822e-05, "loss": 0.215362548828125, "step": 6368 }, { "epoch": 0.43051236987968095, "grad_norm": 0.8084784746170044, "learning_rate": 1.91836625391368e-05, "loss": 0.11556243896484375, "step": 6369 }, { "epoch": 0.4305799648506151, "grad_norm": 3.1067845821380615, "learning_rate": 1.918049384302917e-05, "loss": 0.254180908203125, "step": 6370 }, { "epoch": 0.4306475598215493, "grad_norm": 1.2090317010879517, "learning_rate": 1.9177324944643244e-05, "loss": 0.24835205078125, "step": 6371 }, { "epoch": 0.43071515479248346, "grad_norm": 0.8918283581733704, "learning_rate": 1.917415584413236e-05, "loss": 0.1287994384765625, "step": 6372 }, { "epoch": 0.4307827497634176, "grad_norm": 1.1967016458511353, "learning_rate": 1.917098654164986e-05, "loss": 0.199981689453125, "step": 6373 }, { "epoch": 0.43085034473435174, "grad_norm": 0.9686679840087891, "learning_rate": 1.9167817037349094e-05, "loss": 0.1396331787109375, "step": 6374 }, { "epoch": 0.4309179397052859, "grad_norm": 1.4022773504257202, "learning_rate": 1.9164647331383417e-05, "loss": 0.199737548828125, "step": 6375 }, { "epoch": 0.4309855346762201, "grad_norm": 1.2205404043197632, "learning_rate": 1.9161477423906203e-05, "loss": 0.27886962890625, "step": 6376 }, { "epoch": 0.43105312964715425, "grad_norm": 1.3574391603469849, "learning_rate": 1.9158307315070834e-05, "loss": 0.259765625, "step": 6377 }, { "epoch": 0.4311207246180884, "grad_norm": 1.6650426387786865, "learning_rate": 1.9155137005030695e-05, "loss": 0.314697265625, "step": 6378 }, { "epoch": 0.4311883195890226, "grad_norm": 2.515030860900879, "learning_rate": 1.9151966493939185e-05, "loss": 0.328857421875, "step": 6379 }, { "epoch": 0.43125591455995677, "grad_norm": 2.316990852355957, "learning_rate": 1.9148795781949716e-05, "loss": 0.2852783203125, "step": 6380 }, { "epoch": 0.4313235095308909, "grad_norm": 1.266252875328064, "learning_rate": 1.9145624869215708e-05, "loss": 0.255615234375, "step": 6381 }, { "epoch": 0.43139110450182505, "grad_norm": 0.8020464181900024, "learning_rate": 1.9142453755890586e-05, "loss": 0.1305999755859375, "step": 6382 }, { "epoch": 0.4314586994727592, "grad_norm": 1.038453459739685, "learning_rate": 1.913928244212779e-05, "loss": 0.237884521484375, "step": 6383 }, { "epoch": 0.4315262944436934, "grad_norm": 1.1892873048782349, "learning_rate": 1.9136110928080765e-05, "loss": 0.2543792724609375, "step": 6384 }, { "epoch": 0.43159388941462756, "grad_norm": 1.1819102764129639, "learning_rate": 1.9132939213902973e-05, "loss": 0.2027130126953125, "step": 6385 }, { "epoch": 0.43166148438556173, "grad_norm": 1.8892961740493774, "learning_rate": 1.9129767299747885e-05, "loss": 0.24688720703125, "step": 6386 }, { "epoch": 0.4317290793564959, "grad_norm": 1.2114218473434448, "learning_rate": 1.9126595185768963e-05, "loss": 0.2149200439453125, "step": 6387 }, { "epoch": 0.43179667432743, "grad_norm": 1.3201879262924194, "learning_rate": 1.9123422872119706e-05, "loss": 0.21533203125, "step": 6388 }, { "epoch": 0.4318642692983642, "grad_norm": 1.3305357694625854, "learning_rate": 1.9120250358953607e-05, "loss": 0.245849609375, "step": 6389 }, { "epoch": 0.43193186426929836, "grad_norm": 0.910522997379303, "learning_rate": 1.9117077646424175e-05, "loss": 0.14940643310546875, "step": 6390 }, { "epoch": 0.43199945924023253, "grad_norm": 1.3115181922912598, "learning_rate": 1.9113904734684924e-05, "loss": 0.201904296875, "step": 6391 }, { "epoch": 0.4320670542111667, "grad_norm": 1.3681164979934692, "learning_rate": 1.9110731623889373e-05, "loss": 0.243011474609375, "step": 6392 }, { "epoch": 0.43213464918210087, "grad_norm": 1.869335412979126, "learning_rate": 1.910755831419106e-05, "loss": 0.29901123046875, "step": 6393 }, { "epoch": 0.432202244153035, "grad_norm": 1.8514024019241333, "learning_rate": 1.910438480574354e-05, "loss": 0.240234375, "step": 6394 }, { "epoch": 0.43226983912396916, "grad_norm": 0.9437934756278992, "learning_rate": 1.9101211098700348e-05, "loss": 0.1619873046875, "step": 6395 }, { "epoch": 0.43233743409490333, "grad_norm": 0.8410161137580872, "learning_rate": 1.9098037193215063e-05, "loss": 0.17523193359375, "step": 6396 }, { "epoch": 0.4324050290658375, "grad_norm": 2.0931077003479004, "learning_rate": 1.9094863089441253e-05, "loss": 0.2129974365234375, "step": 6397 }, { "epoch": 0.43247262403677167, "grad_norm": 1.1987618207931519, "learning_rate": 1.9091688787532504e-05, "loss": 0.239501953125, "step": 6398 }, { "epoch": 0.43254021900770584, "grad_norm": 1.4942649602890015, "learning_rate": 1.90885142876424e-05, "loss": 0.25640869140625, "step": 6399 }, { "epoch": 0.43260781397864, "grad_norm": 1.3543500900268555, "learning_rate": 1.908533958992455e-05, "loss": 0.207855224609375, "step": 6400 }, { "epoch": 0.4326754089495741, "grad_norm": 1.200799822807312, "learning_rate": 1.9082164694532566e-05, "loss": 0.2320556640625, "step": 6401 }, { "epoch": 0.4327430039205083, "grad_norm": 3.0894176959991455, "learning_rate": 1.9078989601620067e-05, "loss": 0.15542221069335938, "step": 6402 }, { "epoch": 0.43281059889144247, "grad_norm": 0.7717179656028748, "learning_rate": 1.9075814311340685e-05, "loss": 0.16400146484375, "step": 6403 }, { "epoch": 0.43287819386237664, "grad_norm": 1.3597486019134521, "learning_rate": 1.9072638823848055e-05, "loss": 0.230712890625, "step": 6404 }, { "epoch": 0.4329457888333108, "grad_norm": 0.7451679706573486, "learning_rate": 1.906946313929583e-05, "loss": 0.170196533203125, "step": 6405 }, { "epoch": 0.433013383804245, "grad_norm": 1.1193523406982422, "learning_rate": 1.9066287257837678e-05, "loss": 0.15325164794921875, "step": 6406 }, { "epoch": 0.43308097877517915, "grad_norm": 0.7220299243927002, "learning_rate": 1.906311117962726e-05, "loss": 0.135406494140625, "step": 6407 }, { "epoch": 0.43314857374611326, "grad_norm": 2.0806612968444824, "learning_rate": 1.905993490481825e-05, "loss": 0.35546875, "step": 6408 }, { "epoch": 0.43321616871704743, "grad_norm": 1.2523269653320312, "learning_rate": 1.9056758433564343e-05, "loss": 0.1937255859375, "step": 6409 }, { "epoch": 0.4332837636879816, "grad_norm": 1.5698562860488892, "learning_rate": 1.9053581766019238e-05, "loss": 0.28240966796875, "step": 6410 }, { "epoch": 0.4333513586589158, "grad_norm": 2.736649751663208, "learning_rate": 1.9050404902336634e-05, "loss": 0.29229736328125, "step": 6411 }, { "epoch": 0.43341895362984995, "grad_norm": 1.1483583450317383, "learning_rate": 1.9047227842670253e-05, "loss": 0.28411865234375, "step": 6412 }, { "epoch": 0.4334865486007841, "grad_norm": 1.3691962957382202, "learning_rate": 1.904405058717382e-05, "loss": 0.230743408203125, "step": 6413 }, { "epoch": 0.4335541435717183, "grad_norm": 1.1228119134902954, "learning_rate": 1.9040873136001066e-05, "loss": 0.251220703125, "step": 6414 }, { "epoch": 0.4336217385426524, "grad_norm": 2.4573025703430176, "learning_rate": 1.903769548930574e-05, "loss": 0.28204345703125, "step": 6415 }, { "epoch": 0.4336893335135866, "grad_norm": 1.1890426874160767, "learning_rate": 1.90345176472416e-05, "loss": 0.208831787109375, "step": 6416 }, { "epoch": 0.43375692848452074, "grad_norm": 0.7924277186393738, "learning_rate": 1.90313396099624e-05, "loss": 0.207916259765625, "step": 6417 }, { "epoch": 0.4338245234554549, "grad_norm": 1.0337592363357544, "learning_rate": 1.9028161377621926e-05, "loss": 0.219512939453125, "step": 6418 }, { "epoch": 0.4338921184263891, "grad_norm": 1.4370399713516235, "learning_rate": 1.9024982950373954e-05, "loss": 0.2685546875, "step": 6419 }, { "epoch": 0.43395971339732325, "grad_norm": 1.6631441116333008, "learning_rate": 1.902180432837227e-05, "loss": 0.255706787109375, "step": 6420 }, { "epoch": 0.4340273083682574, "grad_norm": 0.702263355255127, "learning_rate": 1.9018625511770686e-05, "loss": 0.1313018798828125, "step": 6421 }, { "epoch": 0.43409490333919154, "grad_norm": 1.1369431018829346, "learning_rate": 1.9015446500723005e-05, "loss": 0.2001953125, "step": 6422 }, { "epoch": 0.4341624983101257, "grad_norm": 0.8776907324790955, "learning_rate": 1.9012267295383053e-05, "loss": 0.18133544921875, "step": 6423 }, { "epoch": 0.4342300932810599, "grad_norm": 0.6515027284622192, "learning_rate": 1.9009087895904656e-05, "loss": 0.0981597900390625, "step": 6424 }, { "epoch": 0.43429768825199405, "grad_norm": 0.9064714908599854, "learning_rate": 1.900590830244165e-05, "loss": 0.22479248046875, "step": 6425 }, { "epoch": 0.4343652832229282, "grad_norm": 1.2236404418945312, "learning_rate": 1.9002728515147895e-05, "loss": 0.2245330810546875, "step": 6426 }, { "epoch": 0.4344328781938624, "grad_norm": 2.213433027267456, "learning_rate": 1.899954853417724e-05, "loss": 0.31768798828125, "step": 6427 }, { "epoch": 0.43450047316479656, "grad_norm": 1.5600600242614746, "learning_rate": 1.899636835968355e-05, "loss": 0.22723388671875, "step": 6428 }, { "epoch": 0.4345680681357307, "grad_norm": 3.512375831604004, "learning_rate": 1.899318799182071e-05, "loss": 0.28570556640625, "step": 6429 }, { "epoch": 0.43463566310666485, "grad_norm": 1.4317553043365479, "learning_rate": 1.89900074307426e-05, "loss": 0.16693115234375, "step": 6430 }, { "epoch": 0.434703258077599, "grad_norm": 1.5698165893554688, "learning_rate": 1.8986826676603115e-05, "loss": 0.27880859375, "step": 6431 }, { "epoch": 0.4347708530485332, "grad_norm": 1.0355199575424194, "learning_rate": 1.8983645729556168e-05, "loss": 0.2234649658203125, "step": 6432 }, { "epoch": 0.43483844801946736, "grad_norm": 0.9646576642990112, "learning_rate": 1.8980464589755662e-05, "loss": 0.250732421875, "step": 6433 }, { "epoch": 0.43490604299040153, "grad_norm": 1.6303908824920654, "learning_rate": 1.8977283257355524e-05, "loss": 0.254913330078125, "step": 6434 }, { "epoch": 0.4349736379613357, "grad_norm": 1.4308065176010132, "learning_rate": 1.897410173250969e-05, "loss": 0.291107177734375, "step": 6435 }, { "epoch": 0.4350412329322698, "grad_norm": 1.3118089437484741, "learning_rate": 1.8970920015372096e-05, "loss": 0.270263671875, "step": 6436 }, { "epoch": 0.435108827903204, "grad_norm": 1.1375727653503418, "learning_rate": 1.89677381060967e-05, "loss": 0.285614013671875, "step": 6437 }, { "epoch": 0.43517642287413816, "grad_norm": 0.7817766070365906, "learning_rate": 1.8964556004837454e-05, "loss": 0.1585693359375, "step": 6438 }, { "epoch": 0.43524401784507233, "grad_norm": 1.4727824926376343, "learning_rate": 1.896137371174834e-05, "loss": 0.2698974609375, "step": 6439 }, { "epoch": 0.4353116128160065, "grad_norm": 1.2993746995925903, "learning_rate": 1.895819122698333e-05, "loss": 0.24920654296875, "step": 6440 }, { "epoch": 0.43537920778694067, "grad_norm": 1.6020060777664185, "learning_rate": 1.895500855069641e-05, "loss": 0.27734375, "step": 6441 }, { "epoch": 0.43544680275787484, "grad_norm": 1.216081142425537, "learning_rate": 1.8951825683041577e-05, "loss": 0.293304443359375, "step": 6442 }, { "epoch": 0.43551439772880896, "grad_norm": 1.1318974494934082, "learning_rate": 1.8948642624172848e-05, "loss": 0.2493743896484375, "step": 6443 }, { "epoch": 0.4355819926997431, "grad_norm": 1.3775070905685425, "learning_rate": 1.8945459374244226e-05, "loss": 0.1738739013671875, "step": 6444 }, { "epoch": 0.4356495876706773, "grad_norm": 2.0283374786376953, "learning_rate": 1.894227593340975e-05, "loss": 0.3702392578125, "step": 6445 }, { "epoch": 0.43571718264161147, "grad_norm": 2.187330722808838, "learning_rate": 1.8939092301823444e-05, "loss": 0.2515869140625, "step": 6446 }, { "epoch": 0.43578477761254564, "grad_norm": 0.9452978372573853, "learning_rate": 1.8935908479639357e-05, "loss": 0.202362060546875, "step": 6447 }, { "epoch": 0.4358523725834798, "grad_norm": 0.9043641686439514, "learning_rate": 1.893272446701154e-05, "loss": 0.18011474609375, "step": 6448 }, { "epoch": 0.435919967554414, "grad_norm": 1.952592372894287, "learning_rate": 1.8929540264094053e-05, "loss": 0.292938232421875, "step": 6449 }, { "epoch": 0.4359875625253481, "grad_norm": 1.452781319618225, "learning_rate": 1.892635587104097e-05, "loss": 0.212310791015625, "step": 6450 }, { "epoch": 0.43605515749628226, "grad_norm": 1.163179874420166, "learning_rate": 1.8923171288006373e-05, "loss": 0.31298828125, "step": 6451 }, { "epoch": 0.43612275246721643, "grad_norm": 1.0121947526931763, "learning_rate": 1.8919986515144358e-05, "loss": 0.134063720703125, "step": 6452 }, { "epoch": 0.4361903474381506, "grad_norm": 1.1329292058944702, "learning_rate": 1.891680155260901e-05, "loss": 0.1935272216796875, "step": 6453 }, { "epoch": 0.4362579424090848, "grad_norm": 1.269515037536621, "learning_rate": 1.891361640055445e-05, "loss": 0.1871185302734375, "step": 6454 }, { "epoch": 0.43632553738001895, "grad_norm": 1.1971828937530518, "learning_rate": 1.8910431059134784e-05, "loss": 0.34185791015625, "step": 6455 }, { "epoch": 0.4363931323509531, "grad_norm": 0.9461413025856018, "learning_rate": 1.8907245528504145e-05, "loss": 0.21295166015625, "step": 6456 }, { "epoch": 0.43646072732188723, "grad_norm": 0.7571327686309814, "learning_rate": 1.8904059808816676e-05, "loss": 0.213409423828125, "step": 6457 }, { "epoch": 0.4365283222928214, "grad_norm": 0.9146372675895691, "learning_rate": 1.8900873900226508e-05, "loss": 0.186431884765625, "step": 6458 }, { "epoch": 0.4365959172637556, "grad_norm": 2.1729280948638916, "learning_rate": 1.88976878028878e-05, "loss": 0.27471923828125, "step": 6459 }, { "epoch": 0.43666351223468974, "grad_norm": 1.2787843942642212, "learning_rate": 1.889450151695472e-05, "loss": 0.2651519775390625, "step": 6460 }, { "epoch": 0.4367311072056239, "grad_norm": 1.0735623836517334, "learning_rate": 1.8891315042581438e-05, "loss": 0.281219482421875, "step": 6461 }, { "epoch": 0.4367987021765581, "grad_norm": 0.9681832790374756, "learning_rate": 1.888812837992213e-05, "loss": 0.246124267578125, "step": 6462 }, { "epoch": 0.4368662971474922, "grad_norm": 1.5353903770446777, "learning_rate": 1.888494152913099e-05, "loss": 0.29864501953125, "step": 6463 }, { "epoch": 0.43693389211842637, "grad_norm": 1.46281099319458, "learning_rate": 1.8881754490362227e-05, "loss": 0.309539794921875, "step": 6464 }, { "epoch": 0.43700148708936054, "grad_norm": 0.8116837739944458, "learning_rate": 1.8878567263770036e-05, "loss": 0.20606231689453125, "step": 6465 }, { "epoch": 0.4370690820602947, "grad_norm": 1.312204360961914, "learning_rate": 1.887537984950864e-05, "loss": 0.329681396484375, "step": 6466 }, { "epoch": 0.4371366770312289, "grad_norm": 0.900015115737915, "learning_rate": 1.8872192247732268e-05, "loss": 0.18310546875, "step": 6467 }, { "epoch": 0.43720427200216305, "grad_norm": 1.1832973957061768, "learning_rate": 1.8869004458595153e-05, "loss": 0.25823974609375, "step": 6468 }, { "epoch": 0.4372718669730972, "grad_norm": 0.9570740461349487, "learning_rate": 1.8865816482251543e-05, "loss": 0.24261474609375, "step": 6469 }, { "epoch": 0.43733946194403134, "grad_norm": 0.648245632648468, "learning_rate": 1.8862628318855688e-05, "loss": 0.172393798828125, "step": 6470 }, { "epoch": 0.4374070569149655, "grad_norm": 1.4242198467254639, "learning_rate": 1.885943996856185e-05, "loss": 0.2191925048828125, "step": 6471 }, { "epoch": 0.4374746518858997, "grad_norm": 0.8410940766334534, "learning_rate": 1.8856251431524313e-05, "loss": 0.249908447265625, "step": 6472 }, { "epoch": 0.43754224685683385, "grad_norm": 0.9021347165107727, "learning_rate": 1.8853062707897344e-05, "loss": 0.1943359375, "step": 6473 }, { "epoch": 0.437609841827768, "grad_norm": 1.2478454113006592, "learning_rate": 1.8849873797835244e-05, "loss": 0.2978515625, "step": 6474 }, { "epoch": 0.4376774367987022, "grad_norm": 1.3570812940597534, "learning_rate": 1.8846684701492304e-05, "loss": 0.183135986328125, "step": 6475 }, { "epoch": 0.43774503176963636, "grad_norm": 0.6093403100967407, "learning_rate": 1.8843495419022835e-05, "loss": 0.10720062255859375, "step": 6476 }, { "epoch": 0.4378126267405705, "grad_norm": 0.819830596446991, "learning_rate": 1.8840305950581157e-05, "loss": 0.156097412109375, "step": 6477 }, { "epoch": 0.43788022171150465, "grad_norm": 1.5856760740280151, "learning_rate": 1.8837116296321595e-05, "loss": 0.3096923828125, "step": 6478 }, { "epoch": 0.4379478166824388, "grad_norm": 1.0779998302459717, "learning_rate": 1.8833926456398483e-05, "loss": 0.198883056640625, "step": 6479 }, { "epoch": 0.438015411653373, "grad_norm": 1.3664817810058594, "learning_rate": 1.8830736430966167e-05, "loss": 0.2602691650390625, "step": 6480 }, { "epoch": 0.43808300662430716, "grad_norm": 0.6357117891311646, "learning_rate": 1.8827546220179e-05, "loss": 0.136993408203125, "step": 6481 }, { "epoch": 0.43815060159524133, "grad_norm": 0.8420872092247009, "learning_rate": 1.882435582419134e-05, "loss": 0.1882476806640625, "step": 6482 }, { "epoch": 0.4382181965661755, "grad_norm": 1.3318848609924316, "learning_rate": 1.8821165243157568e-05, "loss": 0.2252197265625, "step": 6483 }, { "epoch": 0.4382857915371096, "grad_norm": 1.2964686155319214, "learning_rate": 1.8817974477232048e-05, "loss": 0.27984619140625, "step": 6484 }, { "epoch": 0.4383533865080438, "grad_norm": 1.9775418043136597, "learning_rate": 1.8814783526569187e-05, "loss": 0.290863037109375, "step": 6485 }, { "epoch": 0.43842098147897796, "grad_norm": 0.6063159108161926, "learning_rate": 1.8811592391323373e-05, "loss": 0.1226806640625, "step": 6486 }, { "epoch": 0.4384885764499121, "grad_norm": 1.109296441078186, "learning_rate": 1.8808401071649017e-05, "loss": 0.17406463623046875, "step": 6487 }, { "epoch": 0.4385561714208463, "grad_norm": 2.1868247985839844, "learning_rate": 1.8805209567700528e-05, "loss": 0.288238525390625, "step": 6488 }, { "epoch": 0.43862376639178047, "grad_norm": 0.937612771987915, "learning_rate": 1.880201787963234e-05, "loss": 0.23760986328125, "step": 6489 }, { "epoch": 0.43869136136271464, "grad_norm": 1.3199820518493652, "learning_rate": 1.8798826007598883e-05, "loss": 0.2772979736328125, "step": 6490 }, { "epoch": 0.43875895633364875, "grad_norm": 1.733873963356018, "learning_rate": 1.8795633951754596e-05, "loss": 0.2318115234375, "step": 6491 }, { "epoch": 0.4388265513045829, "grad_norm": 0.9954944252967834, "learning_rate": 1.8792441712253937e-05, "loss": 0.192535400390625, "step": 6492 }, { "epoch": 0.4388941462755171, "grad_norm": 2.0538649559020996, "learning_rate": 1.8789249289251357e-05, "loss": 0.28131103515625, "step": 6493 }, { "epoch": 0.43896174124645126, "grad_norm": 1.4673665761947632, "learning_rate": 1.878605668290134e-05, "loss": 0.2008056640625, "step": 6494 }, { "epoch": 0.43902933621738544, "grad_norm": 0.9557294845581055, "learning_rate": 1.878286389335835e-05, "loss": 0.2408905029296875, "step": 6495 }, { "epoch": 0.4390969311883196, "grad_norm": 1.4930822849273682, "learning_rate": 1.877967092077688e-05, "loss": 0.246490478515625, "step": 6496 }, { "epoch": 0.4391645261592538, "grad_norm": 0.7940108180046082, "learning_rate": 1.8776477765311425e-05, "loss": 0.18719482421875, "step": 6497 }, { "epoch": 0.4392321211301879, "grad_norm": 1.275568962097168, "learning_rate": 1.877328442711649e-05, "loss": 0.187744140625, "step": 6498 }, { "epoch": 0.43929971610112206, "grad_norm": 1.2959659099578857, "learning_rate": 1.877009090634659e-05, "loss": 0.251495361328125, "step": 6499 }, { "epoch": 0.43936731107205623, "grad_norm": 1.5460755825042725, "learning_rate": 1.8766897203156245e-05, "loss": 0.272705078125, "step": 6500 }, { "epoch": 0.4394349060429904, "grad_norm": 0.7670238614082336, "learning_rate": 1.8763703317699984e-05, "loss": 0.205596923828125, "step": 6501 }, { "epoch": 0.4395025010139246, "grad_norm": 1.3467128276824951, "learning_rate": 1.8760509250132355e-05, "loss": 0.19087982177734375, "step": 6502 }, { "epoch": 0.43957009598485874, "grad_norm": 0.7955343723297119, "learning_rate": 1.8757315000607904e-05, "loss": 0.1475372314453125, "step": 6503 }, { "epoch": 0.4396376909557929, "grad_norm": 1.0938106775283813, "learning_rate": 1.8754120569281183e-05, "loss": 0.21671295166015625, "step": 6504 }, { "epoch": 0.43970528592672703, "grad_norm": 1.2176578044891357, "learning_rate": 1.8750925956306762e-05, "loss": 0.2504425048828125, "step": 6505 }, { "epoch": 0.4397728808976612, "grad_norm": 0.9484041929244995, "learning_rate": 1.8747731161839222e-05, "loss": 0.205718994140625, "step": 6506 }, { "epoch": 0.43984047586859537, "grad_norm": 2.02221417427063, "learning_rate": 1.8744536186033133e-05, "loss": 0.311279296875, "step": 6507 }, { "epoch": 0.43990807083952954, "grad_norm": 1.806585669517517, "learning_rate": 1.8741341029043105e-05, "loss": 0.25042724609375, "step": 6508 }, { "epoch": 0.4399756658104637, "grad_norm": 1.070030689239502, "learning_rate": 1.873814569102372e-05, "loss": 0.217681884765625, "step": 6509 }, { "epoch": 0.4400432607813979, "grad_norm": 1.619498610496521, "learning_rate": 1.8734950172129606e-05, "loss": 0.283172607421875, "step": 6510 }, { "epoch": 0.44011085575233205, "grad_norm": 1.2182923555374146, "learning_rate": 1.8731754472515375e-05, "loss": 0.21148681640625, "step": 6511 }, { "epoch": 0.44017845072326617, "grad_norm": 1.9943629503250122, "learning_rate": 1.8728558592335657e-05, "loss": 0.2680511474609375, "step": 6512 }, { "epoch": 0.44024604569420034, "grad_norm": 1.2427456378936768, "learning_rate": 1.8725362531745083e-05, "loss": 0.24786376953125, "step": 6513 }, { "epoch": 0.4403136406651345, "grad_norm": 2.095869779586792, "learning_rate": 1.8722166290898303e-05, "loss": 0.33074951171875, "step": 6514 }, { "epoch": 0.4403812356360687, "grad_norm": 3.0210845470428467, "learning_rate": 1.8718969869949973e-05, "loss": 0.2393341064453125, "step": 6515 }, { "epoch": 0.44044883060700285, "grad_norm": 1.1237934827804565, "learning_rate": 1.8715773269054748e-05, "loss": 0.205108642578125, "step": 6516 }, { "epoch": 0.440516425577937, "grad_norm": 1.5357576608657837, "learning_rate": 1.8712576488367304e-05, "loss": 0.270965576171875, "step": 6517 }, { "epoch": 0.4405840205488712, "grad_norm": 1.4861979484558105, "learning_rate": 1.8709379528042325e-05, "loss": 0.2457733154296875, "step": 6518 }, { "epoch": 0.4406516155198053, "grad_norm": 1.4610929489135742, "learning_rate": 1.8706182388234493e-05, "loss": 0.3414306640625, "step": 6519 }, { "epoch": 0.4407192104907395, "grad_norm": 1.7982475757598877, "learning_rate": 1.870298506909851e-05, "loss": 0.28155517578125, "step": 6520 }, { "epoch": 0.44078680546167365, "grad_norm": 1.7562198638916016, "learning_rate": 1.8699787570789084e-05, "loss": 0.2021484375, "step": 6521 }, { "epoch": 0.4408544004326078, "grad_norm": 1.1919540166854858, "learning_rate": 1.8696589893460917e-05, "loss": 0.25909423828125, "step": 6522 }, { "epoch": 0.440921995403542, "grad_norm": 1.1699235439300537, "learning_rate": 1.8693392037268753e-05, "loss": 0.19189453125, "step": 6523 }, { "epoch": 0.44098959037447616, "grad_norm": 1.1771434545516968, "learning_rate": 1.8690194002367308e-05, "loss": 0.174285888671875, "step": 6524 }, { "epoch": 0.4410571853454103, "grad_norm": 0.7555035948753357, "learning_rate": 1.8686995788911327e-05, "loss": 0.14361572265625, "step": 6525 }, { "epoch": 0.44112478031634444, "grad_norm": 1.104783296585083, "learning_rate": 1.8683797397055558e-05, "loss": 0.293853759765625, "step": 6526 }, { "epoch": 0.4411923752872786, "grad_norm": 1.7078890800476074, "learning_rate": 1.868059882695477e-05, "loss": 0.241943359375, "step": 6527 }, { "epoch": 0.4412599702582128, "grad_norm": 1.2630406618118286, "learning_rate": 1.867740007876372e-05, "loss": 0.2690277099609375, "step": 6528 }, { "epoch": 0.44132756522914696, "grad_norm": 2.257136583328247, "learning_rate": 1.867420115263718e-05, "loss": 0.32666015625, "step": 6529 }, { "epoch": 0.4413951602000811, "grad_norm": 1.6573041677474976, "learning_rate": 1.8671002048729938e-05, "loss": 0.29974365234375, "step": 6530 }, { "epoch": 0.4414627551710153, "grad_norm": 0.6584070324897766, "learning_rate": 1.8667802767196792e-05, "loss": 0.119842529296875, "step": 6531 }, { "epoch": 0.4415303501419494, "grad_norm": 0.8774755597114563, "learning_rate": 1.8664603308192537e-05, "loss": 0.2427978515625, "step": 6532 }, { "epoch": 0.4415979451128836, "grad_norm": 1.5808919668197632, "learning_rate": 1.8661403671871984e-05, "loss": 0.29034423828125, "step": 6533 }, { "epoch": 0.44166554008381775, "grad_norm": 2.0822203159332275, "learning_rate": 1.8658203858389947e-05, "loss": 0.2947998046875, "step": 6534 }, { "epoch": 0.4417331350547519, "grad_norm": 1.3166056871414185, "learning_rate": 1.865500386790126e-05, "loss": 0.1986846923828125, "step": 6535 }, { "epoch": 0.4418007300256861, "grad_norm": 1.7531830072402954, "learning_rate": 1.865180370056076e-05, "loss": 0.31500244140625, "step": 6536 }, { "epoch": 0.44186832499662027, "grad_norm": 1.768569827079773, "learning_rate": 1.864860335652328e-05, "loss": 0.342041015625, "step": 6537 }, { "epoch": 0.44193591996755444, "grad_norm": 0.6168690323829651, "learning_rate": 1.864540283594368e-05, "loss": 0.1248931884765625, "step": 6538 }, { "epoch": 0.44200351493848855, "grad_norm": 1.180997610092163, "learning_rate": 1.8642202138976827e-05, "loss": 0.2288818359375, "step": 6539 }, { "epoch": 0.4420711099094227, "grad_norm": 1.631669044494629, "learning_rate": 1.863900126577758e-05, "loss": 0.27685546875, "step": 6540 }, { "epoch": 0.4421387048803569, "grad_norm": 1.2303071022033691, "learning_rate": 1.863580021650082e-05, "loss": 0.258148193359375, "step": 6541 }, { "epoch": 0.44220629985129106, "grad_norm": 1.349692702293396, "learning_rate": 1.8632598991301428e-05, "loss": 0.268157958984375, "step": 6542 }, { "epoch": 0.44227389482222523, "grad_norm": 0.7871484756469727, "learning_rate": 1.8629397590334317e-05, "loss": 0.20848846435546875, "step": 6543 }, { "epoch": 0.4423414897931594, "grad_norm": 1.5539555549621582, "learning_rate": 1.8626196013754375e-05, "loss": 0.314178466796875, "step": 6544 }, { "epoch": 0.4424090847640936, "grad_norm": 0.7042185068130493, "learning_rate": 1.862299426171652e-05, "loss": 0.160888671875, "step": 6545 }, { "epoch": 0.4424766797350277, "grad_norm": 1.5439034700393677, "learning_rate": 1.861979233437567e-05, "loss": 0.3271484375, "step": 6546 }, { "epoch": 0.44254427470596186, "grad_norm": 1.404106616973877, "learning_rate": 1.8616590231886755e-05, "loss": 0.2669525146484375, "step": 6547 }, { "epoch": 0.44261186967689603, "grad_norm": 0.6637881994247437, "learning_rate": 1.8613387954404712e-05, "loss": 0.1309814453125, "step": 6548 }, { "epoch": 0.4426794646478302, "grad_norm": 1.566575288772583, "learning_rate": 1.861018550208449e-05, "loss": 0.273345947265625, "step": 6549 }, { "epoch": 0.44274705961876437, "grad_norm": 1.1983317136764526, "learning_rate": 1.8606982875081048e-05, "loss": 0.271087646484375, "step": 6550 }, { "epoch": 0.44281465458969854, "grad_norm": 0.8788195848464966, "learning_rate": 1.8603780073549336e-05, "loss": 0.15884971618652344, "step": 6551 }, { "epoch": 0.4428822495606327, "grad_norm": 0.6534798741340637, "learning_rate": 1.8600577097644335e-05, "loss": 0.0953216552734375, "step": 6552 }, { "epoch": 0.4429498445315668, "grad_norm": 1.8113733530044556, "learning_rate": 1.859737394752102e-05, "loss": 0.255828857421875, "step": 6553 }, { "epoch": 0.443017439502501, "grad_norm": 1.5197999477386475, "learning_rate": 1.8594170623334385e-05, "loss": 0.32733154296875, "step": 6554 }, { "epoch": 0.44308503447343517, "grad_norm": 1.5716538429260254, "learning_rate": 1.859096712523942e-05, "loss": 0.25623321533203125, "step": 6555 }, { "epoch": 0.44315262944436934, "grad_norm": 0.9301180243492126, "learning_rate": 1.8587763453391133e-05, "loss": 0.16168975830078125, "step": 6556 }, { "epoch": 0.4432202244153035, "grad_norm": 0.994699239730835, "learning_rate": 1.858455960794454e-05, "loss": 0.290283203125, "step": 6557 }, { "epoch": 0.4432878193862377, "grad_norm": 0.6913023591041565, "learning_rate": 1.8581355589054665e-05, "loss": 0.11606597900390625, "step": 6558 }, { "epoch": 0.44335541435717185, "grad_norm": 1.7780388593673706, "learning_rate": 1.857815139687653e-05, "loss": 0.2356719970703125, "step": 6559 }, { "epoch": 0.44342300932810597, "grad_norm": 0.9727783203125, "learning_rate": 1.8574947031565173e-05, "loss": 0.187042236328125, "step": 6560 }, { "epoch": 0.44349060429904014, "grad_norm": 1.61183762550354, "learning_rate": 1.8571742493275653e-05, "loss": 0.2236175537109375, "step": 6561 }, { "epoch": 0.4435581992699743, "grad_norm": 1.251065731048584, "learning_rate": 1.8568537782163022e-05, "loss": 0.2367401123046875, "step": 6562 }, { "epoch": 0.4436257942409085, "grad_norm": 1.2460118532180786, "learning_rate": 1.8565332898382335e-05, "loss": 0.282958984375, "step": 6563 }, { "epoch": 0.44369338921184265, "grad_norm": 1.7823129892349243, "learning_rate": 1.856212784208867e-05, "loss": 0.3394775390625, "step": 6564 }, { "epoch": 0.4437609841827768, "grad_norm": 1.722031831741333, "learning_rate": 1.855892261343711e-05, "loss": 0.301727294921875, "step": 6565 }, { "epoch": 0.443828579153711, "grad_norm": 1.1193134784698486, "learning_rate": 1.8555717212582738e-05, "loss": 0.27197265625, "step": 6566 }, { "epoch": 0.4438961741246451, "grad_norm": 2.06541109085083, "learning_rate": 1.855251163968066e-05, "loss": 0.30377197265625, "step": 6567 }, { "epoch": 0.4439637690955793, "grad_norm": 1.4140942096710205, "learning_rate": 1.8549305894885968e-05, "loss": 0.162139892578125, "step": 6568 }, { "epoch": 0.44403136406651345, "grad_norm": 1.8846616744995117, "learning_rate": 1.8546099978353787e-05, "loss": 0.333740234375, "step": 6569 }, { "epoch": 0.4440989590374476, "grad_norm": 1.6129920482635498, "learning_rate": 1.854289389023924e-05, "loss": 0.281463623046875, "step": 6570 }, { "epoch": 0.4441665540083818, "grad_norm": 1.2695801258087158, "learning_rate": 1.853968763069745e-05, "loss": 0.314727783203125, "step": 6571 }, { "epoch": 0.44423414897931596, "grad_norm": 1.445719838142395, "learning_rate": 1.8536481199883552e-05, "loss": 0.30853271484375, "step": 6572 }, { "epoch": 0.4443017439502501, "grad_norm": 1.1943659782409668, "learning_rate": 1.8533274597952714e-05, "loss": 0.22991943359375, "step": 6573 }, { "epoch": 0.44436933892118424, "grad_norm": 0.8465738296508789, "learning_rate": 1.8530067825060073e-05, "loss": 0.187591552734375, "step": 6574 }, { "epoch": 0.4444369338921184, "grad_norm": 1.3444976806640625, "learning_rate": 1.8526860881360795e-05, "loss": 0.17071533203125, "step": 6575 }, { "epoch": 0.4445045288630526, "grad_norm": 1.3284412622451782, "learning_rate": 1.8523653767010053e-05, "loss": 0.237579345703125, "step": 6576 }, { "epoch": 0.44457212383398675, "grad_norm": 2.568274974822998, "learning_rate": 1.8520446482163035e-05, "loss": 0.290863037109375, "step": 6577 }, { "epoch": 0.4446397188049209, "grad_norm": 2.3993234634399414, "learning_rate": 1.851723902697492e-05, "loss": 0.29656982421875, "step": 6578 }, { "epoch": 0.4447073137758551, "grad_norm": 1.3722172975540161, "learning_rate": 1.8514031401600906e-05, "loss": 0.1678924560546875, "step": 6579 }, { "epoch": 0.44477490874678927, "grad_norm": 1.3354698419570923, "learning_rate": 1.8510823606196197e-05, "loss": 0.30401611328125, "step": 6580 }, { "epoch": 0.4448425037177234, "grad_norm": 1.3976950645446777, "learning_rate": 1.850761564091601e-05, "loss": 0.239990234375, "step": 6581 }, { "epoch": 0.44491009868865755, "grad_norm": 1.7334784269332886, "learning_rate": 1.8504407505915565e-05, "loss": 0.309234619140625, "step": 6582 }, { "epoch": 0.4449776936595917, "grad_norm": 1.8089239597320557, "learning_rate": 1.850119920135009e-05, "loss": 0.2772216796875, "step": 6583 }, { "epoch": 0.4450452886305259, "grad_norm": 1.522990345954895, "learning_rate": 1.849799072737483e-05, "loss": 0.1866455078125, "step": 6584 }, { "epoch": 0.44511288360146006, "grad_norm": 0.8313410878181458, "learning_rate": 1.849478208414502e-05, "loss": 0.1848907470703125, "step": 6585 }, { "epoch": 0.44518047857239423, "grad_norm": 2.7252652645111084, "learning_rate": 1.849157327181592e-05, "loss": 0.305023193359375, "step": 6586 }, { "epoch": 0.44524807354332835, "grad_norm": 1.9220203161239624, "learning_rate": 1.848836429054279e-05, "loss": 0.22763824462890625, "step": 6587 }, { "epoch": 0.4453156685142625, "grad_norm": 0.9142850041389465, "learning_rate": 1.8485155140480903e-05, "loss": 0.145263671875, "step": 6588 }, { "epoch": 0.4453832634851967, "grad_norm": 2.033498764038086, "learning_rate": 1.8481945821785534e-05, "loss": 0.2156982421875, "step": 6589 }, { "epoch": 0.44545085845613086, "grad_norm": 2.1578168869018555, "learning_rate": 1.8478736334611975e-05, "loss": 0.4041748046875, "step": 6590 }, { "epoch": 0.44551845342706503, "grad_norm": 1.0660033226013184, "learning_rate": 1.8475526679115517e-05, "loss": 0.1720123291015625, "step": 6591 }, { "epoch": 0.4455860483979992, "grad_norm": 1.4990702867507935, "learning_rate": 1.8472316855451465e-05, "loss": 0.29656982421875, "step": 6592 }, { "epoch": 0.44565364336893337, "grad_norm": 1.224735975265503, "learning_rate": 1.8469106863775128e-05, "loss": 0.26251220703125, "step": 6593 }, { "epoch": 0.4457212383398675, "grad_norm": 1.1729681491851807, "learning_rate": 1.8465896704241825e-05, "loss": 0.18015289306640625, "step": 6594 }, { "epoch": 0.44578883331080166, "grad_norm": 0.8169445395469666, "learning_rate": 1.846268637700689e-05, "loss": 0.20050048828125, "step": 6595 }, { "epoch": 0.44585642828173583, "grad_norm": 1.355068564414978, "learning_rate": 1.8459475882225652e-05, "loss": 0.16921234130859375, "step": 6596 }, { "epoch": 0.44592402325267, "grad_norm": 1.1185660362243652, "learning_rate": 1.8456265220053457e-05, "loss": 0.1758880615234375, "step": 6597 }, { "epoch": 0.44599161822360417, "grad_norm": 1.3400592803955078, "learning_rate": 1.8453054390645655e-05, "loss": 0.190185546875, "step": 6598 }, { "epoch": 0.44605921319453834, "grad_norm": 1.232817530632019, "learning_rate": 1.8449843394157605e-05, "loss": 0.27716064453125, "step": 6599 }, { "epoch": 0.4461268081654725, "grad_norm": 1.1529417037963867, "learning_rate": 1.8446632230744685e-05, "loss": 0.2989501953125, "step": 6600 }, { "epoch": 0.4461944031364066, "grad_norm": 1.19415283203125, "learning_rate": 1.8443420900562253e-05, "loss": 0.22382354736328125, "step": 6601 }, { "epoch": 0.4462619981073408, "grad_norm": 1.043630838394165, "learning_rate": 1.844020940376571e-05, "loss": 0.21185302734375, "step": 6602 }, { "epoch": 0.44632959307827497, "grad_norm": 0.4154839515686035, "learning_rate": 1.8436997740510438e-05, "loss": 0.05902862548828125, "step": 6603 }, { "epoch": 0.44639718804920914, "grad_norm": 0.9410978555679321, "learning_rate": 1.8433785910951844e-05, "loss": 0.1712188720703125, "step": 6604 }, { "epoch": 0.4464647830201433, "grad_norm": 1.2495957612991333, "learning_rate": 1.8430573915245333e-05, "loss": 0.17863082885742188, "step": 6605 }, { "epoch": 0.4465323779910775, "grad_norm": 2.1704745292663574, "learning_rate": 1.842736175354632e-05, "loss": 0.331146240234375, "step": 6606 }, { "epoch": 0.44659997296201165, "grad_norm": 1.167699933052063, "learning_rate": 1.8424149426010233e-05, "loss": 0.28912353515625, "step": 6607 }, { "epoch": 0.44666756793294576, "grad_norm": 1.47804594039917, "learning_rate": 1.8420936932792502e-05, "loss": 0.274749755859375, "step": 6608 }, { "epoch": 0.44673516290387993, "grad_norm": 1.4087471961975098, "learning_rate": 1.8417724274048566e-05, "loss": 0.26702880859375, "step": 6609 }, { "epoch": 0.4468027578748141, "grad_norm": 1.4833686351776123, "learning_rate": 1.8414511449933874e-05, "loss": 0.2373046875, "step": 6610 }, { "epoch": 0.4468703528457483, "grad_norm": 1.40095853805542, "learning_rate": 1.8411298460603885e-05, "loss": 0.29644775390625, "step": 6611 }, { "epoch": 0.44693794781668245, "grad_norm": 0.9430888891220093, "learning_rate": 1.8408085306214065e-05, "loss": 0.20701217651367188, "step": 6612 }, { "epoch": 0.4470055427876166, "grad_norm": 1.393438696861267, "learning_rate": 1.8404871986919877e-05, "loss": 0.227447509765625, "step": 6613 }, { "epoch": 0.4470731377585508, "grad_norm": 1.3299760818481445, "learning_rate": 1.840165850287681e-05, "loss": 0.2772216796875, "step": 6614 }, { "epoch": 0.4471407327294849, "grad_norm": 1.0564769506454468, "learning_rate": 1.839844485424035e-05, "loss": 0.215240478515625, "step": 6615 }, { "epoch": 0.4472083277004191, "grad_norm": 1.861035943031311, "learning_rate": 1.8395231041165996e-05, "loss": 0.29473876953125, "step": 6616 }, { "epoch": 0.44727592267135324, "grad_norm": 1.1965166330337524, "learning_rate": 1.8392017063809244e-05, "loss": 0.231719970703125, "step": 6617 }, { "epoch": 0.4473435176422874, "grad_norm": 1.2247235774993896, "learning_rate": 1.8388802922325612e-05, "loss": 0.26678466796875, "step": 6618 }, { "epoch": 0.4474111126132216, "grad_norm": 1.0574766397476196, "learning_rate": 1.8385588616870625e-05, "loss": 0.232452392578125, "step": 6619 }, { "epoch": 0.44747870758415575, "grad_norm": 1.6773416996002197, "learning_rate": 1.838237414759981e-05, "loss": 0.269287109375, "step": 6620 }, { "epoch": 0.4475463025550899, "grad_norm": 1.3748352527618408, "learning_rate": 1.837915951466869e-05, "loss": 0.27660369873046875, "step": 6621 }, { "epoch": 0.44761389752602404, "grad_norm": 1.2699168920516968, "learning_rate": 1.837594471823282e-05, "loss": 0.24005126953125, "step": 6622 }, { "epoch": 0.4476814924969582, "grad_norm": 1.4601184129714966, "learning_rate": 1.8372729758447752e-05, "loss": 0.306732177734375, "step": 6623 }, { "epoch": 0.4477490874678924, "grad_norm": 1.0822430849075317, "learning_rate": 1.8369514635469043e-05, "loss": 0.09841156005859375, "step": 6624 }, { "epoch": 0.44781668243882655, "grad_norm": 1.7612873315811157, "learning_rate": 1.836629934945226e-05, "loss": 0.307281494140625, "step": 6625 }, { "epoch": 0.4478842774097607, "grad_norm": 0.820430338382721, "learning_rate": 1.8363083900552975e-05, "loss": 0.23486328125, "step": 6626 }, { "epoch": 0.4479518723806949, "grad_norm": 1.3345009088516235, "learning_rate": 1.8359868288926783e-05, "loss": 0.2769775390625, "step": 6627 }, { "epoch": 0.44801946735162906, "grad_norm": 1.0458407402038574, "learning_rate": 1.8356652514729266e-05, "loss": 0.1860809326171875, "step": 6628 }, { "epoch": 0.4480870623225632, "grad_norm": 2.0397281646728516, "learning_rate": 1.8353436578116027e-05, "loss": 0.334442138671875, "step": 6629 }, { "epoch": 0.44815465729349735, "grad_norm": 1.8157799243927002, "learning_rate": 1.835022047924267e-05, "loss": 0.298095703125, "step": 6630 }, { "epoch": 0.4482222522644315, "grad_norm": 1.4268293380737305, "learning_rate": 1.834700421826481e-05, "loss": 0.267333984375, "step": 6631 }, { "epoch": 0.4482898472353657, "grad_norm": 1.1161340475082397, "learning_rate": 1.8343787795338076e-05, "loss": 0.16045379638671875, "step": 6632 }, { "epoch": 0.44835744220629986, "grad_norm": 1.0910401344299316, "learning_rate": 1.8340571210618092e-05, "loss": 0.24969482421875, "step": 6633 }, { "epoch": 0.44842503717723403, "grad_norm": 0.6836324334144592, "learning_rate": 1.8337354464260492e-05, "loss": 0.122314453125, "step": 6634 }, { "epoch": 0.4484926321481682, "grad_norm": 0.8093506693840027, "learning_rate": 1.8334137556420933e-05, "loss": 0.15802001953125, "step": 6635 }, { "epoch": 0.4485602271191023, "grad_norm": 1.2116997241973877, "learning_rate": 1.8330920487255065e-05, "loss": 0.243408203125, "step": 6636 }, { "epoch": 0.4486278220900365, "grad_norm": 0.8815470933914185, "learning_rate": 1.832770325691855e-05, "loss": 0.185272216796875, "step": 6637 }, { "epoch": 0.44869541706097066, "grad_norm": 1.6384003162384033, "learning_rate": 1.8324485865567054e-05, "loss": 0.321807861328125, "step": 6638 }, { "epoch": 0.44876301203190483, "grad_norm": 1.3929990530014038, "learning_rate": 1.8321268313356257e-05, "loss": 0.2244110107421875, "step": 6639 }, { "epoch": 0.448830607002839, "grad_norm": 1.0490165948867798, "learning_rate": 1.8318050600441846e-05, "loss": 0.2180938720703125, "step": 6640 }, { "epoch": 0.44889820197377317, "grad_norm": 0.5137063264846802, "learning_rate": 1.831483272697951e-05, "loss": 0.107818603515625, "step": 6641 }, { "epoch": 0.44896579694470734, "grad_norm": 0.5653291940689087, "learning_rate": 1.8311614693124958e-05, "loss": 0.16156005859375, "step": 6642 }, { "epoch": 0.44903339191564146, "grad_norm": 1.0418272018432617, "learning_rate": 1.830839649903388e-05, "loss": 0.19147491455078125, "step": 6643 }, { "epoch": 0.4491009868865756, "grad_norm": 0.9469217658042908, "learning_rate": 1.830517814486202e-05, "loss": 0.227142333984375, "step": 6644 }, { "epoch": 0.4491685818575098, "grad_norm": 1.5379822254180908, "learning_rate": 1.830195963076508e-05, "loss": 0.2527351379394531, "step": 6645 }, { "epoch": 0.44923617682844397, "grad_norm": 1.0277140140533447, "learning_rate": 1.8298740956898793e-05, "loss": 0.221893310546875, "step": 6646 }, { "epoch": 0.44930377179937814, "grad_norm": 1.0556628704071045, "learning_rate": 1.829552212341891e-05, "loss": 0.21588134765625, "step": 6647 }, { "epoch": 0.4493713667703123, "grad_norm": 0.9137539863586426, "learning_rate": 1.8292303130481166e-05, "loss": 0.1811065673828125, "step": 6648 }, { "epoch": 0.4494389617412465, "grad_norm": 2.2446653842926025, "learning_rate": 1.828908397824133e-05, "loss": 0.28021240234375, "step": 6649 }, { "epoch": 0.4495065567121806, "grad_norm": 1.5777591466903687, "learning_rate": 1.8285864666855154e-05, "loss": 0.33245849609375, "step": 6650 }, { "epoch": 0.44957415168311476, "grad_norm": 1.379873514175415, "learning_rate": 1.828264519647841e-05, "loss": 0.2301025390625, "step": 6651 }, { "epoch": 0.44964174665404893, "grad_norm": 1.9969229698181152, "learning_rate": 1.827942556726687e-05, "loss": 0.27117919921875, "step": 6652 }, { "epoch": 0.4497093416249831, "grad_norm": 1.0431690216064453, "learning_rate": 1.827620577937634e-05, "loss": 0.19732666015625, "step": 6653 }, { "epoch": 0.4497769365959173, "grad_norm": 1.144736647605896, "learning_rate": 1.827298583296259e-05, "loss": 0.21649169921875, "step": 6654 }, { "epoch": 0.44984453156685145, "grad_norm": 1.4184175729751587, "learning_rate": 1.8269765728181435e-05, "loss": 0.230865478515625, "step": 6655 }, { "epoch": 0.44991212653778556, "grad_norm": 1.5681589841842651, "learning_rate": 1.8266545465188677e-05, "loss": 0.27532958984375, "step": 6656 }, { "epoch": 0.44997972150871973, "grad_norm": 1.0638103485107422, "learning_rate": 1.8263325044140137e-05, "loss": 0.1754302978515625, "step": 6657 }, { "epoch": 0.4500473164796539, "grad_norm": 1.766811728477478, "learning_rate": 1.826010446519164e-05, "loss": 0.297943115234375, "step": 6658 }, { "epoch": 0.4501149114505881, "grad_norm": 1.3688507080078125, "learning_rate": 1.8256883728499012e-05, "loss": 0.275482177734375, "step": 6659 }, { "epoch": 0.45018250642152224, "grad_norm": 1.3092259168624878, "learning_rate": 1.8253662834218088e-05, "loss": 0.22576904296875, "step": 6660 }, { "epoch": 0.4502501013924564, "grad_norm": 1.358196496963501, "learning_rate": 1.8250441782504727e-05, "loss": 0.305023193359375, "step": 6661 }, { "epoch": 0.4503176963633906, "grad_norm": 1.6977035999298096, "learning_rate": 1.8247220573514782e-05, "loss": 0.37652587890625, "step": 6662 }, { "epoch": 0.4503852913343247, "grad_norm": 1.054000973701477, "learning_rate": 1.824399920740411e-05, "loss": 0.22406005859375, "step": 6663 }, { "epoch": 0.45045288630525887, "grad_norm": 1.5634231567382812, "learning_rate": 1.8240777684328577e-05, "loss": 0.28253173828125, "step": 6664 }, { "epoch": 0.45052048127619304, "grad_norm": 1.9482237100601196, "learning_rate": 1.823755600444407e-05, "loss": 0.35150146484375, "step": 6665 }, { "epoch": 0.4505880762471272, "grad_norm": 1.1891309022903442, "learning_rate": 1.8234334167906466e-05, "loss": 0.25830078125, "step": 6666 }, { "epoch": 0.4506556712180614, "grad_norm": 1.3472377061843872, "learning_rate": 1.8231112174871668e-05, "loss": 0.295989990234375, "step": 6667 }, { "epoch": 0.45072326618899555, "grad_norm": 1.1914987564086914, "learning_rate": 1.822789002549556e-05, "loss": 0.34197998046875, "step": 6668 }, { "epoch": 0.4507908611599297, "grad_norm": 1.7495824098587036, "learning_rate": 1.8224667719934063e-05, "loss": 0.311737060546875, "step": 6669 }, { "epoch": 0.45085845613086384, "grad_norm": 1.343694806098938, "learning_rate": 1.822144525834309e-05, "loss": 0.2652587890625, "step": 6670 }, { "epoch": 0.450926051101798, "grad_norm": 0.90375155210495, "learning_rate": 1.821822264087856e-05, "loss": 0.19647216796875, "step": 6671 }, { "epoch": 0.4509936460727322, "grad_norm": 0.7519214749336243, "learning_rate": 1.82149998676964e-05, "loss": 0.141845703125, "step": 6672 }, { "epoch": 0.45106124104366635, "grad_norm": 1.3976868391036987, "learning_rate": 1.8211776938952558e-05, "loss": 0.305908203125, "step": 6673 }, { "epoch": 0.4511288360146005, "grad_norm": 0.961560070514679, "learning_rate": 1.8208553854802972e-05, "loss": 0.213623046875, "step": 6674 }, { "epoch": 0.4511964309855347, "grad_norm": 1.721189022064209, "learning_rate": 1.8205330615403594e-05, "loss": 0.2919921875, "step": 6675 }, { "epoch": 0.45126402595646886, "grad_norm": 1.4237974882125854, "learning_rate": 1.8202107220910393e-05, "loss": 0.182464599609375, "step": 6676 }, { "epoch": 0.451331620927403, "grad_norm": 1.4828312397003174, "learning_rate": 1.8198883671479324e-05, "loss": 0.27105712890625, "step": 6677 }, { "epoch": 0.45139921589833715, "grad_norm": 1.6479597091674805, "learning_rate": 1.819565996726637e-05, "loss": 0.2818603515625, "step": 6678 }, { "epoch": 0.4514668108692713, "grad_norm": 0.8429113626480103, "learning_rate": 1.8192436108427523e-05, "loss": 0.1819000244140625, "step": 6679 }, { "epoch": 0.4515344058402055, "grad_norm": 0.9533909559249878, "learning_rate": 1.8189212095118752e-05, "loss": 0.19708251953125, "step": 6680 }, { "epoch": 0.45160200081113966, "grad_norm": 1.2429766654968262, "learning_rate": 1.8185987927496072e-05, "loss": 0.256439208984375, "step": 6681 }, { "epoch": 0.45166959578207383, "grad_norm": 1.6130499839782715, "learning_rate": 1.8182763605715483e-05, "loss": 0.291290283203125, "step": 6682 }, { "epoch": 0.451737190753008, "grad_norm": 0.8383955955505371, "learning_rate": 1.8179539129933e-05, "loss": 0.1780242919921875, "step": 6683 }, { "epoch": 0.4518047857239421, "grad_norm": 1.4147160053253174, "learning_rate": 1.8176314500304634e-05, "loss": 0.2506103515625, "step": 6684 }, { "epoch": 0.4518723806948763, "grad_norm": 1.1136554479599, "learning_rate": 1.8173089716986417e-05, "loss": 0.289794921875, "step": 6685 }, { "epoch": 0.45193997566581046, "grad_norm": 0.915596604347229, "learning_rate": 1.8169864780134392e-05, "loss": 0.1772613525390625, "step": 6686 }, { "epoch": 0.4520075706367446, "grad_norm": 1.0747441053390503, "learning_rate": 1.8166639689904598e-05, "loss": 0.250946044921875, "step": 6687 }, { "epoch": 0.4520751656076788, "grad_norm": 1.332125186920166, "learning_rate": 1.8163414446453077e-05, "loss": 0.28094482421875, "step": 6688 }, { "epoch": 0.45214276057861297, "grad_norm": 1.0076098442077637, "learning_rate": 1.8160189049935895e-05, "loss": 0.2182464599609375, "step": 6689 }, { "epoch": 0.45221035554954714, "grad_norm": 1.1758711338043213, "learning_rate": 1.8156963500509117e-05, "loss": 0.25787353515625, "step": 6690 }, { "epoch": 0.45227795052048125, "grad_norm": 0.8427936434745789, "learning_rate": 1.8153737798328807e-05, "loss": 0.15350341796875, "step": 6691 }, { "epoch": 0.4523455454914154, "grad_norm": 0.8502866625785828, "learning_rate": 1.8150511943551052e-05, "loss": 0.170135498046875, "step": 6692 }, { "epoch": 0.4524131404623496, "grad_norm": 1.481112003326416, "learning_rate": 1.814728593633193e-05, "loss": 0.203826904296875, "step": 6693 }, { "epoch": 0.45248073543328377, "grad_norm": 1.2984530925750732, "learning_rate": 1.814405977682755e-05, "loss": 0.1463165283203125, "step": 6694 }, { "epoch": 0.45254833040421794, "grad_norm": 1.8150466680526733, "learning_rate": 1.8140833465194002e-05, "loss": 0.16625595092773438, "step": 6695 }, { "epoch": 0.4526159253751521, "grad_norm": 1.9396042823791504, "learning_rate": 1.8137607001587397e-05, "loss": 0.32354736328125, "step": 6696 }, { "epoch": 0.4526835203460863, "grad_norm": 1.1062331199645996, "learning_rate": 1.8134380386163854e-05, "loss": 0.20074462890625, "step": 6697 }, { "epoch": 0.4527511153170204, "grad_norm": 1.0305074453353882, "learning_rate": 1.8131153619079493e-05, "loss": 0.153045654296875, "step": 6698 }, { "epoch": 0.45281871028795456, "grad_norm": 1.5403714179992676, "learning_rate": 1.812792670049045e-05, "loss": 0.172821044921875, "step": 6699 }, { "epoch": 0.45288630525888873, "grad_norm": 2.1137642860412598, "learning_rate": 1.812469963055286e-05, "loss": 0.284881591796875, "step": 6700 }, { "epoch": 0.4529539002298229, "grad_norm": 2.246920585632324, "learning_rate": 1.8121472409422875e-05, "loss": 0.26947021484375, "step": 6701 }, { "epoch": 0.4530214952007571, "grad_norm": 1.308631181716919, "learning_rate": 1.8118245037256634e-05, "loss": 0.227294921875, "step": 6702 }, { "epoch": 0.45308909017169124, "grad_norm": 0.7198202013969421, "learning_rate": 1.8115017514210312e-05, "loss": 0.12422943115234375, "step": 6703 }, { "epoch": 0.4531566851426254, "grad_norm": 0.8834448456764221, "learning_rate": 1.8111789840440068e-05, "loss": 0.180145263671875, "step": 6704 }, { "epoch": 0.45322428011355953, "grad_norm": 1.0012036561965942, "learning_rate": 1.810856201610208e-05, "loss": 0.17974853515625, "step": 6705 }, { "epoch": 0.4532918750844937, "grad_norm": 1.2022160291671753, "learning_rate": 1.810533404135253e-05, "loss": 0.317535400390625, "step": 6706 }, { "epoch": 0.45335947005542787, "grad_norm": 1.5938329696655273, "learning_rate": 1.8102105916347606e-05, "loss": 0.2751617431640625, "step": 6707 }, { "epoch": 0.45342706502636204, "grad_norm": 1.7566120624542236, "learning_rate": 1.8098877641243505e-05, "loss": 0.305938720703125, "step": 6708 }, { "epoch": 0.4534946599972962, "grad_norm": 1.687620997428894, "learning_rate": 1.8095649216196436e-05, "loss": 0.306671142578125, "step": 6709 }, { "epoch": 0.4535622549682304, "grad_norm": 2.055185556411743, "learning_rate": 1.80924206413626e-05, "loss": 0.34002685546875, "step": 6710 }, { "epoch": 0.45362984993916455, "grad_norm": 1.341102123260498, "learning_rate": 1.8089191916898225e-05, "loss": 0.19921875, "step": 6711 }, { "epoch": 0.45369744491009867, "grad_norm": 1.9309371709823608, "learning_rate": 1.8085963042959537e-05, "loss": 0.3197021484375, "step": 6712 }, { "epoch": 0.45376503988103284, "grad_norm": 1.0164380073547363, "learning_rate": 1.8082734019702764e-05, "loss": 0.246734619140625, "step": 6713 }, { "epoch": 0.453832634851967, "grad_norm": 1.6244086027145386, "learning_rate": 1.8079504847284146e-05, "loss": 0.2694091796875, "step": 6714 }, { "epoch": 0.4539002298229012, "grad_norm": 1.3634815216064453, "learning_rate": 1.8076275525859934e-05, "loss": 0.29644775390625, "step": 6715 }, { "epoch": 0.45396782479383535, "grad_norm": 1.3779698610305786, "learning_rate": 1.807304605558638e-05, "loss": 0.1412200927734375, "step": 6716 }, { "epoch": 0.4540354197647695, "grad_norm": 1.1081645488739014, "learning_rate": 1.8069816436619746e-05, "loss": 0.23651123046875, "step": 6717 }, { "epoch": 0.45410301473570364, "grad_norm": 0.5941119194030762, "learning_rate": 1.80665866691163e-05, "loss": 0.119720458984375, "step": 6718 }, { "epoch": 0.4541706097066378, "grad_norm": 0.9525200724601746, "learning_rate": 1.8063356753232326e-05, "loss": 0.214630126953125, "step": 6719 }, { "epoch": 0.454238204677572, "grad_norm": 1.3188176155090332, "learning_rate": 1.80601266891241e-05, "loss": 0.292816162109375, "step": 6720 }, { "epoch": 0.45430579964850615, "grad_norm": 1.0557000637054443, "learning_rate": 1.805689647694791e-05, "loss": 0.29644775390625, "step": 6721 }, { "epoch": 0.4543733946194403, "grad_norm": 0.6677674651145935, "learning_rate": 1.805366611686006e-05, "loss": 0.1657562255859375, "step": 6722 }, { "epoch": 0.4544409895903745, "grad_norm": 0.8235024213790894, "learning_rate": 1.805043560901685e-05, "loss": 0.215362548828125, "step": 6723 }, { "epoch": 0.45450858456130866, "grad_norm": 0.9818089604377747, "learning_rate": 1.8047204953574598e-05, "loss": 0.28265380859375, "step": 6724 }, { "epoch": 0.4545761795322428, "grad_norm": 0.9471373558044434, "learning_rate": 1.8043974150689622e-05, "loss": 0.203765869140625, "step": 6725 }, { "epoch": 0.45464377450317695, "grad_norm": 1.0447673797607422, "learning_rate": 1.8040743200518244e-05, "loss": 0.227813720703125, "step": 6726 }, { "epoch": 0.4547113694741111, "grad_norm": 1.029309868812561, "learning_rate": 1.8037512103216796e-05, "loss": 0.27447509765625, "step": 6727 }, { "epoch": 0.4547789644450453, "grad_norm": 0.7646734714508057, "learning_rate": 1.803428085894163e-05, "loss": 0.214324951171875, "step": 6728 }, { "epoch": 0.45484655941597946, "grad_norm": 1.7466237545013428, "learning_rate": 1.8031049467849082e-05, "loss": 0.299346923828125, "step": 6729 }, { "epoch": 0.4549141543869136, "grad_norm": 1.0698025226593018, "learning_rate": 1.802781793009551e-05, "loss": 0.29595947265625, "step": 6730 }, { "epoch": 0.4549817493578478, "grad_norm": 0.6734480261802673, "learning_rate": 1.8024586245837274e-05, "loss": 0.12738037109375, "step": 6731 }, { "epoch": 0.4550493443287819, "grad_norm": 0.7857526540756226, "learning_rate": 1.802135441523075e-05, "loss": 0.223541259765625, "step": 6732 }, { "epoch": 0.4551169392997161, "grad_norm": 1.3940733671188354, "learning_rate": 1.801812243843231e-05, "loss": 0.26361083984375, "step": 6733 }, { "epoch": 0.45518453427065025, "grad_norm": 0.7704018354415894, "learning_rate": 1.8014890315598335e-05, "loss": 0.1858367919921875, "step": 6734 }, { "epoch": 0.4552521292415844, "grad_norm": 1.1598469018936157, "learning_rate": 1.8011658046885223e-05, "loss": 0.295654296875, "step": 6735 }, { "epoch": 0.4553197242125186, "grad_norm": 1.157151460647583, "learning_rate": 1.8008425632449354e-05, "loss": 0.26769256591796875, "step": 6736 }, { "epoch": 0.45538731918345277, "grad_norm": 1.4301358461380005, "learning_rate": 1.8005193072447157e-05, "loss": 0.23040771484375, "step": 6737 }, { "epoch": 0.45545491415438694, "grad_norm": 1.4882183074951172, "learning_rate": 1.8001960367035024e-05, "loss": 0.178070068359375, "step": 6738 }, { "epoch": 0.45552250912532105, "grad_norm": 1.7338111400604248, "learning_rate": 1.7998727516369375e-05, "loss": 0.315826416015625, "step": 6739 }, { "epoch": 0.4555901040962552, "grad_norm": 1.3201165199279785, "learning_rate": 1.799549452060664e-05, "loss": 0.242645263671875, "step": 6740 }, { "epoch": 0.4556576990671894, "grad_norm": 1.8284329175949097, "learning_rate": 1.799226137990326e-05, "loss": 0.2864990234375, "step": 6741 }, { "epoch": 0.45572529403812356, "grad_norm": 0.6781604290008545, "learning_rate": 1.798902809441566e-05, "loss": 0.12554931640625, "step": 6742 }, { "epoch": 0.45579288900905773, "grad_norm": 1.8406944274902344, "learning_rate": 1.798579466430029e-05, "loss": 0.3311767578125, "step": 6743 }, { "epoch": 0.4558604839799919, "grad_norm": 0.831932544708252, "learning_rate": 1.798256108971361e-05, "loss": 0.1546478271484375, "step": 6744 }, { "epoch": 0.4559280789509261, "grad_norm": 1.4175865650177002, "learning_rate": 1.7979327370812074e-05, "loss": 0.183441162109375, "step": 6745 }, { "epoch": 0.4559956739218602, "grad_norm": 1.4706168174743652, "learning_rate": 1.7976093507752155e-05, "loss": 0.22625732421875, "step": 6746 }, { "epoch": 0.45606326889279436, "grad_norm": 1.035830020904541, "learning_rate": 1.797285950069032e-05, "loss": 0.2740478515625, "step": 6747 }, { "epoch": 0.45613086386372853, "grad_norm": 1.0311470031738281, "learning_rate": 1.7969625349783052e-05, "loss": 0.246795654296875, "step": 6748 }, { "epoch": 0.4561984588346627, "grad_norm": 0.9150312542915344, "learning_rate": 1.796639105518685e-05, "loss": 0.1793670654296875, "step": 6749 }, { "epoch": 0.45626605380559687, "grad_norm": 1.2795639038085938, "learning_rate": 1.7963156617058197e-05, "loss": 0.25385284423828125, "step": 6750 }, { "epoch": 0.45633364877653104, "grad_norm": 0.9994786381721497, "learning_rate": 1.79599220355536e-05, "loss": 0.210052490234375, "step": 6751 }, { "epoch": 0.4564012437474652, "grad_norm": 0.730406641960144, "learning_rate": 1.7956687310829563e-05, "loss": 0.1026763916015625, "step": 6752 }, { "epoch": 0.45646883871839933, "grad_norm": 0.5830718278884888, "learning_rate": 1.7953452443042605e-05, "loss": 0.140472412109375, "step": 6753 }, { "epoch": 0.4565364336893335, "grad_norm": 1.0514779090881348, "learning_rate": 1.7950217432349258e-05, "loss": 0.2048492431640625, "step": 6754 }, { "epoch": 0.45660402866026767, "grad_norm": 1.096236228942871, "learning_rate": 1.794698227890604e-05, "loss": 0.2147979736328125, "step": 6755 }, { "epoch": 0.45667162363120184, "grad_norm": 1.1710783243179321, "learning_rate": 1.794374698286949e-05, "loss": 0.2166290283203125, "step": 6756 }, { "epoch": 0.456739218602136, "grad_norm": 1.151632308959961, "learning_rate": 1.7940511544396158e-05, "loss": 0.201080322265625, "step": 6757 }, { "epoch": 0.4568068135730702, "grad_norm": 1.431351900100708, "learning_rate": 1.793727596364259e-05, "loss": 0.34649658203125, "step": 6758 }, { "epoch": 0.45687440854400435, "grad_norm": 1.114563226699829, "learning_rate": 1.793404024076535e-05, "loss": 0.241912841796875, "step": 6759 }, { "epoch": 0.45694200351493847, "grad_norm": 1.0361515283584595, "learning_rate": 1.793080437592099e-05, "loss": 0.18450927734375, "step": 6760 }, { "epoch": 0.45700959848587264, "grad_norm": 1.1338143348693848, "learning_rate": 1.7927568369266087e-05, "loss": 0.217132568359375, "step": 6761 }, { "epoch": 0.4570771934568068, "grad_norm": 1.6716828346252441, "learning_rate": 1.7924332220957223e-05, "loss": 0.2339019775390625, "step": 6762 }, { "epoch": 0.457144788427741, "grad_norm": 0.658719003200531, "learning_rate": 1.792109593115098e-05, "loss": 0.13922119140625, "step": 6763 }, { "epoch": 0.45721238339867515, "grad_norm": 1.82086980342865, "learning_rate": 1.7917859500003943e-05, "loss": 0.33746337890625, "step": 6764 }, { "epoch": 0.4572799783696093, "grad_norm": 0.9194405674934387, "learning_rate": 1.7914622927672723e-05, "loss": 0.198760986328125, "step": 6765 }, { "epoch": 0.4573475733405435, "grad_norm": 1.3258594274520874, "learning_rate": 1.791138621431392e-05, "loss": 0.1839447021484375, "step": 6766 }, { "epoch": 0.4574151683114776, "grad_norm": 1.6249910593032837, "learning_rate": 1.7908149360084148e-05, "loss": 0.311767578125, "step": 6767 }, { "epoch": 0.4574827632824118, "grad_norm": 1.1340323686599731, "learning_rate": 1.790491236514002e-05, "loss": 0.265594482421875, "step": 6768 }, { "epoch": 0.45755035825334595, "grad_norm": 1.016685128211975, "learning_rate": 1.7901675229638167e-05, "loss": 0.246337890625, "step": 6769 }, { "epoch": 0.4576179532242801, "grad_norm": 1.3611483573913574, "learning_rate": 1.789843795373522e-05, "loss": 0.3051605224609375, "step": 6770 }, { "epoch": 0.4576855481952143, "grad_norm": 1.8818693161010742, "learning_rate": 1.7895200537587825e-05, "loss": 0.207122802734375, "step": 6771 }, { "epoch": 0.45775314316614846, "grad_norm": 1.3219141960144043, "learning_rate": 1.789196298135262e-05, "loss": 0.23419189453125, "step": 6772 }, { "epoch": 0.45782073813708263, "grad_norm": 1.5737403631210327, "learning_rate": 1.788872528518626e-05, "loss": 0.305023193359375, "step": 6773 }, { "epoch": 0.45788833310801674, "grad_norm": 1.2820590734481812, "learning_rate": 1.788548744924541e-05, "loss": 0.29736328125, "step": 6774 }, { "epoch": 0.4579559280789509, "grad_norm": 0.964157223701477, "learning_rate": 1.7882249473686727e-05, "loss": 0.1883697509765625, "step": 6775 }, { "epoch": 0.4580235230498851, "grad_norm": 1.3222264051437378, "learning_rate": 1.7879011358666895e-05, "loss": 0.307373046875, "step": 6776 }, { "epoch": 0.45809111802081925, "grad_norm": 1.5836296081542969, "learning_rate": 1.7875773104342586e-05, "loss": 0.273040771484375, "step": 6777 }, { "epoch": 0.4581587129917534, "grad_norm": 2.3520452976226807, "learning_rate": 1.787253471087049e-05, "loss": 0.317138671875, "step": 6778 }, { "epoch": 0.4582263079626876, "grad_norm": 1.335679292678833, "learning_rate": 1.78692961784073e-05, "loss": 0.27001953125, "step": 6779 }, { "epoch": 0.45829390293362177, "grad_norm": 1.6464860439300537, "learning_rate": 1.786605750710972e-05, "loss": 0.292999267578125, "step": 6780 }, { "epoch": 0.4583614979045559, "grad_norm": 1.116339921951294, "learning_rate": 1.7862818697134452e-05, "loss": 0.230072021484375, "step": 6781 }, { "epoch": 0.45842909287549005, "grad_norm": 1.7140390872955322, "learning_rate": 1.785957974863821e-05, "loss": 0.34234619140625, "step": 6782 }, { "epoch": 0.4584966878464242, "grad_norm": 1.2226967811584473, "learning_rate": 1.785634066177772e-05, "loss": 0.221527099609375, "step": 6783 }, { "epoch": 0.4585642828173584, "grad_norm": 1.7407889366149902, "learning_rate": 1.7853101436709706e-05, "loss": 0.310394287109375, "step": 6784 }, { "epoch": 0.45863187778829256, "grad_norm": 1.509628415107727, "learning_rate": 1.7849862073590896e-05, "loss": 0.2130126953125, "step": 6785 }, { "epoch": 0.45869947275922673, "grad_norm": 1.8225229978561401, "learning_rate": 1.7846622572578036e-05, "loss": 0.27392578125, "step": 6786 }, { "epoch": 0.45876706773016085, "grad_norm": 1.6821866035461426, "learning_rate": 1.7843382933827878e-05, "loss": 0.19629669189453125, "step": 6787 }, { "epoch": 0.458834662701095, "grad_norm": 0.9290105104446411, "learning_rate": 1.7840143157497167e-05, "loss": 0.249053955078125, "step": 6788 }, { "epoch": 0.4589022576720292, "grad_norm": 1.5887194871902466, "learning_rate": 1.783690324374267e-05, "loss": 0.232147216796875, "step": 6789 }, { "epoch": 0.45896985264296336, "grad_norm": 1.1760761737823486, "learning_rate": 1.7833663192721148e-05, "loss": 0.2490234375, "step": 6790 }, { "epoch": 0.45903744761389753, "grad_norm": 0.9040592312812805, "learning_rate": 1.783042300458938e-05, "loss": 0.1473388671875, "step": 6791 }, { "epoch": 0.4591050425848317, "grad_norm": 0.7161474227905273, "learning_rate": 1.7827182679504147e-05, "loss": 0.225006103515625, "step": 6792 }, { "epoch": 0.4591726375557659, "grad_norm": 0.9978246092796326, "learning_rate": 1.7823942217622235e-05, "loss": 0.20916748046875, "step": 6793 }, { "epoch": 0.4592402325267, "grad_norm": 0.7151889204978943, "learning_rate": 1.7820701619100426e-05, "loss": 0.1409759521484375, "step": 6794 }, { "epoch": 0.45930782749763416, "grad_norm": 0.9811913371086121, "learning_rate": 1.7817460884095545e-05, "loss": 0.26702880859375, "step": 6795 }, { "epoch": 0.45937542246856833, "grad_norm": 1.9689416885375977, "learning_rate": 1.7814220012764378e-05, "loss": 0.2896728515625, "step": 6796 }, { "epoch": 0.4594430174395025, "grad_norm": 1.0932700634002686, "learning_rate": 1.781097900526375e-05, "loss": 0.27947998046875, "step": 6797 }, { "epoch": 0.45951061241043667, "grad_norm": 1.280894160270691, "learning_rate": 1.7807737861750467e-05, "loss": 0.28765869140625, "step": 6798 }, { "epoch": 0.45957820738137084, "grad_norm": 1.434448480606079, "learning_rate": 1.780449658238137e-05, "loss": 0.25927734375, "step": 6799 }, { "epoch": 0.459645802352305, "grad_norm": 0.8262415528297424, "learning_rate": 1.7801255167313292e-05, "loss": 0.21820068359375, "step": 6800 }, { "epoch": 0.4597133973232391, "grad_norm": 0.6034436821937561, "learning_rate": 1.7798013616703063e-05, "loss": 0.1751556396484375, "step": 6801 }, { "epoch": 0.4597809922941733, "grad_norm": 1.2472347021102905, "learning_rate": 1.779477193070753e-05, "loss": 0.277191162109375, "step": 6802 }, { "epoch": 0.45984858726510747, "grad_norm": 1.537408709526062, "learning_rate": 1.7791530109483558e-05, "loss": 0.2320556640625, "step": 6803 }, { "epoch": 0.45991618223604164, "grad_norm": 1.4704501628875732, "learning_rate": 1.7788288153187997e-05, "loss": 0.21265411376953125, "step": 6804 }, { "epoch": 0.4599837772069758, "grad_norm": 1.468420147895813, "learning_rate": 1.7785046061977715e-05, "loss": 0.252471923828125, "step": 6805 }, { "epoch": 0.46005137217791, "grad_norm": 0.9568310976028442, "learning_rate": 1.7781803836009587e-05, "loss": 0.247314453125, "step": 6806 }, { "epoch": 0.46011896714884415, "grad_norm": 2.1417577266693115, "learning_rate": 1.777856147544049e-05, "loss": 0.25457763671875, "step": 6807 }, { "epoch": 0.46018656211977826, "grad_norm": 1.892424464225769, "learning_rate": 1.7775318980427302e-05, "loss": 0.2763214111328125, "step": 6808 }, { "epoch": 0.46025415709071243, "grad_norm": 1.1547809839248657, "learning_rate": 1.777207635112693e-05, "loss": 0.12000274658203125, "step": 6809 }, { "epoch": 0.4603217520616466, "grad_norm": 0.9239471554756165, "learning_rate": 1.776883358769626e-05, "loss": 0.221954345703125, "step": 6810 }, { "epoch": 0.4603893470325808, "grad_norm": 1.200925350189209, "learning_rate": 1.77655906902922e-05, "loss": 0.2130126953125, "step": 6811 }, { "epoch": 0.46045694200351495, "grad_norm": 1.3109577894210815, "learning_rate": 1.7762347659071667e-05, "loss": 0.33160400390625, "step": 6812 }, { "epoch": 0.4605245369744491, "grad_norm": 1.8568850755691528, "learning_rate": 1.7759104494191576e-05, "loss": 0.321044921875, "step": 6813 }, { "epoch": 0.4605921319453833, "grad_norm": 1.0362709760665894, "learning_rate": 1.7755861195808847e-05, "loss": 0.26507568359375, "step": 6814 }, { "epoch": 0.4606597269163174, "grad_norm": 1.342003345489502, "learning_rate": 1.775261776408042e-05, "loss": 0.25482177734375, "step": 6815 }, { "epoch": 0.4607273218872516, "grad_norm": 1.1728051900863647, "learning_rate": 1.774937419916322e-05, "loss": 0.31121826171875, "step": 6816 }, { "epoch": 0.46079491685818574, "grad_norm": 1.3058699369430542, "learning_rate": 1.7746130501214208e-05, "loss": 0.23150634765625, "step": 6817 }, { "epoch": 0.4608625118291199, "grad_norm": 1.0524280071258545, "learning_rate": 1.774288667039032e-05, "loss": 0.245697021484375, "step": 6818 }, { "epoch": 0.4609301068000541, "grad_norm": 1.511147141456604, "learning_rate": 1.7739642706848517e-05, "loss": 0.2640838623046875, "step": 6819 }, { "epoch": 0.46099770177098826, "grad_norm": 0.8803019523620605, "learning_rate": 1.773639861074576e-05, "loss": 0.210723876953125, "step": 6820 }, { "epoch": 0.4610652967419224, "grad_norm": 1.0924944877624512, "learning_rate": 1.7733154382239022e-05, "loss": 0.161224365234375, "step": 6821 }, { "epoch": 0.46113289171285654, "grad_norm": 1.1689953804016113, "learning_rate": 1.772991002148528e-05, "loss": 0.224334716796875, "step": 6822 }, { "epoch": 0.4612004866837907, "grad_norm": 1.4532042741775513, "learning_rate": 1.772666552864151e-05, "loss": 0.2481689453125, "step": 6823 }, { "epoch": 0.4612680816547249, "grad_norm": 1.7419923543930054, "learning_rate": 1.7723420903864706e-05, "loss": 0.248504638671875, "step": 6824 }, { "epoch": 0.46133567662565905, "grad_norm": 1.5128690004348755, "learning_rate": 1.7720176147311865e-05, "loss": 0.254852294921875, "step": 6825 }, { "epoch": 0.4614032715965932, "grad_norm": 0.9208815693855286, "learning_rate": 1.771693125913998e-05, "loss": 0.216827392578125, "step": 6826 }, { "epoch": 0.4614708665675274, "grad_norm": 0.828907310962677, "learning_rate": 1.771368623950607e-05, "loss": 0.189422607421875, "step": 6827 }, { "epoch": 0.46153846153846156, "grad_norm": 1.2826420068740845, "learning_rate": 1.7710441088567135e-05, "loss": 0.208709716796875, "step": 6828 }, { "epoch": 0.4616060565093957, "grad_norm": 1.4463374614715576, "learning_rate": 1.7707195806480213e-05, "loss": 0.2900390625, "step": 6829 }, { "epoch": 0.46167365148032985, "grad_norm": 1.374252200126648, "learning_rate": 1.770395039340232e-05, "loss": 0.24658203125, "step": 6830 }, { "epoch": 0.461741246451264, "grad_norm": 0.9618971943855286, "learning_rate": 1.770070484949049e-05, "loss": 0.185028076171875, "step": 6831 }, { "epoch": 0.4618088414221982, "grad_norm": 1.4928224086761475, "learning_rate": 1.7697459174901766e-05, "loss": 0.329498291015625, "step": 6832 }, { "epoch": 0.46187643639313236, "grad_norm": 1.5030617713928223, "learning_rate": 1.769421336979319e-05, "loss": 0.212127685546875, "step": 6833 }, { "epoch": 0.46194403136406653, "grad_norm": 1.9328993558883667, "learning_rate": 1.7690967434321812e-05, "loss": 0.355224609375, "step": 6834 }, { "epoch": 0.4620116263350007, "grad_norm": 1.1585036516189575, "learning_rate": 1.76877213686447e-05, "loss": 0.18036651611328125, "step": 6835 }, { "epoch": 0.4620792213059348, "grad_norm": 0.7703167200088501, "learning_rate": 1.768447517291891e-05, "loss": 0.1456298828125, "step": 6836 }, { "epoch": 0.462146816276869, "grad_norm": 1.0143799781799316, "learning_rate": 1.768122884730152e-05, "loss": 0.1898193359375, "step": 6837 }, { "epoch": 0.46221441124780316, "grad_norm": 1.7847285270690918, "learning_rate": 1.7677982391949602e-05, "loss": 0.28338623046875, "step": 6838 }, { "epoch": 0.46228200621873733, "grad_norm": 0.8106418251991272, "learning_rate": 1.7674735807020244e-05, "loss": 0.168609619140625, "step": 6839 }, { "epoch": 0.4623496011896715, "grad_norm": 0.753180205821991, "learning_rate": 1.7671489092670526e-05, "loss": 0.2110595703125, "step": 6840 }, { "epoch": 0.46241719616060567, "grad_norm": 2.0144546031951904, "learning_rate": 1.766824224905756e-05, "loss": 0.2554931640625, "step": 6841 }, { "epoch": 0.46248479113153984, "grad_norm": 1.5552496910095215, "learning_rate": 1.7664995276338442e-05, "loss": 0.251800537109375, "step": 6842 }, { "epoch": 0.46255238610247396, "grad_norm": 0.7696810364723206, "learning_rate": 1.7661748174670274e-05, "loss": 0.21112060546875, "step": 6843 }, { "epoch": 0.4626199810734081, "grad_norm": 1.5517462491989136, "learning_rate": 1.7658500944210175e-05, "loss": 0.211273193359375, "step": 6844 }, { "epoch": 0.4626875760443423, "grad_norm": 1.1696357727050781, "learning_rate": 1.765525358511527e-05, "loss": 0.2106475830078125, "step": 6845 }, { "epoch": 0.46275517101527647, "grad_norm": 0.956484317779541, "learning_rate": 1.7652006097542684e-05, "loss": 0.230194091796875, "step": 6846 }, { "epoch": 0.46282276598621064, "grad_norm": 1.6735496520996094, "learning_rate": 1.764875848164955e-05, "loss": 0.256195068359375, "step": 6847 }, { "epoch": 0.4628903609571448, "grad_norm": 1.4455622434616089, "learning_rate": 1.7645510737593004e-05, "loss": 0.343658447265625, "step": 6848 }, { "epoch": 0.4629579559280789, "grad_norm": 1.2524433135986328, "learning_rate": 1.7642262865530204e-05, "loss": 0.322662353515625, "step": 6849 }, { "epoch": 0.4630255508990131, "grad_norm": 1.2816624641418457, "learning_rate": 1.763901486561829e-05, "loss": 0.263427734375, "step": 6850 }, { "epoch": 0.46309314586994726, "grad_norm": 1.8557939529418945, "learning_rate": 1.763576673801442e-05, "loss": 0.2818603515625, "step": 6851 }, { "epoch": 0.46316074084088144, "grad_norm": 1.623447060585022, "learning_rate": 1.7632518482875774e-05, "loss": 0.16168212890625, "step": 6852 }, { "epoch": 0.4632283358118156, "grad_norm": 1.3078718185424805, "learning_rate": 1.76292701003595e-05, "loss": 0.2265472412109375, "step": 6853 }, { "epoch": 0.4632959307827498, "grad_norm": 1.845872402191162, "learning_rate": 1.76260215906228e-05, "loss": 0.2808837890625, "step": 6854 }, { "epoch": 0.46336352575368395, "grad_norm": 1.168203353881836, "learning_rate": 1.7622772953822843e-05, "loss": 0.255218505859375, "step": 6855 }, { "epoch": 0.46343112072461806, "grad_norm": 1.9570914506912231, "learning_rate": 1.761952419011681e-05, "loss": 0.32952880859375, "step": 6856 }, { "epoch": 0.46349871569555223, "grad_norm": 1.2922433614730835, "learning_rate": 1.761627529966191e-05, "loss": 0.24615478515625, "step": 6857 }, { "epoch": 0.4635663106664864, "grad_norm": 1.2720087766647339, "learning_rate": 1.7613026282615345e-05, "loss": 0.30889892578125, "step": 6858 }, { "epoch": 0.4636339056374206, "grad_norm": 1.4789117574691772, "learning_rate": 1.760977713913432e-05, "loss": 0.220947265625, "step": 6859 }, { "epoch": 0.46370150060835474, "grad_norm": 0.9698345065116882, "learning_rate": 1.760652786937604e-05, "loss": 0.25286865234375, "step": 6860 }, { "epoch": 0.4637690955792889, "grad_norm": 0.9555352330207825, "learning_rate": 1.760327847349773e-05, "loss": 0.227447509765625, "step": 6861 }, { "epoch": 0.4638366905502231, "grad_norm": 1.319912314414978, "learning_rate": 1.7600028951656625e-05, "loss": 0.2886962890625, "step": 6862 }, { "epoch": 0.4639042855211572, "grad_norm": 0.9748911261558533, "learning_rate": 1.7596779304009947e-05, "loss": 0.19305419921875, "step": 6863 }, { "epoch": 0.46397188049209137, "grad_norm": 1.404209017753601, "learning_rate": 1.7593529530714937e-05, "loss": 0.316925048828125, "step": 6864 }, { "epoch": 0.46403947546302554, "grad_norm": 0.9998300671577454, "learning_rate": 1.7590279631928838e-05, "loss": 0.28289794921875, "step": 6865 }, { "epoch": 0.4641070704339597, "grad_norm": 1.2461093664169312, "learning_rate": 1.7587029607808907e-05, "loss": 0.243316650390625, "step": 6866 }, { "epoch": 0.4641746654048939, "grad_norm": 0.5979264974594116, "learning_rate": 1.758377945851239e-05, "loss": 0.1221160888671875, "step": 6867 }, { "epoch": 0.46424226037582805, "grad_norm": 1.013344407081604, "learning_rate": 1.7580529184196555e-05, "loss": 0.2177276611328125, "step": 6868 }, { "epoch": 0.4643098553467622, "grad_norm": 1.871390461921692, "learning_rate": 1.757727878501867e-05, "loss": 0.292236328125, "step": 6869 }, { "epoch": 0.46437745031769634, "grad_norm": 1.6204822063446045, "learning_rate": 1.7574028261136007e-05, "loss": 0.247344970703125, "step": 6870 }, { "epoch": 0.4644450452886305, "grad_norm": 1.211026668548584, "learning_rate": 1.7570777612705855e-05, "loss": 0.303497314453125, "step": 6871 }, { "epoch": 0.4645126402595647, "grad_norm": 1.3312268257141113, "learning_rate": 1.7567526839885492e-05, "loss": 0.3265380859375, "step": 6872 }, { "epoch": 0.46458023523049885, "grad_norm": 0.48187869787216187, "learning_rate": 1.7564275942832213e-05, "loss": 0.08625030517578125, "step": 6873 }, { "epoch": 0.464647830201433, "grad_norm": 1.1232820749282837, "learning_rate": 1.7561024921703312e-05, "loss": 0.280975341796875, "step": 6874 }, { "epoch": 0.4647154251723672, "grad_norm": 1.4528783559799194, "learning_rate": 1.7557773776656103e-05, "loss": 0.27593994140625, "step": 6875 }, { "epoch": 0.46478302014330136, "grad_norm": 0.4974750578403473, "learning_rate": 1.7554522507847893e-05, "loss": 0.143951416015625, "step": 6876 }, { "epoch": 0.4648506151142355, "grad_norm": 1.5163935422897339, "learning_rate": 1.7551271115436e-05, "loss": 0.268951416015625, "step": 6877 }, { "epoch": 0.46491821008516965, "grad_norm": 1.728309154510498, "learning_rate": 1.754801959957774e-05, "loss": 0.254486083984375, "step": 6878 }, { "epoch": 0.4649858050561038, "grad_norm": 0.6946132779121399, "learning_rate": 1.7544767960430447e-05, "loss": 0.07430267333984375, "step": 6879 }, { "epoch": 0.465053400027038, "grad_norm": 0.9221879839897156, "learning_rate": 1.7541516198151457e-05, "loss": 0.22515869140625, "step": 6880 }, { "epoch": 0.46512099499797216, "grad_norm": 1.5207799673080444, "learning_rate": 1.7538264312898104e-05, "loss": 0.304779052734375, "step": 6881 }, { "epoch": 0.46518858996890633, "grad_norm": 0.9624907970428467, "learning_rate": 1.7535012304827737e-05, "loss": 0.234161376953125, "step": 6882 }, { "epoch": 0.4652561849398405, "grad_norm": 1.1715878248214722, "learning_rate": 1.753176017409771e-05, "loss": 0.255767822265625, "step": 6883 }, { "epoch": 0.4653237799107746, "grad_norm": 2.0022740364074707, "learning_rate": 1.7528507920865386e-05, "loss": 0.32861328125, "step": 6884 }, { "epoch": 0.4653913748817088, "grad_norm": 1.707406759262085, "learning_rate": 1.752525554528812e-05, "loss": 0.282745361328125, "step": 6885 }, { "epoch": 0.46545896985264296, "grad_norm": 0.6805803179740906, "learning_rate": 1.7522003047523286e-05, "loss": 0.130096435546875, "step": 6886 }, { "epoch": 0.4655265648235771, "grad_norm": 1.325792670249939, "learning_rate": 1.751875042772826e-05, "loss": 0.1979827880859375, "step": 6887 }, { "epoch": 0.4655941597945113, "grad_norm": 1.0233170986175537, "learning_rate": 1.7515497686060428e-05, "loss": 0.28570556640625, "step": 6888 }, { "epoch": 0.46566175476544547, "grad_norm": 0.893435537815094, "learning_rate": 1.7512244822677174e-05, "loss": 0.255615234375, "step": 6889 }, { "epoch": 0.46572934973637964, "grad_norm": 2.030702829360962, "learning_rate": 1.750899183773589e-05, "loss": 0.32977294921875, "step": 6890 }, { "epoch": 0.46579694470731375, "grad_norm": 1.639695405960083, "learning_rate": 1.7505738731393978e-05, "loss": 0.2867431640625, "step": 6891 }, { "epoch": 0.4658645396782479, "grad_norm": 1.1040858030319214, "learning_rate": 1.7502485503808844e-05, "loss": 0.2117156982421875, "step": 6892 }, { "epoch": 0.4659321346491821, "grad_norm": 1.430368423461914, "learning_rate": 1.7499232155137897e-05, "loss": 0.281494140625, "step": 6893 }, { "epoch": 0.46599972962011627, "grad_norm": 0.9799531102180481, "learning_rate": 1.7495978685538553e-05, "loss": 0.1403961181640625, "step": 6894 }, { "epoch": 0.46606732459105044, "grad_norm": 1.4030605554580688, "learning_rate": 1.749272509516824e-05, "loss": 0.25408935546875, "step": 6895 }, { "epoch": 0.4661349195619846, "grad_norm": 0.9038058519363403, "learning_rate": 1.748947138418439e-05, "loss": 0.15296173095703125, "step": 6896 }, { "epoch": 0.4662025145329188, "grad_norm": 1.5006169080734253, "learning_rate": 1.7486217552744426e-05, "loss": 0.27618408203125, "step": 6897 }, { "epoch": 0.4662701095038529, "grad_norm": 1.2596403360366821, "learning_rate": 1.7482963601005798e-05, "loss": 0.262451171875, "step": 6898 }, { "epoch": 0.46633770447478706, "grad_norm": 1.352502703666687, "learning_rate": 1.7479709529125947e-05, "loss": 0.281463623046875, "step": 6899 }, { "epoch": 0.46640529944572123, "grad_norm": 1.2930021286010742, "learning_rate": 1.7476455337262334e-05, "loss": 0.1588134765625, "step": 6900 }, { "epoch": 0.4664728944166554, "grad_norm": 1.2287019491195679, "learning_rate": 1.747320102557241e-05, "loss": 0.1526947021484375, "step": 6901 }, { "epoch": 0.4665404893875896, "grad_norm": 1.1956934928894043, "learning_rate": 1.7469946594213632e-05, "loss": 0.2383880615234375, "step": 6902 }, { "epoch": 0.46660808435852374, "grad_norm": 0.7344143986701965, "learning_rate": 1.7466692043343484e-05, "loss": 0.15800094604492188, "step": 6903 }, { "epoch": 0.4666756793294579, "grad_norm": 1.1063905954360962, "learning_rate": 1.746343737311943e-05, "loss": 0.264404296875, "step": 6904 }, { "epoch": 0.46674327430039203, "grad_norm": 2.8064286708831787, "learning_rate": 1.7460182583698962e-05, "loss": 0.31939697265625, "step": 6905 }, { "epoch": 0.4668108692713262, "grad_norm": 1.516924262046814, "learning_rate": 1.745692767523956e-05, "loss": 0.27301025390625, "step": 6906 }, { "epoch": 0.46687846424226037, "grad_norm": 2.7655723094940186, "learning_rate": 1.7453672647898712e-05, "loss": 0.32598876953125, "step": 6907 }, { "epoch": 0.46694605921319454, "grad_norm": 1.6080795526504517, "learning_rate": 1.7450417501833924e-05, "loss": 0.334381103515625, "step": 6908 }, { "epoch": 0.4670136541841287, "grad_norm": 1.3025625944137573, "learning_rate": 1.74471622372027e-05, "loss": 0.22735595703125, "step": 6909 }, { "epoch": 0.4670812491550629, "grad_norm": 2.9280598163604736, "learning_rate": 1.744390685416255e-05, "loss": 0.2996826171875, "step": 6910 }, { "epoch": 0.467148844125997, "grad_norm": 1.1928784847259521, "learning_rate": 1.744065135287098e-05, "loss": 0.234405517578125, "step": 6911 }, { "epoch": 0.46721643909693117, "grad_norm": 0.9869206547737122, "learning_rate": 1.743739573348553e-05, "loss": 0.266845703125, "step": 6912 }, { "epoch": 0.46728403406786534, "grad_norm": 0.8150429725646973, "learning_rate": 1.7434139996163712e-05, "loss": 0.158050537109375, "step": 6913 }, { "epoch": 0.4673516290387995, "grad_norm": 0.9346449971199036, "learning_rate": 1.743088414106306e-05, "loss": 0.2137908935546875, "step": 6914 }, { "epoch": 0.4674192240097337, "grad_norm": 0.7594039440155029, "learning_rate": 1.742762816834112e-05, "loss": 0.1629180908203125, "step": 6915 }, { "epoch": 0.46748681898066785, "grad_norm": 1.3040636777877808, "learning_rate": 1.742437207815543e-05, "loss": 0.31365966796875, "step": 6916 }, { "epoch": 0.467554413951602, "grad_norm": 1.256622314453125, "learning_rate": 1.742111587066354e-05, "loss": 0.1579437255859375, "step": 6917 }, { "epoch": 0.46762200892253614, "grad_norm": 3.240659236907959, "learning_rate": 1.741785954602301e-05, "loss": 0.21337890625, "step": 6918 }, { "epoch": 0.4676896038934703, "grad_norm": 1.6929852962493896, "learning_rate": 1.7414603104391402e-05, "loss": 0.188507080078125, "step": 6919 }, { "epoch": 0.4677571988644045, "grad_norm": 1.6593924760818481, "learning_rate": 1.7411346545926273e-05, "loss": 0.26397705078125, "step": 6920 }, { "epoch": 0.46782479383533865, "grad_norm": 1.2990995645523071, "learning_rate": 1.7408089870785205e-05, "loss": 0.20501708984375, "step": 6921 }, { "epoch": 0.4678923888062728, "grad_norm": 2.1408352851867676, "learning_rate": 1.7404833079125773e-05, "loss": 0.32171630859375, "step": 6922 }, { "epoch": 0.467959983777207, "grad_norm": 1.0565935373306274, "learning_rate": 1.7401576171105562e-05, "loss": 0.201019287109375, "step": 6923 }, { "epoch": 0.46802757874814116, "grad_norm": 0.8447917103767395, "learning_rate": 1.739831914688216e-05, "loss": 0.1600189208984375, "step": 6924 }, { "epoch": 0.4680951737190753, "grad_norm": 0.8852856159210205, "learning_rate": 1.7395062006613164e-05, "loss": 0.1526947021484375, "step": 6925 }, { "epoch": 0.46816276869000945, "grad_norm": 0.8678167462348938, "learning_rate": 1.7391804750456174e-05, "loss": 0.23309326171875, "step": 6926 }, { "epoch": 0.4682303636609436, "grad_norm": 1.7857788801193237, "learning_rate": 1.7388547378568792e-05, "loss": 0.277587890625, "step": 6927 }, { "epoch": 0.4682979586318778, "grad_norm": 2.170280694961548, "learning_rate": 1.7385289891108633e-05, "loss": 0.28436279296875, "step": 6928 }, { "epoch": 0.46836555360281196, "grad_norm": 1.791199803352356, "learning_rate": 1.738203228823332e-05, "loss": 0.229461669921875, "step": 6929 }, { "epoch": 0.4684331485737461, "grad_norm": 2.3467559814453125, "learning_rate": 1.7378774570100468e-05, "loss": 0.2823028564453125, "step": 6930 }, { "epoch": 0.4685007435446803, "grad_norm": 1.0670983791351318, "learning_rate": 1.737551673686771e-05, "loss": 0.26171875, "step": 6931 }, { "epoch": 0.4685683385156144, "grad_norm": 1.1620439291000366, "learning_rate": 1.7372258788692674e-05, "loss": 0.27374267578125, "step": 6932 }, { "epoch": 0.4686359334865486, "grad_norm": 1.1998491287231445, "learning_rate": 1.7369000725733014e-05, "loss": 0.215576171875, "step": 6933 }, { "epoch": 0.46870352845748275, "grad_norm": 1.630384087562561, "learning_rate": 1.736574254814636e-05, "loss": 0.28021240234375, "step": 6934 }, { "epoch": 0.4687711234284169, "grad_norm": 1.0806092023849487, "learning_rate": 1.736248425609037e-05, "loss": 0.173614501953125, "step": 6935 }, { "epoch": 0.4688387183993511, "grad_norm": 0.9143494963645935, "learning_rate": 1.7359225849722704e-05, "loss": 0.174072265625, "step": 6936 }, { "epoch": 0.46890631337028527, "grad_norm": 1.4774137735366821, "learning_rate": 1.7355967329201016e-05, "loss": 0.3092041015625, "step": 6937 }, { "epoch": 0.46897390834121944, "grad_norm": 0.7572931051254272, "learning_rate": 1.7352708694682975e-05, "loss": 0.1293487548828125, "step": 6938 }, { "epoch": 0.46904150331215355, "grad_norm": 1.4301875829696655, "learning_rate": 1.734944994632626e-05, "loss": 0.243560791015625, "step": 6939 }, { "epoch": 0.4691090982830877, "grad_norm": 1.1898914575576782, "learning_rate": 1.7346191084288542e-05, "loss": 0.2802734375, "step": 6940 }, { "epoch": 0.4691766932540219, "grad_norm": 1.7361412048339844, "learning_rate": 1.7342932108727508e-05, "loss": 0.257110595703125, "step": 6941 }, { "epoch": 0.46924428822495606, "grad_norm": 2.1699373722076416, "learning_rate": 1.733967301980085e-05, "loss": 0.236724853515625, "step": 6942 }, { "epoch": 0.46931188319589023, "grad_norm": 1.4289554357528687, "learning_rate": 1.7336413817666262e-05, "loss": 0.2481689453125, "step": 6943 }, { "epoch": 0.4693794781668244, "grad_norm": 1.1152079105377197, "learning_rate": 1.7333154502481445e-05, "loss": 0.1305389404296875, "step": 6944 }, { "epoch": 0.4694470731377586, "grad_norm": 1.4002842903137207, "learning_rate": 1.7329895074404097e-05, "loss": 0.305572509765625, "step": 6945 }, { "epoch": 0.4695146681086927, "grad_norm": 1.2202777862548828, "learning_rate": 1.732663553359194e-05, "loss": 0.235107421875, "step": 6946 }, { "epoch": 0.46958226307962686, "grad_norm": 1.28535795211792, "learning_rate": 1.7323375880202692e-05, "loss": 0.2631988525390625, "step": 6947 }, { "epoch": 0.46964985805056103, "grad_norm": 1.6346007585525513, "learning_rate": 1.732011611439406e-05, "loss": 0.187286376953125, "step": 6948 }, { "epoch": 0.4697174530214952, "grad_norm": 1.8789116144180298, "learning_rate": 1.7316856236323788e-05, "loss": 0.34295654296875, "step": 6949 }, { "epoch": 0.46978504799242937, "grad_norm": 1.3158966302871704, "learning_rate": 1.7313596246149603e-05, "loss": 0.191558837890625, "step": 6950 }, { "epoch": 0.46985264296336354, "grad_norm": 1.22022545337677, "learning_rate": 1.731033614402924e-05, "loss": 0.21875, "step": 6951 }, { "epoch": 0.4699202379342977, "grad_norm": 1.7400081157684326, "learning_rate": 1.730707593012045e-05, "loss": 0.247314453125, "step": 6952 }, { "epoch": 0.46998783290523183, "grad_norm": 0.8513106107711792, "learning_rate": 1.7303815604580974e-05, "loss": 0.1661224365234375, "step": 6953 }, { "epoch": 0.470055427876166, "grad_norm": 0.7216182947158813, "learning_rate": 1.7300555167568577e-05, "loss": 0.1531982421875, "step": 6954 }, { "epoch": 0.47012302284710017, "grad_norm": 1.7017221450805664, "learning_rate": 1.7297294619241012e-05, "loss": 0.22141265869140625, "step": 6955 }, { "epoch": 0.47019061781803434, "grad_norm": 1.5837552547454834, "learning_rate": 1.7294033959756045e-05, "loss": 0.1765289306640625, "step": 6956 }, { "epoch": 0.4702582127889685, "grad_norm": 1.8685928583145142, "learning_rate": 1.729077318927145e-05, "loss": 0.2639007568359375, "step": 6957 }, { "epoch": 0.4703258077599027, "grad_norm": 1.7382766008377075, "learning_rate": 1.7287512307945e-05, "loss": 0.228790283203125, "step": 6958 }, { "epoch": 0.47039340273083685, "grad_norm": 1.0169329643249512, "learning_rate": 1.7284251315934486e-05, "loss": 0.164581298828125, "step": 6959 }, { "epoch": 0.47046099770177097, "grad_norm": 1.4003067016601562, "learning_rate": 1.7280990213397685e-05, "loss": 0.268524169921875, "step": 6960 }, { "epoch": 0.47052859267270514, "grad_norm": 1.138575553894043, "learning_rate": 1.7277729000492385e-05, "loss": 0.1350250244140625, "step": 6961 }, { "epoch": 0.4705961876436393, "grad_norm": 1.8236074447631836, "learning_rate": 1.7274467677376397e-05, "loss": 0.282958984375, "step": 6962 }, { "epoch": 0.4706637826145735, "grad_norm": 0.9462010860443115, "learning_rate": 1.7271206244207515e-05, "loss": 0.1832427978515625, "step": 6963 }, { "epoch": 0.47073137758550765, "grad_norm": 1.2948983907699585, "learning_rate": 1.7267944701143555e-05, "loss": 0.25628662109375, "step": 6964 }, { "epoch": 0.4707989725564418, "grad_norm": 1.0378751754760742, "learning_rate": 1.7264683048342323e-05, "loss": 0.17655181884765625, "step": 6965 }, { "epoch": 0.470866567527376, "grad_norm": 1.2132092714309692, "learning_rate": 1.7261421285961638e-05, "loss": 0.2508544921875, "step": 6966 }, { "epoch": 0.4709341624983101, "grad_norm": 1.5456759929656982, "learning_rate": 1.7258159414159333e-05, "loss": 0.2279052734375, "step": 6967 }, { "epoch": 0.4710017574692443, "grad_norm": 1.7785660028457642, "learning_rate": 1.7254897433093226e-05, "loss": 0.287139892578125, "step": 6968 }, { "epoch": 0.47106935244017845, "grad_norm": 0.8780733346939087, "learning_rate": 1.7251635342921165e-05, "loss": 0.1968536376953125, "step": 6969 }, { "epoch": 0.4711369474111126, "grad_norm": 2.2390193939208984, "learning_rate": 1.724837314380098e-05, "loss": 0.3099365234375, "step": 6970 }, { "epoch": 0.4712045423820468, "grad_norm": 0.7799318432807922, "learning_rate": 1.7245110835890523e-05, "loss": 0.139801025390625, "step": 6971 }, { "epoch": 0.47127213735298096, "grad_norm": 1.2657442092895508, "learning_rate": 1.7241848419347638e-05, "loss": 0.2162017822265625, "step": 6972 }, { "epoch": 0.47133973232391513, "grad_norm": 1.7202577590942383, "learning_rate": 1.7238585894330186e-05, "loss": 0.298736572265625, "step": 6973 }, { "epoch": 0.47140732729484924, "grad_norm": 1.7029801607131958, "learning_rate": 1.7235323260996025e-05, "loss": 0.2315673828125, "step": 6974 }, { "epoch": 0.4714749222657834, "grad_norm": 1.6524062156677246, "learning_rate": 1.7232060519503024e-05, "loss": 0.23223876953125, "step": 6975 }, { "epoch": 0.4715425172367176, "grad_norm": 1.169264554977417, "learning_rate": 1.7228797670009057e-05, "loss": 0.27032470703125, "step": 6976 }, { "epoch": 0.47161011220765175, "grad_norm": 0.7914039492607117, "learning_rate": 1.7225534712671996e-05, "loss": 0.112335205078125, "step": 6977 }, { "epoch": 0.4716777071785859, "grad_norm": 0.8675037026405334, "learning_rate": 1.722227164764972e-05, "loss": 0.20452880859375, "step": 6978 }, { "epoch": 0.4717453021495201, "grad_norm": 0.5759336948394775, "learning_rate": 1.7219008475100127e-05, "loss": 0.08208465576171875, "step": 6979 }, { "epoch": 0.4718128971204542, "grad_norm": 1.1760140657424927, "learning_rate": 1.7215745195181106e-05, "loss": 0.29351806640625, "step": 6980 }, { "epoch": 0.4718804920913884, "grad_norm": 0.7608829736709595, "learning_rate": 1.721248180805055e-05, "loss": 0.19110107421875, "step": 6981 }, { "epoch": 0.47194808706232255, "grad_norm": 1.6013410091400146, "learning_rate": 1.7209218313866365e-05, "loss": 0.26318359375, "step": 6982 }, { "epoch": 0.4720156820332567, "grad_norm": 1.61472749710083, "learning_rate": 1.7205954712786457e-05, "loss": 0.234130859375, "step": 6983 }, { "epoch": 0.4720832770041909, "grad_norm": 1.2650697231292725, "learning_rate": 1.7202691004968745e-05, "loss": 0.258087158203125, "step": 6984 }, { "epoch": 0.47215087197512506, "grad_norm": 0.926613450050354, "learning_rate": 1.7199427190571145e-05, "loss": 0.19915771484375, "step": 6985 }, { "epoch": 0.47221846694605923, "grad_norm": 1.0170358419418335, "learning_rate": 1.7196163269751573e-05, "loss": 0.26031494140625, "step": 6986 }, { "epoch": 0.47228606191699335, "grad_norm": 0.9863895177841187, "learning_rate": 1.7192899242667967e-05, "loss": 0.2029266357421875, "step": 6987 }, { "epoch": 0.4723536568879275, "grad_norm": 1.5908396244049072, "learning_rate": 1.718963510947826e-05, "loss": 0.230133056640625, "step": 6988 }, { "epoch": 0.4724212518588617, "grad_norm": 1.3355491161346436, "learning_rate": 1.718637087034039e-05, "loss": 0.33905029296875, "step": 6989 }, { "epoch": 0.47248884682979586, "grad_norm": 1.0157768726348877, "learning_rate": 1.71831065254123e-05, "loss": 0.186920166015625, "step": 6990 }, { "epoch": 0.47255644180073003, "grad_norm": 1.1845269203186035, "learning_rate": 1.7179842074851935e-05, "loss": 0.287933349609375, "step": 6991 }, { "epoch": 0.4726240367716642, "grad_norm": 1.3905538320541382, "learning_rate": 1.7176577518817256e-05, "loss": 0.27410888671875, "step": 6992 }, { "epoch": 0.4726916317425984, "grad_norm": 0.8001975417137146, "learning_rate": 1.7173312857466225e-05, "loss": 0.1434783935546875, "step": 6993 }, { "epoch": 0.4727592267135325, "grad_norm": 1.8565800189971924, "learning_rate": 1.7170048090956798e-05, "loss": 0.28680419921875, "step": 6994 }, { "epoch": 0.47282682168446666, "grad_norm": 0.9222558736801147, "learning_rate": 1.7166783219446955e-05, "loss": 0.2174530029296875, "step": 6995 }, { "epoch": 0.47289441665540083, "grad_norm": 1.4369803667068481, "learning_rate": 1.7163518243094656e-05, "loss": 0.3135986328125, "step": 6996 }, { "epoch": 0.472962011626335, "grad_norm": 1.0695548057556152, "learning_rate": 1.7160253162057895e-05, "loss": 0.23138427734375, "step": 6997 }, { "epoch": 0.47302960659726917, "grad_norm": 1.8570221662521362, "learning_rate": 1.7156987976494652e-05, "loss": 0.309356689453125, "step": 6998 }, { "epoch": 0.47309720156820334, "grad_norm": 1.6174010038375854, "learning_rate": 1.7153722686562913e-05, "loss": 0.291748046875, "step": 6999 }, { "epoch": 0.4731647965391375, "grad_norm": 1.568915605545044, "learning_rate": 1.7150457292420676e-05, "loss": 0.23779296875, "step": 7000 }, { "epoch": 0.4732323915100716, "grad_norm": 1.066281795501709, "learning_rate": 1.7147191794225945e-05, "loss": 0.2187347412109375, "step": 7001 }, { "epoch": 0.4732999864810058, "grad_norm": 1.5004249811172485, "learning_rate": 1.7143926192136723e-05, "loss": 0.31085205078125, "step": 7002 }, { "epoch": 0.47336758145193997, "grad_norm": 1.0929869413375854, "learning_rate": 1.7140660486311014e-05, "loss": 0.28961181640625, "step": 7003 }, { "epoch": 0.47343517642287414, "grad_norm": 1.1622097492218018, "learning_rate": 1.713739467690684e-05, "loss": 0.258636474609375, "step": 7004 }, { "epoch": 0.4735027713938083, "grad_norm": 1.0607709884643555, "learning_rate": 1.713412876408222e-05, "loss": 0.32623291015625, "step": 7005 }, { "epoch": 0.4735703663647425, "grad_norm": 0.9888121485710144, "learning_rate": 1.7130862747995183e-05, "loss": 0.25323486328125, "step": 7006 }, { "epoch": 0.47363796133567665, "grad_norm": 1.129827857017517, "learning_rate": 1.712759662880375e-05, "loss": 0.1944427490234375, "step": 7007 }, { "epoch": 0.47370555630661076, "grad_norm": 1.3927806615829468, "learning_rate": 1.712433040666596e-05, "loss": 0.289581298828125, "step": 7008 }, { "epoch": 0.47377315127754493, "grad_norm": 0.8305893540382385, "learning_rate": 1.7121064081739854e-05, "loss": 0.273529052734375, "step": 7009 }, { "epoch": 0.4738407462484791, "grad_norm": 0.5774232745170593, "learning_rate": 1.7117797654183477e-05, "loss": 0.144775390625, "step": 7010 }, { "epoch": 0.4739083412194133, "grad_norm": 0.865983784198761, "learning_rate": 1.711453112415488e-05, "loss": 0.222259521484375, "step": 7011 }, { "epoch": 0.47397593619034745, "grad_norm": 1.6498136520385742, "learning_rate": 1.711126449181211e-05, "loss": 0.298583984375, "step": 7012 }, { "epoch": 0.4740435311612816, "grad_norm": 1.3438800573349, "learning_rate": 1.7107997757313244e-05, "loss": 0.32037353515625, "step": 7013 }, { "epoch": 0.4741111261322158, "grad_norm": 1.2224222421646118, "learning_rate": 1.710473092081634e-05, "loss": 0.28948974609375, "step": 7014 }, { "epoch": 0.4741787211031499, "grad_norm": 1.620630145072937, "learning_rate": 1.7101463982479456e-05, "loss": 0.234375, "step": 7015 }, { "epoch": 0.4742463160740841, "grad_norm": 1.2795989513397217, "learning_rate": 1.7098196942460676e-05, "loss": 0.17235565185546875, "step": 7016 }, { "epoch": 0.47431391104501824, "grad_norm": 0.9930826425552368, "learning_rate": 1.7094929800918082e-05, "loss": 0.269287109375, "step": 7017 }, { "epoch": 0.4743815060159524, "grad_norm": 0.9968725442886353, "learning_rate": 1.709166255800976e-05, "loss": 0.17493438720703125, "step": 7018 }, { "epoch": 0.4744491009868866, "grad_norm": 2.021832227706909, "learning_rate": 1.7088395213893787e-05, "loss": 0.285430908203125, "step": 7019 }, { "epoch": 0.47451669595782076, "grad_norm": 1.109148621559143, "learning_rate": 1.7085127768728272e-05, "loss": 0.233123779296875, "step": 7020 }, { "epoch": 0.4745842909287549, "grad_norm": 0.8746255040168762, "learning_rate": 1.7081860222671305e-05, "loss": 0.1892852783203125, "step": 7021 }, { "epoch": 0.47465188589968904, "grad_norm": 1.3621934652328491, "learning_rate": 1.7078592575880995e-05, "loss": 0.279510498046875, "step": 7022 }, { "epoch": 0.4747194808706232, "grad_norm": 1.5149160623550415, "learning_rate": 1.707532482851545e-05, "loss": 0.270782470703125, "step": 7023 }, { "epoch": 0.4747870758415574, "grad_norm": 1.4559170007705688, "learning_rate": 1.707205698073278e-05, "loss": 0.33966064453125, "step": 7024 }, { "epoch": 0.47485467081249155, "grad_norm": 1.4070191383361816, "learning_rate": 1.706878903269111e-05, "loss": 0.27923583984375, "step": 7025 }, { "epoch": 0.4749222657834257, "grad_norm": 1.168744683265686, "learning_rate": 1.7065520984548562e-05, "loss": 0.1536102294921875, "step": 7026 }, { "epoch": 0.4749898607543599, "grad_norm": 0.7777051329612732, "learning_rate": 1.7062252836463263e-05, "loss": 0.209014892578125, "step": 7027 }, { "epoch": 0.47505745572529406, "grad_norm": 1.3308496475219727, "learning_rate": 1.705898458859335e-05, "loss": 0.23638916015625, "step": 7028 }, { "epoch": 0.4751250506962282, "grad_norm": 1.0342791080474854, "learning_rate": 1.705571624109695e-05, "loss": 0.232147216796875, "step": 7029 }, { "epoch": 0.47519264566716235, "grad_norm": 1.031862735748291, "learning_rate": 1.705244779413222e-05, "loss": 0.1651763916015625, "step": 7030 }, { "epoch": 0.4752602406380965, "grad_norm": 1.4252947568893433, "learning_rate": 1.7049179247857302e-05, "loss": 0.2246551513671875, "step": 7031 }, { "epoch": 0.4753278356090307, "grad_norm": 0.7253857254981995, "learning_rate": 1.704591060243034e-05, "loss": 0.1652679443359375, "step": 7032 }, { "epoch": 0.47539543057996486, "grad_norm": 0.9058345556259155, "learning_rate": 1.704264185800951e-05, "loss": 0.23492431640625, "step": 7033 }, { "epoch": 0.47546302555089903, "grad_norm": 1.655111312866211, "learning_rate": 1.7039373014752957e-05, "loss": 0.3197021484375, "step": 7034 }, { "epoch": 0.4755306205218332, "grad_norm": 1.0905778408050537, "learning_rate": 1.703610407281886e-05, "loss": 0.220184326171875, "step": 7035 }, { "epoch": 0.4755982154927673, "grad_norm": 1.7524819374084473, "learning_rate": 1.7032835032365383e-05, "loss": 0.22723388671875, "step": 7036 }, { "epoch": 0.4756658104637015, "grad_norm": 0.9682641625404358, "learning_rate": 1.7029565893550704e-05, "loss": 0.16466522216796875, "step": 7037 }, { "epoch": 0.47573340543463566, "grad_norm": 1.5972087383270264, "learning_rate": 1.7026296656533007e-05, "loss": 0.2637939453125, "step": 7038 }, { "epoch": 0.47580100040556983, "grad_norm": 0.7013539671897888, "learning_rate": 1.7023027321470474e-05, "loss": 0.11383056640625, "step": 7039 }, { "epoch": 0.475868595376504, "grad_norm": 0.9501599669456482, "learning_rate": 1.7019757888521304e-05, "loss": 0.228179931640625, "step": 7040 }, { "epoch": 0.47593619034743817, "grad_norm": 0.7408357858657837, "learning_rate": 1.701648835784368e-05, "loss": 0.144775390625, "step": 7041 }, { "epoch": 0.4760037853183723, "grad_norm": 1.5538722276687622, "learning_rate": 1.701321872959581e-05, "loss": 0.171356201171875, "step": 7042 }, { "epoch": 0.47607138028930646, "grad_norm": 0.7192076444625854, "learning_rate": 1.70099490039359e-05, "loss": 0.121368408203125, "step": 7043 }, { "epoch": 0.4761389752602406, "grad_norm": 2.1180949211120605, "learning_rate": 1.700667918102216e-05, "loss": 0.29229736328125, "step": 7044 }, { "epoch": 0.4762065702311748, "grad_norm": 1.52908456325531, "learning_rate": 1.700340926101279e-05, "loss": 0.251312255859375, "step": 7045 }, { "epoch": 0.47627416520210897, "grad_norm": 1.134506344795227, "learning_rate": 1.7000139244066035e-05, "loss": 0.1894073486328125, "step": 7046 }, { "epoch": 0.47634176017304314, "grad_norm": 1.150459885597229, "learning_rate": 1.6996869130340097e-05, "loss": 0.254547119140625, "step": 7047 }, { "epoch": 0.4764093551439773, "grad_norm": 0.6611562371253967, "learning_rate": 1.6993598919993215e-05, "loss": 0.12877273559570312, "step": 7048 }, { "epoch": 0.4764769501149114, "grad_norm": 1.6585795879364014, "learning_rate": 1.699032861318362e-05, "loss": 0.277008056640625, "step": 7049 }, { "epoch": 0.4765445450858456, "grad_norm": 1.0887340307235718, "learning_rate": 1.698705821006954e-05, "loss": 0.19195556640625, "step": 7050 }, { "epoch": 0.47661214005677977, "grad_norm": 0.8458667993545532, "learning_rate": 1.698378771080924e-05, "loss": 0.1631011962890625, "step": 7051 }, { "epoch": 0.47667973502771394, "grad_norm": 2.3797152042388916, "learning_rate": 1.6980517115560943e-05, "loss": 0.1920166015625, "step": 7052 }, { "epoch": 0.4767473299986481, "grad_norm": 1.0426032543182373, "learning_rate": 1.6977246424482918e-05, "loss": 0.1790924072265625, "step": 7053 }, { "epoch": 0.4768149249695823, "grad_norm": 0.667339026927948, "learning_rate": 1.6973975637733412e-05, "loss": 0.15456390380859375, "step": 7054 }, { "epoch": 0.47688251994051645, "grad_norm": 0.9539164900779724, "learning_rate": 1.697070475547069e-05, "loss": 0.21187591552734375, "step": 7055 }, { "epoch": 0.47695011491145056, "grad_norm": 1.6832184791564941, "learning_rate": 1.6967433777853015e-05, "loss": 0.2249755859375, "step": 7056 }, { "epoch": 0.47701770988238473, "grad_norm": 1.3650044202804565, "learning_rate": 1.696416270503866e-05, "loss": 0.282562255859375, "step": 7057 }, { "epoch": 0.4770853048533189, "grad_norm": 1.0987164974212646, "learning_rate": 1.6960891537185894e-05, "loss": 0.2022552490234375, "step": 7058 }, { "epoch": 0.4771528998242531, "grad_norm": 0.8992971181869507, "learning_rate": 1.6957620274453003e-05, "loss": 0.113525390625, "step": 7059 }, { "epoch": 0.47722049479518724, "grad_norm": 1.5870994329452515, "learning_rate": 1.6954348916998268e-05, "loss": 0.349884033203125, "step": 7060 }, { "epoch": 0.4772880897661214, "grad_norm": 1.5356091260910034, "learning_rate": 1.695107746497998e-05, "loss": 0.26373291015625, "step": 7061 }, { "epoch": 0.4773556847370556, "grad_norm": 2.033846378326416, "learning_rate": 1.694780591855643e-05, "loss": 0.3099365234375, "step": 7062 }, { "epoch": 0.4774232797079897, "grad_norm": 1.2395641803741455, "learning_rate": 1.6944534277885916e-05, "loss": 0.238006591796875, "step": 7063 }, { "epoch": 0.47749087467892387, "grad_norm": 1.2536441087722778, "learning_rate": 1.6941262543126744e-05, "loss": 0.32379150390625, "step": 7064 }, { "epoch": 0.47755846964985804, "grad_norm": 1.5532879829406738, "learning_rate": 1.6937990714437216e-05, "loss": 0.275634765625, "step": 7065 }, { "epoch": 0.4776260646207922, "grad_norm": 2.1121644973754883, "learning_rate": 1.693471879197564e-05, "loss": 0.27349853515625, "step": 7066 }, { "epoch": 0.4776936595917264, "grad_norm": 1.409589171409607, "learning_rate": 1.6931446775900345e-05, "loss": 0.215972900390625, "step": 7067 }, { "epoch": 0.47776125456266055, "grad_norm": 1.9811400175094604, "learning_rate": 1.692817466636964e-05, "loss": 0.3006591796875, "step": 7068 }, { "epoch": 0.4778288495335947, "grad_norm": 1.0794330835342407, "learning_rate": 1.692490246354185e-05, "loss": 0.2598876953125, "step": 7069 }, { "epoch": 0.47789644450452884, "grad_norm": 1.3547208309173584, "learning_rate": 1.692163016757531e-05, "loss": 0.2161865234375, "step": 7070 }, { "epoch": 0.477964039475463, "grad_norm": 1.6288665533065796, "learning_rate": 1.691835777862835e-05, "loss": 0.2757568359375, "step": 7071 }, { "epoch": 0.4780316344463972, "grad_norm": 3.7127740383148193, "learning_rate": 1.6915085296859314e-05, "loss": 0.3814697265625, "step": 7072 }, { "epoch": 0.47809922941733135, "grad_norm": 1.1271655559539795, "learning_rate": 1.6911812722426543e-05, "loss": 0.2318115234375, "step": 7073 }, { "epoch": 0.4781668243882655, "grad_norm": 1.1951603889465332, "learning_rate": 1.690854005548838e-05, "loss": 0.292999267578125, "step": 7074 }, { "epoch": 0.4782344193591997, "grad_norm": 1.3357912302017212, "learning_rate": 1.6905267296203182e-05, "loss": 0.26434326171875, "step": 7075 }, { "epoch": 0.47830201433013386, "grad_norm": 0.946026086807251, "learning_rate": 1.6901994444729305e-05, "loss": 0.215423583984375, "step": 7076 }, { "epoch": 0.478369609301068, "grad_norm": 0.7018853425979614, "learning_rate": 1.6898721501225105e-05, "loss": 0.1480712890625, "step": 7077 }, { "epoch": 0.47843720427200215, "grad_norm": 1.94639253616333, "learning_rate": 1.689544846584895e-05, "loss": 0.20684814453125, "step": 7078 }, { "epoch": 0.4785047992429363, "grad_norm": 1.210717797279358, "learning_rate": 1.689217533875921e-05, "loss": 0.2401123046875, "step": 7079 }, { "epoch": 0.4785723942138705, "grad_norm": 0.8455948829650879, "learning_rate": 1.6888902120114265e-05, "loss": 0.176544189453125, "step": 7080 }, { "epoch": 0.47863998918480466, "grad_norm": 0.8044003248214722, "learning_rate": 1.6885628810072485e-05, "loss": 0.163421630859375, "step": 7081 }, { "epoch": 0.47870758415573883, "grad_norm": 2.2460973262786865, "learning_rate": 1.6882355408792256e-05, "loss": 0.288330078125, "step": 7082 }, { "epoch": 0.478775179126673, "grad_norm": 1.6166996955871582, "learning_rate": 1.6879081916431963e-05, "loss": 0.25244140625, "step": 7083 }, { "epoch": 0.4788427740976071, "grad_norm": 0.6315218210220337, "learning_rate": 1.687580833315001e-05, "loss": 0.148193359375, "step": 7084 }, { "epoch": 0.4789103690685413, "grad_norm": 1.0456856489181519, "learning_rate": 1.6872534659104777e-05, "loss": 0.2802276611328125, "step": 7085 }, { "epoch": 0.47897796403947546, "grad_norm": 1.5406293869018555, "learning_rate": 1.6869260894454678e-05, "loss": 0.32525634765625, "step": 7086 }, { "epoch": 0.4790455590104096, "grad_norm": 1.213462471961975, "learning_rate": 1.6865987039358106e-05, "loss": 0.261810302734375, "step": 7087 }, { "epoch": 0.4791131539813438, "grad_norm": 1.0544320344924927, "learning_rate": 1.6862713093973483e-05, "loss": 0.2900390625, "step": 7088 }, { "epoch": 0.47918074895227797, "grad_norm": 0.5380593538284302, "learning_rate": 1.6859439058459212e-05, "loss": 0.10870361328125, "step": 7089 }, { "epoch": 0.47924834392321214, "grad_norm": 2.2380409240722656, "learning_rate": 1.6856164932973714e-05, "loss": 0.31231689453125, "step": 7090 }, { "epoch": 0.47931593889414625, "grad_norm": 1.3273165225982666, "learning_rate": 1.6852890717675412e-05, "loss": 0.30029296875, "step": 7091 }, { "epoch": 0.4793835338650804, "grad_norm": 1.3547828197479248, "learning_rate": 1.6849616412722742e-05, "loss": 0.282958984375, "step": 7092 }, { "epoch": 0.4794511288360146, "grad_norm": 0.9503468871116638, "learning_rate": 1.6846342018274122e-05, "loss": 0.15447235107421875, "step": 7093 }, { "epoch": 0.47951872380694877, "grad_norm": 1.0434297323226929, "learning_rate": 1.6843067534487993e-05, "loss": 0.26318359375, "step": 7094 }, { "epoch": 0.47958631877788294, "grad_norm": 0.7221973538398743, "learning_rate": 1.6839792961522795e-05, "loss": 0.202606201171875, "step": 7095 }, { "epoch": 0.4796539137488171, "grad_norm": 0.9175110459327698, "learning_rate": 1.683651829953697e-05, "loss": 0.1772613525390625, "step": 7096 }, { "epoch": 0.4797215087197513, "grad_norm": 1.5669001340866089, "learning_rate": 1.683324354868897e-05, "loss": 0.31768798828125, "step": 7097 }, { "epoch": 0.4797891036906854, "grad_norm": 1.8944755792617798, "learning_rate": 1.682996870913725e-05, "loss": 0.2026824951171875, "step": 7098 }, { "epoch": 0.47985669866161956, "grad_norm": 1.8333287239074707, "learning_rate": 1.682669378104026e-05, "loss": 0.204925537109375, "step": 7099 }, { "epoch": 0.47992429363255373, "grad_norm": 1.0507148504257202, "learning_rate": 1.6823418764556466e-05, "loss": 0.1617584228515625, "step": 7100 }, { "epoch": 0.4799918886034879, "grad_norm": 1.1897615194320679, "learning_rate": 1.6820143659844334e-05, "loss": 0.310546875, "step": 7101 }, { "epoch": 0.4800594835744221, "grad_norm": 1.0242772102355957, "learning_rate": 1.681686846706233e-05, "loss": 0.232513427734375, "step": 7102 }, { "epoch": 0.48012707854535625, "grad_norm": 1.7186590433120728, "learning_rate": 1.6813593186368928e-05, "loss": 0.225555419921875, "step": 7103 }, { "epoch": 0.4801946735162904, "grad_norm": 0.9055353403091431, "learning_rate": 1.681031781792261e-05, "loss": 0.232666015625, "step": 7104 }, { "epoch": 0.48026226848722453, "grad_norm": 2.0510807037353516, "learning_rate": 1.680704236188186e-05, "loss": 0.248779296875, "step": 7105 }, { "epoch": 0.4803298634581587, "grad_norm": 1.5124351978302002, "learning_rate": 1.6803766818405162e-05, "loss": 0.335968017578125, "step": 7106 }, { "epoch": 0.48039745842909287, "grad_norm": 1.438179850578308, "learning_rate": 1.6800491187651006e-05, "loss": 0.29895782470703125, "step": 7107 }, { "epoch": 0.48046505340002704, "grad_norm": 1.9229341745376587, "learning_rate": 1.6797215469777893e-05, "loss": 0.2789306640625, "step": 7108 }, { "epoch": 0.4805326483709612, "grad_norm": 1.221867561340332, "learning_rate": 1.6793939664944318e-05, "loss": 0.230560302734375, "step": 7109 }, { "epoch": 0.4806002433418954, "grad_norm": 1.2251781225204468, "learning_rate": 1.6790663773308786e-05, "loss": 0.2322998046875, "step": 7110 }, { "epoch": 0.4806678383128295, "grad_norm": 1.0376980304718018, "learning_rate": 1.6787387795029806e-05, "loss": 0.2550048828125, "step": 7111 }, { "epoch": 0.48073543328376367, "grad_norm": 1.5471501350402832, "learning_rate": 1.6784111730265882e-05, "loss": 0.24322509765625, "step": 7112 }, { "epoch": 0.48080302825469784, "grad_norm": 1.2624006271362305, "learning_rate": 1.678083557917554e-05, "loss": 0.1793975830078125, "step": 7113 }, { "epoch": 0.480870623225632, "grad_norm": 1.196607232093811, "learning_rate": 1.6777559341917302e-05, "loss": 0.1916961669921875, "step": 7114 }, { "epoch": 0.4809382181965662, "grad_norm": 1.0906096696853638, "learning_rate": 1.6774283018649683e-05, "loss": 0.22003173828125, "step": 7115 }, { "epoch": 0.48100581316750035, "grad_norm": 0.6803476214408875, "learning_rate": 1.677100660953122e-05, "loss": 0.09368896484375, "step": 7116 }, { "epoch": 0.4810734081384345, "grad_norm": 1.4341208934783936, "learning_rate": 1.6767730114720446e-05, "loss": 0.26885986328125, "step": 7117 }, { "epoch": 0.48114100310936864, "grad_norm": 0.9735182523727417, "learning_rate": 1.676445353437589e-05, "loss": 0.2117156982421875, "step": 7118 }, { "epoch": 0.4812085980803028, "grad_norm": 2.5115206241607666, "learning_rate": 1.6761176868656105e-05, "loss": 0.3133544921875, "step": 7119 }, { "epoch": 0.481276193051237, "grad_norm": 2.1064019203186035, "learning_rate": 1.6757900117719627e-05, "loss": 0.322265625, "step": 7120 }, { "epoch": 0.48134378802217115, "grad_norm": 1.7569814920425415, "learning_rate": 1.6754623281725005e-05, "loss": 0.2573699951171875, "step": 7121 }, { "epoch": 0.4814113829931053, "grad_norm": 1.711076259613037, "learning_rate": 1.6751346360830803e-05, "loss": 0.182769775390625, "step": 7122 }, { "epoch": 0.4814789779640395, "grad_norm": 1.6520280838012695, "learning_rate": 1.6748069355195574e-05, "loss": 0.32086181640625, "step": 7123 }, { "epoch": 0.48154657293497366, "grad_norm": 1.312401294708252, "learning_rate": 1.6744792264977873e-05, "loss": 0.2176513671875, "step": 7124 }, { "epoch": 0.4816141679059078, "grad_norm": 0.8520826697349548, "learning_rate": 1.6741515090336274e-05, "loss": 0.22290802001953125, "step": 7125 }, { "epoch": 0.48168176287684195, "grad_norm": 1.5728973150253296, "learning_rate": 1.6738237831429346e-05, "loss": 0.27862548828125, "step": 7126 }, { "epoch": 0.4817493578477761, "grad_norm": 1.0127052068710327, "learning_rate": 1.673496048841566e-05, "loss": 0.201202392578125, "step": 7127 }, { "epoch": 0.4818169528187103, "grad_norm": 0.9206596612930298, "learning_rate": 1.6731683061453797e-05, "loss": 0.226806640625, "step": 7128 }, { "epoch": 0.48188454778964446, "grad_norm": 0.8644051551818848, "learning_rate": 1.6728405550702334e-05, "loss": 0.2294921875, "step": 7129 }, { "epoch": 0.48195214276057863, "grad_norm": 1.2131859064102173, "learning_rate": 1.6725127956319863e-05, "loss": 0.29901123046875, "step": 7130 }, { "epoch": 0.4820197377315128, "grad_norm": 1.3696463108062744, "learning_rate": 1.6721850278464978e-05, "loss": 0.32269287109375, "step": 7131 }, { "epoch": 0.4820873327024469, "grad_norm": 1.6692945957183838, "learning_rate": 1.6718572517296265e-05, "loss": 0.255859375, "step": 7132 }, { "epoch": 0.4821549276733811, "grad_norm": 1.0105458498001099, "learning_rate": 1.6715294672972324e-05, "loss": 0.231109619140625, "step": 7133 }, { "epoch": 0.48222252264431525, "grad_norm": 2.064197063446045, "learning_rate": 1.6712016745651763e-05, "loss": 0.333038330078125, "step": 7134 }, { "epoch": 0.4822901176152494, "grad_norm": 1.2346035242080688, "learning_rate": 1.6708738735493188e-05, "loss": 0.265899658203125, "step": 7135 }, { "epoch": 0.4823577125861836, "grad_norm": 1.284570574760437, "learning_rate": 1.6705460642655202e-05, "loss": 0.18701934814453125, "step": 7136 }, { "epoch": 0.48242530755711777, "grad_norm": 0.7089293003082275, "learning_rate": 1.6702182467296423e-05, "loss": 0.191680908203125, "step": 7137 }, { "epoch": 0.48249290252805194, "grad_norm": 1.5280256271362305, "learning_rate": 1.669890420957547e-05, "loss": 0.35296630859375, "step": 7138 }, { "epoch": 0.48256049749898605, "grad_norm": 1.4766619205474854, "learning_rate": 1.6695625869650965e-05, "loss": 0.27001953125, "step": 7139 }, { "epoch": 0.4826280924699202, "grad_norm": 1.7757247686386108, "learning_rate": 1.669234744768154e-05, "loss": 0.25885009765625, "step": 7140 }, { "epoch": 0.4826956874408544, "grad_norm": 1.3526197671890259, "learning_rate": 1.668906894382582e-05, "loss": 0.308868408203125, "step": 7141 }, { "epoch": 0.48276328241178856, "grad_norm": 1.011892318725586, "learning_rate": 1.668579035824244e-05, "loss": 0.21612548828125, "step": 7142 }, { "epoch": 0.48283087738272273, "grad_norm": 0.9112437963485718, "learning_rate": 1.6682511691090033e-05, "loss": 0.19622802734375, "step": 7143 }, { "epoch": 0.4828984723536569, "grad_norm": 1.2047022581100464, "learning_rate": 1.6679232942527254e-05, "loss": 0.28594970703125, "step": 7144 }, { "epoch": 0.4829660673245911, "grad_norm": 1.6148957014083862, "learning_rate": 1.6675954112712738e-05, "loss": 0.307220458984375, "step": 7145 }, { "epoch": 0.4830336622955252, "grad_norm": 1.1388663053512573, "learning_rate": 1.6672675201805144e-05, "loss": 0.226593017578125, "step": 7146 }, { "epoch": 0.48310125726645936, "grad_norm": 0.5964834094047546, "learning_rate": 1.666939620996312e-05, "loss": 0.11399078369140625, "step": 7147 }, { "epoch": 0.48316885223739353, "grad_norm": 1.0143013000488281, "learning_rate": 1.6666117137345325e-05, "loss": 0.3014373779296875, "step": 7148 }, { "epoch": 0.4832364472083277, "grad_norm": 2.217785596847534, "learning_rate": 1.666283798411042e-05, "loss": 0.37835693359375, "step": 7149 }, { "epoch": 0.4833040421792619, "grad_norm": 1.0392189025878906, "learning_rate": 1.6659558750417073e-05, "loss": 0.23675537109375, "step": 7150 }, { "epoch": 0.48337163715019604, "grad_norm": 1.2236093282699585, "learning_rate": 1.6656279436423952e-05, "loss": 0.1742095947265625, "step": 7151 }, { "epoch": 0.4834392321211302, "grad_norm": 1.0593945980072021, "learning_rate": 1.6653000042289734e-05, "loss": 0.2025146484375, "step": 7152 }, { "epoch": 0.48350682709206433, "grad_norm": 1.1874754428863525, "learning_rate": 1.664972056817309e-05, "loss": 0.25750732421875, "step": 7153 }, { "epoch": 0.4835744220629985, "grad_norm": 1.2231477499008179, "learning_rate": 1.6646441014232704e-05, "loss": 0.29931640625, "step": 7154 }, { "epoch": 0.48364201703393267, "grad_norm": 0.6416627764701843, "learning_rate": 1.6643161380627272e-05, "loss": 0.1293487548828125, "step": 7155 }, { "epoch": 0.48370961200486684, "grad_norm": 0.9311603307723999, "learning_rate": 1.6639881667515466e-05, "loss": 0.23236846923828125, "step": 7156 }, { "epoch": 0.483777206975801, "grad_norm": 1.1531598567962646, "learning_rate": 1.663660187505599e-05, "loss": 0.2147216796875, "step": 7157 }, { "epoch": 0.4838448019467352, "grad_norm": 1.3998315334320068, "learning_rate": 1.6633322003407535e-05, "loss": 0.29095458984375, "step": 7158 }, { "epoch": 0.48391239691766935, "grad_norm": 1.3046766519546509, "learning_rate": 1.6630042052728807e-05, "loss": 0.26715087890625, "step": 7159 }, { "epoch": 0.48397999188860347, "grad_norm": 1.541144847869873, "learning_rate": 1.6626762023178505e-05, "loss": 0.3131103515625, "step": 7160 }, { "epoch": 0.48404758685953764, "grad_norm": 1.4858473539352417, "learning_rate": 1.6623481914915342e-05, "loss": 0.302459716796875, "step": 7161 }, { "epoch": 0.4841151818304718, "grad_norm": 1.6649644374847412, "learning_rate": 1.6620201728098025e-05, "loss": 0.32550048828125, "step": 7162 }, { "epoch": 0.484182776801406, "grad_norm": 1.2022502422332764, "learning_rate": 1.661692146288527e-05, "loss": 0.19268798828125, "step": 7163 }, { "epoch": 0.48425037177234015, "grad_norm": 1.0643271207809448, "learning_rate": 1.6613641119435805e-05, "loss": 0.21759033203125, "step": 7164 }, { "epoch": 0.4843179667432743, "grad_norm": 1.4835519790649414, "learning_rate": 1.6610360697908345e-05, "loss": 0.349945068359375, "step": 7165 }, { "epoch": 0.4843855617142085, "grad_norm": 1.2720266580581665, "learning_rate": 1.6607080198461623e-05, "loss": 0.2071685791015625, "step": 7166 }, { "epoch": 0.4844531566851426, "grad_norm": 1.7539985179901123, "learning_rate": 1.6603799621254356e-05, "loss": 0.29632568359375, "step": 7167 }, { "epoch": 0.4845207516560768, "grad_norm": 0.9574828743934631, "learning_rate": 1.6600518966445298e-05, "loss": 0.231292724609375, "step": 7168 }, { "epoch": 0.48458834662701095, "grad_norm": 1.1056578159332275, "learning_rate": 1.6597238234193183e-05, "loss": 0.260101318359375, "step": 7169 }, { "epoch": 0.4846559415979451, "grad_norm": 0.7156451344490051, "learning_rate": 1.659395742465674e-05, "loss": 0.11536407470703125, "step": 7170 }, { "epoch": 0.4847235365688793, "grad_norm": 0.9596807360649109, "learning_rate": 1.659067653799473e-05, "loss": 0.1799774169921875, "step": 7171 }, { "epoch": 0.48479113153981346, "grad_norm": 1.0295801162719727, "learning_rate": 1.6587395574365892e-05, "loss": 0.2728271484375, "step": 7172 }, { "epoch": 0.4848587265107476, "grad_norm": 0.8469823598861694, "learning_rate": 1.658411453392899e-05, "loss": 0.14249420166015625, "step": 7173 }, { "epoch": 0.48492632148168174, "grad_norm": 1.207513451576233, "learning_rate": 1.658083341684277e-05, "loss": 0.203765869140625, "step": 7174 }, { "epoch": 0.4849939164526159, "grad_norm": 0.8873195648193359, "learning_rate": 1.6577552223265997e-05, "loss": 0.19476318359375, "step": 7175 }, { "epoch": 0.4850615114235501, "grad_norm": 0.8101474642753601, "learning_rate": 1.6574270953357442e-05, "loss": 0.1907958984375, "step": 7176 }, { "epoch": 0.48512910639448426, "grad_norm": 1.2517589330673218, "learning_rate": 1.6570989607275865e-05, "loss": 0.262451171875, "step": 7177 }, { "epoch": 0.4851967013654184, "grad_norm": 1.2634613513946533, "learning_rate": 1.6567708185180046e-05, "loss": 0.207183837890625, "step": 7178 }, { "epoch": 0.4852642963363526, "grad_norm": 0.9215534329414368, "learning_rate": 1.6564426687228748e-05, "loss": 0.204010009765625, "step": 7179 }, { "epoch": 0.4853318913072867, "grad_norm": 0.9816086888313293, "learning_rate": 1.656114511358076e-05, "loss": 0.168060302734375, "step": 7180 }, { "epoch": 0.4853994862782209, "grad_norm": 0.9057413339614868, "learning_rate": 1.655786346439487e-05, "loss": 0.248321533203125, "step": 7181 }, { "epoch": 0.48546708124915505, "grad_norm": 0.9698501229286194, "learning_rate": 1.655458173982985e-05, "loss": 0.220947265625, "step": 7182 }, { "epoch": 0.4855346762200892, "grad_norm": 1.0936905145645142, "learning_rate": 1.6551299940044496e-05, "loss": 0.1288909912109375, "step": 7183 }, { "epoch": 0.4856022711910234, "grad_norm": 1.3692673444747925, "learning_rate": 1.6548018065197607e-05, "loss": 0.21624755859375, "step": 7184 }, { "epoch": 0.48566986616195756, "grad_norm": 1.6880340576171875, "learning_rate": 1.6544736115447978e-05, "loss": 0.233306884765625, "step": 7185 }, { "epoch": 0.48573746113289173, "grad_norm": 1.0991432666778564, "learning_rate": 1.654145409095441e-05, "loss": 0.2536468505859375, "step": 7186 }, { "epoch": 0.48580505610382585, "grad_norm": 2.4717977046966553, "learning_rate": 1.65381719918757e-05, "loss": 0.361328125, "step": 7187 }, { "epoch": 0.48587265107476, "grad_norm": 1.234113335609436, "learning_rate": 1.6534889818370667e-05, "loss": 0.2197265625, "step": 7188 }, { "epoch": 0.4859402460456942, "grad_norm": 1.3090662956237793, "learning_rate": 1.653160757059812e-05, "loss": 0.292572021484375, "step": 7189 }, { "epoch": 0.48600784101662836, "grad_norm": 1.4826539754867554, "learning_rate": 1.6528325248716874e-05, "loss": 0.3095703125, "step": 7190 }, { "epoch": 0.48607543598756253, "grad_norm": 1.6249206066131592, "learning_rate": 1.652504285288575e-05, "loss": 0.3045654296875, "step": 7191 }, { "epoch": 0.4861430309584967, "grad_norm": 1.1887397766113281, "learning_rate": 1.6521760383263563e-05, "loss": 0.2830810546875, "step": 7192 }, { "epoch": 0.4862106259294309, "grad_norm": 1.484131097793579, "learning_rate": 1.6518477840009152e-05, "loss": 0.266632080078125, "step": 7193 }, { "epoch": 0.486278220900365, "grad_norm": 1.4660297632217407, "learning_rate": 1.6515195223281334e-05, "loss": 0.187744140625, "step": 7194 }, { "epoch": 0.48634581587129916, "grad_norm": 1.321024775505066, "learning_rate": 1.6511912533238953e-05, "loss": 0.253814697265625, "step": 7195 }, { "epoch": 0.48641341084223333, "grad_norm": 0.5531312227249146, "learning_rate": 1.6508629770040833e-05, "loss": 0.109954833984375, "step": 7196 }, { "epoch": 0.4864810058131675, "grad_norm": 1.1030036211013794, "learning_rate": 1.650534693384583e-05, "loss": 0.26708984375, "step": 7197 }, { "epoch": 0.48654860078410167, "grad_norm": 0.9657611846923828, "learning_rate": 1.650206402481278e-05, "loss": 0.186798095703125, "step": 7198 }, { "epoch": 0.48661619575503584, "grad_norm": 1.3987488746643066, "learning_rate": 1.649878104310053e-05, "loss": 0.30303955078125, "step": 7199 }, { "epoch": 0.48668379072597, "grad_norm": 1.158065915107727, "learning_rate": 1.6495497988867926e-05, "loss": 0.201751708984375, "step": 7200 }, { "epoch": 0.4867513856969041, "grad_norm": 1.9574453830718994, "learning_rate": 1.6492214862273837e-05, "loss": 0.229949951171875, "step": 7201 }, { "epoch": 0.4868189806678383, "grad_norm": 0.9897100925445557, "learning_rate": 1.648893166347711e-05, "loss": 0.213775634765625, "step": 7202 }, { "epoch": 0.48688657563877247, "grad_norm": 0.8858935832977295, "learning_rate": 1.6485648392636615e-05, "loss": 0.180999755859375, "step": 7203 }, { "epoch": 0.48695417060970664, "grad_norm": 1.534507155418396, "learning_rate": 1.6482365049911207e-05, "loss": 0.26922607421875, "step": 7204 }, { "epoch": 0.4870217655806408, "grad_norm": 1.4230029582977295, "learning_rate": 1.647908163545976e-05, "loss": 0.2459716796875, "step": 7205 }, { "epoch": 0.487089360551575, "grad_norm": 1.2178677320480347, "learning_rate": 1.6475798149441142e-05, "loss": 0.18776702880859375, "step": 7206 }, { "epoch": 0.48715695552250915, "grad_norm": 1.2245820760726929, "learning_rate": 1.6472514592014238e-05, "loss": 0.256256103515625, "step": 7207 }, { "epoch": 0.48722455049344326, "grad_norm": 1.1100202798843384, "learning_rate": 1.6469230963337913e-05, "loss": 0.3048095703125, "step": 7208 }, { "epoch": 0.48729214546437744, "grad_norm": 1.3221266269683838, "learning_rate": 1.6465947263571062e-05, "loss": 0.28167724609375, "step": 7209 }, { "epoch": 0.4873597404353116, "grad_norm": 1.5334595441818237, "learning_rate": 1.6462663492872568e-05, "loss": 0.27862548828125, "step": 7210 }, { "epoch": 0.4874273354062458, "grad_norm": 1.3588919639587402, "learning_rate": 1.6459379651401313e-05, "loss": 0.2957763671875, "step": 7211 }, { "epoch": 0.48749493037717995, "grad_norm": 1.0597174167633057, "learning_rate": 1.6456095739316194e-05, "loss": 0.250335693359375, "step": 7212 }, { "epoch": 0.4875625253481141, "grad_norm": 1.6381444931030273, "learning_rate": 1.6452811756776108e-05, "loss": 0.3193359375, "step": 7213 }, { "epoch": 0.4876301203190483, "grad_norm": 1.100021243095398, "learning_rate": 1.6449527703939958e-05, "loss": 0.281890869140625, "step": 7214 }, { "epoch": 0.4876977152899824, "grad_norm": 1.56055748462677, "learning_rate": 1.644624358096664e-05, "loss": 0.196624755859375, "step": 7215 }, { "epoch": 0.4877653102609166, "grad_norm": 1.1622512340545654, "learning_rate": 1.6442959388015066e-05, "loss": 0.273284912109375, "step": 7216 }, { "epoch": 0.48783290523185074, "grad_norm": 1.3159226179122925, "learning_rate": 1.643967512524414e-05, "loss": 0.278076171875, "step": 7217 }, { "epoch": 0.4879005002027849, "grad_norm": 0.68651282787323, "learning_rate": 1.6436390792812783e-05, "loss": 0.1700286865234375, "step": 7218 }, { "epoch": 0.4879680951737191, "grad_norm": 1.318476915359497, "learning_rate": 1.6433106390879905e-05, "loss": 0.309234619140625, "step": 7219 }, { "epoch": 0.48803569014465326, "grad_norm": 1.3425372838974, "learning_rate": 1.6429821919604427e-05, "loss": 0.251708984375, "step": 7220 }, { "epoch": 0.4881032851155874, "grad_norm": 1.5170412063598633, "learning_rate": 1.642653737914527e-05, "loss": 0.222412109375, "step": 7221 }, { "epoch": 0.48817088008652154, "grad_norm": 0.9585236310958862, "learning_rate": 1.6423252769661364e-05, "loss": 0.2027740478515625, "step": 7222 }, { "epoch": 0.4882384750574557, "grad_norm": 1.950035572052002, "learning_rate": 1.641996809131164e-05, "loss": 0.3360595703125, "step": 7223 }, { "epoch": 0.4883060700283899, "grad_norm": 2.3227028846740723, "learning_rate": 1.6416683344255025e-05, "loss": 0.3247222900390625, "step": 7224 }, { "epoch": 0.48837366499932405, "grad_norm": 1.9863938093185425, "learning_rate": 1.6413398528650463e-05, "loss": 0.26654052734375, "step": 7225 }, { "epoch": 0.4884412599702582, "grad_norm": 1.3262754678726196, "learning_rate": 1.641011364465689e-05, "loss": 0.25970458984375, "step": 7226 }, { "epoch": 0.4885088549411924, "grad_norm": 2.051913261413574, "learning_rate": 1.6406828692433246e-05, "loss": 0.33660888671875, "step": 7227 }, { "epoch": 0.48857644991212656, "grad_norm": 1.550772786140442, "learning_rate": 1.640354367213849e-05, "loss": 0.23980712890625, "step": 7228 }, { "epoch": 0.4886440448830607, "grad_norm": 1.2763235569000244, "learning_rate": 1.6400258583931552e-05, "loss": 0.260589599609375, "step": 7229 }, { "epoch": 0.48871163985399485, "grad_norm": 1.6523417234420776, "learning_rate": 1.63969734279714e-05, "loss": 0.197540283203125, "step": 7230 }, { "epoch": 0.488779234824929, "grad_norm": 1.568930983543396, "learning_rate": 1.6393688204416988e-05, "loss": 0.26715087890625, "step": 7231 }, { "epoch": 0.4888468297958632, "grad_norm": 1.189367413520813, "learning_rate": 1.6390402913427273e-05, "loss": 0.264434814453125, "step": 7232 }, { "epoch": 0.48891442476679736, "grad_norm": 1.065519094467163, "learning_rate": 1.6387117555161217e-05, "loss": 0.266448974609375, "step": 7233 }, { "epoch": 0.48898201973773153, "grad_norm": 1.1101255416870117, "learning_rate": 1.6383832129777785e-05, "loss": 0.2779541015625, "step": 7234 }, { "epoch": 0.48904961470866565, "grad_norm": 2.283334970474243, "learning_rate": 1.638054663743595e-05, "loss": 0.37774658203125, "step": 7235 }, { "epoch": 0.4891172096795998, "grad_norm": 1.1890445947647095, "learning_rate": 1.6377261078294684e-05, "loss": 0.18353271484375, "step": 7236 }, { "epoch": 0.489184804650534, "grad_norm": 1.0983518362045288, "learning_rate": 1.6373975452512964e-05, "loss": 0.2244110107421875, "step": 7237 }, { "epoch": 0.48925239962146816, "grad_norm": 1.1460012197494507, "learning_rate": 1.637068976024976e-05, "loss": 0.26470947265625, "step": 7238 }, { "epoch": 0.48931999459240233, "grad_norm": 1.5752424001693726, "learning_rate": 1.6367404001664067e-05, "loss": 0.283477783203125, "step": 7239 }, { "epoch": 0.4893875895633365, "grad_norm": 0.6123769283294678, "learning_rate": 1.6364118176914866e-05, "loss": 0.1110076904296875, "step": 7240 }, { "epoch": 0.48945518453427067, "grad_norm": 1.3010485172271729, "learning_rate": 1.6360832286161145e-05, "loss": 0.27471923828125, "step": 7241 }, { "epoch": 0.4895227795052048, "grad_norm": 0.7892248630523682, "learning_rate": 1.6357546329561888e-05, "loss": 0.16912841796875, "step": 7242 }, { "epoch": 0.48959037447613896, "grad_norm": 1.5905567407608032, "learning_rate": 1.6354260307276103e-05, "loss": 0.248077392578125, "step": 7243 }, { "epoch": 0.4896579694470731, "grad_norm": 0.9092161059379578, "learning_rate": 1.635097421946278e-05, "loss": 0.22442626953125, "step": 7244 }, { "epoch": 0.4897255644180073, "grad_norm": 1.049790620803833, "learning_rate": 1.6347688066280924e-05, "loss": 0.2601165771484375, "step": 7245 }, { "epoch": 0.48979315938894147, "grad_norm": 1.3038640022277832, "learning_rate": 1.6344401847889533e-05, "loss": 0.240753173828125, "step": 7246 }, { "epoch": 0.48986075435987564, "grad_norm": 0.7859784364700317, "learning_rate": 1.6341115564447624e-05, "loss": 0.165557861328125, "step": 7247 }, { "epoch": 0.4899283493308098, "grad_norm": 1.4324311017990112, "learning_rate": 1.6337829216114203e-05, "loss": 0.315521240234375, "step": 7248 }, { "epoch": 0.4899959443017439, "grad_norm": 0.6899241805076599, "learning_rate": 1.6334542803048287e-05, "loss": 0.13318634033203125, "step": 7249 }, { "epoch": 0.4900635392726781, "grad_norm": 0.7406214475631714, "learning_rate": 1.633125632540889e-05, "loss": 0.176544189453125, "step": 7250 }, { "epoch": 0.49013113424361227, "grad_norm": 1.242358684539795, "learning_rate": 1.6327969783355032e-05, "loss": 0.24725341796875, "step": 7251 }, { "epoch": 0.49019872921454644, "grad_norm": 2.004936695098877, "learning_rate": 1.6324683177045736e-05, "loss": 0.2330780029296875, "step": 7252 }, { "epoch": 0.4902663241854806, "grad_norm": 0.9981931447982788, "learning_rate": 1.6321396506640033e-05, "loss": 0.202392578125, "step": 7253 }, { "epoch": 0.4903339191564148, "grad_norm": 1.5397207736968994, "learning_rate": 1.6318109772296943e-05, "loss": 0.226226806640625, "step": 7254 }, { "epoch": 0.49040151412734895, "grad_norm": 1.1018133163452148, "learning_rate": 1.631482297417551e-05, "loss": 0.27838134765625, "step": 7255 }, { "epoch": 0.49046910909828306, "grad_norm": 0.9182684421539307, "learning_rate": 1.6311536112434764e-05, "loss": 0.18536376953125, "step": 7256 }, { "epoch": 0.49053670406921723, "grad_norm": 0.7674893140792847, "learning_rate": 1.6308249187233744e-05, "loss": 0.1324615478515625, "step": 7257 }, { "epoch": 0.4906042990401514, "grad_norm": 1.8988739252090454, "learning_rate": 1.6304962198731493e-05, "loss": 0.2870635986328125, "step": 7258 }, { "epoch": 0.4906718940110856, "grad_norm": 1.1703797578811646, "learning_rate": 1.6301675147087053e-05, "loss": 0.290008544921875, "step": 7259 }, { "epoch": 0.49073948898201974, "grad_norm": 1.1357183456420898, "learning_rate": 1.629838803245948e-05, "loss": 0.330078125, "step": 7260 }, { "epoch": 0.4908070839529539, "grad_norm": 1.1996394395828247, "learning_rate": 1.6295100855007816e-05, "loss": 0.233428955078125, "step": 7261 }, { "epoch": 0.4908746789238881, "grad_norm": 2.330366611480713, "learning_rate": 1.6291813614891118e-05, "loss": 0.26422119140625, "step": 7262 }, { "epoch": 0.4909422738948222, "grad_norm": 0.5071462988853455, "learning_rate": 1.6288526312268446e-05, "loss": 0.1143646240234375, "step": 7263 }, { "epoch": 0.49100986886575637, "grad_norm": 1.3108253479003906, "learning_rate": 1.6285238947298855e-05, "loss": 0.20904541015625, "step": 7264 }, { "epoch": 0.49107746383669054, "grad_norm": 1.2303197383880615, "learning_rate": 1.6281951520141412e-05, "loss": 0.2467041015625, "step": 7265 }, { "epoch": 0.4911450588076247, "grad_norm": 0.7102958559989929, "learning_rate": 1.627866403095518e-05, "loss": 0.15081787109375, "step": 7266 }, { "epoch": 0.4912126537785589, "grad_norm": 1.5391924381256104, "learning_rate": 1.6275376479899233e-05, "loss": 0.141937255859375, "step": 7267 }, { "epoch": 0.49128024874949305, "grad_norm": 1.304362416267395, "learning_rate": 1.627208886713264e-05, "loss": 0.202178955078125, "step": 7268 }, { "epoch": 0.4913478437204272, "grad_norm": 1.5788142681121826, "learning_rate": 1.6268801192814476e-05, "loss": 0.2033233642578125, "step": 7269 }, { "epoch": 0.49141543869136134, "grad_norm": 0.757673442363739, "learning_rate": 1.6265513457103818e-05, "loss": 0.1628570556640625, "step": 7270 }, { "epoch": 0.4914830336622955, "grad_norm": 2.0697431564331055, "learning_rate": 1.626222566015975e-05, "loss": 0.34552001953125, "step": 7271 }, { "epoch": 0.4915506286332297, "grad_norm": 1.346733808517456, "learning_rate": 1.6258937802141355e-05, "loss": 0.29998779296875, "step": 7272 }, { "epoch": 0.49161822360416385, "grad_norm": 1.4406472444534302, "learning_rate": 1.625564988320772e-05, "loss": 0.255218505859375, "step": 7273 }, { "epoch": 0.491685818575098, "grad_norm": 1.2487581968307495, "learning_rate": 1.6252361903517936e-05, "loss": 0.2501220703125, "step": 7274 }, { "epoch": 0.4917534135460322, "grad_norm": 1.152395248413086, "learning_rate": 1.6249073863231093e-05, "loss": 0.24505615234375, "step": 7275 }, { "epoch": 0.49182100851696636, "grad_norm": 1.2732230424880981, "learning_rate": 1.624578576250629e-05, "loss": 0.23565673828125, "step": 7276 }, { "epoch": 0.4918886034879005, "grad_norm": 1.2389607429504395, "learning_rate": 1.6242497601502627e-05, "loss": 0.1983642578125, "step": 7277 }, { "epoch": 0.49195619845883465, "grad_norm": 1.07180655002594, "learning_rate": 1.62392093803792e-05, "loss": 0.239227294921875, "step": 7278 }, { "epoch": 0.4920237934297688, "grad_norm": 1.80379319190979, "learning_rate": 1.6235921099295117e-05, "loss": 0.297027587890625, "step": 7279 }, { "epoch": 0.492091388400703, "grad_norm": 0.9472713470458984, "learning_rate": 1.6232632758409484e-05, "loss": 0.19207763671875, "step": 7280 }, { "epoch": 0.49215898337163716, "grad_norm": 1.4872666597366333, "learning_rate": 1.6229344357881413e-05, "loss": 0.309661865234375, "step": 7281 }, { "epoch": 0.49222657834257133, "grad_norm": 0.7455207109451294, "learning_rate": 1.6226055897870015e-05, "loss": 0.16327667236328125, "step": 7282 }, { "epoch": 0.4922941733135055, "grad_norm": 0.893076479434967, "learning_rate": 1.6222767378534413e-05, "loss": 0.1824493408203125, "step": 7283 }, { "epoch": 0.4923617682844396, "grad_norm": 0.8099800944328308, "learning_rate": 1.6219478800033714e-05, "loss": 0.114837646484375, "step": 7284 }, { "epoch": 0.4924293632553738, "grad_norm": 0.7767333984375, "learning_rate": 1.621619016252705e-05, "loss": 0.19622802734375, "step": 7285 }, { "epoch": 0.49249695822630796, "grad_norm": 1.2392932176589966, "learning_rate": 1.621290146617355e-05, "loss": 0.318206787109375, "step": 7286 }, { "epoch": 0.4925645531972421, "grad_norm": 1.3429728746414185, "learning_rate": 1.6209612711132326e-05, "loss": 0.3155517578125, "step": 7287 }, { "epoch": 0.4926321481681763, "grad_norm": 1.5915085077285767, "learning_rate": 1.6206323897562515e-05, "loss": 0.307861328125, "step": 7288 }, { "epoch": 0.49269974313911047, "grad_norm": 1.010998249053955, "learning_rate": 1.6203035025623254e-05, "loss": 0.17962646484375, "step": 7289 }, { "epoch": 0.49276733811004464, "grad_norm": 1.0045890808105469, "learning_rate": 1.6199746095473677e-05, "loss": 0.24013519287109375, "step": 7290 }, { "epoch": 0.49283493308097875, "grad_norm": 0.7869091033935547, "learning_rate": 1.6196457107272924e-05, "loss": 0.1715545654296875, "step": 7291 }, { "epoch": 0.4929025280519129, "grad_norm": 0.7638562917709351, "learning_rate": 1.619316806118013e-05, "loss": 0.227691650390625, "step": 7292 }, { "epoch": 0.4929701230228471, "grad_norm": 0.9057885408401489, "learning_rate": 1.6189878957354454e-05, "loss": 0.222137451171875, "step": 7293 }, { "epoch": 0.49303771799378127, "grad_norm": 1.5918995141983032, "learning_rate": 1.618658979595503e-05, "loss": 0.173583984375, "step": 7294 }, { "epoch": 0.49310531296471544, "grad_norm": 1.3778443336486816, "learning_rate": 1.618330057714101e-05, "loss": 0.31549072265625, "step": 7295 }, { "epoch": 0.4931729079356496, "grad_norm": 1.2213705778121948, "learning_rate": 1.6180011301071553e-05, "loss": 0.218658447265625, "step": 7296 }, { "epoch": 0.4932405029065838, "grad_norm": 0.8488979339599609, "learning_rate": 1.6176721967905807e-05, "loss": 0.15442657470703125, "step": 7297 }, { "epoch": 0.4933080978775179, "grad_norm": 0.857176661491394, "learning_rate": 1.617343257780294e-05, "loss": 0.184661865234375, "step": 7298 }, { "epoch": 0.49337569284845206, "grad_norm": 1.0126190185546875, "learning_rate": 1.6170143130922102e-05, "loss": 0.295166015625, "step": 7299 }, { "epoch": 0.49344328781938623, "grad_norm": 1.2838705778121948, "learning_rate": 1.616685362742246e-05, "loss": 0.2039947509765625, "step": 7300 }, { "epoch": 0.4935108827903204, "grad_norm": 1.0641921758651733, "learning_rate": 1.616356406746319e-05, "loss": 0.224609375, "step": 7301 }, { "epoch": 0.4935784777612546, "grad_norm": 1.387137532234192, "learning_rate": 1.6160274451203454e-05, "loss": 0.29449462890625, "step": 7302 }, { "epoch": 0.49364607273218875, "grad_norm": 1.2972856760025024, "learning_rate": 1.615698477880242e-05, "loss": 0.18505859375, "step": 7303 }, { "epoch": 0.49371366770312286, "grad_norm": 0.9845162630081177, "learning_rate": 1.615369505041927e-05, "loss": 0.20221710205078125, "step": 7304 }, { "epoch": 0.49378126267405703, "grad_norm": 1.065803050994873, "learning_rate": 1.6150405266213176e-05, "loss": 0.23895263671875, "step": 7305 }, { "epoch": 0.4938488576449912, "grad_norm": 1.1420377492904663, "learning_rate": 1.6147115426343322e-05, "loss": 0.153289794921875, "step": 7306 }, { "epoch": 0.49391645261592537, "grad_norm": 1.5626702308654785, "learning_rate": 1.6143825530968893e-05, "loss": 0.313323974609375, "step": 7307 }, { "epoch": 0.49398404758685954, "grad_norm": 0.876642107963562, "learning_rate": 1.614053558024907e-05, "loss": 0.198272705078125, "step": 7308 }, { "epoch": 0.4940516425577937, "grad_norm": 1.0482808351516724, "learning_rate": 1.6137245574343035e-05, "loss": 0.175872802734375, "step": 7309 }, { "epoch": 0.4941192375287279, "grad_norm": 1.3375146389007568, "learning_rate": 1.613395551341e-05, "loss": 0.223419189453125, "step": 7310 }, { "epoch": 0.494186832499662, "grad_norm": 2.0808916091918945, "learning_rate": 1.6130665397609136e-05, "loss": 0.30682373046875, "step": 7311 }, { "epoch": 0.49425442747059617, "grad_norm": 2.5848066806793213, "learning_rate": 1.6127375227099653e-05, "loss": 0.30682373046875, "step": 7312 }, { "epoch": 0.49432202244153034, "grad_norm": 1.812197208404541, "learning_rate": 1.612408500204074e-05, "loss": 0.28424072265625, "step": 7313 }, { "epoch": 0.4943896174124645, "grad_norm": 1.259722113609314, "learning_rate": 1.612079472259161e-05, "loss": 0.1860198974609375, "step": 7314 }, { "epoch": 0.4944572123833987, "grad_norm": 0.9351344704627991, "learning_rate": 1.6117504388911458e-05, "loss": 0.21319580078125, "step": 7315 }, { "epoch": 0.49452480735433285, "grad_norm": 1.2411102056503296, "learning_rate": 1.6114214001159494e-05, "loss": 0.2111663818359375, "step": 7316 }, { "epoch": 0.494592402325267, "grad_norm": 1.7418880462646484, "learning_rate": 1.6110923559494926e-05, "loss": 0.246063232421875, "step": 7317 }, { "epoch": 0.49465999729620114, "grad_norm": 0.864647388458252, "learning_rate": 1.610763306407697e-05, "loss": 0.16312408447265625, "step": 7318 }, { "epoch": 0.4947275922671353, "grad_norm": 1.0108171701431274, "learning_rate": 1.6104342515064837e-05, "loss": 0.29754638671875, "step": 7319 }, { "epoch": 0.4947951872380695, "grad_norm": 1.2725987434387207, "learning_rate": 1.6101051912617746e-05, "loss": 0.27630615234375, "step": 7320 }, { "epoch": 0.49486278220900365, "grad_norm": 1.001508355140686, "learning_rate": 1.609776125689492e-05, "loss": 0.2440185546875, "step": 7321 }, { "epoch": 0.4949303771799378, "grad_norm": 1.447710633277893, "learning_rate": 1.609447054805557e-05, "loss": 0.3078765869140625, "step": 7322 }, { "epoch": 0.494997972150872, "grad_norm": 0.7315608263015747, "learning_rate": 1.6091179786258935e-05, "loss": 0.1898040771484375, "step": 7323 }, { "epoch": 0.49506556712180616, "grad_norm": 0.9383994340896606, "learning_rate": 1.6087888971664233e-05, "loss": 0.1957244873046875, "step": 7324 }, { "epoch": 0.4951331620927403, "grad_norm": 1.9986628293991089, "learning_rate": 1.6084598104430695e-05, "loss": 0.2528839111328125, "step": 7325 }, { "epoch": 0.49520075706367445, "grad_norm": 1.8774222135543823, "learning_rate": 1.6081307184717554e-05, "loss": 0.2794189453125, "step": 7326 }, { "epoch": 0.4952683520346086, "grad_norm": 1.1959370374679565, "learning_rate": 1.607801621268405e-05, "loss": 0.27593994140625, "step": 7327 }, { "epoch": 0.4953359470055428, "grad_norm": 0.9998138546943665, "learning_rate": 1.607472518848942e-05, "loss": 0.215911865234375, "step": 7328 }, { "epoch": 0.49540354197647696, "grad_norm": 1.6308486461639404, "learning_rate": 1.60714341122929e-05, "loss": 0.2265625, "step": 7329 }, { "epoch": 0.49547113694741113, "grad_norm": 0.8946141600608826, "learning_rate": 1.606814298425373e-05, "loss": 0.17498779296875, "step": 7330 }, { "epoch": 0.4955387319183453, "grad_norm": 1.4725847244262695, "learning_rate": 1.6064851804531167e-05, "loss": 0.294952392578125, "step": 7331 }, { "epoch": 0.4956063268892794, "grad_norm": 1.353726863861084, "learning_rate": 1.6061560573284448e-05, "loss": 0.3427734375, "step": 7332 }, { "epoch": 0.4956739218602136, "grad_norm": 0.5676902532577515, "learning_rate": 1.6058269290672827e-05, "loss": 0.110504150390625, "step": 7333 }, { "epoch": 0.49574151683114775, "grad_norm": 1.6123536825180054, "learning_rate": 1.6054977956855555e-05, "loss": 0.3751220703125, "step": 7334 }, { "epoch": 0.4958091118020819, "grad_norm": 0.7465479969978333, "learning_rate": 1.605168657199189e-05, "loss": 0.168304443359375, "step": 7335 }, { "epoch": 0.4958767067730161, "grad_norm": 1.6955444812774658, "learning_rate": 1.604839513624109e-05, "loss": 0.26491546630859375, "step": 7336 }, { "epoch": 0.49594430174395027, "grad_norm": 1.5660817623138428, "learning_rate": 1.604510364976241e-05, "loss": 0.332275390625, "step": 7337 }, { "epoch": 0.49601189671488444, "grad_norm": 1.1941397190093994, "learning_rate": 1.6041812112715114e-05, "loss": 0.296966552734375, "step": 7338 }, { "epoch": 0.49607949168581855, "grad_norm": 1.9725441932678223, "learning_rate": 1.6038520525258477e-05, "loss": 0.31781005859375, "step": 7339 }, { "epoch": 0.4961470866567527, "grad_norm": 1.0302760601043701, "learning_rate": 1.6035228887551756e-05, "loss": 0.20220947265625, "step": 7340 }, { "epoch": 0.4962146816276869, "grad_norm": 1.2640681266784668, "learning_rate": 1.603193719975422e-05, "loss": 0.220855712890625, "step": 7341 }, { "epoch": 0.49628227659862106, "grad_norm": 2.4980382919311523, "learning_rate": 1.6028645462025147e-05, "loss": 0.2894287109375, "step": 7342 }, { "epoch": 0.49634987156955523, "grad_norm": 0.9976871609687805, "learning_rate": 1.6025353674523804e-05, "loss": 0.24200439453125, "step": 7343 }, { "epoch": 0.4964174665404894, "grad_norm": 1.2233728170394897, "learning_rate": 1.602206183740948e-05, "loss": 0.292144775390625, "step": 7344 }, { "epoch": 0.4964850615114236, "grad_norm": 1.2028447389602661, "learning_rate": 1.6018769950841448e-05, "loss": 0.21368408203125, "step": 7345 }, { "epoch": 0.4965526564823577, "grad_norm": 1.3149082660675049, "learning_rate": 1.6015478014978987e-05, "loss": 0.3291015625, "step": 7346 }, { "epoch": 0.49662025145329186, "grad_norm": 1.175262212753296, "learning_rate": 1.601218602998139e-05, "loss": 0.2666473388671875, "step": 7347 }, { "epoch": 0.49668784642422603, "grad_norm": 0.9880493879318237, "learning_rate": 1.6008893996007932e-05, "loss": 0.1821746826171875, "step": 7348 }, { "epoch": 0.4967554413951602, "grad_norm": 0.8567071557044983, "learning_rate": 1.600560191321791e-05, "loss": 0.2144775390625, "step": 7349 }, { "epoch": 0.4968230363660944, "grad_norm": 1.104771375656128, "learning_rate": 1.6002309781770616e-05, "loss": 0.25775146484375, "step": 7350 }, { "epoch": 0.49689063133702854, "grad_norm": 1.1359479427337646, "learning_rate": 1.599901760182534e-05, "loss": 0.24066162109375, "step": 7351 }, { "epoch": 0.4969582263079627, "grad_norm": 1.4934223890304565, "learning_rate": 1.5995725373541376e-05, "loss": 0.2872314453125, "step": 7352 }, { "epoch": 0.49702582127889683, "grad_norm": 1.1237456798553467, "learning_rate": 1.599243309707803e-05, "loss": 0.32720947265625, "step": 7353 }, { "epoch": 0.497093416249831, "grad_norm": 1.7714860439300537, "learning_rate": 1.59891407725946e-05, "loss": 0.1921844482421875, "step": 7354 }, { "epoch": 0.49716101122076517, "grad_norm": 0.9675358533859253, "learning_rate": 1.5985848400250383e-05, "loss": 0.209259033203125, "step": 7355 }, { "epoch": 0.49722860619169934, "grad_norm": 1.6298223733901978, "learning_rate": 1.5982555980204692e-05, "loss": 0.3087158203125, "step": 7356 }, { "epoch": 0.4972962011626335, "grad_norm": 1.6333942413330078, "learning_rate": 1.5979263512616835e-05, "loss": 0.2301788330078125, "step": 7357 }, { "epoch": 0.4973637961335677, "grad_norm": 1.4529485702514648, "learning_rate": 1.5975970997646117e-05, "loss": 0.191009521484375, "step": 7358 }, { "epoch": 0.49743139110450185, "grad_norm": 1.456331491470337, "learning_rate": 1.597267843545185e-05, "loss": 0.2782135009765625, "step": 7359 }, { "epoch": 0.49749898607543597, "grad_norm": 1.7114136219024658, "learning_rate": 1.5969385826193357e-05, "loss": 0.30426025390625, "step": 7360 }, { "epoch": 0.49756658104637014, "grad_norm": 1.503808856010437, "learning_rate": 1.5966093170029945e-05, "loss": 0.23370361328125, "step": 7361 }, { "epoch": 0.4976341760173043, "grad_norm": 1.3007357120513916, "learning_rate": 1.5962800467120943e-05, "loss": 0.230224609375, "step": 7362 }, { "epoch": 0.4977017709882385, "grad_norm": 1.3261284828186035, "learning_rate": 1.595950771762566e-05, "loss": 0.232421875, "step": 7363 }, { "epoch": 0.49776936595917265, "grad_norm": 1.6069111824035645, "learning_rate": 1.5956214921703424e-05, "loss": 0.238739013671875, "step": 7364 }, { "epoch": 0.4978369609301068, "grad_norm": 1.2775195837020874, "learning_rate": 1.5952922079513573e-05, "loss": 0.17273712158203125, "step": 7365 }, { "epoch": 0.49790455590104093, "grad_norm": 1.102789282798767, "learning_rate": 1.5949629191215418e-05, "loss": 0.2650146484375, "step": 7366 }, { "epoch": 0.4979721508719751, "grad_norm": 1.2529211044311523, "learning_rate": 1.5946336256968304e-05, "loss": 0.204193115234375, "step": 7367 }, { "epoch": 0.4980397458429093, "grad_norm": 1.0902819633483887, "learning_rate": 1.594304327693155e-05, "loss": 0.17424392700195312, "step": 7368 }, { "epoch": 0.49810734081384345, "grad_norm": 0.9516711235046387, "learning_rate": 1.59397502512645e-05, "loss": 0.2625732421875, "step": 7369 }, { "epoch": 0.4981749357847776, "grad_norm": 1.565308928489685, "learning_rate": 1.593645718012649e-05, "loss": 0.24334716796875, "step": 7370 }, { "epoch": 0.4982425307557118, "grad_norm": 1.2016047239303589, "learning_rate": 1.5933164063676857e-05, "loss": 0.1575927734375, "step": 7371 }, { "epoch": 0.49831012572664596, "grad_norm": 0.7124266028404236, "learning_rate": 1.592987090207494e-05, "loss": 0.10695648193359375, "step": 7372 }, { "epoch": 0.4983777206975801, "grad_norm": 1.4013124704360962, "learning_rate": 1.5926577695480093e-05, "loss": 0.2611083984375, "step": 7373 }, { "epoch": 0.49844531566851424, "grad_norm": 1.8003870248794556, "learning_rate": 1.592328444405165e-05, "loss": 0.2591552734375, "step": 7374 }, { "epoch": 0.4985129106394484, "grad_norm": 1.0407336950302124, "learning_rate": 1.5919991147948963e-05, "loss": 0.220947265625, "step": 7375 }, { "epoch": 0.4985805056103826, "grad_norm": 1.4403667449951172, "learning_rate": 1.5916697807331383e-05, "loss": 0.1985931396484375, "step": 7376 }, { "epoch": 0.49864810058131676, "grad_norm": 1.7013150453567505, "learning_rate": 1.5913404422358265e-05, "loss": 0.2113189697265625, "step": 7377 }, { "epoch": 0.4987156955522509, "grad_norm": 0.8747414350509644, "learning_rate": 1.591011099318896e-05, "loss": 0.228179931640625, "step": 7378 }, { "epoch": 0.4987832905231851, "grad_norm": 1.3073811531066895, "learning_rate": 1.5906817519982825e-05, "loss": 0.29541015625, "step": 7379 }, { "epoch": 0.4988508854941192, "grad_norm": 1.4864046573638916, "learning_rate": 1.5903524002899218e-05, "loss": 0.2578277587890625, "step": 7380 }, { "epoch": 0.4989184804650534, "grad_norm": 0.6484842896461487, "learning_rate": 1.5900230442097503e-05, "loss": 0.14391326904296875, "step": 7381 }, { "epoch": 0.49898607543598755, "grad_norm": 2.0256919860839844, "learning_rate": 1.5896936837737037e-05, "loss": 0.326202392578125, "step": 7382 }, { "epoch": 0.4990536704069217, "grad_norm": 1.7010695934295654, "learning_rate": 1.5893643189977192e-05, "loss": 0.255645751953125, "step": 7383 }, { "epoch": 0.4991212653778559, "grad_norm": 1.2901536226272583, "learning_rate": 1.5890349498977328e-05, "loss": 0.242218017578125, "step": 7384 }, { "epoch": 0.49918886034879006, "grad_norm": 1.0896823406219482, "learning_rate": 1.5887055764896822e-05, "loss": 0.22412109375, "step": 7385 }, { "epoch": 0.49925645531972423, "grad_norm": 0.8275570273399353, "learning_rate": 1.588376198789504e-05, "loss": 0.166259765625, "step": 7386 }, { "epoch": 0.49932405029065835, "grad_norm": 0.9380097389221191, "learning_rate": 1.5880468168131358e-05, "loss": 0.09624862670898438, "step": 7387 }, { "epoch": 0.4993916452615925, "grad_norm": 1.3449969291687012, "learning_rate": 1.587717430576515e-05, "loss": 0.318572998046875, "step": 7388 }, { "epoch": 0.4994592402325267, "grad_norm": 1.598379135131836, "learning_rate": 1.587388040095579e-05, "loss": 0.276763916015625, "step": 7389 }, { "epoch": 0.49952683520346086, "grad_norm": 1.3578190803527832, "learning_rate": 1.5870586453862668e-05, "loss": 0.28814697265625, "step": 7390 }, { "epoch": 0.49959443017439503, "grad_norm": 1.4131581783294678, "learning_rate": 1.5867292464645156e-05, "loss": 0.2270050048828125, "step": 7391 }, { "epoch": 0.4996620251453292, "grad_norm": 1.2273595333099365, "learning_rate": 1.5863998433462643e-05, "loss": 0.277618408203125, "step": 7392 }, { "epoch": 0.4997296201162634, "grad_norm": 1.371262550354004, "learning_rate": 1.586070436047451e-05, "loss": 0.2557373046875, "step": 7393 }, { "epoch": 0.4997972150871975, "grad_norm": 0.5548596978187561, "learning_rate": 1.585741024584015e-05, "loss": 0.09511566162109375, "step": 7394 }, { "epoch": 0.49986481005813166, "grad_norm": 0.7283433675765991, "learning_rate": 1.585411608971895e-05, "loss": 0.2467041015625, "step": 7395 }, { "epoch": 0.49993240502906583, "grad_norm": 1.3626539707183838, "learning_rate": 1.5850821892270302e-05, "loss": 0.2376708984375, "step": 7396 }, { "epoch": 0.5, "grad_norm": 1.0496853590011597, "learning_rate": 1.5847527653653595e-05, "loss": 0.1540374755859375, "step": 7397 }, { "epoch": 0.5000675949709341, "grad_norm": 0.9792417287826538, "learning_rate": 1.584423337402823e-05, "loss": 0.1694183349609375, "step": 7398 }, { "epoch": 0.5001351899418683, "grad_norm": 1.0301767587661743, "learning_rate": 1.584093905355361e-05, "loss": 0.30401611328125, "step": 7399 }, { "epoch": 0.5002027849128025, "grad_norm": 1.8317737579345703, "learning_rate": 1.583764469238913e-05, "loss": 0.232086181640625, "step": 7400 }, { "epoch": 0.5002703798837367, "grad_norm": 1.0895732641220093, "learning_rate": 1.583435029069418e-05, "loss": 0.271514892578125, "step": 7401 }, { "epoch": 0.5003379748546708, "grad_norm": 1.1755121946334839, "learning_rate": 1.583105584862818e-05, "loss": 0.2046966552734375, "step": 7402 }, { "epoch": 0.500405569825605, "grad_norm": 1.753149390220642, "learning_rate": 1.5827761366350536e-05, "loss": 0.257080078125, "step": 7403 }, { "epoch": 0.5004731647965391, "grad_norm": 1.0291357040405273, "learning_rate": 1.5824466844020645e-05, "loss": 0.2200927734375, "step": 7404 }, { "epoch": 0.5005407597674733, "grad_norm": 1.5075178146362305, "learning_rate": 1.5821172281797914e-05, "loss": 0.3145751953125, "step": 7405 }, { "epoch": 0.5006083547384075, "grad_norm": 0.796922504901886, "learning_rate": 1.581787767984177e-05, "loss": 0.1243896484375, "step": 7406 }, { "epoch": 0.5006759497093416, "grad_norm": 1.3166148662567139, "learning_rate": 1.581458303831161e-05, "loss": 0.22064208984375, "step": 7407 }, { "epoch": 0.5007435446802758, "grad_norm": 0.8924745321273804, "learning_rate": 1.5811288357366862e-05, "loss": 0.205535888671875, "step": 7408 }, { "epoch": 0.5008111396512099, "grad_norm": 1.224709153175354, "learning_rate": 1.5807993637166937e-05, "loss": 0.28472900390625, "step": 7409 }, { "epoch": 0.5008787346221442, "grad_norm": 2.236222267150879, "learning_rate": 1.5804698877871254e-05, "loss": 0.339599609375, "step": 7410 }, { "epoch": 0.5009463295930783, "grad_norm": 1.0584386587142944, "learning_rate": 1.5801404079639236e-05, "loss": 0.1732635498046875, "step": 7411 }, { "epoch": 0.5010139245640124, "grad_norm": 0.9587101936340332, "learning_rate": 1.5798109242630306e-05, "loss": 0.229278564453125, "step": 7412 }, { "epoch": 0.5010815195349466, "grad_norm": 1.9638603925704956, "learning_rate": 1.5794814367003884e-05, "loss": 0.3184814453125, "step": 7413 }, { "epoch": 0.5011491145058807, "grad_norm": 0.9835919141769409, "learning_rate": 1.5791519452919398e-05, "loss": 0.2296142578125, "step": 7414 }, { "epoch": 0.501216709476815, "grad_norm": 1.6328649520874023, "learning_rate": 1.5788224500536286e-05, "loss": 0.35809326171875, "step": 7415 }, { "epoch": 0.5012843044477491, "grad_norm": 1.0148036479949951, "learning_rate": 1.578492951001397e-05, "loss": 0.1241912841796875, "step": 7416 }, { "epoch": 0.5013518994186833, "grad_norm": 1.1840112209320068, "learning_rate": 1.5781634481511882e-05, "loss": 0.19889068603515625, "step": 7417 }, { "epoch": 0.5014194943896174, "grad_norm": 0.8873448371887207, "learning_rate": 1.577833941518945e-05, "loss": 0.22784423828125, "step": 7418 }, { "epoch": 0.5014870893605515, "grad_norm": 1.6279369592666626, "learning_rate": 1.577504431120612e-05, "loss": 0.332611083984375, "step": 7419 }, { "epoch": 0.5015546843314858, "grad_norm": 1.4835073947906494, "learning_rate": 1.5771749169721327e-05, "loss": 0.223480224609375, "step": 7420 }, { "epoch": 0.5016222793024199, "grad_norm": 0.8250454068183899, "learning_rate": 1.5768453990894515e-05, "loss": 0.204925537109375, "step": 7421 }, { "epoch": 0.5016898742733541, "grad_norm": 1.3950666189193726, "learning_rate": 1.5765158774885108e-05, "loss": 0.29522705078125, "step": 7422 }, { "epoch": 0.5017574692442882, "grad_norm": 0.7591578364372253, "learning_rate": 1.576186352185257e-05, "loss": 0.1557769775390625, "step": 7423 }, { "epoch": 0.5018250642152224, "grad_norm": 0.9886935353279114, "learning_rate": 1.5758568231956338e-05, "loss": 0.258636474609375, "step": 7424 }, { "epoch": 0.5018926591861566, "grad_norm": 1.033241629600525, "learning_rate": 1.5755272905355853e-05, "loss": 0.214996337890625, "step": 7425 }, { "epoch": 0.5019602541570907, "grad_norm": 1.0544304847717285, "learning_rate": 1.575197754221057e-05, "loss": 0.2238616943359375, "step": 7426 }, { "epoch": 0.5020278491280249, "grad_norm": 0.7657948732376099, "learning_rate": 1.5748682142679937e-05, "loss": 0.138092041015625, "step": 7427 }, { "epoch": 0.502095444098959, "grad_norm": 0.8320727348327637, "learning_rate": 1.5745386706923406e-05, "loss": 0.1057891845703125, "step": 7428 }, { "epoch": 0.5021630390698932, "grad_norm": 1.6430600881576538, "learning_rate": 1.5742091235100434e-05, "loss": 0.26751708984375, "step": 7429 }, { "epoch": 0.5022306340408274, "grad_norm": 1.2490198612213135, "learning_rate": 1.5738795727370464e-05, "loss": 0.28369140625, "step": 7430 }, { "epoch": 0.5022982290117616, "grad_norm": 1.5478792190551758, "learning_rate": 1.5735500183892966e-05, "loss": 0.26470947265625, "step": 7431 }, { "epoch": 0.5023658239826957, "grad_norm": 0.9370691180229187, "learning_rate": 1.5732204604827395e-05, "loss": 0.2351531982421875, "step": 7432 }, { "epoch": 0.5024334189536298, "grad_norm": 1.230208396911621, "learning_rate": 1.572890899033322e-05, "loss": 0.32733154296875, "step": 7433 }, { "epoch": 0.502501013924564, "grad_norm": 1.0308905839920044, "learning_rate": 1.5725613340569888e-05, "loss": 0.266265869140625, "step": 7434 }, { "epoch": 0.5025686088954981, "grad_norm": 1.1242530345916748, "learning_rate": 1.5722317655696865e-05, "loss": 0.282196044921875, "step": 7435 }, { "epoch": 0.5026362038664324, "grad_norm": 1.3945446014404297, "learning_rate": 1.5719021935873626e-05, "loss": 0.219482421875, "step": 7436 }, { "epoch": 0.5027037988373665, "grad_norm": 1.609125018119812, "learning_rate": 1.5715726181259637e-05, "loss": 0.22613525390625, "step": 7437 }, { "epoch": 0.5027713938083007, "grad_norm": 0.6816949844360352, "learning_rate": 1.5712430392014364e-05, "loss": 0.1008758544921875, "step": 7438 }, { "epoch": 0.5028389887792348, "grad_norm": 0.8821330070495605, "learning_rate": 1.5709134568297276e-05, "loss": 0.242584228515625, "step": 7439 }, { "epoch": 0.502906583750169, "grad_norm": 1.4214178323745728, "learning_rate": 1.5705838710267845e-05, "loss": 0.2943115234375, "step": 7440 }, { "epoch": 0.5029741787211032, "grad_norm": 1.147491693496704, "learning_rate": 1.5702542818085555e-05, "loss": 0.202545166015625, "step": 7441 }, { "epoch": 0.5030417736920373, "grad_norm": 0.9812301397323608, "learning_rate": 1.569924689190987e-05, "loss": 0.19731903076171875, "step": 7442 }, { "epoch": 0.5031093686629715, "grad_norm": 0.9740952849388123, "learning_rate": 1.5695950931900268e-05, "loss": 0.249725341796875, "step": 7443 }, { "epoch": 0.5031769636339056, "grad_norm": 1.2728407382965088, "learning_rate": 1.5692654938216234e-05, "loss": 0.27117919921875, "step": 7444 }, { "epoch": 0.5032445586048399, "grad_norm": 1.0467878580093384, "learning_rate": 1.568935891101725e-05, "loss": 0.22259521484375, "step": 7445 }, { "epoch": 0.503312153575774, "grad_norm": 0.8037771582603455, "learning_rate": 1.5686062850462792e-05, "loss": 0.183624267578125, "step": 7446 }, { "epoch": 0.5033797485467081, "grad_norm": 1.1828856468200684, "learning_rate": 1.568276675671235e-05, "loss": 0.25665283203125, "step": 7447 }, { "epoch": 0.5034473435176423, "grad_norm": 1.3364089727401733, "learning_rate": 1.56794706299254e-05, "loss": 0.35418701171875, "step": 7448 }, { "epoch": 0.5035149384885764, "grad_norm": 1.0533181428909302, "learning_rate": 1.567617447026144e-05, "loss": 0.169403076171875, "step": 7449 }, { "epoch": 0.5035825334595107, "grad_norm": 1.3916012048721313, "learning_rate": 1.5672878277879956e-05, "loss": 0.1886444091796875, "step": 7450 }, { "epoch": 0.5036501284304448, "grad_norm": 1.2571074962615967, "learning_rate": 1.566958205294043e-05, "loss": 0.229827880859375, "step": 7451 }, { "epoch": 0.503717723401379, "grad_norm": 1.8277473449707031, "learning_rate": 1.566628579560237e-05, "loss": 0.3380126953125, "step": 7452 }, { "epoch": 0.5037853183723131, "grad_norm": 1.5271003246307373, "learning_rate": 1.566298950602525e-05, "loss": 0.232177734375, "step": 7453 }, { "epoch": 0.5038529133432472, "grad_norm": 1.6168153285980225, "learning_rate": 1.565969318436858e-05, "loss": 0.22998046875, "step": 7454 }, { "epoch": 0.5039205083141814, "grad_norm": 0.9860362410545349, "learning_rate": 1.5656396830791847e-05, "loss": 0.2053680419921875, "step": 7455 }, { "epoch": 0.5039881032851156, "grad_norm": 1.7359203100204468, "learning_rate": 1.5653100445454557e-05, "loss": 0.29266357421875, "step": 7456 }, { "epoch": 0.5040556982560498, "grad_norm": 0.9411312937736511, "learning_rate": 1.5649804028516202e-05, "loss": 0.20001220703125, "step": 7457 }, { "epoch": 0.5041232932269839, "grad_norm": 1.488659143447876, "learning_rate": 1.564650758013629e-05, "loss": 0.244964599609375, "step": 7458 }, { "epoch": 0.5041908881979181, "grad_norm": 1.4589563608169556, "learning_rate": 1.564321110047432e-05, "loss": 0.2139434814453125, "step": 7459 }, { "epoch": 0.5042584831688522, "grad_norm": 0.9345669746398926, "learning_rate": 1.5639914589689797e-05, "loss": 0.141754150390625, "step": 7460 }, { "epoch": 0.5043260781397864, "grad_norm": 1.6268584728240967, "learning_rate": 1.5636618047942225e-05, "loss": 0.2884521484375, "step": 7461 }, { "epoch": 0.5043936731107206, "grad_norm": 1.4544556140899658, "learning_rate": 1.5633321475391118e-05, "loss": 0.2721405029296875, "step": 7462 }, { "epoch": 0.5044612680816547, "grad_norm": 1.8449687957763672, "learning_rate": 1.5630024872195975e-05, "loss": 0.34735107421875, "step": 7463 }, { "epoch": 0.5045288630525889, "grad_norm": 0.8762348294258118, "learning_rate": 1.5626728238516306e-05, "loss": 0.1319122314453125, "step": 7464 }, { "epoch": 0.504596458023523, "grad_norm": 0.8969306349754333, "learning_rate": 1.5623431574511635e-05, "loss": 0.1780548095703125, "step": 7465 }, { "epoch": 0.5046640529944572, "grad_norm": 1.5926799774169922, "learning_rate": 1.5620134880341464e-05, "loss": 0.256866455078125, "step": 7466 }, { "epoch": 0.5047316479653914, "grad_norm": 2.408932685852051, "learning_rate": 1.5616838156165316e-05, "loss": 0.357177734375, "step": 7467 }, { "epoch": 0.5047992429363255, "grad_norm": 1.4393142461776733, "learning_rate": 1.5613541402142693e-05, "loss": 0.2028350830078125, "step": 7468 }, { "epoch": 0.5048668379072597, "grad_norm": 1.9474551677703857, "learning_rate": 1.5610244618433126e-05, "loss": 0.2581787109375, "step": 7469 }, { "epoch": 0.5049344328781938, "grad_norm": 2.319009780883789, "learning_rate": 1.560694780519613e-05, "loss": 0.26141357421875, "step": 7470 }, { "epoch": 0.5050020278491281, "grad_norm": 1.6734693050384521, "learning_rate": 1.5603650962591224e-05, "loss": 0.3526611328125, "step": 7471 }, { "epoch": 0.5050696228200622, "grad_norm": 1.009778380393982, "learning_rate": 1.560035409077793e-05, "loss": 0.22216796875, "step": 7472 }, { "epoch": 0.5051372177909963, "grad_norm": 1.1957285404205322, "learning_rate": 1.5597057189915774e-05, "loss": 0.272705078125, "step": 7473 }, { "epoch": 0.5052048127619305, "grad_norm": 0.8530582189559937, "learning_rate": 1.5593760260164276e-05, "loss": 0.2388916015625, "step": 7474 }, { "epoch": 0.5052724077328646, "grad_norm": 1.3494908809661865, "learning_rate": 1.5590463301682962e-05, "loss": 0.303619384765625, "step": 7475 }, { "epoch": 0.5053400027037989, "grad_norm": 0.7477255463600159, "learning_rate": 1.5587166314631363e-05, "loss": 0.26190185546875, "step": 7476 }, { "epoch": 0.505407597674733, "grad_norm": 1.5090522766113281, "learning_rate": 1.5583869299169008e-05, "loss": 0.21632766723632812, "step": 7477 }, { "epoch": 0.5054751926456672, "grad_norm": 1.054335594177246, "learning_rate": 1.558057225545542e-05, "loss": 0.19487762451171875, "step": 7478 }, { "epoch": 0.5055427876166013, "grad_norm": 2.3780245780944824, "learning_rate": 1.5577275183650142e-05, "loss": 0.2962646484375, "step": 7479 }, { "epoch": 0.5056103825875354, "grad_norm": 1.0113399028778076, "learning_rate": 1.5573978083912698e-05, "loss": 0.27447509765625, "step": 7480 }, { "epoch": 0.5056779775584697, "grad_norm": 1.0183097124099731, "learning_rate": 1.5570680956402623e-05, "loss": 0.241546630859375, "step": 7481 }, { "epoch": 0.5057455725294038, "grad_norm": 1.0701464414596558, "learning_rate": 1.5567383801279456e-05, "loss": 0.2231903076171875, "step": 7482 }, { "epoch": 0.505813167500338, "grad_norm": 1.0097169876098633, "learning_rate": 1.5564086618702735e-05, "loss": 0.261474609375, "step": 7483 }, { "epoch": 0.5058807624712721, "grad_norm": 1.572317361831665, "learning_rate": 1.556078940883199e-05, "loss": 0.268463134765625, "step": 7484 }, { "epoch": 0.5059483574422063, "grad_norm": 0.9656485915184021, "learning_rate": 1.5557492171826772e-05, "loss": 0.17999267578125, "step": 7485 }, { "epoch": 0.5060159524131405, "grad_norm": 1.1393980979919434, "learning_rate": 1.555419490784661e-05, "loss": 0.232391357421875, "step": 7486 }, { "epoch": 0.5060835473840746, "grad_norm": 0.7118345499038696, "learning_rate": 1.5550897617051056e-05, "loss": 0.11505126953125, "step": 7487 }, { "epoch": 0.5061511423550088, "grad_norm": 1.0361149311065674, "learning_rate": 1.554760029959965e-05, "loss": 0.19189453125, "step": 7488 }, { "epoch": 0.5062187373259429, "grad_norm": 1.325050711631775, "learning_rate": 1.5544302955651928e-05, "loss": 0.2841796875, "step": 7489 }, { "epoch": 0.5062863322968771, "grad_norm": 1.7320544719696045, "learning_rate": 1.5541005585367454e-05, "loss": 0.245452880859375, "step": 7490 }, { "epoch": 0.5063539272678113, "grad_norm": 1.733610987663269, "learning_rate": 1.553770818890576e-05, "loss": 0.253448486328125, "step": 7491 }, { "epoch": 0.5064215222387455, "grad_norm": 1.4656813144683838, "learning_rate": 1.5534410766426397e-05, "loss": 0.271881103515625, "step": 7492 }, { "epoch": 0.5064891172096796, "grad_norm": 0.79682856798172, "learning_rate": 1.5531113318088922e-05, "loss": 0.15404510498046875, "step": 7493 }, { "epoch": 0.5065567121806137, "grad_norm": 1.346968412399292, "learning_rate": 1.552781584405288e-05, "loss": 0.1923065185546875, "step": 7494 }, { "epoch": 0.5066243071515479, "grad_norm": 1.4079304933547974, "learning_rate": 1.5524518344477826e-05, "loss": 0.314453125, "step": 7495 }, { "epoch": 0.506691902122482, "grad_norm": 1.239675760269165, "learning_rate": 1.5521220819523314e-05, "loss": 0.1496734619140625, "step": 7496 }, { "epoch": 0.5067594970934163, "grad_norm": 2.249741315841675, "learning_rate": 1.5517923269348897e-05, "loss": 0.237457275390625, "step": 7497 }, { "epoch": 0.5068270920643504, "grad_norm": 1.2067270278930664, "learning_rate": 1.551462569411413e-05, "loss": 0.245819091796875, "step": 7498 }, { "epoch": 0.5068946870352846, "grad_norm": 1.1488144397735596, "learning_rate": 1.551132809397857e-05, "loss": 0.150054931640625, "step": 7499 }, { "epoch": 0.5069622820062187, "grad_norm": 1.3879836797714233, "learning_rate": 1.550803046910178e-05, "loss": 0.306182861328125, "step": 7500 }, { "epoch": 0.5070298769771528, "grad_norm": 0.7551290988922119, "learning_rate": 1.5504732819643315e-05, "loss": 0.119384765625, "step": 7501 }, { "epoch": 0.5070974719480871, "grad_norm": 1.2404773235321045, "learning_rate": 1.5501435145762738e-05, "loss": 0.218048095703125, "step": 7502 }, { "epoch": 0.5071650669190212, "grad_norm": 1.37900972366333, "learning_rate": 1.5498137447619607e-05, "loss": 0.1790008544921875, "step": 7503 }, { "epoch": 0.5072326618899554, "grad_norm": 1.5033191442489624, "learning_rate": 1.5494839725373493e-05, "loss": 0.2218475341796875, "step": 7504 }, { "epoch": 0.5073002568608895, "grad_norm": 2.680982828140259, "learning_rate": 1.5491541979183956e-05, "loss": 0.314849853515625, "step": 7505 }, { "epoch": 0.5073678518318238, "grad_norm": 1.2772107124328613, "learning_rate": 1.5488244209210554e-05, "loss": 0.2930908203125, "step": 7506 }, { "epoch": 0.5074354468027579, "grad_norm": 2.0095880031585693, "learning_rate": 1.5484946415612867e-05, "loss": 0.2324371337890625, "step": 7507 }, { "epoch": 0.507503041773692, "grad_norm": 1.4141381978988647, "learning_rate": 1.5481648598550458e-05, "loss": 0.2686767578125, "step": 7508 }, { "epoch": 0.5075706367446262, "grad_norm": 0.8373148441314697, "learning_rate": 1.547835075818289e-05, "loss": 0.14403533935546875, "step": 7509 }, { "epoch": 0.5076382317155603, "grad_norm": 1.5623310804367065, "learning_rate": 1.547505289466974e-05, "loss": 0.289154052734375, "step": 7510 }, { "epoch": 0.5077058266864946, "grad_norm": 1.286565899848938, "learning_rate": 1.5471755008170575e-05, "loss": 0.2006683349609375, "step": 7511 }, { "epoch": 0.5077734216574287, "grad_norm": 1.4637935161590576, "learning_rate": 1.546845709884497e-05, "loss": 0.2291259765625, "step": 7512 }, { "epoch": 0.5078410166283629, "grad_norm": 1.4220080375671387, "learning_rate": 1.5465159166852497e-05, "loss": 0.22857666015625, "step": 7513 }, { "epoch": 0.507908611599297, "grad_norm": 1.5938688516616821, "learning_rate": 1.5461861212352727e-05, "loss": 0.30169677734375, "step": 7514 }, { "epoch": 0.5079762065702311, "grad_norm": 1.0380886793136597, "learning_rate": 1.5458563235505246e-05, "loss": 0.1832122802734375, "step": 7515 }, { "epoch": 0.5080438015411654, "grad_norm": 2.1519439220428467, "learning_rate": 1.5455265236469622e-05, "loss": 0.24713134765625, "step": 7516 }, { "epoch": 0.5081113965120995, "grad_norm": 0.7204477190971375, "learning_rate": 1.5451967215405432e-05, "loss": 0.146514892578125, "step": 7517 }, { "epoch": 0.5081789914830337, "grad_norm": 1.0148851871490479, "learning_rate": 1.544866917247226e-05, "loss": 0.191497802734375, "step": 7518 }, { "epoch": 0.5082465864539678, "grad_norm": 0.8965744972229004, "learning_rate": 1.544537110782968e-05, "loss": 0.16375732421875, "step": 7519 }, { "epoch": 0.508314181424902, "grad_norm": 1.1350735425949097, "learning_rate": 1.5442073021637282e-05, "loss": 0.266326904296875, "step": 7520 }, { "epoch": 0.5083817763958361, "grad_norm": 1.092584252357483, "learning_rate": 1.543877491405464e-05, "loss": 0.194305419921875, "step": 7521 }, { "epoch": 0.5084493713667703, "grad_norm": 1.1316580772399902, "learning_rate": 1.5435476785241334e-05, "loss": 0.2354736328125, "step": 7522 }, { "epoch": 0.5085169663377045, "grad_norm": 1.5752941370010376, "learning_rate": 1.5432178635356955e-05, "loss": 0.31292724609375, "step": 7523 }, { "epoch": 0.5085845613086386, "grad_norm": 1.1582635641098022, "learning_rate": 1.5428880464561087e-05, "loss": 0.19316673278808594, "step": 7524 }, { "epoch": 0.5086521562795728, "grad_norm": 1.2396488189697266, "learning_rate": 1.5425582273013316e-05, "loss": 0.207244873046875, "step": 7525 }, { "epoch": 0.508719751250507, "grad_norm": 1.25617516040802, "learning_rate": 1.5422284060873228e-05, "loss": 0.20487213134765625, "step": 7526 }, { "epoch": 0.5087873462214412, "grad_norm": 1.0903427600860596, "learning_rate": 1.541898582830041e-05, "loss": 0.22003173828125, "step": 7527 }, { "epoch": 0.5088549411923753, "grad_norm": 0.8866198062896729, "learning_rate": 1.541568757545445e-05, "loss": 0.270263671875, "step": 7528 }, { "epoch": 0.5089225361633094, "grad_norm": 1.5020099878311157, "learning_rate": 1.541238930249494e-05, "loss": 0.264312744140625, "step": 7529 }, { "epoch": 0.5089901311342436, "grad_norm": 1.7669035196304321, "learning_rate": 1.5409091009581474e-05, "loss": 0.285247802734375, "step": 7530 }, { "epoch": 0.5090577261051777, "grad_norm": 0.7710207104682922, "learning_rate": 1.540579269687364e-05, "loss": 0.1853179931640625, "step": 7531 }, { "epoch": 0.509125321076112, "grad_norm": 1.8088622093200684, "learning_rate": 1.540249436453103e-05, "loss": 0.25128173828125, "step": 7532 }, { "epoch": 0.5091929160470461, "grad_norm": 0.9583909511566162, "learning_rate": 1.5399196012713245e-05, "loss": 0.20590591430664062, "step": 7533 }, { "epoch": 0.5092605110179803, "grad_norm": 1.058057427406311, "learning_rate": 1.5395897641579866e-05, "loss": 0.255157470703125, "step": 7534 }, { "epoch": 0.5093281059889144, "grad_norm": 1.4837658405303955, "learning_rate": 1.53925992512905e-05, "loss": 0.31854248046875, "step": 7535 }, { "epoch": 0.5093957009598485, "grad_norm": 1.3688722848892212, "learning_rate": 1.538930084200474e-05, "loss": 0.210784912109375, "step": 7536 }, { "epoch": 0.5094632959307828, "grad_norm": 2.554492950439453, "learning_rate": 1.538600241388219e-05, "loss": 0.33978271484375, "step": 7537 }, { "epoch": 0.5095308909017169, "grad_norm": 0.883003294467926, "learning_rate": 1.5382703967082438e-05, "loss": 0.18177413940429688, "step": 7538 }, { "epoch": 0.5095984858726511, "grad_norm": 1.5876333713531494, "learning_rate": 1.5379405501765083e-05, "loss": 0.199310302734375, "step": 7539 }, { "epoch": 0.5096660808435852, "grad_norm": 1.5982016324996948, "learning_rate": 1.537610701808974e-05, "loss": 0.299560546875, "step": 7540 }, { "epoch": 0.5097336758145194, "grad_norm": 1.4033886194229126, "learning_rate": 1.5372808516215998e-05, "loss": 0.29058837890625, "step": 7541 }, { "epoch": 0.5098012707854536, "grad_norm": 1.0995986461639404, "learning_rate": 1.536950999630346e-05, "loss": 0.25091552734375, "step": 7542 }, { "epoch": 0.5098688657563877, "grad_norm": 1.4347509145736694, "learning_rate": 1.536621145851173e-05, "loss": 0.278167724609375, "step": 7543 }, { "epoch": 0.5099364607273219, "grad_norm": 1.3039441108703613, "learning_rate": 1.5362912903000413e-05, "loss": 0.29339599609375, "step": 7544 }, { "epoch": 0.510004055698256, "grad_norm": 1.3090922832489014, "learning_rate": 1.5359614329929114e-05, "loss": 0.2276458740234375, "step": 7545 }, { "epoch": 0.5100716506691902, "grad_norm": 1.4222743511199951, "learning_rate": 1.5356315739457437e-05, "loss": 0.2340087890625, "step": 7546 }, { "epoch": 0.5101392456401244, "grad_norm": 0.9484078884124756, "learning_rate": 1.5353017131744987e-05, "loss": 0.191986083984375, "step": 7547 }, { "epoch": 0.5102068406110586, "grad_norm": 1.0298912525177002, "learning_rate": 1.5349718506951373e-05, "loss": 0.2162628173828125, "step": 7548 }, { "epoch": 0.5102744355819927, "grad_norm": 0.7987802624702454, "learning_rate": 1.53464198652362e-05, "loss": 0.12257003784179688, "step": 7549 }, { "epoch": 0.5103420305529268, "grad_norm": 1.4604380130767822, "learning_rate": 1.5343121206759085e-05, "loss": 0.256683349609375, "step": 7550 }, { "epoch": 0.510409625523861, "grad_norm": 1.0751430988311768, "learning_rate": 1.5339822531679632e-05, "loss": 0.18301773071289062, "step": 7551 }, { "epoch": 0.5104772204947952, "grad_norm": 0.9857984781265259, "learning_rate": 1.5336523840157446e-05, "loss": 0.2349853515625, "step": 7552 }, { "epoch": 0.5105448154657294, "grad_norm": 0.9105361700057983, "learning_rate": 1.5333225132352143e-05, "loss": 0.220062255859375, "step": 7553 }, { "epoch": 0.5106124104366635, "grad_norm": 1.3864243030548096, "learning_rate": 1.5329926408423344e-05, "loss": 0.29559326171875, "step": 7554 }, { "epoch": 0.5106800054075977, "grad_norm": 0.8757885098457336, "learning_rate": 1.532662766853065e-05, "loss": 0.192413330078125, "step": 7555 }, { "epoch": 0.5107476003785318, "grad_norm": 1.6725293397903442, "learning_rate": 1.5323328912833678e-05, "loss": 0.3160400390625, "step": 7556 }, { "epoch": 0.510815195349466, "grad_norm": 1.1847777366638184, "learning_rate": 1.532003014149204e-05, "loss": 0.237579345703125, "step": 7557 }, { "epoch": 0.5108827903204002, "grad_norm": 0.8593237400054932, "learning_rate": 1.5316731354665357e-05, "loss": 0.2033843994140625, "step": 7558 }, { "epoch": 0.5109503852913343, "grad_norm": 0.8393316268920898, "learning_rate": 1.531343255251324e-05, "loss": 0.11679840087890625, "step": 7559 }, { "epoch": 0.5110179802622685, "grad_norm": 1.475286841392517, "learning_rate": 1.5310133735195306e-05, "loss": 0.220062255859375, "step": 7560 }, { "epoch": 0.5110855752332026, "grad_norm": 1.4863626956939697, "learning_rate": 1.5306834902871175e-05, "loss": 0.2166748046875, "step": 7561 }, { "epoch": 0.5111531702041369, "grad_norm": 2.2946245670318604, "learning_rate": 1.530353605570046e-05, "loss": 0.212738037109375, "step": 7562 }, { "epoch": 0.511220765175071, "grad_norm": 0.6377472877502441, "learning_rate": 1.5300237193842786e-05, "loss": 0.129852294921875, "step": 7563 }, { "epoch": 0.5112883601460051, "grad_norm": 1.3100625276565552, "learning_rate": 1.529693831745777e-05, "loss": 0.28179931640625, "step": 7564 }, { "epoch": 0.5113559551169393, "grad_norm": 1.806918740272522, "learning_rate": 1.5293639426705026e-05, "loss": 0.240936279296875, "step": 7565 }, { "epoch": 0.5114235500878734, "grad_norm": 0.7688915729522705, "learning_rate": 1.529034052174419e-05, "loss": 0.157806396484375, "step": 7566 }, { "epoch": 0.5114911450588077, "grad_norm": 1.1202611923217773, "learning_rate": 1.528704160273487e-05, "loss": 0.216278076171875, "step": 7567 }, { "epoch": 0.5115587400297418, "grad_norm": 1.3975776433944702, "learning_rate": 1.528374266983669e-05, "loss": 0.295074462890625, "step": 7568 }, { "epoch": 0.511626335000676, "grad_norm": 1.6043803691864014, "learning_rate": 1.528044372320927e-05, "loss": 0.30877685546875, "step": 7569 }, { "epoch": 0.5116939299716101, "grad_norm": 1.7925935983657837, "learning_rate": 1.5277144763012246e-05, "loss": 0.34375, "step": 7570 }, { "epoch": 0.5117615249425442, "grad_norm": 1.4433130025863647, "learning_rate": 1.5273845789405236e-05, "loss": 0.27777099609375, "step": 7571 }, { "epoch": 0.5118291199134785, "grad_norm": 1.7075332403182983, "learning_rate": 1.5270546802547862e-05, "loss": 0.2742919921875, "step": 7572 }, { "epoch": 0.5118967148844126, "grad_norm": 0.6727943420410156, "learning_rate": 1.5267247802599744e-05, "loss": 0.16081619262695312, "step": 7573 }, { "epoch": 0.5119643098553468, "grad_norm": 1.3090465068817139, "learning_rate": 1.5263948789720526e-05, "loss": 0.222198486328125, "step": 7574 }, { "epoch": 0.5120319048262809, "grad_norm": 1.1236014366149902, "learning_rate": 1.5260649764069816e-05, "loss": 0.21600341796875, "step": 7575 }, { "epoch": 0.5120994997972151, "grad_norm": 1.4152992963790894, "learning_rate": 1.5257350725807256e-05, "loss": 0.19171524047851562, "step": 7576 }, { "epoch": 0.5121670947681493, "grad_norm": 0.9906905293464661, "learning_rate": 1.5254051675092462e-05, "loss": 0.244384765625, "step": 7577 }, { "epoch": 0.5122346897390834, "grad_norm": 1.2062445878982544, "learning_rate": 1.525075261208507e-05, "loss": 0.26910400390625, "step": 7578 }, { "epoch": 0.5123022847100176, "grad_norm": 1.1194987297058105, "learning_rate": 1.524745353694471e-05, "loss": 0.2120361328125, "step": 7579 }, { "epoch": 0.5123698796809517, "grad_norm": 1.3643600940704346, "learning_rate": 1.5244154449831009e-05, "loss": 0.22625732421875, "step": 7580 }, { "epoch": 0.5124374746518859, "grad_norm": 1.2413755655288696, "learning_rate": 1.524085535090359e-05, "loss": 0.187164306640625, "step": 7581 }, { "epoch": 0.51250506962282, "grad_norm": 1.3297293186187744, "learning_rate": 1.5237556240322096e-05, "loss": 0.2412109375, "step": 7582 }, { "epoch": 0.5125726645937543, "grad_norm": 1.306725263595581, "learning_rate": 1.5234257118246155e-05, "loss": 0.18613433837890625, "step": 7583 }, { "epoch": 0.5126402595646884, "grad_norm": 1.3166025876998901, "learning_rate": 1.5230957984835397e-05, "loss": 0.29840087890625, "step": 7584 }, { "epoch": 0.5127078545356225, "grad_norm": 0.8540059924125671, "learning_rate": 1.5227658840249452e-05, "loss": 0.1992645263671875, "step": 7585 }, { "epoch": 0.5127754495065567, "grad_norm": 1.4355051517486572, "learning_rate": 1.5224359684647962e-05, "loss": 0.2546844482421875, "step": 7586 }, { "epoch": 0.5128430444774909, "grad_norm": 0.9556520581245422, "learning_rate": 1.5221060518190552e-05, "loss": 0.198211669921875, "step": 7587 }, { "epoch": 0.5129106394484251, "grad_norm": 1.133439540863037, "learning_rate": 1.521776134103686e-05, "loss": 0.2877197265625, "step": 7588 }, { "epoch": 0.5129782344193592, "grad_norm": 0.5258931517601013, "learning_rate": 1.5214462153346522e-05, "loss": 0.149505615234375, "step": 7589 }, { "epoch": 0.5130458293902934, "grad_norm": 1.127860188484192, "learning_rate": 1.5211162955279167e-05, "loss": 0.2096710205078125, "step": 7590 }, { "epoch": 0.5131134243612275, "grad_norm": 1.0523144006729126, "learning_rate": 1.5207863746994439e-05, "loss": 0.22601318359375, "step": 7591 }, { "epoch": 0.5131810193321616, "grad_norm": 1.7450588941574097, "learning_rate": 1.520456452865197e-05, "loss": 0.34796142578125, "step": 7592 }, { "epoch": 0.5132486143030959, "grad_norm": 1.0297349691390991, "learning_rate": 1.5201265300411394e-05, "loss": 0.24615478515625, "step": 7593 }, { "epoch": 0.51331620927403, "grad_norm": 0.8952192068099976, "learning_rate": 1.5197966062432352e-05, "loss": 0.214752197265625, "step": 7594 }, { "epoch": 0.5133838042449642, "grad_norm": 1.0829548835754395, "learning_rate": 1.5194666814874483e-05, "loss": 0.191009521484375, "step": 7595 }, { "epoch": 0.5134513992158983, "grad_norm": 1.0796751976013184, "learning_rate": 1.519136755789742e-05, "loss": 0.283599853515625, "step": 7596 }, { "epoch": 0.5135189941868324, "grad_norm": 1.0988322496414185, "learning_rate": 1.5188068291660807e-05, "loss": 0.189544677734375, "step": 7597 }, { "epoch": 0.5135865891577667, "grad_norm": 1.1340298652648926, "learning_rate": 1.5184769016324277e-05, "loss": 0.298187255859375, "step": 7598 }, { "epoch": 0.5136541841287008, "grad_norm": 2.1600279808044434, "learning_rate": 1.5181469732047476e-05, "loss": 0.3218994140625, "step": 7599 }, { "epoch": 0.513721779099635, "grad_norm": 1.210741639137268, "learning_rate": 1.5178170438990039e-05, "loss": 0.13172149658203125, "step": 7600 }, { "epoch": 0.5137893740705691, "grad_norm": 0.9656565189361572, "learning_rate": 1.5174871137311612e-05, "loss": 0.26312255859375, "step": 7601 }, { "epoch": 0.5138569690415034, "grad_norm": 1.087512493133545, "learning_rate": 1.5171571827171828e-05, "loss": 0.2415618896484375, "step": 7602 }, { "epoch": 0.5139245640124375, "grad_norm": 0.8344461917877197, "learning_rate": 1.5168272508730333e-05, "loss": 0.1592559814453125, "step": 7603 }, { "epoch": 0.5139921589833716, "grad_norm": 1.0429792404174805, "learning_rate": 1.5164973182146767e-05, "loss": 0.2381591796875, "step": 7604 }, { "epoch": 0.5140597539543058, "grad_norm": 1.777224063873291, "learning_rate": 1.5161673847580773e-05, "loss": 0.28570556640625, "step": 7605 }, { "epoch": 0.5141273489252399, "grad_norm": 1.369680643081665, "learning_rate": 1.515837450519199e-05, "loss": 0.26605224609375, "step": 7606 }, { "epoch": 0.5141949438961742, "grad_norm": 0.8957949280738831, "learning_rate": 1.5155075155140068e-05, "loss": 0.1519012451171875, "step": 7607 }, { "epoch": 0.5142625388671083, "grad_norm": 1.9747929573059082, "learning_rate": 1.5151775797584644e-05, "loss": 0.30029296875, "step": 7608 }, { "epoch": 0.5143301338380425, "grad_norm": 1.3844274282455444, "learning_rate": 1.5148476432685365e-05, "loss": 0.29962158203125, "step": 7609 }, { "epoch": 0.5143977288089766, "grad_norm": 1.351348876953125, "learning_rate": 1.514517706060187e-05, "loss": 0.249267578125, "step": 7610 }, { "epoch": 0.5144653237799107, "grad_norm": 1.959069848060608, "learning_rate": 1.5141877681493804e-05, "loss": 0.261322021484375, "step": 7611 }, { "epoch": 0.514532918750845, "grad_norm": 0.8658393621444702, "learning_rate": 1.513857829552082e-05, "loss": 0.157623291015625, "step": 7612 }, { "epoch": 0.5146005137217791, "grad_norm": 1.1758791208267212, "learning_rate": 1.513527890284255e-05, "loss": 0.237335205078125, "step": 7613 }, { "epoch": 0.5146681086927133, "grad_norm": 0.9821550846099854, "learning_rate": 1.513197950361865e-05, "loss": 0.259979248046875, "step": 7614 }, { "epoch": 0.5147357036636474, "grad_norm": 0.7707541584968567, "learning_rate": 1.512868009800876e-05, "loss": 0.1195526123046875, "step": 7615 }, { "epoch": 0.5148032986345816, "grad_norm": 0.5211396813392639, "learning_rate": 1.5125380686172525e-05, "loss": 0.11405563354492188, "step": 7616 }, { "epoch": 0.5148708936055157, "grad_norm": 1.0888290405273438, "learning_rate": 1.5122081268269593e-05, "loss": 0.27545166015625, "step": 7617 }, { "epoch": 0.5149384885764499, "grad_norm": 1.105966329574585, "learning_rate": 1.5118781844459612e-05, "loss": 0.2356414794921875, "step": 7618 }, { "epoch": 0.5150060835473841, "grad_norm": 1.0275208950042725, "learning_rate": 1.511548241490222e-05, "loss": 0.228759765625, "step": 7619 }, { "epoch": 0.5150736785183182, "grad_norm": 1.6149976253509521, "learning_rate": 1.5112182979757074e-05, "loss": 0.260009765625, "step": 7620 }, { "epoch": 0.5151412734892524, "grad_norm": 1.3294140100479126, "learning_rate": 1.510888353918382e-05, "loss": 0.3101806640625, "step": 7621 }, { "epoch": 0.5152088684601865, "grad_norm": 0.766019344329834, "learning_rate": 1.5105584093342105e-05, "loss": 0.121734619140625, "step": 7622 }, { "epoch": 0.5152764634311208, "grad_norm": 1.55750572681427, "learning_rate": 1.510228464239157e-05, "loss": 0.309814453125, "step": 7623 }, { "epoch": 0.5153440584020549, "grad_norm": 0.9584190845489502, "learning_rate": 1.5098985186491869e-05, "loss": 0.209381103515625, "step": 7624 }, { "epoch": 0.515411653372989, "grad_norm": 1.1925030946731567, "learning_rate": 1.5095685725802653e-05, "loss": 0.242156982421875, "step": 7625 }, { "epoch": 0.5154792483439232, "grad_norm": 0.7508548498153687, "learning_rate": 1.5092386260483565e-05, "loss": 0.123260498046875, "step": 7626 }, { "epoch": 0.5155468433148573, "grad_norm": 0.8094297051429749, "learning_rate": 1.5089086790694253e-05, "loss": 0.13062286376953125, "step": 7627 }, { "epoch": 0.5156144382857916, "grad_norm": 1.1393288373947144, "learning_rate": 1.5085787316594368e-05, "loss": 0.282958984375, "step": 7628 }, { "epoch": 0.5156820332567257, "grad_norm": 1.1065596342086792, "learning_rate": 1.508248783834356e-05, "loss": 0.206573486328125, "step": 7629 }, { "epoch": 0.5157496282276599, "grad_norm": 1.4268076419830322, "learning_rate": 1.5079188356101477e-05, "loss": 0.29754638671875, "step": 7630 }, { "epoch": 0.515817223198594, "grad_norm": 1.5097543001174927, "learning_rate": 1.5075888870027767e-05, "loss": 0.353759765625, "step": 7631 }, { "epoch": 0.5158848181695281, "grad_norm": 1.1937094926834106, "learning_rate": 1.5072589380282084e-05, "loss": 0.20174407958984375, "step": 7632 }, { "epoch": 0.5159524131404624, "grad_norm": 1.621018409729004, "learning_rate": 1.5069289887024078e-05, "loss": 0.15268325805664062, "step": 7633 }, { "epoch": 0.5160200081113965, "grad_norm": 2.011902093887329, "learning_rate": 1.5065990390413393e-05, "loss": 0.273193359375, "step": 7634 }, { "epoch": 0.5160876030823307, "grad_norm": 0.659832775592804, "learning_rate": 1.5062690890609682e-05, "loss": 0.13568496704101562, "step": 7635 }, { "epoch": 0.5161551980532648, "grad_norm": 0.9827043414115906, "learning_rate": 1.5059391387772595e-05, "loss": 0.308837890625, "step": 7636 }, { "epoch": 0.516222793024199, "grad_norm": 1.3442814350128174, "learning_rate": 1.505609188206179e-05, "loss": 0.216796875, "step": 7637 }, { "epoch": 0.5162903879951332, "grad_norm": 1.819924235343933, "learning_rate": 1.5052792373636905e-05, "loss": 0.2935791015625, "step": 7638 }, { "epoch": 0.5163579829660673, "grad_norm": 1.0424818992614746, "learning_rate": 1.5049492862657602e-05, "loss": 0.2589111328125, "step": 7639 }, { "epoch": 0.5164255779370015, "grad_norm": 1.5338977575302124, "learning_rate": 1.5046193349283518e-05, "loss": 0.29388427734375, "step": 7640 }, { "epoch": 0.5164931729079356, "grad_norm": 1.3619234561920166, "learning_rate": 1.5042893833674318e-05, "loss": 0.31585693359375, "step": 7641 }, { "epoch": 0.5165607678788698, "grad_norm": 0.7634899020195007, "learning_rate": 1.5039594315989645e-05, "loss": 0.210205078125, "step": 7642 }, { "epoch": 0.516628362849804, "grad_norm": 1.2517378330230713, "learning_rate": 1.5036294796389156e-05, "loss": 0.2231292724609375, "step": 7643 }, { "epoch": 0.5166959578207382, "grad_norm": 1.4415128231048584, "learning_rate": 1.5032995275032492e-05, "loss": 0.2764892578125, "step": 7644 }, { "epoch": 0.5167635527916723, "grad_norm": 1.6079031229019165, "learning_rate": 1.502969575207932e-05, "loss": 0.224151611328125, "step": 7645 }, { "epoch": 0.5168311477626064, "grad_norm": 1.490787148475647, "learning_rate": 1.502639622768928e-05, "loss": 0.1992034912109375, "step": 7646 }, { "epoch": 0.5168987427335406, "grad_norm": 0.8927667140960693, "learning_rate": 1.5023096702022024e-05, "loss": 0.279296875, "step": 7647 }, { "epoch": 0.5169663377044748, "grad_norm": 0.9465149641036987, "learning_rate": 1.501979717523721e-05, "loss": 0.18900299072265625, "step": 7648 }, { "epoch": 0.517033932675409, "grad_norm": 1.7400022745132446, "learning_rate": 1.5016497647494481e-05, "loss": 0.30194091796875, "step": 7649 }, { "epoch": 0.5171015276463431, "grad_norm": 1.06365966796875, "learning_rate": 1.5013198118953499e-05, "loss": 0.2313232421875, "step": 7650 }, { "epoch": 0.5171691226172773, "grad_norm": 1.18476140499115, "learning_rate": 1.5009898589773906e-05, "loss": 0.27496337890625, "step": 7651 }, { "epoch": 0.5172367175882114, "grad_norm": 1.7637431621551514, "learning_rate": 1.5006599060115353e-05, "loss": 0.268829345703125, "step": 7652 }, { "epoch": 0.5173043125591456, "grad_norm": 1.4627960920333862, "learning_rate": 1.5003299530137503e-05, "loss": 0.16204071044921875, "step": 7653 }, { "epoch": 0.5173719075300798, "grad_norm": 1.989098310470581, "learning_rate": 1.5e-05, "loss": 0.356353759765625, "step": 7654 }, { "epoch": 0.5174395025010139, "grad_norm": 0.7611169219017029, "learning_rate": 1.4996700469862503e-05, "loss": 0.11176300048828125, "step": 7655 }, { "epoch": 0.5175070974719481, "grad_norm": 1.2733076810836792, "learning_rate": 1.4993400939884648e-05, "loss": 0.28887939453125, "step": 7656 }, { "epoch": 0.5175746924428822, "grad_norm": 1.4090783596038818, "learning_rate": 1.49901014102261e-05, "loss": 0.293792724609375, "step": 7657 }, { "epoch": 0.5176422874138165, "grad_norm": 0.8778305649757385, "learning_rate": 1.4986801881046504e-05, "loss": 0.226165771484375, "step": 7658 }, { "epoch": 0.5177098823847506, "grad_norm": 0.9616151452064514, "learning_rate": 1.4983502352505523e-05, "loss": 0.2459716796875, "step": 7659 }, { "epoch": 0.5177774773556847, "grad_norm": 1.263387680053711, "learning_rate": 1.4980202824762792e-05, "loss": 0.18423080444335938, "step": 7660 }, { "epoch": 0.5178450723266189, "grad_norm": 1.1838716268539429, "learning_rate": 1.4976903297977978e-05, "loss": 0.264251708984375, "step": 7661 }, { "epoch": 0.517912667297553, "grad_norm": 1.4984556436538696, "learning_rate": 1.4973603772310721e-05, "loss": 0.275390625, "step": 7662 }, { "epoch": 0.5179802622684873, "grad_norm": 1.086478590965271, "learning_rate": 1.4970304247920684e-05, "loss": 0.1313629150390625, "step": 7663 }, { "epoch": 0.5180478572394214, "grad_norm": 1.1605398654937744, "learning_rate": 1.4967004724967505e-05, "loss": 0.19420623779296875, "step": 7664 }, { "epoch": 0.5181154522103556, "grad_norm": 1.673108696937561, "learning_rate": 1.4963705203610846e-05, "loss": 0.355865478515625, "step": 7665 }, { "epoch": 0.5181830471812897, "grad_norm": 1.3545399904251099, "learning_rate": 1.4960405684010359e-05, "loss": 0.260009765625, "step": 7666 }, { "epoch": 0.5182506421522238, "grad_norm": 2.086399555206299, "learning_rate": 1.4957106166325686e-05, "loss": 0.223602294921875, "step": 7667 }, { "epoch": 0.518318237123158, "grad_norm": 1.944635272026062, "learning_rate": 1.4953806650716486e-05, "loss": 0.32891845703125, "step": 7668 }, { "epoch": 0.5183858320940922, "grad_norm": 2.1711771488189697, "learning_rate": 1.4950507137342402e-05, "loss": 0.33575439453125, "step": 7669 }, { "epoch": 0.5184534270650264, "grad_norm": 0.9712299704551697, "learning_rate": 1.4947207626363098e-05, "loss": 0.231292724609375, "step": 7670 }, { "epoch": 0.5185210220359605, "grad_norm": 0.8702589869499207, "learning_rate": 1.4943908117938213e-05, "loss": 0.181915283203125, "step": 7671 }, { "epoch": 0.5185886170068947, "grad_norm": 0.9909679293632507, "learning_rate": 1.4940608612227407e-05, "loss": 0.195587158203125, "step": 7672 }, { "epoch": 0.5186562119778289, "grad_norm": 1.5744640827178955, "learning_rate": 1.4937309109390319e-05, "loss": 0.343963623046875, "step": 7673 }, { "epoch": 0.518723806948763, "grad_norm": 1.1641985177993774, "learning_rate": 1.493400960958661e-05, "loss": 0.32891845703125, "step": 7674 }, { "epoch": 0.5187914019196972, "grad_norm": 0.8005398511886597, "learning_rate": 1.4930710112975923e-05, "loss": 0.151123046875, "step": 7675 }, { "epoch": 0.5188589968906313, "grad_norm": 1.5184564590454102, "learning_rate": 1.4927410619717918e-05, "loss": 0.287322998046875, "step": 7676 }, { "epoch": 0.5189265918615655, "grad_norm": 1.0846840143203735, "learning_rate": 1.4924111129972232e-05, "loss": 0.2182159423828125, "step": 7677 }, { "epoch": 0.5189941868324996, "grad_norm": 0.9409766793251038, "learning_rate": 1.4920811643898524e-05, "loss": 0.1644134521484375, "step": 7678 }, { "epoch": 0.5190617818034339, "grad_norm": 0.8899232745170593, "learning_rate": 1.4917512161656445e-05, "loss": 0.1540069580078125, "step": 7679 }, { "epoch": 0.519129376774368, "grad_norm": 1.283799648284912, "learning_rate": 1.4914212683405635e-05, "loss": 0.1865234375, "step": 7680 }, { "epoch": 0.5191969717453021, "grad_norm": 1.6728265285491943, "learning_rate": 1.491091320930575e-05, "loss": 0.24517822265625, "step": 7681 }, { "epoch": 0.5192645667162363, "grad_norm": 1.0813599824905396, "learning_rate": 1.4907613739516438e-05, "loss": 0.2662353515625, "step": 7682 }, { "epoch": 0.5193321616871704, "grad_norm": 0.8779166340827942, "learning_rate": 1.4904314274197352e-05, "loss": 0.197418212890625, "step": 7683 }, { "epoch": 0.5193997566581047, "grad_norm": 1.3411046266555786, "learning_rate": 1.490101481350813e-05, "loss": 0.220977783203125, "step": 7684 }, { "epoch": 0.5194673516290388, "grad_norm": 0.8486068844795227, "learning_rate": 1.4897715357608434e-05, "loss": 0.1107635498046875, "step": 7685 }, { "epoch": 0.519534946599973, "grad_norm": 1.0214699506759644, "learning_rate": 1.4894415906657899e-05, "loss": 0.2301483154296875, "step": 7686 }, { "epoch": 0.5196025415709071, "grad_norm": 1.2437890768051147, "learning_rate": 1.4891116460816182e-05, "loss": 0.3267822265625, "step": 7687 }, { "epoch": 0.5196701365418412, "grad_norm": 1.756555199623108, "learning_rate": 1.4887817020242923e-05, "loss": 0.40185546875, "step": 7688 }, { "epoch": 0.5197377315127755, "grad_norm": 0.9302569627761841, "learning_rate": 1.4884517585097782e-05, "loss": 0.2090606689453125, "step": 7689 }, { "epoch": 0.5198053264837096, "grad_norm": 1.6373062133789062, "learning_rate": 1.4881218155540396e-05, "loss": 0.26483154296875, "step": 7690 }, { "epoch": 0.5198729214546438, "grad_norm": 0.8516231775283813, "learning_rate": 1.487791873173041e-05, "loss": 0.187286376953125, "step": 7691 }, { "epoch": 0.5199405164255779, "grad_norm": 1.3155187368392944, "learning_rate": 1.4874619313827483e-05, "loss": 0.2677001953125, "step": 7692 }, { "epoch": 0.5200081113965122, "grad_norm": 1.9907200336456299, "learning_rate": 1.4871319901991245e-05, "loss": 0.316009521484375, "step": 7693 }, { "epoch": 0.5200757063674463, "grad_norm": 1.0901985168457031, "learning_rate": 1.4868020496381353e-05, "loss": 0.154541015625, "step": 7694 }, { "epoch": 0.5201433013383804, "grad_norm": 1.1704919338226318, "learning_rate": 1.486472109715745e-05, "loss": 0.239044189453125, "step": 7695 }, { "epoch": 0.5202108963093146, "grad_norm": 1.672042727470398, "learning_rate": 1.4861421704479185e-05, "loss": 0.2315673828125, "step": 7696 }, { "epoch": 0.5202784912802487, "grad_norm": 1.2034480571746826, "learning_rate": 1.4858122318506195e-05, "loss": 0.25823974609375, "step": 7697 }, { "epoch": 0.520346086251183, "grad_norm": 1.6786531209945679, "learning_rate": 1.4854822939398132e-05, "loss": 0.3077392578125, "step": 7698 }, { "epoch": 0.5204136812221171, "grad_norm": 1.0272932052612305, "learning_rate": 1.4851523567314634e-05, "loss": 0.2579498291015625, "step": 7699 }, { "epoch": 0.5204812761930513, "grad_norm": 0.6914843320846558, "learning_rate": 1.4848224202415357e-05, "loss": 0.10491943359375, "step": 7700 }, { "epoch": 0.5205488711639854, "grad_norm": 2.0216243267059326, "learning_rate": 1.4844924844859938e-05, "loss": 0.296142578125, "step": 7701 }, { "epoch": 0.5206164661349195, "grad_norm": 1.2076715230941772, "learning_rate": 1.4841625494808012e-05, "loss": 0.218597412109375, "step": 7702 }, { "epoch": 0.5206840611058537, "grad_norm": 1.19094979763031, "learning_rate": 1.4838326152419231e-05, "loss": 0.1753082275390625, "step": 7703 }, { "epoch": 0.5207516560767879, "grad_norm": 1.159949779510498, "learning_rate": 1.4835026817853235e-05, "loss": 0.30029296875, "step": 7704 }, { "epoch": 0.5208192510477221, "grad_norm": 1.208964228630066, "learning_rate": 1.4831727491269673e-05, "loss": 0.2274169921875, "step": 7705 }, { "epoch": 0.5208868460186562, "grad_norm": 0.5546855926513672, "learning_rate": 1.4828428172828176e-05, "loss": 0.125, "step": 7706 }, { "epoch": 0.5209544409895904, "grad_norm": 0.6990338563919067, "learning_rate": 1.4825128862688393e-05, "loss": 0.119873046875, "step": 7707 }, { "epoch": 0.5210220359605245, "grad_norm": 1.0615767240524292, "learning_rate": 1.482182956100996e-05, "loss": 0.251708984375, "step": 7708 }, { "epoch": 0.5210896309314587, "grad_norm": 1.9841177463531494, "learning_rate": 1.4818530267952528e-05, "loss": 0.283966064453125, "step": 7709 }, { "epoch": 0.5211572259023929, "grad_norm": 1.060694694519043, "learning_rate": 1.4815230983675722e-05, "loss": 0.242340087890625, "step": 7710 }, { "epoch": 0.521224820873327, "grad_norm": 1.9154621362686157, "learning_rate": 1.4811931708339196e-05, "loss": 0.276123046875, "step": 7711 }, { "epoch": 0.5212924158442612, "grad_norm": 0.7790022492408752, "learning_rate": 1.4808632442102579e-05, "loss": 0.22149658203125, "step": 7712 }, { "epoch": 0.5213600108151953, "grad_norm": 0.81998610496521, "learning_rate": 1.4805333185125521e-05, "loss": 0.197174072265625, "step": 7713 }, { "epoch": 0.5214276057861296, "grad_norm": 1.8476672172546387, "learning_rate": 1.4802033937567652e-05, "loss": 0.26824951171875, "step": 7714 }, { "epoch": 0.5214952007570637, "grad_norm": 0.6959226727485657, "learning_rate": 1.4798734699588607e-05, "loss": 0.1751708984375, "step": 7715 }, { "epoch": 0.5215627957279978, "grad_norm": 0.8132414817810059, "learning_rate": 1.4795435471348036e-05, "loss": 0.250885009765625, "step": 7716 }, { "epoch": 0.521630390698932, "grad_norm": 1.1144577264785767, "learning_rate": 1.4792136253005564e-05, "loss": 0.28704833984375, "step": 7717 }, { "epoch": 0.5216979856698661, "grad_norm": 1.2791972160339355, "learning_rate": 1.4788837044720837e-05, "loss": 0.272674560546875, "step": 7718 }, { "epoch": 0.5217655806408004, "grad_norm": 1.159619688987732, "learning_rate": 1.4785537846653482e-05, "loss": 0.263763427734375, "step": 7719 }, { "epoch": 0.5218331756117345, "grad_norm": 2.3140459060668945, "learning_rate": 1.4782238658963143e-05, "loss": 0.3797607421875, "step": 7720 }, { "epoch": 0.5219007705826687, "grad_norm": 1.3051611185073853, "learning_rate": 1.4778939481809447e-05, "loss": 0.2786102294921875, "step": 7721 }, { "epoch": 0.5219683655536028, "grad_norm": 0.7405394911766052, "learning_rate": 1.4775640315352042e-05, "loss": 0.1641845703125, "step": 7722 }, { "epoch": 0.5220359605245369, "grad_norm": 1.3530710935592651, "learning_rate": 1.4772341159750547e-05, "loss": 0.2633056640625, "step": 7723 }, { "epoch": 0.5221035554954712, "grad_norm": 1.4145245552062988, "learning_rate": 1.4769042015164606e-05, "loss": 0.217498779296875, "step": 7724 }, { "epoch": 0.5221711504664053, "grad_norm": 1.5023709535598755, "learning_rate": 1.476574288175385e-05, "loss": 0.298309326171875, "step": 7725 }, { "epoch": 0.5222387454373395, "grad_norm": 1.4371390342712402, "learning_rate": 1.4762443759677906e-05, "loss": 0.292755126953125, "step": 7726 }, { "epoch": 0.5223063404082736, "grad_norm": 1.4814822673797607, "learning_rate": 1.4759144649096412e-05, "loss": 0.31622314453125, "step": 7727 }, { "epoch": 0.5223739353792077, "grad_norm": 0.8525787591934204, "learning_rate": 1.4755845550168995e-05, "loss": 0.10894775390625, "step": 7728 }, { "epoch": 0.522441530350142, "grad_norm": 0.8372777700424194, "learning_rate": 1.4752546463055296e-05, "loss": 0.2332611083984375, "step": 7729 }, { "epoch": 0.5225091253210761, "grad_norm": 1.2854589223861694, "learning_rate": 1.474924738791493e-05, "loss": 0.298187255859375, "step": 7730 }, { "epoch": 0.5225767202920103, "grad_norm": 1.1252073049545288, "learning_rate": 1.4745948324907543e-05, "loss": 0.20050048828125, "step": 7731 }, { "epoch": 0.5226443152629444, "grad_norm": 1.430349349975586, "learning_rate": 1.4742649274192748e-05, "loss": 0.26654052734375, "step": 7732 }, { "epoch": 0.5227119102338786, "grad_norm": 0.960645318031311, "learning_rate": 1.4739350235930185e-05, "loss": 0.19537353515625, "step": 7733 }, { "epoch": 0.5227795052048128, "grad_norm": 1.38677179813385, "learning_rate": 1.4736051210279477e-05, "loss": 0.2827301025390625, "step": 7734 }, { "epoch": 0.5228471001757469, "grad_norm": 0.7347385883331299, "learning_rate": 1.4732752197400257e-05, "loss": 0.1650848388671875, "step": 7735 }, { "epoch": 0.5229146951466811, "grad_norm": 1.7544037103652954, "learning_rate": 1.4729453197452147e-05, "loss": 0.32550048828125, "step": 7736 }, { "epoch": 0.5229822901176152, "grad_norm": 1.206300139427185, "learning_rate": 1.4726154210594768e-05, "loss": 0.2775115966796875, "step": 7737 }, { "epoch": 0.5230498850885494, "grad_norm": 0.8913323879241943, "learning_rate": 1.4722855236987758e-05, "loss": 0.190093994140625, "step": 7738 }, { "epoch": 0.5231174800594836, "grad_norm": 1.4424301385879517, "learning_rate": 1.471955627679073e-05, "loss": 0.28656005859375, "step": 7739 }, { "epoch": 0.5231850750304178, "grad_norm": 1.0555546283721924, "learning_rate": 1.4716257330163316e-05, "loss": 0.276123046875, "step": 7740 }, { "epoch": 0.5232526700013519, "grad_norm": 0.7384349703788757, "learning_rate": 1.4712958397265133e-05, "loss": 0.19573974609375, "step": 7741 }, { "epoch": 0.523320264972286, "grad_norm": 0.9795055985450745, "learning_rate": 1.4709659478255817e-05, "loss": 0.2291259765625, "step": 7742 }, { "epoch": 0.5233878599432202, "grad_norm": 1.2611737251281738, "learning_rate": 1.4706360573294971e-05, "loss": 0.256866455078125, "step": 7743 }, { "epoch": 0.5234554549141544, "grad_norm": 1.3644130229949951, "learning_rate": 1.4703061682542232e-05, "loss": 0.2913818359375, "step": 7744 }, { "epoch": 0.5235230498850886, "grad_norm": 1.2534542083740234, "learning_rate": 1.4699762806157211e-05, "loss": 0.24102783203125, "step": 7745 }, { "epoch": 0.5235906448560227, "grad_norm": 0.8462126851081848, "learning_rate": 1.4696463944299543e-05, "loss": 0.1346282958984375, "step": 7746 }, { "epoch": 0.5236582398269569, "grad_norm": 1.9407817125320435, "learning_rate": 1.4693165097128825e-05, "loss": 0.278564453125, "step": 7747 }, { "epoch": 0.523725834797891, "grad_norm": 0.8256918787956238, "learning_rate": 1.4689866264804697e-05, "loss": 0.1533050537109375, "step": 7748 }, { "epoch": 0.5237934297688251, "grad_norm": 0.6384140849113464, "learning_rate": 1.4686567447486765e-05, "loss": 0.158782958984375, "step": 7749 }, { "epoch": 0.5238610247397594, "grad_norm": 0.6763937473297119, "learning_rate": 1.4683268645334644e-05, "loss": 0.12926483154296875, "step": 7750 }, { "epoch": 0.5239286197106935, "grad_norm": 1.0389384031295776, "learning_rate": 1.4679969858507964e-05, "loss": 0.2376708984375, "step": 7751 }, { "epoch": 0.5239962146816277, "grad_norm": 1.9373360872268677, "learning_rate": 1.4676671087166325e-05, "loss": 0.345245361328125, "step": 7752 }, { "epoch": 0.5240638096525618, "grad_norm": 1.204986810684204, "learning_rate": 1.4673372331469353e-05, "loss": 0.28326416015625, "step": 7753 }, { "epoch": 0.5241314046234961, "grad_norm": 1.1623536348342896, "learning_rate": 1.4670073591576657e-05, "loss": 0.194976806640625, "step": 7754 }, { "epoch": 0.5241989995944302, "grad_norm": 1.3703370094299316, "learning_rate": 1.4666774867647856e-05, "loss": 0.24493408203125, "step": 7755 }, { "epoch": 0.5242665945653643, "grad_norm": 1.159042239189148, "learning_rate": 1.4663476159842555e-05, "loss": 0.24114990234375, "step": 7756 }, { "epoch": 0.5243341895362985, "grad_norm": 2.171731472015381, "learning_rate": 1.4660177468320372e-05, "loss": 0.34796142578125, "step": 7757 }, { "epoch": 0.5244017845072326, "grad_norm": 2.0826101303100586, "learning_rate": 1.4656878793240916e-05, "loss": 0.269561767578125, "step": 7758 }, { "epoch": 0.5244693794781669, "grad_norm": 1.2356709241867065, "learning_rate": 1.4653580134763801e-05, "loss": 0.245880126953125, "step": 7759 }, { "epoch": 0.524536974449101, "grad_norm": 1.5334784984588623, "learning_rate": 1.4650281493048631e-05, "loss": 0.262054443359375, "step": 7760 }, { "epoch": 0.5246045694200352, "grad_norm": 1.6309752464294434, "learning_rate": 1.4646982868255015e-05, "loss": 0.2135009765625, "step": 7761 }, { "epoch": 0.5246721643909693, "grad_norm": 1.4939525127410889, "learning_rate": 1.464368426054257e-05, "loss": 0.281982421875, "step": 7762 }, { "epoch": 0.5247397593619034, "grad_norm": 1.2286574840545654, "learning_rate": 1.4640385670070887e-05, "loss": 0.310455322265625, "step": 7763 }, { "epoch": 0.5248073543328377, "grad_norm": 1.4909729957580566, "learning_rate": 1.4637087096999591e-05, "loss": 0.211212158203125, "step": 7764 }, { "epoch": 0.5248749493037718, "grad_norm": 1.1809351444244385, "learning_rate": 1.4633788541488272e-05, "loss": 0.275421142578125, "step": 7765 }, { "epoch": 0.524942544274706, "grad_norm": 1.8046070337295532, "learning_rate": 1.4630490003696542e-05, "loss": 0.35015869140625, "step": 7766 }, { "epoch": 0.5250101392456401, "grad_norm": 1.3730127811431885, "learning_rate": 1.4627191483784003e-05, "loss": 0.22161865234375, "step": 7767 }, { "epoch": 0.5250777342165743, "grad_norm": 1.351294994354248, "learning_rate": 1.4623892981910261e-05, "loss": 0.25396728515625, "step": 7768 }, { "epoch": 0.5251453291875084, "grad_norm": 1.7290279865264893, "learning_rate": 1.4620594498234912e-05, "loss": 0.25689697265625, "step": 7769 }, { "epoch": 0.5252129241584426, "grad_norm": 1.2389432191848755, "learning_rate": 1.4617296032917564e-05, "loss": 0.1912078857421875, "step": 7770 }, { "epoch": 0.5252805191293768, "grad_norm": 1.4386987686157227, "learning_rate": 1.4613997586117817e-05, "loss": 0.33544921875, "step": 7771 }, { "epoch": 0.5253481141003109, "grad_norm": 1.4551725387573242, "learning_rate": 1.4610699157995261e-05, "loss": 0.30511474609375, "step": 7772 }, { "epoch": 0.5254157090712451, "grad_norm": 1.4031991958618164, "learning_rate": 1.4607400748709502e-05, "loss": 0.2998809814453125, "step": 7773 }, { "epoch": 0.5254833040421792, "grad_norm": 2.1207587718963623, "learning_rate": 1.4604102358420133e-05, "loss": 0.3505859375, "step": 7774 }, { "epoch": 0.5255508990131135, "grad_norm": 1.1876975297927856, "learning_rate": 1.4600803987286764e-05, "loss": 0.20184326171875, "step": 7775 }, { "epoch": 0.5256184939840476, "grad_norm": 1.032962679862976, "learning_rate": 1.4597505635468971e-05, "loss": 0.2568359375, "step": 7776 }, { "epoch": 0.5256860889549817, "grad_norm": 1.348474144935608, "learning_rate": 1.4594207303126367e-05, "loss": 0.239837646484375, "step": 7777 }, { "epoch": 0.5257536839259159, "grad_norm": 1.9809271097183228, "learning_rate": 1.459090899041853e-05, "loss": 0.357696533203125, "step": 7778 }, { "epoch": 0.52582127889685, "grad_norm": 0.9058419466018677, "learning_rate": 1.4587610697505062e-05, "loss": 0.226806640625, "step": 7779 }, { "epoch": 0.5258888738677843, "grad_norm": 1.8521883487701416, "learning_rate": 1.458431242454555e-05, "loss": 0.2431793212890625, "step": 7780 }, { "epoch": 0.5259564688387184, "grad_norm": 1.5120019912719727, "learning_rate": 1.4581014171699597e-05, "loss": 0.3074951171875, "step": 7781 }, { "epoch": 0.5260240638096526, "grad_norm": 0.8079192638397217, "learning_rate": 1.4577715939126774e-05, "loss": 0.21917724609375, "step": 7782 }, { "epoch": 0.5260916587805867, "grad_norm": 1.4761782884597778, "learning_rate": 1.4574417726986687e-05, "loss": 0.1629486083984375, "step": 7783 }, { "epoch": 0.5261592537515208, "grad_norm": 1.2874126434326172, "learning_rate": 1.4571119535438918e-05, "loss": 0.247100830078125, "step": 7784 }, { "epoch": 0.5262268487224551, "grad_norm": 1.299676537513733, "learning_rate": 1.4567821364643048e-05, "loss": 0.272247314453125, "step": 7785 }, { "epoch": 0.5262944436933892, "grad_norm": 1.2608875036239624, "learning_rate": 1.456452321475867e-05, "loss": 0.291351318359375, "step": 7786 }, { "epoch": 0.5263620386643234, "grad_norm": 0.8012346625328064, "learning_rate": 1.4561225085945364e-05, "loss": 0.156402587890625, "step": 7787 }, { "epoch": 0.5264296336352575, "grad_norm": 1.206352949142456, "learning_rate": 1.4557926978362723e-05, "loss": 0.2742767333984375, "step": 7788 }, { "epoch": 0.5264972286061917, "grad_norm": 0.9199779033660889, "learning_rate": 1.455462889217032e-05, "loss": 0.234832763671875, "step": 7789 }, { "epoch": 0.5265648235771259, "grad_norm": 1.2491728067398071, "learning_rate": 1.4551330827527743e-05, "loss": 0.2640380859375, "step": 7790 }, { "epoch": 0.52663241854806, "grad_norm": 1.2068036794662476, "learning_rate": 1.4548032784594565e-05, "loss": 0.177215576171875, "step": 7791 }, { "epoch": 0.5267000135189942, "grad_norm": 1.3687736988067627, "learning_rate": 1.454473476353038e-05, "loss": 0.32025146484375, "step": 7792 }, { "epoch": 0.5267676084899283, "grad_norm": 1.0859571695327759, "learning_rate": 1.4541436764494753e-05, "loss": 0.18902587890625, "step": 7793 }, { "epoch": 0.5268352034608625, "grad_norm": 1.0710110664367676, "learning_rate": 1.4538138787647272e-05, "loss": 0.16448974609375, "step": 7794 }, { "epoch": 0.5269027984317967, "grad_norm": 1.169297456741333, "learning_rate": 1.4534840833147507e-05, "loss": 0.188262939453125, "step": 7795 }, { "epoch": 0.5269703934027309, "grad_norm": 1.5183579921722412, "learning_rate": 1.4531542901155032e-05, "loss": 0.30023193359375, "step": 7796 }, { "epoch": 0.527037988373665, "grad_norm": 1.2192468643188477, "learning_rate": 1.4528244991829431e-05, "loss": 0.33172607421875, "step": 7797 }, { "epoch": 0.5271055833445991, "grad_norm": 1.4835656881332397, "learning_rate": 1.4524947105330264e-05, "loss": 0.2000274658203125, "step": 7798 }, { "epoch": 0.5271731783155333, "grad_norm": 0.8835840821266174, "learning_rate": 1.4521649241817113e-05, "loss": 0.17517852783203125, "step": 7799 }, { "epoch": 0.5272407732864675, "grad_norm": 1.4445143938064575, "learning_rate": 1.4518351401449543e-05, "loss": 0.211456298828125, "step": 7800 }, { "epoch": 0.5273083682574017, "grad_norm": 0.8575953841209412, "learning_rate": 1.4515053584387137e-05, "loss": 0.1682281494140625, "step": 7801 }, { "epoch": 0.5273759632283358, "grad_norm": 1.0337464809417725, "learning_rate": 1.4511755790789445e-05, "loss": 0.24237060546875, "step": 7802 }, { "epoch": 0.52744355819927, "grad_norm": 0.9270486831665039, "learning_rate": 1.4508458020816048e-05, "loss": 0.2017669677734375, "step": 7803 }, { "epoch": 0.5275111531702041, "grad_norm": 1.2081133127212524, "learning_rate": 1.4505160274626505e-05, "loss": 0.311767578125, "step": 7804 }, { "epoch": 0.5275787481411383, "grad_norm": 1.5326941013336182, "learning_rate": 1.4501862552380394e-05, "loss": 0.2921142578125, "step": 7805 }, { "epoch": 0.5276463431120725, "grad_norm": 1.7272461652755737, "learning_rate": 1.4498564854237266e-05, "loss": 0.318817138671875, "step": 7806 }, { "epoch": 0.5277139380830066, "grad_norm": 1.5031468868255615, "learning_rate": 1.4495267180356685e-05, "loss": 0.314300537109375, "step": 7807 }, { "epoch": 0.5277815330539408, "grad_norm": 2.1026787757873535, "learning_rate": 1.4491969530898225e-05, "loss": 0.2586517333984375, "step": 7808 }, { "epoch": 0.5278491280248749, "grad_norm": 2.1376070976257324, "learning_rate": 1.448867190602143e-05, "loss": 0.261474609375, "step": 7809 }, { "epoch": 0.5279167229958092, "grad_norm": 1.0142985582351685, "learning_rate": 1.4485374305885875e-05, "loss": 0.13751220703125, "step": 7810 }, { "epoch": 0.5279843179667433, "grad_norm": 1.1419156789779663, "learning_rate": 1.4482076730651107e-05, "loss": 0.19012451171875, "step": 7811 }, { "epoch": 0.5280519129376774, "grad_norm": 0.986053466796875, "learning_rate": 1.4478779180476689e-05, "loss": 0.202972412109375, "step": 7812 }, { "epoch": 0.5281195079086116, "grad_norm": 1.1323615312576294, "learning_rate": 1.4475481655522173e-05, "loss": 0.239715576171875, "step": 7813 }, { "epoch": 0.5281871028795457, "grad_norm": 2.3612565994262695, "learning_rate": 1.4472184155947122e-05, "loss": 0.3507080078125, "step": 7814 }, { "epoch": 0.52825469785048, "grad_norm": 0.8398503065109253, "learning_rate": 1.4468886681911079e-05, "loss": 0.148773193359375, "step": 7815 }, { "epoch": 0.5283222928214141, "grad_norm": 1.758082389831543, "learning_rate": 1.4465589233573603e-05, "loss": 0.22003173828125, "step": 7816 }, { "epoch": 0.5283898877923483, "grad_norm": 2.396554946899414, "learning_rate": 1.4462291811094241e-05, "loss": 0.32753753662109375, "step": 7817 }, { "epoch": 0.5284574827632824, "grad_norm": 2.4176955223083496, "learning_rate": 1.4458994414632552e-05, "loss": 0.28314208984375, "step": 7818 }, { "epoch": 0.5285250777342165, "grad_norm": 0.933938205242157, "learning_rate": 1.4455697044348073e-05, "loss": 0.194091796875, "step": 7819 }, { "epoch": 0.5285926727051508, "grad_norm": 1.5471330881118774, "learning_rate": 1.4452399700400354e-05, "loss": 0.33270263671875, "step": 7820 }, { "epoch": 0.5286602676760849, "grad_norm": 1.2873470783233643, "learning_rate": 1.444910238294895e-05, "loss": 0.25457763671875, "step": 7821 }, { "epoch": 0.5287278626470191, "grad_norm": 1.397341012954712, "learning_rate": 1.444580509215339e-05, "loss": 0.2330474853515625, "step": 7822 }, { "epoch": 0.5287954576179532, "grad_norm": 1.4662734270095825, "learning_rate": 1.4442507828173235e-05, "loss": 0.33892822265625, "step": 7823 }, { "epoch": 0.5288630525888874, "grad_norm": 0.8632918000221252, "learning_rate": 1.443921059116801e-05, "loss": 0.25347900390625, "step": 7824 }, { "epoch": 0.5289306475598216, "grad_norm": 1.623748779296875, "learning_rate": 1.4435913381297269e-05, "loss": 0.2847900390625, "step": 7825 }, { "epoch": 0.5289982425307557, "grad_norm": 1.2131222486495972, "learning_rate": 1.4432616198720541e-05, "loss": 0.2758331298828125, "step": 7826 }, { "epoch": 0.5290658375016899, "grad_norm": 1.414566993713379, "learning_rate": 1.442931904359738e-05, "loss": 0.24432373046875, "step": 7827 }, { "epoch": 0.529133432472624, "grad_norm": 1.741155743598938, "learning_rate": 1.4426021916087303e-05, "loss": 0.3056640625, "step": 7828 }, { "epoch": 0.5292010274435582, "grad_norm": 1.1726018190383911, "learning_rate": 1.442272481634986e-05, "loss": 0.2906494140625, "step": 7829 }, { "epoch": 0.5292686224144924, "grad_norm": 0.8775549530982971, "learning_rate": 1.4419427744544583e-05, "loss": 0.15509033203125, "step": 7830 }, { "epoch": 0.5293362173854266, "grad_norm": 1.531343698501587, "learning_rate": 1.4416130700830996e-05, "loss": 0.21966552734375, "step": 7831 }, { "epoch": 0.5294038123563607, "grad_norm": 0.9113011956214905, "learning_rate": 1.441283368536864e-05, "loss": 0.32098388671875, "step": 7832 }, { "epoch": 0.5294714073272948, "grad_norm": 1.3924038410186768, "learning_rate": 1.4409536698317038e-05, "loss": 0.2184295654296875, "step": 7833 }, { "epoch": 0.529539002298229, "grad_norm": 1.1545178890228271, "learning_rate": 1.4406239739835731e-05, "loss": 0.2530364990234375, "step": 7834 }, { "epoch": 0.5296065972691631, "grad_norm": 1.5941600799560547, "learning_rate": 1.440294281008423e-05, "loss": 0.26165771484375, "step": 7835 }, { "epoch": 0.5296741922400974, "grad_norm": 1.589469313621521, "learning_rate": 1.439964590922207e-05, "loss": 0.300048828125, "step": 7836 }, { "epoch": 0.5297417872110315, "grad_norm": 1.34469473361969, "learning_rate": 1.4396349037408775e-05, "loss": 0.1790008544921875, "step": 7837 }, { "epoch": 0.5298093821819657, "grad_norm": 0.8830236196517944, "learning_rate": 1.4393052194803873e-05, "loss": 0.187225341796875, "step": 7838 }, { "epoch": 0.5298769771528998, "grad_norm": 0.6852948069572449, "learning_rate": 1.4389755381566873e-05, "loss": 0.1435089111328125, "step": 7839 }, { "epoch": 0.529944572123834, "grad_norm": 1.7698575258255005, "learning_rate": 1.438645859785731e-05, "loss": 0.2432098388671875, "step": 7840 }, { "epoch": 0.5300121670947682, "grad_norm": 0.8087881207466125, "learning_rate": 1.4383161843834693e-05, "loss": 0.193115234375, "step": 7841 }, { "epoch": 0.5300797620657023, "grad_norm": 1.2015395164489746, "learning_rate": 1.4379865119658535e-05, "loss": 0.265838623046875, "step": 7842 }, { "epoch": 0.5301473570366365, "grad_norm": 1.2974857091903687, "learning_rate": 1.4376568425488371e-05, "loss": 0.233245849609375, "step": 7843 }, { "epoch": 0.5302149520075706, "grad_norm": 1.0047990083694458, "learning_rate": 1.4373271761483694e-05, "loss": 0.27069091796875, "step": 7844 }, { "epoch": 0.5302825469785049, "grad_norm": 0.8664054870605469, "learning_rate": 1.436997512780403e-05, "loss": 0.190399169921875, "step": 7845 }, { "epoch": 0.530350141949439, "grad_norm": 1.4524554014205933, "learning_rate": 1.4366678524608886e-05, "loss": 0.274688720703125, "step": 7846 }, { "epoch": 0.5304177369203731, "grad_norm": 1.332747459411621, "learning_rate": 1.4363381952057779e-05, "loss": 0.29437255859375, "step": 7847 }, { "epoch": 0.5304853318913073, "grad_norm": 1.157206654548645, "learning_rate": 1.4360085410310206e-05, "loss": 0.2181396484375, "step": 7848 }, { "epoch": 0.5305529268622414, "grad_norm": 1.0167896747589111, "learning_rate": 1.4356788899525681e-05, "loss": 0.221221923828125, "step": 7849 }, { "epoch": 0.5306205218331757, "grad_norm": 1.562455415725708, "learning_rate": 1.4353492419863709e-05, "loss": 0.221832275390625, "step": 7850 }, { "epoch": 0.5306881168041098, "grad_norm": 1.0411378145217896, "learning_rate": 1.4350195971483802e-05, "loss": 0.150787353515625, "step": 7851 }, { "epoch": 0.5307557117750439, "grad_norm": 1.6026697158813477, "learning_rate": 1.4346899554545446e-05, "loss": 0.2921142578125, "step": 7852 }, { "epoch": 0.5308233067459781, "grad_norm": 0.7846418619155884, "learning_rate": 1.4343603169208154e-05, "loss": 0.09968185424804688, "step": 7853 }, { "epoch": 0.5308909017169122, "grad_norm": 1.8673464059829712, "learning_rate": 1.4340306815631427e-05, "loss": 0.283721923828125, "step": 7854 }, { "epoch": 0.5309584966878464, "grad_norm": 1.266628623008728, "learning_rate": 1.4337010493974752e-05, "loss": 0.26422119140625, "step": 7855 }, { "epoch": 0.5310260916587806, "grad_norm": 1.1007558107376099, "learning_rate": 1.433371420439764e-05, "loss": 0.183685302734375, "step": 7856 }, { "epoch": 0.5310936866297148, "grad_norm": 1.7719993591308594, "learning_rate": 1.433041794705957e-05, "loss": 0.22357177734375, "step": 7857 }, { "epoch": 0.5311612816006489, "grad_norm": 0.6953379511833191, "learning_rate": 1.4327121722120048e-05, "loss": 0.135772705078125, "step": 7858 }, { "epoch": 0.531228876571583, "grad_norm": 2.1400115489959717, "learning_rate": 1.4323825529738559e-05, "loss": 0.35589599609375, "step": 7859 }, { "epoch": 0.5312964715425172, "grad_norm": 1.8180023431777954, "learning_rate": 1.43205293700746e-05, "loss": 0.2440185546875, "step": 7860 }, { "epoch": 0.5313640665134514, "grad_norm": 1.6786408424377441, "learning_rate": 1.4317233243287654e-05, "loss": 0.27960205078125, "step": 7861 }, { "epoch": 0.5314316614843856, "grad_norm": 1.2997313737869263, "learning_rate": 1.4313937149537209e-05, "loss": 0.285888671875, "step": 7862 }, { "epoch": 0.5314992564553197, "grad_norm": 0.8534317016601562, "learning_rate": 1.4310641088982748e-05, "loss": 0.18315887451171875, "step": 7863 }, { "epoch": 0.5315668514262539, "grad_norm": 1.8899198770523071, "learning_rate": 1.4307345061783766e-05, "loss": 0.274078369140625, "step": 7864 }, { "epoch": 0.531634446397188, "grad_norm": 0.5760321021080017, "learning_rate": 1.4304049068099736e-05, "loss": 0.1175079345703125, "step": 7865 }, { "epoch": 0.5317020413681222, "grad_norm": 1.7390834093093872, "learning_rate": 1.4300753108090134e-05, "loss": 0.22503662109375, "step": 7866 }, { "epoch": 0.5317696363390564, "grad_norm": 0.8699038624763489, "learning_rate": 1.4297457181914453e-05, "loss": 0.192291259765625, "step": 7867 }, { "epoch": 0.5318372313099905, "grad_norm": 1.2961479425430298, "learning_rate": 1.4294161289732156e-05, "loss": 0.27313232421875, "step": 7868 }, { "epoch": 0.5319048262809247, "grad_norm": 2.3857192993164062, "learning_rate": 1.4290865431702732e-05, "loss": 0.3133544921875, "step": 7869 }, { "epoch": 0.5319724212518588, "grad_norm": 0.9031935334205627, "learning_rate": 1.428756960798564e-05, "loss": 0.236083984375, "step": 7870 }, { "epoch": 0.5320400162227931, "grad_norm": 1.0760146379470825, "learning_rate": 1.4284273818740366e-05, "loss": 0.20623779296875, "step": 7871 }, { "epoch": 0.5321076111937272, "grad_norm": 1.3833558559417725, "learning_rate": 1.4280978064126372e-05, "loss": 0.1995391845703125, "step": 7872 }, { "epoch": 0.5321752061646613, "grad_norm": 0.8464686870574951, "learning_rate": 1.4277682344303138e-05, "loss": 0.14990234375, "step": 7873 }, { "epoch": 0.5322428011355955, "grad_norm": 1.2103098630905151, "learning_rate": 1.4274386659430118e-05, "loss": 0.202911376953125, "step": 7874 }, { "epoch": 0.5323103961065296, "grad_norm": 1.6251814365386963, "learning_rate": 1.4271091009666786e-05, "loss": 0.214935302734375, "step": 7875 }, { "epoch": 0.5323779910774639, "grad_norm": 1.1874459981918335, "learning_rate": 1.42677953951726e-05, "loss": 0.309906005859375, "step": 7876 }, { "epoch": 0.532445586048398, "grad_norm": 1.0261900424957275, "learning_rate": 1.4264499816107035e-05, "loss": 0.240478515625, "step": 7877 }, { "epoch": 0.5325131810193322, "grad_norm": 1.133100986480713, "learning_rate": 1.4261204272629539e-05, "loss": 0.24993896484375, "step": 7878 }, { "epoch": 0.5325807759902663, "grad_norm": 1.0530227422714233, "learning_rate": 1.425790876489957e-05, "loss": 0.22222900390625, "step": 7879 }, { "epoch": 0.5326483709612004, "grad_norm": 1.101925253868103, "learning_rate": 1.42546132930766e-05, "loss": 0.243499755859375, "step": 7880 }, { "epoch": 0.5327159659321347, "grad_norm": 0.9875394701957703, "learning_rate": 1.4251317857320064e-05, "loss": 0.1896514892578125, "step": 7881 }, { "epoch": 0.5327835609030688, "grad_norm": 1.1622518301010132, "learning_rate": 1.424802245778943e-05, "loss": 0.2328338623046875, "step": 7882 }, { "epoch": 0.532851155874003, "grad_norm": 1.454123854637146, "learning_rate": 1.4244727094644144e-05, "loss": 0.292816162109375, "step": 7883 }, { "epoch": 0.5329187508449371, "grad_norm": 1.7179604768753052, "learning_rate": 1.4241431768043666e-05, "loss": 0.2939453125, "step": 7884 }, { "epoch": 0.5329863458158713, "grad_norm": 0.9569705128669739, "learning_rate": 1.4238136478147427e-05, "loss": 0.2316741943359375, "step": 7885 }, { "epoch": 0.5330539407868055, "grad_norm": 0.9523961544036865, "learning_rate": 1.4234841225114891e-05, "loss": 0.222991943359375, "step": 7886 }, { "epoch": 0.5331215357577396, "grad_norm": 1.1468769311904907, "learning_rate": 1.4231546009105489e-05, "loss": 0.248626708984375, "step": 7887 }, { "epoch": 0.5331891307286738, "grad_norm": 1.2679939270019531, "learning_rate": 1.4228250830278673e-05, "loss": 0.255645751953125, "step": 7888 }, { "epoch": 0.5332567256996079, "grad_norm": 0.9375802278518677, "learning_rate": 1.4224955688793885e-05, "loss": 0.1583709716796875, "step": 7889 }, { "epoch": 0.5333243206705421, "grad_norm": 0.9123002886772156, "learning_rate": 1.4221660584810553e-05, "loss": 0.1595458984375, "step": 7890 }, { "epoch": 0.5333919156414763, "grad_norm": 0.7640177011489868, "learning_rate": 1.4218365518488127e-05, "loss": 0.20013427734375, "step": 7891 }, { "epoch": 0.5334595106124105, "grad_norm": 1.3188138008117676, "learning_rate": 1.4215070489986034e-05, "loss": 0.222381591796875, "step": 7892 }, { "epoch": 0.5335271055833446, "grad_norm": 1.1574681997299194, "learning_rate": 1.4211775499463718e-05, "loss": 0.29656982421875, "step": 7893 }, { "epoch": 0.5335947005542787, "grad_norm": 1.508417010307312, "learning_rate": 1.4208480547080601e-05, "loss": 0.31048583984375, "step": 7894 }, { "epoch": 0.5336622955252129, "grad_norm": 1.7048425674438477, "learning_rate": 1.4205185632996117e-05, "loss": 0.2431488037109375, "step": 7895 }, { "epoch": 0.533729890496147, "grad_norm": 1.181604266166687, "learning_rate": 1.4201890757369695e-05, "loss": 0.26220703125, "step": 7896 }, { "epoch": 0.5337974854670813, "grad_norm": 1.1990958452224731, "learning_rate": 1.4198595920360769e-05, "loss": 0.1794891357421875, "step": 7897 }, { "epoch": 0.5338650804380154, "grad_norm": 1.0786116123199463, "learning_rate": 1.4195301122128746e-05, "loss": 0.2037811279296875, "step": 7898 }, { "epoch": 0.5339326754089496, "grad_norm": 2.0916783809661865, "learning_rate": 1.4192006362833065e-05, "loss": 0.32415771484375, "step": 7899 }, { "epoch": 0.5340002703798837, "grad_norm": 1.3146405220031738, "learning_rate": 1.4188711642633142e-05, "loss": 0.2454681396484375, "step": 7900 }, { "epoch": 0.5340678653508178, "grad_norm": 1.6256306171417236, "learning_rate": 1.418541696168839e-05, "loss": 0.33831787109375, "step": 7901 }, { "epoch": 0.5341354603217521, "grad_norm": 1.217002511024475, "learning_rate": 1.4182122320158238e-05, "loss": 0.27020263671875, "step": 7902 }, { "epoch": 0.5342030552926862, "grad_norm": 0.9487540125846863, "learning_rate": 1.4178827718202088e-05, "loss": 0.21405029296875, "step": 7903 }, { "epoch": 0.5342706502636204, "grad_norm": 0.5397583842277527, "learning_rate": 1.4175533155979363e-05, "loss": 0.063507080078125, "step": 7904 }, { "epoch": 0.5343382452345545, "grad_norm": 1.2115755081176758, "learning_rate": 1.4172238633649468e-05, "loss": 0.232147216796875, "step": 7905 }, { "epoch": 0.5344058402054888, "grad_norm": 1.1873496770858765, "learning_rate": 1.4168944151371821e-05, "loss": 0.258575439453125, "step": 7906 }, { "epoch": 0.5344734351764229, "grad_norm": 1.5417144298553467, "learning_rate": 1.416564970930582e-05, "loss": 0.229888916015625, "step": 7907 }, { "epoch": 0.534541030147357, "grad_norm": 1.4009559154510498, "learning_rate": 1.4162355307610876e-05, "loss": 0.267730712890625, "step": 7908 }, { "epoch": 0.5346086251182912, "grad_norm": 0.7566015124320984, "learning_rate": 1.4159060946446389e-05, "loss": 0.19406890869140625, "step": 7909 }, { "epoch": 0.5346762200892253, "grad_norm": 1.592915654182434, "learning_rate": 1.4155766625971769e-05, "loss": 0.3135986328125, "step": 7910 }, { "epoch": 0.5347438150601596, "grad_norm": 1.2726706266403198, "learning_rate": 1.4152472346346404e-05, "loss": 0.2833709716796875, "step": 7911 }, { "epoch": 0.5348114100310937, "grad_norm": 1.0433317422866821, "learning_rate": 1.41491781077297e-05, "loss": 0.1953125, "step": 7912 }, { "epoch": 0.5348790050020279, "grad_norm": 0.8584320545196533, "learning_rate": 1.4145883910281055e-05, "loss": 0.1623992919921875, "step": 7913 }, { "epoch": 0.534946599972962, "grad_norm": 1.5528749227523804, "learning_rate": 1.4142589754159851e-05, "loss": 0.1833648681640625, "step": 7914 }, { "epoch": 0.5350141949438961, "grad_norm": 1.1059845685958862, "learning_rate": 1.4139295639525494e-05, "loss": 0.28759765625, "step": 7915 }, { "epoch": 0.5350817899148304, "grad_norm": 1.0306165218353271, "learning_rate": 1.413600156653736e-05, "loss": 0.238525390625, "step": 7916 }, { "epoch": 0.5351493848857645, "grad_norm": 1.395236611366272, "learning_rate": 1.4132707535354845e-05, "loss": 0.26275634765625, "step": 7917 }, { "epoch": 0.5352169798566987, "grad_norm": 0.8028931617736816, "learning_rate": 1.4129413546137332e-05, "loss": 0.1694793701171875, "step": 7918 }, { "epoch": 0.5352845748276328, "grad_norm": 1.331748366355896, "learning_rate": 1.4126119599044212e-05, "loss": 0.2059478759765625, "step": 7919 }, { "epoch": 0.535352169798567, "grad_norm": 1.7018659114837646, "learning_rate": 1.4122825694234853e-05, "loss": 0.2906494140625, "step": 7920 }, { "epoch": 0.5354197647695011, "grad_norm": 1.5546858310699463, "learning_rate": 1.4119531831868646e-05, "loss": 0.31451416015625, "step": 7921 }, { "epoch": 0.5354873597404353, "grad_norm": 1.7511457204818726, "learning_rate": 1.4116238012104959e-05, "loss": 0.33551025390625, "step": 7922 }, { "epoch": 0.5355549547113695, "grad_norm": 1.0296939611434937, "learning_rate": 1.4112944235103182e-05, "loss": 0.11358642578125, "step": 7923 }, { "epoch": 0.5356225496823036, "grad_norm": 1.1526057720184326, "learning_rate": 1.4109650501022675e-05, "loss": 0.25152587890625, "step": 7924 }, { "epoch": 0.5356901446532378, "grad_norm": 0.8753486275672913, "learning_rate": 1.410635681002281e-05, "loss": 0.233642578125, "step": 7925 }, { "epoch": 0.535757739624172, "grad_norm": 1.0448989868164062, "learning_rate": 1.4103063162262967e-05, "loss": 0.2181243896484375, "step": 7926 }, { "epoch": 0.5358253345951062, "grad_norm": 1.0332064628601074, "learning_rate": 1.40997695579025e-05, "loss": 0.2735595703125, "step": 7927 }, { "epoch": 0.5358929295660403, "grad_norm": 0.8666203618049622, "learning_rate": 1.4096475997100785e-05, "loss": 0.2215118408203125, "step": 7928 }, { "epoch": 0.5359605245369744, "grad_norm": 0.8788991570472717, "learning_rate": 1.4093182480017176e-05, "loss": 0.200653076171875, "step": 7929 }, { "epoch": 0.5360281195079086, "grad_norm": 2.1526622772216797, "learning_rate": 1.4089889006811045e-05, "loss": 0.3472900390625, "step": 7930 }, { "epoch": 0.5360957144788427, "grad_norm": 1.1123450994491577, "learning_rate": 1.4086595577641736e-05, "loss": 0.1987152099609375, "step": 7931 }, { "epoch": 0.536163309449777, "grad_norm": 0.6183909177780151, "learning_rate": 1.408330219266862e-05, "loss": 0.14532470703125, "step": 7932 }, { "epoch": 0.5362309044207111, "grad_norm": 1.7000349760055542, "learning_rate": 1.4080008852051037e-05, "loss": 0.327423095703125, "step": 7933 }, { "epoch": 0.5362984993916453, "grad_norm": 0.8599895238876343, "learning_rate": 1.4076715555948353e-05, "loss": 0.20465087890625, "step": 7934 }, { "epoch": 0.5363660943625794, "grad_norm": 0.6820560693740845, "learning_rate": 1.4073422304519915e-05, "loss": 0.1529541015625, "step": 7935 }, { "epoch": 0.5364336893335135, "grad_norm": 1.523812174797058, "learning_rate": 1.4070129097925061e-05, "loss": 0.2845458984375, "step": 7936 }, { "epoch": 0.5365012843044478, "grad_norm": 1.5174890756607056, "learning_rate": 1.4066835936323147e-05, "loss": 0.2906494140625, "step": 7937 }, { "epoch": 0.5365688792753819, "grad_norm": 1.2480074167251587, "learning_rate": 1.406354281987351e-05, "loss": 0.299530029296875, "step": 7938 }, { "epoch": 0.5366364742463161, "grad_norm": 0.44139358401298523, "learning_rate": 1.4060249748735503e-05, "loss": 0.08493423461914062, "step": 7939 }, { "epoch": 0.5367040692172502, "grad_norm": 1.2790461778640747, "learning_rate": 1.4056956723068451e-05, "loss": 0.20233154296875, "step": 7940 }, { "epoch": 0.5367716641881844, "grad_norm": 1.1840884685516357, "learning_rate": 1.40536637430317e-05, "loss": 0.226318359375, "step": 7941 }, { "epoch": 0.5368392591591186, "grad_norm": 1.2451798915863037, "learning_rate": 1.405037080878458e-05, "loss": 0.217987060546875, "step": 7942 }, { "epoch": 0.5369068541300527, "grad_norm": 1.4431627988815308, "learning_rate": 1.4047077920486432e-05, "loss": 0.1571807861328125, "step": 7943 }, { "epoch": 0.5369744491009869, "grad_norm": 2.6087217330932617, "learning_rate": 1.4043785078296573e-05, "loss": 0.24847412109375, "step": 7944 }, { "epoch": 0.537042044071921, "grad_norm": 1.1309235095977783, "learning_rate": 1.4040492282374342e-05, "loss": 0.218780517578125, "step": 7945 }, { "epoch": 0.5371096390428552, "grad_norm": 1.5363503694534302, "learning_rate": 1.403719953287906e-05, "loss": 0.223907470703125, "step": 7946 }, { "epoch": 0.5371772340137894, "grad_norm": 1.5301913022994995, "learning_rate": 1.4033906829970056e-05, "loss": 0.30877685546875, "step": 7947 }, { "epoch": 0.5372448289847236, "grad_norm": 0.5951558351516724, "learning_rate": 1.403061417380665e-05, "loss": 0.19061279296875, "step": 7948 }, { "epoch": 0.5373124239556577, "grad_norm": 1.7554993629455566, "learning_rate": 1.402732156454815e-05, "loss": 0.31298828125, "step": 7949 }, { "epoch": 0.5373800189265918, "grad_norm": 1.5662777423858643, "learning_rate": 1.4024029002353887e-05, "loss": 0.28204345703125, "step": 7950 }, { "epoch": 0.537447613897526, "grad_norm": 1.0384987592697144, "learning_rate": 1.4020736487383166e-05, "loss": 0.315460205078125, "step": 7951 }, { "epoch": 0.5375152088684602, "grad_norm": 1.8878594636917114, "learning_rate": 1.401744401979531e-05, "loss": 0.3226318359375, "step": 7952 }, { "epoch": 0.5375828038393944, "grad_norm": 1.310099482536316, "learning_rate": 1.4014151599749618e-05, "loss": 0.2459716796875, "step": 7953 }, { "epoch": 0.5376503988103285, "grad_norm": 1.383310317993164, "learning_rate": 1.4010859227405405e-05, "loss": 0.28173828125, "step": 7954 }, { "epoch": 0.5377179937812627, "grad_norm": 1.1853880882263184, "learning_rate": 1.4007566902921968e-05, "loss": 0.280120849609375, "step": 7955 }, { "epoch": 0.5377855887521968, "grad_norm": 0.967471718788147, "learning_rate": 1.4004274626458627e-05, "loss": 0.2135772705078125, "step": 7956 }, { "epoch": 0.537853183723131, "grad_norm": 1.266091227531433, "learning_rate": 1.4000982398174663e-05, "loss": 0.26885986328125, "step": 7957 }, { "epoch": 0.5379207786940652, "grad_norm": 2.0801498889923096, "learning_rate": 1.3997690218229386e-05, "loss": 0.316558837890625, "step": 7958 }, { "epoch": 0.5379883736649993, "grad_norm": 0.9413555860519409, "learning_rate": 1.3994398086782095e-05, "loss": 0.14422607421875, "step": 7959 }, { "epoch": 0.5380559686359335, "grad_norm": 1.2323548793792725, "learning_rate": 1.399110600399207e-05, "loss": 0.247222900390625, "step": 7960 }, { "epoch": 0.5381235636068676, "grad_norm": 0.9919885396957397, "learning_rate": 1.3987813970018618e-05, "loss": 0.237060546875, "step": 7961 }, { "epoch": 0.5381911585778019, "grad_norm": 0.7144025564193726, "learning_rate": 1.3984521985021015e-05, "loss": 0.1424102783203125, "step": 7962 }, { "epoch": 0.538258753548736, "grad_norm": 1.903613805770874, "learning_rate": 1.3981230049158556e-05, "loss": 0.29241943359375, "step": 7963 }, { "epoch": 0.5383263485196701, "grad_norm": 1.1708096265792847, "learning_rate": 1.397793816259052e-05, "loss": 0.296661376953125, "step": 7964 }, { "epoch": 0.5383939434906043, "grad_norm": 0.8349901437759399, "learning_rate": 1.3974646325476197e-05, "loss": 0.17908477783203125, "step": 7965 }, { "epoch": 0.5384615384615384, "grad_norm": 1.955036997795105, "learning_rate": 1.3971354537974857e-05, "loss": 0.282318115234375, "step": 7966 }, { "epoch": 0.5385291334324727, "grad_norm": 2.2513880729675293, "learning_rate": 1.3968062800245783e-05, "loss": 0.2208251953125, "step": 7967 }, { "epoch": 0.5385967284034068, "grad_norm": 0.6477939486503601, "learning_rate": 1.3964771112448246e-05, "loss": 0.1104278564453125, "step": 7968 }, { "epoch": 0.538664323374341, "grad_norm": 2.0277135372161865, "learning_rate": 1.3961479474741527e-05, "loss": 0.34466552734375, "step": 7969 }, { "epoch": 0.5387319183452751, "grad_norm": 1.5055372714996338, "learning_rate": 1.3958187887284885e-05, "loss": 0.26275634765625, "step": 7970 }, { "epoch": 0.5387995133162092, "grad_norm": 1.2388204336166382, "learning_rate": 1.3954896350237592e-05, "loss": 0.18476104736328125, "step": 7971 }, { "epoch": 0.5388671082871435, "grad_norm": 1.184699296951294, "learning_rate": 1.3951604863758915e-05, "loss": 0.2208251953125, "step": 7972 }, { "epoch": 0.5389347032580776, "grad_norm": 1.343645691871643, "learning_rate": 1.3948313428008112e-05, "loss": 0.247283935546875, "step": 7973 }, { "epoch": 0.5390022982290118, "grad_norm": 1.0364760160446167, "learning_rate": 1.3945022043144446e-05, "loss": 0.27001953125, "step": 7974 }, { "epoch": 0.5390698931999459, "grad_norm": 0.968031644821167, "learning_rate": 1.3941730709327173e-05, "loss": 0.2098541259765625, "step": 7975 }, { "epoch": 0.5391374881708801, "grad_norm": 1.018213152885437, "learning_rate": 1.3938439426715556e-05, "loss": 0.17218017578125, "step": 7976 }, { "epoch": 0.5392050831418143, "grad_norm": 0.8351171612739563, "learning_rate": 1.3935148195468834e-05, "loss": 0.1333160400390625, "step": 7977 }, { "epoch": 0.5392726781127484, "grad_norm": 1.2100112438201904, "learning_rate": 1.3931857015746272e-05, "loss": 0.278778076171875, "step": 7978 }, { "epoch": 0.5393402730836826, "grad_norm": 1.6310335397720337, "learning_rate": 1.3928565887707102e-05, "loss": 0.2641143798828125, "step": 7979 }, { "epoch": 0.5394078680546167, "grad_norm": 1.0781402587890625, "learning_rate": 1.3925274811510584e-05, "loss": 0.2084197998046875, "step": 7980 }, { "epoch": 0.5394754630255509, "grad_norm": 1.3776189088821411, "learning_rate": 1.3921983787315947e-05, "loss": 0.28509521484375, "step": 7981 }, { "epoch": 0.539543057996485, "grad_norm": 0.5900130271911621, "learning_rate": 1.3918692815282448e-05, "loss": 0.07135772705078125, "step": 7982 }, { "epoch": 0.5396106529674192, "grad_norm": 0.9707421660423279, "learning_rate": 1.391540189556931e-05, "loss": 0.13053512573242188, "step": 7983 }, { "epoch": 0.5396782479383534, "grad_norm": 0.9765785336494446, "learning_rate": 1.3912111028335771e-05, "loss": 0.154998779296875, "step": 7984 }, { "epoch": 0.5397458429092875, "grad_norm": 1.2684165239334106, "learning_rate": 1.3908820213741074e-05, "loss": 0.228729248046875, "step": 7985 }, { "epoch": 0.5398134378802217, "grad_norm": 1.506712794303894, "learning_rate": 1.3905529451944433e-05, "loss": 0.2135009765625, "step": 7986 }, { "epoch": 0.5398810328511559, "grad_norm": 1.110841155052185, "learning_rate": 1.3902238743105087e-05, "loss": 0.2423095703125, "step": 7987 }, { "epoch": 0.5399486278220901, "grad_norm": 0.9245505928993225, "learning_rate": 1.3898948087382255e-05, "loss": 0.227569580078125, "step": 7988 }, { "epoch": 0.5400162227930242, "grad_norm": 0.6725277304649353, "learning_rate": 1.3895657484935167e-05, "loss": 0.0891876220703125, "step": 7989 }, { "epoch": 0.5400838177639583, "grad_norm": 1.065959095954895, "learning_rate": 1.389236693592303e-05, "loss": 0.193328857421875, "step": 7990 }, { "epoch": 0.5401514127348925, "grad_norm": 1.6420694589614868, "learning_rate": 1.3889076440505078e-05, "loss": 0.265625, "step": 7991 }, { "epoch": 0.5402190077058266, "grad_norm": 0.9375958442687988, "learning_rate": 1.3885785998840505e-05, "loss": 0.1943817138671875, "step": 7992 }, { "epoch": 0.5402866026767609, "grad_norm": 1.2977516651153564, "learning_rate": 1.3882495611088546e-05, "loss": 0.28460693359375, "step": 7993 }, { "epoch": 0.540354197647695, "grad_norm": 2.568434476852417, "learning_rate": 1.3879205277408399e-05, "loss": 0.29486083984375, "step": 7994 }, { "epoch": 0.5404217926186292, "grad_norm": 0.9934461116790771, "learning_rate": 1.3875914997959264e-05, "loss": 0.24676513671875, "step": 7995 }, { "epoch": 0.5404893875895633, "grad_norm": 1.535360336303711, "learning_rate": 1.3872624772900353e-05, "loss": 0.241302490234375, "step": 7996 }, { "epoch": 0.5405569825604974, "grad_norm": 0.5169686675071716, "learning_rate": 1.3869334602390866e-05, "loss": 0.109832763671875, "step": 7997 }, { "epoch": 0.5406245775314317, "grad_norm": 1.28365159034729, "learning_rate": 1.3866044486590009e-05, "loss": 0.248809814453125, "step": 7998 }, { "epoch": 0.5406921725023658, "grad_norm": 0.5610086917877197, "learning_rate": 1.3862754425656963e-05, "loss": 0.1472015380859375, "step": 7999 }, { "epoch": 0.5407597674733, "grad_norm": 1.1506096124649048, "learning_rate": 1.3859464419750936e-05, "loss": 0.1973876953125, "step": 8000 }, { "epoch": 0.5408273624442341, "grad_norm": 0.7153785228729248, "learning_rate": 1.3856174469031108e-05, "loss": 0.10860824584960938, "step": 8001 }, { "epoch": 0.5408949574151684, "grad_norm": 1.2314705848693848, "learning_rate": 1.385288457365668e-05, "loss": 0.215728759765625, "step": 8002 }, { "epoch": 0.5409625523861025, "grad_norm": 1.2243887186050415, "learning_rate": 1.3849594733786825e-05, "loss": 0.17723846435546875, "step": 8003 }, { "epoch": 0.5410301473570366, "grad_norm": 1.1302075386047363, "learning_rate": 1.3846304949580733e-05, "loss": 0.250335693359375, "step": 8004 }, { "epoch": 0.5410977423279708, "grad_norm": 2.403008222579956, "learning_rate": 1.3843015221197586e-05, "loss": 0.299346923828125, "step": 8005 }, { "epoch": 0.5411653372989049, "grad_norm": 1.7823108434677124, "learning_rate": 1.383972554879655e-05, "loss": 0.25655364990234375, "step": 8006 }, { "epoch": 0.5412329322698392, "grad_norm": 1.352574110031128, "learning_rate": 1.3836435932536816e-05, "loss": 0.268096923828125, "step": 8007 }, { "epoch": 0.5413005272407733, "grad_norm": 1.4107146263122559, "learning_rate": 1.3833146372577539e-05, "loss": 0.2900390625, "step": 8008 }, { "epoch": 0.5413681222117075, "grad_norm": 1.151641845703125, "learning_rate": 1.3829856869077902e-05, "loss": 0.232666015625, "step": 8009 }, { "epoch": 0.5414357171826416, "grad_norm": 1.1122643947601318, "learning_rate": 1.3826567422197063e-05, "loss": 0.201568603515625, "step": 8010 }, { "epoch": 0.5415033121535757, "grad_norm": 1.5905275344848633, "learning_rate": 1.3823278032094195e-05, "loss": 0.2498779296875, "step": 8011 }, { "epoch": 0.54157090712451, "grad_norm": 0.9327800869941711, "learning_rate": 1.381998869892845e-05, "loss": 0.14727783203125, "step": 8012 }, { "epoch": 0.5416385020954441, "grad_norm": 1.1178719997406006, "learning_rate": 1.3816699422858991e-05, "loss": 0.24127197265625, "step": 8013 }, { "epoch": 0.5417060970663783, "grad_norm": 2.1453683376312256, "learning_rate": 1.3813410204044971e-05, "loss": 0.274658203125, "step": 8014 }, { "epoch": 0.5417736920373124, "grad_norm": 1.34963858127594, "learning_rate": 1.381012104264555e-05, "loss": 0.30303955078125, "step": 8015 }, { "epoch": 0.5418412870082466, "grad_norm": 1.8386837244033813, "learning_rate": 1.3806831938819868e-05, "loss": 0.263427734375, "step": 8016 }, { "epoch": 0.5419088819791807, "grad_norm": 1.4393048286437988, "learning_rate": 1.3803542892727079e-05, "loss": 0.26666259765625, "step": 8017 }, { "epoch": 0.5419764769501149, "grad_norm": 1.4335166215896606, "learning_rate": 1.3800253904526329e-05, "loss": 0.179168701171875, "step": 8018 }, { "epoch": 0.5420440719210491, "grad_norm": 1.040583848953247, "learning_rate": 1.379696497437675e-05, "loss": 0.1826629638671875, "step": 8019 }, { "epoch": 0.5421116668919832, "grad_norm": 1.5250012874603271, "learning_rate": 1.3793676102437489e-05, "loss": 0.24443817138671875, "step": 8020 }, { "epoch": 0.5421792618629174, "grad_norm": 1.1029361486434937, "learning_rate": 1.3790387288867678e-05, "loss": 0.2263946533203125, "step": 8021 }, { "epoch": 0.5422468568338515, "grad_norm": 1.7083797454833984, "learning_rate": 1.3787098533826459e-05, "loss": 0.286224365234375, "step": 8022 }, { "epoch": 0.5423144518047858, "grad_norm": 1.0425639152526855, "learning_rate": 1.3783809837472949e-05, "loss": 0.23681640625, "step": 8023 }, { "epoch": 0.5423820467757199, "grad_norm": 1.6896783113479614, "learning_rate": 1.3780521199966288e-05, "loss": 0.3149261474609375, "step": 8024 }, { "epoch": 0.542449641746654, "grad_norm": 1.5729494094848633, "learning_rate": 1.3777232621465592e-05, "loss": 0.253936767578125, "step": 8025 }, { "epoch": 0.5425172367175882, "grad_norm": 1.1428931951522827, "learning_rate": 1.3773944102129985e-05, "loss": 0.313323974609375, "step": 8026 }, { "epoch": 0.5425848316885223, "grad_norm": 1.2509658336639404, "learning_rate": 1.3770655642118588e-05, "loss": 0.26373291015625, "step": 8027 }, { "epoch": 0.5426524266594566, "grad_norm": 1.4338802099227905, "learning_rate": 1.376736724159052e-05, "loss": 0.344940185546875, "step": 8028 }, { "epoch": 0.5427200216303907, "grad_norm": 0.9450979232788086, "learning_rate": 1.3764078900704889e-05, "loss": 0.263641357421875, "step": 8029 }, { "epoch": 0.5427876166013249, "grad_norm": 2.537975788116455, "learning_rate": 1.3760790619620803e-05, "loss": 0.34283447265625, "step": 8030 }, { "epoch": 0.542855211572259, "grad_norm": 1.7170641422271729, "learning_rate": 1.3757502398497379e-05, "loss": 0.3046875, "step": 8031 }, { "epoch": 0.5429228065431931, "grad_norm": 0.8749875426292419, "learning_rate": 1.375421423749371e-05, "loss": 0.14288330078125, "step": 8032 }, { "epoch": 0.5429904015141274, "grad_norm": 0.7990261316299438, "learning_rate": 1.3750926136768906e-05, "loss": 0.18829345703125, "step": 8033 }, { "epoch": 0.5430579964850615, "grad_norm": 1.0147688388824463, "learning_rate": 1.3747638096482063e-05, "loss": 0.22271728515625, "step": 8034 }, { "epoch": 0.5431255914559957, "grad_norm": 1.1977167129516602, "learning_rate": 1.3744350116792282e-05, "loss": 0.2099151611328125, "step": 8035 }, { "epoch": 0.5431931864269298, "grad_norm": 2.0559589862823486, "learning_rate": 1.3741062197858644e-05, "loss": 0.33526611328125, "step": 8036 }, { "epoch": 0.543260781397864, "grad_norm": 1.2613266706466675, "learning_rate": 1.3737774339840253e-05, "loss": 0.280029296875, "step": 8037 }, { "epoch": 0.5433283763687982, "grad_norm": 1.4212558269500732, "learning_rate": 1.373448654289618e-05, "loss": 0.29119873046875, "step": 8038 }, { "epoch": 0.5433959713397323, "grad_norm": 1.6291600465774536, "learning_rate": 1.3731198807185527e-05, "loss": 0.2491455078125, "step": 8039 }, { "epoch": 0.5434635663106665, "grad_norm": 1.3951420783996582, "learning_rate": 1.3727911132867368e-05, "loss": 0.20697021484375, "step": 8040 }, { "epoch": 0.5435311612816006, "grad_norm": 2.368360996246338, "learning_rate": 1.372462352010077e-05, "loss": 0.34600830078125, "step": 8041 }, { "epoch": 0.5435987562525348, "grad_norm": 1.439077615737915, "learning_rate": 1.3721335969044824e-05, "loss": 0.25531005859375, "step": 8042 }, { "epoch": 0.543666351223469, "grad_norm": 1.3922511339187622, "learning_rate": 1.371804847985859e-05, "loss": 0.301025390625, "step": 8043 }, { "epoch": 0.5437339461944032, "grad_norm": 1.0040074586868286, "learning_rate": 1.3714761052701151e-05, "loss": 0.2203521728515625, "step": 8044 }, { "epoch": 0.5438015411653373, "grad_norm": 1.1126461029052734, "learning_rate": 1.3711473687731558e-05, "loss": 0.18597412109375, "step": 8045 }, { "epoch": 0.5438691361362714, "grad_norm": 1.2051951885223389, "learning_rate": 1.3708186385108885e-05, "loss": 0.31201171875, "step": 8046 }, { "epoch": 0.5439367311072056, "grad_norm": 1.3379428386688232, "learning_rate": 1.3704899144992185e-05, "loss": 0.266571044921875, "step": 8047 }, { "epoch": 0.5440043260781398, "grad_norm": 0.7326334118843079, "learning_rate": 1.3701611967540524e-05, "loss": 0.206573486328125, "step": 8048 }, { "epoch": 0.544071921049074, "grad_norm": 1.0861599445343018, "learning_rate": 1.3698324852912945e-05, "loss": 0.25921630859375, "step": 8049 }, { "epoch": 0.5441395160200081, "grad_norm": 1.2182047367095947, "learning_rate": 1.3695037801268507e-05, "loss": 0.30059814453125, "step": 8050 }, { "epoch": 0.5442071109909423, "grad_norm": 1.8168199062347412, "learning_rate": 1.3691750812766255e-05, "loss": 0.256134033203125, "step": 8051 }, { "epoch": 0.5442747059618764, "grad_norm": 1.3600133657455444, "learning_rate": 1.3688463887565239e-05, "loss": 0.26727294921875, "step": 8052 }, { "epoch": 0.5443423009328106, "grad_norm": 1.1456588506698608, "learning_rate": 1.3685177025824496e-05, "loss": 0.250030517578125, "step": 8053 }, { "epoch": 0.5444098959037448, "grad_norm": 1.541342854499817, "learning_rate": 1.368189022770306e-05, "loss": 0.241241455078125, "step": 8054 }, { "epoch": 0.5444774908746789, "grad_norm": 0.9832486510276794, "learning_rate": 1.3678603493359974e-05, "loss": 0.1946868896484375, "step": 8055 }, { "epoch": 0.5445450858456131, "grad_norm": 1.1316670179367065, "learning_rate": 1.3675316822954267e-05, "loss": 0.1569976806640625, "step": 8056 }, { "epoch": 0.5446126808165472, "grad_norm": 1.738877534866333, "learning_rate": 1.3672030216644976e-05, "loss": 0.253631591796875, "step": 8057 }, { "epoch": 0.5446802757874815, "grad_norm": 1.034520149230957, "learning_rate": 1.3668743674591115e-05, "loss": 0.22210693359375, "step": 8058 }, { "epoch": 0.5447478707584156, "grad_norm": 0.893616795539856, "learning_rate": 1.3665457196951717e-05, "loss": 0.1776275634765625, "step": 8059 }, { "epoch": 0.5448154657293497, "grad_norm": 1.016859531402588, "learning_rate": 1.3662170783885796e-05, "loss": 0.218017578125, "step": 8060 }, { "epoch": 0.5448830607002839, "grad_norm": 1.319494605064392, "learning_rate": 1.365888443555238e-05, "loss": 0.3255615234375, "step": 8061 }, { "epoch": 0.544950655671218, "grad_norm": 1.0872091054916382, "learning_rate": 1.3655598152110468e-05, "loss": 0.244903564453125, "step": 8062 }, { "epoch": 0.5450182506421523, "grad_norm": 1.0290552377700806, "learning_rate": 1.365231193371908e-05, "loss": 0.234893798828125, "step": 8063 }, { "epoch": 0.5450858456130864, "grad_norm": 1.8164992332458496, "learning_rate": 1.3649025780537228e-05, "loss": 0.313812255859375, "step": 8064 }, { "epoch": 0.5451534405840206, "grad_norm": 1.6586920022964478, "learning_rate": 1.3645739692723902e-05, "loss": 0.31640625, "step": 8065 }, { "epoch": 0.5452210355549547, "grad_norm": 1.3359872102737427, "learning_rate": 1.3642453670438114e-05, "loss": 0.226226806640625, "step": 8066 }, { "epoch": 0.5452886305258888, "grad_norm": 1.6189866065979004, "learning_rate": 1.3639167713838859e-05, "loss": 0.2557373046875, "step": 8067 }, { "epoch": 0.545356225496823, "grad_norm": 2.015949010848999, "learning_rate": 1.3635881823085138e-05, "loss": 0.297149658203125, "step": 8068 }, { "epoch": 0.5454238204677572, "grad_norm": 1.7351127862930298, "learning_rate": 1.3632595998335932e-05, "loss": 0.17999267578125, "step": 8069 }, { "epoch": 0.5454914154386914, "grad_norm": 1.3911687135696411, "learning_rate": 1.362931023975024e-05, "loss": 0.243804931640625, "step": 8070 }, { "epoch": 0.5455590104096255, "grad_norm": 2.399744987487793, "learning_rate": 1.362602454748704e-05, "loss": 0.31097412109375, "step": 8071 }, { "epoch": 0.5456266053805597, "grad_norm": 1.3157846927642822, "learning_rate": 1.3622738921705317e-05, "loss": 0.302978515625, "step": 8072 }, { "epoch": 0.5456942003514939, "grad_norm": 1.5505985021591187, "learning_rate": 1.3619453362564048e-05, "loss": 0.282501220703125, "step": 8073 }, { "epoch": 0.545761795322428, "grad_norm": 1.1901357173919678, "learning_rate": 1.3616167870222217e-05, "loss": 0.2467041015625, "step": 8074 }, { "epoch": 0.5458293902933622, "grad_norm": 1.0507850646972656, "learning_rate": 1.3612882444838787e-05, "loss": 0.2801513671875, "step": 8075 }, { "epoch": 0.5458969852642963, "grad_norm": 1.3088116645812988, "learning_rate": 1.360959708657273e-05, "loss": 0.237762451171875, "step": 8076 }, { "epoch": 0.5459645802352305, "grad_norm": 1.2520939111709595, "learning_rate": 1.3606311795583016e-05, "loss": 0.288330078125, "step": 8077 }, { "epoch": 0.5460321752061646, "grad_norm": 1.546011209487915, "learning_rate": 1.36030265720286e-05, "loss": 0.204345703125, "step": 8078 }, { "epoch": 0.5460997701770989, "grad_norm": 1.5140557289123535, "learning_rate": 1.3599741416068449e-05, "loss": 0.2730712890625, "step": 8079 }, { "epoch": 0.546167365148033, "grad_norm": 1.760534644126892, "learning_rate": 1.3596456327861513e-05, "loss": 0.200592041015625, "step": 8080 }, { "epoch": 0.5462349601189671, "grad_norm": 2.0211973190307617, "learning_rate": 1.3593171307566755e-05, "loss": 0.33856201171875, "step": 8081 }, { "epoch": 0.5463025550899013, "grad_norm": 1.2710479497909546, "learning_rate": 1.358988635534311e-05, "loss": 0.279632568359375, "step": 8082 }, { "epoch": 0.5463701500608354, "grad_norm": 1.1991214752197266, "learning_rate": 1.3586601471349541e-05, "loss": 0.21319580078125, "step": 8083 }, { "epoch": 0.5464377450317697, "grad_norm": 0.8817214369773865, "learning_rate": 1.3583316655744976e-05, "loss": 0.182159423828125, "step": 8084 }, { "epoch": 0.5465053400027038, "grad_norm": 0.9623850584030151, "learning_rate": 1.3580031908688364e-05, "loss": 0.202178955078125, "step": 8085 }, { "epoch": 0.546572934973638, "grad_norm": 1.4973255395889282, "learning_rate": 1.3576747230338635e-05, "loss": 0.2662353515625, "step": 8086 }, { "epoch": 0.5466405299445721, "grad_norm": 1.3770089149475098, "learning_rate": 1.3573462620854734e-05, "loss": 0.2019500732421875, "step": 8087 }, { "epoch": 0.5467081249155062, "grad_norm": 1.5336226224899292, "learning_rate": 1.3570178080395579e-05, "loss": 0.29095458984375, "step": 8088 }, { "epoch": 0.5467757198864405, "grad_norm": 1.3587173223495483, "learning_rate": 1.3566893609120098e-05, "loss": 0.26171875, "step": 8089 }, { "epoch": 0.5468433148573746, "grad_norm": 1.009297251701355, "learning_rate": 1.3563609207187221e-05, "loss": 0.1367340087890625, "step": 8090 }, { "epoch": 0.5469109098283088, "grad_norm": 1.185014009475708, "learning_rate": 1.356032487475586e-05, "loss": 0.27374267578125, "step": 8091 }, { "epoch": 0.5469785047992429, "grad_norm": 1.672408103942871, "learning_rate": 1.3557040611984937e-05, "loss": 0.31011962890625, "step": 8092 }, { "epoch": 0.5470460997701772, "grad_norm": 0.9842954277992249, "learning_rate": 1.3553756419033357e-05, "loss": 0.214874267578125, "step": 8093 }, { "epoch": 0.5471136947411113, "grad_norm": 1.7832484245300293, "learning_rate": 1.3550472296060045e-05, "loss": 0.2427825927734375, "step": 8094 }, { "epoch": 0.5471812897120454, "grad_norm": 1.1680430173873901, "learning_rate": 1.3547188243223891e-05, "loss": 0.259429931640625, "step": 8095 }, { "epoch": 0.5472488846829796, "grad_norm": 1.5247184038162231, "learning_rate": 1.3543904260683807e-05, "loss": 0.283447265625, "step": 8096 }, { "epoch": 0.5473164796539137, "grad_norm": 1.5493834018707275, "learning_rate": 1.3540620348598688e-05, "loss": 0.247344970703125, "step": 8097 }, { "epoch": 0.547384074624848, "grad_norm": 1.047298789024353, "learning_rate": 1.3537336507127438e-05, "loss": 0.216033935546875, "step": 8098 }, { "epoch": 0.5474516695957821, "grad_norm": 1.2739344835281372, "learning_rate": 1.3534052736428944e-05, "loss": 0.238189697265625, "step": 8099 }, { "epoch": 0.5475192645667163, "grad_norm": 0.8239198327064514, "learning_rate": 1.353076903666209e-05, "loss": 0.12030029296875, "step": 8100 }, { "epoch": 0.5475868595376504, "grad_norm": 0.9119490385055542, "learning_rate": 1.3527485407985768e-05, "loss": 0.151458740234375, "step": 8101 }, { "epoch": 0.5476544545085845, "grad_norm": 1.3340752124786377, "learning_rate": 1.3524201850558857e-05, "loss": 0.28118896484375, "step": 8102 }, { "epoch": 0.5477220494795187, "grad_norm": 1.5629185438156128, "learning_rate": 1.3520918364540247e-05, "loss": 0.2615966796875, "step": 8103 }, { "epoch": 0.5477896444504529, "grad_norm": 1.0039186477661133, "learning_rate": 1.3517634950088795e-05, "loss": 0.18853759765625, "step": 8104 }, { "epoch": 0.5478572394213871, "grad_norm": 1.6799684762954712, "learning_rate": 1.3514351607363389e-05, "loss": 0.314422607421875, "step": 8105 }, { "epoch": 0.5479248343923212, "grad_norm": 0.9741514921188354, "learning_rate": 1.3511068336522886e-05, "loss": 0.2212982177734375, "step": 8106 }, { "epoch": 0.5479924293632554, "grad_norm": 1.0301291942596436, "learning_rate": 1.3507785137726163e-05, "loss": 0.183441162109375, "step": 8107 }, { "epoch": 0.5480600243341895, "grad_norm": 2.4993817806243896, "learning_rate": 1.350450201113207e-05, "loss": 0.3399658203125, "step": 8108 }, { "epoch": 0.5481276193051237, "grad_norm": 1.1744601726531982, "learning_rate": 1.3501218956899473e-05, "loss": 0.1552734375, "step": 8109 }, { "epoch": 0.5481952142760579, "grad_norm": 2.0983314514160156, "learning_rate": 1.3497935975187225e-05, "loss": 0.34600830078125, "step": 8110 }, { "epoch": 0.548262809246992, "grad_norm": 0.8720482587814331, "learning_rate": 1.3494653066154172e-05, "loss": 0.1742706298828125, "step": 8111 }, { "epoch": 0.5483304042179262, "grad_norm": 1.8811404705047607, "learning_rate": 1.3491370229959168e-05, "loss": 0.269989013671875, "step": 8112 }, { "epoch": 0.5483979991888603, "grad_norm": 0.82491135597229, "learning_rate": 1.3488087466761051e-05, "loss": 0.16474151611328125, "step": 8113 }, { "epoch": 0.5484655941597945, "grad_norm": 1.2112926244735718, "learning_rate": 1.348480477671867e-05, "loss": 0.21734619140625, "step": 8114 }, { "epoch": 0.5485331891307287, "grad_norm": 1.1100984811782837, "learning_rate": 1.3481522159990852e-05, "loss": 0.1739349365234375, "step": 8115 }, { "epoch": 0.5486007841016628, "grad_norm": 1.8375858068466187, "learning_rate": 1.3478239616736441e-05, "loss": 0.207733154296875, "step": 8116 }, { "epoch": 0.548668379072597, "grad_norm": 1.4064050912857056, "learning_rate": 1.3474957147114254e-05, "loss": 0.259613037109375, "step": 8117 }, { "epoch": 0.5487359740435311, "grad_norm": 1.2855623960494995, "learning_rate": 1.3471674751283127e-05, "loss": 0.3082275390625, "step": 8118 }, { "epoch": 0.5488035690144654, "grad_norm": 1.4027775526046753, "learning_rate": 1.3468392429401878e-05, "loss": 0.29302978515625, "step": 8119 }, { "epoch": 0.5488711639853995, "grad_norm": 1.693394422531128, "learning_rate": 1.3465110181629334e-05, "loss": 0.28326416015625, "step": 8120 }, { "epoch": 0.5489387589563336, "grad_norm": 1.4725314378738403, "learning_rate": 1.34618280081243e-05, "loss": 0.32513427734375, "step": 8121 }, { "epoch": 0.5490063539272678, "grad_norm": 1.291115641593933, "learning_rate": 1.3458545909045596e-05, "loss": 0.219451904296875, "step": 8122 }, { "epoch": 0.5490739488982019, "grad_norm": 0.7335776090621948, "learning_rate": 1.3455263884552028e-05, "loss": 0.14447021484375, "step": 8123 }, { "epoch": 0.5491415438691362, "grad_norm": 1.0966538190841675, "learning_rate": 1.3451981934802395e-05, "loss": 0.2792205810546875, "step": 8124 }, { "epoch": 0.5492091388400703, "grad_norm": 1.403502345085144, "learning_rate": 1.3448700059955507e-05, "loss": 0.22503662109375, "step": 8125 }, { "epoch": 0.5492767338110045, "grad_norm": 1.212248682975769, "learning_rate": 1.3445418260170152e-05, "loss": 0.23876953125, "step": 8126 }, { "epoch": 0.5493443287819386, "grad_norm": 1.1857913732528687, "learning_rate": 1.3442136535605138e-05, "loss": 0.309539794921875, "step": 8127 }, { "epoch": 0.5494119237528727, "grad_norm": 1.2005611658096313, "learning_rate": 1.343885488641924e-05, "loss": 0.2259521484375, "step": 8128 }, { "epoch": 0.549479518723807, "grad_norm": 0.9084820747375488, "learning_rate": 1.3435573312771254e-05, "loss": 0.1731548309326172, "step": 8129 }, { "epoch": 0.5495471136947411, "grad_norm": 1.7561109066009521, "learning_rate": 1.3432291814819958e-05, "loss": 0.342529296875, "step": 8130 }, { "epoch": 0.5496147086656753, "grad_norm": 1.5868077278137207, "learning_rate": 1.3429010392724134e-05, "loss": 0.22021484375, "step": 8131 }, { "epoch": 0.5496823036366094, "grad_norm": 1.3349801301956177, "learning_rate": 1.3425729046642557e-05, "loss": 0.273712158203125, "step": 8132 }, { "epoch": 0.5497498986075436, "grad_norm": 1.1171399354934692, "learning_rate": 1.3422447776734004e-05, "loss": 0.2818603515625, "step": 8133 }, { "epoch": 0.5498174935784778, "grad_norm": 0.47169390320777893, "learning_rate": 1.3419166583157235e-05, "loss": 0.10590362548828125, "step": 8134 }, { "epoch": 0.5498850885494119, "grad_norm": 1.1361379623413086, "learning_rate": 1.3415885466071015e-05, "loss": 0.2204437255859375, "step": 8135 }, { "epoch": 0.5499526835203461, "grad_norm": 1.2185697555541992, "learning_rate": 1.3412604425634112e-05, "loss": 0.24755859375, "step": 8136 }, { "epoch": 0.5500202784912802, "grad_norm": 1.351412057876587, "learning_rate": 1.3409323462005275e-05, "loss": 0.31005859375, "step": 8137 }, { "epoch": 0.5500878734622144, "grad_norm": 1.122391939163208, "learning_rate": 1.3406042575343262e-05, "loss": 0.2495269775390625, "step": 8138 }, { "epoch": 0.5501554684331486, "grad_norm": 0.7512651085853577, "learning_rate": 1.340276176580682e-05, "loss": 0.1591644287109375, "step": 8139 }, { "epoch": 0.5502230634040828, "grad_norm": 0.847312867641449, "learning_rate": 1.3399481033554704e-05, "loss": 0.15503692626953125, "step": 8140 }, { "epoch": 0.5502906583750169, "grad_norm": 1.35615873336792, "learning_rate": 1.3396200378745641e-05, "loss": 0.23876953125, "step": 8141 }, { "epoch": 0.550358253345951, "grad_norm": 0.9339510202407837, "learning_rate": 1.3392919801538383e-05, "loss": 0.185089111328125, "step": 8142 }, { "epoch": 0.5504258483168852, "grad_norm": 2.0414748191833496, "learning_rate": 1.3389639302091654e-05, "loss": 0.32489013671875, "step": 8143 }, { "epoch": 0.5504934432878194, "grad_norm": 1.2799230813980103, "learning_rate": 1.3386358880564198e-05, "loss": 0.3134765625, "step": 8144 }, { "epoch": 0.5505610382587536, "grad_norm": 1.0615663528442383, "learning_rate": 1.3383078537114734e-05, "loss": 0.15489959716796875, "step": 8145 }, { "epoch": 0.5506286332296877, "grad_norm": 1.1710469722747803, "learning_rate": 1.337979827190198e-05, "loss": 0.27301025390625, "step": 8146 }, { "epoch": 0.5506962282006219, "grad_norm": 1.155821681022644, "learning_rate": 1.3376518085084664e-05, "loss": 0.286865234375, "step": 8147 }, { "epoch": 0.550763823171556, "grad_norm": 0.6972351670265198, "learning_rate": 1.3373237976821497e-05, "loss": 0.1315460205078125, "step": 8148 }, { "epoch": 0.5508314181424901, "grad_norm": 1.0866276025772095, "learning_rate": 1.3369957947271199e-05, "loss": 0.2037506103515625, "step": 8149 }, { "epoch": 0.5508990131134244, "grad_norm": 1.5073882341384888, "learning_rate": 1.3366677996592466e-05, "loss": 0.1944732666015625, "step": 8150 }, { "epoch": 0.5509666080843585, "grad_norm": 1.4763766527175903, "learning_rate": 1.3363398124944013e-05, "loss": 0.27593994140625, "step": 8151 }, { "epoch": 0.5510342030552927, "grad_norm": 0.8426774144172668, "learning_rate": 1.3360118332484532e-05, "loss": 0.181732177734375, "step": 8152 }, { "epoch": 0.5511017980262268, "grad_norm": 1.5048692226409912, "learning_rate": 1.3356838619372734e-05, "loss": 0.224517822265625, "step": 8153 }, { "epoch": 0.5511693929971611, "grad_norm": 1.7787424325942993, "learning_rate": 1.3353558985767292e-05, "loss": 0.216156005859375, "step": 8154 }, { "epoch": 0.5512369879680952, "grad_norm": 1.7408794164657593, "learning_rate": 1.3350279431826911e-05, "loss": 0.220977783203125, "step": 8155 }, { "epoch": 0.5513045829390293, "grad_norm": 1.5702946186065674, "learning_rate": 1.3346999957710267e-05, "loss": 0.273468017578125, "step": 8156 }, { "epoch": 0.5513721779099635, "grad_norm": 0.841108500957489, "learning_rate": 1.3343720563576052e-05, "loss": 0.1932525634765625, "step": 8157 }, { "epoch": 0.5514397728808976, "grad_norm": 0.9414536952972412, "learning_rate": 1.3340441249582933e-05, "loss": 0.194610595703125, "step": 8158 }, { "epoch": 0.5515073678518319, "grad_norm": 0.8747954964637756, "learning_rate": 1.3337162015889582e-05, "loss": 0.2120513916015625, "step": 8159 }, { "epoch": 0.551574962822766, "grad_norm": 1.0917131900787354, "learning_rate": 1.3333882862654681e-05, "loss": 0.2403411865234375, "step": 8160 }, { "epoch": 0.5516425577937002, "grad_norm": 1.5115700960159302, "learning_rate": 1.3330603790036884e-05, "loss": 0.279754638671875, "step": 8161 }, { "epoch": 0.5517101527646343, "grad_norm": 1.3540065288543701, "learning_rate": 1.3327324798194861e-05, "loss": 0.280731201171875, "step": 8162 }, { "epoch": 0.5517777477355684, "grad_norm": 0.8935394883155823, "learning_rate": 1.3324045887287261e-05, "loss": 0.21868896484375, "step": 8163 }, { "epoch": 0.5518453427065027, "grad_norm": 1.4409986734390259, "learning_rate": 1.3320767057472748e-05, "loss": 0.250457763671875, "step": 8164 }, { "epoch": 0.5519129376774368, "grad_norm": 1.388689637184143, "learning_rate": 1.3317488308909964e-05, "loss": 0.22540283203125, "step": 8165 }, { "epoch": 0.551980532648371, "grad_norm": 1.0724068880081177, "learning_rate": 1.3314209641757567e-05, "loss": 0.273773193359375, "step": 8166 }, { "epoch": 0.5520481276193051, "grad_norm": 0.9726023077964783, "learning_rate": 1.3310931056174183e-05, "loss": 0.22845458984375, "step": 8167 }, { "epoch": 0.5521157225902393, "grad_norm": 1.2668471336364746, "learning_rate": 1.3307652552318462e-05, "loss": 0.2163848876953125, "step": 8168 }, { "epoch": 0.5521833175611734, "grad_norm": 1.5288587808609009, "learning_rate": 1.3304374130349038e-05, "loss": 0.283447265625, "step": 8169 }, { "epoch": 0.5522509125321076, "grad_norm": 1.5444103479385376, "learning_rate": 1.3301095790424533e-05, "loss": 0.31976318359375, "step": 8170 }, { "epoch": 0.5523185075030418, "grad_norm": 1.168121099472046, "learning_rate": 1.3297817532703582e-05, "loss": 0.29010009765625, "step": 8171 }, { "epoch": 0.5523861024739759, "grad_norm": 1.1217230558395386, "learning_rate": 1.32945393573448e-05, "loss": 0.26416015625, "step": 8172 }, { "epoch": 0.5524536974449101, "grad_norm": 1.3056317567825317, "learning_rate": 1.329126126450682e-05, "loss": 0.243988037109375, "step": 8173 }, { "epoch": 0.5525212924158442, "grad_norm": 0.8374953866004944, "learning_rate": 1.3287983254348237e-05, "loss": 0.212860107421875, "step": 8174 }, { "epoch": 0.5525888873867785, "grad_norm": 1.5577017068862915, "learning_rate": 1.328470532702768e-05, "loss": 0.269073486328125, "step": 8175 }, { "epoch": 0.5526564823577126, "grad_norm": 1.7004971504211426, "learning_rate": 1.3281427482703736e-05, "loss": 0.2659912109375, "step": 8176 }, { "epoch": 0.5527240773286467, "grad_norm": 0.5504128336906433, "learning_rate": 1.3278149721535024e-05, "loss": 0.1307220458984375, "step": 8177 }, { "epoch": 0.5527916722995809, "grad_norm": 1.3542988300323486, "learning_rate": 1.3274872043680134e-05, "loss": 0.2051544189453125, "step": 8178 }, { "epoch": 0.552859267270515, "grad_norm": 1.4434267282485962, "learning_rate": 1.3271594449297668e-05, "loss": 0.210205078125, "step": 8179 }, { "epoch": 0.5529268622414493, "grad_norm": 0.8176648020744324, "learning_rate": 1.326831693854621e-05, "loss": 0.1243896484375, "step": 8180 }, { "epoch": 0.5529944572123834, "grad_norm": 1.9994558095932007, "learning_rate": 1.3265039511584343e-05, "loss": 0.331817626953125, "step": 8181 }, { "epoch": 0.5530620521833176, "grad_norm": 0.6971865892410278, "learning_rate": 1.3261762168570661e-05, "loss": 0.1584625244140625, "step": 8182 }, { "epoch": 0.5531296471542517, "grad_norm": 0.7270757555961609, "learning_rate": 1.3258484909663729e-05, "loss": 0.1186981201171875, "step": 8183 }, { "epoch": 0.5531972421251858, "grad_norm": 1.4054663181304932, "learning_rate": 1.325520773502213e-05, "loss": 0.2245941162109375, "step": 8184 }, { "epoch": 0.5532648370961201, "grad_norm": 1.2672526836395264, "learning_rate": 1.3251930644804429e-05, "loss": 0.241302490234375, "step": 8185 }, { "epoch": 0.5533324320670542, "grad_norm": 1.2834677696228027, "learning_rate": 1.32486536391692e-05, "loss": 0.26055908203125, "step": 8186 }, { "epoch": 0.5534000270379884, "grad_norm": 0.6691225171089172, "learning_rate": 1.3245376718274994e-05, "loss": 0.1290130615234375, "step": 8187 }, { "epoch": 0.5534676220089225, "grad_norm": 1.7113858461380005, "learning_rate": 1.3242099882280376e-05, "loss": 0.31878662109375, "step": 8188 }, { "epoch": 0.5535352169798567, "grad_norm": 1.0546128749847412, "learning_rate": 1.3238823131343897e-05, "loss": 0.25262451171875, "step": 8189 }, { "epoch": 0.5536028119507909, "grad_norm": 1.7042367458343506, "learning_rate": 1.3235546465624111e-05, "loss": 0.22682952880859375, "step": 8190 }, { "epoch": 0.553670406921725, "grad_norm": 1.3333336114883423, "learning_rate": 1.3232269885279557e-05, "loss": 0.3106689453125, "step": 8191 }, { "epoch": 0.5537380018926592, "grad_norm": 1.0718272924423218, "learning_rate": 1.3228993390468783e-05, "loss": 0.196563720703125, "step": 8192 }, { "epoch": 0.5538055968635933, "grad_norm": 1.3654463291168213, "learning_rate": 1.3225716981350321e-05, "loss": 0.267059326171875, "step": 8193 }, { "epoch": 0.5538731918345275, "grad_norm": 1.0374404191970825, "learning_rate": 1.32224406580827e-05, "loss": 0.205780029296875, "step": 8194 }, { "epoch": 0.5539407868054617, "grad_norm": 1.2782405614852905, "learning_rate": 1.3219164420824464e-05, "loss": 0.2263031005859375, "step": 8195 }, { "epoch": 0.5540083817763959, "grad_norm": 1.3306368589401245, "learning_rate": 1.321588826973412e-05, "loss": 0.243896484375, "step": 8196 }, { "epoch": 0.55407597674733, "grad_norm": 1.0367964506149292, "learning_rate": 1.32126122049702e-05, "loss": 0.1778564453125, "step": 8197 }, { "epoch": 0.5541435717182641, "grad_norm": 1.2996190786361694, "learning_rate": 1.3209336226691217e-05, "loss": 0.296905517578125, "step": 8198 }, { "epoch": 0.5542111666891983, "grad_norm": 0.9063792824745178, "learning_rate": 1.3206060335055686e-05, "loss": 0.18310546875, "step": 8199 }, { "epoch": 0.5542787616601325, "grad_norm": 1.1520789861679077, "learning_rate": 1.3202784530222106e-05, "loss": 0.214996337890625, "step": 8200 }, { "epoch": 0.5543463566310667, "grad_norm": 0.965057373046875, "learning_rate": 1.3199508812348993e-05, "loss": 0.207000732421875, "step": 8201 }, { "epoch": 0.5544139516020008, "grad_norm": 1.9719654321670532, "learning_rate": 1.3196233181594836e-05, "loss": 0.26116943359375, "step": 8202 }, { "epoch": 0.554481546572935, "grad_norm": 1.1704034805297852, "learning_rate": 1.3192957638118142e-05, "loss": 0.27728271484375, "step": 8203 }, { "epoch": 0.5545491415438691, "grad_norm": 1.0692881345748901, "learning_rate": 1.3189682182077392e-05, "loss": 0.195953369140625, "step": 8204 }, { "epoch": 0.5546167365148033, "grad_norm": 1.596992015838623, "learning_rate": 1.3186406813631071e-05, "loss": 0.290618896484375, "step": 8205 }, { "epoch": 0.5546843314857375, "grad_norm": 1.0820094347000122, "learning_rate": 1.3183131532937678e-05, "loss": 0.2265167236328125, "step": 8206 }, { "epoch": 0.5547519264566716, "grad_norm": 1.6256147623062134, "learning_rate": 1.317985634015567e-05, "loss": 0.217987060546875, "step": 8207 }, { "epoch": 0.5548195214276058, "grad_norm": 0.8546744585037231, "learning_rate": 1.317658123544354e-05, "loss": 0.22259521484375, "step": 8208 }, { "epoch": 0.5548871163985399, "grad_norm": 0.8984284996986389, "learning_rate": 1.3173306218959743e-05, "loss": 0.16717529296875, "step": 8209 }, { "epoch": 0.5549547113694742, "grad_norm": 1.3774739503860474, "learning_rate": 1.3170031290862753e-05, "loss": 0.2734375, "step": 8210 }, { "epoch": 0.5550223063404083, "grad_norm": 1.0909240245819092, "learning_rate": 1.3166756451311028e-05, "loss": 0.22174072265625, "step": 8211 }, { "epoch": 0.5550899013113424, "grad_norm": 0.825563371181488, "learning_rate": 1.3163481700463032e-05, "loss": 0.18011474609375, "step": 8212 }, { "epoch": 0.5551574962822766, "grad_norm": 1.0073379278182983, "learning_rate": 1.3160207038477207e-05, "loss": 0.24514007568359375, "step": 8213 }, { "epoch": 0.5552250912532107, "grad_norm": 1.405547022819519, "learning_rate": 1.315693246551201e-05, "loss": 0.2455596923828125, "step": 8214 }, { "epoch": 0.555292686224145, "grad_norm": 1.2028899192810059, "learning_rate": 1.3153657981725885e-05, "loss": 0.26605224609375, "step": 8215 }, { "epoch": 0.5553602811950791, "grad_norm": 1.4137301445007324, "learning_rate": 1.3150383587277262e-05, "loss": 0.28277587890625, "step": 8216 }, { "epoch": 0.5554278761660133, "grad_norm": 1.022395133972168, "learning_rate": 1.3147109282324588e-05, "loss": 0.1930389404296875, "step": 8217 }, { "epoch": 0.5554954711369474, "grad_norm": 1.093638300895691, "learning_rate": 1.3143835067026286e-05, "loss": 0.264404296875, "step": 8218 }, { "epoch": 0.5555630661078815, "grad_norm": 1.1936982870101929, "learning_rate": 1.3140560941540795e-05, "loss": 0.299407958984375, "step": 8219 }, { "epoch": 0.5556306610788158, "grad_norm": 1.5687811374664307, "learning_rate": 1.3137286906026522e-05, "loss": 0.242340087890625, "step": 8220 }, { "epoch": 0.5556982560497499, "grad_norm": 0.8077190518379211, "learning_rate": 1.31340129606419e-05, "loss": 0.18480682373046875, "step": 8221 }, { "epoch": 0.5557658510206841, "grad_norm": 1.2855859994888306, "learning_rate": 1.3130739105545326e-05, "loss": 0.2224273681640625, "step": 8222 }, { "epoch": 0.5558334459916182, "grad_norm": 0.8961377143859863, "learning_rate": 1.3127465340895223e-05, "loss": 0.16705322265625, "step": 8223 }, { "epoch": 0.5559010409625524, "grad_norm": 1.305195927619934, "learning_rate": 1.3124191666849992e-05, "loss": 0.285064697265625, "step": 8224 }, { "epoch": 0.5559686359334866, "grad_norm": 1.643549919128418, "learning_rate": 1.3120918083568036e-05, "loss": 0.2836456298828125, "step": 8225 }, { "epoch": 0.5560362309044207, "grad_norm": 2.720662832260132, "learning_rate": 1.3117644591207745e-05, "loss": 0.38385009765625, "step": 8226 }, { "epoch": 0.5561038258753549, "grad_norm": 1.2553695440292358, "learning_rate": 1.3114371189927516e-05, "loss": 0.218414306640625, "step": 8227 }, { "epoch": 0.556171420846289, "grad_norm": 1.584168553352356, "learning_rate": 1.3111097879885743e-05, "loss": 0.2445068359375, "step": 8228 }, { "epoch": 0.5562390158172232, "grad_norm": 2.282498359680176, "learning_rate": 1.310782466124079e-05, "loss": 0.34588623046875, "step": 8229 }, { "epoch": 0.5563066107881574, "grad_norm": 2.088618278503418, "learning_rate": 1.3104551534151053e-05, "loss": 0.33990478515625, "step": 8230 }, { "epoch": 0.5563742057590916, "grad_norm": 1.2263833284378052, "learning_rate": 1.3101278498774898e-05, "loss": 0.234283447265625, "step": 8231 }, { "epoch": 0.5564418007300257, "grad_norm": 1.2168781757354736, "learning_rate": 1.3098005555270701e-05, "loss": 0.3233642578125, "step": 8232 }, { "epoch": 0.5565093957009598, "grad_norm": 1.112409234046936, "learning_rate": 1.3094732703796819e-05, "loss": 0.23223876953125, "step": 8233 }, { "epoch": 0.556576990671894, "grad_norm": 1.2916643619537354, "learning_rate": 1.3091459944511621e-05, "loss": 0.218963623046875, "step": 8234 }, { "epoch": 0.5566445856428281, "grad_norm": 0.777998685836792, "learning_rate": 1.3088187277573455e-05, "loss": 0.170379638671875, "step": 8235 }, { "epoch": 0.5567121806137624, "grad_norm": 1.9005045890808105, "learning_rate": 1.3084914703140685e-05, "loss": 0.2810821533203125, "step": 8236 }, { "epoch": 0.5567797755846965, "grad_norm": 1.7080472707748413, "learning_rate": 1.3081642221371646e-05, "loss": 0.30328369140625, "step": 8237 }, { "epoch": 0.5568473705556307, "grad_norm": 1.6614526510238647, "learning_rate": 1.3078369832424694e-05, "loss": 0.300079345703125, "step": 8238 }, { "epoch": 0.5569149655265648, "grad_norm": 1.6664042472839355, "learning_rate": 1.3075097536458154e-05, "loss": 0.1871337890625, "step": 8239 }, { "epoch": 0.556982560497499, "grad_norm": 1.2799204587936401, "learning_rate": 1.3071825333630365e-05, "loss": 0.247039794921875, "step": 8240 }, { "epoch": 0.5570501554684332, "grad_norm": 1.4936553239822388, "learning_rate": 1.3068553224099663e-05, "loss": 0.27386474609375, "step": 8241 }, { "epoch": 0.5571177504393673, "grad_norm": 0.9138373136520386, "learning_rate": 1.306528120802436e-05, "loss": 0.1966705322265625, "step": 8242 }, { "epoch": 0.5571853454103015, "grad_norm": 0.9451839327812195, "learning_rate": 1.306200928556279e-05, "loss": 0.1757354736328125, "step": 8243 }, { "epoch": 0.5572529403812356, "grad_norm": 1.1756187677383423, "learning_rate": 1.3058737456873257e-05, "loss": 0.24951171875, "step": 8244 }, { "epoch": 0.5573205353521697, "grad_norm": 1.1582932472229004, "learning_rate": 1.3055465722114087e-05, "loss": 0.252838134765625, "step": 8245 }, { "epoch": 0.557388130323104, "grad_norm": 2.1280357837677, "learning_rate": 1.3052194081443572e-05, "loss": 0.18399810791015625, "step": 8246 }, { "epoch": 0.5574557252940381, "grad_norm": 1.7518330812454224, "learning_rate": 1.3048922535020021e-05, "loss": 0.336883544921875, "step": 8247 }, { "epoch": 0.5575233202649723, "grad_norm": 0.8714554309844971, "learning_rate": 1.304565108300173e-05, "loss": 0.19580078125, "step": 8248 }, { "epoch": 0.5575909152359064, "grad_norm": 1.4037485122680664, "learning_rate": 1.3042379725547001e-05, "loss": 0.258148193359375, "step": 8249 }, { "epoch": 0.5576585102068407, "grad_norm": 1.8772214651107788, "learning_rate": 1.3039108462814107e-05, "loss": 0.325408935546875, "step": 8250 }, { "epoch": 0.5577261051777748, "grad_norm": 1.0098295211791992, "learning_rate": 1.3035837294961344e-05, "loss": 0.148284912109375, "step": 8251 }, { "epoch": 0.5577937001487089, "grad_norm": 1.3273844718933105, "learning_rate": 1.303256622214699e-05, "loss": 0.34259033203125, "step": 8252 }, { "epoch": 0.5578612951196431, "grad_norm": 0.9906526207923889, "learning_rate": 1.3029295244529312e-05, "loss": 0.15997314453125, "step": 8253 }, { "epoch": 0.5579288900905772, "grad_norm": 2.079084634780884, "learning_rate": 1.3026024362266594e-05, "loss": 0.32330322265625, "step": 8254 }, { "epoch": 0.5579964850615114, "grad_norm": 1.2457396984100342, "learning_rate": 1.3022753575517085e-05, "loss": 0.14820098876953125, "step": 8255 }, { "epoch": 0.5580640800324456, "grad_norm": 1.6951051950454712, "learning_rate": 1.3019482884439058e-05, "loss": 0.25701904296875, "step": 8256 }, { "epoch": 0.5581316750033798, "grad_norm": 1.596152663230896, "learning_rate": 1.3016212289190762e-05, "loss": 0.2362213134765625, "step": 8257 }, { "epoch": 0.5581992699743139, "grad_norm": 1.2103092670440674, "learning_rate": 1.3012941789930457e-05, "loss": 0.22979736328125, "step": 8258 }, { "epoch": 0.558266864945248, "grad_norm": 1.170962929725647, "learning_rate": 1.3009671386816383e-05, "loss": 0.2330322265625, "step": 8259 }, { "epoch": 0.5583344599161822, "grad_norm": 1.5747309923171997, "learning_rate": 1.3006401080006787e-05, "loss": 0.2735595703125, "step": 8260 }, { "epoch": 0.5584020548871164, "grad_norm": 1.1358983516693115, "learning_rate": 1.30031308696599e-05, "loss": 0.29461669921875, "step": 8261 }, { "epoch": 0.5584696498580506, "grad_norm": 0.5881479978561401, "learning_rate": 1.2999860755933969e-05, "loss": 0.1321868896484375, "step": 8262 }, { "epoch": 0.5585372448289847, "grad_norm": 1.5390920639038086, "learning_rate": 1.2996590738987208e-05, "loss": 0.258453369140625, "step": 8263 }, { "epoch": 0.5586048397999189, "grad_norm": 1.0767065286636353, "learning_rate": 1.2993320818977845e-05, "loss": 0.198638916015625, "step": 8264 }, { "epoch": 0.558672434770853, "grad_norm": 1.2422865629196167, "learning_rate": 1.2990050996064104e-05, "loss": 0.221771240234375, "step": 8265 }, { "epoch": 0.5587400297417872, "grad_norm": 0.9798508882522583, "learning_rate": 1.298678127040419e-05, "loss": 0.158660888671875, "step": 8266 }, { "epoch": 0.5588076247127214, "grad_norm": 1.1606031656265259, "learning_rate": 1.2983511642156327e-05, "loss": 0.2198944091796875, "step": 8267 }, { "epoch": 0.5588752196836555, "grad_norm": 1.262640357017517, "learning_rate": 1.2980242111478702e-05, "loss": 0.239410400390625, "step": 8268 }, { "epoch": 0.5589428146545897, "grad_norm": 2.1805732250213623, "learning_rate": 1.2976972678529527e-05, "loss": 0.227142333984375, "step": 8269 }, { "epoch": 0.5590104096255238, "grad_norm": 0.8167533278465271, "learning_rate": 1.2973703343466995e-05, "loss": 0.12847900390625, "step": 8270 }, { "epoch": 0.5590780045964581, "grad_norm": 1.3107454776763916, "learning_rate": 1.29704341064493e-05, "loss": 0.20814895629882812, "step": 8271 }, { "epoch": 0.5591455995673922, "grad_norm": 1.2709171772003174, "learning_rate": 1.296716496763462e-05, "loss": 0.220733642578125, "step": 8272 }, { "epoch": 0.5592131945383263, "grad_norm": 0.9301912784576416, "learning_rate": 1.2963895927181142e-05, "loss": 0.21173095703125, "step": 8273 }, { "epoch": 0.5592807895092605, "grad_norm": 1.2418485879898071, "learning_rate": 1.2960626985247047e-05, "loss": 0.1993408203125, "step": 8274 }, { "epoch": 0.5593483844801946, "grad_norm": 1.682626724243164, "learning_rate": 1.2957358141990494e-05, "loss": 0.30694580078125, "step": 8275 }, { "epoch": 0.5594159794511289, "grad_norm": 1.2088851928710938, "learning_rate": 1.2954089397569659e-05, "loss": 0.2155914306640625, "step": 8276 }, { "epoch": 0.559483574422063, "grad_norm": 0.819830596446991, "learning_rate": 1.2950820752142702e-05, "loss": 0.1431121826171875, "step": 8277 }, { "epoch": 0.5595511693929972, "grad_norm": 1.2328307628631592, "learning_rate": 1.2947552205867785e-05, "loss": 0.226318359375, "step": 8278 }, { "epoch": 0.5596187643639313, "grad_norm": 0.7111769318580627, "learning_rate": 1.294428375890305e-05, "loss": 0.1501312255859375, "step": 8279 }, { "epoch": 0.5596863593348654, "grad_norm": 1.4541027545928955, "learning_rate": 1.2941015411406655e-05, "loss": 0.222442626953125, "step": 8280 }, { "epoch": 0.5597539543057997, "grad_norm": 1.4177507162094116, "learning_rate": 1.2937747163536736e-05, "loss": 0.238800048828125, "step": 8281 }, { "epoch": 0.5598215492767338, "grad_norm": 2.31538462638855, "learning_rate": 1.293447901545144e-05, "loss": 0.37506103515625, "step": 8282 }, { "epoch": 0.559889144247668, "grad_norm": 1.053470492362976, "learning_rate": 1.2931210967308888e-05, "loss": 0.12722015380859375, "step": 8283 }, { "epoch": 0.5599567392186021, "grad_norm": 1.4168133735656738, "learning_rate": 1.2927943019267223e-05, "loss": 0.269134521484375, "step": 8284 }, { "epoch": 0.5600243341895363, "grad_norm": 1.550269365310669, "learning_rate": 1.2924675171484551e-05, "loss": 0.3145751953125, "step": 8285 }, { "epoch": 0.5600919291604705, "grad_norm": 1.6006598472595215, "learning_rate": 1.2921407424119008e-05, "loss": 0.30487060546875, "step": 8286 }, { "epoch": 0.5601595241314046, "grad_norm": 1.2838436365127563, "learning_rate": 1.2918139777328701e-05, "loss": 0.3018798828125, "step": 8287 }, { "epoch": 0.5602271191023388, "grad_norm": 1.1885602474212646, "learning_rate": 1.2914872231271732e-05, "loss": 0.2056427001953125, "step": 8288 }, { "epoch": 0.5602947140732729, "grad_norm": 1.062721610069275, "learning_rate": 1.2911604786106216e-05, "loss": 0.2197113037109375, "step": 8289 }, { "epoch": 0.5603623090442071, "grad_norm": 1.0788822174072266, "learning_rate": 1.2908337441990245e-05, "loss": 0.2529296875, "step": 8290 }, { "epoch": 0.5604299040151413, "grad_norm": 1.9211405515670776, "learning_rate": 1.2905070199081922e-05, "loss": 0.26190185546875, "step": 8291 }, { "epoch": 0.5604974989860755, "grad_norm": 0.7139926552772522, "learning_rate": 1.2901803057539325e-05, "loss": 0.162811279296875, "step": 8292 }, { "epoch": 0.5605650939570096, "grad_norm": 0.7275173664093018, "learning_rate": 1.2898536017520548e-05, "loss": 0.14865875244140625, "step": 8293 }, { "epoch": 0.5606326889279437, "grad_norm": 1.5474790334701538, "learning_rate": 1.2895269079183664e-05, "loss": 0.338348388671875, "step": 8294 }, { "epoch": 0.5607002838988779, "grad_norm": 1.6016777753829956, "learning_rate": 1.2892002242686756e-05, "loss": 0.2918243408203125, "step": 8295 }, { "epoch": 0.560767878869812, "grad_norm": 1.0321992635726929, "learning_rate": 1.2888735508187884e-05, "loss": 0.2288818359375, "step": 8296 }, { "epoch": 0.5608354738407463, "grad_norm": 1.7905176877975464, "learning_rate": 1.2885468875845122e-05, "loss": 0.302734375, "step": 8297 }, { "epoch": 0.5609030688116804, "grad_norm": 1.0178906917572021, "learning_rate": 1.2882202345816528e-05, "loss": 0.2286529541015625, "step": 8298 }, { "epoch": 0.5609706637826146, "grad_norm": 1.4158111810684204, "learning_rate": 1.287893591826015e-05, "loss": 0.323394775390625, "step": 8299 }, { "epoch": 0.5610382587535487, "grad_norm": 0.798473596572876, "learning_rate": 1.2875669593334048e-05, "loss": 0.172088623046875, "step": 8300 }, { "epoch": 0.5611058537244829, "grad_norm": 1.2656400203704834, "learning_rate": 1.2872403371196255e-05, "loss": 0.2125396728515625, "step": 8301 }, { "epoch": 0.5611734486954171, "grad_norm": 1.5574804544448853, "learning_rate": 1.2869137252004822e-05, "loss": 0.22364044189453125, "step": 8302 }, { "epoch": 0.5612410436663512, "grad_norm": 1.456709384918213, "learning_rate": 1.2865871235917779e-05, "loss": 0.2969970703125, "step": 8303 }, { "epoch": 0.5613086386372854, "grad_norm": 1.1445462703704834, "learning_rate": 1.2862605323093162e-05, "loss": 0.272491455078125, "step": 8304 }, { "epoch": 0.5613762336082195, "grad_norm": 1.0999553203582764, "learning_rate": 1.2859339513688986e-05, "loss": 0.24688720703125, "step": 8305 }, { "epoch": 0.5614438285791538, "grad_norm": 0.6415221095085144, "learning_rate": 1.2856073807863279e-05, "loss": 0.16363525390625, "step": 8306 }, { "epoch": 0.5615114235500879, "grad_norm": 1.493558406829834, "learning_rate": 1.2852808205774053e-05, "loss": 0.1602020263671875, "step": 8307 }, { "epoch": 0.561579018521022, "grad_norm": 0.8986724615097046, "learning_rate": 1.2849542707579324e-05, "loss": 0.11916351318359375, "step": 8308 }, { "epoch": 0.5616466134919562, "grad_norm": 0.6986016631126404, "learning_rate": 1.2846277313437091e-05, "loss": 0.14495849609375, "step": 8309 }, { "epoch": 0.5617142084628903, "grad_norm": 1.6243408918380737, "learning_rate": 1.284301202350535e-05, "loss": 0.2852020263671875, "step": 8310 }, { "epoch": 0.5617818034338246, "grad_norm": 1.2481271028518677, "learning_rate": 1.283974683794211e-05, "loss": 0.228668212890625, "step": 8311 }, { "epoch": 0.5618493984047587, "grad_norm": 0.8664505481719971, "learning_rate": 1.2836481756905345e-05, "loss": 0.188690185546875, "step": 8312 }, { "epoch": 0.5619169933756929, "grad_norm": 1.0734059810638428, "learning_rate": 1.2833216780553055e-05, "loss": 0.26397705078125, "step": 8313 }, { "epoch": 0.561984588346627, "grad_norm": 1.1139271259307861, "learning_rate": 1.2829951909043203e-05, "loss": 0.2838134765625, "step": 8314 }, { "epoch": 0.5620521833175611, "grad_norm": 0.8601908087730408, "learning_rate": 1.2826687142533778e-05, "loss": 0.195892333984375, "step": 8315 }, { "epoch": 0.5621197782884954, "grad_norm": 1.0783482789993286, "learning_rate": 1.2823422481182741e-05, "loss": 0.225921630859375, "step": 8316 }, { "epoch": 0.5621873732594295, "grad_norm": 1.2312383651733398, "learning_rate": 1.2820157925148068e-05, "loss": 0.238983154296875, "step": 8317 }, { "epoch": 0.5622549682303637, "grad_norm": 1.4479105472564697, "learning_rate": 1.2816893474587704e-05, "loss": 0.2783203125, "step": 8318 }, { "epoch": 0.5623225632012978, "grad_norm": 1.0587010383605957, "learning_rate": 1.2813629129659614e-05, "loss": 0.16943359375, "step": 8319 }, { "epoch": 0.562390158172232, "grad_norm": 0.8450105786323547, "learning_rate": 1.2810364890521738e-05, "loss": 0.2006988525390625, "step": 8320 }, { "epoch": 0.5624577531431662, "grad_norm": 1.2692885398864746, "learning_rate": 1.2807100757332033e-05, "loss": 0.1991424560546875, "step": 8321 }, { "epoch": 0.5625253481141003, "grad_norm": 0.7258793115615845, "learning_rate": 1.280383673024843e-05, "loss": 0.1558685302734375, "step": 8322 }, { "epoch": 0.5625929430850345, "grad_norm": 1.1734000444412231, "learning_rate": 1.280057280942886e-05, "loss": 0.2879638671875, "step": 8323 }, { "epoch": 0.5626605380559686, "grad_norm": 2.041391611099243, "learning_rate": 1.279730899503126e-05, "loss": 0.3238525390625, "step": 8324 }, { "epoch": 0.5627281330269028, "grad_norm": 1.140699028968811, "learning_rate": 1.2794045287213542e-05, "loss": 0.173431396484375, "step": 8325 }, { "epoch": 0.562795727997837, "grad_norm": 0.9520389437675476, "learning_rate": 1.2790781686133638e-05, "loss": 0.2886962890625, "step": 8326 }, { "epoch": 0.5628633229687712, "grad_norm": 1.3424997329711914, "learning_rate": 1.2787518191949452e-05, "loss": 0.210418701171875, "step": 8327 }, { "epoch": 0.5629309179397053, "grad_norm": 1.0538482666015625, "learning_rate": 1.2784254804818898e-05, "loss": 0.2346649169921875, "step": 8328 }, { "epoch": 0.5629985129106394, "grad_norm": 1.8871917724609375, "learning_rate": 1.2780991524899872e-05, "loss": 0.240814208984375, "step": 8329 }, { "epoch": 0.5630661078815736, "grad_norm": 0.818939745426178, "learning_rate": 1.2777728352350283e-05, "loss": 0.16245269775390625, "step": 8330 }, { "epoch": 0.5631337028525077, "grad_norm": 1.0343900918960571, "learning_rate": 1.277446528732801e-05, "loss": 0.275238037109375, "step": 8331 }, { "epoch": 0.563201297823442, "grad_norm": 1.1687779426574707, "learning_rate": 1.2771202329990947e-05, "loss": 0.1883544921875, "step": 8332 }, { "epoch": 0.5632688927943761, "grad_norm": 0.8155841827392578, "learning_rate": 1.2767939480496982e-05, "loss": 0.199615478515625, "step": 8333 }, { "epoch": 0.5633364877653103, "grad_norm": 1.6699761152267456, "learning_rate": 1.2764676739003979e-05, "loss": 0.28863525390625, "step": 8334 }, { "epoch": 0.5634040827362444, "grad_norm": 1.0167272090911865, "learning_rate": 1.276141410566982e-05, "loss": 0.1913604736328125, "step": 8335 }, { "epoch": 0.5634716777071785, "grad_norm": 0.5648692846298218, "learning_rate": 1.2758151580652365e-05, "loss": 0.118865966796875, "step": 8336 }, { "epoch": 0.5635392726781128, "grad_norm": 1.1185081005096436, "learning_rate": 1.2754889164109485e-05, "loss": 0.2083892822265625, "step": 8337 }, { "epoch": 0.5636068676490469, "grad_norm": 1.09995698928833, "learning_rate": 1.2751626856199022e-05, "loss": 0.2803955078125, "step": 8338 }, { "epoch": 0.5636744626199811, "grad_norm": 0.9872773885726929, "learning_rate": 1.2748364657078837e-05, "loss": 0.149139404296875, "step": 8339 }, { "epoch": 0.5637420575909152, "grad_norm": 1.2381302118301392, "learning_rate": 1.2745102566906771e-05, "loss": 0.28497314453125, "step": 8340 }, { "epoch": 0.5638096525618495, "grad_norm": 1.3557324409484863, "learning_rate": 1.2741840585840673e-05, "loss": 0.26202392578125, "step": 8341 }, { "epoch": 0.5638772475327836, "grad_norm": 1.797041893005371, "learning_rate": 1.2738578714038361e-05, "loss": 0.315338134765625, "step": 8342 }, { "epoch": 0.5639448425037177, "grad_norm": 0.6864050030708313, "learning_rate": 1.2735316951657681e-05, "loss": 0.159027099609375, "step": 8343 }, { "epoch": 0.5640124374746519, "grad_norm": 1.3326184749603271, "learning_rate": 1.2732055298856453e-05, "loss": 0.258636474609375, "step": 8344 }, { "epoch": 0.564080032445586, "grad_norm": 0.9604476690292358, "learning_rate": 1.2728793755792486e-05, "loss": 0.1956939697265625, "step": 8345 }, { "epoch": 0.5641476274165202, "grad_norm": 1.1330676078796387, "learning_rate": 1.272553232262361e-05, "loss": 0.21282958984375, "step": 8346 }, { "epoch": 0.5642152223874544, "grad_norm": 0.9638552665710449, "learning_rate": 1.2722270999507619e-05, "loss": 0.158416748046875, "step": 8347 }, { "epoch": 0.5642828173583886, "grad_norm": 1.4015535116195679, "learning_rate": 1.2719009786602323e-05, "loss": 0.23846435546875, "step": 8348 }, { "epoch": 0.5643504123293227, "grad_norm": 0.7582740783691406, "learning_rate": 1.2715748684065518e-05, "loss": 0.177001953125, "step": 8349 }, { "epoch": 0.5644180073002568, "grad_norm": 1.136760950088501, "learning_rate": 1.2712487692055003e-05, "loss": 0.1998291015625, "step": 8350 }, { "epoch": 0.564485602271191, "grad_norm": 1.058398962020874, "learning_rate": 1.270922681072855e-05, "loss": 0.247100830078125, "step": 8351 }, { "epoch": 0.5645531972421252, "grad_norm": 0.7106238603591919, "learning_rate": 1.2705966040243955e-05, "loss": 0.1486663818359375, "step": 8352 }, { "epoch": 0.5646207922130594, "grad_norm": 0.8527969121932983, "learning_rate": 1.2702705380758989e-05, "loss": 0.207000732421875, "step": 8353 }, { "epoch": 0.5646883871839935, "grad_norm": 1.748774766921997, "learning_rate": 1.2699444832431427e-05, "loss": 0.312713623046875, "step": 8354 }, { "epoch": 0.5647559821549277, "grad_norm": 1.4791312217712402, "learning_rate": 1.2696184395419025e-05, "loss": 0.283905029296875, "step": 8355 }, { "epoch": 0.5648235771258618, "grad_norm": 0.8497365117073059, "learning_rate": 1.2692924069879553e-05, "loss": 0.1318511962890625, "step": 8356 }, { "epoch": 0.564891172096796, "grad_norm": 1.250994324684143, "learning_rate": 1.2689663855970765e-05, "loss": 0.255889892578125, "step": 8357 }, { "epoch": 0.5649587670677302, "grad_norm": 1.239432454109192, "learning_rate": 1.2686403753850401e-05, "loss": 0.208282470703125, "step": 8358 }, { "epoch": 0.5650263620386643, "grad_norm": 1.517401933670044, "learning_rate": 1.2683143763676218e-05, "loss": 0.279815673828125, "step": 8359 }, { "epoch": 0.5650939570095985, "grad_norm": 1.17010498046875, "learning_rate": 1.267988388560594e-05, "loss": 0.269134521484375, "step": 8360 }, { "epoch": 0.5651615519805326, "grad_norm": 2.1788346767425537, "learning_rate": 1.2676624119797315e-05, "loss": 0.305755615234375, "step": 8361 }, { "epoch": 0.5652291469514669, "grad_norm": 1.0398213863372803, "learning_rate": 1.2673364466408058e-05, "loss": 0.217681884765625, "step": 8362 }, { "epoch": 0.565296741922401, "grad_norm": 0.6959192752838135, "learning_rate": 1.2670104925595905e-05, "loss": 0.1255645751953125, "step": 8363 }, { "epoch": 0.5653643368933351, "grad_norm": 0.8263839483261108, "learning_rate": 1.266684549751856e-05, "loss": 0.1032257080078125, "step": 8364 }, { "epoch": 0.5654319318642693, "grad_norm": 0.8364479541778564, "learning_rate": 1.2663586182333739e-05, "loss": 0.2127685546875, "step": 8365 }, { "epoch": 0.5654995268352034, "grad_norm": 1.3275158405303955, "learning_rate": 1.2660326980199148e-05, "loss": 0.281005859375, "step": 8366 }, { "epoch": 0.5655671218061377, "grad_norm": 1.4095863103866577, "learning_rate": 1.2657067891272495e-05, "loss": 0.29742431640625, "step": 8367 }, { "epoch": 0.5656347167770718, "grad_norm": 0.44812244176864624, "learning_rate": 1.2653808915711462e-05, "loss": 0.1133270263671875, "step": 8368 }, { "epoch": 0.565702311748006, "grad_norm": 0.7186625599861145, "learning_rate": 1.2650550053673744e-05, "loss": 0.151397705078125, "step": 8369 }, { "epoch": 0.5657699067189401, "grad_norm": 1.3844705820083618, "learning_rate": 1.264729130531703e-05, "loss": 0.321563720703125, "step": 8370 }, { "epoch": 0.5658375016898742, "grad_norm": 0.8779376149177551, "learning_rate": 1.2644032670798988e-05, "loss": 0.230010986328125, "step": 8371 }, { "epoch": 0.5659050966608085, "grad_norm": 1.3528239727020264, "learning_rate": 1.26407741502773e-05, "loss": 0.32147216796875, "step": 8372 }, { "epoch": 0.5659726916317426, "grad_norm": 1.0235639810562134, "learning_rate": 1.2637515743909628e-05, "loss": 0.19110107421875, "step": 8373 }, { "epoch": 0.5660402866026768, "grad_norm": 1.392750859260559, "learning_rate": 1.2634257451853643e-05, "loss": 0.237548828125, "step": 8374 }, { "epoch": 0.5661078815736109, "grad_norm": 0.8488494753837585, "learning_rate": 1.2630999274266989e-05, "loss": 0.149871826171875, "step": 8375 }, { "epoch": 0.566175476544545, "grad_norm": 0.9000512957572937, "learning_rate": 1.2627741211307327e-05, "loss": 0.159149169921875, "step": 8376 }, { "epoch": 0.5662430715154793, "grad_norm": 1.2224136590957642, "learning_rate": 1.2624483263132294e-05, "loss": 0.199066162109375, "step": 8377 }, { "epoch": 0.5663106664864134, "grad_norm": 1.543480396270752, "learning_rate": 1.2621225429899535e-05, "loss": 0.259185791015625, "step": 8378 }, { "epoch": 0.5663782614573476, "grad_norm": 0.7890622019767761, "learning_rate": 1.2617967711766688e-05, "loss": 0.1613922119140625, "step": 8379 }, { "epoch": 0.5664458564282817, "grad_norm": 1.2093833684921265, "learning_rate": 1.2614710108891368e-05, "loss": 0.2843017578125, "step": 8380 }, { "epoch": 0.5665134513992159, "grad_norm": 0.8260181546211243, "learning_rate": 1.2611452621431212e-05, "loss": 0.2016143798828125, "step": 8381 }, { "epoch": 0.56658104637015, "grad_norm": 1.5504752397537231, "learning_rate": 1.260819524954383e-05, "loss": 0.239166259765625, "step": 8382 }, { "epoch": 0.5666486413410842, "grad_norm": 1.2923674583435059, "learning_rate": 1.2604937993386842e-05, "loss": 0.202239990234375, "step": 8383 }, { "epoch": 0.5667162363120184, "grad_norm": 1.1018120050430298, "learning_rate": 1.260168085311784e-05, "loss": 0.2493896484375, "step": 8384 }, { "epoch": 0.5667838312829525, "grad_norm": 1.300481915473938, "learning_rate": 1.259842382889444e-05, "loss": 0.2186737060546875, "step": 8385 }, { "epoch": 0.5668514262538867, "grad_norm": 1.1860238313674927, "learning_rate": 1.2595166920874225e-05, "loss": 0.1875457763671875, "step": 8386 }, { "epoch": 0.5669190212248209, "grad_norm": 1.0710420608520508, "learning_rate": 1.25919101292148e-05, "loss": 0.17724609375, "step": 8387 }, { "epoch": 0.5669866161957551, "grad_norm": 2.0385305881500244, "learning_rate": 1.2588653454073728e-05, "loss": 0.33477783203125, "step": 8388 }, { "epoch": 0.5670542111666892, "grad_norm": 1.2016202211380005, "learning_rate": 1.2585396895608602e-05, "loss": 0.209320068359375, "step": 8389 }, { "epoch": 0.5671218061376233, "grad_norm": 0.6861995458602905, "learning_rate": 1.2582140453976985e-05, "loss": 0.13665771484375, "step": 8390 }, { "epoch": 0.5671894011085575, "grad_norm": 2.0280137062072754, "learning_rate": 1.257888412933646e-05, "loss": 0.315032958984375, "step": 8391 }, { "epoch": 0.5672569960794916, "grad_norm": 0.9803822636604309, "learning_rate": 1.2575627921844577e-05, "loss": 0.208892822265625, "step": 8392 }, { "epoch": 0.5673245910504259, "grad_norm": 1.1471660137176514, "learning_rate": 1.2572371831658885e-05, "loss": 0.35833740234375, "step": 8393 }, { "epoch": 0.56739218602136, "grad_norm": 1.0293278694152832, "learning_rate": 1.2569115858936942e-05, "loss": 0.24395751953125, "step": 8394 }, { "epoch": 0.5674597809922942, "grad_norm": 1.9512590169906616, "learning_rate": 1.256586000383629e-05, "loss": 0.33978271484375, "step": 8395 }, { "epoch": 0.5675273759632283, "grad_norm": 0.9758478403091431, "learning_rate": 1.2562604266514477e-05, "loss": 0.1905059814453125, "step": 8396 }, { "epoch": 0.5675949709341624, "grad_norm": 1.223636269569397, "learning_rate": 1.2559348647129018e-05, "loss": 0.2086944580078125, "step": 8397 }, { "epoch": 0.5676625659050967, "grad_norm": 0.8242920637130737, "learning_rate": 1.2556093145837455e-05, "loss": 0.1350860595703125, "step": 8398 }, { "epoch": 0.5677301608760308, "grad_norm": 1.2428174018859863, "learning_rate": 1.25528377627973e-05, "loss": 0.206451416015625, "step": 8399 }, { "epoch": 0.567797755846965, "grad_norm": 0.7450483441352844, "learning_rate": 1.2549582498166079e-05, "loss": 0.177978515625, "step": 8400 }, { "epoch": 0.5678653508178991, "grad_norm": 1.0917537212371826, "learning_rate": 1.254632735210129e-05, "loss": 0.1984710693359375, "step": 8401 }, { "epoch": 0.5679329457888334, "grad_norm": 0.6416839361190796, "learning_rate": 1.2543072324760445e-05, "loss": 0.161376953125, "step": 8402 }, { "epoch": 0.5680005407597675, "grad_norm": 1.2611565589904785, "learning_rate": 1.2539817416301045e-05, "loss": 0.2073974609375, "step": 8403 }, { "epoch": 0.5680681357307016, "grad_norm": 1.6486891508102417, "learning_rate": 1.253656262688057e-05, "loss": 0.2745208740234375, "step": 8404 }, { "epoch": 0.5681357307016358, "grad_norm": 1.5178320407867432, "learning_rate": 1.2533307956656522e-05, "loss": 0.1649932861328125, "step": 8405 }, { "epoch": 0.5682033256725699, "grad_norm": 0.7460311055183411, "learning_rate": 1.253005340578637e-05, "loss": 0.222503662109375, "step": 8406 }, { "epoch": 0.5682709206435042, "grad_norm": 0.9254472255706787, "learning_rate": 1.2526798974427598e-05, "loss": 0.233673095703125, "step": 8407 }, { "epoch": 0.5683385156144383, "grad_norm": 1.149276852607727, "learning_rate": 1.2523544662737668e-05, "loss": 0.26068115234375, "step": 8408 }, { "epoch": 0.5684061105853725, "grad_norm": 1.3552576303482056, "learning_rate": 1.2520290470874054e-05, "loss": 0.248321533203125, "step": 8409 }, { "epoch": 0.5684737055563066, "grad_norm": 1.0139272212982178, "learning_rate": 1.2517036398994203e-05, "loss": 0.276214599609375, "step": 8410 }, { "epoch": 0.5685413005272407, "grad_norm": 1.3521697521209717, "learning_rate": 1.2513782447255574e-05, "loss": 0.1925506591796875, "step": 8411 }, { "epoch": 0.568608895498175, "grad_norm": 1.0391513109207153, "learning_rate": 1.2510528615815612e-05, "loss": 0.199066162109375, "step": 8412 }, { "epoch": 0.5686764904691091, "grad_norm": 0.8694677948951721, "learning_rate": 1.250727490483176e-05, "loss": 0.25238037109375, "step": 8413 }, { "epoch": 0.5687440854400433, "grad_norm": 0.8017217516899109, "learning_rate": 1.250402131446145e-05, "loss": 0.12369155883789062, "step": 8414 }, { "epoch": 0.5688116804109774, "grad_norm": 1.732171893119812, "learning_rate": 1.2500767844862106e-05, "loss": 0.34747314453125, "step": 8415 }, { "epoch": 0.5688792753819116, "grad_norm": 0.9898457527160645, "learning_rate": 1.2497514496191164e-05, "loss": 0.220916748046875, "step": 8416 }, { "epoch": 0.5689468703528457, "grad_norm": 1.488276720046997, "learning_rate": 1.2494261268606026e-05, "loss": 0.29461669921875, "step": 8417 }, { "epoch": 0.5690144653237799, "grad_norm": 0.8458920121192932, "learning_rate": 1.2491008162264114e-05, "loss": 0.18023681640625, "step": 8418 }, { "epoch": 0.5690820602947141, "grad_norm": 1.1250025033950806, "learning_rate": 1.2487755177322828e-05, "loss": 0.20904541015625, "step": 8419 }, { "epoch": 0.5691496552656482, "grad_norm": 0.7506368160247803, "learning_rate": 1.2484502313939576e-05, "loss": 0.143280029296875, "step": 8420 }, { "epoch": 0.5692172502365824, "grad_norm": 1.5787253379821777, "learning_rate": 1.2481249572271738e-05, "loss": 0.27978515625, "step": 8421 }, { "epoch": 0.5692848452075165, "grad_norm": 0.5130428671836853, "learning_rate": 1.2477996952476718e-05, "loss": 0.125885009765625, "step": 8422 }, { "epoch": 0.5693524401784508, "grad_norm": 1.440205454826355, "learning_rate": 1.2474744454711882e-05, "loss": 0.27655029296875, "step": 8423 }, { "epoch": 0.5694200351493849, "grad_norm": 1.6821379661560059, "learning_rate": 1.2471492079134618e-05, "loss": 0.303314208984375, "step": 8424 }, { "epoch": 0.569487630120319, "grad_norm": 1.1147016286849976, "learning_rate": 1.2468239825902286e-05, "loss": 0.2994384765625, "step": 8425 }, { "epoch": 0.5695552250912532, "grad_norm": 0.9242891073226929, "learning_rate": 1.2464987695172267e-05, "loss": 0.1313934326171875, "step": 8426 }, { "epoch": 0.5696228200621873, "grad_norm": 2.7045085430145264, "learning_rate": 1.2461735687101902e-05, "loss": 0.40277099609375, "step": 8427 }, { "epoch": 0.5696904150331216, "grad_norm": 1.697556495666504, "learning_rate": 1.2458483801848547e-05, "loss": 0.20965576171875, "step": 8428 }, { "epoch": 0.5697580100040557, "grad_norm": 1.2812753915786743, "learning_rate": 1.2455232039569557e-05, "loss": 0.2628173828125, "step": 8429 }, { "epoch": 0.5698256049749899, "grad_norm": 0.843357503414154, "learning_rate": 1.2451980400422262e-05, "loss": 0.1499786376953125, "step": 8430 }, { "epoch": 0.569893199945924, "grad_norm": 1.3126193284988403, "learning_rate": 1.2448728884564003e-05, "loss": 0.227447509765625, "step": 8431 }, { "epoch": 0.5699607949168581, "grad_norm": 1.715436577796936, "learning_rate": 1.2445477492152104e-05, "loss": 0.2252349853515625, "step": 8432 }, { "epoch": 0.5700283898877924, "grad_norm": 1.5109328031539917, "learning_rate": 1.2442226223343896e-05, "loss": 0.290374755859375, "step": 8433 }, { "epoch": 0.5700959848587265, "grad_norm": 1.4817713499069214, "learning_rate": 1.2438975078296685e-05, "loss": 0.2576446533203125, "step": 8434 }, { "epoch": 0.5701635798296607, "grad_norm": 1.0068414211273193, "learning_rate": 1.243572405716779e-05, "loss": 0.207733154296875, "step": 8435 }, { "epoch": 0.5702311748005948, "grad_norm": 1.242130160331726, "learning_rate": 1.2432473160114507e-05, "loss": 0.160369873046875, "step": 8436 }, { "epoch": 0.570298769771529, "grad_norm": 1.3883296251296997, "learning_rate": 1.2429222387294146e-05, "loss": 0.275390625, "step": 8437 }, { "epoch": 0.5703663647424632, "grad_norm": 1.7525197267532349, "learning_rate": 1.2425971738863996e-05, "loss": 0.273651123046875, "step": 8438 }, { "epoch": 0.5704339597133973, "grad_norm": 1.1605119705200195, "learning_rate": 1.2422721214981334e-05, "loss": 0.2481689453125, "step": 8439 }, { "epoch": 0.5705015546843315, "grad_norm": 0.9615430235862732, "learning_rate": 1.2419470815803449e-05, "loss": 0.18129730224609375, "step": 8440 }, { "epoch": 0.5705691496552656, "grad_norm": 1.6320269107818604, "learning_rate": 1.2416220541487612e-05, "loss": 0.3382415771484375, "step": 8441 }, { "epoch": 0.5706367446261998, "grad_norm": 2.01466703414917, "learning_rate": 1.2412970392191099e-05, "loss": 0.257049560546875, "step": 8442 }, { "epoch": 0.570704339597134, "grad_norm": 0.8109310269355774, "learning_rate": 1.2409720368071161e-05, "loss": 0.186920166015625, "step": 8443 }, { "epoch": 0.5707719345680682, "grad_norm": 1.8089241981506348, "learning_rate": 1.2406470469285064e-05, "loss": 0.2301025390625, "step": 8444 }, { "epoch": 0.5708395295390023, "grad_norm": 1.330124020576477, "learning_rate": 1.2403220695990052e-05, "loss": 0.293487548828125, "step": 8445 }, { "epoch": 0.5709071245099364, "grad_norm": 1.0770961046218872, "learning_rate": 1.2399971048343378e-05, "loss": 0.19610595703125, "step": 8446 }, { "epoch": 0.5709747194808706, "grad_norm": 1.2647953033447266, "learning_rate": 1.2396721526502268e-05, "loss": 0.275390625, "step": 8447 }, { "epoch": 0.5710423144518048, "grad_norm": 1.2370637655258179, "learning_rate": 1.2393472130623962e-05, "loss": 0.28369140625, "step": 8448 }, { "epoch": 0.571109909422739, "grad_norm": 1.0318788290023804, "learning_rate": 1.2390222860865689e-05, "loss": 0.2445831298828125, "step": 8449 }, { "epoch": 0.5711775043936731, "grad_norm": 0.7777938842773438, "learning_rate": 1.2386973717384656e-05, "loss": 0.1925048828125, "step": 8450 }, { "epoch": 0.5712450993646073, "grad_norm": 1.6833882331848145, "learning_rate": 1.2383724700338091e-05, "loss": 0.278106689453125, "step": 8451 }, { "epoch": 0.5713126943355414, "grad_norm": 1.5442181825637817, "learning_rate": 1.2380475809883191e-05, "loss": 0.273040771484375, "step": 8452 }, { "epoch": 0.5713802893064756, "grad_norm": 1.0729196071624756, "learning_rate": 1.2377227046177163e-05, "loss": 0.2102508544921875, "step": 8453 }, { "epoch": 0.5714478842774098, "grad_norm": 1.4155962467193604, "learning_rate": 1.23739784093772e-05, "loss": 0.245361328125, "step": 8454 }, { "epoch": 0.5715154792483439, "grad_norm": 2.075653314590454, "learning_rate": 1.2370729899640498e-05, "loss": 0.3122406005859375, "step": 8455 }, { "epoch": 0.5715830742192781, "grad_norm": 0.9746745228767395, "learning_rate": 1.2367481517124229e-05, "loss": 0.226165771484375, "step": 8456 }, { "epoch": 0.5716506691902122, "grad_norm": 1.1666184663772583, "learning_rate": 1.2364233261985577e-05, "loss": 0.307159423828125, "step": 8457 }, { "epoch": 0.5717182641611465, "grad_norm": 1.1392649412155151, "learning_rate": 1.236098513438171e-05, "loss": 0.1480560302734375, "step": 8458 }, { "epoch": 0.5717858591320806, "grad_norm": 1.0427688360214233, "learning_rate": 1.2357737134469802e-05, "loss": 0.234619140625, "step": 8459 }, { "epoch": 0.5718534541030147, "grad_norm": 2.0008466243743896, "learning_rate": 1.2354489262406994e-05, "loss": 0.3731689453125, "step": 8460 }, { "epoch": 0.5719210490739489, "grad_norm": 0.526665985584259, "learning_rate": 1.2351241518350454e-05, "loss": 0.1122283935546875, "step": 8461 }, { "epoch": 0.571988644044883, "grad_norm": 1.4609065055847168, "learning_rate": 1.2347993902457322e-05, "loss": 0.233123779296875, "step": 8462 }, { "epoch": 0.5720562390158173, "grad_norm": 0.6294900178909302, "learning_rate": 1.2344746414884733e-05, "loss": 0.1186981201171875, "step": 8463 }, { "epoch": 0.5721238339867514, "grad_norm": 1.1141316890716553, "learning_rate": 1.2341499055789826e-05, "loss": 0.233734130859375, "step": 8464 }, { "epoch": 0.5721914289576856, "grad_norm": 1.2203834056854248, "learning_rate": 1.2338251825329728e-05, "loss": 0.2075958251953125, "step": 8465 }, { "epoch": 0.5722590239286197, "grad_norm": 1.7816685438156128, "learning_rate": 1.2335004723661564e-05, "loss": 0.335357666015625, "step": 8466 }, { "epoch": 0.5723266188995538, "grad_norm": 1.1563177108764648, "learning_rate": 1.233175775094244e-05, "loss": 0.15512847900390625, "step": 8467 }, { "epoch": 0.5723942138704881, "grad_norm": 1.3034772872924805, "learning_rate": 1.2328510907329475e-05, "loss": 0.2305908203125, "step": 8468 }, { "epoch": 0.5724618088414222, "grad_norm": 1.3538590669631958, "learning_rate": 1.232526419297976e-05, "loss": 0.2301788330078125, "step": 8469 }, { "epoch": 0.5725294038123564, "grad_norm": 0.6799721717834473, "learning_rate": 1.23220176080504e-05, "loss": 0.1852569580078125, "step": 8470 }, { "epoch": 0.5725969987832905, "grad_norm": 1.466444492340088, "learning_rate": 1.231877115269848e-05, "loss": 0.28167724609375, "step": 8471 }, { "epoch": 0.5726645937542247, "grad_norm": 0.9712956547737122, "learning_rate": 1.2315524827081093e-05, "loss": 0.16058349609375, "step": 8472 }, { "epoch": 0.5727321887251589, "grad_norm": 1.0127527713775635, "learning_rate": 1.2312278631355305e-05, "loss": 0.189971923828125, "step": 8473 }, { "epoch": 0.572799783696093, "grad_norm": 1.090070128440857, "learning_rate": 1.2309032565678188e-05, "loss": 0.223602294921875, "step": 8474 }, { "epoch": 0.5728673786670272, "grad_norm": 1.1658107042312622, "learning_rate": 1.230578663020682e-05, "loss": 0.265380859375, "step": 8475 }, { "epoch": 0.5729349736379613, "grad_norm": 1.126978874206543, "learning_rate": 1.230254082509824e-05, "loss": 0.283721923828125, "step": 8476 }, { "epoch": 0.5730025686088955, "grad_norm": 1.3379542827606201, "learning_rate": 1.2299295150509514e-05, "loss": 0.25872802734375, "step": 8477 }, { "epoch": 0.5730701635798297, "grad_norm": 1.45326828956604, "learning_rate": 1.2296049606597681e-05, "loss": 0.27020263671875, "step": 8478 }, { "epoch": 0.5731377585507639, "grad_norm": 1.6683411598205566, "learning_rate": 1.229280419351979e-05, "loss": 0.2525634765625, "step": 8479 }, { "epoch": 0.573205353521698, "grad_norm": 0.960355818271637, "learning_rate": 1.2289558911432862e-05, "loss": 0.21044921875, "step": 8480 }, { "epoch": 0.5732729484926321, "grad_norm": 1.3911612033843994, "learning_rate": 1.2286313760493934e-05, "loss": 0.2570037841796875, "step": 8481 }, { "epoch": 0.5733405434635663, "grad_norm": 1.5024362802505493, "learning_rate": 1.2283068740860017e-05, "loss": 0.320159912109375, "step": 8482 }, { "epoch": 0.5734081384345004, "grad_norm": 0.9241510629653931, "learning_rate": 1.2279823852688141e-05, "loss": 0.17852783203125, "step": 8483 }, { "epoch": 0.5734757334054347, "grad_norm": 1.3982352018356323, "learning_rate": 1.22765790961353e-05, "loss": 0.30523681640625, "step": 8484 }, { "epoch": 0.5735433283763688, "grad_norm": 0.9994831085205078, "learning_rate": 1.2273334471358493e-05, "loss": 0.26837158203125, "step": 8485 }, { "epoch": 0.573610923347303, "grad_norm": 1.2207471132278442, "learning_rate": 1.2270089978514727e-05, "loss": 0.32476806640625, "step": 8486 }, { "epoch": 0.5736785183182371, "grad_norm": 1.1363409757614136, "learning_rate": 1.2266845617760978e-05, "loss": 0.1222686767578125, "step": 8487 }, { "epoch": 0.5737461132891712, "grad_norm": 0.8602338433265686, "learning_rate": 1.2263601389254247e-05, "loss": 0.18927001953125, "step": 8488 }, { "epoch": 0.5738137082601055, "grad_norm": 1.1141971349716187, "learning_rate": 1.2260357293151487e-05, "loss": 0.25787353515625, "step": 8489 }, { "epoch": 0.5738813032310396, "grad_norm": 1.6059664487838745, "learning_rate": 1.2257113329609684e-05, "loss": 0.2452392578125, "step": 8490 }, { "epoch": 0.5739488982019738, "grad_norm": 1.5386909246444702, "learning_rate": 1.2253869498785791e-05, "loss": 0.260467529296875, "step": 8491 }, { "epoch": 0.5740164931729079, "grad_norm": 0.9990387558937073, "learning_rate": 1.225062580083678e-05, "loss": 0.2219085693359375, "step": 8492 }, { "epoch": 0.5740840881438422, "grad_norm": 1.0492627620697021, "learning_rate": 1.2247382235919582e-05, "loss": 0.29364013671875, "step": 8493 }, { "epoch": 0.5741516831147763, "grad_norm": 1.4373029470443726, "learning_rate": 1.2244138804191152e-05, "loss": 0.33148193359375, "step": 8494 }, { "epoch": 0.5742192780857104, "grad_norm": 1.2329553365707397, "learning_rate": 1.2240895505808423e-05, "loss": 0.180908203125, "step": 8495 }, { "epoch": 0.5742868730566446, "grad_norm": 1.9878208637237549, "learning_rate": 1.2237652340928334e-05, "loss": 0.3074951171875, "step": 8496 }, { "epoch": 0.5743544680275787, "grad_norm": 0.8204700350761414, "learning_rate": 1.2234409309707806e-05, "loss": 0.16762542724609375, "step": 8497 }, { "epoch": 0.574422062998513, "grad_norm": 0.8341597318649292, "learning_rate": 1.2231166412303746e-05, "loss": 0.1571044921875, "step": 8498 }, { "epoch": 0.5744896579694471, "grad_norm": 1.093239665031433, "learning_rate": 1.2227923648873077e-05, "loss": 0.272674560546875, "step": 8499 }, { "epoch": 0.5745572529403813, "grad_norm": 1.5396100282669067, "learning_rate": 1.2224681019572698e-05, "loss": 0.209259033203125, "step": 8500 }, { "epoch": 0.5746248479113154, "grad_norm": 1.2847843170166016, "learning_rate": 1.2221438524559519e-05, "loss": 0.30938720703125, "step": 8501 }, { "epoch": 0.5746924428822495, "grad_norm": 0.9700186252593994, "learning_rate": 1.2218196163990415e-05, "loss": 0.26129150390625, "step": 8502 }, { "epoch": 0.5747600378531837, "grad_norm": 1.0989139080047607, "learning_rate": 1.2214953938022286e-05, "loss": 0.2215576171875, "step": 8503 }, { "epoch": 0.5748276328241179, "grad_norm": 0.9839774370193481, "learning_rate": 1.2211711846812e-05, "loss": 0.202911376953125, "step": 8504 }, { "epoch": 0.5748952277950521, "grad_norm": 1.1670318841934204, "learning_rate": 1.2208469890516444e-05, "loss": 0.271728515625, "step": 8505 }, { "epoch": 0.5749628227659862, "grad_norm": 1.7063826322555542, "learning_rate": 1.2205228069292467e-05, "loss": 0.283294677734375, "step": 8506 }, { "epoch": 0.5750304177369203, "grad_norm": 1.083051085472107, "learning_rate": 1.220198638329694e-05, "loss": 0.242523193359375, "step": 8507 }, { "epoch": 0.5750980127078545, "grad_norm": 0.9101759791374207, "learning_rate": 1.2198744832686717e-05, "loss": 0.231842041015625, "step": 8508 }, { "epoch": 0.5751656076787887, "grad_norm": 1.099847435951233, "learning_rate": 1.2195503417618631e-05, "loss": 0.239166259765625, "step": 8509 }, { "epoch": 0.5752332026497229, "grad_norm": 0.7408796548843384, "learning_rate": 1.2192262138249536e-05, "loss": 0.2042388916015625, "step": 8510 }, { "epoch": 0.575300797620657, "grad_norm": 1.223517894744873, "learning_rate": 1.2189020994736255e-05, "loss": 0.268798828125, "step": 8511 }, { "epoch": 0.5753683925915912, "grad_norm": 1.1054922342300415, "learning_rate": 1.2185779987235627e-05, "loss": 0.22821044921875, "step": 8512 }, { "epoch": 0.5754359875625253, "grad_norm": 1.3663140535354614, "learning_rate": 1.2182539115904459e-05, "loss": 0.2037353515625, "step": 8513 }, { "epoch": 0.5755035825334595, "grad_norm": 1.1722311973571777, "learning_rate": 1.2179298380899574e-05, "loss": 0.2131500244140625, "step": 8514 }, { "epoch": 0.5755711775043937, "grad_norm": 1.0069016218185425, "learning_rate": 1.2176057782377771e-05, "loss": 0.177703857421875, "step": 8515 }, { "epoch": 0.5756387724753278, "grad_norm": 1.278513789176941, "learning_rate": 1.2172817320495857e-05, "loss": 0.189178466796875, "step": 8516 }, { "epoch": 0.575706367446262, "grad_norm": 1.675653338432312, "learning_rate": 1.216957699541062e-05, "loss": 0.261749267578125, "step": 8517 }, { "epoch": 0.5757739624171961, "grad_norm": 0.9344409704208374, "learning_rate": 1.2166336807278856e-05, "loss": 0.224395751953125, "step": 8518 }, { "epoch": 0.5758415573881304, "grad_norm": 1.331440806388855, "learning_rate": 1.2163096756257336e-05, "loss": 0.2095947265625, "step": 8519 }, { "epoch": 0.5759091523590645, "grad_norm": 0.8873569965362549, "learning_rate": 1.2159856842502835e-05, "loss": 0.17315673828125, "step": 8520 }, { "epoch": 0.5759767473299986, "grad_norm": 0.8341138958930969, "learning_rate": 1.2156617066172128e-05, "loss": 0.14264678955078125, "step": 8521 }, { "epoch": 0.5760443423009328, "grad_norm": 1.5054608583450317, "learning_rate": 1.2153377427421965e-05, "loss": 0.174224853515625, "step": 8522 }, { "epoch": 0.5761119372718669, "grad_norm": 1.486326813697815, "learning_rate": 1.2150137926409108e-05, "loss": 0.2401123046875, "step": 8523 }, { "epoch": 0.5761795322428012, "grad_norm": 1.0276719331741333, "learning_rate": 1.2146898563290298e-05, "loss": 0.1835174560546875, "step": 8524 }, { "epoch": 0.5762471272137353, "grad_norm": 1.3706151247024536, "learning_rate": 1.2143659338222285e-05, "loss": 0.267547607421875, "step": 8525 }, { "epoch": 0.5763147221846695, "grad_norm": 1.0424860715866089, "learning_rate": 1.214042025136179e-05, "loss": 0.155181884765625, "step": 8526 }, { "epoch": 0.5763823171556036, "grad_norm": 1.3555208444595337, "learning_rate": 1.213718130286555e-05, "loss": 0.31060791015625, "step": 8527 }, { "epoch": 0.5764499121265377, "grad_norm": 1.1701034307479858, "learning_rate": 1.213394249289028e-05, "loss": 0.164947509765625, "step": 8528 }, { "epoch": 0.576517507097472, "grad_norm": 1.46163809299469, "learning_rate": 1.2130703821592701e-05, "loss": 0.289215087890625, "step": 8529 }, { "epoch": 0.5765851020684061, "grad_norm": 2.0303843021392822, "learning_rate": 1.2127465289129509e-05, "loss": 0.27752685546875, "step": 8530 }, { "epoch": 0.5766526970393403, "grad_norm": 1.0454952716827393, "learning_rate": 1.2124226895657418e-05, "loss": 0.2115478515625, "step": 8531 }, { "epoch": 0.5767202920102744, "grad_norm": 2.0310587882995605, "learning_rate": 1.212098864133311e-05, "loss": 0.38055419921875, "step": 8532 }, { "epoch": 0.5767878869812086, "grad_norm": 1.138189673423767, "learning_rate": 1.2117750526313272e-05, "loss": 0.23223876953125, "step": 8533 }, { "epoch": 0.5768554819521428, "grad_norm": 0.9262678027153015, "learning_rate": 1.2114512550754597e-05, "loss": 0.1729888916015625, "step": 8534 }, { "epoch": 0.5769230769230769, "grad_norm": 1.4039121866226196, "learning_rate": 1.2111274714813741e-05, "loss": 0.2490692138671875, "step": 8535 }, { "epoch": 0.5769906718940111, "grad_norm": 2.217364549636841, "learning_rate": 1.2108037018647382e-05, "loss": 0.3173828125, "step": 8536 }, { "epoch": 0.5770582668649452, "grad_norm": 0.9382442235946655, "learning_rate": 1.2104799462412174e-05, "loss": 0.19586181640625, "step": 8537 }, { "epoch": 0.5771258618358794, "grad_norm": 1.311828851699829, "learning_rate": 1.210156204626478e-05, "loss": 0.2132568359375, "step": 8538 }, { "epoch": 0.5771934568068136, "grad_norm": 1.4182380437850952, "learning_rate": 1.2098324770361834e-05, "loss": 0.291168212890625, "step": 8539 }, { "epoch": 0.5772610517777478, "grad_norm": 0.9309943318367004, "learning_rate": 1.2095087634859983e-05, "loss": 0.170867919921875, "step": 8540 }, { "epoch": 0.5773286467486819, "grad_norm": 1.512079119682312, "learning_rate": 1.2091850639915853e-05, "loss": 0.266448974609375, "step": 8541 }, { "epoch": 0.577396241719616, "grad_norm": 1.7364259958267212, "learning_rate": 1.2088613785686083e-05, "loss": 0.34698486328125, "step": 8542 }, { "epoch": 0.5774638366905502, "grad_norm": 0.9753556847572327, "learning_rate": 1.2085377072327283e-05, "loss": 0.1676483154296875, "step": 8543 }, { "epoch": 0.5775314316614844, "grad_norm": 0.7315055727958679, "learning_rate": 1.208214049999606e-05, "loss": 0.179931640625, "step": 8544 }, { "epoch": 0.5775990266324186, "grad_norm": 1.1027897596359253, "learning_rate": 1.2078904068849028e-05, "loss": 0.29193115234375, "step": 8545 }, { "epoch": 0.5776666216033527, "grad_norm": 1.6247597932815552, "learning_rate": 1.2075667779042781e-05, "loss": 0.2303009033203125, "step": 8546 }, { "epoch": 0.5777342165742869, "grad_norm": 0.9462374448776245, "learning_rate": 1.2072431630733919e-05, "loss": 0.198394775390625, "step": 8547 }, { "epoch": 0.577801811545221, "grad_norm": 1.3567386865615845, "learning_rate": 1.2069195624079016e-05, "loss": 0.236083984375, "step": 8548 }, { "epoch": 0.5778694065161551, "grad_norm": 1.3027931451797485, "learning_rate": 1.2065959759234657e-05, "loss": 0.1659088134765625, "step": 8549 }, { "epoch": 0.5779370014870894, "grad_norm": 0.8066061735153198, "learning_rate": 1.2062724036357406e-05, "loss": 0.201416015625, "step": 8550 }, { "epoch": 0.5780045964580235, "grad_norm": 1.3718552589416504, "learning_rate": 1.2059488455603844e-05, "loss": 0.284942626953125, "step": 8551 }, { "epoch": 0.5780721914289577, "grad_norm": 1.7607148885726929, "learning_rate": 1.205625301713051e-05, "loss": 0.20428466796875, "step": 8552 }, { "epoch": 0.5781397863998918, "grad_norm": 1.6983541250228882, "learning_rate": 1.205301772109396e-05, "loss": 0.31719970703125, "step": 8553 }, { "epoch": 0.5782073813708261, "grad_norm": 1.34561288356781, "learning_rate": 1.2049782567650748e-05, "loss": 0.25885009765625, "step": 8554 }, { "epoch": 0.5782749763417602, "grad_norm": 0.9164870977401733, "learning_rate": 1.2046547556957394e-05, "loss": 0.16876220703125, "step": 8555 }, { "epoch": 0.5783425713126943, "grad_norm": 0.7350897789001465, "learning_rate": 1.2043312689170443e-05, "loss": 0.17706680297851562, "step": 8556 }, { "epoch": 0.5784101662836285, "grad_norm": 1.0546529293060303, "learning_rate": 1.2040077964446405e-05, "loss": 0.1938934326171875, "step": 8557 }, { "epoch": 0.5784777612545626, "grad_norm": 1.9146062135696411, "learning_rate": 1.203684338294181e-05, "loss": 0.315032958984375, "step": 8558 }, { "epoch": 0.5785453562254969, "grad_norm": 1.146517276763916, "learning_rate": 1.2033608944813155e-05, "loss": 0.20672607421875, "step": 8559 }, { "epoch": 0.578612951196431, "grad_norm": 0.735661506652832, "learning_rate": 1.203037465021695e-05, "loss": 0.1262359619140625, "step": 8560 }, { "epoch": 0.5786805461673652, "grad_norm": 1.6959306001663208, "learning_rate": 1.2027140499309685e-05, "loss": 0.32855224609375, "step": 8561 }, { "epoch": 0.5787481411382993, "grad_norm": 1.0554931163787842, "learning_rate": 1.202390649224785e-05, "loss": 0.2237396240234375, "step": 8562 }, { "epoch": 0.5788157361092334, "grad_norm": 1.3502073287963867, "learning_rate": 1.2020672629187927e-05, "loss": 0.320770263671875, "step": 8563 }, { "epoch": 0.5788833310801677, "grad_norm": 1.1134308576583862, "learning_rate": 1.2017438910286395e-05, "loss": 0.259368896484375, "step": 8564 }, { "epoch": 0.5789509260511018, "grad_norm": 1.5540642738342285, "learning_rate": 1.2014205335699709e-05, "loss": 0.240753173828125, "step": 8565 }, { "epoch": 0.579018521022036, "grad_norm": 1.0436720848083496, "learning_rate": 1.2010971905584343e-05, "loss": 0.1829681396484375, "step": 8566 }, { "epoch": 0.5790861159929701, "grad_norm": 0.7928972840309143, "learning_rate": 1.2007738620096748e-05, "loss": 0.12580108642578125, "step": 8567 }, { "epoch": 0.5791537109639043, "grad_norm": 0.878368079662323, "learning_rate": 1.2004505479393359e-05, "loss": 0.236663818359375, "step": 8568 }, { "epoch": 0.5792213059348384, "grad_norm": 0.9971000552177429, "learning_rate": 1.200127248363063e-05, "loss": 0.1869354248046875, "step": 8569 }, { "epoch": 0.5792889009057726, "grad_norm": 0.5661313533782959, "learning_rate": 1.1998039632964981e-05, "loss": 0.11685562133789062, "step": 8570 }, { "epoch": 0.5793564958767068, "grad_norm": 1.5927445888519287, "learning_rate": 1.1994806927552852e-05, "loss": 0.2708740234375, "step": 8571 }, { "epoch": 0.5794240908476409, "grad_norm": 1.7429814338684082, "learning_rate": 1.1991574367550645e-05, "loss": 0.29156494140625, "step": 8572 }, { "epoch": 0.5794916858185751, "grad_norm": 1.5115605592727661, "learning_rate": 1.1988341953114783e-05, "loss": 0.23895263671875, "step": 8573 }, { "epoch": 0.5795592807895092, "grad_norm": 0.8708714246749878, "learning_rate": 1.1985109684401662e-05, "loss": 0.210968017578125, "step": 8574 }, { "epoch": 0.5796268757604435, "grad_norm": 1.190146565437317, "learning_rate": 1.1981877561567692e-05, "loss": 0.30487060546875, "step": 8575 }, { "epoch": 0.5796944707313776, "grad_norm": 1.0862793922424316, "learning_rate": 1.1978645584769248e-05, "loss": 0.198089599609375, "step": 8576 }, { "epoch": 0.5797620657023117, "grad_norm": 0.9980295300483704, "learning_rate": 1.1975413754162728e-05, "loss": 0.236419677734375, "step": 8577 }, { "epoch": 0.5798296606732459, "grad_norm": 0.8561152815818787, "learning_rate": 1.1972182069904495e-05, "loss": 0.18243408203125, "step": 8578 }, { "epoch": 0.57989725564418, "grad_norm": 1.2019156217575073, "learning_rate": 1.196895053215092e-05, "loss": 0.214874267578125, "step": 8579 }, { "epoch": 0.5799648506151143, "grad_norm": 1.7956651449203491, "learning_rate": 1.1965719141058377e-05, "loss": 0.3082275390625, "step": 8580 }, { "epoch": 0.5800324455860484, "grad_norm": 1.8269603252410889, "learning_rate": 1.1962487896783204e-05, "loss": 0.294342041015625, "step": 8581 }, { "epoch": 0.5801000405569826, "grad_norm": 1.1158596277236938, "learning_rate": 1.1959256799481762e-05, "loss": 0.20361328125, "step": 8582 }, { "epoch": 0.5801676355279167, "grad_norm": 2.5054376125335693, "learning_rate": 1.195602584931038e-05, "loss": 0.31793212890625, "step": 8583 }, { "epoch": 0.5802352304988508, "grad_norm": 0.8371723890304565, "learning_rate": 1.1952795046425405e-05, "loss": 0.1572113037109375, "step": 8584 }, { "epoch": 0.5803028254697851, "grad_norm": 1.012298345565796, "learning_rate": 1.1949564390983149e-05, "loss": 0.238494873046875, "step": 8585 }, { "epoch": 0.5803704204407192, "grad_norm": 1.2063924074172974, "learning_rate": 1.1946333883139942e-05, "loss": 0.22735595703125, "step": 8586 }, { "epoch": 0.5804380154116534, "grad_norm": 1.1724638938903809, "learning_rate": 1.194310352305209e-05, "loss": 0.26776123046875, "step": 8587 }, { "epoch": 0.5805056103825875, "grad_norm": 1.0795624256134033, "learning_rate": 1.1939873310875906e-05, "loss": 0.2818603515625, "step": 8588 }, { "epoch": 0.5805732053535217, "grad_norm": 1.5571848154067993, "learning_rate": 1.193664324676768e-05, "loss": 0.28399658203125, "step": 8589 }, { "epoch": 0.5806408003244559, "grad_norm": 1.0039318799972534, "learning_rate": 1.19334133308837e-05, "loss": 0.217742919921875, "step": 8590 }, { "epoch": 0.58070839529539, "grad_norm": 1.2692840099334717, "learning_rate": 1.1930183563380258e-05, "loss": 0.27862548828125, "step": 8591 }, { "epoch": 0.5807759902663242, "grad_norm": 1.1959179639816284, "learning_rate": 1.192695394441362e-05, "loss": 0.1995849609375, "step": 8592 }, { "epoch": 0.5808435852372583, "grad_norm": 1.1268326044082642, "learning_rate": 1.1923724474140072e-05, "loss": 0.19158935546875, "step": 8593 }, { "epoch": 0.5809111802081925, "grad_norm": 1.5406686067581177, "learning_rate": 1.1920495152715856e-05, "loss": 0.2861328125, "step": 8594 }, { "epoch": 0.5809787751791267, "grad_norm": 1.4444363117218018, "learning_rate": 1.1917265980297238e-05, "loss": 0.2064971923828125, "step": 8595 }, { "epoch": 0.5810463701500609, "grad_norm": 1.4290262460708618, "learning_rate": 1.1914036957040462e-05, "loss": 0.186614990234375, "step": 8596 }, { "epoch": 0.581113965120995, "grad_norm": 1.1699581146240234, "learning_rate": 1.1910808083101775e-05, "loss": 0.12069320678710938, "step": 8597 }, { "epoch": 0.5811815600919291, "grad_norm": 1.5666059255599976, "learning_rate": 1.1907579358637399e-05, "loss": 0.25335693359375, "step": 8598 }, { "epoch": 0.5812491550628633, "grad_norm": 1.1605746746063232, "learning_rate": 1.1904350783803568e-05, "loss": 0.19344329833984375, "step": 8599 }, { "epoch": 0.5813167500337975, "grad_norm": 2.637993335723877, "learning_rate": 1.1901122358756494e-05, "loss": 0.32220458984375, "step": 8600 }, { "epoch": 0.5813843450047317, "grad_norm": 1.0642226934432983, "learning_rate": 1.1897894083652398e-05, "loss": 0.26025390625, "step": 8601 }, { "epoch": 0.5814519399756658, "grad_norm": 0.8763611316680908, "learning_rate": 1.1894665958647476e-05, "loss": 0.079681396484375, "step": 8602 }, { "epoch": 0.5815195349466, "grad_norm": 1.6435619592666626, "learning_rate": 1.1891437983897922e-05, "loss": 0.250244140625, "step": 8603 }, { "epoch": 0.5815871299175341, "grad_norm": 0.7395709753036499, "learning_rate": 1.1888210159559938e-05, "loss": 0.11217498779296875, "step": 8604 }, { "epoch": 0.5816547248884683, "grad_norm": 0.6473127007484436, "learning_rate": 1.1884982485789692e-05, "loss": 0.1394195556640625, "step": 8605 }, { "epoch": 0.5817223198594025, "grad_norm": 2.991299629211426, "learning_rate": 1.188175496274337e-05, "loss": 0.2723541259765625, "step": 8606 }, { "epoch": 0.5817899148303366, "grad_norm": 1.2483309507369995, "learning_rate": 1.1878527590577131e-05, "loss": 0.27569580078125, "step": 8607 }, { "epoch": 0.5818575098012708, "grad_norm": 1.424967885017395, "learning_rate": 1.1875300369447141e-05, "loss": 0.265716552734375, "step": 8608 }, { "epoch": 0.5819251047722049, "grad_norm": 1.1344114542007446, "learning_rate": 1.1872073299509548e-05, "loss": 0.1912689208984375, "step": 8609 }, { "epoch": 0.5819926997431392, "grad_norm": 1.194523811340332, "learning_rate": 1.186884638092051e-05, "loss": 0.195465087890625, "step": 8610 }, { "epoch": 0.5820602947140733, "grad_norm": 1.616773009300232, "learning_rate": 1.1865619613836148e-05, "loss": 0.2320556640625, "step": 8611 }, { "epoch": 0.5821278896850074, "grad_norm": 0.762999415397644, "learning_rate": 1.1862392998412605e-05, "loss": 0.10297775268554688, "step": 8612 }, { "epoch": 0.5821954846559416, "grad_norm": 1.3067505359649658, "learning_rate": 1.1859166534806005e-05, "loss": 0.298980712890625, "step": 8613 }, { "epoch": 0.5822630796268757, "grad_norm": 2.5059401988983154, "learning_rate": 1.1855940223172454e-05, "loss": 0.30340576171875, "step": 8614 }, { "epoch": 0.58233067459781, "grad_norm": 2.6509597301483154, "learning_rate": 1.1852714063668073e-05, "loss": 0.287139892578125, "step": 8615 }, { "epoch": 0.5823982695687441, "grad_norm": 1.2976118326187134, "learning_rate": 1.1849488056448952e-05, "loss": 0.22711181640625, "step": 8616 }, { "epoch": 0.5824658645396783, "grad_norm": 0.974632203578949, "learning_rate": 1.1846262201671199e-05, "loss": 0.168731689453125, "step": 8617 }, { "epoch": 0.5825334595106124, "grad_norm": 0.8288564682006836, "learning_rate": 1.1843036499490887e-05, "loss": 0.1983642578125, "step": 8618 }, { "epoch": 0.5826010544815465, "grad_norm": 1.7451767921447754, "learning_rate": 1.183981095006411e-05, "loss": 0.31109619140625, "step": 8619 }, { "epoch": 0.5826686494524808, "grad_norm": 1.5783699750900269, "learning_rate": 1.183658555354692e-05, "loss": 0.2916259765625, "step": 8620 }, { "epoch": 0.5827362444234149, "grad_norm": 1.7120453119277954, "learning_rate": 1.1833360310095406e-05, "loss": 0.2615966796875, "step": 8621 }, { "epoch": 0.5828038393943491, "grad_norm": 1.1393934488296509, "learning_rate": 1.1830135219865605e-05, "loss": 0.234283447265625, "step": 8622 }, { "epoch": 0.5828714343652832, "grad_norm": 1.5047861337661743, "learning_rate": 1.1826910283013583e-05, "loss": 0.24676513671875, "step": 8623 }, { "epoch": 0.5829390293362174, "grad_norm": 1.873436689376831, "learning_rate": 1.1823685499695368e-05, "loss": 0.30950927734375, "step": 8624 }, { "epoch": 0.5830066243071516, "grad_norm": 0.9441726207733154, "learning_rate": 1.1820460870067006e-05, "loss": 0.24432373046875, "step": 8625 }, { "epoch": 0.5830742192780857, "grad_norm": 1.3398982286453247, "learning_rate": 1.1817236394284522e-05, "loss": 0.268341064453125, "step": 8626 }, { "epoch": 0.5831418142490199, "grad_norm": 1.2861456871032715, "learning_rate": 1.181401207250393e-05, "loss": 0.33660888671875, "step": 8627 }, { "epoch": 0.583209409219954, "grad_norm": 1.0675132274627686, "learning_rate": 1.1810787904881249e-05, "loss": 0.2099609375, "step": 8628 }, { "epoch": 0.5832770041908882, "grad_norm": 0.9901946783065796, "learning_rate": 1.180756389157248e-05, "loss": 0.1988677978515625, "step": 8629 }, { "epoch": 0.5833445991618224, "grad_norm": 1.018497109413147, "learning_rate": 1.180434003273363e-05, "loss": 0.226287841796875, "step": 8630 }, { "epoch": 0.5834121941327565, "grad_norm": 0.9720938801765442, "learning_rate": 1.1801116328520677e-05, "loss": 0.25372314453125, "step": 8631 }, { "epoch": 0.5834797891036907, "grad_norm": 0.8894199728965759, "learning_rate": 1.1797892779089611e-05, "loss": 0.19602203369140625, "step": 8632 }, { "epoch": 0.5835473840746248, "grad_norm": 1.304927945137024, "learning_rate": 1.1794669384596405e-05, "loss": 0.21246337890625, "step": 8633 }, { "epoch": 0.583614979045559, "grad_norm": 0.9422487616539001, "learning_rate": 1.1791446145197034e-05, "loss": 0.15511322021484375, "step": 8634 }, { "epoch": 0.5836825740164931, "grad_norm": 1.0131844282150269, "learning_rate": 1.1788223061047445e-05, "loss": 0.187591552734375, "step": 8635 }, { "epoch": 0.5837501689874274, "grad_norm": 0.9115725159645081, "learning_rate": 1.1785000132303602e-05, "loss": 0.246429443359375, "step": 8636 }, { "epoch": 0.5838177639583615, "grad_norm": 1.0917506217956543, "learning_rate": 1.1781777359121446e-05, "loss": 0.274322509765625, "step": 8637 }, { "epoch": 0.5838853589292956, "grad_norm": 0.8476849794387817, "learning_rate": 1.1778554741656912e-05, "loss": 0.17340087890625, "step": 8638 }, { "epoch": 0.5839529539002298, "grad_norm": 0.9166689515113831, "learning_rate": 1.1775332280065941e-05, "loss": 0.16756439208984375, "step": 8639 }, { "epoch": 0.584020548871164, "grad_norm": 1.1547247171401978, "learning_rate": 1.1772109974504441e-05, "loss": 0.190826416015625, "step": 8640 }, { "epoch": 0.5840881438420982, "grad_norm": 1.1510658264160156, "learning_rate": 1.1768887825128338e-05, "loss": 0.29156494140625, "step": 8641 }, { "epoch": 0.5841557388130323, "grad_norm": 1.0805569887161255, "learning_rate": 1.1765665832093531e-05, "loss": 0.263092041015625, "step": 8642 }, { "epoch": 0.5842233337839665, "grad_norm": 1.2353674173355103, "learning_rate": 1.1762443995555935e-05, "loss": 0.189453125, "step": 8643 }, { "epoch": 0.5842909287549006, "grad_norm": 1.0658156871795654, "learning_rate": 1.1759222315671423e-05, "loss": 0.175628662109375, "step": 8644 }, { "epoch": 0.5843585237258347, "grad_norm": 1.9917439222335815, "learning_rate": 1.1756000792595894e-05, "loss": 0.246368408203125, "step": 8645 }, { "epoch": 0.584426118696769, "grad_norm": 1.5794428586959839, "learning_rate": 1.1752779426485219e-05, "loss": 0.293212890625, "step": 8646 }, { "epoch": 0.5844937136677031, "grad_norm": 1.2399277687072754, "learning_rate": 1.1749558217495273e-05, "loss": 0.293670654296875, "step": 8647 }, { "epoch": 0.5845613086386373, "grad_norm": 1.200768232345581, "learning_rate": 1.1746337165781913e-05, "loss": 0.1699676513671875, "step": 8648 }, { "epoch": 0.5846289036095714, "grad_norm": 2.409402370452881, "learning_rate": 1.1743116271500993e-05, "loss": 0.34234619140625, "step": 8649 }, { "epoch": 0.5846964985805057, "grad_norm": 1.089873194694519, "learning_rate": 1.1739895534808367e-05, "loss": 0.21990966796875, "step": 8650 }, { "epoch": 0.5847640935514398, "grad_norm": 1.7377744913101196, "learning_rate": 1.1736674955859865e-05, "loss": 0.30999755859375, "step": 8651 }, { "epoch": 0.5848316885223739, "grad_norm": 1.4450035095214844, "learning_rate": 1.1733454534811329e-05, "loss": 0.254058837890625, "step": 8652 }, { "epoch": 0.5848992834933081, "grad_norm": 0.8342389464378357, "learning_rate": 1.1730234271818568e-05, "loss": 0.1817626953125, "step": 8653 }, { "epoch": 0.5849668784642422, "grad_norm": 1.4424325227737427, "learning_rate": 1.1727014167037413e-05, "loss": 0.26922607421875, "step": 8654 }, { "epoch": 0.5850344734351764, "grad_norm": 1.6369824409484863, "learning_rate": 1.1723794220623663e-05, "loss": 0.16657638549804688, "step": 8655 }, { "epoch": 0.5851020684061106, "grad_norm": 1.0708439350128174, "learning_rate": 1.1720574432733129e-05, "loss": 0.21026611328125, "step": 8656 }, { "epoch": 0.5851696633770448, "grad_norm": 1.2356168031692505, "learning_rate": 1.1717354803521593e-05, "loss": 0.15827178955078125, "step": 8657 }, { "epoch": 0.5852372583479789, "grad_norm": 1.0245498418807983, "learning_rate": 1.1714135333144849e-05, "loss": 0.1579742431640625, "step": 8658 }, { "epoch": 0.585304853318913, "grad_norm": 2.3983070850372314, "learning_rate": 1.1710916021758671e-05, "loss": 0.29315185546875, "step": 8659 }, { "epoch": 0.5853724482898472, "grad_norm": 2.0502922534942627, "learning_rate": 1.1707696869518833e-05, "loss": 0.34698486328125, "step": 8660 }, { "epoch": 0.5854400432607814, "grad_norm": 1.214535117149353, "learning_rate": 1.1704477876581095e-05, "loss": 0.210540771484375, "step": 8661 }, { "epoch": 0.5855076382317156, "grad_norm": 0.7831224799156189, "learning_rate": 1.1701259043101208e-05, "loss": 0.2090606689453125, "step": 8662 }, { "epoch": 0.5855752332026497, "grad_norm": 2.320401191711426, "learning_rate": 1.169804036923493e-05, "loss": 0.3421630859375, "step": 8663 }, { "epoch": 0.5856428281735839, "grad_norm": 1.2439037561416626, "learning_rate": 1.1694821855137985e-05, "loss": 0.2311248779296875, "step": 8664 }, { "epoch": 0.585710423144518, "grad_norm": 1.2710713148117065, "learning_rate": 1.169160350096612e-05, "loss": 0.18337249755859375, "step": 8665 }, { "epoch": 0.5857780181154522, "grad_norm": 1.7776380777359009, "learning_rate": 1.1688385306875045e-05, "loss": 0.29638671875, "step": 8666 }, { "epoch": 0.5858456130863864, "grad_norm": 2.1331233978271484, "learning_rate": 1.1685167273020492e-05, "loss": 0.340423583984375, "step": 8667 }, { "epoch": 0.5859132080573205, "grad_norm": 1.2760483026504517, "learning_rate": 1.1681949399558155e-05, "loss": 0.221038818359375, "step": 8668 }, { "epoch": 0.5859808030282547, "grad_norm": 0.9569857120513916, "learning_rate": 1.1678731686643747e-05, "loss": 0.203216552734375, "step": 8669 }, { "epoch": 0.5860483979991888, "grad_norm": 1.222282886505127, "learning_rate": 1.1675514134432948e-05, "loss": 0.202545166015625, "step": 8670 }, { "epoch": 0.5861159929701231, "grad_norm": 0.6421390175819397, "learning_rate": 1.1672296743081453e-05, "loss": 0.121429443359375, "step": 8671 }, { "epoch": 0.5861835879410572, "grad_norm": 2.0917718410491943, "learning_rate": 1.166907951274494e-05, "loss": 0.30804443359375, "step": 8672 }, { "epoch": 0.5862511829119913, "grad_norm": 1.6067899465560913, "learning_rate": 1.1665862443579068e-05, "loss": 0.24884033203125, "step": 8673 }, { "epoch": 0.5863187778829255, "grad_norm": 0.8117628693580627, "learning_rate": 1.166264553573951e-05, "loss": 0.0908050537109375, "step": 8674 }, { "epoch": 0.5863863728538596, "grad_norm": 1.1485308408737183, "learning_rate": 1.1659428789381912e-05, "loss": 0.245025634765625, "step": 8675 }, { "epoch": 0.5864539678247939, "grad_norm": 1.911790370941162, "learning_rate": 1.165621220466193e-05, "loss": 0.2698974609375, "step": 8676 }, { "epoch": 0.586521562795728, "grad_norm": 1.381845235824585, "learning_rate": 1.165299578173519e-05, "loss": 0.251312255859375, "step": 8677 }, { "epoch": 0.5865891577666622, "grad_norm": 1.5671926736831665, "learning_rate": 1.1649779520757332e-05, "loss": 0.198150634765625, "step": 8678 }, { "epoch": 0.5866567527375963, "grad_norm": 0.7959797978401184, "learning_rate": 1.1646563421883974e-05, "loss": 0.1844482421875, "step": 8679 }, { "epoch": 0.5867243477085304, "grad_norm": 1.2023972272872925, "learning_rate": 1.1643347485270736e-05, "loss": 0.170257568359375, "step": 8680 }, { "epoch": 0.5867919426794647, "grad_norm": 0.5789236426353455, "learning_rate": 1.1640131711073218e-05, "loss": 0.13426971435546875, "step": 8681 }, { "epoch": 0.5868595376503988, "grad_norm": 0.981625497341156, "learning_rate": 1.1636916099447025e-05, "loss": 0.240203857421875, "step": 8682 }, { "epoch": 0.586927132621333, "grad_norm": 1.9367399215698242, "learning_rate": 1.1633700650547746e-05, "loss": 0.320831298828125, "step": 8683 }, { "epoch": 0.5869947275922671, "grad_norm": 1.4485079050064087, "learning_rate": 1.1630485364530961e-05, "loss": 0.296875, "step": 8684 }, { "epoch": 0.5870623225632013, "grad_norm": 1.3312944173812866, "learning_rate": 1.1627270241552254e-05, "loss": 0.27239990234375, "step": 8685 }, { "epoch": 0.5871299175341355, "grad_norm": 0.9569301605224609, "learning_rate": 1.1624055281767183e-05, "loss": 0.17645263671875, "step": 8686 }, { "epoch": 0.5871975125050696, "grad_norm": 1.7480695247650146, "learning_rate": 1.1620840485331314e-05, "loss": 0.219146728515625, "step": 8687 }, { "epoch": 0.5872651074760038, "grad_norm": 1.5659935474395752, "learning_rate": 1.1617625852400194e-05, "loss": 0.172943115234375, "step": 8688 }, { "epoch": 0.5873327024469379, "grad_norm": 1.8408092260360718, "learning_rate": 1.1614411383129377e-05, "loss": 0.3228759765625, "step": 8689 }, { "epoch": 0.5874002974178721, "grad_norm": 1.3599721193313599, "learning_rate": 1.1611197077674383e-05, "loss": 0.301910400390625, "step": 8690 }, { "epoch": 0.5874678923888063, "grad_norm": 1.545274257659912, "learning_rate": 1.1607982936190755e-05, "loss": 0.264373779296875, "step": 8691 }, { "epoch": 0.5875354873597405, "grad_norm": 1.3732770681381226, "learning_rate": 1.1604768958834005e-05, "loss": 0.237640380859375, "step": 8692 }, { "epoch": 0.5876030823306746, "grad_norm": 1.6533161401748657, "learning_rate": 1.1601555145759652e-05, "loss": 0.2084503173828125, "step": 8693 }, { "epoch": 0.5876706773016087, "grad_norm": 1.9460654258728027, "learning_rate": 1.159834149712319e-05, "loss": 0.244720458984375, "step": 8694 }, { "epoch": 0.5877382722725429, "grad_norm": 1.0501976013183594, "learning_rate": 1.1595128013080123e-05, "loss": 0.193695068359375, "step": 8695 }, { "epoch": 0.587805867243477, "grad_norm": 1.9216392040252686, "learning_rate": 1.1591914693785944e-05, "loss": 0.35565185546875, "step": 8696 }, { "epoch": 0.5878734622144113, "grad_norm": 1.675023078918457, "learning_rate": 1.1588701539396116e-05, "loss": 0.2298583984375, "step": 8697 }, { "epoch": 0.5879410571853454, "grad_norm": 1.3912330865859985, "learning_rate": 1.158548855006613e-05, "loss": 0.2049560546875, "step": 8698 }, { "epoch": 0.5880086521562796, "grad_norm": 1.7530525922775269, "learning_rate": 1.1582275725951438e-05, "loss": 0.192352294921875, "step": 8699 }, { "epoch": 0.5880762471272137, "grad_norm": 1.4255003929138184, "learning_rate": 1.1579063067207502e-05, "loss": 0.3125, "step": 8700 }, { "epoch": 0.5881438420981479, "grad_norm": 1.3547179698944092, "learning_rate": 1.1575850573989768e-05, "loss": 0.20941162109375, "step": 8701 }, { "epoch": 0.5882114370690821, "grad_norm": 1.1492656469345093, "learning_rate": 1.1572638246453683e-05, "loss": 0.24615478515625, "step": 8702 }, { "epoch": 0.5882790320400162, "grad_norm": 1.389634370803833, "learning_rate": 1.1569426084754668e-05, "loss": 0.218353271484375, "step": 8703 }, { "epoch": 0.5883466270109504, "grad_norm": 1.2142902612686157, "learning_rate": 1.1566214089048156e-05, "loss": 0.216033935546875, "step": 8704 }, { "epoch": 0.5884142219818845, "grad_norm": 1.2505815029144287, "learning_rate": 1.156300225948956e-05, "loss": 0.27679443359375, "step": 8705 }, { "epoch": 0.5884818169528188, "grad_norm": 1.0153955221176147, "learning_rate": 1.1559790596234294e-05, "loss": 0.24298095703125, "step": 8706 }, { "epoch": 0.5885494119237529, "grad_norm": 0.6816235780715942, "learning_rate": 1.155657909943775e-05, "loss": 0.14186859130859375, "step": 8707 }, { "epoch": 0.588617006894687, "grad_norm": 1.0355020761489868, "learning_rate": 1.1553367769255319e-05, "loss": 0.185455322265625, "step": 8708 }, { "epoch": 0.5886846018656212, "grad_norm": 1.2584949731826782, "learning_rate": 1.1550156605842397e-05, "loss": 0.22107696533203125, "step": 8709 }, { "epoch": 0.5887521968365553, "grad_norm": 1.834608793258667, "learning_rate": 1.1546945609354348e-05, "loss": 0.27362060546875, "step": 8710 }, { "epoch": 0.5888197918074896, "grad_norm": 0.90956711769104, "learning_rate": 1.1543734779946551e-05, "loss": 0.224609375, "step": 8711 }, { "epoch": 0.5888873867784237, "grad_norm": 1.4346027374267578, "learning_rate": 1.1540524117774352e-05, "loss": 0.31585693359375, "step": 8712 }, { "epoch": 0.5889549817493579, "grad_norm": 2.761007785797119, "learning_rate": 1.1537313622993115e-05, "loss": 0.231536865234375, "step": 8713 }, { "epoch": 0.589022576720292, "grad_norm": 1.337691068649292, "learning_rate": 1.1534103295758174e-05, "loss": 0.236724853515625, "step": 8714 }, { "epoch": 0.5890901716912261, "grad_norm": 1.8728818893432617, "learning_rate": 1.1530893136224878e-05, "loss": 0.31805419921875, "step": 8715 }, { "epoch": 0.5891577666621604, "grad_norm": 1.4023407697677612, "learning_rate": 1.1527683144548537e-05, "loss": 0.22320556640625, "step": 8716 }, { "epoch": 0.5892253616330945, "grad_norm": 1.5419727563858032, "learning_rate": 1.1524473320884487e-05, "loss": 0.284393310546875, "step": 8717 }, { "epoch": 0.5892929566040287, "grad_norm": 1.2005198001861572, "learning_rate": 1.152126366538803e-05, "loss": 0.276458740234375, "step": 8718 }, { "epoch": 0.5893605515749628, "grad_norm": 1.2729240655899048, "learning_rate": 1.1518054178214468e-05, "loss": 0.2581787109375, "step": 8719 }, { "epoch": 0.589428146545897, "grad_norm": 0.703842282295227, "learning_rate": 1.1514844859519103e-05, "loss": 0.09769439697265625, "step": 8720 }, { "epoch": 0.5894957415168312, "grad_norm": 1.262624740600586, "learning_rate": 1.1511635709457211e-05, "loss": 0.16420364379882812, "step": 8721 }, { "epoch": 0.5895633364877653, "grad_norm": 1.4782085418701172, "learning_rate": 1.1508426728184085e-05, "loss": 0.34246826171875, "step": 8722 }, { "epoch": 0.5896309314586995, "grad_norm": 1.1175646781921387, "learning_rate": 1.1505217915854983e-05, "loss": 0.232635498046875, "step": 8723 }, { "epoch": 0.5896985264296336, "grad_norm": 1.2587714195251465, "learning_rate": 1.1502009272625173e-05, "loss": 0.2966766357421875, "step": 8724 }, { "epoch": 0.5897661214005678, "grad_norm": 0.8958353996276855, "learning_rate": 1.1498800798649906e-05, "loss": 0.15219497680664062, "step": 8725 }, { "epoch": 0.589833716371502, "grad_norm": 1.3369441032409668, "learning_rate": 1.1495592494084437e-05, "loss": 0.211395263671875, "step": 8726 }, { "epoch": 0.5899013113424362, "grad_norm": 1.0896002054214478, "learning_rate": 1.1492384359083988e-05, "loss": 0.269256591796875, "step": 8727 }, { "epoch": 0.5899689063133703, "grad_norm": 1.4854395389556885, "learning_rate": 1.1489176393803807e-05, "loss": 0.23260498046875, "step": 8728 }, { "epoch": 0.5900365012843044, "grad_norm": 1.4945905208587646, "learning_rate": 1.1485968598399098e-05, "loss": 0.272705078125, "step": 8729 }, { "epoch": 0.5901040962552386, "grad_norm": 1.1535590887069702, "learning_rate": 1.1482760973025085e-05, "loss": 0.175537109375, "step": 8730 }, { "epoch": 0.5901716912261727, "grad_norm": 1.0845868587493896, "learning_rate": 1.1479553517836973e-05, "loss": 0.211822509765625, "step": 8731 }, { "epoch": 0.590239286197107, "grad_norm": 1.282453179359436, "learning_rate": 1.1476346232989947e-05, "loss": 0.2192230224609375, "step": 8732 }, { "epoch": 0.5903068811680411, "grad_norm": 1.6082372665405273, "learning_rate": 1.1473139118639208e-05, "loss": 0.247344970703125, "step": 8733 }, { "epoch": 0.5903744761389753, "grad_norm": 0.8400898575782776, "learning_rate": 1.146993217493993e-05, "loss": 0.1167755126953125, "step": 8734 }, { "epoch": 0.5904420711099094, "grad_norm": 1.4736950397491455, "learning_rate": 1.146672540204729e-05, "loss": 0.186279296875, "step": 8735 }, { "epoch": 0.5905096660808435, "grad_norm": 1.4225786924362183, "learning_rate": 1.1463518800116443e-05, "loss": 0.264495849609375, "step": 8736 }, { "epoch": 0.5905772610517778, "grad_norm": 1.7892001867294312, "learning_rate": 1.1460312369302554e-05, "loss": 0.32415771484375, "step": 8737 }, { "epoch": 0.5906448560227119, "grad_norm": 1.196136474609375, "learning_rate": 1.1457106109760763e-05, "loss": 0.242095947265625, "step": 8738 }, { "epoch": 0.5907124509936461, "grad_norm": 0.9230466485023499, "learning_rate": 1.1453900021646216e-05, "loss": 0.28607177734375, "step": 8739 }, { "epoch": 0.5907800459645802, "grad_norm": 1.2982206344604492, "learning_rate": 1.1450694105114033e-05, "loss": 0.236358642578125, "step": 8740 }, { "epoch": 0.5908476409355145, "grad_norm": 0.9266104698181152, "learning_rate": 1.1447488360319346e-05, "loss": 0.2608642578125, "step": 8741 }, { "epoch": 0.5909152359064486, "grad_norm": 0.8558182716369629, "learning_rate": 1.1444282787417266e-05, "loss": 0.16872406005859375, "step": 8742 }, { "epoch": 0.5909828308773827, "grad_norm": 1.2673689126968384, "learning_rate": 1.1441077386562894e-05, "loss": 0.262847900390625, "step": 8743 }, { "epoch": 0.5910504258483169, "grad_norm": 0.8347617387771606, "learning_rate": 1.1437872157911335e-05, "loss": 0.14205169677734375, "step": 8744 }, { "epoch": 0.591118020819251, "grad_norm": 1.1211658716201782, "learning_rate": 1.1434667101617669e-05, "loss": 0.246795654296875, "step": 8745 }, { "epoch": 0.5911856157901852, "grad_norm": 1.997299313545227, "learning_rate": 1.1431462217836984e-05, "loss": 0.276336669921875, "step": 8746 }, { "epoch": 0.5912532107611194, "grad_norm": 0.8416732549667358, "learning_rate": 1.1428257506724345e-05, "loss": 0.133026123046875, "step": 8747 }, { "epoch": 0.5913208057320536, "grad_norm": 0.9639549851417542, "learning_rate": 1.1425052968434827e-05, "loss": 0.1829833984375, "step": 8748 }, { "epoch": 0.5913884007029877, "grad_norm": 0.8830463886260986, "learning_rate": 1.1421848603123472e-05, "loss": 0.119781494140625, "step": 8749 }, { "epoch": 0.5914559956739218, "grad_norm": 0.8846745491027832, "learning_rate": 1.1418644410945339e-05, "loss": 0.219879150390625, "step": 8750 }, { "epoch": 0.591523590644856, "grad_norm": 1.2141871452331543, "learning_rate": 1.1415440392055456e-05, "loss": 0.1985626220703125, "step": 8751 }, { "epoch": 0.5915911856157902, "grad_norm": 0.7200417518615723, "learning_rate": 1.1412236546608868e-05, "loss": 0.1064453125, "step": 8752 }, { "epoch": 0.5916587805867244, "grad_norm": 1.811071753501892, "learning_rate": 1.1409032874760584e-05, "loss": 0.32708740234375, "step": 8753 }, { "epoch": 0.5917263755576585, "grad_norm": 1.0651339292526245, "learning_rate": 1.1405829376665617e-05, "loss": 0.2374267578125, "step": 8754 }, { "epoch": 0.5917939705285927, "grad_norm": 1.5960450172424316, "learning_rate": 1.1402626052478983e-05, "loss": 0.28948974609375, "step": 8755 }, { "epoch": 0.5918615654995268, "grad_norm": 0.916114330291748, "learning_rate": 1.1399422902355667e-05, "loss": 0.214996337890625, "step": 8756 }, { "epoch": 0.591929160470461, "grad_norm": 1.3324310779571533, "learning_rate": 1.1396219926450668e-05, "loss": 0.256256103515625, "step": 8757 }, { "epoch": 0.5919967554413952, "grad_norm": 2.056840658187866, "learning_rate": 1.1393017124918956e-05, "loss": 0.31121826171875, "step": 8758 }, { "epoch": 0.5920643504123293, "grad_norm": 0.8435330390930176, "learning_rate": 1.1389814497915508e-05, "loss": 0.192596435546875, "step": 8759 }, { "epoch": 0.5921319453832635, "grad_norm": 1.229914903640747, "learning_rate": 1.1386612045595285e-05, "loss": 0.203948974609375, "step": 8760 }, { "epoch": 0.5921995403541976, "grad_norm": 2.707557201385498, "learning_rate": 1.1383409768113247e-05, "loss": 0.2784423828125, "step": 8761 }, { "epoch": 0.5922671353251318, "grad_norm": 1.7455836534500122, "learning_rate": 1.1380207665624332e-05, "loss": 0.26397705078125, "step": 8762 }, { "epoch": 0.592334730296066, "grad_norm": 1.6012353897094727, "learning_rate": 1.1377005738283483e-05, "loss": 0.26043701171875, "step": 8763 }, { "epoch": 0.5924023252670001, "grad_norm": 0.911712646484375, "learning_rate": 1.1373803986245624e-05, "loss": 0.197052001953125, "step": 8764 }, { "epoch": 0.5924699202379343, "grad_norm": 1.2355965375900269, "learning_rate": 1.1370602409665686e-05, "loss": 0.2308349609375, "step": 8765 }, { "epoch": 0.5925375152088684, "grad_norm": 1.0179815292358398, "learning_rate": 1.1367401008698571e-05, "loss": 0.23370361328125, "step": 8766 }, { "epoch": 0.5926051101798027, "grad_norm": 0.9903427958488464, "learning_rate": 1.1364199783499185e-05, "loss": 0.1838836669921875, "step": 8767 }, { "epoch": 0.5926727051507368, "grad_norm": 1.1417438983917236, "learning_rate": 1.136099873422243e-05, "loss": 0.2457733154296875, "step": 8768 }, { "epoch": 0.5927403001216709, "grad_norm": 1.0854034423828125, "learning_rate": 1.1357797861023179e-05, "loss": 0.12255096435546875, "step": 8769 }, { "epoch": 0.5928078950926051, "grad_norm": 1.0758370161056519, "learning_rate": 1.135459716405632e-05, "loss": 0.238006591796875, "step": 8770 }, { "epoch": 0.5928754900635392, "grad_norm": 1.200682282447815, "learning_rate": 1.1351396643476719e-05, "loss": 0.300048828125, "step": 8771 }, { "epoch": 0.5929430850344735, "grad_norm": 1.3573596477508545, "learning_rate": 1.1348196299439247e-05, "loss": 0.272491455078125, "step": 8772 }, { "epoch": 0.5930106800054076, "grad_norm": 1.0517750978469849, "learning_rate": 1.1344996132098738e-05, "loss": 0.20796966552734375, "step": 8773 }, { "epoch": 0.5930782749763418, "grad_norm": 0.9888129830360413, "learning_rate": 1.1341796141610056e-05, "loss": 0.153961181640625, "step": 8774 }, { "epoch": 0.5931458699472759, "grad_norm": 0.7403098344802856, "learning_rate": 1.133859632812802e-05, "loss": 0.12322998046875, "step": 8775 }, { "epoch": 0.59321346491821, "grad_norm": 0.9361429214477539, "learning_rate": 1.1335396691807467e-05, "loss": 0.218109130859375, "step": 8776 }, { "epoch": 0.5932810598891443, "grad_norm": 0.9879920482635498, "learning_rate": 1.1332197232803214e-05, "loss": 0.2003936767578125, "step": 8777 }, { "epoch": 0.5933486548600784, "grad_norm": 1.5289268493652344, "learning_rate": 1.1328997951270063e-05, "loss": 0.22296142578125, "step": 8778 }, { "epoch": 0.5934162498310126, "grad_norm": 1.2021197080612183, "learning_rate": 1.1325798847362825e-05, "loss": 0.19842529296875, "step": 8779 }, { "epoch": 0.5934838448019467, "grad_norm": 0.6930555701255798, "learning_rate": 1.1322599921236284e-05, "loss": 0.190155029296875, "step": 8780 }, { "epoch": 0.5935514397728809, "grad_norm": 2.28719162940979, "learning_rate": 1.1319401173045234e-05, "loss": 0.2865447998046875, "step": 8781 }, { "epoch": 0.593619034743815, "grad_norm": 1.395162582397461, "learning_rate": 1.131620260294444e-05, "loss": 0.258392333984375, "step": 8782 }, { "epoch": 0.5936866297147492, "grad_norm": 1.5146242380142212, "learning_rate": 1.1313004211088675e-05, "loss": 0.25592041015625, "step": 8783 }, { "epoch": 0.5937542246856834, "grad_norm": 2.3935439586639404, "learning_rate": 1.1309805997632693e-05, "loss": 0.269439697265625, "step": 8784 }, { "epoch": 0.5938218196566175, "grad_norm": 0.8918249011039734, "learning_rate": 1.1306607962731253e-05, "loss": 0.1553192138671875, "step": 8785 }, { "epoch": 0.5938894146275517, "grad_norm": 0.7999897599220276, "learning_rate": 1.130341010653908e-05, "loss": 0.1595458984375, "step": 8786 }, { "epoch": 0.5939570095984859, "grad_norm": 0.9315902590751648, "learning_rate": 1.130021242921092e-05, "loss": 0.1802978515625, "step": 8787 }, { "epoch": 0.5940246045694201, "grad_norm": 1.212805151939392, "learning_rate": 1.1297014930901493e-05, "loss": 0.20977783203125, "step": 8788 }, { "epoch": 0.5940921995403542, "grad_norm": 1.2952481508255005, "learning_rate": 1.129381761176551e-05, "loss": 0.166656494140625, "step": 8789 }, { "epoch": 0.5941597945112883, "grad_norm": 1.051926851272583, "learning_rate": 1.1290620471957683e-05, "loss": 0.204071044921875, "step": 8790 }, { "epoch": 0.5942273894822225, "grad_norm": 1.3806465864181519, "learning_rate": 1.1287423511632697e-05, "loss": 0.15726470947265625, "step": 8791 }, { "epoch": 0.5942949844531566, "grad_norm": 1.1380137205123901, "learning_rate": 1.1284226730945256e-05, "loss": 0.222869873046875, "step": 8792 }, { "epoch": 0.5943625794240909, "grad_norm": 1.5730233192443848, "learning_rate": 1.128103013005003e-05, "loss": 0.2813720703125, "step": 8793 }, { "epoch": 0.594430174395025, "grad_norm": 1.5181282758712769, "learning_rate": 1.1277833709101702e-05, "loss": 0.31060791015625, "step": 8794 }, { "epoch": 0.5944977693659592, "grad_norm": 1.2162768840789795, "learning_rate": 1.127463746825492e-05, "loss": 0.265380859375, "step": 8795 }, { "epoch": 0.5945653643368933, "grad_norm": 1.2389870882034302, "learning_rate": 1.1271441407664346e-05, "loss": 0.236724853515625, "step": 8796 }, { "epoch": 0.5946329593078274, "grad_norm": 1.2547255754470825, "learning_rate": 1.1268245527484623e-05, "loss": 0.2047576904296875, "step": 8797 }, { "epoch": 0.5947005542787617, "grad_norm": 1.1314682960510254, "learning_rate": 1.1265049827870394e-05, "loss": 0.2113037109375, "step": 8798 }, { "epoch": 0.5947681492496958, "grad_norm": 1.1329410076141357, "learning_rate": 1.126185430897628e-05, "loss": 0.195098876953125, "step": 8799 }, { "epoch": 0.59483574422063, "grad_norm": 1.4297471046447754, "learning_rate": 1.1258658970956901e-05, "loss": 0.14501953125, "step": 8800 }, { "epoch": 0.5949033391915641, "grad_norm": 1.6093987226486206, "learning_rate": 1.1255463813966871e-05, "loss": 0.266387939453125, "step": 8801 }, { "epoch": 0.5949709341624984, "grad_norm": 1.691339135169983, "learning_rate": 1.1252268838160783e-05, "loss": 0.2790374755859375, "step": 8802 }, { "epoch": 0.5950385291334325, "grad_norm": 1.2757127285003662, "learning_rate": 1.1249074043693242e-05, "loss": 0.299102783203125, "step": 8803 }, { "epoch": 0.5951061241043666, "grad_norm": 1.1215835809707642, "learning_rate": 1.124587943071882e-05, "loss": 0.1991424560546875, "step": 8804 }, { "epoch": 0.5951737190753008, "grad_norm": 1.6092748641967773, "learning_rate": 1.12426849993921e-05, "loss": 0.15130615234375, "step": 8805 }, { "epoch": 0.5952413140462349, "grad_norm": 1.9786065816879272, "learning_rate": 1.1239490749867644e-05, "loss": 0.272979736328125, "step": 8806 }, { "epoch": 0.5953089090171692, "grad_norm": 1.0565028190612793, "learning_rate": 1.1236296682300016e-05, "loss": 0.2564697265625, "step": 8807 }, { "epoch": 0.5953765039881033, "grad_norm": 1.4201432466506958, "learning_rate": 1.1233102796843756e-05, "loss": 0.2786102294921875, "step": 8808 }, { "epoch": 0.5954440989590375, "grad_norm": 0.824255645275116, "learning_rate": 1.1229909093653413e-05, "loss": 0.159210205078125, "step": 8809 }, { "epoch": 0.5955116939299716, "grad_norm": 0.8951945900917053, "learning_rate": 1.122671557288351e-05, "loss": 0.22906494140625, "step": 8810 }, { "epoch": 0.5955792889009057, "grad_norm": 0.8517686724662781, "learning_rate": 1.1223522234688578e-05, "loss": 0.2130126953125, "step": 8811 }, { "epoch": 0.59564688387184, "grad_norm": 1.3678406476974487, "learning_rate": 1.1220329079223125e-05, "loss": 0.2757568359375, "step": 8812 }, { "epoch": 0.5957144788427741, "grad_norm": 0.9166393280029297, "learning_rate": 1.1217136106641651e-05, "loss": 0.2010498046875, "step": 8813 }, { "epoch": 0.5957820738137083, "grad_norm": 0.8960816860198975, "learning_rate": 1.1213943317098666e-05, "loss": 0.19835662841796875, "step": 8814 }, { "epoch": 0.5958496687846424, "grad_norm": 0.7645251154899597, "learning_rate": 1.1210750710748642e-05, "loss": 0.11429977416992188, "step": 8815 }, { "epoch": 0.5959172637555766, "grad_norm": 1.8056097030639648, "learning_rate": 1.1207558287746068e-05, "loss": 0.291259765625, "step": 8816 }, { "epoch": 0.5959848587265107, "grad_norm": 1.1297422647476196, "learning_rate": 1.1204366048245404e-05, "loss": 0.226715087890625, "step": 8817 }, { "epoch": 0.5960524536974449, "grad_norm": 0.7350589036941528, "learning_rate": 1.1201173992401123e-05, "loss": 0.1756744384765625, "step": 8818 }, { "epoch": 0.5961200486683791, "grad_norm": 1.5941945314407349, "learning_rate": 1.1197982120367662e-05, "loss": 0.2494659423828125, "step": 8819 }, { "epoch": 0.5961876436393132, "grad_norm": 1.2608461380004883, "learning_rate": 1.1194790432299474e-05, "loss": 0.271759033203125, "step": 8820 }, { "epoch": 0.5962552386102474, "grad_norm": 1.5194624662399292, "learning_rate": 1.1191598928350988e-05, "loss": 0.29150390625, "step": 8821 }, { "epoch": 0.5963228335811815, "grad_norm": 0.8856238722801208, "learning_rate": 1.1188407608676628e-05, "loss": 0.183380126953125, "step": 8822 }, { "epoch": 0.5963904285521158, "grad_norm": 0.8275324106216431, "learning_rate": 1.1185216473430819e-05, "loss": 0.18084716796875, "step": 8823 }, { "epoch": 0.5964580235230499, "grad_norm": 1.4882255792617798, "learning_rate": 1.1182025522767953e-05, "loss": 0.2659912109375, "step": 8824 }, { "epoch": 0.596525618493984, "grad_norm": 1.2424148321151733, "learning_rate": 1.1178834756842441e-05, "loss": 0.27728271484375, "step": 8825 }, { "epoch": 0.5965932134649182, "grad_norm": 1.1729336977005005, "learning_rate": 1.117564417580866e-05, "loss": 0.18933868408203125, "step": 8826 }, { "epoch": 0.5966608084358523, "grad_norm": 1.1159700155258179, "learning_rate": 1.1172453779821008e-05, "loss": 0.2065582275390625, "step": 8827 }, { "epoch": 0.5967284034067866, "grad_norm": 1.4775516986846924, "learning_rate": 1.1169263569033836e-05, "loss": 0.271514892578125, "step": 8828 }, { "epoch": 0.5967959983777207, "grad_norm": 1.6396979093551636, "learning_rate": 1.116607354360152e-05, "loss": 0.3218994140625, "step": 8829 }, { "epoch": 0.5968635933486549, "grad_norm": 1.1927645206451416, "learning_rate": 1.1162883703678404e-05, "loss": 0.258544921875, "step": 8830 }, { "epoch": 0.596931188319589, "grad_norm": 1.08157217502594, "learning_rate": 1.1159694049418848e-05, "loss": 0.284515380859375, "step": 8831 }, { "epoch": 0.5969987832905231, "grad_norm": 1.259727120399475, "learning_rate": 1.1156504580977164e-05, "loss": 0.23612213134765625, "step": 8832 }, { "epoch": 0.5970663782614574, "grad_norm": 1.616077184677124, "learning_rate": 1.1153315298507697e-05, "loss": 0.342742919921875, "step": 8833 }, { "epoch": 0.5971339732323915, "grad_norm": 2.0647635459899902, "learning_rate": 1.1150126202164757e-05, "loss": 0.254852294921875, "step": 8834 }, { "epoch": 0.5972015682033257, "grad_norm": 1.9012821912765503, "learning_rate": 1.1146937292102657e-05, "loss": 0.2283782958984375, "step": 8835 }, { "epoch": 0.5972691631742598, "grad_norm": 1.1610593795776367, "learning_rate": 1.1143748568475694e-05, "loss": 0.2152862548828125, "step": 8836 }, { "epoch": 0.597336758145194, "grad_norm": 1.9210753440856934, "learning_rate": 1.114056003143815e-05, "loss": 0.274627685546875, "step": 8837 }, { "epoch": 0.5974043531161282, "grad_norm": 1.1101304292678833, "learning_rate": 1.1137371681144316e-05, "loss": 0.244354248046875, "step": 8838 }, { "epoch": 0.5974719480870623, "grad_norm": 1.3974648714065552, "learning_rate": 1.113418351774846e-05, "loss": 0.30963134765625, "step": 8839 }, { "epoch": 0.5975395430579965, "grad_norm": 0.808290958404541, "learning_rate": 1.1130995541404851e-05, "loss": 0.1838226318359375, "step": 8840 }, { "epoch": 0.5976071380289306, "grad_norm": 1.1777044534683228, "learning_rate": 1.1127807752267734e-05, "loss": 0.31512451171875, "step": 8841 }, { "epoch": 0.5976747329998648, "grad_norm": 1.2974060773849487, "learning_rate": 1.1124620150491361e-05, "loss": 0.21868896484375, "step": 8842 }, { "epoch": 0.597742327970799, "grad_norm": 1.6855406761169434, "learning_rate": 1.1121432736229964e-05, "loss": 0.20540618896484375, "step": 8843 }, { "epoch": 0.5978099229417332, "grad_norm": 0.9675050973892212, "learning_rate": 1.1118245509637777e-05, "loss": 0.14398956298828125, "step": 8844 }, { "epoch": 0.5978775179126673, "grad_norm": 1.2662079334259033, "learning_rate": 1.1115058470869005e-05, "loss": 0.23602294921875, "step": 8845 }, { "epoch": 0.5979451128836014, "grad_norm": 1.2823137044906616, "learning_rate": 1.1111871620077869e-05, "loss": 0.32696533203125, "step": 8846 }, { "epoch": 0.5980127078545356, "grad_norm": 0.6714131236076355, "learning_rate": 1.1108684957418568e-05, "loss": 0.08242034912109375, "step": 8847 }, { "epoch": 0.5980803028254698, "grad_norm": 0.9632045030593872, "learning_rate": 1.1105498483045282e-05, "loss": 0.213348388671875, "step": 8848 }, { "epoch": 0.598147897796404, "grad_norm": 1.8742170333862305, "learning_rate": 1.1102312197112205e-05, "loss": 0.2953338623046875, "step": 8849 }, { "epoch": 0.5982154927673381, "grad_norm": 0.9892733693122864, "learning_rate": 1.1099126099773496e-05, "loss": 0.2344207763671875, "step": 8850 }, { "epoch": 0.5982830877382723, "grad_norm": 0.6056962609291077, "learning_rate": 1.1095940191183332e-05, "loss": 0.0885467529296875, "step": 8851 }, { "epoch": 0.5983506827092064, "grad_norm": 0.8844606280326843, "learning_rate": 1.1092754471495853e-05, "loss": 0.25146484375, "step": 8852 }, { "epoch": 0.5984182776801406, "grad_norm": 0.9493812322616577, "learning_rate": 1.1089568940865221e-05, "loss": 0.233062744140625, "step": 8853 }, { "epoch": 0.5984858726510748, "grad_norm": 1.1441692113876343, "learning_rate": 1.1086383599445555e-05, "loss": 0.2058563232421875, "step": 8854 }, { "epoch": 0.5985534676220089, "grad_norm": 1.4634389877319336, "learning_rate": 1.1083198447390992e-05, "loss": 0.2716064453125, "step": 8855 }, { "epoch": 0.5986210625929431, "grad_norm": 1.3170453310012817, "learning_rate": 1.1080013484855645e-05, "loss": 0.25115966796875, "step": 8856 }, { "epoch": 0.5986886575638772, "grad_norm": 1.3670003414154053, "learning_rate": 1.1076828711993627e-05, "loss": 0.29412841796875, "step": 8857 }, { "epoch": 0.5987562525348115, "grad_norm": 1.8647903203964233, "learning_rate": 1.1073644128959032e-05, "loss": 0.30938720703125, "step": 8858 }, { "epoch": 0.5988238475057456, "grad_norm": 1.1679127216339111, "learning_rate": 1.107045973590595e-05, "loss": 0.205078125, "step": 8859 }, { "epoch": 0.5988914424766797, "grad_norm": 1.1244091987609863, "learning_rate": 1.1067275532988468e-05, "loss": 0.312835693359375, "step": 8860 }, { "epoch": 0.5989590374476139, "grad_norm": 1.241241216659546, "learning_rate": 1.1064091520360647e-05, "loss": 0.204498291015625, "step": 8861 }, { "epoch": 0.599026632418548, "grad_norm": 1.610346794128418, "learning_rate": 1.106090769817656e-05, "loss": 0.29010009765625, "step": 8862 }, { "epoch": 0.5990942273894823, "grad_norm": 0.9129722714424133, "learning_rate": 1.105772406659025e-05, "loss": 0.1986083984375, "step": 8863 }, { "epoch": 0.5991618223604164, "grad_norm": 1.7373982667922974, "learning_rate": 1.1054540625755775e-05, "loss": 0.24737548828125, "step": 8864 }, { "epoch": 0.5992294173313506, "grad_norm": 1.2326514720916748, "learning_rate": 1.1051357375827153e-05, "loss": 0.264404296875, "step": 8865 }, { "epoch": 0.5992970123022847, "grad_norm": 1.4726489782333374, "learning_rate": 1.1048174316958427e-05, "loss": 0.235107421875, "step": 8866 }, { "epoch": 0.5993646072732188, "grad_norm": 1.2378010749816895, "learning_rate": 1.1044991449303594e-05, "loss": 0.19781494140625, "step": 8867 }, { "epoch": 0.5994322022441531, "grad_norm": 1.1124252080917358, "learning_rate": 1.1041808773016676e-05, "loss": 0.16076278686523438, "step": 8868 }, { "epoch": 0.5994997972150872, "grad_norm": 1.3149735927581787, "learning_rate": 1.103862628825166e-05, "loss": 0.253265380859375, "step": 8869 }, { "epoch": 0.5995673921860214, "grad_norm": 1.2437840700149536, "learning_rate": 1.1035443995162545e-05, "loss": 0.221527099609375, "step": 8870 }, { "epoch": 0.5996349871569555, "grad_norm": 1.0586507320404053, "learning_rate": 1.1032261893903305e-05, "loss": 0.24127197265625, "step": 8871 }, { "epoch": 0.5997025821278897, "grad_norm": 2.059936761856079, "learning_rate": 1.1029079984627905e-05, "loss": 0.30596923828125, "step": 8872 }, { "epoch": 0.5997701770988239, "grad_norm": 3.5042943954467773, "learning_rate": 1.1025898267490316e-05, "loss": 0.36639404296875, "step": 8873 }, { "epoch": 0.599837772069758, "grad_norm": 0.725626528263092, "learning_rate": 1.1022716742644479e-05, "loss": 0.12249755859375, "step": 8874 }, { "epoch": 0.5999053670406922, "grad_norm": 1.390519380569458, "learning_rate": 1.1019535410244342e-05, "loss": 0.25726318359375, "step": 8875 }, { "epoch": 0.5999729620116263, "grad_norm": 1.7458745241165161, "learning_rate": 1.1016354270443833e-05, "loss": 0.32818603515625, "step": 8876 }, { "epoch": 0.6000405569825605, "grad_norm": 1.3382118940353394, "learning_rate": 1.1013173323396886e-05, "loss": 0.26763916015625, "step": 8877 }, { "epoch": 0.6001081519534947, "grad_norm": 1.3124451637268066, "learning_rate": 1.10099925692574e-05, "loss": 0.259002685546875, "step": 8878 }, { "epoch": 0.6001757469244289, "grad_norm": 1.9343289136886597, "learning_rate": 1.100681200817929e-05, "loss": 0.21490478515625, "step": 8879 }, { "epoch": 0.600243341895363, "grad_norm": 0.70476895570755, "learning_rate": 1.1003631640316446e-05, "loss": 0.1348876953125, "step": 8880 }, { "epoch": 0.6003109368662971, "grad_norm": 1.1610640287399292, "learning_rate": 1.1000451465822764e-05, "loss": 0.3253173828125, "step": 8881 }, { "epoch": 0.6003785318372313, "grad_norm": 0.7758596539497375, "learning_rate": 1.0997271484852111e-05, "loss": 0.212615966796875, "step": 8882 }, { "epoch": 0.6004461268081654, "grad_norm": 1.7309017181396484, "learning_rate": 1.099409169755835e-05, "loss": 0.31787109375, "step": 8883 }, { "epoch": 0.6005137217790997, "grad_norm": 2.3744876384735107, "learning_rate": 1.0990912104095348e-05, "loss": 0.32025146484375, "step": 8884 }, { "epoch": 0.6005813167500338, "grad_norm": 1.2766506671905518, "learning_rate": 1.0987732704616948e-05, "loss": 0.27667236328125, "step": 8885 }, { "epoch": 0.600648911720968, "grad_norm": 1.1047215461730957, "learning_rate": 1.0984553499276999e-05, "loss": 0.27850341796875, "step": 8886 }, { "epoch": 0.6007165066919021, "grad_norm": 1.873347520828247, "learning_rate": 1.0981374488229317e-05, "loss": 0.2772216796875, "step": 8887 }, { "epoch": 0.6007841016628362, "grad_norm": 0.6705580949783325, "learning_rate": 1.097819567162773e-05, "loss": 0.07128143310546875, "step": 8888 }, { "epoch": 0.6008516966337705, "grad_norm": 0.9070338010787964, "learning_rate": 1.0975017049626048e-05, "loss": 0.216888427734375, "step": 8889 }, { "epoch": 0.6009192916047046, "grad_norm": 1.1022164821624756, "learning_rate": 1.0971838622378076e-05, "loss": 0.2775726318359375, "step": 8890 }, { "epoch": 0.6009868865756388, "grad_norm": 0.5962676405906677, "learning_rate": 1.0968660390037596e-05, "loss": 0.08942222595214844, "step": 8891 }, { "epoch": 0.6010544815465729, "grad_norm": 1.418540596961975, "learning_rate": 1.09654823527584e-05, "loss": 0.242218017578125, "step": 8892 }, { "epoch": 0.601122076517507, "grad_norm": 1.343884825706482, "learning_rate": 1.0962304510694264e-05, "loss": 0.263336181640625, "step": 8893 }, { "epoch": 0.6011896714884413, "grad_norm": 0.619712769985199, "learning_rate": 1.0959126863998937e-05, "loss": 0.1270751953125, "step": 8894 }, { "epoch": 0.6012572664593754, "grad_norm": 1.1872260570526123, "learning_rate": 1.095594941282619e-05, "loss": 0.2261810302734375, "step": 8895 }, { "epoch": 0.6013248614303096, "grad_norm": 1.147497534751892, "learning_rate": 1.0952772157329753e-05, "loss": 0.19012451171875, "step": 8896 }, { "epoch": 0.6013924564012437, "grad_norm": 0.9574732780456543, "learning_rate": 1.094959509766337e-05, "loss": 0.198150634765625, "step": 8897 }, { "epoch": 0.601460051372178, "grad_norm": 0.9494892358779907, "learning_rate": 1.0946418233980765e-05, "loss": 0.2420654296875, "step": 8898 }, { "epoch": 0.6015276463431121, "grad_norm": 1.2826522588729858, "learning_rate": 1.094324156643566e-05, "loss": 0.277252197265625, "step": 8899 }, { "epoch": 0.6015952413140462, "grad_norm": 0.9429945349693298, "learning_rate": 1.094006509518175e-05, "loss": 0.17706298828125, "step": 8900 }, { "epoch": 0.6016628362849804, "grad_norm": 0.9955583214759827, "learning_rate": 1.0936888820372744e-05, "loss": 0.159423828125, "step": 8901 }, { "epoch": 0.6017304312559145, "grad_norm": 1.3198730945587158, "learning_rate": 1.0933712742162318e-05, "loss": 0.283477783203125, "step": 8902 }, { "epoch": 0.6017980262268487, "grad_norm": 1.786297082901001, "learning_rate": 1.0930536860704167e-05, "loss": 0.234527587890625, "step": 8903 }, { "epoch": 0.6018656211977829, "grad_norm": 0.9154138565063477, "learning_rate": 1.0927361176151946e-05, "loss": 0.2000274658203125, "step": 8904 }, { "epoch": 0.6019332161687171, "grad_norm": 1.0710763931274414, "learning_rate": 1.0924185688659318e-05, "loss": 0.2819061279296875, "step": 8905 }, { "epoch": 0.6020008111396512, "grad_norm": 2.0498709678649902, "learning_rate": 1.0921010398379937e-05, "loss": 0.278472900390625, "step": 8906 }, { "epoch": 0.6020684061105853, "grad_norm": 1.312256932258606, "learning_rate": 1.0917835305467435e-05, "loss": 0.253875732421875, "step": 8907 }, { "epoch": 0.6021360010815195, "grad_norm": 0.9096048474311829, "learning_rate": 1.0914660410075451e-05, "loss": 0.1776275634765625, "step": 8908 }, { "epoch": 0.6022035960524537, "grad_norm": 1.4146831035614014, "learning_rate": 1.09114857123576e-05, "loss": 0.277496337890625, "step": 8909 }, { "epoch": 0.6022711910233879, "grad_norm": 1.126314401626587, "learning_rate": 1.0908311212467502e-05, "loss": 0.211944580078125, "step": 8910 }, { "epoch": 0.602338785994322, "grad_norm": 1.6956794261932373, "learning_rate": 1.0905136910558746e-05, "loss": 0.324676513671875, "step": 8911 }, { "epoch": 0.6024063809652562, "grad_norm": 1.1646915674209595, "learning_rate": 1.0901962806784941e-05, "loss": 0.208038330078125, "step": 8912 }, { "epoch": 0.6024739759361903, "grad_norm": 1.6369041204452515, "learning_rate": 1.0898788901299651e-05, "loss": 0.33697509765625, "step": 8913 }, { "epoch": 0.6025415709071245, "grad_norm": 1.2840126752853394, "learning_rate": 1.0895615194256467e-05, "loss": 0.33612060546875, "step": 8914 }, { "epoch": 0.6026091658780587, "grad_norm": 0.9498893618583679, "learning_rate": 1.0892441685808936e-05, "loss": 0.123931884765625, "step": 8915 }, { "epoch": 0.6026767608489928, "grad_norm": 1.5920377969741821, "learning_rate": 1.0889268376110632e-05, "loss": 0.25238037109375, "step": 8916 }, { "epoch": 0.602744355819927, "grad_norm": 1.7757099866867065, "learning_rate": 1.0886095265315083e-05, "loss": 0.1795501708984375, "step": 8917 }, { "epoch": 0.6028119507908611, "grad_norm": 1.3683229684829712, "learning_rate": 1.0882922353575825e-05, "loss": 0.29034423828125, "step": 8918 }, { "epoch": 0.6028795457617954, "grad_norm": 1.0929683446884155, "learning_rate": 1.0879749641046396e-05, "loss": 0.21173095703125, "step": 8919 }, { "epoch": 0.6029471407327295, "grad_norm": 1.1248630285263062, "learning_rate": 1.0876577127880293e-05, "loss": 0.1732025146484375, "step": 8920 }, { "epoch": 0.6030147357036636, "grad_norm": 0.8958355784416199, "learning_rate": 1.0873404814231038e-05, "loss": 0.209716796875, "step": 8921 }, { "epoch": 0.6030823306745978, "grad_norm": 1.6666632890701294, "learning_rate": 1.087023270025212e-05, "loss": 0.3194580078125, "step": 8922 }, { "epoch": 0.6031499256455319, "grad_norm": 1.1890668869018555, "learning_rate": 1.0867060786097028e-05, "loss": 0.2928466796875, "step": 8923 }, { "epoch": 0.6032175206164662, "grad_norm": 1.159217357635498, "learning_rate": 1.0863889071919236e-05, "loss": 0.25439453125, "step": 8924 }, { "epoch": 0.6032851155874003, "grad_norm": 1.6635774374008179, "learning_rate": 1.0860717557872214e-05, "loss": 0.284210205078125, "step": 8925 }, { "epoch": 0.6033527105583345, "grad_norm": 1.938253402709961, "learning_rate": 1.0857546244109415e-05, "loss": 0.257080078125, "step": 8926 }, { "epoch": 0.6034203055292686, "grad_norm": 1.1509528160095215, "learning_rate": 1.0854375130784297e-05, "loss": 0.207244873046875, "step": 8927 }, { "epoch": 0.6034879005002027, "grad_norm": 1.473512053489685, "learning_rate": 1.0851204218050292e-05, "loss": 0.283935546875, "step": 8928 }, { "epoch": 0.603555495471137, "grad_norm": 2.024977207183838, "learning_rate": 1.084803350606082e-05, "loss": 0.273162841796875, "step": 8929 }, { "epoch": 0.6036230904420711, "grad_norm": 0.8436741828918457, "learning_rate": 1.0844862994969311e-05, "loss": 0.1842041015625, "step": 8930 }, { "epoch": 0.6036906854130053, "grad_norm": 1.3786475658416748, "learning_rate": 1.0841692684929169e-05, "loss": 0.244415283203125, "step": 8931 }, { "epoch": 0.6037582803839394, "grad_norm": 1.5104800462722778, "learning_rate": 1.08385225760938e-05, "loss": 0.34619140625, "step": 8932 }, { "epoch": 0.6038258753548736, "grad_norm": 1.644345760345459, "learning_rate": 1.0835352668616584e-05, "loss": 0.296966552734375, "step": 8933 }, { "epoch": 0.6038934703258078, "grad_norm": 1.7100239992141724, "learning_rate": 1.083218296265091e-05, "loss": 0.167724609375, "step": 8934 }, { "epoch": 0.6039610652967419, "grad_norm": 1.3608397245407104, "learning_rate": 1.0829013458350137e-05, "loss": 0.2908935546875, "step": 8935 }, { "epoch": 0.6040286602676761, "grad_norm": 2.7389678955078125, "learning_rate": 1.0825844155867641e-05, "loss": 0.170318603515625, "step": 8936 }, { "epoch": 0.6040962552386102, "grad_norm": 1.0983905792236328, "learning_rate": 1.0822675055356756e-05, "loss": 0.210540771484375, "step": 8937 }, { "epoch": 0.6041638502095444, "grad_norm": 1.255293846130371, "learning_rate": 1.0819506156970835e-05, "loss": 0.2308197021484375, "step": 8938 }, { "epoch": 0.6042314451804786, "grad_norm": 2.061891555786133, "learning_rate": 1.0816337460863198e-05, "loss": 0.3800048828125, "step": 8939 }, { "epoch": 0.6042990401514128, "grad_norm": 0.737104594707489, "learning_rate": 1.081316896718718e-05, "loss": 0.1801910400390625, "step": 8940 }, { "epoch": 0.6043666351223469, "grad_norm": 0.7807987928390503, "learning_rate": 1.0810000676096088e-05, "loss": 0.159881591796875, "step": 8941 }, { "epoch": 0.604434230093281, "grad_norm": 2.225675344467163, "learning_rate": 1.0806832587743211e-05, "loss": 0.3031768798828125, "step": 8942 }, { "epoch": 0.6045018250642152, "grad_norm": 0.8765717148780823, "learning_rate": 1.0803664702281855e-05, "loss": 0.19203948974609375, "step": 8943 }, { "epoch": 0.6045694200351494, "grad_norm": 1.8018145561218262, "learning_rate": 1.0800497019865294e-05, "loss": 0.24676513671875, "step": 8944 }, { "epoch": 0.6046370150060836, "grad_norm": 1.6057004928588867, "learning_rate": 1.079732954064681e-05, "loss": 0.23565673828125, "step": 8945 }, { "epoch": 0.6047046099770177, "grad_norm": 1.5688700675964355, "learning_rate": 1.0794162264779652e-05, "loss": 0.2369384765625, "step": 8946 }, { "epoch": 0.6047722049479519, "grad_norm": 1.0970368385314941, "learning_rate": 1.0790995192417082e-05, "loss": 0.27471923828125, "step": 8947 }, { "epoch": 0.604839799918886, "grad_norm": 0.9264625310897827, "learning_rate": 1.0787828323712338e-05, "loss": 0.2125244140625, "step": 8948 }, { "epoch": 0.6049073948898201, "grad_norm": 0.7377169728279114, "learning_rate": 1.0784661658818662e-05, "loss": 0.16046142578125, "step": 8949 }, { "epoch": 0.6049749898607544, "grad_norm": 1.3425794839859009, "learning_rate": 1.0781495197889263e-05, "loss": 0.2652587890625, "step": 8950 }, { "epoch": 0.6050425848316885, "grad_norm": 1.034875750541687, "learning_rate": 1.0778328941077363e-05, "loss": 0.196136474609375, "step": 8951 }, { "epoch": 0.6051101798026227, "grad_norm": 1.1504508256912231, "learning_rate": 1.0775162888536168e-05, "loss": 0.26031494140625, "step": 8952 }, { "epoch": 0.6051777747735568, "grad_norm": 1.251838207244873, "learning_rate": 1.077199704041886e-05, "loss": 0.240936279296875, "step": 8953 }, { "epoch": 0.6052453697444911, "grad_norm": 2.149585485458374, "learning_rate": 1.0768831396878632e-05, "loss": 0.298370361328125, "step": 8954 }, { "epoch": 0.6053129647154252, "grad_norm": 1.6871174573898315, "learning_rate": 1.076566595806865e-05, "loss": 0.257049560546875, "step": 8955 }, { "epoch": 0.6053805596863593, "grad_norm": 1.0361285209655762, "learning_rate": 1.076250072414209e-05, "loss": 0.282958984375, "step": 8956 }, { "epoch": 0.6054481546572935, "grad_norm": 0.845202624797821, "learning_rate": 1.0759335695252088e-05, "loss": 0.1768951416015625, "step": 8957 }, { "epoch": 0.6055157496282276, "grad_norm": 1.1956219673156738, "learning_rate": 1.0756170871551807e-05, "loss": 0.269744873046875, "step": 8958 }, { "epoch": 0.6055833445991619, "grad_norm": 0.8960331082344055, "learning_rate": 1.0753006253194363e-05, "loss": 0.2336883544921875, "step": 8959 }, { "epoch": 0.605650939570096, "grad_norm": 1.109706997871399, "learning_rate": 1.0749841840332892e-05, "loss": 0.192352294921875, "step": 8960 }, { "epoch": 0.6057185345410302, "grad_norm": 1.0651582479476929, "learning_rate": 1.0746677633120501e-05, "loss": 0.1438140869140625, "step": 8961 }, { "epoch": 0.6057861295119643, "grad_norm": 1.1398553848266602, "learning_rate": 1.0743513631710306e-05, "loss": 0.272064208984375, "step": 8962 }, { "epoch": 0.6058537244828984, "grad_norm": 1.8495392799377441, "learning_rate": 1.0740349836255388e-05, "loss": 0.2694091796875, "step": 8963 }, { "epoch": 0.6059213194538327, "grad_norm": 1.5085415840148926, "learning_rate": 1.0737186246908831e-05, "loss": 0.1756591796875, "step": 8964 }, { "epoch": 0.6059889144247668, "grad_norm": 1.4370297193527222, "learning_rate": 1.073402286382372e-05, "loss": 0.2398834228515625, "step": 8965 }, { "epoch": 0.606056509395701, "grad_norm": 0.7755864858627319, "learning_rate": 1.0730859687153105e-05, "loss": 0.1573486328125, "step": 8966 }, { "epoch": 0.6061241043666351, "grad_norm": 1.332322120666504, "learning_rate": 1.0727696717050054e-05, "loss": 0.300018310546875, "step": 8967 }, { "epoch": 0.6061916993375693, "grad_norm": 1.5311386585235596, "learning_rate": 1.07245339536676e-05, "loss": 0.1968994140625, "step": 8968 }, { "epoch": 0.6062592943085034, "grad_norm": 0.6893250346183777, "learning_rate": 1.072137139715879e-05, "loss": 0.12459182739257812, "step": 8969 }, { "epoch": 0.6063268892794376, "grad_norm": 2.1714611053466797, "learning_rate": 1.0718209047676632e-05, "loss": 0.371490478515625, "step": 8970 }, { "epoch": 0.6063944842503718, "grad_norm": 0.8745097517967224, "learning_rate": 1.0715046905374152e-05, "loss": 0.2628173828125, "step": 8971 }, { "epoch": 0.6064620792213059, "grad_norm": 1.057100534439087, "learning_rate": 1.0711884970404347e-05, "loss": 0.2080078125, "step": 8972 }, { "epoch": 0.6065296741922401, "grad_norm": 0.513579785823822, "learning_rate": 1.0708723242920222e-05, "loss": 0.0994577407836914, "step": 8973 }, { "epoch": 0.6065972691631742, "grad_norm": 1.7582265138626099, "learning_rate": 1.0705561723074749e-05, "loss": 0.31158447265625, "step": 8974 }, { "epoch": 0.6066648641341085, "grad_norm": 1.4463063478469849, "learning_rate": 1.070240041102091e-05, "loss": 0.2200927734375, "step": 8975 }, { "epoch": 0.6067324591050426, "grad_norm": 1.7209147214889526, "learning_rate": 1.0699239306911664e-05, "loss": 0.2979736328125, "step": 8976 }, { "epoch": 0.6068000540759767, "grad_norm": 1.4251883029937744, "learning_rate": 1.0696078410899964e-05, "loss": 0.2386474609375, "step": 8977 }, { "epoch": 0.6068676490469109, "grad_norm": 2.6857426166534424, "learning_rate": 1.0692917723138762e-05, "loss": 0.25384521484375, "step": 8978 }, { "epoch": 0.606935244017845, "grad_norm": 1.1556071043014526, "learning_rate": 1.068975724378098e-05, "loss": 0.20404052734375, "step": 8979 }, { "epoch": 0.6070028389887793, "grad_norm": 1.3730216026306152, "learning_rate": 1.068659697297955e-05, "loss": 0.28192138671875, "step": 8980 }, { "epoch": 0.6070704339597134, "grad_norm": 1.4836645126342773, "learning_rate": 1.0683436910887378e-05, "loss": 0.287078857421875, "step": 8981 }, { "epoch": 0.6071380289306476, "grad_norm": 1.2799944877624512, "learning_rate": 1.068027705765738e-05, "loss": 0.26104736328125, "step": 8982 }, { "epoch": 0.6072056239015817, "grad_norm": 0.6596825122833252, "learning_rate": 1.067711741344244e-05, "loss": 0.16973876953125, "step": 8983 }, { "epoch": 0.6072732188725158, "grad_norm": 1.2704493999481201, "learning_rate": 1.067395797839544e-05, "loss": 0.303955078125, "step": 8984 }, { "epoch": 0.6073408138434501, "grad_norm": 1.081610918045044, "learning_rate": 1.0670798752669258e-05, "loss": 0.168060302734375, "step": 8985 }, { "epoch": 0.6074084088143842, "grad_norm": 1.5274113416671753, "learning_rate": 1.0667639736416759e-05, "loss": 0.2886962890625, "step": 8986 }, { "epoch": 0.6074760037853184, "grad_norm": 1.2344974279403687, "learning_rate": 1.0664480929790793e-05, "loss": 0.208404541015625, "step": 8987 }, { "epoch": 0.6075435987562525, "grad_norm": 1.1551593542099, "learning_rate": 1.0661322332944197e-05, "loss": 0.218231201171875, "step": 8988 }, { "epoch": 0.6076111937271867, "grad_norm": 0.8316689729690552, "learning_rate": 1.065816394602981e-05, "loss": 0.224945068359375, "step": 8989 }, { "epoch": 0.6076787886981209, "grad_norm": 1.1812920570373535, "learning_rate": 1.065500576920045e-05, "loss": 0.244415283203125, "step": 8990 }, { "epoch": 0.607746383669055, "grad_norm": 0.794867217540741, "learning_rate": 1.065184780260894e-05, "loss": 0.16204071044921875, "step": 8991 }, { "epoch": 0.6078139786399892, "grad_norm": 2.1899778842926025, "learning_rate": 1.0648690046408067e-05, "loss": 0.340789794921875, "step": 8992 }, { "epoch": 0.6078815736109233, "grad_norm": 1.1422629356384277, "learning_rate": 1.0645532500750631e-05, "loss": 0.232421875, "step": 8993 }, { "epoch": 0.6079491685818575, "grad_norm": 1.2132872343063354, "learning_rate": 1.0642375165789411e-05, "loss": 0.21030426025390625, "step": 8994 }, { "epoch": 0.6080167635527917, "grad_norm": 1.1782439947128296, "learning_rate": 1.0639218041677188e-05, "loss": 0.269134521484375, "step": 8995 }, { "epoch": 0.6080843585237259, "grad_norm": 1.0758846998214722, "learning_rate": 1.063606112856671e-05, "loss": 0.2080535888671875, "step": 8996 }, { "epoch": 0.60815195349466, "grad_norm": 2.0021955966949463, "learning_rate": 1.0632904426610735e-05, "loss": 0.3809814453125, "step": 8997 }, { "epoch": 0.6082195484655941, "grad_norm": 1.591574788093567, "learning_rate": 1.0629747935961997e-05, "loss": 0.251495361328125, "step": 8998 }, { "epoch": 0.6082871434365283, "grad_norm": 1.2494560480117798, "learning_rate": 1.0626591656773241e-05, "loss": 0.228973388671875, "step": 8999 }, { "epoch": 0.6083547384074625, "grad_norm": 1.3117438554763794, "learning_rate": 1.0623435589197177e-05, "loss": 0.2064208984375, "step": 9000 }, { "epoch": 0.6084223333783967, "grad_norm": 1.0796504020690918, "learning_rate": 1.0620279733386511e-05, "loss": 0.237518310546875, "step": 9001 }, { "epoch": 0.6084899283493308, "grad_norm": 1.3468726873397827, "learning_rate": 1.0617124089493957e-05, "loss": 0.321533203125, "step": 9002 }, { "epoch": 0.608557523320265, "grad_norm": 1.4793652296066284, "learning_rate": 1.0613968657672187e-05, "loss": 0.14723968505859375, "step": 9003 }, { "epoch": 0.6086251182911991, "grad_norm": 1.1486109495162964, "learning_rate": 1.06108134380739e-05, "loss": 0.19696044921875, "step": 9004 }, { "epoch": 0.6086927132621333, "grad_norm": 1.1326135396957397, "learning_rate": 1.0607658430851745e-05, "loss": 0.1978759765625, "step": 9005 }, { "epoch": 0.6087603082330675, "grad_norm": 0.7149946689605713, "learning_rate": 1.0604503636158396e-05, "loss": 0.1549530029296875, "step": 9006 }, { "epoch": 0.6088279032040016, "grad_norm": 0.9164831042289734, "learning_rate": 1.0601349054146492e-05, "loss": 0.257232666015625, "step": 9007 }, { "epoch": 0.6088954981749358, "grad_norm": 1.2319015264511108, "learning_rate": 1.0598194684968683e-05, "loss": 0.19181442260742188, "step": 9008 }, { "epoch": 0.6089630931458699, "grad_norm": 1.3241873979568481, "learning_rate": 1.0595040528777584e-05, "loss": 0.2084503173828125, "step": 9009 }, { "epoch": 0.6090306881168042, "grad_norm": 0.6091230511665344, "learning_rate": 1.059188658572582e-05, "loss": 0.161224365234375, "step": 9010 }, { "epoch": 0.6090982830877383, "grad_norm": 1.6916964054107666, "learning_rate": 1.0588732855966002e-05, "loss": 0.247467041015625, "step": 9011 }, { "epoch": 0.6091658780586724, "grad_norm": 1.737565040588379, "learning_rate": 1.0585579339650713e-05, "loss": 0.27227783203125, "step": 9012 }, { "epoch": 0.6092334730296066, "grad_norm": 0.8392034769058228, "learning_rate": 1.0582426036932552e-05, "loss": 0.172515869140625, "step": 9013 }, { "epoch": 0.6093010680005407, "grad_norm": 1.0866390466690063, "learning_rate": 1.0579272947964091e-05, "loss": 0.176055908203125, "step": 9014 }, { "epoch": 0.609368662971475, "grad_norm": 1.1362988948822021, "learning_rate": 1.0576120072897902e-05, "loss": 0.26617431640625, "step": 9015 }, { "epoch": 0.6094362579424091, "grad_norm": 1.081556797027588, "learning_rate": 1.0572967411886528e-05, "loss": 0.225006103515625, "step": 9016 }, { "epoch": 0.6095038529133433, "grad_norm": 1.0140562057495117, "learning_rate": 1.0569814965082526e-05, "loss": 0.221832275390625, "step": 9017 }, { "epoch": 0.6095714478842774, "grad_norm": 1.13369882106781, "learning_rate": 1.0566662732638425e-05, "loss": 0.256072998046875, "step": 9018 }, { "epoch": 0.6096390428552115, "grad_norm": 1.0102684497833252, "learning_rate": 1.0563510714706757e-05, "loss": 0.2068023681640625, "step": 9019 }, { "epoch": 0.6097066378261458, "grad_norm": 1.0100454092025757, "learning_rate": 1.0560358911440025e-05, "loss": 0.224456787109375, "step": 9020 }, { "epoch": 0.6097742327970799, "grad_norm": 1.4365088939666748, "learning_rate": 1.0557207322990744e-05, "loss": 0.338592529296875, "step": 9021 }, { "epoch": 0.6098418277680141, "grad_norm": 1.0118434429168701, "learning_rate": 1.0554055949511401e-05, "loss": 0.1355133056640625, "step": 9022 }, { "epoch": 0.6099094227389482, "grad_norm": 1.8532851934432983, "learning_rate": 1.0550904791154477e-05, "loss": 0.2598876953125, "step": 9023 }, { "epoch": 0.6099770177098823, "grad_norm": 1.0796794891357422, "learning_rate": 1.0547753848072452e-05, "loss": 0.18084716796875, "step": 9024 }, { "epoch": 0.6100446126808166, "grad_norm": 1.4692974090576172, "learning_rate": 1.0544603120417777e-05, "loss": 0.1839447021484375, "step": 9025 }, { "epoch": 0.6101122076517507, "grad_norm": 1.327945590019226, "learning_rate": 1.0541452608342916e-05, "loss": 0.256378173828125, "step": 9026 }, { "epoch": 0.6101798026226849, "grad_norm": 1.6029249429702759, "learning_rate": 1.0538302312000303e-05, "loss": 0.289306640625, "step": 9027 }, { "epoch": 0.610247397593619, "grad_norm": 1.047330379486084, "learning_rate": 1.0535152231542374e-05, "loss": 0.1976318359375, "step": 9028 }, { "epoch": 0.6103149925645532, "grad_norm": 1.4216065406799316, "learning_rate": 1.0532002367121544e-05, "loss": 0.18988037109375, "step": 9029 }, { "epoch": 0.6103825875354874, "grad_norm": 1.1481757164001465, "learning_rate": 1.0528852718890229e-05, "loss": 0.2930908203125, "step": 9030 }, { "epoch": 0.6104501825064215, "grad_norm": 0.9506162405014038, "learning_rate": 1.052570328700082e-05, "loss": 0.207183837890625, "step": 9031 }, { "epoch": 0.6105177774773557, "grad_norm": 1.0433510541915894, "learning_rate": 1.0522554071605716e-05, "loss": 0.2217864990234375, "step": 9032 }, { "epoch": 0.6105853724482898, "grad_norm": 1.744141697883606, "learning_rate": 1.0519405072857288e-05, "loss": 0.1654052734375, "step": 9033 }, { "epoch": 0.610652967419224, "grad_norm": 1.3609764575958252, "learning_rate": 1.051625629090791e-05, "loss": 0.2394256591796875, "step": 9034 }, { "epoch": 0.6107205623901582, "grad_norm": 1.6303045749664307, "learning_rate": 1.0513107725909938e-05, "loss": 0.206268310546875, "step": 9035 }, { "epoch": 0.6107881573610924, "grad_norm": 1.1592377424240112, "learning_rate": 1.0509959378015712e-05, "loss": 0.2005767822265625, "step": 9036 }, { "epoch": 0.6108557523320265, "grad_norm": 0.9900119304656982, "learning_rate": 1.050681124737758e-05, "loss": 0.167449951171875, "step": 9037 }, { "epoch": 0.6109233473029606, "grad_norm": 1.3148952722549438, "learning_rate": 1.050366333414786e-05, "loss": 0.254669189453125, "step": 9038 }, { "epoch": 0.6109909422738948, "grad_norm": 1.1705266237258911, "learning_rate": 1.050051563847887e-05, "loss": 0.2752685546875, "step": 9039 }, { "epoch": 0.611058537244829, "grad_norm": 1.478313684463501, "learning_rate": 1.0497368160522913e-05, "loss": 0.242919921875, "step": 9040 }, { "epoch": 0.6111261322157632, "grad_norm": 2.075540781021118, "learning_rate": 1.0494220900432293e-05, "loss": 0.257476806640625, "step": 9041 }, { "epoch": 0.6111937271866973, "grad_norm": 1.952148675918579, "learning_rate": 1.0491073858359278e-05, "loss": 0.242706298828125, "step": 9042 }, { "epoch": 0.6112613221576315, "grad_norm": 1.4111828804016113, "learning_rate": 1.0487927034456157e-05, "loss": 0.29254150390625, "step": 9043 }, { "epoch": 0.6113289171285656, "grad_norm": 1.4030160903930664, "learning_rate": 1.0484780428875182e-05, "loss": 0.226318359375, "step": 9044 }, { "epoch": 0.6113965120994997, "grad_norm": 1.3685963153839111, "learning_rate": 1.0481634041768617e-05, "loss": 0.262786865234375, "step": 9045 }, { "epoch": 0.611464107070434, "grad_norm": 1.1866590976715088, "learning_rate": 1.0478487873288694e-05, "loss": 0.18038177490234375, "step": 9046 }, { "epoch": 0.6115317020413681, "grad_norm": 1.2035177946090698, "learning_rate": 1.047534192358764e-05, "loss": 0.218536376953125, "step": 9047 }, { "epoch": 0.6115992970123023, "grad_norm": 1.717772364616394, "learning_rate": 1.0472196192817691e-05, "loss": 0.251312255859375, "step": 9048 }, { "epoch": 0.6116668919832364, "grad_norm": 1.3448238372802734, "learning_rate": 1.0469050681131042e-05, "loss": 0.206695556640625, "step": 9049 }, { "epoch": 0.6117344869541707, "grad_norm": 2.5292611122131348, "learning_rate": 1.0465905388679905e-05, "loss": 0.291748046875, "step": 9050 }, { "epoch": 0.6118020819251048, "grad_norm": 0.7685922384262085, "learning_rate": 1.0462760315616455e-05, "loss": 0.164093017578125, "step": 9051 }, { "epoch": 0.6118696768960389, "grad_norm": 0.990249752998352, "learning_rate": 1.0459615462092882e-05, "loss": 0.2391357421875, "step": 9052 }, { "epoch": 0.6119372718669731, "grad_norm": 1.6102616786956787, "learning_rate": 1.0456470828261347e-05, "loss": 0.2474517822265625, "step": 9053 }, { "epoch": 0.6120048668379072, "grad_norm": 1.3499653339385986, "learning_rate": 1.045332641427401e-05, "loss": 0.288726806640625, "step": 9054 }, { "epoch": 0.6120724618088415, "grad_norm": 1.2071611881256104, "learning_rate": 1.0450182220283018e-05, "loss": 0.20711517333984375, "step": 9055 }, { "epoch": 0.6121400567797756, "grad_norm": 1.3328852653503418, "learning_rate": 1.0447038246440503e-05, "loss": 0.298583984375, "step": 9056 }, { "epoch": 0.6122076517507098, "grad_norm": 1.268898844718933, "learning_rate": 1.0443894492898598e-05, "loss": 0.309051513671875, "step": 9057 }, { "epoch": 0.6122752467216439, "grad_norm": 0.9201053977012634, "learning_rate": 1.0440750959809404e-05, "loss": 0.21484375, "step": 9058 }, { "epoch": 0.612342841692578, "grad_norm": 1.2349270582199097, "learning_rate": 1.0437607647325037e-05, "loss": 0.2755126953125, "step": 9059 }, { "epoch": 0.6124104366635122, "grad_norm": 0.886298656463623, "learning_rate": 1.043446455559758e-05, "loss": 0.194671630859375, "step": 9060 }, { "epoch": 0.6124780316344464, "grad_norm": 1.6008628606796265, "learning_rate": 1.0431321684779128e-05, "loss": 0.206573486328125, "step": 9061 }, { "epoch": 0.6125456266053806, "grad_norm": 1.064792275428772, "learning_rate": 1.0428179035021738e-05, "loss": 0.2071533203125, "step": 9062 }, { "epoch": 0.6126132215763147, "grad_norm": 1.54010009765625, "learning_rate": 1.0425036606477484e-05, "loss": 0.244903564453125, "step": 9063 }, { "epoch": 0.6126808165472489, "grad_norm": 1.0561320781707764, "learning_rate": 1.0421894399298405e-05, "loss": 0.16779518127441406, "step": 9064 }, { "epoch": 0.612748411518183, "grad_norm": 1.295946717262268, "learning_rate": 1.041875241363655e-05, "loss": 0.2498779296875, "step": 9065 }, { "epoch": 0.6128160064891172, "grad_norm": 0.647320568561554, "learning_rate": 1.0415610649643943e-05, "loss": 0.1710052490234375, "step": 9066 }, { "epoch": 0.6128836014600514, "grad_norm": 1.2374277114868164, "learning_rate": 1.0412469107472605e-05, "loss": 0.267852783203125, "step": 9067 }, { "epoch": 0.6129511964309855, "grad_norm": 0.8123091459274292, "learning_rate": 1.0409327787274537e-05, "loss": 0.1046600341796875, "step": 9068 }, { "epoch": 0.6130187914019197, "grad_norm": 1.2766531705856323, "learning_rate": 1.0406186689201741e-05, "loss": 0.235137939453125, "step": 9069 }, { "epoch": 0.6130863863728538, "grad_norm": 0.6706578135490417, "learning_rate": 1.0403045813406205e-05, "loss": 0.1342620849609375, "step": 9070 }, { "epoch": 0.6131539813437881, "grad_norm": 1.5803463459014893, "learning_rate": 1.0399905160039895e-05, "loss": 0.22347259521484375, "step": 9071 }, { "epoch": 0.6132215763147222, "grad_norm": 1.8904812335968018, "learning_rate": 1.0396764729254783e-05, "loss": 0.3258056640625, "step": 9072 }, { "epoch": 0.6132891712856563, "grad_norm": 1.0665268898010254, "learning_rate": 1.0393624521202819e-05, "loss": 0.259185791015625, "step": 9073 }, { "epoch": 0.6133567662565905, "grad_norm": 1.4269825220108032, "learning_rate": 1.0390484536035952e-05, "loss": 0.2564697265625, "step": 9074 }, { "epoch": 0.6134243612275246, "grad_norm": 1.0117034912109375, "learning_rate": 1.0387344773906104e-05, "loss": 0.2016143798828125, "step": 9075 }, { "epoch": 0.6134919561984589, "grad_norm": 1.0959148406982422, "learning_rate": 1.0384205234965205e-05, "loss": 0.276519775390625, "step": 9076 }, { "epoch": 0.613559551169393, "grad_norm": 1.3222991228103638, "learning_rate": 1.0381065919365159e-05, "loss": 0.22125244140625, "step": 9077 }, { "epoch": 0.6136271461403272, "grad_norm": 0.9080170392990112, "learning_rate": 1.0377926827257876e-05, "loss": 0.201507568359375, "step": 9078 }, { "epoch": 0.6136947411112613, "grad_norm": 1.7127325534820557, "learning_rate": 1.037478795879523e-05, "loss": 0.2452392578125, "step": 9079 }, { "epoch": 0.6137623360821954, "grad_norm": 1.3145720958709717, "learning_rate": 1.0371649314129112e-05, "loss": 0.239593505859375, "step": 9080 }, { "epoch": 0.6138299310531297, "grad_norm": 1.2522683143615723, "learning_rate": 1.0368510893411382e-05, "loss": 0.2209320068359375, "step": 9081 }, { "epoch": 0.6138975260240638, "grad_norm": 1.2814266681671143, "learning_rate": 1.0365372696793894e-05, "loss": 0.208221435546875, "step": 9082 }, { "epoch": 0.613965120994998, "grad_norm": 1.7183727025985718, "learning_rate": 1.0362234724428504e-05, "loss": 0.28460693359375, "step": 9083 }, { "epoch": 0.6140327159659321, "grad_norm": 1.366109013557434, "learning_rate": 1.0359096976467033e-05, "loss": 0.29229736328125, "step": 9084 }, { "epoch": 0.6141003109368663, "grad_norm": 1.8758808374404907, "learning_rate": 1.0355959453061315e-05, "loss": 0.3413238525390625, "step": 9085 }, { "epoch": 0.6141679059078005, "grad_norm": 0.8035728335380554, "learning_rate": 1.0352822154363158e-05, "loss": 0.2174072265625, "step": 9086 }, { "epoch": 0.6142355008787346, "grad_norm": 2.790156841278076, "learning_rate": 1.0349685080524369e-05, "loss": 0.3900909423828125, "step": 9087 }, { "epoch": 0.6143030958496688, "grad_norm": 1.3047772645950317, "learning_rate": 1.0346548231696733e-05, "loss": 0.185302734375, "step": 9088 }, { "epoch": 0.6143706908206029, "grad_norm": 1.3779131174087524, "learning_rate": 1.0343411608032034e-05, "loss": 0.246551513671875, "step": 9089 }, { "epoch": 0.6144382857915371, "grad_norm": 1.0410338640213013, "learning_rate": 1.0340275209682037e-05, "loss": 0.24365234375, "step": 9090 }, { "epoch": 0.6145058807624713, "grad_norm": 1.8858720064163208, "learning_rate": 1.0337139036798513e-05, "loss": 0.367034912109375, "step": 9091 }, { "epoch": 0.6145734757334055, "grad_norm": 1.4811118841171265, "learning_rate": 1.0334003089533193e-05, "loss": 0.32928466796875, "step": 9092 }, { "epoch": 0.6146410707043396, "grad_norm": 1.4264148473739624, "learning_rate": 1.033086736803782e-05, "loss": 0.2384033203125, "step": 9093 }, { "epoch": 0.6147086656752737, "grad_norm": 0.7474474310874939, "learning_rate": 1.0327731872464128e-05, "loss": 0.165069580078125, "step": 9094 }, { "epoch": 0.6147762606462079, "grad_norm": 1.0690197944641113, "learning_rate": 1.0324596602963817e-05, "loss": 0.1963958740234375, "step": 9095 }, { "epoch": 0.614843855617142, "grad_norm": 1.0091633796691895, "learning_rate": 1.0321461559688605e-05, "loss": 0.228424072265625, "step": 9096 }, { "epoch": 0.6149114505880763, "grad_norm": 1.7279901504516602, "learning_rate": 1.0318326742790171e-05, "loss": 0.2830810546875, "step": 9097 }, { "epoch": 0.6149790455590104, "grad_norm": 1.461660385131836, "learning_rate": 1.0315192152420208e-05, "loss": 0.32476806640625, "step": 9098 }, { "epoch": 0.6150466405299446, "grad_norm": 0.6484946012496948, "learning_rate": 1.0312057788730382e-05, "loss": 0.090179443359375, "step": 9099 }, { "epoch": 0.6151142355008787, "grad_norm": 1.0376629829406738, "learning_rate": 1.0308923651872357e-05, "loss": 0.232177734375, "step": 9100 }, { "epoch": 0.6151818304718129, "grad_norm": 1.052412986755371, "learning_rate": 1.0305789741997776e-05, "loss": 0.272552490234375, "step": 9101 }, { "epoch": 0.6152494254427471, "grad_norm": 1.688044548034668, "learning_rate": 1.0302656059258281e-05, "loss": 0.2381591796875, "step": 9102 }, { "epoch": 0.6153170204136812, "grad_norm": 1.3075087070465088, "learning_rate": 1.0299522603805495e-05, "loss": 0.312744140625, "step": 9103 }, { "epoch": 0.6153846153846154, "grad_norm": 1.2401517629623413, "learning_rate": 1.0296389375791048e-05, "loss": 0.28924560546875, "step": 9104 }, { "epoch": 0.6154522103555495, "grad_norm": 0.5232731103897095, "learning_rate": 1.0293256375366526e-05, "loss": 0.09422683715820312, "step": 9105 }, { "epoch": 0.6155198053264838, "grad_norm": 0.9028338193893433, "learning_rate": 1.0290123602683532e-05, "loss": 0.284149169921875, "step": 9106 }, { "epoch": 0.6155874002974179, "grad_norm": 0.9542725086212158, "learning_rate": 1.0286991057893652e-05, "loss": 0.1914520263671875, "step": 9107 }, { "epoch": 0.615654995268352, "grad_norm": 0.49908512830734253, "learning_rate": 1.028385874114845e-05, "loss": 0.10837554931640625, "step": 9108 }, { "epoch": 0.6157225902392862, "grad_norm": 1.8710635900497437, "learning_rate": 1.0280726652599496e-05, "loss": 0.28173828125, "step": 9109 }, { "epoch": 0.6157901852102203, "grad_norm": 1.082998514175415, "learning_rate": 1.0277594792398328e-05, "loss": 0.246795654296875, "step": 9110 }, { "epoch": 0.6158577801811546, "grad_norm": 1.3097821474075317, "learning_rate": 1.0274463160696504e-05, "loss": 0.1534576416015625, "step": 9111 }, { "epoch": 0.6159253751520887, "grad_norm": 1.028941035270691, "learning_rate": 1.027133175764553e-05, "loss": 0.16363525390625, "step": 9112 }, { "epoch": 0.6159929701230229, "grad_norm": 0.9896889328956604, "learning_rate": 1.0268200583396939e-05, "loss": 0.21807861328125, "step": 9113 }, { "epoch": 0.616060565093957, "grad_norm": 0.9304651618003845, "learning_rate": 1.0265069638102225e-05, "loss": 0.12110137939453125, "step": 9114 }, { "epoch": 0.6161281600648911, "grad_norm": 1.1155202388763428, "learning_rate": 1.026193892191289e-05, "loss": 0.2087860107421875, "step": 9115 }, { "epoch": 0.6161957550358254, "grad_norm": 1.9900130033493042, "learning_rate": 1.0258808434980422e-05, "loss": 0.33148193359375, "step": 9116 }, { "epoch": 0.6162633500067595, "grad_norm": 1.4212100505828857, "learning_rate": 1.025567817745628e-05, "loss": 0.29791259765625, "step": 9117 }, { "epoch": 0.6163309449776937, "grad_norm": 0.8836989998817444, "learning_rate": 1.0252548149491934e-05, "loss": 0.1378173828125, "step": 9118 }, { "epoch": 0.6163985399486278, "grad_norm": 1.3525900840759277, "learning_rate": 1.0249418351238828e-05, "loss": 0.238800048828125, "step": 9119 }, { "epoch": 0.616466134919562, "grad_norm": 1.7017618417739868, "learning_rate": 1.0246288782848415e-05, "loss": 0.270751953125, "step": 9120 }, { "epoch": 0.6165337298904962, "grad_norm": 1.2044621706008911, "learning_rate": 1.0243159444472106e-05, "loss": 0.26043701171875, "step": 9121 }, { "epoch": 0.6166013248614303, "grad_norm": 2.0883147716522217, "learning_rate": 1.0240030336261332e-05, "loss": 0.251739501953125, "step": 9122 }, { "epoch": 0.6166689198323645, "grad_norm": 2.1185407638549805, "learning_rate": 1.0236901458367485e-05, "loss": 0.26910400390625, "step": 9123 }, { "epoch": 0.6167365148032986, "grad_norm": 2.164940595626831, "learning_rate": 1.0233772810941975e-05, "loss": 0.30328369140625, "step": 9124 }, { "epoch": 0.6168041097742328, "grad_norm": 1.1937592029571533, "learning_rate": 1.023064439413617e-05, "loss": 0.219512939453125, "step": 9125 }, { "epoch": 0.616871704745167, "grad_norm": 1.1016651391983032, "learning_rate": 1.0227516208101455e-05, "loss": 0.243133544921875, "step": 9126 }, { "epoch": 0.6169392997161012, "grad_norm": 0.9399459958076477, "learning_rate": 1.0224388252989185e-05, "loss": 0.12410545349121094, "step": 9127 }, { "epoch": 0.6170068946870353, "grad_norm": 0.8854364156723022, "learning_rate": 1.0221260528950705e-05, "loss": 0.1597442626953125, "step": 9128 }, { "epoch": 0.6170744896579694, "grad_norm": 1.4033938646316528, "learning_rate": 1.0218133036137366e-05, "loss": 0.175567626953125, "step": 9129 }, { "epoch": 0.6171420846289036, "grad_norm": 1.1095050573349, "learning_rate": 1.0215005774700484e-05, "loss": 0.1722259521484375, "step": 9130 }, { "epoch": 0.6172096795998377, "grad_norm": 1.3298128843307495, "learning_rate": 1.0211878744791382e-05, "loss": 0.24591064453125, "step": 9131 }, { "epoch": 0.617277274570772, "grad_norm": 1.2925938367843628, "learning_rate": 1.020875194656136e-05, "loss": 0.203125, "step": 9132 }, { "epoch": 0.6173448695417061, "grad_norm": 1.45337975025177, "learning_rate": 1.0205625380161723e-05, "loss": 0.281158447265625, "step": 9133 }, { "epoch": 0.6174124645126403, "grad_norm": 1.304772138595581, "learning_rate": 1.0202499045743738e-05, "loss": 0.25537109375, "step": 9134 }, { "epoch": 0.6174800594835744, "grad_norm": 1.8399689197540283, "learning_rate": 1.019937294345869e-05, "loss": 0.249053955078125, "step": 9135 }, { "epoch": 0.6175476544545085, "grad_norm": 1.4723069667816162, "learning_rate": 1.0196247073457829e-05, "loss": 0.2857666015625, "step": 9136 }, { "epoch": 0.6176152494254428, "grad_norm": 0.7367537617683411, "learning_rate": 1.0193121435892416e-05, "loss": 0.15637969970703125, "step": 9137 }, { "epoch": 0.6176828443963769, "grad_norm": 1.920952558517456, "learning_rate": 1.0189996030913673e-05, "loss": 0.2099761962890625, "step": 9138 }, { "epoch": 0.6177504393673111, "grad_norm": 1.566994309425354, "learning_rate": 1.018687085867284e-05, "loss": 0.2633209228515625, "step": 9139 }, { "epoch": 0.6178180343382452, "grad_norm": 1.307938575744629, "learning_rate": 1.0183745919321131e-05, "loss": 0.2328643798828125, "step": 9140 }, { "epoch": 0.6178856293091795, "grad_norm": 1.3989498615264893, "learning_rate": 1.018062121300974e-05, "loss": 0.255859375, "step": 9141 }, { "epoch": 0.6179532242801136, "grad_norm": 1.2089438438415527, "learning_rate": 1.017749673988987e-05, "loss": 0.246917724609375, "step": 9142 }, { "epoch": 0.6180208192510477, "grad_norm": 1.4420546293258667, "learning_rate": 1.0174372500112694e-05, "loss": 0.3115234375, "step": 9143 }, { "epoch": 0.6180884142219819, "grad_norm": 1.3260319232940674, "learning_rate": 1.0171248493829392e-05, "loss": 0.2733154296875, "step": 9144 }, { "epoch": 0.618156009192916, "grad_norm": 0.9641135931015015, "learning_rate": 1.0168124721191111e-05, "loss": 0.23004150390625, "step": 9145 }, { "epoch": 0.6182236041638502, "grad_norm": 2.327533483505249, "learning_rate": 1.0165001182349013e-05, "loss": 0.290802001953125, "step": 9146 }, { "epoch": 0.6182911991347844, "grad_norm": 1.052639126777649, "learning_rate": 1.016187787745422e-05, "loss": 0.245025634765625, "step": 9147 }, { "epoch": 0.6183587941057186, "grad_norm": 1.1351370811462402, "learning_rate": 1.0158754806657867e-05, "loss": 0.208953857421875, "step": 9148 }, { "epoch": 0.6184263890766527, "grad_norm": 1.206314206123352, "learning_rate": 1.015563197011106e-05, "loss": 0.174224853515625, "step": 9149 }, { "epoch": 0.6184939840475868, "grad_norm": 1.256301760673523, "learning_rate": 1.015250936796491e-05, "loss": 0.14240264892578125, "step": 9150 }, { "epoch": 0.618561579018521, "grad_norm": 1.1495647430419922, "learning_rate": 1.0149387000370502e-05, "loss": 0.207489013671875, "step": 9151 }, { "epoch": 0.6186291739894552, "grad_norm": 1.5880080461502075, "learning_rate": 1.0146264867478911e-05, "loss": 0.30029296875, "step": 9152 }, { "epoch": 0.6186967689603894, "grad_norm": 1.4363877773284912, "learning_rate": 1.014314296944122e-05, "loss": 0.22918701171875, "step": 9153 }, { "epoch": 0.6187643639313235, "grad_norm": 1.0197116136550903, "learning_rate": 1.0140021306408468e-05, "loss": 0.18824005126953125, "step": 9154 }, { "epoch": 0.6188319589022576, "grad_norm": 1.3937962055206299, "learning_rate": 1.0136899878531714e-05, "loss": 0.2535400390625, "step": 9155 }, { "epoch": 0.6188995538731918, "grad_norm": 1.6140469312667847, "learning_rate": 1.0133778685961985e-05, "loss": 0.28619384765625, "step": 9156 }, { "epoch": 0.618967148844126, "grad_norm": 1.8288383483886719, "learning_rate": 1.0130657728850308e-05, "loss": 0.3358154296875, "step": 9157 }, { "epoch": 0.6190347438150602, "grad_norm": 1.3402998447418213, "learning_rate": 1.012753700734769e-05, "loss": 0.1792755126953125, "step": 9158 }, { "epoch": 0.6191023387859943, "grad_norm": 1.672136664390564, "learning_rate": 1.0124416521605137e-05, "loss": 0.300537109375, "step": 9159 }, { "epoch": 0.6191699337569285, "grad_norm": 1.1353024244308472, "learning_rate": 1.0121296271773628e-05, "loss": 0.1904296875, "step": 9160 }, { "epoch": 0.6192375287278626, "grad_norm": 1.4021713733673096, "learning_rate": 1.0118176258004147e-05, "loss": 0.27545166015625, "step": 9161 }, { "epoch": 0.6193051236987968, "grad_norm": 1.041098952293396, "learning_rate": 1.0115056480447664e-05, "loss": 0.1614990234375, "step": 9162 }, { "epoch": 0.619372718669731, "grad_norm": 1.3155274391174316, "learning_rate": 1.011193693925512e-05, "loss": 0.271026611328125, "step": 9163 }, { "epoch": 0.6194403136406651, "grad_norm": 1.2303266525268555, "learning_rate": 1.0108817634577469e-05, "loss": 0.2746734619140625, "step": 9164 }, { "epoch": 0.6195079086115993, "grad_norm": 1.1357604265213013, "learning_rate": 1.0105698566565637e-05, "loss": 0.267333984375, "step": 9165 }, { "epoch": 0.6195755035825334, "grad_norm": 0.49576687812805176, "learning_rate": 1.010257973537055e-05, "loss": 0.082550048828125, "step": 9166 }, { "epoch": 0.6196430985534677, "grad_norm": 1.8268625736236572, "learning_rate": 1.0099461141143108e-05, "loss": 0.20304107666015625, "step": 9167 }, { "epoch": 0.6197106935244018, "grad_norm": 1.0076549053192139, "learning_rate": 1.0096342784034213e-05, "loss": 0.167694091796875, "step": 9168 }, { "epoch": 0.6197782884953359, "grad_norm": 1.2947945594787598, "learning_rate": 1.0093224664194747e-05, "loss": 0.247039794921875, "step": 9169 }, { "epoch": 0.6198458834662701, "grad_norm": 1.8213623762130737, "learning_rate": 1.0090106781775593e-05, "loss": 0.2896728515625, "step": 9170 }, { "epoch": 0.6199134784372042, "grad_norm": 1.1887999773025513, "learning_rate": 1.0086989136927602e-05, "loss": 0.226470947265625, "step": 9171 }, { "epoch": 0.6199810734081385, "grad_norm": 1.9877498149871826, "learning_rate": 1.0083871729801637e-05, "loss": 0.25030517578125, "step": 9172 }, { "epoch": 0.6200486683790726, "grad_norm": 0.781780481338501, "learning_rate": 1.0080754560548523e-05, "loss": 0.1759033203125, "step": 9173 }, { "epoch": 0.6201162633500068, "grad_norm": 0.980894148349762, "learning_rate": 1.0077637629319098e-05, "loss": 0.17070770263671875, "step": 9174 }, { "epoch": 0.6201838583209409, "grad_norm": 1.2561625242233276, "learning_rate": 1.0074520936264182e-05, "loss": 0.23529052734375, "step": 9175 }, { "epoch": 0.620251453291875, "grad_norm": 1.4992115497589111, "learning_rate": 1.0071404481534567e-05, "loss": 0.2063140869140625, "step": 9176 }, { "epoch": 0.6203190482628093, "grad_norm": 0.8352760076522827, "learning_rate": 1.0068288265281056e-05, "loss": 0.1979827880859375, "step": 9177 }, { "epoch": 0.6203866432337434, "grad_norm": 1.6221874952316284, "learning_rate": 1.0065172287654425e-05, "loss": 0.20440673828125, "step": 9178 }, { "epoch": 0.6204542382046776, "grad_norm": 1.0309135913848877, "learning_rate": 1.0062056548805456e-05, "loss": 0.2027587890625, "step": 9179 }, { "epoch": 0.6205218331756117, "grad_norm": 1.2661041021347046, "learning_rate": 1.0058941048884894e-05, "loss": 0.2081146240234375, "step": 9180 }, { "epoch": 0.6205894281465459, "grad_norm": 1.2644070386886597, "learning_rate": 1.0055825788043494e-05, "loss": 0.33233642578125, "step": 9181 }, { "epoch": 0.6206570231174801, "grad_norm": 1.7298238277435303, "learning_rate": 1.0052710766431987e-05, "loss": 0.17281341552734375, "step": 9182 }, { "epoch": 0.6207246180884142, "grad_norm": 1.683408260345459, "learning_rate": 1.0049595984201106e-05, "loss": 0.34271240234375, "step": 9183 }, { "epoch": 0.6207922130593484, "grad_norm": 1.1790227890014648, "learning_rate": 1.0046481441501554e-05, "loss": 0.2047576904296875, "step": 9184 }, { "epoch": 0.6208598080302825, "grad_norm": 1.1329232454299927, "learning_rate": 1.0043367138484036e-05, "loss": 0.251922607421875, "step": 9185 }, { "epoch": 0.6209274030012167, "grad_norm": 1.738147497177124, "learning_rate": 1.0040253075299244e-05, "loss": 0.19957733154296875, "step": 9186 }, { "epoch": 0.6209949979721509, "grad_norm": 1.3190184831619263, "learning_rate": 1.0037139252097846e-05, "loss": 0.31597900390625, "step": 9187 }, { "epoch": 0.6210625929430851, "grad_norm": 1.6804872751235962, "learning_rate": 1.0034025669030523e-05, "loss": 0.266693115234375, "step": 9188 }, { "epoch": 0.6211301879140192, "grad_norm": 2.5393426418304443, "learning_rate": 1.0030912326247913e-05, "loss": 0.282501220703125, "step": 9189 }, { "epoch": 0.6211977828849533, "grad_norm": 1.47535240650177, "learning_rate": 1.002779922390067e-05, "loss": 0.29119873046875, "step": 9190 }, { "epoch": 0.6212653778558875, "grad_norm": 0.5317853093147278, "learning_rate": 1.0024686362139418e-05, "loss": 0.1114501953125, "step": 9191 }, { "epoch": 0.6213329728268217, "grad_norm": 1.8673723936080933, "learning_rate": 1.002157374111479e-05, "loss": 0.287811279296875, "step": 9192 }, { "epoch": 0.6214005677977559, "grad_norm": 0.8910331726074219, "learning_rate": 1.0018461360977377e-05, "loss": 0.2494049072265625, "step": 9193 }, { "epoch": 0.62146816276869, "grad_norm": 0.49255213141441345, "learning_rate": 1.0015349221877788e-05, "loss": 0.0973358154296875, "step": 9194 }, { "epoch": 0.6215357577396242, "grad_norm": 0.7607232332229614, "learning_rate": 1.0012237323966599e-05, "loss": 0.2191162109375, "step": 9195 }, { "epoch": 0.6216033527105583, "grad_norm": 1.367566704750061, "learning_rate": 1.0009125667394395e-05, "loss": 0.2270050048828125, "step": 9196 }, { "epoch": 0.6216709476814924, "grad_norm": 0.734374463558197, "learning_rate": 1.0006014252311723e-05, "loss": 0.1606292724609375, "step": 9197 }, { "epoch": 0.6217385426524267, "grad_norm": 1.0519212484359741, "learning_rate": 1.0002903078869137e-05, "loss": 0.24749755859375, "step": 9198 }, { "epoch": 0.6218061376233608, "grad_norm": 1.429099678993225, "learning_rate": 9.999792147217183e-06, "loss": 0.1547698974609375, "step": 9199 }, { "epoch": 0.621873732594295, "grad_norm": 1.4567419290542603, "learning_rate": 9.996681457506373e-06, "loss": 0.2445068359375, "step": 9200 }, { "epoch": 0.6219413275652291, "grad_norm": 1.3854970932006836, "learning_rate": 9.993571009887232e-06, "loss": 0.270294189453125, "step": 9201 }, { "epoch": 0.6220089225361634, "grad_norm": 2.62738037109375, "learning_rate": 9.99046080451026e-06, "loss": 0.39306640625, "step": 9202 }, { "epoch": 0.6220765175070975, "grad_norm": 1.0789529085159302, "learning_rate": 9.987350841525954e-06, "loss": 0.19318771362304688, "step": 9203 }, { "epoch": 0.6221441124780316, "grad_norm": 2.389045238494873, "learning_rate": 9.98424112108478e-06, "loss": 0.34649658203125, "step": 9204 }, { "epoch": 0.6222117074489658, "grad_norm": 1.1469333171844482, "learning_rate": 9.981131643337219e-06, "loss": 0.252838134765625, "step": 9205 }, { "epoch": 0.6222793024198999, "grad_norm": 1.9873510599136353, "learning_rate": 9.978022408433716e-06, "loss": 0.25921630859375, "step": 9206 }, { "epoch": 0.6223468973908342, "grad_norm": 0.7435899972915649, "learning_rate": 9.974913416524725e-06, "loss": 0.1004486083984375, "step": 9207 }, { "epoch": 0.6224144923617683, "grad_norm": 1.7924407720565796, "learning_rate": 9.971804667760668e-06, "loss": 0.324005126953125, "step": 9208 }, { "epoch": 0.6224820873327025, "grad_norm": 1.4752651453018188, "learning_rate": 9.968696162291978e-06, "loss": 0.197509765625, "step": 9209 }, { "epoch": 0.6225496823036366, "grad_norm": 1.5042792558670044, "learning_rate": 9.965587900269054e-06, "loss": 0.253143310546875, "step": 9210 }, { "epoch": 0.6226172772745707, "grad_norm": 1.321305751800537, "learning_rate": 9.962479881842292e-06, "loss": 0.1902618408203125, "step": 9211 }, { "epoch": 0.622684872245505, "grad_norm": 1.623177170753479, "learning_rate": 9.959372107162089e-06, "loss": 0.208587646484375, "step": 9212 }, { "epoch": 0.6227524672164391, "grad_norm": 1.5860013961791992, "learning_rate": 9.956264576378805e-06, "loss": 0.271697998046875, "step": 9213 }, { "epoch": 0.6228200621873733, "grad_norm": 1.6069574356079102, "learning_rate": 9.953157289642808e-06, "loss": 0.21435546875, "step": 9214 }, { "epoch": 0.6228876571583074, "grad_norm": 1.3866325616836548, "learning_rate": 9.950050247104447e-06, "loss": 0.281982421875, "step": 9215 }, { "epoch": 0.6229552521292416, "grad_norm": 1.3780654668807983, "learning_rate": 9.946943448914064e-06, "loss": 0.1749420166015625, "step": 9216 }, { "epoch": 0.6230228471001757, "grad_norm": 1.7617095708847046, "learning_rate": 9.943836895221978e-06, "loss": 0.297607421875, "step": 9217 }, { "epoch": 0.6230904420711099, "grad_norm": 0.8589896559715271, "learning_rate": 9.940730586178512e-06, "loss": 0.15123748779296875, "step": 9218 }, { "epoch": 0.6231580370420441, "grad_norm": 1.0381197929382324, "learning_rate": 9.937624521933955e-06, "loss": 0.199554443359375, "step": 9219 }, { "epoch": 0.6232256320129782, "grad_norm": 1.4167406558990479, "learning_rate": 9.934518702638613e-06, "loss": 0.27703857421875, "step": 9220 }, { "epoch": 0.6232932269839124, "grad_norm": 1.0984524488449097, "learning_rate": 9.93141312844276e-06, "loss": 0.1674652099609375, "step": 9221 }, { "epoch": 0.6233608219548465, "grad_norm": 1.2230221033096313, "learning_rate": 9.928307799496654e-06, "loss": 0.2620849609375, "step": 9222 }, { "epoch": 0.6234284169257808, "grad_norm": 1.2641502618789673, "learning_rate": 9.925202715950562e-06, "loss": 0.219390869140625, "step": 9223 }, { "epoch": 0.6234960118967149, "grad_norm": 1.00826895236969, "learning_rate": 9.922097877954718e-06, "loss": 0.1668853759765625, "step": 9224 }, { "epoch": 0.623563606867649, "grad_norm": 0.8920652866363525, "learning_rate": 9.918993285659363e-06, "loss": 0.240142822265625, "step": 9225 }, { "epoch": 0.6236312018385832, "grad_norm": 0.9983978867530823, "learning_rate": 9.915888939214706e-06, "loss": 0.2620849609375, "step": 9226 }, { "epoch": 0.6236987968095173, "grad_norm": 1.8153079748153687, "learning_rate": 9.912784838770965e-06, "loss": 0.192657470703125, "step": 9227 }, { "epoch": 0.6237663917804516, "grad_norm": 1.758467435836792, "learning_rate": 9.909680984478324e-06, "loss": 0.33660888671875, "step": 9228 }, { "epoch": 0.6238339867513857, "grad_norm": 1.3362541198730469, "learning_rate": 9.906577376486982e-06, "loss": 0.20903778076171875, "step": 9229 }, { "epoch": 0.6239015817223199, "grad_norm": 1.038674235343933, "learning_rate": 9.903474014947094e-06, "loss": 0.2086639404296875, "step": 9230 }, { "epoch": 0.623969176693254, "grad_norm": 0.9358503818511963, "learning_rate": 9.900370900008831e-06, "loss": 0.202972412109375, "step": 9231 }, { "epoch": 0.6240367716641881, "grad_norm": 1.7005913257598877, "learning_rate": 9.897268031822343e-06, "loss": 0.2007598876953125, "step": 9232 }, { "epoch": 0.6241043666351224, "grad_norm": 0.5352729558944702, "learning_rate": 9.894165410537757e-06, "loss": 0.10663604736328125, "step": 9233 }, { "epoch": 0.6241719616060565, "grad_norm": 1.1349551677703857, "learning_rate": 9.891063036305202e-06, "loss": 0.11968231201171875, "step": 9234 }, { "epoch": 0.6242395565769907, "grad_norm": 0.8904979825019836, "learning_rate": 9.887960909274786e-06, "loss": 0.206817626953125, "step": 9235 }, { "epoch": 0.6243071515479248, "grad_norm": 1.1685034036636353, "learning_rate": 9.884859029596617e-06, "loss": 0.210906982421875, "step": 9236 }, { "epoch": 0.624374746518859, "grad_norm": 1.6060189008712769, "learning_rate": 9.881757397420776e-06, "loss": 0.2759246826171875, "step": 9237 }, { "epoch": 0.6244423414897932, "grad_norm": 0.7302956581115723, "learning_rate": 9.878656012897346e-06, "loss": 0.10819625854492188, "step": 9238 }, { "epoch": 0.6245099364607273, "grad_norm": 1.0750740766525269, "learning_rate": 9.875554876176381e-06, "loss": 0.1909332275390625, "step": 9239 }, { "epoch": 0.6245775314316615, "grad_norm": 1.1907631158828735, "learning_rate": 9.872453987407945e-06, "loss": 0.246124267578125, "step": 9240 }, { "epoch": 0.6246451264025956, "grad_norm": 1.3937013149261475, "learning_rate": 9.869353346742068e-06, "loss": 0.2501220703125, "step": 9241 }, { "epoch": 0.6247127213735298, "grad_norm": 1.2877709865570068, "learning_rate": 9.866252954328792e-06, "loss": 0.2557373046875, "step": 9242 }, { "epoch": 0.624780316344464, "grad_norm": 1.5047556161880493, "learning_rate": 9.863152810318115e-06, "loss": 0.293701171875, "step": 9243 }, { "epoch": 0.6248479113153982, "grad_norm": 1.2509557008743286, "learning_rate": 9.860052914860057e-06, "loss": 0.28521728515625, "step": 9244 }, { "epoch": 0.6249155062863323, "grad_norm": 1.7869012355804443, "learning_rate": 9.856953268104605e-06, "loss": 0.253021240234375, "step": 9245 }, { "epoch": 0.6249831012572664, "grad_norm": 1.0989583730697632, "learning_rate": 9.853853870201731e-06, "loss": 0.213653564453125, "step": 9246 }, { "epoch": 0.6250506962282006, "grad_norm": 0.9921362400054932, "learning_rate": 9.85075472130142e-06, "loss": 0.236541748046875, "step": 9247 }, { "epoch": 0.6251182911991348, "grad_norm": 1.0525310039520264, "learning_rate": 9.847655821553608e-06, "loss": 0.1958770751953125, "step": 9248 }, { "epoch": 0.625185886170069, "grad_norm": 2.1045680046081543, "learning_rate": 9.844557171108256e-06, "loss": 0.27508544921875, "step": 9249 }, { "epoch": 0.6252534811410031, "grad_norm": 0.9071335196495056, "learning_rate": 9.841458770115285e-06, "loss": 0.151702880859375, "step": 9250 }, { "epoch": 0.6253210761119373, "grad_norm": 1.298221230506897, "learning_rate": 9.838360618724626e-06, "loss": 0.246063232421875, "step": 9251 }, { "epoch": 0.6253886710828714, "grad_norm": 1.3510966300964355, "learning_rate": 9.835262717086174e-06, "loss": 0.1874542236328125, "step": 9252 }, { "epoch": 0.6254562660538056, "grad_norm": 1.3370932340621948, "learning_rate": 9.832165065349832e-06, "loss": 0.26904296875, "step": 9253 }, { "epoch": 0.6255238610247398, "grad_norm": 0.6470035910606384, "learning_rate": 9.829067663665481e-06, "loss": 0.11034393310546875, "step": 9254 }, { "epoch": 0.6255914559956739, "grad_norm": 1.0699273347854614, "learning_rate": 9.825970512183002e-06, "loss": 0.26458740234375, "step": 9255 }, { "epoch": 0.6256590509666081, "grad_norm": 1.6920052766799927, "learning_rate": 9.82287361105224e-06, "loss": 0.26812744140625, "step": 9256 }, { "epoch": 0.6257266459375422, "grad_norm": 1.1082289218902588, "learning_rate": 9.819776960423049e-06, "loss": 0.1856536865234375, "step": 9257 }, { "epoch": 0.6257942409084765, "grad_norm": 1.2411552667617798, "learning_rate": 9.81668056044527e-06, "loss": 0.27203369140625, "step": 9258 }, { "epoch": 0.6258618358794106, "grad_norm": 1.1078085899353027, "learning_rate": 9.813584411268711e-06, "loss": 0.17482376098632812, "step": 9259 }, { "epoch": 0.6259294308503447, "grad_norm": 1.2850866317749023, "learning_rate": 9.810488513043197e-06, "loss": 0.2293701171875, "step": 9260 }, { "epoch": 0.6259970258212789, "grad_norm": 1.19088876247406, "learning_rate": 9.80739286591852e-06, "loss": 0.2033233642578125, "step": 9261 }, { "epoch": 0.626064620792213, "grad_norm": 1.1705156564712524, "learning_rate": 9.804297470044473e-06, "loss": 0.2166748046875, "step": 9262 }, { "epoch": 0.6261322157631473, "grad_norm": 1.2276487350463867, "learning_rate": 9.80120232557082e-06, "loss": 0.2891845703125, "step": 9263 }, { "epoch": 0.6261998107340814, "grad_norm": 0.7469097971916199, "learning_rate": 9.798107432647337e-06, "loss": 0.207366943359375, "step": 9264 }, { "epoch": 0.6262674057050156, "grad_norm": 1.2767884731292725, "learning_rate": 9.79501279142376e-06, "loss": 0.193023681640625, "step": 9265 }, { "epoch": 0.6263350006759497, "grad_norm": 2.427299976348877, "learning_rate": 9.791918402049838e-06, "loss": 0.354217529296875, "step": 9266 }, { "epoch": 0.6264025956468838, "grad_norm": 1.5509929656982422, "learning_rate": 9.788824264675293e-06, "loss": 0.2508392333984375, "step": 9267 }, { "epoch": 0.6264701906178181, "grad_norm": 1.2160180807113647, "learning_rate": 9.785730379449832e-06, "loss": 0.181976318359375, "step": 9268 }, { "epoch": 0.6265377855887522, "grad_norm": 1.6895475387573242, "learning_rate": 9.782636746523168e-06, "loss": 0.277801513671875, "step": 9269 }, { "epoch": 0.6266053805596864, "grad_norm": 1.053412675857544, "learning_rate": 9.77954336604498e-06, "loss": 0.2448883056640625, "step": 9270 }, { "epoch": 0.6266729755306205, "grad_norm": 1.447805404663086, "learning_rate": 9.776450238164955e-06, "loss": 0.24530029296875, "step": 9271 }, { "epoch": 0.6267405705015547, "grad_norm": 0.881493091583252, "learning_rate": 9.773357363032745e-06, "loss": 0.1361541748046875, "step": 9272 }, { "epoch": 0.6268081654724889, "grad_norm": 1.2722797393798828, "learning_rate": 9.770264740798018e-06, "loss": 0.1875152587890625, "step": 9273 }, { "epoch": 0.626875760443423, "grad_norm": 1.1745860576629639, "learning_rate": 9.767172371610398e-06, "loss": 0.294921875, "step": 9274 }, { "epoch": 0.6269433554143572, "grad_norm": 0.9411157369613647, "learning_rate": 9.764080255619531e-06, "loss": 0.1975555419921875, "step": 9275 }, { "epoch": 0.6270109503852913, "grad_norm": 1.584262490272522, "learning_rate": 9.760988392975014e-06, "loss": 0.3307342529296875, "step": 9276 }, { "epoch": 0.6270785453562255, "grad_norm": 0.8486475348472595, "learning_rate": 9.757896783826465e-06, "loss": 0.1711273193359375, "step": 9277 }, { "epoch": 0.6271461403271597, "grad_norm": 0.9678213596343994, "learning_rate": 9.754805428323466e-06, "loss": 0.2020263671875, "step": 9278 }, { "epoch": 0.6272137352980938, "grad_norm": 1.511741280555725, "learning_rate": 9.751714326615605e-06, "loss": 0.301116943359375, "step": 9279 }, { "epoch": 0.627281330269028, "grad_norm": 1.168351173400879, "learning_rate": 9.748623478852445e-06, "loss": 0.23382568359375, "step": 9280 }, { "epoch": 0.6273489252399621, "grad_norm": 1.108681321144104, "learning_rate": 9.745532885183532e-06, "loss": 0.189422607421875, "step": 9281 }, { "epoch": 0.6274165202108963, "grad_norm": 1.5364285707473755, "learning_rate": 9.742442545758419e-06, "loss": 0.215728759765625, "step": 9282 }, { "epoch": 0.6274841151818304, "grad_norm": 0.8418446183204651, "learning_rate": 9.73935246072663e-06, "loss": 0.2083740234375, "step": 9283 }, { "epoch": 0.6275517101527647, "grad_norm": 1.1914514303207397, "learning_rate": 9.73626263023769e-06, "loss": 0.188995361328125, "step": 9284 }, { "epoch": 0.6276193051236988, "grad_norm": 1.9835715293884277, "learning_rate": 9.73317305444109e-06, "loss": 0.15770721435546875, "step": 9285 }, { "epoch": 0.6276869000946329, "grad_norm": 1.28965425491333, "learning_rate": 9.73008373348634e-06, "loss": 0.2479705810546875, "step": 9286 }, { "epoch": 0.6277544950655671, "grad_norm": 1.3228368759155273, "learning_rate": 9.726994667522905e-06, "loss": 0.292327880859375, "step": 9287 }, { "epoch": 0.6278220900365012, "grad_norm": 0.7769538760185242, "learning_rate": 9.723905856700265e-06, "loss": 0.09723663330078125, "step": 9288 }, { "epoch": 0.6278896850074355, "grad_norm": 1.2005553245544434, "learning_rate": 9.720817301167869e-06, "loss": 0.182464599609375, "step": 9289 }, { "epoch": 0.6279572799783696, "grad_norm": 0.6759127974510193, "learning_rate": 9.717729001075165e-06, "loss": 0.1115264892578125, "step": 9290 }, { "epoch": 0.6280248749493038, "grad_norm": 2.621419906616211, "learning_rate": 9.714640956571585e-06, "loss": 0.3278656005859375, "step": 9291 }, { "epoch": 0.6280924699202379, "grad_norm": 2.012636661529541, "learning_rate": 9.711553167806538e-06, "loss": 0.31854248046875, "step": 9292 }, { "epoch": 0.628160064891172, "grad_norm": 1.4525604248046875, "learning_rate": 9.708465634929444e-06, "loss": 0.224578857421875, "step": 9293 }, { "epoch": 0.6282276598621063, "grad_norm": 1.2964544296264648, "learning_rate": 9.70537835808968e-06, "loss": 0.2402496337890625, "step": 9294 }, { "epoch": 0.6282952548330404, "grad_norm": 1.4134513139724731, "learning_rate": 9.70229133743665e-06, "loss": 0.2059173583984375, "step": 9295 }, { "epoch": 0.6283628498039746, "grad_norm": 0.8464410305023193, "learning_rate": 9.699204573119702e-06, "loss": 0.1701202392578125, "step": 9296 }, { "epoch": 0.6284304447749087, "grad_norm": 2.1197052001953125, "learning_rate": 9.696118065288211e-06, "loss": 0.249786376953125, "step": 9297 }, { "epoch": 0.628498039745843, "grad_norm": 1.2870856523513794, "learning_rate": 9.693031814091504e-06, "loss": 0.2081451416015625, "step": 9298 }, { "epoch": 0.6285656347167771, "grad_norm": 0.998665452003479, "learning_rate": 9.689945819678924e-06, "loss": 0.1428375244140625, "step": 9299 }, { "epoch": 0.6286332296877112, "grad_norm": 1.6189889907836914, "learning_rate": 9.686860082199786e-06, "loss": 0.23358154296875, "step": 9300 }, { "epoch": 0.6287008246586454, "grad_norm": 2.2527644634246826, "learning_rate": 9.683774601803405e-06, "loss": 0.3082275390625, "step": 9301 }, { "epoch": 0.6287684196295795, "grad_norm": 0.8927502632141113, "learning_rate": 9.68068937863907e-06, "loss": 0.1332550048828125, "step": 9302 }, { "epoch": 0.6288360146005137, "grad_norm": 1.1918877363204956, "learning_rate": 9.677604412856059e-06, "loss": 0.20147705078125, "step": 9303 }, { "epoch": 0.6289036095714479, "grad_norm": 1.652961254119873, "learning_rate": 9.67451970460365e-06, "loss": 0.23089599609375, "step": 9304 }, { "epoch": 0.6289712045423821, "grad_norm": 1.532443642616272, "learning_rate": 9.67143525403109e-06, "loss": 0.277252197265625, "step": 9305 }, { "epoch": 0.6290387995133162, "grad_norm": 1.4437310695648193, "learning_rate": 9.668351061287634e-06, "loss": 0.1847076416015625, "step": 9306 }, { "epoch": 0.6291063944842503, "grad_norm": 0.9092546701431274, "learning_rate": 9.665267126522511e-06, "loss": 0.133453369140625, "step": 9307 }, { "epoch": 0.6291739894551845, "grad_norm": 1.3876858949661255, "learning_rate": 9.662183449884945e-06, "loss": 0.26287841796875, "step": 9308 }, { "epoch": 0.6292415844261187, "grad_norm": 1.7088505029678345, "learning_rate": 9.659100031524132e-06, "loss": 0.2480010986328125, "step": 9309 }, { "epoch": 0.6293091793970529, "grad_norm": 1.172465205192566, "learning_rate": 9.656016871589282e-06, "loss": 0.17353057861328125, "step": 9310 }, { "epoch": 0.629376774367987, "grad_norm": 1.4592921733856201, "learning_rate": 9.652933970229562e-06, "loss": 0.2799072265625, "step": 9311 }, { "epoch": 0.6294443693389212, "grad_norm": 0.860610842704773, "learning_rate": 9.649851327594153e-06, "loss": 0.244232177734375, "step": 9312 }, { "epoch": 0.6295119643098553, "grad_norm": 1.619313359260559, "learning_rate": 9.646768943832204e-06, "loss": 0.316162109375, "step": 9313 }, { "epoch": 0.6295795592807895, "grad_norm": 1.1798005104064941, "learning_rate": 9.643686819092877e-06, "loss": 0.2147216796875, "step": 9314 }, { "epoch": 0.6296471542517237, "grad_norm": 2.226156234741211, "learning_rate": 9.640604953525283e-06, "loss": 0.334625244140625, "step": 9315 }, { "epoch": 0.6297147492226578, "grad_norm": 1.463586688041687, "learning_rate": 9.63752334727855e-06, "loss": 0.1758270263671875, "step": 9316 }, { "epoch": 0.629782344193592, "grad_norm": 0.9897434711456299, "learning_rate": 9.634442000501795e-06, "loss": 0.196136474609375, "step": 9317 }, { "epoch": 0.6298499391645261, "grad_norm": 1.3622926473617554, "learning_rate": 9.631360913344095e-06, "loss": 0.208587646484375, "step": 9318 }, { "epoch": 0.6299175341354604, "grad_norm": 0.7779579758644104, "learning_rate": 9.628280085954545e-06, "loss": 0.246917724609375, "step": 9319 }, { "epoch": 0.6299851291063945, "grad_norm": 1.7432568073272705, "learning_rate": 9.625199518482207e-06, "loss": 0.26837158203125, "step": 9320 }, { "epoch": 0.6300527240773286, "grad_norm": 1.132779836654663, "learning_rate": 9.62211921107615e-06, "loss": 0.193603515625, "step": 9321 }, { "epoch": 0.6301203190482628, "grad_norm": 1.3674787282943726, "learning_rate": 9.6190391638854e-06, "loss": 0.292236328125, "step": 9322 }, { "epoch": 0.6301879140191969, "grad_norm": 1.7991877794265747, "learning_rate": 9.615959377059005e-06, "loss": 0.26434326171875, "step": 9323 }, { "epoch": 0.6302555089901312, "grad_norm": 1.0141938924789429, "learning_rate": 9.612879850745977e-06, "loss": 0.22412109375, "step": 9324 }, { "epoch": 0.6303231039610653, "grad_norm": 1.33531653881073, "learning_rate": 9.609800585095329e-06, "loss": 0.209930419921875, "step": 9325 }, { "epoch": 0.6303906989319995, "grad_norm": 1.3739370107650757, "learning_rate": 9.606721580256048e-06, "loss": 0.212127685546875, "step": 9326 }, { "epoch": 0.6304582939029336, "grad_norm": 1.1452447175979614, "learning_rate": 9.603642836377112e-06, "loss": 0.20172119140625, "step": 9327 }, { "epoch": 0.6305258888738677, "grad_norm": 1.1300121545791626, "learning_rate": 9.600564353607498e-06, "loss": 0.2603759765625, "step": 9328 }, { "epoch": 0.630593483844802, "grad_norm": 0.840930700302124, "learning_rate": 9.597486132096158e-06, "loss": 0.2772216796875, "step": 9329 }, { "epoch": 0.6306610788157361, "grad_norm": 1.3232420682907104, "learning_rate": 9.594408171992042e-06, "loss": 0.294158935546875, "step": 9330 }, { "epoch": 0.6307286737866703, "grad_norm": 0.973618745803833, "learning_rate": 9.59133047344407e-06, "loss": 0.2679443359375, "step": 9331 }, { "epoch": 0.6307962687576044, "grad_norm": 0.5461096167564392, "learning_rate": 9.588253036601169e-06, "loss": 0.1180877685546875, "step": 9332 }, { "epoch": 0.6308638637285386, "grad_norm": 1.0480015277862549, "learning_rate": 9.585175861612237e-06, "loss": 0.21246719360351562, "step": 9333 }, { "epoch": 0.6309314586994728, "grad_norm": 1.1446776390075684, "learning_rate": 9.582098948626176e-06, "loss": 0.258880615234375, "step": 9334 }, { "epoch": 0.6309990536704069, "grad_norm": 1.3980282545089722, "learning_rate": 9.579022297791859e-06, "loss": 0.302978515625, "step": 9335 }, { "epoch": 0.6310666486413411, "grad_norm": 0.8288956880569458, "learning_rate": 9.575945909258157e-06, "loss": 0.17935562133789062, "step": 9336 }, { "epoch": 0.6311342436122752, "grad_norm": 1.020573616027832, "learning_rate": 9.572869783173929e-06, "loss": 0.1886444091796875, "step": 9337 }, { "epoch": 0.6312018385832094, "grad_norm": 1.307571530342102, "learning_rate": 9.569793919688003e-06, "loss": 0.29144287109375, "step": 9338 }, { "epoch": 0.6312694335541436, "grad_norm": 1.2537099123001099, "learning_rate": 9.566718318949225e-06, "loss": 0.258575439453125, "step": 9339 }, { "epoch": 0.6313370285250778, "grad_norm": 1.2514803409576416, "learning_rate": 9.563642981106395e-06, "loss": 0.1793670654296875, "step": 9340 }, { "epoch": 0.6314046234960119, "grad_norm": 0.6650391817092896, "learning_rate": 9.560567906308333e-06, "loss": 0.1040496826171875, "step": 9341 }, { "epoch": 0.631472218466946, "grad_norm": 1.4355183839797974, "learning_rate": 9.557493094703817e-06, "loss": 0.28131103515625, "step": 9342 }, { "epoch": 0.6315398134378802, "grad_norm": 1.1052591800689697, "learning_rate": 9.55441854644164e-06, "loss": 0.17739105224609375, "step": 9343 }, { "epoch": 0.6316074084088144, "grad_norm": 1.8747693300247192, "learning_rate": 9.551344261670551e-06, "loss": 0.201629638671875, "step": 9344 }, { "epoch": 0.6316750033797486, "grad_norm": 0.7693402171134949, "learning_rate": 9.548270240539318e-06, "loss": 0.199676513671875, "step": 9345 }, { "epoch": 0.6317425983506827, "grad_norm": 1.4151374101638794, "learning_rate": 9.545196483196667e-06, "loss": 0.2569580078125, "step": 9346 }, { "epoch": 0.6318101933216169, "grad_norm": 1.096099615097046, "learning_rate": 9.542122989791343e-06, "loss": 0.245208740234375, "step": 9347 }, { "epoch": 0.631877788292551, "grad_norm": 1.3299330472946167, "learning_rate": 9.539049760472044e-06, "loss": 0.267547607421875, "step": 9348 }, { "epoch": 0.6319453832634851, "grad_norm": 1.6064457893371582, "learning_rate": 9.53597679538748e-06, "loss": 0.23114013671875, "step": 9349 }, { "epoch": 0.6320129782344194, "grad_norm": 1.6614214181900024, "learning_rate": 9.532904094686345e-06, "loss": 0.29986572265625, "step": 9350 }, { "epoch": 0.6320805732053535, "grad_norm": 0.990667998790741, "learning_rate": 9.529831658517301e-06, "loss": 0.22698974609375, "step": 9351 }, { "epoch": 0.6321481681762877, "grad_norm": 0.6666408777236938, "learning_rate": 9.526759487029025e-06, "loss": 0.1603851318359375, "step": 9352 }, { "epoch": 0.6322157631472218, "grad_norm": 2.1192667484283447, "learning_rate": 9.523687580370158e-06, "loss": 0.308563232421875, "step": 9353 }, { "epoch": 0.6322833581181561, "grad_norm": 1.613942265510559, "learning_rate": 9.52061593868935e-06, "loss": 0.3372802734375, "step": 9354 }, { "epoch": 0.6323509530890902, "grad_norm": 1.9354677200317383, "learning_rate": 9.51754456213521e-06, "loss": 0.24395751953125, "step": 9355 }, { "epoch": 0.6324185480600243, "grad_norm": 1.267829418182373, "learning_rate": 9.514473450856367e-06, "loss": 0.17852020263671875, "step": 9356 }, { "epoch": 0.6324861430309585, "grad_norm": 1.3734464645385742, "learning_rate": 9.511402605001408e-06, "loss": 0.30792236328125, "step": 9357 }, { "epoch": 0.6325537380018926, "grad_norm": 1.3869760036468506, "learning_rate": 9.508332024718926e-06, "loss": 0.28802490234375, "step": 9358 }, { "epoch": 0.6326213329728269, "grad_norm": 2.151794672012329, "learning_rate": 9.50526171015749e-06, "loss": 0.306976318359375, "step": 9359 }, { "epoch": 0.632688927943761, "grad_norm": 1.8805962800979614, "learning_rate": 9.50219166146567e-06, "loss": 0.28905487060546875, "step": 9360 }, { "epoch": 0.6327565229146952, "grad_norm": 1.6691282987594604, "learning_rate": 9.499121878792005e-06, "loss": 0.276885986328125, "step": 9361 }, { "epoch": 0.6328241178856293, "grad_norm": 1.1023783683776855, "learning_rate": 9.496052362285028e-06, "loss": 0.2713623046875, "step": 9362 }, { "epoch": 0.6328917128565634, "grad_norm": 1.3515866994857788, "learning_rate": 9.492983112093276e-06, "loss": 0.19610595703125, "step": 9363 }, { "epoch": 0.6329593078274977, "grad_norm": 0.8068363070487976, "learning_rate": 9.489914128365241e-06, "loss": 0.1863250732421875, "step": 9364 }, { "epoch": 0.6330269027984318, "grad_norm": 1.4369688034057617, "learning_rate": 9.486845411249431e-06, "loss": 0.2586669921875, "step": 9365 }, { "epoch": 0.633094497769366, "grad_norm": 0.9728603959083557, "learning_rate": 9.483776960894321e-06, "loss": 0.182952880859375, "step": 9366 }, { "epoch": 0.6331620927403001, "grad_norm": 1.552174687385559, "learning_rate": 9.480708777448395e-06, "loss": 0.240264892578125, "step": 9367 }, { "epoch": 0.6332296877112343, "grad_norm": 0.9925088882446289, "learning_rate": 9.477640861060096e-06, "loss": 0.15130615234375, "step": 9368 }, { "epoch": 0.6332972826821684, "grad_norm": 1.6310901641845703, "learning_rate": 9.474573211877875e-06, "loss": 0.3035888671875, "step": 9369 }, { "epoch": 0.6333648776531026, "grad_norm": 1.7477394342422485, "learning_rate": 9.471505830050165e-06, "loss": 0.17830276489257812, "step": 9370 }, { "epoch": 0.6334324726240368, "grad_norm": 0.8724532723426819, "learning_rate": 9.46843871572539e-06, "loss": 0.1471710205078125, "step": 9371 }, { "epoch": 0.6335000675949709, "grad_norm": 0.9502626061439514, "learning_rate": 9.465371869051941e-06, "loss": 0.1576690673828125, "step": 9372 }, { "epoch": 0.6335676625659051, "grad_norm": 0.8849481344223022, "learning_rate": 9.462305290178232e-06, "loss": 0.2122039794921875, "step": 9373 }, { "epoch": 0.6336352575368392, "grad_norm": 1.8657793998718262, "learning_rate": 9.459238979252625e-06, "loss": 0.2060394287109375, "step": 9374 }, { "epoch": 0.6337028525077735, "grad_norm": 1.621248483657837, "learning_rate": 9.456172936423493e-06, "loss": 0.2921142578125, "step": 9375 }, { "epoch": 0.6337704474787076, "grad_norm": 1.5122647285461426, "learning_rate": 9.453107161839194e-06, "loss": 0.19887542724609375, "step": 9376 }, { "epoch": 0.6338380424496417, "grad_norm": 1.849664330482483, "learning_rate": 9.450041655648063e-06, "loss": 0.31622314453125, "step": 9377 }, { "epoch": 0.6339056374205759, "grad_norm": 0.693082332611084, "learning_rate": 9.446976417998432e-06, "loss": 0.15678977966308594, "step": 9378 }, { "epoch": 0.63397323239151, "grad_norm": 1.537545919418335, "learning_rate": 9.443911449038614e-06, "loss": 0.27276611328125, "step": 9379 }, { "epoch": 0.6340408273624443, "grad_norm": 1.2548452615737915, "learning_rate": 9.44084674891692e-06, "loss": 0.2298583984375, "step": 9380 }, { "epoch": 0.6341084223333784, "grad_norm": 1.7931547164916992, "learning_rate": 9.437782317781626e-06, "loss": 0.287261962890625, "step": 9381 }, { "epoch": 0.6341760173043126, "grad_norm": 1.1463199853897095, "learning_rate": 9.434718155781016e-06, "loss": 0.19482421875, "step": 9382 }, { "epoch": 0.6342436122752467, "grad_norm": 1.3266137838363647, "learning_rate": 9.43165426306335e-06, "loss": 0.2230072021484375, "step": 9383 }, { "epoch": 0.6343112072461808, "grad_norm": 1.246433138847351, "learning_rate": 9.428590639776884e-06, "loss": 0.2498779296875, "step": 9384 }, { "epoch": 0.6343788022171151, "grad_norm": 1.4065779447555542, "learning_rate": 9.425527286069856e-06, "loss": 0.228973388671875, "step": 9385 }, { "epoch": 0.6344463971880492, "grad_norm": 1.1556652784347534, "learning_rate": 9.422464202090475e-06, "loss": 0.214111328125, "step": 9386 }, { "epoch": 0.6345139921589834, "grad_norm": 1.103509783744812, "learning_rate": 9.419401387986965e-06, "loss": 0.17041015625, "step": 9387 }, { "epoch": 0.6345815871299175, "grad_norm": 1.143216848373413, "learning_rate": 9.416338843907518e-06, "loss": 0.226593017578125, "step": 9388 }, { "epoch": 0.6346491821008517, "grad_norm": 1.2179259061813354, "learning_rate": 9.413276570000332e-06, "loss": 0.212646484375, "step": 9389 }, { "epoch": 0.6347167770717859, "grad_norm": 2.376138210296631, "learning_rate": 9.41021456641356e-06, "loss": 0.330841064453125, "step": 9390 }, { "epoch": 0.63478437204272, "grad_norm": 0.8696386218070984, "learning_rate": 9.407152833295373e-06, "loss": 0.19439697265625, "step": 9391 }, { "epoch": 0.6348519670136542, "grad_norm": 1.9395893812179565, "learning_rate": 9.404091370793911e-06, "loss": 0.2911529541015625, "step": 9392 }, { "epoch": 0.6349195619845883, "grad_norm": 0.7168618440628052, "learning_rate": 9.401030179057314e-06, "loss": 0.1403350830078125, "step": 9393 }, { "epoch": 0.6349871569555225, "grad_norm": 1.331200361251831, "learning_rate": 9.397969258233692e-06, "loss": 0.19234657287597656, "step": 9394 }, { "epoch": 0.6350547519264567, "grad_norm": 1.5011523962020874, "learning_rate": 9.394908608471156e-06, "loss": 0.1959228515625, "step": 9395 }, { "epoch": 0.6351223468973909, "grad_norm": 0.6083897352218628, "learning_rate": 9.391848229917807e-06, "loss": 0.11096954345703125, "step": 9396 }, { "epoch": 0.635189941868325, "grad_norm": 1.22101891040802, "learning_rate": 9.388788122721707e-06, "loss": 0.19293212890625, "step": 9397 }, { "epoch": 0.6352575368392591, "grad_norm": 1.6079745292663574, "learning_rate": 9.385728287030938e-06, "loss": 0.323486328125, "step": 9398 }, { "epoch": 0.6353251318101933, "grad_norm": 0.9503464102745056, "learning_rate": 9.382668722993547e-06, "loss": 0.24652099609375, "step": 9399 }, { "epoch": 0.6353927267811275, "grad_norm": 1.0190130472183228, "learning_rate": 9.379609430757583e-06, "loss": 0.243988037109375, "step": 9400 }, { "epoch": 0.6354603217520617, "grad_norm": 1.3421779870986938, "learning_rate": 9.376550410471061e-06, "loss": 0.267333984375, "step": 9401 }, { "epoch": 0.6355279167229958, "grad_norm": 0.7075475454330444, "learning_rate": 9.373491662282008e-06, "loss": 0.161712646484375, "step": 9402 }, { "epoch": 0.63559551169393, "grad_norm": 1.5497690439224243, "learning_rate": 9.370433186338413e-06, "loss": 0.2320404052734375, "step": 9403 }, { "epoch": 0.6356631066648641, "grad_norm": 1.5906257629394531, "learning_rate": 9.367374982788275e-06, "loss": 0.2388153076171875, "step": 9404 }, { "epoch": 0.6357307016357983, "grad_norm": 1.170353889465332, "learning_rate": 9.364317051779559e-06, "loss": 0.266571044921875, "step": 9405 }, { "epoch": 0.6357982966067325, "grad_norm": 0.7667110562324524, "learning_rate": 9.36125939346024e-06, "loss": 0.1811981201171875, "step": 9406 }, { "epoch": 0.6358658915776666, "grad_norm": 1.8351620435714722, "learning_rate": 9.358202007978251e-06, "loss": 0.33465576171875, "step": 9407 }, { "epoch": 0.6359334865486008, "grad_norm": 2.38285493850708, "learning_rate": 9.355144895481538e-06, "loss": 0.3157501220703125, "step": 9408 }, { "epoch": 0.6360010815195349, "grad_norm": 1.752458095550537, "learning_rate": 9.352088056118024e-06, "loss": 0.254730224609375, "step": 9409 }, { "epoch": 0.636068676490469, "grad_norm": 1.8805495500564575, "learning_rate": 9.349031490035605e-06, "loss": 0.244293212890625, "step": 9410 }, { "epoch": 0.6361362714614033, "grad_norm": 1.5851820707321167, "learning_rate": 9.34597519738219e-06, "loss": 0.254913330078125, "step": 9411 }, { "epoch": 0.6362038664323374, "grad_norm": 1.3611161708831787, "learning_rate": 9.342919178305655e-06, "loss": 0.27716064453125, "step": 9412 }, { "epoch": 0.6362714614032716, "grad_norm": 1.5198813676834106, "learning_rate": 9.339863432953878e-06, "loss": 0.2755584716796875, "step": 9413 }, { "epoch": 0.6363390563742057, "grad_norm": 0.6923110485076904, "learning_rate": 9.336807961474699e-06, "loss": 0.180419921875, "step": 9414 }, { "epoch": 0.63640665134514, "grad_norm": 0.665044367313385, "learning_rate": 9.333752764015976e-06, "loss": 0.1418304443359375, "step": 9415 }, { "epoch": 0.6364742463160741, "grad_norm": 0.9651584625244141, "learning_rate": 9.330697840725527e-06, "loss": 0.2103271484375, "step": 9416 }, { "epoch": 0.6365418412870082, "grad_norm": 1.7423456907272339, "learning_rate": 9.32764319175118e-06, "loss": 0.21234130859375, "step": 9417 }, { "epoch": 0.6366094362579424, "grad_norm": 1.4975755214691162, "learning_rate": 9.324588817240726e-06, "loss": 0.320709228515625, "step": 9418 }, { "epoch": 0.6366770312288765, "grad_norm": 1.9700582027435303, "learning_rate": 9.321534717341966e-06, "loss": 0.371124267578125, "step": 9419 }, { "epoch": 0.6367446261998108, "grad_norm": 1.4470908641815186, "learning_rate": 9.318480892202667e-06, "loss": 0.228118896484375, "step": 9420 }, { "epoch": 0.6368122211707449, "grad_norm": 1.1745421886444092, "learning_rate": 9.315427341970592e-06, "loss": 0.27032470703125, "step": 9421 }, { "epoch": 0.6368798161416791, "grad_norm": 1.1611132621765137, "learning_rate": 9.312374066793501e-06, "loss": 0.282684326171875, "step": 9422 }, { "epoch": 0.6369474111126132, "grad_norm": 0.7755191922187805, "learning_rate": 9.309321066819119e-06, "loss": 0.1563262939453125, "step": 9423 }, { "epoch": 0.6370150060835473, "grad_norm": 1.581835150718689, "learning_rate": 9.306268342195173e-06, "loss": 0.32952880859375, "step": 9424 }, { "epoch": 0.6370826010544816, "grad_norm": 0.8730971217155457, "learning_rate": 9.303215893069373e-06, "loss": 0.239593505859375, "step": 9425 }, { "epoch": 0.6371501960254157, "grad_norm": 1.5679247379302979, "learning_rate": 9.30016371958942e-06, "loss": 0.2305908203125, "step": 9426 }, { "epoch": 0.6372177909963499, "grad_norm": 1.5179493427276611, "learning_rate": 9.297111821902989e-06, "loss": 0.30645751953125, "step": 9427 }, { "epoch": 0.637285385967284, "grad_norm": 0.728032648563385, "learning_rate": 9.294060200157758e-06, "loss": 0.160125732421875, "step": 9428 }, { "epoch": 0.6373529809382182, "grad_norm": 1.5224719047546387, "learning_rate": 9.291008854501376e-06, "loss": 0.32342529296875, "step": 9429 }, { "epoch": 0.6374205759091524, "grad_norm": 1.8312140703201294, "learning_rate": 9.287957785081493e-06, "loss": 0.2802734375, "step": 9430 }, { "epoch": 0.6374881708800865, "grad_norm": 1.1853688955307007, "learning_rate": 9.284906992045738e-06, "loss": 0.237030029296875, "step": 9431 }, { "epoch": 0.6375557658510207, "grad_norm": 1.0254862308502197, "learning_rate": 9.281856475541716e-06, "loss": 0.1966552734375, "step": 9432 }, { "epoch": 0.6376233608219548, "grad_norm": 0.921813428401947, "learning_rate": 9.278806235717043e-06, "loss": 0.144195556640625, "step": 9433 }, { "epoch": 0.637690955792889, "grad_norm": 0.8980833888053894, "learning_rate": 9.2757562727193e-06, "loss": 0.17630577087402344, "step": 9434 }, { "epoch": 0.6377585507638232, "grad_norm": 1.234607219696045, "learning_rate": 9.272706586696075e-06, "loss": 0.2196044921875, "step": 9435 }, { "epoch": 0.6378261457347574, "grad_norm": 1.6184747219085693, "learning_rate": 9.269657177794915e-06, "loss": 0.27685546875, "step": 9436 }, { "epoch": 0.6378937407056915, "grad_norm": 1.1430745124816895, "learning_rate": 9.266608046163383e-06, "loss": 0.260406494140625, "step": 9437 }, { "epoch": 0.6379613356766256, "grad_norm": 1.264091968536377, "learning_rate": 9.263559191949003e-06, "loss": 0.219390869140625, "step": 9438 }, { "epoch": 0.6380289306475598, "grad_norm": 1.1002570390701294, "learning_rate": 9.260510615299313e-06, "loss": 0.2032318115234375, "step": 9439 }, { "epoch": 0.638096525618494, "grad_norm": 1.1517549753189087, "learning_rate": 9.257462316361803e-06, "loss": 0.275970458984375, "step": 9440 }, { "epoch": 0.6381641205894282, "grad_norm": 1.3966455459594727, "learning_rate": 9.254414295283985e-06, "loss": 0.2550048828125, "step": 9441 }, { "epoch": 0.6382317155603623, "grad_norm": 1.549939513206482, "learning_rate": 9.251366552213331e-06, "loss": 0.20721435546875, "step": 9442 }, { "epoch": 0.6382993105312965, "grad_norm": 1.3343188762664795, "learning_rate": 9.248319087297319e-06, "loss": 0.32708740234375, "step": 9443 }, { "epoch": 0.6383669055022306, "grad_norm": 1.545060157775879, "learning_rate": 9.245271900683396e-06, "loss": 0.3138427734375, "step": 9444 }, { "epoch": 0.6384345004731647, "grad_norm": 0.8494114279747009, "learning_rate": 9.242224992519004e-06, "loss": 0.1234283447265625, "step": 9445 }, { "epoch": 0.638502095444099, "grad_norm": 1.2131257057189941, "learning_rate": 9.239178362951581e-06, "loss": 0.25335693359375, "step": 9446 }, { "epoch": 0.6385696904150331, "grad_norm": 0.978485643863678, "learning_rate": 9.236132012128528e-06, "loss": 0.24835205078125, "step": 9447 }, { "epoch": 0.6386372853859673, "grad_norm": 1.9176762104034424, "learning_rate": 9.233085940197258e-06, "loss": 0.30126953125, "step": 9448 }, { "epoch": 0.6387048803569014, "grad_norm": 1.255252480506897, "learning_rate": 9.23004014730515e-06, "loss": 0.280517578125, "step": 9449 }, { "epoch": 0.6387724753278357, "grad_norm": 1.2365416288375854, "learning_rate": 9.226994633599586e-06, "loss": 0.34906005859375, "step": 9450 }, { "epoch": 0.6388400702987698, "grad_norm": 1.001429796218872, "learning_rate": 9.22394939922792e-06, "loss": 0.28533935546875, "step": 9451 }, { "epoch": 0.6389076652697039, "grad_norm": 1.2546210289001465, "learning_rate": 9.220904444337508e-06, "loss": 0.23388671875, "step": 9452 }, { "epoch": 0.6389752602406381, "grad_norm": 1.2595038414001465, "learning_rate": 9.217859769075673e-06, "loss": 0.228668212890625, "step": 9453 }, { "epoch": 0.6390428552115722, "grad_norm": 1.2611969709396362, "learning_rate": 9.214815373589744e-06, "loss": 0.2938079833984375, "step": 9454 }, { "epoch": 0.6391104501825065, "grad_norm": 0.6162327527999878, "learning_rate": 9.21177125802703e-06, "loss": 0.1602935791015625, "step": 9455 }, { "epoch": 0.6391780451534406, "grad_norm": 0.8561105728149414, "learning_rate": 9.208727422534811e-06, "loss": 0.18634033203125, "step": 9456 }, { "epoch": 0.6392456401243748, "grad_norm": 0.9449155330657959, "learning_rate": 9.20568386726038e-06, "loss": 0.225921630859375, "step": 9457 }, { "epoch": 0.6393132350953089, "grad_norm": 1.101675271987915, "learning_rate": 9.202640592350992e-06, "loss": 0.229583740234375, "step": 9458 }, { "epoch": 0.639380830066243, "grad_norm": 0.6724432706832886, "learning_rate": 9.199597597953915e-06, "loss": 0.088165283203125, "step": 9459 }, { "epoch": 0.6394484250371772, "grad_norm": 0.8047280311584473, "learning_rate": 9.196554884216369e-06, "loss": 0.09891891479492188, "step": 9460 }, { "epoch": 0.6395160200081114, "grad_norm": 0.7579066157341003, "learning_rate": 9.193512451285592e-06, "loss": 0.1797943115234375, "step": 9461 }, { "epoch": 0.6395836149790456, "grad_norm": 0.6953067183494568, "learning_rate": 9.190470299308793e-06, "loss": 0.11495208740234375, "step": 9462 }, { "epoch": 0.6396512099499797, "grad_norm": 0.9628934860229492, "learning_rate": 9.187428428433174e-06, "loss": 0.251220703125, "step": 9463 }, { "epoch": 0.6397188049209139, "grad_norm": 1.4549425840377808, "learning_rate": 9.184386838805909e-06, "loss": 0.1914825439453125, "step": 9464 }, { "epoch": 0.639786399891848, "grad_norm": 1.0484673976898193, "learning_rate": 9.181345530574185e-06, "loss": 0.1976318359375, "step": 9465 }, { "epoch": 0.6398539948627822, "grad_norm": 1.3232245445251465, "learning_rate": 9.178304503885142e-06, "loss": 0.306854248046875, "step": 9466 }, { "epoch": 0.6399215898337164, "grad_norm": 1.9557567834854126, "learning_rate": 9.175263758885932e-06, "loss": 0.29034423828125, "step": 9467 }, { "epoch": 0.6399891848046505, "grad_norm": 1.2022550106048584, "learning_rate": 9.172223295723691e-06, "loss": 0.2821044921875, "step": 9468 }, { "epoch": 0.6400567797755847, "grad_norm": 0.8641106486320496, "learning_rate": 9.169183114545523e-06, "loss": 0.17119598388671875, "step": 9469 }, { "epoch": 0.6401243747465188, "grad_norm": 1.071853518486023, "learning_rate": 9.16614321549854e-06, "loss": 0.1959381103515625, "step": 9470 }, { "epoch": 0.6401919697174531, "grad_norm": 1.0644965171813965, "learning_rate": 9.163103598729825e-06, "loss": 0.1667327880859375, "step": 9471 }, { "epoch": 0.6402595646883872, "grad_norm": 1.5377944707870483, "learning_rate": 9.160064264386466e-06, "loss": 0.1792144775390625, "step": 9472 }, { "epoch": 0.6403271596593213, "grad_norm": 1.0089623928070068, "learning_rate": 9.157025212615506e-06, "loss": 0.25555419921875, "step": 9473 }, { "epoch": 0.6403947546302555, "grad_norm": 1.1614614725112915, "learning_rate": 9.153986443564011e-06, "loss": 0.184814453125, "step": 9474 }, { "epoch": 0.6404623496011896, "grad_norm": 0.6595767736434937, "learning_rate": 9.150947957379002e-06, "loss": 0.1402740478515625, "step": 9475 }, { "epoch": 0.6405299445721239, "grad_norm": 2.1123297214508057, "learning_rate": 9.147909754207512e-06, "loss": 0.311248779296875, "step": 9476 }, { "epoch": 0.640597539543058, "grad_norm": 1.2891756296157837, "learning_rate": 9.144871834196536e-06, "loss": 0.2003631591796875, "step": 9477 }, { "epoch": 0.6406651345139922, "grad_norm": 1.272087574005127, "learning_rate": 9.141834197493078e-06, "loss": 0.2470703125, "step": 9478 }, { "epoch": 0.6407327294849263, "grad_norm": 1.1287983655929565, "learning_rate": 9.138796844244112e-06, "loss": 0.204681396484375, "step": 9479 }, { "epoch": 0.6408003244558604, "grad_norm": 0.8186604380607605, "learning_rate": 9.1357597745966e-06, "loss": 0.1389617919921875, "step": 9480 }, { "epoch": 0.6408679194267947, "grad_norm": 1.5810781717300415, "learning_rate": 9.132722988697507e-06, "loss": 0.217926025390625, "step": 9481 }, { "epoch": 0.6409355143977288, "grad_norm": 1.668878436088562, "learning_rate": 9.129686486693758e-06, "loss": 0.27362060546875, "step": 9482 }, { "epoch": 0.641003109368663, "grad_norm": 1.9759061336517334, "learning_rate": 9.126650268732287e-06, "loss": 0.306396484375, "step": 9483 }, { "epoch": 0.6410707043395971, "grad_norm": 2.033999443054199, "learning_rate": 9.123614334959997e-06, "loss": 0.244354248046875, "step": 9484 }, { "epoch": 0.6411382993105313, "grad_norm": 1.0491020679473877, "learning_rate": 9.120578685523798e-06, "loss": 0.272186279296875, "step": 9485 }, { "epoch": 0.6412058942814655, "grad_norm": 1.5382369756698608, "learning_rate": 9.117543320570559e-06, "loss": 0.25946044921875, "step": 9486 }, { "epoch": 0.6412734892523996, "grad_norm": 1.2990658283233643, "learning_rate": 9.114508240247162e-06, "loss": 0.2410736083984375, "step": 9487 }, { "epoch": 0.6413410842233338, "grad_norm": 0.7690669298171997, "learning_rate": 9.111473444700453e-06, "loss": 0.146575927734375, "step": 9488 }, { "epoch": 0.6414086791942679, "grad_norm": 1.8290205001831055, "learning_rate": 9.108438934077287e-06, "loss": 0.35467529296875, "step": 9489 }, { "epoch": 0.6414762741652021, "grad_norm": 1.0698597431182861, "learning_rate": 9.10540470852448e-06, "loss": 0.291839599609375, "step": 9490 }, { "epoch": 0.6415438691361363, "grad_norm": 1.3040388822555542, "learning_rate": 9.102370768188848e-06, "loss": 0.26300048828125, "step": 9491 }, { "epoch": 0.6416114641070705, "grad_norm": 0.7465845942497253, "learning_rate": 9.099337113217203e-06, "loss": 0.10569953918457031, "step": 9492 }, { "epoch": 0.6416790590780046, "grad_norm": 1.2187339067459106, "learning_rate": 9.096303743756315e-06, "loss": 0.2255859375, "step": 9493 }, { "epoch": 0.6417466540489387, "grad_norm": 0.8641928434371948, "learning_rate": 9.093270659952974e-06, "loss": 0.217559814453125, "step": 9494 }, { "epoch": 0.6418142490198729, "grad_norm": 0.556660532951355, "learning_rate": 9.090237861953927e-06, "loss": 0.1118621826171875, "step": 9495 }, { "epoch": 0.641881843990807, "grad_norm": 0.9003328084945679, "learning_rate": 9.087205349905926e-06, "loss": 0.23968505859375, "step": 9496 }, { "epoch": 0.6419494389617413, "grad_norm": 0.8200783729553223, "learning_rate": 9.0841731239557e-06, "loss": 0.173187255859375, "step": 9497 }, { "epoch": 0.6420170339326754, "grad_norm": 0.8671213388442993, "learning_rate": 9.081141184249973e-06, "loss": 0.184722900390625, "step": 9498 }, { "epoch": 0.6420846289036096, "grad_norm": 0.9623718857765198, "learning_rate": 9.07810953093544e-06, "loss": 0.17369461059570312, "step": 9499 }, { "epoch": 0.6421522238745437, "grad_norm": 1.257734775543213, "learning_rate": 9.075078164158799e-06, "loss": 0.26922607421875, "step": 9500 }, { "epoch": 0.6422198188454779, "grad_norm": 0.6917570233345032, "learning_rate": 9.072047084066727e-06, "loss": 0.1309051513671875, "step": 9501 }, { "epoch": 0.6422874138164121, "grad_norm": 1.1349685192108154, "learning_rate": 9.069016290805873e-06, "loss": 0.15631103515625, "step": 9502 }, { "epoch": 0.6423550087873462, "grad_norm": 1.139602780342102, "learning_rate": 9.0659857845229e-06, "loss": 0.2353973388671875, "step": 9503 }, { "epoch": 0.6424226037582804, "grad_norm": 1.7646822929382324, "learning_rate": 9.062955565364436e-06, "loss": 0.3917236328125, "step": 9504 }, { "epoch": 0.6424901987292145, "grad_norm": 1.5402421951293945, "learning_rate": 9.059925633477108e-06, "loss": 0.220428466796875, "step": 9505 }, { "epoch": 0.6425577937001488, "grad_norm": 1.6510118246078491, "learning_rate": 9.056895989007513e-06, "loss": 0.3209228515625, "step": 9506 }, { "epoch": 0.6426253886710829, "grad_norm": 1.7297229766845703, "learning_rate": 9.053866632102254e-06, "loss": 0.243621826171875, "step": 9507 }, { "epoch": 0.642692983642017, "grad_norm": 1.2886496782302856, "learning_rate": 9.050837562907903e-06, "loss": 0.1457061767578125, "step": 9508 }, { "epoch": 0.6427605786129512, "grad_norm": 1.7046924829483032, "learning_rate": 9.047808781571034e-06, "loss": 0.265472412109375, "step": 9509 }, { "epoch": 0.6428281735838853, "grad_norm": 1.4537960290908813, "learning_rate": 9.044780288238186e-06, "loss": 0.2916107177734375, "step": 9510 }, { "epoch": 0.6428957685548196, "grad_norm": 1.319735050201416, "learning_rate": 9.04175208305591e-06, "loss": 0.21185302734375, "step": 9511 }, { "epoch": 0.6429633635257537, "grad_norm": 1.4765300750732422, "learning_rate": 9.038724166170713e-06, "loss": 0.23095703125, "step": 9512 }, { "epoch": 0.6430309584966879, "grad_norm": 0.7647505402565002, "learning_rate": 9.035696537729119e-06, "loss": 0.135009765625, "step": 9513 }, { "epoch": 0.643098553467622, "grad_norm": 2.579550266265869, "learning_rate": 9.03266919787762e-06, "loss": 0.326629638671875, "step": 9514 }, { "epoch": 0.6431661484385561, "grad_norm": 1.8991034030914307, "learning_rate": 9.029642146762692e-06, "loss": 0.26947021484375, "step": 9515 }, { "epoch": 0.6432337434094904, "grad_norm": 1.4692291021347046, "learning_rate": 9.026615384530807e-06, "loss": 0.318359375, "step": 9516 }, { "epoch": 0.6433013383804245, "grad_norm": 1.1669977903366089, "learning_rate": 9.023588911328415e-06, "loss": 0.24993896484375, "step": 9517 }, { "epoch": 0.6433689333513587, "grad_norm": 1.8488190174102783, "learning_rate": 9.020562727301966e-06, "loss": 0.3372802734375, "step": 9518 }, { "epoch": 0.6434365283222928, "grad_norm": 1.3180241584777832, "learning_rate": 9.017536832597869e-06, "loss": 0.2589111328125, "step": 9519 }, { "epoch": 0.643504123293227, "grad_norm": 0.9484921097755432, "learning_rate": 9.01451122736255e-06, "loss": 0.185150146484375, "step": 9520 }, { "epoch": 0.6435717182641612, "grad_norm": 0.8714422583580017, "learning_rate": 9.011485911742396e-06, "loss": 0.1649932861328125, "step": 9521 }, { "epoch": 0.6436393132350953, "grad_norm": 0.7424283623695374, "learning_rate": 9.008460885883805e-06, "loss": 0.151031494140625, "step": 9522 }, { "epoch": 0.6437069082060295, "grad_norm": 1.1624900102615356, "learning_rate": 9.00543614993313e-06, "loss": 0.250213623046875, "step": 9523 }, { "epoch": 0.6437745031769636, "grad_norm": 0.9681988954544067, "learning_rate": 9.002411704036739e-06, "loss": 0.202117919921875, "step": 9524 }, { "epoch": 0.6438420981478978, "grad_norm": 0.9743382334709167, "learning_rate": 8.999387548340966e-06, "loss": 0.1611175537109375, "step": 9525 }, { "epoch": 0.643909693118832, "grad_norm": 1.2015700340270996, "learning_rate": 8.996363682992137e-06, "loss": 0.262115478515625, "step": 9526 }, { "epoch": 0.6439772880897662, "grad_norm": 2.55063533782959, "learning_rate": 8.993340108136577e-06, "loss": 0.318206787109375, "step": 9527 }, { "epoch": 0.6440448830607003, "grad_norm": 1.4266456365585327, "learning_rate": 8.990316823920569e-06, "loss": 0.278778076171875, "step": 9528 }, { "epoch": 0.6441124780316344, "grad_norm": 1.4002454280853271, "learning_rate": 8.987293830490411e-06, "loss": 0.2200927734375, "step": 9529 }, { "epoch": 0.6441800730025686, "grad_norm": 1.2773712873458862, "learning_rate": 8.984271127992367e-06, "loss": 0.181793212890625, "step": 9530 }, { "epoch": 0.6442476679735027, "grad_norm": 0.9049526453018188, "learning_rate": 8.981248716572705e-06, "loss": 0.1660919189453125, "step": 9531 }, { "epoch": 0.644315262944437, "grad_norm": 1.3995647430419922, "learning_rate": 8.978226596377652e-06, "loss": 0.29100799560546875, "step": 9532 }, { "epoch": 0.6443828579153711, "grad_norm": 1.4224613904953003, "learning_rate": 8.97520476755345e-06, "loss": 0.296234130859375, "step": 9533 }, { "epoch": 0.6444504528863053, "grad_norm": 0.6139699816703796, "learning_rate": 8.972183230246303e-06, "loss": 0.13652801513671875, "step": 9534 }, { "epoch": 0.6445180478572394, "grad_norm": 1.1358505487442017, "learning_rate": 8.969161984602428e-06, "loss": 0.1870574951171875, "step": 9535 }, { "epoch": 0.6445856428281735, "grad_norm": 1.1732890605926514, "learning_rate": 8.966141030767995e-06, "loss": 0.177337646484375, "step": 9536 }, { "epoch": 0.6446532377991078, "grad_norm": 1.3016372919082642, "learning_rate": 8.963120368889183e-06, "loss": 0.258270263671875, "step": 9537 }, { "epoch": 0.6447208327700419, "grad_norm": 2.2613015174865723, "learning_rate": 8.960099999112156e-06, "loss": 0.303192138671875, "step": 9538 }, { "epoch": 0.6447884277409761, "grad_norm": 1.0938339233398438, "learning_rate": 8.957079921583046e-06, "loss": 0.22906494140625, "step": 9539 }, { "epoch": 0.6448560227119102, "grad_norm": 1.1763277053833008, "learning_rate": 8.954060136447995e-06, "loss": 0.20587158203125, "step": 9540 }, { "epoch": 0.6449236176828443, "grad_norm": 1.1496983766555786, "learning_rate": 8.95104064385311e-06, "loss": 0.241607666015625, "step": 9541 }, { "epoch": 0.6449912126537786, "grad_norm": 2.1371877193450928, "learning_rate": 8.9480214439445e-06, "loss": 0.182281494140625, "step": 9542 }, { "epoch": 0.6450588076247127, "grad_norm": 1.794323205947876, "learning_rate": 8.945002536868242e-06, "loss": 0.284820556640625, "step": 9543 }, { "epoch": 0.6451264025956469, "grad_norm": 1.952196478843689, "learning_rate": 8.941983922770427e-06, "loss": 0.26898193359375, "step": 9544 }, { "epoch": 0.645193997566581, "grad_norm": 1.2214645147323608, "learning_rate": 8.938965601797098e-06, "loss": 0.1460113525390625, "step": 9545 }, { "epoch": 0.6452615925375152, "grad_norm": 1.4724714756011963, "learning_rate": 8.935947574094309e-06, "loss": 0.252960205078125, "step": 9546 }, { "epoch": 0.6453291875084494, "grad_norm": 1.8331340551376343, "learning_rate": 8.932929839808085e-06, "loss": 0.314697265625, "step": 9547 }, { "epoch": 0.6453967824793835, "grad_norm": 1.301688551902771, "learning_rate": 8.92991239908445e-06, "loss": 0.27862548828125, "step": 9548 }, { "epoch": 0.6454643774503177, "grad_norm": 0.8544780015945435, "learning_rate": 8.926895252069404e-06, "loss": 0.1773223876953125, "step": 9549 }, { "epoch": 0.6455319724212518, "grad_norm": 1.3854960203170776, "learning_rate": 8.923878398908927e-06, "loss": 0.217987060546875, "step": 9550 }, { "epoch": 0.645599567392186, "grad_norm": 1.229312777519226, "learning_rate": 8.920861839749007e-06, "loss": 0.282958984375, "step": 9551 }, { "epoch": 0.6456671623631202, "grad_norm": 0.6897075176239014, "learning_rate": 8.917845574735593e-06, "loss": 0.1456451416015625, "step": 9552 }, { "epoch": 0.6457347573340544, "grad_norm": 1.425679326057434, "learning_rate": 8.914829604014637e-06, "loss": 0.297637939453125, "step": 9553 }, { "epoch": 0.6458023523049885, "grad_norm": 1.3652220964431763, "learning_rate": 8.911813927732062e-06, "loss": 0.23968505859375, "step": 9554 }, { "epoch": 0.6458699472759226, "grad_norm": 1.2704914808273315, "learning_rate": 8.908798546033799e-06, "loss": 0.271759033203125, "step": 9555 }, { "epoch": 0.6459375422468568, "grad_norm": 1.963033676147461, "learning_rate": 8.905783459065739e-06, "loss": 0.29620361328125, "step": 9556 }, { "epoch": 0.646005137217791, "grad_norm": 1.0306411981582642, "learning_rate": 8.90276866697378e-06, "loss": 0.231658935546875, "step": 9557 }, { "epoch": 0.6460727321887252, "grad_norm": 1.0268057584762573, "learning_rate": 8.899754169903782e-06, "loss": 0.1402587890625, "step": 9558 }, { "epoch": 0.6461403271596593, "grad_norm": 1.5879937410354614, "learning_rate": 8.896739968001621e-06, "loss": 0.31781005859375, "step": 9559 }, { "epoch": 0.6462079221305935, "grad_norm": 1.160309910774231, "learning_rate": 8.893726061413138e-06, "loss": 0.0948486328125, "step": 9560 }, { "epoch": 0.6462755171015276, "grad_norm": 0.9736533761024475, "learning_rate": 8.890712450284155e-06, "loss": 0.195709228515625, "step": 9561 }, { "epoch": 0.6463431120724618, "grad_norm": 2.268915891647339, "learning_rate": 8.887699134760503e-06, "loss": 0.296966552734375, "step": 9562 }, { "epoch": 0.646410707043396, "grad_norm": 1.087380051612854, "learning_rate": 8.884686114987973e-06, "loss": 0.217498779296875, "step": 9563 }, { "epoch": 0.6464783020143301, "grad_norm": 2.3772225379943848, "learning_rate": 8.881673391112365e-06, "loss": 0.351043701171875, "step": 9564 }, { "epoch": 0.6465458969852643, "grad_norm": 1.1729497909545898, "learning_rate": 8.878660963279447e-06, "loss": 0.18951416015625, "step": 9565 }, { "epoch": 0.6466134919561984, "grad_norm": 1.4042543172836304, "learning_rate": 8.875648831634977e-06, "loss": 0.22589111328125, "step": 9566 }, { "epoch": 0.6466810869271327, "grad_norm": 1.2949119806289673, "learning_rate": 8.872636996324704e-06, "loss": 0.2337188720703125, "step": 9567 }, { "epoch": 0.6467486818980668, "grad_norm": 1.1801220178604126, "learning_rate": 8.869625457494362e-06, "loss": 0.178375244140625, "step": 9568 }, { "epoch": 0.6468162768690009, "grad_norm": 2.002870559692383, "learning_rate": 8.866614215289662e-06, "loss": 0.3409423828125, "step": 9569 }, { "epoch": 0.6468838718399351, "grad_norm": 1.7697455883026123, "learning_rate": 8.863603269856312e-06, "loss": 0.30010986328125, "step": 9570 }, { "epoch": 0.6469514668108692, "grad_norm": 1.132217526435852, "learning_rate": 8.860592621339998e-06, "loss": 0.13471221923828125, "step": 9571 }, { "epoch": 0.6470190617818035, "grad_norm": 1.2781211137771606, "learning_rate": 8.857582269886387e-06, "loss": 0.2571563720703125, "step": 9572 }, { "epoch": 0.6470866567527376, "grad_norm": 1.7729897499084473, "learning_rate": 8.854572215641154e-06, "loss": 0.268035888671875, "step": 9573 }, { "epoch": 0.6471542517236718, "grad_norm": 1.3054170608520508, "learning_rate": 8.851562458749928e-06, "loss": 0.327789306640625, "step": 9574 }, { "epoch": 0.6472218466946059, "grad_norm": 1.2023502588272095, "learning_rate": 8.84855299935835e-06, "loss": 0.238922119140625, "step": 9575 }, { "epoch": 0.64728944166554, "grad_norm": 1.3975348472595215, "learning_rate": 8.845543837612031e-06, "loss": 0.225555419921875, "step": 9576 }, { "epoch": 0.6473570366364743, "grad_norm": 1.05685293674469, "learning_rate": 8.84253497365658e-06, "loss": 0.16693115234375, "step": 9577 }, { "epoch": 0.6474246316074084, "grad_norm": 1.1753427982330322, "learning_rate": 8.839526407637576e-06, "loss": 0.15472030639648438, "step": 9578 }, { "epoch": 0.6474922265783426, "grad_norm": 1.3027896881103516, "learning_rate": 8.836518139700597e-06, "loss": 0.28485107421875, "step": 9579 }, { "epoch": 0.6475598215492767, "grad_norm": 1.587975025177002, "learning_rate": 8.833510169991198e-06, "loss": 0.296722412109375, "step": 9580 }, { "epoch": 0.6476274165202109, "grad_norm": 1.040527582168579, "learning_rate": 8.830502498654932e-06, "loss": 0.158721923828125, "step": 9581 }, { "epoch": 0.6476950114911451, "grad_norm": 1.7660435438156128, "learning_rate": 8.827495125837316e-06, "loss": 0.1800079345703125, "step": 9582 }, { "epoch": 0.6477626064620792, "grad_norm": 1.2042856216430664, "learning_rate": 8.824488051683877e-06, "loss": 0.228424072265625, "step": 9583 }, { "epoch": 0.6478302014330134, "grad_norm": 1.7156202793121338, "learning_rate": 8.821481276340111e-06, "loss": 0.25750732421875, "step": 9584 }, { "epoch": 0.6478977964039475, "grad_norm": 1.5475364923477173, "learning_rate": 8.818474799951504e-06, "loss": 0.296142578125, "step": 9585 }, { "epoch": 0.6479653913748817, "grad_norm": 1.2700740098953247, "learning_rate": 8.815468622663531e-06, "loss": 0.24701690673828125, "step": 9586 }, { "epoch": 0.6480329863458159, "grad_norm": 1.5454310178756714, "learning_rate": 8.812462744621641e-06, "loss": 0.183441162109375, "step": 9587 }, { "epoch": 0.6481005813167501, "grad_norm": 1.4233182668685913, "learning_rate": 8.809457165971288e-06, "loss": 0.287353515625, "step": 9588 }, { "epoch": 0.6481681762876842, "grad_norm": 1.1642305850982666, "learning_rate": 8.806451886857892e-06, "loss": 0.19561767578125, "step": 9589 }, { "epoch": 0.6482357712586183, "grad_norm": 1.628201961517334, "learning_rate": 8.803446907426878e-06, "loss": 0.25213623046875, "step": 9590 }, { "epoch": 0.6483033662295525, "grad_norm": 1.6295125484466553, "learning_rate": 8.80044222782363e-06, "loss": 0.3472900390625, "step": 9591 }, { "epoch": 0.6483709612004867, "grad_norm": 1.92583167552948, "learning_rate": 8.797437848193546e-06, "loss": 0.263153076171875, "step": 9592 }, { "epoch": 0.6484385561714209, "grad_norm": 1.0581085681915283, "learning_rate": 8.794433768681992e-06, "loss": 0.2020721435546875, "step": 9593 }, { "epoch": 0.648506151142355, "grad_norm": 1.637900948524475, "learning_rate": 8.791429989434327e-06, "loss": 0.3194580078125, "step": 9594 }, { "epoch": 0.6485737461132892, "grad_norm": 1.1571693420410156, "learning_rate": 8.788426510595885e-06, "loss": 0.2099609375, "step": 9595 }, { "epoch": 0.6486413410842233, "grad_norm": 0.6135069727897644, "learning_rate": 8.785423332311998e-06, "loss": 0.1203460693359375, "step": 9596 }, { "epoch": 0.6487089360551574, "grad_norm": 1.475090742111206, "learning_rate": 8.782420454727985e-06, "loss": 0.26898193359375, "step": 9597 }, { "epoch": 0.6487765310260917, "grad_norm": 1.3761801719665527, "learning_rate": 8.77941787798913e-06, "loss": 0.26651763916015625, "step": 9598 }, { "epoch": 0.6488441259970258, "grad_norm": 1.4699022769927979, "learning_rate": 8.776415602240724e-06, "loss": 0.302978515625, "step": 9599 }, { "epoch": 0.64891172096796, "grad_norm": 1.3958125114440918, "learning_rate": 8.773413627628034e-06, "loss": 0.261993408203125, "step": 9600 }, { "epoch": 0.6489793159388941, "grad_norm": 1.5406105518341064, "learning_rate": 8.770411954296322e-06, "loss": 0.267822265625, "step": 9601 }, { "epoch": 0.6490469109098284, "grad_norm": 1.3316727876663208, "learning_rate": 8.767410582390817e-06, "loss": 0.2049713134765625, "step": 9602 }, { "epoch": 0.6491145058807625, "grad_norm": 0.9016716480255127, "learning_rate": 8.764409512056751e-06, "loss": 0.159515380859375, "step": 9603 }, { "epoch": 0.6491821008516966, "grad_norm": 2.11617112159729, "learning_rate": 8.761408743439326e-06, "loss": 0.27423095703125, "step": 9604 }, { "epoch": 0.6492496958226308, "grad_norm": 1.0853191614151, "learning_rate": 8.758408276683745e-06, "loss": 0.147216796875, "step": 9605 }, { "epoch": 0.6493172907935649, "grad_norm": 1.6655731201171875, "learning_rate": 8.755408111935195e-06, "loss": 0.27667236328125, "step": 9606 }, { "epoch": 0.6493848857644992, "grad_norm": 1.2626674175262451, "learning_rate": 8.752408249338823e-06, "loss": 0.2742919921875, "step": 9607 }, { "epoch": 0.6494524807354333, "grad_norm": 0.5784248113632202, "learning_rate": 8.749408689039806e-06, "loss": 0.1110382080078125, "step": 9608 }, { "epoch": 0.6495200757063675, "grad_norm": 1.028445839881897, "learning_rate": 8.746409431183256e-06, "loss": 0.254180908203125, "step": 9609 }, { "epoch": 0.6495876706773016, "grad_norm": 1.4030640125274658, "learning_rate": 8.743410475914315e-06, "loss": 0.261505126953125, "step": 9610 }, { "epoch": 0.6496552656482357, "grad_norm": 0.9123268127441406, "learning_rate": 8.740411823378084e-06, "loss": 0.186309814453125, "step": 9611 }, { "epoch": 0.64972286061917, "grad_norm": 1.4466495513916016, "learning_rate": 8.737413473719658e-06, "loss": 0.233795166015625, "step": 9612 }, { "epoch": 0.6497904555901041, "grad_norm": 1.4638727903366089, "learning_rate": 8.734415427084114e-06, "loss": 0.265228271484375, "step": 9613 }, { "epoch": 0.6498580505610383, "grad_norm": 0.8538960218429565, "learning_rate": 8.731417683616518e-06, "loss": 0.1890869140625, "step": 9614 }, { "epoch": 0.6499256455319724, "grad_norm": 1.9745956659317017, "learning_rate": 8.728420243461912e-06, "loss": 0.23797607421875, "step": 9615 }, { "epoch": 0.6499932405029066, "grad_norm": 0.7599024176597595, "learning_rate": 8.725423106765348e-06, "loss": 0.1263427734375, "step": 9616 }, { "epoch": 0.6500608354738407, "grad_norm": 1.4090207815170288, "learning_rate": 8.722426273671823e-06, "loss": 0.226043701171875, "step": 9617 }, { "epoch": 0.6501284304447749, "grad_norm": 0.928443193435669, "learning_rate": 8.719429744326366e-06, "loss": 0.193084716796875, "step": 9618 }, { "epoch": 0.6501960254157091, "grad_norm": 0.9955430030822754, "learning_rate": 8.716433518873952e-06, "loss": 0.1419219970703125, "step": 9619 }, { "epoch": 0.6502636203866432, "grad_norm": 1.6338318586349487, "learning_rate": 8.713437597459556e-06, "loss": 0.2001800537109375, "step": 9620 }, { "epoch": 0.6503312153575774, "grad_norm": 2.265521764755249, "learning_rate": 8.710441980228156e-06, "loss": 0.32049560546875, "step": 9621 }, { "epoch": 0.6503988103285115, "grad_norm": 1.7109148502349854, "learning_rate": 8.707446667324677e-06, "loss": 0.27471923828125, "step": 9622 }, { "epoch": 0.6504664052994458, "grad_norm": 1.708365559577942, "learning_rate": 8.704451658894064e-06, "loss": 0.314697265625, "step": 9623 }, { "epoch": 0.6505340002703799, "grad_norm": 1.0723851919174194, "learning_rate": 8.701456955081233e-06, "loss": 0.1437835693359375, "step": 9624 }, { "epoch": 0.650601595241314, "grad_norm": 1.2968895435333252, "learning_rate": 8.698462556031086e-06, "loss": 0.21991729736328125, "step": 9625 }, { "epoch": 0.6506691902122482, "grad_norm": 1.3882712125778198, "learning_rate": 8.695468461888507e-06, "loss": 0.255584716796875, "step": 9626 }, { "epoch": 0.6507367851831823, "grad_norm": 1.4397547245025635, "learning_rate": 8.692474672798372e-06, "loss": 0.2642669677734375, "step": 9627 }, { "epoch": 0.6508043801541166, "grad_norm": 1.7663673162460327, "learning_rate": 8.689481188905534e-06, "loss": 0.25225830078125, "step": 9628 }, { "epoch": 0.6508719751250507, "grad_norm": 2.01784348487854, "learning_rate": 8.68648801035485e-06, "loss": 0.296630859375, "step": 9629 }, { "epoch": 0.6509395700959849, "grad_norm": 0.8362120985984802, "learning_rate": 8.683495137291134e-06, "loss": 0.17926025390625, "step": 9630 }, { "epoch": 0.651007165066919, "grad_norm": 1.1159709692001343, "learning_rate": 8.680502569859208e-06, "loss": 0.20589447021484375, "step": 9631 }, { "epoch": 0.6510747600378531, "grad_norm": 1.4058983325958252, "learning_rate": 8.677510308203866e-06, "loss": 0.275421142578125, "step": 9632 }, { "epoch": 0.6511423550087874, "grad_norm": 1.2026033401489258, "learning_rate": 8.674518352469888e-06, "loss": 0.169677734375, "step": 9633 }, { "epoch": 0.6512099499797215, "grad_norm": 0.9948782920837402, "learning_rate": 8.671526702802064e-06, "loss": 0.194488525390625, "step": 9634 }, { "epoch": 0.6512775449506557, "grad_norm": 1.267423391342163, "learning_rate": 8.66853535934512e-06, "loss": 0.198822021484375, "step": 9635 }, { "epoch": 0.6513451399215898, "grad_norm": 1.6588293313980103, "learning_rate": 8.665544322243818e-06, "loss": 0.1958465576171875, "step": 9636 }, { "epoch": 0.651412734892524, "grad_norm": 1.1729893684387207, "learning_rate": 8.662553591642873e-06, "loss": 0.208465576171875, "step": 9637 }, { "epoch": 0.6514803298634582, "grad_norm": 1.2932605743408203, "learning_rate": 8.659563167687e-06, "loss": 0.1703948974609375, "step": 9638 }, { "epoch": 0.6515479248343923, "grad_norm": 1.952351689338684, "learning_rate": 8.65657305052089e-06, "loss": 0.23095703125, "step": 9639 }, { "epoch": 0.6516155198053265, "grad_norm": 2.4665210247039795, "learning_rate": 8.653583240289227e-06, "loss": 0.1702880859375, "step": 9640 }, { "epoch": 0.6516831147762606, "grad_norm": 1.4717861413955688, "learning_rate": 8.650593737136672e-06, "loss": 0.24078369140625, "step": 9641 }, { "epoch": 0.6517507097471948, "grad_norm": 1.9752284288406372, "learning_rate": 8.64760454120788e-06, "loss": 0.249053955078125, "step": 9642 }, { "epoch": 0.651818304718129, "grad_norm": 1.5961359739303589, "learning_rate": 8.644615652647486e-06, "loss": 0.30645751953125, "step": 9643 }, { "epoch": 0.6518858996890632, "grad_norm": 1.6568243503570557, "learning_rate": 8.641627071600103e-06, "loss": 0.351806640625, "step": 9644 }, { "epoch": 0.6519534946599973, "grad_norm": 1.2249500751495361, "learning_rate": 8.638638798210359e-06, "loss": 0.2803955078125, "step": 9645 }, { "epoch": 0.6520210896309314, "grad_norm": 0.6143536567687988, "learning_rate": 8.635650832622817e-06, "loss": 0.1251068115234375, "step": 9646 }, { "epoch": 0.6520886846018656, "grad_norm": 1.12287175655365, "learning_rate": 8.63266317498208e-06, "loss": 0.2205810546875, "step": 9647 }, { "epoch": 0.6521562795727998, "grad_norm": 1.435164213180542, "learning_rate": 8.629675825432684e-06, "loss": 0.256805419921875, "step": 9648 }, { "epoch": 0.652223874543734, "grad_norm": 1.3708553314208984, "learning_rate": 8.626688784119193e-06, "loss": 0.228057861328125, "step": 9649 }, { "epoch": 0.6522914695146681, "grad_norm": 1.1910598278045654, "learning_rate": 8.623702051186136e-06, "loss": 0.3106689453125, "step": 9650 }, { "epoch": 0.6523590644856023, "grad_norm": 1.2393277883529663, "learning_rate": 8.620715626778026e-06, "loss": 0.267608642578125, "step": 9651 }, { "epoch": 0.6524266594565364, "grad_norm": 1.854285478591919, "learning_rate": 8.617729511039368e-06, "loss": 0.318359375, "step": 9652 }, { "epoch": 0.6524942544274706, "grad_norm": 1.0273628234863281, "learning_rate": 8.614743704114647e-06, "loss": 0.312591552734375, "step": 9653 }, { "epoch": 0.6525618493984048, "grad_norm": 0.8091950416564941, "learning_rate": 8.611758206148334e-06, "loss": 0.174957275390625, "step": 9654 }, { "epoch": 0.6526294443693389, "grad_norm": 1.1914719343185425, "learning_rate": 8.608773017284887e-06, "loss": 0.297607421875, "step": 9655 }, { "epoch": 0.6526970393402731, "grad_norm": 1.0996980667114258, "learning_rate": 8.605788137668748e-06, "loss": 0.246490478515625, "step": 9656 }, { "epoch": 0.6527646343112072, "grad_norm": 1.3928108215332031, "learning_rate": 8.602803567444339e-06, "loss": 0.2202301025390625, "step": 9657 }, { "epoch": 0.6528322292821415, "grad_norm": 1.323185920715332, "learning_rate": 8.599819306756088e-06, "loss": 0.2330322265625, "step": 9658 }, { "epoch": 0.6528998242530756, "grad_norm": 1.2326922416687012, "learning_rate": 8.59683535574837e-06, "loss": 0.15590667724609375, "step": 9659 }, { "epoch": 0.6529674192240097, "grad_norm": 1.9931291341781616, "learning_rate": 8.593851714565585e-06, "loss": 0.265472412109375, "step": 9660 }, { "epoch": 0.6530350141949439, "grad_norm": 1.486220121383667, "learning_rate": 8.590868383352093e-06, "loss": 0.2041015625, "step": 9661 }, { "epoch": 0.653102609165878, "grad_norm": 1.300414800643921, "learning_rate": 8.587885362252246e-06, "loss": 0.296875, "step": 9662 }, { "epoch": 0.6531702041368123, "grad_norm": 1.482184648513794, "learning_rate": 8.584902651410382e-06, "loss": 0.28778076171875, "step": 9663 }, { "epoch": 0.6532377991077464, "grad_norm": 0.5324842929840088, "learning_rate": 8.58192025097082e-06, "loss": 0.109405517578125, "step": 9664 }, { "epoch": 0.6533053940786806, "grad_norm": 2.5363564491271973, "learning_rate": 8.578938161077874e-06, "loss": 0.32421875, "step": 9665 }, { "epoch": 0.6533729890496147, "grad_norm": 1.1712403297424316, "learning_rate": 8.575956381875831e-06, "loss": 0.2132568359375, "step": 9666 }, { "epoch": 0.6534405840205488, "grad_norm": 1.0726783275604248, "learning_rate": 8.572974913508967e-06, "loss": 0.2039642333984375, "step": 9667 }, { "epoch": 0.6535081789914831, "grad_norm": 0.5555509924888611, "learning_rate": 8.569993756121548e-06, "loss": 0.150238037109375, "step": 9668 }, { "epoch": 0.6535757739624172, "grad_norm": 0.6271743774414062, "learning_rate": 8.567012909857819e-06, "loss": 0.1608428955078125, "step": 9669 }, { "epoch": 0.6536433689333514, "grad_norm": 2.2874088287353516, "learning_rate": 8.564032374862004e-06, "loss": 0.252777099609375, "step": 9670 }, { "epoch": 0.6537109639042855, "grad_norm": 1.2825628519058228, "learning_rate": 8.56105215127834e-06, "loss": 0.258056640625, "step": 9671 }, { "epoch": 0.6537785588752196, "grad_norm": 1.4133926630020142, "learning_rate": 8.558072239251004e-06, "loss": 0.2723388671875, "step": 9672 }, { "epoch": 0.6538461538461539, "grad_norm": 1.3455771207809448, "learning_rate": 8.555092638924203e-06, "loss": 0.252899169921875, "step": 9673 }, { "epoch": 0.653913748817088, "grad_norm": 1.489794373512268, "learning_rate": 8.5521133504421e-06, "loss": 0.183563232421875, "step": 9674 }, { "epoch": 0.6539813437880222, "grad_norm": 1.077781081199646, "learning_rate": 8.549134373948851e-06, "loss": 0.2320556640625, "step": 9675 }, { "epoch": 0.6540489387589563, "grad_norm": 1.122037649154663, "learning_rate": 8.546155709588604e-06, "loss": 0.201568603515625, "step": 9676 }, { "epoch": 0.6541165337298905, "grad_norm": 0.9018890261650085, "learning_rate": 8.54317735750547e-06, "loss": 0.233062744140625, "step": 9677 }, { "epoch": 0.6541841287008247, "grad_norm": 1.591081976890564, "learning_rate": 8.540199317843576e-06, "loss": 0.2168426513671875, "step": 9678 }, { "epoch": 0.6542517236717588, "grad_norm": 1.7048887014389038, "learning_rate": 8.53722159074701e-06, "loss": 0.248138427734375, "step": 9679 }, { "epoch": 0.654319318642693, "grad_norm": 1.3331801891326904, "learning_rate": 8.534244176359855e-06, "loss": 0.2135009765625, "step": 9680 }, { "epoch": 0.6543869136136271, "grad_norm": 0.7951328754425049, "learning_rate": 8.531267074826178e-06, "loss": 0.1804046630859375, "step": 9681 }, { "epoch": 0.6544545085845613, "grad_norm": 0.9931503534317017, "learning_rate": 8.52829028629003e-06, "loss": 0.24041748046875, "step": 9682 }, { "epoch": 0.6545221035554954, "grad_norm": 0.9478740692138672, "learning_rate": 8.525313810895437e-06, "loss": 0.19879913330078125, "step": 9683 }, { "epoch": 0.6545896985264297, "grad_norm": 1.204063057899475, "learning_rate": 8.522337648786439e-06, "loss": 0.24945068359375, "step": 9684 }, { "epoch": 0.6546572934973638, "grad_norm": 0.7029522061347961, "learning_rate": 8.519361800107019e-06, "loss": 0.12810516357421875, "step": 9685 }, { "epoch": 0.6547248884682979, "grad_norm": 1.3419288396835327, "learning_rate": 8.516386265001183e-06, "loss": 0.248077392578125, "step": 9686 }, { "epoch": 0.6547924834392321, "grad_norm": 1.0854175090789795, "learning_rate": 8.513411043612899e-06, "loss": 0.1876373291015625, "step": 9687 }, { "epoch": 0.6548600784101662, "grad_norm": 0.5286819934844971, "learning_rate": 8.510436136086132e-06, "loss": 0.09599494934082031, "step": 9688 }, { "epoch": 0.6549276733811005, "grad_norm": 1.2103747129440308, "learning_rate": 8.50746154256482e-06, "loss": 0.1959228515625, "step": 9689 }, { "epoch": 0.6549952683520346, "grad_norm": 1.6843152046203613, "learning_rate": 8.504487263192897e-06, "loss": 0.19480133056640625, "step": 9690 }, { "epoch": 0.6550628633229688, "grad_norm": 1.1040695905685425, "learning_rate": 8.501513298114273e-06, "loss": 0.2537841796875, "step": 9691 }, { "epoch": 0.6551304582939029, "grad_norm": 1.230368733406067, "learning_rate": 8.498539647472852e-06, "loss": 0.21441650390625, "step": 9692 }, { "epoch": 0.655198053264837, "grad_norm": 2.2389259338378906, "learning_rate": 8.495566311412514e-06, "loss": 0.293853759765625, "step": 9693 }, { "epoch": 0.6552656482357713, "grad_norm": 1.657171368598938, "learning_rate": 8.492593290077129e-06, "loss": 0.303466796875, "step": 9694 }, { "epoch": 0.6553332432067054, "grad_norm": 1.9181596040725708, "learning_rate": 8.48962058361055e-06, "loss": 0.260833740234375, "step": 9695 }, { "epoch": 0.6554008381776396, "grad_norm": 0.9336168169975281, "learning_rate": 8.486648192156608e-06, "loss": 0.20183563232421875, "step": 9696 }, { "epoch": 0.6554684331485737, "grad_norm": 1.7157787084579468, "learning_rate": 8.483676115859144e-06, "loss": 0.24932861328125, "step": 9697 }, { "epoch": 0.655536028119508, "grad_norm": 1.526614785194397, "learning_rate": 8.480704354861944e-06, "loss": 0.23760986328125, "step": 9698 }, { "epoch": 0.6556036230904421, "grad_norm": 2.027282953262329, "learning_rate": 8.477732909308813e-06, "loss": 0.258941650390625, "step": 9699 }, { "epoch": 0.6556712180613762, "grad_norm": 1.5141123533248901, "learning_rate": 8.474761779343535e-06, "loss": 0.221649169921875, "step": 9700 }, { "epoch": 0.6557388130323104, "grad_norm": 1.0131430625915527, "learning_rate": 8.471790965109847e-06, "loss": 0.21858978271484375, "step": 9701 }, { "epoch": 0.6558064080032445, "grad_norm": 0.7323838472366333, "learning_rate": 8.46882046675152e-06, "loss": 0.106658935546875, "step": 9702 }, { "epoch": 0.6558740029741787, "grad_norm": 1.1976351737976074, "learning_rate": 8.465850284412274e-06, "loss": 0.21286773681640625, "step": 9703 }, { "epoch": 0.6559415979451129, "grad_norm": 1.6911176443099976, "learning_rate": 8.462880418235826e-06, "loss": 0.1990814208984375, "step": 9704 }, { "epoch": 0.6560091929160471, "grad_norm": 1.3282485008239746, "learning_rate": 8.459910868365878e-06, "loss": 0.2980499267578125, "step": 9705 }, { "epoch": 0.6560767878869812, "grad_norm": 1.1781513690948486, "learning_rate": 8.456941634946115e-06, "loss": 0.2369232177734375, "step": 9706 }, { "epoch": 0.6561443828579153, "grad_norm": 1.2927836179733276, "learning_rate": 8.4539727181202e-06, "loss": 0.177490234375, "step": 9707 }, { "epoch": 0.6562119778288495, "grad_norm": 1.1566444635391235, "learning_rate": 8.451004118031805e-06, "loss": 0.233642578125, "step": 9708 }, { "epoch": 0.6562795727997837, "grad_norm": 1.1633836030960083, "learning_rate": 8.448035834824548e-06, "loss": 0.2099761962890625, "step": 9709 }, { "epoch": 0.6563471677707179, "grad_norm": 2.019723653793335, "learning_rate": 8.445067868642075e-06, "loss": 0.2818450927734375, "step": 9710 }, { "epoch": 0.656414762741652, "grad_norm": 1.0734050273895264, "learning_rate": 8.44210021962798e-06, "loss": 0.17584228515625, "step": 9711 }, { "epoch": 0.6564823577125862, "grad_norm": 1.4891244173049927, "learning_rate": 8.43913288792585e-06, "loss": 0.283447265625, "step": 9712 }, { "epoch": 0.6565499526835203, "grad_norm": 1.0280765295028687, "learning_rate": 8.436165873679286e-06, "loss": 0.2059326171875, "step": 9713 }, { "epoch": 0.6566175476544545, "grad_norm": 1.3619478940963745, "learning_rate": 8.433199177031825e-06, "loss": 0.1663055419921875, "step": 9714 }, { "epoch": 0.6566851426253887, "grad_norm": 1.787182331085205, "learning_rate": 8.43023279812703e-06, "loss": 0.332977294921875, "step": 9715 }, { "epoch": 0.6567527375963228, "grad_norm": 1.4072571992874146, "learning_rate": 8.427266737108432e-06, "loss": 0.300048828125, "step": 9716 }, { "epoch": 0.656820332567257, "grad_norm": 1.0881634950637817, "learning_rate": 8.424300994119544e-06, "loss": 0.2042236328125, "step": 9717 }, { "epoch": 0.6568879275381911, "grad_norm": 0.9766320586204529, "learning_rate": 8.421335569303867e-06, "loss": 0.212493896484375, "step": 9718 }, { "epoch": 0.6569555225091254, "grad_norm": 1.7322330474853516, "learning_rate": 8.41837046280489e-06, "loss": 0.35797119140625, "step": 9719 }, { "epoch": 0.6570231174800595, "grad_norm": 1.7398040294647217, "learning_rate": 8.415405674766071e-06, "loss": 0.25634765625, "step": 9720 }, { "epoch": 0.6570907124509936, "grad_norm": 0.9646586775779724, "learning_rate": 8.412441205330888e-06, "loss": 0.217041015625, "step": 9721 }, { "epoch": 0.6571583074219278, "grad_norm": 0.8393633365631104, "learning_rate": 8.409477054642756e-06, "loss": 0.194183349609375, "step": 9722 }, { "epoch": 0.6572259023928619, "grad_norm": 1.3331621885299683, "learning_rate": 8.406513222845121e-06, "loss": 0.1830902099609375, "step": 9723 }, { "epoch": 0.6572934973637962, "grad_norm": 0.7575993537902832, "learning_rate": 8.403549710081375e-06, "loss": 0.12995147705078125, "step": 9724 }, { "epoch": 0.6573610923347303, "grad_norm": 0.8574427366256714, "learning_rate": 8.400586516494913e-06, "loss": 0.11724853515625, "step": 9725 }, { "epoch": 0.6574286873056645, "grad_norm": 0.9991349577903748, "learning_rate": 8.397623642229126e-06, "loss": 0.16897201538085938, "step": 9726 }, { "epoch": 0.6574962822765986, "grad_norm": 0.710125207901001, "learning_rate": 8.394661087427355e-06, "loss": 0.12789535522460938, "step": 9727 }, { "epoch": 0.6575638772475327, "grad_norm": 1.1649348735809326, "learning_rate": 8.391698852232965e-06, "loss": 0.203094482421875, "step": 9728 }, { "epoch": 0.657631472218467, "grad_norm": 1.3627939224243164, "learning_rate": 8.388736936789281e-06, "loss": 0.215850830078125, "step": 9729 }, { "epoch": 0.6576990671894011, "grad_norm": 1.4925552606582642, "learning_rate": 8.38577534123962e-06, "loss": 0.23211669921875, "step": 9730 }, { "epoch": 0.6577666621603353, "grad_norm": 1.1395831108093262, "learning_rate": 8.38281406572728e-06, "loss": 0.226898193359375, "step": 9731 }, { "epoch": 0.6578342571312694, "grad_norm": 1.2562401294708252, "learning_rate": 8.37985311039555e-06, "loss": 0.286102294921875, "step": 9732 }, { "epoch": 0.6579018521022036, "grad_norm": 0.8492088317871094, "learning_rate": 8.37689247538769e-06, "loss": 0.1461181640625, "step": 9733 }, { "epoch": 0.6579694470731378, "grad_norm": 1.1846425533294678, "learning_rate": 8.37393216084697e-06, "loss": 0.27593994140625, "step": 9734 }, { "epoch": 0.6580370420440719, "grad_norm": 1.0079199075698853, "learning_rate": 8.370972166916616e-06, "loss": 0.225311279296875, "step": 9735 }, { "epoch": 0.6581046370150061, "grad_norm": 0.7042648196220398, "learning_rate": 8.368012493739847e-06, "loss": 0.16015625, "step": 9736 }, { "epoch": 0.6581722319859402, "grad_norm": 1.3145240545272827, "learning_rate": 8.36505314145989e-06, "loss": 0.27069091796875, "step": 9737 }, { "epoch": 0.6582398269568744, "grad_norm": 1.4425150156021118, "learning_rate": 8.362094110219911e-06, "loss": 0.275909423828125, "step": 9738 }, { "epoch": 0.6583074219278086, "grad_norm": 1.6684972047805786, "learning_rate": 8.359135400163111e-06, "loss": 0.29315185546875, "step": 9739 }, { "epoch": 0.6583750168987428, "grad_norm": 1.1320194005966187, "learning_rate": 8.35617701143263e-06, "loss": 0.2162628173828125, "step": 9740 }, { "epoch": 0.6584426118696769, "grad_norm": 0.8243952393531799, "learning_rate": 8.353218944171627e-06, "loss": 0.1927337646484375, "step": 9741 }, { "epoch": 0.658510206840611, "grad_norm": 0.8521551489830017, "learning_rate": 8.350261198523229e-06, "loss": 0.18267822265625, "step": 9742 }, { "epoch": 0.6585778018115452, "grad_norm": 2.050938367843628, "learning_rate": 8.347303774630547e-06, "loss": 0.2799072265625, "step": 9743 }, { "epoch": 0.6586453967824794, "grad_norm": 1.0955904722213745, "learning_rate": 8.344346672636681e-06, "loss": 0.2574462890625, "step": 9744 }, { "epoch": 0.6587129917534136, "grad_norm": 0.519183874130249, "learning_rate": 8.341389892684716e-06, "loss": 0.1175689697265625, "step": 9745 }, { "epoch": 0.6587805867243477, "grad_norm": 1.0473787784576416, "learning_rate": 8.338433434917708e-06, "loss": 0.209381103515625, "step": 9746 }, { "epoch": 0.6588481816952819, "grad_norm": 1.3489340543746948, "learning_rate": 8.335477299478733e-06, "loss": 0.2188568115234375, "step": 9747 }, { "epoch": 0.658915776666216, "grad_norm": 0.5724443197250366, "learning_rate": 8.332521486510806e-06, "loss": 0.08148193359375, "step": 9748 }, { "epoch": 0.6589833716371502, "grad_norm": 1.3504762649536133, "learning_rate": 8.329565996156946e-06, "loss": 0.254302978515625, "step": 9749 }, { "epoch": 0.6590509666080844, "grad_norm": 1.099846363067627, "learning_rate": 8.326610828560182e-06, "loss": 0.2108154296875, "step": 9750 }, { "epoch": 0.6591185615790185, "grad_norm": 1.5786659717559814, "learning_rate": 8.323655983863471e-06, "loss": 0.298828125, "step": 9751 }, { "epoch": 0.6591861565499527, "grad_norm": 1.851562261581421, "learning_rate": 8.320701462209812e-06, "loss": 0.2348480224609375, "step": 9752 }, { "epoch": 0.6592537515208868, "grad_norm": 1.9112911224365234, "learning_rate": 8.317747263742155e-06, "loss": 0.210052490234375, "step": 9753 }, { "epoch": 0.6593213464918211, "grad_norm": 1.140405297279358, "learning_rate": 8.314793388603439e-06, "loss": 0.2037353515625, "step": 9754 }, { "epoch": 0.6593889414627552, "grad_norm": 1.1210025548934937, "learning_rate": 8.311839836936596e-06, "loss": 0.193939208984375, "step": 9755 }, { "epoch": 0.6594565364336893, "grad_norm": 0.8082180023193359, "learning_rate": 8.308886608884534e-06, "loss": 0.190399169921875, "step": 9756 }, { "epoch": 0.6595241314046235, "grad_norm": 1.104473352432251, "learning_rate": 8.305933704590149e-06, "loss": 0.181304931640625, "step": 9757 }, { "epoch": 0.6595917263755576, "grad_norm": 1.279786467552185, "learning_rate": 8.302981124196322e-06, "loss": 0.2579345703125, "step": 9758 }, { "epoch": 0.6596593213464919, "grad_norm": 1.0452314615249634, "learning_rate": 8.300028867845919e-06, "loss": 0.1262054443359375, "step": 9759 }, { "epoch": 0.659726916317426, "grad_norm": 0.6702598333358765, "learning_rate": 8.297076935681782e-06, "loss": 0.0948486328125, "step": 9760 }, { "epoch": 0.6597945112883602, "grad_norm": 1.5774391889572144, "learning_rate": 8.29412532784675e-06, "loss": 0.2230987548828125, "step": 9761 }, { "epoch": 0.6598621062592943, "grad_norm": 0.8263081312179565, "learning_rate": 8.291174044483631e-06, "loss": 0.1215057373046875, "step": 9762 }, { "epoch": 0.6599297012302284, "grad_norm": 1.0017571449279785, "learning_rate": 8.288223085735248e-06, "loss": 0.156646728515625, "step": 9763 }, { "epoch": 0.6599972962011627, "grad_norm": 1.1675808429718018, "learning_rate": 8.285272451744357e-06, "loss": 0.1552734375, "step": 9764 }, { "epoch": 0.6600648911720968, "grad_norm": 0.7735393047332764, "learning_rate": 8.28232214265375e-06, "loss": 0.1665191650390625, "step": 9765 }, { "epoch": 0.660132486143031, "grad_norm": 0.9995909929275513, "learning_rate": 8.279372158606176e-06, "loss": 0.22735595703125, "step": 9766 }, { "epoch": 0.6602000811139651, "grad_norm": 1.1575819253921509, "learning_rate": 8.276422499744371e-06, "loss": 0.2078704833984375, "step": 9767 }, { "epoch": 0.6602676760848993, "grad_norm": 1.0438634157180786, "learning_rate": 8.273473166211059e-06, "loss": 0.245147705078125, "step": 9768 }, { "epoch": 0.6603352710558335, "grad_norm": 2.896207571029663, "learning_rate": 8.270524158148946e-06, "loss": 0.375152587890625, "step": 9769 }, { "epoch": 0.6604028660267676, "grad_norm": 1.2364295721054077, "learning_rate": 8.267575475700729e-06, "loss": 0.217864990234375, "step": 9770 }, { "epoch": 0.6604704609977018, "grad_norm": 2.3562939167022705, "learning_rate": 8.264627119009074e-06, "loss": 0.29669189453125, "step": 9771 }, { "epoch": 0.6605380559686359, "grad_norm": 2.4974207878112793, "learning_rate": 8.261679088216645e-06, "loss": 0.30523681640625, "step": 9772 }, { "epoch": 0.6606056509395701, "grad_norm": 1.274694561958313, "learning_rate": 8.25873138346609e-06, "loss": 0.214202880859375, "step": 9773 }, { "epoch": 0.6606732459105042, "grad_norm": 1.2556484937667847, "learning_rate": 8.255784004900034e-06, "loss": 0.263397216796875, "step": 9774 }, { "epoch": 0.6607408408814385, "grad_norm": 0.887398898601532, "learning_rate": 8.252836952661083e-06, "loss": 0.194244384765625, "step": 9775 }, { "epoch": 0.6608084358523726, "grad_norm": 1.1917227506637573, "learning_rate": 8.249890226891852e-06, "loss": 0.22198486328125, "step": 9776 }, { "epoch": 0.6608760308233067, "grad_norm": 1.4399971961975098, "learning_rate": 8.246943827734899e-06, "loss": 0.23541259765625, "step": 9777 }, { "epoch": 0.6609436257942409, "grad_norm": 1.1215026378631592, "learning_rate": 8.243997755332806e-06, "loss": 0.227325439453125, "step": 9778 }, { "epoch": 0.661011220765175, "grad_norm": 1.0045452117919922, "learning_rate": 8.241052009828118e-06, "loss": 0.2415771484375, "step": 9779 }, { "epoch": 0.6610788157361093, "grad_norm": 2.23966908454895, "learning_rate": 8.238106591363365e-06, "loss": 0.3507232666015625, "step": 9780 }, { "epoch": 0.6611464107070434, "grad_norm": 1.2695530652999878, "learning_rate": 8.235161500081068e-06, "loss": 0.251129150390625, "step": 9781 }, { "epoch": 0.6612140056779776, "grad_norm": 2.0700619220733643, "learning_rate": 8.232216736123728e-06, "loss": 0.254638671875, "step": 9782 }, { "epoch": 0.6612816006489117, "grad_norm": 1.0704536437988281, "learning_rate": 8.22927229963383e-06, "loss": 0.173553466796875, "step": 9783 }, { "epoch": 0.6613491956198458, "grad_norm": 1.88365638256073, "learning_rate": 8.226328190753847e-06, "loss": 0.2576751708984375, "step": 9784 }, { "epoch": 0.6614167905907801, "grad_norm": 2.377331256866455, "learning_rate": 8.223384409626227e-06, "loss": 0.34844970703125, "step": 9785 }, { "epoch": 0.6614843855617142, "grad_norm": 1.3914557695388794, "learning_rate": 8.220440956393416e-06, "loss": 0.293487548828125, "step": 9786 }, { "epoch": 0.6615519805326484, "grad_norm": 1.2842391729354858, "learning_rate": 8.217497831197833e-06, "loss": 0.27081298828125, "step": 9787 }, { "epoch": 0.6616195755035825, "grad_norm": 1.211037516593933, "learning_rate": 8.214555034181877e-06, "loss": 0.169525146484375, "step": 9788 }, { "epoch": 0.6616871704745168, "grad_norm": 2.0407092571258545, "learning_rate": 8.21161256548796e-06, "loss": 0.2513275146484375, "step": 9789 }, { "epoch": 0.6617547654454509, "grad_norm": 2.012190818786621, "learning_rate": 8.20867042525843e-06, "loss": 0.267822265625, "step": 9790 }, { "epoch": 0.661822360416385, "grad_norm": 1.3964130878448486, "learning_rate": 8.205728613635668e-06, "loss": 0.27825927734375, "step": 9791 }, { "epoch": 0.6618899553873192, "grad_norm": 1.2385884523391724, "learning_rate": 8.202787130762007e-06, "loss": 0.22747802734375, "step": 9792 }, { "epoch": 0.6619575503582533, "grad_norm": 1.404977560043335, "learning_rate": 8.199845976779779e-06, "loss": 0.31231689453125, "step": 9793 }, { "epoch": 0.6620251453291875, "grad_norm": 2.057192325592041, "learning_rate": 8.196905151831287e-06, "loss": 0.291534423828125, "step": 9794 }, { "epoch": 0.6620927403001217, "grad_norm": 1.520811915397644, "learning_rate": 8.193964656058837e-06, "loss": 0.305328369140625, "step": 9795 }, { "epoch": 0.6621603352710559, "grad_norm": 1.3002911806106567, "learning_rate": 8.191024489604702e-06, "loss": 0.2701416015625, "step": 9796 }, { "epoch": 0.66222793024199, "grad_norm": 1.5514966249465942, "learning_rate": 8.188084652611146e-06, "loss": 0.2733154296875, "step": 9797 }, { "epoch": 0.6622955252129241, "grad_norm": 1.0593137741088867, "learning_rate": 8.185145145220414e-06, "loss": 0.1681365966796875, "step": 9798 }, { "epoch": 0.6623631201838583, "grad_norm": 1.4680535793304443, "learning_rate": 8.18220596757474e-06, "loss": 0.190521240234375, "step": 9799 }, { "epoch": 0.6624307151547925, "grad_norm": 1.2598689794540405, "learning_rate": 8.17926711981635e-06, "loss": 0.23675537109375, "step": 9800 }, { "epoch": 0.6624983101257267, "grad_norm": 1.2291581630706787, "learning_rate": 8.176328602087422e-06, "loss": 0.176849365234375, "step": 9801 }, { "epoch": 0.6625659050966608, "grad_norm": 1.6180317401885986, "learning_rate": 8.173390414530167e-06, "loss": 0.27386474609375, "step": 9802 }, { "epoch": 0.6626335000675949, "grad_norm": 1.2340795993804932, "learning_rate": 8.170452557286725e-06, "loss": 0.229705810546875, "step": 9803 }, { "epoch": 0.6627010950385291, "grad_norm": 1.3831758499145508, "learning_rate": 8.167515030499265e-06, "loss": 0.18084716796875, "step": 9804 }, { "epoch": 0.6627686900094633, "grad_norm": 1.5333940982818604, "learning_rate": 8.164577834309926e-06, "loss": 0.2876434326171875, "step": 9805 }, { "epoch": 0.6628362849803975, "grad_norm": 1.0417683124542236, "learning_rate": 8.161640968860808e-06, "loss": 0.1524505615234375, "step": 9806 }, { "epoch": 0.6629038799513316, "grad_norm": 0.9354917407035828, "learning_rate": 8.158704434294034e-06, "loss": 0.2533721923828125, "step": 9807 }, { "epoch": 0.6629714749222658, "grad_norm": 1.0875951051712036, "learning_rate": 8.155768230751685e-06, "loss": 0.2415924072265625, "step": 9808 }, { "epoch": 0.6630390698931999, "grad_norm": 2.2317025661468506, "learning_rate": 8.152832358375832e-06, "loss": 0.327850341796875, "step": 9809 }, { "epoch": 0.663106664864134, "grad_norm": 1.0167961120605469, "learning_rate": 8.14989681730853e-06, "loss": 0.17612457275390625, "step": 9810 }, { "epoch": 0.6631742598350683, "grad_norm": 1.1776655912399292, "learning_rate": 8.146961607691823e-06, "loss": 0.30889892578125, "step": 9811 }, { "epoch": 0.6632418548060024, "grad_norm": 0.8961367011070251, "learning_rate": 8.144026729667725e-06, "loss": 0.2545166015625, "step": 9812 }, { "epoch": 0.6633094497769366, "grad_norm": 1.0081356763839722, "learning_rate": 8.141092183378263e-06, "loss": 0.18780517578125, "step": 9813 }, { "epoch": 0.6633770447478707, "grad_norm": 1.304523229598999, "learning_rate": 8.138157968965405e-06, "loss": 0.28118896484375, "step": 9814 }, { "epoch": 0.663444639718805, "grad_norm": 1.3987338542938232, "learning_rate": 8.135224086571148e-06, "loss": 0.34014892578125, "step": 9815 }, { "epoch": 0.6635122346897391, "grad_norm": 1.521306037902832, "learning_rate": 8.132290536337431e-06, "loss": 0.25335693359375, "step": 9816 }, { "epoch": 0.6635798296606732, "grad_norm": 1.1326133012771606, "learning_rate": 8.129357318406213e-06, "loss": 0.256988525390625, "step": 9817 }, { "epoch": 0.6636474246316074, "grad_norm": 1.4323714971542358, "learning_rate": 8.126424432919422e-06, "loss": 0.209075927734375, "step": 9818 }, { "epoch": 0.6637150196025415, "grad_norm": 1.7446470260620117, "learning_rate": 8.123491880018951e-06, "loss": 0.3165283203125, "step": 9819 }, { "epoch": 0.6637826145734758, "grad_norm": 1.891797423362732, "learning_rate": 8.120559659846714e-06, "loss": 0.3253173828125, "step": 9820 }, { "epoch": 0.6638502095444099, "grad_norm": 0.9721418619155884, "learning_rate": 8.117627772544582e-06, "loss": 0.1948699951171875, "step": 9821 }, { "epoch": 0.6639178045153441, "grad_norm": 1.2895691394805908, "learning_rate": 8.11469621825442e-06, "loss": 0.239166259765625, "step": 9822 }, { "epoch": 0.6639853994862782, "grad_norm": 1.150774598121643, "learning_rate": 8.111764997118076e-06, "loss": 0.19085693359375, "step": 9823 }, { "epoch": 0.6640529944572123, "grad_norm": 1.3615785837173462, "learning_rate": 8.108834109277376e-06, "loss": 0.33154296875, "step": 9824 }, { "epoch": 0.6641205894281466, "grad_norm": 1.0895148515701294, "learning_rate": 8.105903554874133e-06, "loss": 0.256744384765625, "step": 9825 }, { "epoch": 0.6641881843990807, "grad_norm": 1.6453944444656372, "learning_rate": 8.102973334050162e-06, "loss": 0.2059326171875, "step": 9826 }, { "epoch": 0.6642557793700149, "grad_norm": 1.6046843528747559, "learning_rate": 8.10004344694722e-06, "loss": 0.22796630859375, "step": 9827 }, { "epoch": 0.664323374340949, "grad_norm": 1.62650728225708, "learning_rate": 8.097113893707095e-06, "loss": 0.313751220703125, "step": 9828 }, { "epoch": 0.6643909693118832, "grad_norm": 0.9048435091972351, "learning_rate": 8.094184674471534e-06, "loss": 0.228515625, "step": 9829 }, { "epoch": 0.6644585642828174, "grad_norm": 0.8439739346504211, "learning_rate": 8.09125578938225e-06, "loss": 0.174713134765625, "step": 9830 }, { "epoch": 0.6645261592537515, "grad_norm": 2.2905499935150146, "learning_rate": 8.088327238580991e-06, "loss": 0.291595458984375, "step": 9831 }, { "epoch": 0.6645937542246857, "grad_norm": 1.440183162689209, "learning_rate": 8.08539902220943e-06, "loss": 0.3092041015625, "step": 9832 }, { "epoch": 0.6646613491956198, "grad_norm": 1.9308011531829834, "learning_rate": 8.082471140409272e-06, "loss": 0.24517822265625, "step": 9833 }, { "epoch": 0.664728944166554, "grad_norm": 1.3631635904312134, "learning_rate": 8.079543593322182e-06, "loss": 0.27606201171875, "step": 9834 }, { "epoch": 0.6647965391374882, "grad_norm": 1.9859709739685059, "learning_rate": 8.076616381089808e-06, "loss": 0.31829833984375, "step": 9835 }, { "epoch": 0.6648641341084224, "grad_norm": 0.7695609927177429, "learning_rate": 8.07368950385379e-06, "loss": 0.1427001953125, "step": 9836 }, { "epoch": 0.6649317290793565, "grad_norm": 1.868573546409607, "learning_rate": 8.070762961755747e-06, "loss": 0.233154296875, "step": 9837 }, { "epoch": 0.6649993240502906, "grad_norm": 0.8912971615791321, "learning_rate": 8.067836754937282e-06, "loss": 0.142333984375, "step": 9838 }, { "epoch": 0.6650669190212248, "grad_norm": 1.0071436166763306, "learning_rate": 8.064910883539995e-06, "loss": 0.18780517578125, "step": 9839 }, { "epoch": 0.665134513992159, "grad_norm": 1.3518797159194946, "learning_rate": 8.061985347705443e-06, "loss": 0.28076171875, "step": 9840 }, { "epoch": 0.6652021089630932, "grad_norm": 1.5597270727157593, "learning_rate": 8.05906014757518e-06, "loss": 0.2618408203125, "step": 9841 }, { "epoch": 0.6652697039340273, "grad_norm": 0.8652744293212891, "learning_rate": 8.056135283290765e-06, "loss": 0.1526031494140625, "step": 9842 }, { "epoch": 0.6653372989049615, "grad_norm": 1.3514431715011597, "learning_rate": 8.053210754993697e-06, "loss": 0.16016387939453125, "step": 9843 }, { "epoch": 0.6654048938758956, "grad_norm": 1.3302069902420044, "learning_rate": 8.050286562825499e-06, "loss": 0.2478809356689453, "step": 9844 }, { "epoch": 0.6654724888468297, "grad_norm": 1.0798951387405396, "learning_rate": 8.04736270692766e-06, "loss": 0.192352294921875, "step": 9845 }, { "epoch": 0.665540083817764, "grad_norm": 1.3072603940963745, "learning_rate": 8.044439187441648e-06, "loss": 0.26788330078125, "step": 9846 }, { "epoch": 0.6656076787886981, "grad_norm": 1.4357848167419434, "learning_rate": 8.041516004508924e-06, "loss": 0.2294769287109375, "step": 9847 }, { "epoch": 0.6656752737596323, "grad_norm": 1.0133135318756104, "learning_rate": 8.038593158270932e-06, "loss": 0.13091278076171875, "step": 9848 }, { "epoch": 0.6657428687305664, "grad_norm": 1.003961443901062, "learning_rate": 8.035670648869094e-06, "loss": 0.239959716796875, "step": 9849 }, { "epoch": 0.6658104637015007, "grad_norm": 0.996309757232666, "learning_rate": 8.032748476444821e-06, "loss": 0.2045135498046875, "step": 9850 }, { "epoch": 0.6658780586724348, "grad_norm": 1.1957814693450928, "learning_rate": 8.0298266411395e-06, "loss": 0.246490478515625, "step": 9851 }, { "epoch": 0.6659456536433689, "grad_norm": 1.0880343914031982, "learning_rate": 8.026905143094525e-06, "loss": 0.2615966796875, "step": 9852 }, { "epoch": 0.6660132486143031, "grad_norm": 0.7949883341789246, "learning_rate": 8.023983982451238e-06, "loss": 0.10797882080078125, "step": 9853 }, { "epoch": 0.6660808435852372, "grad_norm": 1.2593973875045776, "learning_rate": 8.021063159350986e-06, "loss": 0.314544677734375, "step": 9854 }, { "epoch": 0.6661484385561715, "grad_norm": 0.9061602354049683, "learning_rate": 8.018142673935107e-06, "loss": 0.18624114990234375, "step": 9855 }, { "epoch": 0.6662160335271056, "grad_norm": 1.4693585634231567, "learning_rate": 8.015222526344897e-06, "loss": 0.30694580078125, "step": 9856 }, { "epoch": 0.6662836284980398, "grad_norm": 1.6288537979125977, "learning_rate": 8.012302716721663e-06, "loss": 0.2641448974609375, "step": 9857 }, { "epoch": 0.6663512234689739, "grad_norm": 1.127349853515625, "learning_rate": 8.00938324520668e-06, "loss": 0.201324462890625, "step": 9858 }, { "epoch": 0.666418818439908, "grad_norm": 1.1921168565750122, "learning_rate": 8.006464111941211e-06, "loss": 0.236968994140625, "step": 9859 }, { "epoch": 0.6664864134108422, "grad_norm": 1.6327921152114868, "learning_rate": 8.003545317066501e-06, "loss": 0.19384765625, "step": 9860 }, { "epoch": 0.6665540083817764, "grad_norm": 1.5575976371765137, "learning_rate": 8.000626860723778e-06, "loss": 0.2846527099609375, "step": 9861 }, { "epoch": 0.6666216033527106, "grad_norm": 1.7246447801589966, "learning_rate": 7.997708743054254e-06, "loss": 0.2982177734375, "step": 9862 }, { "epoch": 0.6666891983236447, "grad_norm": 1.7079826593399048, "learning_rate": 7.99479096419913e-06, "loss": 0.2373809814453125, "step": 9863 }, { "epoch": 0.6667567932945789, "grad_norm": 2.6748318672180176, "learning_rate": 7.991873524299583e-06, "loss": 0.3890380859375, "step": 9864 }, { "epoch": 0.666824388265513, "grad_norm": 1.7267204523086548, "learning_rate": 7.98895642349678e-06, "loss": 0.20318603515625, "step": 9865 }, { "epoch": 0.6668919832364472, "grad_norm": 1.4007776975631714, "learning_rate": 7.986039661931865e-06, "loss": 0.189453125, "step": 9866 }, { "epoch": 0.6669595782073814, "grad_norm": 2.253199815750122, "learning_rate": 7.983123239745965e-06, "loss": 0.34063720703125, "step": 9867 }, { "epoch": 0.6670271731783155, "grad_norm": 2.1374309062957764, "learning_rate": 7.980207157080211e-06, "loss": 0.28106689453125, "step": 9868 }, { "epoch": 0.6670947681492497, "grad_norm": 2.5706887245178223, "learning_rate": 7.977291414075681e-06, "loss": 0.297027587890625, "step": 9869 }, { "epoch": 0.6671623631201838, "grad_norm": 1.1937657594680786, "learning_rate": 7.974376010873469e-06, "loss": 0.2141876220703125, "step": 9870 }, { "epoch": 0.6672299580911181, "grad_norm": 0.9155257344245911, "learning_rate": 7.971460947614639e-06, "loss": 0.1700592041015625, "step": 9871 }, { "epoch": 0.6672975530620522, "grad_norm": 1.8078926801681519, "learning_rate": 7.968546224440239e-06, "loss": 0.268402099609375, "step": 9872 }, { "epoch": 0.6673651480329863, "grad_norm": 1.403389811515808, "learning_rate": 7.9656318414913e-06, "loss": 0.29010009765625, "step": 9873 }, { "epoch": 0.6674327430039205, "grad_norm": 1.2468093633651733, "learning_rate": 7.962717798908839e-06, "loss": 0.212860107421875, "step": 9874 }, { "epoch": 0.6675003379748546, "grad_norm": 1.6646051406860352, "learning_rate": 7.959804096833854e-06, "loss": 0.2060089111328125, "step": 9875 }, { "epoch": 0.6675679329457889, "grad_norm": 0.9486430883407593, "learning_rate": 7.956890735407331e-06, "loss": 0.2249755859375, "step": 9876 }, { "epoch": 0.667635527916723, "grad_norm": 1.1374057531356812, "learning_rate": 7.953977714770236e-06, "loss": 0.14620208740234375, "step": 9877 }, { "epoch": 0.6677031228876572, "grad_norm": 0.8018350601196289, "learning_rate": 7.951065035063515e-06, "loss": 0.111419677734375, "step": 9878 }, { "epoch": 0.6677707178585913, "grad_norm": 1.1153010129928589, "learning_rate": 7.948152696428106e-06, "loss": 0.16606903076171875, "step": 9879 }, { "epoch": 0.6678383128295254, "grad_norm": 1.625230073928833, "learning_rate": 7.945240699004918e-06, "loss": 0.2224273681640625, "step": 9880 }, { "epoch": 0.6679059078004597, "grad_norm": 1.2962557077407837, "learning_rate": 7.94232904293487e-06, "loss": 0.163665771484375, "step": 9881 }, { "epoch": 0.6679735027713938, "grad_norm": 0.8924061059951782, "learning_rate": 7.939417728358822e-06, "loss": 0.196624755859375, "step": 9882 }, { "epoch": 0.668041097742328, "grad_norm": 1.354074239730835, "learning_rate": 7.936506755417661e-06, "loss": 0.2022552490234375, "step": 9883 }, { "epoch": 0.6681086927132621, "grad_norm": 1.028648018836975, "learning_rate": 7.933596124252227e-06, "loss": 0.1765899658203125, "step": 9884 }, { "epoch": 0.6681762876841963, "grad_norm": 1.1574243307113647, "learning_rate": 7.930685835003362e-06, "loss": 0.282958984375, "step": 9885 }, { "epoch": 0.6682438826551305, "grad_norm": 0.830230176448822, "learning_rate": 7.927775887811879e-06, "loss": 0.1347198486328125, "step": 9886 }, { "epoch": 0.6683114776260646, "grad_norm": 1.1698962450027466, "learning_rate": 7.92486628281858e-06, "loss": 0.201690673828125, "step": 9887 }, { "epoch": 0.6683790725969988, "grad_norm": 1.173871397972107, "learning_rate": 7.921957020164248e-06, "loss": 0.26983642578125, "step": 9888 }, { "epoch": 0.6684466675679329, "grad_norm": 2.1549899578094482, "learning_rate": 7.919048099989656e-06, "loss": 0.33502197265625, "step": 9889 }, { "epoch": 0.6685142625388671, "grad_norm": 1.3130989074707031, "learning_rate": 7.916139522435551e-06, "loss": 0.219573974609375, "step": 9890 }, { "epoch": 0.6685818575098013, "grad_norm": 1.2790299654006958, "learning_rate": 7.913231287642664e-06, "loss": 0.271240234375, "step": 9891 }, { "epoch": 0.6686494524807355, "grad_norm": 0.8229416012763977, "learning_rate": 7.910323395751732e-06, "loss": 0.1460285186767578, "step": 9892 }, { "epoch": 0.6687170474516696, "grad_norm": 0.5620315670967102, "learning_rate": 7.907415846903433e-06, "loss": 0.065338134765625, "step": 9893 }, { "epoch": 0.6687846424226037, "grad_norm": 1.2509137392044067, "learning_rate": 7.904508641238475e-06, "loss": 0.15472412109375, "step": 9894 }, { "epoch": 0.6688522373935379, "grad_norm": 1.413560390472412, "learning_rate": 7.901601778897502e-06, "loss": 0.2681884765625, "step": 9895 }, { "epoch": 0.6689198323644721, "grad_norm": 1.5520879030227661, "learning_rate": 7.898695260021187e-06, "loss": 0.29937744140625, "step": 9896 }, { "epoch": 0.6689874273354063, "grad_norm": 1.4978035688400269, "learning_rate": 7.895789084750159e-06, "loss": 0.2445831298828125, "step": 9897 }, { "epoch": 0.6690550223063404, "grad_norm": 1.4508118629455566, "learning_rate": 7.892883253225035e-06, "loss": 0.28692626953125, "step": 9898 }, { "epoch": 0.6691226172772746, "grad_norm": 1.8214104175567627, "learning_rate": 7.889977765586417e-06, "loss": 0.254913330078125, "step": 9899 }, { "epoch": 0.6691902122482087, "grad_norm": 1.4562536478042603, "learning_rate": 7.887072621974892e-06, "loss": 0.23663330078125, "step": 9900 }, { "epoch": 0.6692578072191429, "grad_norm": 1.0594768524169922, "learning_rate": 7.884167822531026e-06, "loss": 0.1526641845703125, "step": 9901 }, { "epoch": 0.6693254021900771, "grad_norm": 1.3618992567062378, "learning_rate": 7.881263367395376e-06, "loss": 0.216583251953125, "step": 9902 }, { "epoch": 0.6693929971610112, "grad_norm": 1.5948835611343384, "learning_rate": 7.878359256708476e-06, "loss": 0.234100341796875, "step": 9903 }, { "epoch": 0.6694605921319454, "grad_norm": 1.2853246927261353, "learning_rate": 7.875455490610837e-06, "loss": 0.1874847412109375, "step": 9904 }, { "epoch": 0.6695281871028795, "grad_norm": 0.895232617855072, "learning_rate": 7.872552069242982e-06, "loss": 0.139404296875, "step": 9905 }, { "epoch": 0.6695957820738138, "grad_norm": 1.145602822303772, "learning_rate": 7.869648992745369e-06, "loss": 0.185943603515625, "step": 9906 }, { "epoch": 0.6696633770447479, "grad_norm": 1.81386137008667, "learning_rate": 7.866746261258493e-06, "loss": 0.234039306640625, "step": 9907 }, { "epoch": 0.669730972015682, "grad_norm": 1.589070200920105, "learning_rate": 7.863843874922784e-06, "loss": 0.24249267578125, "step": 9908 }, { "epoch": 0.6697985669866162, "grad_norm": 1.2045128345489502, "learning_rate": 7.860941833878692e-06, "loss": 0.264007568359375, "step": 9909 }, { "epoch": 0.6698661619575503, "grad_norm": 1.0033881664276123, "learning_rate": 7.858040138266637e-06, "loss": 0.203125, "step": 9910 }, { "epoch": 0.6699337569284846, "grad_norm": 1.0868850946426392, "learning_rate": 7.855138788227003e-06, "loss": 0.14898681640625, "step": 9911 }, { "epoch": 0.6700013518994187, "grad_norm": 1.0253404378890991, "learning_rate": 7.852237783900194e-06, "loss": 0.181304931640625, "step": 9912 }, { "epoch": 0.6700689468703529, "grad_norm": 2.062283992767334, "learning_rate": 7.849337125426575e-06, "loss": 0.314727783203125, "step": 9913 }, { "epoch": 0.670136541841287, "grad_norm": 0.7991200089454651, "learning_rate": 7.846436812946493e-06, "loss": 0.1195831298828125, "step": 9914 }, { "epoch": 0.6702041368122211, "grad_norm": 1.6879838705062866, "learning_rate": 7.843536846600287e-06, "loss": 0.2871856689453125, "step": 9915 }, { "epoch": 0.6702717317831554, "grad_norm": 2.2041027545928955, "learning_rate": 7.840637226528273e-06, "loss": 0.276763916015625, "step": 9916 }, { "epoch": 0.6703393267540895, "grad_norm": 1.0938935279846191, "learning_rate": 7.837737952870748e-06, "loss": 0.2279510498046875, "step": 9917 }, { "epoch": 0.6704069217250237, "grad_norm": 1.1601346731185913, "learning_rate": 7.834839025768014e-06, "loss": 0.250213623046875, "step": 9918 }, { "epoch": 0.6704745166959578, "grad_norm": 1.1906001567840576, "learning_rate": 7.831940445360316e-06, "loss": 0.271514892578125, "step": 9919 }, { "epoch": 0.670542111666892, "grad_norm": 1.7041410207748413, "learning_rate": 7.829042211787923e-06, "loss": 0.29949951171875, "step": 9920 }, { "epoch": 0.6706097066378262, "grad_norm": 0.7257993817329407, "learning_rate": 7.826144325191063e-06, "loss": 0.18796539306640625, "step": 9921 }, { "epoch": 0.6706773016087603, "grad_norm": 2.2401609420776367, "learning_rate": 7.823246785709955e-06, "loss": 0.313079833984375, "step": 9922 }, { "epoch": 0.6707448965796945, "grad_norm": 1.1021628379821777, "learning_rate": 7.820349593484806e-06, "loss": 0.218658447265625, "step": 9923 }, { "epoch": 0.6708124915506286, "grad_norm": 1.4983848333358765, "learning_rate": 7.817452748655779e-06, "loss": 0.2518310546875, "step": 9924 }, { "epoch": 0.6708800865215628, "grad_norm": 1.240179181098938, "learning_rate": 7.814556251363062e-06, "loss": 0.19808197021484375, "step": 9925 }, { "epoch": 0.670947681492497, "grad_norm": 1.5778635740280151, "learning_rate": 7.811660101746803e-06, "loss": 0.2012481689453125, "step": 9926 }, { "epoch": 0.6710152764634312, "grad_norm": 1.0113149881362915, "learning_rate": 7.808764299947128e-06, "loss": 0.2613525390625, "step": 9927 }, { "epoch": 0.6710828714343653, "grad_norm": 0.9959412813186646, "learning_rate": 7.805868846104159e-06, "loss": 0.24871826171875, "step": 9928 }, { "epoch": 0.6711504664052994, "grad_norm": 1.5240375995635986, "learning_rate": 7.802973740357996e-06, "loss": 0.26611328125, "step": 9929 }, { "epoch": 0.6712180613762336, "grad_norm": 1.427059292793274, "learning_rate": 7.800078982848715e-06, "loss": 0.204376220703125, "step": 9930 }, { "epoch": 0.6712856563471677, "grad_norm": 0.9046964049339294, "learning_rate": 7.797184573716401e-06, "loss": 0.160736083984375, "step": 9931 }, { "epoch": 0.671353251318102, "grad_norm": 1.0759565830230713, "learning_rate": 7.794290513101079e-06, "loss": 0.19727325439453125, "step": 9932 }, { "epoch": 0.6714208462890361, "grad_norm": 1.8015512228012085, "learning_rate": 7.791396801142798e-06, "loss": 0.28607177734375, "step": 9933 }, { "epoch": 0.6714884412599702, "grad_norm": 0.7151185870170593, "learning_rate": 7.788503437981577e-06, "loss": 0.13759422302246094, "step": 9934 }, { "epoch": 0.6715560362309044, "grad_norm": 1.2388927936553955, "learning_rate": 7.785610423757396e-06, "loss": 0.2572021484375, "step": 9935 }, { "epoch": 0.6716236312018385, "grad_norm": 1.3145097494125366, "learning_rate": 7.782717758610254e-06, "loss": 0.27276611328125, "step": 9936 }, { "epoch": 0.6716912261727728, "grad_norm": 0.7427395582199097, "learning_rate": 7.779825442680111e-06, "loss": 0.196990966796875, "step": 9937 }, { "epoch": 0.6717588211437069, "grad_norm": 1.7327959537506104, "learning_rate": 7.776933476106913e-06, "loss": 0.23077392578125, "step": 9938 }, { "epoch": 0.6718264161146411, "grad_norm": 1.2567366361618042, "learning_rate": 7.774041859030596e-06, "loss": 0.23712158203125, "step": 9939 }, { "epoch": 0.6718940110855752, "grad_norm": 1.0743587017059326, "learning_rate": 7.771150591591071e-06, "loss": 0.26373291015625, "step": 9940 }, { "epoch": 0.6719616060565093, "grad_norm": 1.1679877042770386, "learning_rate": 7.768259673928233e-06, "loss": 0.2462158203125, "step": 9941 }, { "epoch": 0.6720292010274436, "grad_norm": 0.8944918513298035, "learning_rate": 7.76536910618197e-06, "loss": 0.158203125, "step": 9942 }, { "epoch": 0.6720967959983777, "grad_norm": 1.0706865787506104, "learning_rate": 7.762478888492132e-06, "loss": 0.15845489501953125, "step": 9943 }, { "epoch": 0.6721643909693119, "grad_norm": 1.7620407342910767, "learning_rate": 7.75958902099859e-06, "loss": 0.31072998046875, "step": 9944 }, { "epoch": 0.672231985940246, "grad_norm": 1.0616685152053833, "learning_rate": 7.75669950384115e-06, "loss": 0.254241943359375, "step": 9945 }, { "epoch": 0.6722995809111803, "grad_norm": 1.50761079788208, "learning_rate": 7.75381033715963e-06, "loss": 0.26812744140625, "step": 9946 }, { "epoch": 0.6723671758821144, "grad_norm": 1.1598955392837524, "learning_rate": 7.750921521093839e-06, "loss": 0.23370361328125, "step": 9947 }, { "epoch": 0.6724347708530485, "grad_norm": 1.6183183193206787, "learning_rate": 7.748033055783536e-06, "loss": 0.24761962890625, "step": 9948 }, { "epoch": 0.6725023658239827, "grad_norm": 1.6351984739303589, "learning_rate": 7.745144941368501e-06, "loss": 0.286163330078125, "step": 9949 }, { "epoch": 0.6725699607949168, "grad_norm": 0.9631335735321045, "learning_rate": 7.74225717798847e-06, "loss": 0.261138916015625, "step": 9950 }, { "epoch": 0.672637555765851, "grad_norm": 1.089084267616272, "learning_rate": 7.73936976578317e-06, "loss": 0.247222900390625, "step": 9951 }, { "epoch": 0.6727051507367852, "grad_norm": 0.9522059559822083, "learning_rate": 7.736482704892315e-06, "loss": 0.238128662109375, "step": 9952 }, { "epoch": 0.6727727457077194, "grad_norm": 1.1782821416854858, "learning_rate": 7.7335959954556e-06, "loss": 0.196563720703125, "step": 9953 }, { "epoch": 0.6728403406786535, "grad_norm": 1.5095133781433105, "learning_rate": 7.730709637612698e-06, "loss": 0.285125732421875, "step": 9954 }, { "epoch": 0.6729079356495876, "grad_norm": 1.8241559267044067, "learning_rate": 7.72782363150327e-06, "loss": 0.34112548828125, "step": 9955 }, { "epoch": 0.6729755306205218, "grad_norm": 1.848259687423706, "learning_rate": 7.724937977266956e-06, "loss": 0.24713134765625, "step": 9956 }, { "epoch": 0.673043125591456, "grad_norm": 1.6278079748153687, "learning_rate": 7.722052675043396e-06, "loss": 0.2772216796875, "step": 9957 }, { "epoch": 0.6731107205623902, "grad_norm": 1.5228583812713623, "learning_rate": 7.719167724972183e-06, "loss": 0.26837158203125, "step": 9958 }, { "epoch": 0.6731783155333243, "grad_norm": 1.5987637042999268, "learning_rate": 7.71628312719291e-06, "loss": 0.245452880859375, "step": 9959 }, { "epoch": 0.6732459105042585, "grad_norm": 1.5611803531646729, "learning_rate": 7.713398881845167e-06, "loss": 0.26202392578125, "step": 9960 }, { "epoch": 0.6733135054751926, "grad_norm": 1.0380699634552002, "learning_rate": 7.71051498906849e-06, "loss": 0.2567138671875, "step": 9961 }, { "epoch": 0.6733811004461268, "grad_norm": 1.215491533279419, "learning_rate": 7.707631449002437e-06, "loss": 0.23760986328125, "step": 9962 }, { "epoch": 0.673448695417061, "grad_norm": 0.6740692853927612, "learning_rate": 7.704748261786522e-06, "loss": 0.14879226684570312, "step": 9963 }, { "epoch": 0.6735162903879951, "grad_norm": 0.7754976749420166, "learning_rate": 7.701865427560259e-06, "loss": 0.163177490234375, "step": 9964 }, { "epoch": 0.6735838853589293, "grad_norm": 1.4323731660842896, "learning_rate": 7.69898294646313e-06, "loss": 0.306610107421875, "step": 9965 }, { "epoch": 0.6736514803298634, "grad_norm": 2.357257604598999, "learning_rate": 7.696100818634613e-06, "loss": 0.342376708984375, "step": 9966 }, { "epoch": 0.6737190753007977, "grad_norm": 1.2195338010787964, "learning_rate": 7.693219044214154e-06, "loss": 0.2086334228515625, "step": 9967 }, { "epoch": 0.6737866702717318, "grad_norm": 1.5448994636535645, "learning_rate": 7.69033762334121e-06, "loss": 0.2857666015625, "step": 9968 }, { "epoch": 0.6738542652426659, "grad_norm": 1.2719745635986328, "learning_rate": 7.687456556155184e-06, "loss": 0.2825927734375, "step": 9969 }, { "epoch": 0.6739218602136001, "grad_norm": 0.9060397148132324, "learning_rate": 7.684575842795486e-06, "loss": 0.1728973388671875, "step": 9970 }, { "epoch": 0.6739894551845342, "grad_norm": 1.7533613443374634, "learning_rate": 7.681695483401502e-06, "loss": 0.2578582763671875, "step": 9971 }, { "epoch": 0.6740570501554685, "grad_norm": 1.1183056831359863, "learning_rate": 7.678815478112597e-06, "loss": 0.1955718994140625, "step": 9972 }, { "epoch": 0.6741246451264026, "grad_norm": 1.710673451423645, "learning_rate": 7.675935827068143e-06, "loss": 0.317901611328125, "step": 9973 }, { "epoch": 0.6741922400973368, "grad_norm": 1.365464448928833, "learning_rate": 7.673056530407446e-06, "loss": 0.201751708984375, "step": 9974 }, { "epoch": 0.6742598350682709, "grad_norm": 1.131686806678772, "learning_rate": 7.670177588269849e-06, "loss": 0.1841583251953125, "step": 9975 }, { "epoch": 0.674327430039205, "grad_norm": 0.9350654482841492, "learning_rate": 7.667299000794641e-06, "loss": 0.18505859375, "step": 9976 }, { "epoch": 0.6743950250101393, "grad_norm": 1.7859652042388916, "learning_rate": 7.664420768121112e-06, "loss": 0.1942138671875, "step": 9977 }, { "epoch": 0.6744626199810734, "grad_norm": 2.167679786682129, "learning_rate": 7.661542890388524e-06, "loss": 0.26904296875, "step": 9978 }, { "epoch": 0.6745302149520076, "grad_norm": 1.0833277702331543, "learning_rate": 7.658665367736131e-06, "loss": 0.1781005859375, "step": 9979 }, { "epoch": 0.6745978099229417, "grad_norm": 1.3089884519577026, "learning_rate": 7.655788200303159e-06, "loss": 0.249542236328125, "step": 9980 }, { "epoch": 0.6746654048938759, "grad_norm": 1.3875727653503418, "learning_rate": 7.65291138822883e-06, "loss": 0.203826904296875, "step": 9981 }, { "epoch": 0.6747329998648101, "grad_norm": 1.15737783908844, "learning_rate": 7.650034931652338e-06, "loss": 0.256500244140625, "step": 9982 }, { "epoch": 0.6748005948357442, "grad_norm": 1.2089364528656006, "learning_rate": 7.647158830712857e-06, "loss": 0.23944091796875, "step": 9983 }, { "epoch": 0.6748681898066784, "grad_norm": 1.4091850519180298, "learning_rate": 7.644283085549573e-06, "loss": 0.206207275390625, "step": 9984 }, { "epoch": 0.6749357847776125, "grad_norm": 1.0423227548599243, "learning_rate": 7.641407696301605e-06, "loss": 0.27764892578125, "step": 9985 }, { "epoch": 0.6750033797485467, "grad_norm": 1.1464512348175049, "learning_rate": 7.638532663108107e-06, "loss": 0.24383544921875, "step": 9986 }, { "epoch": 0.6750709747194809, "grad_norm": 1.44074285030365, "learning_rate": 7.635657986108167e-06, "loss": 0.2212677001953125, "step": 9987 }, { "epoch": 0.6751385696904151, "grad_norm": 1.4746211767196655, "learning_rate": 7.6327836654409e-06, "loss": 0.3124847412109375, "step": 9988 }, { "epoch": 0.6752061646613492, "grad_norm": 0.6908828020095825, "learning_rate": 7.629909701245376e-06, "loss": 0.17001724243164062, "step": 9989 }, { "epoch": 0.6752737596322833, "grad_norm": 1.950876235961914, "learning_rate": 7.627036093660651e-06, "loss": 0.308929443359375, "step": 9990 }, { "epoch": 0.6753413546032175, "grad_norm": 1.0503143072128296, "learning_rate": 7.6241628428257736e-06, "loss": 0.194183349609375, "step": 9991 }, { "epoch": 0.6754089495741517, "grad_norm": 0.9948955774307251, "learning_rate": 7.62128994887977e-06, "loss": 0.2108154296875, "step": 9992 }, { "epoch": 0.6754765445450859, "grad_norm": 1.0096487998962402, "learning_rate": 7.618417411961642e-06, "loss": 0.178741455078125, "step": 9993 }, { "epoch": 0.67554413951602, "grad_norm": 1.1039798259735107, "learning_rate": 7.615545232210388e-06, "loss": 0.29150390625, "step": 9994 }, { "epoch": 0.6756117344869542, "grad_norm": 1.1927313804626465, "learning_rate": 7.612673409764979e-06, "loss": 0.2515106201171875, "step": 9995 }, { "epoch": 0.6756793294578883, "grad_norm": 0.8234179019927979, "learning_rate": 7.609801944764367e-06, "loss": 0.11907958984375, "step": 9996 }, { "epoch": 0.6757469244288224, "grad_norm": 1.327134609222412, "learning_rate": 7.606930837347504e-06, "loss": 0.19854736328125, "step": 9997 }, { "epoch": 0.6758145193997567, "grad_norm": 1.3763539791107178, "learning_rate": 7.604060087653295e-06, "loss": 0.19525146484375, "step": 9998 }, { "epoch": 0.6758821143706908, "grad_norm": 1.0850086212158203, "learning_rate": 7.601189695820664e-06, "loss": 0.226287841796875, "step": 9999 }, { "epoch": 0.675949709341625, "grad_norm": 0.995334804058075, "learning_rate": 7.598319661988479e-06, "loss": 0.124420166015625, "step": 10000 }, { "epoch": 0.6760173043125591, "grad_norm": 0.7429175972938538, "learning_rate": 7.595449986295623e-06, "loss": 0.2110595703125, "step": 10001 }, { "epoch": 0.6760848992834934, "grad_norm": 0.8903583884239197, "learning_rate": 7.592580668880945e-06, "loss": 0.27325439453125, "step": 10002 }, { "epoch": 0.6761524942544275, "grad_norm": 1.0314791202545166, "learning_rate": 7.589711709883281e-06, "loss": 0.2384033203125, "step": 10003 }, { "epoch": 0.6762200892253616, "grad_norm": 0.9605602622032166, "learning_rate": 7.586843109441449e-06, "loss": 0.216796875, "step": 10004 }, { "epoch": 0.6762876841962958, "grad_norm": 0.4835294485092163, "learning_rate": 7.583974867694251e-06, "loss": 0.08835220336914062, "step": 10005 }, { "epoch": 0.6763552791672299, "grad_norm": 1.1134309768676758, "learning_rate": 7.5811069847804655e-06, "loss": 0.24371337890625, "step": 10006 }, { "epoch": 0.6764228741381642, "grad_norm": 1.1044775247573853, "learning_rate": 7.578239460838863e-06, "loss": 0.1905059814453125, "step": 10007 }, { "epoch": 0.6764904691090983, "grad_norm": 1.3692569732666016, "learning_rate": 7.575372296008191e-06, "loss": 0.265380859375, "step": 10008 }, { "epoch": 0.6765580640800325, "grad_norm": 0.9715110063552856, "learning_rate": 7.572505490427175e-06, "loss": 0.237457275390625, "step": 10009 }, { "epoch": 0.6766256590509666, "grad_norm": 1.4116238355636597, "learning_rate": 7.569639044234545e-06, "loss": 0.26220703125, "step": 10010 }, { "epoch": 0.6766932540219007, "grad_norm": 1.995328664779663, "learning_rate": 7.566772957568977e-06, "loss": 0.244842529296875, "step": 10011 }, { "epoch": 0.676760848992835, "grad_norm": 0.7568061351776123, "learning_rate": 7.563907230569166e-06, "loss": 0.20379638671875, "step": 10012 }, { "epoch": 0.6768284439637691, "grad_norm": 1.0290178060531616, "learning_rate": 7.561041863373764e-06, "loss": 0.170501708984375, "step": 10013 }, { "epoch": 0.6768960389347033, "grad_norm": 1.472050666809082, "learning_rate": 7.558176856121423e-06, "loss": 0.27435302734375, "step": 10014 }, { "epoch": 0.6769636339056374, "grad_norm": 1.7462106943130493, "learning_rate": 7.55531220895077e-06, "loss": 0.220947265625, "step": 10015 }, { "epoch": 0.6770312288765716, "grad_norm": 1.268723964691162, "learning_rate": 7.552447922000398e-06, "loss": 0.198455810546875, "step": 10016 }, { "epoch": 0.6770988238475057, "grad_norm": 1.8820372819900513, "learning_rate": 7.54958399540892e-06, "loss": 0.256866455078125, "step": 10017 }, { "epoch": 0.6771664188184399, "grad_norm": 1.7978789806365967, "learning_rate": 7.546720429314899e-06, "loss": 0.288177490234375, "step": 10018 }, { "epoch": 0.6772340137893741, "grad_norm": 1.4268485307693481, "learning_rate": 7.543857223856897e-06, "loss": 0.1280517578125, "step": 10019 }, { "epoch": 0.6773016087603082, "grad_norm": 2.011176347732544, "learning_rate": 7.5409943791734515e-06, "loss": 0.246124267578125, "step": 10020 }, { "epoch": 0.6773692037312424, "grad_norm": 1.139793872833252, "learning_rate": 7.538131895403085e-06, "loss": 0.235748291015625, "step": 10021 }, { "epoch": 0.6774367987021765, "grad_norm": 0.8326368927955627, "learning_rate": 7.535269772684295e-06, "loss": 0.1905517578125, "step": 10022 }, { "epoch": 0.6775043936731108, "grad_norm": 1.32478666305542, "learning_rate": 7.5324080111555885e-06, "loss": 0.256011962890625, "step": 10023 }, { "epoch": 0.6775719886440449, "grad_norm": 0.8914163112640381, "learning_rate": 7.5295466109554125e-06, "loss": 0.1963653564453125, "step": 10024 }, { "epoch": 0.677639583614979, "grad_norm": 1.0073893070220947, "learning_rate": 7.526685572222233e-06, "loss": 0.179107666015625, "step": 10025 }, { "epoch": 0.6777071785859132, "grad_norm": 0.8944457769393921, "learning_rate": 7.523824895094484e-06, "loss": 0.1336822509765625, "step": 10026 }, { "epoch": 0.6777747735568473, "grad_norm": 1.7418097257614136, "learning_rate": 7.520964579710579e-06, "loss": 0.2978515625, "step": 10027 }, { "epoch": 0.6778423685277816, "grad_norm": 2.1568334102630615, "learning_rate": 7.5181046262089195e-06, "loss": 0.29986572265625, "step": 10028 }, { "epoch": 0.6779099634987157, "grad_norm": 0.930778443813324, "learning_rate": 7.515245034727889e-06, "loss": 0.1852264404296875, "step": 10029 }, { "epoch": 0.6779775584696499, "grad_norm": 1.2539033889770508, "learning_rate": 7.512385805405848e-06, "loss": 0.287628173828125, "step": 10030 }, { "epoch": 0.678045153440584, "grad_norm": 1.4507490396499634, "learning_rate": 7.509526938381148e-06, "loss": 0.23541259765625, "step": 10031 }, { "epoch": 0.6781127484115181, "grad_norm": 4.081177234649658, "learning_rate": 7.50666843379212e-06, "loss": 0.366302490234375, "step": 10032 }, { "epoch": 0.6781803433824524, "grad_norm": 1.0643900632858276, "learning_rate": 7.503810291777071e-06, "loss": 0.225128173828125, "step": 10033 }, { "epoch": 0.6782479383533865, "grad_norm": 1.835333228111267, "learning_rate": 7.500952512474298e-06, "loss": 0.31878662109375, "step": 10034 }, { "epoch": 0.6783155333243207, "grad_norm": 1.4980626106262207, "learning_rate": 7.498095096022074e-06, "loss": 0.236907958984375, "step": 10035 }, { "epoch": 0.6783831282952548, "grad_norm": 0.9526354074478149, "learning_rate": 7.495238042558675e-06, "loss": 0.252960205078125, "step": 10036 }, { "epoch": 0.678450723266189, "grad_norm": 1.0956913232803345, "learning_rate": 7.492381352222318e-06, "loss": 0.1903228759765625, "step": 10037 }, { "epoch": 0.6785183182371232, "grad_norm": 1.2074871063232422, "learning_rate": 7.4895250251512475e-06, "loss": 0.1930389404296875, "step": 10038 }, { "epoch": 0.6785859132080573, "grad_norm": 1.5982695817947388, "learning_rate": 7.4866690614836695e-06, "loss": 0.29461669921875, "step": 10039 }, { "epoch": 0.6786535081789915, "grad_norm": 0.8668258786201477, "learning_rate": 7.483813461357756e-06, "loss": 0.14434814453125, "step": 10040 }, { "epoch": 0.6787211031499256, "grad_norm": 0.9619578123092651, "learning_rate": 7.480958224911694e-06, "loss": 0.231048583984375, "step": 10041 }, { "epoch": 0.6787886981208598, "grad_norm": 1.8574433326721191, "learning_rate": 7.478103352283634e-06, "loss": 0.296966552734375, "step": 10042 }, { "epoch": 0.678856293091794, "grad_norm": 0.9706752896308899, "learning_rate": 7.475248843611713e-06, "loss": 0.1778717041015625, "step": 10043 }, { "epoch": 0.6789238880627282, "grad_norm": 2.6424598693847656, "learning_rate": 7.472394699034048e-06, "loss": 0.2667694091796875, "step": 10044 }, { "epoch": 0.6789914830336623, "grad_norm": 0.6787582039833069, "learning_rate": 7.469540918688741e-06, "loss": 0.1433238983154297, "step": 10045 }, { "epoch": 0.6790590780045964, "grad_norm": 0.5632864832878113, "learning_rate": 7.466687502713876e-06, "loss": 0.0861053466796875, "step": 10046 }, { "epoch": 0.6791266729755306, "grad_norm": 1.9200397729873657, "learning_rate": 7.463834451247519e-06, "loss": 0.251007080078125, "step": 10047 }, { "epoch": 0.6791942679464648, "grad_norm": 1.2120020389556885, "learning_rate": 7.460981764427712e-06, "loss": 0.201324462890625, "step": 10048 }, { "epoch": 0.679261862917399, "grad_norm": 1.1180800199508667, "learning_rate": 7.4581294423925046e-06, "loss": 0.242431640625, "step": 10049 }, { "epoch": 0.6793294578883331, "grad_norm": 0.6851253509521484, "learning_rate": 7.4552774852798885e-06, "loss": 0.14300537109375, "step": 10050 }, { "epoch": 0.6793970528592673, "grad_norm": 1.4927473068237305, "learning_rate": 7.452425893227865e-06, "loss": 0.240203857421875, "step": 10051 }, { "epoch": 0.6794646478302014, "grad_norm": 1.40849769115448, "learning_rate": 7.449574666374426e-06, "loss": 0.238525390625, "step": 10052 }, { "epoch": 0.6795322428011356, "grad_norm": 1.5019694566726685, "learning_rate": 7.446723804857508e-06, "loss": 0.26776123046875, "step": 10053 }, { "epoch": 0.6795998377720698, "grad_norm": 0.8739873170852661, "learning_rate": 7.44387330881507e-06, "loss": 0.1499176025390625, "step": 10054 }, { "epoch": 0.6796674327430039, "grad_norm": 1.1439685821533203, "learning_rate": 7.4410231783850345e-06, "loss": 0.2720794677734375, "step": 10055 }, { "epoch": 0.6797350277139381, "grad_norm": 1.7499016523361206, "learning_rate": 7.438173413705304e-06, "loss": 0.2708740234375, "step": 10056 }, { "epoch": 0.6798026226848722, "grad_norm": 1.585997462272644, "learning_rate": 7.435324014913774e-06, "loss": 0.258087158203125, "step": 10057 }, { "epoch": 0.6798702176558064, "grad_norm": 1.4808366298675537, "learning_rate": 7.432474982148309e-06, "loss": 0.209625244140625, "step": 10058 }, { "epoch": 0.6799378126267406, "grad_norm": 1.1646989583969116, "learning_rate": 7.429626315546762e-06, "loss": 0.1890869140625, "step": 10059 }, { "epoch": 0.6800054075976747, "grad_norm": 1.3411939144134521, "learning_rate": 7.426778015246984e-06, "loss": 0.27734375, "step": 10060 }, { "epoch": 0.6800730025686089, "grad_norm": 1.5293450355529785, "learning_rate": 7.4239300813867725e-06, "loss": 0.313568115234375, "step": 10061 }, { "epoch": 0.680140597539543, "grad_norm": 0.5467767715454102, "learning_rate": 7.42108251410395e-06, "loss": 0.11578369140625, "step": 10062 }, { "epoch": 0.6802081925104773, "grad_norm": 1.2191044092178345, "learning_rate": 7.418235313536282e-06, "loss": 0.224151611328125, "step": 10063 }, { "epoch": 0.6802757874814114, "grad_norm": 1.1205662488937378, "learning_rate": 7.415388479821534e-06, "loss": 0.290283203125, "step": 10064 }, { "epoch": 0.6803433824523455, "grad_norm": 1.2241630554199219, "learning_rate": 7.412542013097473e-06, "loss": 0.295623779296875, "step": 10065 }, { "epoch": 0.6804109774232797, "grad_norm": 1.0832149982452393, "learning_rate": 7.409695913501801e-06, "loss": 0.235076904296875, "step": 10066 }, { "epoch": 0.6804785723942138, "grad_norm": 1.8831199407577515, "learning_rate": 7.406850181172253e-06, "loss": 0.321380615234375, "step": 10067 }, { "epoch": 0.6805461673651481, "grad_norm": 1.2607783079147339, "learning_rate": 7.404004816246512e-06, "loss": 0.29742431640625, "step": 10068 }, { "epoch": 0.6806137623360822, "grad_norm": 0.6969607472419739, "learning_rate": 7.4011598188622585e-06, "loss": 0.10833740234375, "step": 10069 }, { "epoch": 0.6806813573070164, "grad_norm": 1.1666744947433472, "learning_rate": 7.398315189157151e-06, "loss": 0.2716064453125, "step": 10070 }, { "epoch": 0.6807489522779505, "grad_norm": 1.217923641204834, "learning_rate": 7.395470927268826e-06, "loss": 0.195648193359375, "step": 10071 }, { "epoch": 0.6808165472488846, "grad_norm": 1.1850212812423706, "learning_rate": 7.392627033334905e-06, "loss": 0.265625, "step": 10072 }, { "epoch": 0.6808841422198189, "grad_norm": 1.3731597661972046, "learning_rate": 7.389783507493009e-06, "loss": 0.331298828125, "step": 10073 }, { "epoch": 0.680951737190753, "grad_norm": 1.0253103971481323, "learning_rate": 7.38694034988071e-06, "loss": 0.2167205810546875, "step": 10074 }, { "epoch": 0.6810193321616872, "grad_norm": 1.3043071031570435, "learning_rate": 7.384097560635575e-06, "loss": 0.25872802734375, "step": 10075 }, { "epoch": 0.6810869271326213, "grad_norm": 1.0340672731399536, "learning_rate": 7.381255139895174e-06, "loss": 0.13397216796875, "step": 10076 }, { "epoch": 0.6811545221035555, "grad_norm": 1.5839054584503174, "learning_rate": 7.378413087797019e-06, "loss": 0.3089599609375, "step": 10077 }, { "epoch": 0.6812221170744897, "grad_norm": 1.2489339113235474, "learning_rate": 7.375571404478648e-06, "loss": 0.2381134033203125, "step": 10078 }, { "epoch": 0.6812897120454238, "grad_norm": 1.2202560901641846, "learning_rate": 7.3727300900775385e-06, "loss": 0.3095703125, "step": 10079 }, { "epoch": 0.681357307016358, "grad_norm": 1.4492121934890747, "learning_rate": 7.369889144731185e-06, "loss": 0.2275238037109375, "step": 10080 }, { "epoch": 0.6814249019872921, "grad_norm": 1.5267525911331177, "learning_rate": 7.367048568577045e-06, "loss": 0.29156494140625, "step": 10081 }, { "epoch": 0.6814924969582263, "grad_norm": 1.052762746810913, "learning_rate": 7.364208361752565e-06, "loss": 0.17742156982421875, "step": 10082 }, { "epoch": 0.6815600919291604, "grad_norm": 1.4194890260696411, "learning_rate": 7.361368524395171e-06, "loss": 0.244598388671875, "step": 10083 }, { "epoch": 0.6816276869000947, "grad_norm": 0.9270319938659668, "learning_rate": 7.3585290566422715e-06, "loss": 0.247528076171875, "step": 10084 }, { "epoch": 0.6816952818710288, "grad_norm": 1.498055100440979, "learning_rate": 7.355689958631259e-06, "loss": 0.30340576171875, "step": 10085 }, { "epoch": 0.6817628768419629, "grad_norm": 1.253089427947998, "learning_rate": 7.3528512304995045e-06, "loss": 0.2518157958984375, "step": 10086 }, { "epoch": 0.6818304718128971, "grad_norm": 1.572736144065857, "learning_rate": 7.350012872384365e-06, "loss": 0.276458740234375, "step": 10087 }, { "epoch": 0.6818980667838312, "grad_norm": 1.6065462827682495, "learning_rate": 7.347174884423173e-06, "loss": 0.2496337890625, "step": 10088 }, { "epoch": 0.6819656617547655, "grad_norm": 0.9039642810821533, "learning_rate": 7.344337266753263e-06, "loss": 0.217620849609375, "step": 10089 }, { "epoch": 0.6820332567256996, "grad_norm": 0.8958940505981445, "learning_rate": 7.341500019511915e-06, "loss": 0.195526123046875, "step": 10090 }, { "epoch": 0.6821008516966338, "grad_norm": 1.1679527759552002, "learning_rate": 7.338663142836435e-06, "loss": 0.260009765625, "step": 10091 }, { "epoch": 0.6821684466675679, "grad_norm": 2.046877145767212, "learning_rate": 7.335826636864065e-06, "loss": 0.321685791015625, "step": 10092 }, { "epoch": 0.682236041638502, "grad_norm": 1.3270221948623657, "learning_rate": 7.332990501732071e-06, "loss": 0.1851043701171875, "step": 10093 }, { "epoch": 0.6823036366094363, "grad_norm": 1.0874847173690796, "learning_rate": 7.330154737577678e-06, "loss": 0.1898651123046875, "step": 10094 }, { "epoch": 0.6823712315803704, "grad_norm": 1.15444016456604, "learning_rate": 7.327319344538098e-06, "loss": 0.30938720703125, "step": 10095 }, { "epoch": 0.6824388265513046, "grad_norm": 1.123085379600525, "learning_rate": 7.3244843227505225e-06, "loss": 0.24365234375, "step": 10096 }, { "epoch": 0.6825064215222387, "grad_norm": 1.8222297430038452, "learning_rate": 7.32164967235213e-06, "loss": 0.279998779296875, "step": 10097 }, { "epoch": 0.682574016493173, "grad_norm": 1.0678223371505737, "learning_rate": 7.318815393480076e-06, "loss": 0.2089385986328125, "step": 10098 }, { "epoch": 0.6826416114641071, "grad_norm": 1.322527289390564, "learning_rate": 7.315981486271504e-06, "loss": 0.22509765625, "step": 10099 }, { "epoch": 0.6827092064350412, "grad_norm": 0.7262814044952393, "learning_rate": 7.313147950863533e-06, "loss": 0.1310882568359375, "step": 10100 }, { "epoch": 0.6827768014059754, "grad_norm": 0.9445438981056213, "learning_rate": 7.310314787393263e-06, "loss": 0.14532852172851562, "step": 10101 }, { "epoch": 0.6828443963769095, "grad_norm": 1.0088895559310913, "learning_rate": 7.3074819959977954e-06, "loss": 0.208465576171875, "step": 10102 }, { "epoch": 0.6829119913478437, "grad_norm": 1.6419519186019897, "learning_rate": 7.304649576814178e-06, "loss": 0.214141845703125, "step": 10103 }, { "epoch": 0.6829795863187779, "grad_norm": 1.315329909324646, "learning_rate": 7.3018175299794755e-06, "loss": 0.30633544921875, "step": 10104 }, { "epoch": 0.6830471812897121, "grad_norm": 1.4959145784378052, "learning_rate": 7.298985855630717e-06, "loss": 0.24347686767578125, "step": 10105 }, { "epoch": 0.6831147762606462, "grad_norm": 1.286940336227417, "learning_rate": 7.296154553904914e-06, "loss": 0.247833251953125, "step": 10106 }, { "epoch": 0.6831823712315803, "grad_norm": 2.1761791706085205, "learning_rate": 7.293323624939063e-06, "loss": 0.2933349609375, "step": 10107 }, { "epoch": 0.6832499662025145, "grad_norm": 1.1570919752120972, "learning_rate": 7.290493068870143e-06, "loss": 0.1717529296875, "step": 10108 }, { "epoch": 0.6833175611734487, "grad_norm": 1.2995268106460571, "learning_rate": 7.287662885835112e-06, "loss": 0.209320068359375, "step": 10109 }, { "epoch": 0.6833851561443829, "grad_norm": 1.8634488582611084, "learning_rate": 7.284833075970915e-06, "loss": 0.3367919921875, "step": 10110 }, { "epoch": 0.683452751115317, "grad_norm": 1.5132145881652832, "learning_rate": 7.282003639414472e-06, "loss": 0.28704833984375, "step": 10111 }, { "epoch": 0.6835203460862512, "grad_norm": 1.0163244009017944, "learning_rate": 7.279174576302692e-06, "loss": 0.214752197265625, "step": 10112 }, { "epoch": 0.6835879410571853, "grad_norm": 1.968732476234436, "learning_rate": 7.27634588677246e-06, "loss": 0.249267578125, "step": 10113 }, { "epoch": 0.6836555360281195, "grad_norm": 0.9191800355911255, "learning_rate": 7.273517570960644e-06, "loss": 0.1728973388671875, "step": 10114 }, { "epoch": 0.6837231309990537, "grad_norm": 1.3196618556976318, "learning_rate": 7.2706896290041065e-06, "loss": 0.2242889404296875, "step": 10115 }, { "epoch": 0.6837907259699878, "grad_norm": 1.229912281036377, "learning_rate": 7.267862061039664e-06, "loss": 0.2982177734375, "step": 10116 }, { "epoch": 0.683858320940922, "grad_norm": 1.6183295249938965, "learning_rate": 7.265034867204144e-06, "loss": 0.2772216796875, "step": 10117 }, { "epoch": 0.6839259159118561, "grad_norm": 1.6873793601989746, "learning_rate": 7.262208047634342e-06, "loss": 0.259002685546875, "step": 10118 }, { "epoch": 0.6839935108827904, "grad_norm": 1.2574517726898193, "learning_rate": 7.259381602467034e-06, "loss": 0.26727294921875, "step": 10119 }, { "epoch": 0.6840611058537245, "grad_norm": 0.6910883188247681, "learning_rate": 7.256555531838983e-06, "loss": 0.1601104736328125, "step": 10120 }, { "epoch": 0.6841287008246586, "grad_norm": 1.098686933517456, "learning_rate": 7.253729835886931e-06, "loss": 0.1915130615234375, "step": 10121 }, { "epoch": 0.6841962957955928, "grad_norm": 1.352939486503601, "learning_rate": 7.250904514747604e-06, "loss": 0.2303466796875, "step": 10122 }, { "epoch": 0.6842638907665269, "grad_norm": 1.2661162614822388, "learning_rate": 7.248079568557709e-06, "loss": 0.2027587890625, "step": 10123 }, { "epoch": 0.6843314857374612, "grad_norm": 1.2291324138641357, "learning_rate": 7.245254997453931e-06, "loss": 0.1563873291015625, "step": 10124 }, { "epoch": 0.6843990807083953, "grad_norm": 2.2209150791168213, "learning_rate": 7.242430801572944e-06, "loss": 0.2859649658203125, "step": 10125 }, { "epoch": 0.6844666756793295, "grad_norm": 1.5423766374588013, "learning_rate": 7.239606981051396e-06, "loss": 0.2253570556640625, "step": 10126 }, { "epoch": 0.6845342706502636, "grad_norm": 1.3795191049575806, "learning_rate": 7.23678353602592e-06, "loss": 0.1873779296875, "step": 10127 }, { "epoch": 0.6846018656211977, "grad_norm": 1.810936689376831, "learning_rate": 7.233960466633146e-06, "loss": 0.263641357421875, "step": 10128 }, { "epoch": 0.684669460592132, "grad_norm": 1.6425435543060303, "learning_rate": 7.231137773009651e-06, "loss": 0.28076171875, "step": 10129 }, { "epoch": 0.6847370555630661, "grad_norm": 1.7311453819274902, "learning_rate": 7.228315455292028e-06, "loss": 0.222686767578125, "step": 10130 }, { "epoch": 0.6848046505340003, "grad_norm": 1.1028927564620972, "learning_rate": 7.225493513616838e-06, "loss": 0.21429443359375, "step": 10131 }, { "epoch": 0.6848722455049344, "grad_norm": 1.0688432455062866, "learning_rate": 7.222671948120619e-06, "loss": 0.19134521484375, "step": 10132 }, { "epoch": 0.6849398404758686, "grad_norm": 1.2087585926055908, "learning_rate": 7.219850758939898e-06, "loss": 0.2230224609375, "step": 10133 }, { "epoch": 0.6850074354468028, "grad_norm": 2.082472562789917, "learning_rate": 7.21702994621118e-06, "loss": 0.24700927734375, "step": 10134 }, { "epoch": 0.6850750304177369, "grad_norm": 0.7203773856163025, "learning_rate": 7.214209510070957e-06, "loss": 0.10667037963867188, "step": 10135 }, { "epoch": 0.6851426253886711, "grad_norm": 1.2732182741165161, "learning_rate": 7.211389450655698e-06, "loss": 0.2193603515625, "step": 10136 }, { "epoch": 0.6852102203596052, "grad_norm": 0.9802137017250061, "learning_rate": 7.208569768101853e-06, "loss": 0.229705810546875, "step": 10137 }, { "epoch": 0.6852778153305394, "grad_norm": 1.1648797988891602, "learning_rate": 7.205750462545856e-06, "loss": 0.22821044921875, "step": 10138 }, { "epoch": 0.6853454103014736, "grad_norm": 1.0213828086853027, "learning_rate": 7.202931534124126e-06, "loss": 0.14759063720703125, "step": 10139 }, { "epoch": 0.6854130052724078, "grad_norm": 0.9460623860359192, "learning_rate": 7.200112982973051e-06, "loss": 0.1834564208984375, "step": 10140 }, { "epoch": 0.6854806002433419, "grad_norm": 1.3704230785369873, "learning_rate": 7.197294809229026e-06, "loss": 0.26507568359375, "step": 10141 }, { "epoch": 0.685548195214276, "grad_norm": 1.374754786491394, "learning_rate": 7.194477013028394e-06, "loss": 0.18517684936523438, "step": 10142 }, { "epoch": 0.6856157901852102, "grad_norm": 1.637284755706787, "learning_rate": 7.191659594507511e-06, "loss": 0.2344512939453125, "step": 10143 }, { "epoch": 0.6856833851561444, "grad_norm": 1.6634833812713623, "learning_rate": 7.188842553802701e-06, "loss": 0.20318603515625, "step": 10144 }, { "epoch": 0.6857509801270786, "grad_norm": 1.2806614637374878, "learning_rate": 7.186025891050255e-06, "loss": 0.17412567138671875, "step": 10145 }, { "epoch": 0.6858185750980127, "grad_norm": 1.0596295595169067, "learning_rate": 7.183209606386478e-06, "loss": 0.1810302734375, "step": 10146 }, { "epoch": 0.6858861700689469, "grad_norm": 1.0396223068237305, "learning_rate": 7.18039369994763e-06, "loss": 0.2263031005859375, "step": 10147 }, { "epoch": 0.685953765039881, "grad_norm": 1.0909478664398193, "learning_rate": 7.177578171869967e-06, "loss": 0.20147705078125, "step": 10148 }, { "epoch": 0.6860213600108152, "grad_norm": 1.449947714805603, "learning_rate": 7.174763022289716e-06, "loss": 0.271820068359375, "step": 10149 }, { "epoch": 0.6860889549817494, "grad_norm": 1.3384006023406982, "learning_rate": 7.171948251343096e-06, "loss": 0.250244140625, "step": 10150 }, { "epoch": 0.6861565499526835, "grad_norm": 1.0319976806640625, "learning_rate": 7.169133859166296e-06, "loss": 0.1923675537109375, "step": 10151 }, { "epoch": 0.6862241449236177, "grad_norm": 1.0899434089660645, "learning_rate": 7.166319845895512e-06, "loss": 0.11066436767578125, "step": 10152 }, { "epoch": 0.6862917398945518, "grad_norm": 1.0729295015335083, "learning_rate": 7.163506211666878e-06, "loss": 0.240234375, "step": 10153 }, { "epoch": 0.6863593348654861, "grad_norm": 1.0928157567977905, "learning_rate": 7.1606929566165605e-06, "loss": 0.17624664306640625, "step": 10154 }, { "epoch": 0.6864269298364202, "grad_norm": 1.0178087949752808, "learning_rate": 7.157880080880658e-06, "loss": 0.182647705078125, "step": 10155 }, { "epoch": 0.6864945248073543, "grad_norm": 1.2870100736618042, "learning_rate": 7.1550675845952935e-06, "loss": 0.260162353515625, "step": 10156 }, { "epoch": 0.6865621197782885, "grad_norm": 1.3275710344314575, "learning_rate": 7.152255467896551e-06, "loss": 0.2273101806640625, "step": 10157 }, { "epoch": 0.6866297147492226, "grad_norm": 1.4493048191070557, "learning_rate": 7.149443730920483e-06, "loss": 0.32122802734375, "step": 10158 }, { "epoch": 0.6866973097201569, "grad_norm": 0.945454478263855, "learning_rate": 7.146632373803155e-06, "loss": 0.25811767578125, "step": 10159 }, { "epoch": 0.686764904691091, "grad_norm": 2.936434030532837, "learning_rate": 7.14382139668059e-06, "loss": 0.26934814453125, "step": 10160 }, { "epoch": 0.6868324996620252, "grad_norm": 1.816231608390808, "learning_rate": 7.141010799688806e-06, "loss": 0.199859619140625, "step": 10161 }, { "epoch": 0.6869000946329593, "grad_norm": 1.0404666662216187, "learning_rate": 7.138200582963792e-06, "loss": 0.168548583984375, "step": 10162 }, { "epoch": 0.6869676896038934, "grad_norm": 1.155251383781433, "learning_rate": 7.135390746641527e-06, "loss": 0.1572113037109375, "step": 10163 }, { "epoch": 0.6870352845748277, "grad_norm": 1.1482563018798828, "learning_rate": 7.13258129085796e-06, "loss": 0.20697021484375, "step": 10164 }, { "epoch": 0.6871028795457618, "grad_norm": 0.8105752468109131, "learning_rate": 7.129772215749051e-06, "loss": 0.173126220703125, "step": 10165 }, { "epoch": 0.687170474516696, "grad_norm": 1.874417781829834, "learning_rate": 7.126963521450693e-06, "loss": 0.279937744140625, "step": 10166 }, { "epoch": 0.6872380694876301, "grad_norm": 0.6649011373519897, "learning_rate": 7.1241552080988086e-06, "loss": 0.1146392822265625, "step": 10167 }, { "epoch": 0.6873056644585643, "grad_norm": 1.594136118888855, "learning_rate": 7.12134727582928e-06, "loss": 0.3125, "step": 10168 }, { "epoch": 0.6873732594294985, "grad_norm": 1.75857675075531, "learning_rate": 7.118539724777956e-06, "loss": 0.310638427734375, "step": 10169 }, { "epoch": 0.6874408544004326, "grad_norm": 0.9025448560714722, "learning_rate": 7.115732555080705e-06, "loss": 0.21771240234375, "step": 10170 }, { "epoch": 0.6875084493713668, "grad_norm": 1.1733686923980713, "learning_rate": 7.112925766873334e-06, "loss": 0.229736328125, "step": 10171 }, { "epoch": 0.6875760443423009, "grad_norm": 1.8708497285842896, "learning_rate": 7.110119360291669e-06, "loss": 0.27557373046875, "step": 10172 }, { "epoch": 0.6876436393132351, "grad_norm": 0.5619907975196838, "learning_rate": 7.1073133354714944e-06, "loss": 0.10311126708984375, "step": 10173 }, { "epoch": 0.6877112342841692, "grad_norm": 0.9604158997535706, "learning_rate": 7.104507692548587e-06, "loss": 0.244873046875, "step": 10174 }, { "epoch": 0.6877788292551035, "grad_norm": 0.6989738941192627, "learning_rate": 7.101702431658698e-06, "loss": 0.135345458984375, "step": 10175 }, { "epoch": 0.6878464242260376, "grad_norm": 1.387899398803711, "learning_rate": 7.098897552937564e-06, "loss": 0.1905517578125, "step": 10176 }, { "epoch": 0.6879140191969717, "grad_norm": 1.1346021890640259, "learning_rate": 7.096093056520899e-06, "loss": 0.184539794921875, "step": 10177 }, { "epoch": 0.6879816141679059, "grad_norm": 1.187543511390686, "learning_rate": 7.093288942544415e-06, "loss": 0.27850341796875, "step": 10178 }, { "epoch": 0.68804920913884, "grad_norm": 1.066391944885254, "learning_rate": 7.090485211143777e-06, "loss": 0.16828155517578125, "step": 10179 }, { "epoch": 0.6881168041097743, "grad_norm": 1.5787365436553955, "learning_rate": 7.0876818624546496e-06, "loss": 0.24603271484375, "step": 10180 }, { "epoch": 0.6881843990807084, "grad_norm": 1.257588267326355, "learning_rate": 7.084878896612691e-06, "loss": 0.24493408203125, "step": 10181 }, { "epoch": 0.6882519940516426, "grad_norm": 1.3082492351531982, "learning_rate": 7.082076313753503e-06, "loss": 0.24249267578125, "step": 10182 }, { "epoch": 0.6883195890225767, "grad_norm": 0.5789242386817932, "learning_rate": 7.0792741140127165e-06, "loss": 0.0977783203125, "step": 10183 }, { "epoch": 0.6883871839935108, "grad_norm": 1.6109356880187988, "learning_rate": 7.076472297525894e-06, "loss": 0.28717041015625, "step": 10184 }, { "epoch": 0.6884547789644451, "grad_norm": 1.4085789918899536, "learning_rate": 7.073670864428623e-06, "loss": 0.1544647216796875, "step": 10185 }, { "epoch": 0.6885223739353792, "grad_norm": 1.4840139150619507, "learning_rate": 7.070869814856451e-06, "loss": 0.221588134765625, "step": 10186 }, { "epoch": 0.6885899689063134, "grad_norm": 1.6338064670562744, "learning_rate": 7.068069148944906e-06, "loss": 0.254119873046875, "step": 10187 }, { "epoch": 0.6886575638772475, "grad_norm": 1.5927847623825073, "learning_rate": 7.065268866829505e-06, "loss": 0.26422119140625, "step": 10188 }, { "epoch": 0.6887251588481816, "grad_norm": 1.1281089782714844, "learning_rate": 7.0624689686457405e-06, "loss": 0.200286865234375, "step": 10189 }, { "epoch": 0.6887927538191159, "grad_norm": 1.4670565128326416, "learning_rate": 7.0596694545290855e-06, "loss": 0.322021484375, "step": 10190 }, { "epoch": 0.68886034879005, "grad_norm": 0.9351571798324585, "learning_rate": 7.056870324615014e-06, "loss": 0.1900634765625, "step": 10191 }, { "epoch": 0.6889279437609842, "grad_norm": 1.164068341255188, "learning_rate": 7.0540715790389486e-06, "loss": 0.17462539672851562, "step": 10192 }, { "epoch": 0.6889955387319183, "grad_norm": 0.8162940144538879, "learning_rate": 7.05127321793631e-06, "loss": 0.174530029296875, "step": 10193 }, { "epoch": 0.6890631337028525, "grad_norm": 1.5379149913787842, "learning_rate": 7.048475241442516e-06, "loss": 0.232330322265625, "step": 10194 }, { "epoch": 0.6891307286737867, "grad_norm": 1.6957135200500488, "learning_rate": 7.045677649692928e-06, "loss": 0.3348388671875, "step": 10195 }, { "epoch": 0.6891983236447208, "grad_norm": 1.0448254346847534, "learning_rate": 7.0428804428229306e-06, "loss": 0.175079345703125, "step": 10196 }, { "epoch": 0.689265918615655, "grad_norm": 1.9673351049423218, "learning_rate": 7.040083620967861e-06, "loss": 0.2906646728515625, "step": 10197 }, { "epoch": 0.6893335135865891, "grad_norm": 0.9900448322296143, "learning_rate": 7.0372871842630465e-06, "loss": 0.29058837890625, "step": 10198 }, { "epoch": 0.6894011085575233, "grad_norm": 0.803225576877594, "learning_rate": 7.034491132843799e-06, "loss": 0.1478118896484375, "step": 10199 }, { "epoch": 0.6894687035284575, "grad_norm": 2.085655927658081, "learning_rate": 7.031695466845408e-06, "loss": 0.212158203125, "step": 10200 }, { "epoch": 0.6895362984993917, "grad_norm": 3.291759967803955, "learning_rate": 7.028900186403145e-06, "loss": 0.29046630859375, "step": 10201 }, { "epoch": 0.6896038934703258, "grad_norm": 1.1336994171142578, "learning_rate": 7.026105291652261e-06, "loss": 0.1365509033203125, "step": 10202 }, { "epoch": 0.6896714884412599, "grad_norm": 1.4562617540359497, "learning_rate": 7.023310782727993e-06, "loss": 0.328857421875, "step": 10203 }, { "epoch": 0.6897390834121941, "grad_norm": 0.879804790019989, "learning_rate": 7.0205166597655555e-06, "loss": 0.0995025634765625, "step": 10204 }, { "epoch": 0.6898066783831283, "grad_norm": 1.1379250288009644, "learning_rate": 7.017722922900146e-06, "loss": 0.181793212890625, "step": 10205 }, { "epoch": 0.6898742733540625, "grad_norm": 0.9637003540992737, "learning_rate": 7.014929572266938e-06, "loss": 0.210968017578125, "step": 10206 }, { "epoch": 0.6899418683249966, "grad_norm": 1.0974538326263428, "learning_rate": 7.012136608001107e-06, "loss": 0.17498779296875, "step": 10207 }, { "epoch": 0.6900094632959308, "grad_norm": 1.1259965896606445, "learning_rate": 7.00934403023777e-06, "loss": 0.220062255859375, "step": 10208 }, { "epoch": 0.6900770582668649, "grad_norm": 1.0711710453033447, "learning_rate": 7.0065518391120695e-06, "loss": 0.1754608154296875, "step": 10209 }, { "epoch": 0.690144653237799, "grad_norm": 2.6211612224578857, "learning_rate": 7.0037600347591e-06, "loss": 0.3167724609375, "step": 10210 }, { "epoch": 0.6902122482087333, "grad_norm": 1.0116322040557861, "learning_rate": 7.000968617313951e-06, "loss": 0.184600830078125, "step": 10211 }, { "epoch": 0.6902798431796674, "grad_norm": 1.464312195777893, "learning_rate": 6.9981775869116836e-06, "loss": 0.246368408203125, "step": 10212 }, { "epoch": 0.6903474381506016, "grad_norm": 0.7464079856872559, "learning_rate": 6.9953869436873475e-06, "loss": 0.17864990234375, "step": 10213 }, { "epoch": 0.6904150331215357, "grad_norm": 1.9538350105285645, "learning_rate": 6.99259668777597e-06, "loss": 0.259490966796875, "step": 10214 }, { "epoch": 0.69048262809247, "grad_norm": 1.1338497400283813, "learning_rate": 6.989806819312564e-06, "loss": 0.166168212890625, "step": 10215 }, { "epoch": 0.6905502230634041, "grad_norm": 0.8996465802192688, "learning_rate": 6.987017338432118e-06, "loss": 0.12628936767578125, "step": 10216 }, { "epoch": 0.6906178180343382, "grad_norm": 1.2581868171691895, "learning_rate": 6.984228245269606e-06, "loss": 0.181427001953125, "step": 10217 }, { "epoch": 0.6906854130052724, "grad_norm": 0.8704243898391724, "learning_rate": 6.9814395399599816e-06, "loss": 0.1558685302734375, "step": 10218 }, { "epoch": 0.6907530079762065, "grad_norm": 1.4200165271759033, "learning_rate": 6.9786512226381725e-06, "loss": 0.236236572265625, "step": 10219 }, { "epoch": 0.6908206029471408, "grad_norm": 1.165279746055603, "learning_rate": 6.975863293439115e-06, "loss": 0.267578125, "step": 10220 }, { "epoch": 0.6908881979180749, "grad_norm": 0.8640837073326111, "learning_rate": 6.97307575249768e-06, "loss": 0.167022705078125, "step": 10221 }, { "epoch": 0.6909557928890091, "grad_norm": 1.3329755067825317, "learning_rate": 6.970288599948762e-06, "loss": 0.260955810546875, "step": 10222 }, { "epoch": 0.6910233878599432, "grad_norm": 1.0956947803497314, "learning_rate": 6.9675018359272205e-06, "loss": 0.168731689453125, "step": 10223 }, { "epoch": 0.6910909828308773, "grad_norm": 1.706919550895691, "learning_rate": 6.964715460567895e-06, "loss": 0.324066162109375, "step": 10224 }, { "epoch": 0.6911585778018116, "grad_norm": 1.7661094665527344, "learning_rate": 6.961929474005605e-06, "loss": 0.237548828125, "step": 10225 }, { "epoch": 0.6912261727727457, "grad_norm": 1.3072893619537354, "learning_rate": 6.9591438763751555e-06, "loss": 0.205169677734375, "step": 10226 }, { "epoch": 0.6912937677436799, "grad_norm": 1.4960415363311768, "learning_rate": 6.95635866781133e-06, "loss": 0.27490234375, "step": 10227 }, { "epoch": 0.691361362714614, "grad_norm": 2.036182165145874, "learning_rate": 6.953573848448895e-06, "loss": 0.340087890625, "step": 10228 }, { "epoch": 0.6914289576855482, "grad_norm": 1.0159549713134766, "learning_rate": 6.950789418422598e-06, "loss": 0.218170166015625, "step": 10229 }, { "epoch": 0.6914965526564824, "grad_norm": 1.2838391065597534, "learning_rate": 6.948005377867166e-06, "loss": 0.19097900390625, "step": 10230 }, { "epoch": 0.6915641476274165, "grad_norm": 1.1421988010406494, "learning_rate": 6.94522172691731e-06, "loss": 0.1794281005859375, "step": 10231 }, { "epoch": 0.6916317425983507, "grad_norm": 1.1079330444335938, "learning_rate": 6.942438465707712e-06, "loss": 0.170440673828125, "step": 10232 }, { "epoch": 0.6916993375692848, "grad_norm": 1.3842182159423828, "learning_rate": 6.939655594373061e-06, "loss": 0.233123779296875, "step": 10233 }, { "epoch": 0.691766932540219, "grad_norm": 1.6972473859786987, "learning_rate": 6.93687311304799e-06, "loss": 0.24481201171875, "step": 10234 }, { "epoch": 0.6918345275111532, "grad_norm": 2.0423641204833984, "learning_rate": 6.934091021867146e-06, "loss": 0.2250213623046875, "step": 10235 }, { "epoch": 0.6919021224820874, "grad_norm": 1.4562829732894897, "learning_rate": 6.93130932096514e-06, "loss": 0.2777099609375, "step": 10236 }, { "epoch": 0.6919697174530215, "grad_norm": 1.0781017541885376, "learning_rate": 6.928528010476568e-06, "loss": 0.2685546875, "step": 10237 }, { "epoch": 0.6920373124239556, "grad_norm": 1.2944868803024292, "learning_rate": 6.9257470905360075e-06, "loss": 0.2608642578125, "step": 10238 }, { "epoch": 0.6921049073948898, "grad_norm": 1.6336711645126343, "learning_rate": 6.922966561278014e-06, "loss": 0.1837921142578125, "step": 10239 }, { "epoch": 0.692172502365824, "grad_norm": 0.9275354146957397, "learning_rate": 6.92018642283713e-06, "loss": 0.1456146240234375, "step": 10240 }, { "epoch": 0.6922400973367582, "grad_norm": 1.476407527923584, "learning_rate": 6.9174066753478745e-06, "loss": 0.28424072265625, "step": 10241 }, { "epoch": 0.6923076923076923, "grad_norm": 1.0486732721328735, "learning_rate": 6.91462731894475e-06, "loss": 0.191864013671875, "step": 10242 }, { "epoch": 0.6923752872786265, "grad_norm": 0.9743675589561462, "learning_rate": 6.911848353762231e-06, "loss": 0.1746826171875, "step": 10243 }, { "epoch": 0.6924428822495606, "grad_norm": 0.8939890265464783, "learning_rate": 6.909069779934801e-06, "loss": 0.22564697265625, "step": 10244 }, { "epoch": 0.6925104772204947, "grad_norm": 1.7630841732025146, "learning_rate": 6.90629159759688e-06, "loss": 0.252960205078125, "step": 10245 }, { "epoch": 0.692578072191429, "grad_norm": 1.1248717308044434, "learning_rate": 6.903513806882916e-06, "loss": 0.2203369140625, "step": 10246 }, { "epoch": 0.6926456671623631, "grad_norm": 1.146496295928955, "learning_rate": 6.9007364079272956e-06, "loss": 0.202545166015625, "step": 10247 }, { "epoch": 0.6927132621332973, "grad_norm": 1.3302026987075806, "learning_rate": 6.897959400864423e-06, "loss": 0.277801513671875, "step": 10248 }, { "epoch": 0.6927808571042314, "grad_norm": 2.192016839981079, "learning_rate": 6.895182785828664e-06, "loss": 0.225494384765625, "step": 10249 }, { "epoch": 0.6928484520751657, "grad_norm": 2.185181140899658, "learning_rate": 6.892406562954356e-06, "loss": 0.27142333984375, "step": 10250 }, { "epoch": 0.6929160470460998, "grad_norm": 1.2852977514266968, "learning_rate": 6.889630732375844e-06, "loss": 0.27178955078125, "step": 10251 }, { "epoch": 0.6929836420170339, "grad_norm": 2.1053953170776367, "learning_rate": 6.886855294227435e-06, "loss": 0.38470458984375, "step": 10252 }, { "epoch": 0.6930512369879681, "grad_norm": 1.3777180910110474, "learning_rate": 6.884080248643422e-06, "loss": 0.2165679931640625, "step": 10253 }, { "epoch": 0.6931188319589022, "grad_norm": 0.5897243618965149, "learning_rate": 6.881305595758077e-06, "loss": 0.09844970703125, "step": 10254 }, { "epoch": 0.6931864269298365, "grad_norm": 1.5576955080032349, "learning_rate": 6.878531335705659e-06, "loss": 0.26593017578125, "step": 10255 }, { "epoch": 0.6932540219007706, "grad_norm": 0.9708219170570374, "learning_rate": 6.875757468620396e-06, "loss": 0.08555221557617188, "step": 10256 }, { "epoch": 0.6933216168717048, "grad_norm": 1.2972657680511475, "learning_rate": 6.872983994636523e-06, "loss": 0.1817169189453125, "step": 10257 }, { "epoch": 0.6933892118426389, "grad_norm": 0.9864192008972168, "learning_rate": 6.870210913888212e-06, "loss": 0.15948486328125, "step": 10258 }, { "epoch": 0.693456806813573, "grad_norm": 1.3458927869796753, "learning_rate": 6.867438226509668e-06, "loss": 0.28118896484375, "step": 10259 }, { "epoch": 0.6935244017845072, "grad_norm": 2.0090177059173584, "learning_rate": 6.864665932635029e-06, "loss": 0.277099609375, "step": 10260 }, { "epoch": 0.6935919967554414, "grad_norm": 1.3128212690353394, "learning_rate": 6.861894032398448e-06, "loss": 0.18896484375, "step": 10261 }, { "epoch": 0.6936595917263756, "grad_norm": 1.351604700088501, "learning_rate": 6.859122525934052e-06, "loss": 0.29217529296875, "step": 10262 }, { "epoch": 0.6937271866973097, "grad_norm": 1.3352274894714355, "learning_rate": 6.856351413375923e-06, "loss": 0.268707275390625, "step": 10263 }, { "epoch": 0.6937947816682439, "grad_norm": 1.1555993556976318, "learning_rate": 6.853580694858163e-06, "loss": 0.2197265625, "step": 10264 }, { "epoch": 0.693862376639178, "grad_norm": 1.6706427335739136, "learning_rate": 6.850810370514832e-06, "loss": 0.19110107421875, "step": 10265 }, { "epoch": 0.6939299716101122, "grad_norm": 1.128324270248413, "learning_rate": 6.848040440479975e-06, "loss": 0.213104248046875, "step": 10266 }, { "epoch": 0.6939975665810464, "grad_norm": 1.6590670347213745, "learning_rate": 6.845270904887617e-06, "loss": 0.234039306640625, "step": 10267 }, { "epoch": 0.6940651615519805, "grad_norm": 1.4140995740890503, "learning_rate": 6.842501763871766e-06, "loss": 0.2100677490234375, "step": 10268 }, { "epoch": 0.6941327565229147, "grad_norm": 1.0466364622116089, "learning_rate": 6.839733017566406e-06, "loss": 0.1920013427734375, "step": 10269 }, { "epoch": 0.6942003514938488, "grad_norm": 0.7567197680473328, "learning_rate": 6.836964666105521e-06, "loss": 0.0859527587890625, "step": 10270 }, { "epoch": 0.6942679464647831, "grad_norm": 1.518445611000061, "learning_rate": 6.8341967096230405e-06, "loss": 0.277679443359375, "step": 10271 }, { "epoch": 0.6943355414357172, "grad_norm": 1.2987785339355469, "learning_rate": 6.831429148252911e-06, "loss": 0.27667236328125, "step": 10272 }, { "epoch": 0.6944031364066513, "grad_norm": 0.8996280431747437, "learning_rate": 6.8286619821290445e-06, "loss": 0.13813018798828125, "step": 10273 }, { "epoch": 0.6944707313775855, "grad_norm": 1.2094595432281494, "learning_rate": 6.825895211385318e-06, "loss": 0.2325897216796875, "step": 10274 }, { "epoch": 0.6945383263485196, "grad_norm": 1.192438006401062, "learning_rate": 6.823128836155624e-06, "loss": 0.2452392578125, "step": 10275 }, { "epoch": 0.6946059213194539, "grad_norm": 0.5882506966590881, "learning_rate": 6.8203628565737995e-06, "loss": 0.0927734375, "step": 10276 }, { "epoch": 0.694673516290388, "grad_norm": 0.5823749899864197, "learning_rate": 6.817597272773693e-06, "loss": 0.0769500732421875, "step": 10277 }, { "epoch": 0.6947411112613222, "grad_norm": 1.4055122137069702, "learning_rate": 6.814832084889118e-06, "loss": 0.277008056640625, "step": 10278 }, { "epoch": 0.6948087062322563, "grad_norm": 1.9913008213043213, "learning_rate": 6.812067293053868e-06, "loss": 0.264404296875, "step": 10279 }, { "epoch": 0.6948763012031904, "grad_norm": 0.9200242161750793, "learning_rate": 6.809302897401723e-06, "loss": 0.14838409423828125, "step": 10280 }, { "epoch": 0.6949438961741247, "grad_norm": 1.1550986766815186, "learning_rate": 6.806538898066443e-06, "loss": 0.229705810546875, "step": 10281 }, { "epoch": 0.6950114911450588, "grad_norm": 1.32150399684906, "learning_rate": 6.803775295181758e-06, "loss": 0.291839599609375, "step": 10282 }, { "epoch": 0.695079086115993, "grad_norm": 1.3441095352172852, "learning_rate": 6.801012088881407e-06, "loss": 0.248291015625, "step": 10283 }, { "epoch": 0.6951466810869271, "grad_norm": 1.275460958480835, "learning_rate": 6.798249279299076e-06, "loss": 0.234649658203125, "step": 10284 }, { "epoch": 0.6952142760578613, "grad_norm": 1.2643977403640747, "learning_rate": 6.795486866568444e-06, "loss": 0.29071044921875, "step": 10285 }, { "epoch": 0.6952818710287955, "grad_norm": 1.1798278093338013, "learning_rate": 6.792724850823192e-06, "loss": 0.2591552734375, "step": 10286 }, { "epoch": 0.6953494659997296, "grad_norm": 1.50374174118042, "learning_rate": 6.789963232196943e-06, "loss": 0.2318115234375, "step": 10287 }, { "epoch": 0.6954170609706638, "grad_norm": 1.6681355237960815, "learning_rate": 6.787202010823335e-06, "loss": 0.31298828125, "step": 10288 }, { "epoch": 0.6954846559415979, "grad_norm": 1.767149806022644, "learning_rate": 6.784441186835967e-06, "loss": 0.214508056640625, "step": 10289 }, { "epoch": 0.6955522509125321, "grad_norm": 1.364267349243164, "learning_rate": 6.7816807603684275e-06, "loss": 0.292816162109375, "step": 10290 }, { "epoch": 0.6956198458834663, "grad_norm": 1.1985864639282227, "learning_rate": 6.778920731554283e-06, "loss": 0.22515869140625, "step": 10291 }, { "epoch": 0.6956874408544005, "grad_norm": 0.9153357148170471, "learning_rate": 6.7761611005270785e-06, "loss": 0.2151031494140625, "step": 10292 }, { "epoch": 0.6957550358253346, "grad_norm": 0.8761605620384216, "learning_rate": 6.773401867420345e-06, "loss": 0.1084136962890625, "step": 10293 }, { "epoch": 0.6958226307962687, "grad_norm": 1.0306907892227173, "learning_rate": 6.770643032367587e-06, "loss": 0.220367431640625, "step": 10294 }, { "epoch": 0.6958902257672029, "grad_norm": 1.2021965980529785, "learning_rate": 6.767884595502294e-06, "loss": 0.21197509765625, "step": 10295 }, { "epoch": 0.6959578207381371, "grad_norm": 0.9968361258506775, "learning_rate": 6.765126556957949e-06, "loss": 0.17633056640625, "step": 10296 }, { "epoch": 0.6960254157090713, "grad_norm": 0.9483680725097656, "learning_rate": 6.7623689168679895e-06, "loss": 0.253997802734375, "step": 10297 }, { "epoch": 0.6960930106800054, "grad_norm": 1.2669587135314941, "learning_rate": 6.759611675365843e-06, "loss": 0.234771728515625, "step": 10298 }, { "epoch": 0.6961606056509396, "grad_norm": 1.2584228515625, "learning_rate": 6.756854832584943e-06, "loss": 0.284027099609375, "step": 10299 }, { "epoch": 0.6962282006218737, "grad_norm": 2.2037558555603027, "learning_rate": 6.754098388658658e-06, "loss": 0.28790283203125, "step": 10300 }, { "epoch": 0.6962957955928079, "grad_norm": 1.2053889036178589, "learning_rate": 6.7513423437203795e-06, "loss": 0.251373291015625, "step": 10301 }, { "epoch": 0.6963633905637421, "grad_norm": 3.416999578475952, "learning_rate": 6.748586697903456e-06, "loss": 0.319976806640625, "step": 10302 }, { "epoch": 0.6964309855346762, "grad_norm": 1.3510510921478271, "learning_rate": 6.7458314513412235e-06, "loss": 0.292724609375, "step": 10303 }, { "epoch": 0.6964985805056104, "grad_norm": 1.664483904838562, "learning_rate": 6.743076604166997e-06, "loss": 0.2779541015625, "step": 10304 }, { "epoch": 0.6965661754765445, "grad_norm": 0.8702383637428284, "learning_rate": 6.740322156514073e-06, "loss": 0.22332763671875, "step": 10305 }, { "epoch": 0.6966337704474788, "grad_norm": 1.4542982578277588, "learning_rate": 6.737568108515725e-06, "loss": 0.30450439453125, "step": 10306 }, { "epoch": 0.6967013654184129, "grad_norm": 1.0164265632629395, "learning_rate": 6.7348144603052264e-06, "loss": 0.168121337890625, "step": 10307 }, { "epoch": 0.696768960389347, "grad_norm": 1.1281684637069702, "learning_rate": 6.7320612120157995e-06, "loss": 0.224456787109375, "step": 10308 }, { "epoch": 0.6968365553602812, "grad_norm": 1.342549204826355, "learning_rate": 6.729308363780669e-06, "loss": 0.28936767578125, "step": 10309 }, { "epoch": 0.6969041503312153, "grad_norm": 2.1080732345581055, "learning_rate": 6.726555915733033e-06, "loss": 0.266571044921875, "step": 10310 }, { "epoch": 0.6969717453021496, "grad_norm": 1.2103309631347656, "learning_rate": 6.72380386800607e-06, "loss": 0.2679443359375, "step": 10311 }, { "epoch": 0.6970393402730837, "grad_norm": 1.1565808057785034, "learning_rate": 6.721052220732955e-06, "loss": 0.13604736328125, "step": 10312 }, { "epoch": 0.6971069352440179, "grad_norm": 1.6943588256835938, "learning_rate": 6.71830097404681e-06, "loss": 0.3309326171875, "step": 10313 }, { "epoch": 0.697174530214952, "grad_norm": 1.6198971271514893, "learning_rate": 6.7155501280807714e-06, "loss": 0.206756591796875, "step": 10314 }, { "epoch": 0.6972421251858861, "grad_norm": 1.458593487739563, "learning_rate": 6.712799682967938e-06, "loss": 0.2271728515625, "step": 10315 }, { "epoch": 0.6973097201568204, "grad_norm": 0.9208642840385437, "learning_rate": 6.710049638841394e-06, "loss": 0.16655731201171875, "step": 10316 }, { "epoch": 0.6973773151277545, "grad_norm": 1.3329213857650757, "learning_rate": 6.707299995834203e-06, "loss": 0.177947998046875, "step": 10317 }, { "epoch": 0.6974449100986887, "grad_norm": 1.2395412921905518, "learning_rate": 6.70455075407941e-06, "loss": 0.27069091796875, "step": 10318 }, { "epoch": 0.6975125050696228, "grad_norm": 1.8881925344467163, "learning_rate": 6.701801913710042e-06, "loss": 0.2214813232421875, "step": 10319 }, { "epoch": 0.6975801000405569, "grad_norm": 1.1967411041259766, "learning_rate": 6.699053474859101e-06, "loss": 0.2527008056640625, "step": 10320 }, { "epoch": 0.6976476950114912, "grad_norm": 0.5580734014511108, "learning_rate": 6.6963054376595785e-06, "loss": 0.09090042114257812, "step": 10321 }, { "epoch": 0.6977152899824253, "grad_norm": 1.0695194005966187, "learning_rate": 6.6935578022444365e-06, "loss": 0.28411865234375, "step": 10322 }, { "epoch": 0.6977828849533595, "grad_norm": 1.4795843362808228, "learning_rate": 6.690810568746626e-06, "loss": 0.2555694580078125, "step": 10323 }, { "epoch": 0.6978504799242936, "grad_norm": 0.9558562636375427, "learning_rate": 6.68806373729907e-06, "loss": 0.1627655029296875, "step": 10324 }, { "epoch": 0.6979180748952278, "grad_norm": 1.087212324142456, "learning_rate": 6.685317308034691e-06, "loss": 0.177032470703125, "step": 10325 }, { "epoch": 0.697985669866162, "grad_norm": 1.5729154348373413, "learning_rate": 6.682571281086359e-06, "loss": 0.31536865234375, "step": 10326 }, { "epoch": 0.6980532648370961, "grad_norm": 1.7223106622695923, "learning_rate": 6.679825656586959e-06, "loss": 0.217193603515625, "step": 10327 }, { "epoch": 0.6981208598080303, "grad_norm": 2.150812864303589, "learning_rate": 6.677080434669337e-06, "loss": 0.2808837890625, "step": 10328 }, { "epoch": 0.6981884547789644, "grad_norm": 1.2701395750045776, "learning_rate": 6.674335615466323e-06, "loss": 0.205535888671875, "step": 10329 }, { "epoch": 0.6982560497498986, "grad_norm": 1.4465413093566895, "learning_rate": 6.67159119911073e-06, "loss": 0.321746826171875, "step": 10330 }, { "epoch": 0.6983236447208327, "grad_norm": 1.539954662322998, "learning_rate": 6.6688471857353465e-06, "loss": 0.247406005859375, "step": 10331 }, { "epoch": 0.698391239691767, "grad_norm": 2.315941095352173, "learning_rate": 6.666103575472947e-06, "loss": 0.25482177734375, "step": 10332 }, { "epoch": 0.6984588346627011, "grad_norm": 1.1963093280792236, "learning_rate": 6.663360368456286e-06, "loss": 0.206878662109375, "step": 10333 }, { "epoch": 0.6985264296336352, "grad_norm": 1.6610149145126343, "learning_rate": 6.660617564818095e-06, "loss": 0.277435302734375, "step": 10334 }, { "epoch": 0.6985940246045694, "grad_norm": 1.4856083393096924, "learning_rate": 6.657875164691083e-06, "loss": 0.2247314453125, "step": 10335 }, { "epoch": 0.6986616195755035, "grad_norm": 0.9374023675918579, "learning_rate": 6.655133168207959e-06, "loss": 0.20526123046875, "step": 10336 }, { "epoch": 0.6987292145464378, "grad_norm": 0.9328019618988037, "learning_rate": 6.65239157550138e-06, "loss": 0.18743896484375, "step": 10337 }, { "epoch": 0.6987968095173719, "grad_norm": 1.5651476383209229, "learning_rate": 6.6496503867040185e-06, "loss": 0.299652099609375, "step": 10338 }, { "epoch": 0.6988644044883061, "grad_norm": 0.9515236020088196, "learning_rate": 6.646909601948492e-06, "loss": 0.130340576171875, "step": 10339 }, { "epoch": 0.6989319994592402, "grad_norm": 1.1379003524780273, "learning_rate": 6.64416922136743e-06, "loss": 0.21112060546875, "step": 10340 }, { "epoch": 0.6989995944301743, "grad_norm": 1.8783578872680664, "learning_rate": 6.6414292450934275e-06, "loss": 0.24639892578125, "step": 10341 }, { "epoch": 0.6990671894011086, "grad_norm": 1.3816099166870117, "learning_rate": 6.638689673259059e-06, "loss": 0.30145263671875, "step": 10342 }, { "epoch": 0.6991347843720427, "grad_norm": 0.7313968539237976, "learning_rate": 6.6359505059968815e-06, "loss": 0.125518798828125, "step": 10343 }, { "epoch": 0.6992023793429769, "grad_norm": 2.30112624168396, "learning_rate": 6.633211743439435e-06, "loss": 0.25323486328125, "step": 10344 }, { "epoch": 0.699269974313911, "grad_norm": 0.7019345760345459, "learning_rate": 6.630473385719237e-06, "loss": 0.1291351318359375, "step": 10345 }, { "epoch": 0.6993375692848453, "grad_norm": 1.3442693948745728, "learning_rate": 6.627735432968785e-06, "loss": 0.21099853515625, "step": 10346 }, { "epoch": 0.6994051642557794, "grad_norm": 1.7486528158187866, "learning_rate": 6.62499788532056e-06, "loss": 0.276824951171875, "step": 10347 }, { "epoch": 0.6994727592267135, "grad_norm": 0.39147505164146423, "learning_rate": 6.622260742907016e-06, "loss": 0.07918548583984375, "step": 10348 }, { "epoch": 0.6995403541976477, "grad_norm": 1.1305748224258423, "learning_rate": 6.619524005860608e-06, "loss": 0.217681884765625, "step": 10349 }, { "epoch": 0.6996079491685818, "grad_norm": 0.9275104999542236, "learning_rate": 6.616787674313735e-06, "loss": 0.243133544921875, "step": 10350 }, { "epoch": 0.699675544139516, "grad_norm": 1.424800157546997, "learning_rate": 6.61405174839882e-06, "loss": 0.316009521484375, "step": 10351 }, { "epoch": 0.6997431391104502, "grad_norm": 1.121342420578003, "learning_rate": 6.611316228248222e-06, "loss": 0.2256622314453125, "step": 10352 }, { "epoch": 0.6998107340813844, "grad_norm": 1.38944411277771, "learning_rate": 6.608581113994319e-06, "loss": 0.28741455078125, "step": 10353 }, { "epoch": 0.6998783290523185, "grad_norm": 1.165103793144226, "learning_rate": 6.605846405769455e-06, "loss": 0.169891357421875, "step": 10354 }, { "epoch": 0.6999459240232526, "grad_norm": 1.7659488916397095, "learning_rate": 6.603112103705932e-06, "loss": 0.201904296875, "step": 10355 }, { "epoch": 0.7000135189941868, "grad_norm": 0.7644330859184265, "learning_rate": 6.60037820793607e-06, "loss": 0.12823486328125, "step": 10356 }, { "epoch": 0.700081113965121, "grad_norm": 1.578248381614685, "learning_rate": 6.597644718592148e-06, "loss": 0.228179931640625, "step": 10357 }, { "epoch": 0.7001487089360552, "grad_norm": 1.879407286643982, "learning_rate": 6.594911635806428e-06, "loss": 0.2821044921875, "step": 10358 }, { "epoch": 0.7002163039069893, "grad_norm": 1.867336630821228, "learning_rate": 6.592178959711153e-06, "loss": 0.25341796875, "step": 10359 }, { "epoch": 0.7002838988779235, "grad_norm": 1.1934927701950073, "learning_rate": 6.5894466904385485e-06, "loss": 0.20056915283203125, "step": 10360 }, { "epoch": 0.7003514938488576, "grad_norm": 1.0098294019699097, "learning_rate": 6.5867148281208114e-06, "loss": 0.179901123046875, "step": 10361 }, { "epoch": 0.7004190888197918, "grad_norm": 1.6493310928344727, "learning_rate": 6.583983372890144e-06, "loss": 0.25921630859375, "step": 10362 }, { "epoch": 0.700486683790726, "grad_norm": 1.262853741645813, "learning_rate": 6.581252324878689e-06, "loss": 0.285491943359375, "step": 10363 }, { "epoch": 0.7005542787616601, "grad_norm": 0.9424628615379333, "learning_rate": 6.5785216842186074e-06, "loss": 0.17615509033203125, "step": 10364 }, { "epoch": 0.7006218737325943, "grad_norm": 0.7579466104507446, "learning_rate": 6.575791451042019e-06, "loss": 0.17767333984375, "step": 10365 }, { "epoch": 0.7006894687035284, "grad_norm": 1.388529658317566, "learning_rate": 6.573061625481031e-06, "loss": 0.261962890625, "step": 10366 }, { "epoch": 0.7007570636744627, "grad_norm": 2.079777717590332, "learning_rate": 6.570332207667732e-06, "loss": 0.279144287109375, "step": 10367 }, { "epoch": 0.7008246586453968, "grad_norm": 1.2218410968780518, "learning_rate": 6.567603197734174e-06, "loss": 0.27886962890625, "step": 10368 }, { "epoch": 0.7008922536163309, "grad_norm": 1.085287094116211, "learning_rate": 6.564874595812419e-06, "loss": 0.21826171875, "step": 10369 }, { "epoch": 0.7009598485872651, "grad_norm": 0.5641934871673584, "learning_rate": 6.562146402034489e-06, "loss": 0.08612823486328125, "step": 10370 }, { "epoch": 0.7010274435581992, "grad_norm": 0.9474499225616455, "learning_rate": 6.559418616532391e-06, "loss": 0.18634033203125, "step": 10371 }, { "epoch": 0.7010950385291335, "grad_norm": 1.5460057258605957, "learning_rate": 6.556691239438111e-06, "loss": 0.296539306640625, "step": 10372 }, { "epoch": 0.7011626335000676, "grad_norm": 1.651962399482727, "learning_rate": 6.553964270883618e-06, "loss": 0.3016357421875, "step": 10373 }, { "epoch": 0.7012302284710018, "grad_norm": 0.966573178768158, "learning_rate": 6.5512377110008514e-06, "loss": 0.2357177734375, "step": 10374 }, { "epoch": 0.7012978234419359, "grad_norm": 1.6592000722885132, "learning_rate": 6.54851155992176e-06, "loss": 0.32562255859375, "step": 10375 }, { "epoch": 0.70136541841287, "grad_norm": 0.78684002161026, "learning_rate": 6.545785817778227e-06, "loss": 0.1209259033203125, "step": 10376 }, { "epoch": 0.7014330133838043, "grad_norm": 0.7924097776412964, "learning_rate": 6.543060484702158e-06, "loss": 0.134979248046875, "step": 10377 }, { "epoch": 0.7015006083547384, "grad_norm": 0.7775483131408691, "learning_rate": 6.540335560825419e-06, "loss": 0.139129638671875, "step": 10378 }, { "epoch": 0.7015682033256726, "grad_norm": 1.428910732269287, "learning_rate": 6.537611046279847e-06, "loss": 0.294219970703125, "step": 10379 }, { "epoch": 0.7016357982966067, "grad_norm": 1.1931971311569214, "learning_rate": 6.534886941197285e-06, "loss": 0.29669189453125, "step": 10380 }, { "epoch": 0.7017033932675409, "grad_norm": 1.071344017982483, "learning_rate": 6.532163245709535e-06, "loss": 0.1998138427734375, "step": 10381 }, { "epoch": 0.7017709882384751, "grad_norm": 1.8073756694793701, "learning_rate": 6.52943995994839e-06, "loss": 0.2246551513671875, "step": 10382 }, { "epoch": 0.7018385832094092, "grad_norm": 1.8540574312210083, "learning_rate": 6.526717084045617e-06, "loss": 0.2547607421875, "step": 10383 }, { "epoch": 0.7019061781803434, "grad_norm": 1.34348464012146, "learning_rate": 6.5239946181329675e-06, "loss": 0.298095703125, "step": 10384 }, { "epoch": 0.7019737731512775, "grad_norm": 1.4414703845977783, "learning_rate": 6.521272562342167e-06, "loss": 0.297882080078125, "step": 10385 }, { "epoch": 0.7020413681222117, "grad_norm": 1.4052528142929077, "learning_rate": 6.51855091680493e-06, "loss": 0.311767578125, "step": 10386 }, { "epoch": 0.7021089630931459, "grad_norm": 2.1414647102355957, "learning_rate": 6.515829681652941e-06, "loss": 0.2833251953125, "step": 10387 }, { "epoch": 0.7021765580640801, "grad_norm": 0.5910675525665283, "learning_rate": 6.513108857017886e-06, "loss": 0.11127471923828125, "step": 10388 }, { "epoch": 0.7022441530350142, "grad_norm": 0.9277917742729187, "learning_rate": 6.510388443031396e-06, "loss": 0.12763023376464844, "step": 10389 }, { "epoch": 0.7023117480059483, "grad_norm": 0.7989482283592224, "learning_rate": 6.507668439825106e-06, "loss": 0.09309005737304688, "step": 10390 }, { "epoch": 0.7023793429768825, "grad_norm": 0.9482844471931458, "learning_rate": 6.50494884753064e-06, "loss": 0.2005462646484375, "step": 10391 }, { "epoch": 0.7024469379478167, "grad_norm": 1.0791901350021362, "learning_rate": 6.5022296662795675e-06, "loss": 0.23126220703125, "step": 10392 }, { "epoch": 0.7025145329187509, "grad_norm": 1.5873730182647705, "learning_rate": 6.499510896203477e-06, "loss": 0.26220703125, "step": 10393 }, { "epoch": 0.702582127889685, "grad_norm": 1.565035343170166, "learning_rate": 6.496792537433913e-06, "loss": 0.17449951171875, "step": 10394 }, { "epoch": 0.7026497228606192, "grad_norm": 0.7426137924194336, "learning_rate": 6.494074590102408e-06, "loss": 0.079254150390625, "step": 10395 }, { "epoch": 0.7027173178315533, "grad_norm": 0.9977294206619263, "learning_rate": 6.4913570543404705e-06, "loss": 0.17649078369140625, "step": 10396 }, { "epoch": 0.7027849128024874, "grad_norm": 2.2865588665008545, "learning_rate": 6.4886399302795935e-06, "loss": 0.2400665283203125, "step": 10397 }, { "epoch": 0.7028525077734217, "grad_norm": 1.3508002758026123, "learning_rate": 6.48592321805125e-06, "loss": 0.2989501953125, "step": 10398 }, { "epoch": 0.7029201027443558, "grad_norm": 1.306593656539917, "learning_rate": 6.483206917786887e-06, "loss": 0.223419189453125, "step": 10399 }, { "epoch": 0.70298769771529, "grad_norm": 1.1878242492675781, "learning_rate": 6.4804910296179355e-06, "loss": 0.1910247802734375, "step": 10400 }, { "epoch": 0.7030552926862241, "grad_norm": 1.4965211153030396, "learning_rate": 6.47777555367582e-06, "loss": 0.232574462890625, "step": 10401 }, { "epoch": 0.7031228876571584, "grad_norm": 2.0926244258880615, "learning_rate": 6.475060490091917e-06, "loss": 0.25830078125, "step": 10402 }, { "epoch": 0.7031904826280925, "grad_norm": 1.5484782457351685, "learning_rate": 6.4723458389975985e-06, "loss": 0.29595947265625, "step": 10403 }, { "epoch": 0.7032580775990266, "grad_norm": 1.4472779035568237, "learning_rate": 6.469631600524231e-06, "loss": 0.275146484375, "step": 10404 }, { "epoch": 0.7033256725699608, "grad_norm": 1.1670461893081665, "learning_rate": 6.466917774803128e-06, "loss": 0.2623443603515625, "step": 10405 }, { "epoch": 0.7033932675408949, "grad_norm": 1.8094688653945923, "learning_rate": 6.464204361965614e-06, "loss": 0.2914276123046875, "step": 10406 }, { "epoch": 0.7034608625118292, "grad_norm": 1.3547660112380981, "learning_rate": 6.461491362142976e-06, "loss": 0.26104736328125, "step": 10407 }, { "epoch": 0.7035284574827633, "grad_norm": 3.323798179626465, "learning_rate": 6.458778775466486e-06, "loss": 0.432952880859375, "step": 10408 }, { "epoch": 0.7035960524536975, "grad_norm": 2.083019495010376, "learning_rate": 6.456066602067396e-06, "loss": 0.27484130859375, "step": 10409 }, { "epoch": 0.7036636474246316, "grad_norm": 1.1057970523834229, "learning_rate": 6.453354842076939e-06, "loss": 0.1993865966796875, "step": 10410 }, { "epoch": 0.7037312423955657, "grad_norm": 0.8297266960144043, "learning_rate": 6.45064349562632e-06, "loss": 0.1572418212890625, "step": 10411 }, { "epoch": 0.7037988373665, "grad_norm": 1.6316370964050293, "learning_rate": 6.447932562846748e-06, "loss": 0.288787841796875, "step": 10412 }, { "epoch": 0.7038664323374341, "grad_norm": 1.0877283811569214, "learning_rate": 6.445222043869378e-06, "loss": 0.279052734375, "step": 10413 }, { "epoch": 0.7039340273083683, "grad_norm": 2.3558316230773926, "learning_rate": 6.4425119388253655e-06, "loss": 0.266387939453125, "step": 10414 }, { "epoch": 0.7040016222793024, "grad_norm": 0.8011128306388855, "learning_rate": 6.439802247845845e-06, "loss": 0.18134307861328125, "step": 10415 }, { "epoch": 0.7040692172502366, "grad_norm": 1.4985718727111816, "learning_rate": 6.437092971061921e-06, "loss": 0.29290771484375, "step": 10416 }, { "epoch": 0.7041368122211707, "grad_norm": 0.9749922752380371, "learning_rate": 6.434384108604702e-06, "loss": 0.21685791015625, "step": 10417 }, { "epoch": 0.7042044071921049, "grad_norm": 0.9956938624382019, "learning_rate": 6.43167566060524e-06, "loss": 0.194305419921875, "step": 10418 }, { "epoch": 0.7042720021630391, "grad_norm": 0.6730072498321533, "learning_rate": 6.428967627194598e-06, "loss": 0.17572021484375, "step": 10419 }, { "epoch": 0.7043395971339732, "grad_norm": 1.1312886476516724, "learning_rate": 6.426260008503806e-06, "loss": 0.1948089599609375, "step": 10420 }, { "epoch": 0.7044071921049074, "grad_norm": 1.2537881135940552, "learning_rate": 6.423552804663875e-06, "loss": 0.16295433044433594, "step": 10421 }, { "epoch": 0.7044747870758415, "grad_norm": 0.999426543712616, "learning_rate": 6.420846015805795e-06, "loss": 0.26568603515625, "step": 10422 }, { "epoch": 0.7045423820467758, "grad_norm": 0.8452553153038025, "learning_rate": 6.4181396420605375e-06, "loss": 0.1505584716796875, "step": 10423 }, { "epoch": 0.7046099770177099, "grad_norm": 1.173523187637329, "learning_rate": 6.415433683559054e-06, "loss": 0.2916259765625, "step": 10424 }, { "epoch": 0.704677571988644, "grad_norm": 1.1284980773925781, "learning_rate": 6.412728140432278e-06, "loss": 0.232208251953125, "step": 10425 }, { "epoch": 0.7047451669595782, "grad_norm": 1.5126127004623413, "learning_rate": 6.410023012811115e-06, "loss": 0.26043701171875, "step": 10426 }, { "epoch": 0.7048127619305123, "grad_norm": 1.4888501167297363, "learning_rate": 6.407318300826457e-06, "loss": 0.2513427734375, "step": 10427 }, { "epoch": 0.7048803569014466, "grad_norm": 1.0734630823135376, "learning_rate": 6.404614004609185e-06, "loss": 0.1931610107421875, "step": 10428 }, { "epoch": 0.7049479518723807, "grad_norm": 0.9947347044944763, "learning_rate": 6.401910124290132e-06, "loss": 0.184234619140625, "step": 10429 }, { "epoch": 0.7050155468433149, "grad_norm": 0.9548178911209106, "learning_rate": 6.39920666000015e-06, "loss": 0.218353271484375, "step": 10430 }, { "epoch": 0.705083141814249, "grad_norm": 1.0011297464370728, "learning_rate": 6.396503611870025e-06, "loss": 0.2064056396484375, "step": 10431 }, { "epoch": 0.7051507367851831, "grad_norm": 1.545825719833374, "learning_rate": 6.3938009800305654e-06, "loss": 0.328125, "step": 10432 }, { "epoch": 0.7052183317561174, "grad_norm": 0.9686801433563232, "learning_rate": 6.391098764612536e-06, "loss": 0.2125091552734375, "step": 10433 }, { "epoch": 0.7052859267270515, "grad_norm": 1.1775588989257812, "learning_rate": 6.388396965746687e-06, "loss": 0.27325439453125, "step": 10434 }, { "epoch": 0.7053535216979857, "grad_norm": 1.6596753597259521, "learning_rate": 6.385695583563747e-06, "loss": 0.2659912109375, "step": 10435 }, { "epoch": 0.7054211166689198, "grad_norm": 1.4863955974578857, "learning_rate": 6.382994618194427e-06, "loss": 0.3173828125, "step": 10436 }, { "epoch": 0.705488711639854, "grad_norm": 1.1920785903930664, "learning_rate": 6.380294069769416e-06, "loss": 0.295257568359375, "step": 10437 }, { "epoch": 0.7055563066107882, "grad_norm": 0.9903531074523926, "learning_rate": 6.3775939384193825e-06, "loss": 0.186737060546875, "step": 10438 }, { "epoch": 0.7056239015817223, "grad_norm": 1.330058217048645, "learning_rate": 6.374894224274977e-06, "loss": 0.2755126953125, "step": 10439 }, { "epoch": 0.7056914965526565, "grad_norm": 1.1656162738800049, "learning_rate": 6.372194927466822e-06, "loss": 0.273681640625, "step": 10440 }, { "epoch": 0.7057590915235906, "grad_norm": 1.3961787223815918, "learning_rate": 6.369496048125544e-06, "loss": 0.2266998291015625, "step": 10441 }, { "epoch": 0.7058266864945248, "grad_norm": 0.7983741164207458, "learning_rate": 6.366797586381709e-06, "loss": 0.1640472412109375, "step": 10442 }, { "epoch": 0.705894281465459, "grad_norm": 1.16058349609375, "learning_rate": 6.3640995423659055e-06, "loss": 0.1801605224609375, "step": 10443 }, { "epoch": 0.7059618764363932, "grad_norm": 2.2304935455322266, "learning_rate": 6.3614019162086636e-06, "loss": 0.290252685546875, "step": 10444 }, { "epoch": 0.7060294714073273, "grad_norm": 1.6048779487609863, "learning_rate": 6.358704708040523e-06, "loss": 0.2794189453125, "step": 10445 }, { "epoch": 0.7060970663782614, "grad_norm": 1.9678891897201538, "learning_rate": 6.356007917991989e-06, "loss": 0.2977447509765625, "step": 10446 }, { "epoch": 0.7061646613491956, "grad_norm": 2.425837516784668, "learning_rate": 6.353311546193548e-06, "loss": 0.31341552734375, "step": 10447 }, { "epoch": 0.7062322563201298, "grad_norm": 1.1892125606536865, "learning_rate": 6.350615592775668e-06, "loss": 0.1981201171875, "step": 10448 }, { "epoch": 0.706299851291064, "grad_norm": 0.9846500158309937, "learning_rate": 6.347920057868796e-06, "loss": 0.1565704345703125, "step": 10449 }, { "epoch": 0.7063674462619981, "grad_norm": 1.2201396226882935, "learning_rate": 6.345224941603358e-06, "loss": 0.2493896484375, "step": 10450 }, { "epoch": 0.7064350412329322, "grad_norm": 1.2890008687973022, "learning_rate": 6.342530244109761e-06, "loss": 0.27044677734375, "step": 10451 }, { "epoch": 0.7065026362038664, "grad_norm": 1.162806510925293, "learning_rate": 6.33983596551839e-06, "loss": 0.2098388671875, "step": 10452 }, { "epoch": 0.7065702311748006, "grad_norm": 1.505527138710022, "learning_rate": 6.337142105959608e-06, "loss": 0.2119140625, "step": 10453 }, { "epoch": 0.7066378261457348, "grad_norm": 1.6552413702011108, "learning_rate": 6.3344486655637755e-06, "loss": 0.2550201416015625, "step": 10454 }, { "epoch": 0.7067054211166689, "grad_norm": 1.4327956438064575, "learning_rate": 6.331755644461194e-06, "loss": 0.251953125, "step": 10455 }, { "epoch": 0.7067730160876031, "grad_norm": 1.4028037786483765, "learning_rate": 6.329063042782189e-06, "loss": 0.266815185546875, "step": 10456 }, { "epoch": 0.7068406110585372, "grad_norm": 1.4596234560012817, "learning_rate": 6.3263708606570365e-06, "loss": 0.2891845703125, "step": 10457 }, { "epoch": 0.7069082060294714, "grad_norm": 2.4349875450134277, "learning_rate": 6.323679098216004e-06, "loss": 0.28564453125, "step": 10458 }, { "epoch": 0.7069758010004056, "grad_norm": 1.5134034156799316, "learning_rate": 6.32098775558934e-06, "loss": 0.1636962890625, "step": 10459 }, { "epoch": 0.7070433959713397, "grad_norm": 0.7957103848457336, "learning_rate": 6.318296832907251e-06, "loss": 0.16009521484375, "step": 10460 }, { "epoch": 0.7071109909422739, "grad_norm": 1.1021287441253662, "learning_rate": 6.315606330299957e-06, "loss": 0.271270751953125, "step": 10461 }, { "epoch": 0.707178585913208, "grad_norm": 1.2250458002090454, "learning_rate": 6.3129162478976376e-06, "loss": 0.298828125, "step": 10462 }, { "epoch": 0.7072461808841423, "grad_norm": 0.7515692114830017, "learning_rate": 6.3102265858304535e-06, "loss": 0.1601104736328125, "step": 10463 }, { "epoch": 0.7073137758550764, "grad_norm": 2.158801794052124, "learning_rate": 6.307537344228547e-06, "loss": 0.2202911376953125, "step": 10464 }, { "epoch": 0.7073813708260105, "grad_norm": 1.1417415142059326, "learning_rate": 6.304848523222045e-06, "loss": 0.328887939453125, "step": 10465 }, { "epoch": 0.7074489657969447, "grad_norm": 1.5772572755813599, "learning_rate": 6.302160122941039e-06, "loss": 0.29315185546875, "step": 10466 }, { "epoch": 0.7075165607678788, "grad_norm": 2.210991382598877, "learning_rate": 6.299472143515627e-06, "loss": 0.2613525390625, "step": 10467 }, { "epoch": 0.7075841557388131, "grad_norm": 1.197177529335022, "learning_rate": 6.296784585075851e-06, "loss": 0.19654464721679688, "step": 10468 }, { "epoch": 0.7076517507097472, "grad_norm": 1.46853768825531, "learning_rate": 6.294097447751767e-06, "loss": 0.2530517578125, "step": 10469 }, { "epoch": 0.7077193456806814, "grad_norm": 1.1675564050674438, "learning_rate": 6.291410731673389e-06, "loss": 0.2857666015625, "step": 10470 }, { "epoch": 0.7077869406516155, "grad_norm": 0.9376272559165955, "learning_rate": 6.288724436970721e-06, "loss": 0.1986083984375, "step": 10471 }, { "epoch": 0.7078545356225496, "grad_norm": 1.0376473665237427, "learning_rate": 6.2860385637737365e-06, "loss": 0.212493896484375, "step": 10472 }, { "epoch": 0.7079221305934839, "grad_norm": 0.8448933959007263, "learning_rate": 6.283353112212398e-06, "loss": 0.21673583984375, "step": 10473 }, { "epoch": 0.707989725564418, "grad_norm": 0.8411234021186829, "learning_rate": 6.280668082416644e-06, "loss": 0.15401458740234375, "step": 10474 }, { "epoch": 0.7080573205353522, "grad_norm": 1.5227279663085938, "learning_rate": 6.277983474516395e-06, "loss": 0.23992919921875, "step": 10475 }, { "epoch": 0.7081249155062863, "grad_norm": 0.8765318989753723, "learning_rate": 6.275299288641547e-06, "loss": 0.228851318359375, "step": 10476 }, { "epoch": 0.7081925104772205, "grad_norm": 0.944529116153717, "learning_rate": 6.272615524921976e-06, "loss": 0.219024658203125, "step": 10477 }, { "epoch": 0.7082601054481547, "grad_norm": 0.6339702606201172, "learning_rate": 6.2699321834875414e-06, "loss": 0.0740966796875, "step": 10478 }, { "epoch": 0.7083277004190888, "grad_norm": 1.508961796760559, "learning_rate": 6.267249264468075e-06, "loss": 0.245208740234375, "step": 10479 }, { "epoch": 0.708395295390023, "grad_norm": 1.4928536415100098, "learning_rate": 6.264566767993407e-06, "loss": 0.174285888671875, "step": 10480 }, { "epoch": 0.7084628903609571, "grad_norm": 1.2446688413619995, "learning_rate": 6.261884694193316e-06, "loss": 0.282928466796875, "step": 10481 }, { "epoch": 0.7085304853318913, "grad_norm": 1.133617639541626, "learning_rate": 6.259203043197587e-06, "loss": 0.22894287109375, "step": 10482 }, { "epoch": 0.7085980803028255, "grad_norm": 0.9494367241859436, "learning_rate": 6.2565218151359795e-06, "loss": 0.2567138671875, "step": 10483 }, { "epoch": 0.7086656752737597, "grad_norm": 2.0193979740142822, "learning_rate": 6.253841010138213e-06, "loss": 0.24542236328125, "step": 10484 }, { "epoch": 0.7087332702446938, "grad_norm": 1.3783588409423828, "learning_rate": 6.251160628334013e-06, "loss": 0.1895751953125, "step": 10485 }, { "epoch": 0.7088008652156279, "grad_norm": 1.1751554012298584, "learning_rate": 6.248480669853072e-06, "loss": 0.18331146240234375, "step": 10486 }, { "epoch": 0.7088684601865621, "grad_norm": 1.4814380407333374, "learning_rate": 6.245801134825061e-06, "loss": 0.29888916015625, "step": 10487 }, { "epoch": 0.7089360551574962, "grad_norm": 1.17531156539917, "learning_rate": 6.243122023379633e-06, "loss": 0.251220703125, "step": 10488 }, { "epoch": 0.7090036501284305, "grad_norm": 1.2391793727874756, "learning_rate": 6.240443335646418e-06, "loss": 0.2547607421875, "step": 10489 }, { "epoch": 0.7090712450993646, "grad_norm": 1.2649832963943481, "learning_rate": 6.2377650717550315e-06, "loss": 0.1788330078125, "step": 10490 }, { "epoch": 0.7091388400702988, "grad_norm": 1.1788914203643799, "learning_rate": 6.235087231835063e-06, "loss": 0.25177001953125, "step": 10491 }, { "epoch": 0.7092064350412329, "grad_norm": 1.0793043375015259, "learning_rate": 6.232409816016076e-06, "loss": 0.1798248291015625, "step": 10492 }, { "epoch": 0.709274030012167, "grad_norm": 0.7250484228134155, "learning_rate": 6.229732824427638e-06, "loss": 0.10965728759765625, "step": 10493 }, { "epoch": 0.7093416249831013, "grad_norm": 1.3552714586257935, "learning_rate": 6.227056257199258e-06, "loss": 0.278106689453125, "step": 10494 }, { "epoch": 0.7094092199540354, "grad_norm": 0.8897371292114258, "learning_rate": 6.224380114460459e-06, "loss": 0.257568359375, "step": 10495 }, { "epoch": 0.7094768149249696, "grad_norm": 1.6405909061431885, "learning_rate": 6.22170439634073e-06, "loss": 0.236572265625, "step": 10496 }, { "epoch": 0.7095444098959037, "grad_norm": 1.029051423072815, "learning_rate": 6.219029102969524e-06, "loss": 0.24212646484375, "step": 10497 }, { "epoch": 0.709612004866838, "grad_norm": 1.1704199314117432, "learning_rate": 6.216354234476304e-06, "loss": 0.2794189453125, "step": 10498 }, { "epoch": 0.7096795998377721, "grad_norm": 0.8909435272216797, "learning_rate": 6.213679790990489e-06, "loss": 0.1390380859375, "step": 10499 }, { "epoch": 0.7097471948087062, "grad_norm": 1.0677906274795532, "learning_rate": 6.211005772641491e-06, "loss": 0.209259033203125, "step": 10500 }, { "epoch": 0.7098147897796404, "grad_norm": 1.1184412240982056, "learning_rate": 6.208332179558689e-06, "loss": 0.18658447265625, "step": 10501 }, { "epoch": 0.7098823847505745, "grad_norm": 1.105013370513916, "learning_rate": 6.2056590118714514e-06, "loss": 0.20624160766601562, "step": 10502 }, { "epoch": 0.7099499797215088, "grad_norm": 1.256136417388916, "learning_rate": 6.202986269709117e-06, "loss": 0.2310791015625, "step": 10503 }, { "epoch": 0.7100175746924429, "grad_norm": 1.2395068407058716, "learning_rate": 6.200313953201025e-06, "loss": 0.19287109375, "step": 10504 }, { "epoch": 0.7100851696633771, "grad_norm": 1.3536959886550903, "learning_rate": 6.1976420624764595e-06, "loss": 0.260894775390625, "step": 10505 }, { "epoch": 0.7101527646343112, "grad_norm": 1.264070987701416, "learning_rate": 6.194970597664721e-06, "loss": 0.1735382080078125, "step": 10506 }, { "epoch": 0.7102203596052453, "grad_norm": 2.162914276123047, "learning_rate": 6.192299558895058e-06, "loss": 0.2536468505859375, "step": 10507 }, { "epoch": 0.7102879545761795, "grad_norm": 1.528185486793518, "learning_rate": 6.189628946296713e-06, "loss": 0.1952667236328125, "step": 10508 }, { "epoch": 0.7103555495471137, "grad_norm": 1.3142136335372925, "learning_rate": 6.186958759998921e-06, "loss": 0.20819091796875, "step": 10509 }, { "epoch": 0.7104231445180479, "grad_norm": 1.3593041896820068, "learning_rate": 6.18428900013086e-06, "loss": 0.27545166015625, "step": 10510 }, { "epoch": 0.710490739488982, "grad_norm": 0.9341634511947632, "learning_rate": 6.181619666821727e-06, "loss": 0.1837615966796875, "step": 10511 }, { "epoch": 0.7105583344599162, "grad_norm": 1.1041233539581299, "learning_rate": 6.178950760200676e-06, "loss": 0.2058258056640625, "step": 10512 }, { "epoch": 0.7106259294308503, "grad_norm": 1.2424002885818481, "learning_rate": 6.176282280396845e-06, "loss": 0.21392822265625, "step": 10513 }, { "epoch": 0.7106935244017845, "grad_norm": 1.0995328426361084, "learning_rate": 6.173614227539352e-06, "loss": 0.2061004638671875, "step": 10514 }, { "epoch": 0.7107611193727187, "grad_norm": 2.4016406536102295, "learning_rate": 6.170946601757293e-06, "loss": 0.342620849609375, "step": 10515 }, { "epoch": 0.7108287143436528, "grad_norm": 1.1376301050186157, "learning_rate": 6.168279403179738e-06, "loss": 0.1671295166015625, "step": 10516 }, { "epoch": 0.710896309314587, "grad_norm": 1.5170354843139648, "learning_rate": 6.16561263193576e-06, "loss": 0.2429046630859375, "step": 10517 }, { "epoch": 0.7109639042855211, "grad_norm": 1.0678507089614868, "learning_rate": 6.1629462881543795e-06, "loss": 0.249969482421875, "step": 10518 }, { "epoch": 0.7110314992564554, "grad_norm": 1.4162955284118652, "learning_rate": 6.16028037196461e-06, "loss": 0.278564453125, "step": 10519 }, { "epoch": 0.7110990942273895, "grad_norm": 1.1407641172409058, "learning_rate": 6.157614883495458e-06, "loss": 0.23394775390625, "step": 10520 }, { "epoch": 0.7111666891983236, "grad_norm": 1.0020054578781128, "learning_rate": 6.154949822875878e-06, "loss": 0.1621551513671875, "step": 10521 }, { "epoch": 0.7112342841692578, "grad_norm": 0.6965578198432922, "learning_rate": 6.1522851902348435e-06, "loss": 0.17132568359375, "step": 10522 }, { "epoch": 0.7113018791401919, "grad_norm": 1.739631175994873, "learning_rate": 6.149620985701265e-06, "loss": 0.276702880859375, "step": 10523 }, { "epoch": 0.7113694741111262, "grad_norm": 1.3337502479553223, "learning_rate": 6.146957209404068e-06, "loss": 0.28179931640625, "step": 10524 }, { "epoch": 0.7114370690820603, "grad_norm": 1.0752068758010864, "learning_rate": 6.144293861472137e-06, "loss": 0.264801025390625, "step": 10525 }, { "epoch": 0.7115046640529945, "grad_norm": 1.3354198932647705, "learning_rate": 6.141630942034342e-06, "loss": 0.243927001953125, "step": 10526 }, { "epoch": 0.7115722590239286, "grad_norm": 1.4595814943313599, "learning_rate": 6.138968451219532e-06, "loss": 0.2444000244140625, "step": 10527 }, { "epoch": 0.7116398539948627, "grad_norm": 1.6135268211364746, "learning_rate": 6.1363063891565344e-06, "loss": 0.24871826171875, "step": 10528 }, { "epoch": 0.711707448965797, "grad_norm": 0.4543502926826477, "learning_rate": 6.133644755974151e-06, "loss": 0.08386611938476562, "step": 10529 }, { "epoch": 0.7117750439367311, "grad_norm": 0.9632035493850708, "learning_rate": 6.130983551801185e-06, "loss": 0.221893310546875, "step": 10530 }, { "epoch": 0.7118426389076653, "grad_norm": 2.678575277328491, "learning_rate": 6.128322776766384e-06, "loss": 0.24224853515625, "step": 10531 }, { "epoch": 0.7119102338785994, "grad_norm": 1.2663109302520752, "learning_rate": 6.125662430998495e-06, "loss": 0.182464599609375, "step": 10532 }, { "epoch": 0.7119778288495336, "grad_norm": 1.1086331605911255, "learning_rate": 6.123002514626257e-06, "loss": 0.16107940673828125, "step": 10533 }, { "epoch": 0.7120454238204678, "grad_norm": 1.091773509979248, "learning_rate": 6.120343027778353e-06, "loss": 0.267669677734375, "step": 10534 }, { "epoch": 0.7121130187914019, "grad_norm": 1.8076272010803223, "learning_rate": 6.1176839705834845e-06, "loss": 0.234649658203125, "step": 10535 }, { "epoch": 0.7121806137623361, "grad_norm": 3.0660719871520996, "learning_rate": 6.1150253431702935e-06, "loss": 0.326507568359375, "step": 10536 }, { "epoch": 0.7122482087332702, "grad_norm": 1.7410476207733154, "learning_rate": 6.112367145667438e-06, "loss": 0.27618408203125, "step": 10537 }, { "epoch": 0.7123158037042044, "grad_norm": 0.7662447094917297, "learning_rate": 6.10970937820353e-06, "loss": 0.08740234375, "step": 10538 }, { "epoch": 0.7123833986751386, "grad_norm": 0.9527754783630371, "learning_rate": 6.107052040907172e-06, "loss": 0.17144775390625, "step": 10539 }, { "epoch": 0.7124509936460728, "grad_norm": 0.8793551921844482, "learning_rate": 6.10439513390694e-06, "loss": 0.183441162109375, "step": 10540 }, { "epoch": 0.7125185886170069, "grad_norm": 1.659074306488037, "learning_rate": 6.101738657331392e-06, "loss": 0.303192138671875, "step": 10541 }, { "epoch": 0.712586183587941, "grad_norm": 1.2491202354431152, "learning_rate": 6.0990826113090665e-06, "loss": 0.254852294921875, "step": 10542 }, { "epoch": 0.7126537785588752, "grad_norm": 1.3169313669204712, "learning_rate": 6.096426995968478e-06, "loss": 0.32647705078125, "step": 10543 }, { "epoch": 0.7127213735298094, "grad_norm": 2.0663795471191406, "learning_rate": 6.093771811438122e-06, "loss": 0.2450408935546875, "step": 10544 }, { "epoch": 0.7127889685007436, "grad_norm": 1.6428920030593872, "learning_rate": 6.091117057846468e-06, "loss": 0.2572021484375, "step": 10545 }, { "epoch": 0.7128565634716777, "grad_norm": 1.3146389722824097, "learning_rate": 6.0884627353219854e-06, "loss": 0.182769775390625, "step": 10546 }, { "epoch": 0.7129241584426119, "grad_norm": 1.322279453277588, "learning_rate": 6.085808843993086e-06, "loss": 0.247802734375, "step": 10547 }, { "epoch": 0.712991753413546, "grad_norm": 1.3424423933029175, "learning_rate": 6.0831553839881945e-06, "loss": 0.2428131103515625, "step": 10548 }, { "epoch": 0.7130593483844802, "grad_norm": 1.2433758974075317, "learning_rate": 6.080502355435702e-06, "loss": 0.267120361328125, "step": 10549 }, { "epoch": 0.7131269433554144, "grad_norm": 1.17608642578125, "learning_rate": 6.0778497584639715e-06, "loss": 0.19408035278320312, "step": 10550 }, { "epoch": 0.7131945383263485, "grad_norm": 2.2468326091766357, "learning_rate": 6.075197593201358e-06, "loss": 0.358123779296875, "step": 10551 }, { "epoch": 0.7132621332972827, "grad_norm": 0.9103673100471497, "learning_rate": 6.0725458597761874e-06, "loss": 0.1586761474609375, "step": 10552 }, { "epoch": 0.7133297282682168, "grad_norm": 2.609424352645874, "learning_rate": 6.069894558316768e-06, "loss": 0.33154296875, "step": 10553 }, { "epoch": 0.7133973232391511, "grad_norm": 1.6951674222946167, "learning_rate": 6.067243688951384e-06, "loss": 0.2183837890625, "step": 10554 }, { "epoch": 0.7134649182100852, "grad_norm": 1.6054060459136963, "learning_rate": 6.0645932518083054e-06, "loss": 0.181793212890625, "step": 10555 }, { "epoch": 0.7135325131810193, "grad_norm": 0.9106293320655823, "learning_rate": 6.061943247015771e-06, "loss": 0.25146484375, "step": 10556 }, { "epoch": 0.7136001081519535, "grad_norm": 1.5194206237792969, "learning_rate": 6.0592936747020085e-06, "loss": 0.27081298828125, "step": 10557 }, { "epoch": 0.7136677031228876, "grad_norm": 0.6673310399055481, "learning_rate": 6.056644534995215e-06, "loss": 0.0924224853515625, "step": 10558 }, { "epoch": 0.7137352980938219, "grad_norm": 0.9245324730873108, "learning_rate": 6.053995828023587e-06, "loss": 0.1567535400390625, "step": 10559 }, { "epoch": 0.713802893064756, "grad_norm": 1.5737013816833496, "learning_rate": 6.051347553915266e-06, "loss": 0.3125, "step": 10560 }, { "epoch": 0.7138704880356902, "grad_norm": 0.9705219864845276, "learning_rate": 6.048699712798406e-06, "loss": 0.16485595703125, "step": 10561 }, { "epoch": 0.7139380830066243, "grad_norm": 1.3946646451950073, "learning_rate": 6.046052304801121e-06, "loss": 0.1851043701171875, "step": 10562 }, { "epoch": 0.7140056779775584, "grad_norm": 1.4162626266479492, "learning_rate": 6.043405330051508e-06, "loss": 0.256866455078125, "step": 10563 }, { "epoch": 0.7140732729484927, "grad_norm": 1.188302993774414, "learning_rate": 6.040758788677648e-06, "loss": 0.20648193359375, "step": 10564 }, { "epoch": 0.7141408679194268, "grad_norm": 1.3211861848831177, "learning_rate": 6.038112680807594e-06, "loss": 0.26220703125, "step": 10565 }, { "epoch": 0.714208462890361, "grad_norm": 3.2221789360046387, "learning_rate": 6.035467006569382e-06, "loss": 0.334320068359375, "step": 10566 }, { "epoch": 0.7142760578612951, "grad_norm": 2.1184051036834717, "learning_rate": 6.032821766091027e-06, "loss": 0.346099853515625, "step": 10567 }, { "epoch": 0.7143436528322293, "grad_norm": 1.0919665098190308, "learning_rate": 6.0301769595005205e-06, "loss": 0.242156982421875, "step": 10568 }, { "epoch": 0.7144112478031635, "grad_norm": 0.9696438312530518, "learning_rate": 6.027532586925834e-06, "loss": 0.226409912109375, "step": 10569 }, { "epoch": 0.7144788427740976, "grad_norm": 1.5048235654830933, "learning_rate": 6.024888648494921e-06, "loss": 0.2009429931640625, "step": 10570 }, { "epoch": 0.7145464377450318, "grad_norm": 0.9440188407897949, "learning_rate": 6.022245144335705e-06, "loss": 0.17623138427734375, "step": 10571 }, { "epoch": 0.7146140327159659, "grad_norm": 1.324313759803772, "learning_rate": 6.019602074576113e-06, "loss": 0.217926025390625, "step": 10572 }, { "epoch": 0.7146816276869001, "grad_norm": 1.0962392091751099, "learning_rate": 6.01695943934401e-06, "loss": 0.16864013671875, "step": 10573 }, { "epoch": 0.7147492226578342, "grad_norm": 1.4547736644744873, "learning_rate": 6.01431723876728e-06, "loss": 0.25762939453125, "step": 10574 }, { "epoch": 0.7148168176287685, "grad_norm": 1.4081634283065796, "learning_rate": 6.011675472973762e-06, "loss": 0.2218475341796875, "step": 10575 }, { "epoch": 0.7148844125997026, "grad_norm": 1.7234362363815308, "learning_rate": 6.009034142091284e-06, "loss": 0.27374267578125, "step": 10576 }, { "epoch": 0.7149520075706367, "grad_norm": 1.3856050968170166, "learning_rate": 6.006393246247647e-06, "loss": 0.262054443359375, "step": 10577 }, { "epoch": 0.7150196025415709, "grad_norm": 1.3477760553359985, "learning_rate": 6.003752785570636e-06, "loss": 0.294219970703125, "step": 10578 }, { "epoch": 0.715087197512505, "grad_norm": 1.1828933954238892, "learning_rate": 6.001112760188012e-06, "loss": 0.146759033203125, "step": 10579 }, { "epoch": 0.7151547924834393, "grad_norm": 0.9379999041557312, "learning_rate": 5.9984731702275166e-06, "loss": 0.15477752685546875, "step": 10580 }, { "epoch": 0.7152223874543734, "grad_norm": 1.4459425210952759, "learning_rate": 5.99583401581687e-06, "loss": 0.269775390625, "step": 10581 }, { "epoch": 0.7152899824253075, "grad_norm": 1.5572326183319092, "learning_rate": 5.993195297083768e-06, "loss": 0.183258056640625, "step": 10582 }, { "epoch": 0.7153575773962417, "grad_norm": 1.1890758275985718, "learning_rate": 5.990557014155891e-06, "loss": 0.29541015625, "step": 10583 }, { "epoch": 0.7154251723671758, "grad_norm": 1.1929396390914917, "learning_rate": 5.987919167160889e-06, "loss": 0.26422119140625, "step": 10584 }, { "epoch": 0.7154927673381101, "grad_norm": 1.985094428062439, "learning_rate": 5.985281756226416e-06, "loss": 0.29449462890625, "step": 10585 }, { "epoch": 0.7155603623090442, "grad_norm": 1.6410655975341797, "learning_rate": 5.982644781480061e-06, "loss": 0.251800537109375, "step": 10586 }, { "epoch": 0.7156279572799784, "grad_norm": 1.4052506685256958, "learning_rate": 5.980008243049436e-06, "loss": 0.165435791015625, "step": 10587 }, { "epoch": 0.7156955522509125, "grad_norm": 1.7212936878204346, "learning_rate": 5.977372141062111e-06, "loss": 0.27777099609375, "step": 10588 }, { "epoch": 0.7157631472218466, "grad_norm": 1.8494815826416016, "learning_rate": 5.974736475645624e-06, "loss": 0.195404052734375, "step": 10589 }, { "epoch": 0.7158307421927809, "grad_norm": 1.1359206438064575, "learning_rate": 5.972101246927517e-06, "loss": 0.1641082763671875, "step": 10590 }, { "epoch": 0.715898337163715, "grad_norm": 1.13462495803833, "learning_rate": 5.969466455035298e-06, "loss": 0.21942138671875, "step": 10591 }, { "epoch": 0.7159659321346492, "grad_norm": 0.735317051410675, "learning_rate": 5.96683210009645e-06, "loss": 0.2216796875, "step": 10592 }, { "epoch": 0.7160335271055833, "grad_norm": 1.3005330562591553, "learning_rate": 5.964198182238443e-06, "loss": 0.249755859375, "step": 10593 }, { "epoch": 0.7161011220765175, "grad_norm": 1.0251567363739014, "learning_rate": 5.96156470158872e-06, "loss": 0.26824951171875, "step": 10594 }, { "epoch": 0.7161687170474517, "grad_norm": 1.5334035158157349, "learning_rate": 5.958931658274701e-06, "loss": 0.33612060546875, "step": 10595 }, { "epoch": 0.7162363120183858, "grad_norm": 0.8483339548110962, "learning_rate": 5.956299052423807e-06, "loss": 0.12870025634765625, "step": 10596 }, { "epoch": 0.71630390698932, "grad_norm": 1.1873793601989746, "learning_rate": 5.953666884163394e-06, "loss": 0.2200927734375, "step": 10597 }, { "epoch": 0.7163715019602541, "grad_norm": 1.2074135541915894, "learning_rate": 5.9510351536208466e-06, "loss": 0.27032470703125, "step": 10598 }, { "epoch": 0.7164390969311883, "grad_norm": 1.347786784172058, "learning_rate": 5.948403860923483e-06, "loss": 0.329315185546875, "step": 10599 }, { "epoch": 0.7165066919021225, "grad_norm": 1.0797446966171265, "learning_rate": 5.945773006198637e-06, "loss": 0.13564300537109375, "step": 10600 }, { "epoch": 0.7165742868730567, "grad_norm": 1.4693422317504883, "learning_rate": 5.943142589573607e-06, "loss": 0.2149658203125, "step": 10601 }, { "epoch": 0.7166418818439908, "grad_norm": 1.0480585098266602, "learning_rate": 5.940512611175651e-06, "loss": 0.27362060546875, "step": 10602 }, { "epoch": 0.7167094768149249, "grad_norm": 1.0527704954147339, "learning_rate": 5.937883071132041e-06, "loss": 0.2381439208984375, "step": 10603 }, { "epoch": 0.7167770717858591, "grad_norm": 1.4729163646697998, "learning_rate": 5.935253969570004e-06, "loss": 0.316070556640625, "step": 10604 }, { "epoch": 0.7168446667567933, "grad_norm": 1.131661295890808, "learning_rate": 5.932625306616754e-06, "loss": 0.20953369140625, "step": 10605 }, { "epoch": 0.7169122617277275, "grad_norm": 1.0998969078063965, "learning_rate": 5.92999708239948e-06, "loss": 0.251190185546875, "step": 10606 }, { "epoch": 0.7169798566986616, "grad_norm": 1.5684776306152344, "learning_rate": 5.9273692970453545e-06, "loss": 0.21246337890625, "step": 10607 }, { "epoch": 0.7170474516695958, "grad_norm": 1.113639235496521, "learning_rate": 5.924741950681517e-06, "loss": 0.259307861328125, "step": 10608 }, { "epoch": 0.7171150466405299, "grad_norm": 0.9594089388847351, "learning_rate": 5.922115043435114e-06, "loss": 0.221923828125, "step": 10609 }, { "epoch": 0.7171826416114641, "grad_norm": 1.1043592691421509, "learning_rate": 5.91948857543323e-06, "loss": 0.25433349609375, "step": 10610 }, { "epoch": 0.7172502365823983, "grad_norm": 1.145776391029358, "learning_rate": 5.916862546802964e-06, "loss": 0.1880950927734375, "step": 10611 }, { "epoch": 0.7173178315533324, "grad_norm": 1.3875724077224731, "learning_rate": 5.914236957671381e-06, "loss": 0.178924560546875, "step": 10612 }, { "epoch": 0.7173854265242666, "grad_norm": 0.6941669583320618, "learning_rate": 5.911611808165508e-06, "loss": 0.113037109375, "step": 10613 }, { "epoch": 0.7174530214952007, "grad_norm": 2.4944519996643066, "learning_rate": 5.908987098412386e-06, "loss": 0.2239532470703125, "step": 10614 }, { "epoch": 0.717520616466135, "grad_norm": 1.6412245035171509, "learning_rate": 5.906362828538993e-06, "loss": 0.2810821533203125, "step": 10615 }, { "epoch": 0.7175882114370691, "grad_norm": 1.3713734149932861, "learning_rate": 5.903738998672326e-06, "loss": 0.270751953125, "step": 10616 }, { "epoch": 0.7176558064080032, "grad_norm": 1.628263235092163, "learning_rate": 5.901115608939333e-06, "loss": 0.2647705078125, "step": 10617 }, { "epoch": 0.7177234013789374, "grad_norm": 1.256791591644287, "learning_rate": 5.898492659466953e-06, "loss": 0.214385986328125, "step": 10618 }, { "epoch": 0.7177909963498715, "grad_norm": 0.8976793885231018, "learning_rate": 5.895870150382101e-06, "loss": 0.179901123046875, "step": 10619 }, { "epoch": 0.7178585913208058, "grad_norm": 1.9410321712493896, "learning_rate": 5.893248081811669e-06, "loss": 0.25146484375, "step": 10620 }, { "epoch": 0.7179261862917399, "grad_norm": 1.2820324897766113, "learning_rate": 5.890626453882523e-06, "loss": 0.251068115234375, "step": 10621 }, { "epoch": 0.7179937812626741, "grad_norm": 1.4542436599731445, "learning_rate": 5.88800526672153e-06, "loss": 0.272491455078125, "step": 10622 }, { "epoch": 0.7180613762336082, "grad_norm": 1.3410526514053345, "learning_rate": 5.885384520455505e-06, "loss": 0.226531982421875, "step": 10623 }, { "epoch": 0.7181289712045423, "grad_norm": 1.9094358682632446, "learning_rate": 5.882764215211255e-06, "loss": 0.22900390625, "step": 10624 }, { "epoch": 0.7181965661754766, "grad_norm": 1.1909881830215454, "learning_rate": 5.880144351115581e-06, "loss": 0.25213623046875, "step": 10625 }, { "epoch": 0.7182641611464107, "grad_norm": 1.036526083946228, "learning_rate": 5.877524928295229e-06, "loss": 0.15903472900390625, "step": 10626 }, { "epoch": 0.7183317561173449, "grad_norm": 1.6401610374450684, "learning_rate": 5.8749059468769625e-06, "loss": 0.31951904296875, "step": 10627 }, { "epoch": 0.718399351088279, "grad_norm": 1.2569377422332764, "learning_rate": 5.872287406987485e-06, "loss": 0.269561767578125, "step": 10628 }, { "epoch": 0.7184669460592132, "grad_norm": 1.359511137008667, "learning_rate": 5.869669308753514e-06, "loss": 0.291778564453125, "step": 10629 }, { "epoch": 0.7185345410301474, "grad_norm": 1.6371296644210815, "learning_rate": 5.867051652301721e-06, "loss": 0.2309722900390625, "step": 10630 }, { "epoch": 0.7186021360010815, "grad_norm": 2.3846070766448975, "learning_rate": 5.8644344377587675e-06, "loss": 0.434326171875, "step": 10631 }, { "epoch": 0.7186697309720157, "grad_norm": 2.597003936767578, "learning_rate": 5.861817665251288e-06, "loss": 0.342529296875, "step": 10632 }, { "epoch": 0.7187373259429498, "grad_norm": 0.8940592408180237, "learning_rate": 5.8592013349059014e-06, "loss": 0.177398681640625, "step": 10633 }, { "epoch": 0.718804920913884, "grad_norm": 0.8882266879081726, "learning_rate": 5.856585446849193e-06, "loss": 0.171478271484375, "step": 10634 }, { "epoch": 0.7188725158848182, "grad_norm": 1.1107639074325562, "learning_rate": 5.853970001207754e-06, "loss": 0.17303466796875, "step": 10635 }, { "epoch": 0.7189401108557524, "grad_norm": 1.0380979776382446, "learning_rate": 5.8513549981081195e-06, "loss": 0.19329833984375, "step": 10636 }, { "epoch": 0.7190077058266865, "grad_norm": 1.4947383403778076, "learning_rate": 5.848740437676822e-06, "loss": 0.2154541015625, "step": 10637 }, { "epoch": 0.7190753007976206, "grad_norm": 1.2711410522460938, "learning_rate": 5.846126320040382e-06, "loss": 0.2424468994140625, "step": 10638 }, { "epoch": 0.7191428957685548, "grad_norm": 0.8260920643806458, "learning_rate": 5.843512645325267e-06, "loss": 0.2126007080078125, "step": 10639 }, { "epoch": 0.719210490739489, "grad_norm": 1.3952727317810059, "learning_rate": 5.840899413657959e-06, "loss": 0.289794921875, "step": 10640 }, { "epoch": 0.7192780857104232, "grad_norm": 2.1847596168518066, "learning_rate": 5.8382866251648976e-06, "loss": 0.33660888671875, "step": 10641 }, { "epoch": 0.7193456806813573, "grad_norm": 1.2153743505477905, "learning_rate": 5.835674279972505e-06, "loss": 0.231689453125, "step": 10642 }, { "epoch": 0.7194132756522915, "grad_norm": 1.9326609373092651, "learning_rate": 5.833062378207185e-06, "loss": 0.27978515625, "step": 10643 }, { "epoch": 0.7194808706232256, "grad_norm": 1.5324218273162842, "learning_rate": 5.8304509199953135e-06, "loss": 0.2659912109375, "step": 10644 }, { "epoch": 0.7195484655941597, "grad_norm": 1.1505744457244873, "learning_rate": 5.827839905463252e-06, "loss": 0.22711181640625, "step": 10645 }, { "epoch": 0.719616060565094, "grad_norm": 1.4144322872161865, "learning_rate": 5.825229334737337e-06, "loss": 0.2252655029296875, "step": 10646 }, { "epoch": 0.7196836555360281, "grad_norm": 1.672852873802185, "learning_rate": 5.822619207943884e-06, "loss": 0.27685546875, "step": 10647 }, { "epoch": 0.7197512505069623, "grad_norm": 0.882543683052063, "learning_rate": 5.820009525209188e-06, "loss": 0.1299591064453125, "step": 10648 }, { "epoch": 0.7198188454778964, "grad_norm": 0.9193049073219299, "learning_rate": 5.817400286659519e-06, "loss": 0.1661376953125, "step": 10649 }, { "epoch": 0.7198864404488307, "grad_norm": 1.1289889812469482, "learning_rate": 5.814791492421127e-06, "loss": 0.2159881591796875, "step": 10650 }, { "epoch": 0.7199540354197648, "grad_norm": 1.8203147649765015, "learning_rate": 5.8121831426202535e-06, "loss": 0.322296142578125, "step": 10651 }, { "epoch": 0.7200216303906989, "grad_norm": 1.5337077379226685, "learning_rate": 5.8095752373830885e-06, "loss": 0.317718505859375, "step": 10652 }, { "epoch": 0.7200892253616331, "grad_norm": 1.7228827476501465, "learning_rate": 5.806967776835833e-06, "loss": 0.3411865234375, "step": 10653 }, { "epoch": 0.7201568203325672, "grad_norm": 1.5208964347839355, "learning_rate": 5.8043607611046485e-06, "loss": 0.233367919921875, "step": 10654 }, { "epoch": 0.7202244153035015, "grad_norm": 1.011398434638977, "learning_rate": 5.801754190315677e-06, "loss": 0.228546142578125, "step": 10655 }, { "epoch": 0.7202920102744356, "grad_norm": 0.5017054677009583, "learning_rate": 5.799148064595039e-06, "loss": 0.1144256591796875, "step": 10656 }, { "epoch": 0.7203596052453698, "grad_norm": 0.7996707558631897, "learning_rate": 5.796542384068839e-06, "loss": 0.1412811279296875, "step": 10657 }, { "epoch": 0.7204272002163039, "grad_norm": 1.2432899475097656, "learning_rate": 5.793937148863155e-06, "loss": 0.2532196044921875, "step": 10658 }, { "epoch": 0.720494795187238, "grad_norm": 1.418317198753357, "learning_rate": 5.791332359104042e-06, "loss": 0.2838134765625, "step": 10659 }, { "epoch": 0.7205623901581723, "grad_norm": 0.9569143056869507, "learning_rate": 5.788728014917538e-06, "loss": 0.27960205078125, "step": 10660 }, { "epoch": 0.7206299851291064, "grad_norm": 1.9908865690231323, "learning_rate": 5.786124116429656e-06, "loss": 0.2662353515625, "step": 10661 }, { "epoch": 0.7206975801000406, "grad_norm": 1.246933102607727, "learning_rate": 5.783520663766391e-06, "loss": 0.241485595703125, "step": 10662 }, { "epoch": 0.7207651750709747, "grad_norm": 0.7361838221549988, "learning_rate": 5.780917657053708e-06, "loss": 0.14292526245117188, "step": 10663 }, { "epoch": 0.7208327700419089, "grad_norm": 1.4267427921295166, "learning_rate": 5.778315096417571e-06, "loss": 0.2421112060546875, "step": 10664 }, { "epoch": 0.720900365012843, "grad_norm": 1.3027182817459106, "learning_rate": 5.77571298198389e-06, "loss": 0.2107086181640625, "step": 10665 }, { "epoch": 0.7209679599837772, "grad_norm": 1.071110725402832, "learning_rate": 5.773111313878584e-06, "loss": 0.157196044921875, "step": 10666 }, { "epoch": 0.7210355549547114, "grad_norm": 1.4105877876281738, "learning_rate": 5.770510092227533e-06, "loss": 0.28173828125, "step": 10667 }, { "epoch": 0.7211031499256455, "grad_norm": 1.5656808614730835, "learning_rate": 5.767909317156601e-06, "loss": 0.30322265625, "step": 10668 }, { "epoch": 0.7211707448965797, "grad_norm": 1.1052072048187256, "learning_rate": 5.765308988791632e-06, "loss": 0.196990966796875, "step": 10669 }, { "epoch": 0.7212383398675138, "grad_norm": 1.1223359107971191, "learning_rate": 5.762709107258443e-06, "loss": 0.248626708984375, "step": 10670 }, { "epoch": 0.7213059348384481, "grad_norm": 1.1167341470718384, "learning_rate": 5.760109672682835e-06, "loss": 0.2032318115234375, "step": 10671 }, { "epoch": 0.7213735298093822, "grad_norm": 1.1805647611618042, "learning_rate": 5.7575106851905825e-06, "loss": 0.29473876953125, "step": 10672 }, { "epoch": 0.7214411247803163, "grad_norm": 0.8053738474845886, "learning_rate": 5.754912144907441e-06, "loss": 0.141204833984375, "step": 10673 }, { "epoch": 0.7215087197512505, "grad_norm": 0.7073264122009277, "learning_rate": 5.752314051959145e-06, "loss": 0.15869140625, "step": 10674 }, { "epoch": 0.7215763147221846, "grad_norm": 1.468652367591858, "learning_rate": 5.749716406471406e-06, "loss": 0.24078369140625, "step": 10675 }, { "epoch": 0.7216439096931189, "grad_norm": 0.883063554763794, "learning_rate": 5.747119208569907e-06, "loss": 0.183135986328125, "step": 10676 }, { "epoch": 0.721711504664053, "grad_norm": 1.232884168624878, "learning_rate": 5.744522458380337e-06, "loss": 0.240814208984375, "step": 10677 }, { "epoch": 0.7217790996349872, "grad_norm": 1.1517695188522339, "learning_rate": 5.7419261560283185e-06, "loss": 0.182342529296875, "step": 10678 }, { "epoch": 0.7218466946059213, "grad_norm": 0.9974513649940491, "learning_rate": 5.739330301639492e-06, "loss": 0.22222900390625, "step": 10679 }, { "epoch": 0.7219142895768554, "grad_norm": 2.3344359397888184, "learning_rate": 5.7367348953394575e-06, "loss": 0.33929443359375, "step": 10680 }, { "epoch": 0.7219818845477897, "grad_norm": 0.7382712960243225, "learning_rate": 5.734139937253796e-06, "loss": 0.12921142578125, "step": 10681 }, { "epoch": 0.7220494795187238, "grad_norm": 1.644630789756775, "learning_rate": 5.731545427508069e-06, "loss": 0.294342041015625, "step": 10682 }, { "epoch": 0.722117074489658, "grad_norm": 1.852327585220337, "learning_rate": 5.728951366227814e-06, "loss": 0.3389892578125, "step": 10683 }, { "epoch": 0.7221846694605921, "grad_norm": 0.6946032047271729, "learning_rate": 5.726357753538547e-06, "loss": 0.144287109375, "step": 10684 }, { "epoch": 0.7222522644315263, "grad_norm": 0.9503932595252991, "learning_rate": 5.723764589565765e-06, "loss": 0.155120849609375, "step": 10685 }, { "epoch": 0.7223198594024605, "grad_norm": 0.9750795364379883, "learning_rate": 5.72117187443494e-06, "loss": 0.14069366455078125, "step": 10686 }, { "epoch": 0.7223874543733946, "grad_norm": 0.8397223949432373, "learning_rate": 5.7185796082715184e-06, "loss": 0.1693115234375, "step": 10687 }, { "epoch": 0.7224550493443288, "grad_norm": 0.7936559319496155, "learning_rate": 5.715987791200947e-06, "loss": 0.1588134765625, "step": 10688 }, { "epoch": 0.7225226443152629, "grad_norm": 0.7480923533439636, "learning_rate": 5.713396423348613e-06, "loss": 0.116302490234375, "step": 10689 }, { "epoch": 0.7225902392861971, "grad_norm": 4.38391637802124, "learning_rate": 5.710805504839923e-06, "loss": 0.316192626953125, "step": 10690 }, { "epoch": 0.7226578342571313, "grad_norm": 0.7365559339523315, "learning_rate": 5.70821503580022e-06, "loss": 0.157135009765625, "step": 10691 }, { "epoch": 0.7227254292280655, "grad_norm": 1.6118279695510864, "learning_rate": 5.705625016354865e-06, "loss": 0.290283203125, "step": 10692 }, { "epoch": 0.7227930241989996, "grad_norm": 1.2642710208892822, "learning_rate": 5.703035446629176e-06, "loss": 0.18218994140625, "step": 10693 }, { "epoch": 0.7228606191699337, "grad_norm": 1.2752058506011963, "learning_rate": 5.700446326748442e-06, "loss": 0.20788955688476562, "step": 10694 }, { "epoch": 0.7229282141408679, "grad_norm": 1.0541688203811646, "learning_rate": 5.697857656837952e-06, "loss": 0.13562774658203125, "step": 10695 }, { "epoch": 0.7229958091118021, "grad_norm": 1.8565267324447632, "learning_rate": 5.695269437022957e-06, "loss": 0.255096435546875, "step": 10696 }, { "epoch": 0.7230634040827363, "grad_norm": 1.5422803163528442, "learning_rate": 5.692681667428693e-06, "loss": 0.313720703125, "step": 10697 }, { "epoch": 0.7231309990536704, "grad_norm": 0.44582200050354004, "learning_rate": 5.690094348180372e-06, "loss": 0.06473159790039062, "step": 10698 }, { "epoch": 0.7231985940246046, "grad_norm": 1.5916523933410645, "learning_rate": 5.687507479403183e-06, "loss": 0.251739501953125, "step": 10699 }, { "epoch": 0.7232661889955387, "grad_norm": 1.2573047876358032, "learning_rate": 5.684921061222291e-06, "loss": 0.2582855224609375, "step": 10700 }, { "epoch": 0.7233337839664729, "grad_norm": 1.3027225732803345, "learning_rate": 5.682335093762858e-06, "loss": 0.1670379638671875, "step": 10701 }, { "epoch": 0.7234013789374071, "grad_norm": 1.5698531866073608, "learning_rate": 5.679749577149989e-06, "loss": 0.2659454345703125, "step": 10702 }, { "epoch": 0.7234689739083412, "grad_norm": 1.4427571296691895, "learning_rate": 5.6771645115088035e-06, "loss": 0.270477294921875, "step": 10703 }, { "epoch": 0.7235365688792754, "grad_norm": 1.3733888864517212, "learning_rate": 5.6745798969643756e-06, "loss": 0.202178955078125, "step": 10704 }, { "epoch": 0.7236041638502095, "grad_norm": 1.053511142730713, "learning_rate": 5.671995733641769e-06, "loss": 0.230560302734375, "step": 10705 }, { "epoch": 0.7236717588211438, "grad_norm": 1.0943249464035034, "learning_rate": 5.669412021666021e-06, "loss": 0.224578857421875, "step": 10706 }, { "epoch": 0.7237393537920779, "grad_norm": 0.9284440875053406, "learning_rate": 5.666828761162136e-06, "loss": 0.167938232421875, "step": 10707 }, { "epoch": 0.723806948763012, "grad_norm": 0.9127782583236694, "learning_rate": 5.664245952255125e-06, "loss": 0.2169189453125, "step": 10708 }, { "epoch": 0.7238745437339462, "grad_norm": 1.4262319803237915, "learning_rate": 5.661663595069952e-06, "loss": 0.2830810546875, "step": 10709 }, { "epoch": 0.7239421387048803, "grad_norm": 1.1537057161331177, "learning_rate": 5.6590816897315685e-06, "loss": 0.229644775390625, "step": 10710 }, { "epoch": 0.7240097336758146, "grad_norm": 1.6145442724227905, "learning_rate": 5.656500236364905e-06, "loss": 0.24041748046875, "step": 10711 }, { "epoch": 0.7240773286467487, "grad_norm": 1.0537141561508179, "learning_rate": 5.653919235094865e-06, "loss": 0.1771240234375, "step": 10712 }, { "epoch": 0.7241449236176828, "grad_norm": 1.3594415187835693, "learning_rate": 5.651338686046329e-06, "loss": 0.2691650390625, "step": 10713 }, { "epoch": 0.724212518588617, "grad_norm": 1.3976733684539795, "learning_rate": 5.648758589344179e-06, "loss": 0.2099609375, "step": 10714 }, { "epoch": 0.7242801135595511, "grad_norm": 1.3492077589035034, "learning_rate": 5.646178945113232e-06, "loss": 0.264190673828125, "step": 10715 }, { "epoch": 0.7243477085304854, "grad_norm": 1.9798997640609741, "learning_rate": 5.643599753478322e-06, "loss": 0.2971343994140625, "step": 10716 }, { "epoch": 0.7244153035014195, "grad_norm": 2.076439619064331, "learning_rate": 5.6410210145642485e-06, "loss": 0.33026123046875, "step": 10717 }, { "epoch": 0.7244828984723537, "grad_norm": 0.9836852550506592, "learning_rate": 5.638442728495773e-06, "loss": 0.182464599609375, "step": 10718 }, { "epoch": 0.7245504934432878, "grad_norm": 1.1101531982421875, "learning_rate": 5.635864895397665e-06, "loss": 0.28814697265625, "step": 10719 }, { "epoch": 0.7246180884142219, "grad_norm": 1.4762275218963623, "learning_rate": 5.633287515394638e-06, "loss": 0.25677490234375, "step": 10720 }, { "epoch": 0.7246856833851562, "grad_norm": 1.3914523124694824, "learning_rate": 5.630710588611419e-06, "loss": 0.262908935546875, "step": 10721 }, { "epoch": 0.7247532783560903, "grad_norm": 1.4790221452713013, "learning_rate": 5.628134115172687e-06, "loss": 0.241912841796875, "step": 10722 }, { "epoch": 0.7248208733270245, "grad_norm": 0.9627858996391296, "learning_rate": 5.625558095203112e-06, "loss": 0.21240234375, "step": 10723 }, { "epoch": 0.7248884682979586, "grad_norm": 1.6356955766677856, "learning_rate": 5.622982528827334e-06, "loss": 0.27716064453125, "step": 10724 }, { "epoch": 0.7249560632688928, "grad_norm": 1.351906418800354, "learning_rate": 5.620407416169975e-06, "loss": 0.28350830078125, "step": 10725 }, { "epoch": 0.725023658239827, "grad_norm": 1.295432209968567, "learning_rate": 5.617832757355633e-06, "loss": 0.226226806640625, "step": 10726 }, { "epoch": 0.7250912532107611, "grad_norm": 1.262141227722168, "learning_rate": 5.615258552508897e-06, "loss": 0.2112579345703125, "step": 10727 }, { "epoch": 0.7251588481816953, "grad_norm": 0.9650362133979797, "learning_rate": 5.612684801754312e-06, "loss": 0.1333465576171875, "step": 10728 }, { "epoch": 0.7252264431526294, "grad_norm": 0.9583173990249634, "learning_rate": 5.6101115052164106e-06, "loss": 0.2116851806640625, "step": 10729 }, { "epoch": 0.7252940381235636, "grad_norm": 1.2575907707214355, "learning_rate": 5.607538663019719e-06, "loss": 0.22833251953125, "step": 10730 }, { "epoch": 0.7253616330944977, "grad_norm": 0.9670007228851318, "learning_rate": 5.604966275288709e-06, "loss": 0.221954345703125, "step": 10731 }, { "epoch": 0.725429228065432, "grad_norm": 1.3094868659973145, "learning_rate": 5.602394342147862e-06, "loss": 0.2118988037109375, "step": 10732 }, { "epoch": 0.7254968230363661, "grad_norm": 2.092454195022583, "learning_rate": 5.599822863721619e-06, "loss": 0.2830657958984375, "step": 10733 }, { "epoch": 0.7255644180073002, "grad_norm": 1.5756199359893799, "learning_rate": 5.597251840134407e-06, "loss": 0.1866607666015625, "step": 10734 }, { "epoch": 0.7256320129782344, "grad_norm": 1.1216580867767334, "learning_rate": 5.594681271510624e-06, "loss": 0.17754364013671875, "step": 10735 }, { "epoch": 0.7256996079491685, "grad_norm": 1.313821792602539, "learning_rate": 5.592111157974652e-06, "loss": 0.2695770263671875, "step": 10736 }, { "epoch": 0.7257672029201028, "grad_norm": 0.9807543158531189, "learning_rate": 5.589541499650849e-06, "loss": 0.147918701171875, "step": 10737 }, { "epoch": 0.7258347978910369, "grad_norm": 1.947798252105713, "learning_rate": 5.586972296663552e-06, "loss": 0.33917236328125, "step": 10738 }, { "epoch": 0.7259023928619711, "grad_norm": 1.444808840751648, "learning_rate": 5.5844035491370714e-06, "loss": 0.2845458984375, "step": 10739 }, { "epoch": 0.7259699878329052, "grad_norm": 1.7616617679595947, "learning_rate": 5.5818352571957095e-06, "loss": 0.217193603515625, "step": 10740 }, { "epoch": 0.7260375828038393, "grad_norm": 1.2458713054656982, "learning_rate": 5.579267420963725e-06, "loss": 0.2049560546875, "step": 10741 }, { "epoch": 0.7261051777747736, "grad_norm": 2.1794867515563965, "learning_rate": 5.5767000405653645e-06, "loss": 0.349609375, "step": 10742 }, { "epoch": 0.7261727727457077, "grad_norm": 1.2875257730484009, "learning_rate": 5.574133116124869e-06, "loss": 0.235748291015625, "step": 10743 }, { "epoch": 0.7262403677166419, "grad_norm": 1.4417152404785156, "learning_rate": 5.57156664776642e-06, "loss": 0.208404541015625, "step": 10744 }, { "epoch": 0.726307962687576, "grad_norm": 1.4577523469924927, "learning_rate": 5.569000635614219e-06, "loss": 0.289886474609375, "step": 10745 }, { "epoch": 0.7263755576585103, "grad_norm": 1.4602638483047485, "learning_rate": 5.566435079792417e-06, "loss": 0.2373046875, "step": 10746 }, { "epoch": 0.7264431526294444, "grad_norm": 1.5953418016433716, "learning_rate": 5.563869980425152e-06, "loss": 0.223541259765625, "step": 10747 }, { "epoch": 0.7265107476003785, "grad_norm": 0.9317561388015747, "learning_rate": 5.561305337636542e-06, "loss": 0.22314453125, "step": 10748 }, { "epoch": 0.7265783425713127, "grad_norm": 0.7188029289245605, "learning_rate": 5.558741151550676e-06, "loss": 0.14470672607421875, "step": 10749 }, { "epoch": 0.7266459375422468, "grad_norm": 0.6667448282241821, "learning_rate": 5.556177422291625e-06, "loss": 0.135894775390625, "step": 10750 }, { "epoch": 0.726713532513181, "grad_norm": 1.0868563652038574, "learning_rate": 5.5536141499834485e-06, "loss": 0.233367919921875, "step": 10751 }, { "epoch": 0.7267811274841152, "grad_norm": 1.1559842824935913, "learning_rate": 5.551051334750162e-06, "loss": 0.23553466796875, "step": 10752 }, { "epoch": 0.7268487224550494, "grad_norm": 0.8111181855201721, "learning_rate": 5.5484889767157735e-06, "loss": 0.160003662109375, "step": 10753 }, { "epoch": 0.7269163174259835, "grad_norm": 1.1505355834960938, "learning_rate": 5.545927076004268e-06, "loss": 0.1896514892578125, "step": 10754 }, { "epoch": 0.7269839123969176, "grad_norm": 0.8807658553123474, "learning_rate": 5.5433656327395995e-06, "loss": 0.1962890625, "step": 10755 }, { "epoch": 0.7270515073678518, "grad_norm": 0.9146528244018555, "learning_rate": 5.540804647045721e-06, "loss": 0.15375137329101562, "step": 10756 }, { "epoch": 0.727119102338786, "grad_norm": 1.3827625513076782, "learning_rate": 5.53824411904653e-06, "loss": 0.20311737060546875, "step": 10757 }, { "epoch": 0.7271866973097202, "grad_norm": 0.9106522798538208, "learning_rate": 5.535684048865936e-06, "loss": 0.18658447265625, "step": 10758 }, { "epoch": 0.7272542922806543, "grad_norm": 1.0092839002609253, "learning_rate": 5.533124436627805e-06, "loss": 0.18902587890625, "step": 10759 }, { "epoch": 0.7273218872515885, "grad_norm": 0.6557203531265259, "learning_rate": 5.5305652824559884e-06, "loss": 0.13629150390625, "step": 10760 }, { "epoch": 0.7273894822225226, "grad_norm": 1.1707180738449097, "learning_rate": 5.528006586474313e-06, "loss": 0.24017333984375, "step": 10761 }, { "epoch": 0.7274570771934568, "grad_norm": 0.6370288729667664, "learning_rate": 5.525448348806584e-06, "loss": 0.12671279907226562, "step": 10762 }, { "epoch": 0.727524672164391, "grad_norm": 1.2813715934753418, "learning_rate": 5.522890569576587e-06, "loss": 0.2415771484375, "step": 10763 }, { "epoch": 0.7275922671353251, "grad_norm": 1.9071693420410156, "learning_rate": 5.52033324890808e-06, "loss": 0.36248779296875, "step": 10764 }, { "epoch": 0.7276598621062593, "grad_norm": 1.33219313621521, "learning_rate": 5.517776386924805e-06, "loss": 0.28826904296875, "step": 10765 }, { "epoch": 0.7277274570771934, "grad_norm": 1.6146568059921265, "learning_rate": 5.515219983750477e-06, "loss": 0.2484130859375, "step": 10766 }, { "epoch": 0.7277950520481277, "grad_norm": 1.1261482238769531, "learning_rate": 5.512664039508793e-06, "loss": 0.1697998046875, "step": 10767 }, { "epoch": 0.7278626470190618, "grad_norm": 1.524885654449463, "learning_rate": 5.510108554323416e-06, "loss": 0.18408966064453125, "step": 10768 }, { "epoch": 0.7279302419899959, "grad_norm": 1.1718804836273193, "learning_rate": 5.507553528318016e-06, "loss": 0.27978515625, "step": 10769 }, { "epoch": 0.7279978369609301, "grad_norm": 1.0362251996994019, "learning_rate": 5.504998961616199e-06, "loss": 0.1595611572265625, "step": 10770 }, { "epoch": 0.7280654319318642, "grad_norm": 1.4101202487945557, "learning_rate": 5.502444854341586e-06, "loss": 0.2308349609375, "step": 10771 }, { "epoch": 0.7281330269027985, "grad_norm": 1.0201873779296875, "learning_rate": 5.499891206617756e-06, "loss": 0.14807891845703125, "step": 10772 }, { "epoch": 0.7282006218737326, "grad_norm": 0.7320089936256409, "learning_rate": 5.49733801856827e-06, "loss": 0.1644744873046875, "step": 10773 }, { "epoch": 0.7282682168446668, "grad_norm": 1.0706071853637695, "learning_rate": 5.494785290316665e-06, "loss": 0.170684814453125, "step": 10774 }, { "epoch": 0.7283358118156009, "grad_norm": 1.1454312801361084, "learning_rate": 5.492233021986461e-06, "loss": 0.276214599609375, "step": 10775 }, { "epoch": 0.728403406786535, "grad_norm": 0.7207339406013489, "learning_rate": 5.489681213701153e-06, "loss": 0.1553497314453125, "step": 10776 }, { "epoch": 0.7284710017574693, "grad_norm": 1.2556039094924927, "learning_rate": 5.487129865584212e-06, "loss": 0.259124755859375, "step": 10777 }, { "epoch": 0.7285385967284034, "grad_norm": 1.2383160591125488, "learning_rate": 5.484578977759087e-06, "loss": 0.2845458984375, "step": 10778 }, { "epoch": 0.7286061916993376, "grad_norm": 0.8453804850578308, "learning_rate": 5.482028550349201e-06, "loss": 0.1575927734375, "step": 10779 }, { "epoch": 0.7286737866702717, "grad_norm": 1.7984371185302734, "learning_rate": 5.479478583477977e-06, "loss": 0.2935791015625, "step": 10780 }, { "epoch": 0.7287413816412059, "grad_norm": 0.9673557877540588, "learning_rate": 5.476929077268775e-06, "loss": 0.1524200439453125, "step": 10781 }, { "epoch": 0.7288089766121401, "grad_norm": 1.171615719795227, "learning_rate": 5.474380031844979e-06, "loss": 0.1974945068359375, "step": 10782 }, { "epoch": 0.7288765715830742, "grad_norm": 1.8251698017120361, "learning_rate": 5.471831447329906e-06, "loss": 0.24505615234375, "step": 10783 }, { "epoch": 0.7289441665540084, "grad_norm": 1.154237985610962, "learning_rate": 5.469283323846887e-06, "loss": 0.19989776611328125, "step": 10784 }, { "epoch": 0.7290117615249425, "grad_norm": 2.754607915878296, "learning_rate": 5.466735661519212e-06, "loss": 0.322265625, "step": 10785 }, { "epoch": 0.7290793564958767, "grad_norm": 1.6708276271820068, "learning_rate": 5.464188460470153e-06, "loss": 0.2894287109375, "step": 10786 }, { "epoch": 0.7291469514668109, "grad_norm": 1.5408624410629272, "learning_rate": 5.461641720822957e-06, "loss": 0.285186767578125, "step": 10787 }, { "epoch": 0.7292145464377451, "grad_norm": 1.666750431060791, "learning_rate": 5.459095442700855e-06, "loss": 0.30987548828125, "step": 10788 }, { "epoch": 0.7292821414086792, "grad_norm": 0.9413655400276184, "learning_rate": 5.4565496262270485e-06, "loss": 0.17650604248046875, "step": 10789 }, { "epoch": 0.7293497363796133, "grad_norm": 1.696254014968872, "learning_rate": 5.454004271524721e-06, "loss": 0.212646484375, "step": 10790 }, { "epoch": 0.7294173313505475, "grad_norm": 1.3622081279754639, "learning_rate": 5.451459378717034e-06, "loss": 0.264984130859375, "step": 10791 }, { "epoch": 0.7294849263214817, "grad_norm": 1.1157469749450684, "learning_rate": 5.448914947927118e-06, "loss": 0.207672119140625, "step": 10792 }, { "epoch": 0.7295525212924159, "grad_norm": 1.3836430311203003, "learning_rate": 5.446370979278105e-06, "loss": 0.22637939453125, "step": 10793 }, { "epoch": 0.72962011626335, "grad_norm": 0.9156525135040283, "learning_rate": 5.4438274728930675e-06, "loss": 0.19195556640625, "step": 10794 }, { "epoch": 0.7296877112342842, "grad_norm": 1.130223274230957, "learning_rate": 5.44128442889509e-06, "loss": 0.164825439453125, "step": 10795 }, { "epoch": 0.7297553062052183, "grad_norm": 2.943963050842285, "learning_rate": 5.438741847407218e-06, "loss": 0.3521575927734375, "step": 10796 }, { "epoch": 0.7298229011761524, "grad_norm": 1.2327800989151, "learning_rate": 5.436199728552476e-06, "loss": 0.26605224609375, "step": 10797 }, { "epoch": 0.7298904961470867, "grad_norm": 1.0654001235961914, "learning_rate": 5.433658072453873e-06, "loss": 0.21966552734375, "step": 10798 }, { "epoch": 0.7299580911180208, "grad_norm": 1.4548048973083496, "learning_rate": 5.431116879234374e-06, "loss": 0.2636260986328125, "step": 10799 }, { "epoch": 0.730025686088955, "grad_norm": 1.3771684169769287, "learning_rate": 5.428576149016954e-06, "loss": 0.23320770263671875, "step": 10800 }, { "epoch": 0.7300932810598891, "grad_norm": 1.284351110458374, "learning_rate": 5.426035881924545e-06, "loss": 0.18914794921875, "step": 10801 }, { "epoch": 0.7301608760308234, "grad_norm": 1.4647703170776367, "learning_rate": 5.423496078080058e-06, "loss": 0.2132415771484375, "step": 10802 }, { "epoch": 0.7302284710017575, "grad_norm": 0.8890506625175476, "learning_rate": 5.420956737606389e-06, "loss": 0.13232803344726562, "step": 10803 }, { "epoch": 0.7302960659726916, "grad_norm": 1.2058032751083374, "learning_rate": 5.418417860626402e-06, "loss": 0.175140380859375, "step": 10804 }, { "epoch": 0.7303636609436258, "grad_norm": 1.1381338834762573, "learning_rate": 5.415879447262942e-06, "loss": 0.228302001953125, "step": 10805 }, { "epoch": 0.7304312559145599, "grad_norm": 1.5676683187484741, "learning_rate": 5.413341497638847e-06, "loss": 0.2772216796875, "step": 10806 }, { "epoch": 0.7304988508854942, "grad_norm": 1.344647765159607, "learning_rate": 5.4108040118768995e-06, "loss": 0.2939453125, "step": 10807 }, { "epoch": 0.7305664458564283, "grad_norm": 1.409813404083252, "learning_rate": 5.4082669900998926e-06, "loss": 0.25372314453125, "step": 10808 }, { "epoch": 0.7306340408273625, "grad_norm": 1.065057396888733, "learning_rate": 5.405730432430581e-06, "loss": 0.170196533203125, "step": 10809 }, { "epoch": 0.7307016357982966, "grad_norm": 1.0910582542419434, "learning_rate": 5.403194338991696e-06, "loss": 0.1367340087890625, "step": 10810 }, { "epoch": 0.7307692307692307, "grad_norm": 0.7440112829208374, "learning_rate": 5.400658709905956e-06, "loss": 0.10639572143554688, "step": 10811 }, { "epoch": 0.730836825740165, "grad_norm": 1.9085637331008911, "learning_rate": 5.398123545296034e-06, "loss": 0.2684326171875, "step": 10812 }, { "epoch": 0.7309044207110991, "grad_norm": 1.0230286121368408, "learning_rate": 5.395588845284615e-06, "loss": 0.193572998046875, "step": 10813 }, { "epoch": 0.7309720156820333, "grad_norm": 1.0287240743637085, "learning_rate": 5.3930546099943354e-06, "loss": 0.2191162109375, "step": 10814 }, { "epoch": 0.7310396106529674, "grad_norm": 0.9130420684814453, "learning_rate": 5.3905208395478205e-06, "loss": 0.161651611328125, "step": 10815 }, { "epoch": 0.7311072056239016, "grad_norm": 1.0358808040618896, "learning_rate": 5.387987534067667e-06, "loss": 0.160736083984375, "step": 10816 }, { "epoch": 0.7311748005948357, "grad_norm": 1.0052778720855713, "learning_rate": 5.3854546936764534e-06, "loss": 0.1584320068359375, "step": 10817 }, { "epoch": 0.7312423955657699, "grad_norm": 2.017691135406494, "learning_rate": 5.3829223184967285e-06, "loss": 0.306640625, "step": 10818 }, { "epoch": 0.7313099905367041, "grad_norm": 1.2530688047409058, "learning_rate": 5.38039040865104e-06, "loss": 0.298797607421875, "step": 10819 }, { "epoch": 0.7313775855076382, "grad_norm": 1.24416184425354, "learning_rate": 5.377858964261878e-06, "loss": 0.1937713623046875, "step": 10820 }, { "epoch": 0.7314451804785724, "grad_norm": 1.0428013801574707, "learning_rate": 5.375327985451742e-06, "loss": 0.180389404296875, "step": 10821 }, { "epoch": 0.7315127754495065, "grad_norm": 2.7782979011535645, "learning_rate": 5.3727974723431e-06, "loss": 0.3181304931640625, "step": 10822 }, { "epoch": 0.7315803704204408, "grad_norm": 1.593501329421997, "learning_rate": 5.370267425058376e-06, "loss": 0.274566650390625, "step": 10823 }, { "epoch": 0.7316479653913749, "grad_norm": 1.035624623298645, "learning_rate": 5.3677378437200056e-06, "loss": 0.23120880126953125, "step": 10824 }, { "epoch": 0.731715560362309, "grad_norm": 1.8273252248764038, "learning_rate": 5.365208728450383e-06, "loss": 0.3433837890625, "step": 10825 }, { "epoch": 0.7317831553332432, "grad_norm": 1.508286476135254, "learning_rate": 5.3626800793718786e-06, "loss": 0.301544189453125, "step": 10826 }, { "epoch": 0.7318507503041773, "grad_norm": 1.7585299015045166, "learning_rate": 5.360151896606845e-06, "loss": 0.34161376953125, "step": 10827 }, { "epoch": 0.7319183452751116, "grad_norm": 1.3849592208862305, "learning_rate": 5.3576241802776136e-06, "loss": 0.23443603515625, "step": 10828 }, { "epoch": 0.7319859402460457, "grad_norm": 1.0056837797164917, "learning_rate": 5.355096930506488e-06, "loss": 0.2256622314453125, "step": 10829 }, { "epoch": 0.7320535352169799, "grad_norm": 1.023587942123413, "learning_rate": 5.352570147415756e-06, "loss": 0.14926910400390625, "step": 10830 }, { "epoch": 0.732121130187914, "grad_norm": 1.0934126377105713, "learning_rate": 5.3500438311276715e-06, "loss": 0.261322021484375, "step": 10831 }, { "epoch": 0.7321887251588481, "grad_norm": 1.1884673833847046, "learning_rate": 5.347517981764486e-06, "loss": 0.26678466796875, "step": 10832 }, { "epoch": 0.7322563201297824, "grad_norm": 0.9884302616119385, "learning_rate": 5.344992599448408e-06, "loss": 0.17462921142578125, "step": 10833 }, { "epoch": 0.7323239151007165, "grad_norm": 1.3286027908325195, "learning_rate": 5.342467684301624e-06, "loss": 0.276763916015625, "step": 10834 }, { "epoch": 0.7323915100716507, "grad_norm": 0.9719746112823486, "learning_rate": 5.339943236446324e-06, "loss": 0.158782958984375, "step": 10835 }, { "epoch": 0.7324591050425848, "grad_norm": 1.3005969524383545, "learning_rate": 5.337419256004635e-06, "loss": 0.2542724609375, "step": 10836 }, { "epoch": 0.7325267000135189, "grad_norm": 1.5850846767425537, "learning_rate": 5.334895743098699e-06, "loss": 0.3001708984375, "step": 10837 }, { "epoch": 0.7325942949844532, "grad_norm": 1.5775749683380127, "learning_rate": 5.332372697850614e-06, "loss": 0.28143310546875, "step": 10838 }, { "epoch": 0.7326618899553873, "grad_norm": 1.137515902519226, "learning_rate": 5.329850120382459e-06, "loss": 0.19635009765625, "step": 10839 }, { "epoch": 0.7327294849263215, "grad_norm": 1.236065149307251, "learning_rate": 5.327328010816293e-06, "loss": 0.23944091796875, "step": 10840 }, { "epoch": 0.7327970798972556, "grad_norm": 1.2777940034866333, "learning_rate": 5.324806369274153e-06, "loss": 0.238250732421875, "step": 10841 }, { "epoch": 0.7328646748681898, "grad_norm": 2.241319179534912, "learning_rate": 5.322285195878044e-06, "loss": 0.26751708984375, "step": 10842 }, { "epoch": 0.732932269839124, "grad_norm": 1.5771371126174927, "learning_rate": 5.319764490749972e-06, "loss": 0.236083984375, "step": 10843 }, { "epoch": 0.7329998648100581, "grad_norm": 1.9192100763320923, "learning_rate": 5.317244254011885e-06, "loss": 0.302398681640625, "step": 10844 }, { "epoch": 0.7330674597809923, "grad_norm": 1.221513271331787, "learning_rate": 5.31472448578575e-06, "loss": 0.13727188110351562, "step": 10845 }, { "epoch": 0.7331350547519264, "grad_norm": 1.800684928894043, "learning_rate": 5.312205186193469e-06, "loss": 0.30279541015625, "step": 10846 }, { "epoch": 0.7332026497228606, "grad_norm": 1.0457571744918823, "learning_rate": 5.309686355356942e-06, "loss": 0.11833953857421875, "step": 10847 }, { "epoch": 0.7332702446937948, "grad_norm": 1.6064841747283936, "learning_rate": 5.307167993398066e-06, "loss": 0.349700927734375, "step": 10848 }, { "epoch": 0.733337839664729, "grad_norm": 1.0044175386428833, "learning_rate": 5.304650100438671e-06, "loss": 0.2433929443359375, "step": 10849 }, { "epoch": 0.7334054346356631, "grad_norm": 1.934963583946228, "learning_rate": 5.302132676600601e-06, "loss": 0.231781005859375, "step": 10850 }, { "epoch": 0.7334730296065972, "grad_norm": 1.7115106582641602, "learning_rate": 5.299615722005666e-06, "loss": 0.24462890625, "step": 10851 }, { "epoch": 0.7335406245775314, "grad_norm": 0.505394697189331, "learning_rate": 5.2970992367756465e-06, "loss": 0.1092071533203125, "step": 10852 }, { "epoch": 0.7336082195484656, "grad_norm": 2.619903087615967, "learning_rate": 5.2945832210323085e-06, "loss": 0.252716064453125, "step": 10853 }, { "epoch": 0.7336758145193998, "grad_norm": 1.0090432167053223, "learning_rate": 5.2920676748973916e-06, "loss": 0.14260101318359375, "step": 10854 }, { "epoch": 0.7337434094903339, "grad_norm": 0.995823860168457, "learning_rate": 5.289552598492607e-06, "loss": 0.222198486328125, "step": 10855 }, { "epoch": 0.7338110044612681, "grad_norm": 1.7750768661499023, "learning_rate": 5.2870379919396685e-06, "loss": 0.3090057373046875, "step": 10856 }, { "epoch": 0.7338785994322022, "grad_norm": 1.6200873851776123, "learning_rate": 5.28452385536023e-06, "loss": 0.337158203125, "step": 10857 }, { "epoch": 0.7339461944031364, "grad_norm": 1.1828266382217407, "learning_rate": 5.282010188875948e-06, "loss": 0.2340087890625, "step": 10858 }, { "epoch": 0.7340137893740706, "grad_norm": 0.9502033591270447, "learning_rate": 5.279496992608448e-06, "loss": 0.20819091796875, "step": 10859 }, { "epoch": 0.7340813843450047, "grad_norm": 1.0524239540100098, "learning_rate": 5.276984266679329e-06, "loss": 0.2060546875, "step": 10860 }, { "epoch": 0.7341489793159389, "grad_norm": 1.1172014474868774, "learning_rate": 5.27447201121019e-06, "loss": 0.2114715576171875, "step": 10861 }, { "epoch": 0.734216574286873, "grad_norm": 1.7507386207580566, "learning_rate": 5.271960226322564e-06, "loss": 0.2292022705078125, "step": 10862 }, { "epoch": 0.7342841692578073, "grad_norm": 1.4292569160461426, "learning_rate": 5.269448912138008e-06, "loss": 0.237152099609375, "step": 10863 }, { "epoch": 0.7343517642287414, "grad_norm": 0.9160042405128479, "learning_rate": 5.266938068778027e-06, "loss": 0.19384765625, "step": 10864 }, { "epoch": 0.7344193591996755, "grad_norm": 2.129960060119629, "learning_rate": 5.264427696364111e-06, "loss": 0.239105224609375, "step": 10865 }, { "epoch": 0.7344869541706097, "grad_norm": 2.356466293334961, "learning_rate": 5.261917795017729e-06, "loss": 0.28741455078125, "step": 10866 }, { "epoch": 0.7345545491415438, "grad_norm": 1.3079102039337158, "learning_rate": 5.259408364860324e-06, "loss": 0.294921875, "step": 10867 }, { "epoch": 0.7346221441124781, "grad_norm": 1.726733684539795, "learning_rate": 5.256899406013312e-06, "loss": 0.1980438232421875, "step": 10868 }, { "epoch": 0.7346897390834122, "grad_norm": 1.1648234128952026, "learning_rate": 5.25439091859811e-06, "loss": 0.242462158203125, "step": 10869 }, { "epoch": 0.7347573340543464, "grad_norm": 1.104382038116455, "learning_rate": 5.251882902736077e-06, "loss": 0.206634521484375, "step": 10870 }, { "epoch": 0.7348249290252805, "grad_norm": 1.9403905868530273, "learning_rate": 5.249375358548568e-06, "loss": 0.237945556640625, "step": 10871 }, { "epoch": 0.7348925239962146, "grad_norm": 1.5045216083526611, "learning_rate": 5.2468682861569265e-06, "loss": 0.232635498046875, "step": 10872 }, { "epoch": 0.7349601189671489, "grad_norm": 0.9796749949455261, "learning_rate": 5.244361685682441e-06, "loss": 0.10498046875, "step": 10873 }, { "epoch": 0.735027713938083, "grad_norm": 1.4399586915969849, "learning_rate": 5.241855557246417e-06, "loss": 0.227874755859375, "step": 10874 }, { "epoch": 0.7350953089090172, "grad_norm": 0.8330674767494202, "learning_rate": 5.239349900970099e-06, "loss": 0.223052978515625, "step": 10875 }, { "epoch": 0.7351629038799513, "grad_norm": 2.0526156425476074, "learning_rate": 5.236844716974736e-06, "loss": 0.32879638671875, "step": 10876 }, { "epoch": 0.7352304988508855, "grad_norm": 1.861168622970581, "learning_rate": 5.234340005381543e-06, "loss": 0.3299560546875, "step": 10877 }, { "epoch": 0.7352980938218197, "grad_norm": 1.403490424156189, "learning_rate": 5.2318357663117115e-06, "loss": 0.23382568359375, "step": 10878 }, { "epoch": 0.7353656887927538, "grad_norm": 1.1648601293563843, "learning_rate": 5.229331999886415e-06, "loss": 0.2429656982421875, "step": 10879 }, { "epoch": 0.735433283763688, "grad_norm": 1.2692105770111084, "learning_rate": 5.226828706226797e-06, "loss": 0.250885009765625, "step": 10880 }, { "epoch": 0.7355008787346221, "grad_norm": 1.3272227048873901, "learning_rate": 5.224325885453987e-06, "loss": 0.293975830078125, "step": 10881 }, { "epoch": 0.7355684737055563, "grad_norm": 1.4202111959457397, "learning_rate": 5.221823537689084e-06, "loss": 0.20150375366210938, "step": 10882 }, { "epoch": 0.7356360686764905, "grad_norm": 1.745645523071289, "learning_rate": 5.219321663053168e-06, "loss": 0.2799835205078125, "step": 10883 }, { "epoch": 0.7357036636474247, "grad_norm": 1.2916967868804932, "learning_rate": 5.216820261667291e-06, "loss": 0.241607666015625, "step": 10884 }, { "epoch": 0.7357712586183588, "grad_norm": 1.8389934301376343, "learning_rate": 5.2143193336525e-06, "loss": 0.2445068359375, "step": 10885 }, { "epoch": 0.7358388535892929, "grad_norm": 1.2427740097045898, "learning_rate": 5.211818879129785e-06, "loss": 0.214019775390625, "step": 10886 }, { "epoch": 0.7359064485602271, "grad_norm": 1.4513031244277954, "learning_rate": 5.2093188982201566e-06, "loss": 0.296875, "step": 10887 }, { "epoch": 0.7359740435311612, "grad_norm": 1.682084560394287, "learning_rate": 5.206819391044557e-06, "loss": 0.278106689453125, "step": 10888 }, { "epoch": 0.7360416385020955, "grad_norm": 1.028119683265686, "learning_rate": 5.204320357723943e-06, "loss": 0.2288055419921875, "step": 10889 }, { "epoch": 0.7361092334730296, "grad_norm": 1.661616563796997, "learning_rate": 5.201821798379231e-06, "loss": 0.22705841064453125, "step": 10890 }, { "epoch": 0.7361768284439638, "grad_norm": 1.2974210977554321, "learning_rate": 5.199323713131311e-06, "loss": 0.2777099609375, "step": 10891 }, { "epoch": 0.7362444234148979, "grad_norm": 0.9662218689918518, "learning_rate": 5.196826102101061e-06, "loss": 0.181732177734375, "step": 10892 }, { "epoch": 0.736312018385832, "grad_norm": 1.847411036491394, "learning_rate": 5.194328965409331e-06, "loss": 0.2855224609375, "step": 10893 }, { "epoch": 0.7363796133567663, "grad_norm": 1.6790302991867065, "learning_rate": 5.191832303176945e-06, "loss": 0.2962646484375, "step": 10894 }, { "epoch": 0.7364472083277004, "grad_norm": 1.3660359382629395, "learning_rate": 5.1893361155247075e-06, "loss": 0.2426300048828125, "step": 10895 }, { "epoch": 0.7365148032986346, "grad_norm": 1.4146677255630493, "learning_rate": 5.186840402573402e-06, "loss": 0.24444580078125, "step": 10896 }, { "epoch": 0.7365823982695687, "grad_norm": 0.9793562293052673, "learning_rate": 5.1843451644437805e-06, "loss": 0.18298721313476562, "step": 10897 }, { "epoch": 0.736649993240503, "grad_norm": 0.9625979661941528, "learning_rate": 5.181850401256593e-06, "loss": 0.1769866943359375, "step": 10898 }, { "epoch": 0.7367175882114371, "grad_norm": 1.217463731765747, "learning_rate": 5.1793561131325304e-06, "loss": 0.2330169677734375, "step": 10899 }, { "epoch": 0.7367851831823712, "grad_norm": 1.6834930181503296, "learning_rate": 5.176862300192298e-06, "loss": 0.29278564453125, "step": 10900 }, { "epoch": 0.7368527781533054, "grad_norm": 1.1562104225158691, "learning_rate": 5.174368962556557e-06, "loss": 0.2601318359375, "step": 10901 }, { "epoch": 0.7369203731242395, "grad_norm": 0.8242129683494568, "learning_rate": 5.171876100345952e-06, "loss": 0.220794677734375, "step": 10902 }, { "epoch": 0.7369879680951738, "grad_norm": 2.0635101795196533, "learning_rate": 5.1693837136811e-06, "loss": 0.31671142578125, "step": 10903 }, { "epoch": 0.7370555630661079, "grad_norm": 1.0097506046295166, "learning_rate": 5.1668918026826e-06, "loss": 0.171539306640625, "step": 10904 }, { "epoch": 0.7371231580370421, "grad_norm": 0.8756203055381775, "learning_rate": 5.164400367471027e-06, "loss": 0.182861328125, "step": 10905 }, { "epoch": 0.7371907530079762, "grad_norm": 1.021133303642273, "learning_rate": 5.161909408166931e-06, "loss": 0.22021484375, "step": 10906 }, { "epoch": 0.7372583479789103, "grad_norm": 1.5281604528427124, "learning_rate": 5.159418924890839e-06, "loss": 0.28173828125, "step": 10907 }, { "epoch": 0.7373259429498445, "grad_norm": 1.1068919897079468, "learning_rate": 5.156928917763258e-06, "loss": 0.27423095703125, "step": 10908 }, { "epoch": 0.7373935379207787, "grad_norm": 0.7493174076080322, "learning_rate": 5.154439386904671e-06, "loss": 0.143768310546875, "step": 10909 }, { "epoch": 0.7374611328917129, "grad_norm": 0.9317288994789124, "learning_rate": 5.151950332435528e-06, "loss": 0.1798553466796875, "step": 10910 }, { "epoch": 0.737528727862647, "grad_norm": 1.38706374168396, "learning_rate": 5.149461754476283e-06, "loss": 0.193756103515625, "step": 10911 }, { "epoch": 0.7375963228335812, "grad_norm": 1.9421508312225342, "learning_rate": 5.146973653147328e-06, "loss": 0.31390380859375, "step": 10912 }, { "epoch": 0.7376639178045153, "grad_norm": 0.8242008686065674, "learning_rate": 5.144486028569069e-06, "loss": 0.161773681640625, "step": 10913 }, { "epoch": 0.7377315127754495, "grad_norm": 1.306134581565857, "learning_rate": 5.1419988808618664e-06, "loss": 0.319091796875, "step": 10914 }, { "epoch": 0.7377991077463837, "grad_norm": 1.070156455039978, "learning_rate": 5.139512210146065e-06, "loss": 0.22000885009765625, "step": 10915 }, { "epoch": 0.7378667027173178, "grad_norm": 1.282615303993225, "learning_rate": 5.137026016541983e-06, "loss": 0.31890869140625, "step": 10916 }, { "epoch": 0.737934297688252, "grad_norm": 0.87269526720047, "learning_rate": 5.134540300169919e-06, "loss": 0.136688232421875, "step": 10917 }, { "epoch": 0.7380018926591861, "grad_norm": 0.8545360565185547, "learning_rate": 5.132055061150149e-06, "loss": 0.171600341796875, "step": 10918 }, { "epoch": 0.7380694876301204, "grad_norm": 1.6304235458374023, "learning_rate": 5.129570299602924e-06, "loss": 0.2293243408203125, "step": 10919 }, { "epoch": 0.7381370826010545, "grad_norm": 1.7427297830581665, "learning_rate": 5.127086015648471e-06, "loss": 0.324005126953125, "step": 10920 }, { "epoch": 0.7382046775719886, "grad_norm": 0.9032418131828308, "learning_rate": 5.124602209406993e-06, "loss": 0.207122802734375, "step": 10921 }, { "epoch": 0.7382722725429228, "grad_norm": 1.5047762393951416, "learning_rate": 5.122118880998678e-06, "loss": 0.2703857421875, "step": 10922 }, { "epoch": 0.7383398675138569, "grad_norm": 1.1376391649246216, "learning_rate": 5.119636030543675e-06, "loss": 0.247955322265625, "step": 10923 }, { "epoch": 0.7384074624847912, "grad_norm": 1.3499398231506348, "learning_rate": 5.117153658162135e-06, "loss": 0.2663726806640625, "step": 10924 }, { "epoch": 0.7384750574557253, "grad_norm": 1.2019180059432983, "learning_rate": 5.114671763974154e-06, "loss": 0.2784423828125, "step": 10925 }, { "epoch": 0.7385426524266595, "grad_norm": 0.8930602073669434, "learning_rate": 5.112190348099834e-06, "loss": 0.187896728515625, "step": 10926 }, { "epoch": 0.7386102473975936, "grad_norm": 1.3715500831604004, "learning_rate": 5.10970941065924e-06, "loss": 0.277252197265625, "step": 10927 }, { "epoch": 0.7386778423685277, "grad_norm": 1.2526236772537231, "learning_rate": 5.107228951772403e-06, "loss": 0.306884765625, "step": 10928 }, { "epoch": 0.738745437339462, "grad_norm": 1.307910680770874, "learning_rate": 5.104748971559356e-06, "loss": 0.17877197265625, "step": 10929 }, { "epoch": 0.7388130323103961, "grad_norm": 1.107236623764038, "learning_rate": 5.102269470140093e-06, "loss": 0.22705078125, "step": 10930 }, { "epoch": 0.7388806272813303, "grad_norm": 1.1607831716537476, "learning_rate": 5.099790447634585e-06, "loss": 0.1671905517578125, "step": 10931 }, { "epoch": 0.7389482222522644, "grad_norm": 1.947443962097168, "learning_rate": 5.097311904162784e-06, "loss": 0.3101806640625, "step": 10932 }, { "epoch": 0.7390158172231986, "grad_norm": 1.0928319692611694, "learning_rate": 5.094833839844617e-06, "loss": 0.25830078125, "step": 10933 }, { "epoch": 0.7390834121941328, "grad_norm": 1.122230887413025, "learning_rate": 5.092356254799984e-06, "loss": 0.241424560546875, "step": 10934 }, { "epoch": 0.7391510071650669, "grad_norm": 1.494559645652771, "learning_rate": 5.089879149148781e-06, "loss": 0.2691650390625, "step": 10935 }, { "epoch": 0.7392186021360011, "grad_norm": 1.0647996664047241, "learning_rate": 5.087402523010844e-06, "loss": 0.246307373046875, "step": 10936 }, { "epoch": 0.7392861971069352, "grad_norm": 1.5872997045516968, "learning_rate": 5.08492637650603e-06, "loss": 0.25579833984375, "step": 10937 }, { "epoch": 0.7393537920778694, "grad_norm": 2.4207875728607178, "learning_rate": 5.0824507097541285e-06, "loss": 0.29107666015625, "step": 10938 }, { "epoch": 0.7394213870488036, "grad_norm": 1.5866206884384155, "learning_rate": 5.079975522874943e-06, "loss": 0.2724456787109375, "step": 10939 }, { "epoch": 0.7394889820197378, "grad_norm": 1.906178593635559, "learning_rate": 5.0775008159882395e-06, "loss": 0.260009765625, "step": 10940 }, { "epoch": 0.7395565769906719, "grad_norm": 1.1641380786895752, "learning_rate": 5.0750265892137415e-06, "loss": 0.146240234375, "step": 10941 }, { "epoch": 0.739624171961606, "grad_norm": 0.9118512868881226, "learning_rate": 5.0725528426711865e-06, "loss": 0.13362884521484375, "step": 10942 }, { "epoch": 0.7396917669325402, "grad_norm": 0.6977435350418091, "learning_rate": 5.070079576480263e-06, "loss": 0.113922119140625, "step": 10943 }, { "epoch": 0.7397593619034744, "grad_norm": 2.0912222862243652, "learning_rate": 5.067606790760645e-06, "loss": 0.296630859375, "step": 10944 }, { "epoch": 0.7398269568744086, "grad_norm": 1.0622010231018066, "learning_rate": 5.065134485631976e-06, "loss": 0.22216796875, "step": 10945 }, { "epoch": 0.7398945518453427, "grad_norm": 2.0430569648742676, "learning_rate": 5.0626626612138875e-06, "loss": 0.34283447265625, "step": 10946 }, { "epoch": 0.7399621468162769, "grad_norm": 1.3254669904708862, "learning_rate": 5.060191317625973e-06, "loss": 0.282623291015625, "step": 10947 }, { "epoch": 0.740029741787211, "grad_norm": 1.28646981716156, "learning_rate": 5.057720454987826e-06, "loss": 0.26141357421875, "step": 10948 }, { "epoch": 0.7400973367581452, "grad_norm": 1.03058660030365, "learning_rate": 5.055250073418986e-06, "loss": 0.19181060791015625, "step": 10949 }, { "epoch": 0.7401649317290794, "grad_norm": 1.3234877586364746, "learning_rate": 5.052780173039003e-06, "loss": 0.24847412109375, "step": 10950 }, { "epoch": 0.7402325267000135, "grad_norm": 2.256838083267212, "learning_rate": 5.05031075396737e-06, "loss": 0.22393798828125, "step": 10951 }, { "epoch": 0.7403001216709477, "grad_norm": 1.1123028993606567, "learning_rate": 5.0478418163235745e-06, "loss": 0.224334716796875, "step": 10952 }, { "epoch": 0.7403677166418818, "grad_norm": 1.219931721687317, "learning_rate": 5.045373360227094e-06, "loss": 0.295166015625, "step": 10953 }, { "epoch": 0.7404353116128161, "grad_norm": 0.9865418076515198, "learning_rate": 5.0429053857973485e-06, "loss": 0.21612548828125, "step": 10954 }, { "epoch": 0.7405029065837502, "grad_norm": 0.4481137692928314, "learning_rate": 5.040437893153767e-06, "loss": 0.1015167236328125, "step": 10955 }, { "epoch": 0.7405705015546843, "grad_norm": 1.493270993232727, "learning_rate": 5.037970882415738e-06, "loss": 0.189300537109375, "step": 10956 }, { "epoch": 0.7406380965256185, "grad_norm": 1.1528668403625488, "learning_rate": 5.035504353702632e-06, "loss": 0.27264404296875, "step": 10957 }, { "epoch": 0.7407056914965526, "grad_norm": 2.190505266189575, "learning_rate": 5.033038307133792e-06, "loss": 0.213104248046875, "step": 10958 }, { "epoch": 0.7407732864674869, "grad_norm": 1.774127721786499, "learning_rate": 5.030572742828545e-06, "loss": 0.2079925537109375, "step": 10959 }, { "epoch": 0.740840881438421, "grad_norm": 1.2616671323776245, "learning_rate": 5.028107660906181e-06, "loss": 0.3287353515625, "step": 10960 }, { "epoch": 0.7409084764093552, "grad_norm": 1.339673399925232, "learning_rate": 5.025643061485992e-06, "loss": 0.26348876953125, "step": 10961 }, { "epoch": 0.7409760713802893, "grad_norm": 0.9842624068260193, "learning_rate": 5.023178944687219e-06, "loss": 0.236358642578125, "step": 10962 }, { "epoch": 0.7410436663512234, "grad_norm": 1.0844416618347168, "learning_rate": 5.020715310629088e-06, "loss": 0.22296142578125, "step": 10963 }, { "epoch": 0.7411112613221577, "grad_norm": 1.6032341718673706, "learning_rate": 5.018252159430819e-06, "loss": 0.3001708984375, "step": 10964 }, { "epoch": 0.7411788562930918, "grad_norm": 1.2274025678634644, "learning_rate": 5.01578949121158e-06, "loss": 0.2111053466796875, "step": 10965 }, { "epoch": 0.741246451264026, "grad_norm": 1.0427212715148926, "learning_rate": 5.013327306090543e-06, "loss": 0.217559814453125, "step": 10966 }, { "epoch": 0.7413140462349601, "grad_norm": 1.0610032081604004, "learning_rate": 5.010865604186829e-06, "loss": 0.23394775390625, "step": 10967 }, { "epoch": 0.7413816412058942, "grad_norm": 0.7432801127433777, "learning_rate": 5.008404385619563e-06, "loss": 0.1716156005859375, "step": 10968 }, { "epoch": 0.7414492361768285, "grad_norm": 1.1012738943099976, "learning_rate": 5.005943650507831e-06, "loss": 0.20709228515625, "step": 10969 }, { "epoch": 0.7415168311477626, "grad_norm": 1.235371470451355, "learning_rate": 5.003483398970698e-06, "loss": 0.251800537109375, "step": 10970 }, { "epoch": 0.7415844261186968, "grad_norm": 1.1532031297683716, "learning_rate": 5.001023631127204e-06, "loss": 0.1853179931640625, "step": 10971 }, { "epoch": 0.7416520210896309, "grad_norm": 1.3749630451202393, "learning_rate": 4.998564347096372e-06, "loss": 0.2580413818359375, "step": 10972 }, { "epoch": 0.7417196160605651, "grad_norm": 0.921646237373352, "learning_rate": 4.996105546997188e-06, "loss": 0.1818389892578125, "step": 10973 }, { "epoch": 0.7417872110314992, "grad_norm": 1.2016735076904297, "learning_rate": 4.9936472309486425e-06, "loss": 0.219757080078125, "step": 10974 }, { "epoch": 0.7418548060024334, "grad_norm": 0.9478489756584167, "learning_rate": 4.991189399069666e-06, "loss": 0.2344512939453125, "step": 10975 }, { "epoch": 0.7419224009733676, "grad_norm": 1.5396876335144043, "learning_rate": 4.988732051479187e-06, "loss": 0.298919677734375, "step": 10976 }, { "epoch": 0.7419899959443017, "grad_norm": 1.1149169206619263, "learning_rate": 4.986275188296121e-06, "loss": 0.2669677734375, "step": 10977 }, { "epoch": 0.7420575909152359, "grad_norm": 0.8811542987823486, "learning_rate": 4.983818809639326e-06, "loss": 0.208770751953125, "step": 10978 }, { "epoch": 0.74212518588617, "grad_norm": 1.3487268686294556, "learning_rate": 4.981362915627676e-06, "loss": 0.228729248046875, "step": 10979 }, { "epoch": 0.7421927808571043, "grad_norm": 2.2321343421936035, "learning_rate": 4.978907506379985e-06, "loss": 0.2406005859375, "step": 10980 }, { "epoch": 0.7422603758280384, "grad_norm": 2.1852378845214844, "learning_rate": 4.976452582015073e-06, "loss": 0.3402099609375, "step": 10981 }, { "epoch": 0.7423279707989725, "grad_norm": 0.8584709167480469, "learning_rate": 4.97399814265172e-06, "loss": 0.117218017578125, "step": 10982 }, { "epoch": 0.7423955657699067, "grad_norm": 0.8259710073471069, "learning_rate": 4.9715441884086895e-06, "loss": 0.1555023193359375, "step": 10983 }, { "epoch": 0.7424631607408408, "grad_norm": 1.635474681854248, "learning_rate": 4.969090719404718e-06, "loss": 0.2822265625, "step": 10984 }, { "epoch": 0.7425307557117751, "grad_norm": 1.1246825456619263, "learning_rate": 4.966637735758518e-06, "loss": 0.28143310546875, "step": 10985 }, { "epoch": 0.7425983506827092, "grad_norm": 1.3790560960769653, "learning_rate": 4.964185237588781e-06, "loss": 0.23785400390625, "step": 10986 }, { "epoch": 0.7426659456536434, "grad_norm": 1.0021147727966309, "learning_rate": 4.961733225014174e-06, "loss": 0.29364013671875, "step": 10987 }, { "epoch": 0.7427335406245775, "grad_norm": 1.10125732421875, "learning_rate": 4.9592816981533405e-06, "loss": 0.2694091796875, "step": 10988 }, { "epoch": 0.7428011355955116, "grad_norm": 1.0191147327423096, "learning_rate": 4.956830657124898e-06, "loss": 0.19459915161132812, "step": 10989 }, { "epoch": 0.7428687305664459, "grad_norm": 0.9863284826278687, "learning_rate": 4.9543801020474546e-06, "loss": 0.1785736083984375, "step": 10990 }, { "epoch": 0.74293632553738, "grad_norm": 0.8867786526679993, "learning_rate": 4.951930033039566e-06, "loss": 0.1396484375, "step": 10991 }, { "epoch": 0.7430039205083142, "grad_norm": 1.2599841356277466, "learning_rate": 4.949480450219795e-06, "loss": 0.253692626953125, "step": 10992 }, { "epoch": 0.7430715154792483, "grad_norm": 1.6108829975128174, "learning_rate": 4.947031353706663e-06, "loss": 0.2432098388671875, "step": 10993 }, { "epoch": 0.7431391104501825, "grad_norm": 0.880213737487793, "learning_rate": 4.9445827436186735e-06, "loss": 0.18939208984375, "step": 10994 }, { "epoch": 0.7432067054211167, "grad_norm": 1.2613979578018188, "learning_rate": 4.942134620074304e-06, "loss": 0.1820068359375, "step": 10995 }, { "epoch": 0.7432743003920508, "grad_norm": 1.259181022644043, "learning_rate": 4.939686983192011e-06, "loss": 0.191436767578125, "step": 10996 }, { "epoch": 0.743341895362985, "grad_norm": 1.3039228916168213, "learning_rate": 4.937239833090227e-06, "loss": 0.27899169921875, "step": 10997 }, { "epoch": 0.7434094903339191, "grad_norm": 1.6293513774871826, "learning_rate": 4.9347931698873575e-06, "loss": 0.3372802734375, "step": 10998 }, { "epoch": 0.7434770853048533, "grad_norm": 1.211095929145813, "learning_rate": 4.932346993701792e-06, "loss": 0.201568603515625, "step": 10999 }, { "epoch": 0.7435446802757875, "grad_norm": 1.4888582229614258, "learning_rate": 4.929901304651887e-06, "loss": 0.214752197265625, "step": 11000 }, { "epoch": 0.7436122752467217, "grad_norm": 0.8957228064537048, "learning_rate": 4.927456102855984e-06, "loss": 0.1688232421875, "step": 11001 }, { "epoch": 0.7436798702176558, "grad_norm": 1.823867917060852, "learning_rate": 4.925011388432388e-06, "loss": 0.32958984375, "step": 11002 }, { "epoch": 0.7437474651885899, "grad_norm": 1.0650519132614136, "learning_rate": 4.922567161499408e-06, "loss": 0.2121429443359375, "step": 11003 }, { "epoch": 0.7438150601595241, "grad_norm": 1.4363151788711548, "learning_rate": 4.920123422175289e-06, "loss": 0.259185791015625, "step": 11004 }, { "epoch": 0.7438826551304583, "grad_norm": 1.6076589822769165, "learning_rate": 4.917680170578292e-06, "loss": 0.274810791015625, "step": 11005 }, { "epoch": 0.7439502501013925, "grad_norm": 1.166524052619934, "learning_rate": 4.915237406826626e-06, "loss": 0.2493896484375, "step": 11006 }, { "epoch": 0.7440178450723266, "grad_norm": 1.1299995183944702, "learning_rate": 4.9127951310384934e-06, "loss": 0.14265823364257812, "step": 11007 }, { "epoch": 0.7440854400432608, "grad_norm": 1.4684784412384033, "learning_rate": 4.910353343332063e-06, "loss": 0.31451416015625, "step": 11008 }, { "epoch": 0.7441530350141949, "grad_norm": 1.3317409753799438, "learning_rate": 4.907912043825485e-06, "loss": 0.28460693359375, "step": 11009 }, { "epoch": 0.7442206299851291, "grad_norm": 1.414229393005371, "learning_rate": 4.905471232636885e-06, "loss": 0.261962890625, "step": 11010 }, { "epoch": 0.7442882249560633, "grad_norm": 2.2975969314575195, "learning_rate": 4.903030909884363e-06, "loss": 0.260284423828125, "step": 11011 }, { "epoch": 0.7443558199269974, "grad_norm": 1.6169507503509521, "learning_rate": 4.900591075685998e-06, "loss": 0.260650634765625, "step": 11012 }, { "epoch": 0.7444234148979316, "grad_norm": 1.3167128562927246, "learning_rate": 4.898151730159845e-06, "loss": 0.24951171875, "step": 11013 }, { "epoch": 0.7444910098688657, "grad_norm": 0.8389123678207397, "learning_rate": 4.895712873423934e-06, "loss": 0.15399169921875, "step": 11014 }, { "epoch": 0.7445586048398, "grad_norm": 1.4066431522369385, "learning_rate": 4.893274505596267e-06, "loss": 0.22991943359375, "step": 11015 }, { "epoch": 0.7446261998107341, "grad_norm": 0.9153070449829102, "learning_rate": 4.89083662679484e-06, "loss": 0.167724609375, "step": 11016 }, { "epoch": 0.7446937947816682, "grad_norm": 1.329360842704773, "learning_rate": 4.888399237137597e-06, "loss": 0.2791748046875, "step": 11017 }, { "epoch": 0.7447613897526024, "grad_norm": 1.8711345195770264, "learning_rate": 4.885962336742489e-06, "loss": 0.275146484375, "step": 11018 }, { "epoch": 0.7448289847235365, "grad_norm": 1.1725050210952759, "learning_rate": 4.88352592572742e-06, "loss": 0.281036376953125, "step": 11019 }, { "epoch": 0.7448965796944708, "grad_norm": 1.4730898141860962, "learning_rate": 4.8810900042102805e-06, "loss": 0.28961181640625, "step": 11020 }, { "epoch": 0.7449641746654049, "grad_norm": 1.395029902458191, "learning_rate": 4.878654572308936e-06, "loss": 0.235565185546875, "step": 11021 }, { "epoch": 0.7450317696363391, "grad_norm": 1.3751071691513062, "learning_rate": 4.876219630141227e-06, "loss": 0.2745819091796875, "step": 11022 }, { "epoch": 0.7450993646072732, "grad_norm": 1.0409842729568481, "learning_rate": 4.873785177824971e-06, "loss": 0.1811370849609375, "step": 11023 }, { "epoch": 0.7451669595782073, "grad_norm": 1.5997776985168457, "learning_rate": 4.871351215477962e-06, "loss": 0.2960662841796875, "step": 11024 }, { "epoch": 0.7452345545491416, "grad_norm": 1.1053589582443237, "learning_rate": 4.868917743217971e-06, "loss": 0.2740478515625, "step": 11025 }, { "epoch": 0.7453021495200757, "grad_norm": 1.815348744392395, "learning_rate": 4.866484761162745e-06, "loss": 0.31243896484375, "step": 11026 }, { "epoch": 0.7453697444910099, "grad_norm": 0.9104681015014648, "learning_rate": 4.864052269430005e-06, "loss": 0.2470703125, "step": 11027 }, { "epoch": 0.745437339461944, "grad_norm": 2.1026523113250732, "learning_rate": 4.861620268137447e-06, "loss": 0.2398834228515625, "step": 11028 }, { "epoch": 0.7455049344328782, "grad_norm": 1.879143238067627, "learning_rate": 4.85918875740276e-06, "loss": 0.281768798828125, "step": 11029 }, { "epoch": 0.7455725294038124, "grad_norm": 1.1121513843536377, "learning_rate": 4.856757737343574e-06, "loss": 0.1568450927734375, "step": 11030 }, { "epoch": 0.7456401243747465, "grad_norm": 1.0844945907592773, "learning_rate": 4.854327208077536e-06, "loss": 0.15542221069335938, "step": 11031 }, { "epoch": 0.7457077193456807, "grad_norm": 0.8328249454498291, "learning_rate": 4.8518971697222475e-06, "loss": 0.23992919921875, "step": 11032 }, { "epoch": 0.7457753143166148, "grad_norm": 1.3804417848587036, "learning_rate": 4.849467622395273e-06, "loss": 0.265106201171875, "step": 11033 }, { "epoch": 0.745842909287549, "grad_norm": 0.9656127095222473, "learning_rate": 4.8470385662141865e-06, "loss": 0.14984130859375, "step": 11034 }, { "epoch": 0.7459105042584832, "grad_norm": 0.574694037437439, "learning_rate": 4.844610001296514e-06, "loss": 0.08957672119140625, "step": 11035 }, { "epoch": 0.7459780992294174, "grad_norm": 2.109724521636963, "learning_rate": 4.842181927759767e-06, "loss": 0.23488998413085938, "step": 11036 }, { "epoch": 0.7460456942003515, "grad_norm": 0.9075211882591248, "learning_rate": 4.839754345721428e-06, "loss": 0.1856842041015625, "step": 11037 }, { "epoch": 0.7461132891712856, "grad_norm": 1.4133135080337524, "learning_rate": 4.83732725529896e-06, "loss": 0.296539306640625, "step": 11038 }, { "epoch": 0.7461808841422198, "grad_norm": 1.8708584308624268, "learning_rate": 4.834900656609795e-06, "loss": 0.267578125, "step": 11039 }, { "epoch": 0.746248479113154, "grad_norm": 1.586988091468811, "learning_rate": 4.832474549771363e-06, "loss": 0.3233642578125, "step": 11040 }, { "epoch": 0.7463160740840882, "grad_norm": 1.1248794794082642, "learning_rate": 4.830048934901032e-06, "loss": 0.1507568359375, "step": 11041 }, { "epoch": 0.7463836690550223, "grad_norm": 1.3341572284698486, "learning_rate": 4.827623812116191e-06, "loss": 0.241485595703125, "step": 11042 }, { "epoch": 0.7464512640259565, "grad_norm": 0.5337401628494263, "learning_rate": 4.825199181534161e-06, "loss": 0.0992889404296875, "step": 11043 }, { "epoch": 0.7465188589968906, "grad_norm": 2.2760732173919678, "learning_rate": 4.822775043272276e-06, "loss": 0.32012939453125, "step": 11044 }, { "epoch": 0.7465864539678247, "grad_norm": 2.684589385986328, "learning_rate": 4.82035139744783e-06, "loss": 0.292999267578125, "step": 11045 }, { "epoch": 0.746654048938759, "grad_norm": 1.4253681898117065, "learning_rate": 4.8179282441780815e-06, "loss": 0.235992431640625, "step": 11046 }, { "epoch": 0.7467216439096931, "grad_norm": 0.8269115686416626, "learning_rate": 4.815505583580291e-06, "loss": 0.09055328369140625, "step": 11047 }, { "epoch": 0.7467892388806273, "grad_norm": 1.2755897045135498, "learning_rate": 4.813083415771676e-06, "loss": 0.23333740234375, "step": 11048 }, { "epoch": 0.7468568338515614, "grad_norm": 1.2823337316513062, "learning_rate": 4.810661740869438e-06, "loss": 0.2130126953125, "step": 11049 }, { "epoch": 0.7469244288224957, "grad_norm": 2.7199833393096924, "learning_rate": 4.808240558990751e-06, "loss": 0.368804931640625, "step": 11050 }, { "epoch": 0.7469920237934298, "grad_norm": 1.51023530960083, "learning_rate": 4.805819870252769e-06, "loss": 0.271820068359375, "step": 11051 }, { "epoch": 0.7470596187643639, "grad_norm": 1.821274995803833, "learning_rate": 4.803399674772612e-06, "loss": 0.28955078125, "step": 11052 }, { "epoch": 0.7471272137352981, "grad_norm": 1.3517372608184814, "learning_rate": 4.800979972667402e-06, "loss": 0.1800537109375, "step": 11053 }, { "epoch": 0.7471948087062322, "grad_norm": 1.4969780445098877, "learning_rate": 4.798560764054196e-06, "loss": 0.309906005859375, "step": 11054 }, { "epoch": 0.7472624036771665, "grad_norm": 1.743306040763855, "learning_rate": 4.796142049050069e-06, "loss": 0.318267822265625, "step": 11055 }, { "epoch": 0.7473299986481006, "grad_norm": 2.1396713256835938, "learning_rate": 4.7937238277720516e-06, "loss": 0.2535400390625, "step": 11056 }, { "epoch": 0.7473975936190348, "grad_norm": 1.2912448644638062, "learning_rate": 4.791306100337138e-06, "loss": 0.208465576171875, "step": 11057 }, { "epoch": 0.7474651885899689, "grad_norm": 0.8653277158737183, "learning_rate": 4.788888866862331e-06, "loss": 0.1740264892578125, "step": 11058 }, { "epoch": 0.747532783560903, "grad_norm": 1.4740647077560425, "learning_rate": 4.786472127464575e-06, "loss": 0.2469482421875, "step": 11059 }, { "epoch": 0.7476003785318373, "grad_norm": 1.8773093223571777, "learning_rate": 4.784055882260818e-06, "loss": 0.31658935546875, "step": 11060 }, { "epoch": 0.7476679735027714, "grad_norm": 1.2110316753387451, "learning_rate": 4.781640131367969e-06, "loss": 0.270172119140625, "step": 11061 }, { "epoch": 0.7477355684737056, "grad_norm": 1.7238682508468628, "learning_rate": 4.779224874902919e-06, "loss": 0.279998779296875, "step": 11062 }, { "epoch": 0.7478031634446397, "grad_norm": 1.4391264915466309, "learning_rate": 4.776810112982532e-06, "loss": 0.1888427734375, "step": 11063 }, { "epoch": 0.7478707584155739, "grad_norm": 2.3356802463531494, "learning_rate": 4.774395845723649e-06, "loss": 0.26300048828125, "step": 11064 }, { "epoch": 0.747938353386508, "grad_norm": 1.3270065784454346, "learning_rate": 4.771982073243082e-06, "loss": 0.172332763671875, "step": 11065 }, { "epoch": 0.7480059483574422, "grad_norm": 0.9094356894493103, "learning_rate": 4.76956879565764e-06, "loss": 0.192169189453125, "step": 11066 }, { "epoch": 0.7480735433283764, "grad_norm": 1.5163763761520386, "learning_rate": 4.767156013084076e-06, "loss": 0.261749267578125, "step": 11067 }, { "epoch": 0.7481411382993105, "grad_norm": 1.7547633647918701, "learning_rate": 4.764743725639138e-06, "loss": 0.2222900390625, "step": 11068 }, { "epoch": 0.7482087332702447, "grad_norm": 1.27576744556427, "learning_rate": 4.76233193343956e-06, "loss": 0.287933349609375, "step": 11069 }, { "epoch": 0.7482763282411788, "grad_norm": 0.9735353589057922, "learning_rate": 4.75992063660202e-06, "loss": 0.2042999267578125, "step": 11070 }, { "epoch": 0.7483439232121131, "grad_norm": 1.2070956230163574, "learning_rate": 4.757509835243214e-06, "loss": 0.29144287109375, "step": 11071 }, { "epoch": 0.7484115181830472, "grad_norm": 0.9984704256057739, "learning_rate": 4.755099529479767e-06, "loss": 0.198455810546875, "step": 11072 }, { "epoch": 0.7484791131539813, "grad_norm": 1.3978384733200073, "learning_rate": 4.752689719428323e-06, "loss": 0.246002197265625, "step": 11073 }, { "epoch": 0.7485467081249155, "grad_norm": 1.2200274467468262, "learning_rate": 4.750280405205478e-06, "loss": 0.2410888671875, "step": 11074 }, { "epoch": 0.7486143030958496, "grad_norm": 1.3263840675354004, "learning_rate": 4.747871586927809e-06, "loss": 0.19415283203125, "step": 11075 }, { "epoch": 0.7486818980667839, "grad_norm": 2.2077174186706543, "learning_rate": 4.7454632647118705e-06, "loss": 0.2955474853515625, "step": 11076 }, { "epoch": 0.748749493037718, "grad_norm": 1.2070012092590332, "learning_rate": 4.743055438674192e-06, "loss": 0.22010040283203125, "step": 11077 }, { "epoch": 0.7488170880086522, "grad_norm": 1.552767276763916, "learning_rate": 4.740648108931273e-06, "loss": 0.27008056640625, "step": 11078 }, { "epoch": 0.7488846829795863, "grad_norm": 0.9744763374328613, "learning_rate": 4.738241275599611e-06, "loss": 0.1302032470703125, "step": 11079 }, { "epoch": 0.7489522779505204, "grad_norm": 1.051719069480896, "learning_rate": 4.735834938795647e-06, "loss": 0.228912353515625, "step": 11080 }, { "epoch": 0.7490198729214547, "grad_norm": 2.056370496749878, "learning_rate": 4.7334290986358166e-06, "loss": 0.2672119140625, "step": 11081 }, { "epoch": 0.7490874678923888, "grad_norm": 1.4473141431808472, "learning_rate": 4.731023755236543e-06, "loss": 0.242401123046875, "step": 11082 }, { "epoch": 0.749155062863323, "grad_norm": 1.8316683769226074, "learning_rate": 4.728618908714194e-06, "loss": 0.2918701171875, "step": 11083 }, { "epoch": 0.7492226578342571, "grad_norm": 1.1678143739700317, "learning_rate": 4.726214559185143e-06, "loss": 0.24078369140625, "step": 11084 }, { "epoch": 0.7492902528051913, "grad_norm": 1.6608966588974, "learning_rate": 4.723810706765724e-06, "loss": 0.27734375, "step": 11085 }, { "epoch": 0.7493578477761255, "grad_norm": 1.41032075881958, "learning_rate": 4.721407351572248e-06, "loss": 0.243255615234375, "step": 11086 }, { "epoch": 0.7494254427470596, "grad_norm": 0.6771409511566162, "learning_rate": 4.719004493721009e-06, "loss": 0.099761962890625, "step": 11087 }, { "epoch": 0.7494930377179938, "grad_norm": 1.711119294166565, "learning_rate": 4.716602133328268e-06, "loss": 0.2478485107421875, "step": 11088 }, { "epoch": 0.7495606326889279, "grad_norm": 1.0682311058044434, "learning_rate": 4.714200270510268e-06, "loss": 0.16657018661499023, "step": 11089 }, { "epoch": 0.7496282276598621, "grad_norm": 0.6924692988395691, "learning_rate": 4.711798905383226e-06, "loss": 0.1372528076171875, "step": 11090 }, { "epoch": 0.7496958226307963, "grad_norm": 2.0999042987823486, "learning_rate": 4.7093980380633335e-06, "loss": 0.30206298828125, "step": 11091 }, { "epoch": 0.7497634176017305, "grad_norm": 1.1961055994033813, "learning_rate": 4.706997668666762e-06, "loss": 0.2413330078125, "step": 11092 }, { "epoch": 0.7498310125726646, "grad_norm": 0.8369735479354858, "learning_rate": 4.704597797309652e-06, "loss": 0.1538848876953125, "step": 11093 }, { "epoch": 0.7498986075435987, "grad_norm": 1.398837685585022, "learning_rate": 4.7021984241081245e-06, "loss": 0.31298828125, "step": 11094 }, { "epoch": 0.7499662025145329, "grad_norm": 0.8572232723236084, "learning_rate": 4.699799549178287e-06, "loss": 0.17356109619140625, "step": 11095 }, { "epoch": 0.7500337974854671, "grad_norm": 1.3988559246063232, "learning_rate": 4.697401172636195e-06, "loss": 0.29449462890625, "step": 11096 }, { "epoch": 0.7501013924564013, "grad_norm": 1.3339781761169434, "learning_rate": 4.695003294597911e-06, "loss": 0.195892333984375, "step": 11097 }, { "epoch": 0.7501689874273354, "grad_norm": 1.2833234071731567, "learning_rate": 4.692605915179452e-06, "loss": 0.15836334228515625, "step": 11098 }, { "epoch": 0.7502365823982695, "grad_norm": 1.713915228843689, "learning_rate": 4.69020903449682e-06, "loss": 0.209808349609375, "step": 11099 }, { "epoch": 0.7503041773692037, "grad_norm": 1.326116681098938, "learning_rate": 4.687812652665991e-06, "loss": 0.253814697265625, "step": 11100 }, { "epoch": 0.7503717723401379, "grad_norm": 1.1134369373321533, "learning_rate": 4.685416769802916e-06, "loss": 0.2293548583984375, "step": 11101 }, { "epoch": 0.7504393673110721, "grad_norm": 1.3679183721542358, "learning_rate": 4.683021386023524e-06, "loss": 0.260040283203125, "step": 11102 }, { "epoch": 0.7505069622820062, "grad_norm": 1.1874895095825195, "learning_rate": 4.680626501443719e-06, "loss": 0.249176025390625, "step": 11103 }, { "epoch": 0.7505745572529404, "grad_norm": 1.606223464012146, "learning_rate": 4.678232116179378e-06, "loss": 0.24884033203125, "step": 11104 }, { "epoch": 0.7506421522238745, "grad_norm": 2.180783748626709, "learning_rate": 4.675838230346359e-06, "loss": 0.32122802734375, "step": 11105 }, { "epoch": 0.7507097471948087, "grad_norm": 1.1105180978775024, "learning_rate": 4.673444844060491e-06, "loss": 0.2015533447265625, "step": 11106 }, { "epoch": 0.7507773421657429, "grad_norm": 1.5125092267990112, "learning_rate": 4.671051957437576e-06, "loss": 0.288665771484375, "step": 11107 }, { "epoch": 0.750844937136677, "grad_norm": 1.480539321899414, "learning_rate": 4.668659570593413e-06, "loss": 0.29595947265625, "step": 11108 }, { "epoch": 0.7509125321076112, "grad_norm": 1.6620503664016724, "learning_rate": 4.66626768364374e-06, "loss": 0.3082275390625, "step": 11109 }, { "epoch": 0.7509801270785453, "grad_norm": 1.181201696395874, "learning_rate": 4.663876296704306e-06, "loss": 0.226287841796875, "step": 11110 }, { "epoch": 0.7510477220494796, "grad_norm": 0.7507166862487793, "learning_rate": 4.661485409890815e-06, "loss": 0.103851318359375, "step": 11111 }, { "epoch": 0.7511153170204137, "grad_norm": 0.8452222943305969, "learning_rate": 4.6590950233189564e-06, "loss": 0.18134307861328125, "step": 11112 }, { "epoch": 0.7511829119913478, "grad_norm": 0.8564566969871521, "learning_rate": 4.65670513710439e-06, "loss": 0.13311767578125, "step": 11113 }, { "epoch": 0.751250506962282, "grad_norm": 0.728100597858429, "learning_rate": 4.654315751362752e-06, "loss": 0.16143798828125, "step": 11114 }, { "epoch": 0.7513181019332161, "grad_norm": 1.260148286819458, "learning_rate": 4.651926866209657e-06, "loss": 0.25860595703125, "step": 11115 }, { "epoch": 0.7513856969041504, "grad_norm": 1.6729925870895386, "learning_rate": 4.649538481760696e-06, "loss": 0.2316436767578125, "step": 11116 }, { "epoch": 0.7514532918750845, "grad_norm": 1.3116446733474731, "learning_rate": 4.64715059813143e-06, "loss": 0.26885986328125, "step": 11117 }, { "epoch": 0.7515208868460187, "grad_norm": 2.073866367340088, "learning_rate": 4.644763215437403e-06, "loss": 0.31964111328125, "step": 11118 }, { "epoch": 0.7515884818169528, "grad_norm": 1.1287842988967896, "learning_rate": 4.642376333794129e-06, "loss": 0.278900146484375, "step": 11119 }, { "epoch": 0.7516560767878869, "grad_norm": 1.4263641834259033, "learning_rate": 4.639989953317098e-06, "loss": 0.259368896484375, "step": 11120 }, { "epoch": 0.7517236717588212, "grad_norm": 1.1607279777526855, "learning_rate": 4.637604074121791e-06, "loss": 0.28179931640625, "step": 11121 }, { "epoch": 0.7517912667297553, "grad_norm": 1.027115821838379, "learning_rate": 4.635218696323631e-06, "loss": 0.215423583984375, "step": 11122 }, { "epoch": 0.7518588617006895, "grad_norm": 1.2687748670578003, "learning_rate": 4.632833820038053e-06, "loss": 0.26220703125, "step": 11123 }, { "epoch": 0.7519264566716236, "grad_norm": 1.4639900922775269, "learning_rate": 4.630449445380449e-06, "loss": 0.294158935546875, "step": 11124 }, { "epoch": 0.7519940516425578, "grad_norm": 1.5947387218475342, "learning_rate": 4.628065572466188e-06, "loss": 0.165069580078125, "step": 11125 }, { "epoch": 0.752061646613492, "grad_norm": 1.283979058265686, "learning_rate": 4.625682201410616e-06, "loss": 0.21380615234375, "step": 11126 }, { "epoch": 0.7521292415844261, "grad_norm": 1.3664418458938599, "learning_rate": 4.623299332329057e-06, "loss": 0.249847412109375, "step": 11127 }, { "epoch": 0.7521968365553603, "grad_norm": 1.304857850074768, "learning_rate": 4.620916965336809e-06, "loss": 0.229827880859375, "step": 11128 }, { "epoch": 0.7522644315262944, "grad_norm": 1.1870063543319702, "learning_rate": 4.618535100549143e-06, "loss": 0.25, "step": 11129 }, { "epoch": 0.7523320264972286, "grad_norm": 2.387263298034668, "learning_rate": 4.616153738081311e-06, "loss": 0.278778076171875, "step": 11130 }, { "epoch": 0.7523996214681627, "grad_norm": 1.0832685232162476, "learning_rate": 4.613772878048536e-06, "loss": 0.27315521240234375, "step": 11131 }, { "epoch": 0.752467216439097, "grad_norm": 1.9972478151321411, "learning_rate": 4.611392520566028e-06, "loss": 0.300537109375, "step": 11132 }, { "epoch": 0.7525348114100311, "grad_norm": 1.2817851305007935, "learning_rate": 4.609012665748945e-06, "loss": 0.12827301025390625, "step": 11133 }, { "epoch": 0.7526024063809652, "grad_norm": 0.8193529844284058, "learning_rate": 4.60663331371246e-06, "loss": 0.18389892578125, "step": 11134 }, { "epoch": 0.7526700013518994, "grad_norm": 0.6459169983863831, "learning_rate": 4.604254464571681e-06, "loss": 0.0885009765625, "step": 11135 }, { "epoch": 0.7527375963228335, "grad_norm": 0.8609448671340942, "learning_rate": 4.601876118441728e-06, "loss": 0.2069091796875, "step": 11136 }, { "epoch": 0.7528051912937678, "grad_norm": 1.1542435884475708, "learning_rate": 4.599498275437677e-06, "loss": 0.2314453125, "step": 11137 }, { "epoch": 0.7528727862647019, "grad_norm": 1.1772441864013672, "learning_rate": 4.597120935674572e-06, "loss": 0.177093505859375, "step": 11138 }, { "epoch": 0.7529403812356361, "grad_norm": 0.9655442237854004, "learning_rate": 4.5947440992674526e-06, "loss": 0.17999267578125, "step": 11139 }, { "epoch": 0.7530079762065702, "grad_norm": 1.0481810569763184, "learning_rate": 4.592367766331326e-06, "loss": 0.183319091796875, "step": 11140 }, { "epoch": 0.7530755711775043, "grad_norm": 1.0308817625045776, "learning_rate": 4.58999193698117e-06, "loss": 0.129730224609375, "step": 11141 }, { "epoch": 0.7531431661484386, "grad_norm": 1.2799140214920044, "learning_rate": 4.587616611331943e-06, "loss": 0.261260986328125, "step": 11142 }, { "epoch": 0.7532107611193727, "grad_norm": 1.7375812530517578, "learning_rate": 4.585241789498578e-06, "loss": 0.24298095703125, "step": 11143 }, { "epoch": 0.7532783560903069, "grad_norm": 1.0693033933639526, "learning_rate": 4.582867471595981e-06, "loss": 0.23218536376953125, "step": 11144 }, { "epoch": 0.753345951061241, "grad_norm": 0.9926162362098694, "learning_rate": 4.580493657739047e-06, "loss": 0.209991455078125, "step": 11145 }, { "epoch": 0.7534135460321753, "grad_norm": 1.3334788084030151, "learning_rate": 4.5781203480426184e-06, "loss": 0.2720947265625, "step": 11146 }, { "epoch": 0.7534811410031094, "grad_norm": 1.367436170578003, "learning_rate": 4.575747542621545e-06, "loss": 0.17596435546875, "step": 11147 }, { "epoch": 0.7535487359740435, "grad_norm": 0.6037573218345642, "learning_rate": 4.573375241590634e-06, "loss": 0.1166839599609375, "step": 11148 }, { "epoch": 0.7536163309449777, "grad_norm": 1.2030795812606812, "learning_rate": 4.5710034450646714e-06, "loss": 0.16412734985351562, "step": 11149 }, { "epoch": 0.7536839259159118, "grad_norm": 0.9892080426216125, "learning_rate": 4.568632153158425e-06, "loss": 0.16928863525390625, "step": 11150 }, { "epoch": 0.753751520886846, "grad_norm": 0.6669098138809204, "learning_rate": 4.566261365986616e-06, "loss": 0.1463165283203125, "step": 11151 }, { "epoch": 0.7538191158577802, "grad_norm": 1.4829704761505127, "learning_rate": 4.5638910836639734e-06, "loss": 0.296661376953125, "step": 11152 }, { "epoch": 0.7538867108287144, "grad_norm": 1.3049581050872803, "learning_rate": 4.561521306305183e-06, "loss": 0.261016845703125, "step": 11153 }, { "epoch": 0.7539543057996485, "grad_norm": 3.081730842590332, "learning_rate": 4.559152034024907e-06, "loss": 0.38421630859375, "step": 11154 }, { "epoch": 0.7540219007705826, "grad_norm": 1.2024587392807007, "learning_rate": 4.556783266937786e-06, "loss": 0.22548675537109375, "step": 11155 }, { "epoch": 0.7540894957415168, "grad_norm": 1.4320809841156006, "learning_rate": 4.554415005158437e-06, "loss": 0.27874755859375, "step": 11156 }, { "epoch": 0.754157090712451, "grad_norm": 1.2043933868408203, "learning_rate": 4.5520472488014466e-06, "loss": 0.18859481811523438, "step": 11157 }, { "epoch": 0.7542246856833852, "grad_norm": 1.6337324380874634, "learning_rate": 4.5496799979813936e-06, "loss": 0.29437255859375, "step": 11158 }, { "epoch": 0.7542922806543193, "grad_norm": 1.7782769203186035, "learning_rate": 4.547313252812803e-06, "loss": 0.35064697265625, "step": 11159 }, { "epoch": 0.7543598756252535, "grad_norm": 1.198358178138733, "learning_rate": 4.544947013410207e-06, "loss": 0.252197265625, "step": 11160 }, { "epoch": 0.7544274705961876, "grad_norm": 1.2518104314804077, "learning_rate": 4.542581279888098e-06, "loss": 0.330657958984375, "step": 11161 }, { "epoch": 0.7544950655671218, "grad_norm": 1.598042368888855, "learning_rate": 4.54021605236093e-06, "loss": 0.219085693359375, "step": 11162 }, { "epoch": 0.754562660538056, "grad_norm": 0.9959537386894226, "learning_rate": 4.53785133094317e-06, "loss": 0.24127197265625, "step": 11163 }, { "epoch": 0.7546302555089901, "grad_norm": 1.2386385202407837, "learning_rate": 4.535487115749214e-06, "loss": 0.22821044921875, "step": 11164 }, { "epoch": 0.7546978504799243, "grad_norm": 0.9244169592857361, "learning_rate": 4.533123406893476e-06, "loss": 0.193511962890625, "step": 11165 }, { "epoch": 0.7547654454508584, "grad_norm": 1.0717039108276367, "learning_rate": 4.53076020449032e-06, "loss": 0.2262725830078125, "step": 11166 }, { "epoch": 0.7548330404217927, "grad_norm": 1.3705823421478271, "learning_rate": 4.528397508654093e-06, "loss": 0.2408447265625, "step": 11167 }, { "epoch": 0.7549006353927268, "grad_norm": 1.5001310110092163, "learning_rate": 4.526035319499117e-06, "loss": 0.2308349609375, "step": 11168 }, { "epoch": 0.7549682303636609, "grad_norm": 1.072703242301941, "learning_rate": 4.523673637139688e-06, "loss": 0.18603515625, "step": 11169 }, { "epoch": 0.7550358253345951, "grad_norm": 1.3625870943069458, "learning_rate": 4.521312461690076e-06, "loss": 0.2396240234375, "step": 11170 }, { "epoch": 0.7551034203055292, "grad_norm": 0.9301614761352539, "learning_rate": 4.518951793264542e-06, "loss": 0.1457672119140625, "step": 11171 }, { "epoch": 0.7551710152764635, "grad_norm": 1.690268874168396, "learning_rate": 4.516591631977296e-06, "loss": 0.31427001953125, "step": 11172 }, { "epoch": 0.7552386102473976, "grad_norm": 0.993850588798523, "learning_rate": 4.5142319779425395e-06, "loss": 0.2194061279296875, "step": 11173 }, { "epoch": 0.7553062052183318, "grad_norm": 1.8919751644134521, "learning_rate": 4.511872831274459e-06, "loss": 0.295623779296875, "step": 11174 }, { "epoch": 0.7553738001892659, "grad_norm": 1.819542646408081, "learning_rate": 4.509514192087187e-06, "loss": 0.26031494140625, "step": 11175 }, { "epoch": 0.7554413951602, "grad_norm": 1.7907154560089111, "learning_rate": 4.507156060494862e-06, "loss": 0.3047027587890625, "step": 11176 }, { "epoch": 0.7555089901311343, "grad_norm": 0.8734055757522583, "learning_rate": 4.504798436611582e-06, "loss": 0.1968841552734375, "step": 11177 }, { "epoch": 0.7555765851020684, "grad_norm": 1.3670752048492432, "learning_rate": 4.502441320551422e-06, "loss": 0.247528076171875, "step": 11178 }, { "epoch": 0.7556441800730026, "grad_norm": 1.1853276491165161, "learning_rate": 4.500084712428434e-06, "loss": 0.24755859375, "step": 11179 }, { "epoch": 0.7557117750439367, "grad_norm": 2.0285754203796387, "learning_rate": 4.497728612356646e-06, "loss": 0.3057861328125, "step": 11180 }, { "epoch": 0.7557793700148709, "grad_norm": 1.813578486442566, "learning_rate": 4.495373020450062e-06, "loss": 0.21173095703125, "step": 11181 }, { "epoch": 0.7558469649858051, "grad_norm": 0.9967448115348816, "learning_rate": 4.493017936822657e-06, "loss": 0.23712158203125, "step": 11182 }, { "epoch": 0.7559145599567392, "grad_norm": 1.2910773754119873, "learning_rate": 4.490663361588382e-06, "loss": 0.1973876953125, "step": 11183 }, { "epoch": 0.7559821549276734, "grad_norm": 1.5043104887008667, "learning_rate": 4.488309294861182e-06, "loss": 0.291229248046875, "step": 11184 }, { "epoch": 0.7560497498986075, "grad_norm": 0.9717921614646912, "learning_rate": 4.485955736754943e-06, "loss": 0.2137451171875, "step": 11185 }, { "epoch": 0.7561173448695417, "grad_norm": 1.8633860349655151, "learning_rate": 4.483602687383547e-06, "loss": 0.34417724609375, "step": 11186 }, { "epoch": 0.7561849398404759, "grad_norm": 2.228837728500366, "learning_rate": 4.481250146860863e-06, "loss": 0.282989501953125, "step": 11187 }, { "epoch": 0.7562525348114101, "grad_norm": 1.32601797580719, "learning_rate": 4.478898115300704e-06, "loss": 0.19403076171875, "step": 11188 }, { "epoch": 0.7563201297823442, "grad_norm": 1.087465763092041, "learning_rate": 4.476546592816888e-06, "loss": 0.30377197265625, "step": 11189 }, { "epoch": 0.7563877247532783, "grad_norm": 1.322067141532898, "learning_rate": 4.474195579523192e-06, "loss": 0.176849365234375, "step": 11190 }, { "epoch": 0.7564553197242125, "grad_norm": 1.267462134361267, "learning_rate": 4.471845075533375e-06, "loss": 0.19976806640625, "step": 11191 }, { "epoch": 0.7565229146951467, "grad_norm": 1.2789943218231201, "learning_rate": 4.4694950809611665e-06, "loss": 0.1600494384765625, "step": 11192 }, { "epoch": 0.7565905096660809, "grad_norm": 1.3818188905715942, "learning_rate": 4.467145595920273e-06, "loss": 0.30780029296875, "step": 11193 }, { "epoch": 0.756658104637015, "grad_norm": 1.0986593961715698, "learning_rate": 4.464796620524375e-06, "loss": 0.267852783203125, "step": 11194 }, { "epoch": 0.7567256996079492, "grad_norm": 1.419407606124878, "learning_rate": 4.462448154887144e-06, "loss": 0.2205810546875, "step": 11195 }, { "epoch": 0.7567932945788833, "grad_norm": 1.9081616401672363, "learning_rate": 4.460100199122199e-06, "loss": 0.2545166015625, "step": 11196 }, { "epoch": 0.7568608895498175, "grad_norm": 1.843673825263977, "learning_rate": 4.4577527533431525e-06, "loss": 0.3475341796875, "step": 11197 }, { "epoch": 0.7569284845207517, "grad_norm": 1.7028497457504272, "learning_rate": 4.455405817663591e-06, "loss": 0.249786376953125, "step": 11198 }, { "epoch": 0.7569960794916858, "grad_norm": 1.2979977130889893, "learning_rate": 4.453059392197065e-06, "loss": 0.207427978515625, "step": 11199 }, { "epoch": 0.75706367446262, "grad_norm": 1.0320652723312378, "learning_rate": 4.450713477057128e-06, "loss": 0.24517822265625, "step": 11200 }, { "epoch": 0.7571312694335541, "grad_norm": 1.0617785453796387, "learning_rate": 4.448368072357268e-06, "loss": 0.246551513671875, "step": 11201 }, { "epoch": 0.7571988644044884, "grad_norm": 1.281502604484558, "learning_rate": 4.4460231782109835e-06, "loss": 0.25958251953125, "step": 11202 }, { "epoch": 0.7572664593754225, "grad_norm": 1.0802453756332397, "learning_rate": 4.443678794731733e-06, "loss": 0.194732666015625, "step": 11203 }, { "epoch": 0.7573340543463566, "grad_norm": 1.0214473009109497, "learning_rate": 4.441334922032953e-06, "loss": 0.28631591796875, "step": 11204 }, { "epoch": 0.7574016493172908, "grad_norm": 0.8256707191467285, "learning_rate": 4.438991560228053e-06, "loss": 0.13254547119140625, "step": 11205 }, { "epoch": 0.7574692442882249, "grad_norm": 1.2469297647476196, "learning_rate": 4.436648709430419e-06, "loss": 0.2447509765625, "step": 11206 }, { "epoch": 0.7575368392591592, "grad_norm": 1.347470998764038, "learning_rate": 4.434306369753413e-06, "loss": 0.27215576171875, "step": 11207 }, { "epoch": 0.7576044342300933, "grad_norm": 1.6145931482315063, "learning_rate": 4.4319645413103725e-06, "loss": 0.223968505859375, "step": 11208 }, { "epoch": 0.7576720292010275, "grad_norm": 1.3444219827651978, "learning_rate": 4.429623224214609e-06, "loss": 0.26378631591796875, "step": 11209 }, { "epoch": 0.7577396241719616, "grad_norm": 1.1120189428329468, "learning_rate": 4.42728241857941e-06, "loss": 0.2186737060546875, "step": 11210 }, { "epoch": 0.7578072191428957, "grad_norm": 1.1171244382858276, "learning_rate": 4.424942124518039e-06, "loss": 0.2523040771484375, "step": 11211 }, { "epoch": 0.75787481411383, "grad_norm": 1.4191489219665527, "learning_rate": 4.422602342143729e-06, "loss": 0.17906951904296875, "step": 11212 }, { "epoch": 0.7579424090847641, "grad_norm": 1.237396001815796, "learning_rate": 4.420263071569706e-06, "loss": 0.233978271484375, "step": 11213 }, { "epoch": 0.7580100040556983, "grad_norm": 1.5292978286743164, "learning_rate": 4.417924312909143e-06, "loss": 0.2424163818359375, "step": 11214 }, { "epoch": 0.7580775990266324, "grad_norm": 1.1638089418411255, "learning_rate": 4.415586066275215e-06, "loss": 0.18212890625, "step": 11215 }, { "epoch": 0.7581451939975666, "grad_norm": 1.6475368738174438, "learning_rate": 4.4132483317810555e-06, "loss": 0.33349609375, "step": 11216 }, { "epoch": 0.7582127889685008, "grad_norm": 1.8547992706298828, "learning_rate": 4.410911109539781e-06, "loss": 0.3486328125, "step": 11217 }, { "epoch": 0.7582803839394349, "grad_norm": 1.293476939201355, "learning_rate": 4.408574399664482e-06, "loss": 0.261627197265625, "step": 11218 }, { "epoch": 0.7583479789103691, "grad_norm": 1.559131383895874, "learning_rate": 4.406238202268218e-06, "loss": 0.22674560546875, "step": 11219 }, { "epoch": 0.7584155738813032, "grad_norm": 1.806182622909546, "learning_rate": 4.403902517464033e-06, "loss": 0.187744140625, "step": 11220 }, { "epoch": 0.7584831688522374, "grad_norm": 1.111265778541565, "learning_rate": 4.40156734536494e-06, "loss": 0.2725830078125, "step": 11221 }, { "epoch": 0.7585507638231715, "grad_norm": 1.143933892250061, "learning_rate": 4.3992326860839295e-06, "loss": 0.2691650390625, "step": 11222 }, { "epoch": 0.7586183587941058, "grad_norm": 1.6491042375564575, "learning_rate": 4.3968985397339635e-06, "loss": 0.296051025390625, "step": 11223 }, { "epoch": 0.7586859537650399, "grad_norm": 1.5324381589889526, "learning_rate": 4.3945649064279945e-06, "loss": 0.259185791015625, "step": 11224 }, { "epoch": 0.758753548735974, "grad_norm": 1.2051492929458618, "learning_rate": 4.392231786278921e-06, "loss": 0.296478271484375, "step": 11225 }, { "epoch": 0.7588211437069082, "grad_norm": 0.7095165848731995, "learning_rate": 4.389899179399651e-06, "loss": 0.184173583984375, "step": 11226 }, { "epoch": 0.7588887386778423, "grad_norm": 1.693467617034912, "learning_rate": 4.387567085903035e-06, "loss": 0.25787353515625, "step": 11227 }, { "epoch": 0.7589563336487766, "grad_norm": 1.9559061527252197, "learning_rate": 4.3852355059019235e-06, "loss": 0.258575439453125, "step": 11228 }, { "epoch": 0.7590239286197107, "grad_norm": 1.8710671663284302, "learning_rate": 4.382904439509133e-06, "loss": 0.3045654296875, "step": 11229 }, { "epoch": 0.7590915235906448, "grad_norm": 1.4992096424102783, "learning_rate": 4.380573886837451e-06, "loss": 0.18353271484375, "step": 11230 }, { "epoch": 0.759159118561579, "grad_norm": 1.2881399393081665, "learning_rate": 4.3782438479996465e-06, "loss": 0.21575927734375, "step": 11231 }, { "epoch": 0.7592267135325131, "grad_norm": 1.165669322013855, "learning_rate": 4.375914323108461e-06, "loss": 0.16822052001953125, "step": 11232 }, { "epoch": 0.7592943085034474, "grad_norm": 1.1771740913391113, "learning_rate": 4.373585312276611e-06, "loss": 0.200927734375, "step": 11233 }, { "epoch": 0.7593619034743815, "grad_norm": 1.2822022438049316, "learning_rate": 4.37125681561679e-06, "loss": 0.299072265625, "step": 11234 }, { "epoch": 0.7594294984453157, "grad_norm": 0.6790097951889038, "learning_rate": 4.368928833241661e-06, "loss": 0.14833831787109375, "step": 11235 }, { "epoch": 0.7594970934162498, "grad_norm": 1.4468191862106323, "learning_rate": 4.366601365263865e-06, "loss": 0.264312744140625, "step": 11236 }, { "epoch": 0.7595646883871839, "grad_norm": 1.6363470554351807, "learning_rate": 4.364274411796032e-06, "loss": 0.2763862609863281, "step": 11237 }, { "epoch": 0.7596322833581182, "grad_norm": 0.9718819856643677, "learning_rate": 4.361947972950739e-06, "loss": 0.232391357421875, "step": 11238 }, { "epoch": 0.7596998783290523, "grad_norm": 1.7698783874511719, "learning_rate": 4.359622048840563e-06, "loss": 0.29632568359375, "step": 11239 }, { "epoch": 0.7597674732999865, "grad_norm": 0.815245509147644, "learning_rate": 4.357296639578045e-06, "loss": 0.21307373046875, "step": 11240 }, { "epoch": 0.7598350682709206, "grad_norm": 1.115356683731079, "learning_rate": 4.354971745275702e-06, "loss": 0.252471923828125, "step": 11241 }, { "epoch": 0.7599026632418548, "grad_norm": 1.370672583580017, "learning_rate": 4.352647366046025e-06, "loss": 0.19998550415039062, "step": 11242 }, { "epoch": 0.759970258212789, "grad_norm": 1.848252773284912, "learning_rate": 4.350323502001486e-06, "loss": 0.22955322265625, "step": 11243 }, { "epoch": 0.7600378531837231, "grad_norm": 2.216050863265991, "learning_rate": 4.348000153254523e-06, "loss": 0.22833251953125, "step": 11244 }, { "epoch": 0.7601054481546573, "grad_norm": 1.2106074094772339, "learning_rate": 4.345677319917559e-06, "loss": 0.16902923583984375, "step": 11245 }, { "epoch": 0.7601730431255914, "grad_norm": 0.98111891746521, "learning_rate": 4.343355002102984e-06, "loss": 0.12458038330078125, "step": 11246 }, { "epoch": 0.7602406380965256, "grad_norm": 1.1097787618637085, "learning_rate": 4.341033199923165e-06, "loss": 0.22625732421875, "step": 11247 }, { "epoch": 0.7603082330674598, "grad_norm": 2.043419122695923, "learning_rate": 4.338711913490451e-06, "loss": 0.1738433837890625, "step": 11248 }, { "epoch": 0.760375828038394, "grad_norm": 1.587598204612732, "learning_rate": 4.3363911429171494e-06, "loss": 0.207672119140625, "step": 11249 }, { "epoch": 0.7604434230093281, "grad_norm": 0.7413046956062317, "learning_rate": 4.33407088831557e-06, "loss": 0.15789794921875, "step": 11250 }, { "epoch": 0.7605110179802622, "grad_norm": 4.901864051818848, "learning_rate": 4.331751149797963e-06, "loss": 0.261383056640625, "step": 11251 }, { "epoch": 0.7605786129511964, "grad_norm": 1.0474097728729248, "learning_rate": 4.329431927476585e-06, "loss": 0.1600341796875, "step": 11252 }, { "epoch": 0.7606462079221306, "grad_norm": 0.8876421451568604, "learning_rate": 4.32711322146365e-06, "loss": 0.1352996826171875, "step": 11253 }, { "epoch": 0.7607138028930648, "grad_norm": 1.1967686414718628, "learning_rate": 4.32479503187135e-06, "loss": 0.261505126953125, "step": 11254 }, { "epoch": 0.7607813978639989, "grad_norm": 0.654584527015686, "learning_rate": 4.322477358811861e-06, "loss": 0.098114013671875, "step": 11255 }, { "epoch": 0.7608489928349331, "grad_norm": 1.0610414743423462, "learning_rate": 4.32016020239731e-06, "loss": 0.2686767578125, "step": 11256 }, { "epoch": 0.7609165878058672, "grad_norm": 1.4788228273391724, "learning_rate": 4.31784356273983e-06, "loss": 0.26947021484375, "step": 11257 }, { "epoch": 0.7609841827768014, "grad_norm": 1.090200424194336, "learning_rate": 4.3155274399515125e-06, "loss": 0.206512451171875, "step": 11258 }, { "epoch": 0.7610517777477356, "grad_norm": 1.7417958974838257, "learning_rate": 4.313211834144421e-06, "loss": 0.271514892578125, "step": 11259 }, { "epoch": 0.7611193727186697, "grad_norm": 1.1366947889328003, "learning_rate": 4.310896745430602e-06, "loss": 0.28704833984375, "step": 11260 }, { "epoch": 0.7611869676896039, "grad_norm": 1.394356369972229, "learning_rate": 4.3085821739220734e-06, "loss": 0.20465850830078125, "step": 11261 }, { "epoch": 0.761254562660538, "grad_norm": 1.1587307453155518, "learning_rate": 4.306268119730823e-06, "loss": 0.22265625, "step": 11262 }, { "epoch": 0.7613221576314723, "grad_norm": 1.2978795766830444, "learning_rate": 4.303954582968835e-06, "loss": 0.250518798828125, "step": 11263 }, { "epoch": 0.7613897526024064, "grad_norm": 2.2789947986602783, "learning_rate": 4.3016415637480314e-06, "loss": 0.351837158203125, "step": 11264 }, { "epoch": 0.7614573475733405, "grad_norm": 1.5099273920059204, "learning_rate": 4.299329062180349e-06, "loss": 0.266448974609375, "step": 11265 }, { "epoch": 0.7615249425442747, "grad_norm": 1.138152003288269, "learning_rate": 4.297017078377675e-06, "loss": 0.221282958984375, "step": 11266 }, { "epoch": 0.7615925375152088, "grad_norm": 1.2436119318008423, "learning_rate": 4.294705612451868e-06, "loss": 0.266326904296875, "step": 11267 }, { "epoch": 0.7616601324861431, "grad_norm": 1.1557537317276, "learning_rate": 4.2923946645147825e-06, "loss": 0.209320068359375, "step": 11268 }, { "epoch": 0.7617277274570772, "grad_norm": 1.9746911525726318, "learning_rate": 4.2900842346782345e-06, "loss": 0.279693603515625, "step": 11269 }, { "epoch": 0.7617953224280114, "grad_norm": 1.3589308261871338, "learning_rate": 4.287774323054015e-06, "loss": 0.2237548828125, "step": 11270 }, { "epoch": 0.7618629173989455, "grad_norm": 1.1127339601516724, "learning_rate": 4.285464929753892e-06, "loss": 0.2615966796875, "step": 11271 }, { "epoch": 0.7619305123698796, "grad_norm": 0.9160709977149963, "learning_rate": 4.283156054889609e-06, "loss": 0.2042999267578125, "step": 11272 }, { "epoch": 0.7619981073408139, "grad_norm": 1.3160685300827026, "learning_rate": 4.280847698572884e-06, "loss": 0.181640625, "step": 11273 }, { "epoch": 0.762065702311748, "grad_norm": 1.0453976392745972, "learning_rate": 4.278539860915407e-06, "loss": 0.241180419921875, "step": 11274 }, { "epoch": 0.7621332972826822, "grad_norm": 1.7042056322097778, "learning_rate": 4.276232542028845e-06, "loss": 0.27587890625, "step": 11275 }, { "epoch": 0.7622008922536163, "grad_norm": 1.5105950832366943, "learning_rate": 4.273925742024851e-06, "loss": 0.226318359375, "step": 11276 }, { "epoch": 0.7622684872245505, "grad_norm": 1.2103233337402344, "learning_rate": 4.2716194610150264e-06, "loss": 0.238616943359375, "step": 11277 }, { "epoch": 0.7623360821954847, "grad_norm": 1.2410451173782349, "learning_rate": 4.269313699110974e-06, "loss": 0.1846923828125, "step": 11278 }, { "epoch": 0.7624036771664188, "grad_norm": 1.1215966939926147, "learning_rate": 4.267008456424264e-06, "loss": 0.232879638671875, "step": 11279 }, { "epoch": 0.762471272137353, "grad_norm": 1.3566261529922485, "learning_rate": 4.264703733066424e-06, "loss": 0.242431640625, "step": 11280 }, { "epoch": 0.7625388671082871, "grad_norm": 1.6560966968536377, "learning_rate": 4.262399529148983e-06, "loss": 0.24664306640625, "step": 11281 }, { "epoch": 0.7626064620792213, "grad_norm": 1.1471995115280151, "learning_rate": 4.260095844783431e-06, "loss": 0.2513275146484375, "step": 11282 }, { "epoch": 0.7626740570501555, "grad_norm": 1.07528555393219, "learning_rate": 4.25779268008123e-06, "loss": 0.23455810546875, "step": 11283 }, { "epoch": 0.7627416520210897, "grad_norm": 1.3226128816604614, "learning_rate": 4.255490035153826e-06, "loss": 0.263916015625, "step": 11284 }, { "epoch": 0.7628092469920238, "grad_norm": 0.8449698090553284, "learning_rate": 4.253187910112633e-06, "loss": 0.217071533203125, "step": 11285 }, { "epoch": 0.7628768419629579, "grad_norm": 1.0878266096115112, "learning_rate": 4.250886305069038e-06, "loss": 0.2593994140625, "step": 11286 }, { "epoch": 0.7629444369338921, "grad_norm": 1.3478171825408936, "learning_rate": 4.248585220134423e-06, "loss": 0.220672607421875, "step": 11287 }, { "epoch": 0.7630120319048262, "grad_norm": 1.5839602947235107, "learning_rate": 4.246284655420106e-06, "loss": 0.1852264404296875, "step": 11288 }, { "epoch": 0.7630796268757605, "grad_norm": 1.0083668231964111, "learning_rate": 4.243984611037423e-06, "loss": 0.21685791015625, "step": 11289 }, { "epoch": 0.7631472218466946, "grad_norm": 0.9062209725379944, "learning_rate": 4.241685087097651e-06, "loss": 0.16156005859375, "step": 11290 }, { "epoch": 0.7632148168176288, "grad_norm": 0.6271420121192932, "learning_rate": 4.239386083712055e-06, "loss": 0.11846923828125, "step": 11291 }, { "epoch": 0.7632824117885629, "grad_norm": 1.201465368270874, "learning_rate": 4.23708760099189e-06, "loss": 0.17706298828125, "step": 11292 }, { "epoch": 0.763350006759497, "grad_norm": 0.7685189247131348, "learning_rate": 4.23478963904835e-06, "loss": 0.10928726196289062, "step": 11293 }, { "epoch": 0.7634176017304313, "grad_norm": 1.0770984888076782, "learning_rate": 4.232492197992641e-06, "loss": 0.1965484619140625, "step": 11294 }, { "epoch": 0.7634851967013654, "grad_norm": 1.277748942375183, "learning_rate": 4.230195277935922e-06, "loss": 0.272430419921875, "step": 11295 }, { "epoch": 0.7635527916722996, "grad_norm": 1.736965537071228, "learning_rate": 4.227898878989332e-06, "loss": 0.276611328125, "step": 11296 }, { "epoch": 0.7636203866432337, "grad_norm": 1.476589322090149, "learning_rate": 4.2256030012639855e-06, "loss": 0.23101806640625, "step": 11297 }, { "epoch": 0.763687981614168, "grad_norm": 1.7185957431793213, "learning_rate": 4.223307644870971e-06, "loss": 0.30157470703125, "step": 11298 }, { "epoch": 0.7637555765851021, "grad_norm": 1.2012310028076172, "learning_rate": 4.221012809921347e-06, "loss": 0.18597412109375, "step": 11299 }, { "epoch": 0.7638231715560362, "grad_norm": 1.432773232460022, "learning_rate": 4.218718496526167e-06, "loss": 0.239654541015625, "step": 11300 }, { "epoch": 0.7638907665269704, "grad_norm": 1.4114036560058594, "learning_rate": 4.2164247047964285e-06, "loss": 0.271240234375, "step": 11301 }, { "epoch": 0.7639583614979045, "grad_norm": 1.3356361389160156, "learning_rate": 4.214131434843127e-06, "loss": 0.26898193359375, "step": 11302 }, { "epoch": 0.7640259564688388, "grad_norm": 1.1144027709960938, "learning_rate": 4.211838686777221e-06, "loss": 0.1815643310546875, "step": 11303 }, { "epoch": 0.7640935514397729, "grad_norm": 1.7708051204681396, "learning_rate": 4.2095464607096455e-06, "loss": 0.2640380859375, "step": 11304 }, { "epoch": 0.7641611464107071, "grad_norm": 1.661919116973877, "learning_rate": 4.207254756751329e-06, "loss": 0.30999755859375, "step": 11305 }, { "epoch": 0.7642287413816412, "grad_norm": 1.328273892402649, "learning_rate": 4.204963575013135e-06, "loss": 0.2325592041015625, "step": 11306 }, { "epoch": 0.7642963363525753, "grad_norm": 0.8895401358604431, "learning_rate": 4.202672915605944e-06, "loss": 0.204315185546875, "step": 11307 }, { "epoch": 0.7643639313235095, "grad_norm": 0.8197454214096069, "learning_rate": 4.200382778640583e-06, "loss": 0.1148529052734375, "step": 11308 }, { "epoch": 0.7644315262944437, "grad_norm": 0.7248836159706116, "learning_rate": 4.198093164227866e-06, "loss": 0.1344146728515625, "step": 11309 }, { "epoch": 0.7644991212653779, "grad_norm": 1.587103009223938, "learning_rate": 4.195804072478579e-06, "loss": 0.230926513671875, "step": 11310 }, { "epoch": 0.764566716236312, "grad_norm": 2.034677743911743, "learning_rate": 4.19351550350348e-06, "loss": 0.31756591796875, "step": 11311 }, { "epoch": 0.7646343112072462, "grad_norm": 0.8599672317504883, "learning_rate": 4.191227457413303e-06, "loss": 0.17449951171875, "step": 11312 }, { "epoch": 0.7647019061781803, "grad_norm": 1.2042747735977173, "learning_rate": 4.188939934318768e-06, "loss": 0.25909423828125, "step": 11313 }, { "epoch": 0.7647695011491145, "grad_norm": 0.6617211103439331, "learning_rate": 4.1866529343305485e-06, "loss": 0.126495361328125, "step": 11314 }, { "epoch": 0.7648370961200487, "grad_norm": 1.7558796405792236, "learning_rate": 4.184366457559302e-06, "loss": 0.225555419921875, "step": 11315 }, { "epoch": 0.7649046910909828, "grad_norm": 1.8918591737747192, "learning_rate": 4.182080504115678e-06, "loss": 0.292877197265625, "step": 11316 }, { "epoch": 0.764972286061917, "grad_norm": 2.5434072017669678, "learning_rate": 4.179795074110265e-06, "loss": 0.285552978515625, "step": 11317 }, { "epoch": 0.7650398810328511, "grad_norm": 1.9989955425262451, "learning_rate": 4.177510167653667e-06, "loss": 0.3157958984375, "step": 11318 }, { "epoch": 0.7651074760037854, "grad_norm": 1.344262957572937, "learning_rate": 4.175225784856423e-06, "loss": 0.240692138671875, "step": 11319 }, { "epoch": 0.7651750709747195, "grad_norm": 1.134077548980713, "learning_rate": 4.1729419258290765e-06, "loss": 0.18402099609375, "step": 11320 }, { "epoch": 0.7652426659456536, "grad_norm": 0.8165876269340515, "learning_rate": 4.170658590682134e-06, "loss": 0.1287994384765625, "step": 11321 }, { "epoch": 0.7653102609165878, "grad_norm": 1.4012892246246338, "learning_rate": 4.168375779526075e-06, "loss": 0.13623046875, "step": 11322 }, { "epoch": 0.7653778558875219, "grad_norm": 1.7290799617767334, "learning_rate": 4.166093492471357e-06, "loss": 0.22930908203125, "step": 11323 }, { "epoch": 0.7654454508584562, "grad_norm": 1.439126968383789, "learning_rate": 4.1638117296284116e-06, "loss": 0.16854095458984375, "step": 11324 }, { "epoch": 0.7655130458293903, "grad_norm": 1.0690162181854248, "learning_rate": 4.161530491107643e-06, "loss": 0.16516876220703125, "step": 11325 }, { "epoch": 0.7655806408003245, "grad_norm": 1.6379352807998657, "learning_rate": 4.159249777019435e-06, "loss": 0.314453125, "step": 11326 }, { "epoch": 0.7656482357712586, "grad_norm": 1.558560848236084, "learning_rate": 4.1569695874741395e-06, "loss": 0.2451171875, "step": 11327 }, { "epoch": 0.7657158307421927, "grad_norm": 1.137497901916504, "learning_rate": 4.154689922582082e-06, "loss": 0.239471435546875, "step": 11328 }, { "epoch": 0.765783425713127, "grad_norm": 1.6272351741790771, "learning_rate": 4.152410782453583e-06, "loss": 0.29931640625, "step": 11329 }, { "epoch": 0.7658510206840611, "grad_norm": 1.3954179286956787, "learning_rate": 4.150132167198901e-06, "loss": 0.226806640625, "step": 11330 }, { "epoch": 0.7659186156549953, "grad_norm": 2.485170841217041, "learning_rate": 4.147854076928304e-06, "loss": 0.347015380859375, "step": 11331 }, { "epoch": 0.7659862106259294, "grad_norm": 1.142810344696045, "learning_rate": 4.145576511752016e-06, "loss": 0.24407958984375, "step": 11332 }, { "epoch": 0.7660538055968636, "grad_norm": 1.09688401222229, "learning_rate": 4.143299471780238e-06, "loss": 0.239715576171875, "step": 11333 }, { "epoch": 0.7661214005677978, "grad_norm": 2.4134998321533203, "learning_rate": 4.141022957123149e-06, "loss": 0.32684326171875, "step": 11334 }, { "epoch": 0.7661889955387319, "grad_norm": 0.9651199579238892, "learning_rate": 4.138746967890902e-06, "loss": 0.2064361572265625, "step": 11335 }, { "epoch": 0.7662565905096661, "grad_norm": 1.5303678512573242, "learning_rate": 4.13647150419362e-06, "loss": 0.2314453125, "step": 11336 }, { "epoch": 0.7663241854806002, "grad_norm": 1.0988696813583374, "learning_rate": 4.1341965661414085e-06, "loss": 0.232635498046875, "step": 11337 }, { "epoch": 0.7663917804515344, "grad_norm": 1.3282289505004883, "learning_rate": 4.131922153844338e-06, "loss": 0.2193145751953125, "step": 11338 }, { "epoch": 0.7664593754224686, "grad_norm": 2.0046021938323975, "learning_rate": 4.1296482674124645e-06, "loss": 0.308258056640625, "step": 11339 }, { "epoch": 0.7665269703934028, "grad_norm": 1.485355257987976, "learning_rate": 4.127374906955809e-06, "loss": 0.27392578125, "step": 11340 }, { "epoch": 0.7665945653643369, "grad_norm": 1.5648669004440308, "learning_rate": 4.125102072584365e-06, "loss": 0.2933349609375, "step": 11341 }, { "epoch": 0.766662160335271, "grad_norm": 3.8076109886169434, "learning_rate": 4.122829764408124e-06, "loss": 0.30523681640625, "step": 11342 }, { "epoch": 0.7667297553062052, "grad_norm": 1.2982593774795532, "learning_rate": 4.120557982537014e-06, "loss": 0.261627197265625, "step": 11343 }, { "epoch": 0.7667973502771394, "grad_norm": 2.384060859680176, "learning_rate": 4.118286727080973e-06, "loss": 0.2609710693359375, "step": 11344 }, { "epoch": 0.7668649452480736, "grad_norm": 2.005746364593506, "learning_rate": 4.116015998149892e-06, "loss": 0.32501220703125, "step": 11345 }, { "epoch": 0.7669325402190077, "grad_norm": 1.078348159790039, "learning_rate": 4.113745795853643e-06, "loss": 0.2313232421875, "step": 11346 }, { "epoch": 0.7670001351899419, "grad_norm": 1.2970168590545654, "learning_rate": 4.111476120302073e-06, "loss": 0.2707977294921875, "step": 11347 }, { "epoch": 0.767067730160876, "grad_norm": 1.9200377464294434, "learning_rate": 4.109206971605004e-06, "loss": 0.337738037109375, "step": 11348 }, { "epoch": 0.7671353251318102, "grad_norm": 1.1221204996109009, "learning_rate": 4.106938349872229e-06, "loss": 0.16021728515625, "step": 11349 }, { "epoch": 0.7672029201027444, "grad_norm": 1.3678922653198242, "learning_rate": 4.1046702552135235e-06, "loss": 0.266204833984375, "step": 11350 }, { "epoch": 0.7672705150736785, "grad_norm": 1.3913072347640991, "learning_rate": 4.1024026877386254e-06, "loss": 0.190582275390625, "step": 11351 }, { "epoch": 0.7673381100446127, "grad_norm": 1.6331911087036133, "learning_rate": 4.100135647557258e-06, "loss": 0.265380859375, "step": 11352 }, { "epoch": 0.7674057050155468, "grad_norm": 1.597663164138794, "learning_rate": 4.097869134779114e-06, "loss": 0.2181243896484375, "step": 11353 }, { "epoch": 0.7674732999864811, "grad_norm": 1.5678651332855225, "learning_rate": 4.095603149513856e-06, "loss": 0.13089370727539062, "step": 11354 }, { "epoch": 0.7675408949574152, "grad_norm": 2.123971462249756, "learning_rate": 4.09333769187114e-06, "loss": 0.26385498046875, "step": 11355 }, { "epoch": 0.7676084899283493, "grad_norm": 1.085717797279358, "learning_rate": 4.091072761960566e-06, "loss": 0.2348785400390625, "step": 11356 }, { "epoch": 0.7676760848992835, "grad_norm": 1.1619065999984741, "learning_rate": 4.088808359891737e-06, "loss": 0.22650146484375, "step": 11357 }, { "epoch": 0.7677436798702176, "grad_norm": 1.3468962907791138, "learning_rate": 4.086544485774215e-06, "loss": 0.1843109130859375, "step": 11358 }, { "epoch": 0.7678112748411519, "grad_norm": 1.03424870967865, "learning_rate": 4.084281139717543e-06, "loss": 0.1844940185546875, "step": 11359 }, { "epoch": 0.767878869812086, "grad_norm": 1.2449259757995605, "learning_rate": 4.082018321831232e-06, "loss": 0.1768341064453125, "step": 11360 }, { "epoch": 0.7679464647830201, "grad_norm": 1.7620913982391357, "learning_rate": 4.079756032224774e-06, "loss": 0.243621826171875, "step": 11361 }, { "epoch": 0.7680140597539543, "grad_norm": 1.0098719596862793, "learning_rate": 4.0774942710076305e-06, "loss": 0.13793563842773438, "step": 11362 }, { "epoch": 0.7680816547248884, "grad_norm": 1.2536028623580933, "learning_rate": 4.07523303828924e-06, "loss": 0.20025634765625, "step": 11363 }, { "epoch": 0.7681492496958227, "grad_norm": 1.821428656578064, "learning_rate": 4.072972334179017e-06, "loss": 0.2890625, "step": 11364 }, { "epoch": 0.7682168446667568, "grad_norm": 1.4874982833862305, "learning_rate": 4.070712158786346e-06, "loss": 0.25189208984375, "step": 11365 }, { "epoch": 0.768284439637691, "grad_norm": 1.4070782661437988, "learning_rate": 4.068452512220589e-06, "loss": 0.2578125, "step": 11366 }, { "epoch": 0.7683520346086251, "grad_norm": 1.4375030994415283, "learning_rate": 4.066193394591079e-06, "loss": 0.274169921875, "step": 11367 }, { "epoch": 0.7684196295795592, "grad_norm": 1.2729151248931885, "learning_rate": 4.063934806007137e-06, "loss": 0.247650146484375, "step": 11368 }, { "epoch": 0.7684872245504935, "grad_norm": 1.4142943620681763, "learning_rate": 4.061676746578029e-06, "loss": 0.324066162109375, "step": 11369 }, { "epoch": 0.7685548195214276, "grad_norm": 1.3127918243408203, "learning_rate": 4.059419216413031e-06, "loss": 0.175567626953125, "step": 11370 }, { "epoch": 0.7686224144923618, "grad_norm": 0.8205884695053101, "learning_rate": 4.057162215621375e-06, "loss": 0.15655517578125, "step": 11371 }, { "epoch": 0.7686900094632959, "grad_norm": 0.9658701419830322, "learning_rate": 4.054905744312255e-06, "loss": 0.21600341796875, "step": 11372 }, { "epoch": 0.7687576044342301, "grad_norm": 1.7388887405395508, "learning_rate": 4.052649802594865e-06, "loss": 0.27679443359375, "step": 11373 }, { "epoch": 0.7688251994051643, "grad_norm": 1.0213056802749634, "learning_rate": 4.050394390578359e-06, "loss": 0.16161727905273438, "step": 11374 }, { "epoch": 0.7688927943760984, "grad_norm": 1.1371597051620483, "learning_rate": 4.048139508371869e-06, "loss": 0.2098388671875, "step": 11375 }, { "epoch": 0.7689603893470326, "grad_norm": 1.748754620552063, "learning_rate": 4.045885156084495e-06, "loss": 0.275390625, "step": 11376 }, { "epoch": 0.7690279843179667, "grad_norm": 1.0749154090881348, "learning_rate": 4.043631333825322e-06, "loss": 0.1915283203125, "step": 11377 }, { "epoch": 0.7690955792889009, "grad_norm": 1.2551740407943726, "learning_rate": 4.041378041703398e-06, "loss": 0.209625244140625, "step": 11378 }, { "epoch": 0.769163174259835, "grad_norm": 1.4097191095352173, "learning_rate": 4.039125279827764e-06, "loss": 0.26849365234375, "step": 11379 }, { "epoch": 0.7692307692307693, "grad_norm": 2.498772144317627, "learning_rate": 4.036873048307405e-06, "loss": 0.336639404296875, "step": 11380 }, { "epoch": 0.7692983642017034, "grad_norm": 0.5602665543556213, "learning_rate": 4.034621347251315e-06, "loss": 0.11751842498779297, "step": 11381 }, { "epoch": 0.7693659591726375, "grad_norm": 0.9630283117294312, "learning_rate": 4.032370176768429e-06, "loss": 0.11708831787109375, "step": 11382 }, { "epoch": 0.7694335541435717, "grad_norm": 2.1939871311187744, "learning_rate": 4.0301195369676855e-06, "loss": 0.303802490234375, "step": 11383 }, { "epoch": 0.7695011491145058, "grad_norm": 1.899286150932312, "learning_rate": 4.027869427957983e-06, "loss": 0.2582550048828125, "step": 11384 }, { "epoch": 0.7695687440854401, "grad_norm": 2.4457430839538574, "learning_rate": 4.025619849848184e-06, "loss": 0.27569580078125, "step": 11385 }, { "epoch": 0.7696363390563742, "grad_norm": 0.9289012551307678, "learning_rate": 4.023370802747151e-06, "loss": 0.1570587158203125, "step": 11386 }, { "epoch": 0.7697039340273084, "grad_norm": 1.1263080835342407, "learning_rate": 4.0211222867637e-06, "loss": 0.18856048583984375, "step": 11387 }, { "epoch": 0.7697715289982425, "grad_norm": 1.4493361711502075, "learning_rate": 4.018874302006631e-06, "loss": 0.24249267578125, "step": 11388 }, { "epoch": 0.7698391239691766, "grad_norm": 1.0451536178588867, "learning_rate": 4.016626848584713e-06, "loss": 0.2164459228515625, "step": 11389 }, { "epoch": 0.7699067189401109, "grad_norm": 0.6988568902015686, "learning_rate": 4.014379926606694e-06, "loss": 0.1374359130859375, "step": 11390 }, { "epoch": 0.769974313911045, "grad_norm": 1.8687905073165894, "learning_rate": 4.0121335361812885e-06, "loss": 0.23956298828125, "step": 11391 }, { "epoch": 0.7700419088819792, "grad_norm": 0.5275114178657532, "learning_rate": 4.009887677417203e-06, "loss": 0.1091461181640625, "step": 11392 }, { "epoch": 0.7701095038529133, "grad_norm": 0.8787016272544861, "learning_rate": 4.0076423504230905e-06, "loss": 0.2043609619140625, "step": 11393 }, { "epoch": 0.7701770988238476, "grad_norm": 0.7787524461746216, "learning_rate": 4.005397555307611e-06, "loss": 0.1527252197265625, "step": 11394 }, { "epoch": 0.7702446937947817, "grad_norm": 1.5627210140228271, "learning_rate": 4.003153292179369e-06, "loss": 0.3070068359375, "step": 11395 }, { "epoch": 0.7703122887657158, "grad_norm": 1.0099918842315674, "learning_rate": 4.000909561146953e-06, "loss": 0.172576904296875, "step": 11396 }, { "epoch": 0.77037988373665, "grad_norm": 1.0732539892196655, "learning_rate": 3.998666362318945e-06, "loss": 0.2449951171875, "step": 11397 }, { "epoch": 0.7704474787075841, "grad_norm": 1.23598313331604, "learning_rate": 3.996423695803864e-06, "loss": 0.171905517578125, "step": 11398 }, { "epoch": 0.7705150736785183, "grad_norm": 1.5623502731323242, "learning_rate": 3.994181561710243e-06, "loss": 0.28472900390625, "step": 11399 }, { "epoch": 0.7705826686494525, "grad_norm": 1.434294581413269, "learning_rate": 3.9919399601465595e-06, "loss": 0.264923095703125, "step": 11400 }, { "epoch": 0.7706502636203867, "grad_norm": 1.857585072517395, "learning_rate": 3.989698891221281e-06, "loss": 0.323089599609375, "step": 11401 }, { "epoch": 0.7707178585913208, "grad_norm": 1.58608877658844, "learning_rate": 3.987458355042841e-06, "loss": 0.229217529296875, "step": 11402 }, { "epoch": 0.7707854535622549, "grad_norm": 1.392796516418457, "learning_rate": 3.9852183517196525e-06, "loss": 0.2735595703125, "step": 11403 }, { "epoch": 0.7708530485331891, "grad_norm": 1.0090210437774658, "learning_rate": 3.982978881360097e-06, "loss": 0.15918731689453125, "step": 11404 }, { "epoch": 0.7709206435041233, "grad_norm": 2.045302629470825, "learning_rate": 3.980739944072546e-06, "loss": 0.32122802734375, "step": 11405 }, { "epoch": 0.7709882384750575, "grad_norm": 0.6771449446678162, "learning_rate": 3.9785015399653184e-06, "loss": 0.051952362060546875, "step": 11406 }, { "epoch": 0.7710558334459916, "grad_norm": 1.034712791442871, "learning_rate": 3.9762636691467245e-06, "loss": 0.2270660400390625, "step": 11407 }, { "epoch": 0.7711234284169258, "grad_norm": 1.9212193489074707, "learning_rate": 3.9740263317250605e-06, "loss": 0.262969970703125, "step": 11408 }, { "epoch": 0.7711910233878599, "grad_norm": 1.283308506011963, "learning_rate": 3.971789527808562e-06, "loss": 0.27264404296875, "step": 11409 }, { "epoch": 0.7712586183587941, "grad_norm": 0.9471365213394165, "learning_rate": 3.96955325750548e-06, "loss": 0.195892333984375, "step": 11410 }, { "epoch": 0.7713262133297283, "grad_norm": 1.1267814636230469, "learning_rate": 3.967317520923998e-06, "loss": 0.250274658203125, "step": 11411 }, { "epoch": 0.7713938083006624, "grad_norm": 1.0248582363128662, "learning_rate": 3.965082318172312e-06, "loss": 0.20587158203125, "step": 11412 }, { "epoch": 0.7714614032715966, "grad_norm": 1.2596429586410522, "learning_rate": 3.962847649358569e-06, "loss": 0.19720458984375, "step": 11413 }, { "epoch": 0.7715289982425307, "grad_norm": 1.04978346824646, "learning_rate": 3.960613514590896e-06, "loss": 0.2633056640625, "step": 11414 }, { "epoch": 0.771596593213465, "grad_norm": 1.3069199323654175, "learning_rate": 3.9583799139773925e-06, "loss": 0.257843017578125, "step": 11415 }, { "epoch": 0.7716641881843991, "grad_norm": 1.2615344524383545, "learning_rate": 3.956146847626138e-06, "loss": 0.178375244140625, "step": 11416 }, { "epoch": 0.7717317831553332, "grad_norm": 1.2957664728164673, "learning_rate": 3.953914315645175e-06, "loss": 0.29522705078125, "step": 11417 }, { "epoch": 0.7717993781262674, "grad_norm": 1.3283907175064087, "learning_rate": 3.951682318142541e-06, "loss": 0.2703857421875, "step": 11418 }, { "epoch": 0.7718669730972015, "grad_norm": 1.2291045188903809, "learning_rate": 3.949450855226221e-06, "loss": 0.2210235595703125, "step": 11419 }, { "epoch": 0.7719345680681358, "grad_norm": 1.3730690479278564, "learning_rate": 3.947219927004186e-06, "loss": 0.25836181640625, "step": 11420 }, { "epoch": 0.7720021630390699, "grad_norm": 0.9339744448661804, "learning_rate": 3.944989533584398e-06, "loss": 0.23062896728515625, "step": 11421 }, { "epoch": 0.7720697580100041, "grad_norm": 0.8588963747024536, "learning_rate": 3.942759675074755e-06, "loss": 0.120880126953125, "step": 11422 }, { "epoch": 0.7721373529809382, "grad_norm": 1.4358274936676025, "learning_rate": 3.940530351583169e-06, "loss": 0.267303466796875, "step": 11423 }, { "epoch": 0.7722049479518723, "grad_norm": 1.5076109170913696, "learning_rate": 3.9383015632175036e-06, "loss": 0.2111358642578125, "step": 11424 }, { "epoch": 0.7722725429228066, "grad_norm": 1.1936841011047363, "learning_rate": 3.9360733100856e-06, "loss": 0.165802001953125, "step": 11425 }, { "epoch": 0.7723401378937407, "grad_norm": 1.2306647300720215, "learning_rate": 3.933845592295276e-06, "loss": 0.2003326416015625, "step": 11426 }, { "epoch": 0.7724077328646749, "grad_norm": 1.5662992000579834, "learning_rate": 3.9316184099543215e-06, "loss": 0.287017822265625, "step": 11427 }, { "epoch": 0.772475327835609, "grad_norm": 2.0758121013641357, "learning_rate": 3.929391763170501e-06, "loss": 0.3472900390625, "step": 11428 }, { "epoch": 0.7725429228065432, "grad_norm": 1.1203093528747559, "learning_rate": 3.9271656520515544e-06, "loss": 0.21173095703125, "step": 11429 }, { "epoch": 0.7726105177774774, "grad_norm": 1.1888937950134277, "learning_rate": 3.924940076705196e-06, "loss": 0.243408203125, "step": 11430 }, { "epoch": 0.7726781127484115, "grad_norm": 1.0495964288711548, "learning_rate": 3.9227150372391114e-06, "loss": 0.17584228515625, "step": 11431 }, { "epoch": 0.7727457077193457, "grad_norm": 1.4938589334487915, "learning_rate": 3.920490533760962e-06, "loss": 0.1579437255859375, "step": 11432 }, { "epoch": 0.7728133026902798, "grad_norm": 1.534609317779541, "learning_rate": 3.918266566378379e-06, "loss": 0.217681884765625, "step": 11433 }, { "epoch": 0.772880897661214, "grad_norm": 1.7732688188552856, "learning_rate": 3.916043135198985e-06, "loss": 0.2660064697265625, "step": 11434 }, { "epoch": 0.7729484926321482, "grad_norm": 1.4053311347961426, "learning_rate": 3.913820240330346e-06, "loss": 0.24072265625, "step": 11435 }, { "epoch": 0.7730160876030824, "grad_norm": 0.7880384922027588, "learning_rate": 3.911597881880032e-06, "loss": 0.144866943359375, "step": 11436 }, { "epoch": 0.7730836825740165, "grad_norm": 1.3893803358078003, "learning_rate": 3.90937605995557e-06, "loss": 0.242767333984375, "step": 11437 }, { "epoch": 0.7731512775449506, "grad_norm": 2.4763736724853516, "learning_rate": 3.907154774664466e-06, "loss": 0.35968017578125, "step": 11438 }, { "epoch": 0.7732188725158848, "grad_norm": 1.447306752204895, "learning_rate": 3.9049340261142004e-06, "loss": 0.196624755859375, "step": 11439 }, { "epoch": 0.773286467486819, "grad_norm": 0.6502360105514526, "learning_rate": 3.902713814412226e-06, "loss": 0.1175079345703125, "step": 11440 }, { "epoch": 0.7733540624577532, "grad_norm": 1.263708472251892, "learning_rate": 3.9004941396659705e-06, "loss": 0.1123809814453125, "step": 11441 }, { "epoch": 0.7734216574286873, "grad_norm": 0.8594240546226501, "learning_rate": 3.898275001982834e-06, "loss": 0.14791107177734375, "step": 11442 }, { "epoch": 0.7734892523996215, "grad_norm": 0.6760515570640564, "learning_rate": 3.896056401470197e-06, "loss": 0.11481475830078125, "step": 11443 }, { "epoch": 0.7735568473705556, "grad_norm": 0.985600471496582, "learning_rate": 3.893838338235403e-06, "loss": 0.296234130859375, "step": 11444 }, { "epoch": 0.7736244423414897, "grad_norm": 2.0436947345733643, "learning_rate": 3.89162081238578e-06, "loss": 0.255126953125, "step": 11445 }, { "epoch": 0.773692037312424, "grad_norm": 0.9672298431396484, "learning_rate": 3.8894038240286194e-06, "loss": 0.1910858154296875, "step": 11446 }, { "epoch": 0.7737596322833581, "grad_norm": 1.082651972770691, "learning_rate": 3.887187373271207e-06, "loss": 0.09352874755859375, "step": 11447 }, { "epoch": 0.7738272272542923, "grad_norm": 1.8740733861923218, "learning_rate": 3.884971460220772e-06, "loss": 0.27459716796875, "step": 11448 }, { "epoch": 0.7738948222252264, "grad_norm": 1.2909691333770752, "learning_rate": 3.8827560849845435e-06, "loss": 0.314605712890625, "step": 11449 }, { "epoch": 0.7739624171961607, "grad_norm": 1.791495442390442, "learning_rate": 3.880541247669714e-06, "loss": 0.3402099609375, "step": 11450 }, { "epoch": 0.7740300121670948, "grad_norm": 2.202545642852783, "learning_rate": 3.878326948383449e-06, "loss": 0.259185791015625, "step": 11451 }, { "epoch": 0.7740976071380289, "grad_norm": 1.3850957155227661, "learning_rate": 3.876113187232892e-06, "loss": 0.278076171875, "step": 11452 }, { "epoch": 0.7741652021089631, "grad_norm": 1.0770015716552734, "learning_rate": 3.87389996432516e-06, "loss": 0.14693450927734375, "step": 11453 }, { "epoch": 0.7742327970798972, "grad_norm": 1.115891933441162, "learning_rate": 3.871687279767337e-06, "loss": 0.255401611328125, "step": 11454 }, { "epoch": 0.7743003920508315, "grad_norm": 0.9497148394584656, "learning_rate": 3.8694751336664916e-06, "loss": 0.1541290283203125, "step": 11455 }, { "epoch": 0.7743679870217656, "grad_norm": 1.851615309715271, "learning_rate": 3.867263526129658e-06, "loss": 0.283599853515625, "step": 11456 }, { "epoch": 0.7744355819926998, "grad_norm": 1.7491016387939453, "learning_rate": 3.8650524572638485e-06, "loss": 0.19066619873046875, "step": 11457 }, { "epoch": 0.7745031769636339, "grad_norm": 1.3197394609451294, "learning_rate": 3.862841927176049e-06, "loss": 0.258392333984375, "step": 11458 }, { "epoch": 0.774570771934568, "grad_norm": 0.8031932711601257, "learning_rate": 3.860631935973215e-06, "loss": 0.1817474365234375, "step": 11459 }, { "epoch": 0.7746383669055023, "grad_norm": 1.3945235013961792, "learning_rate": 3.858422483762291e-06, "loss": 0.294952392578125, "step": 11460 }, { "epoch": 0.7747059618764364, "grad_norm": 1.4074715375900269, "learning_rate": 3.856213570650167e-06, "loss": 0.211273193359375, "step": 11461 }, { "epoch": 0.7747735568473706, "grad_norm": 1.601649284362793, "learning_rate": 3.854005196743737e-06, "loss": 0.23193359375, "step": 11462 }, { "epoch": 0.7748411518183047, "grad_norm": 1.5712907314300537, "learning_rate": 3.8517973621498525e-06, "loss": 0.282318115234375, "step": 11463 }, { "epoch": 0.7749087467892389, "grad_norm": 1.4974101781845093, "learning_rate": 3.84959006697534e-06, "loss": 0.25445556640625, "step": 11464 }, { "epoch": 0.774976341760173, "grad_norm": 1.3482342958450317, "learning_rate": 3.8473833113270055e-06, "loss": 0.186279296875, "step": 11465 }, { "epoch": 0.7750439367311072, "grad_norm": 1.119482398033142, "learning_rate": 3.845177095311623e-06, "loss": 0.154815673828125, "step": 11466 }, { "epoch": 0.7751115317020414, "grad_norm": 1.8267263174057007, "learning_rate": 3.842971419035945e-06, "loss": 0.290313720703125, "step": 11467 }, { "epoch": 0.7751791266729755, "grad_norm": 1.6398488283157349, "learning_rate": 3.840766282606693e-06, "loss": 0.2721366882324219, "step": 11468 }, { "epoch": 0.7752467216439097, "grad_norm": 1.785310983657837, "learning_rate": 3.838561686130567e-06, "loss": 0.2677001953125, "step": 11469 }, { "epoch": 0.7753143166148438, "grad_norm": 1.051285743713379, "learning_rate": 3.836357629714235e-06, "loss": 0.18255615234375, "step": 11470 }, { "epoch": 0.7753819115857781, "grad_norm": 1.2439862489700317, "learning_rate": 3.834154113464354e-06, "loss": 0.250396728515625, "step": 11471 }, { "epoch": 0.7754495065567122, "grad_norm": 1.698898196220398, "learning_rate": 3.83195113748753e-06, "loss": 0.19875335693359375, "step": 11472 }, { "epoch": 0.7755171015276463, "grad_norm": 1.699621558189392, "learning_rate": 3.829748701890372e-06, "loss": 0.2076416015625, "step": 11473 }, { "epoch": 0.7755846964985805, "grad_norm": 1.5457909107208252, "learning_rate": 3.827546806779429e-06, "loss": 0.28839111328125, "step": 11474 }, { "epoch": 0.7756522914695146, "grad_norm": 0.8863258957862854, "learning_rate": 3.825345452261257e-06, "loss": 0.15460205078125, "step": 11475 }, { "epoch": 0.7757198864404489, "grad_norm": 1.1557973623275757, "learning_rate": 3.82314463844237e-06, "loss": 0.210723876953125, "step": 11476 }, { "epoch": 0.775787481411383, "grad_norm": 1.1968744993209839, "learning_rate": 3.820944365429245e-06, "loss": 0.15447616577148438, "step": 11477 }, { "epoch": 0.7758550763823172, "grad_norm": 1.3866357803344727, "learning_rate": 3.818744633328359e-06, "loss": 0.264404296875, "step": 11478 }, { "epoch": 0.7759226713532513, "grad_norm": 1.413835883140564, "learning_rate": 3.816545442246142e-06, "loss": 0.247955322265625, "step": 11479 }, { "epoch": 0.7759902663241854, "grad_norm": 1.1133389472961426, "learning_rate": 3.8143467922890068e-06, "loss": 0.2874755859375, "step": 11480 }, { "epoch": 0.7760578612951197, "grad_norm": 1.3268959522247314, "learning_rate": 3.812148683563336e-06, "loss": 0.256561279296875, "step": 11481 }, { "epoch": 0.7761254562660538, "grad_norm": 1.8709161281585693, "learning_rate": 3.8099511161754884e-06, "loss": 0.292724609375, "step": 11482 }, { "epoch": 0.776193051236988, "grad_norm": 1.0427709817886353, "learning_rate": 3.807754090231792e-06, "loss": 0.229461669921875, "step": 11483 }, { "epoch": 0.7762606462079221, "grad_norm": 1.1985136270523071, "learning_rate": 3.805557605838565e-06, "loss": 0.1964874267578125, "step": 11484 }, { "epoch": 0.7763282411788562, "grad_norm": 0.8639458417892456, "learning_rate": 3.803361663102069e-06, "loss": 0.1888427734375, "step": 11485 }, { "epoch": 0.7763958361497905, "grad_norm": 1.5697225332260132, "learning_rate": 3.8011662621285757e-06, "loss": 0.22515869140625, "step": 11486 }, { "epoch": 0.7764634311207246, "grad_norm": 2.0547637939453125, "learning_rate": 3.7989714030242955e-06, "loss": 0.31744384765625, "step": 11487 }, { "epoch": 0.7765310260916588, "grad_norm": 2.4739160537719727, "learning_rate": 3.7967770858954413e-06, "loss": 0.25527191162109375, "step": 11488 }, { "epoch": 0.7765986210625929, "grad_norm": 1.8854458332061768, "learning_rate": 3.7945833108481886e-06, "loss": 0.28253173828125, "step": 11489 }, { "epoch": 0.7766662160335271, "grad_norm": 1.0206488370895386, "learning_rate": 3.7923900779886723e-06, "loss": 0.1923828125, "step": 11490 }, { "epoch": 0.7767338110044613, "grad_norm": 1.6390506029129028, "learning_rate": 3.7901973874230274e-06, "loss": 0.1976165771484375, "step": 11491 }, { "epoch": 0.7768014059753954, "grad_norm": 1.7502893209457397, "learning_rate": 3.7880052392573456e-06, "loss": 0.3150634765625, "step": 11492 }, { "epoch": 0.7768690009463296, "grad_norm": 2.105635643005371, "learning_rate": 3.7858136335976974e-06, "loss": 0.2354736328125, "step": 11493 }, { "epoch": 0.7769365959172637, "grad_norm": 0.9477331042289734, "learning_rate": 3.783622570550126e-06, "loss": 0.191986083984375, "step": 11494 }, { "epoch": 0.7770041908881979, "grad_norm": 2.525160551071167, "learning_rate": 3.781432050220647e-06, "loss": 0.36199951171875, "step": 11495 }, { "epoch": 0.7770717858591321, "grad_norm": 1.148697853088379, "learning_rate": 3.7792420727152498e-06, "loss": 0.162689208984375, "step": 11496 }, { "epoch": 0.7771393808300663, "grad_norm": 1.110442042350769, "learning_rate": 3.777052638139912e-06, "loss": 0.168426513671875, "step": 11497 }, { "epoch": 0.7772069758010004, "grad_norm": 1.2394403219223022, "learning_rate": 3.7748637466005528e-06, "loss": 0.2740478515625, "step": 11498 }, { "epoch": 0.7772745707719345, "grad_norm": 0.8806547522544861, "learning_rate": 3.7726753982030975e-06, "loss": 0.17901611328125, "step": 11499 }, { "epoch": 0.7773421657428687, "grad_norm": 0.9933570623397827, "learning_rate": 3.7704875930534346e-06, "loss": 0.1769561767578125, "step": 11500 }, { "epoch": 0.7774097607138029, "grad_norm": 1.3388909101486206, "learning_rate": 3.7683003312574066e-06, "loss": 0.2615966796875, "step": 11501 }, { "epoch": 0.7774773556847371, "grad_norm": 0.9057415127754211, "learning_rate": 3.766113612920868e-06, "loss": 0.1450653076171875, "step": 11502 }, { "epoch": 0.7775449506556712, "grad_norm": 1.5165936946868896, "learning_rate": 3.7639274381496054e-06, "loss": 0.1693878173828125, "step": 11503 }, { "epoch": 0.7776125456266054, "grad_norm": 0.887792706489563, "learning_rate": 3.761741807049416e-06, "loss": 0.21002197265625, "step": 11504 }, { "epoch": 0.7776801405975395, "grad_norm": 0.9633283019065857, "learning_rate": 3.7595567197260468e-06, "loss": 0.247772216796875, "step": 11505 }, { "epoch": 0.7777477355684737, "grad_norm": 1.3976788520812988, "learning_rate": 3.7573721762852266e-06, "loss": 0.1423492431640625, "step": 11506 }, { "epoch": 0.7778153305394079, "grad_norm": 1.3387311697006226, "learning_rate": 3.755188176832659e-06, "loss": 0.29559326171875, "step": 11507 }, { "epoch": 0.777882925510342, "grad_norm": 1.047269582748413, "learning_rate": 3.7530047214740166e-06, "loss": 0.1893157958984375, "step": 11508 }, { "epoch": 0.7779505204812762, "grad_norm": 0.9794830083847046, "learning_rate": 3.750821810314948e-06, "loss": 0.192535400390625, "step": 11509 }, { "epoch": 0.7780181154522103, "grad_norm": 1.5047305822372437, "learning_rate": 3.748639443461085e-06, "loss": 0.277923583984375, "step": 11510 }, { "epoch": 0.7780857104231446, "grad_norm": 0.8354161381721497, "learning_rate": 3.7464576210180134e-06, "loss": 0.18177032470703125, "step": 11511 }, { "epoch": 0.7781533053940787, "grad_norm": 1.5671008825302124, "learning_rate": 3.7442763430913022e-06, "loss": 0.245391845703125, "step": 11512 }, { "epoch": 0.7782209003650128, "grad_norm": 1.4576315879821777, "learning_rate": 3.742095609786509e-06, "loss": 0.2271575927734375, "step": 11513 }, { "epoch": 0.778288495335947, "grad_norm": 1.1384973526000977, "learning_rate": 3.7399154212091336e-06, "loss": 0.17600250244140625, "step": 11514 }, { "epoch": 0.7783560903068811, "grad_norm": 1.2963013648986816, "learning_rate": 3.737735777464685e-06, "loss": 0.295806884765625, "step": 11515 }, { "epoch": 0.7784236852778154, "grad_norm": 1.2555947303771973, "learning_rate": 3.7355566786586097e-06, "loss": 0.22784423828125, "step": 11516 }, { "epoch": 0.7784912802487495, "grad_norm": 1.203673005104065, "learning_rate": 3.733378124896361e-06, "loss": 0.29888916015625, "step": 11517 }, { "epoch": 0.7785588752196837, "grad_norm": 1.228451132774353, "learning_rate": 3.731200116283343e-06, "loss": 0.205322265625, "step": 11518 }, { "epoch": 0.7786264701906178, "grad_norm": 0.9880654811859131, "learning_rate": 3.7290226529249433e-06, "loss": 0.250946044921875, "step": 11519 }, { "epoch": 0.7786940651615519, "grad_norm": 1.2697876691818237, "learning_rate": 3.726845734926522e-06, "loss": 0.25665283203125, "step": 11520 }, { "epoch": 0.7787616601324862, "grad_norm": 0.7167930603027344, "learning_rate": 3.724669362393411e-06, "loss": 0.1376190185546875, "step": 11521 }, { "epoch": 0.7788292551034203, "grad_norm": 1.4254460334777832, "learning_rate": 3.722493535430913e-06, "loss": 0.203765869140625, "step": 11522 }, { "epoch": 0.7788968500743545, "grad_norm": 1.0942600965499878, "learning_rate": 3.7203182541443197e-06, "loss": 0.186004638671875, "step": 11523 }, { "epoch": 0.7789644450452886, "grad_norm": 0.9668694138526917, "learning_rate": 3.7181435186388718e-06, "loss": 0.212677001953125, "step": 11524 }, { "epoch": 0.7790320400162228, "grad_norm": 1.2751024961471558, "learning_rate": 3.7159693290197982e-06, "loss": 0.2301025390625, "step": 11525 }, { "epoch": 0.779099634987157, "grad_norm": 0.8665258884429932, "learning_rate": 3.7137956853923116e-06, "loss": 0.1776275634765625, "step": 11526 }, { "epoch": 0.7791672299580911, "grad_norm": 1.5247530937194824, "learning_rate": 3.7116225878615694e-06, "loss": 0.27032470703125, "step": 11527 }, { "epoch": 0.7792348249290253, "grad_norm": 2.0983619689941406, "learning_rate": 3.7094500365327327e-06, "loss": 0.223602294921875, "step": 11528 }, { "epoch": 0.7793024198999594, "grad_norm": 1.352441430091858, "learning_rate": 3.7072780315109172e-06, "loss": 0.19366455078125, "step": 11529 }, { "epoch": 0.7793700148708936, "grad_norm": 0.899967610836029, "learning_rate": 3.705106572901219e-06, "loss": 0.245330810546875, "step": 11530 }, { "epoch": 0.7794376098418277, "grad_norm": 1.6376363039016724, "learning_rate": 3.702935660808709e-06, "loss": 0.220855712890625, "step": 11531 }, { "epoch": 0.779505204812762, "grad_norm": 1.7000339031219482, "learning_rate": 3.7007652953384263e-06, "loss": 0.3060302734375, "step": 11532 }, { "epoch": 0.7795727997836961, "grad_norm": 1.2472490072250366, "learning_rate": 3.6985954765953872e-06, "loss": 0.249267578125, "step": 11533 }, { "epoch": 0.7796403947546302, "grad_norm": 1.4474962949752808, "learning_rate": 3.6964262046845827e-06, "loss": 0.35540771484375, "step": 11534 }, { "epoch": 0.7797079897255644, "grad_norm": 1.2252686023712158, "learning_rate": 3.6942574797109736e-06, "loss": 0.235443115234375, "step": 11535 }, { "epoch": 0.7797755846964985, "grad_norm": 1.010749340057373, "learning_rate": 3.6920893017794976e-06, "loss": 0.21697998046875, "step": 11536 }, { "epoch": 0.7798431796674328, "grad_norm": 1.487527847290039, "learning_rate": 3.6899216709950643e-06, "loss": 0.3108673095703125, "step": 11537 }, { "epoch": 0.7799107746383669, "grad_norm": 1.3239377737045288, "learning_rate": 3.6877545874625535e-06, "loss": 0.2738037109375, "step": 11538 }, { "epoch": 0.7799783696093011, "grad_norm": 0.8922629952430725, "learning_rate": 3.685588051286834e-06, "loss": 0.1693267822265625, "step": 11539 }, { "epoch": 0.7800459645802352, "grad_norm": 1.5384607315063477, "learning_rate": 3.6834220625727204e-06, "loss": 0.254302978515625, "step": 11540 }, { "epoch": 0.7801135595511693, "grad_norm": 1.4395564794540405, "learning_rate": 3.6812566214250275e-06, "loss": 0.304443359375, "step": 11541 }, { "epoch": 0.7801811545221036, "grad_norm": 1.6720539331436157, "learning_rate": 3.6790917279485297e-06, "loss": 0.2627716064453125, "step": 11542 }, { "epoch": 0.7802487494930377, "grad_norm": 1.3704661130905151, "learning_rate": 3.67692738224798e-06, "loss": 0.208831787109375, "step": 11543 }, { "epoch": 0.7803163444639719, "grad_norm": 1.5094057321548462, "learning_rate": 3.674763584428099e-06, "loss": 0.27386474609375, "step": 11544 }, { "epoch": 0.780383939434906, "grad_norm": 1.1317336559295654, "learning_rate": 3.672600334593587e-06, "loss": 0.2625732421875, "step": 11545 }, { "epoch": 0.7804515344058403, "grad_norm": 1.3996750116348267, "learning_rate": 3.670437632849115e-06, "loss": 0.230224609375, "step": 11546 }, { "epoch": 0.7805191293767744, "grad_norm": 1.6063001155853271, "learning_rate": 3.6682754792993294e-06, "loss": 0.300628662109375, "step": 11547 }, { "epoch": 0.7805867243477085, "grad_norm": 1.1211384534835815, "learning_rate": 3.6661138740488463e-06, "loss": 0.2322998046875, "step": 11548 }, { "epoch": 0.7806543193186427, "grad_norm": 1.5950591564178467, "learning_rate": 3.663952817202259e-06, "loss": 0.261077880859375, "step": 11549 }, { "epoch": 0.7807219142895768, "grad_norm": 1.9829919338226318, "learning_rate": 3.661792308864132e-06, "loss": 0.311767578125, "step": 11550 }, { "epoch": 0.780789509260511, "grad_norm": 1.7229348421096802, "learning_rate": 3.6596323491390015e-06, "loss": 0.279449462890625, "step": 11551 }, { "epoch": 0.7808571042314452, "grad_norm": 1.3255008459091187, "learning_rate": 3.657472938131391e-06, "loss": 0.2769927978515625, "step": 11552 }, { "epoch": 0.7809246992023794, "grad_norm": 1.5043545961380005, "learning_rate": 3.655314075945768e-06, "loss": 0.26922607421875, "step": 11553 }, { "epoch": 0.7809922941733135, "grad_norm": 1.1074271202087402, "learning_rate": 3.653155762686608e-06, "loss": 0.1571197509765625, "step": 11554 }, { "epoch": 0.7810598891442476, "grad_norm": 1.4924993515014648, "learning_rate": 3.6509979984583367e-06, "loss": 0.292327880859375, "step": 11555 }, { "epoch": 0.7811274841151818, "grad_norm": 1.8225287199020386, "learning_rate": 3.6488407833653612e-06, "loss": 0.311492919921875, "step": 11556 }, { "epoch": 0.781195079086116, "grad_norm": 1.1166287660598755, "learning_rate": 3.6466841175120615e-06, "loss": 0.243316650390625, "step": 11557 }, { "epoch": 0.7812626740570502, "grad_norm": 1.6241906881332397, "learning_rate": 3.6445280010027883e-06, "loss": 0.28094482421875, "step": 11558 }, { "epoch": 0.7813302690279843, "grad_norm": 1.691778302192688, "learning_rate": 3.64237243394187e-06, "loss": 0.252197265625, "step": 11559 }, { "epoch": 0.7813978639989185, "grad_norm": 1.0540746450424194, "learning_rate": 3.6402174164336054e-06, "loss": 0.14711380004882812, "step": 11560 }, { "epoch": 0.7814654589698526, "grad_norm": 1.9232265949249268, "learning_rate": 3.6380629485822676e-06, "loss": 0.27008056640625, "step": 11561 }, { "epoch": 0.7815330539407868, "grad_norm": 1.4209264516830444, "learning_rate": 3.6359090304921013e-06, "loss": 0.2801513671875, "step": 11562 }, { "epoch": 0.781600648911721, "grad_norm": 1.7092260122299194, "learning_rate": 3.6337556622673348e-06, "loss": 0.19561767578125, "step": 11563 }, { "epoch": 0.7816682438826551, "grad_norm": 1.6489475965499878, "learning_rate": 3.631602844012148e-06, "loss": 0.31256103515625, "step": 11564 }, { "epoch": 0.7817358388535893, "grad_norm": 1.2358567714691162, "learning_rate": 3.629450575830723e-06, "loss": 0.257965087890625, "step": 11565 }, { "epoch": 0.7818034338245234, "grad_norm": 1.1386030912399292, "learning_rate": 3.6272988578271827e-06, "loss": 0.2581634521484375, "step": 11566 }, { "epoch": 0.7818710287954577, "grad_norm": 0.9972821474075317, "learning_rate": 3.625147690105654e-06, "loss": 0.16112518310546875, "step": 11567 }, { "epoch": 0.7819386237663918, "grad_norm": 1.002224326133728, "learning_rate": 3.6229970727702204e-06, "loss": 0.2530517578125, "step": 11568 }, { "epoch": 0.7820062187373259, "grad_norm": 1.633560061454773, "learning_rate": 3.6208470059249404e-06, "loss": 0.20379638671875, "step": 11569 }, { "epoch": 0.7820738137082601, "grad_norm": 0.9074855446815491, "learning_rate": 3.618697489673849e-06, "loss": 0.10498809814453125, "step": 11570 }, { "epoch": 0.7821414086791942, "grad_norm": 0.5767277479171753, "learning_rate": 3.6165485241209528e-06, "loss": 0.0765380859375, "step": 11571 }, { "epoch": 0.7822090036501285, "grad_norm": 1.047825574874878, "learning_rate": 3.614400109370232e-06, "loss": 0.24359130859375, "step": 11572 }, { "epoch": 0.7822765986210626, "grad_norm": 1.6572530269622803, "learning_rate": 3.6122522455256407e-06, "loss": 0.1653289794921875, "step": 11573 }, { "epoch": 0.7823441935919968, "grad_norm": 1.1702005863189697, "learning_rate": 3.6101049326911036e-06, "loss": 0.231658935546875, "step": 11574 }, { "epoch": 0.7824117885629309, "grad_norm": 1.2464731931686401, "learning_rate": 3.607958170970519e-06, "loss": 0.277008056640625, "step": 11575 }, { "epoch": 0.782479383533865, "grad_norm": 0.8753792643547058, "learning_rate": 3.605811960467774e-06, "loss": 0.180328369140625, "step": 11576 }, { "epoch": 0.7825469785047993, "grad_norm": 1.45425546169281, "learning_rate": 3.6036663012866977e-06, "loss": 0.263580322265625, "step": 11577 }, { "epoch": 0.7826145734757334, "grad_norm": 2.314328908920288, "learning_rate": 3.6015211935311264e-06, "loss": 0.22052001953125, "step": 11578 }, { "epoch": 0.7826821684466676, "grad_norm": 1.9880261421203613, "learning_rate": 3.5993766373048375e-06, "loss": 0.2445068359375, "step": 11579 }, { "epoch": 0.7827497634176017, "grad_norm": 1.1657578945159912, "learning_rate": 3.5972326327116113e-06, "loss": 0.246246337890625, "step": 11580 }, { "epoch": 0.7828173583885359, "grad_norm": 2.303654432296753, "learning_rate": 3.5950891798551877e-06, "loss": 0.2633819580078125, "step": 11581 }, { "epoch": 0.7828849533594701, "grad_norm": 0.9564726948738098, "learning_rate": 3.5929462788392677e-06, "loss": 0.21929931640625, "step": 11582 }, { "epoch": 0.7829525483304042, "grad_norm": 0.47895917296409607, "learning_rate": 3.5908039297675492e-06, "loss": 0.06207275390625, "step": 11583 }, { "epoch": 0.7830201433013384, "grad_norm": 1.5296977758407593, "learning_rate": 3.588662132743692e-06, "loss": 0.203369140625, "step": 11584 }, { "epoch": 0.7830877382722725, "grad_norm": 1.2312225103378296, "learning_rate": 3.586520887871326e-06, "loss": 0.23895263671875, "step": 11585 }, { "epoch": 0.7831553332432067, "grad_norm": 1.0512125492095947, "learning_rate": 3.584380195254058e-06, "loss": 0.1582012176513672, "step": 11586 }, { "epoch": 0.7832229282141409, "grad_norm": 1.7604734897613525, "learning_rate": 3.5822400549954704e-06, "loss": 0.26239013671875, "step": 11587 }, { "epoch": 0.7832905231850751, "grad_norm": 1.7700512409210205, "learning_rate": 3.5801004671991107e-06, "loss": 0.3173980712890625, "step": 11588 }, { "epoch": 0.7833581181560092, "grad_norm": 1.10506010055542, "learning_rate": 3.577961431968519e-06, "loss": 0.210174560546875, "step": 11589 }, { "epoch": 0.7834257131269433, "grad_norm": 1.0643823146820068, "learning_rate": 3.5758229494071775e-06, "loss": 0.2049560546875, "step": 11590 }, { "epoch": 0.7834933080978775, "grad_norm": 1.3372806310653687, "learning_rate": 3.5736850196185705e-06, "loss": 0.247528076171875, "step": 11591 }, { "epoch": 0.7835609030688117, "grad_norm": 1.4895132780075073, "learning_rate": 3.5715476427061433e-06, "loss": 0.156768798828125, "step": 11592 }, { "epoch": 0.7836284980397459, "grad_norm": 1.483102560043335, "learning_rate": 3.5694108187733133e-06, "loss": 0.291961669921875, "step": 11593 }, { "epoch": 0.78369609301068, "grad_norm": 1.031495451927185, "learning_rate": 3.567274547923479e-06, "loss": 0.23516845703125, "step": 11594 }, { "epoch": 0.7837636879816142, "grad_norm": 1.3877596855163574, "learning_rate": 3.5651388302599934e-06, "loss": 0.262451171875, "step": 11595 }, { "epoch": 0.7838312829525483, "grad_norm": 1.1651262044906616, "learning_rate": 3.563003665886207e-06, "loss": 0.09763336181640625, "step": 11596 }, { "epoch": 0.7838988779234825, "grad_norm": 1.5116761922836304, "learning_rate": 3.5608690549054285e-06, "loss": 0.295989990234375, "step": 11597 }, { "epoch": 0.7839664728944167, "grad_norm": 1.296197772026062, "learning_rate": 3.5587349974209454e-06, "loss": 0.26031494140625, "step": 11598 }, { "epoch": 0.7840340678653508, "grad_norm": 1.6140292882919312, "learning_rate": 3.5566014935360153e-06, "loss": 0.30059814453125, "step": 11599 }, { "epoch": 0.784101662836285, "grad_norm": 2.289954662322998, "learning_rate": 3.5544685433538704e-06, "loss": 0.294158935546875, "step": 11600 }, { "epoch": 0.7841692578072191, "grad_norm": 1.0041570663452148, "learning_rate": 3.552336146977713e-06, "loss": 0.1973876953125, "step": 11601 }, { "epoch": 0.7842368527781534, "grad_norm": 1.9296594858169556, "learning_rate": 3.5502043045107328e-06, "loss": 0.2664794921875, "step": 11602 }, { "epoch": 0.7843044477490875, "grad_norm": 0.8562572598457336, "learning_rate": 3.5480730160560654e-06, "loss": 0.1634674072265625, "step": 11603 }, { "epoch": 0.7843720427200216, "grad_norm": 1.0979503393173218, "learning_rate": 3.545942281716848e-06, "loss": 0.20147705078125, "step": 11604 }, { "epoch": 0.7844396376909558, "grad_norm": 1.2048985958099365, "learning_rate": 3.54381210159618e-06, "loss": 0.2040557861328125, "step": 11605 }, { "epoch": 0.7845072326618899, "grad_norm": 0.945729672908783, "learning_rate": 3.5416824757971206e-06, "loss": 0.1790771484375, "step": 11606 }, { "epoch": 0.7845748276328242, "grad_norm": 1.047415852546692, "learning_rate": 3.5395534044227274e-06, "loss": 0.1785736083984375, "step": 11607 }, { "epoch": 0.7846424226037583, "grad_norm": 1.497100591659546, "learning_rate": 3.537424887576006e-06, "loss": 0.2425994873046875, "step": 11608 }, { "epoch": 0.7847100175746925, "grad_norm": 1.5026013851165771, "learning_rate": 3.5352969253599564e-06, "loss": 0.311798095703125, "step": 11609 }, { "epoch": 0.7847776125456266, "grad_norm": 1.1078765392303467, "learning_rate": 3.5331695178775407e-06, "loss": 0.2493896484375, "step": 11610 }, { "epoch": 0.7848452075165607, "grad_norm": 0.8449754118919373, "learning_rate": 3.5310426652316934e-06, "loss": 0.11702728271484375, "step": 11611 }, { "epoch": 0.784912802487495, "grad_norm": 1.4066768884658813, "learning_rate": 3.528916367525329e-06, "loss": 0.26507568359375, "step": 11612 }, { "epoch": 0.7849803974584291, "grad_norm": 2.2835240364074707, "learning_rate": 3.5267906248613263e-06, "loss": 0.286712646484375, "step": 11613 }, { "epoch": 0.7850479924293633, "grad_norm": 1.1300208568572998, "learning_rate": 3.524665437342542e-06, "loss": 0.205322265625, "step": 11614 }, { "epoch": 0.7851155874002974, "grad_norm": 1.1333601474761963, "learning_rate": 3.522540805071817e-06, "loss": 0.175079345703125, "step": 11615 }, { "epoch": 0.7851831823712315, "grad_norm": 1.4445877075195312, "learning_rate": 3.520416728151935e-06, "loss": 0.240875244140625, "step": 11616 }, { "epoch": 0.7852507773421658, "grad_norm": 1.7454026937484741, "learning_rate": 3.5182932066856867e-06, "loss": 0.2554931640625, "step": 11617 }, { "epoch": 0.7853183723130999, "grad_norm": 1.0803415775299072, "learning_rate": 3.5161702407758224e-06, "loss": 0.1078033447265625, "step": 11618 }, { "epoch": 0.7853859672840341, "grad_norm": 1.0915268659591675, "learning_rate": 3.51404783052505e-06, "loss": 0.212982177734375, "step": 11619 }, { "epoch": 0.7854535622549682, "grad_norm": 1.750827431678772, "learning_rate": 3.5119259760360794e-06, "loss": 0.2581787109375, "step": 11620 }, { "epoch": 0.7855211572259024, "grad_norm": 1.9724894762039185, "learning_rate": 3.509804677411572e-06, "loss": 0.21502685546875, "step": 11621 }, { "epoch": 0.7855887521968365, "grad_norm": 1.7158857583999634, "learning_rate": 3.507683934754173e-06, "loss": 0.293060302734375, "step": 11622 }, { "epoch": 0.7856563471677707, "grad_norm": 1.3043489456176758, "learning_rate": 3.5055637481664925e-06, "loss": 0.2479248046875, "step": 11623 }, { "epoch": 0.7857239421387049, "grad_norm": 1.306846261024475, "learning_rate": 3.503444117751122e-06, "loss": 0.271087646484375, "step": 11624 }, { "epoch": 0.785791537109639, "grad_norm": 1.3888320922851562, "learning_rate": 3.5013250436106232e-06, "loss": 0.245269775390625, "step": 11625 }, { "epoch": 0.7858591320805732, "grad_norm": 1.0857957601547241, "learning_rate": 3.4992065258475265e-06, "loss": 0.206512451171875, "step": 11626 }, { "epoch": 0.7859267270515073, "grad_norm": 1.5655851364135742, "learning_rate": 3.497088564564337e-06, "loss": 0.294647216796875, "step": 11627 }, { "epoch": 0.7859943220224416, "grad_norm": 0.5188371539115906, "learning_rate": 3.4949711598635476e-06, "loss": 0.10282135009765625, "step": 11628 }, { "epoch": 0.7860619169933757, "grad_norm": 1.2618024349212646, "learning_rate": 3.4928543118475962e-06, "loss": 0.161163330078125, "step": 11629 }, { "epoch": 0.7861295119643098, "grad_norm": 1.6183847188949585, "learning_rate": 3.490738020618913e-06, "loss": 0.3133544921875, "step": 11630 }, { "epoch": 0.786197106935244, "grad_norm": 1.378738522529602, "learning_rate": 3.4886222862799076e-06, "loss": 0.267303466796875, "step": 11631 }, { "epoch": 0.7862647019061781, "grad_norm": 1.3042080402374268, "learning_rate": 3.4865071089329364e-06, "loss": 0.203155517578125, "step": 11632 }, { "epoch": 0.7863322968771124, "grad_norm": 1.4766106605529785, "learning_rate": 3.4843924886803575e-06, "loss": 0.1992034912109375, "step": 11633 }, { "epoch": 0.7863998918480465, "grad_norm": 1.2762264013290405, "learning_rate": 3.482278425624484e-06, "loss": 0.18927001953125, "step": 11634 }, { "epoch": 0.7864674868189807, "grad_norm": 1.3719394207000732, "learning_rate": 3.480164919867607e-06, "loss": 0.24761962890625, "step": 11635 }, { "epoch": 0.7865350817899148, "grad_norm": 1.8247535228729248, "learning_rate": 3.478051971511994e-06, "loss": 0.2248382568359375, "step": 11636 }, { "epoch": 0.7866026767608489, "grad_norm": 1.651012897491455, "learning_rate": 3.475939580659881e-06, "loss": 0.31341552734375, "step": 11637 }, { "epoch": 0.7866702717317832, "grad_norm": 1.6758787631988525, "learning_rate": 3.4738277474134745e-06, "loss": 0.293304443359375, "step": 11638 }, { "epoch": 0.7867378667027173, "grad_norm": 1.3604522943496704, "learning_rate": 3.4717164718749692e-06, "loss": 0.2669677734375, "step": 11639 }, { "epoch": 0.7868054616736515, "grad_norm": 1.347447395324707, "learning_rate": 3.469605754146511e-06, "loss": 0.29693603515625, "step": 11640 }, { "epoch": 0.7868730566445856, "grad_norm": 1.5894240140914917, "learning_rate": 3.467495594330232e-06, "loss": 0.260406494140625, "step": 11641 }, { "epoch": 0.7869406516155198, "grad_norm": 1.2297879457473755, "learning_rate": 3.4653859925282384e-06, "loss": 0.219940185546875, "step": 11642 }, { "epoch": 0.787008246586454, "grad_norm": 1.0038453340530396, "learning_rate": 3.4632769488425976e-06, "loss": 0.16925048828125, "step": 11643 }, { "epoch": 0.7870758415573881, "grad_norm": 1.6848064661026, "learning_rate": 3.4611684633753733e-06, "loss": 0.278533935546875, "step": 11644 }, { "epoch": 0.7871434365283223, "grad_norm": 0.9452807903289795, "learning_rate": 3.459060536228569e-06, "loss": 0.2215576171875, "step": 11645 }, { "epoch": 0.7872110314992564, "grad_norm": 1.2214161157608032, "learning_rate": 3.4569531675041927e-06, "loss": 0.19219970703125, "step": 11646 }, { "epoch": 0.7872786264701906, "grad_norm": 1.7862008810043335, "learning_rate": 3.4548463573042067e-06, "loss": 0.31256103515625, "step": 11647 }, { "epoch": 0.7873462214411248, "grad_norm": 1.166628360748291, "learning_rate": 3.452740105730554e-06, "loss": 0.260528564453125, "step": 11648 }, { "epoch": 0.787413816412059, "grad_norm": 1.1426846981048584, "learning_rate": 3.450634412885145e-06, "loss": 0.237640380859375, "step": 11649 }, { "epoch": 0.7874814113829931, "grad_norm": 1.3832368850708008, "learning_rate": 3.4485292788698704e-06, "loss": 0.260498046875, "step": 11650 }, { "epoch": 0.7875490063539272, "grad_norm": 2.087109088897705, "learning_rate": 3.4464247037865807e-06, "loss": 0.294586181640625, "step": 11651 }, { "epoch": 0.7876166013248614, "grad_norm": 1.1082234382629395, "learning_rate": 3.444320687737124e-06, "loss": 0.231201171875, "step": 11652 }, { "epoch": 0.7876841962957956, "grad_norm": 1.2404035329818726, "learning_rate": 3.4422172308232918e-06, "loss": 0.2729034423828125, "step": 11653 }, { "epoch": 0.7877517912667298, "grad_norm": 1.4343150854110718, "learning_rate": 3.4401143331468676e-06, "loss": 0.22149658203125, "step": 11654 }, { "epoch": 0.7878193862376639, "grad_norm": 0.849723219871521, "learning_rate": 3.4380119948096018e-06, "loss": 0.18353271484375, "step": 11655 }, { "epoch": 0.7878869812085981, "grad_norm": 1.7080341577529907, "learning_rate": 3.4359102159132145e-06, "loss": 0.247894287109375, "step": 11656 }, { "epoch": 0.7879545761795322, "grad_norm": 1.748573899269104, "learning_rate": 3.4338089965594162e-06, "loss": 0.2431488037109375, "step": 11657 }, { "epoch": 0.7880221711504664, "grad_norm": 0.9608876705169678, "learning_rate": 3.4317083368498585e-06, "loss": 0.1693115234375, "step": 11658 }, { "epoch": 0.7880897661214006, "grad_norm": 1.7340420484542847, "learning_rate": 3.4296082368861993e-06, "loss": 0.2384490966796875, "step": 11659 }, { "epoch": 0.7881573610923347, "grad_norm": 1.0220997333526611, "learning_rate": 3.4275086967700482e-06, "loss": 0.281494140625, "step": 11660 }, { "epoch": 0.7882249560632689, "grad_norm": 0.8989742398262024, "learning_rate": 3.425409716602996e-06, "loss": 0.179962158203125, "step": 11661 }, { "epoch": 0.788292551034203, "grad_norm": 1.320567011833191, "learning_rate": 3.4233112964866024e-06, "loss": 0.198455810546875, "step": 11662 }, { "epoch": 0.7883601460051373, "grad_norm": 1.3603553771972656, "learning_rate": 3.4212134365224025e-06, "loss": 0.251556396484375, "step": 11663 }, { "epoch": 0.7884277409760714, "grad_norm": 0.957797110080719, "learning_rate": 3.4191161368119057e-06, "loss": 0.1973876953125, "step": 11664 }, { "epoch": 0.7884953359470055, "grad_norm": 1.0796661376953125, "learning_rate": 3.4170193974565893e-06, "loss": 0.20232391357421875, "step": 11665 }, { "epoch": 0.7885629309179397, "grad_norm": 1.042220950126648, "learning_rate": 3.4149232185579095e-06, "loss": 0.188995361328125, "step": 11666 }, { "epoch": 0.7886305258888738, "grad_norm": 1.034382700920105, "learning_rate": 3.412827600217286e-06, "loss": 0.21044921875, "step": 11667 }, { "epoch": 0.7886981208598081, "grad_norm": 0.7558851838111877, "learning_rate": 3.410732542536133e-06, "loss": 0.09963226318359375, "step": 11668 }, { "epoch": 0.7887657158307422, "grad_norm": 1.2170459032058716, "learning_rate": 3.408638045615803e-06, "loss": 0.235015869140625, "step": 11669 }, { "epoch": 0.7888333108016764, "grad_norm": 1.1176698207855225, "learning_rate": 3.406544109557658e-06, "loss": 0.2082061767578125, "step": 11670 }, { "epoch": 0.7889009057726105, "grad_norm": 1.5664607286453247, "learning_rate": 3.4044507344630017e-06, "loss": 0.18105316162109375, "step": 11671 }, { "epoch": 0.7889685007435446, "grad_norm": 1.6242754459381104, "learning_rate": 3.4023579204331333e-06, "loss": 0.2371826171875, "step": 11672 }, { "epoch": 0.7890360957144789, "grad_norm": 1.7448869943618774, "learning_rate": 3.400265667569314e-06, "loss": 0.29119873046875, "step": 11673 }, { "epoch": 0.789103690685413, "grad_norm": 1.3089126348495483, "learning_rate": 3.398173975972779e-06, "loss": 0.3365478515625, "step": 11674 }, { "epoch": 0.7891712856563472, "grad_norm": 0.7408046722412109, "learning_rate": 3.3960828457447384e-06, "loss": 0.121337890625, "step": 11675 }, { "epoch": 0.7892388806272813, "grad_norm": 1.5753984451293945, "learning_rate": 3.393992276986374e-06, "loss": 0.25537109375, "step": 11676 }, { "epoch": 0.7893064755982155, "grad_norm": 1.5298291444778442, "learning_rate": 3.391902269798841e-06, "loss": 0.31744384765625, "step": 11677 }, { "epoch": 0.7893740705691497, "grad_norm": 0.8830499649047852, "learning_rate": 3.3898128242832653e-06, "loss": 0.15064239501953125, "step": 11678 }, { "epoch": 0.7894416655400838, "grad_norm": 1.3820799589157104, "learning_rate": 3.3877239405407477e-06, "loss": 0.287017822265625, "step": 11679 }, { "epoch": 0.789509260511018, "grad_norm": 1.4300498962402344, "learning_rate": 3.385635618672359e-06, "loss": 0.2439117431640625, "step": 11680 }, { "epoch": 0.7895768554819521, "grad_norm": 1.2635955810546875, "learning_rate": 3.383547858779155e-06, "loss": 0.2166595458984375, "step": 11681 }, { "epoch": 0.7896444504528863, "grad_norm": 1.9632216691970825, "learning_rate": 3.3814606609621402e-06, "loss": 0.31060791015625, "step": 11682 }, { "epoch": 0.7897120454238205, "grad_norm": 1.684163212776184, "learning_rate": 3.3793740253223167e-06, "loss": 0.264495849609375, "step": 11683 }, { "epoch": 0.7897796403947547, "grad_norm": 1.4837989807128906, "learning_rate": 3.377287951960647e-06, "loss": 0.247161865234375, "step": 11684 }, { "epoch": 0.7898472353656888, "grad_norm": 1.7268396615982056, "learning_rate": 3.375202440978066e-06, "loss": 0.2470855712890625, "step": 11685 }, { "epoch": 0.7899148303366229, "grad_norm": 1.1198968887329102, "learning_rate": 3.373117492475485e-06, "loss": 0.181427001953125, "step": 11686 }, { "epoch": 0.7899824253075571, "grad_norm": 1.6196279525756836, "learning_rate": 3.3710331065537868e-06, "loss": 0.23480224609375, "step": 11687 }, { "epoch": 0.7900500202784912, "grad_norm": 2.1558291912078857, "learning_rate": 3.3689492833138263e-06, "loss": 0.24726104736328125, "step": 11688 }, { "epoch": 0.7901176152494255, "grad_norm": 1.2798627614974976, "learning_rate": 3.366866022856433e-06, "loss": 0.229461669921875, "step": 11689 }, { "epoch": 0.7901852102203596, "grad_norm": 1.4249237775802612, "learning_rate": 3.3647833252824062e-06, "loss": 0.2791748046875, "step": 11690 }, { "epoch": 0.7902528051912938, "grad_norm": 0.7969524264335632, "learning_rate": 3.3627011906925205e-06, "loss": 0.18096923828125, "step": 11691 }, { "epoch": 0.7903204001622279, "grad_norm": 1.1343629360198975, "learning_rate": 3.360619619187523e-06, "loss": 0.220794677734375, "step": 11692 }, { "epoch": 0.790387995133162, "grad_norm": 1.074718952178955, "learning_rate": 3.358538610868131e-06, "loss": 0.1494903564453125, "step": 11693 }, { "epoch": 0.7904555901040963, "grad_norm": 1.1965317726135254, "learning_rate": 3.3564581658350446e-06, "loss": 0.24652099609375, "step": 11694 }, { "epoch": 0.7905231850750304, "grad_norm": 1.3597725629806519, "learning_rate": 3.3543782841889138e-06, "loss": 0.28912353515625, "step": 11695 }, { "epoch": 0.7905907800459646, "grad_norm": 1.0518275499343872, "learning_rate": 3.352298966030391e-06, "loss": 0.1385040283203125, "step": 11696 }, { "epoch": 0.7906583750168987, "grad_norm": 0.8819872140884399, "learning_rate": 3.3502202114600794e-06, "loss": 0.16090774536132812, "step": 11697 }, { "epoch": 0.790725969987833, "grad_norm": 1.2710154056549072, "learning_rate": 3.3481420205785618e-06, "loss": 0.1696929931640625, "step": 11698 }, { "epoch": 0.7907935649587671, "grad_norm": 0.8245055675506592, "learning_rate": 3.346064393486401e-06, "loss": 0.1157379150390625, "step": 11699 }, { "epoch": 0.7908611599297012, "grad_norm": 1.3615351915359497, "learning_rate": 3.343987330284112e-06, "loss": 0.2618408203125, "step": 11700 }, { "epoch": 0.7909287549006354, "grad_norm": 1.1373772621154785, "learning_rate": 3.3419108310722067e-06, "loss": 0.22430419921875, "step": 11701 }, { "epoch": 0.7909963498715695, "grad_norm": 1.0205004215240479, "learning_rate": 3.3398348959511577e-06, "loss": 0.1923980712890625, "step": 11702 }, { "epoch": 0.7910639448425038, "grad_norm": 1.4555758237838745, "learning_rate": 3.3377595250214105e-06, "loss": 0.309173583984375, "step": 11703 }, { "epoch": 0.7911315398134379, "grad_norm": 0.4866228997707367, "learning_rate": 3.335684718383383e-06, "loss": 0.0782928466796875, "step": 11704 }, { "epoch": 0.7911991347843721, "grad_norm": 1.2809627056121826, "learning_rate": 3.3336104761374692e-06, "loss": 0.1825714111328125, "step": 11705 }, { "epoch": 0.7912667297553062, "grad_norm": 1.0297799110412598, "learning_rate": 3.331536798384028e-06, "loss": 0.184356689453125, "step": 11706 }, { "epoch": 0.7913343247262403, "grad_norm": 1.14651358127594, "learning_rate": 3.329463685223411e-06, "loss": 0.1826171875, "step": 11707 }, { "epoch": 0.7914019196971745, "grad_norm": 1.1177972555160522, "learning_rate": 3.327391136755909e-06, "loss": 0.1980743408203125, "step": 11708 }, { "epoch": 0.7914695146681087, "grad_norm": 1.2433688640594482, "learning_rate": 3.325319153081821e-06, "loss": 0.2251129150390625, "step": 11709 }, { "epoch": 0.7915371096390429, "grad_norm": 0.9226219654083252, "learning_rate": 3.3232477343014e-06, "loss": 0.1788330078125, "step": 11710 }, { "epoch": 0.791604704609977, "grad_norm": 1.6275089979171753, "learning_rate": 3.321176880514864e-06, "loss": 0.1410369873046875, "step": 11711 }, { "epoch": 0.7916722995809112, "grad_norm": 1.2769263982772827, "learning_rate": 3.319106591822423e-06, "loss": 0.229400634765625, "step": 11712 }, { "epoch": 0.7917398945518453, "grad_norm": 1.2213985919952393, "learning_rate": 3.3170368683242464e-06, "loss": 0.254241943359375, "step": 11713 }, { "epoch": 0.7918074895227795, "grad_norm": 1.4102301597595215, "learning_rate": 3.3149677101204845e-06, "loss": 0.2248077392578125, "step": 11714 }, { "epoch": 0.7918750844937137, "grad_norm": 0.9052396416664124, "learning_rate": 3.3128991173112505e-06, "loss": 0.18714141845703125, "step": 11715 }, { "epoch": 0.7919426794646478, "grad_norm": 0.8486586809158325, "learning_rate": 3.310831089996641e-06, "loss": 0.15521240234375, "step": 11716 }, { "epoch": 0.792010274435582, "grad_norm": 1.8557636737823486, "learning_rate": 3.3087636282767157e-06, "loss": 0.28765869140625, "step": 11717 }, { "epoch": 0.7920778694065161, "grad_norm": 1.5791971683502197, "learning_rate": 3.3066967322515133e-06, "loss": 0.25152587890625, "step": 11718 }, { "epoch": 0.7921454643774504, "grad_norm": 1.2943187952041626, "learning_rate": 3.3046304020210387e-06, "loss": 0.2276611328125, "step": 11719 }, { "epoch": 0.7922130593483845, "grad_norm": 0.5654309391975403, "learning_rate": 3.302564637685287e-06, "loss": 0.1154327392578125, "step": 11720 }, { "epoch": 0.7922806543193186, "grad_norm": 1.1746047735214233, "learning_rate": 3.3004994393441963e-06, "loss": 0.1437225341796875, "step": 11721 }, { "epoch": 0.7923482492902528, "grad_norm": 1.6303545236587524, "learning_rate": 3.2984348070977033e-06, "loss": 0.31011962890625, "step": 11722 }, { "epoch": 0.7924158442611869, "grad_norm": 1.374860405921936, "learning_rate": 3.29637074104571e-06, "loss": 0.23309326171875, "step": 11723 }, { "epoch": 0.7924834392321212, "grad_norm": 1.769392490386963, "learning_rate": 3.2943072412880766e-06, "loss": 0.319854736328125, "step": 11724 }, { "epoch": 0.7925510342030553, "grad_norm": 1.2631334066390991, "learning_rate": 3.2922443079246613e-06, "loss": 0.2258758544921875, "step": 11725 }, { "epoch": 0.7926186291739895, "grad_norm": 0.9803410172462463, "learning_rate": 3.2901819410552743e-06, "loss": 0.17340087890625, "step": 11726 }, { "epoch": 0.7926862241449236, "grad_norm": 1.0580933094024658, "learning_rate": 3.2881201407797075e-06, "loss": 0.248565673828125, "step": 11727 }, { "epoch": 0.7927538191158577, "grad_norm": 0.9862923622131348, "learning_rate": 3.2860589071977245e-06, "loss": 0.185516357421875, "step": 11728 }, { "epoch": 0.792821414086792, "grad_norm": 1.019439697265625, "learning_rate": 3.2839982404090587e-06, "loss": 0.1612396240234375, "step": 11729 }, { "epoch": 0.7928890090577261, "grad_norm": 1.2039721012115479, "learning_rate": 3.281938140513417e-06, "loss": 0.2281494140625, "step": 11730 }, { "epoch": 0.7929566040286603, "grad_norm": 0.5105374455451965, "learning_rate": 3.279878607610488e-06, "loss": 0.05995368957519531, "step": 11731 }, { "epoch": 0.7930241989995944, "grad_norm": 1.598870038986206, "learning_rate": 3.2778196417999106e-06, "loss": 0.255157470703125, "step": 11732 }, { "epoch": 0.7930917939705286, "grad_norm": 1.4293718338012695, "learning_rate": 3.2757612431813283e-06, "loss": 0.2076416015625, "step": 11733 }, { "epoch": 0.7931593889414628, "grad_norm": 1.3943901062011719, "learning_rate": 3.273703411854323e-06, "loss": 0.27532958984375, "step": 11734 }, { "epoch": 0.7932269839123969, "grad_norm": 1.7292290925979614, "learning_rate": 3.27164614791847e-06, "loss": 0.235565185546875, "step": 11735 }, { "epoch": 0.7932945788833311, "grad_norm": 0.5959481000900269, "learning_rate": 3.26958945147332e-06, "loss": 0.11581039428710938, "step": 11736 }, { "epoch": 0.7933621738542652, "grad_norm": 1.7417415380477905, "learning_rate": 3.2675333226183764e-06, "loss": 0.31640625, "step": 11737 }, { "epoch": 0.7934297688251994, "grad_norm": 1.9760375022888184, "learning_rate": 3.2654777614531377e-06, "loss": 0.203765869140625, "step": 11738 }, { "epoch": 0.7934973637961336, "grad_norm": 1.0726984739303589, "learning_rate": 3.2634227680770618e-06, "loss": 0.169952392578125, "step": 11739 }, { "epoch": 0.7935649587670678, "grad_norm": 1.68157160282135, "learning_rate": 3.26136834258958e-06, "loss": 0.3631591796875, "step": 11740 }, { "epoch": 0.7936325537380019, "grad_norm": 1.457944631576538, "learning_rate": 3.2593144850901004e-06, "loss": 0.229339599609375, "step": 11741 }, { "epoch": 0.793700148708936, "grad_norm": 1.2375088930130005, "learning_rate": 3.257261195678001e-06, "loss": 0.180511474609375, "step": 11742 }, { "epoch": 0.7937677436798702, "grad_norm": 0.9591159224510193, "learning_rate": 3.255208474452627e-06, "loss": 0.1800537109375, "step": 11743 }, { "epoch": 0.7938353386508044, "grad_norm": 1.1007192134857178, "learning_rate": 3.2531563215133154e-06, "loss": 0.2056884765625, "step": 11744 }, { "epoch": 0.7939029336217386, "grad_norm": 1.4552370309829712, "learning_rate": 3.2511047369593507e-06, "loss": 0.21868896484375, "step": 11745 }, { "epoch": 0.7939705285926727, "grad_norm": 1.2987231016159058, "learning_rate": 3.2490537208900036e-06, "loss": 0.274505615234375, "step": 11746 }, { "epoch": 0.7940381235636068, "grad_norm": 1.359850287437439, "learning_rate": 3.247003273404516e-06, "loss": 0.18976593017578125, "step": 11747 }, { "epoch": 0.794105718534541, "grad_norm": 1.2367777824401855, "learning_rate": 3.244953394602098e-06, "loss": 0.1451416015625, "step": 11748 }, { "epoch": 0.7941733135054752, "grad_norm": 1.220048189163208, "learning_rate": 3.2429040845819446e-06, "loss": 0.2406005859375, "step": 11749 }, { "epoch": 0.7942409084764094, "grad_norm": 2.5722124576568604, "learning_rate": 3.2408553434432016e-06, "loss": 0.37188720703125, "step": 11750 }, { "epoch": 0.7943085034473435, "grad_norm": 1.5168861150741577, "learning_rate": 3.238807171285011e-06, "loss": 0.249298095703125, "step": 11751 }, { "epoch": 0.7943760984182777, "grad_norm": 1.3546884059906006, "learning_rate": 3.2367595682064706e-06, "loss": 0.2010955810546875, "step": 11752 }, { "epoch": 0.7944436933892118, "grad_norm": 1.737994909286499, "learning_rate": 3.2347125343066573e-06, "loss": 0.33514404296875, "step": 11753 }, { "epoch": 0.794511288360146, "grad_norm": 1.740674376487732, "learning_rate": 3.2326660696846195e-06, "loss": 0.2611541748046875, "step": 11754 }, { "epoch": 0.7945788833310802, "grad_norm": 1.943558931350708, "learning_rate": 3.230620174439377e-06, "loss": 0.219146728515625, "step": 11755 }, { "epoch": 0.7946464783020143, "grad_norm": 1.7277449369430542, "learning_rate": 3.2285748486699198e-06, "loss": 0.224639892578125, "step": 11756 }, { "epoch": 0.7947140732729485, "grad_norm": 2.371195077896118, "learning_rate": 3.2265300924752243e-06, "loss": 0.279449462890625, "step": 11757 }, { "epoch": 0.7947816682438826, "grad_norm": 1.2983050346374512, "learning_rate": 3.2244859059542176e-06, "loss": 0.30950927734375, "step": 11758 }, { "epoch": 0.7948492632148169, "grad_norm": 1.1668503284454346, "learning_rate": 3.2224422892058098e-06, "loss": 0.28076171875, "step": 11759 }, { "epoch": 0.794916858185751, "grad_norm": 1.4108343124389648, "learning_rate": 3.220399242328897e-06, "loss": 0.2631683349609375, "step": 11760 }, { "epoch": 0.7949844531566851, "grad_norm": 1.010830044746399, "learning_rate": 3.218356765422317e-06, "loss": 0.1849365234375, "step": 11761 }, { "epoch": 0.7950520481276193, "grad_norm": 1.076494812965393, "learning_rate": 3.2163148585849134e-06, "loss": 0.1821441650390625, "step": 11762 }, { "epoch": 0.7951196430985534, "grad_norm": 1.8489933013916016, "learning_rate": 3.2142735219154714e-06, "loss": 0.301544189453125, "step": 11763 }, { "epoch": 0.7951872380694877, "grad_norm": 1.5276378393173218, "learning_rate": 3.2122327555127745e-06, "loss": 0.27001953125, "step": 11764 }, { "epoch": 0.7952548330404218, "grad_norm": 2.1298532485961914, "learning_rate": 3.2101925594755645e-06, "loss": 0.231964111328125, "step": 11765 }, { "epoch": 0.795322428011356, "grad_norm": 1.1607712507247925, "learning_rate": 3.2081529339025584e-06, "loss": 0.2020721435546875, "step": 11766 }, { "epoch": 0.7953900229822901, "grad_norm": 1.142738699913025, "learning_rate": 3.206113878892446e-06, "loss": 0.221466064453125, "step": 11767 }, { "epoch": 0.7954576179532242, "grad_norm": 1.3125636577606201, "learning_rate": 3.20407539454389e-06, "loss": 0.267333984375, "step": 11768 }, { "epoch": 0.7955252129241585, "grad_norm": 1.9285571575164795, "learning_rate": 3.2020374809555248e-06, "loss": 0.3031005859375, "step": 11769 }, { "epoch": 0.7955928078950926, "grad_norm": 1.7175636291503906, "learning_rate": 3.2000001382259563e-06, "loss": 0.1912689208984375, "step": 11770 }, { "epoch": 0.7956604028660268, "grad_norm": 1.1784473657608032, "learning_rate": 3.1979633664537645e-06, "loss": 0.25445556640625, "step": 11771 }, { "epoch": 0.7957279978369609, "grad_norm": 2.214582681655884, "learning_rate": 3.1959271657374984e-06, "loss": 0.28558349609375, "step": 11772 }, { "epoch": 0.7957955928078951, "grad_norm": 1.1248711347579956, "learning_rate": 3.193891536175691e-06, "loss": 0.2313232421875, "step": 11773 }, { "epoch": 0.7958631877788293, "grad_norm": 1.023896336555481, "learning_rate": 3.191856477866825e-06, "loss": 0.16161346435546875, "step": 11774 }, { "epoch": 0.7959307827497634, "grad_norm": 2.062239646911621, "learning_rate": 3.189821990909381e-06, "loss": 0.2547607421875, "step": 11775 }, { "epoch": 0.7959983777206976, "grad_norm": 1.846155047416687, "learning_rate": 3.187788075401795e-06, "loss": 0.24609375, "step": 11776 }, { "epoch": 0.7960659726916317, "grad_norm": 1.7304471731185913, "learning_rate": 3.185754731442482e-06, "loss": 0.1868438720703125, "step": 11777 }, { "epoch": 0.7961335676625659, "grad_norm": 1.015657663345337, "learning_rate": 3.1837219591298267e-06, "loss": 0.193359375, "step": 11778 }, { "epoch": 0.7962011626335, "grad_norm": 1.6635346412658691, "learning_rate": 3.181689758562188e-06, "loss": 0.2579345703125, "step": 11779 }, { "epoch": 0.7962687576044343, "grad_norm": 0.7625197172164917, "learning_rate": 3.1796581298378967e-06, "loss": 0.10175323486328125, "step": 11780 }, { "epoch": 0.7963363525753684, "grad_norm": 1.1517454385757446, "learning_rate": 3.1776270730552546e-06, "loss": 0.279541015625, "step": 11781 }, { "epoch": 0.7964039475463025, "grad_norm": 1.9483816623687744, "learning_rate": 3.1755965883125366e-06, "loss": 0.30975341796875, "step": 11782 }, { "epoch": 0.7964715425172367, "grad_norm": 1.3990641832351685, "learning_rate": 3.1735666757079903e-06, "loss": 0.21148681640625, "step": 11783 }, { "epoch": 0.7965391374881708, "grad_norm": 1.1773115396499634, "learning_rate": 3.171537335339837e-06, "loss": 0.22149658203125, "step": 11784 }, { "epoch": 0.7966067324591051, "grad_norm": 1.7142393589019775, "learning_rate": 3.1695085673062636e-06, "loss": 0.24005126953125, "step": 11785 }, { "epoch": 0.7966743274300392, "grad_norm": 1.1823134422302246, "learning_rate": 3.167480371705448e-06, "loss": 0.222686767578125, "step": 11786 }, { "epoch": 0.7967419224009734, "grad_norm": 1.438616156578064, "learning_rate": 3.165452748635508e-06, "loss": 0.2090301513671875, "step": 11787 }, { "epoch": 0.7968095173719075, "grad_norm": 1.7188853025436401, "learning_rate": 3.1634256981945663e-06, "loss": 0.268829345703125, "step": 11788 }, { "epoch": 0.7968771123428416, "grad_norm": 1.368569016456604, "learning_rate": 3.161399220480702e-06, "loss": 0.3236083984375, "step": 11789 }, { "epoch": 0.7969447073137759, "grad_norm": 1.2237277030944824, "learning_rate": 3.1593733155919647e-06, "loss": 0.243438720703125, "step": 11790 }, { "epoch": 0.79701230228471, "grad_norm": 1.0793801546096802, "learning_rate": 3.157347983626383e-06, "loss": 0.26519775390625, "step": 11791 }, { "epoch": 0.7970798972556442, "grad_norm": 1.1604467630386353, "learning_rate": 3.155323224681955e-06, "loss": 0.16911888122558594, "step": 11792 }, { "epoch": 0.7971474922265783, "grad_norm": 1.298903226852417, "learning_rate": 3.153299038856651e-06, "loss": 0.1891632080078125, "step": 11793 }, { "epoch": 0.7972150871975126, "grad_norm": 0.5955862998962402, "learning_rate": 3.151275426248413e-06, "loss": 0.09566116333007812, "step": 11794 }, { "epoch": 0.7972826821684467, "grad_norm": 1.1852436065673828, "learning_rate": 3.1492523869551566e-06, "loss": 0.1756591796875, "step": 11795 }, { "epoch": 0.7973502771393808, "grad_norm": 1.8934650421142578, "learning_rate": 3.1472299210747673e-06, "loss": 0.27569580078125, "step": 11796 }, { "epoch": 0.797417872110315, "grad_norm": 1.7349574565887451, "learning_rate": 3.145208028705107e-06, "loss": 0.2998046875, "step": 11797 }, { "epoch": 0.7974854670812491, "grad_norm": 1.3353626728057861, "learning_rate": 3.1431867099440028e-06, "loss": 0.286651611328125, "step": 11798 }, { "epoch": 0.7975530620521833, "grad_norm": 2.528902053833008, "learning_rate": 3.1411659648892705e-06, "loss": 0.2182159423828125, "step": 11799 }, { "epoch": 0.7976206570231175, "grad_norm": 0.6529483795166016, "learning_rate": 3.1391457936386693e-06, "loss": 0.1397552490234375, "step": 11800 }, { "epoch": 0.7976882519940517, "grad_norm": 1.8988332748413086, "learning_rate": 3.1371261962899607e-06, "loss": 0.2789306640625, "step": 11801 }, { "epoch": 0.7977558469649858, "grad_norm": 1.017391324043274, "learning_rate": 3.1351071729408597e-06, "loss": 0.236785888671875, "step": 11802 }, { "epoch": 0.7978234419359199, "grad_norm": 0.9667219519615173, "learning_rate": 3.133088723689064e-06, "loss": 0.23944091796875, "step": 11803 }, { "epoch": 0.7978910369068541, "grad_norm": 1.1116981506347656, "learning_rate": 3.1310708486322327e-06, "loss": 0.232330322265625, "step": 11804 }, { "epoch": 0.7979586318777883, "grad_norm": 1.9798719882965088, "learning_rate": 3.1290535478680067e-06, "loss": 0.353179931640625, "step": 11805 }, { "epoch": 0.7980262268487225, "grad_norm": 1.0999428033828735, "learning_rate": 3.127036821493994e-06, "loss": 0.230712890625, "step": 11806 }, { "epoch": 0.7980938218196566, "grad_norm": 1.2886523008346558, "learning_rate": 3.1250206696077775e-06, "loss": 0.222503662109375, "step": 11807 }, { "epoch": 0.7981614167905908, "grad_norm": 1.2797445058822632, "learning_rate": 3.123005092306912e-06, "loss": 0.2264404296875, "step": 11808 }, { "epoch": 0.7982290117615249, "grad_norm": 1.0049173831939697, "learning_rate": 3.120990089688921e-06, "loss": 0.17089080810546875, "step": 11809 }, { "epoch": 0.7982966067324591, "grad_norm": 1.6539822816848755, "learning_rate": 3.118975661851303e-06, "loss": 0.26055908203125, "step": 11810 }, { "epoch": 0.7983642017033933, "grad_norm": 1.2344602346420288, "learning_rate": 3.1169618088915275e-06, "loss": 0.25128173828125, "step": 11811 }, { "epoch": 0.7984317966743274, "grad_norm": 0.7527523636817932, "learning_rate": 3.1149485309070474e-06, "loss": 0.13483810424804688, "step": 11812 }, { "epoch": 0.7984993916452616, "grad_norm": 2.1964759826660156, "learning_rate": 3.112935827995262e-06, "loss": 0.2623291015625, "step": 11813 }, { "epoch": 0.7985669866161957, "grad_norm": 1.0760741233825684, "learning_rate": 3.1109237002535696e-06, "loss": 0.201873779296875, "step": 11814 }, { "epoch": 0.79863458158713, "grad_norm": 1.2271919250488281, "learning_rate": 3.1089121477793293e-06, "loss": 0.2157745361328125, "step": 11815 }, { "epoch": 0.7987021765580641, "grad_norm": 0.8833864331245422, "learning_rate": 3.106901170669863e-06, "loss": 0.1771087646484375, "step": 11816 }, { "epoch": 0.7987697715289982, "grad_norm": 0.9478638768196106, "learning_rate": 3.104890769022483e-06, "loss": 0.19866943359375, "step": 11817 }, { "epoch": 0.7988373664999324, "grad_norm": 1.101012110710144, "learning_rate": 3.1028809429344617e-06, "loss": 0.19476699829101562, "step": 11818 }, { "epoch": 0.7989049614708665, "grad_norm": 1.2643688917160034, "learning_rate": 3.1008716925030495e-06, "loss": 0.18672943115234375, "step": 11819 }, { "epoch": 0.7989725564418008, "grad_norm": 1.8570159673690796, "learning_rate": 3.0988630178254633e-06, "loss": 0.338958740234375, "step": 11820 }, { "epoch": 0.7990401514127349, "grad_norm": 1.0494966506958008, "learning_rate": 3.0968549189988976e-06, "loss": 0.205474853515625, "step": 11821 }, { "epoch": 0.7991077463836691, "grad_norm": 1.3340604305267334, "learning_rate": 3.0948473961205124e-06, "loss": 0.25628662109375, "step": 11822 }, { "epoch": 0.7991753413546032, "grad_norm": 1.5585472583770752, "learning_rate": 3.092840449287454e-06, "loss": 0.260589599609375, "step": 11823 }, { "epoch": 0.7992429363255373, "grad_norm": 1.2708789110183716, "learning_rate": 3.0908340785968176e-06, "loss": 0.21966552734375, "step": 11824 }, { "epoch": 0.7993105312964716, "grad_norm": 1.635237216949463, "learning_rate": 3.0888282841456986e-06, "loss": 0.301300048828125, "step": 11825 }, { "epoch": 0.7993781262674057, "grad_norm": 1.2055261135101318, "learning_rate": 3.086823066031134e-06, "loss": 0.232940673828125, "step": 11826 }, { "epoch": 0.7994457212383399, "grad_norm": 1.4129575490951538, "learning_rate": 3.084818424350161e-06, "loss": 0.23760986328125, "step": 11827 }, { "epoch": 0.799513316209274, "grad_norm": 1.4409981966018677, "learning_rate": 3.082814359199775e-06, "loss": 0.18964767456054688, "step": 11828 }, { "epoch": 0.7995809111802082, "grad_norm": 0.8091961145401001, "learning_rate": 3.080810870676936e-06, "loss": 0.184356689453125, "step": 11829 }, { "epoch": 0.7996485061511424, "grad_norm": 1.7291045188903809, "learning_rate": 3.0788079588785954e-06, "loss": 0.16516876220703125, "step": 11830 }, { "epoch": 0.7997161011220765, "grad_norm": 2.191342353820801, "learning_rate": 3.0768056239016624e-06, "loss": 0.277801513671875, "step": 11831 }, { "epoch": 0.7997836960930107, "grad_norm": 1.10447359085083, "learning_rate": 3.0748038658430224e-06, "loss": 0.19317626953125, "step": 11832 }, { "epoch": 0.7998512910639448, "grad_norm": 1.4086873531341553, "learning_rate": 3.0728026847995343e-06, "loss": 0.2596893310546875, "step": 11833 }, { "epoch": 0.799918886034879, "grad_norm": 1.2427250146865845, "learning_rate": 3.0708020808680265e-06, "loss": 0.282623291015625, "step": 11834 }, { "epoch": 0.7999864810058132, "grad_norm": 0.9067487716674805, "learning_rate": 3.068802054145296e-06, "loss": 0.232391357421875, "step": 11835 }, { "epoch": 0.8000540759767474, "grad_norm": 1.3794509172439575, "learning_rate": 3.0668026047281304e-06, "loss": 0.28594970703125, "step": 11836 }, { "epoch": 0.8001216709476815, "grad_norm": 1.1991990804672241, "learning_rate": 3.0648037327132596e-06, "loss": 0.19061279296875, "step": 11837 }, { "epoch": 0.8001892659186156, "grad_norm": 0.9194589257240295, "learning_rate": 3.062805438197415e-06, "loss": 0.1837310791015625, "step": 11838 }, { "epoch": 0.8002568608895498, "grad_norm": 1.7551848888397217, "learning_rate": 3.0608077212772757e-06, "loss": 0.263671875, "step": 11839 }, { "epoch": 0.800324455860484, "grad_norm": 0.7786551713943481, "learning_rate": 3.0588105820495047e-06, "loss": 0.16607666015625, "step": 11840 }, { "epoch": 0.8003920508314182, "grad_norm": 1.6138776540756226, "learning_rate": 3.056814020610746e-06, "loss": 0.30224609375, "step": 11841 }, { "epoch": 0.8004596458023523, "grad_norm": 1.610176682472229, "learning_rate": 3.054818037057593e-06, "loss": 0.294189453125, "step": 11842 }, { "epoch": 0.8005272407732865, "grad_norm": 0.8790087699890137, "learning_rate": 3.052822631486632e-06, "loss": 0.16666412353515625, "step": 11843 }, { "epoch": 0.8005948357442206, "grad_norm": 1.1651196479797363, "learning_rate": 3.050827803994411e-06, "loss": 0.22796630859375, "step": 11844 }, { "epoch": 0.8006624307151547, "grad_norm": 0.8168630599975586, "learning_rate": 3.0488335546774526e-06, "loss": 0.16400146484375, "step": 11845 }, { "epoch": 0.800730025686089, "grad_norm": 1.57257080078125, "learning_rate": 3.0468398836322497e-06, "loss": 0.21612548828125, "step": 11846 }, { "epoch": 0.8007976206570231, "grad_norm": 1.4485907554626465, "learning_rate": 3.0448467909552687e-06, "loss": 0.3140869140625, "step": 11847 }, { "epoch": 0.8008652156279573, "grad_norm": 1.0488115549087524, "learning_rate": 3.042854276742945e-06, "loss": 0.23760986328125, "step": 11848 }, { "epoch": 0.8009328105988914, "grad_norm": 1.4555011987686157, "learning_rate": 3.040862341091699e-06, "loss": 0.24212646484375, "step": 11849 }, { "epoch": 0.8010004055698257, "grad_norm": 1.460307240486145, "learning_rate": 3.0388709840979035e-06, "loss": 0.2255401611328125, "step": 11850 }, { "epoch": 0.8010680005407598, "grad_norm": 1.2902315855026245, "learning_rate": 3.0368802058579107e-06, "loss": 0.239532470703125, "step": 11851 }, { "epoch": 0.8011355955116939, "grad_norm": 0.7902944684028625, "learning_rate": 3.0348900064680597e-06, "loss": 0.153472900390625, "step": 11852 }, { "epoch": 0.8012031904826281, "grad_norm": 1.4706311225891113, "learning_rate": 3.0329003860246323e-06, "loss": 0.241058349609375, "step": 11853 }, { "epoch": 0.8012707854535622, "grad_norm": 0.9456811547279358, "learning_rate": 3.030911344623914e-06, "loss": 0.14404296875, "step": 11854 }, { "epoch": 0.8013383804244965, "grad_norm": 0.8068133592605591, "learning_rate": 3.028922882362132e-06, "loss": 0.14642333984375, "step": 11855 }, { "epoch": 0.8014059753954306, "grad_norm": 1.2114452123641968, "learning_rate": 3.026934999335512e-06, "loss": 0.1831817626953125, "step": 11856 }, { "epoch": 0.8014735703663648, "grad_norm": 1.2814821004867554, "learning_rate": 3.0249476956402383e-06, "loss": 0.15728759765625, "step": 11857 }, { "epoch": 0.8015411653372989, "grad_norm": 1.791735291481018, "learning_rate": 3.0229609713724655e-06, "loss": 0.1436614990234375, "step": 11858 }, { "epoch": 0.801608760308233, "grad_norm": 1.3990693092346191, "learning_rate": 3.0209748266283264e-06, "loss": 0.2673492431640625, "step": 11859 }, { "epoch": 0.8016763552791673, "grad_norm": 1.881870150566101, "learning_rate": 3.01898926150392e-06, "loss": 0.2900390625, "step": 11860 }, { "epoch": 0.8017439502501014, "grad_norm": 1.0515676736831665, "learning_rate": 3.0170042760953204e-06, "loss": 0.2802734375, "step": 11861 }, { "epoch": 0.8018115452210356, "grad_norm": 1.6917669773101807, "learning_rate": 3.0150198704985825e-06, "loss": 0.311614990234375, "step": 11862 }, { "epoch": 0.8018791401919697, "grad_norm": 1.6405830383300781, "learning_rate": 3.013036044809713e-06, "loss": 0.23382568359375, "step": 11863 }, { "epoch": 0.8019467351629039, "grad_norm": 2.1325700283050537, "learning_rate": 3.011052799124702e-06, "loss": 0.281524658203125, "step": 11864 }, { "epoch": 0.802014330133838, "grad_norm": 0.8200399875640869, "learning_rate": 3.009070133539523e-06, "loss": 0.1717071533203125, "step": 11865 }, { "epoch": 0.8020819251047722, "grad_norm": 1.5770832300186157, "learning_rate": 3.0070880481500928e-06, "loss": 0.2230377197265625, "step": 11866 }, { "epoch": 0.8021495200757064, "grad_norm": 1.2390731573104858, "learning_rate": 3.005106543052331e-06, "loss": 0.214385986328125, "step": 11867 }, { "epoch": 0.8022171150466405, "grad_norm": 1.0355061292648315, "learning_rate": 3.0031256183421107e-06, "loss": 0.290802001953125, "step": 11868 }, { "epoch": 0.8022847100175747, "grad_norm": 1.703702688217163, "learning_rate": 3.0011452741152783e-06, "loss": 0.25567626953125, "step": 11869 }, { "epoch": 0.8023523049885088, "grad_norm": 2.1512739658355713, "learning_rate": 2.999165510467661e-06, "loss": 0.271697998046875, "step": 11870 }, { "epoch": 0.8024198999594431, "grad_norm": 1.0116117000579834, "learning_rate": 2.9971863274950462e-06, "loss": 0.2134552001953125, "step": 11871 }, { "epoch": 0.8024874949303772, "grad_norm": 1.1743171215057373, "learning_rate": 2.9952077252932014e-06, "loss": 0.28363037109375, "step": 11872 }, { "epoch": 0.8025550899013113, "grad_norm": 1.387047290802002, "learning_rate": 2.993229703957865e-06, "loss": 0.231536865234375, "step": 11873 }, { "epoch": 0.8026226848722455, "grad_norm": 1.2900408506393433, "learning_rate": 2.991252263584745e-06, "loss": 0.15826416015625, "step": 11874 }, { "epoch": 0.8026902798431796, "grad_norm": 1.660800814628601, "learning_rate": 2.989275404269522e-06, "loss": 0.229278564453125, "step": 11875 }, { "epoch": 0.8027578748141139, "grad_norm": 1.149001121520996, "learning_rate": 2.9872991261078474e-06, "loss": 0.20251083374023438, "step": 11876 }, { "epoch": 0.802825469785048, "grad_norm": 1.4845330715179443, "learning_rate": 2.985323429195345e-06, "loss": 0.20015716552734375, "step": 11877 }, { "epoch": 0.8028930647559821, "grad_norm": 1.2455495595932007, "learning_rate": 2.98334831362762e-06, "loss": 0.2306365966796875, "step": 11878 }, { "epoch": 0.8029606597269163, "grad_norm": 0.8681292533874512, "learning_rate": 2.981373779500228e-06, "loss": 0.1719207763671875, "step": 11879 }, { "epoch": 0.8030282546978504, "grad_norm": 1.7073396444320679, "learning_rate": 2.979399826908718e-06, "loss": 0.24755859375, "step": 11880 }, { "epoch": 0.8030958496687847, "grad_norm": 1.6564371585845947, "learning_rate": 2.9774264559486015e-06, "loss": 0.246368408203125, "step": 11881 }, { "epoch": 0.8031634446397188, "grad_norm": 1.137073040008545, "learning_rate": 2.97545366671536e-06, "loss": 0.207733154296875, "step": 11882 }, { "epoch": 0.803231039610653, "grad_norm": 1.1271454095840454, "learning_rate": 2.9734814593044484e-06, "loss": 0.2786865234375, "step": 11883 }, { "epoch": 0.8032986345815871, "grad_norm": 2.448293685913086, "learning_rate": 2.9715098338112977e-06, "loss": 0.28631591796875, "step": 11884 }, { "epoch": 0.8033662295525212, "grad_norm": 2.385812520980835, "learning_rate": 2.9695387903313047e-06, "loss": 0.27313232421875, "step": 11885 }, { "epoch": 0.8034338245234555, "grad_norm": 1.7903344631195068, "learning_rate": 2.9675683289598434e-06, "loss": 0.30596923828125, "step": 11886 }, { "epoch": 0.8035014194943896, "grad_norm": 1.4197243452072144, "learning_rate": 2.9655984497922534e-06, "loss": 0.31085205078125, "step": 11887 }, { "epoch": 0.8035690144653238, "grad_norm": 1.2598421573638916, "learning_rate": 2.9636291529238505e-06, "loss": 0.228851318359375, "step": 11888 }, { "epoch": 0.8036366094362579, "grad_norm": 1.5040313005447388, "learning_rate": 2.961660438449926e-06, "loss": 0.22540283203125, "step": 11889 }, { "epoch": 0.8037042044071921, "grad_norm": 1.3472453355789185, "learning_rate": 2.959692306465729e-06, "loss": 0.24188232421875, "step": 11890 }, { "epoch": 0.8037717993781263, "grad_norm": 1.5410101413726807, "learning_rate": 2.957724757066504e-06, "loss": 0.20361328125, "step": 11891 }, { "epoch": 0.8038393943490604, "grad_norm": 1.1396616697311401, "learning_rate": 2.9557577903474365e-06, "loss": 0.2087860107421875, "step": 11892 }, { "epoch": 0.8039069893199946, "grad_norm": 1.2199599742889404, "learning_rate": 2.9537914064037153e-06, "loss": 0.2227783203125, "step": 11893 }, { "epoch": 0.8039745842909287, "grad_norm": 0.8874876499176025, "learning_rate": 2.951825605330479e-06, "loss": 0.17981719970703125, "step": 11894 }, { "epoch": 0.8040421792618629, "grad_norm": 0.9469290375709534, "learning_rate": 2.9498603872228475e-06, "loss": 0.2083740234375, "step": 11895 }, { "epoch": 0.8041097742327971, "grad_norm": 1.182270884513855, "learning_rate": 2.94789575217591e-06, "loss": 0.238128662109375, "step": 11896 }, { "epoch": 0.8041773692037313, "grad_norm": 0.7901829481124878, "learning_rate": 2.945931700284727e-06, "loss": 0.1570587158203125, "step": 11897 }, { "epoch": 0.8042449641746654, "grad_norm": 1.7067952156066895, "learning_rate": 2.9439682316443314e-06, "loss": 0.182952880859375, "step": 11898 }, { "epoch": 0.8043125591455995, "grad_norm": 1.0936182737350464, "learning_rate": 2.9420053463497273e-06, "loss": 0.1971588134765625, "step": 11899 }, { "epoch": 0.8043801541165337, "grad_norm": 1.0290955305099487, "learning_rate": 2.940043044495894e-06, "loss": 0.21087646484375, "step": 11900 }, { "epoch": 0.8044477490874679, "grad_norm": 1.447242259979248, "learning_rate": 2.9380813261777767e-06, "loss": 0.2283935546875, "step": 11901 }, { "epoch": 0.8045153440584021, "grad_norm": 0.7156626582145691, "learning_rate": 2.9361201914902985e-06, "loss": 0.11925506591796875, "step": 11902 }, { "epoch": 0.8045829390293362, "grad_norm": 0.5615459680557251, "learning_rate": 2.9341596405283465e-06, "loss": 0.10089111328125, "step": 11903 }, { "epoch": 0.8046505340002704, "grad_norm": 0.8481330275535583, "learning_rate": 2.932199673386795e-06, "loss": 0.1513671875, "step": 11904 }, { "epoch": 0.8047181289712045, "grad_norm": 1.8420060873031616, "learning_rate": 2.930240290160463e-06, "loss": 0.187286376953125, "step": 11905 }, { "epoch": 0.8047857239421387, "grad_norm": 1.2130851745605469, "learning_rate": 2.9282814909441728e-06, "loss": 0.2050323486328125, "step": 11906 }, { "epoch": 0.8048533189130729, "grad_norm": 1.1601823568344116, "learning_rate": 2.9263232758326957e-06, "loss": 0.161529541015625, "step": 11907 }, { "epoch": 0.804920913884007, "grad_norm": 1.4602882862091064, "learning_rate": 2.924365644920784e-06, "loss": 0.2170257568359375, "step": 11908 }, { "epoch": 0.8049885088549412, "grad_norm": 1.3478479385375977, "learning_rate": 2.92240859830316e-06, "loss": 0.21221923828125, "step": 11909 }, { "epoch": 0.8050561038258753, "grad_norm": 2.2135732173919678, "learning_rate": 2.9204521360745184e-06, "loss": 0.32965087890625, "step": 11910 }, { "epoch": 0.8051236987968096, "grad_norm": 1.1564072370529175, "learning_rate": 2.918496258329525e-06, "loss": 0.1758880615234375, "step": 11911 }, { "epoch": 0.8051912937677437, "grad_norm": 0.9607454538345337, "learning_rate": 2.9165409651628156e-06, "loss": 0.23345947265625, "step": 11912 }, { "epoch": 0.8052588887386778, "grad_norm": 2.573435068130493, "learning_rate": 2.914586256669001e-06, "loss": 0.3243408203125, "step": 11913 }, { "epoch": 0.805326483709612, "grad_norm": 1.497766137123108, "learning_rate": 2.9126321329426566e-06, "loss": 0.276092529296875, "step": 11914 }, { "epoch": 0.8053940786805461, "grad_norm": 1.3392750024795532, "learning_rate": 2.910678594078349e-06, "loss": 0.26043701171875, "step": 11915 }, { "epoch": 0.8054616736514804, "grad_norm": 1.4094938039779663, "learning_rate": 2.9087256401705857e-06, "loss": 0.1437530517578125, "step": 11916 }, { "epoch": 0.8055292686224145, "grad_norm": 1.5592488050460815, "learning_rate": 2.906773271313879e-06, "loss": 0.232086181640625, "step": 11917 }, { "epoch": 0.8055968635933487, "grad_norm": 1.1876426935195923, "learning_rate": 2.9048214876026807e-06, "loss": 0.167236328125, "step": 11918 }, { "epoch": 0.8056644585642828, "grad_norm": 1.2398639917373657, "learning_rate": 2.9028702891314408e-06, "loss": 0.2500457763671875, "step": 11919 }, { "epoch": 0.8057320535352169, "grad_norm": 1.3259605169296265, "learning_rate": 2.9009196759945716e-06, "loss": 0.26092529296875, "step": 11920 }, { "epoch": 0.8057996485061512, "grad_norm": 1.387316346168518, "learning_rate": 2.8989696482864446e-06, "loss": 0.2484588623046875, "step": 11921 }, { "epoch": 0.8058672434770853, "grad_norm": 1.3614680767059326, "learning_rate": 2.897020206101426e-06, "loss": 0.2464599609375, "step": 11922 }, { "epoch": 0.8059348384480195, "grad_norm": 1.0497716665267944, "learning_rate": 2.895071349533837e-06, "loss": 0.124420166015625, "step": 11923 }, { "epoch": 0.8060024334189536, "grad_norm": 1.3476605415344238, "learning_rate": 2.8931230786779757e-06, "loss": 0.211334228515625, "step": 11924 }, { "epoch": 0.8060700283898878, "grad_norm": 1.3945921659469604, "learning_rate": 2.891175393628111e-06, "loss": 0.27880859375, "step": 11925 }, { "epoch": 0.806137623360822, "grad_norm": 1.4707396030426025, "learning_rate": 2.889228294478485e-06, "loss": 0.199676513671875, "step": 11926 }, { "epoch": 0.8062052183317561, "grad_norm": 2.0866892337799072, "learning_rate": 2.8872817813233067e-06, "loss": 0.317413330078125, "step": 11927 }, { "epoch": 0.8062728133026903, "grad_norm": 1.1136683225631714, "learning_rate": 2.8853358542567725e-06, "loss": 0.1866302490234375, "step": 11928 }, { "epoch": 0.8063404082736244, "grad_norm": 3.152024030685425, "learning_rate": 2.883390513373021e-06, "loss": 0.351470947265625, "step": 11929 }, { "epoch": 0.8064080032445586, "grad_norm": 1.114015817642212, "learning_rate": 2.881445758766196e-06, "loss": 0.2645263671875, "step": 11930 }, { "epoch": 0.8064755982154928, "grad_norm": 0.9922312498092651, "learning_rate": 2.8795015905303834e-06, "loss": 0.1805877685546875, "step": 11931 }, { "epoch": 0.806543193186427, "grad_norm": 1.5271178483963013, "learning_rate": 2.8775580087596635e-06, "loss": 0.3173828125, "step": 11932 }, { "epoch": 0.8066107881573611, "grad_norm": 1.5294249057769775, "learning_rate": 2.87561501354808e-06, "loss": 0.26324462890625, "step": 11933 }, { "epoch": 0.8066783831282952, "grad_norm": 1.4502356052398682, "learning_rate": 2.8736726049896363e-06, "loss": 0.22283935546875, "step": 11934 }, { "epoch": 0.8067459780992294, "grad_norm": 1.5278798341751099, "learning_rate": 2.871730783178326e-06, "loss": 0.2828826904296875, "step": 11935 }, { "epoch": 0.8068135730701635, "grad_norm": 1.1870309114456177, "learning_rate": 2.8697895482081073e-06, "loss": 0.217620849609375, "step": 11936 }, { "epoch": 0.8068811680410978, "grad_norm": 0.9652218818664551, "learning_rate": 2.8678489001729068e-06, "loss": 0.174468994140625, "step": 11937 }, { "epoch": 0.8069487630120319, "grad_norm": 0.7951174974441528, "learning_rate": 2.8659088391666253e-06, "loss": 0.1843719482421875, "step": 11938 }, { "epoch": 0.8070163579829661, "grad_norm": 1.6040563583374023, "learning_rate": 2.863969365283136e-06, "loss": 0.29974365234375, "step": 11939 }, { "epoch": 0.8070839529539002, "grad_norm": 1.4522780179977417, "learning_rate": 2.862030478616277e-06, "loss": 0.1911773681640625, "step": 11940 }, { "epoch": 0.8071515479248343, "grad_norm": 1.756803035736084, "learning_rate": 2.8600921792598782e-06, "loss": 0.2354888916015625, "step": 11941 }, { "epoch": 0.8072191428957686, "grad_norm": 1.2943447828292847, "learning_rate": 2.858154467307708e-06, "loss": 0.2425537109375, "step": 11942 }, { "epoch": 0.8072867378667027, "grad_norm": 2.1031253337860107, "learning_rate": 2.8562173428535363e-06, "loss": 0.260650634765625, "step": 11943 }, { "epoch": 0.8073543328376369, "grad_norm": 1.1192381381988525, "learning_rate": 2.854280805991097e-06, "loss": 0.229888916015625, "step": 11944 }, { "epoch": 0.807421927808571, "grad_norm": 1.2784991264343262, "learning_rate": 2.8523448568140777e-06, "loss": 0.29052734375, "step": 11945 }, { "epoch": 0.8074895227795053, "grad_norm": 0.8860880732536316, "learning_rate": 2.850409495416167e-06, "loss": 0.140228271484375, "step": 11946 }, { "epoch": 0.8075571177504394, "grad_norm": 1.3938366174697876, "learning_rate": 2.8484747218909952e-06, "loss": 0.24395751953125, "step": 11947 }, { "epoch": 0.8076247127213735, "grad_norm": 1.5418988466262817, "learning_rate": 2.846540536332189e-06, "loss": 0.254974365234375, "step": 11948 }, { "epoch": 0.8076923076923077, "grad_norm": 1.355627179145813, "learning_rate": 2.8446069388333333e-06, "loss": 0.213470458984375, "step": 11949 }, { "epoch": 0.8077599026632418, "grad_norm": 0.7550249099731445, "learning_rate": 2.842673929487989e-06, "loss": 0.1445770263671875, "step": 11950 }, { "epoch": 0.807827497634176, "grad_norm": 1.7897329330444336, "learning_rate": 2.840741508389684e-06, "loss": 0.285797119140625, "step": 11951 }, { "epoch": 0.8078950926051102, "grad_norm": 1.1219416856765747, "learning_rate": 2.8388096756319216e-06, "loss": 0.17315673828125, "step": 11952 }, { "epoch": 0.8079626875760444, "grad_norm": 1.1850361824035645, "learning_rate": 2.836878431308174e-06, "loss": 0.199920654296875, "step": 11953 }, { "epoch": 0.8080302825469785, "grad_norm": 1.2890090942382812, "learning_rate": 2.8349477755118984e-06, "loss": 0.218658447265625, "step": 11954 }, { "epoch": 0.8080978775179126, "grad_norm": 1.6932737827301025, "learning_rate": 2.8330177083364983e-06, "loss": 0.2100067138671875, "step": 11955 }, { "epoch": 0.8081654724888468, "grad_norm": 0.8161848783493042, "learning_rate": 2.8310882298753622e-06, "loss": 0.12280654907226562, "step": 11956 }, { "epoch": 0.808233067459781, "grad_norm": 1.3216919898986816, "learning_rate": 2.8291593402218636e-06, "loss": 0.194366455078125, "step": 11957 }, { "epoch": 0.8083006624307152, "grad_norm": 1.6310639381408691, "learning_rate": 2.827231039469318e-06, "loss": 0.2610015869140625, "step": 11958 }, { "epoch": 0.8083682574016493, "grad_norm": 1.2608838081359863, "learning_rate": 2.8253033277110407e-06, "loss": 0.19708251953125, "step": 11959 }, { "epoch": 0.8084358523725835, "grad_norm": 1.715684175491333, "learning_rate": 2.823376205040302e-06, "loss": 0.27093505859375, "step": 11960 }, { "epoch": 0.8085034473435176, "grad_norm": 1.34602689743042, "learning_rate": 2.821449671550348e-06, "loss": 0.19158935546875, "step": 11961 }, { "epoch": 0.8085710423144518, "grad_norm": 1.5567187070846558, "learning_rate": 2.8195237273343964e-06, "loss": 0.27734375, "step": 11962 }, { "epoch": 0.808638637285386, "grad_norm": 2.1969947814941406, "learning_rate": 2.817598372485637e-06, "loss": 0.326263427734375, "step": 11963 }, { "epoch": 0.8087062322563201, "grad_norm": 1.019577980041504, "learning_rate": 2.8156736070972276e-06, "loss": 0.21240234375, "step": 11964 }, { "epoch": 0.8087738272272543, "grad_norm": 1.4204270839691162, "learning_rate": 2.8137494312623046e-06, "loss": 0.1484375, "step": 11965 }, { "epoch": 0.8088414221981884, "grad_norm": 1.4065881967544556, "learning_rate": 2.8118258450739663e-06, "loss": 0.27008056640625, "step": 11966 }, { "epoch": 0.8089090171691227, "grad_norm": 1.3087680339813232, "learning_rate": 2.809902848625297e-06, "loss": 0.31231689453125, "step": 11967 }, { "epoch": 0.8089766121400568, "grad_norm": 1.263419270515442, "learning_rate": 2.807980442009335e-06, "loss": 0.1943206787109375, "step": 11968 }, { "epoch": 0.8090442071109909, "grad_norm": 1.3100699186325073, "learning_rate": 2.806058625319095e-06, "loss": 0.24432373046875, "step": 11969 }, { "epoch": 0.8091118020819251, "grad_norm": 1.3578945398330688, "learning_rate": 2.804137398647582e-06, "loss": 0.294677734375, "step": 11970 }, { "epoch": 0.8091793970528592, "grad_norm": 2.1133196353912354, "learning_rate": 2.802216762087737e-06, "loss": 0.297088623046875, "step": 11971 }, { "epoch": 0.8092469920237935, "grad_norm": 1.3035674095153809, "learning_rate": 2.800296715732507e-06, "loss": 0.268798828125, "step": 11972 }, { "epoch": 0.8093145869947276, "grad_norm": 1.6288509368896484, "learning_rate": 2.7983772596747913e-06, "loss": 0.253570556640625, "step": 11973 }, { "epoch": 0.8093821819656618, "grad_norm": 1.3037588596343994, "learning_rate": 2.7964583940074656e-06, "loss": 0.270599365234375, "step": 11974 }, { "epoch": 0.8094497769365959, "grad_norm": 1.0214874744415283, "learning_rate": 2.7945401188233738e-06, "loss": 0.1808624267578125, "step": 11975 }, { "epoch": 0.80951737190753, "grad_norm": 1.074669361114502, "learning_rate": 2.7926224342153365e-06, "loss": 0.22015380859375, "step": 11976 }, { "epoch": 0.8095849668784643, "grad_norm": 0.4236086308956146, "learning_rate": 2.7907053402761435e-06, "loss": 0.054241180419921875, "step": 11977 }, { "epoch": 0.8096525618493984, "grad_norm": 0.839593768119812, "learning_rate": 2.7887888370985548e-06, "loss": 0.11870574951171875, "step": 11978 }, { "epoch": 0.8097201568203326, "grad_norm": 1.744693398475647, "learning_rate": 2.7868729247753032e-06, "loss": 0.24737548828125, "step": 11979 }, { "epoch": 0.8097877517912667, "grad_norm": 1.5342555046081543, "learning_rate": 2.7849576033990915e-06, "loss": 0.238922119140625, "step": 11980 }, { "epoch": 0.809855346762201, "grad_norm": 0.7774208188056946, "learning_rate": 2.783042873062595e-06, "loss": 0.1605682373046875, "step": 11981 }, { "epoch": 0.8099229417331351, "grad_norm": 1.450073480606079, "learning_rate": 2.7811287338584584e-06, "loss": 0.29425048828125, "step": 11982 }, { "epoch": 0.8099905367040692, "grad_norm": 0.7019791007041931, "learning_rate": 2.779215185879309e-06, "loss": 0.148406982421875, "step": 11983 }, { "epoch": 0.8100581316750034, "grad_norm": 1.2502233982086182, "learning_rate": 2.7773022292177214e-06, "loss": 0.29425048828125, "step": 11984 }, { "epoch": 0.8101257266459375, "grad_norm": 2.011176347732544, "learning_rate": 2.775389863966268e-06, "loss": 0.2514801025390625, "step": 11985 }, { "epoch": 0.8101933216168717, "grad_norm": 1.925784945487976, "learning_rate": 2.7734780902174763e-06, "loss": 0.220703125, "step": 11986 }, { "epoch": 0.8102609165878059, "grad_norm": 0.9302476644515991, "learning_rate": 2.7715669080638507e-06, "loss": 0.194061279296875, "step": 11987 }, { "epoch": 0.8103285115587401, "grad_norm": 1.4454412460327148, "learning_rate": 2.7696563175978674e-06, "loss": 0.31561279296875, "step": 11988 }, { "epoch": 0.8103961065296742, "grad_norm": 0.7387446761131287, "learning_rate": 2.767746318911969e-06, "loss": 0.10785675048828125, "step": 11989 }, { "epoch": 0.8104637015006083, "grad_norm": 0.9383262991905212, "learning_rate": 2.7658369120985734e-06, "loss": 0.1691436767578125, "step": 11990 }, { "epoch": 0.8105312964715425, "grad_norm": 1.2489320039749146, "learning_rate": 2.763928097250078e-06, "loss": 0.2242431640625, "step": 11991 }, { "epoch": 0.8105988914424767, "grad_norm": 0.8506916165351868, "learning_rate": 2.7620198744588325e-06, "loss": 0.127044677734375, "step": 11992 }, { "epoch": 0.8106664864134109, "grad_norm": 1.1638176441192627, "learning_rate": 2.7601122438171734e-06, "loss": 0.1912689208984375, "step": 11993 }, { "epoch": 0.810734081384345, "grad_norm": 1.224530577659607, "learning_rate": 2.758205205417401e-06, "loss": 0.24102783203125, "step": 11994 }, { "epoch": 0.8108016763552792, "grad_norm": 0.969437837600708, "learning_rate": 2.7562987593517906e-06, "loss": 0.2258758544921875, "step": 11995 }, { "epoch": 0.8108692713262133, "grad_norm": 3.0308377742767334, "learning_rate": 2.754392905712595e-06, "loss": 0.29791259765625, "step": 11996 }, { "epoch": 0.8109368662971475, "grad_norm": 1.26758873462677, "learning_rate": 2.7524876445920184e-06, "loss": 0.2916259765625, "step": 11997 }, { "epoch": 0.8110044612680817, "grad_norm": 1.2861827611923218, "learning_rate": 2.750582976082259e-06, "loss": 0.2172393798828125, "step": 11998 }, { "epoch": 0.8110720562390158, "grad_norm": 1.3062818050384521, "learning_rate": 2.7486789002754743e-06, "loss": 0.22128677368164062, "step": 11999 }, { "epoch": 0.81113965120995, "grad_norm": 1.7419273853302002, "learning_rate": 2.746775417263794e-06, "loss": 0.2635955810546875, "step": 12000 }, { "epoch": 0.8112072461808841, "grad_norm": 1.3770902156829834, "learning_rate": 2.7448725271393223e-06, "loss": 0.232177734375, "step": 12001 }, { "epoch": 0.8112748411518184, "grad_norm": 1.2988826036453247, "learning_rate": 2.7429702299941305e-06, "loss": 0.204559326171875, "step": 12002 }, { "epoch": 0.8113424361227525, "grad_norm": 1.9535526037216187, "learning_rate": 2.7410685259202644e-06, "loss": 0.312408447265625, "step": 12003 }, { "epoch": 0.8114100310936866, "grad_norm": 2.169865369796753, "learning_rate": 2.73916741500974e-06, "loss": 0.254241943359375, "step": 12004 }, { "epoch": 0.8114776260646208, "grad_norm": 1.1395128965377808, "learning_rate": 2.7372668973545474e-06, "loss": 0.250213623046875, "step": 12005 }, { "epoch": 0.8115452210355549, "grad_norm": 1.01144540309906, "learning_rate": 2.7353669730466373e-06, "loss": 0.216796875, "step": 12006 }, { "epoch": 0.8116128160064892, "grad_norm": 1.4814341068267822, "learning_rate": 2.733467642177955e-06, "loss": 0.244354248046875, "step": 12007 }, { "epoch": 0.8116804109774233, "grad_norm": 0.9397907257080078, "learning_rate": 2.7315689048403843e-06, "loss": 0.240142822265625, "step": 12008 }, { "epoch": 0.8117480059483574, "grad_norm": 0.956082820892334, "learning_rate": 2.7296707611258153e-06, "loss": 0.217071533203125, "step": 12009 }, { "epoch": 0.8118156009192916, "grad_norm": 0.8040357828140259, "learning_rate": 2.7277732111260756e-06, "loss": 0.1201324462890625, "step": 12010 }, { "epoch": 0.8118831958902257, "grad_norm": 1.3064091205596924, "learning_rate": 2.725876254932991e-06, "loss": 0.2489776611328125, "step": 12011 }, { "epoch": 0.81195079086116, "grad_norm": 1.2176275253295898, "learning_rate": 2.7239798926383456e-06, "loss": 0.2165985107421875, "step": 12012 }, { "epoch": 0.8120183858320941, "grad_norm": 1.792299747467041, "learning_rate": 2.7220841243338955e-06, "loss": 0.271453857421875, "step": 12013 }, { "epoch": 0.8120859808030283, "grad_norm": 1.8303112983703613, "learning_rate": 2.7201889501113723e-06, "loss": 0.27783203125, "step": 12014 }, { "epoch": 0.8121535757739624, "grad_norm": 1.3983898162841797, "learning_rate": 2.7182943700624763e-06, "loss": 0.26153564453125, "step": 12015 }, { "epoch": 0.8122211707448965, "grad_norm": 1.141014814376831, "learning_rate": 2.7164003842788755e-06, "loss": 0.2099609375, "step": 12016 }, { "epoch": 0.8122887657158308, "grad_norm": 1.279937505722046, "learning_rate": 2.7145069928522163e-06, "loss": 0.19354248046875, "step": 12017 }, { "epoch": 0.8123563606867649, "grad_norm": 1.6083099842071533, "learning_rate": 2.7126141958741113e-06, "loss": 0.26007080078125, "step": 12018 }, { "epoch": 0.8124239556576991, "grad_norm": 1.8353582620620728, "learning_rate": 2.7107219934361417e-06, "loss": 0.26171875, "step": 12019 }, { "epoch": 0.8124915506286332, "grad_norm": 1.1141027212142944, "learning_rate": 2.7088303856298755e-06, "loss": 0.258544921875, "step": 12020 }, { "epoch": 0.8125591455995674, "grad_norm": 0.7920536994934082, "learning_rate": 2.706939372546827e-06, "loss": 0.07971954345703125, "step": 12021 }, { "epoch": 0.8126267405705015, "grad_norm": 1.0933423042297363, "learning_rate": 2.7050489542785077e-06, "loss": 0.25885009765625, "step": 12022 }, { "epoch": 0.8126943355414357, "grad_norm": 1.5604292154312134, "learning_rate": 2.7031591309163744e-06, "loss": 0.3023681640625, "step": 12023 }, { "epoch": 0.8127619305123699, "grad_norm": 1.313576340675354, "learning_rate": 2.701269902551879e-06, "loss": 0.23126220703125, "step": 12024 }, { "epoch": 0.812829525483304, "grad_norm": 2.2790088653564453, "learning_rate": 2.6993812692764314e-06, "loss": 0.248321533203125, "step": 12025 }, { "epoch": 0.8128971204542382, "grad_norm": 1.1262320280075073, "learning_rate": 2.697493231181414e-06, "loss": 0.185272216796875, "step": 12026 }, { "epoch": 0.8129647154251723, "grad_norm": 0.999570369720459, "learning_rate": 2.695605788358183e-06, "loss": 0.236480712890625, "step": 12027 }, { "epoch": 0.8130323103961066, "grad_norm": 1.5146037340164185, "learning_rate": 2.6937189408980644e-06, "loss": 0.3193359375, "step": 12028 }, { "epoch": 0.8130999053670407, "grad_norm": 1.186523199081421, "learning_rate": 2.691832688892355e-06, "loss": 0.239898681640625, "step": 12029 }, { "epoch": 0.8131675003379748, "grad_norm": 1.0974133014678955, "learning_rate": 2.6899470324323233e-06, "loss": 0.16802978515625, "step": 12030 }, { "epoch": 0.813235095308909, "grad_norm": 1.2795313596725464, "learning_rate": 2.6880619716092098e-06, "loss": 0.281097412109375, "step": 12031 }, { "epoch": 0.8133026902798431, "grad_norm": 0.9820733666419983, "learning_rate": 2.686177506514221e-06, "loss": 0.2082977294921875, "step": 12032 }, { "epoch": 0.8133702852507774, "grad_norm": 0.9624720215797424, "learning_rate": 2.684293637238552e-06, "loss": 0.1741485595703125, "step": 12033 }, { "epoch": 0.8134378802217115, "grad_norm": 1.352311134338379, "learning_rate": 2.68241036387334e-06, "loss": 0.31719970703125, "step": 12034 }, { "epoch": 0.8135054751926457, "grad_norm": 1.280611515045166, "learning_rate": 2.6805276865097193e-06, "loss": 0.2271728515625, "step": 12035 }, { "epoch": 0.8135730701635798, "grad_norm": 1.275996208190918, "learning_rate": 2.6786456052387843e-06, "loss": 0.21533203125, "step": 12036 }, { "epoch": 0.8136406651345139, "grad_norm": 1.4196772575378418, "learning_rate": 2.6767641201515996e-06, "loss": 0.2657012939453125, "step": 12037 }, { "epoch": 0.8137082601054482, "grad_norm": 1.279968023300171, "learning_rate": 2.6748832313392094e-06, "loss": 0.27655029296875, "step": 12038 }, { "epoch": 0.8137758550763823, "grad_norm": 1.7560466527938843, "learning_rate": 2.673002938892608e-06, "loss": 0.2784423828125, "step": 12039 }, { "epoch": 0.8138434500473165, "grad_norm": 2.2094175815582275, "learning_rate": 2.6711232429027905e-06, "loss": 0.2946929931640625, "step": 12040 }, { "epoch": 0.8139110450182506, "grad_norm": 1.896359920501709, "learning_rate": 2.6692441434607024e-06, "loss": 0.34271240234375, "step": 12041 }, { "epoch": 0.8139786399891848, "grad_norm": 1.1161779165267944, "learning_rate": 2.667365640657268e-06, "loss": 0.219696044921875, "step": 12042 }, { "epoch": 0.814046234960119, "grad_norm": 0.6093437075614929, "learning_rate": 2.6654877345833773e-06, "loss": 0.091522216796875, "step": 12043 }, { "epoch": 0.8141138299310531, "grad_norm": 2.0864450931549072, "learning_rate": 2.6636104253298997e-06, "loss": 0.26385498046875, "step": 12044 }, { "epoch": 0.8141814249019873, "grad_norm": 0.7578505873680115, "learning_rate": 2.661733712987663e-06, "loss": 0.12811279296875, "step": 12045 }, { "epoch": 0.8142490198729214, "grad_norm": 0.9977037310600281, "learning_rate": 2.659857597647489e-06, "loss": 0.2022247314453125, "step": 12046 }, { "epoch": 0.8143166148438556, "grad_norm": 1.3085300922393799, "learning_rate": 2.6579820794001397e-06, "loss": 0.284149169921875, "step": 12047 }, { "epoch": 0.8143842098147898, "grad_norm": 2.1341898441314697, "learning_rate": 2.6561071583363748e-06, "loss": 0.1857757568359375, "step": 12048 }, { "epoch": 0.814451804785724, "grad_norm": 1.2497882843017578, "learning_rate": 2.6542328345469154e-06, "loss": 0.24639892578125, "step": 12049 }, { "epoch": 0.8145193997566581, "grad_norm": 1.5298889875411987, "learning_rate": 2.6523591081224413e-06, "loss": 0.214019775390625, "step": 12050 }, { "epoch": 0.8145869947275922, "grad_norm": 1.967480182647705, "learning_rate": 2.650485979153627e-06, "loss": 0.279510498046875, "step": 12051 }, { "epoch": 0.8146545896985264, "grad_norm": 1.5300109386444092, "learning_rate": 2.648613447731101e-06, "loss": 0.297454833984375, "step": 12052 }, { "epoch": 0.8147221846694606, "grad_norm": 1.1177732944488525, "learning_rate": 2.6467415139454693e-06, "loss": 0.2049102783203125, "step": 12053 }, { "epoch": 0.8147897796403948, "grad_norm": 1.4238524436950684, "learning_rate": 2.644870177887307e-06, "loss": 0.2242431640625, "step": 12054 }, { "epoch": 0.8148573746113289, "grad_norm": 1.2792719602584839, "learning_rate": 2.6429994396471612e-06, "loss": 0.247528076171875, "step": 12055 }, { "epoch": 0.8149249695822631, "grad_norm": 1.3488157987594604, "learning_rate": 2.64112929931555e-06, "loss": 0.25616455078125, "step": 12056 }, { "epoch": 0.8149925645531972, "grad_norm": 1.9589682817459106, "learning_rate": 2.6392597569829623e-06, "loss": 0.31494140625, "step": 12057 }, { "epoch": 0.8150601595241314, "grad_norm": 2.055525541305542, "learning_rate": 2.6373908127398545e-06, "loss": 0.272918701171875, "step": 12058 }, { "epoch": 0.8151277544950656, "grad_norm": 1.7313462495803833, "learning_rate": 2.6355224666766688e-06, "loss": 0.31890869140625, "step": 12059 }, { "epoch": 0.8151953494659997, "grad_norm": 1.1614015102386475, "learning_rate": 2.6336547188837926e-06, "loss": 0.19184112548828125, "step": 12060 }, { "epoch": 0.8152629444369339, "grad_norm": 1.4580488204956055, "learning_rate": 2.6317875694516113e-06, "loss": 0.2254638671875, "step": 12061 }, { "epoch": 0.815330539407868, "grad_norm": 1.7444634437561035, "learning_rate": 2.629921018470467e-06, "loss": 0.3143310546875, "step": 12062 }, { "epoch": 0.8153981343788023, "grad_norm": 1.4395617246627808, "learning_rate": 2.6280550660306657e-06, "loss": 0.293121337890625, "step": 12063 }, { "epoch": 0.8154657293497364, "grad_norm": 1.47008216381073, "learning_rate": 2.6261897122225036e-06, "loss": 0.210601806640625, "step": 12064 }, { "epoch": 0.8155333243206705, "grad_norm": 1.2653813362121582, "learning_rate": 2.6243249571362353e-06, "loss": 0.265289306640625, "step": 12065 }, { "epoch": 0.8156009192916047, "grad_norm": 1.040819525718689, "learning_rate": 2.6224608008620885e-06, "loss": 0.23406982421875, "step": 12066 }, { "epoch": 0.8156685142625388, "grad_norm": 1.5287431478500366, "learning_rate": 2.6205972434902646e-06, "loss": 0.224945068359375, "step": 12067 }, { "epoch": 0.8157361092334731, "grad_norm": 1.1661432981491089, "learning_rate": 2.618734285110933e-06, "loss": 0.230926513671875, "step": 12068 }, { "epoch": 0.8158037042044072, "grad_norm": 0.7341121435165405, "learning_rate": 2.6168719258142333e-06, "loss": 0.16634368896484375, "step": 12069 }, { "epoch": 0.8158712991753414, "grad_norm": 1.364912986755371, "learning_rate": 2.6150101656902796e-06, "loss": 0.260894775390625, "step": 12070 }, { "epoch": 0.8159388941462755, "grad_norm": 1.057715654373169, "learning_rate": 2.613149004829154e-06, "loss": 0.27001953125, "step": 12071 }, { "epoch": 0.8160064891172096, "grad_norm": 1.3962479829788208, "learning_rate": 2.6112884433209188e-06, "loss": 0.313140869140625, "step": 12072 }, { "epoch": 0.8160740840881439, "grad_norm": 0.8783037662506104, "learning_rate": 2.609428481255589e-06, "loss": 0.175628662109375, "step": 12073 }, { "epoch": 0.816141679059078, "grad_norm": 1.1748296022415161, "learning_rate": 2.6075691187231627e-06, "loss": 0.2023162841796875, "step": 12074 }, { "epoch": 0.8162092740300122, "grad_norm": 1.4636338949203491, "learning_rate": 2.6057103558136157e-06, "loss": 0.295318603515625, "step": 12075 }, { "epoch": 0.8162768690009463, "grad_norm": 1.5513286590576172, "learning_rate": 2.603852192616875e-06, "loss": 0.26611328125, "step": 12076 }, { "epoch": 0.8163444639718805, "grad_norm": 0.9267158508300781, "learning_rate": 2.6019946292228604e-06, "loss": 0.1558837890625, "step": 12077 }, { "epoch": 0.8164120589428147, "grad_norm": 0.8709496259689331, "learning_rate": 2.600137665721446e-06, "loss": 0.19036865234375, "step": 12078 }, { "epoch": 0.8164796539137488, "grad_norm": 1.66867995262146, "learning_rate": 2.598281302202487e-06, "loss": 0.326873779296875, "step": 12079 }, { "epoch": 0.816547248884683, "grad_norm": 1.3126569986343384, "learning_rate": 2.596425538755803e-06, "loss": 0.2581787109375, "step": 12080 }, { "epoch": 0.8166148438556171, "grad_norm": 1.9530712366104126, "learning_rate": 2.5945703754711895e-06, "loss": 0.240997314453125, "step": 12081 }, { "epoch": 0.8166824388265513, "grad_norm": 1.3250545263290405, "learning_rate": 2.592715812438406e-06, "loss": 0.15654754638671875, "step": 12082 }, { "epoch": 0.8167500337974855, "grad_norm": 1.8086405992507935, "learning_rate": 2.590861849747197e-06, "loss": 0.299560546875, "step": 12083 }, { "epoch": 0.8168176287684197, "grad_norm": 1.2226189374923706, "learning_rate": 2.589008487487261e-06, "loss": 0.292724609375, "step": 12084 }, { "epoch": 0.8168852237393538, "grad_norm": 1.822994351387024, "learning_rate": 2.587155725748278e-06, "loss": 0.33642578125, "step": 12085 }, { "epoch": 0.8169528187102879, "grad_norm": 0.9907007813453674, "learning_rate": 2.5853035646198946e-06, "loss": 0.183563232421875, "step": 12086 }, { "epoch": 0.8170204136812221, "grad_norm": 1.4290590286254883, "learning_rate": 2.583452004191729e-06, "loss": 0.21380615234375, "step": 12087 }, { "epoch": 0.8170880086521563, "grad_norm": 1.2726037502288818, "learning_rate": 2.5816010445533784e-06, "loss": 0.226409912109375, "step": 12088 }, { "epoch": 0.8171556036230905, "grad_norm": 1.1768219470977783, "learning_rate": 2.579750685794392e-06, "loss": 0.225677490234375, "step": 12089 }, { "epoch": 0.8172231985940246, "grad_norm": 0.8325135707855225, "learning_rate": 2.577900928004312e-06, "loss": 0.175048828125, "step": 12090 }, { "epoch": 0.8172907935649588, "grad_norm": 1.1199724674224854, "learning_rate": 2.576051771272637e-06, "loss": 0.213775634765625, "step": 12091 }, { "epoch": 0.8173583885358929, "grad_norm": 1.7248778343200684, "learning_rate": 2.5742032156888413e-06, "loss": 0.251922607421875, "step": 12092 }, { "epoch": 0.817425983506827, "grad_norm": 1.2882704734802246, "learning_rate": 2.572355261342369e-06, "loss": 0.27215576171875, "step": 12093 }, { "epoch": 0.8174935784777613, "grad_norm": 2.3791072368621826, "learning_rate": 2.5705079083226366e-06, "loss": 0.2320556640625, "step": 12094 }, { "epoch": 0.8175611734486954, "grad_norm": 1.86626136302948, "learning_rate": 2.568661156719025e-06, "loss": 0.2573699951171875, "step": 12095 }, { "epoch": 0.8176287684196296, "grad_norm": 1.266827940940857, "learning_rate": 2.5668150066209033e-06, "loss": 0.2123565673828125, "step": 12096 }, { "epoch": 0.8176963633905637, "grad_norm": 0.8007451295852661, "learning_rate": 2.564969458117591e-06, "loss": 0.1374664306640625, "step": 12097 }, { "epoch": 0.817763958361498, "grad_norm": 1.8315708637237549, "learning_rate": 2.563124511298384e-06, "loss": 0.2089996337890625, "step": 12098 }, { "epoch": 0.8178315533324321, "grad_norm": 1.687140941619873, "learning_rate": 2.561280166252564e-06, "loss": 0.2987060546875, "step": 12099 }, { "epoch": 0.8178991483033662, "grad_norm": 1.5426921844482422, "learning_rate": 2.5594364230693577e-06, "loss": 0.267364501953125, "step": 12100 }, { "epoch": 0.8179667432743004, "grad_norm": 1.0945755243301392, "learning_rate": 2.557593281837994e-06, "loss": 0.187469482421875, "step": 12101 }, { "epoch": 0.8180343382452345, "grad_norm": 1.3484326601028442, "learning_rate": 2.5557507426476367e-06, "loss": 0.28326416015625, "step": 12102 }, { "epoch": 0.8181019332161688, "grad_norm": 2.1698551177978516, "learning_rate": 2.553908805587452e-06, "loss": 0.301300048828125, "step": 12103 }, { "epoch": 0.8181695281871029, "grad_norm": 1.4226592779159546, "learning_rate": 2.55206747074656e-06, "loss": 0.25975799560546875, "step": 12104 }, { "epoch": 0.8182371231580371, "grad_norm": 0.8131844401359558, "learning_rate": 2.5502267382140583e-06, "loss": 0.1923065185546875, "step": 12105 }, { "epoch": 0.8183047181289712, "grad_norm": 1.611992597579956, "learning_rate": 2.5483866080790097e-06, "loss": 0.25653076171875, "step": 12106 }, { "epoch": 0.8183723130999053, "grad_norm": 0.8864562511444092, "learning_rate": 2.5465470804304535e-06, "loss": 0.15472412109375, "step": 12107 }, { "epoch": 0.8184399080708396, "grad_norm": 1.0159379243850708, "learning_rate": 2.544708155357397e-06, "loss": 0.239990234375, "step": 12108 }, { "epoch": 0.8185075030417737, "grad_norm": 1.2335784435272217, "learning_rate": 2.542869832948819e-06, "loss": 0.23779296875, "step": 12109 }, { "epoch": 0.8185750980127079, "grad_norm": 1.3994649648666382, "learning_rate": 2.5410321132936674e-06, "loss": 0.256591796875, "step": 12110 }, { "epoch": 0.818642692983642, "grad_norm": 1.7577463388442993, "learning_rate": 2.539194996480861e-06, "loss": 0.225189208984375, "step": 12111 }, { "epoch": 0.8187102879545762, "grad_norm": 1.5072362422943115, "learning_rate": 2.537358482599301e-06, "loss": 0.2574462890625, "step": 12112 }, { "epoch": 0.8187778829255103, "grad_norm": 1.2074971199035645, "learning_rate": 2.5355225717378342e-06, "loss": 0.20513916015625, "step": 12113 }, { "epoch": 0.8188454778964445, "grad_norm": 1.130509376525879, "learning_rate": 2.5336872639853077e-06, "loss": 0.17716598510742188, "step": 12114 }, { "epoch": 0.8189130728673787, "grad_norm": 0.714733362197876, "learning_rate": 2.5318525594305124e-06, "loss": 0.10755538940429688, "step": 12115 }, { "epoch": 0.8189806678383128, "grad_norm": 1.6033177375793457, "learning_rate": 2.5300184581622315e-06, "loss": 0.2752685546875, "step": 12116 }, { "epoch": 0.819048262809247, "grad_norm": 1.31119704246521, "learning_rate": 2.5281849602692085e-06, "loss": 0.245147705078125, "step": 12117 }, { "epoch": 0.8191158577801811, "grad_norm": 1.0553020238876343, "learning_rate": 2.5263520658401577e-06, "loss": 0.18634033203125, "step": 12118 }, { "epoch": 0.8191834527511154, "grad_norm": 2.3498847484588623, "learning_rate": 2.5245197749637665e-06, "loss": 0.316986083984375, "step": 12119 }, { "epoch": 0.8192510477220495, "grad_norm": 1.961171269416809, "learning_rate": 2.522688087728694e-06, "loss": 0.30108642578125, "step": 12120 }, { "epoch": 0.8193186426929836, "grad_norm": 0.9494591951370239, "learning_rate": 2.5208570042235673e-06, "loss": 0.181640625, "step": 12121 }, { "epoch": 0.8193862376639178, "grad_norm": 1.9796711206436157, "learning_rate": 2.519026524536984e-06, "loss": 0.260650634765625, "step": 12122 }, { "epoch": 0.8194538326348519, "grad_norm": 0.9394694566726685, "learning_rate": 2.517196648757517e-06, "loss": 0.176239013671875, "step": 12123 }, { "epoch": 0.8195214276057862, "grad_norm": 1.1151918172836304, "learning_rate": 2.5153673769737032e-06, "loss": 0.23309326171875, "step": 12124 }, { "epoch": 0.8195890225767203, "grad_norm": 1.3131815195083618, "learning_rate": 2.5135387092740625e-06, "loss": 0.25616455078125, "step": 12125 }, { "epoch": 0.8196566175476545, "grad_norm": 1.2688149213790894, "learning_rate": 2.5117106457470653e-06, "loss": 0.26226806640625, "step": 12126 }, { "epoch": 0.8197242125185886, "grad_norm": 1.890662431716919, "learning_rate": 2.5098831864811737e-06, "loss": 0.32379150390625, "step": 12127 }, { "epoch": 0.8197918074895227, "grad_norm": 1.266885757446289, "learning_rate": 2.50805633156481e-06, "loss": 0.277252197265625, "step": 12128 }, { "epoch": 0.819859402460457, "grad_norm": 1.3170015811920166, "learning_rate": 2.506230081086365e-06, "loss": 0.2466888427734375, "step": 12129 }, { "epoch": 0.8199269974313911, "grad_norm": 1.5293240547180176, "learning_rate": 2.5044044351342083e-06, "loss": 0.19089508056640625, "step": 12130 }, { "epoch": 0.8199945924023253, "grad_norm": 0.9570818543434143, "learning_rate": 2.5025793937966744e-06, "loss": 0.133575439453125, "step": 12131 }, { "epoch": 0.8200621873732594, "grad_norm": 1.7284958362579346, "learning_rate": 2.5007549571620703e-06, "loss": 0.298126220703125, "step": 12132 }, { "epoch": 0.8201297823441936, "grad_norm": 0.7131861448287964, "learning_rate": 2.498931125318673e-06, "loss": 0.14385223388671875, "step": 12133 }, { "epoch": 0.8201973773151278, "grad_norm": 1.2170186042785645, "learning_rate": 2.497107898354731e-06, "loss": 0.24383544921875, "step": 12134 }, { "epoch": 0.8202649722860619, "grad_norm": 1.3845911026000977, "learning_rate": 2.4952852763584627e-06, "loss": 0.251708984375, "step": 12135 }, { "epoch": 0.8203325672569961, "grad_norm": 0.938926100730896, "learning_rate": 2.49346325941806e-06, "loss": 0.1743316650390625, "step": 12136 }, { "epoch": 0.8204001622279302, "grad_norm": 0.5769031047821045, "learning_rate": 2.491641847621679e-06, "loss": 0.140594482421875, "step": 12137 }, { "epoch": 0.8204677571988644, "grad_norm": 1.4420863389968872, "learning_rate": 2.489821041057459e-06, "loss": 0.265045166015625, "step": 12138 }, { "epoch": 0.8205353521697986, "grad_norm": 1.5352649688720703, "learning_rate": 2.488000839813491e-06, "loss": 0.2373046875, "step": 12139 }, { "epoch": 0.8206029471407327, "grad_norm": 1.4107264280319214, "learning_rate": 2.4861812439778587e-06, "loss": 0.260467529296875, "step": 12140 }, { "epoch": 0.8206705421116669, "grad_norm": 0.9857616424560547, "learning_rate": 2.484362253638599e-06, "loss": 0.19879150390625, "step": 12141 }, { "epoch": 0.820738137082601, "grad_norm": 1.252435326576233, "learning_rate": 2.482543868883727e-06, "loss": 0.20654296875, "step": 12142 }, { "epoch": 0.8208057320535352, "grad_norm": 1.4575772285461426, "learning_rate": 2.4807260898012325e-06, "loss": 0.240875244140625, "step": 12143 }, { "epoch": 0.8208733270244694, "grad_norm": 1.7684112787246704, "learning_rate": 2.478908916479058e-06, "loss": 0.261993408203125, "step": 12144 }, { "epoch": 0.8209409219954036, "grad_norm": 1.6666600704193115, "learning_rate": 2.4770923490051432e-06, "loss": 0.24664306640625, "step": 12145 }, { "epoch": 0.8210085169663377, "grad_norm": 1.1751610040664673, "learning_rate": 2.4752763874673782e-06, "loss": 0.2449951171875, "step": 12146 }, { "epoch": 0.8210761119372718, "grad_norm": 1.323227047920227, "learning_rate": 2.4734610319536328e-06, "loss": 0.221710205078125, "step": 12147 }, { "epoch": 0.821143706908206, "grad_norm": 1.3875895738601685, "learning_rate": 2.4716462825517426e-06, "loss": 0.26861572265625, "step": 12148 }, { "epoch": 0.8212113018791402, "grad_norm": 1.3758138418197632, "learning_rate": 2.46983213934952e-06, "loss": 0.2274322509765625, "step": 12149 }, { "epoch": 0.8212788968500744, "grad_norm": 1.0854246616363525, "learning_rate": 2.468018602434739e-06, "loss": 0.2248077392578125, "step": 12150 }, { "epoch": 0.8213464918210085, "grad_norm": 2.0433144569396973, "learning_rate": 2.4662056718951615e-06, "loss": 0.3092041015625, "step": 12151 }, { "epoch": 0.8214140867919427, "grad_norm": 0.9196134805679321, "learning_rate": 2.464393347818492e-06, "loss": 0.193572998046875, "step": 12152 }, { "epoch": 0.8214816817628768, "grad_norm": 1.1380255222320557, "learning_rate": 2.4625816302924343e-06, "loss": 0.1284027099609375, "step": 12153 }, { "epoch": 0.821549276733811, "grad_norm": 1.6083346605300903, "learning_rate": 2.460770519404651e-06, "loss": 0.30352783203125, "step": 12154 }, { "epoch": 0.8216168717047452, "grad_norm": 1.0068087577819824, "learning_rate": 2.458960015242762e-06, "loss": 0.18414306640625, "step": 12155 }, { "epoch": 0.8216844666756793, "grad_norm": 1.4842889308929443, "learning_rate": 2.4571501178943844e-06, "loss": 0.251434326171875, "step": 12156 }, { "epoch": 0.8217520616466135, "grad_norm": 1.3050225973129272, "learning_rate": 2.455340827447086e-06, "loss": 0.250091552734375, "step": 12157 }, { "epoch": 0.8218196566175476, "grad_norm": 1.4822251796722412, "learning_rate": 2.453532143988414e-06, "loss": 0.30413818359375, "step": 12158 }, { "epoch": 0.8218872515884819, "grad_norm": 1.2803740501403809, "learning_rate": 2.451724067605881e-06, "loss": 0.1937255859375, "step": 12159 }, { "epoch": 0.821954846559416, "grad_norm": 1.0526710748672485, "learning_rate": 2.449916598386976e-06, "loss": 0.1505126953125, "step": 12160 }, { "epoch": 0.8220224415303501, "grad_norm": 1.2887494564056396, "learning_rate": 2.4481097364191535e-06, "loss": 0.284698486328125, "step": 12161 }, { "epoch": 0.8220900365012843, "grad_norm": 2.7412068843841553, "learning_rate": 2.4463034817898404e-06, "loss": 0.36358642578125, "step": 12162 }, { "epoch": 0.8221576314722184, "grad_norm": 1.5464116334915161, "learning_rate": 2.4444978345864323e-06, "loss": 0.207794189453125, "step": 12163 }, { "epoch": 0.8222252264431527, "grad_norm": 1.540300726890564, "learning_rate": 2.4426927948963085e-06, "loss": 0.27215576171875, "step": 12164 }, { "epoch": 0.8222928214140868, "grad_norm": 1.2542589902877808, "learning_rate": 2.440888362806793e-06, "loss": 0.19702911376953125, "step": 12165 }, { "epoch": 0.822360416385021, "grad_norm": 1.491422414779663, "learning_rate": 2.4390845384052075e-06, "loss": 0.287078857421875, "step": 12166 }, { "epoch": 0.8224280113559551, "grad_norm": 0.6981228590011597, "learning_rate": 2.4372813217788305e-06, "loss": 0.1154327392578125, "step": 12167 }, { "epoch": 0.8224956063268892, "grad_norm": 1.626225471496582, "learning_rate": 2.435478713014902e-06, "loss": 0.262939453125, "step": 12168 }, { "epoch": 0.8225632012978235, "grad_norm": 1.436185598373413, "learning_rate": 2.4336767122006544e-06, "loss": 0.307373046875, "step": 12169 }, { "epoch": 0.8226307962687576, "grad_norm": 0.8858365416526794, "learning_rate": 2.4318753194232775e-06, "loss": 0.13677978515625, "step": 12170 }, { "epoch": 0.8226983912396918, "grad_norm": 1.0893667936325073, "learning_rate": 2.4300745347699335e-06, "loss": 0.1666717529296875, "step": 12171 }, { "epoch": 0.8227659862106259, "grad_norm": 1.4404534101486206, "learning_rate": 2.428274358327754e-06, "loss": 0.2503662109375, "step": 12172 }, { "epoch": 0.8228335811815601, "grad_norm": 1.6760313510894775, "learning_rate": 2.426474790183843e-06, "loss": 0.21065521240234375, "step": 12173 }, { "epoch": 0.8229011761524943, "grad_norm": 1.442549467086792, "learning_rate": 2.424675830425274e-06, "loss": 0.244110107421875, "step": 12174 }, { "epoch": 0.8229687711234284, "grad_norm": 1.567214012145996, "learning_rate": 2.4228774791390996e-06, "loss": 0.260955810546875, "step": 12175 }, { "epoch": 0.8230363660943626, "grad_norm": 0.9725157022476196, "learning_rate": 2.421079736412321e-06, "loss": 0.1534423828125, "step": 12176 }, { "epoch": 0.8231039610652967, "grad_norm": 1.084399938583374, "learning_rate": 2.419282602331941e-06, "loss": 0.1722412109375, "step": 12177 }, { "epoch": 0.8231715560362309, "grad_norm": 0.9402669668197632, "learning_rate": 2.417486076984903e-06, "loss": 0.17559051513671875, "step": 12178 }, { "epoch": 0.823239151007165, "grad_norm": 1.1108314990997314, "learning_rate": 2.4156901604581338e-06, "loss": 0.15491485595703125, "step": 12179 }, { "epoch": 0.8233067459780993, "grad_norm": 1.0748493671417236, "learning_rate": 2.4138948528385447e-06, "loss": 0.215606689453125, "step": 12180 }, { "epoch": 0.8233743409490334, "grad_norm": 1.5370227098464966, "learning_rate": 2.4121001542129874e-06, "loss": 0.2564544677734375, "step": 12181 }, { "epoch": 0.8234419359199675, "grad_norm": 1.2054332494735718, "learning_rate": 2.4103060646683107e-06, "loss": 0.2085418701171875, "step": 12182 }, { "epoch": 0.8235095308909017, "grad_norm": 1.5541284084320068, "learning_rate": 2.4085125842913203e-06, "loss": 0.2283935546875, "step": 12183 }, { "epoch": 0.8235771258618358, "grad_norm": 1.1555830240249634, "learning_rate": 2.4067197131687984e-06, "loss": 0.1750946044921875, "step": 12184 }, { "epoch": 0.8236447208327701, "grad_norm": 1.2899723052978516, "learning_rate": 2.4049274513874924e-06, "loss": 0.226654052734375, "step": 12185 }, { "epoch": 0.8237123158037042, "grad_norm": 1.4460258483886719, "learning_rate": 2.4031357990341247e-06, "loss": 0.227691650390625, "step": 12186 }, { "epoch": 0.8237799107746384, "grad_norm": 0.9352294206619263, "learning_rate": 2.401344756195382e-06, "loss": 0.203216552734375, "step": 12187 }, { "epoch": 0.8238475057455725, "grad_norm": 1.2841945886611938, "learning_rate": 2.399554322957939e-06, "loss": 0.259765625, "step": 12188 }, { "epoch": 0.8239151007165066, "grad_norm": 1.3767690658569336, "learning_rate": 2.3977644994084145e-06, "loss": 0.276641845703125, "step": 12189 }, { "epoch": 0.8239826956874409, "grad_norm": 1.081154227256775, "learning_rate": 2.395975285633411e-06, "loss": 0.152069091796875, "step": 12190 }, { "epoch": 0.824050290658375, "grad_norm": 1.139635443687439, "learning_rate": 2.3941866817195157e-06, "loss": 0.18780517578125, "step": 12191 }, { "epoch": 0.8241178856293092, "grad_norm": 1.2879122495651245, "learning_rate": 2.392398687753256e-06, "loss": 0.305419921875, "step": 12192 }, { "epoch": 0.8241854806002433, "grad_norm": 2.2398910522460938, "learning_rate": 2.3906113038211612e-06, "loss": 0.2086181640625, "step": 12193 }, { "epoch": 0.8242530755711776, "grad_norm": 1.0972427129745483, "learning_rate": 2.3888245300097004e-06, "loss": 0.21466064453125, "step": 12194 }, { "epoch": 0.8243206705421117, "grad_norm": 1.0354037284851074, "learning_rate": 2.38703836640534e-06, "loss": 0.177642822265625, "step": 12195 }, { "epoch": 0.8243882655130458, "grad_norm": 1.055404543876648, "learning_rate": 2.3852528130945044e-06, "loss": 0.1393585205078125, "step": 12196 }, { "epoch": 0.82445586048398, "grad_norm": 0.7365958094596863, "learning_rate": 2.383467870163586e-06, "loss": 0.12127685546875, "step": 12197 }, { "epoch": 0.8245234554549141, "grad_norm": 1.422047734260559, "learning_rate": 2.381683537698955e-06, "loss": 0.224151611328125, "step": 12198 }, { "epoch": 0.8245910504258483, "grad_norm": 1.3951847553253174, "learning_rate": 2.3798998157869454e-06, "loss": 0.24224853515625, "step": 12199 }, { "epoch": 0.8246586453967825, "grad_norm": 1.376723289489746, "learning_rate": 2.378116704513864e-06, "loss": 0.3087158203125, "step": 12200 }, { "epoch": 0.8247262403677167, "grad_norm": 1.0849649906158447, "learning_rate": 2.376334203965998e-06, "loss": 0.20842742919921875, "step": 12201 }, { "epoch": 0.8247938353386508, "grad_norm": 1.668869137763977, "learning_rate": 2.374552314229585e-06, "loss": 0.2142333984375, "step": 12202 }, { "epoch": 0.8248614303095849, "grad_norm": 1.2727316617965698, "learning_rate": 2.3727710353908443e-06, "loss": 0.249267578125, "step": 12203 }, { "epoch": 0.8249290252805191, "grad_norm": 1.1109298467636108, "learning_rate": 2.370990367535977e-06, "loss": 0.1280078887939453, "step": 12204 }, { "epoch": 0.8249966202514533, "grad_norm": 1.1513183116912842, "learning_rate": 2.3692103107511264e-06, "loss": 0.28277587890625, "step": 12205 }, { "epoch": 0.8250642152223875, "grad_norm": 1.0936996936798096, "learning_rate": 2.367430865122438e-06, "loss": 0.2896728515625, "step": 12206 }, { "epoch": 0.8251318101933216, "grad_norm": 1.1077990531921387, "learning_rate": 2.3656520307359995e-06, "loss": 0.21148681640625, "step": 12207 }, { "epoch": 0.8251994051642558, "grad_norm": 1.444122552871704, "learning_rate": 2.363873807677891e-06, "loss": 0.241851806640625, "step": 12208 }, { "epoch": 0.8252670001351899, "grad_norm": 1.1287137269973755, "learning_rate": 2.36209619603415e-06, "loss": 0.21307373046875, "step": 12209 }, { "epoch": 0.8253345951061241, "grad_norm": 1.6902555227279663, "learning_rate": 2.3603191958907906e-06, "loss": 0.2947998046875, "step": 12210 }, { "epoch": 0.8254021900770583, "grad_norm": 1.6118879318237305, "learning_rate": 2.3585428073337927e-06, "loss": 0.228912353515625, "step": 12211 }, { "epoch": 0.8254697850479924, "grad_norm": 1.237810730934143, "learning_rate": 2.3567670304491097e-06, "loss": 0.26318359375, "step": 12212 }, { "epoch": 0.8255373800189266, "grad_norm": 0.897157609462738, "learning_rate": 2.354991865322667e-06, "loss": 0.1459808349609375, "step": 12213 }, { "epoch": 0.8256049749898607, "grad_norm": 1.3533471822738647, "learning_rate": 2.353217312040355e-06, "loss": 0.265869140625, "step": 12214 }, { "epoch": 0.825672569960795, "grad_norm": 1.2868616580963135, "learning_rate": 2.3514433706880394e-06, "loss": 0.19403839111328125, "step": 12215 }, { "epoch": 0.8257401649317291, "grad_norm": 1.1888878345489502, "learning_rate": 2.34967004135155e-06, "loss": 0.2364501953125, "step": 12216 }, { "epoch": 0.8258077599026632, "grad_norm": 1.529956340789795, "learning_rate": 2.347897324116704e-06, "loss": 0.30499267578125, "step": 12217 }, { "epoch": 0.8258753548735974, "grad_norm": 1.162655234336853, "learning_rate": 2.34612521906926e-06, "loss": 0.209228515625, "step": 12218 }, { "epoch": 0.8259429498445315, "grad_norm": 1.147014856338501, "learning_rate": 2.344353726294975e-06, "loss": 0.226593017578125, "step": 12219 }, { "epoch": 0.8260105448154658, "grad_norm": 1.2954862117767334, "learning_rate": 2.3425828458795617e-06, "loss": 0.228118896484375, "step": 12220 }, { "epoch": 0.8260781397863999, "grad_norm": 0.9521180987358093, "learning_rate": 2.3408125779087053e-06, "loss": 0.183197021484375, "step": 12221 }, { "epoch": 0.8261457347573341, "grad_norm": 0.7612638473510742, "learning_rate": 2.3390429224680637e-06, "loss": 0.14166259765625, "step": 12222 }, { "epoch": 0.8262133297282682, "grad_norm": 1.0505073070526123, "learning_rate": 2.3372738796432633e-06, "loss": 0.252838134765625, "step": 12223 }, { "epoch": 0.8262809246992023, "grad_norm": 1.6133297681808472, "learning_rate": 2.3355054495198996e-06, "loss": 0.2415618896484375, "step": 12224 }, { "epoch": 0.8263485196701366, "grad_norm": 1.7249817848205566, "learning_rate": 2.3337376321835435e-06, "loss": 0.27740478515625, "step": 12225 }, { "epoch": 0.8264161146410707, "grad_norm": 1.2456144094467163, "learning_rate": 2.3319704277197318e-06, "loss": 0.1641387939453125, "step": 12226 }, { "epoch": 0.8264837096120049, "grad_norm": 1.2140157222747803, "learning_rate": 2.330203836213971e-06, "loss": 0.232025146484375, "step": 12227 }, { "epoch": 0.826551304582939, "grad_norm": 0.9071502685546875, "learning_rate": 2.328437857751743e-06, "loss": 0.1525115966796875, "step": 12228 }, { "epoch": 0.8266188995538732, "grad_norm": 0.9315265417098999, "learning_rate": 2.326672492418489e-06, "loss": 0.12130355834960938, "step": 12229 }, { "epoch": 0.8266864945248074, "grad_norm": 1.8512176275253296, "learning_rate": 2.3249077402996443e-06, "loss": 0.20452880859375, "step": 12230 }, { "epoch": 0.8267540894957415, "grad_norm": 1.2145564556121826, "learning_rate": 2.323143601480579e-06, "loss": 0.187713623046875, "step": 12231 }, { "epoch": 0.8268216844666757, "grad_norm": 0.6437221765518188, "learning_rate": 2.321380076046668e-06, "loss": 0.101715087890625, "step": 12232 }, { "epoch": 0.8268892794376098, "grad_norm": 1.301169753074646, "learning_rate": 2.3196171640832364e-06, "loss": 0.24127197265625, "step": 12233 }, { "epoch": 0.826956874408544, "grad_norm": 0.9507426619529724, "learning_rate": 2.3178548656755845e-06, "loss": 0.16020965576171875, "step": 12234 }, { "epoch": 0.8270244693794782, "grad_norm": 0.9971044063568115, "learning_rate": 2.3160931809089836e-06, "loss": 0.166473388671875, "step": 12235 }, { "epoch": 0.8270920643504124, "grad_norm": 1.3325599431991577, "learning_rate": 2.314332109868674e-06, "loss": 0.21319580078125, "step": 12236 }, { "epoch": 0.8271596593213465, "grad_norm": 1.6173994541168213, "learning_rate": 2.31257165263987e-06, "loss": 0.2584381103515625, "step": 12237 }, { "epoch": 0.8272272542922806, "grad_norm": 1.2490620613098145, "learning_rate": 2.3108118093077507e-06, "loss": 0.253082275390625, "step": 12238 }, { "epoch": 0.8272948492632148, "grad_norm": 2.2397665977478027, "learning_rate": 2.3090525799574683e-06, "loss": 0.31866455078125, "step": 12239 }, { "epoch": 0.827362444234149, "grad_norm": 1.293822169303894, "learning_rate": 2.3072939646741485e-06, "loss": 0.27984619140625, "step": 12240 }, { "epoch": 0.8274300392050832, "grad_norm": 1.0961642265319824, "learning_rate": 2.30553596354288e-06, "loss": 0.2319488525390625, "step": 12241 }, { "epoch": 0.8274976341760173, "grad_norm": 1.3816766738891602, "learning_rate": 2.3037785766487252e-06, "loss": 0.246307373046875, "step": 12242 }, { "epoch": 0.8275652291469515, "grad_norm": 2.110499620437622, "learning_rate": 2.3020218040767256e-06, "loss": 0.316650390625, "step": 12243 }, { "epoch": 0.8276328241178856, "grad_norm": 1.106107234954834, "learning_rate": 2.3002656459118725e-06, "loss": 0.1570281982421875, "step": 12244 }, { "epoch": 0.8277004190888197, "grad_norm": 1.8350392580032349, "learning_rate": 2.29851010223915e-06, "loss": 0.372314453125, "step": 12245 }, { "epoch": 0.827768014059754, "grad_norm": 1.0330730676651, "learning_rate": 2.296755173143499e-06, "loss": 0.1702880859375, "step": 12246 }, { "epoch": 0.8278356090306881, "grad_norm": 1.578858494758606, "learning_rate": 2.295000858709832e-06, "loss": 0.231719970703125, "step": 12247 }, { "epoch": 0.8279032040016223, "grad_norm": 1.1046770811080933, "learning_rate": 2.2932471590230367e-06, "loss": 0.214263916015625, "step": 12248 }, { "epoch": 0.8279707989725564, "grad_norm": 1.2690712213516235, "learning_rate": 2.2914940741679657e-06, "loss": 0.25537109375, "step": 12249 }, { "epoch": 0.8280383939434907, "grad_norm": 2.0674662590026855, "learning_rate": 2.289741604229444e-06, "loss": 0.29034423828125, "step": 12250 }, { "epoch": 0.8281059889144248, "grad_norm": 1.848694086074829, "learning_rate": 2.2879897492922686e-06, "loss": 0.27276611328125, "step": 12251 }, { "epoch": 0.8281735838853589, "grad_norm": 1.0339395999908447, "learning_rate": 2.2862385094412034e-06, "loss": 0.27130126953125, "step": 12252 }, { "epoch": 0.8282411788562931, "grad_norm": 1.650235891342163, "learning_rate": 2.2844878847609862e-06, "loss": 0.3079833984375, "step": 12253 }, { "epoch": 0.8283087738272272, "grad_norm": 1.1565274000167847, "learning_rate": 2.2827378753363204e-06, "loss": 0.167205810546875, "step": 12254 }, { "epoch": 0.8283763687981615, "grad_norm": 1.3489353656768799, "learning_rate": 2.2809884812518816e-06, "loss": 0.3077392578125, "step": 12255 }, { "epoch": 0.8284439637690956, "grad_norm": 1.4253971576690674, "learning_rate": 2.2792397025923254e-06, "loss": 0.25030517578125, "step": 12256 }, { "epoch": 0.8285115587400298, "grad_norm": 1.350077509880066, "learning_rate": 2.2774915394422564e-06, "loss": 0.25933837890625, "step": 12257 }, { "epoch": 0.8285791537109639, "grad_norm": 1.2176425457000732, "learning_rate": 2.2757439918862693e-06, "loss": 0.1920166015625, "step": 12258 }, { "epoch": 0.828646748681898, "grad_norm": 1.396844506263733, "learning_rate": 2.273997060008924e-06, "loss": 0.294921875, "step": 12259 }, { "epoch": 0.8287143436528323, "grad_norm": 1.8095378875732422, "learning_rate": 2.2722507438947336e-06, "loss": 0.203582763671875, "step": 12260 }, { "epoch": 0.8287819386237664, "grad_norm": 0.9091499447822571, "learning_rate": 2.27050504362821e-06, "loss": 0.1536865234375, "step": 12261 }, { "epoch": 0.8288495335947006, "grad_norm": 1.13893723487854, "learning_rate": 2.268759959293816e-06, "loss": 0.1606597900390625, "step": 12262 }, { "epoch": 0.8289171285656347, "grad_norm": 0.7126216888427734, "learning_rate": 2.2670154909759905e-06, "loss": 0.1520538330078125, "step": 12263 }, { "epoch": 0.8289847235365688, "grad_norm": 0.9206727147102356, "learning_rate": 2.2652716387591414e-06, "loss": 0.1199951171875, "step": 12264 }, { "epoch": 0.829052318507503, "grad_norm": 0.914873480796814, "learning_rate": 2.263528402727647e-06, "loss": 0.1243438720703125, "step": 12265 }, { "epoch": 0.8291199134784372, "grad_norm": 1.1443647146224976, "learning_rate": 2.2617857829658513e-06, "loss": 0.167022705078125, "step": 12266 }, { "epoch": 0.8291875084493714, "grad_norm": 1.6325514316558838, "learning_rate": 2.2600437795580847e-06, "loss": 0.25689697265625, "step": 12267 }, { "epoch": 0.8292551034203055, "grad_norm": 1.2230358123779297, "learning_rate": 2.258302392588622e-06, "loss": 0.28106689453125, "step": 12268 }, { "epoch": 0.8293226983912397, "grad_norm": 1.344580054283142, "learning_rate": 2.2565616221417374e-06, "loss": 0.2371826171875, "step": 12269 }, { "epoch": 0.8293902933621738, "grad_norm": 0.9825798273086548, "learning_rate": 2.254821468301643e-06, "loss": 0.27911376953125, "step": 12270 }, { "epoch": 0.829457888333108, "grad_norm": 1.6196399927139282, "learning_rate": 2.2530819311525526e-06, "loss": 0.2019195556640625, "step": 12271 }, { "epoch": 0.8295254833040422, "grad_norm": 0.6877607703208923, "learning_rate": 2.251343010778634e-06, "loss": 0.1282501220703125, "step": 12272 }, { "epoch": 0.8295930782749763, "grad_norm": 1.5016121864318848, "learning_rate": 2.249604707264016e-06, "loss": 0.2032623291015625, "step": 12273 }, { "epoch": 0.8296606732459105, "grad_norm": 1.793103814125061, "learning_rate": 2.24786702069282e-06, "loss": 0.276702880859375, "step": 12274 }, { "epoch": 0.8297282682168446, "grad_norm": 1.8573189973831177, "learning_rate": 2.246129951149121e-06, "loss": 0.3043212890625, "step": 12275 }, { "epoch": 0.8297958631877789, "grad_norm": 1.321643352508545, "learning_rate": 2.244393498716972e-06, "loss": 0.275238037109375, "step": 12276 }, { "epoch": 0.829863458158713, "grad_norm": 0.9714827537536621, "learning_rate": 2.242657663480391e-06, "loss": 0.1335296630859375, "step": 12277 }, { "epoch": 0.8299310531296471, "grad_norm": 1.0776656866073608, "learning_rate": 2.240922445523369e-06, "loss": 0.25543212890625, "step": 12278 }, { "epoch": 0.8299986481005813, "grad_norm": 2.7125277519226074, "learning_rate": 2.2391878449298643e-06, "loss": 0.3016815185546875, "step": 12279 }, { "epoch": 0.8300662430715154, "grad_norm": 1.6112943887710571, "learning_rate": 2.2374538617838176e-06, "loss": 0.253326416015625, "step": 12280 }, { "epoch": 0.8301338380424497, "grad_norm": 1.4353809356689453, "learning_rate": 2.2357204961691134e-06, "loss": 0.241790771484375, "step": 12281 }, { "epoch": 0.8302014330133838, "grad_norm": 1.7217588424682617, "learning_rate": 2.23398774816964e-06, "loss": 0.291015625, "step": 12282 }, { "epoch": 0.830269027984318, "grad_norm": 1.3417062759399414, "learning_rate": 2.2322556178692274e-06, "loss": 0.1553192138671875, "step": 12283 }, { "epoch": 0.8303366229552521, "grad_norm": 1.2918379306793213, "learning_rate": 2.2305241053516857e-06, "loss": 0.19757080078125, "step": 12284 }, { "epoch": 0.8304042179261862, "grad_norm": 1.1159604787826538, "learning_rate": 2.2287932107008087e-06, "loss": 0.23944091796875, "step": 12285 }, { "epoch": 0.8304718128971205, "grad_norm": 1.5871003866195679, "learning_rate": 2.2270629340003306e-06, "loss": 0.256927490234375, "step": 12286 }, { "epoch": 0.8305394078680546, "grad_norm": 1.5260393619537354, "learning_rate": 2.2253332753339866e-06, "loss": 0.206878662109375, "step": 12287 }, { "epoch": 0.8306070028389888, "grad_norm": 1.2374415397644043, "learning_rate": 2.223604234785463e-06, "loss": 0.2212371826171875, "step": 12288 }, { "epoch": 0.8306745978099229, "grad_norm": 1.3865017890930176, "learning_rate": 2.221875812438421e-06, "loss": 0.255950927734375, "step": 12289 }, { "epoch": 0.8307421927808571, "grad_norm": 0.9939211010932922, "learning_rate": 2.2201480083764954e-06, "loss": 0.1959686279296875, "step": 12290 }, { "epoch": 0.8308097877517913, "grad_norm": 1.7013740539550781, "learning_rate": 2.2184208226832843e-06, "loss": 0.255767822265625, "step": 12291 }, { "epoch": 0.8308773827227254, "grad_norm": 1.4890141487121582, "learning_rate": 2.216694255442359e-06, "loss": 0.236419677734375, "step": 12292 }, { "epoch": 0.8309449776936596, "grad_norm": 1.1275357007980347, "learning_rate": 2.214968306737272e-06, "loss": 0.186187744140625, "step": 12293 }, { "epoch": 0.8310125726645937, "grad_norm": 0.9508123397827148, "learning_rate": 2.2132429766515232e-06, "loss": 0.139862060546875, "step": 12294 }, { "epoch": 0.8310801676355279, "grad_norm": 1.6961851119995117, "learning_rate": 2.2115182652685954e-06, "loss": 0.276580810546875, "step": 12295 }, { "epoch": 0.8311477626064621, "grad_norm": 0.8031942248344421, "learning_rate": 2.2097941726719517e-06, "loss": 0.1397552490234375, "step": 12296 }, { "epoch": 0.8312153575773963, "grad_norm": 1.2637078762054443, "learning_rate": 2.208070698945e-06, "loss": 0.221466064453125, "step": 12297 }, { "epoch": 0.8312829525483304, "grad_norm": 1.5092835426330566, "learning_rate": 2.206347844171149e-06, "loss": 0.2358245849609375, "step": 12298 }, { "epoch": 0.8313505475192645, "grad_norm": 1.0368947982788086, "learning_rate": 2.204625608433743e-06, "loss": 0.14653778076171875, "step": 12299 }, { "epoch": 0.8314181424901987, "grad_norm": 1.0089497566223145, "learning_rate": 2.2029039918161276e-06, "loss": 0.195953369140625, "step": 12300 }, { "epoch": 0.8314857374611329, "grad_norm": 1.7376078367233276, "learning_rate": 2.2011829944016026e-06, "loss": 0.2373046875, "step": 12301 }, { "epoch": 0.8315533324320671, "grad_norm": 3.0868358612060547, "learning_rate": 2.199462616273438e-06, "loss": 0.33038330078125, "step": 12302 }, { "epoch": 0.8316209274030012, "grad_norm": 1.8760805130004883, "learning_rate": 2.197742857514878e-06, "loss": 0.243194580078125, "step": 12303 }, { "epoch": 0.8316885223739354, "grad_norm": 1.7312698364257812, "learning_rate": 2.1960237182091346e-06, "loss": 0.25067138671875, "step": 12304 }, { "epoch": 0.8317561173448695, "grad_norm": 1.1871834993362427, "learning_rate": 2.1943051984393894e-06, "loss": 0.206573486328125, "step": 12305 }, { "epoch": 0.8318237123158037, "grad_norm": 1.0571160316467285, "learning_rate": 2.1925872982888012e-06, "loss": 0.25445556640625, "step": 12306 }, { "epoch": 0.8318913072867379, "grad_norm": 1.1776082515716553, "learning_rate": 2.1908700178404846e-06, "loss": 0.26275634765625, "step": 12307 }, { "epoch": 0.831958902257672, "grad_norm": 1.3993674516677856, "learning_rate": 2.189153357177534e-06, "loss": 0.23809814453125, "step": 12308 }, { "epoch": 0.8320264972286062, "grad_norm": 1.4465936422348022, "learning_rate": 2.1874373163830184e-06, "loss": 0.26763916015625, "step": 12309 }, { "epoch": 0.8320940921995403, "grad_norm": 1.7004505395889282, "learning_rate": 2.18572189553996e-06, "loss": 0.215728759765625, "step": 12310 }, { "epoch": 0.8321616871704746, "grad_norm": 1.2281749248504639, "learning_rate": 2.1840070947313717e-06, "loss": 0.1515045166015625, "step": 12311 }, { "epoch": 0.8322292821414087, "grad_norm": 1.180771827697754, "learning_rate": 2.182292914040221e-06, "loss": 0.182891845703125, "step": 12312 }, { "epoch": 0.8322968771123428, "grad_norm": 1.0921193361282349, "learning_rate": 2.180579353549451e-06, "loss": 0.15137481689453125, "step": 12313 }, { "epoch": 0.832364472083277, "grad_norm": 1.2680150270462036, "learning_rate": 2.178866413341976e-06, "loss": 0.265228271484375, "step": 12314 }, { "epoch": 0.8324320670542111, "grad_norm": 0.7448903918266296, "learning_rate": 2.1771540935006777e-06, "loss": 0.1555023193359375, "step": 12315 }, { "epoch": 0.8324996620251454, "grad_norm": 2.073505401611328, "learning_rate": 2.1754423941084086e-06, "loss": 0.32623291015625, "step": 12316 }, { "epoch": 0.8325672569960795, "grad_norm": 1.290447473526001, "learning_rate": 2.1737313152479916e-06, "loss": 0.245941162109375, "step": 12317 }, { "epoch": 0.8326348519670137, "grad_norm": 1.1610603332519531, "learning_rate": 2.172020857002219e-06, "loss": 0.189605712890625, "step": 12318 }, { "epoch": 0.8327024469379478, "grad_norm": 1.1767812967300415, "learning_rate": 2.170311019453854e-06, "loss": 0.239166259765625, "step": 12319 }, { "epoch": 0.8327700419088819, "grad_norm": 0.9731076955795288, "learning_rate": 2.1686018026856273e-06, "loss": 0.1009674072265625, "step": 12320 }, { "epoch": 0.8328376368798162, "grad_norm": 1.63236403465271, "learning_rate": 2.1668932067802424e-06, "loss": 0.28192138671875, "step": 12321 }, { "epoch": 0.8329052318507503, "grad_norm": 0.5822340250015259, "learning_rate": 2.1651852318203768e-06, "loss": 0.159576416015625, "step": 12322 }, { "epoch": 0.8329728268216845, "grad_norm": 1.099632740020752, "learning_rate": 2.163477877888664e-06, "loss": 0.2720947265625, "step": 12323 }, { "epoch": 0.8330404217926186, "grad_norm": 1.3617186546325684, "learning_rate": 2.161771145067722e-06, "loss": 0.270599365234375, "step": 12324 }, { "epoch": 0.8331080167635528, "grad_norm": 1.4556840658187866, "learning_rate": 2.1600650334401335e-06, "loss": 0.19464111328125, "step": 12325 }, { "epoch": 0.833175611734487, "grad_norm": 1.0702286958694458, "learning_rate": 2.158359543088449e-06, "loss": 0.1561431884765625, "step": 12326 }, { "epoch": 0.8332432067054211, "grad_norm": 1.6559559106826782, "learning_rate": 2.156654674095191e-06, "loss": 0.26953125, "step": 12327 }, { "epoch": 0.8333108016763553, "grad_norm": 1.0300073623657227, "learning_rate": 2.1549504265428516e-06, "loss": 0.2142486572265625, "step": 12328 }, { "epoch": 0.8333783966472894, "grad_norm": 1.121288776397705, "learning_rate": 2.1532468005138935e-06, "loss": 0.190521240234375, "step": 12329 }, { "epoch": 0.8334459916182236, "grad_norm": 1.1139241456985474, "learning_rate": 2.1515437960907487e-06, "loss": 0.2206268310546875, "step": 12330 }, { "epoch": 0.8335135865891578, "grad_norm": 1.610304594039917, "learning_rate": 2.149841413355818e-06, "loss": 0.25762939453125, "step": 12331 }, { "epoch": 0.833581181560092, "grad_norm": 1.3260438442230225, "learning_rate": 2.148139652391474e-06, "loss": 0.236968994140625, "step": 12332 }, { "epoch": 0.8336487765310261, "grad_norm": 1.637439250946045, "learning_rate": 2.146438513280058e-06, "loss": 0.257080078125, "step": 12333 }, { "epoch": 0.8337163715019602, "grad_norm": 0.5417437553405762, "learning_rate": 2.14473799610388e-06, "loss": 0.051601409912109375, "step": 12334 }, { "epoch": 0.8337839664728944, "grad_norm": 1.6787787675857544, "learning_rate": 2.143038100945231e-06, "loss": 0.2955322265625, "step": 12335 }, { "epoch": 0.8338515614438285, "grad_norm": 2.2396414279937744, "learning_rate": 2.141338827886347e-06, "loss": 0.32452392578125, "step": 12336 }, { "epoch": 0.8339191564147628, "grad_norm": 0.61038738489151, "learning_rate": 2.139640177009461e-06, "loss": 0.08086013793945312, "step": 12337 }, { "epoch": 0.8339867513856969, "grad_norm": 1.392382264137268, "learning_rate": 2.1379421483967616e-06, "loss": 0.20263671875, "step": 12338 }, { "epoch": 0.8340543463566311, "grad_norm": 0.6793553233146667, "learning_rate": 2.13624474213041e-06, "loss": 0.08399200439453125, "step": 12339 }, { "epoch": 0.8341219413275652, "grad_norm": 1.7792447805404663, "learning_rate": 2.1345479582925353e-06, "loss": 0.247528076171875, "step": 12340 }, { "epoch": 0.8341895362984993, "grad_norm": 1.8129932880401611, "learning_rate": 2.1328517969652406e-06, "loss": 0.2082672119140625, "step": 12341 }, { "epoch": 0.8342571312694336, "grad_norm": 1.5660934448242188, "learning_rate": 2.131156258230595e-06, "loss": 0.29071044921875, "step": 12342 }, { "epoch": 0.8343247262403677, "grad_norm": 1.6457492113113403, "learning_rate": 2.129461342170641e-06, "loss": 0.27691650390625, "step": 12343 }, { "epoch": 0.8343923212113019, "grad_norm": 1.2033733129501343, "learning_rate": 2.1277670488673885e-06, "loss": 0.275238037109375, "step": 12344 }, { "epoch": 0.834459916182236, "grad_norm": 1.577664852142334, "learning_rate": 2.1260733784028163e-06, "loss": 0.312255859375, "step": 12345 }, { "epoch": 0.8345275111531703, "grad_norm": 1.3601328134536743, "learning_rate": 2.124380330858878e-06, "loss": 0.18363571166992188, "step": 12346 }, { "epoch": 0.8345951061241044, "grad_norm": 1.0669257640838623, "learning_rate": 2.122687906317487e-06, "loss": 0.170623779296875, "step": 12347 }, { "epoch": 0.8346627010950385, "grad_norm": 1.5593657493591309, "learning_rate": 2.120996104860545e-06, "loss": 0.301361083984375, "step": 12348 }, { "epoch": 0.8347302960659727, "grad_norm": 1.1180930137634277, "learning_rate": 2.1193049265698986e-06, "loss": 0.27606201171875, "step": 12349 }, { "epoch": 0.8347978910369068, "grad_norm": 1.0364261865615845, "learning_rate": 2.117614371527389e-06, "loss": 0.247833251953125, "step": 12350 }, { "epoch": 0.834865486007841, "grad_norm": 1.3021948337554932, "learning_rate": 2.11592443981481e-06, "loss": 0.224334716796875, "step": 12351 }, { "epoch": 0.8349330809787752, "grad_norm": 1.346626877784729, "learning_rate": 2.1142351315139314e-06, "loss": 0.196533203125, "step": 12352 }, { "epoch": 0.8350006759497094, "grad_norm": 1.0197652578353882, "learning_rate": 2.1125464467064914e-06, "loss": 0.20435333251953125, "step": 12353 }, { "epoch": 0.8350682709206435, "grad_norm": 1.1609629392623901, "learning_rate": 2.110858385474203e-06, "loss": 0.210479736328125, "step": 12354 }, { "epoch": 0.8351358658915776, "grad_norm": 1.8452343940734863, "learning_rate": 2.1091709478987407e-06, "loss": 0.320068359375, "step": 12355 }, { "epoch": 0.8352034608625118, "grad_norm": 1.188429832458496, "learning_rate": 2.1074841340617563e-06, "loss": 0.2755126953125, "step": 12356 }, { "epoch": 0.835271055833446, "grad_norm": 1.1172690391540527, "learning_rate": 2.1057979440448673e-06, "loss": 0.1774749755859375, "step": 12357 }, { "epoch": 0.8353386508043802, "grad_norm": 2.6348178386688232, "learning_rate": 2.1041123779296583e-06, "loss": 0.301422119140625, "step": 12358 }, { "epoch": 0.8354062457753143, "grad_norm": 1.5574530363082886, "learning_rate": 2.102427435797698e-06, "loss": 0.2974853515625, "step": 12359 }, { "epoch": 0.8354738407462485, "grad_norm": 0.7286972403526306, "learning_rate": 2.1007431177305008e-06, "loss": 0.1151885986328125, "step": 12360 }, { "epoch": 0.8355414357171826, "grad_norm": 1.5593452453613281, "learning_rate": 2.0990594238095765e-06, "loss": 0.17900848388671875, "step": 12361 }, { "epoch": 0.8356090306881168, "grad_norm": 1.1776190996170044, "learning_rate": 2.097376354116382e-06, "loss": 0.1659088134765625, "step": 12362 }, { "epoch": 0.835676625659051, "grad_norm": 1.0107173919677734, "learning_rate": 2.0956939087323634e-06, "loss": 0.1683807373046875, "step": 12363 }, { "epoch": 0.8357442206299851, "grad_norm": 1.2320375442504883, "learning_rate": 2.094012087738924e-06, "loss": 0.22686767578125, "step": 12364 }, { "epoch": 0.8358118156009193, "grad_norm": 2.5776660442352295, "learning_rate": 2.092330891217442e-06, "loss": 0.27374267578125, "step": 12365 }, { "epoch": 0.8358794105718534, "grad_norm": 1.368282675743103, "learning_rate": 2.0906503192492628e-06, "loss": 0.26715087890625, "step": 12366 }, { "epoch": 0.8359470055427877, "grad_norm": 1.398827314376831, "learning_rate": 2.088970371915704e-06, "loss": 0.28802490234375, "step": 12367 }, { "epoch": 0.8360146005137218, "grad_norm": 1.5874710083007812, "learning_rate": 2.0872910492980505e-06, "loss": 0.3057861328125, "step": 12368 }, { "epoch": 0.8360821954846559, "grad_norm": 1.0640559196472168, "learning_rate": 2.0856123514775597e-06, "loss": 0.1998291015625, "step": 12369 }, { "epoch": 0.8361497904555901, "grad_norm": 1.4060884714126587, "learning_rate": 2.0839342785354583e-06, "loss": 0.31683349609375, "step": 12370 }, { "epoch": 0.8362173854265242, "grad_norm": 1.9569635391235352, "learning_rate": 2.0822568305529353e-06, "loss": 0.320526123046875, "step": 12371 }, { "epoch": 0.8362849803974585, "grad_norm": 0.9777275323867798, "learning_rate": 2.0805800076111703e-06, "loss": 0.18798065185546875, "step": 12372 }, { "epoch": 0.8363525753683926, "grad_norm": 1.1950223445892334, "learning_rate": 2.078903809791281e-06, "loss": 0.1959686279296875, "step": 12373 }, { "epoch": 0.8364201703393268, "grad_norm": 1.4555885791778564, "learning_rate": 2.0772282371743876e-06, "loss": 0.279541015625, "step": 12374 }, { "epoch": 0.8364877653102609, "grad_norm": 0.8915432691574097, "learning_rate": 2.0755532898415526e-06, "loss": 0.169281005859375, "step": 12375 }, { "epoch": 0.836555360281195, "grad_norm": 1.091076135635376, "learning_rate": 2.0738789678738286e-06, "loss": 0.2257080078125, "step": 12376 }, { "epoch": 0.8366229552521293, "grad_norm": 1.8091200590133667, "learning_rate": 2.0722052713522293e-06, "loss": 0.23711395263671875, "step": 12377 }, { "epoch": 0.8366905502230634, "grad_norm": 0.8840620517730713, "learning_rate": 2.0705322003577302e-06, "loss": 0.1550140380859375, "step": 12378 }, { "epoch": 0.8367581451939976, "grad_norm": 1.6706397533416748, "learning_rate": 2.0688597549712932e-06, "loss": 0.267852783203125, "step": 12379 }, { "epoch": 0.8368257401649317, "grad_norm": 1.0059229135513306, "learning_rate": 2.0671879352738405e-06, "loss": 0.1311798095703125, "step": 12380 }, { "epoch": 0.836893335135866, "grad_norm": 2.020328998565674, "learning_rate": 2.0655167413462633e-06, "loss": 0.2855224609375, "step": 12381 }, { "epoch": 0.8369609301068001, "grad_norm": 1.5775395631790161, "learning_rate": 2.063846173269424e-06, "loss": 0.20562744140625, "step": 12382 }, { "epoch": 0.8370285250777342, "grad_norm": 0.9167705178260803, "learning_rate": 2.0621762311241566e-06, "loss": 0.15044021606445312, "step": 12383 }, { "epoch": 0.8370961200486684, "grad_norm": 1.2464723587036133, "learning_rate": 2.0605069149912593e-06, "loss": 0.251556396484375, "step": 12384 }, { "epoch": 0.8371637150196025, "grad_norm": 1.0080368518829346, "learning_rate": 2.0588382249515143e-06, "loss": 0.2178955078125, "step": 12385 }, { "epoch": 0.8372313099905367, "grad_norm": 1.0314666032791138, "learning_rate": 2.0571701610856486e-06, "loss": 0.1345672607421875, "step": 12386 }, { "epoch": 0.8372989049614709, "grad_norm": 1.4956257343292236, "learning_rate": 2.0555027234743845e-06, "loss": 0.3245849609375, "step": 12387 }, { "epoch": 0.8373664999324051, "grad_norm": 1.1424148082733154, "learning_rate": 2.053835912198404e-06, "loss": 0.22625732421875, "step": 12388 }, { "epoch": 0.8374340949033392, "grad_norm": 2.0584633350372314, "learning_rate": 2.052169727338346e-06, "loss": 0.27203369140625, "step": 12389 }, { "epoch": 0.8375016898742733, "grad_norm": 1.044054388999939, "learning_rate": 2.050504168974846e-06, "loss": 0.218597412109375, "step": 12390 }, { "epoch": 0.8375692848452075, "grad_norm": 1.749496340751648, "learning_rate": 2.048839237188478e-06, "loss": 0.2384033203125, "step": 12391 }, { "epoch": 0.8376368798161417, "grad_norm": 1.4011213779449463, "learning_rate": 2.047174932059814e-06, "loss": 0.2611083984375, "step": 12392 }, { "epoch": 0.8377044747870759, "grad_norm": 1.2926768064498901, "learning_rate": 2.0455112536693794e-06, "loss": 0.263916015625, "step": 12393 }, { "epoch": 0.83777206975801, "grad_norm": 1.0370944738388062, "learning_rate": 2.043848202097675e-06, "loss": 0.2033843994140625, "step": 12394 }, { "epoch": 0.8378396647289441, "grad_norm": 1.5880521535873413, "learning_rate": 2.0421857774251666e-06, "loss": 0.27423095703125, "step": 12395 }, { "epoch": 0.8379072596998783, "grad_norm": 1.2868133783340454, "learning_rate": 2.0405239797322935e-06, "loss": 0.2242889404296875, "step": 12396 }, { "epoch": 0.8379748546708125, "grad_norm": 0.7363540530204773, "learning_rate": 2.038862809099462e-06, "loss": 0.150665283203125, "step": 12397 }, { "epoch": 0.8380424496417467, "grad_norm": 1.9119452238082886, "learning_rate": 2.037202265607059e-06, "loss": 0.2744140625, "step": 12398 }, { "epoch": 0.8381100446126808, "grad_norm": 1.8481097221374512, "learning_rate": 2.0355423493354174e-06, "loss": 0.365753173828125, "step": 12399 }, { "epoch": 0.838177639583615, "grad_norm": 0.9376907348632812, "learning_rate": 2.033883060364867e-06, "loss": 0.1867523193359375, "step": 12400 }, { "epoch": 0.8382452345545491, "grad_norm": 1.2497981786727905, "learning_rate": 2.032224398775692e-06, "loss": 0.2183380126953125, "step": 12401 }, { "epoch": 0.8383128295254832, "grad_norm": 0.9648842811584473, "learning_rate": 2.03056636464814e-06, "loss": 0.15987396240234375, "step": 12402 }, { "epoch": 0.8383804244964175, "grad_norm": 1.3877005577087402, "learning_rate": 2.028908958062446e-06, "loss": 0.252044677734375, "step": 12403 }, { "epoch": 0.8384480194673516, "grad_norm": 1.0965713262557983, "learning_rate": 2.027252179098803e-06, "loss": 0.207061767578125, "step": 12404 }, { "epoch": 0.8385156144382858, "grad_norm": 1.3403160572052002, "learning_rate": 2.025596027837377e-06, "loss": 0.217529296875, "step": 12405 }, { "epoch": 0.8385832094092199, "grad_norm": 1.245086431503296, "learning_rate": 2.023940504358302e-06, "loss": 0.238494873046875, "step": 12406 }, { "epoch": 0.8386508043801542, "grad_norm": 1.3288414478302002, "learning_rate": 2.0222856087416824e-06, "loss": 0.205108642578125, "step": 12407 }, { "epoch": 0.8387183993510883, "grad_norm": 1.7017978429794312, "learning_rate": 2.0206313410675926e-06, "loss": 0.20562744140625, "step": 12408 }, { "epoch": 0.8387859943220224, "grad_norm": 1.2497659921646118, "learning_rate": 2.018977701416075e-06, "loss": 0.20281982421875, "step": 12409 }, { "epoch": 0.8388535892929566, "grad_norm": 2.1002368927001953, "learning_rate": 2.017324689867142e-06, "loss": 0.305938720703125, "step": 12410 }, { "epoch": 0.8389211842638907, "grad_norm": 1.4724479913711548, "learning_rate": 2.015672306500787e-06, "loss": 0.254364013671875, "step": 12411 }, { "epoch": 0.838988779234825, "grad_norm": 1.8962424993515015, "learning_rate": 2.014020551396949e-06, "loss": 0.202850341796875, "step": 12412 }, { "epoch": 0.8390563742057591, "grad_norm": 2.2165064811706543, "learning_rate": 2.012369424635554e-06, "loss": 0.30303955078125, "step": 12413 }, { "epoch": 0.8391239691766933, "grad_norm": 1.7116011381149292, "learning_rate": 2.0107189262965013e-06, "loss": 0.2501029968261719, "step": 12414 }, { "epoch": 0.8391915641476274, "grad_norm": 1.0990755558013916, "learning_rate": 2.0090690564596394e-06, "loss": 0.2250518798828125, "step": 12415 }, { "epoch": 0.8392591591185615, "grad_norm": 1.3704628944396973, "learning_rate": 2.00741981520481e-06, "loss": 0.246856689453125, "step": 12416 }, { "epoch": 0.8393267540894958, "grad_norm": 1.7429893016815186, "learning_rate": 2.00577120261181e-06, "loss": 0.320281982421875, "step": 12417 }, { "epoch": 0.8393943490604299, "grad_norm": 1.2860561609268188, "learning_rate": 2.004123218760411e-06, "loss": 0.2548828125, "step": 12418 }, { "epoch": 0.8394619440313641, "grad_norm": 1.5381033420562744, "learning_rate": 2.002475863730348e-06, "loss": 0.252197265625, "step": 12419 }, { "epoch": 0.8395295390022982, "grad_norm": 0.8745687007904053, "learning_rate": 2.0008291376013367e-06, "loss": 0.194000244140625, "step": 12420 }, { "epoch": 0.8395971339732324, "grad_norm": 1.3467682600021362, "learning_rate": 1.999183040453051e-06, "loss": 0.26776123046875, "step": 12421 }, { "epoch": 0.8396647289441665, "grad_norm": 1.0118415355682373, "learning_rate": 1.9975375723651405e-06, "loss": 0.293701171875, "step": 12422 }, { "epoch": 0.8397323239151007, "grad_norm": 1.154473900794983, "learning_rate": 1.995892733417224e-06, "loss": 0.2435302734375, "step": 12423 }, { "epoch": 0.8397999188860349, "grad_norm": 0.7668124437332153, "learning_rate": 1.9942485236888883e-06, "loss": 0.15822601318359375, "step": 12424 }, { "epoch": 0.839867513856969, "grad_norm": 1.3643453121185303, "learning_rate": 1.9926049432596927e-06, "loss": 0.1627960205078125, "step": 12425 }, { "epoch": 0.8399351088279032, "grad_norm": 0.897088885307312, "learning_rate": 1.9909619922091577e-06, "loss": 0.213958740234375, "step": 12426 }, { "epoch": 0.8400027037988373, "grad_norm": 1.3259929418563843, "learning_rate": 1.98931967061679e-06, "loss": 0.2769775390625, "step": 12427 }, { "epoch": 0.8400702987697716, "grad_norm": 0.6441069841384888, "learning_rate": 1.9876779785620424e-06, "loss": 0.1153106689453125, "step": 12428 }, { "epoch": 0.8401378937407057, "grad_norm": 2.1209888458251953, "learning_rate": 1.986036916124362e-06, "loss": 0.2657470703125, "step": 12429 }, { "epoch": 0.8402054887116398, "grad_norm": 1.4408140182495117, "learning_rate": 1.984396483383148e-06, "loss": 0.180389404296875, "step": 12430 }, { "epoch": 0.840273083682574, "grad_norm": 1.8726738691329956, "learning_rate": 1.982756680417774e-06, "loss": 0.2047882080078125, "step": 12431 }, { "epoch": 0.8403406786535081, "grad_norm": 1.500587821006775, "learning_rate": 1.981117507307586e-06, "loss": 0.291717529296875, "step": 12432 }, { "epoch": 0.8404082736244424, "grad_norm": 1.1647416353225708, "learning_rate": 1.979478964131896e-06, "loss": 0.2935791015625, "step": 12433 }, { "epoch": 0.8404758685953765, "grad_norm": 1.448930025100708, "learning_rate": 1.9778410509699857e-06, "loss": 0.25885009765625, "step": 12434 }, { "epoch": 0.8405434635663107, "grad_norm": 1.1520652770996094, "learning_rate": 1.9762037679011148e-06, "loss": 0.2278289794921875, "step": 12435 }, { "epoch": 0.8406110585372448, "grad_norm": 1.079024076461792, "learning_rate": 1.9745671150044964e-06, "loss": 0.253692626953125, "step": 12436 }, { "epoch": 0.8406786535081789, "grad_norm": 0.9210001230239868, "learning_rate": 1.9729310923593257e-06, "loss": 0.160308837890625, "step": 12437 }, { "epoch": 0.8407462484791132, "grad_norm": 0.5615512728691101, "learning_rate": 1.971295700044763e-06, "loss": 0.08099365234375, "step": 12438 }, { "epoch": 0.8408138434500473, "grad_norm": 0.9384315609931946, "learning_rate": 1.9696609381399344e-06, "loss": 0.12779998779296875, "step": 12439 }, { "epoch": 0.8408814384209815, "grad_norm": 0.727977454662323, "learning_rate": 1.968026806723952e-06, "loss": 0.1297149658203125, "step": 12440 }, { "epoch": 0.8409490333919156, "grad_norm": 2.315340280532837, "learning_rate": 1.966393305875871e-06, "loss": 0.297088623046875, "step": 12441 }, { "epoch": 0.8410166283628498, "grad_norm": 1.476380467414856, "learning_rate": 1.9647604356747413e-06, "loss": 0.243011474609375, "step": 12442 }, { "epoch": 0.841084223333784, "grad_norm": 1.4412671327590942, "learning_rate": 1.963128196199566e-06, "loss": 0.298065185546875, "step": 12443 }, { "epoch": 0.8411518183047181, "grad_norm": 1.417003870010376, "learning_rate": 1.9614965875293246e-06, "loss": 0.2227783203125, "step": 12444 }, { "epoch": 0.8412194132756523, "grad_norm": 1.5260618925094604, "learning_rate": 1.959865609742963e-06, "loss": 0.2198486328125, "step": 12445 }, { "epoch": 0.8412870082465864, "grad_norm": 0.9165968894958496, "learning_rate": 1.9582352629194e-06, "loss": 0.223876953125, "step": 12446 }, { "epoch": 0.8413546032175206, "grad_norm": 1.0021165609359741, "learning_rate": 1.9566055471375195e-06, "loss": 0.25201416015625, "step": 12447 }, { "epoch": 0.8414221981884548, "grad_norm": 1.4655245542526245, "learning_rate": 1.9549764624761798e-06, "loss": 0.239593505859375, "step": 12448 }, { "epoch": 0.841489793159389, "grad_norm": 1.1539894342422485, "learning_rate": 1.9533480090142043e-06, "loss": 0.271728515625, "step": 12449 }, { "epoch": 0.8415573881303231, "grad_norm": 1.6415077447891235, "learning_rate": 1.9517201868303854e-06, "loss": 0.284027099609375, "step": 12450 }, { "epoch": 0.8416249831012572, "grad_norm": 1.4351624250411987, "learning_rate": 1.950092996003498e-06, "loss": 0.201080322265625, "step": 12451 }, { "epoch": 0.8416925780721914, "grad_norm": 1.3785151243209839, "learning_rate": 1.9484664366122594e-06, "loss": 0.26287841796875, "step": 12452 }, { "epoch": 0.8417601730431256, "grad_norm": 1.5114468336105347, "learning_rate": 1.9468405087353886e-06, "loss": 0.232757568359375, "step": 12453 }, { "epoch": 0.8418277680140598, "grad_norm": 1.5526256561279297, "learning_rate": 1.9452152124515437e-06, "loss": 0.24444580078125, "step": 12454 }, { "epoch": 0.8418953629849939, "grad_norm": 0.6208112835884094, "learning_rate": 1.9435905478393775e-06, "loss": 0.09686279296875, "step": 12455 }, { "epoch": 0.8419629579559281, "grad_norm": 1.5191930532455444, "learning_rate": 1.9419665149774977e-06, "loss": 0.23382568359375, "step": 12456 }, { "epoch": 0.8420305529268622, "grad_norm": 1.0421375036239624, "learning_rate": 1.9403431139444844e-06, "loss": 0.1901092529296875, "step": 12457 }, { "epoch": 0.8420981478977964, "grad_norm": 0.6150202751159668, "learning_rate": 1.938720344818889e-06, "loss": 0.0712890625, "step": 12458 }, { "epoch": 0.8421657428687306, "grad_norm": 1.3393701314926147, "learning_rate": 1.9370982076792302e-06, "loss": 0.233062744140625, "step": 12459 }, { "epoch": 0.8422333378396647, "grad_norm": 1.496093988418579, "learning_rate": 1.9354767026039975e-06, "loss": 0.28466796875, "step": 12460 }, { "epoch": 0.8423009328105989, "grad_norm": 1.644842267036438, "learning_rate": 1.9338558296716497e-06, "loss": 0.242401123046875, "step": 12461 }, { "epoch": 0.842368527781533, "grad_norm": 0.7391834855079651, "learning_rate": 1.9322355889606135e-06, "loss": 0.201873779296875, "step": 12462 }, { "epoch": 0.8424361227524673, "grad_norm": 2.027362823486328, "learning_rate": 1.9306159805492836e-06, "loss": 0.2533721923828125, "step": 12463 }, { "epoch": 0.8425037177234014, "grad_norm": 1.0609190464019775, "learning_rate": 1.928997004516038e-06, "loss": 0.1765899658203125, "step": 12464 }, { "epoch": 0.8425713126943355, "grad_norm": 1.1556612253189087, "learning_rate": 1.927378660939197e-06, "loss": 0.203155517578125, "step": 12465 }, { "epoch": 0.8426389076652697, "grad_norm": 1.0780715942382812, "learning_rate": 1.925760949897082e-06, "loss": 0.237213134765625, "step": 12466 }, { "epoch": 0.8427065026362038, "grad_norm": 0.9683095216751099, "learning_rate": 1.924143871467954e-06, "loss": 0.1348114013671875, "step": 12467 }, { "epoch": 0.8427740976071381, "grad_norm": 0.9251375794410706, "learning_rate": 1.922527425730065e-06, "loss": 0.1464996337890625, "step": 12468 }, { "epoch": 0.8428416925780722, "grad_norm": 1.0216152667999268, "learning_rate": 1.9209116127616265e-06, "loss": 0.2525634765625, "step": 12469 }, { "epoch": 0.8429092875490064, "grad_norm": 1.6094380617141724, "learning_rate": 1.9192964326408242e-06, "loss": 0.25689697265625, "step": 12470 }, { "epoch": 0.8429768825199405, "grad_norm": 1.5007753372192383, "learning_rate": 1.9176818854458084e-06, "loss": 0.290985107421875, "step": 12471 }, { "epoch": 0.8430444774908746, "grad_norm": 1.4414979219436646, "learning_rate": 1.9160679712547004e-06, "loss": 0.2325439453125, "step": 12472 }, { "epoch": 0.8431120724618089, "grad_norm": 1.4548296928405762, "learning_rate": 1.914454690145591e-06, "loss": 0.223419189453125, "step": 12473 }, { "epoch": 0.843179667432743, "grad_norm": 1.7015494108200073, "learning_rate": 1.912842042196542e-06, "loss": 0.287200927734375, "step": 12474 }, { "epoch": 0.8432472624036772, "grad_norm": 1.354111909866333, "learning_rate": 1.9112300274855856e-06, "loss": 0.165069580078125, "step": 12475 }, { "epoch": 0.8433148573746113, "grad_norm": 0.9139377474784851, "learning_rate": 1.9096186460907134e-06, "loss": 0.203460693359375, "step": 12476 }, { "epoch": 0.8433824523455455, "grad_norm": 0.8651631474494934, "learning_rate": 1.908007898089905e-06, "loss": 0.152130126953125, "step": 12477 }, { "epoch": 0.8434500473164797, "grad_norm": 1.178870677947998, "learning_rate": 1.9063977835610886e-06, "loss": 0.240570068359375, "step": 12478 }, { "epoch": 0.8435176422874138, "grad_norm": 1.2057163715362549, "learning_rate": 1.9047883025821777e-06, "loss": 0.187408447265625, "step": 12479 }, { "epoch": 0.843585237258348, "grad_norm": 1.4915775060653687, "learning_rate": 1.9031794552310456e-06, "loss": 0.257720947265625, "step": 12480 }, { "epoch": 0.8436528322292821, "grad_norm": 1.1514382362365723, "learning_rate": 1.9015712415855413e-06, "loss": 0.210693359375, "step": 12481 }, { "epoch": 0.8437204272002163, "grad_norm": 1.325024962425232, "learning_rate": 1.8999636617234828e-06, "loss": 0.2750244140625, "step": 12482 }, { "epoch": 0.8437880221711505, "grad_norm": 1.1348226070404053, "learning_rate": 1.8983567157226429e-06, "loss": 0.218414306640625, "step": 12483 }, { "epoch": 0.8438556171420847, "grad_norm": 0.867870569229126, "learning_rate": 1.8967504036607858e-06, "loss": 0.193450927734375, "step": 12484 }, { "epoch": 0.8439232121130188, "grad_norm": 0.9700995683670044, "learning_rate": 1.8951447256156329e-06, "loss": 0.2093505859375, "step": 12485 }, { "epoch": 0.8439908070839529, "grad_norm": 1.348793387413025, "learning_rate": 1.893539681664877e-06, "loss": 0.222869873046875, "step": 12486 }, { "epoch": 0.8440584020548871, "grad_norm": 1.3895233869552612, "learning_rate": 1.8919352718861794e-06, "loss": 0.206146240234375, "step": 12487 }, { "epoch": 0.8441259970258213, "grad_norm": 1.9587006568908691, "learning_rate": 1.890331496357171e-06, "loss": 0.280517578125, "step": 12488 }, { "epoch": 0.8441935919967555, "grad_norm": 1.3046238422393799, "learning_rate": 1.88872835515545e-06, "loss": 0.2755126953125, "step": 12489 }, { "epoch": 0.8442611869676896, "grad_norm": 2.638582944869995, "learning_rate": 1.8871258483585963e-06, "loss": 0.2029571533203125, "step": 12490 }, { "epoch": 0.8443287819386238, "grad_norm": 1.1895445585250854, "learning_rate": 1.8855239760441356e-06, "loss": 0.2139739990234375, "step": 12491 }, { "epoch": 0.8443963769095579, "grad_norm": 1.444629430770874, "learning_rate": 1.8839227382895847e-06, "loss": 0.26043701171875, "step": 12492 }, { "epoch": 0.844463971880492, "grad_norm": 1.5432761907577515, "learning_rate": 1.8823221351724247e-06, "loss": 0.269561767578125, "step": 12493 }, { "epoch": 0.8445315668514263, "grad_norm": 1.1039822101593018, "learning_rate": 1.8807221667700902e-06, "loss": 0.20720672607421875, "step": 12494 }, { "epoch": 0.8445991618223604, "grad_norm": 1.778845191001892, "learning_rate": 1.8791228331600097e-06, "loss": 0.256591796875, "step": 12495 }, { "epoch": 0.8446667567932946, "grad_norm": 1.8268203735351562, "learning_rate": 1.8775241344195626e-06, "loss": 0.3209228515625, "step": 12496 }, { "epoch": 0.8447343517642287, "grad_norm": 0.9255513548851013, "learning_rate": 1.8759260706261073e-06, "loss": 0.199676513671875, "step": 12497 }, { "epoch": 0.844801946735163, "grad_norm": 1.161102294921875, "learning_rate": 1.8743286418569649e-06, "loss": 0.20379638671875, "step": 12498 }, { "epoch": 0.8448695417060971, "grad_norm": 1.205933690071106, "learning_rate": 1.8727318481894323e-06, "loss": 0.21142578125, "step": 12499 }, { "epoch": 0.8449371366770312, "grad_norm": 1.4407238960266113, "learning_rate": 1.8711356897007691e-06, "loss": 0.258026123046875, "step": 12500 }, { "epoch": 0.8450047316479654, "grad_norm": 1.273140549659729, "learning_rate": 1.8695401664682088e-06, "loss": 0.245086669921875, "step": 12501 }, { "epoch": 0.8450723266188995, "grad_norm": 2.164881944656372, "learning_rate": 1.867945278568951e-06, "loss": 0.252960205078125, "step": 12502 }, { "epoch": 0.8451399215898338, "grad_norm": 1.4819583892822266, "learning_rate": 1.8663510260801726e-06, "loss": 0.246185302734375, "step": 12503 }, { "epoch": 0.8452075165607679, "grad_norm": 2.32966685295105, "learning_rate": 1.864757409079003e-06, "loss": 0.2225341796875, "step": 12504 }, { "epoch": 0.8452751115317021, "grad_norm": 0.64895099401474, "learning_rate": 1.8631644276425613e-06, "loss": 0.134521484375, "step": 12505 }, { "epoch": 0.8453427065026362, "grad_norm": 1.1667141914367676, "learning_rate": 1.8615720818479238e-06, "loss": 0.19915771484375, "step": 12506 }, { "epoch": 0.8454103014735703, "grad_norm": 0.7913764119148254, "learning_rate": 1.8599803717721303e-06, "loss": 0.157073974609375, "step": 12507 }, { "epoch": 0.8454778964445046, "grad_norm": 0.9149518609046936, "learning_rate": 1.8583892974922063e-06, "loss": 0.201751708984375, "step": 12508 }, { "epoch": 0.8455454914154387, "grad_norm": 1.297123908996582, "learning_rate": 1.8567988590851348e-06, "loss": 0.262298583984375, "step": 12509 }, { "epoch": 0.8456130863863729, "grad_norm": 1.5791934728622437, "learning_rate": 1.8552090566278713e-06, "loss": 0.3310546875, "step": 12510 }, { "epoch": 0.845680681357307, "grad_norm": 1.2014073133468628, "learning_rate": 1.8536198901973388e-06, "loss": 0.22833251953125, "step": 12511 }, { "epoch": 0.8457482763282412, "grad_norm": 0.8228884339332581, "learning_rate": 1.8520313598704347e-06, "loss": 0.1623382568359375, "step": 12512 }, { "epoch": 0.8458158712991753, "grad_norm": 1.006777048110962, "learning_rate": 1.850443465724019e-06, "loss": 0.233184814453125, "step": 12513 }, { "epoch": 0.8458834662701095, "grad_norm": 1.1267229318618774, "learning_rate": 1.848856207834923e-06, "loss": 0.208221435546875, "step": 12514 }, { "epoch": 0.8459510612410437, "grad_norm": 1.3677269220352173, "learning_rate": 1.8472695862799493e-06, "loss": 0.23004150390625, "step": 12515 }, { "epoch": 0.8460186562119778, "grad_norm": 1.3242089748382568, "learning_rate": 1.845683601135873e-06, "loss": 0.208892822265625, "step": 12516 }, { "epoch": 0.846086251182912, "grad_norm": 0.7920706868171692, "learning_rate": 1.8440982524794275e-06, "loss": 0.10233306884765625, "step": 12517 }, { "epoch": 0.8461538461538461, "grad_norm": 1.014853596687317, "learning_rate": 1.8425135403873216e-06, "loss": 0.16754150390625, "step": 12518 }, { "epoch": 0.8462214411247804, "grad_norm": 2.135143756866455, "learning_rate": 1.8409294649362424e-06, "loss": 0.28643798828125, "step": 12519 }, { "epoch": 0.8462890360957145, "grad_norm": 2.395414352416992, "learning_rate": 1.839346026202825e-06, "loss": 0.264984130859375, "step": 12520 }, { "epoch": 0.8463566310666486, "grad_norm": 1.0412214994430542, "learning_rate": 1.8377632242636932e-06, "loss": 0.2264862060546875, "step": 12521 }, { "epoch": 0.8464242260375828, "grad_norm": 1.2450320720672607, "learning_rate": 1.8361810591954343e-06, "loss": 0.26031494140625, "step": 12522 }, { "epoch": 0.8464918210085169, "grad_norm": 1.225406527519226, "learning_rate": 1.8345995310746e-06, "loss": 0.2871856689453125, "step": 12523 }, { "epoch": 0.8465594159794512, "grad_norm": 1.4097042083740234, "learning_rate": 1.8330186399777143e-06, "loss": 0.306396484375, "step": 12524 }, { "epoch": 0.8466270109503853, "grad_norm": 1.4807894229888916, "learning_rate": 1.8314383859812711e-06, "loss": 0.22088623046875, "step": 12525 }, { "epoch": 0.8466946059213194, "grad_norm": 0.5558255314826965, "learning_rate": 1.8298587691617309e-06, "loss": 0.08489990234375, "step": 12526 }, { "epoch": 0.8467622008922536, "grad_norm": 1.767174482345581, "learning_rate": 1.8282797895955338e-06, "loss": 0.19493865966796875, "step": 12527 }, { "epoch": 0.8468297958631877, "grad_norm": 1.5472865104675293, "learning_rate": 1.826701447359071e-06, "loss": 0.23236083984375, "step": 12528 }, { "epoch": 0.846897390834122, "grad_norm": 0.7099654674530029, "learning_rate": 1.8251237425287176e-06, "loss": 0.1103973388671875, "step": 12529 }, { "epoch": 0.8469649858050561, "grad_norm": 1.5748997926712036, "learning_rate": 1.8235466751808093e-06, "loss": 0.2554931640625, "step": 12530 }, { "epoch": 0.8470325807759903, "grad_norm": 1.240138292312622, "learning_rate": 1.8219702453916537e-06, "loss": 0.15990447998046875, "step": 12531 }, { "epoch": 0.8471001757469244, "grad_norm": 1.3020522594451904, "learning_rate": 1.8203944532375378e-06, "loss": 0.3355712890625, "step": 12532 }, { "epoch": 0.8471677707178585, "grad_norm": 1.2680617570877075, "learning_rate": 1.818819298794694e-06, "loss": 0.182586669921875, "step": 12533 }, { "epoch": 0.8472353656887928, "grad_norm": 1.1926038265228271, "learning_rate": 1.8172447821393496e-06, "loss": 0.25848388671875, "step": 12534 }, { "epoch": 0.8473029606597269, "grad_norm": 0.7032752633094788, "learning_rate": 1.8156709033476855e-06, "loss": 0.1495513916015625, "step": 12535 }, { "epoch": 0.8473705556306611, "grad_norm": 0.674426257610321, "learning_rate": 1.8140976624958538e-06, "loss": 0.1218414306640625, "step": 12536 }, { "epoch": 0.8474381506015952, "grad_norm": 1.0411126613616943, "learning_rate": 1.8125250596599807e-06, "loss": 0.19387054443359375, "step": 12537 }, { "epoch": 0.8475057455725294, "grad_norm": 1.476836919784546, "learning_rate": 1.8109530949161585e-06, "loss": 0.235595703125, "step": 12538 }, { "epoch": 0.8475733405434636, "grad_norm": 1.4017879962921143, "learning_rate": 1.8093817683404428e-06, "loss": 0.1586761474609375, "step": 12539 }, { "epoch": 0.8476409355143977, "grad_norm": 1.6454246044158936, "learning_rate": 1.8078110800088748e-06, "loss": 0.1764678955078125, "step": 12540 }, { "epoch": 0.8477085304853319, "grad_norm": 1.0102101564407349, "learning_rate": 1.8062410299974453e-06, "loss": 0.24468994140625, "step": 12541 }, { "epoch": 0.847776125456266, "grad_norm": 1.0496675968170166, "learning_rate": 1.8046716183821233e-06, "loss": 0.1946258544921875, "step": 12542 }, { "epoch": 0.8478437204272002, "grad_norm": 1.1118345260620117, "learning_rate": 1.8031028452388548e-06, "loss": 0.272247314453125, "step": 12543 }, { "epoch": 0.8479113153981344, "grad_norm": 0.7632459402084351, "learning_rate": 1.8015347106435343e-06, "loss": 0.16820144653320312, "step": 12544 }, { "epoch": 0.8479789103690686, "grad_norm": 2.1816420555114746, "learning_rate": 1.7999672146720525e-06, "loss": 0.32330322265625, "step": 12545 }, { "epoch": 0.8480465053400027, "grad_norm": 1.3105436563491821, "learning_rate": 1.7984003574002406e-06, "loss": 0.200958251953125, "step": 12546 }, { "epoch": 0.8481141003109368, "grad_norm": 1.2296643257141113, "learning_rate": 1.7968341389039212e-06, "loss": 0.1916961669921875, "step": 12547 }, { "epoch": 0.848181695281871, "grad_norm": 1.4148690700531006, "learning_rate": 1.795268559258877e-06, "loss": 0.19598388671875, "step": 12548 }, { "epoch": 0.8482492902528052, "grad_norm": 1.769376516342163, "learning_rate": 1.793703618540859e-06, "loss": 0.28253173828125, "step": 12549 }, { "epoch": 0.8483168852237394, "grad_norm": 1.4724849462509155, "learning_rate": 1.7921393168255885e-06, "loss": 0.267974853515625, "step": 12550 }, { "epoch": 0.8483844801946735, "grad_norm": 1.7983945608139038, "learning_rate": 1.7905756541887563e-06, "loss": 0.2357635498046875, "step": 12551 }, { "epoch": 0.8484520751656077, "grad_norm": 1.0103394985198975, "learning_rate": 1.789012630706024e-06, "loss": 0.2420654296875, "step": 12552 }, { "epoch": 0.8485196701365418, "grad_norm": 1.4259202480316162, "learning_rate": 1.7874502464530173e-06, "loss": 0.23095703125, "step": 12553 }, { "epoch": 0.848587265107476, "grad_norm": 0.8815598487854004, "learning_rate": 1.785888501505336e-06, "loss": 0.14740467071533203, "step": 12554 }, { "epoch": 0.8486548600784102, "grad_norm": 1.5088341236114502, "learning_rate": 1.7843273959385449e-06, "loss": 0.25994873046875, "step": 12555 }, { "epoch": 0.8487224550493443, "grad_norm": 0.8002213835716248, "learning_rate": 1.7827669298281861e-06, "loss": 0.2192535400390625, "step": 12556 }, { "epoch": 0.8487900500202785, "grad_norm": 0.7516466379165649, "learning_rate": 1.781207103249755e-06, "loss": 0.1922607421875, "step": 12557 }, { "epoch": 0.8488576449912126, "grad_norm": 1.179343819618225, "learning_rate": 1.7796479162787376e-06, "loss": 0.258514404296875, "step": 12558 }, { "epoch": 0.8489252399621469, "grad_norm": 1.98817777633667, "learning_rate": 1.7780893689905637e-06, "loss": 0.324432373046875, "step": 12559 }, { "epoch": 0.848992834933081, "grad_norm": 1.4585182666778564, "learning_rate": 1.7765314614606558e-06, "loss": 0.303741455078125, "step": 12560 }, { "epoch": 0.8490604299040151, "grad_norm": 0.6739336848258972, "learning_rate": 1.7749741937643926e-06, "loss": 0.07679176330566406, "step": 12561 }, { "epoch": 0.8491280248749493, "grad_norm": 1.797288179397583, "learning_rate": 1.7734175659771219e-06, "loss": 0.27545166015625, "step": 12562 }, { "epoch": 0.8491956198458834, "grad_norm": 1.500777006149292, "learning_rate": 1.771861578174165e-06, "loss": 0.2239990234375, "step": 12563 }, { "epoch": 0.8492632148168177, "grad_norm": 1.5372191667556763, "learning_rate": 1.7703062304308098e-06, "loss": 0.2078857421875, "step": 12564 }, { "epoch": 0.8493308097877518, "grad_norm": 1.5895745754241943, "learning_rate": 1.7687515228223134e-06, "loss": 0.172943115234375, "step": 12565 }, { "epoch": 0.849398404758686, "grad_norm": 1.5187252759933472, "learning_rate": 1.7671974554239033e-06, "loss": 0.248321533203125, "step": 12566 }, { "epoch": 0.8494659997296201, "grad_norm": 1.3256009817123413, "learning_rate": 1.765644028310775e-06, "loss": 0.2164306640625, "step": 12567 }, { "epoch": 0.8495335947005542, "grad_norm": 0.9776184558868408, "learning_rate": 1.7640912415580878e-06, "loss": 0.242034912109375, "step": 12568 }, { "epoch": 0.8496011896714885, "grad_norm": 1.0889283418655396, "learning_rate": 1.7625390952409854e-06, "loss": 0.1434478759765625, "step": 12569 }, { "epoch": 0.8496687846424226, "grad_norm": 1.1080169677734375, "learning_rate": 1.760987589434559e-06, "loss": 0.20538330078125, "step": 12570 }, { "epoch": 0.8497363796133568, "grad_norm": 0.8435669541358948, "learning_rate": 1.7594367242138886e-06, "loss": 0.1589508056640625, "step": 12571 }, { "epoch": 0.8498039745842909, "grad_norm": 1.3890612125396729, "learning_rate": 1.7578864996540128e-06, "loss": 0.2469329833984375, "step": 12572 }, { "epoch": 0.8498715695552251, "grad_norm": 1.0831550359725952, "learning_rate": 1.7563369158299381e-06, "loss": 0.213623046875, "step": 12573 }, { "epoch": 0.8499391645261593, "grad_norm": 1.7316581010818481, "learning_rate": 1.754787972816646e-06, "loss": 0.265380859375, "step": 12574 }, { "epoch": 0.8500067594970934, "grad_norm": 1.3942768573760986, "learning_rate": 1.753239670689082e-06, "loss": 0.21075439453125, "step": 12575 }, { "epoch": 0.8500743544680276, "grad_norm": 1.0540801286697388, "learning_rate": 1.7516920095221639e-06, "loss": 0.213409423828125, "step": 12576 }, { "epoch": 0.8501419494389617, "grad_norm": 1.4996252059936523, "learning_rate": 1.7501449893907772e-06, "loss": 0.290771484375, "step": 12577 }, { "epoch": 0.8502095444098959, "grad_norm": 1.4173994064331055, "learning_rate": 1.7485986103697753e-06, "loss": 0.205657958984375, "step": 12578 }, { "epoch": 0.85027713938083, "grad_norm": 1.4227098226547241, "learning_rate": 1.7470528725339818e-06, "loss": 0.282196044921875, "step": 12579 }, { "epoch": 0.8503447343517643, "grad_norm": 1.1057322025299072, "learning_rate": 1.74550777595819e-06, "loss": 0.224029541015625, "step": 12580 }, { "epoch": 0.8504123293226984, "grad_norm": 1.6719510555267334, "learning_rate": 1.7439633207171585e-06, "loss": 0.20599365234375, "step": 12581 }, { "epoch": 0.8504799242936325, "grad_norm": 1.323325753211975, "learning_rate": 1.742419506885624e-06, "loss": 0.23613739013671875, "step": 12582 }, { "epoch": 0.8505475192645667, "grad_norm": 1.2899291515350342, "learning_rate": 1.7408763345382772e-06, "loss": 0.1943511962890625, "step": 12583 }, { "epoch": 0.8506151142355008, "grad_norm": 1.8174593448638916, "learning_rate": 1.7393338037497948e-06, "loss": 0.308837890625, "step": 12584 }, { "epoch": 0.8506827092064351, "grad_norm": 1.485438585281372, "learning_rate": 1.7377919145948086e-06, "loss": 0.180938720703125, "step": 12585 }, { "epoch": 0.8507503041773692, "grad_norm": 1.8025261163711548, "learning_rate": 1.7362506671479272e-06, "loss": 0.343963623046875, "step": 12586 }, { "epoch": 0.8508178991483034, "grad_norm": 1.0160881280899048, "learning_rate": 1.7347100614837247e-06, "loss": 0.1866302490234375, "step": 12587 }, { "epoch": 0.8508854941192375, "grad_norm": 1.569461703300476, "learning_rate": 1.7331700976767444e-06, "loss": 0.2123260498046875, "step": 12588 }, { "epoch": 0.8509530890901716, "grad_norm": 1.4267176389694214, "learning_rate": 1.7316307758015022e-06, "loss": 0.251495361328125, "step": 12589 }, { "epoch": 0.8510206840611059, "grad_norm": 1.414119839668274, "learning_rate": 1.730092095932476e-06, "loss": 0.22760009765625, "step": 12590 }, { "epoch": 0.85108827903204, "grad_norm": 1.580783486366272, "learning_rate": 1.7285540581441206e-06, "loss": 0.2396240234375, "step": 12591 }, { "epoch": 0.8511558740029742, "grad_norm": 0.7496551871299744, "learning_rate": 1.727016662510854e-06, "loss": 0.151519775390625, "step": 12592 }, { "epoch": 0.8512234689739083, "grad_norm": 1.6835949420928955, "learning_rate": 1.7254799091070638e-06, "loss": 0.232177734375, "step": 12593 }, { "epoch": 0.8512910639448426, "grad_norm": 1.2950762510299683, "learning_rate": 1.723943798007107e-06, "loss": 0.28338623046875, "step": 12594 }, { "epoch": 0.8513586589157767, "grad_norm": 0.8761285543441772, "learning_rate": 1.7224083292853176e-06, "loss": 0.17550897598266602, "step": 12595 }, { "epoch": 0.8514262538867108, "grad_norm": 1.2825180292129517, "learning_rate": 1.7208735030159812e-06, "loss": 0.230865478515625, "step": 12596 }, { "epoch": 0.851493848857645, "grad_norm": 1.4783743619918823, "learning_rate": 1.719339319273368e-06, "loss": 0.2276611328125, "step": 12597 }, { "epoch": 0.8515614438285791, "grad_norm": 1.2981247901916504, "learning_rate": 1.7178057781317141e-06, "loss": 0.24859619140625, "step": 12598 }, { "epoch": 0.8516290387995133, "grad_norm": 1.1765515804290771, "learning_rate": 1.7162728796652099e-06, "loss": 0.24407958984375, "step": 12599 }, { "epoch": 0.8516966337704475, "grad_norm": 0.8063964247703552, "learning_rate": 1.7147406239480395e-06, "loss": 0.144561767578125, "step": 12600 }, { "epoch": 0.8517642287413817, "grad_norm": 1.3650968074798584, "learning_rate": 1.7132090110543369e-06, "loss": 0.266876220703125, "step": 12601 }, { "epoch": 0.8518318237123158, "grad_norm": 0.9794127941131592, "learning_rate": 1.711678041058213e-06, "loss": 0.197662353515625, "step": 12602 }, { "epoch": 0.8518994186832499, "grad_norm": 1.2242525815963745, "learning_rate": 1.7101477140337428e-06, "loss": 0.2874755859375, "step": 12603 }, { "epoch": 0.8519670136541841, "grad_norm": 1.1831353902816772, "learning_rate": 1.7086180300549765e-06, "loss": 0.23980712890625, "step": 12604 }, { "epoch": 0.8520346086251183, "grad_norm": 1.1405197381973267, "learning_rate": 1.7070889891959273e-06, "loss": 0.2855224609375, "step": 12605 }, { "epoch": 0.8521022035960525, "grad_norm": 0.8701456785202026, "learning_rate": 1.7055605915305816e-06, "loss": 0.145660400390625, "step": 12606 }, { "epoch": 0.8521697985669866, "grad_norm": 2.094264507293701, "learning_rate": 1.7040328371328867e-06, "loss": 0.22601318359375, "step": 12607 }, { "epoch": 0.8522373935379208, "grad_norm": 1.1622837781906128, "learning_rate": 1.7025057260767785e-06, "loss": 0.21417236328125, "step": 12608 }, { "epoch": 0.8523049885088549, "grad_norm": 1.5528212785720825, "learning_rate": 1.7009792584361312e-06, "loss": 0.264495849609375, "step": 12609 }, { "epoch": 0.8523725834797891, "grad_norm": 2.014910936355591, "learning_rate": 1.6994534342848172e-06, "loss": 0.32952880859375, "step": 12610 }, { "epoch": 0.8524401784507233, "grad_norm": 1.4380687475204468, "learning_rate": 1.6979282536966645e-06, "loss": 0.253204345703125, "step": 12611 }, { "epoch": 0.8525077734216574, "grad_norm": 1.6747915744781494, "learning_rate": 1.6964037167454605e-06, "loss": 0.28759765625, "step": 12612 }, { "epoch": 0.8525753683925916, "grad_norm": 1.867611289024353, "learning_rate": 1.6948798235049829e-06, "loss": 0.328948974609375, "step": 12613 }, { "epoch": 0.8526429633635257, "grad_norm": 1.3943331241607666, "learning_rate": 1.6933565740489625e-06, "loss": 0.206695556640625, "step": 12614 }, { "epoch": 0.85271055833446, "grad_norm": 0.809726357460022, "learning_rate": 1.6918339684511041e-06, "loss": 0.1656494140625, "step": 12615 }, { "epoch": 0.8527781533053941, "grad_norm": 0.9700350165367126, "learning_rate": 1.6903120067850819e-06, "loss": 0.22354888916015625, "step": 12616 }, { "epoch": 0.8528457482763282, "grad_norm": 1.5079351663589478, "learning_rate": 1.6887906891245353e-06, "loss": 0.3040771484375, "step": 12617 }, { "epoch": 0.8529133432472624, "grad_norm": 1.3219259977340698, "learning_rate": 1.6872700155430753e-06, "loss": 0.2738037109375, "step": 12618 }, { "epoch": 0.8529809382181965, "grad_norm": 1.691559076309204, "learning_rate": 1.6857499861142883e-06, "loss": 0.19477462768554688, "step": 12619 }, { "epoch": 0.8530485331891308, "grad_norm": 1.2451653480529785, "learning_rate": 1.6842306009117104e-06, "loss": 0.225830078125, "step": 12620 }, { "epoch": 0.8531161281600649, "grad_norm": 2.652775526046753, "learning_rate": 1.6827118600088725e-06, "loss": 0.3271484375, "step": 12621 }, { "epoch": 0.8531837231309991, "grad_norm": 1.4372854232788086, "learning_rate": 1.6811937634792513e-06, "loss": 0.265228271484375, "step": 12622 }, { "epoch": 0.8532513181019332, "grad_norm": 2.5812931060791016, "learning_rate": 1.6796763113963027e-06, "loss": 0.302337646484375, "step": 12623 }, { "epoch": 0.8533189130728673, "grad_norm": 1.9946457147598267, "learning_rate": 1.6781595038334563e-06, "loss": 0.25433349609375, "step": 12624 }, { "epoch": 0.8533865080438016, "grad_norm": 1.7394636869430542, "learning_rate": 1.676643340864097e-06, "loss": 0.299163818359375, "step": 12625 }, { "epoch": 0.8534541030147357, "grad_norm": 1.3230873346328735, "learning_rate": 1.6751278225615907e-06, "loss": 0.2160186767578125, "step": 12626 }, { "epoch": 0.8535216979856699, "grad_norm": 1.3595306873321533, "learning_rate": 1.6736129489992673e-06, "loss": 0.280242919921875, "step": 12627 }, { "epoch": 0.853589292956604, "grad_norm": 1.522841453552246, "learning_rate": 1.6720987202504268e-06, "loss": 0.3155364990234375, "step": 12628 }, { "epoch": 0.8536568879275382, "grad_norm": 0.5976607203483582, "learning_rate": 1.6705851363883352e-06, "loss": 0.10091400146484375, "step": 12629 }, { "epoch": 0.8537244828984724, "grad_norm": 1.4089792966842651, "learning_rate": 1.6690721974862288e-06, "loss": 0.26263427734375, "step": 12630 }, { "epoch": 0.8537920778694065, "grad_norm": 1.8662598133087158, "learning_rate": 1.6675599036173094e-06, "loss": 0.285308837890625, "step": 12631 }, { "epoch": 0.8538596728403407, "grad_norm": 1.2345184087753296, "learning_rate": 1.6660482548547633e-06, "loss": 0.279693603515625, "step": 12632 }, { "epoch": 0.8539272678112748, "grad_norm": 1.3641254901885986, "learning_rate": 1.6645372512717223e-06, "loss": 0.2384033203125, "step": 12633 }, { "epoch": 0.853994862782209, "grad_norm": 0.6852747201919556, "learning_rate": 1.663026892941299e-06, "loss": 0.116302490234375, "step": 12634 }, { "epoch": 0.8540624577531432, "grad_norm": 1.6220146417617798, "learning_rate": 1.6615171799365803e-06, "loss": 0.27685546875, "step": 12635 }, { "epoch": 0.8541300527240774, "grad_norm": 1.1624187231063843, "learning_rate": 1.6600081123306077e-06, "loss": 0.240081787109375, "step": 12636 }, { "epoch": 0.8541976476950115, "grad_norm": 1.6620904207229614, "learning_rate": 1.6584996901964077e-06, "loss": 0.33282470703125, "step": 12637 }, { "epoch": 0.8542652426659456, "grad_norm": 1.8037928342819214, "learning_rate": 1.6569919136069573e-06, "loss": 0.2531585693359375, "step": 12638 }, { "epoch": 0.8543328376368798, "grad_norm": 1.1289423704147339, "learning_rate": 1.655484782635221e-06, "loss": 0.219390869140625, "step": 12639 }, { "epoch": 0.854400432607814, "grad_norm": 2.2308883666992188, "learning_rate": 1.6539782973541173e-06, "loss": 0.3115234375, "step": 12640 }, { "epoch": 0.8544680275787482, "grad_norm": 1.9524545669555664, "learning_rate": 1.652472457836543e-06, "loss": 0.315338134765625, "step": 12641 }, { "epoch": 0.8545356225496823, "grad_norm": 1.6175788640975952, "learning_rate": 1.6509672641553596e-06, "loss": 0.2314910888671875, "step": 12642 }, { "epoch": 0.8546032175206165, "grad_norm": 1.2090717554092407, "learning_rate": 1.6494627163833958e-06, "loss": 0.20623779296875, "step": 12643 }, { "epoch": 0.8546708124915506, "grad_norm": 1.3648484945297241, "learning_rate": 1.6479588145934483e-06, "loss": 0.18373489379882812, "step": 12644 }, { "epoch": 0.8547384074624848, "grad_norm": 1.5997602939605713, "learning_rate": 1.6464555588582953e-06, "loss": 0.307861328125, "step": 12645 }, { "epoch": 0.854806002433419, "grad_norm": 1.241513729095459, "learning_rate": 1.6449529492506655e-06, "loss": 0.30615234375, "step": 12646 }, { "epoch": 0.8548735974043531, "grad_norm": 1.262593388557434, "learning_rate": 1.6434509858432607e-06, "loss": 0.259979248046875, "step": 12647 }, { "epoch": 0.8549411923752873, "grad_norm": 1.0079749822616577, "learning_rate": 1.6419496687087676e-06, "loss": 0.189117431640625, "step": 12648 }, { "epoch": 0.8550087873462214, "grad_norm": 1.3934924602508545, "learning_rate": 1.6404489979198168e-06, "loss": 0.2458953857421875, "step": 12649 }, { "epoch": 0.8550763823171557, "grad_norm": 1.2812116146087646, "learning_rate": 1.6389489735490315e-06, "loss": 0.221343994140625, "step": 12650 }, { "epoch": 0.8551439772880898, "grad_norm": 1.27891206741333, "learning_rate": 1.6374495956689805e-06, "loss": 0.21246337890625, "step": 12651 }, { "epoch": 0.8552115722590239, "grad_norm": 1.092752456665039, "learning_rate": 1.6359508643522225e-06, "loss": 0.23590087890625, "step": 12652 }, { "epoch": 0.8552791672299581, "grad_norm": 1.5558125972747803, "learning_rate": 1.634452779671271e-06, "loss": 0.28839111328125, "step": 12653 }, { "epoch": 0.8553467622008922, "grad_norm": 1.369524598121643, "learning_rate": 1.6329553416986133e-06, "loss": 0.2365875244140625, "step": 12654 }, { "epoch": 0.8554143571718265, "grad_norm": 1.0253576040267944, "learning_rate": 1.6314585505067042e-06, "loss": 0.1739044189453125, "step": 12655 }, { "epoch": 0.8554819521427606, "grad_norm": 1.216259241104126, "learning_rate": 1.6299624061679697e-06, "loss": 0.2577667236328125, "step": 12656 }, { "epoch": 0.8555495471136947, "grad_norm": 1.08980131149292, "learning_rate": 1.6284669087548016e-06, "loss": 0.24945068359375, "step": 12657 }, { "epoch": 0.8556171420846289, "grad_norm": 1.5391227006912231, "learning_rate": 1.6269720583395604e-06, "loss": 0.266510009765625, "step": 12658 }, { "epoch": 0.855684737055563, "grad_norm": 1.6048532724380493, "learning_rate": 1.6254778549945764e-06, "loss": 0.20770263671875, "step": 12659 }, { "epoch": 0.8557523320264973, "grad_norm": 1.3279836177825928, "learning_rate": 1.6239842987921472e-06, "loss": 0.182769775390625, "step": 12660 }, { "epoch": 0.8558199269974314, "grad_norm": 1.3352726697921753, "learning_rate": 1.6224913898045478e-06, "loss": 0.27532958984375, "step": 12661 }, { "epoch": 0.8558875219683656, "grad_norm": 1.5347834825515747, "learning_rate": 1.6209991281040026e-06, "loss": 0.2320098876953125, "step": 12662 }, { "epoch": 0.8559551169392997, "grad_norm": 1.142773151397705, "learning_rate": 1.6195075137627252e-06, "loss": 0.265960693359375, "step": 12663 }, { "epoch": 0.8560227119102338, "grad_norm": 2.6178200244903564, "learning_rate": 1.618016546852888e-06, "loss": 0.267822265625, "step": 12664 }, { "epoch": 0.856090306881168, "grad_norm": 1.1845064163208008, "learning_rate": 1.6165262274466314e-06, "loss": 0.13633346557617188, "step": 12665 }, { "epoch": 0.8561579018521022, "grad_norm": 0.8707521557807922, "learning_rate": 1.6150365556160663e-06, "loss": 0.1885986328125, "step": 12666 }, { "epoch": 0.8562254968230364, "grad_norm": 1.2588540315628052, "learning_rate": 1.613547531433273e-06, "loss": 0.281768798828125, "step": 12667 }, { "epoch": 0.8562930917939705, "grad_norm": 1.1244783401489258, "learning_rate": 1.6120591549703013e-06, "loss": 0.24835205078125, "step": 12668 }, { "epoch": 0.8563606867649047, "grad_norm": 1.6242481470108032, "learning_rate": 1.6105714262991644e-06, "loss": 0.224273681640625, "step": 12669 }, { "epoch": 0.8564282817358388, "grad_norm": 3.763955593109131, "learning_rate": 1.6090843454918503e-06, "loss": 0.338958740234375, "step": 12670 }, { "epoch": 0.856495876706773, "grad_norm": 1.3394509553909302, "learning_rate": 1.6075979126203128e-06, "loss": 0.221771240234375, "step": 12671 }, { "epoch": 0.8565634716777072, "grad_norm": 1.5497190952301025, "learning_rate": 1.6061121277564743e-06, "loss": 0.26345062255859375, "step": 12672 }, { "epoch": 0.8566310666486413, "grad_norm": 1.169639229774475, "learning_rate": 1.6046269909722243e-06, "loss": 0.210479736328125, "step": 12673 }, { "epoch": 0.8566986616195755, "grad_norm": 1.509725570678711, "learning_rate": 1.603142502339432e-06, "loss": 0.26239013671875, "step": 12674 }, { "epoch": 0.8567662565905096, "grad_norm": 1.5922770500183105, "learning_rate": 1.601658661929913e-06, "loss": 0.28070068359375, "step": 12675 }, { "epoch": 0.8568338515614439, "grad_norm": 2.6277830600738525, "learning_rate": 1.600175469815473e-06, "loss": 0.283935546875, "step": 12676 }, { "epoch": 0.856901446532378, "grad_norm": 2.090057849884033, "learning_rate": 1.5986929260678785e-06, "loss": 0.283966064453125, "step": 12677 }, { "epoch": 0.8569690415033121, "grad_norm": 1.483644962310791, "learning_rate": 1.5972110307588616e-06, "loss": 0.225372314453125, "step": 12678 }, { "epoch": 0.8570366364742463, "grad_norm": 2.1022915840148926, "learning_rate": 1.5957297839601254e-06, "loss": 0.216278076171875, "step": 12679 }, { "epoch": 0.8571042314451804, "grad_norm": 1.0165191888809204, "learning_rate": 1.5942491857433423e-06, "loss": 0.17346954345703125, "step": 12680 }, { "epoch": 0.8571718264161147, "grad_norm": 2.3847386837005615, "learning_rate": 1.5927692361801533e-06, "loss": 0.3955078125, "step": 12681 }, { "epoch": 0.8572394213870488, "grad_norm": 1.172465205192566, "learning_rate": 1.5912899353421678e-06, "loss": 0.1637420654296875, "step": 12682 }, { "epoch": 0.857307016357983, "grad_norm": 1.1308916807174683, "learning_rate": 1.5898112833009637e-06, "loss": 0.22625732421875, "step": 12683 }, { "epoch": 0.8573746113289171, "grad_norm": 1.2437140941619873, "learning_rate": 1.5883332801280853e-06, "loss": 0.2194366455078125, "step": 12684 }, { "epoch": 0.8574422062998512, "grad_norm": 1.1529836654663086, "learning_rate": 1.5868559258950505e-06, "loss": 0.1614837646484375, "step": 12685 }, { "epoch": 0.8575098012707855, "grad_norm": 0.9761092066764832, "learning_rate": 1.5853792206733369e-06, "loss": 0.19866943359375, "step": 12686 }, { "epoch": 0.8575773962417196, "grad_norm": 2.2270209789276123, "learning_rate": 1.5839031645344092e-06, "loss": 0.301849365234375, "step": 12687 }, { "epoch": 0.8576449912126538, "grad_norm": 1.4057331085205078, "learning_rate": 1.5824277575496722e-06, "loss": 0.306884765625, "step": 12688 }, { "epoch": 0.8577125861835879, "grad_norm": 1.6220486164093018, "learning_rate": 1.580952999790528e-06, "loss": 0.317962646484375, "step": 12689 }, { "epoch": 0.8577801811545221, "grad_norm": 1.4073057174682617, "learning_rate": 1.5794788913283287e-06, "loss": 0.28326416015625, "step": 12690 }, { "epoch": 0.8578477761254563, "grad_norm": 1.2894686460494995, "learning_rate": 1.5780054322344017e-06, "loss": 0.239837646484375, "step": 12691 }, { "epoch": 0.8579153710963904, "grad_norm": 1.5190259218215942, "learning_rate": 1.5765326225800435e-06, "loss": 0.2989501953125, "step": 12692 }, { "epoch": 0.8579829660673246, "grad_norm": 1.3393635749816895, "learning_rate": 1.5750604624365156e-06, "loss": 0.289306640625, "step": 12693 }, { "epoch": 0.8580505610382587, "grad_norm": 1.2849338054656982, "learning_rate": 1.5735889518750523e-06, "loss": 0.237518310546875, "step": 12694 }, { "epoch": 0.858118156009193, "grad_norm": 1.0969268083572388, "learning_rate": 1.572118090966852e-06, "loss": 0.2077789306640625, "step": 12695 }, { "epoch": 0.8581857509801271, "grad_norm": 1.0563665628433228, "learning_rate": 1.5706478797830874e-06, "loss": 0.235137939453125, "step": 12696 }, { "epoch": 0.8582533459510613, "grad_norm": 1.2082473039627075, "learning_rate": 1.5691783183948938e-06, "loss": 0.21112060546875, "step": 12697 }, { "epoch": 0.8583209409219954, "grad_norm": 0.9616526365280151, "learning_rate": 1.567709406873379e-06, "loss": 0.08506202697753906, "step": 12698 }, { "epoch": 0.8583885358929295, "grad_norm": 1.7194316387176514, "learning_rate": 1.5662411452896124e-06, "loss": 0.23919677734375, "step": 12699 }, { "epoch": 0.8584561308638637, "grad_norm": 1.9194121360778809, "learning_rate": 1.5647735337146513e-06, "loss": 0.2568359375, "step": 12700 }, { "epoch": 0.8585237258347979, "grad_norm": 1.52781343460083, "learning_rate": 1.5633065722194934e-06, "loss": 0.282928466796875, "step": 12701 }, { "epoch": 0.8585913208057321, "grad_norm": 1.0016138553619385, "learning_rate": 1.561840260875127e-06, "loss": 0.1773834228515625, "step": 12702 }, { "epoch": 0.8586589157766662, "grad_norm": 2.014472723007202, "learning_rate": 1.5603745997525036e-06, "loss": 0.2706298828125, "step": 12703 }, { "epoch": 0.8587265107476004, "grad_norm": 0.8526631593704224, "learning_rate": 1.5589095889225301e-06, "loss": 0.174468994140625, "step": 12704 }, { "epoch": 0.8587941057185345, "grad_norm": 0.9263810515403748, "learning_rate": 1.5574452284561047e-06, "loss": 0.2374725341796875, "step": 12705 }, { "epoch": 0.8588617006894687, "grad_norm": 0.9519993662834167, "learning_rate": 1.5559815184240773e-06, "loss": 0.2226715087890625, "step": 12706 }, { "epoch": 0.8589292956604029, "grad_norm": 0.6891090869903564, "learning_rate": 1.554518458897271e-06, "loss": 0.1502532958984375, "step": 12707 }, { "epoch": 0.858996890631337, "grad_norm": 0.7881636619567871, "learning_rate": 1.553056049946478e-06, "loss": 0.10009765625, "step": 12708 }, { "epoch": 0.8590644856022712, "grad_norm": 0.9195579290390015, "learning_rate": 1.551594291642458e-06, "loss": 0.19757080078125, "step": 12709 }, { "epoch": 0.8591320805732053, "grad_norm": 1.058647871017456, "learning_rate": 1.5501331840559396e-06, "loss": 0.201171875, "step": 12710 }, { "epoch": 0.8591996755441396, "grad_norm": 1.3443994522094727, "learning_rate": 1.5486727272576278e-06, "loss": 0.281829833984375, "step": 12711 }, { "epoch": 0.8592672705150737, "grad_norm": 1.217871904373169, "learning_rate": 1.5472129213181745e-06, "loss": 0.2027587890625, "step": 12712 }, { "epoch": 0.8593348654860078, "grad_norm": 1.3976672887802124, "learning_rate": 1.5457537663082299e-06, "loss": 0.2591552734375, "step": 12713 }, { "epoch": 0.859402460456942, "grad_norm": 0.6298932433128357, "learning_rate": 1.5442952622983825e-06, "loss": 0.12197113037109375, "step": 12714 }, { "epoch": 0.8594700554278761, "grad_norm": 1.6500455141067505, "learning_rate": 1.5428374093592124e-06, "loss": 0.298797607421875, "step": 12715 }, { "epoch": 0.8595376503988104, "grad_norm": 1.2602096796035767, "learning_rate": 1.5413802075612615e-06, "loss": 0.200958251953125, "step": 12716 }, { "epoch": 0.8596052453697445, "grad_norm": 0.9350396990776062, "learning_rate": 1.5399236569750286e-06, "loss": 0.21826171875, "step": 12717 }, { "epoch": 0.8596728403406787, "grad_norm": 1.8676352500915527, "learning_rate": 1.5384677576709987e-06, "loss": 0.29168701171875, "step": 12718 }, { "epoch": 0.8597404353116128, "grad_norm": 1.0233619213104248, "learning_rate": 1.537012509719616e-06, "loss": 0.1779022216796875, "step": 12719 }, { "epoch": 0.8598080302825469, "grad_norm": 1.3781739473342896, "learning_rate": 1.5355579131912934e-06, "loss": 0.21435546875, "step": 12720 }, { "epoch": 0.8598756252534812, "grad_norm": 1.2742669582366943, "learning_rate": 1.5341039681564134e-06, "loss": 0.219329833984375, "step": 12721 }, { "epoch": 0.8599432202244153, "grad_norm": 1.0527583360671997, "learning_rate": 1.5326506746853265e-06, "loss": 0.24267578125, "step": 12722 }, { "epoch": 0.8600108151953495, "grad_norm": 1.626363754272461, "learning_rate": 1.5311980328483494e-06, "loss": 0.33099365234375, "step": 12723 }, { "epoch": 0.8600784101662836, "grad_norm": 1.195544958114624, "learning_rate": 1.5297460427157794e-06, "loss": 0.197235107421875, "step": 12724 }, { "epoch": 0.8601460051372178, "grad_norm": 2.360379219055176, "learning_rate": 1.5282947043578605e-06, "loss": 0.29638671875, "step": 12725 }, { "epoch": 0.860213600108152, "grad_norm": 1.0646785497665405, "learning_rate": 1.5268440178448263e-06, "loss": 0.16123199462890625, "step": 12726 }, { "epoch": 0.8602811950790861, "grad_norm": 1.062532663345337, "learning_rate": 1.5253939832468704e-06, "loss": 0.1943511962890625, "step": 12727 }, { "epoch": 0.8603487900500203, "grad_norm": 1.5702934265136719, "learning_rate": 1.5239446006341455e-06, "loss": 0.24311065673828125, "step": 12728 }, { "epoch": 0.8604163850209544, "grad_norm": 0.7267982959747314, "learning_rate": 1.522495870076792e-06, "loss": 0.13409423828125, "step": 12729 }, { "epoch": 0.8604839799918886, "grad_norm": 1.245296835899353, "learning_rate": 1.5210477916449006e-06, "loss": 0.1974945068359375, "step": 12730 }, { "epoch": 0.8605515749628228, "grad_norm": 1.4679205417633057, "learning_rate": 1.5196003654085432e-06, "loss": 0.23419189453125, "step": 12731 }, { "epoch": 0.860619169933757, "grad_norm": 2.2552244663238525, "learning_rate": 1.5181535914377543e-06, "loss": 0.309722900390625, "step": 12732 }, { "epoch": 0.8606867649046911, "grad_norm": 1.1058686971664429, "learning_rate": 1.5167074698025357e-06, "loss": 0.254852294921875, "step": 12733 }, { "epoch": 0.8607543598756252, "grad_norm": 1.9482486248016357, "learning_rate": 1.5152620005728636e-06, "loss": 0.29742431640625, "step": 12734 }, { "epoch": 0.8608219548465594, "grad_norm": 1.4299358129501343, "learning_rate": 1.513817183818675e-06, "loss": 0.16057968139648438, "step": 12735 }, { "epoch": 0.8608895498174935, "grad_norm": 1.8039358854293823, "learning_rate": 1.512373019609879e-06, "loss": 0.285797119140625, "step": 12736 }, { "epoch": 0.8609571447884278, "grad_norm": 1.0199681520462036, "learning_rate": 1.51092950801636e-06, "loss": 0.14691162109375, "step": 12737 }, { "epoch": 0.8610247397593619, "grad_norm": 1.4774665832519531, "learning_rate": 1.5094866491079534e-06, "loss": 0.24774169921875, "step": 12738 }, { "epoch": 0.8610923347302961, "grad_norm": 0.7461338639259338, "learning_rate": 1.5080444429544821e-06, "loss": 0.1517791748046875, "step": 12739 }, { "epoch": 0.8611599297012302, "grad_norm": 1.4063811302185059, "learning_rate": 1.50660288962573e-06, "loss": 0.2508544921875, "step": 12740 }, { "epoch": 0.8612275246721643, "grad_norm": 1.2334916591644287, "learning_rate": 1.5051619891914382e-06, "loss": 0.2389068603515625, "step": 12741 }, { "epoch": 0.8612951196430986, "grad_norm": 1.186552882194519, "learning_rate": 1.5037217417213372e-06, "loss": 0.1821441650390625, "step": 12742 }, { "epoch": 0.8613627146140327, "grad_norm": 1.0357449054718018, "learning_rate": 1.5022821472851068e-06, "loss": 0.17510986328125, "step": 12743 }, { "epoch": 0.8614303095849669, "grad_norm": 0.9158915281295776, "learning_rate": 1.5008432059524109e-06, "loss": 0.185455322265625, "step": 12744 }, { "epoch": 0.861497904555901, "grad_norm": 1.866702675819397, "learning_rate": 1.4994049177928704e-06, "loss": 0.279754638671875, "step": 12745 }, { "epoch": 0.8615654995268353, "grad_norm": 1.341186285018921, "learning_rate": 1.4979672828760781e-06, "loss": 0.25628662109375, "step": 12746 }, { "epoch": 0.8616330944977694, "grad_norm": 1.1780587434768677, "learning_rate": 1.4965303012715987e-06, "loss": 0.27593994140625, "step": 12747 }, { "epoch": 0.8617006894687035, "grad_norm": 1.4088764190673828, "learning_rate": 1.4950939730489593e-06, "loss": 0.292877197265625, "step": 12748 }, { "epoch": 0.8617682844396377, "grad_norm": 1.2524763345718384, "learning_rate": 1.4936582982776565e-06, "loss": 0.3031005859375, "step": 12749 }, { "epoch": 0.8618358794105718, "grad_norm": 0.9178246259689331, "learning_rate": 1.492223277027166e-06, "loss": 0.1404876708984375, "step": 12750 }, { "epoch": 0.861903474381506, "grad_norm": 1.2148796319961548, "learning_rate": 1.4907889093669157e-06, "loss": 0.247711181640625, "step": 12751 }, { "epoch": 0.8619710693524402, "grad_norm": 1.226370930671692, "learning_rate": 1.489355195366307e-06, "loss": 0.199493408203125, "step": 12752 }, { "epoch": 0.8620386643233744, "grad_norm": 0.901874840259552, "learning_rate": 1.4879221350947225e-06, "loss": 0.178009033203125, "step": 12753 }, { "epoch": 0.8621062592943085, "grad_norm": 1.377584457397461, "learning_rate": 1.4864897286214885e-06, "loss": 0.1573333740234375, "step": 12754 }, { "epoch": 0.8621738542652426, "grad_norm": 0.9521936774253845, "learning_rate": 1.4850579760159261e-06, "loss": 0.1739501953125, "step": 12755 }, { "epoch": 0.8622414492361768, "grad_norm": 0.7783350348472595, "learning_rate": 1.483626877347305e-06, "loss": 0.1510772705078125, "step": 12756 }, { "epoch": 0.862309044207111, "grad_norm": 1.0078580379486084, "learning_rate": 1.4821964326848748e-06, "loss": 0.25244140625, "step": 12757 }, { "epoch": 0.8623766391780452, "grad_norm": 2.0684781074523926, "learning_rate": 1.4807666420978467e-06, "loss": 0.3092041015625, "step": 12758 }, { "epoch": 0.8624442341489793, "grad_norm": 1.8864372968673706, "learning_rate": 1.4793375056554038e-06, "loss": 0.3194580078125, "step": 12759 }, { "epoch": 0.8625118291199135, "grad_norm": 1.2408263683319092, "learning_rate": 1.4779090234266957e-06, "loss": 0.257232666015625, "step": 12760 }, { "epoch": 0.8625794240908476, "grad_norm": 1.2828500270843506, "learning_rate": 1.4764811954808422e-06, "loss": 0.26575469970703125, "step": 12761 }, { "epoch": 0.8626470190617818, "grad_norm": 1.154999017715454, "learning_rate": 1.4750540218869312e-06, "loss": 0.182525634765625, "step": 12762 }, { "epoch": 0.862714614032716, "grad_norm": 1.2468315362930298, "learning_rate": 1.4736275027140178e-06, "loss": 0.227325439453125, "step": 12763 }, { "epoch": 0.8627822090036501, "grad_norm": 1.5580023527145386, "learning_rate": 1.4722016380311232e-06, "loss": 0.2508544921875, "step": 12764 }, { "epoch": 0.8628498039745843, "grad_norm": 1.2540748119354248, "learning_rate": 1.4707764279072406e-06, "loss": 0.2049102783203125, "step": 12765 }, { "epoch": 0.8629173989455184, "grad_norm": 1.1711769104003906, "learning_rate": 1.469351872411338e-06, "loss": 0.228546142578125, "step": 12766 }, { "epoch": 0.8629849939164527, "grad_norm": 1.2765580415725708, "learning_rate": 1.4679279716123306e-06, "loss": 0.27484130859375, "step": 12767 }, { "epoch": 0.8630525888873868, "grad_norm": 1.396147608757019, "learning_rate": 1.466504725579128e-06, "loss": 0.2494354248046875, "step": 12768 }, { "epoch": 0.8631201838583209, "grad_norm": 0.7437299489974976, "learning_rate": 1.4650821343805888e-06, "loss": 0.1348876953125, "step": 12769 }, { "epoch": 0.8631877788292551, "grad_norm": 1.218286156654358, "learning_rate": 1.4636601980855492e-06, "loss": 0.28759765625, "step": 12770 }, { "epoch": 0.8632553738001892, "grad_norm": 1.4973727464675903, "learning_rate": 1.4622389167628109e-06, "loss": 0.1993255615234375, "step": 12771 }, { "epoch": 0.8633229687711235, "grad_norm": 2.2595112323760986, "learning_rate": 1.460818290481144e-06, "loss": 0.378509521484375, "step": 12772 }, { "epoch": 0.8633905637420576, "grad_norm": 1.4376689195632935, "learning_rate": 1.4593983193092853e-06, "loss": 0.274871826171875, "step": 12773 }, { "epoch": 0.8634581587129918, "grad_norm": 1.019683599472046, "learning_rate": 1.4579790033159497e-06, "loss": 0.217498779296875, "step": 12774 }, { "epoch": 0.8635257536839259, "grad_norm": 1.7843315601348877, "learning_rate": 1.4565603425698038e-06, "loss": 0.30914306640625, "step": 12775 }, { "epoch": 0.86359334865486, "grad_norm": 1.2765241861343384, "learning_rate": 1.4551423371394945e-06, "loss": 0.1778564453125, "step": 12776 }, { "epoch": 0.8636609436257943, "grad_norm": 1.0620839595794678, "learning_rate": 1.453724987093632e-06, "loss": 0.220458984375, "step": 12777 }, { "epoch": 0.8637285385967284, "grad_norm": 0.4581802487373352, "learning_rate": 1.4523082925007964e-06, "loss": 0.08163070678710938, "step": 12778 }, { "epoch": 0.8637961335676626, "grad_norm": 1.1802031993865967, "learning_rate": 1.4508922534295443e-06, "loss": 0.225372314453125, "step": 12779 }, { "epoch": 0.8638637285385967, "grad_norm": 1.6566332578659058, "learning_rate": 1.449476869948378e-06, "loss": 0.281585693359375, "step": 12780 }, { "epoch": 0.863931323509531, "grad_norm": 1.4136590957641602, "learning_rate": 1.4480621421257957e-06, "loss": 0.2940673828125, "step": 12781 }, { "epoch": 0.8639989184804651, "grad_norm": 1.4553858041763306, "learning_rate": 1.4466480700302443e-06, "loss": 0.31243896484375, "step": 12782 }, { "epoch": 0.8640665134513992, "grad_norm": 1.601167917251587, "learning_rate": 1.4452346537301475e-06, "loss": 0.28857421875, "step": 12783 }, { "epoch": 0.8641341084223334, "grad_norm": 1.369827389717102, "learning_rate": 1.4438218932938924e-06, "loss": 0.23406982421875, "step": 12784 }, { "epoch": 0.8642017033932675, "grad_norm": 1.7311122417449951, "learning_rate": 1.4424097887898407e-06, "loss": 0.3148193359375, "step": 12785 }, { "epoch": 0.8642692983642017, "grad_norm": 1.2549017667770386, "learning_rate": 1.440998340286318e-06, "loss": 0.24652099609375, "step": 12786 }, { "epoch": 0.8643368933351359, "grad_norm": 2.2642276287078857, "learning_rate": 1.4395875478516162e-06, "loss": 0.3575439453125, "step": 12787 }, { "epoch": 0.86440448830607, "grad_norm": 1.7477648258209229, "learning_rate": 1.4381774115540008e-06, "loss": 0.29864501953125, "step": 12788 }, { "epoch": 0.8644720832770042, "grad_norm": 1.5828062295913696, "learning_rate": 1.4367679314617022e-06, "loss": 0.24761962890625, "step": 12789 }, { "epoch": 0.8645396782479383, "grad_norm": 1.2980118989944458, "learning_rate": 1.4353591076429207e-06, "loss": 0.273651123046875, "step": 12790 }, { "epoch": 0.8646072732188725, "grad_norm": 0.668350875377655, "learning_rate": 1.4339509401658201e-06, "loss": 0.15248489379882812, "step": 12791 }, { "epoch": 0.8646748681898067, "grad_norm": 1.110617995262146, "learning_rate": 1.4325434290985446e-06, "loss": 0.1568450927734375, "step": 12792 }, { "epoch": 0.8647424631607409, "grad_norm": 1.5477663278579712, "learning_rate": 1.4311365745091876e-06, "loss": 0.1965789794921875, "step": 12793 }, { "epoch": 0.864810058131675, "grad_norm": 1.279238224029541, "learning_rate": 1.42973037646583e-06, "loss": 0.19762420654296875, "step": 12794 }, { "epoch": 0.8648776531026091, "grad_norm": 1.2332725524902344, "learning_rate": 1.4283248350365085e-06, "loss": 0.22418212890625, "step": 12795 }, { "epoch": 0.8649452480735433, "grad_norm": 0.9868276715278625, "learning_rate": 1.4269199502892344e-06, "loss": 0.208251953125, "step": 12796 }, { "epoch": 0.8650128430444775, "grad_norm": 1.8665086030960083, "learning_rate": 1.4255157222919812e-06, "loss": 0.21902847290039062, "step": 12797 }, { "epoch": 0.8650804380154117, "grad_norm": 1.6115573644638062, "learning_rate": 1.4241121511126981e-06, "loss": 0.3011474609375, "step": 12798 }, { "epoch": 0.8651480329863458, "grad_norm": 1.0723289251327515, "learning_rate": 1.4227092368192956e-06, "loss": 0.15606689453125, "step": 12799 }, { "epoch": 0.86521562795728, "grad_norm": 1.4430948495864868, "learning_rate": 1.4213069794796562e-06, "loss": 0.261322021484375, "step": 12800 }, { "epoch": 0.8652832229282141, "grad_norm": 1.5786159038543701, "learning_rate": 1.4199053791616307e-06, "loss": 0.24786376953125, "step": 12801 }, { "epoch": 0.8653508178991483, "grad_norm": 1.4196630716323853, "learning_rate": 1.4185044359330347e-06, "loss": 0.264007568359375, "step": 12802 }, { "epoch": 0.8654184128700825, "grad_norm": 1.085107684135437, "learning_rate": 1.4171041498616604e-06, "loss": 0.21466064453125, "step": 12803 }, { "epoch": 0.8654860078410166, "grad_norm": 1.4569331407546997, "learning_rate": 1.4157045210152543e-06, "loss": 0.188385009765625, "step": 12804 }, { "epoch": 0.8655536028119508, "grad_norm": 1.0117080211639404, "learning_rate": 1.4143055494615481e-06, "loss": 0.22711181640625, "step": 12805 }, { "epoch": 0.8656211977828849, "grad_norm": 1.1024799346923828, "learning_rate": 1.4129072352682216e-06, "loss": 0.1882781982421875, "step": 12806 }, { "epoch": 0.8656887927538192, "grad_norm": 1.8042035102844238, "learning_rate": 1.4115095785029453e-06, "loss": 0.221832275390625, "step": 12807 }, { "epoch": 0.8657563877247533, "grad_norm": 2.3572986125946045, "learning_rate": 1.4101125792333386e-06, "loss": 0.2203369140625, "step": 12808 }, { "epoch": 0.8658239826956874, "grad_norm": 1.2141557931900024, "learning_rate": 1.4087162375270024e-06, "loss": 0.214691162109375, "step": 12809 }, { "epoch": 0.8658915776666216, "grad_norm": 1.1218842267990112, "learning_rate": 1.4073205534514978e-06, "loss": 0.21783447265625, "step": 12810 }, { "epoch": 0.8659591726375557, "grad_norm": 2.0280601978302, "learning_rate": 1.4059255270743554e-06, "loss": 0.3088836669921875, "step": 12811 }, { "epoch": 0.86602676760849, "grad_norm": 1.0785191059112549, "learning_rate": 1.4045311584630765e-06, "loss": 0.1676025390625, "step": 12812 }, { "epoch": 0.8660943625794241, "grad_norm": 1.3068978786468506, "learning_rate": 1.4031374476851306e-06, "loss": 0.216766357421875, "step": 12813 }, { "epoch": 0.8661619575503583, "grad_norm": 1.8143012523651123, "learning_rate": 1.4017443948079533e-06, "loss": 0.312957763671875, "step": 12814 }, { "epoch": 0.8662295525212924, "grad_norm": 1.1415706872940063, "learning_rate": 1.4003519998989444e-06, "loss": 0.225433349609375, "step": 12815 }, { "epoch": 0.8662971474922265, "grad_norm": 0.6876869797706604, "learning_rate": 1.3989602630254883e-06, "loss": 0.17047119140625, "step": 12816 }, { "epoch": 0.8663647424631608, "grad_norm": 1.3442989587783813, "learning_rate": 1.397569184254911e-06, "loss": 0.250579833984375, "step": 12817 }, { "epoch": 0.8664323374340949, "grad_norm": 1.412314772605896, "learning_rate": 1.3961787636545337e-06, "loss": 0.234161376953125, "step": 12818 }, { "epoch": 0.8664999324050291, "grad_norm": 1.4857147932052612, "learning_rate": 1.3947890012916292e-06, "loss": 0.30462646484375, "step": 12819 }, { "epoch": 0.8665675273759632, "grad_norm": 1.7479647397994995, "learning_rate": 1.393399897233442e-06, "loss": 0.2210693359375, "step": 12820 }, { "epoch": 0.8666351223468974, "grad_norm": 0.8860748410224915, "learning_rate": 1.3920114515471887e-06, "loss": 0.11547088623046875, "step": 12821 }, { "epoch": 0.8667027173178316, "grad_norm": 1.098400592803955, "learning_rate": 1.3906236643000436e-06, "loss": 0.2167816162109375, "step": 12822 }, { "epoch": 0.8667703122887657, "grad_norm": 1.898488998413086, "learning_rate": 1.3892365355591646e-06, "loss": 0.227783203125, "step": 12823 }, { "epoch": 0.8668379072596999, "grad_norm": 1.0890942811965942, "learning_rate": 1.3878500653916664e-06, "loss": 0.192138671875, "step": 12824 }, { "epoch": 0.866905502230634, "grad_norm": 1.496188998222351, "learning_rate": 1.3864642538646339e-06, "loss": 0.2406463623046875, "step": 12825 }, { "epoch": 0.8669730972015682, "grad_norm": 1.6830116510391235, "learning_rate": 1.3850791010451231e-06, "loss": 0.197662353515625, "step": 12826 }, { "epoch": 0.8670406921725023, "grad_norm": 0.985359251499176, "learning_rate": 1.3836946070001554e-06, "loss": 0.1558074951171875, "step": 12827 }, { "epoch": 0.8671082871434366, "grad_norm": 1.8302013874053955, "learning_rate": 1.3823107717967193e-06, "loss": 0.3682861328125, "step": 12828 }, { "epoch": 0.8671758821143707, "grad_norm": 0.693556547164917, "learning_rate": 1.3809275955017809e-06, "loss": 0.1139068603515625, "step": 12829 }, { "epoch": 0.8672434770853048, "grad_norm": 1.701210379600525, "learning_rate": 1.3795450781822566e-06, "loss": 0.340362548828125, "step": 12830 }, { "epoch": 0.867311072056239, "grad_norm": 1.653723955154419, "learning_rate": 1.3781632199050482e-06, "loss": 0.250518798828125, "step": 12831 }, { "epoch": 0.8673786670271731, "grad_norm": 1.9734258651733398, "learning_rate": 1.3767820207370202e-06, "loss": 0.250732421875, "step": 12832 }, { "epoch": 0.8674462619981074, "grad_norm": 0.8285049200057983, "learning_rate": 1.3754014807449927e-06, "loss": 0.1043853759765625, "step": 12833 }, { "epoch": 0.8675138569690415, "grad_norm": 1.5871492624282837, "learning_rate": 1.374021599995779e-06, "loss": 0.2901611328125, "step": 12834 }, { "epoch": 0.8675814519399757, "grad_norm": 1.1595780849456787, "learning_rate": 1.372642378556132e-06, "loss": 0.2679595947265625, "step": 12835 }, { "epoch": 0.8676490469109098, "grad_norm": 1.6030203104019165, "learning_rate": 1.3712638164927988e-06, "loss": 0.27081298828125, "step": 12836 }, { "epoch": 0.8677166418818439, "grad_norm": 1.019776463508606, "learning_rate": 1.369885913872479e-06, "loss": 0.2433624267578125, "step": 12837 }, { "epoch": 0.8677842368527782, "grad_norm": 1.2290645837783813, "learning_rate": 1.3685086707618411e-06, "loss": 0.2508544921875, "step": 12838 }, { "epoch": 0.8678518318237123, "grad_norm": 0.8629575967788696, "learning_rate": 1.3671320872275283e-06, "loss": 0.17083740234375, "step": 12839 }, { "epoch": 0.8679194267946465, "grad_norm": 1.6745495796203613, "learning_rate": 1.3657561633361476e-06, "loss": 0.254913330078125, "step": 12840 }, { "epoch": 0.8679870217655806, "grad_norm": 1.2520958185195923, "learning_rate": 1.3643808991542706e-06, "loss": 0.21652984619140625, "step": 12841 }, { "epoch": 0.8680546167365149, "grad_norm": 0.9307299852371216, "learning_rate": 1.3630062947484507e-06, "loss": 0.2149658203125, "step": 12842 }, { "epoch": 0.868122211707449, "grad_norm": 1.3702285289764404, "learning_rate": 1.361632350185188e-06, "loss": 0.30487060546875, "step": 12843 }, { "epoch": 0.8681898066783831, "grad_norm": 1.4527126550674438, "learning_rate": 1.360259065530971e-06, "loss": 0.243804931640625, "step": 12844 }, { "epoch": 0.8682574016493173, "grad_norm": 1.1311241388320923, "learning_rate": 1.3588864408522483e-06, "loss": 0.219970703125, "step": 12845 }, { "epoch": 0.8683249966202514, "grad_norm": 1.183044672012329, "learning_rate": 1.3575144762154269e-06, "loss": 0.228515625, "step": 12846 }, { "epoch": 0.8683925915911856, "grad_norm": 0.8206768035888672, "learning_rate": 1.3561431716869e-06, "loss": 0.1370391845703125, "step": 12847 }, { "epoch": 0.8684601865621198, "grad_norm": 2.142420530319214, "learning_rate": 1.3547725273330164e-06, "loss": 0.29541015625, "step": 12848 }, { "epoch": 0.868527781533054, "grad_norm": 1.3005059957504272, "learning_rate": 1.3534025432200965e-06, "loss": 0.2442474365234375, "step": 12849 }, { "epoch": 0.8685953765039881, "grad_norm": 1.3496956825256348, "learning_rate": 1.3520332194144285e-06, "loss": 0.2346649169921875, "step": 12850 }, { "epoch": 0.8686629714749222, "grad_norm": 0.8634584546089172, "learning_rate": 1.35066455598227e-06, "loss": 0.179473876953125, "step": 12851 }, { "epoch": 0.8687305664458564, "grad_norm": 1.15312659740448, "learning_rate": 1.3492965529898443e-06, "loss": 0.3009033203125, "step": 12852 }, { "epoch": 0.8687981614167906, "grad_norm": 1.8881146907806396, "learning_rate": 1.3479292105033435e-06, "loss": 0.292724609375, "step": 12853 }, { "epoch": 0.8688657563877248, "grad_norm": 2.0431957244873047, "learning_rate": 1.3465625285889265e-06, "loss": 0.296478271484375, "step": 12854 }, { "epoch": 0.8689333513586589, "grad_norm": 1.3085076808929443, "learning_rate": 1.3451965073127303e-06, "loss": 0.255401611328125, "step": 12855 }, { "epoch": 0.8690009463295931, "grad_norm": 1.0930280685424805, "learning_rate": 1.343831146740842e-06, "loss": 0.223175048828125, "step": 12856 }, { "epoch": 0.8690685413005272, "grad_norm": 1.2247552871704102, "learning_rate": 1.3424664469393272e-06, "loss": 0.265960693359375, "step": 12857 }, { "epoch": 0.8691361362714614, "grad_norm": 1.3127548694610596, "learning_rate": 1.3411024079742263e-06, "loss": 0.20489501953125, "step": 12858 }, { "epoch": 0.8692037312423956, "grad_norm": 0.8501427173614502, "learning_rate": 1.339739029911528e-06, "loss": 0.2017059326171875, "step": 12859 }, { "epoch": 0.8692713262133297, "grad_norm": 1.378221035003662, "learning_rate": 1.3383763128172117e-06, "loss": 0.23822021484375, "step": 12860 }, { "epoch": 0.8693389211842639, "grad_norm": 0.9898054599761963, "learning_rate": 1.3370142567572107e-06, "loss": 0.1867218017578125, "step": 12861 }, { "epoch": 0.869406516155198, "grad_norm": 2.187878370285034, "learning_rate": 1.335652861797428e-06, "loss": 0.2652130126953125, "step": 12862 }, { "epoch": 0.8694741111261323, "grad_norm": 1.1309555768966675, "learning_rate": 1.3342921280037385e-06, "loss": 0.250152587890625, "step": 12863 }, { "epoch": 0.8695417060970664, "grad_norm": 1.2928913831710815, "learning_rate": 1.3329320554419817e-06, "loss": 0.1900482177734375, "step": 12864 }, { "epoch": 0.8696093010680005, "grad_norm": 1.6309010982513428, "learning_rate": 1.331572644177963e-06, "loss": 0.277099609375, "step": 12865 }, { "epoch": 0.8696768960389347, "grad_norm": 1.1636523008346558, "learning_rate": 1.3302138942774683e-06, "loss": 0.2276611328125, "step": 12866 }, { "epoch": 0.8697444910098688, "grad_norm": 1.3980770111083984, "learning_rate": 1.328855805806235e-06, "loss": 0.16349029541015625, "step": 12867 }, { "epoch": 0.8698120859808031, "grad_norm": 1.6355087757110596, "learning_rate": 1.3274983788299771e-06, "loss": 0.269134521484375, "step": 12868 }, { "epoch": 0.8698796809517372, "grad_norm": 1.4377349615097046, "learning_rate": 1.3261416134143772e-06, "loss": 0.26800537109375, "step": 12869 }, { "epoch": 0.8699472759226714, "grad_norm": 1.2361924648284912, "learning_rate": 1.3247855096250795e-06, "loss": 0.18414306640625, "step": 12870 }, { "epoch": 0.8700148708936055, "grad_norm": 1.5488888025283813, "learning_rate": 1.3234300675277094e-06, "loss": 0.279296875, "step": 12871 }, { "epoch": 0.8700824658645396, "grad_norm": 1.1869195699691772, "learning_rate": 1.32207528718784e-06, "loss": 0.2725830078125, "step": 12872 }, { "epoch": 0.8701500608354739, "grad_norm": 1.7171342372894287, "learning_rate": 1.3207211686710352e-06, "loss": 0.2321014404296875, "step": 12873 }, { "epoch": 0.870217655806408, "grad_norm": 1.6546845436096191, "learning_rate": 1.3193677120428095e-06, "loss": 0.233856201171875, "step": 12874 }, { "epoch": 0.8702852507773422, "grad_norm": 1.2231489419937134, "learning_rate": 1.3180149173686517e-06, "loss": 0.20562744140625, "step": 12875 }, { "epoch": 0.8703528457482763, "grad_norm": 1.4605122804641724, "learning_rate": 1.3166627847140216e-06, "loss": 0.212738037109375, "step": 12876 }, { "epoch": 0.8704204407192105, "grad_norm": 1.1578359603881836, "learning_rate": 1.3153113141443402e-06, "loss": 0.251251220703125, "step": 12877 }, { "epoch": 0.8704880356901447, "grad_norm": 1.1834865808486938, "learning_rate": 1.3139605057249999e-06, "loss": 0.19620895385742188, "step": 12878 }, { "epoch": 0.8705556306610788, "grad_norm": 1.3923557996749878, "learning_rate": 1.3126103595213667e-06, "loss": 0.244293212890625, "step": 12879 }, { "epoch": 0.870623225632013, "grad_norm": 1.0986515283584595, "learning_rate": 1.3112608755987637e-06, "loss": 0.1686859130859375, "step": 12880 }, { "epoch": 0.8706908206029471, "grad_norm": 1.0320955514907837, "learning_rate": 1.30991205402249e-06, "loss": 0.1732940673828125, "step": 12881 }, { "epoch": 0.8707584155738813, "grad_norm": 2.081495523452759, "learning_rate": 1.3085638948578072e-06, "loss": 0.277679443359375, "step": 12882 }, { "epoch": 0.8708260105448155, "grad_norm": 0.687572717666626, "learning_rate": 1.3072163981699476e-06, "loss": 0.13473129272460938, "step": 12883 }, { "epoch": 0.8708936055157497, "grad_norm": 0.7592830061912537, "learning_rate": 1.3058695640241176e-06, "loss": 0.1680908203125, "step": 12884 }, { "epoch": 0.8709612004866838, "grad_norm": 1.2981704473495483, "learning_rate": 1.3045233924854766e-06, "loss": 0.25244140625, "step": 12885 }, { "epoch": 0.8710287954576179, "grad_norm": 0.903210461139679, "learning_rate": 1.3031778836191677e-06, "loss": 0.1595916748046875, "step": 12886 }, { "epoch": 0.8710963904285521, "grad_norm": 1.5601693391799927, "learning_rate": 1.301833037490292e-06, "loss": 0.226959228515625, "step": 12887 }, { "epoch": 0.8711639853994863, "grad_norm": 1.3716914653778076, "learning_rate": 1.3004888541639209e-06, "loss": 0.31134033203125, "step": 12888 }, { "epoch": 0.8712315803704205, "grad_norm": 0.9723670482635498, "learning_rate": 1.2991453337050973e-06, "loss": 0.234405517578125, "step": 12889 }, { "epoch": 0.8712991753413546, "grad_norm": 1.5136545896530151, "learning_rate": 1.297802476178826e-06, "loss": 0.2991943359375, "step": 12890 }, { "epoch": 0.8713667703122888, "grad_norm": 0.8592450618743896, "learning_rate": 1.2964602816500849e-06, "loss": 0.1662445068359375, "step": 12891 }, { "epoch": 0.8714343652832229, "grad_norm": 1.602102518081665, "learning_rate": 1.2951187501838152e-06, "loss": 0.2221221923828125, "step": 12892 }, { "epoch": 0.871501960254157, "grad_norm": 1.7243869304656982, "learning_rate": 1.293777881844932e-06, "loss": 0.2686767578125, "step": 12893 }, { "epoch": 0.8715695552250913, "grad_norm": 2.723200559616089, "learning_rate": 1.2924376766983081e-06, "loss": 0.2890625, "step": 12894 }, { "epoch": 0.8716371501960254, "grad_norm": 1.8083192110061646, "learning_rate": 1.2910981348088036e-06, "loss": 0.2836761474609375, "step": 12895 }, { "epoch": 0.8717047451669596, "grad_norm": 1.9162673950195312, "learning_rate": 1.2897592562412197e-06, "loss": 0.2377471923828125, "step": 12896 }, { "epoch": 0.8717723401378937, "grad_norm": 0.9952794313430786, "learning_rate": 1.2884210410603513e-06, "loss": 0.2111358642578125, "step": 12897 }, { "epoch": 0.871839935108828, "grad_norm": 1.6218812465667725, "learning_rate": 1.2870834893309386e-06, "loss": 0.2969970703125, "step": 12898 }, { "epoch": 0.8719075300797621, "grad_norm": 1.2629575729370117, "learning_rate": 1.2857466011177094e-06, "loss": 0.29730224609375, "step": 12899 }, { "epoch": 0.8719751250506962, "grad_norm": 1.1283358335494995, "learning_rate": 1.284410376485347e-06, "loss": 0.24973297119140625, "step": 12900 }, { "epoch": 0.8720427200216304, "grad_norm": 0.9954634308815002, "learning_rate": 1.2830748154985084e-06, "loss": 0.193023681640625, "step": 12901 }, { "epoch": 0.8721103149925645, "grad_norm": 1.318585991859436, "learning_rate": 1.281739918221813e-06, "loss": 0.207122802734375, "step": 12902 }, { "epoch": 0.8721779099634988, "grad_norm": 0.7705079913139343, "learning_rate": 1.2804056847198547e-06, "loss": 0.11075973510742188, "step": 12903 }, { "epoch": 0.8722455049344329, "grad_norm": 1.077645182609558, "learning_rate": 1.2790721150571899e-06, "loss": 0.17601776123046875, "step": 12904 }, { "epoch": 0.8723130999053671, "grad_norm": 0.8889378309249878, "learning_rate": 1.277739209298347e-06, "loss": 0.1715087890625, "step": 12905 }, { "epoch": 0.8723806948763012, "grad_norm": 1.1272138357162476, "learning_rate": 1.2764069675078178e-06, "loss": 0.25, "step": 12906 }, { "epoch": 0.8724482898472353, "grad_norm": 1.2784035205841064, "learning_rate": 1.2750753897500638e-06, "loss": 0.219268798828125, "step": 12907 }, { "epoch": 0.8725158848181696, "grad_norm": 1.4834067821502686, "learning_rate": 1.2737444760895205e-06, "loss": 0.29046630859375, "step": 12908 }, { "epoch": 0.8725834797891037, "grad_norm": 1.3417880535125732, "learning_rate": 1.272414226590578e-06, "loss": 0.218841552734375, "step": 12909 }, { "epoch": 0.8726510747600379, "grad_norm": 1.3804705142974854, "learning_rate": 1.2710846413176114e-06, "loss": 0.21673583984375, "step": 12910 }, { "epoch": 0.872718669730972, "grad_norm": 1.3196570873260498, "learning_rate": 1.2697557203349441e-06, "loss": 0.254974365234375, "step": 12911 }, { "epoch": 0.8727862647019062, "grad_norm": 1.0391336679458618, "learning_rate": 1.268427463706885e-06, "loss": 0.20550537109375, "step": 12912 }, { "epoch": 0.8728538596728403, "grad_norm": 1.2330316305160522, "learning_rate": 1.2670998714977006e-06, "loss": 0.2239227294921875, "step": 12913 }, { "epoch": 0.8729214546437745, "grad_norm": 0.8191518187522888, "learning_rate": 1.2657729437716282e-06, "loss": 0.18267822265625, "step": 12914 }, { "epoch": 0.8729890496147087, "grad_norm": 1.7857069969177246, "learning_rate": 1.264446680592873e-06, "loss": 0.314300537109375, "step": 12915 }, { "epoch": 0.8730566445856428, "grad_norm": 1.5063402652740479, "learning_rate": 1.2631210820256084e-06, "loss": 0.34173583984375, "step": 12916 }, { "epoch": 0.873124239556577, "grad_norm": 1.2213729619979858, "learning_rate": 1.2617961481339751e-06, "loss": 0.20001220703125, "step": 12917 }, { "epoch": 0.8731918345275111, "grad_norm": 1.288440465927124, "learning_rate": 1.2604718789820802e-06, "loss": 0.244873046875, "step": 12918 }, { "epoch": 0.8732594294984453, "grad_norm": 0.7873660326004028, "learning_rate": 1.2591482746340005e-06, "loss": 0.1524658203125, "step": 12919 }, { "epoch": 0.8733270244693795, "grad_norm": 0.8858488202095032, "learning_rate": 1.25782533515378e-06, "loss": 0.174530029296875, "step": 12920 }, { "epoch": 0.8733946194403136, "grad_norm": 1.559400200843811, "learning_rate": 1.2565030606054357e-06, "loss": 0.272430419921875, "step": 12921 }, { "epoch": 0.8734622144112478, "grad_norm": 1.1116827726364136, "learning_rate": 1.2551814510529364e-06, "loss": 0.190338134765625, "step": 12922 }, { "epoch": 0.8735298093821819, "grad_norm": 1.1940287351608276, "learning_rate": 1.253860506560241e-06, "loss": 0.2388916015625, "step": 12923 }, { "epoch": 0.8735974043531162, "grad_norm": 1.2684588432312012, "learning_rate": 1.25254022719126e-06, "loss": 0.24737548828125, "step": 12924 }, { "epoch": 0.8736649993240503, "grad_norm": 2.1637487411499023, "learning_rate": 1.2512206130098774e-06, "loss": 0.34649658203125, "step": 12925 }, { "epoch": 0.8737325942949844, "grad_norm": 1.0584189891815186, "learning_rate": 1.2499016640799487e-06, "loss": 0.231414794921875, "step": 12926 }, { "epoch": 0.8738001892659186, "grad_norm": 1.4079887866973877, "learning_rate": 1.2485833804652814e-06, "loss": 0.21468353271484375, "step": 12927 }, { "epoch": 0.8738677842368527, "grad_norm": 1.223313331604004, "learning_rate": 1.2472657622296724e-06, "loss": 0.264801025390625, "step": 12928 }, { "epoch": 0.873935379207787, "grad_norm": 1.5462806224822998, "learning_rate": 1.2459488094368726e-06, "loss": 0.283050537109375, "step": 12929 }, { "epoch": 0.8740029741787211, "grad_norm": 0.852081835269928, "learning_rate": 1.2446325221506045e-06, "loss": 0.153411865234375, "step": 12930 }, { "epoch": 0.8740705691496553, "grad_norm": 1.2596747875213623, "learning_rate": 1.2433169004345602e-06, "loss": 0.16463470458984375, "step": 12931 }, { "epoch": 0.8741381641205894, "grad_norm": 1.4618648290634155, "learning_rate": 1.2420019443523939e-06, "loss": 0.226654052734375, "step": 12932 }, { "epoch": 0.8742057590915235, "grad_norm": 1.2690801620483398, "learning_rate": 1.240687653967733e-06, "loss": 0.2318115234375, "step": 12933 }, { "epoch": 0.8742733540624578, "grad_norm": 0.9851347804069519, "learning_rate": 1.2393740293441769e-06, "loss": 0.2294921875, "step": 12934 }, { "epoch": 0.8743409490333919, "grad_norm": 0.7888815999031067, "learning_rate": 1.238061070545276e-06, "loss": 0.1733856201171875, "step": 12935 }, { "epoch": 0.8744085440043261, "grad_norm": 1.2416062355041504, "learning_rate": 1.2367487776345666e-06, "loss": 0.309722900390625, "step": 12936 }, { "epoch": 0.8744761389752602, "grad_norm": 1.2498517036437988, "learning_rate": 1.2354371506755475e-06, "loss": 0.30657958984375, "step": 12937 }, { "epoch": 0.8745437339461944, "grad_norm": 1.8354703187942505, "learning_rate": 1.2341261897316747e-06, "loss": 0.2935791015625, "step": 12938 }, { "epoch": 0.8746113289171286, "grad_norm": 1.431696891784668, "learning_rate": 1.2328158948663893e-06, "loss": 0.234283447265625, "step": 12939 }, { "epoch": 0.8746789238880627, "grad_norm": 1.4221912622451782, "learning_rate": 1.231506266143087e-06, "loss": 0.258544921875, "step": 12940 }, { "epoch": 0.8747465188589969, "grad_norm": 1.1936969757080078, "learning_rate": 1.2301973036251374e-06, "loss": 0.219818115234375, "step": 12941 }, { "epoch": 0.874814113829931, "grad_norm": 0.8034670352935791, "learning_rate": 1.2288890073758762e-06, "loss": 0.0730133056640625, "step": 12942 }, { "epoch": 0.8748817088008652, "grad_norm": 1.4694956541061401, "learning_rate": 1.2275813774586065e-06, "loss": 0.284423828125, "step": 12943 }, { "epoch": 0.8749493037717994, "grad_norm": 1.2048225402832031, "learning_rate": 1.226274413936599e-06, "loss": 0.2479400634765625, "step": 12944 }, { "epoch": 0.8750168987427336, "grad_norm": 1.2228245735168457, "learning_rate": 1.224968116873093e-06, "loss": 0.2205047607421875, "step": 12945 }, { "epoch": 0.8750844937136677, "grad_norm": 1.3973602056503296, "learning_rate": 1.2236624863312933e-06, "loss": 0.290557861328125, "step": 12946 }, { "epoch": 0.8751520886846018, "grad_norm": 1.4532073736190796, "learning_rate": 1.222357522374381e-06, "loss": 0.248748779296875, "step": 12947 }, { "epoch": 0.875219683655536, "grad_norm": 2.1567575931549072, "learning_rate": 1.2210532250654889e-06, "loss": 0.327911376953125, "step": 12948 }, { "epoch": 0.8752872786264702, "grad_norm": 0.9098696112632751, "learning_rate": 1.219749594467736e-06, "loss": 0.1496124267578125, "step": 12949 }, { "epoch": 0.8753548735974044, "grad_norm": 1.217550277709961, "learning_rate": 1.2184466306441978e-06, "loss": 0.184173583984375, "step": 12950 }, { "epoch": 0.8754224685683385, "grad_norm": 2.2369797229766846, "learning_rate": 1.217144333657913e-06, "loss": 0.25238037109375, "step": 12951 }, { "epoch": 0.8754900635392727, "grad_norm": 1.4326646327972412, "learning_rate": 1.2158427035719034e-06, "loss": 0.28021240234375, "step": 12952 }, { "epoch": 0.8755576585102068, "grad_norm": 0.9717947244644165, "learning_rate": 1.214541740449145e-06, "loss": 0.14344024658203125, "step": 12953 }, { "epoch": 0.875625253481141, "grad_norm": 1.2532840967178345, "learning_rate": 1.2132414443525892e-06, "loss": 0.210845947265625, "step": 12954 }, { "epoch": 0.8756928484520752, "grad_norm": 1.7847522497177124, "learning_rate": 1.2119418153451523e-06, "loss": 0.2843017578125, "step": 12955 }, { "epoch": 0.8757604434230093, "grad_norm": 1.3027302026748657, "learning_rate": 1.2106428534897157e-06, "loss": 0.27099609375, "step": 12956 }, { "epoch": 0.8758280383939435, "grad_norm": 1.3373037576675415, "learning_rate": 1.2093445588491326e-06, "loss": 0.16831207275390625, "step": 12957 }, { "epoch": 0.8758956333648776, "grad_norm": 1.0756858587265015, "learning_rate": 1.2080469314862242e-06, "loss": 0.16698455810546875, "step": 12958 }, { "epoch": 0.8759632283358119, "grad_norm": 1.946138620376587, "learning_rate": 1.2067499714637735e-06, "loss": 0.268218994140625, "step": 12959 }, { "epoch": 0.876030823306746, "grad_norm": 1.1782708168029785, "learning_rate": 1.2054536788445425e-06, "loss": 0.2725830078125, "step": 12960 }, { "epoch": 0.8760984182776801, "grad_norm": 1.3486639261245728, "learning_rate": 1.2041580536912488e-06, "loss": 0.284576416015625, "step": 12961 }, { "epoch": 0.8761660132486143, "grad_norm": 1.7202571630477905, "learning_rate": 1.202863096066581e-06, "loss": 0.269989013671875, "step": 12962 }, { "epoch": 0.8762336082195484, "grad_norm": 1.599924087524414, "learning_rate": 1.2015688060332054e-06, "loss": 0.25360107421875, "step": 12963 }, { "epoch": 0.8763012031904827, "grad_norm": 1.2568409442901611, "learning_rate": 1.2002751836537367e-06, "loss": 0.267486572265625, "step": 12964 }, { "epoch": 0.8763687981614168, "grad_norm": 1.9836832284927368, "learning_rate": 1.198982228990777e-06, "loss": 0.303985595703125, "step": 12965 }, { "epoch": 0.876436393132351, "grad_norm": 1.0977349281311035, "learning_rate": 1.1976899421068843e-06, "loss": 0.2502593994140625, "step": 12966 }, { "epoch": 0.8765039881032851, "grad_norm": 1.331813097000122, "learning_rate": 1.1963983230645886e-06, "loss": 0.28497314453125, "step": 12967 }, { "epoch": 0.8765715830742192, "grad_norm": 1.0637269020080566, "learning_rate": 1.195107371926385e-06, "loss": 0.20123291015625, "step": 12968 }, { "epoch": 0.8766391780451535, "grad_norm": 1.0292917490005493, "learning_rate": 1.19381708875474e-06, "loss": 0.2022857666015625, "step": 12969 }, { "epoch": 0.8767067730160876, "grad_norm": 1.7397456169128418, "learning_rate": 1.1925274736120807e-06, "loss": 0.234466552734375, "step": 12970 }, { "epoch": 0.8767743679870218, "grad_norm": 0.8705191016197205, "learning_rate": 1.1912385265608134e-06, "loss": 0.1827239990234375, "step": 12971 }, { "epoch": 0.8768419629579559, "grad_norm": 0.48053857684135437, "learning_rate": 1.1899502476633016e-06, "loss": 0.0613250732421875, "step": 12972 }, { "epoch": 0.8769095579288901, "grad_norm": 1.3431849479675293, "learning_rate": 1.1886626369818793e-06, "loss": 0.2107086181640625, "step": 12973 }, { "epoch": 0.8769771528998243, "grad_norm": 1.0557245016098022, "learning_rate": 1.187375694578851e-06, "loss": 0.19500732421875, "step": 12974 }, { "epoch": 0.8770447478707584, "grad_norm": 1.4069688320159912, "learning_rate": 1.186089420516484e-06, "loss": 0.2947998046875, "step": 12975 }, { "epoch": 0.8771123428416926, "grad_norm": 1.400538682937622, "learning_rate": 1.1848038148570234e-06, "loss": 0.222564697265625, "step": 12976 }, { "epoch": 0.8771799378126267, "grad_norm": 1.0536214113235474, "learning_rate": 1.1835188776626643e-06, "loss": 0.14696502685546875, "step": 12977 }, { "epoch": 0.8772475327835609, "grad_norm": 1.5401639938354492, "learning_rate": 1.1822346089955888e-06, "loss": 0.246490478515625, "step": 12978 }, { "epoch": 0.877315127754495, "grad_norm": 0.9889383912086487, "learning_rate": 1.1809510089179338e-06, "loss": 0.1881561279296875, "step": 12979 }, { "epoch": 0.8773827227254293, "grad_norm": 1.2766362428665161, "learning_rate": 1.1796680774918094e-06, "loss": 0.190032958984375, "step": 12980 }, { "epoch": 0.8774503176963634, "grad_norm": 1.3748565912246704, "learning_rate": 1.1783858147792915e-06, "loss": 0.226959228515625, "step": 12981 }, { "epoch": 0.8775179126672975, "grad_norm": 0.7260121703147888, "learning_rate": 1.1771042208424232e-06, "loss": 0.11596298217773438, "step": 12982 }, { "epoch": 0.8775855076382317, "grad_norm": 1.996804118156433, "learning_rate": 1.1758232957432152e-06, "loss": 0.34320068359375, "step": 12983 }, { "epoch": 0.8776531026091658, "grad_norm": 0.6887691020965576, "learning_rate": 1.1745430395436513e-06, "loss": 0.136566162109375, "step": 12984 }, { "epoch": 0.8777206975801001, "grad_norm": 1.4766229391098022, "learning_rate": 1.1732634523056718e-06, "loss": 0.201324462890625, "step": 12985 }, { "epoch": 0.8777882925510342, "grad_norm": 1.955039381980896, "learning_rate": 1.1719845340911922e-06, "loss": 0.266693115234375, "step": 12986 }, { "epoch": 0.8778558875219684, "grad_norm": 1.4912816286087036, "learning_rate": 1.1707062849621013e-06, "loss": 0.2241363525390625, "step": 12987 }, { "epoch": 0.8779234824929025, "grad_norm": 1.9199702739715576, "learning_rate": 1.1694287049802398e-06, "loss": 0.271728515625, "step": 12988 }, { "epoch": 0.8779910774638366, "grad_norm": 1.988484501838684, "learning_rate": 1.1681517942074332e-06, "loss": 0.240020751953125, "step": 12989 }, { "epoch": 0.8780586724347709, "grad_norm": 0.8073177933692932, "learning_rate": 1.166875552705457e-06, "loss": 0.124786376953125, "step": 12990 }, { "epoch": 0.878126267405705, "grad_norm": 1.5714848041534424, "learning_rate": 1.1655999805360717e-06, "loss": 0.18133544921875, "step": 12991 }, { "epoch": 0.8781938623766392, "grad_norm": 1.4928826093673706, "learning_rate": 1.164325077760993e-06, "loss": 0.2475738525390625, "step": 12992 }, { "epoch": 0.8782614573475733, "grad_norm": 1.4043387174606323, "learning_rate": 1.1630508444419118e-06, "loss": 0.2762451171875, "step": 12993 }, { "epoch": 0.8783290523185076, "grad_norm": 0.9107736349105835, "learning_rate": 1.1617772806404818e-06, "loss": 0.175262451171875, "step": 12994 }, { "epoch": 0.8783966472894417, "grad_norm": 1.1948684453964233, "learning_rate": 1.1605043864183256e-06, "loss": 0.227508544921875, "step": 12995 }, { "epoch": 0.8784642422603758, "grad_norm": 1.4389166831970215, "learning_rate": 1.1592321618370339e-06, "loss": 0.26806640625, "step": 12996 }, { "epoch": 0.87853183723131, "grad_norm": 1.6341891288757324, "learning_rate": 1.1579606069581656e-06, "loss": 0.323516845703125, "step": 12997 }, { "epoch": 0.8785994322022441, "grad_norm": 1.251787781715393, "learning_rate": 1.1566897218432448e-06, "loss": 0.2557373046875, "step": 12998 }, { "epoch": 0.8786670271731783, "grad_norm": 1.9105108976364136, "learning_rate": 1.1554195065537643e-06, "loss": 0.289337158203125, "step": 12999 }, { "epoch": 0.8787346221441125, "grad_norm": 1.0357950925827026, "learning_rate": 1.1541499611511913e-06, "loss": 0.2183074951171875, "step": 13000 }, { "epoch": 0.8788022171150467, "grad_norm": 1.1671316623687744, "learning_rate": 1.1528810856969452e-06, "loss": 0.21038055419921875, "step": 13001 }, { "epoch": 0.8788698120859808, "grad_norm": 1.214426875114441, "learning_rate": 1.15161288025243e-06, "loss": 0.188751220703125, "step": 13002 }, { "epoch": 0.8789374070569149, "grad_norm": 1.4060574769973755, "learning_rate": 1.1503453448790018e-06, "loss": 0.27203369140625, "step": 13003 }, { "epoch": 0.8790050020278491, "grad_norm": 1.6154628992080688, "learning_rate": 1.1490784796379982e-06, "loss": 0.25115966796875, "step": 13004 }, { "epoch": 0.8790725969987833, "grad_norm": 2.4720778465270996, "learning_rate": 1.1478122845907152e-06, "loss": 0.35430908203125, "step": 13005 }, { "epoch": 0.8791401919697175, "grad_norm": 1.0106393098831177, "learning_rate": 1.1465467597984203e-06, "loss": 0.2459716796875, "step": 13006 }, { "epoch": 0.8792077869406516, "grad_norm": 3.7389485836029053, "learning_rate": 1.1452819053223479e-06, "loss": 0.30950927734375, "step": 13007 }, { "epoch": 0.8792753819115858, "grad_norm": 1.6308625936508179, "learning_rate": 1.1440177212236974e-06, "loss": 0.327301025390625, "step": 13008 }, { "epoch": 0.8793429768825199, "grad_norm": 0.8582924008369446, "learning_rate": 1.1427542075636383e-06, "loss": 0.137054443359375, "step": 13009 }, { "epoch": 0.8794105718534541, "grad_norm": 1.5044817924499512, "learning_rate": 1.1414913644033099e-06, "loss": 0.17430686950683594, "step": 13010 }, { "epoch": 0.8794781668243883, "grad_norm": 1.0565776824951172, "learning_rate": 1.1402291918038133e-06, "loss": 0.212158203125, "step": 13011 }, { "epoch": 0.8795457617953224, "grad_norm": 1.33222496509552, "learning_rate": 1.138967689826218e-06, "loss": 0.294830322265625, "step": 13012 }, { "epoch": 0.8796133567662566, "grad_norm": 1.0783475637435913, "learning_rate": 1.137706858531572e-06, "loss": 0.24127197265625, "step": 13013 }, { "epoch": 0.8796809517371907, "grad_norm": 1.1262441873550415, "learning_rate": 1.136446697980873e-06, "loss": 0.235321044921875, "step": 13014 }, { "epoch": 0.879748546708125, "grad_norm": 1.375251293182373, "learning_rate": 1.1351872082351005e-06, "loss": 0.310943603515625, "step": 13015 }, { "epoch": 0.8798161416790591, "grad_norm": 0.9120282530784607, "learning_rate": 1.133928389355196e-06, "loss": 0.158599853515625, "step": 13016 }, { "epoch": 0.8798837366499932, "grad_norm": 1.0548815727233887, "learning_rate": 1.132670241402069e-06, "loss": 0.19921875, "step": 13017 }, { "epoch": 0.8799513316209274, "grad_norm": 1.3378219604492188, "learning_rate": 1.131412764436594e-06, "loss": 0.2816162109375, "step": 13018 }, { "epoch": 0.8800189265918615, "grad_norm": 1.3750007152557373, "learning_rate": 1.1301559585196176e-06, "loss": 0.2672119140625, "step": 13019 }, { "epoch": 0.8800865215627958, "grad_norm": 1.2385238409042358, "learning_rate": 1.1288998237119524e-06, "loss": 0.281646728515625, "step": 13020 }, { "epoch": 0.8801541165337299, "grad_norm": 0.8801547884941101, "learning_rate": 1.127644360074377e-06, "loss": 0.206878662109375, "step": 13021 }, { "epoch": 0.8802217115046641, "grad_norm": 2.797170639038086, "learning_rate": 1.1263895676676389e-06, "loss": 0.3145751953125, "step": 13022 }, { "epoch": 0.8802893064755982, "grad_norm": 1.0526200532913208, "learning_rate": 1.1251354465524517e-06, "loss": 0.24603271484375, "step": 13023 }, { "epoch": 0.8803569014465323, "grad_norm": 0.8463039398193359, "learning_rate": 1.123881996789498e-06, "loss": 0.216094970703125, "step": 13024 }, { "epoch": 0.8804244964174666, "grad_norm": 1.8602430820465088, "learning_rate": 1.1226292184394265e-06, "loss": 0.29949951171875, "step": 13025 }, { "epoch": 0.8804920913884007, "grad_norm": 1.2751133441925049, "learning_rate": 1.121377111562862e-06, "loss": 0.2276611328125, "step": 13026 }, { "epoch": 0.8805596863593349, "grad_norm": 1.419754981994629, "learning_rate": 1.1201256762203754e-06, "loss": 0.222869873046875, "step": 13027 }, { "epoch": 0.880627281330269, "grad_norm": 0.8404243588447571, "learning_rate": 1.1188749124725305e-06, "loss": 0.221649169921875, "step": 13028 }, { "epoch": 0.8806948763012032, "grad_norm": 1.1049641370773315, "learning_rate": 1.117624820379844e-06, "loss": 0.2604522705078125, "step": 13029 }, { "epoch": 0.8807624712721374, "grad_norm": 1.0637826919555664, "learning_rate": 1.1163754000028004e-06, "loss": 0.211700439453125, "step": 13030 }, { "epoch": 0.8808300662430715, "grad_norm": 1.1368035078048706, "learning_rate": 1.1151266514018582e-06, "loss": 0.1721954345703125, "step": 13031 }, { "epoch": 0.8808976612140057, "grad_norm": 1.0992841720581055, "learning_rate": 1.1138785746374357e-06, "loss": 0.1560516357421875, "step": 13032 }, { "epoch": 0.8809652561849398, "grad_norm": 2.0436649322509766, "learning_rate": 1.1126311697699265e-06, "loss": 0.2845458984375, "step": 13033 }, { "epoch": 0.881032851155874, "grad_norm": 0.726749837398529, "learning_rate": 1.1113844368596854e-06, "loss": 0.1407012939453125, "step": 13034 }, { "epoch": 0.8811004461268082, "grad_norm": 0.940325915813446, "learning_rate": 1.1101383759670376e-06, "loss": 0.1732940673828125, "step": 13035 }, { "epoch": 0.8811680410977424, "grad_norm": 1.6597994565963745, "learning_rate": 1.1088929871522747e-06, "loss": 0.26275634765625, "step": 13036 }, { "epoch": 0.8812356360686765, "grad_norm": 1.3470393419265747, "learning_rate": 1.1076482704756568e-06, "loss": 0.242279052734375, "step": 13037 }, { "epoch": 0.8813032310396106, "grad_norm": 1.0835202932357788, "learning_rate": 1.1064042259974093e-06, "loss": 0.200927734375, "step": 13038 }, { "epoch": 0.8813708260105448, "grad_norm": 1.9262241125106812, "learning_rate": 1.1051608537777342e-06, "loss": 0.3084716796875, "step": 13039 }, { "epoch": 0.881438420981479, "grad_norm": 1.1585251092910767, "learning_rate": 1.1039181538767812e-06, "loss": 0.19287109375, "step": 13040 }, { "epoch": 0.8815060159524132, "grad_norm": 2.0126285552978516, "learning_rate": 1.1026761263546909e-06, "loss": 0.2408447265625, "step": 13041 }, { "epoch": 0.8815736109233473, "grad_norm": 1.2981257438659668, "learning_rate": 1.1014347712715583e-06, "loss": 0.229248046875, "step": 13042 }, { "epoch": 0.8816412058942814, "grad_norm": 0.9217079281806946, "learning_rate": 1.1001940886874407e-06, "loss": 0.1598358154296875, "step": 13043 }, { "epoch": 0.8817088008652156, "grad_norm": 1.0564968585968018, "learning_rate": 1.0989540786623764e-06, "loss": 0.244293212890625, "step": 13044 }, { "epoch": 0.8817763958361498, "grad_norm": 1.9924434423446655, "learning_rate": 1.097714741256366e-06, "loss": 0.311279296875, "step": 13045 }, { "epoch": 0.881843990807084, "grad_norm": 0.6917407512664795, "learning_rate": 1.0964760765293713e-06, "loss": 0.1458587646484375, "step": 13046 }, { "epoch": 0.8819115857780181, "grad_norm": 1.0582385063171387, "learning_rate": 1.0952380845413296e-06, "loss": 0.154632568359375, "step": 13047 }, { "epoch": 0.8819791807489523, "grad_norm": 2.363276243209839, "learning_rate": 1.0940007653521427e-06, "loss": 0.37933349609375, "step": 13048 }, { "epoch": 0.8820467757198864, "grad_norm": 0.5809128880500793, "learning_rate": 1.0927641190216797e-06, "loss": 0.08646392822265625, "step": 13049 }, { "epoch": 0.8821143706908205, "grad_norm": 1.5152251720428467, "learning_rate": 1.0915281456097758e-06, "loss": 0.30450439453125, "step": 13050 }, { "epoch": 0.8821819656617548, "grad_norm": 1.5062569379806519, "learning_rate": 1.090292845176235e-06, "loss": 0.2625579833984375, "step": 13051 }, { "epoch": 0.8822495606326889, "grad_norm": 1.1796579360961914, "learning_rate": 1.089058217780833e-06, "loss": 0.2098846435546875, "step": 13052 }, { "epoch": 0.8823171556036231, "grad_norm": 2.0620267391204834, "learning_rate": 1.087824263483303e-06, "loss": 0.283935546875, "step": 13053 }, { "epoch": 0.8823847505745572, "grad_norm": 0.8060035705566406, "learning_rate": 1.0865909823433546e-06, "loss": 0.11979293823242188, "step": 13054 }, { "epoch": 0.8824523455454915, "grad_norm": 2.2961761951446533, "learning_rate": 1.085358374420663e-06, "loss": 0.3529052734375, "step": 13055 }, { "epoch": 0.8825199405164256, "grad_norm": 1.1534407138824463, "learning_rate": 1.084126439774864e-06, "loss": 0.26483154296875, "step": 13056 }, { "epoch": 0.8825875354873597, "grad_norm": 1.2264543771743774, "learning_rate": 1.0828951784655716e-06, "loss": 0.232635498046875, "step": 13057 }, { "epoch": 0.8826551304582939, "grad_norm": 1.961803674697876, "learning_rate": 1.0816645905523598e-06, "loss": 0.240570068359375, "step": 13058 }, { "epoch": 0.882722725429228, "grad_norm": 1.1666204929351807, "learning_rate": 1.0804346760947742e-06, "loss": 0.2994384765625, "step": 13059 }, { "epoch": 0.8827903204001623, "grad_norm": 0.9908668398857117, "learning_rate": 1.0792054351523223e-06, "loss": 0.15955352783203125, "step": 13060 }, { "epoch": 0.8828579153710964, "grad_norm": 1.5757845640182495, "learning_rate": 1.077976867784483e-06, "loss": 0.21539306640625, "step": 13061 }, { "epoch": 0.8829255103420306, "grad_norm": 0.838152289390564, "learning_rate": 1.0767489740507008e-06, "loss": 0.1589508056640625, "step": 13062 }, { "epoch": 0.8829931053129647, "grad_norm": 1.1920469999313354, "learning_rate": 1.0755217540103978e-06, "loss": 0.294677734375, "step": 13063 }, { "epoch": 0.8830607002838988, "grad_norm": 1.5123108625411987, "learning_rate": 1.0742952077229417e-06, "loss": 0.20891952514648438, "step": 13064 }, { "epoch": 0.883128295254833, "grad_norm": 1.398177146911621, "learning_rate": 1.07306933524769e-06, "loss": 0.26971435546875, "step": 13065 }, { "epoch": 0.8831958902257672, "grad_norm": 1.245923638343811, "learning_rate": 1.0718441366439535e-06, "loss": 0.2740478515625, "step": 13066 }, { "epoch": 0.8832634851967014, "grad_norm": 1.5941665172576904, "learning_rate": 1.0706196119710132e-06, "loss": 0.25970458984375, "step": 13067 }, { "epoch": 0.8833310801676355, "grad_norm": 1.5199843645095825, "learning_rate": 1.0693957612881283e-06, "loss": 0.276123046875, "step": 13068 }, { "epoch": 0.8833986751385697, "grad_norm": 1.715487003326416, "learning_rate": 1.0681725846545033e-06, "loss": 0.220428466796875, "step": 13069 }, { "epoch": 0.8834662701095038, "grad_norm": 1.2931991815567017, "learning_rate": 1.066950082129332e-06, "loss": 0.278717041015625, "step": 13070 }, { "epoch": 0.883533865080438, "grad_norm": 0.6014288663864136, "learning_rate": 1.0657282537717661e-06, "loss": 0.08516311645507812, "step": 13071 }, { "epoch": 0.8836014600513722, "grad_norm": 1.6066396236419678, "learning_rate": 1.0645070996409229e-06, "loss": 0.268402099609375, "step": 13072 }, { "epoch": 0.8836690550223063, "grad_norm": 1.1062990427017212, "learning_rate": 1.0632866197958886e-06, "loss": 0.1868896484375, "step": 13073 }, { "epoch": 0.8837366499932405, "grad_norm": 1.1840554475784302, "learning_rate": 1.0620668142957207e-06, "loss": 0.2014007568359375, "step": 13074 }, { "epoch": 0.8838042449641746, "grad_norm": 0.6287338137626648, "learning_rate": 1.0608476831994374e-06, "loss": 0.11016845703125, "step": 13075 }, { "epoch": 0.8838718399351089, "grad_norm": 1.1311414241790771, "learning_rate": 1.0596292265660329e-06, "loss": 0.193695068359375, "step": 13076 }, { "epoch": 0.883939434906043, "grad_norm": 0.9067625403404236, "learning_rate": 1.0584114444544602e-06, "loss": 0.2001953125, "step": 13077 }, { "epoch": 0.8840070298769771, "grad_norm": 1.1356009244918823, "learning_rate": 1.0571943369236403e-06, "loss": 0.19482421875, "step": 13078 }, { "epoch": 0.8840746248479113, "grad_norm": 1.505509614944458, "learning_rate": 1.0559779040324712e-06, "loss": 0.287445068359375, "step": 13079 }, { "epoch": 0.8841422198188454, "grad_norm": 2.0246357917785645, "learning_rate": 1.0547621458398061e-06, "loss": 0.247833251953125, "step": 13080 }, { "epoch": 0.8842098147897797, "grad_norm": 1.526551365852356, "learning_rate": 1.0535470624044758e-06, "loss": 0.2872314453125, "step": 13081 }, { "epoch": 0.8842774097607138, "grad_norm": 0.6243298649787903, "learning_rate": 1.052332653785265e-06, "loss": 0.11290359497070312, "step": 13082 }, { "epoch": 0.884345004731648, "grad_norm": 0.830863893032074, "learning_rate": 1.0511189200409439e-06, "loss": 0.1514739990234375, "step": 13083 }, { "epoch": 0.8844125997025821, "grad_norm": 1.1879611015319824, "learning_rate": 1.0499058612302366e-06, "loss": 0.264556884765625, "step": 13084 }, { "epoch": 0.8844801946735162, "grad_norm": 0.8663718104362488, "learning_rate": 1.0486934774118378e-06, "loss": 0.18145751953125, "step": 13085 }, { "epoch": 0.8845477896444505, "grad_norm": 1.268190622329712, "learning_rate": 1.047481768644411e-06, "loss": 0.179290771484375, "step": 13086 }, { "epoch": 0.8846153846153846, "grad_norm": 1.2666012048721313, "learning_rate": 1.0462707349865875e-06, "loss": 0.27044677734375, "step": 13087 }, { "epoch": 0.8846829795863188, "grad_norm": 1.1393463611602783, "learning_rate": 1.0450603764969585e-06, "loss": 0.1766204833984375, "step": 13088 }, { "epoch": 0.8847505745572529, "grad_norm": 2.169637680053711, "learning_rate": 1.0438506932340992e-06, "loss": 0.347564697265625, "step": 13089 }, { "epoch": 0.8848181695281871, "grad_norm": 0.7886461615562439, "learning_rate": 1.042641685256534e-06, "loss": 0.1848907470703125, "step": 13090 }, { "epoch": 0.8848857644991213, "grad_norm": 1.2646743059158325, "learning_rate": 1.0414333526227598e-06, "loss": 0.217071533203125, "step": 13091 }, { "epoch": 0.8849533594700554, "grad_norm": 1.5616662502288818, "learning_rate": 1.0402256953912531e-06, "loss": 0.2850341796875, "step": 13092 }, { "epoch": 0.8850209544409896, "grad_norm": 1.189444899559021, "learning_rate": 1.0390187136204389e-06, "loss": 0.2123260498046875, "step": 13093 }, { "epoch": 0.8850885494119237, "grad_norm": 0.9654221534729004, "learning_rate": 1.0378124073687251e-06, "loss": 0.138702392578125, "step": 13094 }, { "epoch": 0.885156144382858, "grad_norm": 2.5011346340179443, "learning_rate": 1.0366067766944721e-06, "loss": 0.31982421875, "step": 13095 }, { "epoch": 0.8852237393537921, "grad_norm": 1.4679841995239258, "learning_rate": 1.035401821656023e-06, "loss": 0.3154296875, "step": 13096 }, { "epoch": 0.8852913343247263, "grad_norm": 2.241182327270508, "learning_rate": 1.0341975423116778e-06, "loss": 0.19721221923828125, "step": 13097 }, { "epoch": 0.8853589292956604, "grad_norm": 0.8317170739173889, "learning_rate": 1.0329939387197086e-06, "loss": 0.17252349853515625, "step": 13098 }, { "epoch": 0.8854265242665945, "grad_norm": 1.2725881338119507, "learning_rate": 1.0317910109383533e-06, "loss": 0.1580810546875, "step": 13099 }, { "epoch": 0.8854941192375287, "grad_norm": 1.3691222667694092, "learning_rate": 1.0305887590258156e-06, "loss": 0.2947998046875, "step": 13100 }, { "epoch": 0.8855617142084629, "grad_norm": 1.694573163986206, "learning_rate": 1.029387183040269e-06, "loss": 0.326324462890625, "step": 13101 }, { "epoch": 0.8856293091793971, "grad_norm": 1.1339977979660034, "learning_rate": 1.028186283039852e-06, "loss": 0.2253265380859375, "step": 13102 }, { "epoch": 0.8856969041503312, "grad_norm": 1.15224027633667, "learning_rate": 1.026986059082673e-06, "loss": 0.1588592529296875, "step": 13103 }, { "epoch": 0.8857644991212654, "grad_norm": 1.3297585248947144, "learning_rate": 1.025786511226804e-06, "loss": 0.254669189453125, "step": 13104 }, { "epoch": 0.8858320940921995, "grad_norm": 2.056969404220581, "learning_rate": 1.0245876395302916e-06, "loss": 0.2460784912109375, "step": 13105 }, { "epoch": 0.8858996890631337, "grad_norm": 1.3409929275512695, "learning_rate": 1.0233894440511365e-06, "loss": 0.3070068359375, "step": 13106 }, { "epoch": 0.8859672840340679, "grad_norm": 1.2405202388763428, "learning_rate": 1.0221919248473238e-06, "loss": 0.20068359375, "step": 13107 }, { "epoch": 0.886034879005002, "grad_norm": 1.615661382675171, "learning_rate": 1.0209950819767921e-06, "loss": 0.21575927734375, "step": 13108 }, { "epoch": 0.8861024739759362, "grad_norm": 1.2095329761505127, "learning_rate": 1.0197989154974518e-06, "loss": 0.222625732421875, "step": 13109 }, { "epoch": 0.8861700689468703, "grad_norm": 1.1985619068145752, "learning_rate": 1.0186034254671833e-06, "loss": 0.2132568359375, "step": 13110 }, { "epoch": 0.8862376639178046, "grad_norm": 1.4172379970550537, "learning_rate": 1.0174086119438304e-06, "loss": 0.1944122314453125, "step": 13111 }, { "epoch": 0.8863052588887387, "grad_norm": 0.7601717710494995, "learning_rate": 1.0162144749852049e-06, "loss": 0.1617431640625, "step": 13112 }, { "epoch": 0.8863728538596728, "grad_norm": 1.5293136835098267, "learning_rate": 1.0150210146490873e-06, "loss": 0.242828369140625, "step": 13113 }, { "epoch": 0.886440448830607, "grad_norm": 1.6916866302490234, "learning_rate": 1.0138282309932252e-06, "loss": 0.300750732421875, "step": 13114 }, { "epoch": 0.8865080438015411, "grad_norm": 1.217377781867981, "learning_rate": 1.0126361240753317e-06, "loss": 0.18585205078125, "step": 13115 }, { "epoch": 0.8865756387724754, "grad_norm": 1.0316789150238037, "learning_rate": 1.0114446939530897e-06, "loss": 0.18316650390625, "step": 13116 }, { "epoch": 0.8866432337434095, "grad_norm": 1.071345567703247, "learning_rate": 1.0102539406841444e-06, "loss": 0.211181640625, "step": 13117 }, { "epoch": 0.8867108287143437, "grad_norm": 0.9729219079017639, "learning_rate": 1.0090638643261195e-06, "loss": 0.15948486328125, "step": 13118 }, { "epoch": 0.8867784236852778, "grad_norm": 1.1171642541885376, "learning_rate": 1.0078744649365879e-06, "loss": 0.22930908203125, "step": 13119 }, { "epoch": 0.8868460186562119, "grad_norm": 0.8927968144416809, "learning_rate": 1.0066857425731097e-06, "loss": 0.1524505615234375, "step": 13120 }, { "epoch": 0.8869136136271462, "grad_norm": 1.568305253982544, "learning_rate": 1.005497697293199e-06, "loss": 0.28240966796875, "step": 13121 }, { "epoch": 0.8869812085980803, "grad_norm": 1.528802752494812, "learning_rate": 1.00431032915434e-06, "loss": 0.29534912109375, "step": 13122 }, { "epoch": 0.8870488035690145, "grad_norm": 0.6226051449775696, "learning_rate": 1.0031236382139846e-06, "loss": 0.10589599609375, "step": 13123 }, { "epoch": 0.8871163985399486, "grad_norm": 2.435300827026367, "learning_rate": 1.0019376245295542e-06, "loss": 0.33245849609375, "step": 13124 }, { "epoch": 0.8871839935108828, "grad_norm": 1.7864371538162231, "learning_rate": 1.000752288158434e-06, "loss": 0.2747802734375, "step": 13125 }, { "epoch": 0.887251588481817, "grad_norm": 1.5107674598693848, "learning_rate": 9.995676291579786e-07, "loss": 0.251434326171875, "step": 13126 }, { "epoch": 0.8873191834527511, "grad_norm": 2.0390243530273438, "learning_rate": 9.983836475855102e-07, "loss": 0.33648681640625, "step": 13127 }, { "epoch": 0.8873867784236853, "grad_norm": 1.2592988014221191, "learning_rate": 9.972003434983146e-07, "loss": 0.21612548828125, "step": 13128 }, { "epoch": 0.8874543733946194, "grad_norm": 1.0366798639297485, "learning_rate": 9.960177169536494e-07, "loss": 0.1984710693359375, "step": 13129 }, { "epoch": 0.8875219683655536, "grad_norm": 1.3586304187774658, "learning_rate": 9.948357680087356e-07, "loss": 0.236907958984375, "step": 13130 }, { "epoch": 0.8875895633364878, "grad_norm": 1.2835696935653687, "learning_rate": 9.936544967207672e-07, "loss": 0.278594970703125, "step": 13131 }, { "epoch": 0.887657158307422, "grad_norm": 0.9328903555870056, "learning_rate": 9.924739031468955e-07, "loss": 0.208221435546875, "step": 13132 }, { "epoch": 0.8877247532783561, "grad_norm": 1.2017768621444702, "learning_rate": 9.912939873442495e-07, "loss": 0.24420166015625, "step": 13133 }, { "epoch": 0.8877923482492902, "grad_norm": 1.2206342220306396, "learning_rate": 9.901147493699203e-07, "loss": 0.2121429443359375, "step": 13134 }, { "epoch": 0.8878599432202244, "grad_norm": 1.3887425661087036, "learning_rate": 9.889361892809672e-07, "loss": 0.22119140625, "step": 13135 }, { "epoch": 0.8879275381911585, "grad_norm": 1.5792704820632935, "learning_rate": 9.877583071344133e-07, "loss": 0.247467041015625, "step": 13136 }, { "epoch": 0.8879951331620928, "grad_norm": 0.7264682054519653, "learning_rate": 9.86581102987254e-07, "loss": 0.11353302001953125, "step": 13137 }, { "epoch": 0.8880627281330269, "grad_norm": 0.7186653017997742, "learning_rate": 9.854045768964493e-07, "loss": 0.11908721923828125, "step": 13138 }, { "epoch": 0.8881303231039611, "grad_norm": 1.653232216835022, "learning_rate": 9.842287289189283e-07, "loss": 0.2164154052734375, "step": 13139 }, { "epoch": 0.8881979180748952, "grad_norm": 1.9633311033248901, "learning_rate": 9.83053559111584e-07, "loss": 0.29425048828125, "step": 13140 }, { "epoch": 0.8882655130458293, "grad_norm": 1.7478479146957397, "learning_rate": 9.818790675312778e-07, "loss": 0.24310302734375, "step": 13141 }, { "epoch": 0.8883331080167636, "grad_norm": 0.9310465455055237, "learning_rate": 9.807052542348422e-07, "loss": 0.2400054931640625, "step": 13142 }, { "epoch": 0.8884007029876977, "grad_norm": 1.2602487802505493, "learning_rate": 9.79532119279067e-07, "loss": 0.26953125, "step": 13143 }, { "epoch": 0.8884682979586319, "grad_norm": 2.0661795139312744, "learning_rate": 9.783596627207264e-07, "loss": 0.20513916015625, "step": 13144 }, { "epoch": 0.888535892929566, "grad_norm": 0.6953306198120117, "learning_rate": 9.771878846165388e-07, "loss": 0.13359832763671875, "step": 13145 }, { "epoch": 0.8886034879005003, "grad_norm": 0.9855079054832458, "learning_rate": 9.760167850232122e-07, "loss": 0.1206207275390625, "step": 13146 }, { "epoch": 0.8886710828714344, "grad_norm": 2.4506001472473145, "learning_rate": 9.748463639974075e-07, "loss": 0.38873291015625, "step": 13147 }, { "epoch": 0.8887386778423685, "grad_norm": 0.7281376719474792, "learning_rate": 9.736766215957565e-07, "loss": 0.0836334228515625, "step": 13148 }, { "epoch": 0.8888062728133027, "grad_norm": 0.8830718398094177, "learning_rate": 9.725075578748588e-07, "loss": 0.178375244140625, "step": 13149 }, { "epoch": 0.8888738677842368, "grad_norm": 0.7929213047027588, "learning_rate": 9.713391728912823e-07, "loss": 0.15338134765625, "step": 13150 }, { "epoch": 0.888941462755171, "grad_norm": 1.7056515216827393, "learning_rate": 9.701714667015604e-07, "loss": 0.200286865234375, "step": 13151 }, { "epoch": 0.8890090577261052, "grad_norm": 0.8601817488670349, "learning_rate": 9.690044393621928e-07, "loss": 0.12408447265625, "step": 13152 }, { "epoch": 0.8890766526970394, "grad_norm": 1.1011171340942383, "learning_rate": 9.678380909296491e-07, "loss": 0.190460205078125, "step": 13153 }, { "epoch": 0.8891442476679735, "grad_norm": 1.6360242366790771, "learning_rate": 9.666724214603594e-07, "loss": 0.2944183349609375, "step": 13154 }, { "epoch": 0.8892118426389076, "grad_norm": 0.7848495244979858, "learning_rate": 9.65507431010737e-07, "loss": 0.12891387939453125, "step": 13155 }, { "epoch": 0.8892794376098418, "grad_norm": 1.6809664964675903, "learning_rate": 9.643431196371382e-07, "loss": 0.1905517578125, "step": 13156 }, { "epoch": 0.889347032580776, "grad_norm": 2.406390905380249, "learning_rate": 9.631794873959131e-07, "loss": 0.28173828125, "step": 13157 }, { "epoch": 0.8894146275517102, "grad_norm": 1.4973338842391968, "learning_rate": 9.620165343433517e-07, "loss": 0.30792236328125, "step": 13158 }, { "epoch": 0.8894822225226443, "grad_norm": 2.1456165313720703, "learning_rate": 9.60854260535734e-07, "loss": 0.28851318359375, "step": 13159 }, { "epoch": 0.8895498174935785, "grad_norm": 1.3493328094482422, "learning_rate": 9.596926660293e-07, "loss": 0.251708984375, "step": 13160 }, { "epoch": 0.8896174124645126, "grad_norm": 1.313058853149414, "learning_rate": 9.585317508802445e-07, "loss": 0.30523681640625, "step": 13161 }, { "epoch": 0.8896850074354468, "grad_norm": 1.4751760959625244, "learning_rate": 9.573715151447476e-07, "loss": 0.319244384765625, "step": 13162 }, { "epoch": 0.889752602406381, "grad_norm": 1.2099183797836304, "learning_rate": 9.562119588789481e-07, "loss": 0.23211669921875, "step": 13163 }, { "epoch": 0.8898201973773151, "grad_norm": 1.1246652603149414, "learning_rate": 9.55053082138951e-07, "loss": 0.1591796875, "step": 13164 }, { "epoch": 0.8898877923482493, "grad_norm": 1.1841474771499634, "learning_rate": 9.53894884980831e-07, "loss": 0.18243408203125, "step": 13165 }, { "epoch": 0.8899553873191834, "grad_norm": 1.105563998222351, "learning_rate": 9.527373674606271e-07, "loss": 0.2406463623046875, "step": 13166 }, { "epoch": 0.8900229822901177, "grad_norm": 1.1092171669006348, "learning_rate": 9.515805296343461e-07, "loss": 0.26995849609375, "step": 13167 }, { "epoch": 0.8900905772610518, "grad_norm": 1.261199712753296, "learning_rate": 9.504243715579696e-07, "loss": 0.137786865234375, "step": 13168 }, { "epoch": 0.8901581722319859, "grad_norm": 1.0832699537277222, "learning_rate": 9.492688932874316e-07, "loss": 0.189544677734375, "step": 13169 }, { "epoch": 0.8902257672029201, "grad_norm": 1.0875191688537598, "learning_rate": 9.481140948786487e-07, "loss": 0.2196044921875, "step": 13170 }, { "epoch": 0.8902933621738542, "grad_norm": 1.9737566709518433, "learning_rate": 9.469599763874931e-07, "loss": 0.22754669189453125, "step": 13171 }, { "epoch": 0.8903609571447885, "grad_norm": 1.3094172477722168, "learning_rate": 9.458065378698066e-07, "loss": 0.257415771484375, "step": 13172 }, { "epoch": 0.8904285521157226, "grad_norm": 1.9866830110549927, "learning_rate": 9.446537793814047e-07, "loss": 0.24920654296875, "step": 13173 }, { "epoch": 0.8904961470866567, "grad_norm": 1.5391860008239746, "learning_rate": 9.435017009780594e-07, "loss": 0.2630615234375, "step": 13174 }, { "epoch": 0.8905637420575909, "grad_norm": 1.3557703495025635, "learning_rate": 9.423503027155194e-07, "loss": 0.260284423828125, "step": 13175 }, { "epoch": 0.890631337028525, "grad_norm": 1.1915580034255981, "learning_rate": 9.411995846494953e-07, "loss": 0.207427978515625, "step": 13176 }, { "epoch": 0.8906989319994593, "grad_norm": 0.8704735040664673, "learning_rate": 9.400495468356657e-07, "loss": 0.1458282470703125, "step": 13177 }, { "epoch": 0.8907665269703934, "grad_norm": 1.2182668447494507, "learning_rate": 9.389001893296778e-07, "loss": 0.2703857421875, "step": 13178 }, { "epoch": 0.8908341219413276, "grad_norm": 3.237752676010132, "learning_rate": 9.377515121871438e-07, "loss": 0.3651123046875, "step": 13179 }, { "epoch": 0.8909017169122617, "grad_norm": 0.9258078932762146, "learning_rate": 9.36603515463641e-07, "loss": 0.1528778076171875, "step": 13180 }, { "epoch": 0.8909693118831958, "grad_norm": 1.70054292678833, "learning_rate": 9.354561992147232e-07, "loss": 0.201202392578125, "step": 13181 }, { "epoch": 0.8910369068541301, "grad_norm": 0.7093233466148376, "learning_rate": 9.343095634958976e-07, "loss": 0.1506500244140625, "step": 13182 }, { "epoch": 0.8911045018250642, "grad_norm": 1.0522679090499878, "learning_rate": 9.331636083626516e-07, "loss": 0.1898193359375, "step": 13183 }, { "epoch": 0.8911720967959984, "grad_norm": 1.1066595315933228, "learning_rate": 9.320183338704325e-07, "loss": 0.2318115234375, "step": 13184 }, { "epoch": 0.8912396917669325, "grad_norm": 1.2423434257507324, "learning_rate": 9.308737400746492e-07, "loss": 0.19293212890625, "step": 13185 }, { "epoch": 0.8913072867378667, "grad_norm": 1.8142811059951782, "learning_rate": 9.297298270306942e-07, "loss": 0.23663330078125, "step": 13186 }, { "epoch": 0.8913748817088009, "grad_norm": 1.1265709400177002, "learning_rate": 9.285865947939082e-07, "loss": 0.31597900390625, "step": 13187 }, { "epoch": 0.891442476679735, "grad_norm": 1.556515097618103, "learning_rate": 9.274440434196136e-07, "loss": 0.29827880859375, "step": 13188 }, { "epoch": 0.8915100716506692, "grad_norm": 1.223467469215393, "learning_rate": 9.263021729630927e-07, "loss": 0.2337188720703125, "step": 13189 }, { "epoch": 0.8915776666216033, "grad_norm": 2.4321796894073486, "learning_rate": 9.251609834795966e-07, "loss": 0.41363525390625, "step": 13190 }, { "epoch": 0.8916452615925375, "grad_norm": 1.024183750152588, "learning_rate": 9.240204750243409e-07, "loss": 0.16546630859375, "step": 13191 }, { "epoch": 0.8917128565634717, "grad_norm": 1.280346393585205, "learning_rate": 9.228806476525148e-07, "loss": 0.2384490966796875, "step": 13192 }, { "epoch": 0.8917804515344059, "grad_norm": 1.0076168775558472, "learning_rate": 9.217415014192627e-07, "loss": 0.1883544921875, "step": 13193 }, { "epoch": 0.89184804650534, "grad_norm": 1.3993853330612183, "learning_rate": 9.206030363797153e-07, "loss": 0.262542724609375, "step": 13194 }, { "epoch": 0.8919156414762741, "grad_norm": 0.9776406288146973, "learning_rate": 9.194652525889486e-07, "loss": 0.19781494140625, "step": 13195 }, { "epoch": 0.8919832364472083, "grad_norm": 1.4720466136932373, "learning_rate": 9.183281501020169e-07, "loss": 0.290802001953125, "step": 13196 }, { "epoch": 0.8920508314181425, "grad_norm": 0.9695463180541992, "learning_rate": 9.171917289739462e-07, "loss": 0.19891357421875, "step": 13197 }, { "epoch": 0.8921184263890767, "grad_norm": 1.436156153678894, "learning_rate": 9.160559892597142e-07, "loss": 0.21044921875, "step": 13198 }, { "epoch": 0.8921860213600108, "grad_norm": 1.150054693222046, "learning_rate": 9.149209310142853e-07, "loss": 0.1622772216796875, "step": 13199 }, { "epoch": 0.892253616330945, "grad_norm": 1.3493106365203857, "learning_rate": 9.137865542925738e-07, "loss": 0.246551513671875, "step": 13200 }, { "epoch": 0.8923212113018791, "grad_norm": 0.4599466621875763, "learning_rate": 9.12652859149471e-07, "loss": 0.05722618103027344, "step": 13201 }, { "epoch": 0.8923888062728133, "grad_norm": 1.0362950563430786, "learning_rate": 9.115198456398311e-07, "loss": 0.2030029296875, "step": 13202 }, { "epoch": 0.8924564012437475, "grad_norm": 1.3182718753814697, "learning_rate": 9.103875138184769e-07, "loss": 0.220428466796875, "step": 13203 }, { "epoch": 0.8925239962146816, "grad_norm": 1.0344001054763794, "learning_rate": 9.092558637401966e-07, "loss": 0.191009521484375, "step": 13204 }, { "epoch": 0.8925915911856158, "grad_norm": 1.6860021352767944, "learning_rate": 9.081248954597476e-07, "loss": 0.243682861328125, "step": 13205 }, { "epoch": 0.8926591861565499, "grad_norm": 1.0368088483810425, "learning_rate": 9.069946090318515e-07, "loss": 0.225799560546875, "step": 13206 }, { "epoch": 0.8927267811274842, "grad_norm": 1.4002748727798462, "learning_rate": 9.058650045111994e-07, "loss": 0.24462890625, "step": 13207 }, { "epoch": 0.8927943760984183, "grad_norm": 1.8774197101593018, "learning_rate": 9.047360819524509e-07, "loss": 0.319427490234375, "step": 13208 }, { "epoch": 0.8928619710693524, "grad_norm": 1.8476307392120361, "learning_rate": 9.036078414102239e-07, "loss": 0.295166015625, "step": 13209 }, { "epoch": 0.8929295660402866, "grad_norm": 1.6490230560302734, "learning_rate": 9.024802829391199e-07, "loss": 0.30706787109375, "step": 13210 }, { "epoch": 0.8929971610112207, "grad_norm": 1.648939609527588, "learning_rate": 9.013534065936851e-07, "loss": 0.31927490234375, "step": 13211 }, { "epoch": 0.893064755982155, "grad_norm": 1.0816360712051392, "learning_rate": 9.002272124284561e-07, "loss": 0.1502838134765625, "step": 13212 }, { "epoch": 0.8931323509530891, "grad_norm": 1.6993026733398438, "learning_rate": 8.991017004979174e-07, "loss": 0.2360382080078125, "step": 13213 }, { "epoch": 0.8931999459240233, "grad_norm": 1.157458782196045, "learning_rate": 8.979768708565323e-07, "loss": 0.233306884765625, "step": 13214 }, { "epoch": 0.8932675408949574, "grad_norm": 1.360164761543274, "learning_rate": 8.968527235587253e-07, "loss": 0.26708984375, "step": 13215 }, { "epoch": 0.8933351358658915, "grad_norm": 1.1715480089187622, "learning_rate": 8.9572925865889e-07, "loss": 0.20050048828125, "step": 13216 }, { "epoch": 0.8934027308368258, "grad_norm": 1.5254017114639282, "learning_rate": 8.946064762113859e-07, "loss": 0.194854736328125, "step": 13217 }, { "epoch": 0.8934703258077599, "grad_norm": 1.1015384197235107, "learning_rate": 8.934843762705447e-07, "loss": 0.2422943115234375, "step": 13218 }, { "epoch": 0.8935379207786941, "grad_norm": 1.5379596948623657, "learning_rate": 8.923629588906529e-07, "loss": 0.213714599609375, "step": 13219 }, { "epoch": 0.8936055157496282, "grad_norm": 1.8351861238479614, "learning_rate": 8.912422241259771e-07, "loss": 0.3009033203125, "step": 13220 }, { "epoch": 0.8936731107205624, "grad_norm": 0.9580949544906616, "learning_rate": 8.901221720307439e-07, "loss": 0.228759765625, "step": 13221 }, { "epoch": 0.8937407056914966, "grad_norm": 1.610381007194519, "learning_rate": 8.890028026591468e-07, "loss": 0.275054931640625, "step": 13222 }, { "epoch": 0.8938083006624307, "grad_norm": 1.3305511474609375, "learning_rate": 8.878841160653523e-07, "loss": 0.218170166015625, "step": 13223 }, { "epoch": 0.8938758956333649, "grad_norm": 1.3847525119781494, "learning_rate": 8.867661123034837e-07, "loss": 0.265228271484375, "step": 13224 }, { "epoch": 0.893943490604299, "grad_norm": 1.2494333982467651, "learning_rate": 8.856487914276412e-07, "loss": 0.1992340087890625, "step": 13225 }, { "epoch": 0.8940110855752332, "grad_norm": 1.693306803703308, "learning_rate": 8.845321534918866e-07, "loss": 0.2700347900390625, "step": 13226 }, { "epoch": 0.8940786805461673, "grad_norm": 1.598949670791626, "learning_rate": 8.834161985502498e-07, "loss": 0.248291015625, "step": 13227 }, { "epoch": 0.8941462755171016, "grad_norm": 1.144256830215454, "learning_rate": 8.823009266567261e-07, "loss": 0.226898193359375, "step": 13228 }, { "epoch": 0.8942138704880357, "grad_norm": 1.0108705759048462, "learning_rate": 8.811863378652824e-07, "loss": 0.185943603515625, "step": 13229 }, { "epoch": 0.8942814654589698, "grad_norm": 1.0391547679901123, "learning_rate": 8.80072432229847e-07, "loss": 0.162506103515625, "step": 13230 }, { "epoch": 0.894349060429904, "grad_norm": 1.4576101303100586, "learning_rate": 8.78959209804317e-07, "loss": 0.32025146484375, "step": 13231 }, { "epoch": 0.8944166554008381, "grad_norm": 1.8324099779129028, "learning_rate": 8.778466706425592e-07, "loss": 0.2156982421875, "step": 13232 }, { "epoch": 0.8944842503717724, "grad_norm": 2.1970107555389404, "learning_rate": 8.767348147984039e-07, "loss": 0.291656494140625, "step": 13233 }, { "epoch": 0.8945518453427065, "grad_norm": 1.440049648284912, "learning_rate": 8.756236423256514e-07, "loss": 0.305206298828125, "step": 13234 }, { "epoch": 0.8946194403136407, "grad_norm": 1.2322545051574707, "learning_rate": 8.74513153278062e-07, "loss": 0.184326171875, "step": 13235 }, { "epoch": 0.8946870352845748, "grad_norm": 1.4392169713974, "learning_rate": 8.734033477093762e-07, "loss": 0.243377685546875, "step": 13236 }, { "epoch": 0.8947546302555089, "grad_norm": 1.6165457963943481, "learning_rate": 8.722942256732841e-07, "loss": 0.237640380859375, "step": 13237 }, { "epoch": 0.8948222252264432, "grad_norm": 1.6418650150299072, "learning_rate": 8.711857872234597e-07, "loss": 0.240447998046875, "step": 13238 }, { "epoch": 0.8948898201973773, "grad_norm": 0.6700042486190796, "learning_rate": 8.700780324135332e-07, "loss": 0.10789012908935547, "step": 13239 }, { "epoch": 0.8949574151683115, "grad_norm": 2.2340548038482666, "learning_rate": 8.689709612971037e-07, "loss": 0.32012939453125, "step": 13240 }, { "epoch": 0.8950250101392456, "grad_norm": 0.5935867428779602, "learning_rate": 8.678645739277396e-07, "loss": 0.08160400390625, "step": 13241 }, { "epoch": 0.8950926051101799, "grad_norm": 1.4185682535171509, "learning_rate": 8.667588703589735e-07, "loss": 0.26580810546875, "step": 13242 }, { "epoch": 0.895160200081114, "grad_norm": 1.08126699924469, "learning_rate": 8.656538506443074e-07, "loss": 0.164794921875, "step": 13243 }, { "epoch": 0.8952277950520481, "grad_norm": 1.3697364330291748, "learning_rate": 8.645495148372084e-07, "loss": 0.260162353515625, "step": 13244 }, { "epoch": 0.8952953900229823, "grad_norm": 1.2115637063980103, "learning_rate": 8.634458629911124e-07, "loss": 0.210845947265625, "step": 13245 }, { "epoch": 0.8953629849939164, "grad_norm": 1.1833640336990356, "learning_rate": 8.623428951594164e-07, "loss": 0.28369140625, "step": 13246 }, { "epoch": 0.8954305799648506, "grad_norm": 2.0919387340545654, "learning_rate": 8.612406113954962e-07, "loss": 0.365966796875, "step": 13247 }, { "epoch": 0.8954981749357848, "grad_norm": 1.4212558269500732, "learning_rate": 8.601390117526809e-07, "loss": 0.22015380859375, "step": 13248 }, { "epoch": 0.895565769906719, "grad_norm": 1.566315770149231, "learning_rate": 8.590380962842776e-07, "loss": 0.2669677734375, "step": 13249 }, { "epoch": 0.8956333648776531, "grad_norm": 1.6933802366256714, "learning_rate": 8.579378650435487e-07, "loss": 0.32574462890625, "step": 13250 }, { "epoch": 0.8957009598485872, "grad_norm": 1.7327524423599243, "learning_rate": 8.568383180837369e-07, "loss": 0.279571533203125, "step": 13251 }, { "epoch": 0.8957685548195214, "grad_norm": 1.0804860591888428, "learning_rate": 8.557394554580428e-07, "loss": 0.225616455078125, "step": 13252 }, { "epoch": 0.8958361497904556, "grad_norm": 1.8468271493911743, "learning_rate": 8.546412772196372e-07, "loss": 0.2779541015625, "step": 13253 }, { "epoch": 0.8959037447613898, "grad_norm": 1.816606879234314, "learning_rate": 8.535437834216541e-07, "loss": 0.31171417236328125, "step": 13254 }, { "epoch": 0.8959713397323239, "grad_norm": 1.3044346570968628, "learning_rate": 8.52446974117198e-07, "loss": 0.24981689453125, "step": 13255 }, { "epoch": 0.8960389347032581, "grad_norm": 1.234694480895996, "learning_rate": 8.513508493593414e-07, "loss": 0.1965789794921875, "step": 13256 }, { "epoch": 0.8961065296741922, "grad_norm": 1.3191274404525757, "learning_rate": 8.5025540920112e-07, "loss": 0.192108154296875, "step": 13257 }, { "epoch": 0.8961741246451264, "grad_norm": 1.128610372543335, "learning_rate": 8.491606536955382e-07, "loss": 0.138641357421875, "step": 13258 }, { "epoch": 0.8962417196160606, "grad_norm": 1.07121741771698, "learning_rate": 8.480665828955653e-07, "loss": 0.19097900390625, "step": 13259 }, { "epoch": 0.8963093145869947, "grad_norm": 1.3071374893188477, "learning_rate": 8.469731968541439e-07, "loss": 0.258636474609375, "step": 13260 }, { "epoch": 0.8963769095579289, "grad_norm": 1.2709907293319702, "learning_rate": 8.458804956241734e-07, "loss": 0.21240234375, "step": 13261 }, { "epoch": 0.896444504528863, "grad_norm": 1.1574363708496094, "learning_rate": 8.447884792585281e-07, "loss": 0.173492431640625, "step": 13262 }, { "epoch": 0.8965120994997973, "grad_norm": 1.152725338935852, "learning_rate": 8.436971478100475e-07, "loss": 0.228271484375, "step": 13263 }, { "epoch": 0.8965796944707314, "grad_norm": 0.977055549621582, "learning_rate": 8.426065013315377e-07, "loss": 0.21978759765625, "step": 13264 }, { "epoch": 0.8966472894416655, "grad_norm": 1.4310733079910278, "learning_rate": 8.415165398757696e-07, "loss": 0.242095947265625, "step": 13265 }, { "epoch": 0.8967148844125997, "grad_norm": 1.8744688034057617, "learning_rate": 8.404272634954779e-07, "loss": 0.19110107421875, "step": 13266 }, { "epoch": 0.8967824793835338, "grad_norm": 1.2489111423492432, "learning_rate": 8.393386722433755e-07, "loss": 0.28997802734375, "step": 13267 }, { "epoch": 0.8968500743544681, "grad_norm": 1.8347843885421753, "learning_rate": 8.382507661721317e-07, "loss": 0.21337890625, "step": 13268 }, { "epoch": 0.8969176693254022, "grad_norm": 1.1677371263504028, "learning_rate": 8.371635453343862e-07, "loss": 0.199462890625, "step": 13269 }, { "epoch": 0.8969852642963364, "grad_norm": 1.3877906799316406, "learning_rate": 8.360770097827469e-07, "loss": 0.22064208984375, "step": 13270 }, { "epoch": 0.8970528592672705, "grad_norm": 2.6309547424316406, "learning_rate": 8.349911595697852e-07, "loss": 0.337615966796875, "step": 13271 }, { "epoch": 0.8971204542382046, "grad_norm": 1.0588364601135254, "learning_rate": 8.339059947480421e-07, "loss": 0.2386474609375, "step": 13272 }, { "epoch": 0.8971880492091389, "grad_norm": 1.4166043996810913, "learning_rate": 8.328215153700291e-07, "loss": 0.255218505859375, "step": 13273 }, { "epoch": 0.897255644180073, "grad_norm": 1.1713138818740845, "learning_rate": 8.31737721488211e-07, "loss": 0.2262420654296875, "step": 13274 }, { "epoch": 0.8973232391510072, "grad_norm": 1.4691176414489746, "learning_rate": 8.306546131550341e-07, "loss": 0.2335662841796875, "step": 13275 }, { "epoch": 0.8973908341219413, "grad_norm": 1.923295021057129, "learning_rate": 8.295721904229098e-07, "loss": 0.28515625, "step": 13276 }, { "epoch": 0.8974584290928755, "grad_norm": 1.6151546239852905, "learning_rate": 8.284904533442028e-07, "loss": 0.2487030029296875, "step": 13277 }, { "epoch": 0.8975260240638097, "grad_norm": 1.2415035963058472, "learning_rate": 8.274094019712631e-07, "loss": 0.254241943359375, "step": 13278 }, { "epoch": 0.8975936190347438, "grad_norm": 1.3232556581497192, "learning_rate": 8.263290363563902e-07, "loss": 0.1764984130859375, "step": 13279 }, { "epoch": 0.897661214005678, "grad_norm": 1.3647743463516235, "learning_rate": 8.252493565518676e-07, "loss": 0.200042724609375, "step": 13280 }, { "epoch": 0.8977288089766121, "grad_norm": 0.9017248153686523, "learning_rate": 8.241703626099317e-07, "loss": 0.1472015380859375, "step": 13281 }, { "epoch": 0.8977964039475463, "grad_norm": 1.5301103591918945, "learning_rate": 8.230920545827924e-07, "loss": 0.240692138671875, "step": 13282 }, { "epoch": 0.8978639989184805, "grad_norm": 0.9721776843070984, "learning_rate": 8.220144325226248e-07, "loss": 0.12605667114257812, "step": 13283 }, { "epoch": 0.8979315938894147, "grad_norm": 1.0545759201049805, "learning_rate": 8.209374964815702e-07, "loss": 0.203155517578125, "step": 13284 }, { "epoch": 0.8979991888603488, "grad_norm": 1.1506092548370361, "learning_rate": 8.198612465117372e-07, "loss": 0.244842529296875, "step": 13285 }, { "epoch": 0.8980667838312829, "grad_norm": 1.1957789659500122, "learning_rate": 8.187856826652057e-07, "loss": 0.196533203125, "step": 13286 }, { "epoch": 0.8981343788022171, "grad_norm": 1.439151644706726, "learning_rate": 8.177108049940108e-07, "loss": 0.2135162353515625, "step": 13287 }, { "epoch": 0.8982019737731513, "grad_norm": 1.0160140991210938, "learning_rate": 8.166366135501691e-07, "loss": 0.222503662109375, "step": 13288 }, { "epoch": 0.8982695687440855, "grad_norm": 1.531208872795105, "learning_rate": 8.155631083856541e-07, "loss": 0.22607421875, "step": 13289 }, { "epoch": 0.8983371637150196, "grad_norm": 1.6958602666854858, "learning_rate": 8.144902895524042e-07, "loss": 0.24676513671875, "step": 13290 }, { "epoch": 0.8984047586859538, "grad_norm": 1.1747939586639404, "learning_rate": 8.134181571023363e-07, "loss": 0.203887939453125, "step": 13291 }, { "epoch": 0.8984723536568879, "grad_norm": 1.0838432312011719, "learning_rate": 8.123467110873223e-07, "loss": 0.19049072265625, "step": 13292 }, { "epoch": 0.898539948627822, "grad_norm": 1.33686363697052, "learning_rate": 8.112759515592055e-07, "loss": 0.22316741943359375, "step": 13293 }, { "epoch": 0.8986075435987563, "grad_norm": 1.2312328815460205, "learning_rate": 8.10205878569798e-07, "loss": 0.235626220703125, "step": 13294 }, { "epoch": 0.8986751385696904, "grad_norm": 1.4628432989120483, "learning_rate": 8.09136492170875e-07, "loss": 0.1793670654296875, "step": 13295 }, { "epoch": 0.8987427335406246, "grad_norm": 1.2375030517578125, "learning_rate": 8.080677924141816e-07, "loss": 0.247100830078125, "step": 13296 }, { "epoch": 0.8988103285115587, "grad_norm": 1.3916395902633667, "learning_rate": 8.069997793514267e-07, "loss": 0.1905364990234375, "step": 13297 }, { "epoch": 0.898877923482493, "grad_norm": 1.195741057395935, "learning_rate": 8.059324530342854e-07, "loss": 0.208740234375, "step": 13298 }, { "epoch": 0.8989455184534271, "grad_norm": 1.1023378372192383, "learning_rate": 8.048658135144066e-07, "loss": 0.198974609375, "step": 13299 }, { "epoch": 0.8990131134243612, "grad_norm": 1.2630921602249146, "learning_rate": 8.037998608433988e-07, "loss": 0.273468017578125, "step": 13300 }, { "epoch": 0.8990807083952954, "grad_norm": 1.1412028074264526, "learning_rate": 8.02734595072836e-07, "loss": 0.2222900390625, "step": 13301 }, { "epoch": 0.8991483033662295, "grad_norm": 1.1641631126403809, "learning_rate": 8.016700162542683e-07, "loss": 0.252899169921875, "step": 13302 }, { "epoch": 0.8992158983371638, "grad_norm": 0.8693143725395203, "learning_rate": 8.006061244392016e-07, "loss": 0.16046142578125, "step": 13303 }, { "epoch": 0.8992834933080979, "grad_norm": 1.9070916175842285, "learning_rate": 7.995429196791159e-07, "loss": 0.27166748046875, "step": 13304 }, { "epoch": 0.899351088279032, "grad_norm": 1.278223991394043, "learning_rate": 7.984804020254555e-07, "loss": 0.2546234130859375, "step": 13305 }, { "epoch": 0.8994186832499662, "grad_norm": 1.1659412384033203, "learning_rate": 7.974185715296322e-07, "loss": 0.26092529296875, "step": 13306 }, { "epoch": 0.8994862782209003, "grad_norm": 1.5417560338974, "learning_rate": 7.963574282430236e-07, "loss": 0.2452545166015625, "step": 13307 }, { "epoch": 0.8995538731918346, "grad_norm": 1.6683828830718994, "learning_rate": 7.952969722169751e-07, "loss": 0.2940673828125, "step": 13308 }, { "epoch": 0.8996214681627687, "grad_norm": 1.628359079360962, "learning_rate": 7.942372035027939e-07, "loss": 0.301025390625, "step": 13309 }, { "epoch": 0.8996890631337029, "grad_norm": 2.7675421237945557, "learning_rate": 7.931781221517642e-07, "loss": 0.318115234375, "step": 13310 }, { "epoch": 0.899756658104637, "grad_norm": 1.6943563222885132, "learning_rate": 7.921197282151283e-07, "loss": 0.2428131103515625, "step": 13311 }, { "epoch": 0.8998242530755711, "grad_norm": 1.2310099601745605, "learning_rate": 7.910620217440966e-07, "loss": 0.205322265625, "step": 13312 }, { "epoch": 0.8998918480465053, "grad_norm": 1.4418909549713135, "learning_rate": 7.900050027898503e-07, "loss": 0.245452880859375, "step": 13313 }, { "epoch": 0.8999594430174395, "grad_norm": 1.1381635665893555, "learning_rate": 7.889486714035299e-07, "loss": 0.217315673828125, "step": 13314 }, { "epoch": 0.9000270379883737, "grad_norm": 2.4019594192504883, "learning_rate": 7.878930276362545e-07, "loss": 0.307464599609375, "step": 13315 }, { "epoch": 0.9000946329593078, "grad_norm": 1.252685546875, "learning_rate": 7.868380715390932e-07, "loss": 0.2515869140625, "step": 13316 }, { "epoch": 0.900162227930242, "grad_norm": 1.2086411714553833, "learning_rate": 7.857838031631004e-07, "loss": 0.23687744140625, "step": 13317 }, { "epoch": 0.9002298229011761, "grad_norm": 1.1725335121154785, "learning_rate": 7.847302225592834e-07, "loss": 0.18309783935546875, "step": 13318 }, { "epoch": 0.9002974178721103, "grad_norm": 1.0671731233596802, "learning_rate": 7.836773297786231e-07, "loss": 0.1911163330078125, "step": 13319 }, { "epoch": 0.9003650128430445, "grad_norm": 0.7957079410552979, "learning_rate": 7.826251248720639e-07, "loss": 0.13484573364257812, "step": 13320 }, { "epoch": 0.9004326078139786, "grad_norm": 1.0187015533447266, "learning_rate": 7.815736078905167e-07, "loss": 0.231201171875, "step": 13321 }, { "epoch": 0.9005002027849128, "grad_norm": 1.9097623825073242, "learning_rate": 7.805227788848607e-07, "loss": 0.3029937744140625, "step": 13322 }, { "epoch": 0.9005677977558469, "grad_norm": 1.1364268064498901, "learning_rate": 7.794726379059469e-07, "loss": 0.201568603515625, "step": 13323 }, { "epoch": 0.9006353927267812, "grad_norm": 1.1136287450790405, "learning_rate": 7.784231850045815e-07, "loss": 0.2762451171875, "step": 13324 }, { "epoch": 0.9007029876977153, "grad_norm": 0.989788830280304, "learning_rate": 7.773744202315453e-07, "loss": 0.130584716796875, "step": 13325 }, { "epoch": 0.9007705826686494, "grad_norm": 1.0729036331176758, "learning_rate": 7.763263436375845e-07, "loss": 0.23236083984375, "step": 13326 }, { "epoch": 0.9008381776395836, "grad_norm": 1.1529297828674316, "learning_rate": 7.752789552734085e-07, "loss": 0.217437744140625, "step": 13327 }, { "epoch": 0.9009057726105177, "grad_norm": 1.6214545965194702, "learning_rate": 7.742322551897036e-07, "loss": 0.199462890625, "step": 13328 }, { "epoch": 0.900973367581452, "grad_norm": 1.6171362400054932, "learning_rate": 7.731862434371074e-07, "loss": 0.2207489013671875, "step": 13329 }, { "epoch": 0.9010409625523861, "grad_norm": 1.7055896520614624, "learning_rate": 7.721409200662377e-07, "loss": 0.2540283203125, "step": 13330 }, { "epoch": 0.9011085575233203, "grad_norm": 1.7079339027404785, "learning_rate": 7.710962851276726e-07, "loss": 0.24505615234375, "step": 13331 }, { "epoch": 0.9011761524942544, "grad_norm": 0.6803663969039917, "learning_rate": 7.700523386719583e-07, "loss": 0.1070098876953125, "step": 13332 }, { "epoch": 0.9012437474651885, "grad_norm": 1.5447611808776855, "learning_rate": 7.690090807496075e-07, "loss": 0.2886962890625, "step": 13333 }, { "epoch": 0.9013113424361228, "grad_norm": 0.7930761575698853, "learning_rate": 7.679665114110984e-07, "loss": 0.1498565673828125, "step": 13334 }, { "epoch": 0.9013789374070569, "grad_norm": 1.4186139106750488, "learning_rate": 7.669246307068773e-07, "loss": 0.300201416015625, "step": 13335 }, { "epoch": 0.9014465323779911, "grad_norm": 1.7575002908706665, "learning_rate": 7.65883438687357e-07, "loss": 0.25689697265625, "step": 13336 }, { "epoch": 0.9015141273489252, "grad_norm": 0.9748865365982056, "learning_rate": 7.648429354029173e-07, "loss": 0.2042236328125, "step": 13337 }, { "epoch": 0.9015817223198594, "grad_norm": 1.5046035051345825, "learning_rate": 7.638031209039015e-07, "loss": 0.32318115234375, "step": 13338 }, { "epoch": 0.9016493172907936, "grad_norm": 1.1050037145614624, "learning_rate": 7.62763995240629e-07, "loss": 0.238128662109375, "step": 13339 }, { "epoch": 0.9017169122617277, "grad_norm": 1.7847518920898438, "learning_rate": 7.617255584633698e-07, "loss": 0.291534423828125, "step": 13340 }, { "epoch": 0.9017845072326619, "grad_norm": 1.6470568180084229, "learning_rate": 7.606878106223802e-07, "loss": 0.297698974609375, "step": 13341 }, { "epoch": 0.901852102203596, "grad_norm": 1.0156059265136719, "learning_rate": 7.596507517678636e-07, "loss": 0.20703125, "step": 13342 }, { "epoch": 0.9019196971745302, "grad_norm": 2.0852506160736084, "learning_rate": 7.586143819500046e-07, "loss": 0.3665771484375, "step": 13343 }, { "epoch": 0.9019872921454644, "grad_norm": 0.6042240262031555, "learning_rate": 7.575787012189484e-07, "loss": 0.102081298828125, "step": 13344 }, { "epoch": 0.9020548871163986, "grad_norm": 1.1509058475494385, "learning_rate": 7.565437096248078e-07, "loss": 0.217376708984375, "step": 13345 }, { "epoch": 0.9021224820873327, "grad_norm": 0.8329157829284668, "learning_rate": 7.555094072176616e-07, "loss": 0.17718505859375, "step": 13346 }, { "epoch": 0.9021900770582668, "grad_norm": 2.0258100032806396, "learning_rate": 7.544757940475544e-07, "loss": 0.285552978515625, "step": 13347 }, { "epoch": 0.902257672029201, "grad_norm": 1.4348552227020264, "learning_rate": 7.534428701645014e-07, "loss": 0.275421142578125, "step": 13348 }, { "epoch": 0.9023252670001352, "grad_norm": 1.2623647451400757, "learning_rate": 7.524106356184807e-07, "loss": 0.256103515625, "step": 13349 }, { "epoch": 0.9023928619710694, "grad_norm": 1.1739188432693481, "learning_rate": 7.513790904594392e-07, "loss": 0.234771728515625, "step": 13350 }, { "epoch": 0.9024604569420035, "grad_norm": 2.317331075668335, "learning_rate": 7.503482347372853e-07, "loss": 0.34539794921875, "step": 13351 }, { "epoch": 0.9025280519129377, "grad_norm": 1.8050464391708374, "learning_rate": 7.493180685019057e-07, "loss": 0.253662109375, "step": 13352 }, { "epoch": 0.9025956468838718, "grad_norm": 1.1888569593429565, "learning_rate": 7.482885918031374e-07, "loss": 0.23419189453125, "step": 13353 }, { "epoch": 0.902663241854806, "grad_norm": 1.5705890655517578, "learning_rate": 7.472598046908003e-07, "loss": 0.2071533203125, "step": 13354 }, { "epoch": 0.9027308368257402, "grad_norm": 1.6308430433273315, "learning_rate": 7.462317072146713e-07, "loss": 0.253326416015625, "step": 13355 }, { "epoch": 0.9027984317966743, "grad_norm": 1.092383861541748, "learning_rate": 7.45204299424494e-07, "loss": 0.2587890625, "step": 13356 }, { "epoch": 0.9028660267676085, "grad_norm": 1.1423027515411377, "learning_rate": 7.441775813699836e-07, "loss": 0.1755828857421875, "step": 13357 }, { "epoch": 0.9029336217385426, "grad_norm": 1.17097008228302, "learning_rate": 7.431515531008171e-07, "loss": 0.1620635986328125, "step": 13358 }, { "epoch": 0.9030012167094769, "grad_norm": 1.7912325859069824, "learning_rate": 7.421262146666414e-07, "loss": 0.20899200439453125, "step": 13359 }, { "epoch": 0.903068811680411, "grad_norm": 1.299294352531433, "learning_rate": 7.411015661170667e-07, "loss": 0.284454345703125, "step": 13360 }, { "epoch": 0.9031364066513451, "grad_norm": 1.2293941974639893, "learning_rate": 7.400776075016752e-07, "loss": 0.24029541015625, "step": 13361 }, { "epoch": 0.9032040016222793, "grad_norm": 1.9478671550750732, "learning_rate": 7.390543388700088e-07, "loss": 0.304107666015625, "step": 13362 }, { "epoch": 0.9032715965932134, "grad_norm": 1.5647903680801392, "learning_rate": 7.38031760271583e-07, "loss": 0.30792236328125, "step": 13363 }, { "epoch": 0.9033391915641477, "grad_norm": 1.580285906791687, "learning_rate": 7.370098717558716e-07, "loss": 0.283172607421875, "step": 13364 }, { "epoch": 0.9034067865350818, "grad_norm": 1.2670148611068726, "learning_rate": 7.359886733723281e-07, "loss": 0.22784423828125, "step": 13365 }, { "epoch": 0.903474381506016, "grad_norm": 0.9055078625679016, "learning_rate": 7.349681651703549e-07, "loss": 0.15853118896484375, "step": 13366 }, { "epoch": 0.9035419764769501, "grad_norm": 1.350603699684143, "learning_rate": 7.339483471993375e-07, "loss": 0.243255615234375, "step": 13367 }, { "epoch": 0.9036095714478842, "grad_norm": 1.3457236289978027, "learning_rate": 7.329292195086196e-07, "loss": 0.237579345703125, "step": 13368 }, { "epoch": 0.9036771664188185, "grad_norm": 1.212576150894165, "learning_rate": 7.319107821475101e-07, "loss": 0.21844482421875, "step": 13369 }, { "epoch": 0.9037447613897526, "grad_norm": 1.907607913017273, "learning_rate": 7.308930351652932e-07, "loss": 0.27813720703125, "step": 13370 }, { "epoch": 0.9038123563606868, "grad_norm": 0.9033158421516418, "learning_rate": 7.298759786112041e-07, "loss": 0.14106369018554688, "step": 13371 }, { "epoch": 0.9038799513316209, "grad_norm": 1.1364461183547974, "learning_rate": 7.288596125344638e-07, "loss": 0.196044921875, "step": 13372 }, { "epoch": 0.9039475463025551, "grad_norm": 1.7823354005813599, "learning_rate": 7.27843936984246e-07, "loss": 0.2119140625, "step": 13373 }, { "epoch": 0.9040151412734893, "grad_norm": 1.2243776321411133, "learning_rate": 7.268289520096949e-07, "loss": 0.286376953125, "step": 13374 }, { "epoch": 0.9040827362444234, "grad_norm": 1.842909574508667, "learning_rate": 7.258146576599244e-07, "loss": 0.24896240234375, "step": 13375 }, { "epoch": 0.9041503312153576, "grad_norm": 1.1834406852722168, "learning_rate": 7.248010539840105e-07, "loss": 0.2114715576171875, "step": 13376 }, { "epoch": 0.9042179261862917, "grad_norm": 0.9210668206214905, "learning_rate": 7.237881410309971e-07, "loss": 0.1513671875, "step": 13377 }, { "epoch": 0.9042855211572259, "grad_norm": 1.1526108980178833, "learning_rate": 7.227759188498983e-07, "loss": 0.2101287841796875, "step": 13378 }, { "epoch": 0.90435311612816, "grad_norm": 1.0370421409606934, "learning_rate": 7.217643874896868e-07, "loss": 0.2140045166015625, "step": 13379 }, { "epoch": 0.9044207110990943, "grad_norm": 1.006208062171936, "learning_rate": 7.207535469993115e-07, "loss": 0.1160736083984375, "step": 13380 }, { "epoch": 0.9044883060700284, "grad_norm": 1.987054705619812, "learning_rate": 7.197433974276835e-07, "loss": 0.2107696533203125, "step": 13381 }, { "epoch": 0.9045559010409625, "grad_norm": 1.4554152488708496, "learning_rate": 7.187339388236735e-07, "loss": 0.19628143310546875, "step": 13382 }, { "epoch": 0.9046234960118967, "grad_norm": 1.6528582572937012, "learning_rate": 7.177251712361326e-07, "loss": 0.31097412109375, "step": 13383 }, { "epoch": 0.9046910909828308, "grad_norm": 1.3417237997055054, "learning_rate": 7.167170947138685e-07, "loss": 0.24786376953125, "step": 13384 }, { "epoch": 0.9047586859537651, "grad_norm": 1.3352130651474, "learning_rate": 7.15709709305657e-07, "loss": 0.2750091552734375, "step": 13385 }, { "epoch": 0.9048262809246992, "grad_norm": 1.513014793395996, "learning_rate": 7.147030150602424e-07, "loss": 0.25433349609375, "step": 13386 }, { "epoch": 0.9048938758956334, "grad_norm": 1.16874098777771, "learning_rate": 7.136970120263375e-07, "loss": 0.190277099609375, "step": 13387 }, { "epoch": 0.9049614708665675, "grad_norm": 0.9134616851806641, "learning_rate": 7.126917002526151e-07, "loss": 0.1706085205078125, "step": 13388 }, { "epoch": 0.9050290658375016, "grad_norm": 1.216539978981018, "learning_rate": 7.116870797877195e-07, "loss": 0.14361572265625, "step": 13389 }, { "epoch": 0.9050966608084359, "grad_norm": 1.7025858163833618, "learning_rate": 7.106831506802602e-07, "loss": 0.29833984375, "step": 13390 }, { "epoch": 0.90516425577937, "grad_norm": 1.0582785606384277, "learning_rate": 7.096799129788184e-07, "loss": 0.14691162109375, "step": 13391 }, { "epoch": 0.9052318507503042, "grad_norm": 1.3747003078460693, "learning_rate": 7.086773667319285e-07, "loss": 0.316925048828125, "step": 13392 }, { "epoch": 0.9052994457212383, "grad_norm": 1.1233057975769043, "learning_rate": 7.076755119881067e-07, "loss": 0.17095947265625, "step": 13393 }, { "epoch": 0.9053670406921726, "grad_norm": 1.4096424579620361, "learning_rate": 7.066743487958278e-07, "loss": 0.2534027099609375, "step": 13394 }, { "epoch": 0.9054346356631067, "grad_norm": 2.4402074813842773, "learning_rate": 7.056738772035293e-07, "loss": 0.24468994140625, "step": 13395 }, { "epoch": 0.9055022306340408, "grad_norm": 1.6047029495239258, "learning_rate": 7.046740972596277e-07, "loss": 0.25067138671875, "step": 13396 }, { "epoch": 0.905569825604975, "grad_norm": 1.2363389730453491, "learning_rate": 7.036750090124927e-07, "loss": 0.29962158203125, "step": 13397 }, { "epoch": 0.9056374205759091, "grad_norm": 1.575453519821167, "learning_rate": 7.026766125104706e-07, "loss": 0.283905029296875, "step": 13398 }, { "epoch": 0.9057050155468434, "grad_norm": 1.1290251016616821, "learning_rate": 7.016789078018676e-07, "loss": 0.24395751953125, "step": 13399 }, { "epoch": 0.9057726105177775, "grad_norm": 0.6962059736251831, "learning_rate": 7.006818949349586e-07, "loss": 0.1121673583984375, "step": 13400 }, { "epoch": 0.9058402054887117, "grad_norm": 1.2238365411758423, "learning_rate": 6.99685573957985e-07, "loss": 0.31158447265625, "step": 13401 }, { "epoch": 0.9059078004596458, "grad_norm": 1.01006281375885, "learning_rate": 6.98689944919158e-07, "loss": 0.167510986328125, "step": 13402 }, { "epoch": 0.9059753954305799, "grad_norm": 1.1166701316833496, "learning_rate": 6.976950078666478e-07, "loss": 0.200958251953125, "step": 13403 }, { "epoch": 0.9060429904015141, "grad_norm": 1.4644328355789185, "learning_rate": 6.967007628486022e-07, "loss": 0.30230712890625, "step": 13404 }, { "epoch": 0.9061105853724483, "grad_norm": 1.188002586364746, "learning_rate": 6.957072099131229e-07, "loss": 0.217803955078125, "step": 13405 }, { "epoch": 0.9061781803433825, "grad_norm": 1.4077459573745728, "learning_rate": 6.947143491082846e-07, "loss": 0.29559326171875, "step": 13406 }, { "epoch": 0.9062457753143166, "grad_norm": 0.82663893699646, "learning_rate": 6.937221804821342e-07, "loss": 0.1175384521484375, "step": 13407 }, { "epoch": 0.9063133702852508, "grad_norm": 1.4408605098724365, "learning_rate": 6.927307040826697e-07, "loss": 0.27099609375, "step": 13408 }, { "epoch": 0.906380965256185, "grad_norm": 1.697624683380127, "learning_rate": 6.917399199578711e-07, "loss": 0.275177001953125, "step": 13409 }, { "epoch": 0.9064485602271191, "grad_norm": 0.9851111173629761, "learning_rate": 6.907498281556784e-07, "loss": 0.1336822509765625, "step": 13410 }, { "epoch": 0.9065161551980533, "grad_norm": 0.8850666284561157, "learning_rate": 6.897604287239967e-07, "loss": 0.186737060546875, "step": 13411 }, { "epoch": 0.9065837501689874, "grad_norm": 1.43946373462677, "learning_rate": 6.887717217106993e-07, "loss": 0.1848907470703125, "step": 13412 }, { "epoch": 0.9066513451399216, "grad_norm": 2.199091911315918, "learning_rate": 6.877837071636278e-07, "loss": 0.33319091796875, "step": 13413 }, { "epoch": 0.9067189401108557, "grad_norm": 1.2674834728240967, "learning_rate": 6.867963851305842e-07, "loss": 0.27899169921875, "step": 13414 }, { "epoch": 0.90678653508179, "grad_norm": 1.0458500385284424, "learning_rate": 6.858097556593484e-07, "loss": 0.1964569091796875, "step": 13415 }, { "epoch": 0.9068541300527241, "grad_norm": 1.2705531120300293, "learning_rate": 6.848238187976541e-07, "loss": 0.26275634765625, "step": 13416 }, { "epoch": 0.9069217250236582, "grad_norm": 1.390294075012207, "learning_rate": 6.838385745932097e-07, "loss": 0.2791748046875, "step": 13417 }, { "epoch": 0.9069893199945924, "grad_norm": 1.558625340461731, "learning_rate": 6.828540230936853e-07, "loss": 0.234893798828125, "step": 13418 }, { "epoch": 0.9070569149655265, "grad_norm": 1.4551029205322266, "learning_rate": 6.818701643467179e-07, "loss": 0.24298095703125, "step": 13419 }, { "epoch": 0.9071245099364608, "grad_norm": 0.8569836020469666, "learning_rate": 6.808869983999194e-07, "loss": 0.14205551147460938, "step": 13420 }, { "epoch": 0.9071921049073949, "grad_norm": 1.1025243997573853, "learning_rate": 6.799045253008551e-07, "loss": 0.2325439453125, "step": 13421 }, { "epoch": 0.9072596998783291, "grad_norm": 0.9583538174629211, "learning_rate": 6.789227450970653e-07, "loss": 0.143035888671875, "step": 13422 }, { "epoch": 0.9073272948492632, "grad_norm": 2.2859103679656982, "learning_rate": 6.77941657836057e-07, "loss": 0.2974090576171875, "step": 13423 }, { "epoch": 0.9073948898201973, "grad_norm": 1.5801849365234375, "learning_rate": 6.769612635652989e-07, "loss": 0.18114471435546875, "step": 13424 }, { "epoch": 0.9074624847911316, "grad_norm": 0.829196035861969, "learning_rate": 6.759815623322279e-07, "loss": 0.1620635986328125, "step": 13425 }, { "epoch": 0.9075300797620657, "grad_norm": 1.0443439483642578, "learning_rate": 6.750025541842497e-07, "loss": 0.233551025390625, "step": 13426 }, { "epoch": 0.9075976747329999, "grad_norm": 1.195239782333374, "learning_rate": 6.740242391687313e-07, "loss": 0.24053955078125, "step": 13427 }, { "epoch": 0.907665269703934, "grad_norm": 1.323538899421692, "learning_rate": 6.73046617333018e-07, "loss": 0.17766571044921875, "step": 13428 }, { "epoch": 0.9077328646748682, "grad_norm": 1.4552441835403442, "learning_rate": 6.720696887244055e-07, "loss": 0.224945068359375, "step": 13429 }, { "epoch": 0.9078004596458024, "grad_norm": 1.4698398113250732, "learning_rate": 6.710934533901625e-07, "loss": 0.237518310546875, "step": 13430 }, { "epoch": 0.9078680546167365, "grad_norm": 1.2599706649780273, "learning_rate": 6.70117911377533e-07, "loss": 0.216094970703125, "step": 13431 }, { "epoch": 0.9079356495876707, "grad_norm": 1.455686330795288, "learning_rate": 6.691430627337126e-07, "loss": 0.29254150390625, "step": 13432 }, { "epoch": 0.9080032445586048, "grad_norm": 0.8865966200828552, "learning_rate": 6.681689075058766e-07, "loss": 0.1815185546875, "step": 13433 }, { "epoch": 0.908070839529539, "grad_norm": 0.7318434119224548, "learning_rate": 6.671954457411544e-07, "loss": 0.120513916015625, "step": 13434 }, { "epoch": 0.9081384345004732, "grad_norm": 1.5345417261123657, "learning_rate": 6.662226774866531e-07, "loss": 0.2131500244140625, "step": 13435 }, { "epoch": 0.9082060294714073, "grad_norm": 1.1112529039382935, "learning_rate": 6.652506027894384e-07, "loss": 0.213043212890625, "step": 13436 }, { "epoch": 0.9082736244423415, "grad_norm": 1.046384572982788, "learning_rate": 6.642792216965476e-07, "loss": 0.225860595703125, "step": 13437 }, { "epoch": 0.9083412194132756, "grad_norm": 2.258831262588501, "learning_rate": 6.633085342549799e-07, "loss": 0.25689697265625, "step": 13438 }, { "epoch": 0.9084088143842098, "grad_norm": 0.9874386787414551, "learning_rate": 6.623385405117061e-07, "loss": 0.25469970703125, "step": 13439 }, { "epoch": 0.908476409355144, "grad_norm": 1.2086113691329956, "learning_rate": 6.613692405136568e-07, "loss": 0.28204345703125, "step": 13440 }, { "epoch": 0.9085440043260782, "grad_norm": 1.8418831825256348, "learning_rate": 6.604006343077345e-07, "loss": 0.3262939453125, "step": 13441 }, { "epoch": 0.9086115992970123, "grad_norm": 1.0962214469909668, "learning_rate": 6.594327219408086e-07, "loss": 0.180450439453125, "step": 13442 }, { "epoch": 0.9086791942679464, "grad_norm": 1.4889061450958252, "learning_rate": 6.584655034597064e-07, "loss": 0.23565673828125, "step": 13443 }, { "epoch": 0.9087467892388806, "grad_norm": 1.5845532417297363, "learning_rate": 6.574989789112374e-07, "loss": 0.231903076171875, "step": 13444 }, { "epoch": 0.9088143842098148, "grad_norm": 1.1431381702423096, "learning_rate": 6.565331483421588e-07, "loss": 0.23883056640625, "step": 13445 }, { "epoch": 0.908881979180749, "grad_norm": 2.6591906547546387, "learning_rate": 6.555680117992085e-07, "loss": 0.24932861328125, "step": 13446 }, { "epoch": 0.9089495741516831, "grad_norm": 1.2453804016113281, "learning_rate": 6.546035693290858e-07, "loss": 0.1635894775390625, "step": 13447 }, { "epoch": 0.9090171691226173, "grad_norm": 1.6427186727523804, "learning_rate": 6.536398209784567e-07, "loss": 0.27899169921875, "step": 13448 }, { "epoch": 0.9090847640935514, "grad_norm": 1.7737352848052979, "learning_rate": 6.526767667939504e-07, "loss": 0.272918701171875, "step": 13449 }, { "epoch": 0.9091523590644855, "grad_norm": 2.189032554626465, "learning_rate": 6.517144068221664e-07, "loss": 0.23638916015625, "step": 13450 }, { "epoch": 0.9092199540354198, "grad_norm": 1.1738479137420654, "learning_rate": 6.507527411096725e-07, "loss": 0.20693206787109375, "step": 13451 }, { "epoch": 0.9092875490063539, "grad_norm": 1.8594324588775635, "learning_rate": 6.497917697029965e-07, "loss": 0.246917724609375, "step": 13452 }, { "epoch": 0.9093551439772881, "grad_norm": 1.435901403427124, "learning_rate": 6.488314926486377e-07, "loss": 0.3035430908203125, "step": 13453 }, { "epoch": 0.9094227389482222, "grad_norm": 1.5154551267623901, "learning_rate": 6.478719099930608e-07, "loss": 0.265228271484375, "step": 13454 }, { "epoch": 0.9094903339191565, "grad_norm": 1.2944378852844238, "learning_rate": 6.469130217826969e-07, "loss": 0.232452392578125, "step": 13455 }, { "epoch": 0.9095579288900906, "grad_norm": 1.2446868419647217, "learning_rate": 6.459548280639388e-07, "loss": 0.23785400390625, "step": 13456 }, { "epoch": 0.9096255238610247, "grad_norm": 1.4632688760757446, "learning_rate": 6.449973288831562e-07, "loss": 0.2662811279296875, "step": 13457 }, { "epoch": 0.9096931188319589, "grad_norm": 1.5537316799163818, "learning_rate": 6.440405242866721e-07, "loss": 0.228759765625, "step": 13458 }, { "epoch": 0.909760713802893, "grad_norm": 1.2437623739242554, "learning_rate": 6.430844143207892e-07, "loss": 0.25494384765625, "step": 13459 }, { "epoch": 0.9098283087738273, "grad_norm": 1.99257230758667, "learning_rate": 6.421289990317658e-07, "loss": 0.2614898681640625, "step": 13460 }, { "epoch": 0.9098959037447614, "grad_norm": 1.365500569343567, "learning_rate": 6.41174278465833e-07, "loss": 0.28387451171875, "step": 13461 }, { "epoch": 0.9099634987156956, "grad_norm": 1.3732932806015015, "learning_rate": 6.402202526691842e-07, "loss": 0.187164306640625, "step": 13462 }, { "epoch": 0.9100310936866297, "grad_norm": 0.828582227230072, "learning_rate": 6.392669216879837e-07, "loss": 0.1367340087890625, "step": 13463 }, { "epoch": 0.9100986886575638, "grad_norm": 1.0522496700286865, "learning_rate": 6.383142855683566e-07, "loss": 0.240142822265625, "step": 13464 }, { "epoch": 0.910166283628498, "grad_norm": 1.2925645112991333, "learning_rate": 6.373623443563992e-07, "loss": 0.28155517578125, "step": 13465 }, { "epoch": 0.9102338785994322, "grad_norm": 1.24876070022583, "learning_rate": 6.364110980981713e-07, "loss": 0.202301025390625, "step": 13466 }, { "epoch": 0.9103014735703664, "grad_norm": 1.506317377090454, "learning_rate": 6.354605468397013e-07, "loss": 0.3175048828125, "step": 13467 }, { "epoch": 0.9103690685413005, "grad_norm": 0.502865195274353, "learning_rate": 6.345106906269821e-07, "loss": 0.10157012939453125, "step": 13468 }, { "epoch": 0.9104366635122347, "grad_norm": 1.1055785417556763, "learning_rate": 6.335615295059721e-07, "loss": 0.19692230224609375, "step": 13469 }, { "epoch": 0.9105042584831688, "grad_norm": 1.2489819526672363, "learning_rate": 6.326130635226013e-07, "loss": 0.2357177734375, "step": 13470 }, { "epoch": 0.910571853454103, "grad_norm": 1.131369948387146, "learning_rate": 6.316652927227579e-07, "loss": 0.15679168701171875, "step": 13471 }, { "epoch": 0.9106394484250372, "grad_norm": 1.3643646240234375, "learning_rate": 6.307182171523052e-07, "loss": 0.29510498046875, "step": 13472 }, { "epoch": 0.9107070433959713, "grad_norm": 1.3413276672363281, "learning_rate": 6.297718368570665e-07, "loss": 0.26092529296875, "step": 13473 }, { "epoch": 0.9107746383669055, "grad_norm": 1.0070680379867554, "learning_rate": 6.288261518828353e-07, "loss": 0.1673736572265625, "step": 13474 }, { "epoch": 0.9108422333378396, "grad_norm": 2.267245054244995, "learning_rate": 6.278811622753666e-07, "loss": 0.251373291015625, "step": 13475 }, { "epoch": 0.9109098283087739, "grad_norm": 2.3390328884124756, "learning_rate": 6.269368680803872e-07, "loss": 0.2706298828125, "step": 13476 }, { "epoch": 0.910977423279708, "grad_norm": 1.1847257614135742, "learning_rate": 6.259932693435854e-07, "loss": 0.193145751953125, "step": 13477 }, { "epoch": 0.9110450182506421, "grad_norm": 1.0704747438430786, "learning_rate": 6.250503661106233e-07, "loss": 0.2532958984375, "step": 13478 }, { "epoch": 0.9111126132215763, "grad_norm": 0.8631018400192261, "learning_rate": 6.241081584271192e-07, "loss": 0.15142822265625, "step": 13479 }, { "epoch": 0.9111802081925104, "grad_norm": 1.0779709815979004, "learning_rate": 6.231666463386648e-07, "loss": 0.217559814453125, "step": 13480 }, { "epoch": 0.9112478031634447, "grad_norm": 2.0240726470947266, "learning_rate": 6.22225829890819e-07, "loss": 0.30645751953125, "step": 13481 }, { "epoch": 0.9113153981343788, "grad_norm": 0.7707621455192566, "learning_rate": 6.212857091290985e-07, "loss": 0.09666061401367188, "step": 13482 }, { "epoch": 0.911382993105313, "grad_norm": 1.0542317628860474, "learning_rate": 6.203462840989988e-07, "loss": 0.168212890625, "step": 13483 }, { "epoch": 0.9114505880762471, "grad_norm": 1.6520322561264038, "learning_rate": 6.194075548459683e-07, "loss": 0.33123779296875, "step": 13484 }, { "epoch": 0.9115181830471812, "grad_norm": 0.9112411737442017, "learning_rate": 6.184695214154356e-07, "loss": 0.1943359375, "step": 13485 }, { "epoch": 0.9115857780181155, "grad_norm": 1.1392536163330078, "learning_rate": 6.175321838527848e-07, "loss": 0.172576904296875, "step": 13486 }, { "epoch": 0.9116533729890496, "grad_norm": 1.4660784006118774, "learning_rate": 6.165955422033692e-07, "loss": 0.2547607421875, "step": 13487 }, { "epoch": 0.9117209679599838, "grad_norm": 1.7263392210006714, "learning_rate": 6.156595965125111e-07, "loss": 0.2716064453125, "step": 13488 }, { "epoch": 0.9117885629309179, "grad_norm": 1.6279680728912354, "learning_rate": 6.147243468254976e-07, "loss": 0.200225830078125, "step": 13489 }, { "epoch": 0.9118561579018521, "grad_norm": 2.20991587638855, "learning_rate": 6.137897931875824e-07, "loss": 0.2669830322265625, "step": 13490 }, { "epoch": 0.9119237528727863, "grad_norm": 1.2338606119155884, "learning_rate": 6.128559356439828e-07, "loss": 0.246124267578125, "step": 13491 }, { "epoch": 0.9119913478437204, "grad_norm": 2.137059450149536, "learning_rate": 6.119227742398842e-07, "loss": 0.299346923828125, "step": 13492 }, { "epoch": 0.9120589428146546, "grad_norm": 0.7574939131736755, "learning_rate": 6.109903090204405e-07, "loss": 0.121734619140625, "step": 13493 }, { "epoch": 0.9121265377855887, "grad_norm": 1.026802659034729, "learning_rate": 6.100585400307723e-07, "loss": 0.1911163330078125, "step": 13494 }, { "epoch": 0.912194132756523, "grad_norm": 1.31307852268219, "learning_rate": 6.091274673159603e-07, "loss": 0.17662811279296875, "step": 13495 }, { "epoch": 0.9122617277274571, "grad_norm": 0.6876575350761414, "learning_rate": 6.081970909210582e-07, "loss": 0.153045654296875, "step": 13496 }, { "epoch": 0.9123293226983913, "grad_norm": 1.1229851245880127, "learning_rate": 6.072674108910803e-07, "loss": 0.224609375, "step": 13497 }, { "epoch": 0.9123969176693254, "grad_norm": 1.226235270500183, "learning_rate": 6.063384272710154e-07, "loss": 0.207794189453125, "step": 13498 }, { "epoch": 0.9124645126402595, "grad_norm": 0.9771819114685059, "learning_rate": 6.054101401058109e-07, "loss": 0.23626708984375, "step": 13499 }, { "epoch": 0.9125321076111937, "grad_norm": 1.01185142993927, "learning_rate": 6.044825494403794e-07, "loss": 0.1842041015625, "step": 13500 }, { "epoch": 0.9125997025821279, "grad_norm": 1.4363949298858643, "learning_rate": 6.035556553196098e-07, "loss": 0.228790283203125, "step": 13501 }, { "epoch": 0.9126672975530621, "grad_norm": 1.5446035861968994, "learning_rate": 6.026294577883462e-07, "loss": 0.26580810546875, "step": 13502 }, { "epoch": 0.9127348925239962, "grad_norm": 1.824514627456665, "learning_rate": 6.017039568914079e-07, "loss": 0.256134033203125, "step": 13503 }, { "epoch": 0.9128024874949304, "grad_norm": 0.8566840887069702, "learning_rate": 6.007791526735724e-07, "loss": 0.1069793701171875, "step": 13504 }, { "epoch": 0.9128700824658645, "grad_norm": 1.1264050006866455, "learning_rate": 5.998550451795903e-07, "loss": 0.17269134521484375, "step": 13505 }, { "epoch": 0.9129376774367987, "grad_norm": 1.6171808242797852, "learning_rate": 5.989316344541729e-07, "loss": 0.1613006591796875, "step": 13506 }, { "epoch": 0.9130052724077329, "grad_norm": 1.6909022331237793, "learning_rate": 5.980089205420059e-07, "loss": 0.20977783203125, "step": 13507 }, { "epoch": 0.913072867378667, "grad_norm": 1.1074895858764648, "learning_rate": 5.970869034877286e-07, "loss": 0.227569580078125, "step": 13508 }, { "epoch": 0.9131404623496012, "grad_norm": 1.0303269624710083, "learning_rate": 5.961655833359603e-07, "loss": 0.199798583984375, "step": 13509 }, { "epoch": 0.9132080573205353, "grad_norm": 1.065255045890808, "learning_rate": 5.95244960131277e-07, "loss": 0.1615753173828125, "step": 13510 }, { "epoch": 0.9132756522914696, "grad_norm": 0.4979267120361328, "learning_rate": 5.943250339182216e-07, "loss": 0.08329010009765625, "step": 13511 }, { "epoch": 0.9133432472624037, "grad_norm": 1.690529227256775, "learning_rate": 5.934058047413133e-07, "loss": 0.2608489990234375, "step": 13512 }, { "epoch": 0.9134108422333378, "grad_norm": 1.1656007766723633, "learning_rate": 5.924872726450231e-07, "loss": 0.241241455078125, "step": 13513 }, { "epoch": 0.913478437204272, "grad_norm": 1.748517632484436, "learning_rate": 5.91569437673799e-07, "loss": 0.23736572265625, "step": 13514 }, { "epoch": 0.9135460321752061, "grad_norm": 0.8591103553771973, "learning_rate": 5.906522998720504e-07, "loss": 0.135467529296875, "step": 13515 }, { "epoch": 0.9136136271461404, "grad_norm": 1.4788472652435303, "learning_rate": 5.897358592841551e-07, "loss": 0.172210693359375, "step": 13516 }, { "epoch": 0.9136812221170745, "grad_norm": 1.622941493988037, "learning_rate": 5.888201159544543e-07, "loss": 0.2907562255859375, "step": 13517 }, { "epoch": 0.9137488170880087, "grad_norm": 1.4713674783706665, "learning_rate": 5.879050699272592e-07, "loss": 0.2259521484375, "step": 13518 }, { "epoch": 0.9138164120589428, "grad_norm": 1.5968149900436401, "learning_rate": 5.869907212468428e-07, "loss": 0.265045166015625, "step": 13519 }, { "epoch": 0.9138840070298769, "grad_norm": 1.3872445821762085, "learning_rate": 5.86077069957453e-07, "loss": 0.21478271484375, "step": 13520 }, { "epoch": 0.9139516020008112, "grad_norm": 1.7471182346343994, "learning_rate": 5.851641161032895e-07, "loss": 0.1860198974609375, "step": 13521 }, { "epoch": 0.9140191969717453, "grad_norm": 1.5240834951400757, "learning_rate": 5.842518597285318e-07, "loss": 0.3108367919921875, "step": 13522 }, { "epoch": 0.9140867919426795, "grad_norm": 1.531416416168213, "learning_rate": 5.83340300877323e-07, "loss": 0.2149658203125, "step": 13523 }, { "epoch": 0.9141543869136136, "grad_norm": 1.5681124925613403, "learning_rate": 5.82429439593763e-07, "loss": 0.2669677734375, "step": 13524 }, { "epoch": 0.9142219818845478, "grad_norm": 1.3007245063781738, "learning_rate": 5.815192759219312e-07, "loss": 0.26824951171875, "step": 13525 }, { "epoch": 0.914289576855482, "grad_norm": 1.5303490161895752, "learning_rate": 5.80609809905861e-07, "loss": 0.258575439453125, "step": 13526 }, { "epoch": 0.9143571718264161, "grad_norm": 1.6319667100906372, "learning_rate": 5.797010415895637e-07, "loss": 0.212310791015625, "step": 13527 }, { "epoch": 0.9144247667973503, "grad_norm": 1.0883005857467651, "learning_rate": 5.787929710170093e-07, "loss": 0.234100341796875, "step": 13528 }, { "epoch": 0.9144923617682844, "grad_norm": 0.8707523345947266, "learning_rate": 5.778855982321357e-07, "loss": 0.1637420654296875, "step": 13529 }, { "epoch": 0.9145599567392186, "grad_norm": 0.5717575550079346, "learning_rate": 5.769789232788463e-07, "loss": 0.124237060546875, "step": 13530 }, { "epoch": 0.9146275517101528, "grad_norm": 1.43094801902771, "learning_rate": 5.760729462010112e-07, "loss": 0.304473876953125, "step": 13531 }, { "epoch": 0.914695146681087, "grad_norm": 1.3960298299789429, "learning_rate": 5.751676670424683e-07, "loss": 0.2177581787109375, "step": 13532 }, { "epoch": 0.9147627416520211, "grad_norm": 1.4452903270721436, "learning_rate": 5.742630858470243e-07, "loss": 0.2320556640625, "step": 13533 }, { "epoch": 0.9148303366229552, "grad_norm": 1.0138732194900513, "learning_rate": 5.733592026584428e-07, "loss": 0.228118896484375, "step": 13534 }, { "epoch": 0.9148979315938894, "grad_norm": 1.0594415664672852, "learning_rate": 5.724560175204585e-07, "loss": 0.1968841552734375, "step": 13535 }, { "epoch": 0.9149655265648236, "grad_norm": 1.5636943578720093, "learning_rate": 5.715535304767816e-07, "loss": 0.28912353515625, "step": 13536 }, { "epoch": 0.9150331215357578, "grad_norm": 1.2116060256958008, "learning_rate": 5.706517415710688e-07, "loss": 0.232940673828125, "step": 13537 }, { "epoch": 0.9151007165066919, "grad_norm": 1.3845937252044678, "learning_rate": 5.697506508469652e-07, "loss": 0.2772216796875, "step": 13538 }, { "epoch": 0.9151683114776261, "grad_norm": 1.4069122076034546, "learning_rate": 5.688502583480609e-07, "loss": 0.29473876953125, "step": 13539 }, { "epoch": 0.9152359064485602, "grad_norm": 1.008306622505188, "learning_rate": 5.679505641179312e-07, "loss": 0.19842529296875, "step": 13540 }, { "epoch": 0.9153035014194943, "grad_norm": 1.2653130292892456, "learning_rate": 5.670515682001026e-07, "loss": 0.267578125, "step": 13541 }, { "epoch": 0.9153710963904286, "grad_norm": 1.4098032712936401, "learning_rate": 5.66153270638079e-07, "loss": 0.301422119140625, "step": 13542 }, { "epoch": 0.9154386913613627, "grad_norm": 1.3240686655044556, "learning_rate": 5.652556714753221e-07, "loss": 0.220458984375, "step": 13543 }, { "epoch": 0.9155062863322969, "grad_norm": 1.1992506980895996, "learning_rate": 5.643587707552639e-07, "loss": 0.1905517578125, "step": 13544 }, { "epoch": 0.915573881303231, "grad_norm": 1.0741255283355713, "learning_rate": 5.634625685213047e-07, "loss": 0.163787841796875, "step": 13545 }, { "epoch": 0.9156414762741653, "grad_norm": 1.797256588935852, "learning_rate": 5.625670648168046e-07, "loss": 0.24932861328125, "step": 13546 }, { "epoch": 0.9157090712450994, "grad_norm": 1.4858754873275757, "learning_rate": 5.616722596850959e-07, "loss": 0.3087615966796875, "step": 13547 }, { "epoch": 0.9157766662160335, "grad_norm": 1.5075806379318237, "learning_rate": 5.607781531694723e-07, "loss": 0.25604248046875, "step": 13548 }, { "epoch": 0.9158442611869677, "grad_norm": 0.8383535742759705, "learning_rate": 5.598847453132023e-07, "loss": 0.1993408203125, "step": 13549 }, { "epoch": 0.9159118561579018, "grad_norm": 1.142011284828186, "learning_rate": 5.589920361595063e-07, "loss": 0.1826019287109375, "step": 13550 }, { "epoch": 0.915979451128836, "grad_norm": 0.9305489659309387, "learning_rate": 5.58100025751585e-07, "loss": 0.1729278564453125, "step": 13551 }, { "epoch": 0.9160470460997702, "grad_norm": 0.7546490430831909, "learning_rate": 5.572087141325971e-07, "loss": 0.170196533203125, "step": 13552 }, { "epoch": 0.9161146410707044, "grad_norm": 0.9854618310928345, "learning_rate": 5.563181013456714e-07, "loss": 0.162109375, "step": 13553 }, { "epoch": 0.9161822360416385, "grad_norm": 1.441117286682129, "learning_rate": 5.554281874339001e-07, "loss": 0.2709197998046875, "step": 13554 }, { "epoch": 0.9162498310125726, "grad_norm": 0.8306722044944763, "learning_rate": 5.545389724403421e-07, "loss": 0.1236114501953125, "step": 13555 }, { "epoch": 0.9163174259835069, "grad_norm": 0.9117178320884705, "learning_rate": 5.536504564080247e-07, "loss": 0.196929931640625, "step": 13556 }, { "epoch": 0.916385020954441, "grad_norm": 1.7147325277328491, "learning_rate": 5.5276263937994e-07, "loss": 0.309783935546875, "step": 13557 }, { "epoch": 0.9164526159253752, "grad_norm": 1.1984654664993286, "learning_rate": 5.51875521399044e-07, "loss": 0.26654052734375, "step": 13558 }, { "epoch": 0.9165202108963093, "grad_norm": 1.5238696336746216, "learning_rate": 5.50989102508262e-07, "loss": 0.26153564453125, "step": 13559 }, { "epoch": 0.9165878058672435, "grad_norm": 2.186828374862671, "learning_rate": 5.501033827504848e-07, "loss": 0.234771728515625, "step": 13560 }, { "epoch": 0.9166554008381776, "grad_norm": 1.5133785009384155, "learning_rate": 5.492183621685681e-07, "loss": 0.207733154296875, "step": 13561 }, { "epoch": 0.9167229958091118, "grad_norm": 0.8115653991699219, "learning_rate": 5.483340408053378e-07, "loss": 0.1429290771484375, "step": 13562 }, { "epoch": 0.916790590780046, "grad_norm": 1.2193833589553833, "learning_rate": 5.474504187035778e-07, "loss": 0.16619873046875, "step": 13563 }, { "epoch": 0.9168581857509801, "grad_norm": 1.4946482181549072, "learning_rate": 5.465674959060473e-07, "loss": 0.272918701171875, "step": 13564 }, { "epoch": 0.9169257807219143, "grad_norm": 2.014922618865967, "learning_rate": 5.456852724554673e-07, "loss": 0.267547607421875, "step": 13565 }, { "epoch": 0.9169933756928484, "grad_norm": 1.0235978364944458, "learning_rate": 5.448037483945234e-07, "loss": 0.196075439453125, "step": 13566 }, { "epoch": 0.9170609706637826, "grad_norm": 1.7821705341339111, "learning_rate": 5.4392292376587e-07, "loss": 0.21820068359375, "step": 13567 }, { "epoch": 0.9171285656347168, "grad_norm": 0.924911618232727, "learning_rate": 5.430427986121261e-07, "loss": 0.1646575927734375, "step": 13568 }, { "epoch": 0.9171961606056509, "grad_norm": 1.6156328916549683, "learning_rate": 5.421633729758796e-07, "loss": 0.287109375, "step": 13569 }, { "epoch": 0.9172637555765851, "grad_norm": 1.6198680400848389, "learning_rate": 5.412846468996812e-07, "loss": 0.2657012939453125, "step": 13570 }, { "epoch": 0.9173313505475192, "grad_norm": 1.2270689010620117, "learning_rate": 5.404066204260505e-07, "loss": 0.1805572509765625, "step": 13571 }, { "epoch": 0.9173989455184535, "grad_norm": 0.7010819911956787, "learning_rate": 5.395292935974683e-07, "loss": 0.1329345703125, "step": 13572 }, { "epoch": 0.9174665404893876, "grad_norm": 1.9563544988632202, "learning_rate": 5.38652666456389e-07, "loss": 0.37677001953125, "step": 13573 }, { "epoch": 0.9175341354603217, "grad_norm": 1.7168660163879395, "learning_rate": 5.377767390452271e-07, "loss": 0.2843017578125, "step": 13574 }, { "epoch": 0.9176017304312559, "grad_norm": 1.5613309144973755, "learning_rate": 5.369015114063686e-07, "loss": 0.265716552734375, "step": 13575 }, { "epoch": 0.91766932540219, "grad_norm": 0.6788888573646545, "learning_rate": 5.360269835821563e-07, "loss": 0.0928955078125, "step": 13576 }, { "epoch": 0.9177369203731243, "grad_norm": 1.5198169946670532, "learning_rate": 5.351531556149097e-07, "loss": 0.15972900390625, "step": 13577 }, { "epoch": 0.9178045153440584, "grad_norm": 0.8866245150566101, "learning_rate": 5.342800275469118e-07, "loss": 0.10443878173828125, "step": 13578 }, { "epoch": 0.9178721103149926, "grad_norm": 1.7638920545578003, "learning_rate": 5.334075994204052e-07, "loss": 0.172271728515625, "step": 13579 }, { "epoch": 0.9179397052859267, "grad_norm": 1.0896739959716797, "learning_rate": 5.325358712776079e-07, "loss": 0.172821044921875, "step": 13580 }, { "epoch": 0.9180073002568608, "grad_norm": 1.291451096534729, "learning_rate": 5.316648431606963e-07, "loss": 0.206207275390625, "step": 13581 }, { "epoch": 0.9180748952277951, "grad_norm": 1.2468210458755493, "learning_rate": 5.307945151118166e-07, "loss": 0.22381591796875, "step": 13582 }, { "epoch": 0.9181424901987292, "grad_norm": 1.0206385850906372, "learning_rate": 5.299248871730816e-07, "loss": 0.1510162353515625, "step": 13583 }, { "epoch": 0.9182100851696634, "grad_norm": 1.047954797744751, "learning_rate": 5.290559593865696e-07, "loss": 0.2346954345703125, "step": 13584 }, { "epoch": 0.9182776801405975, "grad_norm": 1.355778694152832, "learning_rate": 5.281877317943234e-07, "loss": 0.20613861083984375, "step": 13585 }, { "epoch": 0.9183452751115317, "grad_norm": 1.1402156352996826, "learning_rate": 5.273202044383546e-07, "loss": 0.24237060546875, "step": 13586 }, { "epoch": 0.9184128700824659, "grad_norm": 0.9115407466888428, "learning_rate": 5.264533773606362e-07, "loss": 0.1569061279296875, "step": 13587 }, { "epoch": 0.9184804650534, "grad_norm": 2.3582870960235596, "learning_rate": 5.255872506031179e-07, "loss": 0.340301513671875, "step": 13588 }, { "epoch": 0.9185480600243342, "grad_norm": 1.836834192276001, "learning_rate": 5.247218242077012e-07, "loss": 0.34722900390625, "step": 13589 }, { "epoch": 0.9186156549952683, "grad_norm": 1.2390750646591187, "learning_rate": 5.238570982162627e-07, "loss": 0.251068115234375, "step": 13590 }, { "epoch": 0.9186832499662025, "grad_norm": 0.5169277191162109, "learning_rate": 5.229930726706455e-07, "loss": 0.085845947265625, "step": 13591 }, { "epoch": 0.9187508449371367, "grad_norm": 1.5915864706039429, "learning_rate": 5.221297476126547e-07, "loss": 0.28106689453125, "step": 13592 }, { "epoch": 0.9188184399080709, "grad_norm": 1.5528404712677002, "learning_rate": 5.212671230840649e-07, "loss": 0.15338134765625, "step": 13593 }, { "epoch": 0.918886034879005, "grad_norm": 1.1233198642730713, "learning_rate": 5.204051991266129e-07, "loss": 0.268402099609375, "step": 13594 }, { "epoch": 0.9189536298499391, "grad_norm": 1.108896017074585, "learning_rate": 5.195439757820036e-07, "loss": 0.241119384765625, "step": 13595 }, { "epoch": 0.9190212248208733, "grad_norm": 1.1449862718582153, "learning_rate": 5.186834530919121e-07, "loss": 0.280670166015625, "step": 13596 }, { "epoch": 0.9190888197918075, "grad_norm": 1.7974426746368408, "learning_rate": 5.178236310979718e-07, "loss": 0.260955810546875, "step": 13597 }, { "epoch": 0.9191564147627417, "grad_norm": 2.2504384517669678, "learning_rate": 5.169645098417875e-07, "loss": 0.2393951416015625, "step": 13598 }, { "epoch": 0.9192240097336758, "grad_norm": 0.9490211009979248, "learning_rate": 5.161060893649311e-07, "loss": 0.21197509765625, "step": 13599 }, { "epoch": 0.91929160470461, "grad_norm": 0.9778227806091309, "learning_rate": 5.152483697089344e-07, "loss": 0.175567626953125, "step": 13600 }, { "epoch": 0.9193591996755441, "grad_norm": 0.9723868370056152, "learning_rate": 5.143913509153042e-07, "loss": 0.1542816162109375, "step": 13601 }, { "epoch": 0.9194267946464783, "grad_norm": 1.638815999031067, "learning_rate": 5.135350330255023e-07, "loss": 0.2594757080078125, "step": 13602 }, { "epoch": 0.9194943896174125, "grad_norm": 1.4113656282424927, "learning_rate": 5.126794160809672e-07, "loss": 0.2755126953125, "step": 13603 }, { "epoch": 0.9195619845883466, "grad_norm": 1.2340916395187378, "learning_rate": 5.118245001230993e-07, "loss": 0.248626708984375, "step": 13604 }, { "epoch": 0.9196295795592808, "grad_norm": 0.9494348168373108, "learning_rate": 5.109702851932602e-07, "loss": 0.170867919921875, "step": 13605 }, { "epoch": 0.9196971745302149, "grad_norm": 1.1231865882873535, "learning_rate": 5.101167713327853e-07, "loss": 0.1483154296875, "step": 13606 }, { "epoch": 0.9197647695011492, "grad_norm": 1.8655778169631958, "learning_rate": 5.092639585829733e-07, "loss": 0.2906951904296875, "step": 13607 }, { "epoch": 0.9198323644720833, "grad_norm": 1.1324788331985474, "learning_rate": 5.084118469850895e-07, "loss": 0.21240234375, "step": 13608 }, { "epoch": 0.9198999594430174, "grad_norm": 1.3997490406036377, "learning_rate": 5.07560436580361e-07, "loss": 0.191558837890625, "step": 13609 }, { "epoch": 0.9199675544139516, "grad_norm": 1.4387894868850708, "learning_rate": 5.067097274099864e-07, "loss": 0.23541259765625, "step": 13610 }, { "epoch": 0.9200351493848857, "grad_norm": 1.6354060173034668, "learning_rate": 5.058597195151276e-07, "loss": 0.2578125, "step": 13611 }, { "epoch": 0.92010274435582, "grad_norm": 1.5736757516860962, "learning_rate": 5.050104129369154e-07, "loss": 0.294830322265625, "step": 13612 }, { "epoch": 0.9201703393267541, "grad_norm": 0.9299677610397339, "learning_rate": 5.041618077164401e-07, "loss": 0.169158935546875, "step": 13613 }, { "epoch": 0.9202379342976883, "grad_norm": 1.7487640380859375, "learning_rate": 5.03313903894767e-07, "loss": 0.246856689453125, "step": 13614 }, { "epoch": 0.9203055292686224, "grad_norm": 1.5691783428192139, "learning_rate": 5.024667015129219e-07, "loss": 0.195404052734375, "step": 13615 }, { "epoch": 0.9203731242395565, "grad_norm": 1.1523517370224, "learning_rate": 5.016202006118953e-07, "loss": 0.218017578125, "step": 13616 }, { "epoch": 0.9204407192104908, "grad_norm": 1.0588589906692505, "learning_rate": 5.007744012326493e-07, "loss": 0.187469482421875, "step": 13617 }, { "epoch": 0.9205083141814249, "grad_norm": 1.333958387374878, "learning_rate": 4.999293034161046e-07, "loss": 0.253173828125, "step": 13618 }, { "epoch": 0.9205759091523591, "grad_norm": 1.3140300512313843, "learning_rate": 4.990849072031583e-07, "loss": 0.284912109375, "step": 13619 }, { "epoch": 0.9206435041232932, "grad_norm": 1.7095632553100586, "learning_rate": 4.982412126346631e-07, "loss": 0.2940673828125, "step": 13620 }, { "epoch": 0.9207110990942274, "grad_norm": 1.4015154838562012, "learning_rate": 4.973982197514443e-07, "loss": 0.242584228515625, "step": 13621 }, { "epoch": 0.9207786940651616, "grad_norm": 1.467411994934082, "learning_rate": 4.965559285942895e-07, "loss": 0.231231689453125, "step": 13622 }, { "epoch": 0.9208462890360957, "grad_norm": 1.4712400436401367, "learning_rate": 4.957143392039543e-07, "loss": 0.19589996337890625, "step": 13623 }, { "epoch": 0.9209138840070299, "grad_norm": 0.9136329293251038, "learning_rate": 4.948734516211612e-07, "loss": 0.17596435546875, "step": 13624 }, { "epoch": 0.920981478977964, "grad_norm": 1.1598858833312988, "learning_rate": 4.940332658865976e-07, "loss": 0.130859375, "step": 13625 }, { "epoch": 0.9210490739488982, "grad_norm": 1.5268446207046509, "learning_rate": 4.931937820409143e-07, "loss": 0.26702880859375, "step": 13626 }, { "epoch": 0.9211166689198323, "grad_norm": 1.5361486673355103, "learning_rate": 4.92355000124734e-07, "loss": 0.19728851318359375, "step": 13627 }, { "epoch": 0.9211842638907666, "grad_norm": 1.0706599950790405, "learning_rate": 4.915169201786441e-07, "loss": 0.19573974609375, "step": 13628 }, { "epoch": 0.9212518588617007, "grad_norm": 0.9934240579605103, "learning_rate": 4.90679542243187e-07, "loss": 0.2159423828125, "step": 13629 }, { "epoch": 0.9213194538326348, "grad_norm": 1.1387900114059448, "learning_rate": 4.898428663588905e-07, "loss": 0.1326141357421875, "step": 13630 }, { "epoch": 0.921387048803569, "grad_norm": 1.5373997688293457, "learning_rate": 4.890068925662305e-07, "loss": 0.271209716796875, "step": 13631 }, { "epoch": 0.9214546437745031, "grad_norm": 0.752856433391571, "learning_rate": 4.88171620905663e-07, "loss": 0.1434326171875, "step": 13632 }, { "epoch": 0.9215222387454374, "grad_norm": 1.5381309986114502, "learning_rate": 4.87337051417599e-07, "loss": 0.23236083984375, "step": 13633 }, { "epoch": 0.9215898337163715, "grad_norm": 2.0220625400543213, "learning_rate": 4.865031841424211e-07, "loss": 0.27569580078125, "step": 13634 }, { "epoch": 0.9216574286873057, "grad_norm": 1.3401598930358887, "learning_rate": 4.856700191204788e-07, "loss": 0.19805526733398438, "step": 13635 }, { "epoch": 0.9217250236582398, "grad_norm": 1.2364786863327026, "learning_rate": 4.848375563920848e-07, "loss": 0.2795257568359375, "step": 13636 }, { "epoch": 0.9217926186291739, "grad_norm": 0.7820473313331604, "learning_rate": 4.840057959975169e-07, "loss": 0.1424560546875, "step": 13637 }, { "epoch": 0.9218602136001082, "grad_norm": 1.4603708982467651, "learning_rate": 4.831747379770246e-07, "loss": 0.304779052734375, "step": 13638 }, { "epoch": 0.9219278085710423, "grad_norm": 0.6751595139503479, "learning_rate": 4.823443823708173e-07, "loss": 0.08545684814453125, "step": 13639 }, { "epoch": 0.9219954035419765, "grad_norm": 1.2226351499557495, "learning_rate": 4.815147292190713e-07, "loss": 0.2125244140625, "step": 13640 }, { "epoch": 0.9220629985129106, "grad_norm": 1.4184021949768066, "learning_rate": 4.806857785619361e-07, "loss": 0.282989501953125, "step": 13641 }, { "epoch": 0.9221305934838449, "grad_norm": 1.134128212928772, "learning_rate": 4.798575304395131e-07, "loss": 0.2509765625, "step": 13642 }, { "epoch": 0.922198188454779, "grad_norm": 0.967649519443512, "learning_rate": 4.790299848918849e-07, "loss": 0.21917724609375, "step": 13643 }, { "epoch": 0.9222657834257131, "grad_norm": 1.57233726978302, "learning_rate": 4.782031419590899e-07, "loss": 0.256378173828125, "step": 13644 }, { "epoch": 0.9223333783966473, "grad_norm": 0.9878026247024536, "learning_rate": 4.773770016811391e-07, "loss": 0.172271728515625, "step": 13645 }, { "epoch": 0.9224009733675814, "grad_norm": 0.8896597027778625, "learning_rate": 4.7655156409800227e-07, "loss": 0.1707000732421875, "step": 13646 }, { "epoch": 0.9224685683385156, "grad_norm": 1.0011649131774902, "learning_rate": 4.7572682924962085e-07, "loss": 0.16489410400390625, "step": 13647 }, { "epoch": 0.9225361633094498, "grad_norm": 0.7819023728370667, "learning_rate": 4.749027971759029e-07, "loss": 0.162567138671875, "step": 13648 }, { "epoch": 0.922603758280384, "grad_norm": 1.5244848728179932, "learning_rate": 4.7407946791671654e-07, "loss": 0.2536773681640625, "step": 13649 }, { "epoch": 0.9226713532513181, "grad_norm": 1.4001237154006958, "learning_rate": 4.732568415119015e-07, "loss": 0.32501220703125, "step": 13650 }, { "epoch": 0.9227389482222522, "grad_norm": 1.6718441247940063, "learning_rate": 4.724349180012627e-07, "loss": 0.235321044921875, "step": 13651 }, { "epoch": 0.9228065431931864, "grad_norm": 1.0758041143417358, "learning_rate": 4.716136974245666e-07, "loss": 0.22698211669921875, "step": 13652 }, { "epoch": 0.9228741381641206, "grad_norm": 2.2833142280578613, "learning_rate": 4.707931798215498e-07, "loss": 0.28192138671875, "step": 13653 }, { "epoch": 0.9229417331350548, "grad_norm": 1.1657195091247559, "learning_rate": 4.699733652319188e-07, "loss": 0.1384735107421875, "step": 13654 }, { "epoch": 0.9230093281059889, "grad_norm": 1.4889169931411743, "learning_rate": 4.691542536953353e-07, "loss": 0.22314453125, "step": 13655 }, { "epoch": 0.9230769230769231, "grad_norm": 1.432865023612976, "learning_rate": 4.683358452514358e-07, "loss": 0.233795166015625, "step": 13656 }, { "epoch": 0.9231445180478572, "grad_norm": 1.177922248840332, "learning_rate": 4.675181399398204e-07, "loss": 0.247833251953125, "step": 13657 }, { "epoch": 0.9232121130187914, "grad_norm": 1.1062812805175781, "learning_rate": 4.6670113780005244e-07, "loss": 0.207000732421875, "step": 13658 }, { "epoch": 0.9232797079897256, "grad_norm": 1.3870254755020142, "learning_rate": 4.658848388716652e-07, "loss": 0.21527099609375, "step": 13659 }, { "epoch": 0.9233473029606597, "grad_norm": 1.1768943071365356, "learning_rate": 4.6506924319415724e-07, "loss": 0.20636367797851562, "step": 13660 }, { "epoch": 0.9234148979315939, "grad_norm": 1.1726096868515015, "learning_rate": 4.642543508069902e-07, "loss": 0.244537353515625, "step": 13661 }, { "epoch": 0.923482492902528, "grad_norm": 1.9495530128479004, "learning_rate": 4.634401617495959e-07, "loss": 0.2991943359375, "step": 13662 }, { "epoch": 0.9235500878734623, "grad_norm": 1.0790141820907593, "learning_rate": 4.626266760613679e-07, "loss": 0.180908203125, "step": 13663 }, { "epoch": 0.9236176828443964, "grad_norm": 1.0516058206558228, "learning_rate": 4.6181389378166796e-07, "loss": 0.23529052734375, "step": 13664 }, { "epoch": 0.9236852778153305, "grad_norm": 1.0280659198760986, "learning_rate": 4.6100181494982296e-07, "loss": 0.1805419921875, "step": 13665 }, { "epoch": 0.9237528727862647, "grad_norm": 1.908799409866333, "learning_rate": 4.6019043960512653e-07, "loss": 0.299591064453125, "step": 13666 }, { "epoch": 0.9238204677571988, "grad_norm": 0.9084198474884033, "learning_rate": 4.5937976778684223e-07, "loss": 0.13021087646484375, "step": 13667 }, { "epoch": 0.9238880627281331, "grad_norm": 1.335538625717163, "learning_rate": 4.5856979953418866e-07, "loss": 0.20654296875, "step": 13668 }, { "epoch": 0.9239556576990672, "grad_norm": 1.0236282348632812, "learning_rate": 4.577605348863612e-07, "loss": 0.275054931640625, "step": 13669 }, { "epoch": 0.9240232526700014, "grad_norm": 1.5367636680603027, "learning_rate": 4.5695197388251673e-07, "loss": 0.2663116455078125, "step": 13670 }, { "epoch": 0.9240908476409355, "grad_norm": 1.166352391242981, "learning_rate": 4.5614411656177746e-07, "loss": 0.125335693359375, "step": 13671 }, { "epoch": 0.9241584426118696, "grad_norm": 0.8550564646720886, "learning_rate": 4.553369629632337e-07, "loss": 0.17270278930664062, "step": 13672 }, { "epoch": 0.9242260375828039, "grad_norm": 1.6185479164123535, "learning_rate": 4.5453051312594087e-07, "loss": 0.279510498046875, "step": 13673 }, { "epoch": 0.924293632553738, "grad_norm": 1.309982419013977, "learning_rate": 4.5372476708891776e-07, "loss": 0.219940185546875, "step": 13674 }, { "epoch": 0.9243612275246722, "grad_norm": 1.1731685400009155, "learning_rate": 4.529197248911532e-07, "loss": 0.275634765625, "step": 13675 }, { "epoch": 0.9244288224956063, "grad_norm": 1.233715295791626, "learning_rate": 4.521153865715993e-07, "loss": 0.195159912109375, "step": 13676 }, { "epoch": 0.9244964174665405, "grad_norm": 1.2346696853637695, "learning_rate": 4.5131175216917507e-07, "loss": 0.307891845703125, "step": 13677 }, { "epoch": 0.9245640124374747, "grad_norm": 0.8851105570793152, "learning_rate": 4.5050882172276764e-07, "loss": 0.16817474365234375, "step": 13678 }, { "epoch": 0.9246316074084088, "grad_norm": 1.2333050966262817, "learning_rate": 4.497065952712226e-07, "loss": 0.22948455810546875, "step": 13679 }, { "epoch": 0.924699202379343, "grad_norm": 1.6179553270339966, "learning_rate": 4.489050728533622e-07, "loss": 0.250244140625, "step": 13680 }, { "epoch": 0.9247667973502771, "grad_norm": 1.1032973527908325, "learning_rate": 4.481042545079639e-07, "loss": 0.22576904296875, "step": 13681 }, { "epoch": 0.9248343923212113, "grad_norm": 1.2211054563522339, "learning_rate": 4.473041402737815e-07, "loss": 0.200347900390625, "step": 13682 }, { "epoch": 0.9249019872921455, "grad_norm": 1.245652675628662, "learning_rate": 4.465047301895259e-07, "loss": 0.172119140625, "step": 13683 }, { "epoch": 0.9249695822630797, "grad_norm": 1.1119871139526367, "learning_rate": 4.457060242938793e-07, "loss": 0.204010009765625, "step": 13684 }, { "epoch": 0.9250371772340138, "grad_norm": 1.0055862665176392, "learning_rate": 4.4490802262548757e-07, "loss": 0.217041015625, "step": 13685 }, { "epoch": 0.9251047722049479, "grad_norm": 0.9252681136131287, "learning_rate": 4.4411072522296317e-07, "loss": 0.19952392578125, "step": 13686 }, { "epoch": 0.9251723671758821, "grad_norm": 1.8728129863739014, "learning_rate": 4.433141321248835e-07, "loss": 0.32281494140625, "step": 13687 }, { "epoch": 0.9252399621468163, "grad_norm": 0.9742860794067383, "learning_rate": 4.425182433697927e-07, "loss": 0.180419921875, "step": 13688 }, { "epoch": 0.9253075571177505, "grad_norm": 1.1937083005905151, "learning_rate": 4.417230589962018e-07, "loss": 0.1672821044921875, "step": 13689 }, { "epoch": 0.9253751520886846, "grad_norm": 1.4365513324737549, "learning_rate": 4.409285790425849e-07, "loss": 0.200286865234375, "step": 13690 }, { "epoch": 0.9254427470596187, "grad_norm": 0.7843390703201294, "learning_rate": 4.401348035473879e-07, "loss": 0.113494873046875, "step": 13691 }, { "epoch": 0.9255103420305529, "grad_norm": 1.1282209157943726, "learning_rate": 4.3934173254901346e-07, "loss": 0.225982666015625, "step": 13692 }, { "epoch": 0.925577937001487, "grad_norm": 1.2388710975646973, "learning_rate": 4.3854936608584086e-07, "loss": 0.276885986328125, "step": 13693 }, { "epoch": 0.9256455319724213, "grad_norm": 2.0266048908233643, "learning_rate": 4.3775770419620275e-07, "loss": 0.267425537109375, "step": 13694 }, { "epoch": 0.9257131269433554, "grad_norm": 1.2636386156082153, "learning_rate": 4.369667469184102e-07, "loss": 0.1935577392578125, "step": 13695 }, { "epoch": 0.9257807219142896, "grad_norm": 1.2577416896820068, "learning_rate": 4.3617649429073423e-07, "loss": 0.244140625, "step": 13696 }, { "epoch": 0.9258483168852237, "grad_norm": 1.3866115808486938, "learning_rate": 4.353869463514093e-07, "loss": 0.25677490234375, "step": 13697 }, { "epoch": 0.9259159118561578, "grad_norm": 1.7757164239883423, "learning_rate": 4.3459810313864144e-07, "loss": 0.31732177734375, "step": 13698 }, { "epoch": 0.9259835068270921, "grad_norm": 2.0111093521118164, "learning_rate": 4.3380996469059853e-07, "loss": 0.322021484375, "step": 13699 }, { "epoch": 0.9260511017980262, "grad_norm": 0.7967817783355713, "learning_rate": 4.33022531045415e-07, "loss": 0.1600341796875, "step": 13700 }, { "epoch": 0.9261186967689604, "grad_norm": 1.3738313913345337, "learning_rate": 4.322358022411921e-07, "loss": 0.21547698974609375, "step": 13701 }, { "epoch": 0.9261862917398945, "grad_norm": 1.0505975484848022, "learning_rate": 4.314497783159976e-07, "loss": 0.206939697265625, "step": 13702 }, { "epoch": 0.9262538867108288, "grad_norm": 0.9630950689315796, "learning_rate": 4.306644593078629e-07, "loss": 0.16390323638916016, "step": 13703 }, { "epoch": 0.9263214816817629, "grad_norm": 0.8995183110237122, "learning_rate": 4.298798452547892e-07, "loss": 0.15576171875, "step": 13704 }, { "epoch": 0.926389076652697, "grad_norm": 1.765149712562561, "learning_rate": 4.290959361947361e-07, "loss": 0.341552734375, "step": 13705 }, { "epoch": 0.9264566716236312, "grad_norm": 1.4671008586883545, "learning_rate": 4.2831273216563995e-07, "loss": 0.254852294921875, "step": 13706 }, { "epoch": 0.9265242665945653, "grad_norm": 1.56786048412323, "learning_rate": 4.2753023320539375e-07, "loss": 0.1954345703125, "step": 13707 }, { "epoch": 0.9265918615654996, "grad_norm": 1.2638365030288696, "learning_rate": 4.267484393518589e-07, "loss": 0.214263916015625, "step": 13708 }, { "epoch": 0.9266594565364337, "grad_norm": 1.6513220071792603, "learning_rate": 4.2596735064286684e-07, "loss": 0.23712158203125, "step": 13709 }, { "epoch": 0.9267270515073679, "grad_norm": 0.6532349586486816, "learning_rate": 4.2518696711620733e-07, "loss": 0.171142578125, "step": 13710 }, { "epoch": 0.926794646478302, "grad_norm": 1.5916341543197632, "learning_rate": 4.2440728880964177e-07, "loss": 0.3206787109375, "step": 13711 }, { "epoch": 0.9268622414492361, "grad_norm": 1.5544542074203491, "learning_rate": 4.2362831576089834e-07, "loss": 0.2548980712890625, "step": 13712 }, { "epoch": 0.9269298364201703, "grad_norm": 1.2650114297866821, "learning_rate": 4.2285004800766524e-07, "loss": 0.16575241088867188, "step": 13713 }, { "epoch": 0.9269974313911045, "grad_norm": 0.910610556602478, "learning_rate": 4.220724855876007e-07, "loss": 0.149169921875, "step": 13714 }, { "epoch": 0.9270650263620387, "grad_norm": 1.2919375896453857, "learning_rate": 4.212956285383296e-07, "loss": 0.1790924072265625, "step": 13715 }, { "epoch": 0.9271326213329728, "grad_norm": 0.7293087244033813, "learning_rate": 4.205194768974369e-07, "loss": 0.10543060302734375, "step": 13716 }, { "epoch": 0.927200216303907, "grad_norm": 1.19307541847229, "learning_rate": 4.197440307024858e-07, "loss": 0.162445068359375, "step": 13717 }, { "epoch": 0.9272678112748411, "grad_norm": 1.5466701984405518, "learning_rate": 4.1896928999098637e-07, "loss": 0.28399658203125, "step": 13718 }, { "epoch": 0.9273354062457753, "grad_norm": 1.258678674697876, "learning_rate": 4.181952548004353e-07, "loss": 0.21978759765625, "step": 13719 }, { "epoch": 0.9274030012167095, "grad_norm": 1.316185712814331, "learning_rate": 4.174219251682809e-07, "loss": 0.2365875244140625, "step": 13720 }, { "epoch": 0.9274705961876436, "grad_norm": 1.3491731882095337, "learning_rate": 4.1664930113194e-07, "loss": 0.261962890625, "step": 13721 }, { "epoch": 0.9275381911585778, "grad_norm": 2.4690463542938232, "learning_rate": 4.1587738272880093e-07, "loss": 0.3212890625, "step": 13722 }, { "epoch": 0.9276057861295119, "grad_norm": 1.1031458377838135, "learning_rate": 4.151061699962089e-07, "loss": 0.2293853759765625, "step": 13723 }, { "epoch": 0.9276733811004462, "grad_norm": 0.6971178650856018, "learning_rate": 4.143356629714856e-07, "loss": 0.1673583984375, "step": 13724 }, { "epoch": 0.9277409760713803, "grad_norm": 1.3558892011642456, "learning_rate": 4.13565861691908e-07, "loss": 0.238525390625, "step": 13725 }, { "epoch": 0.9278085710423144, "grad_norm": 1.4947223663330078, "learning_rate": 4.1279676619472796e-07, "loss": 0.232879638671875, "step": 13726 }, { "epoch": 0.9278761660132486, "grad_norm": 0.8557631373405457, "learning_rate": 4.120283765171573e-07, "loss": 0.1822509765625, "step": 13727 }, { "epoch": 0.9279437609841827, "grad_norm": 1.198416829109192, "learning_rate": 4.1126069269637465e-07, "loss": 0.253936767578125, "step": 13728 }, { "epoch": 0.928011355955117, "grad_norm": 1.1205799579620361, "learning_rate": 4.104937147695237e-07, "loss": 0.24468994140625, "step": 13729 }, { "epoch": 0.9280789509260511, "grad_norm": 1.4235384464263916, "learning_rate": 4.0972744277372133e-07, "loss": 0.23211669921875, "step": 13730 }, { "epoch": 0.9281465458969853, "grad_norm": 1.8156462907791138, "learning_rate": 4.089618767460396e-07, "loss": 0.330963134765625, "step": 13731 }, { "epoch": 0.9282141408679194, "grad_norm": 1.4440574645996094, "learning_rate": 4.0819701672352383e-07, "loss": 0.286163330078125, "step": 13732 }, { "epoch": 0.9282817358388535, "grad_norm": 1.4570053815841675, "learning_rate": 4.074328627431828e-07, "loss": 0.239105224609375, "step": 13733 }, { "epoch": 0.9283493308097878, "grad_norm": 0.868811845779419, "learning_rate": 4.0666941484198693e-07, "loss": 0.15380859375, "step": 13734 }, { "epoch": 0.9284169257807219, "grad_norm": 1.5428861379623413, "learning_rate": 4.0590667305688167e-07, "loss": 0.33880615234375, "step": 13735 }, { "epoch": 0.9284845207516561, "grad_norm": 1.3118997812271118, "learning_rate": 4.051446374247708e-07, "loss": 0.29058837890625, "step": 13736 }, { "epoch": 0.9285521157225902, "grad_norm": 1.1471248865127563, "learning_rate": 4.043833079825282e-07, "loss": 0.2010345458984375, "step": 13737 }, { "epoch": 0.9286197106935244, "grad_norm": 1.3788328170776367, "learning_rate": 4.036226847669894e-07, "loss": 0.2103271484375, "step": 13738 }, { "epoch": 0.9286873056644586, "grad_norm": 0.8740733861923218, "learning_rate": 4.028627678149582e-07, "loss": 0.16607666015625, "step": 13739 }, { "epoch": 0.9287549006353927, "grad_norm": 1.9527227878570557, "learning_rate": 4.02103557163207e-07, "loss": 0.3331298828125, "step": 13740 }, { "epoch": 0.9288224956063269, "grad_norm": 0.9404672980308533, "learning_rate": 4.0134505284846637e-07, "loss": 0.185638427734375, "step": 13741 }, { "epoch": 0.928890090577261, "grad_norm": 1.7110389471054077, "learning_rate": 4.0058725490744017e-07, "loss": 0.26751708984375, "step": 13742 }, { "epoch": 0.9289576855481952, "grad_norm": 1.3557641506195068, "learning_rate": 3.9983016337679923e-07, "loss": 0.1965789794921875, "step": 13743 }, { "epoch": 0.9290252805191294, "grad_norm": 1.0325959920883179, "learning_rate": 3.9907377829316915e-07, "loss": 0.203643798828125, "step": 13744 }, { "epoch": 0.9290928754900636, "grad_norm": 1.5817803144454956, "learning_rate": 3.983180996931507e-07, "loss": 0.1976318359375, "step": 13745 }, { "epoch": 0.9291604704609977, "grad_norm": 1.729596495628357, "learning_rate": 3.9756312761331293e-07, "loss": 0.255523681640625, "step": 13746 }, { "epoch": 0.9292280654319318, "grad_norm": 1.5770918130874634, "learning_rate": 3.9680886209017996e-07, "loss": 0.2521934509277344, "step": 13747 }, { "epoch": 0.929295660402866, "grad_norm": 2.4798645973205566, "learning_rate": 3.960553031602526e-07, "loss": 0.327880859375, "step": 13748 }, { "epoch": 0.9293632553738002, "grad_norm": 1.3119337558746338, "learning_rate": 3.953024508599884e-07, "loss": 0.259796142578125, "step": 13749 }, { "epoch": 0.9294308503447344, "grad_norm": 1.0669499635696411, "learning_rate": 3.945503052258198e-07, "loss": 0.18316650390625, "step": 13750 }, { "epoch": 0.9294984453156685, "grad_norm": 0.9412885308265686, "learning_rate": 3.9379886629413606e-07, "loss": 0.18605804443359375, "step": 13751 }, { "epoch": 0.9295660402866027, "grad_norm": 1.0650724172592163, "learning_rate": 3.9304813410129815e-07, "loss": 0.241546630859375, "step": 13752 }, { "epoch": 0.9296336352575368, "grad_norm": 0.746774435043335, "learning_rate": 3.9229810868363024e-07, "loss": 0.1327056884765625, "step": 13753 }, { "epoch": 0.929701230228471, "grad_norm": 1.2412426471710205, "learning_rate": 3.9154879007742674e-07, "loss": 0.1723480224609375, "step": 13754 }, { "epoch": 0.9297688251994052, "grad_norm": 1.50777268409729, "learning_rate": 3.908001783189419e-07, "loss": 0.28125, "step": 13755 }, { "epoch": 0.9298364201703393, "grad_norm": 1.4268858432769775, "learning_rate": 3.9005227344439675e-07, "loss": 0.242523193359375, "step": 13756 }, { "epoch": 0.9299040151412735, "grad_norm": 1.2045502662658691, "learning_rate": 3.893050754899807e-07, "loss": 0.1365966796875, "step": 13757 }, { "epoch": 0.9299716101122076, "grad_norm": 0.5988804697990417, "learning_rate": 3.8855858449184655e-07, "loss": 0.07806777954101562, "step": 13758 }, { "epoch": 0.9300392050831419, "grad_norm": 1.3629910945892334, "learning_rate": 3.878128004861187e-07, "loss": 0.25189208984375, "step": 13759 }, { "epoch": 0.930106800054076, "grad_norm": 0.9962878823280334, "learning_rate": 3.870677235088765e-07, "loss": 0.223724365234375, "step": 13760 }, { "epoch": 0.9301743950250101, "grad_norm": 1.6567432880401611, "learning_rate": 3.863233535961763e-07, "loss": 0.2965087890625, "step": 13761 }, { "epoch": 0.9302419899959443, "grad_norm": 1.0382338762283325, "learning_rate": 3.855796907840342e-07, "loss": 0.1828155517578125, "step": 13762 }, { "epoch": 0.9303095849668784, "grad_norm": 1.5632870197296143, "learning_rate": 3.848367351084331e-07, "loss": 0.213897705078125, "step": 13763 }, { "epoch": 0.9303771799378127, "grad_norm": 1.9157458543777466, "learning_rate": 3.840944866053209e-07, "loss": 0.328582763671875, "step": 13764 }, { "epoch": 0.9304447749087468, "grad_norm": 0.8034218549728394, "learning_rate": 3.833529453106138e-07, "loss": 0.229095458984375, "step": 13765 }, { "epoch": 0.930512369879681, "grad_norm": 1.2436388731002808, "learning_rate": 3.8261211126018824e-07, "loss": 0.2104034423828125, "step": 13766 }, { "epoch": 0.9305799648506151, "grad_norm": 1.589015007019043, "learning_rate": 3.8187198448989714e-07, "loss": 0.2089385986328125, "step": 13767 }, { "epoch": 0.9306475598215492, "grad_norm": 1.181474208831787, "learning_rate": 3.811325650355468e-07, "loss": 0.250213623046875, "step": 13768 }, { "epoch": 0.9307151547924835, "grad_norm": 1.5857295989990234, "learning_rate": 3.803938529329154e-07, "loss": 0.3243408203125, "step": 13769 }, { "epoch": 0.9307827497634176, "grad_norm": 1.039214849472046, "learning_rate": 3.796558482177492e-07, "loss": 0.1967926025390625, "step": 13770 }, { "epoch": 0.9308503447343518, "grad_norm": 0.9056203961372375, "learning_rate": 3.789185509257548e-07, "loss": 0.18072509765625, "step": 13771 }, { "epoch": 0.9309179397052859, "grad_norm": 1.629522681236267, "learning_rate": 3.7818196109261017e-07, "loss": 0.272003173828125, "step": 13772 }, { "epoch": 0.9309855346762201, "grad_norm": 0.9227272272109985, "learning_rate": 3.7744607875395355e-07, "loss": 0.189300537109375, "step": 13773 }, { "epoch": 0.9310531296471543, "grad_norm": 1.306398868560791, "learning_rate": 3.767109039453914e-07, "loss": 0.224761962890625, "step": 13774 }, { "epoch": 0.9311207246180884, "grad_norm": 0.7637975811958313, "learning_rate": 3.759764367024987e-07, "loss": 0.160400390625, "step": 13775 }, { "epoch": 0.9311883195890226, "grad_norm": 0.8883847594261169, "learning_rate": 3.752426770608103e-07, "loss": 0.13272857666015625, "step": 13776 }, { "epoch": 0.9312559145599567, "grad_norm": 2.0021915435791016, "learning_rate": 3.7450962505583276e-07, "loss": 0.31732177734375, "step": 13777 }, { "epoch": 0.9313235095308909, "grad_norm": 1.2021992206573486, "learning_rate": 3.737772807230344e-07, "loss": 0.169036865234375, "step": 13778 }, { "epoch": 0.931391104501825, "grad_norm": 0.8967941403388977, "learning_rate": 3.7304564409785025e-07, "loss": 0.138275146484375, "step": 13779 }, { "epoch": 0.9314586994727593, "grad_norm": 0.9916777610778809, "learning_rate": 3.7231471521568194e-07, "loss": 0.14739990234375, "step": 13780 }, { "epoch": 0.9315262944436934, "grad_norm": 0.6415284276008606, "learning_rate": 3.715844941118962e-07, "loss": 0.10401535034179688, "step": 13781 }, { "epoch": 0.9315938894146275, "grad_norm": 1.6968549489974976, "learning_rate": 3.708549808218248e-07, "loss": 0.24847412109375, "step": 13782 }, { "epoch": 0.9316614843855617, "grad_norm": 1.2370212078094482, "learning_rate": 3.701261753807694e-07, "loss": 0.2356414794921875, "step": 13783 }, { "epoch": 0.9317290793564958, "grad_norm": 1.4272047281265259, "learning_rate": 3.693980778239903e-07, "loss": 0.27484130859375, "step": 13784 }, { "epoch": 0.9317966743274301, "grad_norm": 1.4856642484664917, "learning_rate": 3.686706881867208e-07, "loss": 0.23230743408203125, "step": 13785 }, { "epoch": 0.9318642692983642, "grad_norm": 2.4900200366973877, "learning_rate": 3.679440065041512e-07, "loss": 0.38287353515625, "step": 13786 }, { "epoch": 0.9319318642692984, "grad_norm": 1.1787362098693848, "learning_rate": 3.672180328114483e-07, "loss": 0.244720458984375, "step": 13787 }, { "epoch": 0.9319994592402325, "grad_norm": 1.701169729232788, "learning_rate": 3.6649276714373734e-07, "loss": 0.2945556640625, "step": 13788 }, { "epoch": 0.9320670542111666, "grad_norm": 1.7012213468551636, "learning_rate": 3.65768209536112e-07, "loss": 0.26141357421875, "step": 13789 }, { "epoch": 0.9321346491821009, "grad_norm": 0.9726178050041199, "learning_rate": 3.650443600236292e-07, "loss": 0.1949920654296875, "step": 13790 }, { "epoch": 0.932202244153035, "grad_norm": 0.9864733219146729, "learning_rate": 3.6432121864131415e-07, "loss": 0.1833343505859375, "step": 13791 }, { "epoch": 0.9322698391239692, "grad_norm": 1.2362992763519287, "learning_rate": 3.6359878542415735e-07, "loss": 0.212860107421875, "step": 13792 }, { "epoch": 0.9323374340949033, "grad_norm": 1.7558239698410034, "learning_rate": 3.628770604071124e-07, "loss": 0.2845458984375, "step": 13793 }, { "epoch": 0.9324050290658376, "grad_norm": 1.5420727729797363, "learning_rate": 3.621560436251048e-07, "loss": 0.201690673828125, "step": 13794 }, { "epoch": 0.9324726240367717, "grad_norm": 1.051578164100647, "learning_rate": 3.614357351130165e-07, "loss": 0.18890380859375, "step": 13795 }, { "epoch": 0.9325402190077058, "grad_norm": 1.1788430213928223, "learning_rate": 3.607161349057064e-07, "loss": 0.212677001953125, "step": 13796 }, { "epoch": 0.93260781397864, "grad_norm": 1.41912043094635, "learning_rate": 3.599972430379883e-07, "loss": 0.28857421875, "step": 13797 }, { "epoch": 0.9326754089495741, "grad_norm": 1.262215256690979, "learning_rate": 3.5927905954464933e-07, "loss": 0.236968994140625, "step": 13798 }, { "epoch": 0.9327430039205084, "grad_norm": 1.22480309009552, "learning_rate": 3.5856158446044005e-07, "loss": 0.267364501953125, "step": 13799 }, { "epoch": 0.9328105988914425, "grad_norm": 0.8821123242378235, "learning_rate": 3.5784481782007605e-07, "loss": 0.11483001708984375, "step": 13800 }, { "epoch": 0.9328781938623767, "grad_norm": 1.3468124866485596, "learning_rate": 3.571287596582379e-07, "loss": 0.269287109375, "step": 13801 }, { "epoch": 0.9329457888333108, "grad_norm": 1.7565494775772095, "learning_rate": 3.5641341000957283e-07, "loss": 0.28271484375, "step": 13802 }, { "epoch": 0.9330133838042449, "grad_norm": 1.5564360618591309, "learning_rate": 3.556987689086949e-07, "loss": 0.252960205078125, "step": 13803 }, { "epoch": 0.9330809787751791, "grad_norm": 2.257723331451416, "learning_rate": 3.549848363901814e-07, "loss": 0.26739501953125, "step": 13804 }, { "epoch": 0.9331485737461133, "grad_norm": 1.2879618406295776, "learning_rate": 3.5427161248857975e-07, "loss": 0.23748779296875, "step": 13805 }, { "epoch": 0.9332161687170475, "grad_norm": 0.6135346293449402, "learning_rate": 3.535590972383973e-07, "loss": 0.100616455078125, "step": 13806 }, { "epoch": 0.9332837636879816, "grad_norm": 0.5607767701148987, "learning_rate": 3.528472906741098e-07, "loss": 0.08051300048828125, "step": 13807 }, { "epoch": 0.9333513586589158, "grad_norm": 0.5852412581443787, "learning_rate": 3.5213619283015977e-07, "loss": 0.08831787109375, "step": 13808 }, { "epoch": 0.93341895362985, "grad_norm": 1.1529951095581055, "learning_rate": 3.514258037409579e-07, "loss": 0.20953369140625, "step": 13809 }, { "epoch": 0.9334865486007841, "grad_norm": 1.0818595886230469, "learning_rate": 3.507161234408718e-07, "loss": 0.1636505126953125, "step": 13810 }, { "epoch": 0.9335541435717183, "grad_norm": 0.7099609971046448, "learning_rate": 3.500071519642423e-07, "loss": 0.1800537109375, "step": 13811 }, { "epoch": 0.9336217385426524, "grad_norm": 1.1711723804473877, "learning_rate": 3.4929888934537523e-07, "loss": 0.1873016357421875, "step": 13812 }, { "epoch": 0.9336893335135866, "grad_norm": 1.1700457334518433, "learning_rate": 3.4859133561853993e-07, "loss": 0.17877197265625, "step": 13813 }, { "epoch": 0.9337569284845207, "grad_norm": 0.8751954436302185, "learning_rate": 3.4788449081797236e-07, "loss": 0.15166473388671875, "step": 13814 }, { "epoch": 0.933824523455455, "grad_norm": 0.8781490325927734, "learning_rate": 3.4717835497787177e-07, "loss": 0.1353302001953125, "step": 13815 }, { "epoch": 0.9338921184263891, "grad_norm": 1.0925112962722778, "learning_rate": 3.464729281324075e-07, "loss": 0.28070068359375, "step": 13816 }, { "epoch": 0.9339597133973232, "grad_norm": 1.1899136304855347, "learning_rate": 3.457682103157139e-07, "loss": 0.30767822265625, "step": 13817 }, { "epoch": 0.9340273083682574, "grad_norm": 0.7237855792045593, "learning_rate": 3.4506420156188713e-07, "loss": 0.1218109130859375, "step": 13818 }, { "epoch": 0.9340949033391915, "grad_norm": 0.5731950998306274, "learning_rate": 3.443609019049915e-07, "loss": 0.142547607421875, "step": 13819 }, { "epoch": 0.9341624983101258, "grad_norm": 1.1327072381973267, "learning_rate": 3.436583113790598e-07, "loss": 0.2765350341796875, "step": 13820 }, { "epoch": 0.9342300932810599, "grad_norm": 1.0672379732131958, "learning_rate": 3.4295643001808316e-07, "loss": 0.187774658203125, "step": 13821 }, { "epoch": 0.934297688251994, "grad_norm": 0.7044118046760559, "learning_rate": 3.4225525785602785e-07, "loss": 0.1234588623046875, "step": 13822 }, { "epoch": 0.9343652832229282, "grad_norm": 1.0460867881774902, "learning_rate": 3.4155479492681663e-07, "loss": 0.15584945678710938, "step": 13823 }, { "epoch": 0.9344328781938623, "grad_norm": 1.57766592502594, "learning_rate": 3.4085504126434575e-07, "loss": 0.18255615234375, "step": 13824 }, { "epoch": 0.9345004731647966, "grad_norm": 1.7301025390625, "learning_rate": 3.4015599690247314e-07, "loss": 0.2069549560546875, "step": 13825 }, { "epoch": 0.9345680681357307, "grad_norm": 1.3569908142089844, "learning_rate": 3.3945766187502e-07, "loss": 0.22528076171875, "step": 13826 }, { "epoch": 0.9346356631066649, "grad_norm": 1.7673778533935547, "learning_rate": 3.3876003621577944e-07, "loss": 0.251617431640625, "step": 13827 }, { "epoch": 0.934703258077599, "grad_norm": 0.8515593409538269, "learning_rate": 3.38063119958506e-07, "loss": 0.14858627319335938, "step": 13828 }, { "epoch": 0.9347708530485331, "grad_norm": 1.0008234977722168, "learning_rate": 3.3736691313691947e-07, "loss": 0.14519119262695312, "step": 13829 }, { "epoch": 0.9348384480194674, "grad_norm": 1.147314429283142, "learning_rate": 3.3667141578470785e-07, "loss": 0.24090576171875, "step": 13830 }, { "epoch": 0.9349060429904015, "grad_norm": 1.4929150342941284, "learning_rate": 3.3597662793552255e-07, "loss": 0.3055419921875, "step": 13831 }, { "epoch": 0.9349736379613357, "grad_norm": 1.478933572769165, "learning_rate": 3.352825496229833e-07, "loss": 0.2755126953125, "step": 13832 }, { "epoch": 0.9350412329322698, "grad_norm": 2.8804373741149902, "learning_rate": 3.3458918088067336e-07, "loss": 0.387451171875, "step": 13833 }, { "epoch": 0.935108827903204, "grad_norm": 2.3881733417510986, "learning_rate": 3.338965217421408e-07, "loss": 0.22607421875, "step": 13834 }, { "epoch": 0.9351764228741382, "grad_norm": 1.188656210899353, "learning_rate": 3.332045722409055e-07, "loss": 0.21466064453125, "step": 13835 }, { "epoch": 0.9352440178450723, "grad_norm": 1.1784309148788452, "learning_rate": 3.325133324104407e-07, "loss": 0.18206787109375, "step": 13836 }, { "epoch": 0.9353116128160065, "grad_norm": 1.3121618032455444, "learning_rate": 3.318228022841996e-07, "loss": 0.21276092529296875, "step": 13837 }, { "epoch": 0.9353792077869406, "grad_norm": 1.6582263708114624, "learning_rate": 3.3113298189559217e-07, "loss": 0.2740478515625, "step": 13838 }, { "epoch": 0.9354468027578748, "grad_norm": 0.8838713765144348, "learning_rate": 3.3044387127799335e-07, "loss": 0.159942626953125, "step": 13839 }, { "epoch": 0.935514397728809, "grad_norm": 0.8777731657028198, "learning_rate": 3.2975547046474987e-07, "loss": 0.127044677734375, "step": 13840 }, { "epoch": 0.9355819926997432, "grad_norm": 1.5038659572601318, "learning_rate": 3.2906777948917165e-07, "loss": 0.257110595703125, "step": 13841 }, { "epoch": 0.9356495876706773, "grad_norm": 1.9616731405258179, "learning_rate": 3.283807983845305e-07, "loss": 0.239288330078125, "step": 13842 }, { "epoch": 0.9357171826416114, "grad_norm": 1.376381516456604, "learning_rate": 3.276945271840681e-07, "loss": 0.236083984375, "step": 13843 }, { "epoch": 0.9357847776125456, "grad_norm": 1.244472861289978, "learning_rate": 3.2700896592098964e-07, "loss": 0.2248382568359375, "step": 13844 }, { "epoch": 0.9358523725834798, "grad_norm": 1.3671408891677856, "learning_rate": 3.2632411462846845e-07, "loss": 0.259918212890625, "step": 13845 }, { "epoch": 0.935919967554414, "grad_norm": 1.660585880279541, "learning_rate": 3.256399733396431e-07, "loss": 0.25457763671875, "step": 13846 }, { "epoch": 0.9359875625253481, "grad_norm": 1.049425482749939, "learning_rate": 3.249565420876122e-07, "loss": 0.24066543579101562, "step": 13847 }, { "epoch": 0.9360551574962823, "grad_norm": 1.6935410499572754, "learning_rate": 3.242738209054491e-07, "loss": 0.26055908203125, "step": 13848 }, { "epoch": 0.9361227524672164, "grad_norm": 1.4822773933410645, "learning_rate": 3.235918098261842e-07, "loss": 0.2796783447265625, "step": 13849 }, { "epoch": 0.9361903474381505, "grad_norm": 1.2002125978469849, "learning_rate": 3.229105088828177e-07, "loss": 0.20770263671875, "step": 13850 }, { "epoch": 0.9362579424090848, "grad_norm": 1.8775306940078735, "learning_rate": 3.222299181083199e-07, "loss": 0.2843017578125, "step": 13851 }, { "epoch": 0.9363255373800189, "grad_norm": 1.6443710327148438, "learning_rate": 3.215500375356162e-07, "loss": 0.295257568359375, "step": 13852 }, { "epoch": 0.9363931323509531, "grad_norm": 1.5785237550735474, "learning_rate": 3.208708671976085e-07, "loss": 0.239593505859375, "step": 13853 }, { "epoch": 0.9364607273218872, "grad_norm": 0.9817377924919128, "learning_rate": 3.201924071271539e-07, "loss": 0.2037353515625, "step": 13854 }, { "epoch": 0.9365283222928215, "grad_norm": 0.7828853726387024, "learning_rate": 3.195146573570862e-07, "loss": 0.186676025390625, "step": 13855 }, { "epoch": 0.9365959172637556, "grad_norm": 1.0545061826705933, "learning_rate": 3.1883761792019404e-07, "loss": 0.2216796875, "step": 13856 }, { "epoch": 0.9366635122346897, "grad_norm": 1.1072026491165161, "learning_rate": 3.181612888492413e-07, "loss": 0.13069915771484375, "step": 13857 }, { "epoch": 0.9367311072056239, "grad_norm": 1.2109819650650024, "learning_rate": 3.174856701769485e-07, "loss": 0.2146453857421875, "step": 13858 }, { "epoch": 0.936798702176558, "grad_norm": 0.9149338603019714, "learning_rate": 3.168107619360111e-07, "loss": 0.272857666015625, "step": 13859 }, { "epoch": 0.9368662971474923, "grad_norm": 1.6145117282867432, "learning_rate": 3.1613656415907967e-07, "loss": 0.285400390625, "step": 13860 }, { "epoch": 0.9369338921184264, "grad_norm": 1.127223253250122, "learning_rate": 3.154630768787814e-07, "loss": 0.1765899658203125, "step": 13861 }, { "epoch": 0.9370014870893606, "grad_norm": 1.2034624814987183, "learning_rate": 3.147903001277019e-07, "loss": 0.248291015625, "step": 13862 }, { "epoch": 0.9370690820602947, "grad_norm": 1.9094856977462769, "learning_rate": 3.141182339383919e-07, "loss": 0.2568359375, "step": 13863 }, { "epoch": 0.9371366770312288, "grad_norm": 1.4138492345809937, "learning_rate": 3.1344687834337524e-07, "loss": 0.2147979736328125, "step": 13864 }, { "epoch": 0.937204272002163, "grad_norm": 1.6771409511566162, "learning_rate": 3.127762333751294e-07, "loss": 0.258209228515625, "step": 13865 }, { "epoch": 0.9372718669730972, "grad_norm": 1.6090247631072998, "learning_rate": 3.1210629906611164e-07, "loss": 0.292022705078125, "step": 13866 }, { "epoch": 0.9373394619440314, "grad_norm": 1.4605684280395508, "learning_rate": 3.1143707544873277e-07, "loss": 0.2335662841796875, "step": 13867 }, { "epoch": 0.9374070569149655, "grad_norm": 1.2204961776733398, "learning_rate": 3.107685625553752e-07, "loss": 0.221527099609375, "step": 13868 }, { "epoch": 0.9374746518858997, "grad_norm": 1.163063406944275, "learning_rate": 3.1010076041838643e-07, "loss": 0.15862464904785156, "step": 13869 }, { "epoch": 0.9375422468568338, "grad_norm": 0.9093019366264343, "learning_rate": 3.0943366907007885e-07, "loss": 0.1927490234375, "step": 13870 }, { "epoch": 0.937609841827768, "grad_norm": 1.3001617193222046, "learning_rate": 3.087672885427284e-07, "loss": 0.19573974609375, "step": 13871 }, { "epoch": 0.9376774367987022, "grad_norm": 1.547260046005249, "learning_rate": 3.081016188685826e-07, "loss": 0.36737060546875, "step": 13872 }, { "epoch": 0.9377450317696363, "grad_norm": 1.135481595993042, "learning_rate": 3.0743666007984727e-07, "loss": 0.201873779296875, "step": 13873 }, { "epoch": 0.9378126267405705, "grad_norm": 1.814839243888855, "learning_rate": 3.067724122086968e-07, "loss": 0.295196533203125, "step": 13874 }, { "epoch": 0.9378802217115046, "grad_norm": 1.1610620021820068, "learning_rate": 3.06108875287277e-07, "loss": 0.183746337890625, "step": 13875 }, { "epoch": 0.9379478166824389, "grad_norm": 1.4435875415802002, "learning_rate": 3.0544604934768573e-07, "loss": 0.28338623046875, "step": 13876 }, { "epoch": 0.938015411653373, "grad_norm": 1.1567254066467285, "learning_rate": 3.047839344220038e-07, "loss": 0.180450439453125, "step": 13877 }, { "epoch": 0.9380830066243071, "grad_norm": 1.6526564359664917, "learning_rate": 3.04122530542259e-07, "loss": 0.234405517578125, "step": 13878 }, { "epoch": 0.9381506015952413, "grad_norm": 1.2280350923538208, "learning_rate": 3.034618377404608e-07, "loss": 0.285003662109375, "step": 13879 }, { "epoch": 0.9382181965661754, "grad_norm": 1.0899144411087036, "learning_rate": 3.028018560485768e-07, "loss": 0.17645263671875, "step": 13880 }, { "epoch": 0.9382857915371097, "grad_norm": 1.57151198387146, "learning_rate": 3.021425854985382e-07, "loss": 0.283203125, "step": 13881 }, { "epoch": 0.9383533865080438, "grad_norm": 1.5576438903808594, "learning_rate": 3.014840261222479e-07, "loss": 0.294525146484375, "step": 13882 }, { "epoch": 0.938420981478978, "grad_norm": 1.4709805250167847, "learning_rate": 3.0082617795156865e-07, "loss": 0.2568817138671875, "step": 13883 }, { "epoch": 0.9384885764499121, "grad_norm": 1.6026886701583862, "learning_rate": 3.0016904101833163e-07, "loss": 0.25103759765625, "step": 13884 }, { "epoch": 0.9385561714208462, "grad_norm": 1.0039610862731934, "learning_rate": 2.995126153543348e-07, "loss": 0.2923583984375, "step": 13885 }, { "epoch": 0.9386237663917805, "grad_norm": 1.1456242799758911, "learning_rate": 2.9885690099133776e-07, "loss": 0.16754913330078125, "step": 13886 }, { "epoch": 0.9386913613627146, "grad_norm": 1.7231894731521606, "learning_rate": 2.9820189796106847e-07, "loss": 0.3105926513671875, "step": 13887 }, { "epoch": 0.9387589563336488, "grad_norm": 1.9343147277832031, "learning_rate": 2.975476062952248e-07, "loss": 0.253936767578125, "step": 13888 }, { "epoch": 0.9388265513045829, "grad_norm": 0.8162246346473694, "learning_rate": 2.9689402602545657e-07, "loss": 0.1917724609375, "step": 13889 }, { "epoch": 0.9388941462755171, "grad_norm": 1.882756233215332, "learning_rate": 2.9624115718339674e-07, "loss": 0.208984375, "step": 13890 }, { "epoch": 0.9389617412464513, "grad_norm": 1.9061779975891113, "learning_rate": 2.9558899980063003e-07, "loss": 0.1988677978515625, "step": 13891 }, { "epoch": 0.9390293362173854, "grad_norm": 1.1028809547424316, "learning_rate": 2.949375539087129e-07, "loss": 0.1878662109375, "step": 13892 }, { "epoch": 0.9390969311883196, "grad_norm": 1.6211943626403809, "learning_rate": 2.9428681953916683e-07, "loss": 0.20391845703125, "step": 13893 }, { "epoch": 0.9391645261592537, "grad_norm": 1.2623437643051147, "learning_rate": 2.9363679672347985e-07, "loss": 0.1463775634765625, "step": 13894 }, { "epoch": 0.939232121130188, "grad_norm": 0.8645606637001038, "learning_rate": 2.929874854931003e-07, "loss": 0.1038360595703125, "step": 13895 }, { "epoch": 0.9392997161011221, "grad_norm": 0.9440074563026428, "learning_rate": 2.9233888587944955e-07, "loss": 0.12961578369140625, "step": 13896 }, { "epoch": 0.9393673110720563, "grad_norm": 0.978918194770813, "learning_rate": 2.9169099791390927e-07, "loss": 0.143096923828125, "step": 13897 }, { "epoch": 0.9394349060429904, "grad_norm": 1.96715247631073, "learning_rate": 2.9104382162782774e-07, "loss": 0.266204833984375, "step": 13898 }, { "epoch": 0.9395025010139245, "grad_norm": 1.4870258569717407, "learning_rate": 2.9039735705252144e-07, "loss": 0.2577056884765625, "step": 13899 }, { "epoch": 0.9395700959848587, "grad_norm": 1.2308309078216553, "learning_rate": 2.897516042192672e-07, "loss": 0.1707611083984375, "step": 13900 }, { "epoch": 0.9396376909557929, "grad_norm": 1.6283371448516846, "learning_rate": 2.891065631593148e-07, "loss": 0.2548675537109375, "step": 13901 }, { "epoch": 0.9397052859267271, "grad_norm": 1.9700478315353394, "learning_rate": 2.884622339038695e-07, "loss": 0.2739715576171875, "step": 13902 }, { "epoch": 0.9397728808976612, "grad_norm": 1.0569926500320435, "learning_rate": 2.878186164841129e-07, "loss": 0.225860595703125, "step": 13903 }, { "epoch": 0.9398404758685954, "grad_norm": 0.9773551821708679, "learning_rate": 2.871757109311868e-07, "loss": 0.140167236328125, "step": 13904 }, { "epoch": 0.9399080708395295, "grad_norm": 1.961391568183899, "learning_rate": 2.865335172761979e-07, "loss": 0.2231597900390625, "step": 13905 }, { "epoch": 0.9399756658104637, "grad_norm": 1.9083538055419922, "learning_rate": 2.8589203555021815e-07, "loss": 0.3021240234375, "step": 13906 }, { "epoch": 0.9400432607813979, "grad_norm": 1.00557279586792, "learning_rate": 2.8525126578428927e-07, "loss": 0.252410888671875, "step": 13907 }, { "epoch": 0.940110855752332, "grad_norm": 2.5903265476226807, "learning_rate": 2.8461120800941323e-07, "loss": 0.327972412109375, "step": 13908 }, { "epoch": 0.9401784507232662, "grad_norm": 1.6123716831207275, "learning_rate": 2.839718622565618e-07, "loss": 0.3114013671875, "step": 13909 }, { "epoch": 0.9402460456942003, "grad_norm": 2.6701066493988037, "learning_rate": 2.8333322855666867e-07, "loss": 0.35113525390625, "step": 13910 }, { "epoch": 0.9403136406651346, "grad_norm": 1.32547926902771, "learning_rate": 2.8269530694063737e-07, "loss": 0.18389892578125, "step": 13911 }, { "epoch": 0.9403812356360687, "grad_norm": 1.0002827644348145, "learning_rate": 2.8205809743933164e-07, "loss": 0.2041015625, "step": 13912 }, { "epoch": 0.9404488306070028, "grad_norm": 0.7431910634040833, "learning_rate": 2.8142160008358343e-07, "loss": 0.10981369018554688, "step": 13913 }, { "epoch": 0.940516425577937, "grad_norm": 1.5115548372268677, "learning_rate": 2.807858149041964e-07, "loss": 0.20890045166015625, "step": 13914 }, { "epoch": 0.9405840205488711, "grad_norm": 1.3642743825912476, "learning_rate": 2.8015074193192605e-07, "loss": 0.2425537109375, "step": 13915 }, { "epoch": 0.9406516155198054, "grad_norm": 1.083813190460205, "learning_rate": 2.79516381197506e-07, "loss": 0.1514739990234375, "step": 13916 }, { "epoch": 0.9407192104907395, "grad_norm": 1.5159814357757568, "learning_rate": 2.7888273273162844e-07, "loss": 0.24658203125, "step": 13917 }, { "epoch": 0.9407868054616737, "grad_norm": 1.2675060033798218, "learning_rate": 2.7824979656495387e-07, "loss": 0.24981689453125, "step": 13918 }, { "epoch": 0.9408544004326078, "grad_norm": 1.1312990188598633, "learning_rate": 2.776175727281077e-07, "loss": 0.262664794921875, "step": 13919 }, { "epoch": 0.9409219954035419, "grad_norm": 1.033524513244629, "learning_rate": 2.7698606125168223e-07, "loss": 0.20538330078125, "step": 13920 }, { "epoch": 0.9409895903744762, "grad_norm": 1.2630265951156616, "learning_rate": 2.763552621662313e-07, "loss": 0.1631011962890625, "step": 13921 }, { "epoch": 0.9410571853454103, "grad_norm": 1.548333764076233, "learning_rate": 2.757251755022788e-07, "loss": 0.24200439453125, "step": 13922 }, { "epoch": 0.9411247803163445, "grad_norm": 1.5255017280578613, "learning_rate": 2.750958012903121e-07, "loss": 0.28564453125, "step": 13923 }, { "epoch": 0.9411923752872786, "grad_norm": 1.4301387071609497, "learning_rate": 2.7446713956078176e-07, "loss": 0.30029296875, "step": 13924 }, { "epoch": 0.9412599702582128, "grad_norm": 1.2360090017318726, "learning_rate": 2.738391903441101e-07, "loss": 0.281951904296875, "step": 13925 }, { "epoch": 0.941327565229147, "grad_norm": 1.1767128705978394, "learning_rate": 2.7321195367067787e-07, "loss": 0.199737548828125, "step": 13926 }, { "epoch": 0.9413951602000811, "grad_norm": 0.8933179974555969, "learning_rate": 2.7258542957083744e-07, "loss": 0.1690826416015625, "step": 13927 }, { "epoch": 0.9414627551710153, "grad_norm": 1.1921383142471313, "learning_rate": 2.719596180749012e-07, "loss": 0.171142578125, "step": 13928 }, { "epoch": 0.9415303501419494, "grad_norm": 0.9827927350997925, "learning_rate": 2.713345192131533e-07, "loss": 0.1692657470703125, "step": 13929 }, { "epoch": 0.9415979451128836, "grad_norm": 0.8738717436790466, "learning_rate": 2.707101330158379e-07, "loss": 0.16302490234375, "step": 13930 }, { "epoch": 0.9416655400838178, "grad_norm": 1.2864001989364624, "learning_rate": 2.700864595131658e-07, "loss": 0.298675537109375, "step": 13931 }, { "epoch": 0.941733135054752, "grad_norm": 1.118930697441101, "learning_rate": 2.694634987353145e-07, "loss": 0.18328857421875, "step": 13932 }, { "epoch": 0.9418007300256861, "grad_norm": 1.0331202745437622, "learning_rate": 2.6884125071242824e-07, "loss": 0.2286834716796875, "step": 13933 }, { "epoch": 0.9418683249966202, "grad_norm": 1.6087955236434937, "learning_rate": 2.6821971547461466e-07, "loss": 0.2618408203125, "step": 13934 }, { "epoch": 0.9419359199675544, "grad_norm": 2.397435188293457, "learning_rate": 2.6759889305194794e-07, "loss": 0.327239990234375, "step": 13935 }, { "epoch": 0.9420035149384886, "grad_norm": 1.8583859205245972, "learning_rate": 2.6697878347446583e-07, "loss": 0.258056640625, "step": 13936 }, { "epoch": 0.9420711099094228, "grad_norm": 1.3430795669555664, "learning_rate": 2.6635938677217255e-07, "loss": 0.2061767578125, "step": 13937 }, { "epoch": 0.9421387048803569, "grad_norm": 0.8823770880699158, "learning_rate": 2.657407029750408e-07, "loss": 0.1091461181640625, "step": 13938 }, { "epoch": 0.9422062998512911, "grad_norm": 0.9767645597457886, "learning_rate": 2.651227321130051e-07, "loss": 0.11492156982421875, "step": 13939 }, { "epoch": 0.9422738948222252, "grad_norm": 1.5395920276641846, "learning_rate": 2.6450547421596807e-07, "loss": 0.2279052734375, "step": 13940 }, { "epoch": 0.9423414897931593, "grad_norm": 1.424436092376709, "learning_rate": 2.638889293137925e-07, "loss": 0.26318359375, "step": 13941 }, { "epoch": 0.9424090847640936, "grad_norm": 0.9639647006988525, "learning_rate": 2.6327309743631457e-07, "loss": 0.1671295166015625, "step": 13942 }, { "epoch": 0.9424766797350277, "grad_norm": 1.1018261909484863, "learning_rate": 2.626579786133321e-07, "loss": 0.19793701171875, "step": 13943 }, { "epoch": 0.9425442747059619, "grad_norm": 1.2021063566207886, "learning_rate": 2.620435728746062e-07, "loss": 0.1507568359375, "step": 13944 }, { "epoch": 0.942611869676896, "grad_norm": 1.7080113887786865, "learning_rate": 2.614298802498666e-07, "loss": 0.281951904296875, "step": 13945 }, { "epoch": 0.9426794646478303, "grad_norm": 1.967104434967041, "learning_rate": 2.608169007688077e-07, "loss": 0.265655517578125, "step": 13946 }, { "epoch": 0.9427470596187644, "grad_norm": 1.5809792280197144, "learning_rate": 2.602046344610892e-07, "loss": 0.3482666015625, "step": 13947 }, { "epoch": 0.9428146545896985, "grad_norm": 1.3054696321487427, "learning_rate": 2.595930813563374e-07, "loss": 0.1923675537109375, "step": 13948 }, { "epoch": 0.9428822495606327, "grad_norm": 1.3225014209747314, "learning_rate": 2.589822414841403e-07, "loss": 0.27813720703125, "step": 13949 }, { "epoch": 0.9429498445315668, "grad_norm": 0.856173574924469, "learning_rate": 2.583721148740542e-07, "loss": 0.22772216796875, "step": 13950 }, { "epoch": 0.943017439502501, "grad_norm": 1.0225343704223633, "learning_rate": 2.5776270155560724e-07, "loss": 0.177734375, "step": 13951 }, { "epoch": 0.9430850344734352, "grad_norm": 1.601106882095337, "learning_rate": 2.571540015582774e-07, "loss": 0.2286376953125, "step": 13952 }, { "epoch": 0.9431526294443693, "grad_norm": 1.4531227350234985, "learning_rate": 2.565460149115245e-07, "loss": 0.27679443359375, "step": 13953 }, { "epoch": 0.9432202244153035, "grad_norm": 1.7290372848510742, "learning_rate": 2.5593874164476337e-07, "loss": 0.2219085693359375, "step": 13954 }, { "epoch": 0.9432878193862376, "grad_norm": 1.173264503479004, "learning_rate": 2.553321817873755e-07, "loss": 0.22412109375, "step": 13955 }, { "epoch": 0.9433554143571719, "grad_norm": 1.1497998237609863, "learning_rate": 2.5472633536871736e-07, "loss": 0.1434173583984375, "step": 13956 }, { "epoch": 0.943423009328106, "grad_norm": 1.1603734493255615, "learning_rate": 2.541212024180939e-07, "loss": 0.162811279296875, "step": 13957 }, { "epoch": 0.9434906042990402, "grad_norm": 1.4246989488601685, "learning_rate": 2.5351678296479333e-07, "loss": 0.29815673828125, "step": 13958 }, { "epoch": 0.9435581992699743, "grad_norm": 2.3037099838256836, "learning_rate": 2.529130770380589e-07, "loss": 0.28570556640625, "step": 13959 }, { "epoch": 0.9436257942409084, "grad_norm": 1.1713206768035889, "learning_rate": 2.523100846670989e-07, "loss": 0.247589111328125, "step": 13960 }, { "epoch": 0.9436933892118426, "grad_norm": 1.2637263536453247, "learning_rate": 2.517078058810934e-07, "loss": 0.2367706298828125, "step": 13961 }, { "epoch": 0.9437609841827768, "grad_norm": 0.9737777709960938, "learning_rate": 2.511062407091824e-07, "loss": 0.21820068359375, "step": 13962 }, { "epoch": 0.943828579153711, "grad_norm": 1.3531938791275024, "learning_rate": 2.505053891804726e-07, "loss": 0.219573974609375, "step": 13963 }, { "epoch": 0.9438961741246451, "grad_norm": 1.4347931146621704, "learning_rate": 2.4990525132404074e-07, "loss": 0.2451171875, "step": 13964 }, { "epoch": 0.9439637690955793, "grad_norm": 1.930981993675232, "learning_rate": 2.4930582716892024e-07, "loss": 0.2405548095703125, "step": 13965 }, { "epoch": 0.9440313640665134, "grad_norm": 1.8741657733917236, "learning_rate": 2.4870711674411964e-07, "loss": 0.3079681396484375, "step": 13966 }, { "epoch": 0.9440989590374476, "grad_norm": 1.401149868965149, "learning_rate": 2.4810912007860566e-07, "loss": 0.3228759765625, "step": 13967 }, { "epoch": 0.9441665540083818, "grad_norm": 1.2436819076538086, "learning_rate": 2.4751183720131185e-07, "loss": 0.16809844970703125, "step": 13968 }, { "epoch": 0.9442341489793159, "grad_norm": 0.6408612728118896, "learning_rate": 2.4691526814114187e-07, "loss": 0.08031272888183594, "step": 13969 }, { "epoch": 0.9443017439502501, "grad_norm": 1.5453170537948608, "learning_rate": 2.463194129269575e-07, "loss": 0.20892333984375, "step": 13970 }, { "epoch": 0.9443693389211842, "grad_norm": 1.5301856994628906, "learning_rate": 2.4572427158759405e-07, "loss": 0.27911376953125, "step": 13971 }, { "epoch": 0.9444369338921185, "grad_norm": 0.8904804587364197, "learning_rate": 2.4512984415184515e-07, "loss": 0.1756134033203125, "step": 13972 }, { "epoch": 0.9445045288630526, "grad_norm": 1.4426783323287964, "learning_rate": 2.445361306484745e-07, "loss": 0.2098388671875, "step": 13973 }, { "epoch": 0.9445721238339867, "grad_norm": 1.7283574342727661, "learning_rate": 2.43943131106209e-07, "loss": 0.29296875, "step": 13974 }, { "epoch": 0.9446397188049209, "grad_norm": 0.8920000195503235, "learning_rate": 2.4335084555374256e-07, "loss": 0.154327392578125, "step": 13975 }, { "epoch": 0.944707313775855, "grad_norm": 1.0944982767105103, "learning_rate": 2.427592740197304e-07, "loss": 0.1705169677734375, "step": 13976 }, { "epoch": 0.9447749087467893, "grad_norm": 1.3442329168319702, "learning_rate": 2.4216841653280143e-07, "loss": 0.33013916015625, "step": 13977 }, { "epoch": 0.9448425037177234, "grad_norm": 1.751177430152893, "learning_rate": 2.4157827312154103e-07, "loss": 0.3087158203125, "step": 13978 }, { "epoch": 0.9449100986886576, "grad_norm": 1.3973382711410522, "learning_rate": 2.409888438145047e-07, "loss": 0.270416259765625, "step": 13979 }, { "epoch": 0.9449776936595917, "grad_norm": 1.6734262704849243, "learning_rate": 2.404001286402163e-07, "loss": 0.2715301513671875, "step": 13980 }, { "epoch": 0.9450452886305258, "grad_norm": 1.3143235445022583, "learning_rate": 2.3981212762715475e-07, "loss": 0.271484375, "step": 13981 }, { "epoch": 0.9451128836014601, "grad_norm": 1.7756904363632202, "learning_rate": 2.392248408037773e-07, "loss": 0.28948974609375, "step": 13982 }, { "epoch": 0.9451804785723942, "grad_norm": 1.050058126449585, "learning_rate": 2.386382681984994e-07, "loss": 0.190887451171875, "step": 13983 }, { "epoch": 0.9452480735433284, "grad_norm": 1.1286243200302124, "learning_rate": 2.3805240983970023e-07, "loss": 0.13880157470703125, "step": 13984 }, { "epoch": 0.9453156685142625, "grad_norm": 1.1744951009750366, "learning_rate": 2.374672657557303e-07, "loss": 0.165374755859375, "step": 13985 }, { "epoch": 0.9453832634851967, "grad_norm": 1.4614698886871338, "learning_rate": 2.3688283597490035e-07, "loss": 0.32794189453125, "step": 13986 }, { "epoch": 0.9454508584561309, "grad_norm": 0.925143301486969, "learning_rate": 2.3629912052548942e-07, "loss": 0.167022705078125, "step": 13987 }, { "epoch": 0.945518453427065, "grad_norm": 0.8796502947807312, "learning_rate": 2.3571611943574323e-07, "loss": 0.1182861328125, "step": 13988 }, { "epoch": 0.9455860483979992, "grad_norm": 1.2977129220962524, "learning_rate": 2.3513383273386756e-07, "loss": 0.21551513671875, "step": 13989 }, { "epoch": 0.9456536433689333, "grad_norm": 1.3442052602767944, "learning_rate": 2.345522604480399e-07, "loss": 0.1674652099609375, "step": 13990 }, { "epoch": 0.9457212383398675, "grad_norm": 0.8246259093284607, "learning_rate": 2.3397140260639772e-07, "loss": 0.1507110595703125, "step": 13991 }, { "epoch": 0.9457888333108017, "grad_norm": 1.7773202657699585, "learning_rate": 2.333912592370485e-07, "loss": 0.2625732421875, "step": 13992 }, { "epoch": 0.9458564282817359, "grad_norm": 1.3810673952102661, "learning_rate": 2.3281183036806318e-07, "loss": 0.224456787109375, "step": 13993 }, { "epoch": 0.94592402325267, "grad_norm": 1.0387232303619385, "learning_rate": 2.3223311602747765e-07, "loss": 0.213623046875, "step": 13994 }, { "epoch": 0.9459916182236041, "grad_norm": 1.2915021181106567, "learning_rate": 2.3165511624329284e-07, "loss": 0.27490234375, "step": 13995 }, { "epoch": 0.9460592131945383, "grad_norm": 1.5156553983688354, "learning_rate": 2.31077831043478e-07, "loss": 0.258575439453125, "step": 13996 }, { "epoch": 0.9461268081654725, "grad_norm": 0.8855051398277283, "learning_rate": 2.305012604559642e-07, "loss": 0.208099365234375, "step": 13997 }, { "epoch": 0.9461944031364067, "grad_norm": 2.0608861446380615, "learning_rate": 2.2992540450865075e-07, "loss": 0.28790283203125, "step": 13998 }, { "epoch": 0.9462619981073408, "grad_norm": 1.2044792175292969, "learning_rate": 2.2935026322939868e-07, "loss": 0.302001953125, "step": 13999 }, { "epoch": 0.946329593078275, "grad_norm": 1.0148794651031494, "learning_rate": 2.2877583664604073e-07, "loss": 0.16546630859375, "step": 14000 }, { "epoch": 0.9463971880492091, "grad_norm": 0.955037534236908, "learning_rate": 2.2820212478636804e-07, "loss": 0.1493988037109375, "step": 14001 }, { "epoch": 0.9464647830201433, "grad_norm": 1.371325969696045, "learning_rate": 2.2762912767814005e-07, "loss": 0.2706298828125, "step": 14002 }, { "epoch": 0.9465323779910775, "grad_norm": 1.1074256896972656, "learning_rate": 2.2705684534908455e-07, "loss": 0.199859619140625, "step": 14003 }, { "epoch": 0.9465999729620116, "grad_norm": 1.1242284774780273, "learning_rate": 2.264852778268911e-07, "loss": 0.250701904296875, "step": 14004 }, { "epoch": 0.9466675679329458, "grad_norm": 1.0029712915420532, "learning_rate": 2.2591442513921423e-07, "loss": 0.13815689086914062, "step": 14005 }, { "epoch": 0.9467351629038799, "grad_norm": 0.7598258852958679, "learning_rate": 2.253442873136785e-07, "loss": 0.15850830078125, "step": 14006 }, { "epoch": 0.9468027578748142, "grad_norm": 1.6028797626495361, "learning_rate": 2.2477486437786522e-07, "loss": 0.27862548828125, "step": 14007 }, { "epoch": 0.9468703528457483, "grad_norm": 1.783189058303833, "learning_rate": 2.2420615635933395e-07, "loss": 0.30194091796875, "step": 14008 }, { "epoch": 0.9469379478166824, "grad_norm": 1.2780104875564575, "learning_rate": 2.2363816328559605e-07, "loss": 0.230804443359375, "step": 14009 }, { "epoch": 0.9470055427876166, "grad_norm": 1.7585827112197876, "learning_rate": 2.2307088518413787e-07, "loss": 0.21114349365234375, "step": 14010 }, { "epoch": 0.9470731377585507, "grad_norm": 1.5031449794769287, "learning_rate": 2.2250432208240746e-07, "loss": 0.218994140625, "step": 14011 }, { "epoch": 0.947140732729485, "grad_norm": 1.2582615613937378, "learning_rate": 2.2193847400781786e-07, "loss": 0.21417236328125, "step": 14012 }, { "epoch": 0.9472083277004191, "grad_norm": 1.695530652999878, "learning_rate": 2.2137334098774886e-07, "loss": 0.293121337890625, "step": 14013 }, { "epoch": 0.9472759226713533, "grad_norm": 1.5425740480422974, "learning_rate": 2.2080892304954525e-07, "loss": 0.239898681640625, "step": 14014 }, { "epoch": 0.9473435176422874, "grad_norm": 1.1579418182373047, "learning_rate": 2.2024522022051518e-07, "loss": 0.1317586898803711, "step": 14015 }, { "epoch": 0.9474111126132215, "grad_norm": 1.2271467447280884, "learning_rate": 2.1968223252793683e-07, "loss": 0.190093994140625, "step": 14016 }, { "epoch": 0.9474787075841558, "grad_norm": 1.6310930252075195, "learning_rate": 2.191199599990501e-07, "loss": 0.27679443359375, "step": 14017 }, { "epoch": 0.9475463025550899, "grad_norm": 1.6548621654510498, "learning_rate": 2.1855840266105987e-07, "loss": 0.31085205078125, "step": 14018 }, { "epoch": 0.9476138975260241, "grad_norm": 0.9054773449897766, "learning_rate": 2.1799756054114106e-07, "loss": 0.1507110595703125, "step": 14019 }, { "epoch": 0.9476814924969582, "grad_norm": 1.8723455667495728, "learning_rate": 2.1743743366642533e-07, "loss": 0.2027130126953125, "step": 14020 }, { "epoch": 0.9477490874678924, "grad_norm": 2.0275027751922607, "learning_rate": 2.1687802206402097e-07, "loss": 0.30464935302734375, "step": 14021 }, { "epoch": 0.9478166824388266, "grad_norm": 1.4066509008407593, "learning_rate": 2.1631932576099135e-07, "loss": 0.30078125, "step": 14022 }, { "epoch": 0.9478842774097607, "grad_norm": 1.9107781648635864, "learning_rate": 2.1576134478437315e-07, "loss": 0.335845947265625, "step": 14023 }, { "epoch": 0.9479518723806949, "grad_norm": 0.9199663996696472, "learning_rate": 2.1520407916116313e-07, "loss": 0.151763916015625, "step": 14024 }, { "epoch": 0.948019467351629, "grad_norm": 1.9472167491912842, "learning_rate": 2.1464752891832473e-07, "loss": 0.339019775390625, "step": 14025 }, { "epoch": 0.9480870623225632, "grad_norm": 1.1720495223999023, "learning_rate": 2.1409169408278806e-07, "loss": 0.18243408203125, "step": 14026 }, { "epoch": 0.9481546572934973, "grad_norm": 1.1494172811508179, "learning_rate": 2.1353657468144828e-07, "loss": 0.184234619140625, "step": 14027 }, { "epoch": 0.9482222522644316, "grad_norm": 0.9125779867172241, "learning_rate": 2.129821707411639e-07, "loss": 0.16139984130859375, "step": 14028 }, { "epoch": 0.9482898472353657, "grad_norm": 1.3529335260391235, "learning_rate": 2.1242848228876177e-07, "loss": 0.2493896484375, "step": 14029 }, { "epoch": 0.9483574422062998, "grad_norm": 1.3026701211929321, "learning_rate": 2.1187550935103383e-07, "loss": 0.15399169921875, "step": 14030 }, { "epoch": 0.948425037177234, "grad_norm": 1.6957899332046509, "learning_rate": 2.1132325195473367e-07, "loss": 0.289093017578125, "step": 14031 }, { "epoch": 0.9484926321481681, "grad_norm": 2.061676502227783, "learning_rate": 2.1077171012658657e-07, "loss": 0.210968017578125, "step": 14032 }, { "epoch": 0.9485602271191024, "grad_norm": 1.1910079717636108, "learning_rate": 2.1022088389327455e-07, "loss": 0.2777099609375, "step": 14033 }, { "epoch": 0.9486278220900365, "grad_norm": 1.5091724395751953, "learning_rate": 2.0967077328145457e-07, "loss": 0.237823486328125, "step": 14034 }, { "epoch": 0.9486954170609707, "grad_norm": 1.116391658782959, "learning_rate": 2.0912137831774202e-07, "loss": 0.17287826538085938, "step": 14035 }, { "epoch": 0.9487630120319048, "grad_norm": 1.1560571193695068, "learning_rate": 2.0857269902872235e-07, "loss": 0.270599365234375, "step": 14036 }, { "epoch": 0.9488306070028389, "grad_norm": 1.5066760778427124, "learning_rate": 2.0802473544094092e-07, "loss": 0.30230712890625, "step": 14037 }, { "epoch": 0.9488982019737732, "grad_norm": 2.0993614196777344, "learning_rate": 2.0747748758091324e-07, "loss": 0.3262939453125, "step": 14038 }, { "epoch": 0.9489657969447073, "grad_norm": 2.1915817260742188, "learning_rate": 2.0693095547511808e-07, "loss": 0.216949462890625, "step": 14039 }, { "epoch": 0.9490333919156415, "grad_norm": 1.445440649986267, "learning_rate": 2.0638513915000102e-07, "loss": 0.26202392578125, "step": 14040 }, { "epoch": 0.9491009868865756, "grad_norm": 1.2007845640182495, "learning_rate": 2.0584003863197086e-07, "loss": 0.230621337890625, "step": 14041 }, { "epoch": 0.9491685818575099, "grad_norm": 1.5112016201019287, "learning_rate": 2.0529565394740323e-07, "loss": 0.3077392578125, "step": 14042 }, { "epoch": 0.949236176828444, "grad_norm": 1.002685308456421, "learning_rate": 2.0475198512264037e-07, "loss": 0.188568115234375, "step": 14043 }, { "epoch": 0.9493037717993781, "grad_norm": 1.221553087234497, "learning_rate": 2.0420903218398457e-07, "loss": 0.25836181640625, "step": 14044 }, { "epoch": 0.9493713667703123, "grad_norm": 1.8279081583023071, "learning_rate": 2.0366679515771313e-07, "loss": 0.23883056640625, "step": 14045 }, { "epoch": 0.9494389617412464, "grad_norm": 1.222259283065796, "learning_rate": 2.0312527407005677e-07, "loss": 0.30499267578125, "step": 14046 }, { "epoch": 0.9495065567121806, "grad_norm": 1.2668890953063965, "learning_rate": 2.0258446894722282e-07, "loss": 0.13189125061035156, "step": 14047 }, { "epoch": 0.9495741516831148, "grad_norm": 1.516804575920105, "learning_rate": 2.0204437981537539e-07, "loss": 0.30731201171875, "step": 14048 }, { "epoch": 0.949641746654049, "grad_norm": 1.6147302389144897, "learning_rate": 2.0150500670064853e-07, "loss": 0.19622802734375, "step": 14049 }, { "epoch": 0.9497093416249831, "grad_norm": 1.0181525945663452, "learning_rate": 2.0096634962913973e-07, "loss": 0.2225189208984375, "step": 14050 }, { "epoch": 0.9497769365959172, "grad_norm": 1.1780195236206055, "learning_rate": 2.0042840862691314e-07, "loss": 0.1783447265625, "step": 14051 }, { "epoch": 0.9498445315668514, "grad_norm": 1.3022708892822266, "learning_rate": 1.9989118371999794e-07, "loss": 0.199676513671875, "step": 14052 }, { "epoch": 0.9499121265377856, "grad_norm": 1.1051579713821411, "learning_rate": 1.993546749343883e-07, "loss": 0.2276458740234375, "step": 14053 }, { "epoch": 0.9499797215087198, "grad_norm": 1.4789819717407227, "learning_rate": 1.988188822960435e-07, "loss": 0.272064208984375, "step": 14054 }, { "epoch": 0.9500473164796539, "grad_norm": 3.327003240585327, "learning_rate": 1.982838058308878e-07, "loss": 0.328338623046875, "step": 14055 }, { "epoch": 0.9501149114505881, "grad_norm": 2.1562981605529785, "learning_rate": 1.9774944556481545e-07, "loss": 0.245330810546875, "step": 14056 }, { "epoch": 0.9501825064215222, "grad_norm": 1.9705132246017456, "learning_rate": 1.972158015236758e-07, "loss": 0.31915283203125, "step": 14057 }, { "epoch": 0.9502501013924564, "grad_norm": 1.3421238660812378, "learning_rate": 1.9668287373329485e-07, "loss": 0.210693359375, "step": 14058 }, { "epoch": 0.9503176963633906, "grad_norm": 0.9689971804618835, "learning_rate": 1.9615066221945865e-07, "loss": 0.15521240234375, "step": 14059 }, { "epoch": 0.9503852913343247, "grad_norm": 3.957796573638916, "learning_rate": 1.9561916700791494e-07, "loss": 0.343597412109375, "step": 14060 }, { "epoch": 0.9504528863052589, "grad_norm": 1.005793809890747, "learning_rate": 1.9508838812438646e-07, "loss": 0.2134857177734375, "step": 14061 }, { "epoch": 0.950520481276193, "grad_norm": 1.3242005109786987, "learning_rate": 1.9455832559454934e-07, "loss": 0.28863525390625, "step": 14062 }, { "epoch": 0.9505880762471273, "grad_norm": 1.3753561973571777, "learning_rate": 1.9402897944405472e-07, "loss": 0.271820068359375, "step": 14063 }, { "epoch": 0.9506556712180614, "grad_norm": 1.304945468902588, "learning_rate": 1.9350034969851716e-07, "loss": 0.21875, "step": 14064 }, { "epoch": 0.9507232661889955, "grad_norm": 1.5786620378494263, "learning_rate": 1.929724363835128e-07, "loss": 0.2452392578125, "step": 14065 }, { "epoch": 0.9507908611599297, "grad_norm": 2.193324327468872, "learning_rate": 1.924452395245846e-07, "loss": 0.36895751953125, "step": 14066 }, { "epoch": 0.9508584561308638, "grad_norm": 0.8806732296943665, "learning_rate": 1.9191875914724377e-07, "loss": 0.1417999267578125, "step": 14067 }, { "epoch": 0.9509260511017981, "grad_norm": 2.3617546558380127, "learning_rate": 1.913929952769633e-07, "loss": 0.300537109375, "step": 14068 }, { "epoch": 0.9509936460727322, "grad_norm": 1.4505374431610107, "learning_rate": 1.9086794793918617e-07, "loss": 0.1823272705078125, "step": 14069 }, { "epoch": 0.9510612410436664, "grad_norm": 1.331680178642273, "learning_rate": 1.9034361715931204e-07, "loss": 0.2245635986328125, "step": 14070 }, { "epoch": 0.9511288360146005, "grad_norm": 1.3338855504989624, "learning_rate": 1.8982000296271395e-07, "loss": 0.2149658203125, "step": 14071 }, { "epoch": 0.9511964309855346, "grad_norm": 0.8023023009300232, "learning_rate": 1.8929710537472998e-07, "loss": 0.162200927734375, "step": 14072 }, { "epoch": 0.9512640259564689, "grad_norm": 1.4241305589675903, "learning_rate": 1.8877492442065492e-07, "loss": 0.30059814453125, "step": 14073 }, { "epoch": 0.951331620927403, "grad_norm": 2.012723207473755, "learning_rate": 1.8825346012576183e-07, "loss": 0.32373046875, "step": 14074 }, { "epoch": 0.9513992158983372, "grad_norm": 1.2739640474319458, "learning_rate": 1.877327125152789e-07, "loss": 0.24993896484375, "step": 14075 }, { "epoch": 0.9514668108692713, "grad_norm": 2.1049275398254395, "learning_rate": 1.872126816144043e-07, "loss": 0.27691650390625, "step": 14076 }, { "epoch": 0.9515344058402055, "grad_norm": 2.1066484451293945, "learning_rate": 1.8669336744829957e-07, "loss": 0.3460693359375, "step": 14077 }, { "epoch": 0.9516020008111397, "grad_norm": 1.470657467842102, "learning_rate": 1.8617477004209293e-07, "loss": 0.1618804931640625, "step": 14078 }, { "epoch": 0.9516695957820738, "grad_norm": 1.7216383218765259, "learning_rate": 1.8565688942087768e-07, "loss": 0.30303955078125, "step": 14079 }, { "epoch": 0.951737190753008, "grad_norm": 1.1310510635375977, "learning_rate": 1.8513972560971037e-07, "loss": 0.2392578125, "step": 14080 }, { "epoch": 0.9518047857239421, "grad_norm": 0.9052804112434387, "learning_rate": 1.846232786336144e-07, "loss": 0.1822052001953125, "step": 14081 }, { "epoch": 0.9518723806948763, "grad_norm": 0.9073736667633057, "learning_rate": 1.8410754851758305e-07, "loss": 0.226104736328125, "step": 14082 }, { "epoch": 0.9519399756658105, "grad_norm": 1.1112465858459473, "learning_rate": 1.835925352865664e-07, "loss": 0.1895599365234375, "step": 14083 }, { "epoch": 0.9520075706367446, "grad_norm": 1.6295586824417114, "learning_rate": 1.8307823896548448e-07, "loss": 0.28216552734375, "step": 14084 }, { "epoch": 0.9520751656076788, "grad_norm": 1.438578486442566, "learning_rate": 1.8256465957922408e-07, "loss": 0.2733154296875, "step": 14085 }, { "epoch": 0.9521427605786129, "grad_norm": 1.1159831285476685, "learning_rate": 1.8205179715263197e-07, "loss": 0.2328948974609375, "step": 14086 }, { "epoch": 0.9522103555495471, "grad_norm": 1.6137057542800903, "learning_rate": 1.8153965171052832e-07, "loss": 0.325164794921875, "step": 14087 }, { "epoch": 0.9522779505204813, "grad_norm": 1.4729841947555542, "learning_rate": 1.8102822327768997e-07, "loss": 0.2899169921875, "step": 14088 }, { "epoch": 0.9523455454914155, "grad_norm": 1.145788311958313, "learning_rate": 1.8051751187886379e-07, "loss": 0.2248077392578125, "step": 14089 }, { "epoch": 0.9524131404623496, "grad_norm": 1.231520175933838, "learning_rate": 1.8000751753876333e-07, "loss": 0.28564453125, "step": 14090 }, { "epoch": 0.9524807354332837, "grad_norm": 0.8823934197425842, "learning_rate": 1.794982402820622e-07, "loss": 0.148773193359375, "step": 14091 }, { "epoch": 0.9525483304042179, "grad_norm": 0.9164445996284485, "learning_rate": 1.7898968013340567e-07, "loss": 0.203582763671875, "step": 14092 }, { "epoch": 0.952615925375152, "grad_norm": 1.4582405090332031, "learning_rate": 1.784818371173974e-07, "loss": 0.2525634765625, "step": 14093 }, { "epoch": 0.9526835203460863, "grad_norm": 1.5015974044799805, "learning_rate": 1.7797471125861275e-07, "loss": 0.28424072265625, "step": 14094 }, { "epoch": 0.9527511153170204, "grad_norm": 1.3598095178604126, "learning_rate": 1.7746830258158875e-07, "loss": 0.225677490234375, "step": 14095 }, { "epoch": 0.9528187102879546, "grad_norm": 1.1898163557052612, "learning_rate": 1.769626111108291e-07, "loss": 0.194976806640625, "step": 14096 }, { "epoch": 0.9528863052588887, "grad_norm": 1.5249871015548706, "learning_rate": 1.7645763687080096e-07, "loss": 0.2747802734375, "step": 14097 }, { "epoch": 0.9529539002298228, "grad_norm": 1.3049476146697998, "learning_rate": 1.7595337988593972e-07, "loss": 0.23681640625, "step": 14098 }, { "epoch": 0.9530214952007571, "grad_norm": 1.6298763751983643, "learning_rate": 1.754498401806426e-07, "loss": 0.28106689453125, "step": 14099 }, { "epoch": 0.9530890901716912, "grad_norm": 1.6867071390151978, "learning_rate": 1.7494701777927668e-07, "loss": 0.295623779296875, "step": 14100 }, { "epoch": 0.9531566851426254, "grad_norm": 0.8338072896003723, "learning_rate": 1.7444491270616925e-07, "loss": 0.2021942138671875, "step": 14101 }, { "epoch": 0.9532242801135595, "grad_norm": 2.729301691055298, "learning_rate": 1.7394352498561583e-07, "loss": 0.3116455078125, "step": 14102 }, { "epoch": 0.9532918750844938, "grad_norm": 0.8439048528671265, "learning_rate": 1.7344285464187704e-07, "loss": 0.17710113525390625, "step": 14103 }, { "epoch": 0.9533594700554279, "grad_norm": 2.3911385536193848, "learning_rate": 1.7294290169917848e-07, "loss": 0.35931396484375, "step": 14104 }, { "epoch": 0.953427065026362, "grad_norm": 2.1023967266082764, "learning_rate": 1.7244366618170915e-07, "loss": 0.3309326171875, "step": 14105 }, { "epoch": 0.9534946599972962, "grad_norm": 1.3410751819610596, "learning_rate": 1.719451481136297e-07, "loss": 0.227783203125, "step": 14106 }, { "epoch": 0.9535622549682303, "grad_norm": 1.2434754371643066, "learning_rate": 1.7144734751905756e-07, "loss": 0.251251220703125, "step": 14107 }, { "epoch": 0.9536298499391646, "grad_norm": 1.4143826961517334, "learning_rate": 1.7095026442208005e-07, "loss": 0.2304840087890625, "step": 14108 }, { "epoch": 0.9536974449100987, "grad_norm": 1.7872323989868164, "learning_rate": 1.7045389884674967e-07, "loss": 0.3211669921875, "step": 14109 }, { "epoch": 0.9537650398810329, "grad_norm": 1.1285746097564697, "learning_rate": 1.6995825081708382e-07, "loss": 0.18846893310546875, "step": 14110 }, { "epoch": 0.953832634851967, "grad_norm": 0.8557150959968567, "learning_rate": 1.6946332035706503e-07, "loss": 0.173095703125, "step": 14111 }, { "epoch": 0.9539002298229011, "grad_norm": 0.9639334082603455, "learning_rate": 1.6896910749064076e-07, "loss": 0.2145843505859375, "step": 14112 }, { "epoch": 0.9539678247938354, "grad_norm": 0.9092450141906738, "learning_rate": 1.684756122417236e-07, "loss": 0.145233154296875, "step": 14113 }, { "epoch": 0.9540354197647695, "grad_norm": 1.113434076309204, "learning_rate": 1.6798283463419274e-07, "loss": 0.2198944091796875, "step": 14114 }, { "epoch": 0.9541030147357037, "grad_norm": 1.9202756881713867, "learning_rate": 1.6749077469189243e-07, "loss": 0.29180908203125, "step": 14115 }, { "epoch": 0.9541706097066378, "grad_norm": 1.5392673015594482, "learning_rate": 1.6699943243863202e-07, "loss": 0.2537841796875, "step": 14116 }, { "epoch": 0.954238204677572, "grad_norm": 1.7091147899627686, "learning_rate": 1.6650880789818245e-07, "loss": 0.25653076171875, "step": 14117 }, { "epoch": 0.9543057996485061, "grad_norm": 0.8578903675079346, "learning_rate": 1.6601890109428642e-07, "loss": 0.209625244140625, "step": 14118 }, { "epoch": 0.9543733946194403, "grad_norm": 1.337677240371704, "learning_rate": 1.655297120506466e-07, "loss": 0.1536407470703125, "step": 14119 }, { "epoch": 0.9544409895903745, "grad_norm": 1.472618579864502, "learning_rate": 1.6504124079093409e-07, "loss": 0.26947021484375, "step": 14120 }, { "epoch": 0.9545085845613086, "grad_norm": 1.4373677968978882, "learning_rate": 1.645534873387833e-07, "loss": 0.210296630859375, "step": 14121 }, { "epoch": 0.9545761795322428, "grad_norm": 1.2265585660934448, "learning_rate": 1.64066451717797e-07, "loss": 0.2615966796875, "step": 14122 }, { "epoch": 0.954643774503177, "grad_norm": 1.3168816566467285, "learning_rate": 1.6358013395153804e-07, "loss": 0.1979522705078125, "step": 14123 }, { "epoch": 0.9547113694741112, "grad_norm": 1.2491302490234375, "learning_rate": 1.6309453406354091e-07, "loss": 0.27313232421875, "step": 14124 }, { "epoch": 0.9547789644450453, "grad_norm": 1.0526492595672607, "learning_rate": 1.6260965207729685e-07, "loss": 0.16259765625, "step": 14125 }, { "epoch": 0.9548465594159794, "grad_norm": 1.5303016901016235, "learning_rate": 1.6212548801627203e-07, "loss": 0.27838134765625, "step": 14126 }, { "epoch": 0.9549141543869136, "grad_norm": 0.8549456596374512, "learning_rate": 1.6164204190389276e-07, "loss": 0.15927886962890625, "step": 14127 }, { "epoch": 0.9549817493578477, "grad_norm": 0.8302880525588989, "learning_rate": 1.611593137635503e-07, "loss": 0.21295166015625, "step": 14128 }, { "epoch": 0.955049344328782, "grad_norm": 0.7539011240005493, "learning_rate": 1.60677303618601e-07, "loss": 0.12168121337890625, "step": 14129 }, { "epoch": 0.9551169392997161, "grad_norm": 1.0743998289108276, "learning_rate": 1.6019601149236784e-07, "loss": 0.1583404541015625, "step": 14130 }, { "epoch": 0.9551845342706503, "grad_norm": 1.2616626024246216, "learning_rate": 1.5971543740814054e-07, "loss": 0.225341796875, "step": 14131 }, { "epoch": 0.9552521292415844, "grad_norm": 1.3015742301940918, "learning_rate": 1.5923558138917215e-07, "loss": 0.265228271484375, "step": 14132 }, { "epoch": 0.9553197242125185, "grad_norm": 1.295898675918579, "learning_rate": 1.5875644345867913e-07, "loss": 0.1551055908203125, "step": 14133 }, { "epoch": 0.9553873191834528, "grad_norm": 1.4009486436843872, "learning_rate": 1.5827802363984457e-07, "loss": 0.2114105224609375, "step": 14134 }, { "epoch": 0.9554549141543869, "grad_norm": 0.8852558135986328, "learning_rate": 1.5780032195582162e-07, "loss": 0.1344146728515625, "step": 14135 }, { "epoch": 0.9555225091253211, "grad_norm": 1.1926501989364624, "learning_rate": 1.5732333842971847e-07, "loss": 0.190155029296875, "step": 14136 }, { "epoch": 0.9555901040962552, "grad_norm": 1.5704731941223145, "learning_rate": 1.5684707308462e-07, "loss": 0.258026123046875, "step": 14137 }, { "epoch": 0.9556576990671894, "grad_norm": 1.4530056715011597, "learning_rate": 1.5637152594356775e-07, "loss": 0.2295074462890625, "step": 14138 }, { "epoch": 0.9557252940381236, "grad_norm": 1.1999435424804688, "learning_rate": 1.5589669702957333e-07, "loss": 0.207794189453125, "step": 14139 }, { "epoch": 0.9557928890090577, "grad_norm": 1.6442899703979492, "learning_rate": 1.554225863656117e-07, "loss": 0.245361328125, "step": 14140 }, { "epoch": 0.9558604839799919, "grad_norm": 1.3220380544662476, "learning_rate": 1.5494919397462282e-07, "loss": 0.20050048828125, "step": 14141 }, { "epoch": 0.955928078950926, "grad_norm": 1.4154460430145264, "learning_rate": 1.5447651987951006e-07, "loss": 0.267974853515625, "step": 14142 }, { "epoch": 0.9559956739218602, "grad_norm": 1.1031585931777954, "learning_rate": 1.5400456410314846e-07, "loss": 0.221405029296875, "step": 14143 }, { "epoch": 0.9560632688927944, "grad_norm": 1.135868787765503, "learning_rate": 1.5353332666837305e-07, "loss": 0.209381103515625, "step": 14144 }, { "epoch": 0.9561308638637286, "grad_norm": 2.290454149246216, "learning_rate": 1.530628075979823e-07, "loss": 0.276153564453125, "step": 14145 }, { "epoch": 0.9561984588346627, "grad_norm": 1.0775398015975952, "learning_rate": 1.5259300691474631e-07, "loss": 0.1961822509765625, "step": 14146 }, { "epoch": 0.9562660538055968, "grad_norm": 1.0370455980300903, "learning_rate": 1.5212392464139525e-07, "loss": 0.1917877197265625, "step": 14147 }, { "epoch": 0.956333648776531, "grad_norm": 1.7256964445114136, "learning_rate": 1.516555608006276e-07, "loss": 0.33087158203125, "step": 14148 }, { "epoch": 0.9564012437474652, "grad_norm": 0.943705141544342, "learning_rate": 1.5118791541510358e-07, "loss": 0.15240478515625, "step": 14149 }, { "epoch": 0.9564688387183994, "grad_norm": 0.8426241874694824, "learning_rate": 1.5072098850745341e-07, "loss": 0.12518310546875, "step": 14150 }, { "epoch": 0.9565364336893335, "grad_norm": 1.7848482131958008, "learning_rate": 1.5025478010026738e-07, "loss": 0.221435546875, "step": 14151 }, { "epoch": 0.9566040286602677, "grad_norm": 0.9152986407279968, "learning_rate": 1.497892902161041e-07, "loss": 0.1887054443359375, "step": 14152 }, { "epoch": 0.9566716236312018, "grad_norm": 2.0892248153686523, "learning_rate": 1.4932451887748888e-07, "loss": 0.315673828125, "step": 14153 }, { "epoch": 0.956739218602136, "grad_norm": 1.0135411024093628, "learning_rate": 1.4886046610690873e-07, "loss": 0.193756103515625, "step": 14154 }, { "epoch": 0.9568068135730702, "grad_norm": 1.3710142374038696, "learning_rate": 1.483971319268157e-07, "loss": 0.26190185546875, "step": 14155 }, { "epoch": 0.9568744085440043, "grad_norm": 1.8484746217727661, "learning_rate": 1.4793451635963184e-07, "loss": 0.2252197265625, "step": 14156 }, { "epoch": 0.9569420035149385, "grad_norm": 0.941064715385437, "learning_rate": 1.4747261942774093e-07, "loss": 0.1570892333984375, "step": 14157 }, { "epoch": 0.9570095984858726, "grad_norm": 1.0016220808029175, "learning_rate": 1.4701144115349008e-07, "loss": 0.1761016845703125, "step": 14158 }, { "epoch": 0.9570771934568069, "grad_norm": 1.449445128440857, "learning_rate": 1.4655098155919644e-07, "loss": 0.32745361328125, "step": 14159 }, { "epoch": 0.957144788427741, "grad_norm": 1.057098627090454, "learning_rate": 1.4609124066713718e-07, "loss": 0.162445068359375, "step": 14160 }, { "epoch": 0.9572123833986751, "grad_norm": 1.4809339046478271, "learning_rate": 1.456322184995612e-07, "loss": 0.29364013671875, "step": 14161 }, { "epoch": 0.9572799783696093, "grad_norm": 1.1825098991394043, "learning_rate": 1.451739150786757e-07, "loss": 0.286224365234375, "step": 14162 }, { "epoch": 0.9573475733405434, "grad_norm": 0.5639398097991943, "learning_rate": 1.44716330426658e-07, "loss": 0.0775146484375, "step": 14163 }, { "epoch": 0.9574151683114777, "grad_norm": 1.2171729803085327, "learning_rate": 1.4425946456564864e-07, "loss": 0.26812744140625, "step": 14164 }, { "epoch": 0.9574827632824118, "grad_norm": 0.9576282501220703, "learning_rate": 1.4380331751775166e-07, "loss": 0.16876220703125, "step": 14165 }, { "epoch": 0.957550358253346, "grad_norm": 1.0608237981796265, "learning_rate": 1.4334788930504273e-07, "loss": 0.23565673828125, "step": 14166 }, { "epoch": 0.9576179532242801, "grad_norm": 1.5118674039840698, "learning_rate": 1.4289317994955254e-07, "loss": 0.333740234375, "step": 14167 }, { "epoch": 0.9576855481952142, "grad_norm": 1.0848456621170044, "learning_rate": 1.4243918947328683e-07, "loss": 0.1939239501953125, "step": 14168 }, { "epoch": 0.9577531431661485, "grad_norm": 3.650583505630493, "learning_rate": 1.419859178982097e-07, "loss": 0.33807373046875, "step": 14169 }, { "epoch": 0.9578207381370826, "grad_norm": 0.7441399097442627, "learning_rate": 1.4153336524625694e-07, "loss": 0.11652755737304688, "step": 14170 }, { "epoch": 0.9578883331080168, "grad_norm": 1.5496443510055542, "learning_rate": 1.4108153153932267e-07, "loss": 0.18550872802734375, "step": 14171 }, { "epoch": 0.9579559280789509, "grad_norm": 0.9091188311576843, "learning_rate": 1.4063041679927112e-07, "loss": 0.158203125, "step": 14172 }, { "epoch": 0.9580235230498851, "grad_norm": 1.675173282623291, "learning_rate": 1.401800210479298e-07, "loss": 0.2886199951171875, "step": 14173 }, { "epoch": 0.9580911180208193, "grad_norm": 0.5572039484977722, "learning_rate": 1.3973034430709132e-07, "loss": 0.09194183349609375, "step": 14174 }, { "epoch": 0.9581587129917534, "grad_norm": 1.2416375875473022, "learning_rate": 1.3928138659851153e-07, "loss": 0.1939544677734375, "step": 14175 }, { "epoch": 0.9582263079626876, "grad_norm": 1.7093483209609985, "learning_rate": 1.388331479439181e-07, "loss": 0.264404296875, "step": 14176 }, { "epoch": 0.9582939029336217, "grad_norm": 0.8269778490066528, "learning_rate": 1.3838562836499868e-07, "loss": 0.15354156494140625, "step": 14177 }, { "epoch": 0.9583614979045559, "grad_norm": 1.538252353668213, "learning_rate": 1.379388278834043e-07, "loss": 0.2660980224609375, "step": 14178 }, { "epoch": 0.95842909287549, "grad_norm": 1.0519789457321167, "learning_rate": 1.3749274652075594e-07, "loss": 0.23614501953125, "step": 14179 }, { "epoch": 0.9584966878464243, "grad_norm": 1.3022137880325317, "learning_rate": 1.3704738429863638e-07, "loss": 0.215850830078125, "step": 14180 }, { "epoch": 0.9585642828173584, "grad_norm": 1.2856124639511108, "learning_rate": 1.3660274123859672e-07, "loss": 0.184783935546875, "step": 14181 }, { "epoch": 0.9586318777882925, "grad_norm": 0.9279839396476746, "learning_rate": 1.3615881736215142e-07, "loss": 0.159454345703125, "step": 14182 }, { "epoch": 0.9586994727592267, "grad_norm": 1.2612305879592896, "learning_rate": 1.3571561269077992e-07, "loss": 0.273529052734375, "step": 14183 }, { "epoch": 0.9587670677301608, "grad_norm": 1.2146422863006592, "learning_rate": 1.3527312724592679e-07, "loss": 0.170318603515625, "step": 14184 }, { "epoch": 0.9588346627010951, "grad_norm": 1.7021706104278564, "learning_rate": 1.3483136104900317e-07, "loss": 0.255767822265625, "step": 14185 }, { "epoch": 0.9589022576720292, "grad_norm": 2.1052281856536865, "learning_rate": 1.3439031412138202e-07, "loss": 0.261566162109375, "step": 14186 }, { "epoch": 0.9589698526429634, "grad_norm": 0.8578913807868958, "learning_rate": 1.3394998648440793e-07, "loss": 0.18013763427734375, "step": 14187 }, { "epoch": 0.9590374476138975, "grad_norm": 0.9750317931175232, "learning_rate": 1.3351037815938384e-07, "loss": 0.217437744140625, "step": 14188 }, { "epoch": 0.9591050425848316, "grad_norm": 1.1924289464950562, "learning_rate": 1.330714891675794e-07, "loss": 0.2660064697265625, "step": 14189 }, { "epoch": 0.9591726375557659, "grad_norm": 1.8822025060653687, "learning_rate": 1.3263331953023594e-07, "loss": 0.214874267578125, "step": 14190 }, { "epoch": 0.9592402325267, "grad_norm": 1.0557290315628052, "learning_rate": 1.321958692685482e-07, "loss": 0.22210693359375, "step": 14191 }, { "epoch": 0.9593078274976342, "grad_norm": 1.5289942026138306, "learning_rate": 1.317591384036876e-07, "loss": 0.2193603515625, "step": 14192 }, { "epoch": 0.9593754224685683, "grad_norm": 1.0044732093811035, "learning_rate": 1.313231269567855e-07, "loss": 0.1424713134765625, "step": 14193 }, { "epoch": 0.9594430174395026, "grad_norm": 1.0412824153900146, "learning_rate": 1.3088783494893674e-07, "loss": 0.218414306640625, "step": 14194 }, { "epoch": 0.9595106124104367, "grad_norm": 1.24591863155365, "learning_rate": 1.3045326240120447e-07, "loss": 0.28302001953125, "step": 14195 }, { "epoch": 0.9595782073813708, "grad_norm": 1.1751917600631714, "learning_rate": 1.3001940933461687e-07, "loss": 0.2706298828125, "step": 14196 }, { "epoch": 0.959645802352305, "grad_norm": 1.0664958953857422, "learning_rate": 1.2958627577016547e-07, "loss": 0.17255401611328125, "step": 14197 }, { "epoch": 0.9597133973232391, "grad_norm": 1.6691510677337646, "learning_rate": 1.291538617288085e-07, "loss": 0.27740478515625, "step": 14198 }, { "epoch": 0.9597809922941734, "grad_norm": 1.227844476699829, "learning_rate": 1.2872216723146756e-07, "loss": 0.198272705078125, "step": 14199 }, { "epoch": 0.9598485872651075, "grad_norm": 1.3577066659927368, "learning_rate": 1.2829119229903262e-07, "loss": 0.280670166015625, "step": 14200 }, { "epoch": 0.9599161822360417, "grad_norm": 0.9139822721481323, "learning_rate": 1.2786093695235534e-07, "loss": 0.173675537109375, "step": 14201 }, { "epoch": 0.9599837772069758, "grad_norm": 1.2871752977371216, "learning_rate": 1.2743140121225404e-07, "loss": 0.246185302734375, "step": 14202 }, { "epoch": 0.9600513721779099, "grad_norm": 1.0908215045928955, "learning_rate": 1.2700258509951546e-07, "loss": 0.1942596435546875, "step": 14203 }, { "epoch": 0.9601189671488441, "grad_norm": 1.232637882232666, "learning_rate": 1.2657448863488296e-07, "loss": 0.2650146484375, "step": 14204 }, { "epoch": 0.9601865621197783, "grad_norm": 1.60794997215271, "learning_rate": 1.2614711183907502e-07, "loss": 0.17479705810546875, "step": 14205 }, { "epoch": 0.9602541570907125, "grad_norm": 1.394680142402649, "learning_rate": 1.2572045473276838e-07, "loss": 0.214385986328125, "step": 14206 }, { "epoch": 0.9603217520616466, "grad_norm": 1.1123608350753784, "learning_rate": 1.252945173366099e-07, "loss": 0.1781005859375, "step": 14207 }, { "epoch": 0.9603893470325808, "grad_norm": 1.186161994934082, "learning_rate": 1.248692996712064e-07, "loss": 0.209686279296875, "step": 14208 }, { "epoch": 0.960456942003515, "grad_norm": 1.8419853448867798, "learning_rate": 1.244448017571348e-07, "loss": 0.25579833984375, "step": 14209 }, { "epoch": 0.9605245369744491, "grad_norm": 1.4486533403396606, "learning_rate": 1.2402102361493194e-07, "loss": 0.23974609375, "step": 14210 }, { "epoch": 0.9605921319453833, "grad_norm": 1.2325718402862549, "learning_rate": 1.2359796526510646e-07, "loss": 0.28460693359375, "step": 14211 }, { "epoch": 0.9606597269163174, "grad_norm": 0.8558566570281982, "learning_rate": 1.23175626728127e-07, "loss": 0.20770263671875, "step": 14212 }, { "epoch": 0.9607273218872516, "grad_norm": 1.4815226793289185, "learning_rate": 1.2275400802442715e-07, "loss": 0.2518463134765625, "step": 14213 }, { "epoch": 0.9607949168581857, "grad_norm": 1.3058569431304932, "learning_rate": 1.22333109174409e-07, "loss": 0.264404296875, "step": 14214 }, { "epoch": 0.9608625118291199, "grad_norm": 1.0865565538406372, "learning_rate": 1.2191293019843785e-07, "loss": 0.25079345703125, "step": 14215 }, { "epoch": 0.9609301068000541, "grad_norm": 1.2166436910629272, "learning_rate": 1.214934711168475e-07, "loss": 0.19455337524414062, "step": 14216 }, { "epoch": 0.9609977017709882, "grad_norm": 1.1156209707260132, "learning_rate": 1.2107473194992836e-07, "loss": 0.25634765625, "step": 14217 }, { "epoch": 0.9610652967419224, "grad_norm": 1.3055624961853027, "learning_rate": 1.2065671271794754e-07, "loss": 0.2508544921875, "step": 14218 }, { "epoch": 0.9611328917128565, "grad_norm": 0.9764801859855652, "learning_rate": 1.202394134411272e-07, "loss": 0.206939697265625, "step": 14219 }, { "epoch": 0.9612004866837908, "grad_norm": 1.475687861442566, "learning_rate": 1.1982283413965957e-07, "loss": 0.2454833984375, "step": 14220 }, { "epoch": 0.9612680816547249, "grad_norm": 1.7437037229537964, "learning_rate": 1.1940697483370344e-07, "loss": 0.242095947265625, "step": 14221 }, { "epoch": 0.961335676625659, "grad_norm": 2.0198588371276855, "learning_rate": 1.1899183554337945e-07, "loss": 0.28619384765625, "step": 14222 }, { "epoch": 0.9614032715965932, "grad_norm": 1.904904842376709, "learning_rate": 1.1857741628877316e-07, "loss": 0.301849365234375, "step": 14223 }, { "epoch": 0.9614708665675273, "grad_norm": 1.153362512588501, "learning_rate": 1.1816371708993856e-07, "loss": 0.239898681640625, "step": 14224 }, { "epoch": 0.9615384615384616, "grad_norm": 1.6310162544250488, "learning_rate": 1.1775073796689295e-07, "loss": 0.30291748046875, "step": 14225 }, { "epoch": 0.9616060565093957, "grad_norm": 1.2671682834625244, "learning_rate": 1.1733847893961703e-07, "loss": 0.1455364227294922, "step": 14226 }, { "epoch": 0.9616736514803299, "grad_norm": 0.8292323350906372, "learning_rate": 1.1692694002806148e-07, "loss": 0.135650634765625, "step": 14227 }, { "epoch": 0.961741246451264, "grad_norm": 0.880687952041626, "learning_rate": 1.165161212521354e-07, "loss": 0.09642791748046875, "step": 14228 }, { "epoch": 0.9618088414221981, "grad_norm": 1.2202943563461304, "learning_rate": 1.161060226317212e-07, "loss": 0.22601318359375, "step": 14229 }, { "epoch": 0.9618764363931324, "grad_norm": 1.6987345218658447, "learning_rate": 1.1569664418665804e-07, "loss": 0.165252685546875, "step": 14230 }, { "epoch": 0.9619440313640665, "grad_norm": 0.8399942517280579, "learning_rate": 1.1528798593675505e-07, "loss": 0.141998291015625, "step": 14231 }, { "epoch": 0.9620116263350007, "grad_norm": 1.0664702653884888, "learning_rate": 1.1488004790178807e-07, "loss": 0.192474365234375, "step": 14232 }, { "epoch": 0.9620792213059348, "grad_norm": 1.3129222393035889, "learning_rate": 1.1447283010149301e-07, "loss": 0.217010498046875, "step": 14233 }, { "epoch": 0.962146816276869, "grad_norm": 2.111213445663452, "learning_rate": 1.1406633255557408e-07, "loss": 0.27301025390625, "step": 14234 }, { "epoch": 0.9622144112478032, "grad_norm": 1.7704631090164185, "learning_rate": 1.1366055528370223e-07, "loss": 0.261566162109375, "step": 14235 }, { "epoch": 0.9622820062187373, "grad_norm": 1.5065150260925293, "learning_rate": 1.1325549830550841e-07, "loss": 0.249755859375, "step": 14236 }, { "epoch": 0.9623496011896715, "grad_norm": 1.3104909658432007, "learning_rate": 1.1285116164059361e-07, "loss": 0.291748046875, "step": 14237 }, { "epoch": 0.9624171961606056, "grad_norm": 1.0379122495651245, "learning_rate": 1.1244754530852219e-07, "loss": 0.249603271484375, "step": 14238 }, { "epoch": 0.9624847911315398, "grad_norm": 1.2318618297576904, "learning_rate": 1.1204464932882186e-07, "loss": 0.2015380859375, "step": 14239 }, { "epoch": 0.962552386102474, "grad_norm": 1.0170215368270874, "learning_rate": 1.1164247372099035e-07, "loss": 0.198272705078125, "step": 14240 }, { "epoch": 0.9626199810734082, "grad_norm": 1.5416089296340942, "learning_rate": 1.1124101850448377e-07, "loss": 0.308380126953125, "step": 14241 }, { "epoch": 0.9626875760443423, "grad_norm": 0.9098103642463684, "learning_rate": 1.1084028369873156e-07, "loss": 0.17535400390625, "step": 14242 }, { "epoch": 0.9627551710152764, "grad_norm": 1.7780332565307617, "learning_rate": 1.1044026932311824e-07, "loss": 0.32305908203125, "step": 14243 }, { "epoch": 0.9628227659862106, "grad_norm": 1.1137888431549072, "learning_rate": 1.1004097539700331e-07, "loss": 0.2503509521484375, "step": 14244 }, { "epoch": 0.9628903609571448, "grad_norm": 1.5165419578552246, "learning_rate": 1.0964240193970631e-07, "loss": 0.2689208984375, "step": 14245 }, { "epoch": 0.962957955928079, "grad_norm": 1.1570547819137573, "learning_rate": 1.0924454897051183e-07, "loss": 0.2153167724609375, "step": 14246 }, { "epoch": 0.9630255508990131, "grad_norm": 1.4270081520080566, "learning_rate": 1.0884741650866947e-07, "loss": 0.247314453125, "step": 14247 }, { "epoch": 0.9630931458699473, "grad_norm": 1.5099605321884155, "learning_rate": 1.0845100457339718e-07, "loss": 0.22869873046875, "step": 14248 }, { "epoch": 0.9631607408408814, "grad_norm": 1.5997313261032104, "learning_rate": 1.0805531318387462e-07, "loss": 0.2415771484375, "step": 14249 }, { "epoch": 0.9632283358118156, "grad_norm": 1.9313576221466064, "learning_rate": 1.0766034235924815e-07, "loss": 0.295501708984375, "step": 14250 }, { "epoch": 0.9632959307827498, "grad_norm": 1.2710100412368774, "learning_rate": 1.0726609211862914e-07, "loss": 0.235076904296875, "step": 14251 }, { "epoch": 0.9633635257536839, "grad_norm": 1.1823195219039917, "learning_rate": 1.0687256248109234e-07, "loss": 0.178955078125, "step": 14252 }, { "epoch": 0.9634311207246181, "grad_norm": 1.541170358657837, "learning_rate": 1.0647975346568084e-07, "loss": 0.290740966796875, "step": 14253 }, { "epoch": 0.9634987156955522, "grad_norm": 1.2276556491851807, "learning_rate": 1.0608766509140111e-07, "loss": 0.252044677734375, "step": 14254 }, { "epoch": 0.9635663106664865, "grad_norm": 1.3741424083709717, "learning_rate": 1.0569629737722297e-07, "loss": 0.28765869140625, "step": 14255 }, { "epoch": 0.9636339056374206, "grad_norm": 1.6364781856536865, "learning_rate": 1.0530565034208628e-07, "loss": 0.29840087890625, "step": 14256 }, { "epoch": 0.9637015006083547, "grad_norm": 1.6515657901763916, "learning_rate": 1.0491572400489092e-07, "loss": 0.1968994140625, "step": 14257 }, { "epoch": 0.9637690955792889, "grad_norm": 1.9706188440322876, "learning_rate": 1.045265183845051e-07, "loss": 0.251434326171875, "step": 14258 }, { "epoch": 0.963836690550223, "grad_norm": 0.6486075520515442, "learning_rate": 1.0413803349975881e-07, "loss": 0.126983642578125, "step": 14259 }, { "epoch": 0.9639042855211573, "grad_norm": 1.2012083530426025, "learning_rate": 1.0375026936945198e-07, "loss": 0.1961517333984375, "step": 14260 }, { "epoch": 0.9639718804920914, "grad_norm": 1.3805590867996216, "learning_rate": 1.033632260123446e-07, "loss": 0.20782470703125, "step": 14261 }, { "epoch": 0.9640394754630256, "grad_norm": 1.2794243097305298, "learning_rate": 1.0297690344716671e-07, "loss": 0.2476806640625, "step": 14262 }, { "epoch": 0.9641070704339597, "grad_norm": 0.8546679615974426, "learning_rate": 1.0259130169261e-07, "loss": 0.1125946044921875, "step": 14263 }, { "epoch": 0.9641746654048938, "grad_norm": 1.6281243562698364, "learning_rate": 1.0220642076733122e-07, "loss": 0.35882568359375, "step": 14264 }, { "epoch": 0.964242260375828, "grad_norm": 1.4166676998138428, "learning_rate": 1.0182226068995381e-07, "loss": 0.240264892578125, "step": 14265 }, { "epoch": 0.9643098553467622, "grad_norm": 1.0730979442596436, "learning_rate": 1.0143882147906791e-07, "loss": 0.16616439819335938, "step": 14266 }, { "epoch": 0.9643774503176964, "grad_norm": 2.8682289123535156, "learning_rate": 1.0105610315322367e-07, "loss": 0.286468505859375, "step": 14267 }, { "epoch": 0.9644450452886305, "grad_norm": 1.718702793121338, "learning_rate": 1.0067410573094127e-07, "loss": 0.27447509765625, "step": 14268 }, { "epoch": 0.9645126402595647, "grad_norm": 0.7336094379425049, "learning_rate": 1.002928292307026e-07, "loss": 0.14300537109375, "step": 14269 }, { "epoch": 0.9645802352304989, "grad_norm": 1.4326187372207642, "learning_rate": 9.99122736709579e-08, "loss": 0.21795654296875, "step": 14270 }, { "epoch": 0.964647830201433, "grad_norm": 1.1977083683013916, "learning_rate": 9.953243907012077e-08, "loss": 0.216033935546875, "step": 14271 }, { "epoch": 0.9647154251723672, "grad_norm": 1.5884041786193848, "learning_rate": 9.915332544656819e-08, "loss": 0.2396240234375, "step": 14272 }, { "epoch": 0.9647830201433013, "grad_norm": 1.4848257303237915, "learning_rate": 9.877493281864547e-08, "loss": 0.141937255859375, "step": 14273 }, { "epoch": 0.9648506151142355, "grad_norm": 1.410361886024475, "learning_rate": 9.83972612046613e-08, "loss": 0.28778076171875, "step": 14274 }, { "epoch": 0.9649182100851696, "grad_norm": 1.01218843460083, "learning_rate": 9.802031062288941e-08, "loss": 0.25640869140625, "step": 14275 }, { "epoch": 0.9649858050561039, "grad_norm": 1.5934909582138062, "learning_rate": 9.764408109156852e-08, "loss": 0.21002197265625, "step": 14276 }, { "epoch": 0.965053400027038, "grad_norm": 1.2601290941238403, "learning_rate": 9.726857262890576e-08, "loss": 0.18853759765625, "step": 14277 }, { "epoch": 0.9651209949979721, "grad_norm": 1.6679153442382812, "learning_rate": 9.689378525306659e-08, "loss": 0.215972900390625, "step": 14278 }, { "epoch": 0.9651885899689063, "grad_norm": 1.0876072645187378, "learning_rate": 9.651971898218815e-08, "loss": 0.236572265625, "step": 14279 }, { "epoch": 0.9652561849398404, "grad_norm": 1.3375508785247803, "learning_rate": 9.614637383436931e-08, "loss": 0.3135986328125, "step": 14280 }, { "epoch": 0.9653237799107747, "grad_norm": 1.1325111389160156, "learning_rate": 9.577374982767562e-08, "loss": 0.1893310546875, "step": 14281 }, { "epoch": 0.9653913748817088, "grad_norm": 1.261755108833313, "learning_rate": 9.540184698013766e-08, "loss": 0.26983642578125, "step": 14282 }, { "epoch": 0.965458969852643, "grad_norm": 1.2081350088119507, "learning_rate": 9.503066530974603e-08, "loss": 0.1379852294921875, "step": 14283 }, { "epoch": 0.9655265648235771, "grad_norm": 1.4836543798446655, "learning_rate": 9.466020483446469e-08, "loss": 0.240753173828125, "step": 14284 }, { "epoch": 0.9655941597945112, "grad_norm": 1.0815001726150513, "learning_rate": 9.429046557221931e-08, "loss": 0.2064361572265625, "step": 14285 }, { "epoch": 0.9656617547654455, "grad_norm": 1.413960337638855, "learning_rate": 9.392144754089726e-08, "loss": 0.28375244140625, "step": 14286 }, { "epoch": 0.9657293497363796, "grad_norm": 0.7988349795341492, "learning_rate": 9.355315075835591e-08, "loss": 0.164337158203125, "step": 14287 }, { "epoch": 0.9657969447073138, "grad_norm": 1.348418116569519, "learning_rate": 9.3185575242416e-08, "loss": 0.16107177734375, "step": 14288 }, { "epoch": 0.9658645396782479, "grad_norm": 0.7190960049629211, "learning_rate": 9.281872101086164e-08, "loss": 0.12186431884765625, "step": 14289 }, { "epoch": 0.9659321346491822, "grad_norm": 1.1485968828201294, "learning_rate": 9.24525880814453e-08, "loss": 0.292694091796875, "step": 14290 }, { "epoch": 0.9659997296201163, "grad_norm": 1.087045431137085, "learning_rate": 9.208717647188114e-08, "loss": 0.221710205078125, "step": 14291 }, { "epoch": 0.9660673245910504, "grad_norm": 1.515939474105835, "learning_rate": 9.172248619985169e-08, "loss": 0.24432373046875, "step": 14292 }, { "epoch": 0.9661349195619846, "grad_norm": 1.3459022045135498, "learning_rate": 9.135851728300116e-08, "loss": 0.18548583984375, "step": 14293 }, { "epoch": 0.9662025145329187, "grad_norm": 1.457373857498169, "learning_rate": 9.099526973894045e-08, "loss": 0.19866943359375, "step": 14294 }, { "epoch": 0.966270109503853, "grad_norm": 1.4042927026748657, "learning_rate": 9.063274358524721e-08, "loss": 0.228179931640625, "step": 14295 }, { "epoch": 0.9663377044747871, "grad_norm": 1.256745457649231, "learning_rate": 9.027093883946235e-08, "loss": 0.16497802734375, "step": 14296 }, { "epoch": 0.9664052994457213, "grad_norm": 2.300884962081909, "learning_rate": 8.990985551909192e-08, "loss": 0.273101806640625, "step": 14297 }, { "epoch": 0.9664728944166554, "grad_norm": 1.4909762144088745, "learning_rate": 8.954949364160858e-08, "loss": 0.2750396728515625, "step": 14298 }, { "epoch": 0.9665404893875895, "grad_norm": 1.9953175783157349, "learning_rate": 8.918985322444673e-08, "loss": 0.3228759765625, "step": 14299 }, { "epoch": 0.9666080843585237, "grad_norm": 0.8497288823127747, "learning_rate": 8.88309342850091e-08, "loss": 0.203399658203125, "step": 14300 }, { "epoch": 0.9666756793294579, "grad_norm": 1.2586817741394043, "learning_rate": 8.847273684066348e-08, "loss": 0.2261505126953125, "step": 14301 }, { "epoch": 0.9667432743003921, "grad_norm": 1.4459431171417236, "learning_rate": 8.811526090873933e-08, "loss": 0.32891845703125, "step": 14302 }, { "epoch": 0.9668108692713262, "grad_norm": 0.979073703289032, "learning_rate": 8.775850650653616e-08, "loss": 0.2104644775390625, "step": 14303 }, { "epoch": 0.9668784642422604, "grad_norm": 0.587670624256134, "learning_rate": 8.740247365131349e-08, "loss": 0.079437255859375, "step": 14304 }, { "epoch": 0.9669460592131945, "grad_norm": 1.343412160873413, "learning_rate": 8.704716236030086e-08, "loss": 0.27227783203125, "step": 14305 }, { "epoch": 0.9670136541841287, "grad_norm": 1.4581115245819092, "learning_rate": 8.669257265068787e-08, "loss": 0.203155517578125, "step": 14306 }, { "epoch": 0.9670812491550629, "grad_norm": 0.9193410873413086, "learning_rate": 8.63387045396341e-08, "loss": 0.1558990478515625, "step": 14307 }, { "epoch": 0.967148844125997, "grad_norm": 0.920661985874176, "learning_rate": 8.59855580442609e-08, "loss": 0.1531829833984375, "step": 14308 }, { "epoch": 0.9672164390969312, "grad_norm": 1.001247763633728, "learning_rate": 8.563313318165456e-08, "loss": 0.190460205078125, "step": 14309 }, { "epoch": 0.9672840340678653, "grad_norm": 1.1945735216140747, "learning_rate": 8.528142996886978e-08, "loss": 0.188873291015625, "step": 14310 }, { "epoch": 0.9673516290387996, "grad_norm": 1.6211144924163818, "learning_rate": 8.493044842292297e-08, "loss": 0.238006591796875, "step": 14311 }, { "epoch": 0.9674192240097337, "grad_norm": 1.2493706941604614, "learning_rate": 8.458018856079553e-08, "loss": 0.14142608642578125, "step": 14312 }, { "epoch": 0.9674868189806678, "grad_norm": 1.052741527557373, "learning_rate": 8.423065039943723e-08, "loss": 0.10657501220703125, "step": 14313 }, { "epoch": 0.967554413951602, "grad_norm": 1.2506948709487915, "learning_rate": 8.38818339557612e-08, "loss": 0.19873046875, "step": 14314 }, { "epoch": 0.9676220089225361, "grad_norm": 1.3994650840759277, "learning_rate": 8.353373924664398e-08, "loss": 0.24334716796875, "step": 14315 }, { "epoch": 0.9676896038934704, "grad_norm": 1.7893661260604858, "learning_rate": 8.318636628892873e-08, "loss": 0.26727294921875, "step": 14316 }, { "epoch": 0.9677571988644045, "grad_norm": 1.1568572521209717, "learning_rate": 8.283971509942367e-08, "loss": 0.209716796875, "step": 14317 }, { "epoch": 0.9678247938353387, "grad_norm": 1.37334406375885, "learning_rate": 8.249378569490207e-08, "loss": 0.281005859375, "step": 14318 }, { "epoch": 0.9678923888062728, "grad_norm": 1.1055481433868408, "learning_rate": 8.214857809210219e-08, "loss": 0.2255859375, "step": 14319 }, { "epoch": 0.9679599837772069, "grad_norm": 0.7738187313079834, "learning_rate": 8.180409230772735e-08, "loss": 0.17047119140625, "step": 14320 }, { "epoch": 0.9680275787481412, "grad_norm": 0.8834429383277893, "learning_rate": 8.146032835844586e-08, "loss": 0.167816162109375, "step": 14321 }, { "epoch": 0.9680951737190753, "grad_norm": 2.2335336208343506, "learning_rate": 8.111728626088943e-08, "loss": 0.28277587890625, "step": 14322 }, { "epoch": 0.9681627686900095, "grad_norm": 1.1497344970703125, "learning_rate": 8.077496603165979e-08, "loss": 0.175018310546875, "step": 14323 }, { "epoch": 0.9682303636609436, "grad_norm": 1.1055139303207397, "learning_rate": 8.043336768731868e-08, "loss": 0.193328857421875, "step": 14324 }, { "epoch": 0.9682979586318778, "grad_norm": 1.4877246618270874, "learning_rate": 8.009249124439621e-08, "loss": 0.220611572265625, "step": 14325 }, { "epoch": 0.968365553602812, "grad_norm": 0.9599109292030334, "learning_rate": 7.97523367193842e-08, "loss": 0.16016387939453125, "step": 14326 }, { "epoch": 0.9684331485737461, "grad_norm": 0.8215980529785156, "learning_rate": 7.941290412874114e-08, "loss": 0.12652206420898438, "step": 14327 }, { "epoch": 0.9685007435446803, "grad_norm": 1.9268395900726318, "learning_rate": 7.907419348889222e-08, "loss": 0.267913818359375, "step": 14328 }, { "epoch": 0.9685683385156144, "grad_norm": 1.6865887641906738, "learning_rate": 7.873620481622768e-08, "loss": 0.249359130859375, "step": 14329 }, { "epoch": 0.9686359334865486, "grad_norm": 1.3996784687042236, "learning_rate": 7.839893812709776e-08, "loss": 0.2677154541015625, "step": 14330 }, { "epoch": 0.9687035284574828, "grad_norm": 1.1723521947860718, "learning_rate": 7.806239343782439e-08, "loss": 0.32244873046875, "step": 14331 }, { "epoch": 0.968771123428417, "grad_norm": 1.6893428564071655, "learning_rate": 7.772657076469125e-08, "loss": 0.285308837890625, "step": 14332 }, { "epoch": 0.9688387183993511, "grad_norm": 1.0517330169677734, "learning_rate": 7.739147012394699e-08, "loss": 0.20843505859375, "step": 14333 }, { "epoch": 0.9689063133702852, "grad_norm": 1.4267531633377075, "learning_rate": 7.705709153180696e-08, "loss": 0.252166748046875, "step": 14334 }, { "epoch": 0.9689739083412194, "grad_norm": 2.1763253211975098, "learning_rate": 7.672343500444823e-08, "loss": 0.2650299072265625, "step": 14335 }, { "epoch": 0.9690415033121536, "grad_norm": 1.4361982345581055, "learning_rate": 7.639050055801788e-08, "loss": 0.2769775390625, "step": 14336 }, { "epoch": 0.9691090982830878, "grad_norm": 1.2518800497055054, "learning_rate": 7.605828820862304e-08, "loss": 0.2176513671875, "step": 14337 }, { "epoch": 0.9691766932540219, "grad_norm": 1.4271160364151, "learning_rate": 7.572679797233917e-08, "loss": 0.193145751953125, "step": 14338 }, { "epoch": 0.9692442882249561, "grad_norm": 1.1312278509140015, "learning_rate": 7.539602986520678e-08, "loss": 0.156005859375, "step": 14339 }, { "epoch": 0.9693118831958902, "grad_norm": 0.659460723400116, "learning_rate": 7.506598390322972e-08, "loss": 0.1186065673828125, "step": 14340 }, { "epoch": 0.9693794781668243, "grad_norm": 0.9819749593734741, "learning_rate": 7.47366601023769e-08, "loss": 0.19329833984375, "step": 14341 }, { "epoch": 0.9694470731377586, "grad_norm": 1.005165934562683, "learning_rate": 7.44080584785839e-08, "loss": 0.1737060546875, "step": 14342 }, { "epoch": 0.9695146681086927, "grad_norm": 1.637478232383728, "learning_rate": 7.408017904774967e-08, "loss": 0.235137939453125, "step": 14343 }, { "epoch": 0.9695822630796269, "grad_norm": 1.4118847846984863, "learning_rate": 7.375302182573984e-08, "loss": 0.2890625, "step": 14344 }, { "epoch": 0.969649858050561, "grad_norm": 1.5865942239761353, "learning_rate": 7.34265868283851e-08, "loss": 0.2633209228515625, "step": 14345 }, { "epoch": 0.9697174530214951, "grad_norm": 1.201135516166687, "learning_rate": 7.310087407147781e-08, "loss": 0.2061767578125, "step": 14346 }, { "epoch": 0.9697850479924294, "grad_norm": 1.5258307456970215, "learning_rate": 7.277588357078035e-08, "loss": 0.24658203125, "step": 14347 }, { "epoch": 0.9698526429633635, "grad_norm": 1.4077509641647339, "learning_rate": 7.245161534201683e-08, "loss": 0.29559326171875, "step": 14348 }, { "epoch": 0.9699202379342977, "grad_norm": 1.3012282848358154, "learning_rate": 7.212806940087802e-08, "loss": 0.235809326171875, "step": 14349 }, { "epoch": 0.9699878329052318, "grad_norm": 0.9334884881973267, "learning_rate": 7.180524576301972e-08, "loss": 0.203338623046875, "step": 14350 }, { "epoch": 0.970055427876166, "grad_norm": 1.0619993209838867, "learning_rate": 7.148314444405946e-08, "loss": 0.21173858642578125, "step": 14351 }, { "epoch": 0.9701230228471002, "grad_norm": 1.3562337160110474, "learning_rate": 7.116176545958474e-08, "loss": 0.302276611328125, "step": 14352 }, { "epoch": 0.9701906178180343, "grad_norm": 1.1968963146209717, "learning_rate": 7.084110882514484e-08, "loss": 0.17596435546875, "step": 14353 }, { "epoch": 0.9702582127889685, "grad_norm": 0.6224979162216187, "learning_rate": 7.052117455625562e-08, "loss": 0.10198211669921875, "step": 14354 }, { "epoch": 0.9703258077599026, "grad_norm": 0.9881818890571594, "learning_rate": 7.020196266839807e-08, "loss": 0.1869354248046875, "step": 14355 }, { "epoch": 0.9703934027308369, "grad_norm": 1.283553123474121, "learning_rate": 6.988347317701649e-08, "loss": 0.265716552734375, "step": 14356 }, { "epoch": 0.970460997701771, "grad_norm": 1.303979516029358, "learning_rate": 6.956570609752189e-08, "loss": 0.21148681640625, "step": 14357 }, { "epoch": 0.9705285926727052, "grad_norm": 0.7684270739555359, "learning_rate": 6.924866144529029e-08, "loss": 0.1153564453125, "step": 14358 }, { "epoch": 0.9705961876436393, "grad_norm": 1.1508034467697144, "learning_rate": 6.89323392356611e-08, "loss": 0.236602783203125, "step": 14359 }, { "epoch": 0.9706637826145734, "grad_norm": 1.2444194555282593, "learning_rate": 6.861673948394043e-08, "loss": 0.1855010986328125, "step": 14360 }, { "epoch": 0.9707313775855076, "grad_norm": 1.1155967712402344, "learning_rate": 6.830186220539936e-08, "loss": 0.15246963500976562, "step": 14361 }, { "epoch": 0.9707989725564418, "grad_norm": 1.0354052782058716, "learning_rate": 6.798770741527404e-08, "loss": 0.255340576171875, "step": 14362 }, { "epoch": 0.970866567527376, "grad_norm": 1.1564031839370728, "learning_rate": 6.767427512876568e-08, "loss": 0.2378692626953125, "step": 14363 }, { "epoch": 0.9709341624983101, "grad_norm": 1.1747733354568481, "learning_rate": 6.73615653610371e-08, "loss": 0.2686767578125, "step": 14364 }, { "epoch": 0.9710017574692443, "grad_norm": 1.761272668838501, "learning_rate": 6.704957812722124e-08, "loss": 0.214385986328125, "step": 14365 }, { "epoch": 0.9710693524401784, "grad_norm": 1.3303500413894653, "learning_rate": 6.673831344241432e-08, "loss": 0.2002105712890625, "step": 14366 }, { "epoch": 0.9711369474111126, "grad_norm": 1.2719206809997559, "learning_rate": 6.642777132167766e-08, "loss": 0.25885009765625, "step": 14367 }, { "epoch": 0.9712045423820468, "grad_norm": 0.8683293461799622, "learning_rate": 6.611795178003421e-08, "loss": 0.15322113037109375, "step": 14368 }, { "epoch": 0.9712721373529809, "grad_norm": 1.2668765783309937, "learning_rate": 6.580885483247867e-08, "loss": 0.18367767333984375, "step": 14369 }, { "epoch": 0.9713397323239151, "grad_norm": 1.9950045347213745, "learning_rate": 6.550048049396406e-08, "loss": 0.25799560546875, "step": 14370 }, { "epoch": 0.9714073272948492, "grad_norm": 1.3469537496566772, "learning_rate": 6.519282877941512e-08, "loss": 0.28466796875, "step": 14371 }, { "epoch": 0.9714749222657835, "grad_norm": 1.5927146673202515, "learning_rate": 6.488589970371329e-08, "loss": 0.177886962890625, "step": 14372 }, { "epoch": 0.9715425172367176, "grad_norm": 1.2281498908996582, "learning_rate": 6.457969328171331e-08, "loss": 0.239471435546875, "step": 14373 }, { "epoch": 0.9716101122076517, "grad_norm": 0.9830460548400879, "learning_rate": 6.427420952823005e-08, "loss": 0.152008056640625, "step": 14374 }, { "epoch": 0.9716777071785859, "grad_norm": 1.8123893737792969, "learning_rate": 6.396944845804497e-08, "loss": 0.2229461669921875, "step": 14375 }, { "epoch": 0.97174530214952, "grad_norm": 1.2331541776657104, "learning_rate": 6.366541008590465e-08, "loss": 0.1959228515625, "step": 14376 }, { "epoch": 0.9718128971204543, "grad_norm": 1.826202392578125, "learning_rate": 6.336209442651897e-08, "loss": 0.227142333984375, "step": 14377 }, { "epoch": 0.9718804920913884, "grad_norm": 1.5225766897201538, "learning_rate": 6.305950149456453e-08, "loss": 0.233734130859375, "step": 14378 }, { "epoch": 0.9719480870623226, "grad_norm": 0.9598221182823181, "learning_rate": 6.275763130468459e-08, "loss": 0.145172119140625, "step": 14379 }, { "epoch": 0.9720156820332567, "grad_norm": 2.165409803390503, "learning_rate": 6.245648387148417e-08, "loss": 0.2528076171875, "step": 14380 }, { "epoch": 0.9720832770041908, "grad_norm": 1.4855214357376099, "learning_rate": 6.215605920953327e-08, "loss": 0.239288330078125, "step": 14381 }, { "epoch": 0.9721508719751251, "grad_norm": 1.5563273429870605, "learning_rate": 6.185635733337024e-08, "loss": 0.304931640625, "step": 14382 }, { "epoch": 0.9722184669460592, "grad_norm": 1.2862257957458496, "learning_rate": 6.155737825749686e-08, "loss": 0.22418212890625, "step": 14383 }, { "epoch": 0.9722860619169934, "grad_norm": 0.7692824602127075, "learning_rate": 6.125912199637818e-08, "loss": 0.1347503662109375, "step": 14384 }, { "epoch": 0.9723536568879275, "grad_norm": 1.309730887413025, "learning_rate": 6.0961588564446e-08, "loss": 0.24591064453125, "step": 14385 }, { "epoch": 0.9724212518588617, "grad_norm": 0.9933772683143616, "learning_rate": 6.066477797609715e-08, "loss": 0.1526641845703125, "step": 14386 }, { "epoch": 0.9724888468297959, "grad_norm": 0.9888601303100586, "learning_rate": 6.036869024569346e-08, "loss": 0.2349853515625, "step": 14387 }, { "epoch": 0.97255644180073, "grad_norm": 1.5081074237823486, "learning_rate": 6.007332538756016e-08, "loss": 0.22216796875, "step": 14388 }, { "epoch": 0.9726240367716642, "grad_norm": 1.0553967952728271, "learning_rate": 5.977868341598913e-08, "loss": 0.216583251953125, "step": 14389 }, { "epoch": 0.9726916317425983, "grad_norm": 0.9550424814224243, "learning_rate": 5.948476434523897e-08, "loss": 0.1679840087890625, "step": 14390 }, { "epoch": 0.9727592267135325, "grad_norm": 0.929997980594635, "learning_rate": 5.9191568189529974e-08, "loss": 0.1195831298828125, "step": 14391 }, { "epoch": 0.9728268216844667, "grad_norm": 1.6107690334320068, "learning_rate": 5.8899094963049125e-08, "loss": 0.3153228759765625, "step": 14392 }, { "epoch": 0.9728944166554009, "grad_norm": 1.34040367603302, "learning_rate": 5.860734467994677e-08, "loss": 0.277801513671875, "step": 14393 }, { "epoch": 0.972962011626335, "grad_norm": 1.4932514429092407, "learning_rate": 5.831631735434162e-08, "loss": 0.231048583984375, "step": 14394 }, { "epoch": 0.9730296065972691, "grad_norm": 1.5462392568588257, "learning_rate": 5.802601300031407e-08, "loss": 0.1739501953125, "step": 14395 }, { "epoch": 0.9730972015682033, "grad_norm": 0.6060988903045654, "learning_rate": 5.7736431631911225e-08, "loss": 0.08514022827148438, "step": 14396 }, { "epoch": 0.9731647965391375, "grad_norm": 1.8622022867202759, "learning_rate": 5.7447573263143536e-08, "loss": 0.228118896484375, "step": 14397 }, { "epoch": 0.9732323915100717, "grad_norm": 1.4582666158676147, "learning_rate": 5.71594379079915e-08, "loss": 0.239898681640625, "step": 14398 }, { "epoch": 0.9732999864810058, "grad_norm": 1.6671262979507446, "learning_rate": 5.687202558039228e-08, "loss": 0.22027587890625, "step": 14399 }, { "epoch": 0.97336758145194, "grad_norm": 0.8921480178833008, "learning_rate": 5.6585336294254774e-08, "loss": 0.1595916748046875, "step": 14400 }, { "epoch": 0.9734351764228741, "grad_norm": 0.8352161645889282, "learning_rate": 5.6299370063451204e-08, "loss": 0.1951446533203125, "step": 14401 }, { "epoch": 0.9735027713938083, "grad_norm": 1.462673306465149, "learning_rate": 5.601412690181884e-08, "loss": 0.245361328125, "step": 14402 }, { "epoch": 0.9735703663647425, "grad_norm": 1.2777764797210693, "learning_rate": 5.572960682315664e-08, "loss": 0.264617919921875, "step": 14403 }, { "epoch": 0.9736379613356766, "grad_norm": 2.279775381088257, "learning_rate": 5.544580984123526e-08, "loss": 0.339935302734375, "step": 14404 }, { "epoch": 0.9737055563066108, "grad_norm": 1.0794036388397217, "learning_rate": 5.5162735969785386e-08, "loss": 0.18365478515625, "step": 14405 }, { "epoch": 0.9737731512775449, "grad_norm": 1.1877553462982178, "learning_rate": 5.488038522250105e-08, "loss": 0.19841766357421875, "step": 14406 }, { "epoch": 0.9738407462484792, "grad_norm": 1.3327454328536987, "learning_rate": 5.4598757613048e-08, "loss": 0.1867523193359375, "step": 14407 }, { "epoch": 0.9739083412194133, "grad_norm": 0.7284607291221619, "learning_rate": 5.4317853155052e-08, "loss": 0.1068267822265625, "step": 14408 }, { "epoch": 0.9739759361903474, "grad_norm": 1.0106433629989624, "learning_rate": 5.403767186210218e-08, "loss": 0.2267608642578125, "step": 14409 }, { "epoch": 0.9740435311612816, "grad_norm": 1.0180344581604004, "learning_rate": 5.375821374775936e-08, "loss": 0.185455322265625, "step": 14410 }, { "epoch": 0.9741111261322157, "grad_norm": 1.1260912418365479, "learning_rate": 5.347947882554438e-08, "loss": 0.234039306640625, "step": 14411 }, { "epoch": 0.97417872110315, "grad_norm": 1.088329792022705, "learning_rate": 5.320146710894147e-08, "loss": 0.2213897705078125, "step": 14412 }, { "epoch": 0.9742463160740841, "grad_norm": 1.1325124502182007, "learning_rate": 5.292417861140653e-08, "loss": 0.31634521484375, "step": 14413 }, { "epoch": 0.9743139110450183, "grad_norm": 1.1952979564666748, "learning_rate": 5.264761334635382e-08, "loss": 0.258209228515625, "step": 14414 }, { "epoch": 0.9743815060159524, "grad_norm": 0.9317258596420288, "learning_rate": 5.237177132716764e-08, "loss": 0.132659912109375, "step": 14415 }, { "epoch": 0.9744491009868865, "grad_norm": 1.4112378358840942, "learning_rate": 5.2096652567192315e-08, "loss": 0.27685546875, "step": 14416 }, { "epoch": 0.9745166959578208, "grad_norm": 0.7578220367431641, "learning_rate": 5.1822257079740534e-08, "loss": 0.09914398193359375, "step": 14417 }, { "epoch": 0.9745842909287549, "grad_norm": 1.28921639919281, "learning_rate": 5.154858487809e-08, "loss": 0.166473388671875, "step": 14418 }, { "epoch": 0.9746518858996891, "grad_norm": 1.3542431592941284, "learning_rate": 5.127563597548346e-08, "loss": 0.2152099609375, "step": 14419 }, { "epoch": 0.9747194808706232, "grad_norm": 0.8831194639205933, "learning_rate": 5.100341038512535e-08, "loss": 0.1703033447265625, "step": 14420 }, { "epoch": 0.9747870758415574, "grad_norm": 1.1902132034301758, "learning_rate": 5.073190812019013e-08, "loss": 0.228912353515625, "step": 14421 }, { "epoch": 0.9748546708124916, "grad_norm": 0.9549556374549866, "learning_rate": 5.046112919381229e-08, "loss": 0.1538848876953125, "step": 14422 }, { "epoch": 0.9749222657834257, "grad_norm": 1.445615530014038, "learning_rate": 5.0191073619096364e-08, "loss": 0.225372314453125, "step": 14423 }, { "epoch": 0.9749898607543599, "grad_norm": 1.5526676177978516, "learning_rate": 4.9921741409108545e-08, "loss": 0.29095458984375, "step": 14424 }, { "epoch": 0.975057455725294, "grad_norm": 1.620223045349121, "learning_rate": 4.965313257687842e-08, "loss": 0.269317626953125, "step": 14425 }, { "epoch": 0.9751250506962282, "grad_norm": 1.392520546913147, "learning_rate": 4.938524713540726e-08, "loss": 0.257171630859375, "step": 14426 }, { "epoch": 0.9751926456671623, "grad_norm": 1.7035987377166748, "learning_rate": 4.911808509765303e-08, "loss": 0.28857421875, "step": 14427 }, { "epoch": 0.9752602406380966, "grad_norm": 0.8313519358634949, "learning_rate": 4.885164647654539e-08, "loss": 0.1675262451171875, "step": 14428 }, { "epoch": 0.9753278356090307, "grad_norm": 1.3598010540008545, "learning_rate": 4.8585931284974015e-08, "loss": 0.3048095703125, "step": 14429 }, { "epoch": 0.9753954305799648, "grad_norm": 0.753987729549408, "learning_rate": 4.8320939535798635e-08, "loss": 0.12616348266601562, "step": 14430 }, { "epoch": 0.975463025550899, "grad_norm": 1.030350923538208, "learning_rate": 4.8056671241838986e-08, "loss": 0.2208251953125, "step": 14431 }, { "epoch": 0.9755306205218331, "grad_norm": 0.5640956163406372, "learning_rate": 4.779312641588318e-08, "loss": 0.06987762451171875, "step": 14432 }, { "epoch": 0.9755982154927674, "grad_norm": 1.6139079332351685, "learning_rate": 4.753030507068268e-08, "loss": 0.262847900390625, "step": 14433 }, { "epoch": 0.9756658104637015, "grad_norm": 0.6360719799995422, "learning_rate": 4.7268207218953974e-08, "loss": 0.138702392578125, "step": 14434 }, { "epoch": 0.9757334054346357, "grad_norm": 1.2689799070358276, "learning_rate": 4.7006832873380256e-08, "loss": 0.207427978515625, "step": 14435 }, { "epoch": 0.9758010004055698, "grad_norm": 0.9663581252098083, "learning_rate": 4.674618204660641e-08, "loss": 0.1440277099609375, "step": 14436 }, { "epoch": 0.9758685953765039, "grad_norm": 1.3222178220748901, "learning_rate": 4.648625475124568e-08, "loss": 0.24700927734375, "step": 14437 }, { "epoch": 0.9759361903474382, "grad_norm": 1.0952590703964233, "learning_rate": 4.622705099987467e-08, "loss": 0.16912841796875, "step": 14438 }, { "epoch": 0.9760037853183723, "grad_norm": 2.0933682918548584, "learning_rate": 4.596857080503669e-08, "loss": 0.2635498046875, "step": 14439 }, { "epoch": 0.9760713802893065, "grad_norm": 0.9729282259941101, "learning_rate": 4.571081417923673e-08, "loss": 0.15145111083984375, "step": 14440 }, { "epoch": 0.9761389752602406, "grad_norm": 1.2642790079116821, "learning_rate": 4.5453781134948135e-08, "loss": 0.25042724609375, "step": 14441 }, { "epoch": 0.9762065702311749, "grad_norm": 1.421461582183838, "learning_rate": 4.5197471684605974e-08, "loss": 0.16607666015625, "step": 14442 }, { "epoch": 0.976274165202109, "grad_norm": 1.4622962474822998, "learning_rate": 4.494188584061365e-08, "loss": 0.206207275390625, "step": 14443 }, { "epoch": 0.9763417601730431, "grad_norm": 1.4941774606704712, "learning_rate": 4.4687023615336275e-08, "loss": 0.3048095703125, "step": 14444 }, { "epoch": 0.9764093551439773, "grad_norm": 2.077329158782959, "learning_rate": 4.443288502110732e-08, "loss": 0.284423828125, "step": 14445 }, { "epoch": 0.9764769501149114, "grad_norm": 1.3844908475875854, "learning_rate": 4.41794700702236e-08, "loss": 0.192962646484375, "step": 14446 }, { "epoch": 0.9765445450858456, "grad_norm": 1.8987925052642822, "learning_rate": 4.392677877494533e-08, "loss": 0.25115966796875, "step": 14447 }, { "epoch": 0.9766121400567798, "grad_norm": 1.2226977348327637, "learning_rate": 4.367481114750105e-08, "loss": 0.25732421875, "step": 14448 }, { "epoch": 0.976679735027714, "grad_norm": 1.4512275457382202, "learning_rate": 4.3423567200081004e-08, "loss": 0.239288330078125, "step": 14449 }, { "epoch": 0.9767473299986481, "grad_norm": 1.3938173055648804, "learning_rate": 4.3173046944843806e-08, "loss": 0.280670166015625, "step": 14450 }, { "epoch": 0.9768149249695822, "grad_norm": 1.046010971069336, "learning_rate": 4.292325039390976e-08, "loss": 0.26202392578125, "step": 14451 }, { "epoch": 0.9768825199405164, "grad_norm": 1.3574124574661255, "learning_rate": 4.267417755936587e-08, "loss": 0.256805419921875, "step": 14452 }, { "epoch": 0.9769501149114506, "grad_norm": 1.1372432708740234, "learning_rate": 4.242582845326415e-08, "loss": 0.19219970703125, "step": 14453 }, { "epoch": 0.9770177098823848, "grad_norm": 1.1103955507278442, "learning_rate": 4.217820308762166e-08, "loss": 0.1930084228515625, "step": 14454 }, { "epoch": 0.9770853048533189, "grad_norm": 1.1228469610214233, "learning_rate": 4.193130147441715e-08, "loss": 0.20263671875, "step": 14455 }, { "epoch": 0.9771528998242531, "grad_norm": 1.466602087020874, "learning_rate": 4.168512362560106e-08, "loss": 0.259429931640625, "step": 14456 }, { "epoch": 0.9772204947951872, "grad_norm": 1.3987373113632202, "learning_rate": 4.143966955308387e-08, "loss": 0.256103515625, "step": 14457 }, { "epoch": 0.9772880897661214, "grad_norm": 1.2160749435424805, "learning_rate": 4.119493926874107e-08, "loss": 0.211669921875, "step": 14458 }, { "epoch": 0.9773556847370556, "grad_norm": 0.9313642382621765, "learning_rate": 4.0950932784414864e-08, "loss": 0.1776885986328125, "step": 14459 }, { "epoch": 0.9774232797079897, "grad_norm": 1.3094121217727661, "learning_rate": 4.0707650111912465e-08, "loss": 0.17852783203125, "step": 14460 }, { "epoch": 0.9774908746789239, "grad_norm": 1.0448907613754272, "learning_rate": 4.046509126300446e-08, "loss": 0.1982421875, "step": 14461 }, { "epoch": 0.977558469649858, "grad_norm": 1.2286763191223145, "learning_rate": 4.0223256249426465e-08, "loss": 0.255401611328125, "step": 14462 }, { "epoch": 0.9776260646207923, "grad_norm": 1.2004948854446411, "learning_rate": 3.998214508288245e-08, "loss": 0.253265380859375, "step": 14463 }, { "epoch": 0.9776936595917264, "grad_norm": 0.7386651039123535, "learning_rate": 3.9741757775038076e-08, "loss": 0.1292266845703125, "step": 14464 }, { "epoch": 0.9777612545626605, "grad_norm": 1.1430774927139282, "learning_rate": 3.950209433752405e-08, "loss": 0.225433349609375, "step": 14465 }, { "epoch": 0.9778288495335947, "grad_norm": 0.9686436653137207, "learning_rate": 3.92631547819361e-08, "loss": 0.1841888427734375, "step": 14466 }, { "epoch": 0.9778964445045288, "grad_norm": 1.5791963338851929, "learning_rate": 3.9024939119836624e-08, "loss": 0.29827880859375, "step": 14467 }, { "epoch": 0.9779640394754631, "grad_norm": 1.056003451347351, "learning_rate": 3.878744736275308e-08, "loss": 0.271759033203125, "step": 14468 }, { "epoch": 0.9780316344463972, "grad_norm": 1.4288982152938843, "learning_rate": 3.8550679522174615e-08, "loss": 0.2432403564453125, "step": 14469 }, { "epoch": 0.9780992294173313, "grad_norm": 1.6507352590560913, "learning_rate": 3.8314635609558726e-08, "loss": 0.25830078125, "step": 14470 }, { "epoch": 0.9781668243882655, "grad_norm": 1.1954519748687744, "learning_rate": 3.807931563632627e-08, "loss": 0.28570556640625, "step": 14471 }, { "epoch": 0.9782344193591996, "grad_norm": 1.2498865127563477, "learning_rate": 3.784471961386482e-08, "loss": 0.221466064453125, "step": 14472 }, { "epoch": 0.9783020143301339, "grad_norm": 1.508318543434143, "learning_rate": 3.761084755352362e-08, "loss": 0.2237548828125, "step": 14473 }, { "epoch": 0.978369609301068, "grad_norm": 1.2313942909240723, "learning_rate": 3.7377699466620285e-08, "loss": 0.25909423828125, "step": 14474 }, { "epoch": 0.9784372042720022, "grad_norm": 1.3016093969345093, "learning_rate": 3.714527536443413e-08, "loss": 0.186920166015625, "step": 14475 }, { "epoch": 0.9785047992429363, "grad_norm": 0.9097380638122559, "learning_rate": 3.6913575258212815e-08, "loss": 0.24627685546875, "step": 14476 }, { "epoch": 0.9785723942138704, "grad_norm": 1.1119396686553955, "learning_rate": 3.6682599159167383e-08, "loss": 0.19842529296875, "step": 14477 }, { "epoch": 0.9786399891848047, "grad_norm": 1.5566680431365967, "learning_rate": 3.645234707847389e-08, "loss": 0.1675262451171875, "step": 14478 }, { "epoch": 0.9787075841557388, "grad_norm": 2.1698129177093506, "learning_rate": 3.6222819027273427e-08, "loss": 0.32159423828125, "step": 14479 }, { "epoch": 0.978775179126673, "grad_norm": 0.6321439743041992, "learning_rate": 3.59940150166721e-08, "loss": 0.0857086181640625, "step": 14480 }, { "epoch": 0.9788427740976071, "grad_norm": 1.3128446340560913, "learning_rate": 3.57659350577394e-08, "loss": 0.2465057373046875, "step": 14481 }, { "epoch": 0.9789103690685413, "grad_norm": 1.5326926708221436, "learning_rate": 3.553857916151315e-08, "loss": 0.255950927734375, "step": 14482 }, { "epoch": 0.9789779640394755, "grad_norm": 1.3213003873825073, "learning_rate": 3.53119473389929e-08, "loss": 0.24151611328125, "step": 14483 }, { "epoch": 0.9790455590104096, "grad_norm": 1.023366093635559, "learning_rate": 3.508603960114487e-08, "loss": 0.2496337890625, "step": 14484 }, { "epoch": 0.9791131539813438, "grad_norm": 1.4431891441345215, "learning_rate": 3.4860855958901985e-08, "loss": 0.305938720703125, "step": 14485 }, { "epoch": 0.9791807489522779, "grad_norm": 0.8946941494941711, "learning_rate": 3.4636396423155526e-08, "loss": 0.19964599609375, "step": 14486 }, { "epoch": 0.9792483439232121, "grad_norm": 1.1929851770401, "learning_rate": 3.441266100477014e-08, "loss": 0.1303424835205078, "step": 14487 }, { "epoch": 0.9793159388941463, "grad_norm": 1.1967157125473022, "learning_rate": 3.418964971456884e-08, "loss": 0.16873931884765625, "step": 14488 }, { "epoch": 0.9793835338650805, "grad_norm": 0.611189603805542, "learning_rate": 3.396736256334632e-08, "loss": 0.07782745361328125, "step": 14489 }, { "epoch": 0.9794511288360146, "grad_norm": 1.6844302415847778, "learning_rate": 3.374579956185231e-08, "loss": 0.21080780029296875, "step": 14490 }, { "epoch": 0.9795187238069487, "grad_norm": 1.0142379999160767, "learning_rate": 3.352496072081324e-08, "loss": 0.18782806396484375, "step": 14491 }, { "epoch": 0.9795863187778829, "grad_norm": 1.488783359527588, "learning_rate": 3.3304846050910555e-08, "loss": 0.3021240234375, "step": 14492 }, { "epoch": 0.979653913748817, "grad_norm": 2.3645567893981934, "learning_rate": 3.308545556279741e-08, "loss": 0.30426025390625, "step": 14493 }, { "epoch": 0.9797215087197513, "grad_norm": 1.17124605178833, "learning_rate": 3.286678926708697e-08, "loss": 0.16768646240234375, "step": 14494 }, { "epoch": 0.9797891036906854, "grad_norm": 1.180242896080017, "learning_rate": 3.264884717436245e-08, "loss": 0.191925048828125, "step": 14495 }, { "epoch": 0.9798566986616196, "grad_norm": 0.9050097465515137, "learning_rate": 3.243162929516541e-08, "loss": 0.155975341796875, "step": 14496 }, { "epoch": 0.9799242936325537, "grad_norm": 1.1832337379455566, "learning_rate": 3.221513564001077e-08, "loss": 0.2574462890625, "step": 14497 }, { "epoch": 0.9799918886034878, "grad_norm": 1.7730683088302612, "learning_rate": 3.199936621937016e-08, "loss": 0.29541015625, "step": 14498 }, { "epoch": 0.9800594835744221, "grad_norm": 1.3756195306777954, "learning_rate": 3.178432104368523e-08, "loss": 0.240020751953125, "step": 14499 }, { "epoch": 0.9801270785453562, "grad_norm": 0.95904141664505, "learning_rate": 3.1570000123362644e-08, "loss": 0.1556243896484375, "step": 14500 }, { "epoch": 0.9801946735162904, "grad_norm": 1.5046601295471191, "learning_rate": 3.1356403468769115e-08, "loss": 0.240203857421875, "step": 14501 }, { "epoch": 0.9802622684872245, "grad_norm": 0.9073677659034729, "learning_rate": 3.114353109024304e-08, "loss": 0.1518707275390625, "step": 14502 }, { "epoch": 0.9803298634581588, "grad_norm": 0.7908309102058411, "learning_rate": 3.093138299808285e-08, "loss": 0.163360595703125, "step": 14503 }, { "epoch": 0.9803974584290929, "grad_norm": 1.6075413227081299, "learning_rate": 3.071995920255366e-08, "loss": 0.27362060546875, "step": 14504 }, { "epoch": 0.980465053400027, "grad_norm": 1.8584740161895752, "learning_rate": 3.0509259713885627e-08, "loss": 0.268829345703125, "step": 14505 }, { "epoch": 0.9805326483709612, "grad_norm": 0.97154301404953, "learning_rate": 3.02992845422756e-08, "loss": 0.20022964477539062, "step": 14506 }, { "epoch": 0.9806002433418953, "grad_norm": 1.789597988128662, "learning_rate": 3.009003369788044e-08, "loss": 0.2816162109375, "step": 14507 }, { "epoch": 0.9806678383128296, "grad_norm": 1.07893705368042, "learning_rate": 2.988150719082539e-08, "loss": 0.193328857421875, "step": 14508 }, { "epoch": 0.9807354332837637, "grad_norm": 1.1673665046691895, "learning_rate": 2.9673705031202368e-08, "loss": 0.229034423828125, "step": 14509 }, { "epoch": 0.9808030282546979, "grad_norm": 1.7297415733337402, "learning_rate": 2.9466627229065014e-08, "loss": 0.29766845703125, "step": 14510 }, { "epoch": 0.980870623225632, "grad_norm": 1.3040199279785156, "learning_rate": 2.9260273794433634e-08, "loss": 0.24810791015625, "step": 14511 }, { "epoch": 0.9809382181965661, "grad_norm": 2.0244154930114746, "learning_rate": 2.9054644737290247e-08, "loss": 0.33441162109375, "step": 14512 }, { "epoch": 0.9810058131675004, "grad_norm": 1.8556207418441772, "learning_rate": 2.884974006758856e-08, "loss": 0.2649078369140625, "step": 14513 }, { "epoch": 0.9810734081384345, "grad_norm": 1.3265234231948853, "learning_rate": 2.8645559795238973e-08, "loss": 0.296417236328125, "step": 14514 }, { "epoch": 0.9811410031093687, "grad_norm": 0.9299536347389221, "learning_rate": 2.8442103930123588e-08, "loss": 0.226837158203125, "step": 14515 }, { "epoch": 0.9812085980803028, "grad_norm": 0.9590960144996643, "learning_rate": 2.8239372482087854e-08, "loss": 0.21142578125, "step": 14516 }, { "epoch": 0.981276193051237, "grad_norm": 1.4255410432815552, "learning_rate": 2.8037365460938934e-08, "loss": 0.22540283203125, "step": 14517 }, { "epoch": 0.9813437880221711, "grad_norm": 1.47501540184021, "learning_rate": 2.7836082876450674e-08, "loss": 0.186431884765625, "step": 14518 }, { "epoch": 0.9814113829931053, "grad_norm": 2.4260799884796143, "learning_rate": 2.763552473836528e-08, "loss": 0.258697509765625, "step": 14519 }, { "epoch": 0.9814789779640395, "grad_norm": 1.0489643812179565, "learning_rate": 2.7435691056384994e-08, "loss": 0.21942138671875, "step": 14520 }, { "epoch": 0.9815465729349736, "grad_norm": 1.443928837776184, "learning_rate": 2.7236581840178743e-08, "loss": 0.271881103515625, "step": 14521 }, { "epoch": 0.9816141679059078, "grad_norm": 1.8096939325332642, "learning_rate": 2.7038197099382154e-08, "loss": 0.34515380859375, "step": 14522 }, { "epoch": 0.981681762876842, "grad_norm": 1.1875399351119995, "learning_rate": 2.6840536843592557e-08, "loss": 0.19449615478515625, "step": 14523 }, { "epoch": 0.9817493578477762, "grad_norm": 1.025634527206421, "learning_rate": 2.6643601082375624e-08, "loss": 0.198089599609375, "step": 14524 }, { "epoch": 0.9818169528187103, "grad_norm": 1.0190991163253784, "learning_rate": 2.6447389825258738e-08, "loss": 0.2521209716796875, "step": 14525 }, { "epoch": 0.9818845477896444, "grad_norm": 1.1670280694961548, "learning_rate": 2.625190308173764e-08, "loss": 0.29095458984375, "step": 14526 }, { "epoch": 0.9819521427605786, "grad_norm": 1.5754104852676392, "learning_rate": 2.605714086126809e-08, "loss": 0.208465576171875, "step": 14527 }, { "epoch": 0.9820197377315127, "grad_norm": 2.100829839706421, "learning_rate": 2.5863103173277558e-08, "loss": 0.28778076171875, "step": 14528 }, { "epoch": 0.982087332702447, "grad_norm": 1.1573785543441772, "learning_rate": 2.5669790027153527e-08, "loss": 0.27471923828125, "step": 14529 }, { "epoch": 0.9821549276733811, "grad_norm": 1.0370920896530151, "learning_rate": 2.547720143224852e-08, "loss": 0.229248046875, "step": 14530 }, { "epoch": 0.9822225226443153, "grad_norm": 2.600302219390869, "learning_rate": 2.5285337397883413e-08, "loss": 0.267578125, "step": 14531 }, { "epoch": 0.9822901176152494, "grad_norm": 1.518215537071228, "learning_rate": 2.5094197933339112e-08, "loss": 0.209136962890625, "step": 14532 }, { "epoch": 0.9823577125861835, "grad_norm": 1.0182338953018188, "learning_rate": 2.4903783047866556e-08, "loss": 0.166229248046875, "step": 14533 }, { "epoch": 0.9824253075571178, "grad_norm": 1.5570353269577026, "learning_rate": 2.4714092750678375e-08, "loss": 0.2264404296875, "step": 14534 }, { "epoch": 0.9824929025280519, "grad_norm": 1.116080403327942, "learning_rate": 2.4525127050950557e-08, "loss": 0.15802001953125, "step": 14535 }, { "epoch": 0.9825604974989861, "grad_norm": 3.0257863998413086, "learning_rate": 2.433688595783079e-08, "loss": 0.378936767578125, "step": 14536 }, { "epoch": 0.9826280924699202, "grad_norm": 1.1482737064361572, "learning_rate": 2.4149369480423454e-08, "loss": 0.1488037109375, "step": 14537 }, { "epoch": 0.9826956874408544, "grad_norm": 0.75910484790802, "learning_rate": 2.3962577627804626e-08, "loss": 0.179534912109375, "step": 14538 }, { "epoch": 0.9827632824117886, "grad_norm": 1.464901089668274, "learning_rate": 2.377651040901041e-08, "loss": 0.2335205078125, "step": 14539 }, { "epoch": 0.9828308773827227, "grad_norm": 1.1064329147338867, "learning_rate": 2.359116783304527e-08, "loss": 0.2370452880859375, "step": 14540 }, { "epoch": 0.9828984723536569, "grad_norm": 1.0440946817398071, "learning_rate": 2.3406549908877027e-08, "loss": 0.1648101806640625, "step": 14541 }, { "epoch": 0.982966067324591, "grad_norm": 0.9657308459281921, "learning_rate": 2.3222656645438545e-08, "loss": 0.22833251953125, "step": 14542 }, { "epoch": 0.9830336622955252, "grad_norm": 1.009753942489624, "learning_rate": 2.3039488051626035e-08, "loss": 0.235321044921875, "step": 14543 }, { "epoch": 0.9831012572664594, "grad_norm": 1.3839259147644043, "learning_rate": 2.2857044136305738e-08, "loss": 0.327789306640625, "step": 14544 }, { "epoch": 0.9831688522373936, "grad_norm": 1.1570932865142822, "learning_rate": 2.2675324908302265e-08, "loss": 0.19036865234375, "step": 14545 }, { "epoch": 0.9832364472083277, "grad_norm": 1.9153188467025757, "learning_rate": 2.249433037640858e-08, "loss": 0.308624267578125, "step": 14546 }, { "epoch": 0.9833040421792618, "grad_norm": 1.280347228050232, "learning_rate": 2.2314060549386006e-08, "loss": 0.20123291015625, "step": 14547 }, { "epoch": 0.983371637150196, "grad_norm": 1.2236053943634033, "learning_rate": 2.2134515435952575e-08, "loss": 0.151275634765625, "step": 14548 }, { "epoch": 0.9834392321211302, "grad_norm": 1.5005027055740356, "learning_rate": 2.195569504479633e-08, "loss": 0.254730224609375, "step": 14549 }, { "epoch": 0.9835068270920644, "grad_norm": 1.162742257118225, "learning_rate": 2.1777599384573687e-08, "loss": 0.255157470703125, "step": 14550 }, { "epoch": 0.9835744220629985, "grad_norm": 0.840095579624176, "learning_rate": 2.1600228463897752e-08, "loss": 0.1804656982421875, "step": 14551 }, { "epoch": 0.9836420170339327, "grad_norm": 0.5554410815238953, "learning_rate": 2.1423582291353327e-08, "loss": 0.1013641357421875, "step": 14552 }, { "epoch": 0.9837096120048668, "grad_norm": 0.812553346157074, "learning_rate": 2.124766087548691e-08, "loss": 0.140045166015625, "step": 14553 }, { "epoch": 0.983777206975801, "grad_norm": 2.2672436237335205, "learning_rate": 2.1072464224810016e-08, "loss": 0.280975341796875, "step": 14554 }, { "epoch": 0.9838448019467352, "grad_norm": 1.5307040214538574, "learning_rate": 2.0897992347800877e-08, "loss": 0.200439453125, "step": 14555 }, { "epoch": 0.9839123969176693, "grad_norm": 1.444843053817749, "learning_rate": 2.0724245252899397e-08, "loss": 0.21105194091796875, "step": 14556 }, { "epoch": 0.9839799918886035, "grad_norm": 1.2245206832885742, "learning_rate": 2.0551222948515526e-08, "loss": 0.25909423828125, "step": 14557 }, { "epoch": 0.9840475868595376, "grad_norm": 1.9621860980987549, "learning_rate": 2.0378925443019225e-08, "loss": 0.2632293701171875, "step": 14558 }, { "epoch": 0.9841151818304719, "grad_norm": 1.7166184186935425, "learning_rate": 2.0207352744747164e-08, "loss": 0.298919677734375, "step": 14559 }, { "epoch": 0.984182776801406, "grad_norm": 1.1804386377334595, "learning_rate": 2.0036504862002703e-08, "loss": 0.21844482421875, "step": 14560 }, { "epoch": 0.9842503717723401, "grad_norm": 0.9252364635467529, "learning_rate": 1.9866381803050894e-08, "loss": 0.16187286376953125, "step": 14561 }, { "epoch": 0.9843179667432743, "grad_norm": 1.3230340480804443, "learning_rate": 1.9696983576123484e-08, "loss": 0.17520904541015625, "step": 14562 }, { "epoch": 0.9843855617142084, "grad_norm": 1.6130236387252808, "learning_rate": 1.9528310189418918e-08, "loss": 0.224090576171875, "step": 14563 }, { "epoch": 0.9844531566851427, "grad_norm": 2.019246816635132, "learning_rate": 1.936036165109567e-08, "loss": 0.1941375732421875, "step": 14564 }, { "epoch": 0.9845207516560768, "grad_norm": 0.9619379639625549, "learning_rate": 1.9193137969282237e-08, "loss": 0.18853759765625, "step": 14565 }, { "epoch": 0.984588346627011, "grad_norm": 1.404246211051941, "learning_rate": 1.9026639152070478e-08, "loss": 0.30145263671875, "step": 14566 }, { "epoch": 0.9846559415979451, "grad_norm": 2.258500337600708, "learning_rate": 1.8860865207513956e-08, "loss": 0.240875244140625, "step": 14567 }, { "epoch": 0.9847235365688792, "grad_norm": 1.1769272089004517, "learning_rate": 1.8695816143636246e-08, "loss": 0.27105712890625, "step": 14568 }, { "epoch": 0.9847911315398135, "grad_norm": 1.4593051671981812, "learning_rate": 1.8531491968422632e-08, "loss": 0.266082763671875, "step": 14569 }, { "epoch": 0.9848587265107476, "grad_norm": 2.0571084022521973, "learning_rate": 1.8367892689825083e-08, "loss": 0.329681396484375, "step": 14570 }, { "epoch": 0.9849263214816818, "grad_norm": 0.9969786405563354, "learning_rate": 1.8205018315757272e-08, "loss": 0.18787384033203125, "step": 14571 }, { "epoch": 0.9849939164526159, "grad_norm": 0.853809118270874, "learning_rate": 1.804286885410289e-08, "loss": 0.199554443359375, "step": 14572 }, { "epoch": 0.9850615114235501, "grad_norm": 0.9187477827072144, "learning_rate": 1.7881444312705664e-08, "loss": 0.163970947265625, "step": 14573 }, { "epoch": 0.9851291063944843, "grad_norm": 0.6912538409233093, "learning_rate": 1.772074469937601e-08, "loss": 0.10228729248046875, "step": 14574 }, { "epoch": 0.9851967013654184, "grad_norm": 1.3187365531921387, "learning_rate": 1.7560770021891047e-08, "loss": 0.18906402587890625, "step": 14575 }, { "epoch": 0.9852642963363526, "grad_norm": 1.1579698324203491, "learning_rate": 1.7401520287991247e-08, "loss": 0.19509124755859375, "step": 14576 }, { "epoch": 0.9853318913072867, "grad_norm": 0.9138771295547485, "learning_rate": 1.7242995505382108e-08, "loss": 0.1694183349609375, "step": 14577 }, { "epoch": 0.9853994862782209, "grad_norm": 0.9602615833282471, "learning_rate": 1.7085195681732502e-08, "loss": 0.1859283447265625, "step": 14578 }, { "epoch": 0.985467081249155, "grad_norm": 1.03042471408844, "learning_rate": 1.6928120824679648e-08, "loss": 0.190826416015625, "step": 14579 }, { "epoch": 0.9855346762200893, "grad_norm": 1.779613733291626, "learning_rate": 1.6771770941822474e-08, "loss": 0.2950897216796875, "step": 14580 }, { "epoch": 0.9856022711910234, "grad_norm": 0.9299633502960205, "learning_rate": 1.6616146040728254e-08, "loss": 0.1959228515625, "step": 14581 }, { "epoch": 0.9856698661619575, "grad_norm": 1.2174259424209595, "learning_rate": 1.6461246128922636e-08, "loss": 0.26654052734375, "step": 14582 }, { "epoch": 0.9857374611328917, "grad_norm": 1.5786211490631104, "learning_rate": 1.6307071213906287e-08, "loss": 0.15866851806640625, "step": 14583 }, { "epoch": 0.9858050561038258, "grad_norm": 0.7161474823951721, "learning_rate": 1.6153621303134912e-08, "loss": 0.13381195068359375, "step": 14584 }, { "epoch": 0.9858726510747601, "grad_norm": 0.7764121890068054, "learning_rate": 1.6000896404035902e-08, "loss": 0.103607177734375, "step": 14585 }, { "epoch": 0.9859402460456942, "grad_norm": 1.9811500310897827, "learning_rate": 1.5848896523996682e-08, "loss": 0.24134063720703125, "step": 14586 }, { "epoch": 0.9860078410166284, "grad_norm": 1.4363981485366821, "learning_rate": 1.5697621670374695e-08, "loss": 0.2034912109375, "step": 14587 }, { "epoch": 0.9860754359875625, "grad_norm": 0.9062634110450745, "learning_rate": 1.5547071850487425e-08, "loss": 0.1549835205078125, "step": 14588 }, { "epoch": 0.9861430309584966, "grad_norm": 0.8817922472953796, "learning_rate": 1.5397247071620714e-08, "loss": 0.187286376953125, "step": 14589 }, { "epoch": 0.9862106259294309, "grad_norm": 1.2688777446746826, "learning_rate": 1.524814734102209e-08, "loss": 0.1920623779296875, "step": 14590 }, { "epoch": 0.986278220900365, "grad_norm": 1.2061678171157837, "learning_rate": 1.5099772665909116e-08, "loss": 0.26318359375, "step": 14591 }, { "epoch": 0.9863458158712992, "grad_norm": 0.9020772576332092, "learning_rate": 1.4952123053457722e-08, "loss": 0.172821044921875, "step": 14592 }, { "epoch": 0.9864134108422333, "grad_norm": 1.8471877574920654, "learning_rate": 1.4805198510813856e-08, "loss": 0.31085205078125, "step": 14593 }, { "epoch": 0.9864810058131676, "grad_norm": 1.5224405527114868, "learning_rate": 1.465899904508683e-08, "loss": 0.248626708984375, "step": 14594 }, { "epoch": 0.9865486007841017, "grad_norm": 1.251664161682129, "learning_rate": 1.4513524663350986e-08, "loss": 0.233551025390625, "step": 14595 }, { "epoch": 0.9866161957550358, "grad_norm": 0.9369530081748962, "learning_rate": 1.4368775372642362e-08, "loss": 0.1825408935546875, "step": 14596 }, { "epoch": 0.98668379072597, "grad_norm": 1.1425230503082275, "learning_rate": 1.4224751179968687e-08, "loss": 0.22698974609375, "step": 14597 }, { "epoch": 0.9867513856969041, "grad_norm": 1.8153334856033325, "learning_rate": 1.408145209229772e-08, "loss": 0.16040420532226562, "step": 14598 }, { "epoch": 0.9868189806678384, "grad_norm": 0.978680431842804, "learning_rate": 1.3938878116560583e-08, "loss": 0.178436279296875, "step": 14599 }, { "epoch": 0.9868865756387725, "grad_norm": 0.8392520546913147, "learning_rate": 1.3797029259660088e-08, "loss": 0.16259765625, "step": 14600 }, { "epoch": 0.9869541706097066, "grad_norm": 1.3272652626037598, "learning_rate": 1.3655905528455747e-08, "loss": 0.27191162109375, "step": 14601 }, { "epoch": 0.9870217655806408, "grad_norm": 1.2319328784942627, "learning_rate": 1.3515506929778765e-08, "loss": 0.2520751953125, "step": 14602 }, { "epoch": 0.9870893605515749, "grad_norm": 0.9283401966094971, "learning_rate": 1.3375833470420374e-08, "loss": 0.16626739501953125, "step": 14603 }, { "epoch": 0.9871569555225091, "grad_norm": 1.2024656534194946, "learning_rate": 1.3236885157140165e-08, "loss": 0.231048583984375, "step": 14604 }, { "epoch": 0.9872245504934433, "grad_norm": 1.8983104228973389, "learning_rate": 1.3098661996662765e-08, "loss": 0.2761688232421875, "step": 14605 }, { "epoch": 0.9872921454643775, "grad_norm": 0.8036725521087646, "learning_rate": 1.296116399567282e-08, "loss": 0.16680908203125, "step": 14606 }, { "epoch": 0.9873597404353116, "grad_norm": 0.707539975643158, "learning_rate": 1.2824391160825011e-08, "loss": 0.10631179809570312, "step": 14607 }, { "epoch": 0.9874273354062457, "grad_norm": 1.0262575149536133, "learning_rate": 1.268834349873904e-08, "loss": 0.204803466796875, "step": 14608 }, { "epoch": 0.98749493037718, "grad_norm": 1.987748622894287, "learning_rate": 1.2553021015994647e-08, "loss": 0.33868408203125, "step": 14609 }, { "epoch": 0.9875625253481141, "grad_norm": 1.3606476783752441, "learning_rate": 1.2418423719141591e-08, "loss": 0.2823486328125, "step": 14610 }, { "epoch": 0.9876301203190483, "grad_norm": 1.6436896324157715, "learning_rate": 1.2284551614692996e-08, "loss": 0.277252197265625, "step": 14611 }, { "epoch": 0.9876977152899824, "grad_norm": 1.00464928150177, "learning_rate": 1.2151404709123682e-08, "loss": 0.1553802490234375, "step": 14612 }, { "epoch": 0.9877653102609166, "grad_norm": 1.1459414958953857, "learning_rate": 1.2018983008878493e-08, "loss": 0.233428955078125, "step": 14613 }, { "epoch": 0.9878329052318507, "grad_norm": 1.0281589031219482, "learning_rate": 1.1887286520363972e-08, "loss": 0.197479248046875, "step": 14614 }, { "epoch": 0.9879005002027849, "grad_norm": 1.8360317945480347, "learning_rate": 1.1756315249953354e-08, "loss": 0.3043212890625, "step": 14615 }, { "epoch": 0.9879680951737191, "grad_norm": 2.1977827548980713, "learning_rate": 1.1626069203983237e-08, "loss": 0.3206787109375, "step": 14616 }, { "epoch": 0.9880356901446532, "grad_norm": 1.6463449001312256, "learning_rate": 1.149654838875358e-08, "loss": 0.2532958984375, "step": 14617 }, { "epoch": 0.9881032851155874, "grad_norm": 1.0665558576583862, "learning_rate": 1.1367752810536036e-08, "loss": 0.1774749755859375, "step": 14618 }, { "epoch": 0.9881708800865215, "grad_norm": 1.2075250148773193, "learning_rate": 1.1239682475558955e-08, "loss": 0.208709716796875, "step": 14619 }, { "epoch": 0.9882384750574558, "grad_norm": 0.785926878452301, "learning_rate": 1.1112337390019046e-08, "loss": 0.1822052001953125, "step": 14620 }, { "epoch": 0.9883060700283899, "grad_norm": 0.45005324482917786, "learning_rate": 1.098571756007971e-08, "loss": 0.06256484985351562, "step": 14621 }, { "epoch": 0.988373664999324, "grad_norm": 1.3555458784103394, "learning_rate": 1.0859822991867719e-08, "loss": 0.28802490234375, "step": 14622 }, { "epoch": 0.9884412599702582, "grad_norm": 1.1727670431137085, "learning_rate": 1.0734653691473196e-08, "loss": 0.2423095703125, "step": 14623 }, { "epoch": 0.9885088549411923, "grad_norm": 1.088172197341919, "learning_rate": 1.0610209664954628e-08, "loss": 0.1612396240234375, "step": 14624 }, { "epoch": 0.9885764499121266, "grad_norm": 1.6192479133605957, "learning_rate": 1.0486490918330537e-08, "loss": 0.295867919921875, "step": 14625 }, { "epoch": 0.9886440448830607, "grad_norm": 1.3579181432724, "learning_rate": 1.0363497457589465e-08, "loss": 0.15270233154296875, "step": 14626 }, { "epoch": 0.9887116398539949, "grad_norm": 1.2640669345855713, "learning_rate": 1.0241229288681652e-08, "loss": 0.223175048828125, "step": 14627 }, { "epoch": 0.988779234824929, "grad_norm": 0.8385286331176758, "learning_rate": 1.0119686417524033e-08, "loss": 0.1852264404296875, "step": 14628 }, { "epoch": 0.9888468297958631, "grad_norm": 1.2418607473373413, "learning_rate": 9.998868849996901e-09, "loss": 0.2453460693359375, "step": 14629 }, { "epoch": 0.9889144247667974, "grad_norm": 0.9638828635215759, "learning_rate": 9.878776591945582e-09, "loss": 0.16546630859375, "step": 14630 }, { "epoch": 0.9889820197377315, "grad_norm": 1.0631239414215088, "learning_rate": 9.759409649182094e-09, "loss": 0.241363525390625, "step": 14631 }, { "epoch": 0.9890496147086657, "grad_norm": 2.152918577194214, "learning_rate": 9.640768027481816e-09, "loss": 0.282562255859375, "step": 14632 }, { "epoch": 0.9891172096795998, "grad_norm": 1.1215916872024536, "learning_rate": 9.522851732585158e-09, "loss": 0.1844482421875, "step": 14633 }, { "epoch": 0.989184804650534, "grad_norm": 1.4904073476791382, "learning_rate": 9.40566077019922e-09, "loss": 0.29425048828125, "step": 14634 }, { "epoch": 0.9892523996214682, "grad_norm": 2.6425867080688477, "learning_rate": 9.289195145991137e-09, "loss": 0.33148193359375, "step": 14635 }, { "epoch": 0.9893199945924023, "grad_norm": 1.1848223209381104, "learning_rate": 9.173454865598063e-09, "loss": 0.243255615234375, "step": 14636 }, { "epoch": 0.9893875895633365, "grad_norm": 2.200794219970703, "learning_rate": 9.058439934620522e-09, "loss": 0.317901611328125, "step": 14637 }, { "epoch": 0.9894551845342706, "grad_norm": 1.2372087240219116, "learning_rate": 8.944150358622393e-09, "loss": 0.2901611328125, "step": 14638 }, { "epoch": 0.9895227795052048, "grad_norm": 1.3808971643447876, "learning_rate": 8.83058614313592e-09, "loss": 0.1805267333984375, "step": 14639 }, { "epoch": 0.989590374476139, "grad_norm": 1.4955387115478516, "learning_rate": 8.717747293655043e-09, "loss": 0.2315673828125, "step": 14640 }, { "epoch": 0.9896579694470732, "grad_norm": 1.2514926195144653, "learning_rate": 8.605633815638725e-09, "loss": 0.14522552490234375, "step": 14641 }, { "epoch": 0.9897255644180073, "grad_norm": 1.3563237190246582, "learning_rate": 8.494245714510962e-09, "loss": 0.2295074462890625, "step": 14642 }, { "epoch": 0.9897931593889414, "grad_norm": 1.6308791637420654, "learning_rate": 8.383582995664108e-09, "loss": 0.248779296875, "step": 14643 }, { "epoch": 0.9898607543598756, "grad_norm": 1.2761434316635132, "learning_rate": 8.273645664452212e-09, "loss": 0.230499267578125, "step": 14644 }, { "epoch": 0.9899283493308098, "grad_norm": 1.057171106338501, "learning_rate": 8.164433726192688e-09, "loss": 0.20880126953125, "step": 14645 }, { "epoch": 0.989995944301744, "grad_norm": 1.2801902294158936, "learning_rate": 8.055947186171308e-09, "loss": 0.22601318359375, "step": 14646 }, { "epoch": 0.9900635392726781, "grad_norm": 1.6294546127319336, "learning_rate": 7.948186049637207e-09, "loss": 0.290771484375, "step": 14647 }, { "epoch": 0.9901311342436123, "grad_norm": 1.4897788763046265, "learning_rate": 7.841150321804546e-09, "loss": 0.27606201171875, "step": 14648 }, { "epoch": 0.9901987292145464, "grad_norm": 1.310949683189392, "learning_rate": 7.734840007852518e-09, "loss": 0.2158203125, "step": 14649 }, { "epoch": 0.9902663241854806, "grad_norm": 1.5058125257492065, "learning_rate": 7.629255112923672e-09, "loss": 0.269439697265625, "step": 14650 }, { "epoch": 0.9903339191564148, "grad_norm": 1.2547050714492798, "learning_rate": 7.524395642128923e-09, "loss": 0.2454833984375, "step": 14651 }, { "epoch": 0.9904015141273489, "grad_norm": 0.8282046914100647, "learning_rate": 7.420261600540879e-09, "loss": 0.203887939453125, "step": 14652 }, { "epoch": 0.9904691090982831, "grad_norm": 0.557523787021637, "learning_rate": 7.316852993198841e-09, "loss": 0.08276748657226562, "step": 14653 }, { "epoch": 0.9905367040692172, "grad_norm": 0.943908154964447, "learning_rate": 7.214169825105476e-09, "loss": 0.201385498046875, "step": 14654 }, { "epoch": 0.9906042990401515, "grad_norm": 1.684498906135559, "learning_rate": 7.1122121012301425e-09, "loss": 0.15784454345703125, "step": 14655 }, { "epoch": 0.9906718940110856, "grad_norm": 1.529754400253296, "learning_rate": 7.010979826505559e-09, "loss": 0.1446380615234375, "step": 14656 }, { "epoch": 0.9907394889820197, "grad_norm": 0.9910631775856018, "learning_rate": 6.9104730058294765e-09, "loss": 0.1734771728515625, "step": 14657 }, { "epoch": 0.9908070839529539, "grad_norm": 0.7287756204605103, "learning_rate": 6.8106916440680015e-09, "loss": 0.1085205078125, "step": 14658 }, { "epoch": 0.990874678923888, "grad_norm": 0.8234944939613342, "learning_rate": 6.711635746043942e-09, "loss": 0.209625244140625, "step": 14659 }, { "epoch": 0.9909422738948223, "grad_norm": 1.3693926334381104, "learning_rate": 6.613305316555129e-09, "loss": 0.1742706298828125, "step": 14660 }, { "epoch": 0.9910098688657564, "grad_norm": 0.8749119639396667, "learning_rate": 6.515700360356092e-09, "loss": 0.140533447265625, "step": 14661 }, { "epoch": 0.9910774638366906, "grad_norm": 1.0957353115081787, "learning_rate": 6.418820882173049e-09, "loss": 0.1786651611328125, "step": 14662 }, { "epoch": 0.9911450588076247, "grad_norm": 1.4212175607681274, "learning_rate": 6.322666886688921e-09, "loss": 0.2880859375, "step": 14663 }, { "epoch": 0.9912126537785588, "grad_norm": 1.7324199676513672, "learning_rate": 6.227238378559985e-09, "loss": 0.2868499755859375, "step": 14664 }, { "epoch": 0.991280248749493, "grad_norm": 1.1130828857421875, "learning_rate": 6.132535362404212e-09, "loss": 0.2132568359375, "step": 14665 }, { "epoch": 0.9913478437204272, "grad_norm": 0.976332426071167, "learning_rate": 6.038557842799608e-09, "loss": 0.21337890625, "step": 14666 }, { "epoch": 0.9914154386913614, "grad_norm": 1.2707479000091553, "learning_rate": 5.945305824297531e-09, "loss": 0.2403564453125, "step": 14667 }, { "epoch": 0.9914830336622955, "grad_norm": 0.9038962125778198, "learning_rate": 5.852779311407708e-09, "loss": 0.14019775390625, "step": 14668 }, { "epoch": 0.9915506286332297, "grad_norm": 1.3170233964920044, "learning_rate": 5.760978308609888e-09, "loss": 0.158660888671875, "step": 14669 }, { "epoch": 0.9916182236041639, "grad_norm": 1.1247477531433105, "learning_rate": 5.669902820342188e-09, "loss": 0.232391357421875, "step": 14670 }, { "epoch": 0.991685818575098, "grad_norm": 1.0851171016693115, "learning_rate": 5.579552851014414e-09, "loss": 0.2183380126953125, "step": 14671 }, { "epoch": 0.9917534135460322, "grad_norm": 1.3750462532043457, "learning_rate": 5.489928404996403e-09, "loss": 0.262847900390625, "step": 14672 }, { "epoch": 0.9918210085169663, "grad_norm": 1.6781615018844604, "learning_rate": 5.4010294866246865e-09, "loss": 0.284027099609375, "step": 14673 }, { "epoch": 0.9918886034879005, "grad_norm": 1.3077343702316284, "learning_rate": 5.31285610020249e-09, "loss": 0.182159423828125, "step": 14674 }, { "epoch": 0.9919561984588346, "grad_norm": 1.4935632944107056, "learning_rate": 5.225408249996399e-09, "loss": 0.269439697265625, "step": 14675 }, { "epoch": 0.9920237934297689, "grad_norm": 1.00621497631073, "learning_rate": 5.1386859402347e-09, "loss": 0.194793701171875, "step": 14676 }, { "epoch": 0.992091388400703, "grad_norm": 2.040653944015503, "learning_rate": 5.052689175117364e-09, "loss": 0.257781982421875, "step": 14677 }, { "epoch": 0.9921589833716371, "grad_norm": 0.7469422221183777, "learning_rate": 4.9674179588010685e-09, "loss": 0.0993804931640625, "step": 14678 }, { "epoch": 0.9922265783425713, "grad_norm": 0.9374799132347107, "learning_rate": 4.882872295415841e-09, "loss": 0.200469970703125, "step": 14679 }, { "epoch": 0.9922941733135054, "grad_norm": 1.2972533702850342, "learning_rate": 4.799052189051745e-09, "loss": 0.2926025390625, "step": 14680 }, { "epoch": 0.9923617682844397, "grad_norm": 1.3850669860839844, "learning_rate": 4.715957643762203e-09, "loss": 0.263763427734375, "step": 14681 }, { "epoch": 0.9924293632553738, "grad_norm": 1.39956533908844, "learning_rate": 4.6335886635689995e-09, "loss": 0.2513885498046875, "step": 14682 }, { "epoch": 0.992496958226308, "grad_norm": 1.2864975929260254, "learning_rate": 4.551945252458945e-09, "loss": 0.18353271484375, "step": 14683 }, { "epoch": 0.9925645531972421, "grad_norm": 1.700631022453308, "learning_rate": 4.471027414380546e-09, "loss": 0.2747802734375, "step": 14684 }, { "epoch": 0.9926321481681762, "grad_norm": 2.036910057067871, "learning_rate": 4.3908351532506715e-09, "loss": 0.34051513671875, "step": 14685 }, { "epoch": 0.9926997431391105, "grad_norm": 1.2934374809265137, "learning_rate": 4.311368472949551e-09, "loss": 0.23046875, "step": 14686 }, { "epoch": 0.9927673381100446, "grad_norm": 1.5478029251098633, "learning_rate": 4.232627377320775e-09, "loss": 0.284271240234375, "step": 14687 }, { "epoch": 0.9928349330809788, "grad_norm": 3.096287250518799, "learning_rate": 4.154611870174629e-09, "loss": 0.3000030517578125, "step": 14688 }, { "epoch": 0.9929025280519129, "grad_norm": 1.389296293258667, "learning_rate": 4.077321955288094e-09, "loss": 0.2350921630859375, "step": 14689 }, { "epoch": 0.9929701230228472, "grad_norm": 2.956606864929199, "learning_rate": 4.000757636398178e-09, "loss": 0.27642822265625, "step": 14690 }, { "epoch": 0.9930377179937813, "grad_norm": 0.8360815644264221, "learning_rate": 3.924918917210251e-09, "loss": 0.19390869140625, "step": 14691 }, { "epoch": 0.9931053129647154, "grad_norm": 1.7278399467468262, "learning_rate": 3.849805801394712e-09, "loss": 0.280426025390625, "step": 14692 }, { "epoch": 0.9931729079356496, "grad_norm": 1.2670847177505493, "learning_rate": 3.775418292586985e-09, "loss": 0.257568359375, "step": 14693 }, { "epoch": 0.9932405029065837, "grad_norm": 0.8841302394866943, "learning_rate": 3.7017563943841926e-09, "loss": 0.149749755859375, "step": 14694 }, { "epoch": 0.993308097877518, "grad_norm": 0.9337226748466492, "learning_rate": 3.6288201103501507e-09, "loss": 0.1973876953125, "step": 14695 }, { "epoch": 0.9933756928484521, "grad_norm": 1.0151746273040771, "learning_rate": 3.5566094440170337e-09, "loss": 0.23822021484375, "step": 14696 }, { "epoch": 0.9934432878193863, "grad_norm": 0.9021186828613281, "learning_rate": 3.4851243988753832e-09, "loss": 0.114715576171875, "step": 14697 }, { "epoch": 0.9935108827903204, "grad_norm": 1.0720181465148926, "learning_rate": 3.4143649783874296e-09, "loss": 0.1500701904296875, "step": 14698 }, { "epoch": 0.9935784777612545, "grad_norm": 1.3811830282211304, "learning_rate": 3.3443311859737702e-09, "loss": 0.22076416015625, "step": 14699 }, { "epoch": 0.9936460727321887, "grad_norm": 1.0008230209350586, "learning_rate": 3.275023025026691e-09, "loss": 0.15549468994140625, "step": 14700 }, { "epoch": 0.9937136677031229, "grad_norm": 1.2740073204040527, "learning_rate": 3.20644049889518e-09, "loss": 0.205902099609375, "step": 14701 }, { "epoch": 0.9937812626740571, "grad_norm": 1.7067458629608154, "learning_rate": 3.1385836109015796e-09, "loss": 0.2479248046875, "step": 14702 }, { "epoch": 0.9938488576449912, "grad_norm": 1.2469255924224854, "learning_rate": 3.071452364326599e-09, "loss": 0.2445068359375, "step": 14703 }, { "epoch": 0.9939164526159254, "grad_norm": 0.49641817808151245, "learning_rate": 3.005046762420971e-09, "loss": 0.08858108520507812, "step": 14704 }, { "epoch": 0.9939840475868595, "grad_norm": 1.100131869316101, "learning_rate": 2.9393668083954607e-09, "loss": 0.2247314453125, "step": 14705 }, { "epoch": 0.9940516425577937, "grad_norm": 1.8543494939804077, "learning_rate": 2.8744125054308566e-09, "loss": 0.275115966796875, "step": 14706 }, { "epoch": 0.9941192375287279, "grad_norm": 1.0264025926589966, "learning_rate": 2.810183856666315e-09, "loss": 0.179656982421875, "step": 14707 }, { "epoch": 0.994186832499662, "grad_norm": 1.421500563621521, "learning_rate": 2.746680865212681e-09, "loss": 0.287353515625, "step": 14708 }, { "epoch": 0.9942544274705962, "grad_norm": 1.0343918800354004, "learning_rate": 2.68390353414083e-09, "loss": 0.15256118774414062, "step": 14709 }, { "epoch": 0.9943220224415303, "grad_norm": 1.291695237159729, "learning_rate": 2.621851866489999e-09, "loss": 0.1797332763671875, "step": 14710 }, { "epoch": 0.9943896174124646, "grad_norm": 1.0194072723388672, "learning_rate": 2.5605258652611208e-09, "loss": 0.15203857421875, "step": 14711 }, { "epoch": 0.9944572123833987, "grad_norm": 1.3049310445785522, "learning_rate": 2.4999255334218206e-09, "loss": 0.2542724609375, "step": 14712 }, { "epoch": 0.9945248073543328, "grad_norm": 1.6150565147399902, "learning_rate": 2.4400508739047532e-09, "loss": 0.31842041015625, "step": 14713 }, { "epoch": 0.994592402325267, "grad_norm": 1.3627090454101562, "learning_rate": 2.3809018896059356e-09, "loss": 0.2472686767578125, "step": 14714 }, { "epoch": 0.9946599972962011, "grad_norm": 1.0536593198776245, "learning_rate": 2.3224785833897422e-09, "loss": 0.240631103515625, "step": 14715 }, { "epoch": 0.9947275922671354, "grad_norm": 1.154813528060913, "learning_rate": 2.264780958080581e-09, "loss": 0.18292236328125, "step": 14716 }, { "epoch": 0.9947951872380695, "grad_norm": 1.6279056072235107, "learning_rate": 2.2078090164728835e-09, "loss": 0.2450714111328125, "step": 14717 }, { "epoch": 0.9948627822090037, "grad_norm": 0.6769044399261475, "learning_rate": 2.1515627613194476e-09, "loss": 0.1001434326171875, "step": 14718 }, { "epoch": 0.9949303771799378, "grad_norm": 1.6405744552612305, "learning_rate": 2.096042195346426e-09, "loss": 0.2967529296875, "step": 14719 }, { "epoch": 0.9949979721508719, "grad_norm": 1.199586033821106, "learning_rate": 2.041247321235007e-09, "loss": 0.1640625, "step": 14720 }, { "epoch": 0.9950655671218062, "grad_norm": 1.250037431716919, "learning_rate": 1.987178141641399e-09, "loss": 0.247161865234375, "step": 14721 }, { "epoch": 0.9951331620927403, "grad_norm": 1.0256431102752686, "learning_rate": 1.9338346591785127e-09, "loss": 0.184234619140625, "step": 14722 }, { "epoch": 0.9952007570636745, "grad_norm": 0.8127304315567017, "learning_rate": 1.881216876430947e-09, "loss": 0.1349334716796875, "step": 14723 }, { "epoch": 0.9952683520346086, "grad_norm": 2.3617546558380127, "learning_rate": 1.8293247959400016e-09, "loss": 0.294647216796875, "step": 14724 }, { "epoch": 0.9953359470055428, "grad_norm": 1.3179644346237183, "learning_rate": 1.7781584202203328e-09, "loss": 0.240020751953125, "step": 14725 }, { "epoch": 0.995403541976477, "grad_norm": 2.109086036682129, "learning_rate": 1.7277177517449615e-09, "loss": 0.297027587890625, "step": 14726 }, { "epoch": 0.9954711369474111, "grad_norm": 1.1752641201019287, "learning_rate": 1.6780027929569341e-09, "loss": 0.225341796875, "step": 14727 }, { "epoch": 0.9955387319183453, "grad_norm": 1.6224079132080078, "learning_rate": 1.6290135462609933e-09, "loss": 0.19989013671875, "step": 14728 }, { "epoch": 0.9956063268892794, "grad_norm": 1.3471611738204956, "learning_rate": 1.5807500140252451e-09, "loss": 0.2507171630859375, "step": 14729 }, { "epoch": 0.9956739218602136, "grad_norm": 1.5613412857055664, "learning_rate": 1.5332121985894842e-09, "loss": 0.2811279296875, "step": 14730 }, { "epoch": 0.9957415168311478, "grad_norm": 2.067563056945801, "learning_rate": 1.4864001022502071e-09, "loss": 0.3533935546875, "step": 14731 }, { "epoch": 0.9958091118020819, "grad_norm": 1.9503264427185059, "learning_rate": 1.440313727273934e-09, "loss": 0.16574859619140625, "step": 14732 }, { "epoch": 0.9958767067730161, "grad_norm": 1.9901195764541626, "learning_rate": 1.3949530758888829e-09, "loss": 0.34527587890625, "step": 14733 }, { "epoch": 0.9959443017439502, "grad_norm": 1.5043221712112427, "learning_rate": 1.3503181502932949e-09, "loss": 0.282562255859375, "step": 14734 }, { "epoch": 0.9960118967148844, "grad_norm": 2.0639588832855225, "learning_rate": 1.3064089526437783e-09, "loss": 0.290740966796875, "step": 14735 }, { "epoch": 0.9960794916858186, "grad_norm": 1.0810112953186035, "learning_rate": 1.2632254850669655e-09, "loss": 0.2144775390625, "step": 14736 }, { "epoch": 0.9961470866567528, "grad_norm": 1.1225709915161133, "learning_rate": 1.220767749651186e-09, "loss": 0.15707778930664062, "step": 14737 }, { "epoch": 0.9962146816276869, "grad_norm": 1.1209065914154053, "learning_rate": 1.1790357484497971e-09, "loss": 0.22064208984375, "step": 14738 }, { "epoch": 0.996282276598621, "grad_norm": 1.4035457372665405, "learning_rate": 1.138029483484515e-09, "loss": 0.29925537109375, "step": 14739 }, { "epoch": 0.9963498715695552, "grad_norm": 1.6809966564178467, "learning_rate": 1.0977489567387534e-09, "loss": 0.276031494140625, "step": 14740 }, { "epoch": 0.9964174665404893, "grad_norm": 1.1494619846343994, "learning_rate": 1.0581941701609532e-09, "loss": 0.217529296875, "step": 14741 }, { "epoch": 0.9964850615114236, "grad_norm": 1.2156810760498047, "learning_rate": 1.0193651256645843e-09, "loss": 0.2127685546875, "step": 14742 }, { "epoch": 0.9965526564823577, "grad_norm": 1.541126012802124, "learning_rate": 9.812618251281435e-10, "loss": 0.26336669921875, "step": 14743 }, { "epoch": 0.9966202514532919, "grad_norm": 1.0747747421264648, "learning_rate": 9.43884270396822e-10, "loss": 0.1930389404296875, "step": 14744 }, { "epoch": 0.996687846424226, "grad_norm": 2.3406407833099365, "learning_rate": 9.072324632791728e-10, "loss": 0.31048583984375, "step": 14745 }, { "epoch": 0.9967554413951601, "grad_norm": 1.4982587099075317, "learning_rate": 8.71306405547112e-10, "loss": 0.283782958984375, "step": 14746 }, { "epoch": 0.9968230363660944, "grad_norm": 1.5553295612335205, "learning_rate": 8.361060989409142e-10, "loss": 0.1778564453125, "step": 14747 }, { "epoch": 0.9968906313370285, "grad_norm": 0.7613769173622131, "learning_rate": 8.016315451608858e-10, "loss": 0.181884765625, "step": 14748 }, { "epoch": 0.9969582263079627, "grad_norm": 0.8365875482559204, "learning_rate": 7.678827458790228e-10, "loss": 0.24420166015625, "step": 14749 }, { "epoch": 0.9970258212788968, "grad_norm": 1.7460743188858032, "learning_rate": 7.348597027256876e-10, "loss": 0.1982421875, "step": 14750 }, { "epoch": 0.997093416249831, "grad_norm": 1.3518515825271606, "learning_rate": 7.025624172979362e-10, "loss": 0.257659912109375, "step": 14751 }, { "epoch": 0.9971610112207652, "grad_norm": 2.0889408588409424, "learning_rate": 6.709908911611828e-10, "loss": 0.28387451171875, "step": 14752 }, { "epoch": 0.9972286061916993, "grad_norm": 1.4167648553848267, "learning_rate": 6.401451258408742e-10, "loss": 0.19817352294921875, "step": 14753 }, { "epoch": 0.9972962011626335, "grad_norm": 1.1729532480239868, "learning_rate": 6.100251228308151e-10, "loss": 0.2113800048828125, "step": 14754 }, { "epoch": 0.9973637961335676, "grad_norm": 1.0965725183486938, "learning_rate": 5.806308835881735e-10, "loss": 0.2430419921875, "step": 14755 }, { "epoch": 0.9974313911045019, "grad_norm": 1.4407709836959839, "learning_rate": 5.51962409535145e-10, "loss": 0.2725830078125, "step": 14756 }, { "epoch": 0.997498986075436, "grad_norm": 1.2282716035842896, "learning_rate": 5.240197020572879e-10, "loss": 0.21533203125, "step": 14757 }, { "epoch": 0.9975665810463702, "grad_norm": 1.0697319507598877, "learning_rate": 4.968027625101845e-10, "loss": 0.2301025390625, "step": 14758 }, { "epoch": 0.9976341760173043, "grad_norm": 1.47311270236969, "learning_rate": 4.703115922077839e-10, "loss": 0.268402099609375, "step": 14759 }, { "epoch": 0.9977017709882384, "grad_norm": 1.4097124338150024, "learning_rate": 4.4454619243239347e-10, "loss": 0.16454696655273438, "step": 14760 }, { "epoch": 0.9977693659591726, "grad_norm": 1.6708511114120483, "learning_rate": 4.1950656443134893e-10, "loss": 0.2295684814453125, "step": 14761 }, { "epoch": 0.9978369609301068, "grad_norm": 1.1648060083389282, "learning_rate": 3.9519270941534847e-10, "loss": 0.220001220703125, "step": 14762 }, { "epoch": 0.997904555901041, "grad_norm": 1.2766621112823486, "learning_rate": 3.716046285617836e-10, "loss": 0.2856903076171875, "step": 14763 }, { "epoch": 0.9979721508719751, "grad_norm": 2.207432746887207, "learning_rate": 3.4874232301307375e-10, "loss": 0.28155517578125, "step": 14764 }, { "epoch": 0.9980397458429093, "grad_norm": 1.7050118446350098, "learning_rate": 3.2660579387333576e-10, "loss": 0.298492431640625, "step": 14765 }, { "epoch": 0.9981073408138434, "grad_norm": 1.1633473634719849, "learning_rate": 3.051950422133798e-10, "loss": 0.17693328857421875, "step": 14766 }, { "epoch": 0.9981749357847776, "grad_norm": 1.4000818729400635, "learning_rate": 2.845100690707092e-10, "loss": 0.21717071533203125, "step": 14767 }, { "epoch": 0.9982425307557118, "grad_norm": 1.4687381982803345, "learning_rate": 2.6455087544619006e-10, "loss": 0.27838134765625, "step": 14768 }, { "epoch": 0.9983101257266459, "grad_norm": 1.0096091032028198, "learning_rate": 2.4531746230405105e-10, "loss": 0.220733642578125, "step": 14769 }, { "epoch": 0.9983777206975801, "grad_norm": 1.547600269317627, "learning_rate": 2.2680983057687954e-10, "loss": 0.2679443359375, "step": 14770 }, { "epoch": 0.9984453156685142, "grad_norm": 1.740200400352478, "learning_rate": 2.0902798115896017e-10, "loss": 0.242584228515625, "step": 14771 }, { "epoch": 0.9985129106394485, "grad_norm": 1.1167445182800293, "learning_rate": 1.9197191490960553e-10, "loss": 0.202362060546875, "step": 14772 }, { "epoch": 0.9985805056103826, "grad_norm": 1.2987149953842163, "learning_rate": 1.7564163265815226e-10, "loss": 0.214935302734375, "step": 14773 }, { "epoch": 0.9986481005813167, "grad_norm": 2.5389883518218994, "learning_rate": 1.6003713519063823e-10, "loss": 0.32965087890625, "step": 14774 }, { "epoch": 0.9987156955522509, "grad_norm": 1.216087818145752, "learning_rate": 1.4515842326312535e-10, "loss": 0.301513671875, "step": 14775 }, { "epoch": 0.998783290523185, "grad_norm": 2.1286885738372803, "learning_rate": 1.3100549759670343e-10, "loss": 0.34423828125, "step": 14776 }, { "epoch": 0.9988508854941193, "grad_norm": 0.993255078792572, "learning_rate": 1.1757835887415968e-10, "loss": 0.1847991943359375, "step": 14777 }, { "epoch": 0.9989184804650534, "grad_norm": 1.0246965885162354, "learning_rate": 1.0487700774663989e-10, "loss": 0.2393798828125, "step": 14778 }, { "epoch": 0.9989860754359876, "grad_norm": 1.0627450942993164, "learning_rate": 9.290144483031781e-11, "loss": 0.2376708984375, "step": 14779 }, { "epoch": 0.9990536704069217, "grad_norm": 1.4629020690917969, "learning_rate": 8.165167070139922e-11, "loss": 0.203399658203125, "step": 14780 }, { "epoch": 0.9991212653778558, "grad_norm": 1.0729265213012695, "learning_rate": 7.112768590611385e-11, "loss": 0.159332275390625, "step": 14781 }, { "epoch": 0.9991888603487901, "grad_norm": 1.2248142957687378, "learning_rate": 6.13294909523887e-11, "loss": 0.206390380859375, "step": 14782 }, { "epoch": 0.9992564553197242, "grad_norm": 1.0605491399765015, "learning_rate": 5.2257086316509495e-11, "loss": 0.2110748291015625, "step": 14783 }, { "epoch": 0.9993240502906584, "grad_norm": 1.6036510467529297, "learning_rate": 4.391047243479385e-11, "loss": 0.189178466796875, "step": 14784 }, { "epoch": 0.9993916452615925, "grad_norm": 2.1161680221557617, "learning_rate": 3.62896497135834e-11, "loss": 0.265167236328125, "step": 14785 }, { "epoch": 0.9994592402325267, "grad_norm": 2.022165536880493, "learning_rate": 2.939461851925174e-11, "loss": 0.31884765625, "step": 14786 }, { "epoch": 0.9995268352034609, "grad_norm": 1.1940237283706665, "learning_rate": 2.322537918819645e-11, "loss": 0.20404052734375, "step": 14787 }, { "epoch": 0.999594430174395, "grad_norm": 2.0920958518981934, "learning_rate": 1.778193201684708e-11, "loss": 0.310150146484375, "step": 14788 }, { "epoch": 0.9996620251453292, "grad_norm": 0.9820833206176758, "learning_rate": 1.3064277268326486e-11, "loss": 0.1751861572265625, "step": 14789 }, { "epoch": 0.9997296201162633, "grad_norm": 2.185681104660034, "learning_rate": 9.072415172450831e-12, "loss": 0.27001953125, "step": 14790 }, { "epoch": 0.9997972150871975, "grad_norm": 1.3038108348846436, "learning_rate": 5.806345920733591e-12, "loss": 0.1719512939453125, "step": 14791 }, { "epoch": 0.9998648100581317, "grad_norm": 1.086390495300293, "learning_rate": 3.2660696730468786e-12, "loss": 0.222442626953125, "step": 14792 }, { "epoch": 0.9999324050290659, "grad_norm": 1.5684800148010254, "learning_rate": 1.4515865509601157e-12, "loss": 0.213287353515625, "step": 14793 }, { "epoch": 1.0, "grad_norm": 2.246612310409546, "learning_rate": 3.628966427360325e-13, "loss": 0.3192138671875, "step": 14794 }, { "epoch": 1.0, "step": 14794, "total_flos": 7.96498074587659e+19, "train_loss": 0.2497988196181271, "train_runtime": 108384.128, "train_samples_per_second": 17.471, "train_steps_per_second": 0.136 } ], "logging_steps": 1.0, "max_steps": 14794, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.96498074587659e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }