{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 7397, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00013519336876526207, "grad_norm": 88.03363800048828, "learning_rate": 0.0, "loss": 0.767974853515625, "step": 1 }, { "epoch": 0.00027038673753052413, "grad_norm": 89.86097717285156, "learning_rate": 5.859375e-08, "loss": 0.769378662109375, "step": 2 }, { "epoch": 0.0004055801062957862, "grad_norm": 91.50601196289062, "learning_rate": 1.171875e-07, "loss": 0.768035888671875, "step": 3 }, { "epoch": 0.0005407734750610483, "grad_norm": 84.51274871826172, "learning_rate": 1.7578125e-07, "loss": 0.7620849609375, "step": 4 }, { "epoch": 0.0006759668438263103, "grad_norm": 91.75151824951172, "learning_rate": 2.34375e-07, "loss": 0.767547607421875, "step": 5 }, { "epoch": 0.0008111602125915724, "grad_norm": 88.40542602539062, "learning_rate": 2.9296875000000003e-07, "loss": 0.76806640625, "step": 6 }, { "epoch": 0.0009463535813568345, "grad_norm": 88.53487396240234, "learning_rate": 3.515625e-07, "loss": 0.767822265625, "step": 7 }, { "epoch": 0.0010815469501220965, "grad_norm": 89.52659606933594, "learning_rate": 4.1015625e-07, "loss": 0.765960693359375, "step": 8 }, { "epoch": 0.0012167403188873586, "grad_norm": 91.50320434570312, "learning_rate": 4.6875e-07, "loss": 0.76519775390625, "step": 9 }, { "epoch": 0.0013519336876526207, "grad_norm": 88.8525619506836, "learning_rate": 5.2734375e-07, "loss": 0.76153564453125, "step": 10 }, { "epoch": 0.0014871270564178827, "grad_norm": 89.35160827636719, "learning_rate": 5.859375000000001e-07, "loss": 0.76251220703125, "step": 11 }, { "epoch": 0.0016223204251831448, "grad_norm": 89.51719665527344, "learning_rate": 6.4453125e-07, "loss": 0.76220703125, "step": 12 }, { "epoch": 0.0017575137939484068, "grad_norm": 91.03227996826172, "learning_rate": 7.03125e-07, "loss": 0.75811767578125, "step": 13 }, { "epoch": 0.001892707162713669, "grad_norm": 91.70550537109375, "learning_rate": 7.6171875e-07, "loss": 0.75830078125, "step": 14 }, { "epoch": 0.0020279005314789308, "grad_norm": 91.33809661865234, "learning_rate": 8.203125e-07, "loss": 0.75732421875, "step": 15 }, { "epoch": 0.002163093900244193, "grad_norm": 89.25840759277344, "learning_rate": 8.7890625e-07, "loss": 0.749755859375, "step": 16 }, { "epoch": 0.002298287269009455, "grad_norm": 90.64176177978516, "learning_rate": 9.375e-07, "loss": 0.748870849609375, "step": 17 }, { "epoch": 0.002433480637774717, "grad_norm": 85.99710083007812, "learning_rate": 9.9609375e-07, "loss": 0.738128662109375, "step": 18 }, { "epoch": 0.002568674006539979, "grad_norm": 90.21903991699219, "learning_rate": 1.0546875e-06, "loss": 0.736785888671875, "step": 19 }, { "epoch": 0.0027038673753052413, "grad_norm": 85.76763916015625, "learning_rate": 1.11328125e-06, "loss": 0.7325439453125, "step": 20 }, { "epoch": 0.002839060744070503, "grad_norm": 86.10233306884766, "learning_rate": 1.1718750000000001e-06, "loss": 0.731903076171875, "step": 21 }, { "epoch": 0.0029742541128357654, "grad_norm": 82.16548919677734, "learning_rate": 1.23046875e-06, "loss": 0.70294189453125, "step": 22 }, { "epoch": 0.0031094474816010273, "grad_norm": 84.72628021240234, "learning_rate": 1.2890625e-06, "loss": 0.70013427734375, "step": 23 }, { "epoch": 0.0032446408503662896, "grad_norm": 83.80533599853516, "learning_rate": 1.34765625e-06, "loss": 0.698394775390625, "step": 24 }, { "epoch": 0.0033798342191315514, "grad_norm": 81.4764175415039, "learning_rate": 1.40625e-06, "loss": 0.670745849609375, "step": 25 }, { "epoch": 0.0035150275878968137, "grad_norm": 79.31185913085938, "learning_rate": 1.46484375e-06, "loss": 0.666015625, "step": 26 }, { "epoch": 0.0036502209566620755, "grad_norm": 79.19160461425781, "learning_rate": 1.5234375e-06, "loss": 0.660888671875, "step": 27 }, { "epoch": 0.003785414325427338, "grad_norm": 76.50727844238281, "learning_rate": 1.5820312500000001e-06, "loss": 0.660675048828125, "step": 28 }, { "epoch": 0.0039206076941926, "grad_norm": 79.90402221679688, "learning_rate": 1.640625e-06, "loss": 0.648712158203125, "step": 29 }, { "epoch": 0.0040558010629578615, "grad_norm": 67.9672622680664, "learning_rate": 1.69921875e-06, "loss": 0.5775146484375, "step": 30 }, { "epoch": 0.004190994431723124, "grad_norm": 68.07891082763672, "learning_rate": 1.7578125e-06, "loss": 0.565826416015625, "step": 31 }, { "epoch": 0.004326187800488386, "grad_norm": 69.5920639038086, "learning_rate": 1.81640625e-06, "loss": 0.560821533203125, "step": 32 }, { "epoch": 0.004461381169253648, "grad_norm": 70.55146789550781, "learning_rate": 1.875e-06, "loss": 0.5455322265625, "step": 33 }, { "epoch": 0.00459657453801891, "grad_norm": 62.89144515991211, "learning_rate": 1.93359375e-06, "loss": 0.5310516357421875, "step": 34 }, { "epoch": 0.0047317679067841725, "grad_norm": 62.215370178222656, "learning_rate": 1.9921875e-06, "loss": 0.5177001953125, "step": 35 }, { "epoch": 0.004866961275549434, "grad_norm": 60.91170883178711, "learning_rate": 2.05078125e-06, "loss": 0.5041046142578125, "step": 36 }, { "epoch": 0.005002154644314696, "grad_norm": 62.39719772338867, "learning_rate": 2.109375e-06, "loss": 0.4893798828125, "step": 37 }, { "epoch": 0.005137348013079958, "grad_norm": 60.21321487426758, "learning_rate": 2.16796875e-06, "loss": 0.4813232421875, "step": 38 }, { "epoch": 0.005272541381845221, "grad_norm": 52.77524185180664, "learning_rate": 2.2265625e-06, "loss": 0.470611572265625, "step": 39 }, { "epoch": 0.005407734750610483, "grad_norm": 45.11795425415039, "learning_rate": 2.28515625e-06, "loss": 0.432891845703125, "step": 40 }, { "epoch": 0.0055429281193757445, "grad_norm": 40.704158782958984, "learning_rate": 2.3437500000000002e-06, "loss": 0.3948516845703125, "step": 41 }, { "epoch": 0.005678121488141006, "grad_norm": 36.75060272216797, "learning_rate": 2.40234375e-06, "loss": 0.3881683349609375, "step": 42 }, { "epoch": 0.005813314856906269, "grad_norm": 32.00703811645508, "learning_rate": 2.4609375e-06, "loss": 0.402679443359375, "step": 43 }, { "epoch": 0.005948508225671531, "grad_norm": 35.68880844116211, "learning_rate": 2.5195312500000003e-06, "loss": 0.3690643310546875, "step": 44 }, { "epoch": 0.006083701594436793, "grad_norm": 35.64313507080078, "learning_rate": 2.578125e-06, "loss": 0.356475830078125, "step": 45 }, { "epoch": 0.0062188949632020546, "grad_norm": 36.784759521484375, "learning_rate": 2.63671875e-06, "loss": 0.3323516845703125, "step": 46 }, { "epoch": 0.006354088331967317, "grad_norm": 30.713638305664062, "learning_rate": 2.6953125e-06, "loss": 0.3551788330078125, "step": 47 }, { "epoch": 0.006489281700732579, "grad_norm": 34.66780471801758, "learning_rate": 2.75390625e-06, "loss": 0.3091888427734375, "step": 48 }, { "epoch": 0.006624475069497841, "grad_norm": 30.918582916259766, "learning_rate": 2.8125e-06, "loss": 0.31719970703125, "step": 49 }, { "epoch": 0.006759668438263103, "grad_norm": 28.330442428588867, "learning_rate": 2.87109375e-06, "loss": 0.32224273681640625, "step": 50 }, { "epoch": 0.0068948618070283655, "grad_norm": 25.909151077270508, "learning_rate": 2.9296875e-06, "loss": 0.32096099853515625, "step": 51 }, { "epoch": 0.007030055175793627, "grad_norm": 24.142841339111328, "learning_rate": 2.9882812500000002e-06, "loss": 0.31163787841796875, "step": 52 }, { "epoch": 0.007165248544558889, "grad_norm": 23.957773208618164, "learning_rate": 3.046875e-06, "loss": 0.29045867919921875, "step": 53 }, { "epoch": 0.007300441913324151, "grad_norm": 22.4742488861084, "learning_rate": 3.10546875e-06, "loss": 0.281585693359375, "step": 54 }, { "epoch": 0.007435635282089414, "grad_norm": 18.76061248779297, "learning_rate": 3.1640625000000003e-06, "loss": 0.294525146484375, "step": 55 }, { "epoch": 0.007570828650854676, "grad_norm": 17.834041595458984, "learning_rate": 3.22265625e-06, "loss": 0.2843017578125, "step": 56 }, { "epoch": 0.0077060220196199375, "grad_norm": 18.969289779663086, "learning_rate": 3.28125e-06, "loss": 0.25803375244140625, "step": 57 }, { "epoch": 0.0078412153883852, "grad_norm": 16.735214233398438, "learning_rate": 3.3398437500000003e-06, "loss": 0.26306915283203125, "step": 58 }, { "epoch": 0.007976408757150461, "grad_norm": 14.971871376037598, "learning_rate": 3.3984375e-06, "loss": 0.26770782470703125, "step": 59 }, { "epoch": 0.008111602125915723, "grad_norm": 14.197041511535645, "learning_rate": 3.45703125e-06, "loss": 0.26336669921875, "step": 60 }, { "epoch": 0.008246795494680987, "grad_norm": 14.076885223388672, "learning_rate": 3.515625e-06, "loss": 0.2498626708984375, "step": 61 }, { "epoch": 0.008381988863446248, "grad_norm": 13.335953712463379, "learning_rate": 3.57421875e-06, "loss": 0.243988037109375, "step": 62 }, { "epoch": 0.00851718223221151, "grad_norm": 11.799309730529785, "learning_rate": 3.6328125e-06, "loss": 0.24451446533203125, "step": 63 }, { "epoch": 0.008652375600976772, "grad_norm": 12.843095779418945, "learning_rate": 3.69140625e-06, "loss": 0.21314239501953125, "step": 64 }, { "epoch": 0.008787568969742034, "grad_norm": 10.60861587524414, "learning_rate": 3.75e-06, "loss": 0.22778701782226562, "step": 65 }, { "epoch": 0.008922762338507296, "grad_norm": 4.623114585876465, "learning_rate": 3.8085937500000002e-06, "loss": 0.2890357971191406, "step": 66 }, { "epoch": 0.009057955707272558, "grad_norm": 7.7785725593566895, "learning_rate": 3.8671875e-06, "loss": 0.23848342895507812, "step": 67 }, { "epoch": 0.00919314907603782, "grad_norm": 7.840065956115723, "learning_rate": 3.92578125e-06, "loss": 0.22467803955078125, "step": 68 }, { "epoch": 0.009328342444803083, "grad_norm": 8.62641716003418, "learning_rate": 3.984375e-06, "loss": 0.20023727416992188, "step": 69 }, { "epoch": 0.009463535813568345, "grad_norm": 8.374451637268066, "learning_rate": 4.0429687500000004e-06, "loss": 0.19073486328125, "step": 70 }, { "epoch": 0.009598729182333607, "grad_norm": 7.400808811187744, "learning_rate": 4.1015625e-06, "loss": 0.190460205078125, "step": 71 }, { "epoch": 0.009733922551098869, "grad_norm": 6.118727207183838, "learning_rate": 4.16015625e-06, "loss": 0.1943359375, "step": 72 }, { "epoch": 0.00986911591986413, "grad_norm": 4.306054592132568, "learning_rate": 4.21875e-06, "loss": 0.21149826049804688, "step": 73 }, { "epoch": 0.010004309288629392, "grad_norm": 2.8444812297821045, "learning_rate": 4.27734375e-06, "loss": 0.22626876831054688, "step": 74 }, { "epoch": 0.010139502657394654, "grad_norm": 2.178194046020508, "learning_rate": 4.3359375e-06, "loss": 0.2353515625, "step": 75 }, { "epoch": 0.010274696026159916, "grad_norm": 6.895608425140381, "learning_rate": 4.3945312500000005e-06, "loss": 0.1833648681640625, "step": 76 }, { "epoch": 0.01040988939492518, "grad_norm": 3.906135082244873, "learning_rate": 4.453125e-06, "loss": 0.19829177856445312, "step": 77 }, { "epoch": 0.010545082763690442, "grad_norm": 3.4365999698638916, "learning_rate": 4.51171875e-06, "loss": 0.1984233856201172, "step": 78 }, { "epoch": 0.010680276132455703, "grad_norm": 8.071659088134766, "learning_rate": 4.5703125e-06, "loss": 0.14834213256835938, "step": 79 }, { "epoch": 0.010815469501220965, "grad_norm": 5.206031322479248, "learning_rate": 4.62890625e-06, "loss": 0.16190338134765625, "step": 80 }, { "epoch": 0.010950662869986227, "grad_norm": 9.365129470825195, "learning_rate": 4.6875000000000004e-06, "loss": 0.16129302978515625, "step": 81 }, { "epoch": 0.011085856238751489, "grad_norm": 15.743746757507324, "learning_rate": 4.74609375e-06, "loss": 0.18073081970214844, "step": 82 }, { "epoch": 0.01122104960751675, "grad_norm": 6.636305332183838, "learning_rate": 4.8046875e-06, "loss": 0.16434097290039062, "step": 83 }, { "epoch": 0.011356242976282013, "grad_norm": 26.415525436401367, "learning_rate": 4.86328125e-06, "loss": 0.21837615966796875, "step": 84 }, { "epoch": 0.011491436345047276, "grad_norm": 26.364681243896484, "learning_rate": 4.921875e-06, "loss": 0.2114429473876953, "step": 85 }, { "epoch": 0.011626629713812538, "grad_norm": 29.980968475341797, "learning_rate": 4.98046875e-06, "loss": 0.18030548095703125, "step": 86 }, { "epoch": 0.0117618230825778, "grad_norm": 33.83200454711914, "learning_rate": 5.0390625000000005e-06, "loss": 0.19058799743652344, "step": 87 }, { "epoch": 0.011897016451343062, "grad_norm": 8.606954574584961, "learning_rate": 5.09765625e-06, "loss": 0.17997169494628906, "step": 88 }, { "epoch": 0.012032209820108324, "grad_norm": 4.509511947631836, "learning_rate": 5.15625e-06, "loss": 0.1287860870361328, "step": 89 }, { "epoch": 0.012167403188873585, "grad_norm": 5.7349162101745605, "learning_rate": 5.21484375e-06, "loss": 0.1718158721923828, "step": 90 }, { "epoch": 0.012302596557638847, "grad_norm": 23.79114532470703, "learning_rate": 5.2734375e-06, "loss": 0.1282062530517578, "step": 91 }, { "epoch": 0.012437789926404109, "grad_norm": 6.960205554962158, "learning_rate": 5.3320312500000004e-06, "loss": 0.1727924346923828, "step": 92 }, { "epoch": 0.012572983295169373, "grad_norm": 5.630927085876465, "learning_rate": 5.390625e-06, "loss": 0.1682109832763672, "step": 93 }, { "epoch": 0.012708176663934635, "grad_norm": 33.38479232788086, "learning_rate": 5.44921875e-06, "loss": 0.1725749969482422, "step": 94 }, { "epoch": 0.012843370032699896, "grad_norm": 25.859128952026367, "learning_rate": 5.5078125e-06, "loss": 0.14342308044433594, "step": 95 }, { "epoch": 0.012978563401465158, "grad_norm": 17.265249252319336, "learning_rate": 5.56640625e-06, "loss": 0.1385517120361328, "step": 96 }, { "epoch": 0.01311375677023042, "grad_norm": 13.762451171875, "learning_rate": 5.625e-06, "loss": 0.19484519958496094, "step": 97 }, { "epoch": 0.013248950138995682, "grad_norm": 11.274735450744629, "learning_rate": 5.6835937500000005e-06, "loss": 0.13864517211914062, "step": 98 }, { "epoch": 0.013384143507760944, "grad_norm": 30.696157455444336, "learning_rate": 5.7421875e-06, "loss": 0.1842355728149414, "step": 99 }, { "epoch": 0.013519336876526206, "grad_norm": 42.534507751464844, "learning_rate": 5.80078125e-06, "loss": 0.15462398529052734, "step": 100 }, { "epoch": 0.01365453024529147, "grad_norm": 10.378792762756348, "learning_rate": 5.859375e-06, "loss": 0.11968231201171875, "step": 101 }, { "epoch": 0.013789723614056731, "grad_norm": 5.789700031280518, "learning_rate": 5.91796875e-06, "loss": 0.18852615356445312, "step": 102 }, { "epoch": 0.013924916982821993, "grad_norm": 35.43119430541992, "learning_rate": 5.9765625000000004e-06, "loss": 0.1937999725341797, "step": 103 }, { "epoch": 0.014060110351587255, "grad_norm": 14.727746963500977, "learning_rate": 6.03515625e-06, "loss": 0.16605758666992188, "step": 104 }, { "epoch": 0.014195303720352517, "grad_norm": 32.22308349609375, "learning_rate": 6.09375e-06, "loss": 0.14065933227539062, "step": 105 }, { "epoch": 0.014330497089117778, "grad_norm": 27.742570877075195, "learning_rate": 6.15234375e-06, "loss": 0.1688098907470703, "step": 106 }, { "epoch": 0.01446569045788304, "grad_norm": 6.195815563201904, "learning_rate": 6.2109375e-06, "loss": 0.11390304565429688, "step": 107 }, { "epoch": 0.014600883826648302, "grad_norm": 6.322120666503906, "learning_rate": 6.26953125e-06, "loss": 0.147125244140625, "step": 108 }, { "epoch": 0.014736077195413566, "grad_norm": 24.588651657104492, "learning_rate": 6.3281250000000005e-06, "loss": 0.20888996124267578, "step": 109 }, { "epoch": 0.014871270564178828, "grad_norm": 21.922161102294922, "learning_rate": 6.38671875e-06, "loss": 0.1313915252685547, "step": 110 }, { "epoch": 0.01500646393294409, "grad_norm": 14.735121726989746, "learning_rate": 6.4453125e-06, "loss": 0.12705707550048828, "step": 111 }, { "epoch": 0.015141657301709351, "grad_norm": 17.854413986206055, "learning_rate": 6.50390625e-06, "loss": 0.12715816497802734, "step": 112 }, { "epoch": 0.015276850670474613, "grad_norm": 15.228596687316895, "learning_rate": 6.5625e-06, "loss": 0.12988948822021484, "step": 113 }, { "epoch": 0.015412044039239875, "grad_norm": 11.644367218017578, "learning_rate": 6.6210937500000004e-06, "loss": 0.160980224609375, "step": 114 }, { "epoch": 0.015547237408005137, "grad_norm": 38.006412506103516, "learning_rate": 6.679687500000001e-06, "loss": 0.16860294342041016, "step": 115 }, { "epoch": 0.0156824307767704, "grad_norm": 18.42274284362793, "learning_rate": 6.73828125e-06, "loss": 0.10754013061523438, "step": 116 }, { "epoch": 0.015817624145535662, "grad_norm": 5.729445934295654, "learning_rate": 6.796875e-06, "loss": 0.13247394561767578, "step": 117 }, { "epoch": 0.015952817514300922, "grad_norm": 8.216533660888672, "learning_rate": 6.85546875e-06, "loss": 0.13490581512451172, "step": 118 }, { "epoch": 0.016088010883066186, "grad_norm": 28.392295837402344, "learning_rate": 6.9140625e-06, "loss": 0.13926315307617188, "step": 119 }, { "epoch": 0.016223204251831446, "grad_norm": 13.207259178161621, "learning_rate": 6.9726562500000005e-06, "loss": 0.12112712860107422, "step": 120 }, { "epoch": 0.01635839762059671, "grad_norm": 14.374978065490723, "learning_rate": 7.03125e-06, "loss": 0.1497325897216797, "step": 121 }, { "epoch": 0.016493590989361973, "grad_norm": 14.391389846801758, "learning_rate": 7.08984375e-06, "loss": 0.10625457763671875, "step": 122 }, { "epoch": 0.016628784358127233, "grad_norm": 9.666500091552734, "learning_rate": 7.1484375e-06, "loss": 0.12035751342773438, "step": 123 }, { "epoch": 0.016763977726892497, "grad_norm": 9.767655372619629, "learning_rate": 7.20703125e-06, "loss": 0.11335468292236328, "step": 124 }, { "epoch": 0.016899171095657757, "grad_norm": 19.927940368652344, "learning_rate": 7.265625e-06, "loss": 0.16068744659423828, "step": 125 }, { "epoch": 0.01703436446442302, "grad_norm": 24.16889190673828, "learning_rate": 7.3242187500000006e-06, "loss": 0.13986587524414062, "step": 126 }, { "epoch": 0.01716955783318828, "grad_norm": 15.296323776245117, "learning_rate": 7.3828125e-06, "loss": 0.1767120361328125, "step": 127 }, { "epoch": 0.017304751201953544, "grad_norm": 9.244256973266602, "learning_rate": 7.44140625e-06, "loss": 0.1714944839477539, "step": 128 }, { "epoch": 0.017439944570718808, "grad_norm": 18.481584548950195, "learning_rate": 7.5e-06, "loss": 0.13541030883789062, "step": 129 }, { "epoch": 0.017575137939484068, "grad_norm": 20.11408233642578, "learning_rate": 7.55859375e-06, "loss": 0.16762447357177734, "step": 130 }, { "epoch": 0.01771033130824933, "grad_norm": 12.163348197937012, "learning_rate": 7.6171875000000005e-06, "loss": 0.13053226470947266, "step": 131 }, { "epoch": 0.01784552467701459, "grad_norm": 10.763473510742188, "learning_rate": 7.67578125e-06, "loss": 0.14937305450439453, "step": 132 }, { "epoch": 0.017980718045779855, "grad_norm": 18.54998207092285, "learning_rate": 7.734375e-06, "loss": 0.18165111541748047, "step": 133 }, { "epoch": 0.018115911414545115, "grad_norm": 14.987088203430176, "learning_rate": 7.792968750000001e-06, "loss": 0.11704730987548828, "step": 134 }, { "epoch": 0.01825110478331038, "grad_norm": 24.72374153137207, "learning_rate": 7.8515625e-06, "loss": 0.15272188186645508, "step": 135 }, { "epoch": 0.01838629815207564, "grad_norm": 15.230940818786621, "learning_rate": 7.91015625e-06, "loss": 0.12484550476074219, "step": 136 }, { "epoch": 0.018521491520840903, "grad_norm": 10.244939804077148, "learning_rate": 7.96875e-06, "loss": 0.1355447769165039, "step": 137 }, { "epoch": 0.018656684889606166, "grad_norm": 5.154263973236084, "learning_rate": 8.02734375e-06, "loss": 0.12298297882080078, "step": 138 }, { "epoch": 0.018791878258371426, "grad_norm": 12.942228317260742, "learning_rate": 8.085937500000001e-06, "loss": 0.126708984375, "step": 139 }, { "epoch": 0.01892707162713669, "grad_norm": 9.724465370178223, "learning_rate": 8.14453125e-06, "loss": 0.10453987121582031, "step": 140 }, { "epoch": 0.01906226499590195, "grad_norm": 19.185394287109375, "learning_rate": 8.203125e-06, "loss": 0.15420866012573242, "step": 141 }, { "epoch": 0.019197458364667214, "grad_norm": 21.297924041748047, "learning_rate": 8.26171875e-06, "loss": 0.1211385726928711, "step": 142 }, { "epoch": 0.019332651733432474, "grad_norm": 5.746158123016357, "learning_rate": 8.3203125e-06, "loss": 0.08761024475097656, "step": 143 }, { "epoch": 0.019467845102197737, "grad_norm": 40.37514114379883, "learning_rate": 8.37890625e-06, "loss": 0.19170904159545898, "step": 144 }, { "epoch": 0.019603038470963, "grad_norm": 41.8349609375, "learning_rate": 8.4375e-06, "loss": 0.17560100555419922, "step": 145 }, { "epoch": 0.01973823183972826, "grad_norm": 15.578014373779297, "learning_rate": 8.49609375e-06, "loss": 0.14916515350341797, "step": 146 }, { "epoch": 0.019873425208493525, "grad_norm": 13.305092811584473, "learning_rate": 8.5546875e-06, "loss": 0.12070083618164062, "step": 147 }, { "epoch": 0.020008618577258785, "grad_norm": 16.86214256286621, "learning_rate": 8.61328125e-06, "loss": 0.1086874008178711, "step": 148 }, { "epoch": 0.02014381194602405, "grad_norm": 7.134828090667725, "learning_rate": 8.671875e-06, "loss": 0.13480234146118164, "step": 149 }, { "epoch": 0.02027900531478931, "grad_norm": 21.424076080322266, "learning_rate": 8.73046875e-06, "loss": 0.18094730377197266, "step": 150 }, { "epoch": 0.020414198683554572, "grad_norm": 13.057845115661621, "learning_rate": 8.789062500000001e-06, "loss": 0.12444591522216797, "step": 151 }, { "epoch": 0.020549392052319832, "grad_norm": 4.724929332733154, "learning_rate": 8.84765625e-06, "loss": 0.1180415153503418, "step": 152 }, { "epoch": 0.020684585421085096, "grad_norm": 20.824792861938477, "learning_rate": 8.90625e-06, "loss": 0.17496871948242188, "step": 153 }, { "epoch": 0.02081977878985036, "grad_norm": 25.18788719177246, "learning_rate": 8.96484375e-06, "loss": 0.1593494415283203, "step": 154 }, { "epoch": 0.02095497215861562, "grad_norm": 17.29711151123047, "learning_rate": 9.0234375e-06, "loss": 0.13325977325439453, "step": 155 }, { "epoch": 0.021090165527380883, "grad_norm": 31.869096755981445, "learning_rate": 9.082031250000001e-06, "loss": 0.1534104347229004, "step": 156 }, { "epoch": 0.021225358896146143, "grad_norm": 35.00883865356445, "learning_rate": 9.140625e-06, "loss": 0.1826152801513672, "step": 157 }, { "epoch": 0.021360552264911407, "grad_norm": 19.997268676757812, "learning_rate": 9.19921875e-06, "loss": 0.14528989791870117, "step": 158 }, { "epoch": 0.021495745633676667, "grad_norm": 8.106069564819336, "learning_rate": 9.2578125e-06, "loss": 0.13573265075683594, "step": 159 }, { "epoch": 0.02163093900244193, "grad_norm": 11.43966293334961, "learning_rate": 9.31640625e-06, "loss": 0.13237857818603516, "step": 160 }, { "epoch": 0.021766132371207194, "grad_norm": 2.361283302307129, "learning_rate": 9.375000000000001e-06, "loss": 0.1256718635559082, "step": 161 }, { "epoch": 0.021901325739972454, "grad_norm": 13.342789649963379, "learning_rate": 9.43359375e-06, "loss": 0.14096593856811523, "step": 162 }, { "epoch": 0.022036519108737718, "grad_norm": 8.534233093261719, "learning_rate": 9.4921875e-06, "loss": 0.12235212326049805, "step": 163 }, { "epoch": 0.022171712477502978, "grad_norm": 3.0962111949920654, "learning_rate": 9.55078125e-06, "loss": 0.13095426559448242, "step": 164 }, { "epoch": 0.02230690584626824, "grad_norm": 9.89217758178711, "learning_rate": 9.609375e-06, "loss": 0.08809947967529297, "step": 165 }, { "epoch": 0.0224420992150335, "grad_norm": 5.463018417358398, "learning_rate": 9.66796875e-06, "loss": 0.10288095474243164, "step": 166 }, { "epoch": 0.022577292583798765, "grad_norm": 8.819249153137207, "learning_rate": 9.7265625e-06, "loss": 0.13170766830444336, "step": 167 }, { "epoch": 0.022712485952564025, "grad_norm": 8.91738224029541, "learning_rate": 9.78515625e-06, "loss": 0.11728525161743164, "step": 168 }, { "epoch": 0.02284767932132929, "grad_norm": 3.5539848804473877, "learning_rate": 9.84375e-06, "loss": 0.11752843856811523, "step": 169 }, { "epoch": 0.022982872690094552, "grad_norm": 5.445803642272949, "learning_rate": 9.90234375e-06, "loss": 0.14904022216796875, "step": 170 }, { "epoch": 0.023118066058859812, "grad_norm": 5.846347332000732, "learning_rate": 9.9609375e-06, "loss": 0.09321165084838867, "step": 171 }, { "epoch": 0.023253259427625076, "grad_norm": 2.9143776893615723, "learning_rate": 1.001953125e-05, "loss": 0.11193323135375977, "step": 172 }, { "epoch": 0.023388452796390336, "grad_norm": 1.7001529932022095, "learning_rate": 1.0078125000000001e-05, "loss": 0.08716917037963867, "step": 173 }, { "epoch": 0.0235236461651556, "grad_norm": 4.321895122528076, "learning_rate": 1.013671875e-05, "loss": 0.1195530891418457, "step": 174 }, { "epoch": 0.02365883953392086, "grad_norm": 6.5774970054626465, "learning_rate": 1.01953125e-05, "loss": 0.13956212997436523, "step": 175 }, { "epoch": 0.023794032902686123, "grad_norm": 4.023632049560547, "learning_rate": 1.025390625e-05, "loss": 0.10661029815673828, "step": 176 }, { "epoch": 0.023929226271451387, "grad_norm": 4.219208240509033, "learning_rate": 1.03125e-05, "loss": 0.12139606475830078, "step": 177 }, { "epoch": 0.024064419640216647, "grad_norm": 2.6929924488067627, "learning_rate": 1.0371093750000001e-05, "loss": 0.11550712585449219, "step": 178 }, { "epoch": 0.02419961300898191, "grad_norm": 7.381962299346924, "learning_rate": 1.04296875e-05, "loss": 0.12038326263427734, "step": 179 }, { "epoch": 0.02433480637774717, "grad_norm": 2.777801752090454, "learning_rate": 1.048828125e-05, "loss": 0.10917186737060547, "step": 180 }, { "epoch": 0.024469999746512434, "grad_norm": 12.518394470214844, "learning_rate": 1.0546875e-05, "loss": 0.09705829620361328, "step": 181 }, { "epoch": 0.024605193115277695, "grad_norm": 11.730567932128906, "learning_rate": 1.060546875e-05, "loss": 0.11596536636352539, "step": 182 }, { "epoch": 0.024740386484042958, "grad_norm": 9.598226547241211, "learning_rate": 1.0664062500000001e-05, "loss": 0.15691423416137695, "step": 183 }, { "epoch": 0.024875579852808218, "grad_norm": 8.342377662658691, "learning_rate": 1.072265625e-05, "loss": 0.1196894645690918, "step": 184 }, { "epoch": 0.025010773221573482, "grad_norm": 8.918068885803223, "learning_rate": 1.078125e-05, "loss": 0.12574005126953125, "step": 185 }, { "epoch": 0.025145966590338745, "grad_norm": 3.005324363708496, "learning_rate": 1.083984375e-05, "loss": 0.12017583847045898, "step": 186 }, { "epoch": 0.025281159959104006, "grad_norm": 10.396531105041504, "learning_rate": 1.08984375e-05, "loss": 0.12627720832824707, "step": 187 }, { "epoch": 0.02541635332786927, "grad_norm": 23.966936111450195, "learning_rate": 1.095703125e-05, "loss": 0.169081449508667, "step": 188 }, { "epoch": 0.02555154669663453, "grad_norm": 14.574640274047852, "learning_rate": 1.1015625e-05, "loss": 0.12852835655212402, "step": 189 }, { "epoch": 0.025686740065399793, "grad_norm": 1.5561856031417847, "learning_rate": 1.1074218750000001e-05, "loss": 0.1138162612915039, "step": 190 }, { "epoch": 0.025821933434165053, "grad_norm": 12.832440376281738, "learning_rate": 1.11328125e-05, "loss": 0.11830949783325195, "step": 191 }, { "epoch": 0.025957126802930316, "grad_norm": 8.318037033081055, "learning_rate": 1.119140625e-05, "loss": 0.08885073661804199, "step": 192 }, { "epoch": 0.026092320171695577, "grad_norm": 2.1923887729644775, "learning_rate": 1.125e-05, "loss": 0.11017823219299316, "step": 193 }, { "epoch": 0.02622751354046084, "grad_norm": 19.12151336669922, "learning_rate": 1.130859375e-05, "loss": 0.17975258827209473, "step": 194 }, { "epoch": 0.026362706909226104, "grad_norm": 13.106063842773438, "learning_rate": 1.1367187500000001e-05, "loss": 0.12197279930114746, "step": 195 }, { "epoch": 0.026497900277991364, "grad_norm": 11.476738929748535, "learning_rate": 1.142578125e-05, "loss": 0.1254715919494629, "step": 196 }, { "epoch": 0.026633093646756627, "grad_norm": 2.7868235111236572, "learning_rate": 1.1484375e-05, "loss": 0.0923452377319336, "step": 197 }, { "epoch": 0.026768287015521888, "grad_norm": 4.950589179992676, "learning_rate": 1.154296875e-05, "loss": 0.1008601188659668, "step": 198 }, { "epoch": 0.02690348038428715, "grad_norm": 3.5299816131591797, "learning_rate": 1.16015625e-05, "loss": 0.1331310272216797, "step": 199 }, { "epoch": 0.02703867375305241, "grad_norm": 4.6229166984558105, "learning_rate": 1.1660156250000001e-05, "loss": 0.11776399612426758, "step": 200 }, { "epoch": 0.027173867121817675, "grad_norm": 5.123754978179932, "learning_rate": 1.171875e-05, "loss": 0.11093664169311523, "step": 201 }, { "epoch": 0.02730906049058294, "grad_norm": 1.3746099472045898, "learning_rate": 1.177734375e-05, "loss": 0.10559463500976562, "step": 202 }, { "epoch": 0.0274442538593482, "grad_norm": 5.895517349243164, "learning_rate": 1.18359375e-05, "loss": 0.1263742446899414, "step": 203 }, { "epoch": 0.027579447228113462, "grad_norm": 6.1869378089904785, "learning_rate": 1.189453125e-05, "loss": 0.1250779628753662, "step": 204 }, { "epoch": 0.027714640596878722, "grad_norm": 1.8732315301895142, "learning_rate": 1.1953125000000001e-05, "loss": 0.09824371337890625, "step": 205 }, { "epoch": 0.027849833965643986, "grad_norm": 3.040196657180786, "learning_rate": 1.201171875e-05, "loss": 0.13049840927124023, "step": 206 }, { "epoch": 0.027985027334409246, "grad_norm": 5.667483329772949, "learning_rate": 1.20703125e-05, "loss": 0.1139378547668457, "step": 207 }, { "epoch": 0.02812022070317451, "grad_norm": 1.674835205078125, "learning_rate": 1.212890625e-05, "loss": 0.08015131950378418, "step": 208 }, { "epoch": 0.02825541407193977, "grad_norm": 3.9440863132476807, "learning_rate": 1.21875e-05, "loss": 0.09508085250854492, "step": 209 }, { "epoch": 0.028390607440705033, "grad_norm": 2.1189498901367188, "learning_rate": 1.224609375e-05, "loss": 0.11002635955810547, "step": 210 }, { "epoch": 0.028525800809470297, "grad_norm": 2.2205352783203125, "learning_rate": 1.23046875e-05, "loss": 0.09447610378265381, "step": 211 }, { "epoch": 0.028660994178235557, "grad_norm": 9.254800796508789, "learning_rate": 1.2363281250000001e-05, "loss": 0.1143198013305664, "step": 212 }, { "epoch": 0.02879618754700082, "grad_norm": 6.819960117340088, "learning_rate": 1.2421875e-05, "loss": 0.10503458976745605, "step": 213 }, { "epoch": 0.02893138091576608, "grad_norm": 12.390182495117188, "learning_rate": 1.248046875e-05, "loss": 0.13003826141357422, "step": 214 }, { "epoch": 0.029066574284531344, "grad_norm": 2.660463333129883, "learning_rate": 1.25390625e-05, "loss": 0.0995473861694336, "step": 215 }, { "epoch": 0.029201767653296604, "grad_norm": 2.5199267864227295, "learning_rate": 1.259765625e-05, "loss": 0.09882307052612305, "step": 216 }, { "epoch": 0.029336961022061868, "grad_norm": 3.099677085876465, "learning_rate": 1.2656250000000001e-05, "loss": 0.1080394983291626, "step": 217 }, { "epoch": 0.02947215439082713, "grad_norm": 10.921390533447266, "learning_rate": 1.271484375e-05, "loss": 0.1387948989868164, "step": 218 }, { "epoch": 0.02960734775959239, "grad_norm": 1.8139811754226685, "learning_rate": 1.27734375e-05, "loss": 0.08462834358215332, "step": 219 }, { "epoch": 0.029742541128357655, "grad_norm": 2.6657485961914062, "learning_rate": 1.283203125e-05, "loss": 0.11264526844024658, "step": 220 }, { "epoch": 0.029877734497122915, "grad_norm": 8.381332397460938, "learning_rate": 1.2890625e-05, "loss": 0.12684082984924316, "step": 221 }, { "epoch": 0.03001292786588818, "grad_norm": 3.141138792037964, "learning_rate": 1.2949218750000001e-05, "loss": 0.1339268684387207, "step": 222 }, { "epoch": 0.03014812123465344, "grad_norm": 8.974042892456055, "learning_rate": 1.30078125e-05, "loss": 0.12345361709594727, "step": 223 }, { "epoch": 0.030283314603418703, "grad_norm": 5.3078436851501465, "learning_rate": 1.306640625e-05, "loss": 0.09230971336364746, "step": 224 }, { "epoch": 0.030418507972183963, "grad_norm": 1.307445764541626, "learning_rate": 1.3125e-05, "loss": 0.08745694160461426, "step": 225 }, { "epoch": 0.030553701340949226, "grad_norm": 4.963911056518555, "learning_rate": 1.318359375e-05, "loss": 0.09627199172973633, "step": 226 }, { "epoch": 0.03068889470971449, "grad_norm": 3.65700364112854, "learning_rate": 1.3242187500000001e-05, "loss": 0.10228323936462402, "step": 227 }, { "epoch": 0.03082408807847975, "grad_norm": 3.8001017570495605, "learning_rate": 1.330078125e-05, "loss": 0.12071537971496582, "step": 228 }, { "epoch": 0.030959281447245014, "grad_norm": 10.60185718536377, "learning_rate": 1.3359375000000001e-05, "loss": 0.10210299491882324, "step": 229 }, { "epoch": 0.031094474816010274, "grad_norm": 16.104564666748047, "learning_rate": 1.341796875e-05, "loss": 0.11900615692138672, "step": 230 }, { "epoch": 0.031229668184775537, "grad_norm": 8.709383010864258, "learning_rate": 1.34765625e-05, "loss": 0.15693950653076172, "step": 231 }, { "epoch": 0.0313648615535408, "grad_norm": 15.20444107055664, "learning_rate": 1.353515625e-05, "loss": 0.12135660648345947, "step": 232 }, { "epoch": 0.03150005492230606, "grad_norm": 14.495709419250488, "learning_rate": 1.359375e-05, "loss": 0.12809765338897705, "step": 233 }, { "epoch": 0.031635248291071325, "grad_norm": 1.579339861869812, "learning_rate": 1.3652343750000001e-05, "loss": 0.06402897834777832, "step": 234 }, { "epoch": 0.031770441659836585, "grad_norm": 4.174615383148193, "learning_rate": 1.37109375e-05, "loss": 0.09828853607177734, "step": 235 }, { "epoch": 0.031905635028601845, "grad_norm": 4.023648262023926, "learning_rate": 1.376953125e-05, "loss": 0.12398958206176758, "step": 236 }, { "epoch": 0.03204082839736711, "grad_norm": 2.566143035888672, "learning_rate": 1.3828125e-05, "loss": 0.09736800193786621, "step": 237 }, { "epoch": 0.03217602176613237, "grad_norm": 3.476241111755371, "learning_rate": 1.388671875e-05, "loss": 0.11614418029785156, "step": 238 }, { "epoch": 0.03231121513489763, "grad_norm": 2.4547955989837646, "learning_rate": 1.3945312500000001e-05, "loss": 0.10267853736877441, "step": 239 }, { "epoch": 0.03244640850366289, "grad_norm": 2.6375529766082764, "learning_rate": 1.400390625e-05, "loss": 0.08126914501190186, "step": 240 }, { "epoch": 0.03258160187242816, "grad_norm": 2.8025100231170654, "learning_rate": 1.40625e-05, "loss": 0.07041752338409424, "step": 241 }, { "epoch": 0.03271679524119342, "grad_norm": 3.087529182434082, "learning_rate": 1.412109375e-05, "loss": 0.08452367782592773, "step": 242 }, { "epoch": 0.03285198860995868, "grad_norm": 2.6189351081848145, "learning_rate": 1.41796875e-05, "loss": 0.10260534286499023, "step": 243 }, { "epoch": 0.032987181978723946, "grad_norm": 5.7942399978637695, "learning_rate": 1.4238281250000001e-05, "loss": 0.1002422571182251, "step": 244 }, { "epoch": 0.03312237534748921, "grad_norm": 2.233701705932617, "learning_rate": 1.4296875e-05, "loss": 0.1160893440246582, "step": 245 }, { "epoch": 0.03325756871625447, "grad_norm": 1.7791078090667725, "learning_rate": 1.435546875e-05, "loss": 0.14070630073547363, "step": 246 }, { "epoch": 0.03339276208501973, "grad_norm": 4.403859615325928, "learning_rate": 1.44140625e-05, "loss": 0.1036067008972168, "step": 247 }, { "epoch": 0.033527955453784994, "grad_norm": 4.750607967376709, "learning_rate": 1.447265625e-05, "loss": 0.10971546173095703, "step": 248 }, { "epoch": 0.033663148822550254, "grad_norm": 2.128018856048584, "learning_rate": 1.453125e-05, "loss": 0.0850905179977417, "step": 249 }, { "epoch": 0.033798342191315514, "grad_norm": 3.6158080101013184, "learning_rate": 1.458984375e-05, "loss": 0.0932382345199585, "step": 250 }, { "epoch": 0.03393353556008078, "grad_norm": 2.7658867835998535, "learning_rate": 1.4648437500000001e-05, "loss": 0.07552975416183472, "step": 251 }, { "epoch": 0.03406872892884604, "grad_norm": 4.574974060058594, "learning_rate": 1.470703125e-05, "loss": 0.0757284164428711, "step": 252 }, { "epoch": 0.0342039222976113, "grad_norm": 5.151226997375488, "learning_rate": 1.4765625e-05, "loss": 0.0752873420715332, "step": 253 }, { "epoch": 0.03433911566637656, "grad_norm": 2.2354824542999268, "learning_rate": 1.482421875e-05, "loss": 0.10274958610534668, "step": 254 }, { "epoch": 0.03447430903514183, "grad_norm": 2.9898624420166016, "learning_rate": 1.48828125e-05, "loss": 0.09230160713195801, "step": 255 }, { "epoch": 0.03460950240390709, "grad_norm": 4.158736228942871, "learning_rate": 1.4941406250000001e-05, "loss": 0.08805322647094727, "step": 256 }, { "epoch": 0.03474469577267235, "grad_norm": 1.8160712718963623, "learning_rate": 1.5e-05, "loss": 0.08407831192016602, "step": 257 }, { "epoch": 0.034879889141437616, "grad_norm": 11.49789047241211, "learning_rate": 1.505859375e-05, "loss": 0.11480516195297241, "step": 258 }, { "epoch": 0.035015082510202876, "grad_norm": 6.841657638549805, "learning_rate": 1.51171875e-05, "loss": 0.09477484226226807, "step": 259 }, { "epoch": 0.035150275878968136, "grad_norm": 2.060495615005493, "learning_rate": 1.517578125e-05, "loss": 0.07318687438964844, "step": 260 }, { "epoch": 0.035285469247733396, "grad_norm": 5.91054105758667, "learning_rate": 1.5234375000000001e-05, "loss": 0.12297433614730835, "step": 261 }, { "epoch": 0.03542066261649866, "grad_norm": 9.994206428527832, "learning_rate": 1.529296875e-05, "loss": 0.10230755805969238, "step": 262 }, { "epoch": 0.03555585598526392, "grad_norm": 9.000950813293457, "learning_rate": 1.53515625e-05, "loss": 0.10456538200378418, "step": 263 }, { "epoch": 0.03569104935402918, "grad_norm": 3.6928207874298096, "learning_rate": 1.541015625e-05, "loss": 0.09900754690170288, "step": 264 }, { "epoch": 0.035826242722794444, "grad_norm": 2.467583179473877, "learning_rate": 1.546875e-05, "loss": 0.08031642436981201, "step": 265 }, { "epoch": 0.03596143609155971, "grad_norm": 15.056185722351074, "learning_rate": 1.552734375e-05, "loss": 0.15174436569213867, "step": 266 }, { "epoch": 0.03609662946032497, "grad_norm": 8.540653228759766, "learning_rate": 1.5585937500000002e-05, "loss": 0.10565590858459473, "step": 267 }, { "epoch": 0.03623182282909023, "grad_norm": 4.716104507446289, "learning_rate": 1.564453125e-05, "loss": 0.09699249267578125, "step": 268 }, { "epoch": 0.0363670161978555, "grad_norm": 2.3965976238250732, "learning_rate": 1.5703125e-05, "loss": 0.11889266967773438, "step": 269 }, { "epoch": 0.03650220956662076, "grad_norm": 1.9796611070632935, "learning_rate": 1.576171875e-05, "loss": 0.09106159210205078, "step": 270 }, { "epoch": 0.03663740293538602, "grad_norm": 2.821047306060791, "learning_rate": 1.58203125e-05, "loss": 0.061782002449035645, "step": 271 }, { "epoch": 0.03677259630415128, "grad_norm": 4.7325310707092285, "learning_rate": 1.587890625e-05, "loss": 0.09113740921020508, "step": 272 }, { "epoch": 0.036907789672916545, "grad_norm": 2.668755054473877, "learning_rate": 1.59375e-05, "loss": 0.09154939651489258, "step": 273 }, { "epoch": 0.037042983041681805, "grad_norm": 5.259016513824463, "learning_rate": 1.599609375e-05, "loss": 0.11018514633178711, "step": 274 }, { "epoch": 0.037178176410447066, "grad_norm": 4.589200496673584, "learning_rate": 1.60546875e-05, "loss": 0.10451722145080566, "step": 275 }, { "epoch": 0.03731336977921233, "grad_norm": 2.119976758956909, "learning_rate": 1.611328125e-05, "loss": 0.07092821598052979, "step": 276 }, { "epoch": 0.03744856314797759, "grad_norm": 2.3516979217529297, "learning_rate": 1.6171875000000002e-05, "loss": 0.09616303443908691, "step": 277 }, { "epoch": 0.03758375651674285, "grad_norm": 1.3791124820709229, "learning_rate": 1.623046875e-05, "loss": 0.09056389331817627, "step": 278 }, { "epoch": 0.03771894988550811, "grad_norm": 8.076896667480469, "learning_rate": 1.62890625e-05, "loss": 0.11744379997253418, "step": 279 }, { "epoch": 0.03785414325427338, "grad_norm": 3.684563636779785, "learning_rate": 1.634765625e-05, "loss": 0.08807528018951416, "step": 280 }, { "epoch": 0.03798933662303864, "grad_norm": 4.626955509185791, "learning_rate": 1.640625e-05, "loss": 0.10694241523742676, "step": 281 }, { "epoch": 0.0381245299918039, "grad_norm": 7.342104911804199, "learning_rate": 1.646484375e-05, "loss": 0.09790802001953125, "step": 282 }, { "epoch": 0.03825972336056917, "grad_norm": 7.423117637634277, "learning_rate": 1.65234375e-05, "loss": 0.12955760955810547, "step": 283 }, { "epoch": 0.03839491672933443, "grad_norm": 2.925480604171753, "learning_rate": 1.6582031250000002e-05, "loss": 0.09709501266479492, "step": 284 }, { "epoch": 0.03853011009809969, "grad_norm": 9.6500825881958, "learning_rate": 1.6640625e-05, "loss": 0.09623748064041138, "step": 285 }, { "epoch": 0.03866530346686495, "grad_norm": 10.83087158203125, "learning_rate": 1.669921875e-05, "loss": 0.10638195276260376, "step": 286 }, { "epoch": 0.038800496835630215, "grad_norm": 5.118061065673828, "learning_rate": 1.67578125e-05, "loss": 0.0744776725769043, "step": 287 }, { "epoch": 0.038935690204395475, "grad_norm": 1.7427037954330444, "learning_rate": 1.681640625e-05, "loss": 0.09165191650390625, "step": 288 }, { "epoch": 0.039070883573160735, "grad_norm": 4.346134662628174, "learning_rate": 1.6875e-05, "loss": 0.08267402648925781, "step": 289 }, { "epoch": 0.039206076941926, "grad_norm": 2.6110188961029053, "learning_rate": 1.693359375e-05, "loss": 0.08368420600891113, "step": 290 }, { "epoch": 0.03934127031069126, "grad_norm": 3.2085206508636475, "learning_rate": 1.69921875e-05, "loss": 0.0966956615447998, "step": 291 }, { "epoch": 0.03947646367945652, "grad_norm": 2.1107547283172607, "learning_rate": 1.705078125e-05, "loss": 0.10750734806060791, "step": 292 }, { "epoch": 0.03961165704822178, "grad_norm": 1.9825631380081177, "learning_rate": 1.7109375e-05, "loss": 0.1081845760345459, "step": 293 }, { "epoch": 0.03974685041698705, "grad_norm": 2.3374953269958496, "learning_rate": 1.7167968750000002e-05, "loss": 0.10037648677825928, "step": 294 }, { "epoch": 0.03988204378575231, "grad_norm": 1.1485666036605835, "learning_rate": 1.72265625e-05, "loss": 0.06504380702972412, "step": 295 }, { "epoch": 0.04001723715451757, "grad_norm": 1.4012730121612549, "learning_rate": 1.728515625e-05, "loss": 0.10542726516723633, "step": 296 }, { "epoch": 0.04015243052328283, "grad_norm": 3.4263930320739746, "learning_rate": 1.734375e-05, "loss": 0.08764410018920898, "step": 297 }, { "epoch": 0.0402876238920481, "grad_norm": 2.7934041023254395, "learning_rate": 1.740234375e-05, "loss": 0.07379543781280518, "step": 298 }, { "epoch": 0.04042281726081336, "grad_norm": 2.6881332397460938, "learning_rate": 1.74609375e-05, "loss": 0.06783699989318848, "step": 299 }, { "epoch": 0.04055801062957862, "grad_norm": 6.708810806274414, "learning_rate": 1.751953125e-05, "loss": 0.10776853561401367, "step": 300 }, { "epoch": 0.040693203998343884, "grad_norm": 5.752043724060059, "learning_rate": 1.7578125000000002e-05, "loss": 0.09417986869812012, "step": 301 }, { "epoch": 0.040828397367109144, "grad_norm": 4.546014308929443, "learning_rate": 1.763671875e-05, "loss": 0.09645891189575195, "step": 302 }, { "epoch": 0.040963590735874404, "grad_norm": 7.75122594833374, "learning_rate": 1.76953125e-05, "loss": 0.1010664701461792, "step": 303 }, { "epoch": 0.041098784104639664, "grad_norm": 5.185743808746338, "learning_rate": 1.775390625e-05, "loss": 0.09725743532180786, "step": 304 }, { "epoch": 0.04123397747340493, "grad_norm": 1.7176100015640259, "learning_rate": 1.78125e-05, "loss": 0.08327817916870117, "step": 305 }, { "epoch": 0.04136917084217019, "grad_norm": 5.91613245010376, "learning_rate": 1.787109375e-05, "loss": 0.09066081047058105, "step": 306 }, { "epoch": 0.04150436421093545, "grad_norm": 5.451717376708984, "learning_rate": 1.79296875e-05, "loss": 0.10851454734802246, "step": 307 }, { "epoch": 0.04163955757970072, "grad_norm": 1.243614673614502, "learning_rate": 1.798828125e-05, "loss": 0.07808101177215576, "step": 308 }, { "epoch": 0.04177475094846598, "grad_norm": 3.067702293395996, "learning_rate": 1.8046875e-05, "loss": 0.06620454788208008, "step": 309 }, { "epoch": 0.04190994431723124, "grad_norm": 2.0741145610809326, "learning_rate": 1.810546875e-05, "loss": 0.0906221866607666, "step": 310 }, { "epoch": 0.0420451376859965, "grad_norm": 1.381949782371521, "learning_rate": 1.8164062500000002e-05, "loss": 0.09767007827758789, "step": 311 }, { "epoch": 0.042180331054761766, "grad_norm": 3.3403677940368652, "learning_rate": 1.822265625e-05, "loss": 0.08916366100311279, "step": 312 }, { "epoch": 0.042315524423527026, "grad_norm": 6.893871307373047, "learning_rate": 1.828125e-05, "loss": 0.10216140747070312, "step": 313 }, { "epoch": 0.042450717792292286, "grad_norm": 7.702648162841797, "learning_rate": 1.833984375e-05, "loss": 0.09231257438659668, "step": 314 }, { "epoch": 0.04258591116105755, "grad_norm": 1.366671085357666, "learning_rate": 1.83984375e-05, "loss": 0.0905611515045166, "step": 315 }, { "epoch": 0.04272110452982281, "grad_norm": 3.0089540481567383, "learning_rate": 1.845703125e-05, "loss": 0.07928508520126343, "step": 316 }, { "epoch": 0.042856297898588074, "grad_norm": 1.875510811805725, "learning_rate": 1.8515625e-05, "loss": 0.09736001491546631, "step": 317 }, { "epoch": 0.042991491267353334, "grad_norm": 1.8423699140548706, "learning_rate": 1.8574218750000002e-05, "loss": 0.1001434326171875, "step": 318 }, { "epoch": 0.0431266846361186, "grad_norm": 1.1062639951705933, "learning_rate": 1.86328125e-05, "loss": 0.04751598834991455, "step": 319 }, { "epoch": 0.04326187800488386, "grad_norm": 7.962644100189209, "learning_rate": 1.869140625e-05, "loss": 0.09258556365966797, "step": 320 }, { "epoch": 0.04339707137364912, "grad_norm": 9.39422607421875, "learning_rate": 1.8750000000000002e-05, "loss": 0.11903655529022217, "step": 321 }, { "epoch": 0.04353226474241439, "grad_norm": 1.427226185798645, "learning_rate": 1.880859375e-05, "loss": 0.10064029693603516, "step": 322 }, { "epoch": 0.04366745811117965, "grad_norm": 3.133540630340576, "learning_rate": 1.88671875e-05, "loss": 0.06096315383911133, "step": 323 }, { "epoch": 0.04380265147994491, "grad_norm": 3.97255277633667, "learning_rate": 1.892578125e-05, "loss": 0.09675478935241699, "step": 324 }, { "epoch": 0.04393784484871017, "grad_norm": 1.0997313261032104, "learning_rate": 1.8984375e-05, "loss": 0.0714145302772522, "step": 325 }, { "epoch": 0.044073038217475435, "grad_norm": 5.258992671966553, "learning_rate": 1.904296875e-05, "loss": 0.1031460165977478, "step": 326 }, { "epoch": 0.044208231586240695, "grad_norm": 2.8044447898864746, "learning_rate": 1.91015625e-05, "loss": 0.07067525386810303, "step": 327 }, { "epoch": 0.044343424955005956, "grad_norm": 8.673918724060059, "learning_rate": 1.9160156250000002e-05, "loss": 0.1001860499382019, "step": 328 }, { "epoch": 0.044478618323771216, "grad_norm": 9.760953903198242, "learning_rate": 1.921875e-05, "loss": 0.10470497608184814, "step": 329 }, { "epoch": 0.04461381169253648, "grad_norm": 2.4386723041534424, "learning_rate": 1.927734375e-05, "loss": 0.07603228092193604, "step": 330 }, { "epoch": 0.04474900506130174, "grad_norm": 14.846535682678223, "learning_rate": 1.93359375e-05, "loss": 0.1105351597070694, "step": 331 }, { "epoch": 0.044884198430067, "grad_norm": 8.958602905273438, "learning_rate": 1.939453125e-05, "loss": 0.10117067396640778, "step": 332 }, { "epoch": 0.04501939179883227, "grad_norm": 8.11074161529541, "learning_rate": 1.9453125e-05, "loss": 0.10616093873977661, "step": 333 }, { "epoch": 0.04515458516759753, "grad_norm": 6.426239013671875, "learning_rate": 1.951171875e-05, "loss": 0.09173119068145752, "step": 334 }, { "epoch": 0.04528977853636279, "grad_norm": 11.673319816589355, "learning_rate": 1.95703125e-05, "loss": 0.10270881652832031, "step": 335 }, { "epoch": 0.04542497190512805, "grad_norm": 16.70077896118164, "learning_rate": 1.962890625e-05, "loss": 0.13658809661865234, "step": 336 }, { "epoch": 0.04556016527389332, "grad_norm": 16.626657485961914, "learning_rate": 1.96875e-05, "loss": 0.11444759368896484, "step": 337 }, { "epoch": 0.04569535864265858, "grad_norm": 5.551499843597412, "learning_rate": 1.9746093750000002e-05, "loss": 0.08840703964233398, "step": 338 }, { "epoch": 0.04583055201142384, "grad_norm": 1.8800626993179321, "learning_rate": 1.98046875e-05, "loss": 0.06582427024841309, "step": 339 }, { "epoch": 0.045965745380189105, "grad_norm": 6.9070000648498535, "learning_rate": 1.986328125e-05, "loss": 0.10113584995269775, "step": 340 }, { "epoch": 0.046100938748954365, "grad_norm": 6.177674770355225, "learning_rate": 1.9921875e-05, "loss": 0.10582369565963745, "step": 341 }, { "epoch": 0.046236132117719625, "grad_norm": 7.566307544708252, "learning_rate": 1.998046875e-05, "loss": 0.11455971002578735, "step": 342 }, { "epoch": 0.046371325486484885, "grad_norm": 1.430694580078125, "learning_rate": 2.00390625e-05, "loss": 0.07259982824325562, "step": 343 }, { "epoch": 0.04650651885525015, "grad_norm": 4.00285005569458, "learning_rate": 2.009765625e-05, "loss": 0.0822591781616211, "step": 344 }, { "epoch": 0.04664171222401541, "grad_norm": 3.8489129543304443, "learning_rate": 2.0156250000000002e-05, "loss": 0.10387992858886719, "step": 345 }, { "epoch": 0.04677690559278067, "grad_norm": 1.9880205392837524, "learning_rate": 2.021484375e-05, "loss": 0.09433865547180176, "step": 346 }, { "epoch": 0.04691209896154594, "grad_norm": 2.200664758682251, "learning_rate": 2.02734375e-05, "loss": 0.08370310068130493, "step": 347 }, { "epoch": 0.0470472923303112, "grad_norm": 1.9416441917419434, "learning_rate": 2.033203125e-05, "loss": 0.09031820297241211, "step": 348 }, { "epoch": 0.04718248569907646, "grad_norm": 2.0605087280273438, "learning_rate": 2.0390625e-05, "loss": 0.06338119506835938, "step": 349 }, { "epoch": 0.04731767906784172, "grad_norm": 3.013564348220825, "learning_rate": 2.044921875e-05, "loss": 0.10893821716308594, "step": 350 }, { "epoch": 0.04745287243660699, "grad_norm": 5.238158226013184, "learning_rate": 2.05078125e-05, "loss": 0.07785534858703613, "step": 351 }, { "epoch": 0.04758806580537225, "grad_norm": 6.897862911224365, "learning_rate": 2.056640625e-05, "loss": 0.08934783935546875, "step": 352 }, { "epoch": 0.04772325917413751, "grad_norm": 4.818323612213135, "learning_rate": 2.0625e-05, "loss": 0.07204675674438477, "step": 353 }, { "epoch": 0.047858452542902774, "grad_norm": 7.2365031242370605, "learning_rate": 2.068359375e-05, "loss": 0.12442159652709961, "step": 354 }, { "epoch": 0.047993645911668034, "grad_norm": 3.4385592937469482, "learning_rate": 2.0742187500000002e-05, "loss": 0.0787164568901062, "step": 355 }, { "epoch": 0.048128839280433294, "grad_norm": 5.228093147277832, "learning_rate": 2.080078125e-05, "loss": 0.1132049560546875, "step": 356 }, { "epoch": 0.048264032649198554, "grad_norm": 1.9497636556625366, "learning_rate": 2.0859375e-05, "loss": 0.09700345993041992, "step": 357 }, { "epoch": 0.04839922601796382, "grad_norm": 5.747951030731201, "learning_rate": 2.091796875e-05, "loss": 0.09501749277114868, "step": 358 }, { "epoch": 0.04853441938672908, "grad_norm": 1.3901766538619995, "learning_rate": 2.09765625e-05, "loss": 0.1029747724533081, "step": 359 }, { "epoch": 0.04866961275549434, "grad_norm": 1.3685859441757202, "learning_rate": 2.103515625e-05, "loss": 0.061025023460388184, "step": 360 }, { "epoch": 0.0488048061242596, "grad_norm": 1.9651250839233398, "learning_rate": 2.109375e-05, "loss": 0.08706998825073242, "step": 361 }, { "epoch": 0.04893999949302487, "grad_norm": 1.6208951473236084, "learning_rate": 2.1152343750000002e-05, "loss": 0.08523666858673096, "step": 362 }, { "epoch": 0.04907519286179013, "grad_norm": 4.106655120849609, "learning_rate": 2.12109375e-05, "loss": 0.11466550827026367, "step": 363 }, { "epoch": 0.04921038623055539, "grad_norm": 2.599555015563965, "learning_rate": 2.126953125e-05, "loss": 0.1063699722290039, "step": 364 }, { "epoch": 0.049345579599320656, "grad_norm": 2.2370595932006836, "learning_rate": 2.1328125000000002e-05, "loss": 0.07172304391860962, "step": 365 }, { "epoch": 0.049480772968085916, "grad_norm": 3.7827062606811523, "learning_rate": 2.138671875e-05, "loss": 0.0763203501701355, "step": 366 }, { "epoch": 0.049615966336851176, "grad_norm": 1.5155869722366333, "learning_rate": 2.14453125e-05, "loss": 0.09769988059997559, "step": 367 }, { "epoch": 0.049751159705616436, "grad_norm": 5.802121162414551, "learning_rate": 2.150390625e-05, "loss": 0.10133606195449829, "step": 368 }, { "epoch": 0.049886353074381704, "grad_norm": 4.722776412963867, "learning_rate": 2.15625e-05, "loss": 0.06864500045776367, "step": 369 }, { "epoch": 0.050021546443146964, "grad_norm": 4.014957904815674, "learning_rate": 2.162109375e-05, "loss": 0.06608808040618896, "step": 370 }, { "epoch": 0.050156739811912224, "grad_norm": 3.3784916400909424, "learning_rate": 2.16796875e-05, "loss": 0.08661270141601562, "step": 371 }, { "epoch": 0.05029193318067749, "grad_norm": 0.9976571798324585, "learning_rate": 2.1738281250000002e-05, "loss": 0.06785595417022705, "step": 372 }, { "epoch": 0.05042712654944275, "grad_norm": 3.677727222442627, "learning_rate": 2.1796875e-05, "loss": 0.08674323558807373, "step": 373 }, { "epoch": 0.05056231991820801, "grad_norm": 2.9944887161254883, "learning_rate": 2.185546875e-05, "loss": 0.07923847436904907, "step": 374 }, { "epoch": 0.05069751328697327, "grad_norm": 1.8594979047775269, "learning_rate": 2.19140625e-05, "loss": 0.0975416898727417, "step": 375 }, { "epoch": 0.05083270665573854, "grad_norm": 7.510520935058594, "learning_rate": 2.197265625e-05, "loss": 0.0970761775970459, "step": 376 }, { "epoch": 0.0509679000245038, "grad_norm": 7.723931312561035, "learning_rate": 2.203125e-05, "loss": 0.09255567193031311, "step": 377 }, { "epoch": 0.05110309339326906, "grad_norm": 3.676070213317871, "learning_rate": 2.208984375e-05, "loss": 0.08611512184143066, "step": 378 }, { "epoch": 0.051238286762034325, "grad_norm": 1.5313795804977417, "learning_rate": 2.2148437500000002e-05, "loss": 0.08518537878990173, "step": 379 }, { "epoch": 0.051373480130799586, "grad_norm": 2.037703037261963, "learning_rate": 2.220703125e-05, "loss": 0.0772629976272583, "step": 380 }, { "epoch": 0.051508673499564846, "grad_norm": 2.8023152351379395, "learning_rate": 2.2265625e-05, "loss": 0.0715177059173584, "step": 381 }, { "epoch": 0.051643866868330106, "grad_norm": 2.998567581176758, "learning_rate": 2.2324218750000002e-05, "loss": 0.10310161113739014, "step": 382 }, { "epoch": 0.05177906023709537, "grad_norm": 2.0281074047088623, "learning_rate": 2.23828125e-05, "loss": 0.05637192726135254, "step": 383 }, { "epoch": 0.05191425360586063, "grad_norm": 2.003948450088501, "learning_rate": 2.244140625e-05, "loss": 0.08940792083740234, "step": 384 }, { "epoch": 0.05204944697462589, "grad_norm": 3.8771634101867676, "learning_rate": 2.25e-05, "loss": 0.07818818092346191, "step": 385 }, { "epoch": 0.05218464034339115, "grad_norm": 2.335020065307617, "learning_rate": 2.255859375e-05, "loss": 0.07242804765701294, "step": 386 }, { "epoch": 0.05231983371215642, "grad_norm": 2.4745888710021973, "learning_rate": 2.26171875e-05, "loss": 0.0739285945892334, "step": 387 }, { "epoch": 0.05245502708092168, "grad_norm": 6.174681186676025, "learning_rate": 2.267578125e-05, "loss": 0.1017613410949707, "step": 388 }, { "epoch": 0.05259022044968694, "grad_norm": 4.108165264129639, "learning_rate": 2.2734375000000002e-05, "loss": 0.08620131015777588, "step": 389 }, { "epoch": 0.05272541381845221, "grad_norm": 2.4019179344177246, "learning_rate": 2.279296875e-05, "loss": 0.0928642749786377, "step": 390 }, { "epoch": 0.05286060718721747, "grad_norm": 0.8760350346565247, "learning_rate": 2.28515625e-05, "loss": 0.08316290378570557, "step": 391 }, { "epoch": 0.05299580055598273, "grad_norm": 5.666714191436768, "learning_rate": 2.291015625e-05, "loss": 0.09477740526199341, "step": 392 }, { "epoch": 0.05313099392474799, "grad_norm": 5.060141086578369, "learning_rate": 2.296875e-05, "loss": 0.09440922737121582, "step": 393 }, { "epoch": 0.053266187293513255, "grad_norm": 7.304379940032959, "learning_rate": 2.302734375e-05, "loss": 0.11314010620117188, "step": 394 }, { "epoch": 0.053401380662278515, "grad_norm": 1.990586757659912, "learning_rate": 2.30859375e-05, "loss": 0.09145021438598633, "step": 395 }, { "epoch": 0.053536574031043775, "grad_norm": 3.8750035762786865, "learning_rate": 2.3144531250000002e-05, "loss": 0.06574606895446777, "step": 396 }, { "epoch": 0.05367176739980904, "grad_norm": 3.6363365650177, "learning_rate": 2.3203125e-05, "loss": 0.07064366340637207, "step": 397 }, { "epoch": 0.0538069607685743, "grad_norm": 4.949976921081543, "learning_rate": 2.326171875e-05, "loss": 0.08974480628967285, "step": 398 }, { "epoch": 0.05394215413733956, "grad_norm": 2.357069730758667, "learning_rate": 2.3320312500000002e-05, "loss": 0.09490275382995605, "step": 399 }, { "epoch": 0.05407734750610482, "grad_norm": 2.3310916423797607, "learning_rate": 2.337890625e-05, "loss": 0.07440567016601562, "step": 400 }, { "epoch": 0.05421254087487009, "grad_norm": 1.8446704149246216, "learning_rate": 2.34375e-05, "loss": 0.06704896688461304, "step": 401 }, { "epoch": 0.05434773424363535, "grad_norm": 2.1257400512695312, "learning_rate": 2.349609375e-05, "loss": 0.07876241207122803, "step": 402 }, { "epoch": 0.05448292761240061, "grad_norm": 0.6201032996177673, "learning_rate": 2.35546875e-05, "loss": 0.06959986686706543, "step": 403 }, { "epoch": 0.05461812098116588, "grad_norm": 1.6564972400665283, "learning_rate": 2.361328125e-05, "loss": 0.05341958999633789, "step": 404 }, { "epoch": 0.05475331434993114, "grad_norm": 3.3766963481903076, "learning_rate": 2.3671875e-05, "loss": 0.09309649467468262, "step": 405 }, { "epoch": 0.0548885077186964, "grad_norm": 1.0515787601470947, "learning_rate": 2.3730468750000002e-05, "loss": 0.09221798181533813, "step": 406 }, { "epoch": 0.05502370108746166, "grad_norm": 4.562459945678711, "learning_rate": 2.37890625e-05, "loss": 0.11785227060317993, "step": 407 }, { "epoch": 0.055158894456226924, "grad_norm": 3.8840770721435547, "learning_rate": 2.384765625e-05, "loss": 0.08860963582992554, "step": 408 }, { "epoch": 0.055294087824992184, "grad_norm": 2.603105306625366, "learning_rate": 2.3906250000000002e-05, "loss": 0.06699860095977783, "step": 409 }, { "epoch": 0.055429281193757445, "grad_norm": 2.319427967071533, "learning_rate": 2.396484375e-05, "loss": 0.08991217613220215, "step": 410 }, { "epoch": 0.05556447456252271, "grad_norm": 2.4066224098205566, "learning_rate": 2.40234375e-05, "loss": 0.0811762809753418, "step": 411 }, { "epoch": 0.05569966793128797, "grad_norm": 1.0495777130126953, "learning_rate": 2.408203125e-05, "loss": 0.0670466423034668, "step": 412 }, { "epoch": 0.05583486130005323, "grad_norm": 2.1325764656066895, "learning_rate": 2.4140625e-05, "loss": 0.0867302417755127, "step": 413 }, { "epoch": 0.05597005466881849, "grad_norm": 0.9265480637550354, "learning_rate": 2.419921875e-05, "loss": 0.08390694856643677, "step": 414 }, { "epoch": 0.05610524803758376, "grad_norm": 2.2873709201812744, "learning_rate": 2.42578125e-05, "loss": 0.10381579399108887, "step": 415 }, { "epoch": 0.05624044140634902, "grad_norm": 2.1423025131225586, "learning_rate": 2.4316406250000002e-05, "loss": 0.0864713191986084, "step": 416 }, { "epoch": 0.05637563477511428, "grad_norm": 2.467217206954956, "learning_rate": 2.4375e-05, "loss": 0.09174138307571411, "step": 417 }, { "epoch": 0.05651082814387954, "grad_norm": 2.6019210815429688, "learning_rate": 2.443359375e-05, "loss": 0.08538818359375, "step": 418 }, { "epoch": 0.056646021512644806, "grad_norm": 1.3806113004684448, "learning_rate": 2.44921875e-05, "loss": 0.07542300224304199, "step": 419 }, { "epoch": 0.056781214881410066, "grad_norm": 1.9980984926223755, "learning_rate": 2.455078125e-05, "loss": 0.08560431003570557, "step": 420 }, { "epoch": 0.05691640825017533, "grad_norm": 2.729419708251953, "learning_rate": 2.4609375e-05, "loss": 0.08438265323638916, "step": 421 }, { "epoch": 0.057051601618940594, "grad_norm": 1.1821060180664062, "learning_rate": 2.466796875e-05, "loss": 0.050698280334472656, "step": 422 }, { "epoch": 0.057186794987705854, "grad_norm": 1.6192868947982788, "learning_rate": 2.4726562500000002e-05, "loss": 0.0817415714263916, "step": 423 }, { "epoch": 0.057321988356471114, "grad_norm": 1.4687600135803223, "learning_rate": 2.478515625e-05, "loss": 0.09992074966430664, "step": 424 }, { "epoch": 0.057457181725236374, "grad_norm": 4.619570255279541, "learning_rate": 2.484375e-05, "loss": 0.11551809310913086, "step": 425 }, { "epoch": 0.05759237509400164, "grad_norm": 3.8509414196014404, "learning_rate": 2.4902343750000002e-05, "loss": 0.07223761081695557, "step": 426 }, { "epoch": 0.0577275684627669, "grad_norm": 2.7280728816986084, "learning_rate": 2.49609375e-05, "loss": 0.07535338401794434, "step": 427 }, { "epoch": 0.05786276183153216, "grad_norm": 2.048595905303955, "learning_rate": 2.501953125e-05, "loss": 0.08168274164199829, "step": 428 }, { "epoch": 0.05799795520029743, "grad_norm": 2.0069172382354736, "learning_rate": 2.5078125e-05, "loss": 0.0855332612991333, "step": 429 }, { "epoch": 0.05813314856906269, "grad_norm": 2.6839518547058105, "learning_rate": 2.513671875e-05, "loss": 0.07594740390777588, "step": 430 }, { "epoch": 0.05826834193782795, "grad_norm": 1.9031440019607544, "learning_rate": 2.51953125e-05, "loss": 0.08525919914245605, "step": 431 }, { "epoch": 0.05840353530659321, "grad_norm": 2.022561550140381, "learning_rate": 2.525390625e-05, "loss": 0.0855109691619873, "step": 432 }, { "epoch": 0.058538728675358476, "grad_norm": 3.3286380767822266, "learning_rate": 2.5312500000000002e-05, "loss": 0.08324706554412842, "step": 433 }, { "epoch": 0.058673922044123736, "grad_norm": 2.32784104347229, "learning_rate": 2.537109375e-05, "loss": 0.09593796730041504, "step": 434 }, { "epoch": 0.058809115412888996, "grad_norm": 1.7362346649169922, "learning_rate": 2.54296875e-05, "loss": 0.08457082509994507, "step": 435 }, { "epoch": 0.05894430878165426, "grad_norm": 1.3542895317077637, "learning_rate": 2.548828125e-05, "loss": 0.08672499656677246, "step": 436 }, { "epoch": 0.05907950215041952, "grad_norm": 1.7253117561340332, "learning_rate": 2.5546875e-05, "loss": 0.09296071529388428, "step": 437 }, { "epoch": 0.05921469551918478, "grad_norm": 2.2812447547912598, "learning_rate": 2.560546875e-05, "loss": 0.06116044521331787, "step": 438 }, { "epoch": 0.05934988888795004, "grad_norm": 2.4810948371887207, "learning_rate": 2.56640625e-05, "loss": 0.07155156135559082, "step": 439 }, { "epoch": 0.05948508225671531, "grad_norm": 1.031551718711853, "learning_rate": 2.5722656250000002e-05, "loss": 0.08875316381454468, "step": 440 }, { "epoch": 0.05962027562548057, "grad_norm": 1.3491324186325073, "learning_rate": 2.578125e-05, "loss": 0.0728384256362915, "step": 441 }, { "epoch": 0.05975546899424583, "grad_norm": 4.78794527053833, "learning_rate": 2.583984375e-05, "loss": 0.07623755931854248, "step": 442 }, { "epoch": 0.0598906623630111, "grad_norm": 3.6588025093078613, "learning_rate": 2.5898437500000002e-05, "loss": 0.06744241714477539, "step": 443 }, { "epoch": 0.06002585573177636, "grad_norm": 1.644033670425415, "learning_rate": 2.595703125e-05, "loss": 0.08454227447509766, "step": 444 }, { "epoch": 0.06016104910054162, "grad_norm": 2.69010853767395, "learning_rate": 2.6015625e-05, "loss": 0.07021188735961914, "step": 445 }, { "epoch": 0.06029624246930688, "grad_norm": 2.966491222381592, "learning_rate": 2.607421875e-05, "loss": 0.10312604904174805, "step": 446 }, { "epoch": 0.060431435838072145, "grad_norm": 2.192582130432129, "learning_rate": 2.61328125e-05, "loss": 0.08733201026916504, "step": 447 }, { "epoch": 0.060566629206837405, "grad_norm": 3.28017520904541, "learning_rate": 2.619140625e-05, "loss": 0.11056116223335266, "step": 448 }, { "epoch": 0.060701822575602665, "grad_norm": 1.294121265411377, "learning_rate": 2.625e-05, "loss": 0.08379864692687988, "step": 449 }, { "epoch": 0.060837015944367925, "grad_norm": 2.3864622116088867, "learning_rate": 2.6308593750000002e-05, "loss": 0.06312590837478638, "step": 450 }, { "epoch": 0.06097220931313319, "grad_norm": 3.0067105293273926, "learning_rate": 2.63671875e-05, "loss": 0.07582861185073853, "step": 451 }, { "epoch": 0.06110740268189845, "grad_norm": 2.875403881072998, "learning_rate": 2.642578125e-05, "loss": 0.08906316757202148, "step": 452 }, { "epoch": 0.06124259605066371, "grad_norm": 3.5831849575042725, "learning_rate": 2.6484375000000002e-05, "loss": 0.10461187362670898, "step": 453 }, { "epoch": 0.06137778941942898, "grad_norm": 5.713657855987549, "learning_rate": 2.654296875e-05, "loss": 0.08087611198425293, "step": 454 }, { "epoch": 0.06151298278819424, "grad_norm": 4.763205051422119, "learning_rate": 2.66015625e-05, "loss": 0.09847617149353027, "step": 455 }, { "epoch": 0.0616481761569595, "grad_norm": 2.2618470191955566, "learning_rate": 2.666015625e-05, "loss": 0.07884633541107178, "step": 456 }, { "epoch": 0.06178336952572476, "grad_norm": 4.422484397888184, "learning_rate": 2.6718750000000002e-05, "loss": 0.09517449140548706, "step": 457 }, { "epoch": 0.06191856289449003, "grad_norm": 6.361186504364014, "learning_rate": 2.677734375e-05, "loss": 0.11206620931625366, "step": 458 }, { "epoch": 0.06205375626325529, "grad_norm": 1.5987541675567627, "learning_rate": 2.68359375e-05, "loss": 0.04566866159439087, "step": 459 }, { "epoch": 0.06218894963202055, "grad_norm": 3.8181939125061035, "learning_rate": 2.6894531250000002e-05, "loss": 0.09405183792114258, "step": 460 }, { "epoch": 0.062324143000785814, "grad_norm": 0.971908450126648, "learning_rate": 2.6953125e-05, "loss": 0.0977165699005127, "step": 461 }, { "epoch": 0.062459336369551074, "grad_norm": 4.934218883514404, "learning_rate": 2.701171875e-05, "loss": 0.07342004776000977, "step": 462 }, { "epoch": 0.06259452973831633, "grad_norm": 5.575514316558838, "learning_rate": 2.70703125e-05, "loss": 0.11276412010192871, "step": 463 }, { "epoch": 0.0627297231070816, "grad_norm": 6.003695964813232, "learning_rate": 2.712890625e-05, "loss": 0.09714007377624512, "step": 464 }, { "epoch": 0.06286491647584685, "grad_norm": 3.977492332458496, "learning_rate": 2.71875e-05, "loss": 0.12041759490966797, "step": 465 }, { "epoch": 0.06300010984461211, "grad_norm": 3.8524303436279297, "learning_rate": 2.724609375e-05, "loss": 0.0952996015548706, "step": 466 }, { "epoch": 0.06313530321337739, "grad_norm": 0.717673659324646, "learning_rate": 2.7304687500000002e-05, "loss": 0.0744776725769043, "step": 467 }, { "epoch": 0.06327049658214265, "grad_norm": 3.0212314128875732, "learning_rate": 2.736328125e-05, "loss": 0.09562563896179199, "step": 468 }, { "epoch": 0.06340568995090791, "grad_norm": 1.3355679512023926, "learning_rate": 2.7421875e-05, "loss": 0.057322025299072266, "step": 469 }, { "epoch": 0.06354088331967317, "grad_norm": 1.1125469207763672, "learning_rate": 2.7480468750000002e-05, "loss": 0.07634854316711426, "step": 470 }, { "epoch": 0.06367607668843843, "grad_norm": 0.9093754887580872, "learning_rate": 2.75390625e-05, "loss": 0.06976306438446045, "step": 471 }, { "epoch": 0.06381127005720369, "grad_norm": 0.6387351751327515, "learning_rate": 2.759765625e-05, "loss": 0.07417264580726624, "step": 472 }, { "epoch": 0.06394646342596895, "grad_norm": 0.9516288042068481, "learning_rate": 2.765625e-05, "loss": 0.0605187714099884, "step": 473 }, { "epoch": 0.06408165679473422, "grad_norm": 2.21431040763855, "learning_rate": 2.7714843750000002e-05, "loss": 0.0928950309753418, "step": 474 }, { "epoch": 0.06421685016349948, "grad_norm": 0.692080557346344, "learning_rate": 2.77734375e-05, "loss": 0.06328105926513672, "step": 475 }, { "epoch": 0.06435204353226474, "grad_norm": 1.8471779823303223, "learning_rate": 2.783203125e-05, "loss": 0.06584012508392334, "step": 476 }, { "epoch": 0.06448723690103, "grad_norm": 0.7896122932434082, "learning_rate": 2.7890625000000002e-05, "loss": 0.07595396041870117, "step": 477 }, { "epoch": 0.06462243026979526, "grad_norm": 0.8789713382720947, "learning_rate": 2.794921875e-05, "loss": 0.052197396755218506, "step": 478 }, { "epoch": 0.06475762363856052, "grad_norm": 1.795386552810669, "learning_rate": 2.80078125e-05, "loss": 0.06410270929336548, "step": 479 }, { "epoch": 0.06489281700732578, "grad_norm": 2.4986648559570312, "learning_rate": 2.806640625e-05, "loss": 0.09851151704788208, "step": 480 }, { "epoch": 0.06502801037609106, "grad_norm": 2.338993549346924, "learning_rate": 2.8125e-05, "loss": 0.08768641948699951, "step": 481 }, { "epoch": 0.06516320374485632, "grad_norm": 1.754858136177063, "learning_rate": 2.818359375e-05, "loss": 0.08150345087051392, "step": 482 }, { "epoch": 0.06529839711362158, "grad_norm": 1.4037175178527832, "learning_rate": 2.82421875e-05, "loss": 0.07955771684646606, "step": 483 }, { "epoch": 0.06543359048238684, "grad_norm": 2.536918878555298, "learning_rate": 2.8300781250000002e-05, "loss": 0.06898238509893417, "step": 484 }, { "epoch": 0.0655687838511521, "grad_norm": 2.3428120613098145, "learning_rate": 2.8359375e-05, "loss": 0.09016305208206177, "step": 485 }, { "epoch": 0.06570397721991736, "grad_norm": 1.6057106256484985, "learning_rate": 2.841796875e-05, "loss": 0.07038795948028564, "step": 486 }, { "epoch": 0.06583917058868262, "grad_norm": 1.4538984298706055, "learning_rate": 2.8476562500000002e-05, "loss": 0.07903861999511719, "step": 487 }, { "epoch": 0.06597436395744789, "grad_norm": 4.265200138092041, "learning_rate": 2.853515625e-05, "loss": 0.0682828426361084, "step": 488 }, { "epoch": 0.06610955732621315, "grad_norm": 1.1278741359710693, "learning_rate": 2.859375e-05, "loss": 0.07071077823638916, "step": 489 }, { "epoch": 0.06624475069497841, "grad_norm": 1.5449624061584473, "learning_rate": 2.865234375e-05, "loss": 0.07739472389221191, "step": 490 }, { "epoch": 0.06637994406374367, "grad_norm": 3.274684190750122, "learning_rate": 2.87109375e-05, "loss": 0.0890035629272461, "step": 491 }, { "epoch": 0.06651513743250893, "grad_norm": 1.5417492389678955, "learning_rate": 2.876953125e-05, "loss": 0.04077804088592529, "step": 492 }, { "epoch": 0.0666503308012742, "grad_norm": 0.8454017043113708, "learning_rate": 2.8828125e-05, "loss": 0.06175266206264496, "step": 493 }, { "epoch": 0.06678552417003945, "grad_norm": 2.7841005325317383, "learning_rate": 2.8886718750000002e-05, "loss": 0.08558833599090576, "step": 494 }, { "epoch": 0.06692071753880473, "grad_norm": 4.5589494705200195, "learning_rate": 2.89453125e-05, "loss": 0.07117438316345215, "step": 495 }, { "epoch": 0.06705591090756999, "grad_norm": 4.542187213897705, "learning_rate": 2.900390625e-05, "loss": 0.10571330785751343, "step": 496 }, { "epoch": 0.06719110427633525, "grad_norm": 1.6217830181121826, "learning_rate": 2.90625e-05, "loss": 0.10186421871185303, "step": 497 }, { "epoch": 0.06732629764510051, "grad_norm": 3.4604969024658203, "learning_rate": 2.912109375e-05, "loss": 0.08647912740707397, "step": 498 }, { "epoch": 0.06746149101386577, "grad_norm": 5.712713241577148, "learning_rate": 2.91796875e-05, "loss": 0.10363689064979553, "step": 499 }, { "epoch": 0.06759668438263103, "grad_norm": 4.759398937225342, "learning_rate": 2.923828125e-05, "loss": 0.09876960515975952, "step": 500 }, { "epoch": 0.06773187775139629, "grad_norm": 3.2786519527435303, "learning_rate": 2.9296875000000002e-05, "loss": 0.08078700304031372, "step": 501 }, { "epoch": 0.06786707112016156, "grad_norm": 1.162019968032837, "learning_rate": 2.935546875e-05, "loss": 0.06313735246658325, "step": 502 }, { "epoch": 0.06800226448892682, "grad_norm": 1.308668851852417, "learning_rate": 2.94140625e-05, "loss": 0.07171192765235901, "step": 503 }, { "epoch": 0.06813745785769208, "grad_norm": 2.578092575073242, "learning_rate": 2.9472656250000002e-05, "loss": 0.08997377753257751, "step": 504 }, { "epoch": 0.06827265122645734, "grad_norm": 1.9555453062057495, "learning_rate": 2.953125e-05, "loss": 0.07397922873497009, "step": 505 }, { "epoch": 0.0684078445952226, "grad_norm": 0.8753219246864319, "learning_rate": 2.958984375e-05, "loss": 0.07477635145187378, "step": 506 }, { "epoch": 0.06854303796398786, "grad_norm": 2.393610715866089, "learning_rate": 2.96484375e-05, "loss": 0.08859503269195557, "step": 507 }, { "epoch": 0.06867823133275312, "grad_norm": 1.1795601844787598, "learning_rate": 2.970703125e-05, "loss": 0.07753872871398926, "step": 508 }, { "epoch": 0.0688134247015184, "grad_norm": 0.7777423858642578, "learning_rate": 2.9765625e-05, "loss": 0.05384558439254761, "step": 509 }, { "epoch": 0.06894861807028366, "grad_norm": 1.0782616138458252, "learning_rate": 2.982421875e-05, "loss": 0.0829768031835556, "step": 510 }, { "epoch": 0.06908381143904892, "grad_norm": 2.6904473304748535, "learning_rate": 2.9882812500000002e-05, "loss": 0.07865709066390991, "step": 511 }, { "epoch": 0.06921900480781418, "grad_norm": 0.9617339372634888, "learning_rate": 2.994140625e-05, "loss": 0.08347833156585693, "step": 512 }, { "epoch": 0.06935419817657944, "grad_norm": 4.226638317108154, "learning_rate": 3e-05, "loss": 0.06993937492370605, "step": 513 }, { "epoch": 0.0694893915453447, "grad_norm": 5.760720729827881, "learning_rate": 2.9999998438460004e-05, "loss": 0.08640438318252563, "step": 514 }, { "epoch": 0.06962458491410996, "grad_norm": 7.5532145500183105, "learning_rate": 2.9999993753840344e-05, "loss": 0.11704134941101074, "step": 515 }, { "epoch": 0.06975977828287523, "grad_norm": 2.440594434738159, "learning_rate": 2.9999985946141995e-05, "loss": 0.08039286732673645, "step": 516 }, { "epoch": 0.06989497165164049, "grad_norm": 1.566990852355957, "learning_rate": 2.9999975015366586e-05, "loss": 0.05487659573554993, "step": 517 }, { "epoch": 0.07003016502040575, "grad_norm": 3.4617419242858887, "learning_rate": 2.9999960961516384e-05, "loss": 0.07434442639350891, "step": 518 }, { "epoch": 0.07016535838917101, "grad_norm": 4.425759315490723, "learning_rate": 2.9999943784594325e-05, "loss": 0.08561226725578308, "step": 519 }, { "epoch": 0.07030055175793627, "grad_norm": 0.42576825618743896, "learning_rate": 2.9999923484603975e-05, "loss": 0.0590876042842865, "step": 520 }, { "epoch": 0.07043574512670153, "grad_norm": 1.4820572137832642, "learning_rate": 2.999990006154957e-05, "loss": 0.05504500865936279, "step": 521 }, { "epoch": 0.07057093849546679, "grad_norm": 2.1246414184570312, "learning_rate": 2.9999873515435977e-05, "loss": 0.08048522472381592, "step": 522 }, { "epoch": 0.07070613186423207, "grad_norm": 0.5896093845367432, "learning_rate": 2.9999843846268735e-05, "loss": 0.07550835609436035, "step": 523 }, { "epoch": 0.07084132523299733, "grad_norm": 0.4509059190750122, "learning_rate": 2.9999811054054018e-05, "loss": 0.06059771776199341, "step": 524 }, { "epoch": 0.07097651860176259, "grad_norm": 2.119579553604126, "learning_rate": 2.9999775138798646e-05, "loss": 0.07610955834388733, "step": 525 }, { "epoch": 0.07111171197052785, "grad_norm": 0.6424253582954407, "learning_rate": 2.99997361005101e-05, "loss": 0.05062618851661682, "step": 526 }, { "epoch": 0.0712469053392931, "grad_norm": 0.4129358232021332, "learning_rate": 2.9999693939196513e-05, "loss": 0.04428476095199585, "step": 527 }, { "epoch": 0.07138209870805837, "grad_norm": 0.9434506893157959, "learning_rate": 2.999964865486666e-05, "loss": 0.06673705577850342, "step": 528 }, { "epoch": 0.07151729207682363, "grad_norm": 1.987471103668213, "learning_rate": 2.999960024752997e-05, "loss": 0.07437312602996826, "step": 529 }, { "epoch": 0.07165248544558889, "grad_norm": 1.8667017221450806, "learning_rate": 2.9999548717196516e-05, "loss": 0.08354781568050385, "step": 530 }, { "epoch": 0.07178767881435416, "grad_norm": 1.7856428623199463, "learning_rate": 2.999949406387703e-05, "loss": 0.09004330635070801, "step": 531 }, { "epoch": 0.07192287218311942, "grad_norm": 1.2929048538208008, "learning_rate": 2.9999436287582903e-05, "loss": 0.07697892189025879, "step": 532 }, { "epoch": 0.07205806555188468, "grad_norm": 1.8377665281295776, "learning_rate": 2.9999375388326145e-05, "loss": 0.0900411605834961, "step": 533 }, { "epoch": 0.07219325892064994, "grad_norm": 0.629837155342102, "learning_rate": 2.9999311366119447e-05, "loss": 0.07256340980529785, "step": 534 }, { "epoch": 0.0723284522894152, "grad_norm": 1.0979360342025757, "learning_rate": 2.9999244220976137e-05, "loss": 0.059804320335388184, "step": 535 }, { "epoch": 0.07246364565818046, "grad_norm": 1.495607614517212, "learning_rate": 2.9999173952910197e-05, "loss": 0.059341222047805786, "step": 536 }, { "epoch": 0.07259883902694572, "grad_norm": 2.1448705196380615, "learning_rate": 2.9999100561936252e-05, "loss": 0.06675183773040771, "step": 537 }, { "epoch": 0.072734032395711, "grad_norm": 2.3952038288116455, "learning_rate": 2.9999024048069585e-05, "loss": 0.06725847721099854, "step": 538 }, { "epoch": 0.07286922576447626, "grad_norm": 2.405796766281128, "learning_rate": 2.9998944411326127e-05, "loss": 0.05848759412765503, "step": 539 }, { "epoch": 0.07300441913324152, "grad_norm": 2.3532772064208984, "learning_rate": 2.999886165172246e-05, "loss": 0.08737562596797943, "step": 540 }, { "epoch": 0.07313961250200678, "grad_norm": 1.9639641046524048, "learning_rate": 2.9998775769275814e-05, "loss": 0.08353215456008911, "step": 541 }, { "epoch": 0.07327480587077204, "grad_norm": 1.4823710918426514, "learning_rate": 2.9998686764004067e-05, "loss": 0.08125150203704834, "step": 542 }, { "epoch": 0.0734099992395373, "grad_norm": 4.220778942108154, "learning_rate": 2.9998594635925755e-05, "loss": 0.10129022598266602, "step": 543 }, { "epoch": 0.07354519260830256, "grad_norm": 6.046679973602295, "learning_rate": 2.999849938506005e-05, "loss": 0.09592664241790771, "step": 544 }, { "epoch": 0.07368038597706783, "grad_norm": 3.6834492683410645, "learning_rate": 2.99984010114268e-05, "loss": 0.062161028385162354, "step": 545 }, { "epoch": 0.07381557934583309, "grad_norm": 3.0782973766326904, "learning_rate": 2.9998299515046475e-05, "loss": 0.08035112917423248, "step": 546 }, { "epoch": 0.07395077271459835, "grad_norm": 1.19057035446167, "learning_rate": 2.9998194895940213e-05, "loss": 0.06646406650543213, "step": 547 }, { "epoch": 0.07408596608336361, "grad_norm": 0.42260244488716125, "learning_rate": 2.9998087154129792e-05, "loss": 0.06359273195266724, "step": 548 }, { "epoch": 0.07422115945212887, "grad_norm": 1.7558311223983765, "learning_rate": 2.9997976289637645e-05, "loss": 0.0758281946182251, "step": 549 }, { "epoch": 0.07435635282089413, "grad_norm": 0.8219915628433228, "learning_rate": 2.9997862302486855e-05, "loss": 0.06392264366149902, "step": 550 }, { "epoch": 0.07449154618965939, "grad_norm": 1.645617127418518, "learning_rate": 2.9997745192701153e-05, "loss": 0.07526493072509766, "step": 551 }, { "epoch": 0.07462673955842467, "grad_norm": 0.48630088567733765, "learning_rate": 2.9997624960304926e-05, "loss": 0.06985509395599365, "step": 552 }, { "epoch": 0.07476193292718993, "grad_norm": 0.5417790412902832, "learning_rate": 2.9997501605323214e-05, "loss": 0.06212824583053589, "step": 553 }, { "epoch": 0.07489712629595519, "grad_norm": 0.9031376242637634, "learning_rate": 2.999737512778168e-05, "loss": 0.059658557176589966, "step": 554 }, { "epoch": 0.07503231966472045, "grad_norm": 1.3054426908493042, "learning_rate": 2.9997245527706674e-05, "loss": 0.054506897926330566, "step": 555 }, { "epoch": 0.0751675130334857, "grad_norm": 2.3331143856048584, "learning_rate": 2.999711280512517e-05, "loss": 0.07523204386234283, "step": 556 }, { "epoch": 0.07530270640225097, "grad_norm": 1.8983051776885986, "learning_rate": 2.9996976960064807e-05, "loss": 0.08041766285896301, "step": 557 }, { "epoch": 0.07543789977101623, "grad_norm": 2.1832733154296875, "learning_rate": 2.999683799255387e-05, "loss": 0.07708972692489624, "step": 558 }, { "epoch": 0.0755730931397815, "grad_norm": 1.53640615940094, "learning_rate": 2.999669590262129e-05, "loss": 0.07007499039173126, "step": 559 }, { "epoch": 0.07570828650854676, "grad_norm": 2.4458889961242676, "learning_rate": 2.999655069029665e-05, "loss": 0.0823965072631836, "step": 560 }, { "epoch": 0.07584347987731202, "grad_norm": 0.9590111970901489, "learning_rate": 2.9996402355610183e-05, "loss": 0.06776762008666992, "step": 561 }, { "epoch": 0.07597867324607728, "grad_norm": 0.502263069152832, "learning_rate": 2.9996250898592777e-05, "loss": 0.06980323791503906, "step": 562 }, { "epoch": 0.07611386661484254, "grad_norm": 1.8956334590911865, "learning_rate": 2.9996096319275962e-05, "loss": 0.06285884976387024, "step": 563 }, { "epoch": 0.0762490599836078, "grad_norm": 3.2829816341400146, "learning_rate": 2.9995938617691925e-05, "loss": 0.09168515354394913, "step": 564 }, { "epoch": 0.07638425335237306, "grad_norm": 1.0252958536148071, "learning_rate": 2.9995777793873504e-05, "loss": 0.0881321132183075, "step": 565 }, { "epoch": 0.07651944672113833, "grad_norm": 1.8914507627487183, "learning_rate": 2.9995613847854176e-05, "loss": 0.07276776432991028, "step": 566 }, { "epoch": 0.0766546400899036, "grad_norm": 1.0187723636627197, "learning_rate": 2.9995446779668078e-05, "loss": 0.05905801057815552, "step": 567 }, { "epoch": 0.07678983345866885, "grad_norm": 2.9869515895843506, "learning_rate": 2.9995276589349992e-05, "loss": 0.06865787506103516, "step": 568 }, { "epoch": 0.07692502682743411, "grad_norm": 2.001732349395752, "learning_rate": 2.9995103276935357e-05, "loss": 0.09824866056442261, "step": 569 }, { "epoch": 0.07706022019619937, "grad_norm": 1.145727276802063, "learning_rate": 2.9994926842460258e-05, "loss": 0.06862831115722656, "step": 570 }, { "epoch": 0.07719541356496464, "grad_norm": 2.2794854640960693, "learning_rate": 2.9994747285961428e-05, "loss": 0.07974135875701904, "step": 571 }, { "epoch": 0.0773306069337299, "grad_norm": 2.717662811279297, "learning_rate": 2.9994564607476255e-05, "loss": 0.07998615503311157, "step": 572 }, { "epoch": 0.07746580030249517, "grad_norm": 1.9366658926010132, "learning_rate": 2.9994378807042762e-05, "loss": 0.08855938911437988, "step": 573 }, { "epoch": 0.07760099367126043, "grad_norm": 1.265599012374878, "learning_rate": 2.9994189884699647e-05, "loss": 0.08413612842559814, "step": 574 }, { "epoch": 0.07773618704002569, "grad_norm": 0.8053746223449707, "learning_rate": 2.9993997840486233e-05, "loss": 0.08846956491470337, "step": 575 }, { "epoch": 0.07787138040879095, "grad_norm": 1.0635226964950562, "learning_rate": 2.9993802674442516e-05, "loss": 0.07659375667572021, "step": 576 }, { "epoch": 0.07800657377755621, "grad_norm": 0.9337055087089539, "learning_rate": 2.999360438660913e-05, "loss": 0.08028608560562134, "step": 577 }, { "epoch": 0.07814176714632147, "grad_norm": 1.3205060958862305, "learning_rate": 2.9993402977027346e-05, "loss": 0.05538719892501831, "step": 578 }, { "epoch": 0.07827696051508673, "grad_norm": 1.600622534751892, "learning_rate": 2.999319844573911e-05, "loss": 0.05767008662223816, "step": 579 }, { "epoch": 0.078412153883852, "grad_norm": 0.590765118598938, "learning_rate": 2.9992990792787007e-05, "loss": 0.03631296753883362, "step": 580 }, { "epoch": 0.07854734725261726, "grad_norm": 1.2146821022033691, "learning_rate": 2.999278001821427e-05, "loss": 0.05524358153343201, "step": 581 }, { "epoch": 0.07868254062138252, "grad_norm": 1.7299154996871948, "learning_rate": 2.9992566122064775e-05, "loss": 0.054759085178375244, "step": 582 }, { "epoch": 0.07881773399014778, "grad_norm": 1.8429731130599976, "learning_rate": 2.999234910438307e-05, "loss": 0.06211972236633301, "step": 583 }, { "epoch": 0.07895292735891304, "grad_norm": 2.8555471897125244, "learning_rate": 2.999212896521433e-05, "loss": 0.07378911972045898, "step": 584 }, { "epoch": 0.0790881207276783, "grad_norm": 0.525478720664978, "learning_rate": 2.999190570460439e-05, "loss": 0.0759100466966629, "step": 585 }, { "epoch": 0.07922331409644356, "grad_norm": 0.7448561191558838, "learning_rate": 2.9991679322599734e-05, "loss": 0.07554101943969727, "step": 586 }, { "epoch": 0.07935850746520882, "grad_norm": 2.3114960193634033, "learning_rate": 2.9991449819247505e-05, "loss": 0.06755508482456207, "step": 587 }, { "epoch": 0.0794937008339741, "grad_norm": 1.1557061672210693, "learning_rate": 2.9991217194595474e-05, "loss": 0.06702160835266113, "step": 588 }, { "epoch": 0.07962889420273936, "grad_norm": 2.0428478717803955, "learning_rate": 2.9990981448692078e-05, "loss": 0.07164239883422852, "step": 589 }, { "epoch": 0.07976408757150462, "grad_norm": 1.5015708208084106, "learning_rate": 2.999074258158641e-05, "loss": 0.08351626992225647, "step": 590 }, { "epoch": 0.07989928094026988, "grad_norm": 1.9153437614440918, "learning_rate": 2.9990500593328192e-05, "loss": 0.06143990159034729, "step": 591 }, { "epoch": 0.08003447430903514, "grad_norm": 0.9049278497695923, "learning_rate": 2.999025548396781e-05, "loss": 0.0896698534488678, "step": 592 }, { "epoch": 0.0801696676778004, "grad_norm": 0.7840317487716675, "learning_rate": 2.9990007253556302e-05, "loss": 0.06274673342704773, "step": 593 }, { "epoch": 0.08030486104656566, "grad_norm": 2.514416456222534, "learning_rate": 2.9989755902145345e-05, "loss": 0.08844804763793945, "step": 594 }, { "epoch": 0.08044005441533093, "grad_norm": 0.6164042353630066, "learning_rate": 2.9989501429787273e-05, "loss": 0.08358362317085266, "step": 595 }, { "epoch": 0.0805752477840962, "grad_norm": 0.4986203908920288, "learning_rate": 2.9989243836535073e-05, "loss": 0.0751338005065918, "step": 596 }, { "epoch": 0.08071044115286145, "grad_norm": 1.5278631448745728, "learning_rate": 2.998898312244237e-05, "loss": 0.07638660073280334, "step": 597 }, { "epoch": 0.08084563452162671, "grad_norm": 3.135820150375366, "learning_rate": 2.9988719287563452e-05, "loss": 0.08369582891464233, "step": 598 }, { "epoch": 0.08098082789039197, "grad_norm": 0.5640255212783813, "learning_rate": 2.998845233195325e-05, "loss": 0.05716729164123535, "step": 599 }, { "epoch": 0.08111602125915723, "grad_norm": 0.6250023245811462, "learning_rate": 2.998818225566734e-05, "loss": 0.05709981918334961, "step": 600 }, { "epoch": 0.0812512146279225, "grad_norm": 1.8859413862228394, "learning_rate": 2.998790905876196e-05, "loss": 0.0794154703617096, "step": 601 }, { "epoch": 0.08138640799668777, "grad_norm": 2.544265031814575, "learning_rate": 2.9987632741293987e-05, "loss": 0.08674255013465881, "step": 602 }, { "epoch": 0.08152160136545303, "grad_norm": 0.7365748882293701, "learning_rate": 2.998735330332096e-05, "loss": 0.061058998107910156, "step": 603 }, { "epoch": 0.08165679473421829, "grad_norm": 1.6129382848739624, "learning_rate": 2.9987070744901046e-05, "loss": 0.07812857627868652, "step": 604 }, { "epoch": 0.08179198810298355, "grad_norm": 1.4834262132644653, "learning_rate": 2.9986785066093084e-05, "loss": 0.05322533845901489, "step": 605 }, { "epoch": 0.08192718147174881, "grad_norm": 1.7839930057525635, "learning_rate": 2.9986496266956556e-05, "loss": 0.06453073024749756, "step": 606 }, { "epoch": 0.08206237484051407, "grad_norm": 2.200597047805786, "learning_rate": 2.9986204347551583e-05, "loss": 0.0850030779838562, "step": 607 }, { "epoch": 0.08219756820927933, "grad_norm": 1.8930333852767944, "learning_rate": 2.9985909307938948e-05, "loss": 0.05780499428510666, "step": 608 }, { "epoch": 0.0823327615780446, "grad_norm": 0.8153626918792725, "learning_rate": 2.9985611148180082e-05, "loss": 0.07434721291065216, "step": 609 }, { "epoch": 0.08246795494680986, "grad_norm": 2.432506561279297, "learning_rate": 2.9985309868337063e-05, "loss": 0.054645657539367676, "step": 610 }, { "epoch": 0.08260314831557512, "grad_norm": 0.8336548805236816, "learning_rate": 2.9985005468472617e-05, "loss": 0.07022076845169067, "step": 611 }, { "epoch": 0.08273834168434038, "grad_norm": 0.8005855083465576, "learning_rate": 2.9984697948650124e-05, "loss": 0.04986768960952759, "step": 612 }, { "epoch": 0.08287353505310564, "grad_norm": 0.8254513144493103, "learning_rate": 2.998438730893361e-05, "loss": 0.07963043451309204, "step": 613 }, { "epoch": 0.0830087284218709, "grad_norm": 2.1858198642730713, "learning_rate": 2.9984073549387747e-05, "loss": 0.07175400853157043, "step": 614 }, { "epoch": 0.08314392179063616, "grad_norm": 2.355132818222046, "learning_rate": 2.998375667007787e-05, "loss": 0.07016070187091827, "step": 615 }, { "epoch": 0.08327911515940144, "grad_norm": 0.599641740322113, "learning_rate": 2.998343667106995e-05, "loss": 0.058913350105285645, "step": 616 }, { "epoch": 0.0834143085281667, "grad_norm": 0.9613671898841858, "learning_rate": 2.9983113552430616e-05, "loss": 0.07916253805160522, "step": 617 }, { "epoch": 0.08354950189693196, "grad_norm": 3.1716086864471436, "learning_rate": 2.9982787314227134e-05, "loss": 0.07137918472290039, "step": 618 }, { "epoch": 0.08368469526569722, "grad_norm": 1.6789171695709229, "learning_rate": 2.998245795652744e-05, "loss": 0.05489552021026611, "step": 619 }, { "epoch": 0.08381988863446248, "grad_norm": 2.5331814289093018, "learning_rate": 2.9982125479400106e-05, "loss": 0.07533860206604004, "step": 620 }, { "epoch": 0.08395508200322774, "grad_norm": 1.989997148513794, "learning_rate": 2.9981789882914352e-05, "loss": 0.08089403808116913, "step": 621 }, { "epoch": 0.084090275371993, "grad_norm": 0.8958866596221924, "learning_rate": 2.9981451167140048e-05, "loss": 0.05067843198776245, "step": 622 }, { "epoch": 0.08422546874075827, "grad_norm": 3.3056395053863525, "learning_rate": 2.9981109332147722e-05, "loss": 0.0888616293668747, "step": 623 }, { "epoch": 0.08436066210952353, "grad_norm": 0.6550520658493042, "learning_rate": 2.9980764378008545e-05, "loss": 0.04205785691738129, "step": 624 }, { "epoch": 0.08449585547828879, "grad_norm": 1.6792329549789429, "learning_rate": 2.9980416304794332e-05, "loss": 0.05595056712627411, "step": 625 }, { "epoch": 0.08463104884705405, "grad_norm": 1.2456620931625366, "learning_rate": 2.9980065112577565e-05, "loss": 0.06043350696563721, "step": 626 }, { "epoch": 0.08476624221581931, "grad_norm": 1.6745126247406006, "learning_rate": 2.9979710801431357e-05, "loss": 0.06566761434078217, "step": 627 }, { "epoch": 0.08490143558458457, "grad_norm": 2.0548148155212402, "learning_rate": 2.997935337142948e-05, "loss": 0.06921306252479553, "step": 628 }, { "epoch": 0.08503662895334983, "grad_norm": 1.9589701890945435, "learning_rate": 2.9978992822646347e-05, "loss": 0.08202598243951797, "step": 629 }, { "epoch": 0.0851718223221151, "grad_norm": 0.798828125, "learning_rate": 2.9978629155157036e-05, "loss": 0.06456416845321655, "step": 630 }, { "epoch": 0.08530701569088037, "grad_norm": 1.6530522108078003, "learning_rate": 2.9978262369037252e-05, "loss": 0.05158402770757675, "step": 631 }, { "epoch": 0.08544220905964563, "grad_norm": 1.7948981523513794, "learning_rate": 2.9977892464363375e-05, "loss": 0.09780353307723999, "step": 632 }, { "epoch": 0.08557740242841089, "grad_norm": 0.8527246117591858, "learning_rate": 2.9977519441212412e-05, "loss": 0.054360270500183105, "step": 633 }, { "epoch": 0.08571259579717615, "grad_norm": 2.158431053161621, "learning_rate": 2.9977143299662034e-05, "loss": 0.09504848718643188, "step": 634 }, { "epoch": 0.08584778916594141, "grad_norm": 0.8429413437843323, "learning_rate": 2.997676403979055e-05, "loss": 0.0699925422668457, "step": 635 }, { "epoch": 0.08598298253470667, "grad_norm": 1.0217684507369995, "learning_rate": 2.997638166167693e-05, "loss": 0.06281322240829468, "step": 636 }, { "epoch": 0.08611817590347194, "grad_norm": 1.3085033893585205, "learning_rate": 2.9975996165400786e-05, "loss": 0.07656960189342499, "step": 637 }, { "epoch": 0.0862533692722372, "grad_norm": 0.8830587267875671, "learning_rate": 2.9975607551042373e-05, "loss": 0.06641332060098648, "step": 638 }, { "epoch": 0.08638856264100246, "grad_norm": 0.7558915615081787, "learning_rate": 2.9975215818682607e-05, "loss": 0.08561281859874725, "step": 639 }, { "epoch": 0.08652375600976772, "grad_norm": 0.6917787194252014, "learning_rate": 2.9974820968403056e-05, "loss": 0.06669548153877258, "step": 640 }, { "epoch": 0.08665894937853298, "grad_norm": 2.1940808296203613, "learning_rate": 2.9974423000285923e-05, "loss": 0.07260659337043762, "step": 641 }, { "epoch": 0.08679414274729824, "grad_norm": 0.9054264426231384, "learning_rate": 2.9974021914414068e-05, "loss": 0.07992389798164368, "step": 642 }, { "epoch": 0.0869293361160635, "grad_norm": 1.1337133646011353, "learning_rate": 2.9973617710871e-05, "loss": 0.07288509607315063, "step": 643 }, { "epoch": 0.08706452948482878, "grad_norm": 2.689429759979248, "learning_rate": 2.997321038974087e-05, "loss": 0.07699355483055115, "step": 644 }, { "epoch": 0.08719972285359404, "grad_norm": 1.0502344369888306, "learning_rate": 2.997279995110849e-05, "loss": 0.082648366689682, "step": 645 }, { "epoch": 0.0873349162223593, "grad_norm": 2.049832820892334, "learning_rate": 2.997238639505932e-05, "loss": 0.08546966314315796, "step": 646 }, { "epoch": 0.08747010959112456, "grad_norm": 2.7138779163360596, "learning_rate": 2.997196972167946e-05, "loss": 0.0652693510055542, "step": 647 }, { "epoch": 0.08760530295988982, "grad_norm": 1.1469850540161133, "learning_rate": 2.9971549931055665e-05, "loss": 0.07901674509048462, "step": 648 }, { "epoch": 0.08774049632865508, "grad_norm": 0.536758303642273, "learning_rate": 2.997112702327533e-05, "loss": 0.06264278292655945, "step": 649 }, { "epoch": 0.08787568969742034, "grad_norm": 0.7747340202331543, "learning_rate": 2.9970700998426518e-05, "loss": 0.07239297032356262, "step": 650 }, { "epoch": 0.0880108830661856, "grad_norm": 2.103119373321533, "learning_rate": 2.9970271856597925e-05, "loss": 0.06559926271438599, "step": 651 }, { "epoch": 0.08814607643495087, "grad_norm": 2.075385332107544, "learning_rate": 2.9969839597878896e-05, "loss": 0.06409762799739838, "step": 652 }, { "epoch": 0.08828126980371613, "grad_norm": 2.3380393981933594, "learning_rate": 2.9969404222359436e-05, "loss": 0.09425145387649536, "step": 653 }, { "epoch": 0.08841646317248139, "grad_norm": 3.265293598175049, "learning_rate": 2.9968965730130188e-05, "loss": 0.06211280822753906, "step": 654 }, { "epoch": 0.08855165654124665, "grad_norm": 3.7281758785247803, "learning_rate": 2.9968524121282455e-05, "loss": 0.07705247402191162, "step": 655 }, { "epoch": 0.08868684991001191, "grad_norm": 4.030885696411133, "learning_rate": 2.9968079395908178e-05, "loss": 0.09976205229759216, "step": 656 }, { "epoch": 0.08882204327877717, "grad_norm": 1.7111213207244873, "learning_rate": 2.9967631554099947e-05, "loss": 0.05883723497390747, "step": 657 }, { "epoch": 0.08895723664754243, "grad_norm": 1.616750955581665, "learning_rate": 2.996718059595101e-05, "loss": 0.07166612148284912, "step": 658 }, { "epoch": 0.0890924300163077, "grad_norm": 1.1883772611618042, "learning_rate": 2.9966726521555265e-05, "loss": 0.07008233666419983, "step": 659 }, { "epoch": 0.08922762338507297, "grad_norm": 1.9944844245910645, "learning_rate": 2.996626933100724e-05, "loss": 0.06989702582359314, "step": 660 }, { "epoch": 0.08936281675383823, "grad_norm": 1.8264142274856567, "learning_rate": 2.996580902440213e-05, "loss": 0.06547290086746216, "step": 661 }, { "epoch": 0.08949801012260349, "grad_norm": 2.1894795894622803, "learning_rate": 2.9965345601835773e-05, "loss": 0.06390075385570526, "step": 662 }, { "epoch": 0.08963320349136875, "grad_norm": 1.2526521682739258, "learning_rate": 2.996487906340466e-05, "loss": 0.06138528883457184, "step": 663 }, { "epoch": 0.089768396860134, "grad_norm": 1.6418873071670532, "learning_rate": 2.996440940920592e-05, "loss": 0.0744391679763794, "step": 664 }, { "epoch": 0.08990359022889927, "grad_norm": 0.9773853421211243, "learning_rate": 2.996393663933735e-05, "loss": 0.07288479804992676, "step": 665 }, { "epoch": 0.09003878359766454, "grad_norm": 1.7122795581817627, "learning_rate": 2.9963460753897364e-05, "loss": 0.06747186183929443, "step": 666 }, { "epoch": 0.0901739769664298, "grad_norm": 1.5426825284957886, "learning_rate": 2.996298175298506e-05, "loss": 0.08441933989524841, "step": 667 }, { "epoch": 0.09030917033519506, "grad_norm": 0.7839665412902832, "learning_rate": 2.996249963670016e-05, "loss": 0.07313214242458344, "step": 668 }, { "epoch": 0.09044436370396032, "grad_norm": 0.9040767550468445, "learning_rate": 2.9962014405143042e-05, "loss": 0.08949136734008789, "step": 669 }, { "epoch": 0.09057955707272558, "grad_norm": 0.5139188766479492, "learning_rate": 2.9961526058414745e-05, "loss": 0.06375241279602051, "step": 670 }, { "epoch": 0.09071475044149084, "grad_norm": 2.4885027408599854, "learning_rate": 2.9961034596616936e-05, "loss": 0.06757906079292297, "step": 671 }, { "epoch": 0.0908499438102561, "grad_norm": 2.298752546310425, "learning_rate": 2.996054001985194e-05, "loss": 0.061887532472610474, "step": 672 }, { "epoch": 0.09098513717902137, "grad_norm": 1.8645328283309937, "learning_rate": 2.9960042328222732e-05, "loss": 0.0736398696899414, "step": 673 }, { "epoch": 0.09112033054778663, "grad_norm": 0.5860607624053955, "learning_rate": 2.995954152183294e-05, "loss": 0.06288467347621918, "step": 674 }, { "epoch": 0.0912555239165519, "grad_norm": 1.5632336139678955, "learning_rate": 2.9959037600786822e-05, "loss": 0.07192747294902802, "step": 675 }, { "epoch": 0.09139071728531716, "grad_norm": 2.513230323791504, "learning_rate": 2.9958530565189307e-05, "loss": 0.0633854866027832, "step": 676 }, { "epoch": 0.09152591065408242, "grad_norm": 1.0967401266098022, "learning_rate": 2.995802041514596e-05, "loss": 0.08458459377288818, "step": 677 }, { "epoch": 0.09166110402284768, "grad_norm": 0.4788946807384491, "learning_rate": 2.9957507150762996e-05, "loss": 0.061201997101306915, "step": 678 }, { "epoch": 0.09179629739161294, "grad_norm": 0.6017131805419922, "learning_rate": 2.9956990772147283e-05, "loss": 0.053073760122060776, "step": 679 }, { "epoch": 0.09193149076037821, "grad_norm": 1.4322870969772339, "learning_rate": 2.9956471279406324e-05, "loss": 0.049558788537979126, "step": 680 }, { "epoch": 0.09206668412914347, "grad_norm": 0.6398486495018005, "learning_rate": 2.9955948672648298e-05, "loss": 0.062316685914993286, "step": 681 }, { "epoch": 0.09220187749790873, "grad_norm": 0.848783016204834, "learning_rate": 2.9955422951981994e-05, "loss": 0.0711323618888855, "step": 682 }, { "epoch": 0.09233707086667399, "grad_norm": 1.5682917833328247, "learning_rate": 2.995489411751688e-05, "loss": 0.08610308170318604, "step": 683 }, { "epoch": 0.09247226423543925, "grad_norm": 0.8479408621788025, "learning_rate": 2.9954362169363064e-05, "loss": 0.06241118907928467, "step": 684 }, { "epoch": 0.09260745760420451, "grad_norm": 0.9955445528030396, "learning_rate": 2.99538271076313e-05, "loss": 0.05744418501853943, "step": 685 }, { "epoch": 0.09274265097296977, "grad_norm": 0.5702171921730042, "learning_rate": 2.9953288932432985e-05, "loss": 0.051501452922821045, "step": 686 }, { "epoch": 0.09287784434173504, "grad_norm": 1.3401936292648315, "learning_rate": 2.995274764388018e-05, "loss": 0.068433478474617, "step": 687 }, { "epoch": 0.0930130377105003, "grad_norm": 2.055107593536377, "learning_rate": 2.9952203242085566e-05, "loss": 0.1082228571176529, "step": 688 }, { "epoch": 0.09314823107926556, "grad_norm": 1.7486412525177002, "learning_rate": 2.995165572716251e-05, "loss": 0.07478171586990356, "step": 689 }, { "epoch": 0.09328342444803082, "grad_norm": 0.4199610948562622, "learning_rate": 2.9951105099225003e-05, "loss": 0.06392104923725128, "step": 690 }, { "epoch": 0.09341861781679608, "grad_norm": 2.0826423168182373, "learning_rate": 2.995055135838768e-05, "loss": 0.08926263451576233, "step": 691 }, { "epoch": 0.09355381118556134, "grad_norm": 0.5368213057518005, "learning_rate": 2.994999450476584e-05, "loss": 0.06264454126358032, "step": 692 }, { "epoch": 0.0936890045543266, "grad_norm": 0.5854647159576416, "learning_rate": 2.9949434538475414e-05, "loss": 0.07068228721618652, "step": 693 }, { "epoch": 0.09382419792309188, "grad_norm": 1.64569890499115, "learning_rate": 2.9948871459633008e-05, "loss": 0.08626413345336914, "step": 694 }, { "epoch": 0.09395939129185714, "grad_norm": 1.3701481819152832, "learning_rate": 2.994830526835584e-05, "loss": 0.06382119655609131, "step": 695 }, { "epoch": 0.0940945846606224, "grad_norm": 1.5206036567687988, "learning_rate": 2.9947735964761803e-05, "loss": 0.08059477806091309, "step": 696 }, { "epoch": 0.09422977802938766, "grad_norm": 1.9635858535766602, "learning_rate": 2.9947163548969428e-05, "loss": 0.08320224285125732, "step": 697 }, { "epoch": 0.09436497139815292, "grad_norm": 3.18457293510437, "learning_rate": 2.9946588021097893e-05, "loss": 0.07568666338920593, "step": 698 }, { "epoch": 0.09450016476691818, "grad_norm": 0.7685016989707947, "learning_rate": 2.9946009381267028e-05, "loss": 0.06975710391998291, "step": 699 }, { "epoch": 0.09463535813568344, "grad_norm": 1.169830083847046, "learning_rate": 2.9945427629597306e-05, "loss": 0.05975687503814697, "step": 700 }, { "epoch": 0.09477055150444871, "grad_norm": 2.346580743789673, "learning_rate": 2.9944842766209853e-05, "loss": 0.06218397617340088, "step": 701 }, { "epoch": 0.09490574487321397, "grad_norm": 2.587913751602173, "learning_rate": 2.9944254791226444e-05, "loss": 0.0864153504371643, "step": 702 }, { "epoch": 0.09504093824197923, "grad_norm": 0.5739260315895081, "learning_rate": 2.994366370476949e-05, "loss": 0.06660217046737671, "step": 703 }, { "epoch": 0.0951761316107445, "grad_norm": 2.2969987392425537, "learning_rate": 2.9943069506962067e-05, "loss": 0.07569676637649536, "step": 704 }, { "epoch": 0.09531132497950975, "grad_norm": 0.560544490814209, "learning_rate": 2.9942472197927886e-05, "loss": 0.055384933948516846, "step": 705 }, { "epoch": 0.09544651834827501, "grad_norm": 1.0866575241088867, "learning_rate": 2.994187177779131e-05, "loss": 0.061683833599090576, "step": 706 }, { "epoch": 0.09558171171704027, "grad_norm": 0.6969910264015198, "learning_rate": 2.9941268246677353e-05, "loss": 0.06661128997802734, "step": 707 }, { "epoch": 0.09571690508580555, "grad_norm": 2.0652143955230713, "learning_rate": 2.9940661604711664e-05, "loss": 0.07562792301177979, "step": 708 }, { "epoch": 0.09585209845457081, "grad_norm": 2.230104923248291, "learning_rate": 2.994005185202056e-05, "loss": 0.06700295209884644, "step": 709 }, { "epoch": 0.09598729182333607, "grad_norm": 1.7517598867416382, "learning_rate": 2.9939438988730986e-05, "loss": 0.05676276981830597, "step": 710 }, { "epoch": 0.09612248519210133, "grad_norm": 1.8654752969741821, "learning_rate": 2.9938823014970553e-05, "loss": 0.057524263858795166, "step": 711 }, { "epoch": 0.09625767856086659, "grad_norm": 2.1614320278167725, "learning_rate": 2.99382039308675e-05, "loss": 0.06328561902046204, "step": 712 }, { "epoch": 0.09639287192963185, "grad_norm": 1.671129822731018, "learning_rate": 2.993758173655073e-05, "loss": 0.07212221622467041, "step": 713 }, { "epoch": 0.09652806529839711, "grad_norm": 0.696434497833252, "learning_rate": 2.993695643214979e-05, "loss": 0.07994964718818665, "step": 714 }, { "epoch": 0.09666325866716237, "grad_norm": 1.1885391473770142, "learning_rate": 2.9936328017794864e-05, "loss": 0.07391977310180664, "step": 715 }, { "epoch": 0.09679845203592764, "grad_norm": 0.6399041414260864, "learning_rate": 2.9935696493616796e-05, "loss": 0.060434311628341675, "step": 716 }, { "epoch": 0.0969336454046929, "grad_norm": 2.2488210201263428, "learning_rate": 2.9935061859747065e-05, "loss": 0.0648459643125534, "step": 717 }, { "epoch": 0.09706883877345816, "grad_norm": 2.4420716762542725, "learning_rate": 2.993442411631782e-05, "loss": 0.09676936268806458, "step": 718 }, { "epoch": 0.09720403214222342, "grad_norm": 0.48561614751815796, "learning_rate": 2.9933783263461827e-05, "loss": 0.06036248430609703, "step": 719 }, { "epoch": 0.09733922551098868, "grad_norm": 0.5393803119659424, "learning_rate": 2.9933139301312526e-05, "loss": 0.04506488889455795, "step": 720 }, { "epoch": 0.09747441887975394, "grad_norm": 1.6485989093780518, "learning_rate": 2.9932492230003984e-05, "loss": 0.068889319896698, "step": 721 }, { "epoch": 0.0976096122485192, "grad_norm": 0.4607279300689697, "learning_rate": 2.993184204967094e-05, "loss": 0.04657787084579468, "step": 722 }, { "epoch": 0.09774480561728448, "grad_norm": 2.5502140522003174, "learning_rate": 2.9931188760448748e-05, "loss": 0.09501942247152328, "step": 723 }, { "epoch": 0.09787999898604974, "grad_norm": 1.5143232345581055, "learning_rate": 2.9930532362473433e-05, "loss": 0.07087540626525879, "step": 724 }, { "epoch": 0.098015192354815, "grad_norm": 0.9561923146247864, "learning_rate": 2.9929872855881663e-05, "loss": 0.07919055223464966, "step": 725 }, { "epoch": 0.09815038572358026, "grad_norm": 2.1918020248413086, "learning_rate": 2.9929210240810744e-05, "loss": 0.06227082014083862, "step": 726 }, { "epoch": 0.09828557909234552, "grad_norm": 0.3666427731513977, "learning_rate": 2.9928544517398644e-05, "loss": 0.053845345973968506, "step": 727 }, { "epoch": 0.09842077246111078, "grad_norm": 1.5570653676986694, "learning_rate": 2.9927875685783966e-05, "loss": 0.07275102287530899, "step": 728 }, { "epoch": 0.09855596582987604, "grad_norm": 1.464547038078308, "learning_rate": 2.9927203746105968e-05, "loss": 0.06417824327945709, "step": 729 }, { "epoch": 0.09869115919864131, "grad_norm": 0.7170355319976807, "learning_rate": 2.9926528698504546e-05, "loss": 0.08210603147745132, "step": 730 }, { "epoch": 0.09882635256740657, "grad_norm": 0.7994945049285889, "learning_rate": 2.992585054312025e-05, "loss": 0.06015549600124359, "step": 731 }, { "epoch": 0.09896154593617183, "grad_norm": 1.777430772781372, "learning_rate": 2.9925169280094278e-05, "loss": 0.08092288672924042, "step": 732 }, { "epoch": 0.09909673930493709, "grad_norm": 2.4413414001464844, "learning_rate": 2.9924484909568472e-05, "loss": 0.0873079001903534, "step": 733 }, { "epoch": 0.09923193267370235, "grad_norm": 0.8687604665756226, "learning_rate": 2.9923797431685322e-05, "loss": 0.0524190291762352, "step": 734 }, { "epoch": 0.09936712604246761, "grad_norm": 0.5589920878410339, "learning_rate": 2.992310684658796e-05, "loss": 0.053833842277526855, "step": 735 }, { "epoch": 0.09950231941123287, "grad_norm": 1.160480260848999, "learning_rate": 2.9922413154420173e-05, "loss": 0.07847365736961365, "step": 736 }, { "epoch": 0.09963751277999815, "grad_norm": 0.40571966767311096, "learning_rate": 2.9921716355326393e-05, "loss": 0.0431639701128006, "step": 737 }, { "epoch": 0.09977270614876341, "grad_norm": 0.6975798606872559, "learning_rate": 2.9921016449451695e-05, "loss": 0.06810298562049866, "step": 738 }, { "epoch": 0.09990789951752867, "grad_norm": 0.7199623584747314, "learning_rate": 2.9920313436941805e-05, "loss": 0.05668321251869202, "step": 739 }, { "epoch": 0.10004309288629393, "grad_norm": 1.9567862749099731, "learning_rate": 2.991960731794309e-05, "loss": 0.07285964488983154, "step": 740 }, { "epoch": 0.10017828625505919, "grad_norm": 0.6199095845222473, "learning_rate": 2.991889809260257e-05, "loss": 0.08010756969451904, "step": 741 }, { "epoch": 0.10031347962382445, "grad_norm": 2.2614054679870605, "learning_rate": 2.9918185761067912e-05, "loss": 0.06480199098587036, "step": 742 }, { "epoch": 0.10044867299258971, "grad_norm": 2.2115559577941895, "learning_rate": 2.9917470323487423e-05, "loss": 0.0716850757598877, "step": 743 }, { "epoch": 0.10058386636135498, "grad_norm": 1.1710504293441772, "learning_rate": 2.9916751780010063e-05, "loss": 0.06521201133728027, "step": 744 }, { "epoch": 0.10071905973012024, "grad_norm": 1.2195080518722534, "learning_rate": 2.9916030130785436e-05, "loss": 0.08190056681632996, "step": 745 }, { "epoch": 0.1008542530988855, "grad_norm": 2.0479962825775146, "learning_rate": 2.99153053759638e-05, "loss": 0.09446015954017639, "step": 746 }, { "epoch": 0.10098944646765076, "grad_norm": 1.6798561811447144, "learning_rate": 2.991457751569604e-05, "loss": 0.06819860637187958, "step": 747 }, { "epoch": 0.10112463983641602, "grad_norm": 2.639376401901245, "learning_rate": 2.991384655013371e-05, "loss": 0.0689087063074112, "step": 748 }, { "epoch": 0.10125983320518128, "grad_norm": 0.7151151895523071, "learning_rate": 2.9913112479429e-05, "loss": 0.05495786666870117, "step": 749 }, { "epoch": 0.10139502657394654, "grad_norm": 0.5547966361045837, "learning_rate": 2.991237530373474e-05, "loss": 0.05543568730354309, "step": 750 }, { "epoch": 0.10153021994271182, "grad_norm": 0.76650470495224, "learning_rate": 2.9911635023204423e-05, "loss": 0.0550074577331543, "step": 751 }, { "epoch": 0.10166541331147708, "grad_norm": 1.9718046188354492, "learning_rate": 2.9910891637992172e-05, "loss": 0.07963669300079346, "step": 752 }, { "epoch": 0.10180060668024234, "grad_norm": 1.059044361114502, "learning_rate": 2.991014514825277e-05, "loss": 0.06547009944915771, "step": 753 }, { "epoch": 0.1019358000490076, "grad_norm": 0.5173660516738892, "learning_rate": 2.9909395554141638e-05, "loss": 0.06538379192352295, "step": 754 }, { "epoch": 0.10207099341777286, "grad_norm": 0.7970746159553528, "learning_rate": 2.9908642855814844e-05, "loss": 0.0796213150024414, "step": 755 }, { "epoch": 0.10220618678653812, "grad_norm": 0.509709358215332, "learning_rate": 2.9907887053429107e-05, "loss": 0.07489272952079773, "step": 756 }, { "epoch": 0.10234138015530338, "grad_norm": 0.43984255194664, "learning_rate": 2.9907128147141783e-05, "loss": 0.052528321743011475, "step": 757 }, { "epoch": 0.10247657352406865, "grad_norm": 2.5199241638183594, "learning_rate": 2.990636613711089e-05, "loss": 0.07140716910362244, "step": 758 }, { "epoch": 0.10261176689283391, "grad_norm": 0.7608581185340881, "learning_rate": 2.990560102349507e-05, "loss": 0.08876919746398926, "step": 759 }, { "epoch": 0.10274696026159917, "grad_norm": 0.3698713779449463, "learning_rate": 2.9904832806453635e-05, "loss": 0.0644555538892746, "step": 760 }, { "epoch": 0.10288215363036443, "grad_norm": 0.6989520788192749, "learning_rate": 2.9904061486146524e-05, "loss": 0.04861116409301758, "step": 761 }, { "epoch": 0.10301734699912969, "grad_norm": 0.44074511528015137, "learning_rate": 2.9903287062734333e-05, "loss": 0.054500073194503784, "step": 762 }, { "epoch": 0.10315254036789495, "grad_norm": 0.6014143824577332, "learning_rate": 2.990250953637831e-05, "loss": 0.07462161779403687, "step": 763 }, { "epoch": 0.10328773373666021, "grad_norm": 1.3084285259246826, "learning_rate": 2.9901728907240326e-05, "loss": 0.05892127752304077, "step": 764 }, { "epoch": 0.10342292710542549, "grad_norm": 1.170800805091858, "learning_rate": 2.9900945175482916e-05, "loss": 0.07677042484283447, "step": 765 }, { "epoch": 0.10355812047419075, "grad_norm": 0.8300243616104126, "learning_rate": 2.990015834126926e-05, "loss": 0.053655147552490234, "step": 766 }, { "epoch": 0.103693313842956, "grad_norm": 1.9866926670074463, "learning_rate": 2.989936840476318e-05, "loss": 0.08869680762290955, "step": 767 }, { "epoch": 0.10382850721172127, "grad_norm": 1.2555097341537476, "learning_rate": 2.9898575366129145e-05, "loss": 0.08476987481117249, "step": 768 }, { "epoch": 0.10396370058048653, "grad_norm": 1.3849974870681763, "learning_rate": 2.9897779225532273e-05, "loss": 0.07022476196289062, "step": 769 }, { "epoch": 0.10409889394925179, "grad_norm": 1.173958659172058, "learning_rate": 2.989697998313832e-05, "loss": 0.0702330470085144, "step": 770 }, { "epoch": 0.10423408731801705, "grad_norm": 1.936564326286316, "learning_rate": 2.989617763911369e-05, "loss": 0.07958516478538513, "step": 771 }, { "epoch": 0.1043692806867823, "grad_norm": 1.0194870233535767, "learning_rate": 2.9895372193625442e-05, "loss": 0.045984745025634766, "step": 772 }, { "epoch": 0.10450447405554758, "grad_norm": 1.7459651231765747, "learning_rate": 2.9894563646841273e-05, "loss": 0.09469527006149292, "step": 773 }, { "epoch": 0.10463966742431284, "grad_norm": 0.6957114338874817, "learning_rate": 2.9893751998929523e-05, "loss": 0.08088427782058716, "step": 774 }, { "epoch": 0.1047748607930781, "grad_norm": 0.6176539063453674, "learning_rate": 2.9892937250059187e-05, "loss": 0.06235021352767944, "step": 775 }, { "epoch": 0.10491005416184336, "grad_norm": 1.5887959003448486, "learning_rate": 2.9892119400399894e-05, "loss": 0.06383442878723145, "step": 776 }, { "epoch": 0.10504524753060862, "grad_norm": 1.5024386644363403, "learning_rate": 2.989129845012193e-05, "loss": 0.06038999557495117, "step": 777 }, { "epoch": 0.10518044089937388, "grad_norm": 2.5025908946990967, "learning_rate": 2.989047439939621e-05, "loss": 0.08759409189224243, "step": 778 }, { "epoch": 0.10531563426813914, "grad_norm": 0.47842252254486084, "learning_rate": 2.9889647248394324e-05, "loss": 0.07418322563171387, "step": 779 }, { "epoch": 0.10545082763690442, "grad_norm": 1.1762672662734985, "learning_rate": 2.9888816997288475e-05, "loss": 0.06556662172079086, "step": 780 }, { "epoch": 0.10558602100566968, "grad_norm": 1.706656813621521, "learning_rate": 2.988798364625153e-05, "loss": 0.07296490669250488, "step": 781 }, { "epoch": 0.10572121437443494, "grad_norm": 0.6584717035293579, "learning_rate": 2.9887147195457002e-05, "loss": 0.059939682483673096, "step": 782 }, { "epoch": 0.1058564077432002, "grad_norm": 1.2951629161834717, "learning_rate": 2.9886307645079037e-05, "loss": 0.07447600364685059, "step": 783 }, { "epoch": 0.10599160111196546, "grad_norm": 0.5977193713188171, "learning_rate": 2.9885464995292436e-05, "loss": 0.054437726736068726, "step": 784 }, { "epoch": 0.10612679448073072, "grad_norm": 1.9214591979980469, "learning_rate": 2.9884619246272648e-05, "loss": 0.07319247722625732, "step": 785 }, { "epoch": 0.10626198784949598, "grad_norm": 0.9901397824287415, "learning_rate": 2.988377039819575e-05, "loss": 0.08041355013847351, "step": 786 }, { "epoch": 0.10639718121826125, "grad_norm": 1.2070285081863403, "learning_rate": 2.9882918451238494e-05, "loss": 0.06483381986618042, "step": 787 }, { "epoch": 0.10653237458702651, "grad_norm": 1.4205820560455322, "learning_rate": 2.988206340557825e-05, "loss": 0.06248599290847778, "step": 788 }, { "epoch": 0.10666756795579177, "grad_norm": 1.5785423517227173, "learning_rate": 2.9881205261393037e-05, "loss": 0.06281136721372604, "step": 789 }, { "epoch": 0.10680276132455703, "grad_norm": 0.6278877258300781, "learning_rate": 2.988034401886154e-05, "loss": 0.07736283540725708, "step": 790 }, { "epoch": 0.10693795469332229, "grad_norm": 3.044987201690674, "learning_rate": 2.9879479678163065e-05, "loss": 0.07018107175827026, "step": 791 }, { "epoch": 0.10707314806208755, "grad_norm": 0.5835976004600525, "learning_rate": 2.9878612239477568e-05, "loss": 0.06766742467880249, "step": 792 }, { "epoch": 0.10720834143085281, "grad_norm": 0.8784189224243164, "learning_rate": 2.9877741702985666e-05, "loss": 0.08805882930755615, "step": 793 }, { "epoch": 0.10734353479961808, "grad_norm": 0.8258385062217712, "learning_rate": 2.98768680688686e-05, "loss": 0.05716429650783539, "step": 794 }, { "epoch": 0.10747872816838334, "grad_norm": 0.5803067088127136, "learning_rate": 2.9875991337308274e-05, "loss": 0.0653550773859024, "step": 795 }, { "epoch": 0.1076139215371486, "grad_norm": 0.9804613590240479, "learning_rate": 2.987511150848722e-05, "loss": 0.07762131094932556, "step": 796 }, { "epoch": 0.10774911490591386, "grad_norm": 1.0947750806808472, "learning_rate": 2.9874228582588627e-05, "loss": 0.07066251337528229, "step": 797 }, { "epoch": 0.10788430827467912, "grad_norm": 0.5301551818847656, "learning_rate": 2.9873342559796325e-05, "loss": 0.06156620383262634, "step": 798 }, { "epoch": 0.10801950164344438, "grad_norm": 0.9586540460586548, "learning_rate": 2.9872453440294787e-05, "loss": 0.07479715347290039, "step": 799 }, { "epoch": 0.10815469501220965, "grad_norm": 0.683612585067749, "learning_rate": 2.9871561224269134e-05, "loss": 0.05333220958709717, "step": 800 }, { "epoch": 0.10828988838097492, "grad_norm": 0.7130185961723328, "learning_rate": 2.9870665911905127e-05, "loss": 0.05451911687850952, "step": 801 }, { "epoch": 0.10842508174974018, "grad_norm": 0.5970738530158997, "learning_rate": 2.9869767503389176e-05, "loss": 0.0738338828086853, "step": 802 }, { "epoch": 0.10856027511850544, "grad_norm": 2.720468282699585, "learning_rate": 2.986886599890834e-05, "loss": 0.08367794752120972, "step": 803 }, { "epoch": 0.1086954684872707, "grad_norm": 0.5943778157234192, "learning_rate": 2.9867961398650306e-05, "loss": 0.06560193002223969, "step": 804 }, { "epoch": 0.10883066185603596, "grad_norm": 0.3316863775253296, "learning_rate": 2.9867053702803425e-05, "loss": 0.06915531307458878, "step": 805 }, { "epoch": 0.10896585522480122, "grad_norm": 1.3966037034988403, "learning_rate": 2.9866142911556685e-05, "loss": 0.055244386196136475, "step": 806 }, { "epoch": 0.10910104859356648, "grad_norm": 0.6676267981529236, "learning_rate": 2.9865229025099713e-05, "loss": 0.05885940045118332, "step": 807 }, { "epoch": 0.10923624196233175, "grad_norm": 1.6562120914459229, "learning_rate": 2.986431204362279e-05, "loss": 0.07612279802560806, "step": 808 }, { "epoch": 0.10937143533109701, "grad_norm": 1.0587570667266846, "learning_rate": 2.9863391967316835e-05, "loss": 0.07631005346775055, "step": 809 }, { "epoch": 0.10950662869986227, "grad_norm": 1.4671709537506104, "learning_rate": 2.9862468796373404e-05, "loss": 0.06622779369354248, "step": 810 }, { "epoch": 0.10964182206862753, "grad_norm": 1.1340380907058716, "learning_rate": 2.9861542530984718e-05, "loss": 0.07871812582015991, "step": 811 }, { "epoch": 0.1097770154373928, "grad_norm": 3.6486597061157227, "learning_rate": 2.9860613171343624e-05, "loss": 0.0973467230796814, "step": 812 }, { "epoch": 0.10991220880615805, "grad_norm": 3.2831902503967285, "learning_rate": 2.9859680717643623e-05, "loss": 0.07520408928394318, "step": 813 }, { "epoch": 0.11004740217492331, "grad_norm": 1.5942511558532715, "learning_rate": 2.985874517007885e-05, "loss": 0.05213823914527893, "step": 814 }, { "epoch": 0.11018259554368859, "grad_norm": 2.048729181289673, "learning_rate": 2.98578065288441e-05, "loss": 0.052881523966789246, "step": 815 }, { "epoch": 0.11031778891245385, "grad_norm": 0.43020763993263245, "learning_rate": 2.9856864794134798e-05, "loss": 0.04438048601150513, "step": 816 }, { "epoch": 0.11045298228121911, "grad_norm": 2.1527047157287598, "learning_rate": 2.9855919966147025e-05, "loss": 0.06952786445617676, "step": 817 }, { "epoch": 0.11058817564998437, "grad_norm": 1.5365175008773804, "learning_rate": 2.9854972045077485e-05, "loss": 0.05727046728134155, "step": 818 }, { "epoch": 0.11072336901874963, "grad_norm": 0.9647189974784851, "learning_rate": 2.9854021031123555e-05, "loss": 0.05073575675487518, "step": 819 }, { "epoch": 0.11085856238751489, "grad_norm": 1.5040180683135986, "learning_rate": 2.9853066924483232e-05, "loss": 0.0715726763010025, "step": 820 }, { "epoch": 0.11099375575628015, "grad_norm": 1.0900242328643799, "learning_rate": 2.9852109725355173e-05, "loss": 0.06380832195281982, "step": 821 }, { "epoch": 0.11112894912504542, "grad_norm": 1.058659315109253, "learning_rate": 2.9851149433938662e-05, "loss": 0.07913881540298462, "step": 822 }, { "epoch": 0.11126414249381068, "grad_norm": 1.3789092302322388, "learning_rate": 2.9850186050433645e-05, "loss": 0.05524668097496033, "step": 823 }, { "epoch": 0.11139933586257594, "grad_norm": 4.019479751586914, "learning_rate": 2.9849219575040708e-05, "loss": 0.07588204741477966, "step": 824 }, { "epoch": 0.1115345292313412, "grad_norm": 2.0158936977386475, "learning_rate": 2.984825000796106e-05, "loss": 0.07111486792564392, "step": 825 }, { "epoch": 0.11166972260010646, "grad_norm": 1.8207803964614868, "learning_rate": 2.9847277349396586e-05, "loss": 0.07971811294555664, "step": 826 }, { "epoch": 0.11180491596887172, "grad_norm": 0.3318125307559967, "learning_rate": 2.984630159954979e-05, "loss": 0.06541299819946289, "step": 827 }, { "epoch": 0.11194010933763698, "grad_norm": 0.5564690232276917, "learning_rate": 2.9845322758623833e-05, "loss": 0.06687232851982117, "step": 828 }, { "epoch": 0.11207530270640226, "grad_norm": 4.113993167877197, "learning_rate": 2.984434082682251e-05, "loss": 0.09146535396575928, "step": 829 }, { "epoch": 0.11221049607516752, "grad_norm": 1.7399711608886719, "learning_rate": 2.984335580435027e-05, "loss": 0.06328044831752777, "step": 830 }, { "epoch": 0.11234568944393278, "grad_norm": 0.9513656497001648, "learning_rate": 2.9842367691412192e-05, "loss": 0.07036569714546204, "step": 831 }, { "epoch": 0.11248088281269804, "grad_norm": 0.7836118340492249, "learning_rate": 2.9841376488214015e-05, "loss": 0.06594693660736084, "step": 832 }, { "epoch": 0.1126160761814633, "grad_norm": 1.620522141456604, "learning_rate": 2.984038219496211e-05, "loss": 0.049950480461120605, "step": 833 }, { "epoch": 0.11275126955022856, "grad_norm": 0.8438585996627808, "learning_rate": 2.9839384811863493e-05, "loss": 0.05754745006561279, "step": 834 }, { "epoch": 0.11288646291899382, "grad_norm": 1.3088704347610474, "learning_rate": 2.9838384339125824e-05, "loss": 0.04670900106430054, "step": 835 }, { "epoch": 0.11302165628775908, "grad_norm": 2.862694025039673, "learning_rate": 2.9837380776957405e-05, "loss": 0.07493305206298828, "step": 836 }, { "epoch": 0.11315684965652435, "grad_norm": 0.6323216557502747, "learning_rate": 2.9836374125567193e-05, "loss": 0.0698775202035904, "step": 837 }, { "epoch": 0.11329204302528961, "grad_norm": 0.9724240899085999, "learning_rate": 2.9835364385164764e-05, "loss": 0.07646346092224121, "step": 838 }, { "epoch": 0.11342723639405487, "grad_norm": 0.9290037155151367, "learning_rate": 2.983435155596036e-05, "loss": 0.06411190330982208, "step": 839 }, { "epoch": 0.11356242976282013, "grad_norm": 1.4180476665496826, "learning_rate": 2.9833335638164858e-05, "loss": 0.06833842396736145, "step": 840 }, { "epoch": 0.11369762313158539, "grad_norm": 1.3837205171585083, "learning_rate": 2.9832316631989774e-05, "loss": 0.06893129646778107, "step": 841 }, { "epoch": 0.11383281650035065, "grad_norm": 0.9323716163635254, "learning_rate": 2.9831294537647272e-05, "loss": 0.07906448096036911, "step": 842 }, { "epoch": 0.11396800986911591, "grad_norm": 0.43834733963012695, "learning_rate": 2.9830269355350155e-05, "loss": 0.06279188394546509, "step": 843 }, { "epoch": 0.11410320323788119, "grad_norm": 1.4275168180465698, "learning_rate": 2.9829241085311872e-05, "loss": 0.04370114952325821, "step": 844 }, { "epoch": 0.11423839660664645, "grad_norm": 0.7436087727546692, "learning_rate": 2.9828209727746522e-05, "loss": 0.07141464948654175, "step": 845 }, { "epoch": 0.11437358997541171, "grad_norm": 1.1526052951812744, "learning_rate": 2.982717528286883e-05, "loss": 0.06148737668991089, "step": 846 }, { "epoch": 0.11450878334417697, "grad_norm": 0.6929847002029419, "learning_rate": 2.9826137750894176e-05, "loss": 0.047165051102638245, "step": 847 }, { "epoch": 0.11464397671294223, "grad_norm": 1.6904442310333252, "learning_rate": 2.9825097132038578e-05, "loss": 0.07676011323928833, "step": 848 }, { "epoch": 0.11477917008170749, "grad_norm": 1.3478825092315674, "learning_rate": 2.9824053426518703e-05, "loss": 0.07952776551246643, "step": 849 }, { "epoch": 0.11491436345047275, "grad_norm": 0.4725055992603302, "learning_rate": 2.9823006634551848e-05, "loss": 0.06164819002151489, "step": 850 }, { "epoch": 0.11504955681923802, "grad_norm": 0.39076611399650574, "learning_rate": 2.9821956756355973e-05, "loss": 0.05239543318748474, "step": 851 }, { "epoch": 0.11518475018800328, "grad_norm": 0.5654911994934082, "learning_rate": 2.9820903792149653e-05, "loss": 0.0677807629108429, "step": 852 }, { "epoch": 0.11531994355676854, "grad_norm": 0.42723843455314636, "learning_rate": 2.981984774215214e-05, "loss": 0.04578013718128204, "step": 853 }, { "epoch": 0.1154551369255338, "grad_norm": 1.8594659566879272, "learning_rate": 2.9818788606583286e-05, "loss": 0.08013622462749481, "step": 854 }, { "epoch": 0.11559033029429906, "grad_norm": 1.2543073892593384, "learning_rate": 2.9817726385663627e-05, "loss": 0.05468093603849411, "step": 855 }, { "epoch": 0.11572552366306432, "grad_norm": 0.7090474963188171, "learning_rate": 2.9816661079614316e-05, "loss": 0.05808126926422119, "step": 856 }, { "epoch": 0.11586071703182958, "grad_norm": 0.5124869346618652, "learning_rate": 2.9815592688657154e-05, "loss": 0.06463829427957535, "step": 857 }, { "epoch": 0.11599591040059486, "grad_norm": 0.6906826496124268, "learning_rate": 2.9814521213014588e-05, "loss": 0.07837387919425964, "step": 858 }, { "epoch": 0.11613110376936012, "grad_norm": 0.575003981590271, "learning_rate": 2.9813446652909707e-05, "loss": 0.04965294897556305, "step": 859 }, { "epoch": 0.11626629713812538, "grad_norm": 1.7726686000823975, "learning_rate": 2.981236900856624e-05, "loss": 0.06196914613246918, "step": 860 }, { "epoch": 0.11640149050689064, "grad_norm": 4.158353328704834, "learning_rate": 2.9811288280208552e-05, "loss": 0.1057438999414444, "step": 861 }, { "epoch": 0.1165366838756559, "grad_norm": 2.241472005844116, "learning_rate": 2.9810204468061664e-05, "loss": 0.09526422619819641, "step": 862 }, { "epoch": 0.11667187724442116, "grad_norm": 1.0776751041412354, "learning_rate": 2.9809117572351223e-05, "loss": 0.09615468978881836, "step": 863 }, { "epoch": 0.11680707061318642, "grad_norm": 1.199429988861084, "learning_rate": 2.9808027593303537e-05, "loss": 0.07904808223247528, "step": 864 }, { "epoch": 0.11694226398195169, "grad_norm": 1.8700757026672363, "learning_rate": 2.980693453114554e-05, "loss": 0.07645630836486816, "step": 865 }, { "epoch": 0.11707745735071695, "grad_norm": 0.6234368085861206, "learning_rate": 2.980583838610481e-05, "loss": 0.058155059814453125, "step": 866 }, { "epoch": 0.11721265071948221, "grad_norm": 1.198128342628479, "learning_rate": 2.980473915840957e-05, "loss": 0.0551808625459671, "step": 867 }, { "epoch": 0.11734784408824747, "grad_norm": 0.831923246383667, "learning_rate": 2.9803636848288696e-05, "loss": 0.06490929424762726, "step": 868 }, { "epoch": 0.11748303745701273, "grad_norm": 2.5640063285827637, "learning_rate": 2.9802531455971686e-05, "loss": 0.06057201325893402, "step": 869 }, { "epoch": 0.11761823082577799, "grad_norm": 0.9984391927719116, "learning_rate": 2.980142298168869e-05, "loss": 0.04081990197300911, "step": 870 }, { "epoch": 0.11775342419454325, "grad_norm": 1.7455575466156006, "learning_rate": 2.9800311425670495e-05, "loss": 0.05051445960998535, "step": 871 }, { "epoch": 0.11788861756330853, "grad_norm": 1.3665772676467896, "learning_rate": 2.9799196788148538e-05, "loss": 0.06193774938583374, "step": 872 }, { "epoch": 0.11802381093207379, "grad_norm": 1.3060983419418335, "learning_rate": 2.9798079069354893e-05, "loss": 0.04442111402750015, "step": 873 }, { "epoch": 0.11815900430083905, "grad_norm": 1.7216105461120605, "learning_rate": 2.9796958269522273e-05, "loss": 0.06150418519973755, "step": 874 }, { "epoch": 0.1182941976696043, "grad_norm": 1.261020302772522, "learning_rate": 2.9795834388884034e-05, "loss": 0.06411415338516235, "step": 875 }, { "epoch": 0.11842939103836957, "grad_norm": 1.3385952711105347, "learning_rate": 2.979470742767417e-05, "loss": 0.040393806993961334, "step": 876 }, { "epoch": 0.11856458440713483, "grad_norm": 1.189489483833313, "learning_rate": 2.9793577386127327e-05, "loss": 0.059862058609724045, "step": 877 }, { "epoch": 0.11869977777590009, "grad_norm": 0.8483715057373047, "learning_rate": 2.9792444264478784e-05, "loss": 0.09011414647102356, "step": 878 }, { "epoch": 0.11883497114466536, "grad_norm": 0.8031742572784424, "learning_rate": 2.979130806296446e-05, "loss": 0.0652703046798706, "step": 879 }, { "epoch": 0.11897016451343062, "grad_norm": 1.6406009197235107, "learning_rate": 2.9790168781820925e-05, "loss": 0.05732802674174309, "step": 880 }, { "epoch": 0.11910535788219588, "grad_norm": 2.006895065307617, "learning_rate": 2.9789026421285375e-05, "loss": 0.06943021714687347, "step": 881 }, { "epoch": 0.11924055125096114, "grad_norm": 0.8554799556732178, "learning_rate": 2.9787880981595663e-05, "loss": 0.06801632046699524, "step": 882 }, { "epoch": 0.1193757446197264, "grad_norm": 0.3612128794193268, "learning_rate": 2.9786732462990267e-05, "loss": 0.05761076509952545, "step": 883 }, { "epoch": 0.11951093798849166, "grad_norm": 0.5959079265594482, "learning_rate": 2.9785580865708323e-05, "loss": 0.05626345053315163, "step": 884 }, { "epoch": 0.11964613135725692, "grad_norm": 1.26160728931427, "learning_rate": 2.97844261899896e-05, "loss": 0.06064796447753906, "step": 885 }, { "epoch": 0.1197813247260222, "grad_norm": 0.37730908393859863, "learning_rate": 2.9783268436074495e-05, "loss": 0.03973902761936188, "step": 886 }, { "epoch": 0.11991651809478746, "grad_norm": 1.4231590032577515, "learning_rate": 2.978210760420407e-05, "loss": 0.05634953826665878, "step": 887 }, { "epoch": 0.12005171146355272, "grad_norm": 1.3626042604446411, "learning_rate": 2.978094369462002e-05, "loss": 0.06598908454179764, "step": 888 }, { "epoch": 0.12018690483231798, "grad_norm": 1.5572553873062134, "learning_rate": 2.977977670756467e-05, "loss": 0.05168672651052475, "step": 889 }, { "epoch": 0.12032209820108324, "grad_norm": 0.9045180082321167, "learning_rate": 2.9778606643280987e-05, "loss": 0.05312810838222504, "step": 890 }, { "epoch": 0.1204572915698485, "grad_norm": 0.4083821475505829, "learning_rate": 2.97774335020126e-05, "loss": 0.06432473659515381, "step": 891 }, { "epoch": 0.12059248493861376, "grad_norm": 1.6102540493011475, "learning_rate": 2.9776257284003748e-05, "loss": 0.06559291481971741, "step": 892 }, { "epoch": 0.12072767830737903, "grad_norm": 0.7310768365859985, "learning_rate": 2.9775077989499338e-05, "loss": 0.0623704195022583, "step": 893 }, { "epoch": 0.12086287167614429, "grad_norm": 1.1899971961975098, "learning_rate": 2.97738956187449e-05, "loss": 0.06307214498519897, "step": 894 }, { "epoch": 0.12099806504490955, "grad_norm": 0.5913503766059875, "learning_rate": 2.9772710171986605e-05, "loss": 0.05548405647277832, "step": 895 }, { "epoch": 0.12113325841367481, "grad_norm": 0.3939407467842102, "learning_rate": 2.977152164947128e-05, "loss": 0.05418830364942551, "step": 896 }, { "epoch": 0.12126845178244007, "grad_norm": 0.5984494686126709, "learning_rate": 2.9770330051446373e-05, "loss": 0.06009635329246521, "step": 897 }, { "epoch": 0.12140364515120533, "grad_norm": 0.4753250777721405, "learning_rate": 2.976913537815999e-05, "loss": 0.05103091523051262, "step": 898 }, { "epoch": 0.12153883851997059, "grad_norm": 1.1283420324325562, "learning_rate": 2.9767937629860853e-05, "loss": 0.061839379370212555, "step": 899 }, { "epoch": 0.12167403188873585, "grad_norm": 0.39484310150146484, "learning_rate": 2.9766736806798353e-05, "loss": 0.03699915111064911, "step": 900 }, { "epoch": 0.12180922525750112, "grad_norm": 1.2396390438079834, "learning_rate": 2.9765532909222512e-05, "loss": 0.09471902251243591, "step": 901 }, { "epoch": 0.12194441862626638, "grad_norm": 0.6397807002067566, "learning_rate": 2.976432593738397e-05, "loss": 0.06966215372085571, "step": 902 }, { "epoch": 0.12207961199503164, "grad_norm": 2.4892289638519287, "learning_rate": 2.9763115891534036e-05, "loss": 0.06544773280620575, "step": 903 }, { "epoch": 0.1222148053637969, "grad_norm": 1.672637939453125, "learning_rate": 2.9761902771924648e-05, "loss": 0.06459483504295349, "step": 904 }, { "epoch": 0.12234999873256217, "grad_norm": 1.1361255645751953, "learning_rate": 2.9760686578808387e-05, "loss": 0.098862424492836, "step": 905 }, { "epoch": 0.12248519210132743, "grad_norm": 0.5333594083786011, "learning_rate": 2.9759467312438462e-05, "loss": 0.055858880281448364, "step": 906 }, { "epoch": 0.12262038547009269, "grad_norm": 4.857579708099365, "learning_rate": 2.975824497306874e-05, "loss": 0.08147549629211426, "step": 907 }, { "epoch": 0.12275557883885796, "grad_norm": 2.879769802093506, "learning_rate": 2.9757019560953707e-05, "loss": 0.06890100240707397, "step": 908 }, { "epoch": 0.12289077220762322, "grad_norm": 1.9118424654006958, "learning_rate": 2.9755791076348517e-05, "loss": 0.07549156248569489, "step": 909 }, { "epoch": 0.12302596557638848, "grad_norm": 1.9207574129104614, "learning_rate": 2.9754559519508924e-05, "loss": 0.05638545751571655, "step": 910 }, { "epoch": 0.12316115894515374, "grad_norm": 1.3111354112625122, "learning_rate": 2.975332489069137e-05, "loss": 0.06364589929580688, "step": 911 }, { "epoch": 0.123296352313919, "grad_norm": 1.1506239175796509, "learning_rate": 2.9752087190152893e-05, "loss": 0.05111977458000183, "step": 912 }, { "epoch": 0.12343154568268426, "grad_norm": 2.007946491241455, "learning_rate": 2.97508464181512e-05, "loss": 0.06762352585792542, "step": 913 }, { "epoch": 0.12356673905144952, "grad_norm": 3.3811326026916504, "learning_rate": 2.9749602574944615e-05, "loss": 0.06516307592391968, "step": 914 }, { "epoch": 0.1237019324202148, "grad_norm": 3.5728046894073486, "learning_rate": 2.9748355660792125e-05, "loss": 0.07153135538101196, "step": 915 }, { "epoch": 0.12383712578898005, "grad_norm": 5.441051006317139, "learning_rate": 2.9747105675953338e-05, "loss": 0.10422030091285706, "step": 916 }, { "epoch": 0.12397231915774531, "grad_norm": 1.6395776271820068, "learning_rate": 2.9745852620688506e-05, "loss": 0.05035269260406494, "step": 917 }, { "epoch": 0.12410751252651057, "grad_norm": 2.6748416423797607, "learning_rate": 2.974459649525853e-05, "loss": 0.0686805248260498, "step": 918 }, { "epoch": 0.12424270589527583, "grad_norm": 0.5252537727355957, "learning_rate": 2.9743337299924925e-05, "loss": 0.07046585530042648, "step": 919 }, { "epoch": 0.1243778992640411, "grad_norm": 0.5221901535987854, "learning_rate": 2.9742075034949883e-05, "loss": 0.05802202224731445, "step": 920 }, { "epoch": 0.12451309263280635, "grad_norm": 1.5236924886703491, "learning_rate": 2.97408097005962e-05, "loss": 0.049811482429504395, "step": 921 }, { "epoch": 0.12464828600157163, "grad_norm": 0.9998456835746765, "learning_rate": 2.973954129712733e-05, "loss": 0.0606459379196167, "step": 922 }, { "epoch": 0.12478347937033689, "grad_norm": 1.7388442754745483, "learning_rate": 2.973826982480736e-05, "loss": 0.04614129662513733, "step": 923 }, { "epoch": 0.12491867273910215, "grad_norm": 0.809036374092102, "learning_rate": 2.9736995283901022e-05, "loss": 0.07797861099243164, "step": 924 }, { "epoch": 0.12505386610786742, "grad_norm": 0.873992919921875, "learning_rate": 2.9735717674673676e-05, "loss": 0.048703208565711975, "step": 925 }, { "epoch": 0.12518905947663267, "grad_norm": 0.6025969386100769, "learning_rate": 2.973443699739133e-05, "loss": 0.05472603440284729, "step": 926 }, { "epoch": 0.12532425284539794, "grad_norm": 1.1863207817077637, "learning_rate": 2.973315325232063e-05, "loss": 0.06854510307312012, "step": 927 }, { "epoch": 0.1254594462141632, "grad_norm": 0.7763834595680237, "learning_rate": 2.9731866439728853e-05, "loss": 0.07515951991081238, "step": 928 }, { "epoch": 0.12559463958292846, "grad_norm": 1.0926889181137085, "learning_rate": 2.9730576559883924e-05, "loss": 0.0751802921295166, "step": 929 }, { "epoch": 0.1257298329516937, "grad_norm": 0.715045154094696, "learning_rate": 2.97292836130544e-05, "loss": 0.06445688009262085, "step": 930 }, { "epoch": 0.12586502632045898, "grad_norm": 0.6126769185066223, "learning_rate": 2.9727987599509485e-05, "loss": 0.07244141399860382, "step": 931 }, { "epoch": 0.12600021968922423, "grad_norm": 2.9130282402038574, "learning_rate": 2.972668851951901e-05, "loss": 0.08251702785491943, "step": 932 }, { "epoch": 0.1261354130579895, "grad_norm": 3.5020670890808105, "learning_rate": 2.9725386373353455e-05, "loss": 0.0746297836303711, "step": 933 }, { "epoch": 0.12627060642675478, "grad_norm": 2.260486364364624, "learning_rate": 2.972408116128393e-05, "loss": 0.055924415588378906, "step": 934 }, { "epoch": 0.12640579979552002, "grad_norm": 0.6031903624534607, "learning_rate": 2.972277288358219e-05, "loss": 0.07023245096206665, "step": 935 }, { "epoch": 0.1265409931642853, "grad_norm": 1.5049775838851929, "learning_rate": 2.9721461540520628e-05, "loss": 0.07419735193252563, "step": 936 }, { "epoch": 0.12667618653305054, "grad_norm": 0.8745632171630859, "learning_rate": 2.9720147132372265e-05, "loss": 0.047914132475852966, "step": 937 }, { "epoch": 0.12681137990181582, "grad_norm": 1.3793575763702393, "learning_rate": 2.9718829659410772e-05, "loss": 0.07423095405101776, "step": 938 }, { "epoch": 0.12694657327058106, "grad_norm": 1.5917911529541016, "learning_rate": 2.9717509121910453e-05, "loss": 0.08392691612243652, "step": 939 }, { "epoch": 0.12708176663934634, "grad_norm": 1.1619272232055664, "learning_rate": 2.971618552014625e-05, "loss": 0.0750933438539505, "step": 940 }, { "epoch": 0.1272169600081116, "grad_norm": 1.3756927251815796, "learning_rate": 2.971485885439375e-05, "loss": 0.05483636260032654, "step": 941 }, { "epoch": 0.12735215337687686, "grad_norm": 1.454074501991272, "learning_rate": 2.9713529124929163e-05, "loss": 0.06254559755325317, "step": 942 }, { "epoch": 0.12748734674564213, "grad_norm": 0.8426021933555603, "learning_rate": 2.9712196332029352e-05, "loss": 0.06235361099243164, "step": 943 }, { "epoch": 0.12762254011440738, "grad_norm": 0.5614035129547119, "learning_rate": 2.971086047597181e-05, "loss": 0.0639098584651947, "step": 944 }, { "epoch": 0.12775773348317265, "grad_norm": 0.4427438974380493, "learning_rate": 2.9709521557034668e-05, "loss": 0.05317005515098572, "step": 945 }, { "epoch": 0.1278929268519379, "grad_norm": 1.5867512226104736, "learning_rate": 2.9708179575496696e-05, "loss": 0.06115603446960449, "step": 946 }, { "epoch": 0.12802812022070317, "grad_norm": 1.1928867101669312, "learning_rate": 2.9706834531637303e-05, "loss": 0.06077621877193451, "step": 947 }, { "epoch": 0.12816331358946845, "grad_norm": 1.291746973991394, "learning_rate": 2.9705486425736537e-05, "loss": 0.06889256089925766, "step": 948 }, { "epoch": 0.1282985069582337, "grad_norm": 0.37629184126853943, "learning_rate": 2.9704135258075077e-05, "loss": 0.056202471256256104, "step": 949 }, { "epoch": 0.12843370032699897, "grad_norm": 0.49766677618026733, "learning_rate": 2.970278102893424e-05, "loss": 0.06517137587070465, "step": 950 }, { "epoch": 0.1285688936957642, "grad_norm": 3.0935404300689697, "learning_rate": 2.9701423738595992e-05, "loss": 0.06942284107208252, "step": 951 }, { "epoch": 0.1287040870645295, "grad_norm": 0.8044374585151672, "learning_rate": 2.9700063387342925e-05, "loss": 0.05415910482406616, "step": 952 }, { "epoch": 0.12883928043329473, "grad_norm": 1.8038911819458008, "learning_rate": 2.969869997545827e-05, "loss": 0.05029727518558502, "step": 953 }, { "epoch": 0.12897447380206, "grad_norm": 1.792656660079956, "learning_rate": 2.9697333503225897e-05, "loss": 0.08134263753890991, "step": 954 }, { "epoch": 0.12910966717082528, "grad_norm": 1.6244159936904907, "learning_rate": 2.969596397093031e-05, "loss": 0.08842146396636963, "step": 955 }, { "epoch": 0.12924486053959053, "grad_norm": 1.7177331447601318, "learning_rate": 2.969459137885666e-05, "loss": 0.06281697750091553, "step": 956 }, { "epoch": 0.1293800539083558, "grad_norm": 0.923691987991333, "learning_rate": 2.969321572729072e-05, "loss": 0.06180095672607422, "step": 957 }, { "epoch": 0.12951524727712105, "grad_norm": 1.5431050062179565, "learning_rate": 2.9691837016518915e-05, "loss": 0.05874697118997574, "step": 958 }, { "epoch": 0.12965044064588632, "grad_norm": 1.1677707433700562, "learning_rate": 2.9690455246828294e-05, "loss": 0.07713724672794342, "step": 959 }, { "epoch": 0.12978563401465157, "grad_norm": 0.8348269462585449, "learning_rate": 2.968907041850655e-05, "loss": 0.09317457675933838, "step": 960 }, { "epoch": 0.12992082738341684, "grad_norm": 1.7617661952972412, "learning_rate": 2.968768253184202e-05, "loss": 0.08088464289903641, "step": 961 }, { "epoch": 0.13005602075218212, "grad_norm": 0.8855709433555603, "learning_rate": 2.9686291587123655e-05, "loss": 0.05380135774612427, "step": 962 }, { "epoch": 0.13019121412094736, "grad_norm": 1.5261743068695068, "learning_rate": 2.968489758464107e-05, "loss": 0.08649598062038422, "step": 963 }, { "epoch": 0.13032640748971264, "grad_norm": 0.648188591003418, "learning_rate": 2.9683500524684494e-05, "loss": 0.07602918148040771, "step": 964 }, { "epoch": 0.13046160085847788, "grad_norm": 0.34662652015686035, "learning_rate": 2.9682100407544812e-05, "loss": 0.057284027338027954, "step": 965 }, { "epoch": 0.13059679422724316, "grad_norm": 0.922051727771759, "learning_rate": 2.9680697233513526e-05, "loss": 0.06950879096984863, "step": 966 }, { "epoch": 0.1307319875960084, "grad_norm": 0.43236103653907776, "learning_rate": 2.9679291002882793e-05, "loss": 0.058712005615234375, "step": 967 }, { "epoch": 0.13086718096477368, "grad_norm": 0.8399235010147095, "learning_rate": 2.967788171594539e-05, "loss": 0.0585600882768631, "step": 968 }, { "epoch": 0.13100237433353895, "grad_norm": 1.3014203310012817, "learning_rate": 2.967646937299474e-05, "loss": 0.03898021578788757, "step": 969 }, { "epoch": 0.1311375677023042, "grad_norm": 0.47541749477386475, "learning_rate": 2.9675053974324907e-05, "loss": 0.07133780419826508, "step": 970 }, { "epoch": 0.13127276107106947, "grad_norm": 0.48997896909713745, "learning_rate": 2.9673635520230576e-05, "loss": 0.06527069211006165, "step": 971 }, { "epoch": 0.13140795443983472, "grad_norm": 0.5041906237602234, "learning_rate": 2.9672214011007087e-05, "loss": 0.07057970762252808, "step": 972 }, { "epoch": 0.1315431478086, "grad_norm": 0.5192206501960754, "learning_rate": 2.9670789446950396e-05, "loss": 0.07677845656871796, "step": 973 }, { "epoch": 0.13167834117736524, "grad_norm": 0.5961480736732483, "learning_rate": 2.9669361828357105e-05, "loss": 0.06424421072006226, "step": 974 }, { "epoch": 0.1318135345461305, "grad_norm": 0.3776947259902954, "learning_rate": 2.9667931155524454e-05, "loss": 0.05686786770820618, "step": 975 }, { "epoch": 0.13194872791489579, "grad_norm": 0.7766925096511841, "learning_rate": 2.966649742875032e-05, "loss": 0.03621548414230347, "step": 976 }, { "epoch": 0.13208392128366103, "grad_norm": 1.2372055053710938, "learning_rate": 2.9665060648333206e-05, "loss": 0.08590242266654968, "step": 977 }, { "epoch": 0.1322191146524263, "grad_norm": 0.538113534450531, "learning_rate": 2.9663620814572266e-05, "loss": 0.049870461225509644, "step": 978 }, { "epoch": 0.13235430802119155, "grad_norm": 0.5654088854789734, "learning_rate": 2.966217792776728e-05, "loss": 0.04519476741552353, "step": 979 }, { "epoch": 0.13248950138995683, "grad_norm": 1.511000633239746, "learning_rate": 2.9660731988218652e-05, "loss": 0.05653522536158562, "step": 980 }, { "epoch": 0.13262469475872207, "grad_norm": 1.6901277303695679, "learning_rate": 2.965928299622745e-05, "loss": 0.07455629110336304, "step": 981 }, { "epoch": 0.13275988812748735, "grad_norm": 1.12834632396698, "learning_rate": 2.965783095209535e-05, "loss": 0.058841437101364136, "step": 982 }, { "epoch": 0.13289508149625262, "grad_norm": 0.83742755651474, "learning_rate": 2.965637585612469e-05, "loss": 0.06962665915489197, "step": 983 }, { "epoch": 0.13303027486501787, "grad_norm": 0.4493842124938965, "learning_rate": 2.965491770861841e-05, "loss": 0.05994352698326111, "step": 984 }, { "epoch": 0.13316546823378314, "grad_norm": 0.37051379680633545, "learning_rate": 2.965345650988012e-05, "loss": 0.050818562507629395, "step": 985 }, { "epoch": 0.1333006616025484, "grad_norm": 1.1987723112106323, "learning_rate": 2.9651992260214035e-05, "loss": 0.08498618006706238, "step": 986 }, { "epoch": 0.13343585497131366, "grad_norm": 0.7185893058776855, "learning_rate": 2.9650524959925037e-05, "loss": 0.04328185319900513, "step": 987 }, { "epoch": 0.1335710483400789, "grad_norm": 1.0075762271881104, "learning_rate": 2.9649054609318607e-05, "loss": 0.060393065214157104, "step": 988 }, { "epoch": 0.13370624170884418, "grad_norm": 1.6080729961395264, "learning_rate": 2.9647581208700894e-05, "loss": 0.06977052986621857, "step": 989 }, { "epoch": 0.13384143507760946, "grad_norm": 0.7243795394897461, "learning_rate": 2.9646104758378666e-05, "loss": 0.06446462869644165, "step": 990 }, { "epoch": 0.1339766284463747, "grad_norm": 0.9294887185096741, "learning_rate": 2.964462525865932e-05, "loss": 0.0632893443107605, "step": 991 }, { "epoch": 0.13411182181513998, "grad_norm": 0.4036575257778168, "learning_rate": 2.96431427098509e-05, "loss": 0.06646198034286499, "step": 992 }, { "epoch": 0.13424701518390522, "grad_norm": 0.7816623449325562, "learning_rate": 2.9641657112262084e-05, "loss": 0.06584139168262482, "step": 993 }, { "epoch": 0.1343822085526705, "grad_norm": 1.8476940393447876, "learning_rate": 2.9640168466202174e-05, "loss": 0.06878417730331421, "step": 994 }, { "epoch": 0.13451740192143574, "grad_norm": 1.5821086168289185, "learning_rate": 2.9638676771981124e-05, "loss": 0.0807846188545227, "step": 995 }, { "epoch": 0.13465259529020102, "grad_norm": 0.7830606698989868, "learning_rate": 2.9637182029909508e-05, "loss": 0.08169925212860107, "step": 996 }, { "epoch": 0.1347877886589663, "grad_norm": 0.4132815897464752, "learning_rate": 2.9635684240298532e-05, "loss": 0.07634192705154419, "step": 997 }, { "epoch": 0.13492298202773154, "grad_norm": 2.0433011054992676, "learning_rate": 2.9634183403460053e-05, "loss": 0.06839564442634583, "step": 998 }, { "epoch": 0.1350581753964968, "grad_norm": 1.4567112922668457, "learning_rate": 2.9632679519706553e-05, "loss": 0.0772167444229126, "step": 999 }, { "epoch": 0.13519336876526206, "grad_norm": 1.1899508237838745, "learning_rate": 2.9631172589351137e-05, "loss": 0.07662512362003326, "step": 1000 }, { "epoch": 0.13532856213402733, "grad_norm": 0.44104450941085815, "learning_rate": 2.962966261270758e-05, "loss": 0.0652366429567337, "step": 1001 }, { "epoch": 0.13546375550279258, "grad_norm": 1.766348123550415, "learning_rate": 2.962814959009024e-05, "loss": 0.06794068217277527, "step": 1002 }, { "epoch": 0.13559894887155785, "grad_norm": 1.7373936176300049, "learning_rate": 2.962663352181415e-05, "loss": 0.0638173520565033, "step": 1003 }, { "epoch": 0.13573414224032312, "grad_norm": 0.5086437463760376, "learning_rate": 2.9625114408194966e-05, "loss": 0.06538169085979462, "step": 1004 }, { "epoch": 0.13586933560908837, "grad_norm": 1.0753984451293945, "learning_rate": 2.962359224954897e-05, "loss": 0.059414565563201904, "step": 1005 }, { "epoch": 0.13600452897785364, "grad_norm": 0.728958010673523, "learning_rate": 2.9622067046193086e-05, "loss": 0.07270858436822891, "step": 1006 }, { "epoch": 0.1361397223466189, "grad_norm": 1.2851983308792114, "learning_rate": 2.9620538798444867e-05, "loss": 0.05009317398071289, "step": 1007 }, { "epoch": 0.13627491571538417, "grad_norm": 0.3558655083179474, "learning_rate": 2.9619007506622506e-05, "loss": 0.040700286626815796, "step": 1008 }, { "epoch": 0.1364101090841494, "grad_norm": 0.8928632736206055, "learning_rate": 2.961747317104482e-05, "loss": 0.06870028376579285, "step": 1009 }, { "epoch": 0.13654530245291469, "grad_norm": 1.4920967817306519, "learning_rate": 2.9615935792031274e-05, "loss": 0.06532464176416397, "step": 1010 }, { "epoch": 0.13668049582167996, "grad_norm": 0.9910970330238342, "learning_rate": 2.9614395369901953e-05, "loss": 0.05240216851234436, "step": 1011 }, { "epoch": 0.1368156891904452, "grad_norm": 0.6473343372344971, "learning_rate": 2.9612851904977582e-05, "loss": 0.059423238039016724, "step": 1012 }, { "epoch": 0.13695088255921048, "grad_norm": 0.6781181693077087, "learning_rate": 2.9611305397579518e-05, "loss": 0.0826173722743988, "step": 1013 }, { "epoch": 0.13708607592797573, "grad_norm": 2.144813299179077, "learning_rate": 2.9609755848029755e-05, "loss": 0.06403718888759613, "step": 1014 }, { "epoch": 0.137221269296741, "grad_norm": 1.1143856048583984, "learning_rate": 2.9608203256650916e-05, "loss": 0.07878640294075012, "step": 1015 }, { "epoch": 0.13735646266550625, "grad_norm": 1.0002622604370117, "learning_rate": 2.9606647623766257e-05, "loss": 0.07778096944093704, "step": 1016 }, { "epoch": 0.13749165603427152, "grad_norm": 2.941098690032959, "learning_rate": 2.9605088949699672e-05, "loss": 0.08105441927909851, "step": 1017 }, { "epoch": 0.1376268494030368, "grad_norm": 0.9422494769096375, "learning_rate": 2.9603527234775682e-05, "loss": 0.040878817439079285, "step": 1018 }, { "epoch": 0.13776204277180204, "grad_norm": 1.3126517534255981, "learning_rate": 2.960196247931945e-05, "loss": 0.05973470211029053, "step": 1019 }, { "epoch": 0.13789723614056731, "grad_norm": 1.0239661931991577, "learning_rate": 2.960039468365676e-05, "loss": 0.0669485330581665, "step": 1020 }, { "epoch": 0.13803242950933256, "grad_norm": 1.0040719509124756, "learning_rate": 2.959882384811404e-05, "loss": 0.057542502880096436, "step": 1021 }, { "epoch": 0.13816762287809783, "grad_norm": 1.0834072828292847, "learning_rate": 2.9597249973018343e-05, "loss": 0.0507512241601944, "step": 1022 }, { "epoch": 0.13830281624686308, "grad_norm": 1.3461211919784546, "learning_rate": 2.959567305869736e-05, "loss": 0.06819938868284225, "step": 1023 }, { "epoch": 0.13843800961562835, "grad_norm": 1.9950796365737915, "learning_rate": 2.9594093105479413e-05, "loss": 0.05097164213657379, "step": 1024 }, { "epoch": 0.13857320298439363, "grad_norm": 0.788266658782959, "learning_rate": 2.959251011369345e-05, "loss": 0.056308358907699585, "step": 1025 }, { "epoch": 0.13870839635315887, "grad_norm": 0.7522055506706238, "learning_rate": 2.959092408366907e-05, "loss": 0.06282210350036621, "step": 1026 }, { "epoch": 0.13884358972192415, "grad_norm": 1.9201449155807495, "learning_rate": 2.958933501573649e-05, "loss": 0.07393568754196167, "step": 1027 }, { "epoch": 0.1389787830906894, "grad_norm": 0.47177568078041077, "learning_rate": 2.9587742910226555e-05, "loss": 0.06012946367263794, "step": 1028 }, { "epoch": 0.13911397645945467, "grad_norm": 0.7972776293754578, "learning_rate": 2.958614776747076e-05, "loss": 0.06980639696121216, "step": 1029 }, { "epoch": 0.13924916982821992, "grad_norm": 1.3969213962554932, "learning_rate": 2.9584549587801213e-05, "loss": 0.08450901508331299, "step": 1030 }, { "epoch": 0.1393843631969852, "grad_norm": 0.9194837808609009, "learning_rate": 2.958294837155067e-05, "loss": 0.06217719614505768, "step": 1031 }, { "epoch": 0.13951955656575046, "grad_norm": 0.4585019052028656, "learning_rate": 2.9581344119052508e-05, "loss": 0.059274494647979736, "step": 1032 }, { "epoch": 0.1396547499345157, "grad_norm": 0.32113614678382874, "learning_rate": 2.957973683064074e-05, "loss": 0.06717707961797714, "step": 1033 }, { "epoch": 0.13978994330328098, "grad_norm": 1.8591957092285156, "learning_rate": 2.957812650665002e-05, "loss": 0.05622287094593048, "step": 1034 }, { "epoch": 0.13992513667204623, "grad_norm": 2.2799623012542725, "learning_rate": 2.957651314741562e-05, "loss": 0.05717763304710388, "step": 1035 }, { "epoch": 0.1400603300408115, "grad_norm": 1.5666700601577759, "learning_rate": 2.9574896753273454e-05, "loss": 0.08140039443969727, "step": 1036 }, { "epoch": 0.14019552340957675, "grad_norm": 0.5222755074501038, "learning_rate": 2.9573277324560058e-05, "loss": 0.054591115564107895, "step": 1037 }, { "epoch": 0.14033071677834202, "grad_norm": 1.7303684949874878, "learning_rate": 2.9571654861612608e-05, "loss": 0.06008630990982056, "step": 1038 }, { "epoch": 0.1404659101471073, "grad_norm": 1.889082431793213, "learning_rate": 2.957002936476891e-05, "loss": 0.06525285542011261, "step": 1039 }, { "epoch": 0.14060110351587254, "grad_norm": 1.5210939645767212, "learning_rate": 2.9568400834367406e-05, "loss": 0.06174790859222412, "step": 1040 }, { "epoch": 0.14073629688463782, "grad_norm": 1.0190696716308594, "learning_rate": 2.9566769270747158e-05, "loss": 0.06712540984153748, "step": 1041 }, { "epoch": 0.14087149025340306, "grad_norm": 0.42528876662254333, "learning_rate": 2.9565134674247864e-05, "loss": 0.06162145733833313, "step": 1042 }, { "epoch": 0.14100668362216834, "grad_norm": 0.9857556223869324, "learning_rate": 2.9563497045209866e-05, "loss": 0.07064189016819, "step": 1043 }, { "epoch": 0.14114187699093358, "grad_norm": 2.3487162590026855, "learning_rate": 2.9561856383974118e-05, "loss": 0.0643070861697197, "step": 1044 }, { "epoch": 0.14127707035969886, "grad_norm": 0.4388207495212555, "learning_rate": 2.9560212690882218e-05, "loss": 0.056035563349723816, "step": 1045 }, { "epoch": 0.14141226372846413, "grad_norm": 1.9741652011871338, "learning_rate": 2.9558565966276395e-05, "loss": 0.06551049649715424, "step": 1046 }, { "epoch": 0.14154745709722938, "grad_norm": 2.2496907711029053, "learning_rate": 2.9556916210499497e-05, "loss": 0.0794958546757698, "step": 1047 }, { "epoch": 0.14168265046599465, "grad_norm": 0.6142613887786865, "learning_rate": 2.9555263423895016e-05, "loss": 0.07496144622564316, "step": 1048 }, { "epoch": 0.1418178438347599, "grad_norm": 0.809754490852356, "learning_rate": 2.955360760680708e-05, "loss": 0.0774577409029007, "step": 1049 }, { "epoch": 0.14195303720352517, "grad_norm": 1.5004494190216064, "learning_rate": 2.9551948759580423e-05, "loss": 0.06169368326663971, "step": 1050 }, { "epoch": 0.14208823057229042, "grad_norm": 1.9727504253387451, "learning_rate": 2.9550286882560435e-05, "loss": 0.07013940811157227, "step": 1051 }, { "epoch": 0.1422234239410557, "grad_norm": 1.5525301694869995, "learning_rate": 2.9548621976093126e-05, "loss": 0.057036034762859344, "step": 1052 }, { "epoch": 0.14235861730982094, "grad_norm": 0.6294586062431335, "learning_rate": 2.9546954040525144e-05, "loss": 0.07674640417098999, "step": 1053 }, { "epoch": 0.1424938106785862, "grad_norm": 0.6791669130325317, "learning_rate": 2.9545283076203753e-05, "loss": 0.05670631676912308, "step": 1054 }, { "epoch": 0.1426290040473515, "grad_norm": 2.116180658340454, "learning_rate": 2.954360908347686e-05, "loss": 0.09419101476669312, "step": 1055 }, { "epoch": 0.14276419741611673, "grad_norm": 0.5166284441947937, "learning_rate": 2.9541932062693e-05, "loss": 0.06673908233642578, "step": 1056 }, { "epoch": 0.142899390784882, "grad_norm": 0.5847052931785583, "learning_rate": 2.954025201420134e-05, "loss": 0.06297552585601807, "step": 1057 }, { "epoch": 0.14303458415364725, "grad_norm": 1.0459882020950317, "learning_rate": 2.9538568938351672e-05, "loss": 0.061056531965732574, "step": 1058 }, { "epoch": 0.14316977752241253, "grad_norm": 0.5410304069519043, "learning_rate": 2.953688283549442e-05, "loss": 0.0664343312382698, "step": 1059 }, { "epoch": 0.14330497089117777, "grad_norm": 1.6860905885696411, "learning_rate": 2.9535193705980642e-05, "loss": 0.06649842858314514, "step": 1060 }, { "epoch": 0.14344016425994305, "grad_norm": 1.0052019357681274, "learning_rate": 2.9533501550162028e-05, "loss": 0.07814857363700867, "step": 1061 }, { "epoch": 0.14357535762870832, "grad_norm": 1.191643476486206, "learning_rate": 2.9531806368390882e-05, "loss": 0.08305811882019043, "step": 1062 }, { "epoch": 0.14371055099747357, "grad_norm": 0.4967811107635498, "learning_rate": 2.953010816102016e-05, "loss": 0.04764849692583084, "step": 1063 }, { "epoch": 0.14384574436623884, "grad_norm": 1.2741310596466064, "learning_rate": 2.952840692840343e-05, "loss": 0.0677926242351532, "step": 1064 }, { "epoch": 0.1439809377350041, "grad_norm": 0.5323997735977173, "learning_rate": 2.9526702670894914e-05, "loss": 0.06263640522956848, "step": 1065 }, { "epoch": 0.14411613110376936, "grad_norm": 0.45657527446746826, "learning_rate": 2.952499538884943e-05, "loss": 0.06672221422195435, "step": 1066 }, { "epoch": 0.1442513244725346, "grad_norm": 1.4525842666625977, "learning_rate": 2.9523285082622448e-05, "loss": 0.07497847080230713, "step": 1067 }, { "epoch": 0.14438651784129988, "grad_norm": 0.9516348838806152, "learning_rate": 2.9521571752570064e-05, "loss": 0.060328468680381775, "step": 1068 }, { "epoch": 0.14452171121006516, "grad_norm": 0.7086668014526367, "learning_rate": 2.9519855399049004e-05, "loss": 0.06583045423030853, "step": 1069 }, { "epoch": 0.1446569045788304, "grad_norm": 0.5770422220230103, "learning_rate": 2.951813602241662e-05, "loss": 0.06792578101158142, "step": 1070 }, { "epoch": 0.14479209794759568, "grad_norm": 2.717829704284668, "learning_rate": 2.9516413623030896e-05, "loss": 0.09775790572166443, "step": 1071 }, { "epoch": 0.14492729131636092, "grad_norm": 0.45390549302101135, "learning_rate": 2.951468820125045e-05, "loss": 0.06244179606437683, "step": 1072 }, { "epoch": 0.1450624846851262, "grad_norm": 0.7627196311950684, "learning_rate": 2.9512959757434508e-05, "loss": 0.056396666914224625, "step": 1073 }, { "epoch": 0.14519767805389144, "grad_norm": 0.975857138633728, "learning_rate": 2.951122829194296e-05, "loss": 0.07269716262817383, "step": 1074 }, { "epoch": 0.14533287142265672, "grad_norm": 0.3419383466243744, "learning_rate": 2.9509493805136296e-05, "loss": 0.05531579256057739, "step": 1075 }, { "epoch": 0.145468064791422, "grad_norm": 1.8316603899002075, "learning_rate": 2.9507756297375648e-05, "loss": 0.06720045953989029, "step": 1076 }, { "epoch": 0.14560325816018724, "grad_norm": 2.4128952026367188, "learning_rate": 2.9506015769022778e-05, "loss": 0.07311362773180008, "step": 1077 }, { "epoch": 0.1457384515289525, "grad_norm": 1.209806203842163, "learning_rate": 2.950427222044006e-05, "loss": 0.08212907612323761, "step": 1078 }, { "epoch": 0.14587364489771776, "grad_norm": 0.38941672444343567, "learning_rate": 2.9502525651990525e-05, "loss": 0.061763226985931396, "step": 1079 }, { "epoch": 0.14600883826648303, "grad_norm": 0.8612173795700073, "learning_rate": 2.9500776064037813e-05, "loss": 0.05280739814043045, "step": 1080 }, { "epoch": 0.14614403163524828, "grad_norm": 0.653526782989502, "learning_rate": 2.9499023456946194e-05, "loss": 0.05213388428092003, "step": 1081 }, { "epoch": 0.14627922500401355, "grad_norm": 0.7387879490852356, "learning_rate": 2.9497267831080575e-05, "loss": 0.06898682564496994, "step": 1082 }, { "epoch": 0.14641441837277883, "grad_norm": 0.5965262651443481, "learning_rate": 2.949550918680649e-05, "loss": 0.06924512982368469, "step": 1083 }, { "epoch": 0.14654961174154407, "grad_norm": 2.2757368087768555, "learning_rate": 2.9493747524490086e-05, "loss": 0.08496251702308655, "step": 1084 }, { "epoch": 0.14668480511030935, "grad_norm": 0.6187565326690674, "learning_rate": 2.9491982844498156e-05, "loss": 0.05621740221977234, "step": 1085 }, { "epoch": 0.1468199984790746, "grad_norm": 0.42957448959350586, "learning_rate": 2.949021514719812e-05, "loss": 0.04010415077209473, "step": 1086 }, { "epoch": 0.14695519184783987, "grad_norm": 1.1361998319625854, "learning_rate": 2.948844443295802e-05, "loss": 0.08248896896839142, "step": 1087 }, { "epoch": 0.1470903852166051, "grad_norm": 0.5923175811767578, "learning_rate": 2.9486670702146526e-05, "loss": 0.0988461971282959, "step": 1088 }, { "epoch": 0.1472255785853704, "grad_norm": 0.3613094985485077, "learning_rate": 2.948489395513294e-05, "loss": 0.05367446690797806, "step": 1089 }, { "epoch": 0.14736077195413566, "grad_norm": 0.9075596332550049, "learning_rate": 2.948311419228719e-05, "loss": 0.0606498122215271, "step": 1090 }, { "epoch": 0.1474959653229009, "grad_norm": 1.122167706489563, "learning_rate": 2.948133141397983e-05, "loss": 0.05617364123463631, "step": 1091 }, { "epoch": 0.14763115869166618, "grad_norm": 0.9702511429786682, "learning_rate": 2.9479545620582047e-05, "loss": 0.06455537676811218, "step": 1092 }, { "epoch": 0.14776635206043143, "grad_norm": 0.6575385928153992, "learning_rate": 2.9477756812465652e-05, "loss": 0.07123291492462158, "step": 1093 }, { "epoch": 0.1479015454291967, "grad_norm": 1.3090535402297974, "learning_rate": 2.9475964990003085e-05, "loss": 0.05015579238533974, "step": 1094 }, { "epoch": 0.14803673879796195, "grad_norm": 0.7073326706886292, "learning_rate": 2.9474170153567406e-05, "loss": 0.06102776527404785, "step": 1095 }, { "epoch": 0.14817193216672722, "grad_norm": 0.5905848741531372, "learning_rate": 2.947237230353232e-05, "loss": 0.07047636061906815, "step": 1096 }, { "epoch": 0.1483071255354925, "grad_norm": 1.980143666267395, "learning_rate": 2.9470571440272147e-05, "loss": 0.0791713297367096, "step": 1097 }, { "epoch": 0.14844231890425774, "grad_norm": 0.4122215807437897, "learning_rate": 2.946876756416183e-05, "loss": 0.04994921386241913, "step": 1098 }, { "epoch": 0.14857751227302302, "grad_norm": 0.716984212398529, "learning_rate": 2.946696067557695e-05, "loss": 0.06360213458538055, "step": 1099 }, { "epoch": 0.14871270564178826, "grad_norm": 1.2254761457443237, "learning_rate": 2.9465150774893706e-05, "loss": 0.06261074542999268, "step": 1100 }, { "epoch": 0.14884789901055354, "grad_norm": 0.25284162163734436, "learning_rate": 2.9463337862488938e-05, "loss": 0.055991947650909424, "step": 1101 }, { "epoch": 0.14898309237931878, "grad_norm": 0.4695529639720917, "learning_rate": 2.9461521938740096e-05, "loss": 0.06830859184265137, "step": 1102 }, { "epoch": 0.14911828574808406, "grad_norm": 1.6265945434570312, "learning_rate": 2.9459703004025273e-05, "loss": 0.06502760201692581, "step": 1103 }, { "epoch": 0.14925347911684933, "grad_norm": 1.1338313817977905, "learning_rate": 2.9457881058723174e-05, "loss": 0.06526631116867065, "step": 1104 }, { "epoch": 0.14938867248561458, "grad_norm": 0.8361053466796875, "learning_rate": 2.9456056103213137e-05, "loss": 0.04766133427619934, "step": 1105 }, { "epoch": 0.14952386585437985, "grad_norm": 1.4736934900283813, "learning_rate": 2.945422813787513e-05, "loss": 0.06487038731575012, "step": 1106 }, { "epoch": 0.1496590592231451, "grad_norm": 0.996687650680542, "learning_rate": 2.9452397163089748e-05, "loss": 0.0539180152118206, "step": 1107 }, { "epoch": 0.14979425259191037, "grad_norm": 0.9717349410057068, "learning_rate": 2.9450563179238207e-05, "loss": 0.05086350440979004, "step": 1108 }, { "epoch": 0.14992944596067562, "grad_norm": 1.2033283710479736, "learning_rate": 2.9448726186702354e-05, "loss": 0.05426774546504021, "step": 1109 }, { "epoch": 0.1500646393294409, "grad_norm": 0.32348763942718506, "learning_rate": 2.9446886185864652e-05, "loss": 0.05802512168884277, "step": 1110 }, { "epoch": 0.15019983269820616, "grad_norm": 0.8118970394134521, "learning_rate": 2.944504317710821e-05, "loss": 0.06699618697166443, "step": 1111 }, { "epoch": 0.1503350260669714, "grad_norm": 0.8924899697303772, "learning_rate": 2.944319716081675e-05, "loss": 0.06311993300914764, "step": 1112 }, { "epoch": 0.15047021943573669, "grad_norm": 0.5372419357299805, "learning_rate": 2.944134813737462e-05, "loss": 0.06188049167394638, "step": 1113 }, { "epoch": 0.15060541280450193, "grad_norm": 1.255583643913269, "learning_rate": 2.9439496107166796e-05, "loss": 0.09018206596374512, "step": 1114 }, { "epoch": 0.1507406061732672, "grad_norm": 2.200819492340088, "learning_rate": 2.943764107057888e-05, "loss": 0.06547680497169495, "step": 1115 }, { "epoch": 0.15087579954203245, "grad_norm": 1.4588055610656738, "learning_rate": 2.9435783027997106e-05, "loss": 0.07638072222471237, "step": 1116 }, { "epoch": 0.15101099291079773, "grad_norm": 1.3274680376052856, "learning_rate": 2.9433921979808323e-05, "loss": 0.06149342656135559, "step": 1117 }, { "epoch": 0.151146186279563, "grad_norm": 1.705327033996582, "learning_rate": 2.9432057926400014e-05, "loss": 0.08070620894432068, "step": 1118 }, { "epoch": 0.15128137964832825, "grad_norm": 0.4748374819755554, "learning_rate": 2.943019086816028e-05, "loss": 0.05764502286911011, "step": 1119 }, { "epoch": 0.15141657301709352, "grad_norm": 0.8783624768257141, "learning_rate": 2.9428320805477855e-05, "loss": 0.06809592247009277, "step": 1120 }, { "epoch": 0.15155176638585877, "grad_norm": 0.36441802978515625, "learning_rate": 2.9426447738742104e-05, "loss": 0.06841164827346802, "step": 1121 }, { "epoch": 0.15168695975462404, "grad_norm": 1.1301729679107666, "learning_rate": 2.9424571668343e-05, "loss": 0.04396027326583862, "step": 1122 }, { "epoch": 0.1518221531233893, "grad_norm": 2.087982416152954, "learning_rate": 2.942269259467115e-05, "loss": 0.0730208158493042, "step": 1123 }, { "epoch": 0.15195734649215456, "grad_norm": 0.44096827507019043, "learning_rate": 2.9420810518117794e-05, "loss": 0.05848512053489685, "step": 1124 }, { "epoch": 0.15209253986091983, "grad_norm": 1.1126229763031006, "learning_rate": 2.9418925439074784e-05, "loss": 0.0867539644241333, "step": 1125 }, { "epoch": 0.15222773322968508, "grad_norm": 2.0272955894470215, "learning_rate": 2.9417037357934606e-05, "loss": 0.07255285978317261, "step": 1126 }, { "epoch": 0.15236292659845035, "grad_norm": 1.182340383529663, "learning_rate": 2.9415146275090373e-05, "loss": 0.06449049711227417, "step": 1127 }, { "epoch": 0.1524981199672156, "grad_norm": 2.9131686687469482, "learning_rate": 2.9413252190935813e-05, "loss": 0.05471482872962952, "step": 1128 }, { "epoch": 0.15263331333598087, "grad_norm": 1.4113824367523193, "learning_rate": 2.9411355105865286e-05, "loss": 0.06527480483055115, "step": 1129 }, { "epoch": 0.15276850670474612, "grad_norm": 1.8772748708724976, "learning_rate": 2.9409455020273775e-05, "loss": 0.06518785655498505, "step": 1130 }, { "epoch": 0.1529037000735114, "grad_norm": 0.6438419818878174, "learning_rate": 2.940755193455689e-05, "loss": 0.05799649655818939, "step": 1131 }, { "epoch": 0.15303889344227667, "grad_norm": 0.5866788029670715, "learning_rate": 2.940564584911086e-05, "loss": 0.04870527982711792, "step": 1132 }, { "epoch": 0.15317408681104192, "grad_norm": 2.1813197135925293, "learning_rate": 2.9403736764332543e-05, "loss": 0.0656803548336029, "step": 1133 }, { "epoch": 0.1533092801798072, "grad_norm": 1.3716038465499878, "learning_rate": 2.9401824680619423e-05, "loss": 0.08586659282445908, "step": 1134 }, { "epoch": 0.15344447354857244, "grad_norm": 1.49837327003479, "learning_rate": 2.9399909598369604e-05, "loss": 0.07058757543563843, "step": 1135 }, { "epoch": 0.1535796669173377, "grad_norm": 0.5299723148345947, "learning_rate": 2.939799151798182e-05, "loss": 0.05751442909240723, "step": 1136 }, { "epoch": 0.15371486028610296, "grad_norm": 1.1080832481384277, "learning_rate": 2.9396070439855417e-05, "loss": 0.0840616226196289, "step": 1137 }, { "epoch": 0.15385005365486823, "grad_norm": 0.9604690670967102, "learning_rate": 2.9394146364390382e-05, "loss": 0.05214349925518036, "step": 1138 }, { "epoch": 0.1539852470236335, "grad_norm": 1.7251310348510742, "learning_rate": 2.9392219291987315e-05, "loss": 0.08107581734657288, "step": 1139 }, { "epoch": 0.15412044039239875, "grad_norm": 0.4901770353317261, "learning_rate": 2.939028922304744e-05, "loss": 0.0697246789932251, "step": 1140 }, { "epoch": 0.15425563376116402, "grad_norm": 0.36542144417762756, "learning_rate": 2.9388356157972615e-05, "loss": 0.0458989292383194, "step": 1141 }, { "epoch": 0.15439082712992927, "grad_norm": 1.7223154306411743, "learning_rate": 2.938642009716531e-05, "loss": 0.07267695665359497, "step": 1142 }, { "epoch": 0.15452602049869454, "grad_norm": 1.3905787467956543, "learning_rate": 2.938448104102862e-05, "loss": 0.08286727219820023, "step": 1143 }, { "epoch": 0.1546612138674598, "grad_norm": 0.7059480547904968, "learning_rate": 2.9382538989966267e-05, "loss": 0.05648154020309448, "step": 1144 }, { "epoch": 0.15479640723622506, "grad_norm": 1.1272727251052856, "learning_rate": 2.9380593944382605e-05, "loss": 0.0527062863111496, "step": 1145 }, { "epoch": 0.15493160060499034, "grad_norm": 0.5262485146522522, "learning_rate": 2.9378645904682596e-05, "loss": 0.05934404581785202, "step": 1146 }, { "epoch": 0.15506679397375558, "grad_norm": 0.7232634425163269, "learning_rate": 2.937669487127183e-05, "loss": 0.0660967230796814, "step": 1147 }, { "epoch": 0.15520198734252086, "grad_norm": 1.1460777521133423, "learning_rate": 2.9374740844556532e-05, "loss": 0.07225543260574341, "step": 1148 }, { "epoch": 0.1553371807112861, "grad_norm": 0.42113828659057617, "learning_rate": 2.937278382494353e-05, "loss": 0.04665200412273407, "step": 1149 }, { "epoch": 0.15547237408005138, "grad_norm": 0.37123903632164, "learning_rate": 2.9370823812840287e-05, "loss": 0.06203605234622955, "step": 1150 }, { "epoch": 0.15560756744881662, "grad_norm": 0.6488979458808899, "learning_rate": 2.93688608086549e-05, "loss": 0.059460125863552094, "step": 1151 }, { "epoch": 0.1557427608175819, "grad_norm": 1.1914242506027222, "learning_rate": 2.9366894812796064e-05, "loss": 0.057233214378356934, "step": 1152 }, { "epoch": 0.15587795418634717, "grad_norm": 0.7370941638946533, "learning_rate": 2.9364925825673117e-05, "loss": 0.06552474200725555, "step": 1153 }, { "epoch": 0.15601314755511242, "grad_norm": 0.6630440950393677, "learning_rate": 2.9362953847696006e-05, "loss": 0.07680156081914902, "step": 1154 }, { "epoch": 0.1561483409238777, "grad_norm": 0.8082736134529114, "learning_rate": 2.9360978879275313e-05, "loss": 0.06067699193954468, "step": 1155 }, { "epoch": 0.15628353429264294, "grad_norm": 1.5821971893310547, "learning_rate": 2.9359000920822237e-05, "loss": 0.05686017870903015, "step": 1156 }, { "epoch": 0.1564187276614082, "grad_norm": 1.0934772491455078, "learning_rate": 2.9357019972748594e-05, "loss": 0.08420317620038986, "step": 1157 }, { "epoch": 0.15655392103017346, "grad_norm": 0.9861279129981995, "learning_rate": 2.9355036035466836e-05, "loss": 0.07504907250404358, "step": 1158 }, { "epoch": 0.15668911439893873, "grad_norm": 0.45539483428001404, "learning_rate": 2.935304910939002e-05, "loss": 0.04558587074279785, "step": 1159 }, { "epoch": 0.156824307767704, "grad_norm": 0.658811092376709, "learning_rate": 2.935105919493184e-05, "loss": 0.055750906467437744, "step": 1160 }, { "epoch": 0.15695950113646925, "grad_norm": 0.8782719373703003, "learning_rate": 2.9349066292506613e-05, "loss": 0.047747790813446045, "step": 1161 }, { "epoch": 0.15709469450523453, "grad_norm": 1.0695005655288696, "learning_rate": 2.934707040252926e-05, "loss": 0.07084289193153381, "step": 1162 }, { "epoch": 0.15722988787399977, "grad_norm": 0.6481457352638245, "learning_rate": 2.9345071525415342e-05, "loss": 0.0865902528166771, "step": 1163 }, { "epoch": 0.15736508124276505, "grad_norm": 1.2633981704711914, "learning_rate": 2.9343069661581035e-05, "loss": 0.06998138874769211, "step": 1164 }, { "epoch": 0.1575002746115303, "grad_norm": 0.6916375756263733, "learning_rate": 2.9341064811443138e-05, "loss": 0.06468892097473145, "step": 1165 }, { "epoch": 0.15763546798029557, "grad_norm": 1.4526642560958862, "learning_rate": 2.9339056975419078e-05, "loss": 0.06961843371391296, "step": 1166 }, { "epoch": 0.15777066134906084, "grad_norm": 0.7845077514648438, "learning_rate": 2.9337046153926882e-05, "loss": 0.07602876424789429, "step": 1167 }, { "epoch": 0.1579058547178261, "grad_norm": 0.8499192595481873, "learning_rate": 2.9335032347385224e-05, "loss": 0.04190849885344505, "step": 1168 }, { "epoch": 0.15804104808659136, "grad_norm": 1.3594536781311035, "learning_rate": 2.933301555621339e-05, "loss": 0.04575201869010925, "step": 1169 }, { "epoch": 0.1581762414553566, "grad_norm": 0.6063716411590576, "learning_rate": 2.933099578083128e-05, "loss": 0.05104599893093109, "step": 1170 }, { "epoch": 0.15831143482412188, "grad_norm": 0.5349283814430237, "learning_rate": 2.932897302165943e-05, "loss": 0.05895107239484787, "step": 1171 }, { "epoch": 0.15844662819288713, "grad_norm": 0.3548339903354645, "learning_rate": 2.9326947279118983e-05, "loss": 0.051456570625305176, "step": 1172 }, { "epoch": 0.1585818215616524, "grad_norm": 0.6213728785514832, "learning_rate": 2.9324918553631716e-05, "loss": 0.061855852603912354, "step": 1173 }, { "epoch": 0.15871701493041765, "grad_norm": 0.5467653274536133, "learning_rate": 2.9322886845620013e-05, "loss": 0.03827083855867386, "step": 1174 }, { "epoch": 0.15885220829918292, "grad_norm": 0.5223851799964905, "learning_rate": 2.932085215550689e-05, "loss": 0.07667255401611328, "step": 1175 }, { "epoch": 0.1589874016679482, "grad_norm": 0.601033091545105, "learning_rate": 2.9318814483715982e-05, "loss": 0.05599546432495117, "step": 1176 }, { "epoch": 0.15912259503671344, "grad_norm": 1.7533951997756958, "learning_rate": 2.9316773830671537e-05, "loss": 0.05947062373161316, "step": 1177 }, { "epoch": 0.15925778840547872, "grad_norm": 0.3397280275821686, "learning_rate": 2.9314730196798437e-05, "loss": 0.05962613224983215, "step": 1178 }, { "epoch": 0.15939298177424396, "grad_norm": 0.8302063345909119, "learning_rate": 2.9312683582522178e-05, "loss": 0.07750225067138672, "step": 1179 }, { "epoch": 0.15952817514300924, "grad_norm": 0.6219926476478577, "learning_rate": 2.9310633988268868e-05, "loss": 0.06360742449760437, "step": 1180 }, { "epoch": 0.15966336851177448, "grad_norm": 1.2431893348693848, "learning_rate": 2.9308581414465246e-05, "loss": 0.047799646854400635, "step": 1181 }, { "epoch": 0.15979856188053976, "grad_norm": 0.7167524695396423, "learning_rate": 2.9306525861538674e-05, "loss": 0.07473218441009521, "step": 1182 }, { "epoch": 0.15993375524930503, "grad_norm": 1.344822645187378, "learning_rate": 2.9304467329917127e-05, "loss": 0.06404919177293777, "step": 1183 }, { "epoch": 0.16006894861807028, "grad_norm": 1.3459992408752441, "learning_rate": 2.9302405820029198e-05, "loss": 0.06698673218488693, "step": 1184 }, { "epoch": 0.16020414198683555, "grad_norm": 0.4024495482444763, "learning_rate": 2.9300341332304114e-05, "loss": 0.049604594707489014, "step": 1185 }, { "epoch": 0.1603393353556008, "grad_norm": 0.6731967329978943, "learning_rate": 2.9298273867171697e-05, "loss": 0.06139141321182251, "step": 1186 }, { "epoch": 0.16047452872436607, "grad_norm": 1.0775129795074463, "learning_rate": 2.929620342506242e-05, "loss": 0.07561543583869934, "step": 1187 }, { "epoch": 0.16060972209313132, "grad_norm": 1.8730332851409912, "learning_rate": 2.929413000640735e-05, "loss": 0.0670536607503891, "step": 1188 }, { "epoch": 0.1607449154618966, "grad_norm": 1.2282514572143555, "learning_rate": 2.9292053611638187e-05, "loss": 0.05790209770202637, "step": 1189 }, { "epoch": 0.16088010883066187, "grad_norm": 1.6782622337341309, "learning_rate": 2.928997424118725e-05, "loss": 0.0759919285774231, "step": 1190 }, { "epoch": 0.1610153021994271, "grad_norm": 1.298192024230957, "learning_rate": 2.928789189548747e-05, "loss": 0.0586373507976532, "step": 1191 }, { "epoch": 0.1611504955681924, "grad_norm": 2.9043021202087402, "learning_rate": 2.9285806574972405e-05, "loss": 0.08343648910522461, "step": 1192 }, { "epoch": 0.16128568893695763, "grad_norm": 0.8272789120674133, "learning_rate": 2.928371828007623e-05, "loss": 0.05761042982339859, "step": 1193 }, { "epoch": 0.1614208823057229, "grad_norm": 4.33075475692749, "learning_rate": 2.928162701123374e-05, "loss": 0.09251654148101807, "step": 1194 }, { "epoch": 0.16155607567448815, "grad_norm": 0.8157700896263123, "learning_rate": 2.9279532768880345e-05, "loss": 0.0797078013420105, "step": 1195 }, { "epoch": 0.16169126904325343, "grad_norm": 1.2659385204315186, "learning_rate": 2.9277435553452084e-05, "loss": 0.08196496963500977, "step": 1196 }, { "epoch": 0.1618264624120187, "grad_norm": 1.4740269184112549, "learning_rate": 2.9275335365385602e-05, "loss": 0.06738023459911346, "step": 1197 }, { "epoch": 0.16196165578078395, "grad_norm": 0.792144775390625, "learning_rate": 2.927323220511817e-05, "loss": 0.0665399506688118, "step": 1198 }, { "epoch": 0.16209684914954922, "grad_norm": 0.5530847907066345, "learning_rate": 2.9271126073087684e-05, "loss": 0.05725696682929993, "step": 1199 }, { "epoch": 0.16223204251831447, "grad_norm": 0.64042067527771, "learning_rate": 2.926901696973264e-05, "loss": 0.06065577268600464, "step": 1200 }, { "epoch": 0.16236723588707974, "grad_norm": 1.6298916339874268, "learning_rate": 2.9266904895492177e-05, "loss": 0.05311492830514908, "step": 1201 }, { "epoch": 0.162502429255845, "grad_norm": 1.9529551267623901, "learning_rate": 2.926478985080603e-05, "loss": 0.06378045678138733, "step": 1202 }, { "epoch": 0.16263762262461026, "grad_norm": 1.2222492694854736, "learning_rate": 2.9262671836114568e-05, "loss": 0.05665773153305054, "step": 1203 }, { "epoch": 0.16277281599337554, "grad_norm": 0.7065329551696777, "learning_rate": 2.9260550851858774e-05, "loss": 0.05856727808713913, "step": 1204 }, { "epoch": 0.16290800936214078, "grad_norm": 0.5530824661254883, "learning_rate": 2.9258426898480243e-05, "loss": 0.06612825393676758, "step": 1205 }, { "epoch": 0.16304320273090606, "grad_norm": 1.5942386388778687, "learning_rate": 2.9256299976421198e-05, "loss": 0.06233048439025879, "step": 1206 }, { "epoch": 0.1631783960996713, "grad_norm": 0.7886584997177124, "learning_rate": 2.9254170086124474e-05, "loss": 0.08418315649032593, "step": 1207 }, { "epoch": 0.16331358946843658, "grad_norm": 0.8411967754364014, "learning_rate": 2.9252037228033526e-05, "loss": 0.07825121283531189, "step": 1208 }, { "epoch": 0.16344878283720182, "grad_norm": 0.752139687538147, "learning_rate": 2.9249901402592424e-05, "loss": 0.04684019088745117, "step": 1209 }, { "epoch": 0.1635839762059671, "grad_norm": 0.592678964138031, "learning_rate": 2.9247762610245863e-05, "loss": 0.0651087760925293, "step": 1210 }, { "epoch": 0.16371916957473237, "grad_norm": 1.1372289657592773, "learning_rate": 2.9245620851439146e-05, "loss": 0.06731021404266357, "step": 1211 }, { "epoch": 0.16385436294349762, "grad_norm": 0.7190410494804382, "learning_rate": 2.92434761266182e-05, "loss": 0.06134688854217529, "step": 1212 }, { "epoch": 0.1639895563122629, "grad_norm": 0.6636428236961365, "learning_rate": 2.924132843622957e-05, "loss": 0.04543939232826233, "step": 1213 }, { "epoch": 0.16412474968102814, "grad_norm": 0.8182775974273682, "learning_rate": 2.9239177780720418e-05, "loss": 0.056656286120414734, "step": 1214 }, { "epoch": 0.1642599430497934, "grad_norm": 0.709373414516449, "learning_rate": 2.923702416053852e-05, "loss": 0.07238680124282837, "step": 1215 }, { "epoch": 0.16439513641855866, "grad_norm": 0.6388227939605713, "learning_rate": 2.9234867576132268e-05, "loss": 0.0633966475725174, "step": 1216 }, { "epoch": 0.16453032978732393, "grad_norm": 0.5293400883674622, "learning_rate": 2.923270802795068e-05, "loss": 0.06284171342849731, "step": 1217 }, { "epoch": 0.1646655231560892, "grad_norm": 1.3790321350097656, "learning_rate": 2.9230545516443378e-05, "loss": 0.07694780826568604, "step": 1218 }, { "epoch": 0.16480071652485445, "grad_norm": 0.5367887020111084, "learning_rate": 2.9228380042060615e-05, "loss": 0.05496096611022949, "step": 1219 }, { "epoch": 0.16493590989361973, "grad_norm": 0.3577832281589508, "learning_rate": 2.9226211605253252e-05, "loss": 0.06105245277285576, "step": 1220 }, { "epoch": 0.16507110326238497, "grad_norm": 0.7726598978042603, "learning_rate": 2.922404020647277e-05, "loss": 0.05503867194056511, "step": 1221 }, { "epoch": 0.16520629663115025, "grad_norm": 0.6038638353347778, "learning_rate": 2.9221865846171264e-05, "loss": 0.05856103450059891, "step": 1222 }, { "epoch": 0.1653414899999155, "grad_norm": 1.0350080728530884, "learning_rate": 2.9219688524801446e-05, "loss": 0.05294673144817352, "step": 1223 }, { "epoch": 0.16547668336868077, "grad_norm": 1.02299964427948, "learning_rate": 2.9217508242816653e-05, "loss": 0.06431066989898682, "step": 1224 }, { "epoch": 0.16561187673744604, "grad_norm": 0.2556920051574707, "learning_rate": 2.921532500067083e-05, "loss": 0.04862751066684723, "step": 1225 }, { "epoch": 0.16574707010621129, "grad_norm": 0.6257906556129456, "learning_rate": 2.9213138798818528e-05, "loss": 0.04743689298629761, "step": 1226 }, { "epoch": 0.16588226347497656, "grad_norm": 0.8982285857200623, "learning_rate": 2.921094963771494e-05, "loss": 0.045286282896995544, "step": 1227 }, { "epoch": 0.1660174568437418, "grad_norm": 0.5756014585494995, "learning_rate": 2.9208757517815855e-05, "loss": 0.08197182416915894, "step": 1228 }, { "epoch": 0.16615265021250708, "grad_norm": 1.370456576347351, "learning_rate": 2.9206562439577684e-05, "loss": 0.05535238981246948, "step": 1229 }, { "epoch": 0.16628784358127233, "grad_norm": 2.5601351261138916, "learning_rate": 2.9204364403457452e-05, "loss": 0.09890396147966385, "step": 1230 }, { "epoch": 0.1664230369500376, "grad_norm": 0.9468607306480408, "learning_rate": 2.9202163409912808e-05, "loss": 0.07507592439651489, "step": 1231 }, { "epoch": 0.16655823031880287, "grad_norm": 0.7571756839752197, "learning_rate": 2.9199959459402003e-05, "loss": 0.04222369194030762, "step": 1232 }, { "epoch": 0.16669342368756812, "grad_norm": 2.2120721340179443, "learning_rate": 2.919775255238392e-05, "loss": 0.07921065390110016, "step": 1233 }, { "epoch": 0.1668286170563334, "grad_norm": 1.0212641954421997, "learning_rate": 2.919554268931804e-05, "loss": 0.05951683595776558, "step": 1234 }, { "epoch": 0.16696381042509864, "grad_norm": 0.497661828994751, "learning_rate": 2.9193329870664475e-05, "loss": 0.06232532858848572, "step": 1235 }, { "epoch": 0.16709900379386392, "grad_norm": 0.5672320127487183, "learning_rate": 2.9191114096883938e-05, "loss": 0.05264327675104141, "step": 1236 }, { "epoch": 0.16723419716262916, "grad_norm": 0.7996569871902466, "learning_rate": 2.9188895368437774e-05, "loss": 0.07075543701648712, "step": 1237 }, { "epoch": 0.16736939053139444, "grad_norm": 1.8040475845336914, "learning_rate": 2.9186673685787926e-05, "loss": 0.05779653787612915, "step": 1238 }, { "epoch": 0.1675045839001597, "grad_norm": 0.94858318567276, "learning_rate": 2.918444904939697e-05, "loss": 0.07351627945899963, "step": 1239 }, { "epoch": 0.16763977726892496, "grad_norm": 0.5381576418876648, "learning_rate": 2.9182221459728078e-05, "loss": 0.053827933967113495, "step": 1240 }, { "epoch": 0.16777497063769023, "grad_norm": 1.689529299736023, "learning_rate": 2.917999091724505e-05, "loss": 0.07044818997383118, "step": 1241 }, { "epoch": 0.16791016400645548, "grad_norm": 1.0230121612548828, "learning_rate": 2.9177757422412294e-05, "loss": 0.07499171793460846, "step": 1242 }, { "epoch": 0.16804535737522075, "grad_norm": 1.2331953048706055, "learning_rate": 2.917552097569484e-05, "loss": 0.05908238887786865, "step": 1243 }, { "epoch": 0.168180550743986, "grad_norm": 2.16436767578125, "learning_rate": 2.917328157755832e-05, "loss": 0.08111009001731873, "step": 1244 }, { "epoch": 0.16831574411275127, "grad_norm": 0.577461302280426, "learning_rate": 2.9171039228469003e-05, "loss": 0.05244028568267822, "step": 1245 }, { "epoch": 0.16845093748151654, "grad_norm": 0.6482664942741394, "learning_rate": 2.9168793928893747e-05, "loss": 0.06110365688800812, "step": 1246 }, { "epoch": 0.1685861308502818, "grad_norm": 0.887102484703064, "learning_rate": 2.9166545679300036e-05, "loss": 0.07000339031219482, "step": 1247 }, { "epoch": 0.16872132421904706, "grad_norm": 1.3720703125, "learning_rate": 2.9164294480155966e-05, "loss": 0.07628023624420166, "step": 1248 }, { "epoch": 0.1688565175878123, "grad_norm": 0.4543033242225647, "learning_rate": 2.9162040331930256e-05, "loss": 0.05563601851463318, "step": 1249 }, { "epoch": 0.16899171095657758, "grad_norm": 0.9618221521377563, "learning_rate": 2.915978323509223e-05, "loss": 0.0719805359840393, "step": 1250 }, { "epoch": 0.16912690432534283, "grad_norm": 0.8158279061317444, "learning_rate": 2.915752319011182e-05, "loss": 0.07083217054605484, "step": 1251 }, { "epoch": 0.1692620976941081, "grad_norm": 0.6591756939888, "learning_rate": 2.9155260197459588e-05, "loss": 0.07081496715545654, "step": 1252 }, { "epoch": 0.16939729106287338, "grad_norm": 1.1029133796691895, "learning_rate": 2.91529942576067e-05, "loss": 0.06827642023563385, "step": 1253 }, { "epoch": 0.16953248443163862, "grad_norm": 0.8691407442092896, "learning_rate": 2.915072537102493e-05, "loss": 0.05868637561798096, "step": 1254 }, { "epoch": 0.1696676778004039, "grad_norm": 0.8354164958000183, "learning_rate": 2.914845353818668e-05, "loss": 0.05098971724510193, "step": 1255 }, { "epoch": 0.16980287116916915, "grad_norm": 0.6224926710128784, "learning_rate": 2.9146178759564953e-05, "loss": 0.07958734035491943, "step": 1256 }, { "epoch": 0.16993806453793442, "grad_norm": 0.8625409007072449, "learning_rate": 2.914390103563337e-05, "loss": 0.07424217462539673, "step": 1257 }, { "epoch": 0.17007325790669967, "grad_norm": 1.2023978233337402, "learning_rate": 2.914162036686617e-05, "loss": 0.06286744773387909, "step": 1258 }, { "epoch": 0.17020845127546494, "grad_norm": 0.9498358964920044, "learning_rate": 2.9139336753738196e-05, "loss": 0.05720394849777222, "step": 1259 }, { "epoch": 0.1703436446442302, "grad_norm": 0.5057894587516785, "learning_rate": 2.913705019672491e-05, "loss": 0.06330971419811249, "step": 1260 }, { "epoch": 0.17047883801299546, "grad_norm": 2.2030510902404785, "learning_rate": 2.9134760696302386e-05, "loss": 0.05698040500283241, "step": 1261 }, { "epoch": 0.17061403138176073, "grad_norm": 0.359602153301239, "learning_rate": 2.9132468252947306e-05, "loss": 0.05025045573711395, "step": 1262 }, { "epoch": 0.17074922475052598, "grad_norm": 1.3271559476852417, "learning_rate": 2.9130172867136974e-05, "loss": 0.06531503796577454, "step": 1263 }, { "epoch": 0.17088441811929125, "grad_norm": 0.3179835081100464, "learning_rate": 2.91278745393493e-05, "loss": 0.03917551040649414, "step": 1264 }, { "epoch": 0.1710196114880565, "grad_norm": 0.5240478515625, "learning_rate": 2.9125573270062812e-05, "loss": 0.07011768221855164, "step": 1265 }, { "epoch": 0.17115480485682177, "grad_norm": 0.5413030385971069, "learning_rate": 2.9123269059756634e-05, "loss": 0.09455788135528564, "step": 1266 }, { "epoch": 0.17128999822558705, "grad_norm": 0.47013336420059204, "learning_rate": 2.9120961908910528e-05, "loss": 0.0679527074098587, "step": 1267 }, { "epoch": 0.1714251915943523, "grad_norm": 0.9408896565437317, "learning_rate": 2.911865181800485e-05, "loss": 0.06111685186624527, "step": 1268 }, { "epoch": 0.17156038496311757, "grad_norm": 0.47430142760276794, "learning_rate": 2.9116338787520577e-05, "loss": 0.07369215786457062, "step": 1269 }, { "epoch": 0.17169557833188281, "grad_norm": 1.098947286605835, "learning_rate": 2.9114022817939283e-05, "loss": 0.05156645178794861, "step": 1270 }, { "epoch": 0.1718307717006481, "grad_norm": 1.2353764772415161, "learning_rate": 2.911170390974318e-05, "loss": 0.07163624465465546, "step": 1271 }, { "epoch": 0.17196596506941333, "grad_norm": 1.6305971145629883, "learning_rate": 2.9109382063415067e-05, "loss": 0.06071142852306366, "step": 1272 }, { "epoch": 0.1721011584381786, "grad_norm": 1.8492131233215332, "learning_rate": 2.9107057279438372e-05, "loss": 0.06312686949968338, "step": 1273 }, { "epoch": 0.17223635180694388, "grad_norm": 0.71107417345047, "learning_rate": 2.910472955829712e-05, "loss": 0.06514546275138855, "step": 1274 }, { "epoch": 0.17237154517570913, "grad_norm": 0.898524284362793, "learning_rate": 2.9102398900475958e-05, "loss": 0.06705622375011444, "step": 1275 }, { "epoch": 0.1725067385444744, "grad_norm": 0.2860540449619293, "learning_rate": 2.910006530646014e-05, "loss": 0.051441192626953125, "step": 1276 }, { "epoch": 0.17264193191323965, "grad_norm": 0.9001814723014832, "learning_rate": 2.909772877673554e-05, "loss": 0.07040204852819443, "step": 1277 }, { "epoch": 0.17277712528200492, "grad_norm": 0.3261363208293915, "learning_rate": 2.9095389311788626e-05, "loss": 0.06222736835479736, "step": 1278 }, { "epoch": 0.17291231865077017, "grad_norm": 1.1822023391723633, "learning_rate": 2.9093046912106494e-05, "loss": 0.08206778764724731, "step": 1279 }, { "epoch": 0.17304751201953544, "grad_norm": 0.6466618776321411, "learning_rate": 2.909070157817684e-05, "loss": 0.0743969976902008, "step": 1280 }, { "epoch": 0.17318270538830072, "grad_norm": 0.6141491532325745, "learning_rate": 2.9088353310487976e-05, "loss": 0.04727473109960556, "step": 1281 }, { "epoch": 0.17331789875706596, "grad_norm": 1.0952966213226318, "learning_rate": 2.9086002109528825e-05, "loss": 0.04447062313556671, "step": 1282 }, { "epoch": 0.17345309212583124, "grad_norm": 0.9889422655105591, "learning_rate": 2.908364797578892e-05, "loss": 0.056224673986434937, "step": 1283 }, { "epoch": 0.17358828549459648, "grad_norm": 0.2930924594402313, "learning_rate": 2.9081290909758405e-05, "loss": 0.04904172569513321, "step": 1284 }, { "epoch": 0.17372347886336176, "grad_norm": 1.41915762424469, "learning_rate": 2.9078930911928033e-05, "loss": 0.08801735937595367, "step": 1285 }, { "epoch": 0.173858672232127, "grad_norm": 1.8939464092254639, "learning_rate": 2.907656798278916e-05, "loss": 0.07156491279602051, "step": 1286 }, { "epoch": 0.17399386560089228, "grad_norm": 0.5577313303947449, "learning_rate": 2.9074202122833773e-05, "loss": 0.07089945673942566, "step": 1287 }, { "epoch": 0.17412905896965755, "grad_norm": 1.4324188232421875, "learning_rate": 2.907183333255445e-05, "loss": 0.06999829411506653, "step": 1288 }, { "epoch": 0.1742642523384228, "grad_norm": 0.33485084772109985, "learning_rate": 2.9069461612444384e-05, "loss": 0.0712369829416275, "step": 1289 }, { "epoch": 0.17439944570718807, "grad_norm": 0.3693605065345764, "learning_rate": 2.9067086962997385e-05, "loss": 0.07062338292598724, "step": 1290 }, { "epoch": 0.17453463907595332, "grad_norm": 0.7931060791015625, "learning_rate": 2.9064709384707868e-05, "loss": 0.0733291506767273, "step": 1291 }, { "epoch": 0.1746698324447186, "grad_norm": 0.7573596239089966, "learning_rate": 2.9062328878070855e-05, "loss": 0.06712360680103302, "step": 1292 }, { "epoch": 0.17480502581348384, "grad_norm": 1.6741069555282593, "learning_rate": 2.905994544358198e-05, "loss": 0.057468071579933167, "step": 1293 }, { "epoch": 0.1749402191822491, "grad_norm": 0.9005102515220642, "learning_rate": 2.9057559081737482e-05, "loss": 0.06759119033813477, "step": 1294 }, { "epoch": 0.1750754125510144, "grad_norm": 0.9607921838760376, "learning_rate": 2.9055169793034225e-05, "loss": 0.05060403048992157, "step": 1295 }, { "epoch": 0.17521060591977963, "grad_norm": 0.49658510088920593, "learning_rate": 2.9052777577969656e-05, "loss": 0.0803871750831604, "step": 1296 }, { "epoch": 0.1753457992885449, "grad_norm": 0.31053662300109863, "learning_rate": 2.9050382437041868e-05, "loss": 0.046495724469423294, "step": 1297 }, { "epoch": 0.17548099265731015, "grad_norm": 0.6438810229301453, "learning_rate": 2.9047984370749526e-05, "loss": 0.0692463219165802, "step": 1298 }, { "epoch": 0.17561618602607543, "grad_norm": 0.2605363130569458, "learning_rate": 2.9045583379591925e-05, "loss": 0.055267587304115295, "step": 1299 }, { "epoch": 0.17575137939484067, "grad_norm": 0.3073757588863373, "learning_rate": 2.9043179464068965e-05, "loss": 0.057579156011343, "step": 1300 }, { "epoch": 0.17588657276360595, "grad_norm": 1.2076513767242432, "learning_rate": 2.9040772624681152e-05, "loss": 0.08651989698410034, "step": 1301 }, { "epoch": 0.1760217661323712, "grad_norm": 0.597019612789154, "learning_rate": 2.9038362861929603e-05, "loss": 0.06141549348831177, "step": 1302 }, { "epoch": 0.17615695950113647, "grad_norm": 0.6154230237007141, "learning_rate": 2.903595017631605e-05, "loss": 0.0814509391784668, "step": 1303 }, { "epoch": 0.17629215286990174, "grad_norm": 0.9492348432540894, "learning_rate": 2.903353456834282e-05, "loss": 0.07166945934295654, "step": 1304 }, { "epoch": 0.176427346238667, "grad_norm": 1.2229962348937988, "learning_rate": 2.903111603851285e-05, "loss": 0.0598774254322052, "step": 1305 }, { "epoch": 0.17656253960743226, "grad_norm": 0.944716215133667, "learning_rate": 2.9028694587329704e-05, "loss": 0.061225906014442444, "step": 1306 }, { "epoch": 0.1766977329761975, "grad_norm": 0.7197076678276062, "learning_rate": 2.902627021529753e-05, "loss": 0.05019553005695343, "step": 1307 }, { "epoch": 0.17683292634496278, "grad_norm": 0.9098347425460815, "learning_rate": 2.9023842922921105e-05, "loss": 0.07456699013710022, "step": 1308 }, { "epoch": 0.17696811971372803, "grad_norm": 2.485454559326172, "learning_rate": 2.90214127107058e-05, "loss": 0.07402080297470093, "step": 1309 }, { "epoch": 0.1771033130824933, "grad_norm": 1.4531241655349731, "learning_rate": 2.9018979579157592e-05, "loss": 0.07091991603374481, "step": 1310 }, { "epoch": 0.17723850645125858, "grad_norm": 0.4783823788166046, "learning_rate": 2.901654352878308e-05, "loss": 0.0479360967874527, "step": 1311 }, { "epoch": 0.17737369982002382, "grad_norm": 0.5426583886146545, "learning_rate": 2.9014104560089462e-05, "loss": 0.043479904532432556, "step": 1312 }, { "epoch": 0.1775088931887891, "grad_norm": 0.6178447604179382, "learning_rate": 2.9011662673584538e-05, "loss": 0.08286845684051514, "step": 1313 }, { "epoch": 0.17764408655755434, "grad_norm": 1.829768180847168, "learning_rate": 2.900921786977673e-05, "loss": 0.049149930477142334, "step": 1314 }, { "epoch": 0.17777927992631962, "grad_norm": 0.8166171312332153, "learning_rate": 2.900677014917505e-05, "loss": 0.07144433259963989, "step": 1315 }, { "epoch": 0.17791447329508486, "grad_norm": 0.9345900416374207, "learning_rate": 2.9004319512289136e-05, "loss": 0.05137139558792114, "step": 1316 }, { "epoch": 0.17804966666385014, "grad_norm": 0.4796876013278961, "learning_rate": 2.9001865959629222e-05, "loss": 0.04606900364160538, "step": 1317 }, { "epoch": 0.1781848600326154, "grad_norm": 0.7798415422439575, "learning_rate": 2.8999409491706143e-05, "loss": 0.05695191025733948, "step": 1318 }, { "epoch": 0.17832005340138066, "grad_norm": 0.9366718530654907, "learning_rate": 2.8996950109031355e-05, "loss": 0.07336622476577759, "step": 1319 }, { "epoch": 0.17845524677014593, "grad_norm": 1.6822129487991333, "learning_rate": 2.8994487812116917e-05, "loss": 0.08025699853897095, "step": 1320 }, { "epoch": 0.17859044013891118, "grad_norm": 0.8868669271469116, "learning_rate": 2.8992022601475483e-05, "loss": 0.09152978658676147, "step": 1321 }, { "epoch": 0.17872563350767645, "grad_norm": 0.9628618359565735, "learning_rate": 2.8989554477620332e-05, "loss": 0.0634050965309143, "step": 1322 }, { "epoch": 0.1788608268764417, "grad_norm": 0.36020126938819885, "learning_rate": 2.8987083441065335e-05, "loss": 0.05945882201194763, "step": 1323 }, { "epoch": 0.17899602024520697, "grad_norm": 1.1709458827972412, "learning_rate": 2.8984609492324983e-05, "loss": 0.07281702756881714, "step": 1324 }, { "epoch": 0.17913121361397225, "grad_norm": 0.5962613821029663, "learning_rate": 2.8982132631914357e-05, "loss": 0.06544731557369232, "step": 1325 }, { "epoch": 0.1792664069827375, "grad_norm": 1.8113411664962769, "learning_rate": 2.8979652860349154e-05, "loss": 0.06583040952682495, "step": 1326 }, { "epoch": 0.17940160035150277, "grad_norm": 1.1780592203140259, "learning_rate": 2.8977170178145675e-05, "loss": 0.06304009258747101, "step": 1327 }, { "epoch": 0.179536793720268, "grad_norm": 1.8488304615020752, "learning_rate": 2.8974684585820833e-05, "loss": 0.0766972005367279, "step": 1328 }, { "epoch": 0.17967198708903329, "grad_norm": 0.4457748234272003, "learning_rate": 2.8972196083892138e-05, "loss": 0.05921986699104309, "step": 1329 }, { "epoch": 0.17980718045779853, "grad_norm": 0.4643270671367645, "learning_rate": 2.8969704672877707e-05, "loss": 0.07630157470703125, "step": 1330 }, { "epoch": 0.1799423738265638, "grad_norm": 0.3910136818885803, "learning_rate": 2.896721035329627e-05, "loss": 0.05909073352813721, "step": 1331 }, { "epoch": 0.18007756719532908, "grad_norm": 1.9842510223388672, "learning_rate": 2.8964713125667153e-05, "loss": 0.06459778547286987, "step": 1332 }, { "epoch": 0.18021276056409433, "grad_norm": 1.379435658454895, "learning_rate": 2.8962212990510294e-05, "loss": 0.07560950517654419, "step": 1333 }, { "epoch": 0.1803479539328596, "grad_norm": 1.6055450439453125, "learning_rate": 2.8959709948346237e-05, "loss": 0.05632704496383667, "step": 1334 }, { "epoch": 0.18048314730162485, "grad_norm": 0.4815007448196411, "learning_rate": 2.8957203999696124e-05, "loss": 0.06901296973228455, "step": 1335 }, { "epoch": 0.18061834067039012, "grad_norm": 0.3896736800670624, "learning_rate": 2.8954695145081713e-05, "loss": 0.053732067346572876, "step": 1336 }, { "epoch": 0.18075353403915537, "grad_norm": 1.5841916799545288, "learning_rate": 2.8952183385025356e-05, "loss": 0.06032158434391022, "step": 1337 }, { "epoch": 0.18088872740792064, "grad_norm": 0.43961912393569946, "learning_rate": 2.8949668720050014e-05, "loss": 0.058547914028167725, "step": 1338 }, { "epoch": 0.18102392077668591, "grad_norm": 0.8505260348320007, "learning_rate": 2.8947151150679256e-05, "loss": 0.059689223766326904, "step": 1339 }, { "epoch": 0.18115911414545116, "grad_norm": 0.6629796624183655, "learning_rate": 2.8944630677437255e-05, "loss": 0.07312893867492676, "step": 1340 }, { "epoch": 0.18129430751421644, "grad_norm": 0.802157998085022, "learning_rate": 2.8942107300848784e-05, "loss": 0.05700739473104477, "step": 1341 }, { "epoch": 0.18142950088298168, "grad_norm": 0.6724606156349182, "learning_rate": 2.8939581021439225e-05, "loss": 0.0663527324795723, "step": 1342 }, { "epoch": 0.18156469425174696, "grad_norm": 0.5345813632011414, "learning_rate": 2.8937051839734563e-05, "loss": 0.043694838881492615, "step": 1343 }, { "epoch": 0.1816998876205122, "grad_norm": 0.34863507747650146, "learning_rate": 2.8934519756261384e-05, "loss": 0.05110260099172592, "step": 1344 }, { "epoch": 0.18183508098927748, "grad_norm": 1.1135504245758057, "learning_rate": 2.8931984771546885e-05, "loss": 0.058423250913619995, "step": 1345 }, { "epoch": 0.18197027435804275, "grad_norm": 1.122864007949829, "learning_rate": 2.8929446886118866e-05, "loss": 0.06855830550193787, "step": 1346 }, { "epoch": 0.182105467726808, "grad_norm": 1.548689603805542, "learning_rate": 2.892690610050572e-05, "loss": 0.06757400184869766, "step": 1347 }, { "epoch": 0.18224066109557327, "grad_norm": 0.3105394244194031, "learning_rate": 2.892436241523646e-05, "loss": 0.047551169991493225, "step": 1348 }, { "epoch": 0.18237585446433852, "grad_norm": 1.830718755722046, "learning_rate": 2.8921815830840685e-05, "loss": 0.08233155310153961, "step": 1349 }, { "epoch": 0.1825110478331038, "grad_norm": 0.4397461712360382, "learning_rate": 2.891926634784862e-05, "loss": 0.06138932704925537, "step": 1350 }, { "epoch": 0.18264624120186904, "grad_norm": 0.7208978533744812, "learning_rate": 2.8916713966791076e-05, "loss": 0.05991073325276375, "step": 1351 }, { "epoch": 0.1827814345706343, "grad_norm": 0.7274355292320251, "learning_rate": 2.8914158688199464e-05, "loss": 0.060209520161151886, "step": 1352 }, { "epoch": 0.18291662793939958, "grad_norm": 1.2630335092544556, "learning_rate": 2.891160051260582e-05, "loss": 0.06155611574649811, "step": 1353 }, { "epoch": 0.18305182130816483, "grad_norm": 1.1451889276504517, "learning_rate": 2.8909039440542758e-05, "loss": 0.062114208936691284, "step": 1354 }, { "epoch": 0.1831870146769301, "grad_norm": 1.212422251701355, "learning_rate": 2.890647547254352e-05, "loss": 0.060322076082229614, "step": 1355 }, { "epoch": 0.18332220804569535, "grad_norm": 0.9648664593696594, "learning_rate": 2.8903908609141923e-05, "loss": 0.08123242855072021, "step": 1356 }, { "epoch": 0.18345740141446062, "grad_norm": 1.5143882036209106, "learning_rate": 2.8901338850872413e-05, "loss": 0.06759816408157349, "step": 1357 }, { "epoch": 0.18359259478322587, "grad_norm": 0.5402228832244873, "learning_rate": 2.8898766198270022e-05, "loss": 0.07894018292427063, "step": 1358 }, { "epoch": 0.18372778815199114, "grad_norm": 0.6195774674415588, "learning_rate": 2.8896190651870392e-05, "loss": 0.09232117235660553, "step": 1359 }, { "epoch": 0.18386298152075642, "grad_norm": 1.2298245429992676, "learning_rate": 2.8893612212209763e-05, "loss": 0.05993261933326721, "step": 1360 }, { "epoch": 0.18399817488952167, "grad_norm": 2.5731022357940674, "learning_rate": 2.8891030879824985e-05, "loss": 0.07145705819129944, "step": 1361 }, { "epoch": 0.18413336825828694, "grad_norm": 0.5495437979698181, "learning_rate": 2.88884466552535e-05, "loss": 0.06904041767120361, "step": 1362 }, { "epoch": 0.18426856162705219, "grad_norm": 0.8204841613769531, "learning_rate": 2.888585953903336e-05, "loss": 0.051793456077575684, "step": 1363 }, { "epoch": 0.18440375499581746, "grad_norm": 1.3093513250350952, "learning_rate": 2.888326953170321e-05, "loss": 0.06429917365312576, "step": 1364 }, { "epoch": 0.1845389483645827, "grad_norm": 0.4332553446292877, "learning_rate": 2.8880676633802314e-05, "loss": 0.059311643242836, "step": 1365 }, { "epoch": 0.18467414173334798, "grad_norm": 0.9532291293144226, "learning_rate": 2.8878080845870522e-05, "loss": 0.07912647724151611, "step": 1366 }, { "epoch": 0.18480933510211325, "grad_norm": 0.7556528449058533, "learning_rate": 2.887548216844829e-05, "loss": 0.04770098999142647, "step": 1367 }, { "epoch": 0.1849445284708785, "grad_norm": 1.159386157989502, "learning_rate": 2.8872880602076675e-05, "loss": 0.053607672452926636, "step": 1368 }, { "epoch": 0.18507972183964377, "grad_norm": 0.6745975017547607, "learning_rate": 2.8870276147297344e-05, "loss": 0.08379173278808594, "step": 1369 }, { "epoch": 0.18521491520840902, "grad_norm": 1.985194206237793, "learning_rate": 2.8867668804652552e-05, "loss": 0.06111246347427368, "step": 1370 }, { "epoch": 0.1853501085771743, "grad_norm": 0.5915805697441101, "learning_rate": 2.886505857468516e-05, "loss": 0.06341639161109924, "step": 1371 }, { "epoch": 0.18548530194593954, "grad_norm": 0.8633459210395813, "learning_rate": 2.8862445457938642e-05, "loss": 0.0885532796382904, "step": 1372 }, { "epoch": 0.18562049531470481, "grad_norm": 0.5439592599868774, "learning_rate": 2.8859829454957053e-05, "loss": 0.05765097588300705, "step": 1373 }, { "epoch": 0.1857556886834701, "grad_norm": 0.846956193447113, "learning_rate": 2.8857210566285062e-05, "loss": 0.06486758589744568, "step": 1374 }, { "epoch": 0.18589088205223533, "grad_norm": 1.9822721481323242, "learning_rate": 2.8854588792467932e-05, "loss": 0.07421214878559113, "step": 1375 }, { "epoch": 0.1860260754210006, "grad_norm": 2.105954647064209, "learning_rate": 2.8851964134051535e-05, "loss": 0.07359392940998077, "step": 1376 }, { "epoch": 0.18616126878976585, "grad_norm": 1.0604692697525024, "learning_rate": 2.884933659158234e-05, "loss": 0.07753920555114746, "step": 1377 }, { "epoch": 0.18629646215853113, "grad_norm": 1.4054888486862183, "learning_rate": 2.8846706165607415e-05, "loss": 0.06569766998291016, "step": 1378 }, { "epoch": 0.18643165552729637, "grad_norm": 0.9416623711585999, "learning_rate": 2.8844072856674422e-05, "loss": 0.0600811243057251, "step": 1379 }, { "epoch": 0.18656684889606165, "grad_norm": 0.5811194777488708, "learning_rate": 2.8841436665331634e-05, "loss": 0.05377200245857239, "step": 1380 }, { "epoch": 0.18670204226482692, "grad_norm": 1.1855747699737549, "learning_rate": 2.8838797592127927e-05, "loss": 0.04610684514045715, "step": 1381 }, { "epoch": 0.18683723563359217, "grad_norm": 0.3341962993144989, "learning_rate": 2.883615563761276e-05, "loss": 0.04806177318096161, "step": 1382 }, { "epoch": 0.18697242900235744, "grad_norm": 0.5546738505363464, "learning_rate": 2.8833510802336206e-05, "loss": 0.03987765312194824, "step": 1383 }, { "epoch": 0.1871076223711227, "grad_norm": 0.3393515944480896, "learning_rate": 2.883086308684893e-05, "loss": 0.05964048206806183, "step": 1384 }, { "epoch": 0.18724281573988796, "grad_norm": 0.5718085169792175, "learning_rate": 2.882821249170221e-05, "loss": 0.04978965222835541, "step": 1385 }, { "epoch": 0.1873780091086532, "grad_norm": 2.1170740127563477, "learning_rate": 2.8825559017447905e-05, "loss": 0.05634533613920212, "step": 1386 }, { "epoch": 0.18751320247741848, "grad_norm": 1.212235689163208, "learning_rate": 2.8822902664638487e-05, "loss": 0.05493786931037903, "step": 1387 }, { "epoch": 0.18764839584618376, "grad_norm": 0.3020192086696625, "learning_rate": 2.882024343382702e-05, "loss": 0.056016504764556885, "step": 1388 }, { "epoch": 0.187783589214949, "grad_norm": 1.2162086963653564, "learning_rate": 2.8817581325567174e-05, "loss": 0.06100273132324219, "step": 1389 }, { "epoch": 0.18791878258371428, "grad_norm": 1.113671898841858, "learning_rate": 2.8814916340413205e-05, "loss": 0.052695900201797485, "step": 1390 }, { "epoch": 0.18805397595247952, "grad_norm": 1.1613482236862183, "learning_rate": 2.881224847891999e-05, "loss": 0.050453707575798035, "step": 1391 }, { "epoch": 0.1881891693212448, "grad_norm": 1.153808355331421, "learning_rate": 2.8809577741642987e-05, "loss": 0.061996977776288986, "step": 1392 }, { "epoch": 0.18832436269001004, "grad_norm": 0.34761378169059753, "learning_rate": 2.8806904129138255e-05, "loss": 0.050693221390247345, "step": 1393 }, { "epoch": 0.18845955605877532, "grad_norm": 0.4242549240589142, "learning_rate": 2.8804227641962457e-05, "loss": 0.04989054054021835, "step": 1394 }, { "epoch": 0.1885947494275406, "grad_norm": 1.1975650787353516, "learning_rate": 2.8801548280672847e-05, "loss": 0.07356453686952591, "step": 1395 }, { "epoch": 0.18872994279630584, "grad_norm": 0.8233917355537415, "learning_rate": 2.8798866045827288e-05, "loss": 0.04728338122367859, "step": 1396 }, { "epoch": 0.1888651361650711, "grad_norm": 0.7185053825378418, "learning_rate": 2.8796180937984234e-05, "loss": 0.06343841552734375, "step": 1397 }, { "epoch": 0.18900032953383636, "grad_norm": 0.8097310066223145, "learning_rate": 2.8793492957702738e-05, "loss": 0.0484083853662014, "step": 1398 }, { "epoch": 0.18913552290260163, "grad_norm": 1.0301105976104736, "learning_rate": 2.8790802105542454e-05, "loss": 0.03863731026649475, "step": 1399 }, { "epoch": 0.18927071627136688, "grad_norm": 0.9395861625671387, "learning_rate": 2.8788108382063628e-05, "loss": 0.0369383729994297, "step": 1400 }, { "epoch": 0.18940590964013215, "grad_norm": 0.5939541459083557, "learning_rate": 2.878541178782711e-05, "loss": 0.04944443702697754, "step": 1401 }, { "epoch": 0.18954110300889743, "grad_norm": 0.3824097514152527, "learning_rate": 2.8782712323394344e-05, "loss": 0.06165945529937744, "step": 1402 }, { "epoch": 0.18967629637766267, "grad_norm": 0.6497465372085571, "learning_rate": 2.878000998932738e-05, "loss": 0.061732374131679535, "step": 1403 }, { "epoch": 0.18981148974642795, "grad_norm": 0.8493558168411255, "learning_rate": 2.8777304786188847e-05, "loss": 0.06216737627983093, "step": 1404 }, { "epoch": 0.1899466831151932, "grad_norm": 0.565345287322998, "learning_rate": 2.8774596714541988e-05, "loss": 0.058844342827796936, "step": 1405 }, { "epoch": 0.19008187648395847, "grad_norm": 0.26175230741500854, "learning_rate": 2.8771885774950637e-05, "loss": 0.0428578220307827, "step": 1406 }, { "epoch": 0.1902170698527237, "grad_norm": 1.515351414680481, "learning_rate": 2.876917196797923e-05, "loss": 0.06139880791306496, "step": 1407 }, { "epoch": 0.190352263221489, "grad_norm": 0.29945918917655945, "learning_rate": 2.876645529419279e-05, "loss": 0.04580672085285187, "step": 1408 }, { "epoch": 0.19048745659025426, "grad_norm": 0.5630703568458557, "learning_rate": 2.876373575415695e-05, "loss": 0.06493443250656128, "step": 1409 }, { "epoch": 0.1906226499590195, "grad_norm": 0.6920896172523499, "learning_rate": 2.8761013348437926e-05, "loss": 0.04265984147787094, "step": 1410 }, { "epoch": 0.19075784332778478, "grad_norm": 0.6778731942176819, "learning_rate": 2.875828807760254e-05, "loss": 0.06861774623394012, "step": 1411 }, { "epoch": 0.19089303669655003, "grad_norm": 0.705251932144165, "learning_rate": 2.875555994221821e-05, "loss": 0.049150049686431885, "step": 1412 }, { "epoch": 0.1910282300653153, "grad_norm": 0.42983245849609375, "learning_rate": 2.8752828942852943e-05, "loss": 0.06673504412174225, "step": 1413 }, { "epoch": 0.19116342343408055, "grad_norm": 0.5979132056236267, "learning_rate": 2.875009508007535e-05, "loss": 0.05231538414955139, "step": 1414 }, { "epoch": 0.19129861680284582, "grad_norm": 0.5539986491203308, "learning_rate": 2.8747358354454642e-05, "loss": 0.05697956681251526, "step": 1415 }, { "epoch": 0.1914338101716111, "grad_norm": 1.201626181602478, "learning_rate": 2.8744618766560614e-05, "loss": 0.062018804252147675, "step": 1416 }, { "epoch": 0.19156900354037634, "grad_norm": 0.4408376216888428, "learning_rate": 2.8741876316963664e-05, "loss": 0.07073703408241272, "step": 1417 }, { "epoch": 0.19170419690914162, "grad_norm": 0.5461744666099548, "learning_rate": 2.873913100623478e-05, "loss": 0.06411121785640717, "step": 1418 }, { "epoch": 0.19183939027790686, "grad_norm": 0.5853143334388733, "learning_rate": 2.873638283494556e-05, "loss": 0.06544029712677002, "step": 1419 }, { "epoch": 0.19197458364667214, "grad_norm": 0.3352160155773163, "learning_rate": 2.8733631803668178e-05, "loss": 0.052479326725006104, "step": 1420 }, { "epoch": 0.19210977701543738, "grad_norm": 0.587458074092865, "learning_rate": 2.8730877912975418e-05, "loss": 0.058047693222761154, "step": 1421 }, { "epoch": 0.19224497038420266, "grad_norm": 0.5267054438591003, "learning_rate": 2.8728121163440656e-05, "loss": 0.06486907601356506, "step": 1422 }, { "epoch": 0.1923801637529679, "grad_norm": 0.9581182599067688, "learning_rate": 2.8725361555637863e-05, "loss": 0.065681591629982, "step": 1423 }, { "epoch": 0.19251535712173318, "grad_norm": 0.5375241041183472, "learning_rate": 2.8722599090141598e-05, "loss": 0.04519478231668472, "step": 1424 }, { "epoch": 0.19265055049049845, "grad_norm": 0.4517899751663208, "learning_rate": 2.8719833767527026e-05, "loss": 0.058487050235271454, "step": 1425 }, { "epoch": 0.1927857438592637, "grad_norm": 1.9817936420440674, "learning_rate": 2.8717065588369896e-05, "loss": 0.0784255713224411, "step": 1426 }, { "epoch": 0.19292093722802897, "grad_norm": 1.4670841693878174, "learning_rate": 2.871429455324657e-05, "loss": 0.06524771451950073, "step": 1427 }, { "epoch": 0.19305613059679422, "grad_norm": 0.8886244297027588, "learning_rate": 2.871152066273398e-05, "loss": 0.059401385486125946, "step": 1428 }, { "epoch": 0.1931913239655595, "grad_norm": 0.4757377803325653, "learning_rate": 2.870874391740967e-05, "loss": 0.05879536271095276, "step": 1429 }, { "epoch": 0.19332651733432474, "grad_norm": 0.8385919332504272, "learning_rate": 2.8705964317851774e-05, "loss": 0.06977000832557678, "step": 1430 }, { "epoch": 0.19346171070309, "grad_norm": 1.4974596500396729, "learning_rate": 2.8703181864639013e-05, "loss": 0.06021403521299362, "step": 1431 }, { "epoch": 0.19359690407185529, "grad_norm": 1.6825629472732544, "learning_rate": 2.870039655835072e-05, "loss": 0.07012343406677246, "step": 1432 }, { "epoch": 0.19373209744062053, "grad_norm": 0.38851383328437805, "learning_rate": 2.8697608399566796e-05, "loss": 0.050712019205093384, "step": 1433 }, { "epoch": 0.1938672908093858, "grad_norm": 0.5127747058868408, "learning_rate": 2.869481738886777e-05, "loss": 0.07452350854873657, "step": 1434 }, { "epoch": 0.19400248417815105, "grad_norm": 0.3593316674232483, "learning_rate": 2.8692023526834725e-05, "loss": 0.07108698785305023, "step": 1435 }, { "epoch": 0.19413767754691633, "grad_norm": 2.6567790508270264, "learning_rate": 2.8689226814049367e-05, "loss": 0.08526412397623062, "step": 1436 }, { "epoch": 0.19427287091568157, "grad_norm": 1.179962158203125, "learning_rate": 2.868642725109399e-05, "loss": 0.06299266219139099, "step": 1437 }, { "epoch": 0.19440806428444685, "grad_norm": 1.5013912916183472, "learning_rate": 2.868362483855147e-05, "loss": 0.06303150206804276, "step": 1438 }, { "epoch": 0.19454325765321212, "grad_norm": 0.7080300450325012, "learning_rate": 2.8680819577005295e-05, "loss": 0.04272356629371643, "step": 1439 }, { "epoch": 0.19467845102197737, "grad_norm": 0.361674964427948, "learning_rate": 2.8678011467039526e-05, "loss": 0.06747883558273315, "step": 1440 }, { "epoch": 0.19481364439074264, "grad_norm": 0.835003674030304, "learning_rate": 2.867520050923883e-05, "loss": 0.0589282363653183, "step": 1441 }, { "epoch": 0.1949488377595079, "grad_norm": 0.40837588906288147, "learning_rate": 2.8672386704188466e-05, "loss": 0.06220751255750656, "step": 1442 }, { "epoch": 0.19508403112827316, "grad_norm": 0.6291946172714233, "learning_rate": 2.8669570052474273e-05, "loss": 0.0549418106675148, "step": 1443 }, { "epoch": 0.1952192244970384, "grad_norm": 0.40878450870513916, "learning_rate": 2.86667505546827e-05, "loss": 0.04268855229020119, "step": 1444 }, { "epoch": 0.19535441786580368, "grad_norm": 0.4511062800884247, "learning_rate": 2.866392821140079e-05, "loss": 0.07379084825515747, "step": 1445 }, { "epoch": 0.19548961123456896, "grad_norm": 0.9196026921272278, "learning_rate": 2.8661103023216154e-05, "loss": 0.06482299417257309, "step": 1446 }, { "epoch": 0.1956248046033342, "grad_norm": 0.7536078691482544, "learning_rate": 2.8658274990717018e-05, "loss": 0.06228230148553848, "step": 1447 }, { "epoch": 0.19575999797209948, "grad_norm": 1.1135989427566528, "learning_rate": 2.86554441144922e-05, "loss": 0.06930319964885712, "step": 1448 }, { "epoch": 0.19589519134086472, "grad_norm": 0.5206980109214783, "learning_rate": 2.8652610395131097e-05, "loss": 0.07230779528617859, "step": 1449 }, { "epoch": 0.19603038470963, "grad_norm": 0.5373529195785522, "learning_rate": 2.8649773833223702e-05, "loss": 0.06830896437168121, "step": 1450 }, { "epoch": 0.19616557807839524, "grad_norm": 0.3036535084247589, "learning_rate": 2.8646934429360606e-05, "loss": 0.049472615122795105, "step": 1451 }, { "epoch": 0.19630077144716052, "grad_norm": 0.5945667624473572, "learning_rate": 2.8644092184132986e-05, "loss": 0.06595683097839355, "step": 1452 }, { "epoch": 0.1964359648159258, "grad_norm": 0.8892114758491516, "learning_rate": 2.864124709813262e-05, "loss": 0.05675327777862549, "step": 1453 }, { "epoch": 0.19657115818469104, "grad_norm": 0.727644681930542, "learning_rate": 2.8638399171951856e-05, "loss": 0.06490229070186615, "step": 1454 }, { "epoch": 0.1967063515534563, "grad_norm": 0.6210764050483704, "learning_rate": 2.8635548406183664e-05, "loss": 0.04059150815010071, "step": 1455 }, { "epoch": 0.19684154492222156, "grad_norm": 0.6249570250511169, "learning_rate": 2.8632694801421576e-05, "loss": 0.07593834400177002, "step": 1456 }, { "epoch": 0.19697673829098683, "grad_norm": 1.0537331104278564, "learning_rate": 2.862983835825973e-05, "loss": 0.06349700689315796, "step": 1457 }, { "epoch": 0.19711193165975208, "grad_norm": 0.6601712107658386, "learning_rate": 2.8626979077292856e-05, "loss": 0.042082592844963074, "step": 1458 }, { "epoch": 0.19724712502851735, "grad_norm": 0.5785070061683655, "learning_rate": 2.862411695911627e-05, "loss": 0.06087595224380493, "step": 1459 }, { "epoch": 0.19738231839728262, "grad_norm": 0.5136145353317261, "learning_rate": 2.862125200432588e-05, "loss": 0.06294526904821396, "step": 1460 }, { "epoch": 0.19751751176604787, "grad_norm": 0.856765866279602, "learning_rate": 2.8618384213518188e-05, "loss": 0.04286312311887741, "step": 1461 }, { "epoch": 0.19765270513481314, "grad_norm": 0.7110387682914734, "learning_rate": 2.861551358729028e-05, "loss": 0.07258517295122147, "step": 1462 }, { "epoch": 0.1977878985035784, "grad_norm": 1.8345304727554321, "learning_rate": 2.8612640126239836e-05, "loss": 0.07168304920196533, "step": 1463 }, { "epoch": 0.19792309187234366, "grad_norm": 0.32499468326568604, "learning_rate": 2.8609763830965126e-05, "loss": 0.032810524106025696, "step": 1464 }, { "epoch": 0.1980582852411089, "grad_norm": 0.4453463554382324, "learning_rate": 2.860688470206501e-05, "loss": 0.07522565126419067, "step": 1465 }, { "epoch": 0.19819347860987419, "grad_norm": 0.577467143535614, "learning_rate": 2.8604002740138936e-05, "loss": 0.06885667890310287, "step": 1466 }, { "epoch": 0.19832867197863946, "grad_norm": 0.6764350533485413, "learning_rate": 2.860111794578695e-05, "loss": 0.05699273943901062, "step": 1467 }, { "epoch": 0.1984638653474047, "grad_norm": 1.195464849472046, "learning_rate": 2.8598230319609677e-05, "loss": 0.055881090462207794, "step": 1468 }, { "epoch": 0.19859905871616998, "grad_norm": 0.4497874975204468, "learning_rate": 2.8595339862208336e-05, "loss": 0.06361427903175354, "step": 1469 }, { "epoch": 0.19873425208493523, "grad_norm": 0.5617062449455261, "learning_rate": 2.8592446574184733e-05, "loss": 0.08015744388103485, "step": 1470 }, { "epoch": 0.1988694454537005, "grad_norm": 0.571229100227356, "learning_rate": 2.8589550456141274e-05, "loss": 0.08003470301628113, "step": 1471 }, { "epoch": 0.19900463882246575, "grad_norm": 0.84321129322052, "learning_rate": 2.8586651508680942e-05, "loss": 0.04576558619737625, "step": 1472 }, { "epoch": 0.19913983219123102, "grad_norm": 0.3449549674987793, "learning_rate": 2.8583749732407312e-05, "loss": 0.038829684257507324, "step": 1473 }, { "epoch": 0.1992750255599963, "grad_norm": 0.6807087659835815, "learning_rate": 2.8580845127924546e-05, "loss": 0.04984404146671295, "step": 1474 }, { "epoch": 0.19941021892876154, "grad_norm": 0.3755718171596527, "learning_rate": 2.8577937695837408e-05, "loss": 0.048488616943359375, "step": 1475 }, { "epoch": 0.19954541229752681, "grad_norm": 1.0324113368988037, "learning_rate": 2.8575027436751235e-05, "loss": 0.05285117030143738, "step": 1476 }, { "epoch": 0.19968060566629206, "grad_norm": 1.789212703704834, "learning_rate": 2.8572114351271955e-05, "loss": 0.0629136934876442, "step": 1477 }, { "epoch": 0.19981579903505733, "grad_norm": 0.4355745315551758, "learning_rate": 2.85691984400061e-05, "loss": 0.06396521627902985, "step": 1478 }, { "epoch": 0.19995099240382258, "grad_norm": 1.519565463066101, "learning_rate": 2.8566279703560762e-05, "loss": 0.05704353377223015, "step": 1479 }, { "epoch": 0.20008618577258785, "grad_norm": 0.5814024806022644, "learning_rate": 2.8563358142543648e-05, "loss": 0.0474286787211895, "step": 1480 }, { "epoch": 0.20022137914135313, "grad_norm": 3.2591962814331055, "learning_rate": 2.856043375756304e-05, "loss": 0.07081346213817596, "step": 1481 }, { "epoch": 0.20035657251011837, "grad_norm": 1.7282776832580566, "learning_rate": 2.855750654922781e-05, "loss": 0.05421200394630432, "step": 1482 }, { "epoch": 0.20049176587888365, "grad_norm": 2.3187155723571777, "learning_rate": 2.855457651814742e-05, "loss": 0.04969785362482071, "step": 1483 }, { "epoch": 0.2006269592476489, "grad_norm": 1.7427756786346436, "learning_rate": 2.8551643664931916e-05, "loss": 0.052194394171237946, "step": 1484 }, { "epoch": 0.20076215261641417, "grad_norm": 0.4092828631401062, "learning_rate": 2.8548707990191933e-05, "loss": 0.05346745252609253, "step": 1485 }, { "epoch": 0.20089734598517942, "grad_norm": 0.6095959544181824, "learning_rate": 2.8545769494538698e-05, "loss": 0.060761407017707825, "step": 1486 }, { "epoch": 0.2010325393539447, "grad_norm": 2.5191256999969482, "learning_rate": 2.854282817858402e-05, "loss": 0.06619636714458466, "step": 1487 }, { "epoch": 0.20116773272270996, "grad_norm": 1.464105248451233, "learning_rate": 2.85398840429403e-05, "loss": 0.05017085000872612, "step": 1488 }, { "epoch": 0.2013029260914752, "grad_norm": 1.7387162446975708, "learning_rate": 2.853693708822051e-05, "loss": 0.0615549311041832, "step": 1489 }, { "epoch": 0.20143811946024048, "grad_norm": 1.7491748332977295, "learning_rate": 2.8533987315038234e-05, "loss": 0.08808696269989014, "step": 1490 }, { "epoch": 0.20157331282900573, "grad_norm": 0.8185397386550903, "learning_rate": 2.8531034724007627e-05, "loss": 0.05078073590993881, "step": 1491 }, { "epoch": 0.201708506197771, "grad_norm": 1.4671510457992554, "learning_rate": 2.8528079315743435e-05, "loss": 0.054104551672935486, "step": 1492 }, { "epoch": 0.20184369956653625, "grad_norm": 0.38106274604797363, "learning_rate": 2.852512109086099e-05, "loss": 0.05543220043182373, "step": 1493 }, { "epoch": 0.20197889293530152, "grad_norm": 1.7341113090515137, "learning_rate": 2.8522160049976208e-05, "loss": 0.074765145778656, "step": 1494 }, { "epoch": 0.2021140863040668, "grad_norm": 1.8391804695129395, "learning_rate": 2.8519196193705595e-05, "loss": 0.05571398138999939, "step": 1495 }, { "epoch": 0.20224927967283204, "grad_norm": 3.4821393489837646, "learning_rate": 2.8516229522666243e-05, "loss": 0.07515992224216461, "step": 1496 }, { "epoch": 0.20238447304159732, "grad_norm": 3.051548480987549, "learning_rate": 2.8513260037475825e-05, "loss": 0.08510702848434448, "step": 1497 }, { "epoch": 0.20251966641036256, "grad_norm": 1.314646601676941, "learning_rate": 2.8510287738752604e-05, "loss": 0.0674319863319397, "step": 1498 }, { "epoch": 0.20265485977912784, "grad_norm": 1.2149888277053833, "learning_rate": 2.8507312627115435e-05, "loss": 0.06909724324941635, "step": 1499 }, { "epoch": 0.20279005314789308, "grad_norm": 1.0091185569763184, "learning_rate": 2.850433470318374e-05, "loss": 0.059906914830207825, "step": 1500 }, { "epoch": 0.20292524651665836, "grad_norm": 0.4593426287174225, "learning_rate": 2.8501353967577556e-05, "loss": 0.05634528398513794, "step": 1501 }, { "epoch": 0.20306043988542363, "grad_norm": 0.7114489674568176, "learning_rate": 2.8498370420917468e-05, "loss": 0.05177636444568634, "step": 1502 }, { "epoch": 0.20319563325418888, "grad_norm": 0.8836167454719543, "learning_rate": 2.8495384063824683e-05, "loss": 0.04526149481534958, "step": 1503 }, { "epoch": 0.20333082662295415, "grad_norm": 0.6767558455467224, "learning_rate": 2.8492394896920964e-05, "loss": 0.04810865968465805, "step": 1504 }, { "epoch": 0.2034660199917194, "grad_norm": 0.8853635787963867, "learning_rate": 2.848940292082868e-05, "loss": 0.05030532553792, "step": 1505 }, { "epoch": 0.20360121336048467, "grad_norm": 1.0097651481628418, "learning_rate": 2.8486408136170772e-05, "loss": 0.05914625525474548, "step": 1506 }, { "epoch": 0.20373640672924992, "grad_norm": 0.9485298991203308, "learning_rate": 2.8483410543570776e-05, "loss": 0.053879305720329285, "step": 1507 }, { "epoch": 0.2038716000980152, "grad_norm": 0.3891317546367645, "learning_rate": 2.8480410143652803e-05, "loss": 0.03825295343995094, "step": 1508 }, { "epoch": 0.20400679346678047, "grad_norm": 0.304548978805542, "learning_rate": 2.8477406937041547e-05, "loss": 0.058615073561668396, "step": 1509 }, { "epoch": 0.2041419868355457, "grad_norm": 0.4876135289669037, "learning_rate": 2.8474400924362298e-05, "loss": 0.0701942965388298, "step": 1510 }, { "epoch": 0.204277180204311, "grad_norm": 0.617000162601471, "learning_rate": 2.847139210624092e-05, "loss": 0.07015332579612732, "step": 1511 }, { "epoch": 0.20441237357307623, "grad_norm": 0.3961528539657593, "learning_rate": 2.8468380483303873e-05, "loss": 0.06465774774551392, "step": 1512 }, { "epoch": 0.2045475669418415, "grad_norm": 1.253760814666748, "learning_rate": 2.8465366056178183e-05, "loss": 0.049995020031929016, "step": 1513 }, { "epoch": 0.20468276031060675, "grad_norm": 0.6975782513618469, "learning_rate": 2.8462348825491475e-05, "loss": 0.06051470339298248, "step": 1514 }, { "epoch": 0.20481795367937203, "grad_norm": 1.233405590057373, "learning_rate": 2.8459328791871953e-05, "loss": 0.06607866287231445, "step": 1515 }, { "epoch": 0.2049531470481373, "grad_norm": 2.070460557937622, "learning_rate": 2.8456305955948402e-05, "loss": 0.07863160967826843, "step": 1516 }, { "epoch": 0.20508834041690255, "grad_norm": 0.9268529415130615, "learning_rate": 2.845328031835019e-05, "loss": 0.07612798362970352, "step": 1517 }, { "epoch": 0.20522353378566782, "grad_norm": 0.5493766665458679, "learning_rate": 2.8450251879707277e-05, "loss": 0.05937439203262329, "step": 1518 }, { "epoch": 0.20535872715443307, "grad_norm": 0.817802906036377, "learning_rate": 2.8447220640650194e-05, "loss": 0.043556079268455505, "step": 1519 }, { "epoch": 0.20549392052319834, "grad_norm": 0.5602957010269165, "learning_rate": 2.8444186601810068e-05, "loss": 0.05191570520401001, "step": 1520 }, { "epoch": 0.2056291138919636, "grad_norm": 1.5219169855117798, "learning_rate": 2.84411497638186e-05, "loss": 0.062377363443374634, "step": 1521 }, { "epoch": 0.20576430726072886, "grad_norm": 0.41533032059669495, "learning_rate": 2.843811012730807e-05, "loss": 0.06018230319023132, "step": 1522 }, { "epoch": 0.20589950062949414, "grad_norm": 0.42811867594718933, "learning_rate": 2.8435067692911353e-05, "loss": 0.06564238667488098, "step": 1523 }, { "epoch": 0.20603469399825938, "grad_norm": 0.4506815969944, "learning_rate": 2.8432022461261897e-05, "loss": 0.05740030109882355, "step": 1524 }, { "epoch": 0.20616988736702466, "grad_norm": 0.7717320919036865, "learning_rate": 2.8428974432993736e-05, "loss": 0.07080622762441635, "step": 1525 }, { "epoch": 0.2063050807357899, "grad_norm": 0.30265215039253235, "learning_rate": 2.8425923608741486e-05, "loss": 0.06177237629890442, "step": 1526 }, { "epoch": 0.20644027410455518, "grad_norm": 0.9989373683929443, "learning_rate": 2.8422869989140343e-05, "loss": 0.08423186093568802, "step": 1527 }, { "epoch": 0.20657546747332042, "grad_norm": 0.8502436876296997, "learning_rate": 2.8419813574826093e-05, "loss": 0.06335252523422241, "step": 1528 }, { "epoch": 0.2067106608420857, "grad_norm": 1.1274170875549316, "learning_rate": 2.8416754366435092e-05, "loss": 0.05677357316017151, "step": 1529 }, { "epoch": 0.20684585421085097, "grad_norm": 0.5897433161735535, "learning_rate": 2.8413692364604285e-05, "loss": 0.05167311057448387, "step": 1530 }, { "epoch": 0.20698104757961622, "grad_norm": 0.7777832746505737, "learning_rate": 2.8410627569971197e-05, "loss": 0.054501548409461975, "step": 1531 }, { "epoch": 0.2071162409483815, "grad_norm": 0.8203860521316528, "learning_rate": 2.8407559983173934e-05, "loss": 0.0593453012406826, "step": 1532 }, { "epoch": 0.20725143431714674, "grad_norm": 0.7851532101631165, "learning_rate": 2.8404489604851186e-05, "loss": 0.07835626602172852, "step": 1533 }, { "epoch": 0.207386627685912, "grad_norm": 0.6273208856582642, "learning_rate": 2.840141643564222e-05, "loss": 0.06668758392333984, "step": 1534 }, { "epoch": 0.20752182105467726, "grad_norm": 0.8988673090934753, "learning_rate": 2.8398340476186885e-05, "loss": 0.0568954274058342, "step": 1535 }, { "epoch": 0.20765701442344253, "grad_norm": 0.3905712962150574, "learning_rate": 2.8395261727125617e-05, "loss": 0.04055122286081314, "step": 1536 }, { "epoch": 0.2077922077922078, "grad_norm": 1.662516474723816, "learning_rate": 2.8392180189099425e-05, "loss": 0.04748539626598358, "step": 1537 }, { "epoch": 0.20792740116097305, "grad_norm": 1.5907397270202637, "learning_rate": 2.83890958627499e-05, "loss": 0.06497156620025635, "step": 1538 }, { "epoch": 0.20806259452973833, "grad_norm": 1.8041553497314453, "learning_rate": 2.8386008748719216e-05, "loss": 0.06606616824865341, "step": 1539 }, { "epoch": 0.20819778789850357, "grad_norm": 0.3390914499759674, "learning_rate": 2.838291884765013e-05, "loss": 0.056449294090270996, "step": 1540 }, { "epoch": 0.20833298126726885, "grad_norm": 0.8439186215400696, "learning_rate": 2.8379826160185975e-05, "loss": 0.053850337862968445, "step": 1541 }, { "epoch": 0.2084681746360341, "grad_norm": 0.4911138713359833, "learning_rate": 2.8376730686970664e-05, "loss": 0.0673052966594696, "step": 1542 }, { "epoch": 0.20860336800479937, "grad_norm": 1.1656914949417114, "learning_rate": 2.8373632428648683e-05, "loss": 0.0522618293762207, "step": 1543 }, { "epoch": 0.2087385613735646, "grad_norm": 0.8624969720840454, "learning_rate": 2.8370531385865124e-05, "loss": 0.07638722658157349, "step": 1544 }, { "epoch": 0.2088737547423299, "grad_norm": 0.8319230675697327, "learning_rate": 2.8367427559265622e-05, "loss": 0.0550997257232666, "step": 1545 }, { "epoch": 0.20900894811109516, "grad_norm": 0.6036023497581482, "learning_rate": 2.836432094949642e-05, "loss": 0.051479071378707886, "step": 1546 }, { "epoch": 0.2091441414798604, "grad_norm": 0.4117427468299866, "learning_rate": 2.836121155720433e-05, "loss": 0.0465034544467926, "step": 1547 }, { "epoch": 0.20927933484862568, "grad_norm": 0.3497418761253357, "learning_rate": 2.8358099383036745e-05, "loss": 0.052825331687927246, "step": 1548 }, { "epoch": 0.20941452821739093, "grad_norm": 0.4250861704349518, "learning_rate": 2.8354984427641634e-05, "loss": 0.04520291090011597, "step": 1549 }, { "epoch": 0.2095497215861562, "grad_norm": 0.9436564445495605, "learning_rate": 2.8351866691667544e-05, "loss": 0.061111681163311005, "step": 1550 }, { "epoch": 0.20968491495492145, "grad_norm": 0.8618022799491882, "learning_rate": 2.8348746175763613e-05, "loss": 0.0505509227514267, "step": 1551 }, { "epoch": 0.20982010832368672, "grad_norm": 0.30750373005867004, "learning_rate": 2.8345622880579537e-05, "loss": 0.048603713512420654, "step": 1552 }, { "epoch": 0.209955301692452, "grad_norm": 0.40893998742103577, "learning_rate": 2.8342496806765615e-05, "loss": 0.049013957381248474, "step": 1553 }, { "epoch": 0.21009049506121724, "grad_norm": 0.5988844633102417, "learning_rate": 2.833936795497271e-05, "loss": 0.045951008796691895, "step": 1554 }, { "epoch": 0.21022568842998252, "grad_norm": 0.9709116220474243, "learning_rate": 2.8336236325852257e-05, "loss": 0.05929470434784889, "step": 1555 }, { "epoch": 0.21036088179874776, "grad_norm": 0.5662141442298889, "learning_rate": 2.8333101920056285e-05, "loss": 0.07690741866827011, "step": 1556 }, { "epoch": 0.21049607516751304, "grad_norm": 0.5925481915473938, "learning_rate": 2.8329964738237392e-05, "loss": 0.04631420969963074, "step": 1557 }, { "epoch": 0.21063126853627828, "grad_norm": 0.6251190900802612, "learning_rate": 2.8326824781048756e-05, "loss": 0.052150797098875046, "step": 1558 }, { "epoch": 0.21076646190504356, "grad_norm": 0.700613796710968, "learning_rate": 2.8323682049144135e-05, "loss": 0.0730980932712555, "step": 1559 }, { "epoch": 0.21090165527380883, "grad_norm": 0.8474962115287781, "learning_rate": 2.832053654317786e-05, "loss": 0.05908976495265961, "step": 1560 }, { "epoch": 0.21103684864257408, "grad_norm": 1.0694419145584106, "learning_rate": 2.8317388263804842e-05, "loss": 0.050022710114717484, "step": 1561 }, { "epoch": 0.21117204201133935, "grad_norm": 1.1018961668014526, "learning_rate": 2.8314237211680573e-05, "loss": 0.06538151204586029, "step": 1562 }, { "epoch": 0.2113072353801046, "grad_norm": 1.3340040445327759, "learning_rate": 2.8311083387461118e-05, "loss": 0.06075182557106018, "step": 1563 }, { "epoch": 0.21144242874886987, "grad_norm": 0.39237844944000244, "learning_rate": 2.8307926791803114e-05, "loss": 0.04414108395576477, "step": 1564 }, { "epoch": 0.21157762211763512, "grad_norm": 0.8417506217956543, "learning_rate": 2.8304767425363785e-05, "loss": 0.07103466987609863, "step": 1565 }, { "epoch": 0.2117128154864004, "grad_norm": 0.7777599692344666, "learning_rate": 2.830160528880093e-05, "loss": 0.05266956984996796, "step": 1566 }, { "epoch": 0.21184800885516566, "grad_norm": 1.6419627666473389, "learning_rate": 2.829844038277292e-05, "loss": 0.055458784103393555, "step": 1567 }, { "epoch": 0.2119832022239309, "grad_norm": 1.389513611793518, "learning_rate": 2.8295272707938706e-05, "loss": 0.07247611880302429, "step": 1568 }, { "epoch": 0.21211839559269619, "grad_norm": 0.29261651635169983, "learning_rate": 2.8292102264957817e-05, "loss": 0.051503024995326996, "step": 1569 }, { "epoch": 0.21225358896146143, "grad_norm": 1.0491795539855957, "learning_rate": 2.8288929054490357e-05, "loss": 0.055749163031578064, "step": 1570 }, { "epoch": 0.2123887823302267, "grad_norm": 0.7268002033233643, "learning_rate": 2.8285753077196998e-05, "loss": 0.05362197756767273, "step": 1571 }, { "epoch": 0.21252397569899195, "grad_norm": 0.41287580132484436, "learning_rate": 2.8282574333739006e-05, "loss": 0.05210597813129425, "step": 1572 }, { "epoch": 0.21265916906775723, "grad_norm": 0.5221613645553589, "learning_rate": 2.8279392824778197e-05, "loss": 0.06534935534000397, "step": 1573 }, { "epoch": 0.2127943624365225, "grad_norm": 0.9850326180458069, "learning_rate": 2.8276208550976993e-05, "loss": 0.04884040355682373, "step": 1574 }, { "epoch": 0.21292955580528775, "grad_norm": 0.43634629249572754, "learning_rate": 2.8273021512998372e-05, "loss": 0.047863513231277466, "step": 1575 }, { "epoch": 0.21306474917405302, "grad_norm": 0.9789410829544067, "learning_rate": 2.826983171150589e-05, "loss": 0.08013477921485901, "step": 1576 }, { "epoch": 0.21319994254281827, "grad_norm": 0.37938928604125977, "learning_rate": 2.826663914716368e-05, "loss": 0.06604525446891785, "step": 1577 }, { "epoch": 0.21333513591158354, "grad_norm": 0.5171474814414978, "learning_rate": 2.826344382063646e-05, "loss": 0.053354568779468536, "step": 1578 }, { "epoch": 0.2134703292803488, "grad_norm": 0.9365546703338623, "learning_rate": 2.8260245732589503e-05, "loss": 0.06443160772323608, "step": 1579 }, { "epoch": 0.21360552264911406, "grad_norm": 0.5557537078857422, "learning_rate": 2.8257044883688672e-05, "loss": 0.06397908180952072, "step": 1580 }, { "epoch": 0.21374071601787933, "grad_norm": 0.6537729501724243, "learning_rate": 2.82538412746004e-05, "loss": 0.07449346035718918, "step": 1581 }, { "epoch": 0.21387590938664458, "grad_norm": 1.3514331579208374, "learning_rate": 2.8250634905991695e-05, "loss": 0.06255114078521729, "step": 1582 }, { "epoch": 0.21401110275540985, "grad_norm": 0.7896385192871094, "learning_rate": 2.824742577853015e-05, "loss": 0.05204898118972778, "step": 1583 }, { "epoch": 0.2141462961241751, "grad_norm": 0.33208876848220825, "learning_rate": 2.8244213892883907e-05, "loss": 0.04208874702453613, "step": 1584 }, { "epoch": 0.21428148949294037, "grad_norm": 1.2684580087661743, "learning_rate": 2.82409992497217e-05, "loss": 0.06291976571083069, "step": 1585 }, { "epoch": 0.21441668286170562, "grad_norm": 0.4156642556190491, "learning_rate": 2.8237781849712852e-05, "loss": 0.043654412031173706, "step": 1586 }, { "epoch": 0.2145518762304709, "grad_norm": 1.0025794506072998, "learning_rate": 2.8234561693527222e-05, "loss": 0.06891020387411118, "step": 1587 }, { "epoch": 0.21468706959923617, "grad_norm": 0.40402752161026, "learning_rate": 2.8231338781835275e-05, "loss": 0.05821501463651657, "step": 1588 }, { "epoch": 0.21482226296800142, "grad_norm": 1.179532766342163, "learning_rate": 2.8228113115308032e-05, "loss": 0.04632169008255005, "step": 1589 }, { "epoch": 0.2149574563367667, "grad_norm": 0.5531177520751953, "learning_rate": 2.82248846946171e-05, "loss": 0.050778210163116455, "step": 1590 }, { "epoch": 0.21509264970553194, "grad_norm": 0.7919912338256836, "learning_rate": 2.822165352043465e-05, "loss": 0.051515668630599976, "step": 1591 }, { "epoch": 0.2152278430742972, "grad_norm": 0.30874982476234436, "learning_rate": 2.8218419593433437e-05, "loss": 0.05695490539073944, "step": 1592 }, { "epoch": 0.21536303644306246, "grad_norm": 0.47324231266975403, "learning_rate": 2.8215182914286768e-05, "loss": 0.051553875207901, "step": 1593 }, { "epoch": 0.21549822981182773, "grad_norm": 0.4900946617126465, "learning_rate": 2.8211943483668546e-05, "loss": 0.07251714169979095, "step": 1594 }, { "epoch": 0.215633423180593, "grad_norm": 1.1118718385696411, "learning_rate": 2.8208701302253237e-05, "loss": 0.07170483469963074, "step": 1595 }, { "epoch": 0.21576861654935825, "grad_norm": 1.024431824684143, "learning_rate": 2.820545637071588e-05, "loss": 0.040563106536865234, "step": 1596 }, { "epoch": 0.21590380991812352, "grad_norm": 1.1180799007415771, "learning_rate": 2.8202208689732083e-05, "loss": 0.05583508312702179, "step": 1597 }, { "epoch": 0.21603900328688877, "grad_norm": 1.9988240003585815, "learning_rate": 2.819895825997804e-05, "loss": 0.05353221297264099, "step": 1598 }, { "epoch": 0.21617419665565404, "grad_norm": 1.0622143745422363, "learning_rate": 2.81957050821305e-05, "loss": 0.05039858818054199, "step": 1599 }, { "epoch": 0.2163093900244193, "grad_norm": 1.1034172773361206, "learning_rate": 2.8192449156866787e-05, "loss": 0.04590676724910736, "step": 1600 }, { "epoch": 0.21644458339318456, "grad_norm": 0.7669665217399597, "learning_rate": 2.8189190484864814e-05, "loss": 0.05868140608072281, "step": 1601 }, { "epoch": 0.21657977676194984, "grad_norm": 0.396586537361145, "learning_rate": 2.8185929066803052e-05, "loss": 0.0575755313038826, "step": 1602 }, { "epoch": 0.21671497013071508, "grad_norm": 1.191956877708435, "learning_rate": 2.818266490336054e-05, "loss": 0.036856234073638916, "step": 1603 }, { "epoch": 0.21685016349948036, "grad_norm": 1.4901243448257446, "learning_rate": 2.817939799521689e-05, "loss": 0.0770600438117981, "step": 1604 }, { "epoch": 0.2169853568682456, "grad_norm": 2.492192268371582, "learning_rate": 2.8176128343052304e-05, "loss": 0.06851261854171753, "step": 1605 }, { "epoch": 0.21712055023701088, "grad_norm": 0.568828284740448, "learning_rate": 2.817285594754753e-05, "loss": 0.04781460762023926, "step": 1606 }, { "epoch": 0.21725574360577612, "grad_norm": 0.9603391885757446, "learning_rate": 2.8169580809383902e-05, "loss": 0.05101378262042999, "step": 1607 }, { "epoch": 0.2173909369745414, "grad_norm": 0.4263308644294739, "learning_rate": 2.8166302929243326e-05, "loss": 0.052577219903469086, "step": 1608 }, { "epoch": 0.21752613034330667, "grad_norm": 1.386176347732544, "learning_rate": 2.8163022307808264e-05, "loss": 0.06691841036081314, "step": 1609 }, { "epoch": 0.21766132371207192, "grad_norm": 0.5585028529167175, "learning_rate": 2.8159738945761764e-05, "loss": 0.04208102077245712, "step": 1610 }, { "epoch": 0.2177965170808372, "grad_norm": 0.48720434308052063, "learning_rate": 2.8156452843787438e-05, "loss": 0.05007272958755493, "step": 1611 }, { "epoch": 0.21793171044960244, "grad_norm": 0.8018759489059448, "learning_rate": 2.815316400256947e-05, "loss": 0.07517276704311371, "step": 1612 }, { "epoch": 0.2180669038183677, "grad_norm": 0.680761992931366, "learning_rate": 2.814987242279262e-05, "loss": 0.06988190114498138, "step": 1613 }, { "epoch": 0.21820209718713296, "grad_norm": 0.3617783188819885, "learning_rate": 2.8146578105142202e-05, "loss": 0.0472244918346405, "step": 1614 }, { "epoch": 0.21833729055589823, "grad_norm": 1.3154038190841675, "learning_rate": 2.814328105030412e-05, "loss": 0.05513004958629608, "step": 1615 }, { "epoch": 0.2184724839246635, "grad_norm": 1.6053988933563232, "learning_rate": 2.8139981258964836e-05, "loss": 0.07096618413925171, "step": 1616 }, { "epoch": 0.21860767729342875, "grad_norm": 0.7854763269424438, "learning_rate": 2.8136678731811385e-05, "loss": 0.06544134020805359, "step": 1617 }, { "epoch": 0.21874287066219403, "grad_norm": 1.5256870985031128, "learning_rate": 2.8133373469531362e-05, "loss": 0.0759010910987854, "step": 1618 }, { "epoch": 0.21887806403095927, "grad_norm": 1.4773973226547241, "learning_rate": 2.8130065472812952e-05, "loss": 0.05293330177664757, "step": 1619 }, { "epoch": 0.21901325739972455, "grad_norm": 0.6383475065231323, "learning_rate": 2.812675474234489e-05, "loss": 0.06383223831653595, "step": 1620 }, { "epoch": 0.2191484507684898, "grad_norm": 0.7026002407073975, "learning_rate": 2.812344127881649e-05, "loss": 0.055020660161972046, "step": 1621 }, { "epoch": 0.21928364413725507, "grad_norm": 1.5413810014724731, "learning_rate": 2.8120125082917638e-05, "loss": 0.051977306604385376, "step": 1622 }, { "epoch": 0.21941883750602034, "grad_norm": 0.6406027674674988, "learning_rate": 2.8116806155338773e-05, "loss": 0.07189971208572388, "step": 1623 }, { "epoch": 0.2195540308747856, "grad_norm": 1.2294435501098633, "learning_rate": 2.8113484496770923e-05, "loss": 0.06025470793247223, "step": 1624 }, { "epoch": 0.21968922424355086, "grad_norm": 0.6029508709907532, "learning_rate": 2.811016010790567e-05, "loss": 0.05814117193222046, "step": 1625 }, { "epoch": 0.2198244176123161, "grad_norm": 1.443228006362915, "learning_rate": 2.8106832989435165e-05, "loss": 0.07699942588806152, "step": 1626 }, { "epoch": 0.21995961098108138, "grad_norm": 0.37957221269607544, "learning_rate": 2.8103503142052146e-05, "loss": 0.05255141854286194, "step": 1627 }, { "epoch": 0.22009480434984663, "grad_norm": 0.9441279768943787, "learning_rate": 2.8100170566449892e-05, "loss": 0.04794152081012726, "step": 1628 }, { "epoch": 0.2202299977186119, "grad_norm": 0.3265136182308197, "learning_rate": 2.8096835263322266e-05, "loss": 0.0478270947933197, "step": 1629 }, { "epoch": 0.22036519108737718, "grad_norm": 0.4508991241455078, "learning_rate": 2.8093497233363702e-05, "loss": 0.046484291553497314, "step": 1630 }, { "epoch": 0.22050038445614242, "grad_norm": 0.9232129454612732, "learning_rate": 2.8090156477269185e-05, "loss": 0.054828643798828125, "step": 1631 }, { "epoch": 0.2206355778249077, "grad_norm": 0.8288602828979492, "learning_rate": 2.808681299573429e-05, "loss": 0.04206643998622894, "step": 1632 }, { "epoch": 0.22077077119367294, "grad_norm": 1.2797791957855225, "learning_rate": 2.8083466789455137e-05, "loss": 0.08406156301498413, "step": 1633 }, { "epoch": 0.22090596456243822, "grad_norm": 0.3598003685474396, "learning_rate": 2.808011785912843e-05, "loss": 0.04149273782968521, "step": 1634 }, { "epoch": 0.22104115793120346, "grad_norm": 0.6481890678405762, "learning_rate": 2.8076766205451435e-05, "loss": 0.05434077978134155, "step": 1635 }, { "epoch": 0.22117635129996874, "grad_norm": 1.588051676750183, "learning_rate": 2.8073411829121983e-05, "loss": 0.05897385999560356, "step": 1636 }, { "epoch": 0.221311544668734, "grad_norm": 0.5865188837051392, "learning_rate": 2.8070054730838467e-05, "loss": 0.05248776078224182, "step": 1637 }, { "epoch": 0.22144673803749926, "grad_norm": 1.468625545501709, "learning_rate": 2.8066694911299865e-05, "loss": 0.07838153839111328, "step": 1638 }, { "epoch": 0.22158193140626453, "grad_norm": 1.1525522470474243, "learning_rate": 2.8063332371205698e-05, "loss": 0.05041515827178955, "step": 1639 }, { "epoch": 0.22171712477502978, "grad_norm": 1.5627365112304688, "learning_rate": 2.8059967111256072e-05, "loss": 0.06059056520462036, "step": 1640 }, { "epoch": 0.22185231814379505, "grad_norm": 0.6790928244590759, "learning_rate": 2.8056599132151647e-05, "loss": 0.06592552363872528, "step": 1641 }, { "epoch": 0.2219875115125603, "grad_norm": 0.33519986271858215, "learning_rate": 2.8053228434593656e-05, "loss": 0.06335578113794327, "step": 1642 }, { "epoch": 0.22212270488132557, "grad_norm": 0.39631593227386475, "learning_rate": 2.8049855019283895e-05, "loss": 0.052461713552474976, "step": 1643 }, { "epoch": 0.22225789825009085, "grad_norm": 0.6668994426727295, "learning_rate": 2.8046478886924736e-05, "loss": 0.07242193818092346, "step": 1644 }, { "epoch": 0.2223930916188561, "grad_norm": 2.2576355934143066, "learning_rate": 2.804310003821909e-05, "loss": 0.0729055404663086, "step": 1645 }, { "epoch": 0.22252828498762137, "grad_norm": 0.3832198679447174, "learning_rate": 2.8039718473870473e-05, "loss": 0.05689685046672821, "step": 1646 }, { "epoch": 0.2226634783563866, "grad_norm": 0.8421013951301575, "learning_rate": 2.8036334194582924e-05, "loss": 0.06981846690177917, "step": 1647 }, { "epoch": 0.2227986717251519, "grad_norm": 0.9113370180130005, "learning_rate": 2.8032947201061084e-05, "loss": 0.05107051134109497, "step": 1648 }, { "epoch": 0.22293386509391713, "grad_norm": 0.7018494009971619, "learning_rate": 2.8029557494010132e-05, "loss": 0.07083918154239655, "step": 1649 }, { "epoch": 0.2230690584626824, "grad_norm": 1.2353463172912598, "learning_rate": 2.802616507413583e-05, "loss": 0.057971615344285965, "step": 1650 }, { "epoch": 0.22320425183144768, "grad_norm": 0.6947340369224548, "learning_rate": 2.8022769942144492e-05, "loss": 0.059219732880592346, "step": 1651 }, { "epoch": 0.22333944520021293, "grad_norm": 0.6718868613243103, "learning_rate": 2.801937209874301e-05, "loss": 0.053752824664115906, "step": 1652 }, { "epoch": 0.2234746385689782, "grad_norm": 1.9309371709823608, "learning_rate": 2.8015971544638832e-05, "loss": 0.08076757192611694, "step": 1653 }, { "epoch": 0.22360983193774345, "grad_norm": 1.4071406126022339, "learning_rate": 2.8012568280539964e-05, "loss": 0.06257554888725281, "step": 1654 }, { "epoch": 0.22374502530650872, "grad_norm": 0.9768237471580505, "learning_rate": 2.800916230715499e-05, "loss": 0.06816723942756653, "step": 1655 }, { "epoch": 0.22388021867527397, "grad_norm": 1.7064036130905151, "learning_rate": 2.800575362519305e-05, "loss": 0.06085750460624695, "step": 1656 }, { "epoch": 0.22401541204403924, "grad_norm": 1.0066249370574951, "learning_rate": 2.800234223536385e-05, "loss": 0.06367231905460358, "step": 1657 }, { "epoch": 0.22415060541280452, "grad_norm": 0.6331930160522461, "learning_rate": 2.799892813837766e-05, "loss": 0.052688345313072205, "step": 1658 }, { "epoch": 0.22428579878156976, "grad_norm": 0.5558263063430786, "learning_rate": 2.7995511334945315e-05, "loss": 0.040113601833581924, "step": 1659 }, { "epoch": 0.22442099215033504, "grad_norm": 0.5435409545898438, "learning_rate": 2.7992091825778202e-05, "loss": 0.05569447576999664, "step": 1660 }, { "epoch": 0.22455618551910028, "grad_norm": 1.3237699270248413, "learning_rate": 2.7988669611588295e-05, "loss": 0.06360754370689392, "step": 1661 }, { "epoch": 0.22469137888786556, "grad_norm": 0.49552586674690247, "learning_rate": 2.7985244693088112e-05, "loss": 0.06739620119333267, "step": 1662 }, { "epoch": 0.2248265722566308, "grad_norm": 0.3336208164691925, "learning_rate": 2.7981817070990736e-05, "loss": 0.057138100266456604, "step": 1663 }, { "epoch": 0.22496176562539608, "grad_norm": 0.6160723567008972, "learning_rate": 2.7978386746009813e-05, "loss": 0.06904512643814087, "step": 1664 }, { "epoch": 0.22509695899416135, "grad_norm": 0.9872597455978394, "learning_rate": 2.797495371885957e-05, "loss": 0.06188105046749115, "step": 1665 }, { "epoch": 0.2252321523629266, "grad_norm": 0.9342311024665833, "learning_rate": 2.7971517990254768e-05, "loss": 0.07091188430786133, "step": 1666 }, { "epoch": 0.22536734573169187, "grad_norm": 0.7085817456245422, "learning_rate": 2.7968079560910744e-05, "loss": 0.05051851272583008, "step": 1667 }, { "epoch": 0.22550253910045712, "grad_norm": 0.6815599203109741, "learning_rate": 2.7964638431543402e-05, "loss": 0.030333321541547775, "step": 1668 }, { "epoch": 0.2256377324692224, "grad_norm": 1.7448046207427979, "learning_rate": 2.7961194602869208e-05, "loss": 0.05529448390007019, "step": 1669 }, { "epoch": 0.22577292583798764, "grad_norm": 1.0418426990509033, "learning_rate": 2.7957748075605178e-05, "loss": 0.09649249911308289, "step": 1670 }, { "epoch": 0.2259081192067529, "grad_norm": 1.0943551063537598, "learning_rate": 2.7954298850468898e-05, "loss": 0.046424269676208496, "step": 1671 }, { "epoch": 0.22604331257551816, "grad_norm": 1.1015396118164062, "learning_rate": 2.7950846928178517e-05, "loss": 0.05632650852203369, "step": 1672 }, { "epoch": 0.22617850594428343, "grad_norm": 1.5949186086654663, "learning_rate": 2.7947392309452744e-05, "loss": 0.06467847526073456, "step": 1673 }, { "epoch": 0.2263136993130487, "grad_norm": 0.7427306771278381, "learning_rate": 2.7943934995010845e-05, "loss": 0.06725446879863739, "step": 1674 }, { "epoch": 0.22644889268181395, "grad_norm": 0.7063242793083191, "learning_rate": 2.7940474985572657e-05, "loss": 0.06955593824386597, "step": 1675 }, { "epoch": 0.22658408605057923, "grad_norm": 1.926409363746643, "learning_rate": 2.793701228185857e-05, "loss": 0.07470649480819702, "step": 1676 }, { "epoch": 0.22671927941934447, "grad_norm": 1.3241316080093384, "learning_rate": 2.7933546884589536e-05, "loss": 0.067235067486763, "step": 1677 }, { "epoch": 0.22685447278810975, "grad_norm": 1.6580857038497925, "learning_rate": 2.7930078794487077e-05, "loss": 0.05165666341781616, "step": 1678 }, { "epoch": 0.226989666156875, "grad_norm": 0.7732198238372803, "learning_rate": 2.7926608012273253e-05, "loss": 0.07548081874847412, "step": 1679 }, { "epoch": 0.22712485952564027, "grad_norm": 1.19050133228302, "learning_rate": 2.7923134538670715e-05, "loss": 0.09109258651733398, "step": 1680 }, { "epoch": 0.22726005289440554, "grad_norm": 0.34316375851631165, "learning_rate": 2.7919658374402645e-05, "loss": 0.06333279609680176, "step": 1681 }, { "epoch": 0.22739524626317079, "grad_norm": 0.8293101191520691, "learning_rate": 2.7916179520192807e-05, "loss": 0.06527724862098694, "step": 1682 }, { "epoch": 0.22753043963193606, "grad_norm": 1.1587144136428833, "learning_rate": 2.7912697976765516e-05, "loss": 0.07280510663986206, "step": 1683 }, { "epoch": 0.2276656330007013, "grad_norm": 0.510677695274353, "learning_rate": 2.790921374484565e-05, "loss": 0.06580553203821182, "step": 1684 }, { "epoch": 0.22780082636946658, "grad_norm": 0.9677261114120483, "learning_rate": 2.7905726825158637e-05, "loss": 0.05581563711166382, "step": 1685 }, { "epoch": 0.22793601973823183, "grad_norm": 0.2613687515258789, "learning_rate": 2.7902237218430485e-05, "loss": 0.04570366442203522, "step": 1686 }, { "epoch": 0.2280712131069971, "grad_norm": 0.4171440899372101, "learning_rate": 2.7898744925387735e-05, "loss": 0.04238733649253845, "step": 1687 }, { "epoch": 0.22820640647576237, "grad_norm": 0.946252167224884, "learning_rate": 2.7895249946757505e-05, "loss": 0.0751807689666748, "step": 1688 }, { "epoch": 0.22834159984452762, "grad_norm": 0.36497074365615845, "learning_rate": 2.7891752283267474e-05, "loss": 0.052467137575149536, "step": 1689 }, { "epoch": 0.2284767932132929, "grad_norm": 0.43665435910224915, "learning_rate": 2.788825193564587e-05, "loss": 0.05565465986728668, "step": 1690 }, { "epoch": 0.22861198658205814, "grad_norm": 0.5362083315849304, "learning_rate": 2.7884748904621483e-05, "loss": 0.058127254247665405, "step": 1691 }, { "epoch": 0.22874717995082341, "grad_norm": 1.1373850107192993, "learning_rate": 2.7881243190923667e-05, "loss": 0.0649629458785057, "step": 1692 }, { "epoch": 0.22888237331958866, "grad_norm": 0.5474332571029663, "learning_rate": 2.7877734795282326e-05, "loss": 0.04449063912034035, "step": 1693 }, { "epoch": 0.22901756668835394, "grad_norm": 0.5453364849090576, "learning_rate": 2.7874223718427926e-05, "loss": 0.07171127945184708, "step": 1694 }, { "epoch": 0.2291527600571192, "grad_norm": 1.0143977403640747, "learning_rate": 2.78707099610915e-05, "loss": 0.056407347321510315, "step": 1695 }, { "epoch": 0.22928795342588446, "grad_norm": 0.5908769369125366, "learning_rate": 2.7867193524004618e-05, "loss": 0.06064420938491821, "step": 1696 }, { "epoch": 0.22942314679464973, "grad_norm": 0.8280333280563354, "learning_rate": 2.786367440789943e-05, "loss": 0.059423357248306274, "step": 1697 }, { "epoch": 0.22955834016341498, "grad_norm": 0.5103542804718018, "learning_rate": 2.7860152613508634e-05, "loss": 0.07177874445915222, "step": 1698 }, { "epoch": 0.22969353353218025, "grad_norm": 0.542658805847168, "learning_rate": 2.7856628141565484e-05, "loss": 0.0574946254491806, "step": 1699 }, { "epoch": 0.2298287269009455, "grad_norm": 0.5609861612319946, "learning_rate": 2.7853100992803797e-05, "loss": 0.05437430739402771, "step": 1700 }, { "epoch": 0.22996392026971077, "grad_norm": 1.6992119550704956, "learning_rate": 2.7849571167957942e-05, "loss": 0.07060274481773376, "step": 1701 }, { "epoch": 0.23009911363847604, "grad_norm": 0.4380677044391632, "learning_rate": 2.784603866776285e-05, "loss": 0.06999701261520386, "step": 1702 }, { "epoch": 0.2302343070072413, "grad_norm": 1.9180126190185547, "learning_rate": 2.7842503492953996e-05, "loss": 0.06108155846595764, "step": 1703 }, { "epoch": 0.23036950037600656, "grad_norm": 0.9410768151283264, "learning_rate": 2.7838965644267435e-05, "loss": 0.06441255658864975, "step": 1704 }, { "epoch": 0.2305046937447718, "grad_norm": 0.7353812456130981, "learning_rate": 2.7835425122439764e-05, "loss": 0.05866764485836029, "step": 1705 }, { "epoch": 0.23063988711353708, "grad_norm": 0.787632405757904, "learning_rate": 2.7831881928208128e-05, "loss": 0.05790655314922333, "step": 1706 }, { "epoch": 0.23077508048230233, "grad_norm": 0.43534642457962036, "learning_rate": 2.7828336062310252e-05, "loss": 0.056324705481529236, "step": 1707 }, { "epoch": 0.2309102738510676, "grad_norm": 1.1952860355377197, "learning_rate": 2.7824787525484403e-05, "loss": 0.05890044569969177, "step": 1708 }, { "epoch": 0.23104546721983288, "grad_norm": 2.0317039489746094, "learning_rate": 2.7821236318469395e-05, "loss": 0.0760083720088005, "step": 1709 }, { "epoch": 0.23118066058859812, "grad_norm": 1.2457159757614136, "learning_rate": 2.7817682442004615e-05, "loss": 0.07557854056358337, "step": 1710 }, { "epoch": 0.2313158539573634, "grad_norm": 0.46890324354171753, "learning_rate": 2.781412589683e-05, "loss": 0.06290195882320404, "step": 1711 }, { "epoch": 0.23145104732612864, "grad_norm": 0.507620632648468, "learning_rate": 2.781056668368604e-05, "loss": 0.0546550378203392, "step": 1712 }, { "epoch": 0.23158624069489392, "grad_norm": 0.7180529236793518, "learning_rate": 2.780700480331378e-05, "loss": 0.04927615821361542, "step": 1713 }, { "epoch": 0.23172143406365917, "grad_norm": 0.559109091758728, "learning_rate": 2.7803440256454825e-05, "loss": 0.08365112543106079, "step": 1714 }, { "epoch": 0.23185662743242444, "grad_norm": 0.7803539037704468, "learning_rate": 2.7799873043851337e-05, "loss": 0.04947352409362793, "step": 1715 }, { "epoch": 0.2319918208011897, "grad_norm": 1.0731490850448608, "learning_rate": 2.7796303166246016e-05, "loss": 0.06417693197727203, "step": 1716 }, { "epoch": 0.23212701416995496, "grad_norm": 0.3587137460708618, "learning_rate": 2.7792730624382142e-05, "loss": 0.051572009921073914, "step": 1717 }, { "epoch": 0.23226220753872023, "grad_norm": 0.7012569308280945, "learning_rate": 2.778915541900353e-05, "loss": 0.052128903567790985, "step": 1718 }, { "epoch": 0.23239740090748548, "grad_norm": 1.057073712348938, "learning_rate": 2.7785577550854566e-05, "loss": 0.053997982293367386, "step": 1719 }, { "epoch": 0.23253259427625075, "grad_norm": 0.7684621810913086, "learning_rate": 2.778199702068017e-05, "loss": 0.05164429545402527, "step": 1720 }, { "epoch": 0.232667787645016, "grad_norm": 0.3256686329841614, "learning_rate": 2.777841382922583e-05, "loss": 0.060562632977962494, "step": 1721 }, { "epoch": 0.23280298101378127, "grad_norm": 0.444046288728714, "learning_rate": 2.7774827977237596e-05, "loss": 0.06021483242511749, "step": 1722 }, { "epoch": 0.23293817438254655, "grad_norm": 0.7290605902671814, "learning_rate": 2.777123946546205e-05, "loss": 0.06646603345870972, "step": 1723 }, { "epoch": 0.2330733677513118, "grad_norm": 0.8953688740730286, "learning_rate": 2.776764829464634e-05, "loss": 0.05031698942184448, "step": 1724 }, { "epoch": 0.23320856112007707, "grad_norm": 1.12669038772583, "learning_rate": 2.7764054465538173e-05, "loss": 0.06480012089014053, "step": 1725 }, { "epoch": 0.23334375448884231, "grad_norm": 0.8605860471725464, "learning_rate": 2.7760457978885794e-05, "loss": 0.06884333491325378, "step": 1726 }, { "epoch": 0.2334789478576076, "grad_norm": 1.0614045858383179, "learning_rate": 2.7756858835438022e-05, "loss": 0.06185644865036011, "step": 1727 }, { "epoch": 0.23361414122637283, "grad_norm": 0.6377485990524292, "learning_rate": 2.7753257035944216e-05, "loss": 0.0591130405664444, "step": 1728 }, { "epoch": 0.2337493345951381, "grad_norm": 0.7270711064338684, "learning_rate": 2.7749652581154277e-05, "loss": 0.07110196352005005, "step": 1729 }, { "epoch": 0.23388452796390338, "grad_norm": 0.30580636858940125, "learning_rate": 2.7746045471818685e-05, "loss": 0.03449159860610962, "step": 1730 }, { "epoch": 0.23401972133266863, "grad_norm": 0.9647356271743774, "learning_rate": 2.7742435708688458e-05, "loss": 0.06302178651094437, "step": 1731 }, { "epoch": 0.2341549147014339, "grad_norm": 0.6172770857810974, "learning_rate": 2.7738823292515167e-05, "loss": 0.05716082453727722, "step": 1732 }, { "epoch": 0.23429010807019915, "grad_norm": 1.1884669065475464, "learning_rate": 2.773520822405093e-05, "loss": 0.08634865283966064, "step": 1733 }, { "epoch": 0.23442530143896442, "grad_norm": 1.4400255680084229, "learning_rate": 2.7731590504048433e-05, "loss": 0.06523825228214264, "step": 1734 }, { "epoch": 0.23456049480772967, "grad_norm": 0.8287622332572937, "learning_rate": 2.7727970133260896e-05, "loss": 0.0670129656791687, "step": 1735 }, { "epoch": 0.23469568817649494, "grad_norm": 0.34829503297805786, "learning_rate": 2.7724347112442106e-05, "loss": 0.0478961318731308, "step": 1736 }, { "epoch": 0.23483088154526022, "grad_norm": 0.8971961140632629, "learning_rate": 2.772072144234639e-05, "loss": 0.05987761914730072, "step": 1737 }, { "epoch": 0.23496607491402546, "grad_norm": 0.5579344034194946, "learning_rate": 2.7717093123728634e-05, "loss": 0.045401155948638916, "step": 1738 }, { "epoch": 0.23510126828279074, "grad_norm": 0.5325412750244141, "learning_rate": 2.771346215734428e-05, "loss": 0.055544640868902206, "step": 1739 }, { "epoch": 0.23523646165155598, "grad_norm": 0.9315246343612671, "learning_rate": 2.7709828543949302e-05, "loss": 0.07714492082595825, "step": 1740 }, { "epoch": 0.23537165502032126, "grad_norm": 1.5582971572875977, "learning_rate": 2.770619228430025e-05, "loss": 0.06546777486801147, "step": 1741 }, { "epoch": 0.2355068483890865, "grad_norm": 0.9863983392715454, "learning_rate": 2.77025533791542e-05, "loss": 0.06010594964027405, "step": 1742 }, { "epoch": 0.23564204175785178, "grad_norm": 1.0297991037368774, "learning_rate": 2.76989118292688e-05, "loss": 0.06947432458400726, "step": 1743 }, { "epoch": 0.23577723512661705, "grad_norm": 0.45426660776138306, "learning_rate": 2.7695267635402242e-05, "loss": 0.053979575634002686, "step": 1744 }, { "epoch": 0.2359124284953823, "grad_norm": 0.641617476940155, "learning_rate": 2.7691620798313258e-05, "loss": 0.056682419031858444, "step": 1745 }, { "epoch": 0.23604762186414757, "grad_norm": 1.4111003875732422, "learning_rate": 2.7687971318761145e-05, "loss": 0.060999996960163116, "step": 1746 }, { "epoch": 0.23618281523291282, "grad_norm": 1.1374071836471558, "learning_rate": 2.7684319197505746e-05, "loss": 0.051502518355846405, "step": 1747 }, { "epoch": 0.2363180086016781, "grad_norm": 1.2918708324432373, "learning_rate": 2.7680664435307446e-05, "loss": 0.04531306400895119, "step": 1748 }, { "epoch": 0.23645320197044334, "grad_norm": 0.8245479464530945, "learning_rate": 2.767700703292719e-05, "loss": 0.06491582095623016, "step": 1749 }, { "epoch": 0.2365883953392086, "grad_norm": 0.6467477083206177, "learning_rate": 2.767334699112647e-05, "loss": 0.0583115816116333, "step": 1750 }, { "epoch": 0.2367235887079739, "grad_norm": 1.161240577697754, "learning_rate": 2.7669684310667318e-05, "loss": 0.06556473672389984, "step": 1751 }, { "epoch": 0.23685878207673913, "grad_norm": 0.6168792843818665, "learning_rate": 2.7666018992312333e-05, "loss": 0.07531628012657166, "step": 1752 }, { "epoch": 0.2369939754455044, "grad_norm": 0.29516834020614624, "learning_rate": 2.7662351036824653e-05, "loss": 0.04231659322977066, "step": 1753 }, { "epoch": 0.23712916881426965, "grad_norm": 0.4645411968231201, "learning_rate": 2.7658680444967964e-05, "loss": 0.0712827742099762, "step": 1754 }, { "epoch": 0.23726436218303493, "grad_norm": 0.38169869780540466, "learning_rate": 2.76550072175065e-05, "loss": 0.0872441977262497, "step": 1755 }, { "epoch": 0.23739955555180017, "grad_norm": 0.6510182619094849, "learning_rate": 2.7651331355205044e-05, "loss": 0.04930286854505539, "step": 1756 }, { "epoch": 0.23753474892056545, "grad_norm": 1.2145999670028687, "learning_rate": 2.7647652858828936e-05, "loss": 0.06656190752983093, "step": 1757 }, { "epoch": 0.23766994228933072, "grad_norm": 0.5234197974205017, "learning_rate": 2.764397172914406e-05, "loss": 0.05475449562072754, "step": 1758 }, { "epoch": 0.23780513565809597, "grad_norm": 0.9165642857551575, "learning_rate": 2.7640287966916845e-05, "loss": 0.07122278213500977, "step": 1759 }, { "epoch": 0.23794032902686124, "grad_norm": 0.2774665653705597, "learning_rate": 2.7636601572914266e-05, "loss": 0.0409126877784729, "step": 1760 }, { "epoch": 0.2380755223956265, "grad_norm": 1.0802595615386963, "learning_rate": 2.7632912547903855e-05, "loss": 0.06233122944831848, "step": 1761 }, { "epoch": 0.23821071576439176, "grad_norm": 0.2581363022327423, "learning_rate": 2.7629220892653685e-05, "loss": 0.05251719802618027, "step": 1762 }, { "epoch": 0.238345909133157, "grad_norm": 1.029417872428894, "learning_rate": 2.7625526607932378e-05, "loss": 0.06709900498390198, "step": 1763 }, { "epoch": 0.23848110250192228, "grad_norm": 0.8297461271286011, "learning_rate": 2.76218296945091e-05, "loss": 0.05600571632385254, "step": 1764 }, { "epoch": 0.23861629587068756, "grad_norm": 0.8343843817710876, "learning_rate": 2.7618130153153577e-05, "loss": 0.07020968198776245, "step": 1765 }, { "epoch": 0.2387514892394528, "grad_norm": 0.6480276584625244, "learning_rate": 2.7614427984636063e-05, "loss": 0.0596952810883522, "step": 1766 }, { "epoch": 0.23888668260821808, "grad_norm": 0.33661749958992004, "learning_rate": 2.7610723189727377e-05, "loss": 0.07578754425048828, "step": 1767 }, { "epoch": 0.23902187597698332, "grad_norm": 0.4645842909812927, "learning_rate": 2.760701576919888e-05, "loss": 0.058799296617507935, "step": 1768 }, { "epoch": 0.2391570693457486, "grad_norm": 0.24671271443367004, "learning_rate": 2.760330572382246e-05, "loss": 0.03767775744199753, "step": 1769 }, { "epoch": 0.23929226271451384, "grad_norm": 0.6260137557983398, "learning_rate": 2.7599593054370584e-05, "loss": 0.06268168985843658, "step": 1770 }, { "epoch": 0.23942745608327912, "grad_norm": 1.1759059429168701, "learning_rate": 2.7595877761616246e-05, "loss": 0.0525277704000473, "step": 1771 }, { "epoch": 0.2395626494520444, "grad_norm": 0.5533081889152527, "learning_rate": 2.759215984633299e-05, "loss": 0.060588620603084564, "step": 1772 }, { "epoch": 0.23969784282080964, "grad_norm": 2.2983760833740234, "learning_rate": 2.7588439309294902e-05, "loss": 0.08851748704910278, "step": 1773 }, { "epoch": 0.2398330361895749, "grad_norm": 0.4648050367832184, "learning_rate": 2.7584716151276623e-05, "loss": 0.05141003802418709, "step": 1774 }, { "epoch": 0.23996822955834016, "grad_norm": 1.3984687328338623, "learning_rate": 2.7580990373053325e-05, "loss": 0.05576743185520172, "step": 1775 }, { "epoch": 0.24010342292710543, "grad_norm": 0.6347246170043945, "learning_rate": 2.7577261975400747e-05, "loss": 0.06411650776863098, "step": 1776 }, { "epoch": 0.24023861629587068, "grad_norm": 0.9491327404975891, "learning_rate": 2.7573530959095154e-05, "loss": 0.048143547028303146, "step": 1777 }, { "epoch": 0.24037380966463595, "grad_norm": 0.46420589089393616, "learning_rate": 2.756979732491336e-05, "loss": 0.04543163627386093, "step": 1778 }, { "epoch": 0.24050900303340123, "grad_norm": 0.5693197250366211, "learning_rate": 2.756606107363274e-05, "loss": 0.07411319017410278, "step": 1779 }, { "epoch": 0.24064419640216647, "grad_norm": 1.1672110557556152, "learning_rate": 2.7562322206031192e-05, "loss": 0.0833682268857956, "step": 1780 }, { "epoch": 0.24077938977093175, "grad_norm": 1.0420798063278198, "learning_rate": 2.7558580722887166e-05, "loss": 0.0583530068397522, "step": 1781 }, { "epoch": 0.240914583139697, "grad_norm": 0.573514997959137, "learning_rate": 2.7554836624979666e-05, "loss": 0.055614590644836426, "step": 1782 }, { "epoch": 0.24104977650846227, "grad_norm": 0.5239211320877075, "learning_rate": 2.7551089913088233e-05, "loss": 0.06242452189326286, "step": 1783 }, { "epoch": 0.2411849698772275, "grad_norm": 0.6482314467430115, "learning_rate": 2.7547340587992948e-05, "loss": 0.05109933018684387, "step": 1784 }, { "epoch": 0.24132016324599279, "grad_norm": 1.2163139581680298, "learning_rate": 2.754358865047444e-05, "loss": 0.07107076793909073, "step": 1785 }, { "epoch": 0.24145535661475806, "grad_norm": 0.4805852472782135, "learning_rate": 2.7539834101313885e-05, "loss": 0.06997501850128174, "step": 1786 }, { "epoch": 0.2415905499835233, "grad_norm": 2.0008926391601562, "learning_rate": 2.7536076941293003e-05, "loss": 0.06454098224639893, "step": 1787 }, { "epoch": 0.24172574335228858, "grad_norm": 0.32264941930770874, "learning_rate": 2.753231717119405e-05, "loss": 0.038575127720832825, "step": 1788 }, { "epoch": 0.24186093672105383, "grad_norm": 0.38871124386787415, "learning_rate": 2.7528554791799826e-05, "loss": 0.04571658372879028, "step": 1789 }, { "epoch": 0.2419961300898191, "grad_norm": 0.9657143354415894, "learning_rate": 2.7524789803893686e-05, "loss": 0.07892800867557526, "step": 1790 }, { "epoch": 0.24213132345858435, "grad_norm": 0.28928887844085693, "learning_rate": 2.7521022208259526e-05, "loss": 0.05117356777191162, "step": 1791 }, { "epoch": 0.24226651682734962, "grad_norm": 0.5177487134933472, "learning_rate": 2.7517252005681762e-05, "loss": 0.06379137188196182, "step": 1792 }, { "epoch": 0.24240171019611487, "grad_norm": 1.0492885112762451, "learning_rate": 2.7513479196945385e-05, "loss": 0.07026708126068115, "step": 1793 }, { "epoch": 0.24253690356488014, "grad_norm": 1.247847318649292, "learning_rate": 2.750970378283591e-05, "loss": 0.06689602136611938, "step": 1794 }, { "epoch": 0.24267209693364541, "grad_norm": 2.1468605995178223, "learning_rate": 2.7505925764139398e-05, "loss": 0.06966928392648697, "step": 1795 }, { "epoch": 0.24280729030241066, "grad_norm": 1.622549295425415, "learning_rate": 2.7502145141642447e-05, "loss": 0.04319116473197937, "step": 1796 }, { "epoch": 0.24294248367117594, "grad_norm": 0.9661479592323303, "learning_rate": 2.7498361916132212e-05, "loss": 0.046201109886169434, "step": 1797 }, { "epoch": 0.24307767703994118, "grad_norm": 1.2449190616607666, "learning_rate": 2.7494576088396376e-05, "loss": 0.0622846782207489, "step": 1798 }, { "epoch": 0.24321287040870646, "grad_norm": 0.784413754940033, "learning_rate": 2.749078765922317e-05, "loss": 0.05469036102294922, "step": 1799 }, { "epoch": 0.2433480637774717, "grad_norm": 0.9067596793174744, "learning_rate": 2.7486996629401366e-05, "loss": 0.05928552895784378, "step": 1800 }, { "epoch": 0.24348325714623698, "grad_norm": 1.948528528213501, "learning_rate": 2.7483202999720272e-05, "loss": 0.06363476812839508, "step": 1801 }, { "epoch": 0.24361845051500225, "grad_norm": 1.1192318201065063, "learning_rate": 2.7479406770969747e-05, "loss": 0.04887492209672928, "step": 1802 }, { "epoch": 0.2437536438837675, "grad_norm": 0.898763120174408, "learning_rate": 2.7475607943940182e-05, "loss": 0.04625377804040909, "step": 1803 }, { "epoch": 0.24388883725253277, "grad_norm": 1.0470030307769775, "learning_rate": 2.7471806519422514e-05, "loss": 0.06388384103775024, "step": 1804 }, { "epoch": 0.24402403062129802, "grad_norm": 0.7634301781654358, "learning_rate": 2.746800249820822e-05, "loss": 0.05902114510536194, "step": 1805 }, { "epoch": 0.2441592239900633, "grad_norm": 1.5930242538452148, "learning_rate": 2.7464195881089323e-05, "loss": 0.06449858844280243, "step": 1806 }, { "epoch": 0.24429441735882854, "grad_norm": 1.7291353940963745, "learning_rate": 2.746038666885837e-05, "loss": 0.0698220357298851, "step": 1807 }, { "epoch": 0.2444296107275938, "grad_norm": 2.733517646789551, "learning_rate": 2.7456574862308474e-05, "loss": 0.06998045742511749, "step": 1808 }, { "epoch": 0.24456480409635908, "grad_norm": 2.2248353958129883, "learning_rate": 2.745276046223326e-05, "loss": 0.055635951459407806, "step": 1809 }, { "epoch": 0.24469999746512433, "grad_norm": 2.745321035385132, "learning_rate": 2.744894346942691e-05, "loss": 0.06990677118301392, "step": 1810 }, { "epoch": 0.2448351908338896, "grad_norm": 1.1119916439056396, "learning_rate": 2.744512388468415e-05, "loss": 0.05870842933654785, "step": 1811 }, { "epoch": 0.24497038420265485, "grad_norm": 0.6725683808326721, "learning_rate": 2.7441301708800227e-05, "loss": 0.07774585485458374, "step": 1812 }, { "epoch": 0.24510557757142012, "grad_norm": 0.804542601108551, "learning_rate": 2.7437476942570942e-05, "loss": 0.07373358309268951, "step": 1813 }, { "epoch": 0.24524077094018537, "grad_norm": 0.2727304995059967, "learning_rate": 2.7433649586792637e-05, "loss": 0.05927370488643646, "step": 1814 }, { "epoch": 0.24537596430895064, "grad_norm": 2.648794174194336, "learning_rate": 2.7429819642262178e-05, "loss": 0.08700032532215118, "step": 1815 }, { "epoch": 0.24551115767771592, "grad_norm": 1.0932246446609497, "learning_rate": 2.7425987109776994e-05, "loss": 0.0529962033033371, "step": 1816 }, { "epoch": 0.24564635104648117, "grad_norm": 1.0143872499465942, "learning_rate": 2.7422151990135022e-05, "loss": 0.0611632764339447, "step": 1817 }, { "epoch": 0.24578154441524644, "grad_norm": 1.2205675840377808, "learning_rate": 2.741831428413477e-05, "loss": 0.06258591264486313, "step": 1818 }, { "epoch": 0.24591673778401169, "grad_norm": 1.5245801210403442, "learning_rate": 2.7414473992575257e-05, "loss": 0.06870941817760468, "step": 1819 }, { "epoch": 0.24605193115277696, "grad_norm": 0.6271198987960815, "learning_rate": 2.7410631116256054e-05, "loss": 0.048978958278894424, "step": 1820 }, { "epoch": 0.2461871245215422, "grad_norm": 1.5812677145004272, "learning_rate": 2.7406785655977275e-05, "loss": 0.0609498955309391, "step": 1821 }, { "epoch": 0.24632231789030748, "grad_norm": 1.5800942182540894, "learning_rate": 2.7402937612539563e-05, "loss": 0.058327898383140564, "step": 1822 }, { "epoch": 0.24645751125907275, "grad_norm": 1.137834072113037, "learning_rate": 2.7399086986744095e-05, "loss": 0.05759924650192261, "step": 1823 }, { "epoch": 0.246592704627838, "grad_norm": 1.3274519443511963, "learning_rate": 2.7395233779392598e-05, "loss": 0.06996330618858337, "step": 1824 }, { "epoch": 0.24672789799660327, "grad_norm": 0.9218474626541138, "learning_rate": 2.739137799128733e-05, "loss": 0.03890904784202576, "step": 1825 }, { "epoch": 0.24686309136536852, "grad_norm": 1.3220356702804565, "learning_rate": 2.7387519623231085e-05, "loss": 0.0814785361289978, "step": 1826 }, { "epoch": 0.2469982847341338, "grad_norm": 0.50826495885849, "learning_rate": 2.7383658676027195e-05, "loss": 0.06760821491479874, "step": 1827 }, { "epoch": 0.24713347810289904, "grad_norm": 0.7867356538772583, "learning_rate": 2.7379795150479535e-05, "loss": 0.06059293448925018, "step": 1828 }, { "epoch": 0.24726867147166431, "grad_norm": 1.4877345561981201, "learning_rate": 2.73759290473925e-05, "loss": 0.06261137127876282, "step": 1829 }, { "epoch": 0.2474038648404296, "grad_norm": 0.9563901424407959, "learning_rate": 2.7372060367571044e-05, "loss": 0.060074642300605774, "step": 1830 }, { "epoch": 0.24753905820919483, "grad_norm": 0.351698637008667, "learning_rate": 2.7368189111820648e-05, "loss": 0.05316463112831116, "step": 1831 }, { "epoch": 0.2476742515779601, "grad_norm": 0.4171631932258606, "learning_rate": 2.736431528094732e-05, "loss": 0.045943550765514374, "step": 1832 }, { "epoch": 0.24780944494672535, "grad_norm": 0.359910786151886, "learning_rate": 2.7360438875757614e-05, "loss": 0.054830729961395264, "step": 1833 }, { "epoch": 0.24794463831549063, "grad_norm": 0.3965355157852173, "learning_rate": 2.7356559897058624e-05, "loss": 0.056278809905052185, "step": 1834 }, { "epoch": 0.24807983168425587, "grad_norm": 1.5511910915374756, "learning_rate": 2.735267834565797e-05, "loss": 0.06495694816112518, "step": 1835 }, { "epoch": 0.24821502505302115, "grad_norm": 0.47934770584106445, "learning_rate": 2.734879422236381e-05, "loss": 0.04668864607810974, "step": 1836 }, { "epoch": 0.24835021842178642, "grad_norm": 0.8820375204086304, "learning_rate": 2.734490752798484e-05, "loss": 0.06065814197063446, "step": 1837 }, { "epoch": 0.24848541179055167, "grad_norm": 0.6350238919258118, "learning_rate": 2.7341018263330296e-05, "loss": 0.042666926980018616, "step": 1838 }, { "epoch": 0.24862060515931694, "grad_norm": 0.6458765268325806, "learning_rate": 2.7337126429209935e-05, "loss": 0.06931717693805695, "step": 1839 }, { "epoch": 0.2487557985280822, "grad_norm": 0.721137285232544, "learning_rate": 2.7333232026434064e-05, "loss": 0.06862339377403259, "step": 1840 }, { "epoch": 0.24889099189684746, "grad_norm": 0.2562505006790161, "learning_rate": 2.7329335055813517e-05, "loss": 0.047194480895996094, "step": 1841 }, { "epoch": 0.2490261852656127, "grad_norm": 0.30537474155426025, "learning_rate": 2.732543551815966e-05, "loss": 0.05128122493624687, "step": 1842 }, { "epoch": 0.24916137863437798, "grad_norm": 0.4329995810985565, "learning_rate": 2.7321533414284404e-05, "loss": 0.057897549122571945, "step": 1843 }, { "epoch": 0.24929657200314326, "grad_norm": 1.046766996383667, "learning_rate": 2.731762874500018e-05, "loss": 0.07247406989336014, "step": 1844 }, { "epoch": 0.2494317653719085, "grad_norm": 0.4092596173286438, "learning_rate": 2.7313721511119972e-05, "loss": 0.051524870097637177, "step": 1845 }, { "epoch": 0.24956695874067378, "grad_norm": 0.36781468987464905, "learning_rate": 2.7309811713457275e-05, "loss": 0.05429118871688843, "step": 1846 }, { "epoch": 0.24970215210943902, "grad_norm": 0.8583148717880249, "learning_rate": 2.730589935282614e-05, "loss": 0.053462326526641846, "step": 1847 }, { "epoch": 0.2498373454782043, "grad_norm": 0.3260382115840912, "learning_rate": 2.7301984430041135e-05, "loss": 0.0619199275970459, "step": 1848 }, { "epoch": 0.24997253884696954, "grad_norm": 1.4224365949630737, "learning_rate": 2.7298066945917368e-05, "loss": 0.05573566257953644, "step": 1849 }, { "epoch": 0.25010773221573485, "grad_norm": 0.350322425365448, "learning_rate": 2.7294146901270482e-05, "loss": 0.0641157478094101, "step": 1850 }, { "epoch": 0.25024292558450006, "grad_norm": 0.47027844190597534, "learning_rate": 2.7290224296916653e-05, "loss": 0.05068720504641533, "step": 1851 }, { "epoch": 0.25037811895326534, "grad_norm": 0.9299583435058594, "learning_rate": 2.7286299133672584e-05, "loss": 0.0563206672668457, "step": 1852 }, { "epoch": 0.2505133123220306, "grad_norm": 0.9145474433898926, "learning_rate": 2.728237141235552e-05, "loss": 0.04449716955423355, "step": 1853 }, { "epoch": 0.2506485056907959, "grad_norm": 1.6957886219024658, "learning_rate": 2.727844113378322e-05, "loss": 0.06835083663463593, "step": 1854 }, { "epoch": 0.2507836990595611, "grad_norm": 0.529062807559967, "learning_rate": 2.7274508298774013e-05, "loss": 0.062294989824295044, "step": 1855 }, { "epoch": 0.2509188924283264, "grad_norm": 1.1401989459991455, "learning_rate": 2.727057290814672e-05, "loss": 0.06933862715959549, "step": 1856 }, { "epoch": 0.25105408579709165, "grad_norm": 0.7565990090370178, "learning_rate": 2.7266634962720704e-05, "loss": 0.053271643817424774, "step": 1857 }, { "epoch": 0.2511892791658569, "grad_norm": 0.3381154537200928, "learning_rate": 2.726269446331588e-05, "loss": 0.048572588711977005, "step": 1858 }, { "epoch": 0.2513244725346222, "grad_norm": 0.40603747963905334, "learning_rate": 2.7258751410752676e-05, "loss": 0.06588006764650345, "step": 1859 }, { "epoch": 0.2514596659033874, "grad_norm": 0.6585817337036133, "learning_rate": 2.725480580585206e-05, "loss": 0.04722384363412857, "step": 1860 }, { "epoch": 0.2515948592721527, "grad_norm": 1.5531675815582275, "learning_rate": 2.7250857649435522e-05, "loss": 0.062035009264945984, "step": 1861 }, { "epoch": 0.25173005264091797, "grad_norm": 0.7977484464645386, "learning_rate": 2.724690694232509e-05, "loss": 0.07116580009460449, "step": 1862 }, { "epoch": 0.25186524600968324, "grad_norm": 0.7185953855514526, "learning_rate": 2.7242953685343327e-05, "loss": 0.0728900358080864, "step": 1863 }, { "epoch": 0.25200043937844846, "grad_norm": 0.4016585648059845, "learning_rate": 2.723899787931332e-05, "loss": 0.039566099643707275, "step": 1864 }, { "epoch": 0.25213563274721373, "grad_norm": 1.5395886898040771, "learning_rate": 2.7235039525058684e-05, "loss": 0.056715503334999084, "step": 1865 }, { "epoch": 0.252270826115979, "grad_norm": 0.8606144189834595, "learning_rate": 2.7231078623403575e-05, "loss": 0.046288251876831055, "step": 1866 }, { "epoch": 0.2524060194847443, "grad_norm": 2.1993372440338135, "learning_rate": 2.722711517517267e-05, "loss": 0.07265862822532654, "step": 1867 }, { "epoch": 0.25254121285350956, "grad_norm": 1.4779186248779297, "learning_rate": 2.7223149181191187e-05, "loss": 0.06568378210067749, "step": 1868 }, { "epoch": 0.2526764062222748, "grad_norm": 0.7808976769447327, "learning_rate": 2.7219180642284864e-05, "loss": 0.08012668043375015, "step": 1869 }, { "epoch": 0.25281159959104005, "grad_norm": 0.465035617351532, "learning_rate": 2.721520955927997e-05, "loss": 0.054050326347351074, "step": 1870 }, { "epoch": 0.2529467929598053, "grad_norm": 0.5058714747428894, "learning_rate": 2.7211235933003302e-05, "loss": 0.050615519285202026, "step": 1871 }, { "epoch": 0.2530819863285706, "grad_norm": 1.8749537467956543, "learning_rate": 2.72072597642822e-05, "loss": 0.08371332287788391, "step": 1872 }, { "epoch": 0.25321717969733587, "grad_norm": 0.6472415328025818, "learning_rate": 2.7203281053944512e-05, "loss": 0.053905874490737915, "step": 1873 }, { "epoch": 0.2533523730661011, "grad_norm": 1.0982719659805298, "learning_rate": 2.719929980281864e-05, "loss": 0.05468970537185669, "step": 1874 }, { "epoch": 0.25348756643486636, "grad_norm": 0.7528514862060547, "learning_rate": 2.719531601173349e-05, "loss": 0.06013808399438858, "step": 1875 }, { "epoch": 0.25362275980363164, "grad_norm": 0.4620010554790497, "learning_rate": 2.7191329681518512e-05, "loss": 0.06881189346313477, "step": 1876 }, { "epoch": 0.2537579531723969, "grad_norm": 0.6234038472175598, "learning_rate": 2.7187340813003682e-05, "loss": 0.04425733536481857, "step": 1877 }, { "epoch": 0.25389314654116213, "grad_norm": 1.4850654602050781, "learning_rate": 2.718334940701951e-05, "loss": 0.0729677677154541, "step": 1878 }, { "epoch": 0.2540283399099274, "grad_norm": 0.9001442193984985, "learning_rate": 2.7179355464397014e-05, "loss": 0.07417845726013184, "step": 1879 }, { "epoch": 0.2541635332786927, "grad_norm": 0.9688825011253357, "learning_rate": 2.7175358985967763e-05, "loss": 0.07931607961654663, "step": 1880 }, { "epoch": 0.25429872664745795, "grad_norm": 0.5000962615013123, "learning_rate": 2.717135997256385e-05, "loss": 0.0718928873538971, "step": 1881 }, { "epoch": 0.2544339200162232, "grad_norm": 0.28846657276153564, "learning_rate": 2.7167358425017882e-05, "loss": 0.04686398804187775, "step": 1882 }, { "epoch": 0.25456911338498844, "grad_norm": 1.3982936143875122, "learning_rate": 2.7163354344163004e-05, "loss": 0.052206553518772125, "step": 1883 }, { "epoch": 0.2547043067537537, "grad_norm": 0.30318522453308105, "learning_rate": 2.715934773083289e-05, "loss": 0.053683459758758545, "step": 1884 }, { "epoch": 0.254839500122519, "grad_norm": 1.3615477085113525, "learning_rate": 2.715533858586174e-05, "loss": 0.05026477575302124, "step": 1885 }, { "epoch": 0.25497469349128427, "grad_norm": 1.1193678379058838, "learning_rate": 2.715132691008427e-05, "loss": 0.0637890100479126, "step": 1886 }, { "epoch": 0.25510988686004954, "grad_norm": 0.7913280725479126, "learning_rate": 2.714731270433574e-05, "loss": 0.05701271444559097, "step": 1887 }, { "epoch": 0.25524508022881476, "grad_norm": 0.6793217062950134, "learning_rate": 2.7143295969451933e-05, "loss": 0.05757066607475281, "step": 1888 }, { "epoch": 0.25538027359758003, "grad_norm": 0.9453518390655518, "learning_rate": 2.7139276706269147e-05, "loss": 0.06984785199165344, "step": 1889 }, { "epoch": 0.2555154669663453, "grad_norm": 0.595427930355072, "learning_rate": 2.7135254915624213e-05, "loss": 0.05994880199432373, "step": 1890 }, { "epoch": 0.2556506603351106, "grad_norm": 1.0444555282592773, "learning_rate": 2.7131230598354497e-05, "loss": 0.05637727677822113, "step": 1891 }, { "epoch": 0.2557858537038758, "grad_norm": 0.9351038932800293, "learning_rate": 2.712720375529787e-05, "loss": 0.0469779372215271, "step": 1892 }, { "epoch": 0.2559210470726411, "grad_norm": 0.8016331195831299, "learning_rate": 2.7123174387292758e-05, "loss": 0.0498119592666626, "step": 1893 }, { "epoch": 0.25605624044140635, "grad_norm": 0.31011295318603516, "learning_rate": 2.7119142495178088e-05, "loss": 0.043321095407009125, "step": 1894 }, { "epoch": 0.2561914338101716, "grad_norm": 1.458289623260498, "learning_rate": 2.711510807979333e-05, "loss": 0.05448979139328003, "step": 1895 }, { "epoch": 0.2563266271789369, "grad_norm": 1.7407903671264648, "learning_rate": 2.7111071141978452e-05, "loss": 0.05992579460144043, "step": 1896 }, { "epoch": 0.2564618205477021, "grad_norm": 0.3617306649684906, "learning_rate": 2.7107031682573987e-05, "loss": 0.054641127586364746, "step": 1897 }, { "epoch": 0.2565970139164674, "grad_norm": 0.6111330986022949, "learning_rate": 2.710298970242096e-05, "loss": 0.053890250623226166, "step": 1898 }, { "epoch": 0.25673220728523266, "grad_norm": 0.42177915573120117, "learning_rate": 2.7098945202360937e-05, "loss": 0.05688124895095825, "step": 1899 }, { "epoch": 0.25686740065399793, "grad_norm": 0.7408020496368408, "learning_rate": 2.7094898183236e-05, "loss": 0.06984029710292816, "step": 1900 }, { "epoch": 0.2570025940227632, "grad_norm": 0.5317678451538086, "learning_rate": 2.709084864588877e-05, "loss": 0.06329578906297684, "step": 1901 }, { "epoch": 0.2571377873915284, "grad_norm": 0.6379442811012268, "learning_rate": 2.708679659116237e-05, "loss": 0.06518420577049255, "step": 1902 }, { "epoch": 0.2572729807602937, "grad_norm": 1.0077797174453735, "learning_rate": 2.708274201990047e-05, "loss": 0.04854530096054077, "step": 1903 }, { "epoch": 0.257408174129059, "grad_norm": 0.6798588633537292, "learning_rate": 2.7078684932947247e-05, "loss": 0.06895408034324646, "step": 1904 }, { "epoch": 0.25754336749782425, "grad_norm": 0.48604434728622437, "learning_rate": 2.7074625331147407e-05, "loss": 0.047987326979637146, "step": 1905 }, { "epoch": 0.25767856086658947, "grad_norm": 0.48254266381263733, "learning_rate": 2.7070563215346184e-05, "loss": 0.05127117037773132, "step": 1906 }, { "epoch": 0.25781375423535474, "grad_norm": 0.5055396556854248, "learning_rate": 2.7066498586389332e-05, "loss": 0.05714119225740433, "step": 1907 }, { "epoch": 0.25794894760412, "grad_norm": 1.2692639827728271, "learning_rate": 2.7062431445123127e-05, "loss": 0.06041686609387398, "step": 1908 }, { "epoch": 0.2580841409728853, "grad_norm": 0.5003594160079956, "learning_rate": 2.705836179239437e-05, "loss": 0.05806645005941391, "step": 1909 }, { "epoch": 0.25821933434165056, "grad_norm": 0.9051533341407776, "learning_rate": 2.705428962905039e-05, "loss": 0.07491414248943329, "step": 1910 }, { "epoch": 0.2583545277104158, "grad_norm": 0.5739898085594177, "learning_rate": 2.705021495593902e-05, "loss": 0.05252230167388916, "step": 1911 }, { "epoch": 0.25848972107918106, "grad_norm": 1.1121389865875244, "learning_rate": 2.704613777390864e-05, "loss": 0.072174072265625, "step": 1912 }, { "epoch": 0.25862491444794633, "grad_norm": 0.8885291814804077, "learning_rate": 2.7042058083808135e-05, "loss": 0.06262989342212677, "step": 1913 }, { "epoch": 0.2587601078167116, "grad_norm": 0.9516357779502869, "learning_rate": 2.7037975886486928e-05, "loss": 0.04772152006626129, "step": 1914 }, { "epoch": 0.2588953011854769, "grad_norm": 0.3271455764770508, "learning_rate": 2.7033891182794942e-05, "loss": 0.030513525009155273, "step": 1915 }, { "epoch": 0.2590304945542421, "grad_norm": 0.7102453112602234, "learning_rate": 2.7029803973582642e-05, "loss": 0.052742987871170044, "step": 1916 }, { "epoch": 0.25916568792300737, "grad_norm": 0.6446483135223389, "learning_rate": 2.7025714259701e-05, "loss": 0.054237931966781616, "step": 1917 }, { "epoch": 0.25930088129177264, "grad_norm": 2.5130269527435303, "learning_rate": 2.7021622042001524e-05, "loss": 0.07978424429893494, "step": 1918 }, { "epoch": 0.2594360746605379, "grad_norm": 1.4490081071853638, "learning_rate": 2.701752732133623e-05, "loss": 0.06512665748596191, "step": 1919 }, { "epoch": 0.25957126802930314, "grad_norm": 1.5117402076721191, "learning_rate": 2.7013430098557664e-05, "loss": 0.06800676882266998, "step": 1920 }, { "epoch": 0.2597064613980684, "grad_norm": 0.4595567286014557, "learning_rate": 2.7009330374518885e-05, "loss": 0.07122290134429932, "step": 1921 }, { "epoch": 0.2598416547668337, "grad_norm": 1.3470333814620972, "learning_rate": 2.7005228150073483e-05, "loss": 0.06333541870117188, "step": 1922 }, { "epoch": 0.25997684813559896, "grad_norm": 0.5032757520675659, "learning_rate": 2.7001123426075558e-05, "loss": 0.05834554135799408, "step": 1923 }, { "epoch": 0.26011204150436423, "grad_norm": 0.28954771161079407, "learning_rate": 2.699701620337974e-05, "loss": 0.04165119305253029, "step": 1924 }, { "epoch": 0.26024723487312945, "grad_norm": 0.20491264760494232, "learning_rate": 2.699290648284117e-05, "loss": 0.03780645132064819, "step": 1925 }, { "epoch": 0.2603824282418947, "grad_norm": 0.7633419632911682, "learning_rate": 2.6988794265315522e-05, "loss": 0.04897985979914665, "step": 1926 }, { "epoch": 0.26051762161066, "grad_norm": 0.5443441271781921, "learning_rate": 2.698467955165897e-05, "loss": 0.046710193157196045, "step": 1927 }, { "epoch": 0.2606528149794253, "grad_norm": 1.3065940141677856, "learning_rate": 2.6980562342728226e-05, "loss": 0.0639866441488266, "step": 1928 }, { "epoch": 0.26078800834819055, "grad_norm": 2.1279795169830322, "learning_rate": 2.6976442639380516e-05, "loss": 0.09672144055366516, "step": 1929 }, { "epoch": 0.26092320171695577, "grad_norm": 0.3977057635784149, "learning_rate": 2.6972320442473583e-05, "loss": 0.05983440577983856, "step": 1930 }, { "epoch": 0.26105839508572104, "grad_norm": 0.3161788880825043, "learning_rate": 2.6968195752865686e-05, "loss": 0.0703844353556633, "step": 1931 }, { "epoch": 0.2611935884544863, "grad_norm": 0.7519835233688354, "learning_rate": 2.6964068571415613e-05, "loss": 0.039884477853775024, "step": 1932 }, { "epoch": 0.2613287818232516, "grad_norm": 0.949356198310852, "learning_rate": 2.6959938898982667e-05, "loss": 0.06464026123285294, "step": 1933 }, { "epoch": 0.2614639751920168, "grad_norm": 0.3803577125072479, "learning_rate": 2.6955806736426657e-05, "loss": 0.04969945549964905, "step": 1934 }, { "epoch": 0.2615991685607821, "grad_norm": 0.5054728984832764, "learning_rate": 2.6951672084607937e-05, "loss": 0.05244843661785126, "step": 1935 }, { "epoch": 0.26173436192954735, "grad_norm": 0.9093344807624817, "learning_rate": 2.694753494438735e-05, "loss": 0.05855073779821396, "step": 1936 }, { "epoch": 0.26186955529831263, "grad_norm": 0.2236987054347992, "learning_rate": 2.6943395316626272e-05, "loss": 0.0456208810210228, "step": 1937 }, { "epoch": 0.2620047486670779, "grad_norm": 0.29676297307014465, "learning_rate": 2.69392532021866e-05, "loss": 0.055048078298568726, "step": 1938 }, { "epoch": 0.2621399420358431, "grad_norm": 0.4697316586971283, "learning_rate": 2.693510860193075e-05, "loss": 0.046270325779914856, "step": 1939 }, { "epoch": 0.2622751354046084, "grad_norm": 1.1346640586853027, "learning_rate": 2.6930961516721638e-05, "loss": 0.07410261034965515, "step": 1940 }, { "epoch": 0.26241032877337367, "grad_norm": 1.0093399286270142, "learning_rate": 2.6926811947422717e-05, "loss": 0.061102449893951416, "step": 1941 }, { "epoch": 0.26254552214213894, "grad_norm": 0.32292136549949646, "learning_rate": 2.6922659894897946e-05, "loss": 0.03949351608753204, "step": 1942 }, { "epoch": 0.2626807155109042, "grad_norm": 0.4040752947330475, "learning_rate": 2.6918505360011805e-05, "loss": 0.058416783809661865, "step": 1943 }, { "epoch": 0.26281590887966944, "grad_norm": 0.6055988669395447, "learning_rate": 2.6914348343629292e-05, "loss": 0.0581560879945755, "step": 1944 }, { "epoch": 0.2629511022484347, "grad_norm": 0.7155444622039795, "learning_rate": 2.6910188846615918e-05, "loss": 0.07350419461727142, "step": 1945 }, { "epoch": 0.2630862956172, "grad_norm": 0.5275724530220032, "learning_rate": 2.6906026869837714e-05, "loss": 0.06944310665130615, "step": 1946 }, { "epoch": 0.26322148898596526, "grad_norm": 0.7116784453392029, "learning_rate": 2.6901862414161222e-05, "loss": 0.06892804801464081, "step": 1947 }, { "epoch": 0.2633566823547305, "grad_norm": 0.4181476831436157, "learning_rate": 2.689769548045351e-05, "loss": 0.0550856739282608, "step": 1948 }, { "epoch": 0.26349187572349575, "grad_norm": 0.33882415294647217, "learning_rate": 2.6893526069582154e-05, "loss": 0.05200064182281494, "step": 1949 }, { "epoch": 0.263627069092261, "grad_norm": 0.6489690542221069, "learning_rate": 2.6889354182415245e-05, "loss": 0.05584654211997986, "step": 1950 }, { "epoch": 0.2637622624610263, "grad_norm": 0.6772111654281616, "learning_rate": 2.688517981982139e-05, "loss": 0.06969714164733887, "step": 1951 }, { "epoch": 0.26389745582979157, "grad_norm": 1.2031954526901245, "learning_rate": 2.6881002982669723e-05, "loss": 0.06634595990180969, "step": 1952 }, { "epoch": 0.2640326491985568, "grad_norm": 0.5072511434555054, "learning_rate": 2.6876823671829874e-05, "loss": 0.061920978128910065, "step": 1953 }, { "epoch": 0.26416784256732206, "grad_norm": 1.2806464433670044, "learning_rate": 2.6872641888172e-05, "loss": 0.05857972800731659, "step": 1954 }, { "epoch": 0.26430303593608734, "grad_norm": 0.6853794455528259, "learning_rate": 2.6868457632566774e-05, "loss": 0.06263160705566406, "step": 1955 }, { "epoch": 0.2644382293048526, "grad_norm": 0.5920037627220154, "learning_rate": 2.6864270905885377e-05, "loss": 0.054866619408130646, "step": 1956 }, { "epoch": 0.2645734226736179, "grad_norm": 1.4363727569580078, "learning_rate": 2.6860081708999515e-05, "loss": 0.0517156682908535, "step": 1957 }, { "epoch": 0.2647086160423831, "grad_norm": 0.7071147561073303, "learning_rate": 2.685589004278139e-05, "loss": 0.06194252520799637, "step": 1958 }, { "epoch": 0.2648438094111484, "grad_norm": 1.235511302947998, "learning_rate": 2.6851695908103737e-05, "loss": 0.04652991518378258, "step": 1959 }, { "epoch": 0.26497900277991365, "grad_norm": 0.5754021406173706, "learning_rate": 2.6847499305839796e-05, "loss": 0.04518058896064758, "step": 1960 }, { "epoch": 0.2651141961486789, "grad_norm": 0.875260055065155, "learning_rate": 2.684330023686332e-05, "loss": 0.06999662518501282, "step": 1961 }, { "epoch": 0.26524938951744415, "grad_norm": 1.3374043703079224, "learning_rate": 2.6839098702048577e-05, "loss": 0.07331164926290512, "step": 1962 }, { "epoch": 0.2653845828862094, "grad_norm": 0.9756178259849548, "learning_rate": 2.683489470227035e-05, "loss": 0.04712153598666191, "step": 1963 }, { "epoch": 0.2655197762549747, "grad_norm": 1.1614489555358887, "learning_rate": 2.6830688238403936e-05, "loss": 0.05995406210422516, "step": 1964 }, { "epoch": 0.26565496962373997, "grad_norm": 0.9477787017822266, "learning_rate": 2.682647931132514e-05, "loss": 0.04590131342411041, "step": 1965 }, { "epoch": 0.26579016299250524, "grad_norm": 1.38800847530365, "learning_rate": 2.682226792191029e-05, "loss": 0.056134700775146484, "step": 1966 }, { "epoch": 0.26592535636127046, "grad_norm": 2.509615182876587, "learning_rate": 2.681805407103621e-05, "loss": 0.08689026534557343, "step": 1967 }, { "epoch": 0.26606054973003573, "grad_norm": 0.551263153553009, "learning_rate": 2.6813837759580253e-05, "loss": 0.05793978273868561, "step": 1968 }, { "epoch": 0.266195743098801, "grad_norm": 0.6268625855445862, "learning_rate": 2.6809618988420274e-05, "loss": 0.06955307722091675, "step": 1969 }, { "epoch": 0.2663309364675663, "grad_norm": 0.3579012453556061, "learning_rate": 2.6805397758434647e-05, "loss": 0.05733288824558258, "step": 1970 }, { "epoch": 0.26646612983633156, "grad_norm": 0.539228618144989, "learning_rate": 2.6801174070502248e-05, "loss": 0.05921357870101929, "step": 1971 }, { "epoch": 0.2666013232050968, "grad_norm": 1.1919562816619873, "learning_rate": 2.679694792550248e-05, "loss": 0.05999784544110298, "step": 1972 }, { "epoch": 0.26673651657386205, "grad_norm": 1.372760534286499, "learning_rate": 2.6792719324315248e-05, "loss": 0.049474261701107025, "step": 1973 }, { "epoch": 0.2668717099426273, "grad_norm": 0.9844462275505066, "learning_rate": 2.678848826782096e-05, "loss": 0.07781863212585449, "step": 1974 }, { "epoch": 0.2670069033113926, "grad_norm": 0.37535110116004944, "learning_rate": 2.678425475690055e-05, "loss": 0.04978233575820923, "step": 1975 }, { "epoch": 0.2671420966801578, "grad_norm": 0.5681155323982239, "learning_rate": 2.6780018792435464e-05, "loss": 0.05559802055358887, "step": 1976 }, { "epoch": 0.2672772900489231, "grad_norm": 1.2379475831985474, "learning_rate": 2.6775780375307645e-05, "loss": 0.05596546828746796, "step": 1977 }, { "epoch": 0.26741248341768836, "grad_norm": 0.3967132270336151, "learning_rate": 2.6771539506399555e-05, "loss": 0.04318176209926605, "step": 1978 }, { "epoch": 0.26754767678645364, "grad_norm": 0.40085673332214355, "learning_rate": 2.6767296186594165e-05, "loss": 0.05271802097558975, "step": 1979 }, { "epoch": 0.2676828701552189, "grad_norm": 0.7064202427864075, "learning_rate": 2.676305041677496e-05, "loss": 0.04608451575040817, "step": 1980 }, { "epoch": 0.26781806352398413, "grad_norm": 0.3746558427810669, "learning_rate": 2.675880219782593e-05, "loss": 0.06824222207069397, "step": 1981 }, { "epoch": 0.2679532568927494, "grad_norm": 0.364446759223938, "learning_rate": 2.6754551530631575e-05, "loss": 0.037284694612026215, "step": 1982 }, { "epoch": 0.2680884502615147, "grad_norm": 0.26884880661964417, "learning_rate": 2.6750298416076907e-05, "loss": 0.03772097826004028, "step": 1983 }, { "epoch": 0.26822364363027995, "grad_norm": 1.5052671432495117, "learning_rate": 2.674604285504745e-05, "loss": 0.07238410413265228, "step": 1984 }, { "epoch": 0.26835883699904517, "grad_norm": 0.869867742061615, "learning_rate": 2.6741784848429235e-05, "loss": 0.05454730987548828, "step": 1985 }, { "epoch": 0.26849403036781044, "grad_norm": 1.318238377571106, "learning_rate": 2.67375243971088e-05, "loss": 0.06269331276416779, "step": 1986 }, { "epoch": 0.2686292237365757, "grad_norm": 0.535857081413269, "learning_rate": 2.6733261501973192e-05, "loss": 0.060758545994758606, "step": 1987 }, { "epoch": 0.268764417105341, "grad_norm": 1.0873723030090332, "learning_rate": 2.672899616390997e-05, "loss": 0.06212398409843445, "step": 1988 }, { "epoch": 0.26889961047410627, "grad_norm": 0.9297671914100647, "learning_rate": 2.67247283838072e-05, "loss": 0.07655810564756393, "step": 1989 }, { "epoch": 0.2690348038428715, "grad_norm": 0.6478985548019409, "learning_rate": 2.6720458162553457e-05, "loss": 0.04838288575410843, "step": 1990 }, { "epoch": 0.26916999721163676, "grad_norm": 1.0387420654296875, "learning_rate": 2.6716185501037822e-05, "loss": 0.06883057951927185, "step": 1991 }, { "epoch": 0.26930519058040203, "grad_norm": 0.5580405592918396, "learning_rate": 2.671191040014989e-05, "loss": 0.046358756721019745, "step": 1992 }, { "epoch": 0.2694403839491673, "grad_norm": 1.4209740161895752, "learning_rate": 2.6707632860779756e-05, "loss": 0.09473806619644165, "step": 1993 }, { "epoch": 0.2695755773179326, "grad_norm": 0.5205309391021729, "learning_rate": 2.6703352883818024e-05, "loss": 0.043007511645555496, "step": 1994 }, { "epoch": 0.2697107706866978, "grad_norm": 0.38811278343200684, "learning_rate": 2.6699070470155816e-05, "loss": 0.04337712749838829, "step": 1995 }, { "epoch": 0.2698459640554631, "grad_norm": 0.45077791810035706, "learning_rate": 2.669478562068475e-05, "loss": 0.07259446382522583, "step": 1996 }, { "epoch": 0.26998115742422835, "grad_norm": 0.7099560499191284, "learning_rate": 2.6690498336296955e-05, "loss": 0.06714433431625366, "step": 1997 }, { "epoch": 0.2701163507929936, "grad_norm": 1.017167568206787, "learning_rate": 2.6686208617885057e-05, "loss": 0.03857170045375824, "step": 1998 }, { "epoch": 0.27025154416175884, "grad_norm": 1.6143343448638916, "learning_rate": 2.668191646634221e-05, "loss": 0.06279931217432022, "step": 1999 }, { "epoch": 0.2703867375305241, "grad_norm": 0.24972137808799744, "learning_rate": 2.667762188256206e-05, "loss": 0.040912751108407974, "step": 2000 }, { "epoch": 0.2705219308992894, "grad_norm": 0.7155330777168274, "learning_rate": 2.6673324867438764e-05, "loss": 0.05724817514419556, "step": 2001 }, { "epoch": 0.27065712426805466, "grad_norm": 1.0139793157577515, "learning_rate": 2.666902542186698e-05, "loss": 0.07184246182441711, "step": 2002 }, { "epoch": 0.27079231763681993, "grad_norm": 0.6095170378684998, "learning_rate": 2.666472354674187e-05, "loss": 0.0645485520362854, "step": 2003 }, { "epoch": 0.27092751100558515, "grad_norm": 1.3737459182739258, "learning_rate": 2.666041924295912e-05, "loss": 0.05256572365760803, "step": 2004 }, { "epoch": 0.2710627043743504, "grad_norm": 0.966224730014801, "learning_rate": 2.6656112511414902e-05, "loss": 0.06350427865982056, "step": 2005 }, { "epoch": 0.2711978977431157, "grad_norm": 2.5293614864349365, "learning_rate": 2.6651803353005896e-05, "loss": 0.06370405107736588, "step": 2006 }, { "epoch": 0.271333091111881, "grad_norm": 0.6755800247192383, "learning_rate": 2.66474917686293e-05, "loss": 0.06628291308879852, "step": 2007 }, { "epoch": 0.27146828448064625, "grad_norm": 0.6063771843910217, "learning_rate": 2.664317775918281e-05, "loss": 0.05570520460605621, "step": 2008 }, { "epoch": 0.27160347784941147, "grad_norm": 0.4344748854637146, "learning_rate": 2.6638861325564615e-05, "loss": 0.04128864407539368, "step": 2009 }, { "epoch": 0.27173867121817674, "grad_norm": 0.7044264078140259, "learning_rate": 2.6634542468673432e-05, "loss": 0.0394858792424202, "step": 2010 }, { "epoch": 0.271873864586942, "grad_norm": 0.4111612141132355, "learning_rate": 2.663022118940846e-05, "loss": 0.053647249937057495, "step": 2011 }, { "epoch": 0.2720090579557073, "grad_norm": 1.2196134328842163, "learning_rate": 2.662589748866942e-05, "loss": 0.05058395862579346, "step": 2012 }, { "epoch": 0.2721442513244725, "grad_norm": 0.36669281125068665, "learning_rate": 2.6621571367356522e-05, "loss": 0.048945896327495575, "step": 2013 }, { "epoch": 0.2722794446932378, "grad_norm": 0.4089919328689575, "learning_rate": 2.6617242826370495e-05, "loss": 0.05728524923324585, "step": 2014 }, { "epoch": 0.27241463806200306, "grad_norm": 0.5235752463340759, "learning_rate": 2.661291186661256e-05, "loss": 0.0560784712433815, "step": 2015 }, { "epoch": 0.27254983143076833, "grad_norm": 0.6754398941993713, "learning_rate": 2.6608578488984444e-05, "loss": 0.0594220831990242, "step": 2016 }, { "epoch": 0.2726850247995336, "grad_norm": 0.25663843750953674, "learning_rate": 2.6604242694388388e-05, "loss": 0.042109452188014984, "step": 2017 }, { "epoch": 0.2728202181682988, "grad_norm": 0.6239408254623413, "learning_rate": 2.6599904483727116e-05, "loss": 0.05000513046979904, "step": 2018 }, { "epoch": 0.2729554115370641, "grad_norm": 0.9525699615478516, "learning_rate": 2.6595563857903872e-05, "loss": 0.04286642372608185, "step": 2019 }, { "epoch": 0.27309060490582937, "grad_norm": 0.6130484342575073, "learning_rate": 2.6591220817822405e-05, "loss": 0.07682472467422485, "step": 2020 }, { "epoch": 0.27322579827459464, "grad_norm": 0.7622012495994568, "learning_rate": 2.658687536438694e-05, "loss": 0.07658661901950836, "step": 2021 }, { "epoch": 0.2733609916433599, "grad_norm": 1.308983325958252, "learning_rate": 2.6582527498502243e-05, "loss": 0.05244206637144089, "step": 2022 }, { "epoch": 0.27349618501212514, "grad_norm": 0.9608461260795593, "learning_rate": 2.6578177221073556e-05, "loss": 0.06910043954849243, "step": 2023 }, { "epoch": 0.2736313783808904, "grad_norm": 1.6253361701965332, "learning_rate": 2.6573824533006628e-05, "loss": 0.06038973480463028, "step": 2024 }, { "epoch": 0.2737665717496557, "grad_norm": 0.36416172981262207, "learning_rate": 2.6569469435207712e-05, "loss": 0.06410835683345795, "step": 2025 }, { "epoch": 0.27390176511842096, "grad_norm": 1.387352705001831, "learning_rate": 2.656511192858356e-05, "loss": 0.06703147292137146, "step": 2026 }, { "epoch": 0.2740369584871862, "grad_norm": 1.038543939590454, "learning_rate": 2.6560752014041438e-05, "loss": 0.063834547996521, "step": 2027 }, { "epoch": 0.27417215185595145, "grad_norm": 1.0874626636505127, "learning_rate": 2.6556389692489098e-05, "loss": 0.05736267566680908, "step": 2028 }, { "epoch": 0.2743073452247167, "grad_norm": 0.49104976654052734, "learning_rate": 2.6552024964834795e-05, "loss": 0.06279715895652771, "step": 2029 }, { "epoch": 0.274442538593482, "grad_norm": 0.8350416421890259, "learning_rate": 2.6547657831987286e-05, "loss": 0.07277926057577133, "step": 2030 }, { "epoch": 0.2745777319622473, "grad_norm": 1.1978554725646973, "learning_rate": 2.6543288294855843e-05, "loss": 0.06596343219280243, "step": 2031 }, { "epoch": 0.2747129253310125, "grad_norm": 0.3906417787075043, "learning_rate": 2.653891635435022e-05, "loss": 0.0609915629029274, "step": 2032 }, { "epoch": 0.27484811869977777, "grad_norm": 1.3309073448181152, "learning_rate": 2.653454201138068e-05, "loss": 0.06403705477714539, "step": 2033 }, { "epoch": 0.27498331206854304, "grad_norm": 0.4766184985637665, "learning_rate": 2.653016526685798e-05, "loss": 0.06566610932350159, "step": 2034 }, { "epoch": 0.2751185054373083, "grad_norm": 0.593104362487793, "learning_rate": 2.6525786121693387e-05, "loss": 0.05452541261911392, "step": 2035 }, { "epoch": 0.2752536988060736, "grad_norm": 1.4176524877548218, "learning_rate": 2.652140457679866e-05, "loss": 0.08625222742557526, "step": 2036 }, { "epoch": 0.2753888921748388, "grad_norm": 0.2285621613264084, "learning_rate": 2.6517020633086064e-05, "loss": 0.044230446219444275, "step": 2037 }, { "epoch": 0.2755240855436041, "grad_norm": 1.102249264717102, "learning_rate": 2.6512634291468354e-05, "loss": 0.05788937211036682, "step": 2038 }, { "epoch": 0.27565927891236935, "grad_norm": 1.042309045791626, "learning_rate": 2.6508245552858792e-05, "loss": 0.07709316909313202, "step": 2039 }, { "epoch": 0.27579447228113463, "grad_norm": 0.4332238733768463, "learning_rate": 2.6503854418171133e-05, "loss": 0.053490445017814636, "step": 2040 }, { "epoch": 0.27592966564989985, "grad_norm": 0.5355224013328552, "learning_rate": 2.6499460888319644e-05, "loss": 0.06303292512893677, "step": 2041 }, { "epoch": 0.2760648590186651, "grad_norm": 0.6453908681869507, "learning_rate": 2.6495064964219073e-05, "loss": 0.06252860277891159, "step": 2042 }, { "epoch": 0.2762000523874304, "grad_norm": 0.6886420845985413, "learning_rate": 2.649066664678467e-05, "loss": 0.06643763929605484, "step": 2043 }, { "epoch": 0.27633524575619567, "grad_norm": 0.5869872570037842, "learning_rate": 2.6486265936932205e-05, "loss": 0.07961654663085938, "step": 2044 }, { "epoch": 0.27647043912496094, "grad_norm": 0.6214112043380737, "learning_rate": 2.6481862835577915e-05, "loss": 0.05826053023338318, "step": 2045 }, { "epoch": 0.27660563249372616, "grad_norm": 0.9288861155509949, "learning_rate": 2.6477457343638557e-05, "loss": 0.05841217562556267, "step": 2046 }, { "epoch": 0.27674082586249144, "grad_norm": 1.3822966814041138, "learning_rate": 2.647304946203137e-05, "loss": 0.06501396745443344, "step": 2047 }, { "epoch": 0.2768760192312567, "grad_norm": 0.4394715428352356, "learning_rate": 2.6468639191674106e-05, "loss": 0.0452602282166481, "step": 2048 }, { "epoch": 0.277011212600022, "grad_norm": 0.38890260457992554, "learning_rate": 2.6464226533485007e-05, "loss": 0.06317457556724548, "step": 2049 }, { "epoch": 0.27714640596878726, "grad_norm": 0.9562539458274841, "learning_rate": 2.6459811488382806e-05, "loss": 0.0622381754219532, "step": 2050 }, { "epoch": 0.2772815993375525, "grad_norm": 0.7285279631614685, "learning_rate": 2.645539405728674e-05, "loss": 0.06485778093338013, "step": 2051 }, { "epoch": 0.27741679270631775, "grad_norm": 0.5501190423965454, "learning_rate": 2.6450974241116545e-05, "loss": 0.05617007613182068, "step": 2052 }, { "epoch": 0.277551986075083, "grad_norm": 0.7587544322013855, "learning_rate": 2.644655204079245e-05, "loss": 0.06515318900346756, "step": 2053 }, { "epoch": 0.2776871794438483, "grad_norm": 0.7682645916938782, "learning_rate": 2.6442127457235177e-05, "loss": 0.054640933871269226, "step": 2054 }, { "epoch": 0.2778223728126135, "grad_norm": 1.141964316368103, "learning_rate": 2.6437700491365957e-05, "loss": 0.06025565043091774, "step": 2055 }, { "epoch": 0.2779575661813788, "grad_norm": 0.38962647318840027, "learning_rate": 2.6433271144106495e-05, "loss": 0.05071568489074707, "step": 2056 }, { "epoch": 0.27809275955014406, "grad_norm": 0.4076961576938629, "learning_rate": 2.6428839416379015e-05, "loss": 0.06274399906396866, "step": 2057 }, { "epoch": 0.27822795291890934, "grad_norm": 0.4470677375793457, "learning_rate": 2.642440530910622e-05, "loss": 0.055875495076179504, "step": 2058 }, { "epoch": 0.2783631462876746, "grad_norm": 1.213831901550293, "learning_rate": 2.6419968823211318e-05, "loss": 0.07357648015022278, "step": 2059 }, { "epoch": 0.27849833965643983, "grad_norm": 0.6747694611549377, "learning_rate": 2.641552995961801e-05, "loss": 0.06557597219944, "step": 2060 }, { "epoch": 0.2786335330252051, "grad_norm": 0.438777357339859, "learning_rate": 2.6411088719250484e-05, "loss": 0.07125673443078995, "step": 2061 }, { "epoch": 0.2787687263939704, "grad_norm": 0.6402414441108704, "learning_rate": 2.6406645103033442e-05, "loss": 0.05008098483085632, "step": 2062 }, { "epoch": 0.27890391976273565, "grad_norm": 0.47974082827568054, "learning_rate": 2.640219911189206e-05, "loss": 0.052809394896030426, "step": 2063 }, { "epoch": 0.2790391131315009, "grad_norm": 1.3230500221252441, "learning_rate": 2.6397750746752015e-05, "loss": 0.06357279419898987, "step": 2064 }, { "epoch": 0.27917430650026615, "grad_norm": 0.6687652468681335, "learning_rate": 2.6393300008539488e-05, "loss": 0.07994140684604645, "step": 2065 }, { "epoch": 0.2793094998690314, "grad_norm": 1.1518137454986572, "learning_rate": 2.6388846898181143e-05, "loss": 0.06600034236907959, "step": 2066 }, { "epoch": 0.2794446932377967, "grad_norm": 0.45129120349884033, "learning_rate": 2.6384391416604142e-05, "loss": 0.07186335325241089, "step": 2067 }, { "epoch": 0.27957988660656197, "grad_norm": 0.560322105884552, "learning_rate": 2.6379933564736136e-05, "loss": 0.06591766327619553, "step": 2068 }, { "epoch": 0.2797150799753272, "grad_norm": 0.24667029082775116, "learning_rate": 2.637547334350528e-05, "loss": 0.03618912398815155, "step": 2069 }, { "epoch": 0.27985027334409246, "grad_norm": 0.8697183728218079, "learning_rate": 2.637101075384021e-05, "loss": 0.05519033968448639, "step": 2070 }, { "epoch": 0.27998546671285773, "grad_norm": 0.7805359363555908, "learning_rate": 2.636654579667006e-05, "loss": 0.06149636209011078, "step": 2071 }, { "epoch": 0.280120660081623, "grad_norm": 1.6960641145706177, "learning_rate": 2.6362078472924467e-05, "loss": 0.08044280111789703, "step": 2072 }, { "epoch": 0.2802558534503883, "grad_norm": 0.3118131458759308, "learning_rate": 2.6357608783533545e-05, "loss": 0.04548192769289017, "step": 2073 }, { "epoch": 0.2803910468191535, "grad_norm": 0.6053363680839539, "learning_rate": 2.6353136729427907e-05, "loss": 0.0536673367023468, "step": 2074 }, { "epoch": 0.2805262401879188, "grad_norm": 0.5909741520881653, "learning_rate": 2.6348662311538657e-05, "loss": 0.06355074048042297, "step": 2075 }, { "epoch": 0.28066143355668405, "grad_norm": 0.47632163763046265, "learning_rate": 2.6344185530797398e-05, "loss": 0.06311307847499847, "step": 2076 }, { "epoch": 0.2807966269254493, "grad_norm": 1.1889612674713135, "learning_rate": 2.633970638813622e-05, "loss": 0.05792977288365364, "step": 2077 }, { "epoch": 0.2809318202942146, "grad_norm": 1.0011950731277466, "learning_rate": 2.6335224884487698e-05, "loss": 0.04652509093284607, "step": 2078 }, { "epoch": 0.2810670136629798, "grad_norm": 0.3565572202205658, "learning_rate": 2.6330741020784905e-05, "loss": 0.06233729422092438, "step": 2079 }, { "epoch": 0.2812022070317451, "grad_norm": 1.7147787809371948, "learning_rate": 2.6326254797961415e-05, "loss": 0.06864406913518906, "step": 2080 }, { "epoch": 0.28133740040051036, "grad_norm": 1.502392053604126, "learning_rate": 2.6321766216951273e-05, "loss": 0.06973086297512054, "step": 2081 }, { "epoch": 0.28147259376927564, "grad_norm": 0.4112927317619324, "learning_rate": 2.631727527868903e-05, "loss": 0.05096091330051422, "step": 2082 }, { "epoch": 0.28160778713804085, "grad_norm": 0.5707592368125916, "learning_rate": 2.6312781984109727e-05, "loss": 0.05502316355705261, "step": 2083 }, { "epoch": 0.28174298050680613, "grad_norm": 0.5201780200004578, "learning_rate": 2.6308286334148882e-05, "loss": 0.06917780637741089, "step": 2084 }, { "epoch": 0.2818781738755714, "grad_norm": 1.4294310808181763, "learning_rate": 2.630378832974252e-05, "loss": 0.06621050834655762, "step": 2085 }, { "epoch": 0.2820133672443367, "grad_norm": 0.5480486750602722, "learning_rate": 2.6299287971827154e-05, "loss": 0.046482332050800323, "step": 2086 }, { "epoch": 0.28214856061310195, "grad_norm": 0.7295203804969788, "learning_rate": 2.629478526133977e-05, "loss": 0.046879373490810394, "step": 2087 }, { "epoch": 0.28228375398186717, "grad_norm": 1.3096091747283936, "learning_rate": 2.6290280199217867e-05, "loss": 0.07276512682437897, "step": 2088 }, { "epoch": 0.28241894735063244, "grad_norm": 0.5544393062591553, "learning_rate": 2.6285772786399424e-05, "loss": 0.07403305172920227, "step": 2089 }, { "epoch": 0.2825541407193977, "grad_norm": 1.1611231565475464, "learning_rate": 2.6281263023822894e-05, "loss": 0.05479201301932335, "step": 2090 }, { "epoch": 0.282689334088163, "grad_norm": 0.589711606502533, "learning_rate": 2.627675091242725e-05, "loss": 0.07190374284982681, "step": 2091 }, { "epoch": 0.28282452745692827, "grad_norm": 0.37469837069511414, "learning_rate": 2.627223645315193e-05, "loss": 0.04291430488228798, "step": 2092 }, { "epoch": 0.2829597208256935, "grad_norm": 0.579419732093811, "learning_rate": 2.6267719646936868e-05, "loss": 0.05952748656272888, "step": 2093 }, { "epoch": 0.28309491419445876, "grad_norm": 1.2522783279418945, "learning_rate": 2.626320049472249e-05, "loss": 0.055046483874320984, "step": 2094 }, { "epoch": 0.28323010756322403, "grad_norm": 0.848613440990448, "learning_rate": 2.6258678997449705e-05, "loss": 0.0651765838265419, "step": 2095 }, { "epoch": 0.2833653009319893, "grad_norm": 1.2239712476730347, "learning_rate": 2.6254155156059912e-05, "loss": 0.05742935836315155, "step": 2096 }, { "epoch": 0.2835004943007545, "grad_norm": 0.4563262462615967, "learning_rate": 2.6249628971495006e-05, "loss": 0.061477482318878174, "step": 2097 }, { "epoch": 0.2836356876695198, "grad_norm": 0.484327107667923, "learning_rate": 2.6245100444697353e-05, "loss": 0.05643342435359955, "step": 2098 }, { "epoch": 0.28377088103828507, "grad_norm": 0.6670452356338501, "learning_rate": 2.6240569576609824e-05, "loss": 0.07622776925563812, "step": 2099 }, { "epoch": 0.28390607440705035, "grad_norm": 0.6406234502792358, "learning_rate": 2.623603636817577e-05, "loss": 0.08094936609268188, "step": 2100 }, { "epoch": 0.2840412677758156, "grad_norm": 0.917285144329071, "learning_rate": 2.6231500820339024e-05, "loss": 0.030194712802767754, "step": 2101 }, { "epoch": 0.28417646114458084, "grad_norm": 0.886455237865448, "learning_rate": 2.6226962934043913e-05, "loss": 0.04499289393424988, "step": 2102 }, { "epoch": 0.2843116545133461, "grad_norm": 0.5797407627105713, "learning_rate": 2.622242271023525e-05, "loss": 0.08102905750274658, "step": 2103 }, { "epoch": 0.2844468478821114, "grad_norm": 1.250813364982605, "learning_rate": 2.6217880149858333e-05, "loss": 0.05921363830566406, "step": 2104 }, { "epoch": 0.28458204125087666, "grad_norm": 0.9572173953056335, "learning_rate": 2.621333525385895e-05, "loss": 0.0761459469795227, "step": 2105 }, { "epoch": 0.2847172346196419, "grad_norm": 0.5949355363845825, "learning_rate": 2.6208788023183366e-05, "loss": 0.0530424565076828, "step": 2106 }, { "epoch": 0.28485242798840715, "grad_norm": 1.1584101915359497, "learning_rate": 2.6204238458778346e-05, "loss": 0.055029988288879395, "step": 2107 }, { "epoch": 0.2849876213571724, "grad_norm": 0.647292971611023, "learning_rate": 2.619968656159113e-05, "loss": 0.05487114191055298, "step": 2108 }, { "epoch": 0.2851228147259377, "grad_norm": 0.7586051821708679, "learning_rate": 2.6195132332569445e-05, "loss": 0.053023576736450195, "step": 2109 }, { "epoch": 0.285258008094703, "grad_norm": 1.4620863199234009, "learning_rate": 2.619057577266151e-05, "loss": 0.0534789115190506, "step": 2110 }, { "epoch": 0.2853932014634682, "grad_norm": 1.3903439044952393, "learning_rate": 2.6186016882816027e-05, "loss": 0.05233687534928322, "step": 2111 }, { "epoch": 0.28552839483223347, "grad_norm": 1.9320210218429565, "learning_rate": 2.6181455663982175e-05, "loss": 0.058652929961681366, "step": 2112 }, { "epoch": 0.28566358820099874, "grad_norm": 1.721946120262146, "learning_rate": 2.6176892117109628e-05, "loss": 0.06058088690042496, "step": 2113 }, { "epoch": 0.285798781569764, "grad_norm": 1.8031491041183472, "learning_rate": 2.617232624314854e-05, "loss": 0.061432018876075745, "step": 2114 }, { "epoch": 0.2859339749385293, "grad_norm": 1.1871031522750854, "learning_rate": 2.616775804304955e-05, "loss": 0.06383770704269409, "step": 2115 }, { "epoch": 0.2860691683072945, "grad_norm": 1.7513153553009033, "learning_rate": 2.616318751776378e-05, "loss": 0.05388442799448967, "step": 2116 }, { "epoch": 0.2862043616760598, "grad_norm": 0.41766881942749023, "learning_rate": 2.615861466824284e-05, "loss": 0.06770746409893036, "step": 2117 }, { "epoch": 0.28633955504482506, "grad_norm": 0.5413923859596252, "learning_rate": 2.6154039495438825e-05, "loss": 0.06088829040527344, "step": 2118 }, { "epoch": 0.28647474841359033, "grad_norm": 1.5705360174179077, "learning_rate": 2.6149462000304302e-05, "loss": 0.07825294882059097, "step": 2119 }, { "epoch": 0.28660994178235555, "grad_norm": 1.2637450695037842, "learning_rate": 2.6144882183792335e-05, "loss": 0.04790281131863594, "step": 2120 }, { "epoch": 0.2867451351511208, "grad_norm": 1.96914803981781, "learning_rate": 2.6140300046856468e-05, "loss": 0.06698126345872879, "step": 2121 }, { "epoch": 0.2868803285198861, "grad_norm": 2.1521503925323486, "learning_rate": 2.6135715590450722e-05, "loss": 0.06972119212150574, "step": 2122 }, { "epoch": 0.28701552188865137, "grad_norm": 2.6057887077331543, "learning_rate": 2.6131128815529608e-05, "loss": 0.07070231437683105, "step": 2123 }, { "epoch": 0.28715071525741664, "grad_norm": 0.8588773608207703, "learning_rate": 2.6126539723048115e-05, "loss": 0.0656328797340393, "step": 2124 }, { "epoch": 0.28728590862618186, "grad_norm": 0.9165468811988831, "learning_rate": 2.612194831396172e-05, "loss": 0.04477657750248909, "step": 2125 }, { "epoch": 0.28742110199494714, "grad_norm": 1.0606473684310913, "learning_rate": 2.611735458922637e-05, "loss": 0.047616153955459595, "step": 2126 }, { "epoch": 0.2875562953637124, "grad_norm": 1.8390511274337769, "learning_rate": 2.6112758549798515e-05, "loss": 0.0676884651184082, "step": 2127 }, { "epoch": 0.2876914887324777, "grad_norm": 1.0135793685913086, "learning_rate": 2.610816019663507e-05, "loss": 0.07285168766975403, "step": 2128 }, { "epoch": 0.28782668210124296, "grad_norm": 0.8273748159408569, "learning_rate": 2.6103559530693436e-05, "loss": 0.0653875470161438, "step": 2129 }, { "epoch": 0.2879618754700082, "grad_norm": 1.3856375217437744, "learning_rate": 2.6098956552931495e-05, "loss": 0.07490475475788116, "step": 2130 }, { "epoch": 0.28809706883877345, "grad_norm": 0.3101572096347809, "learning_rate": 2.6094351264307613e-05, "loss": 0.0505615696310997, "step": 2131 }, { "epoch": 0.2882322622075387, "grad_norm": 0.6453413367271423, "learning_rate": 2.6089743665780635e-05, "loss": 0.04547475278377533, "step": 2132 }, { "epoch": 0.288367455576304, "grad_norm": 0.412611186504364, "learning_rate": 2.6085133758309887e-05, "loss": 0.0630529522895813, "step": 2133 }, { "epoch": 0.2885026489450692, "grad_norm": 0.6633384823799133, "learning_rate": 2.6080521542855182e-05, "loss": 0.06623699516057968, "step": 2134 }, { "epoch": 0.2886378423138345, "grad_norm": 0.5063520073890686, "learning_rate": 2.60759070203768e-05, "loss": 0.03371000289916992, "step": 2135 }, { "epoch": 0.28877303568259977, "grad_norm": 0.6748740077018738, "learning_rate": 2.607129019183551e-05, "loss": 0.059692561626434326, "step": 2136 }, { "epoch": 0.28890822905136504, "grad_norm": 0.8152740001678467, "learning_rate": 2.6066671058192566e-05, "loss": 0.07377050817012787, "step": 2137 }, { "epoch": 0.2890434224201303, "grad_norm": 0.45241445302963257, "learning_rate": 2.606204962040969e-05, "loss": 0.05911744385957718, "step": 2138 }, { "epoch": 0.28917861578889553, "grad_norm": 0.9087333679199219, "learning_rate": 2.6057425879449095e-05, "loss": 0.06385991722345352, "step": 2139 }, { "epoch": 0.2893138091576608, "grad_norm": 1.0540632009506226, "learning_rate": 2.605279983627347e-05, "loss": 0.060062795877456665, "step": 2140 }, { "epoch": 0.2894490025264261, "grad_norm": 0.36895206570625305, "learning_rate": 2.6048171491845974e-05, "loss": 0.05678999423980713, "step": 2141 }, { "epoch": 0.28958419589519135, "grad_norm": 1.6579879522323608, "learning_rate": 2.604354084713026e-05, "loss": 0.055744290351867676, "step": 2142 }, { "epoch": 0.28971938926395663, "grad_norm": 0.5126224756240845, "learning_rate": 2.6038907903090446e-05, "loss": 0.05784796550869942, "step": 2143 }, { "epoch": 0.28985458263272185, "grad_norm": 0.527445912361145, "learning_rate": 2.6034272660691143e-05, "loss": 0.07844924926757812, "step": 2144 }, { "epoch": 0.2899897760014871, "grad_norm": 0.43016624450683594, "learning_rate": 2.6029635120897434e-05, "loss": 0.06831195950508118, "step": 2145 }, { "epoch": 0.2901249693702524, "grad_norm": 1.5274966955184937, "learning_rate": 2.6024995284674867e-05, "loss": 0.06620031595230103, "step": 2146 }, { "epoch": 0.29026016273901767, "grad_norm": 0.7344223856925964, "learning_rate": 2.6020353152989496e-05, "loss": 0.0732392966747284, "step": 2147 }, { "epoch": 0.2903953561077829, "grad_norm": 0.8761911988258362, "learning_rate": 2.601570872680783e-05, "loss": 0.052635855972766876, "step": 2148 }, { "epoch": 0.29053054947654816, "grad_norm": 0.7074698805809021, "learning_rate": 2.6011062007096857e-05, "loss": 0.05327111482620239, "step": 2149 }, { "epoch": 0.29066574284531344, "grad_norm": 0.50385981798172, "learning_rate": 2.6006412994824067e-05, "loss": 0.05209881067276001, "step": 2150 }, { "epoch": 0.2908009362140787, "grad_norm": 0.3549734652042389, "learning_rate": 2.6001761690957388e-05, "loss": 0.042777977883815765, "step": 2151 }, { "epoch": 0.290936129582844, "grad_norm": 0.6660646200180054, "learning_rate": 2.5997108096465263e-05, "loss": 0.03807990252971649, "step": 2152 }, { "epoch": 0.2910713229516092, "grad_norm": 0.4637339413166046, "learning_rate": 2.599245221231659e-05, "loss": 0.04703313857316971, "step": 2153 }, { "epoch": 0.2912065163203745, "grad_norm": 0.6196834444999695, "learning_rate": 2.5987794039480743e-05, "loss": 0.06262943148612976, "step": 2154 }, { "epoch": 0.29134170968913975, "grad_norm": 0.9010622501373291, "learning_rate": 2.5983133578927584e-05, "loss": 0.05753566324710846, "step": 2155 }, { "epoch": 0.291476903057905, "grad_norm": 0.5884128212928772, "learning_rate": 2.5978470831627444e-05, "loss": 0.07157447934150696, "step": 2156 }, { "epoch": 0.2916120964266703, "grad_norm": 0.505321741104126, "learning_rate": 2.597380579855113e-05, "loss": 0.06195785105228424, "step": 2157 }, { "epoch": 0.2917472897954355, "grad_norm": 0.5729936957359314, "learning_rate": 2.5969138480669936e-05, "loss": 0.07561683654785156, "step": 2158 }, { "epoch": 0.2918824831642008, "grad_norm": 0.7477554082870483, "learning_rate": 2.5964468878955614e-05, "loss": 0.06464653462171555, "step": 2159 }, { "epoch": 0.29201767653296606, "grad_norm": 0.8106328248977661, "learning_rate": 2.5959796994380397e-05, "loss": 0.061203569173812866, "step": 2160 }, { "epoch": 0.29215286990173134, "grad_norm": 0.34237033128738403, "learning_rate": 2.5955122827917004e-05, "loss": 0.05926303565502167, "step": 2161 }, { "epoch": 0.29228806327049656, "grad_norm": 0.291145920753479, "learning_rate": 2.595044638053862e-05, "loss": 0.042369719594717026, "step": 2162 }, { "epoch": 0.29242325663926183, "grad_norm": 0.35732415318489075, "learning_rate": 2.59457676532189e-05, "loss": 0.04472386837005615, "step": 2163 }, { "epoch": 0.2925584500080271, "grad_norm": 0.943020761013031, "learning_rate": 2.594108664693199e-05, "loss": 0.06752137839794159, "step": 2164 }, { "epoch": 0.2926936433767924, "grad_norm": 1.8312469720840454, "learning_rate": 2.5936403362652494e-05, "loss": 0.07746871560811996, "step": 2165 }, { "epoch": 0.29282883674555765, "grad_norm": 0.3293423056602478, "learning_rate": 2.5931717801355497e-05, "loss": 0.07278227061033249, "step": 2166 }, { "epoch": 0.29296403011432287, "grad_norm": 0.3762620985507965, "learning_rate": 2.5927029964016556e-05, "loss": 0.0636572614312172, "step": 2167 }, { "epoch": 0.29309922348308814, "grad_norm": 0.7130229473114014, "learning_rate": 2.592233985161171e-05, "loss": 0.037568364292383194, "step": 2168 }, { "epoch": 0.2932344168518534, "grad_norm": 0.6726861000061035, "learning_rate": 2.5917647465117463e-05, "loss": 0.06736618280410767, "step": 2169 }, { "epoch": 0.2933696102206187, "grad_norm": 0.47206631302833557, "learning_rate": 2.591295280551079e-05, "loss": 0.049211252480745316, "step": 2170 }, { "epoch": 0.29350480358938397, "grad_norm": 0.41759103536605835, "learning_rate": 2.590825587376915e-05, "loss": 0.059932708740234375, "step": 2171 }, { "epoch": 0.2936399969581492, "grad_norm": 0.6596844792366028, "learning_rate": 2.5903556670870464e-05, "loss": 0.0686102956533432, "step": 2172 }, { "epoch": 0.29377519032691446, "grad_norm": 0.7449108362197876, "learning_rate": 2.589885519779314e-05, "loss": 0.06023481488227844, "step": 2173 }, { "epoch": 0.29391038369567973, "grad_norm": 0.3134186565876007, "learning_rate": 2.5894151455516043e-05, "loss": 0.04394517093896866, "step": 2174 }, { "epoch": 0.294045577064445, "grad_norm": 0.9985197186470032, "learning_rate": 2.5889445445018513e-05, "loss": 0.07373246550559998, "step": 2175 }, { "epoch": 0.2941807704332102, "grad_norm": 0.6612058281898499, "learning_rate": 2.5884737167280375e-05, "loss": 0.062397778034210205, "step": 2176 }, { "epoch": 0.2943159638019755, "grad_norm": 0.4066429138183594, "learning_rate": 2.5880026623281914e-05, "loss": 0.07649281620979309, "step": 2177 }, { "epoch": 0.2944511571707408, "grad_norm": 0.7098007202148438, "learning_rate": 2.5875313814003892e-05, "loss": 0.07545536756515503, "step": 2178 }, { "epoch": 0.29458635053950605, "grad_norm": 0.5521203875541687, "learning_rate": 2.587059874042754e-05, "loss": 0.07319119572639465, "step": 2179 }, { "epoch": 0.2947215439082713, "grad_norm": 0.5925273299217224, "learning_rate": 2.5865881403534557e-05, "loss": 0.05126646161079407, "step": 2180 }, { "epoch": 0.29485673727703654, "grad_norm": 0.7179078459739685, "learning_rate": 2.5861161804307124e-05, "loss": 0.07141532003879547, "step": 2181 }, { "epoch": 0.2949919306458018, "grad_norm": 0.39854222536087036, "learning_rate": 2.5856439943727886e-05, "loss": 0.06480929255485535, "step": 2182 }, { "epoch": 0.2951271240145671, "grad_norm": 0.45212194323539734, "learning_rate": 2.5851715822779954e-05, "loss": 0.05433202162384987, "step": 2183 }, { "epoch": 0.29526231738333236, "grad_norm": 1.2842271327972412, "learning_rate": 2.5846989442446926e-05, "loss": 0.06126861274242401, "step": 2184 }, { "epoch": 0.29539751075209764, "grad_norm": 0.5720062255859375, "learning_rate": 2.584226080371285e-05, "loss": 0.07492798566818237, "step": 2185 }, { "epoch": 0.29553270412086285, "grad_norm": 0.706356942653656, "learning_rate": 2.5837529907562258e-05, "loss": 0.0765361487865448, "step": 2186 }, { "epoch": 0.29566789748962813, "grad_norm": 0.5648061037063599, "learning_rate": 2.5832796754980138e-05, "loss": 0.08554080128669739, "step": 2187 }, { "epoch": 0.2958030908583934, "grad_norm": 0.3338322639465332, "learning_rate": 2.5828061346951974e-05, "loss": 0.05611315369606018, "step": 2188 }, { "epoch": 0.2959382842271587, "grad_norm": 0.47631970047950745, "learning_rate": 2.5823323684463693e-05, "loss": 0.06342458724975586, "step": 2189 }, { "epoch": 0.2960734775959239, "grad_norm": 0.44482526183128357, "learning_rate": 2.5818583768501708e-05, "loss": 0.050715237855911255, "step": 2190 }, { "epoch": 0.29620867096468917, "grad_norm": 1.0387810468673706, "learning_rate": 2.5813841600052887e-05, "loss": 0.0709274411201477, "step": 2191 }, { "epoch": 0.29634386433345444, "grad_norm": 0.5544099807739258, "learning_rate": 2.580909718010458e-05, "loss": 0.06838691234588623, "step": 2192 }, { "epoch": 0.2964790577022197, "grad_norm": 0.7458712458610535, "learning_rate": 2.58043505096446e-05, "loss": 0.08356204628944397, "step": 2193 }, { "epoch": 0.296614251070985, "grad_norm": 0.35629406571388245, "learning_rate": 2.5799601589661223e-05, "loss": 0.04793331027030945, "step": 2194 }, { "epoch": 0.2967494444397502, "grad_norm": 1.2872802019119263, "learning_rate": 2.579485042114321e-05, "loss": 0.05886256694793701, "step": 2195 }, { "epoch": 0.2968846378085155, "grad_norm": 0.4057595431804657, "learning_rate": 2.5790097005079766e-05, "loss": 0.05406687408685684, "step": 2196 }, { "epoch": 0.29701983117728076, "grad_norm": 0.6697300672531128, "learning_rate": 2.5785341342460595e-05, "loss": 0.07221139967441559, "step": 2197 }, { "epoch": 0.29715502454604603, "grad_norm": 0.6518707275390625, "learning_rate": 2.5780583434275837e-05, "loss": 0.058696746826171875, "step": 2198 }, { "epoch": 0.2972902179148113, "grad_norm": 0.5879749655723572, "learning_rate": 2.577582328151612e-05, "loss": 0.05843890458345413, "step": 2199 }, { "epoch": 0.2974254112835765, "grad_norm": 0.8899679780006409, "learning_rate": 2.5771060885172532e-05, "loss": 0.06020255386829376, "step": 2200 }, { "epoch": 0.2975606046523418, "grad_norm": 1.8119174242019653, "learning_rate": 2.5766296246236628e-05, "loss": 0.07687734812498093, "step": 2201 }, { "epoch": 0.29769579802110707, "grad_norm": 0.8819155693054199, "learning_rate": 2.5761529365700437e-05, "loss": 0.04933363199234009, "step": 2202 }, { "epoch": 0.29783099138987235, "grad_norm": 1.332566261291504, "learning_rate": 2.5756760244556445e-05, "loss": 0.0746423602104187, "step": 2203 }, { "epoch": 0.29796618475863756, "grad_norm": 0.31044360995292664, "learning_rate": 2.5751988883797603e-05, "loss": 0.05552416667342186, "step": 2204 }, { "epoch": 0.29810137812740284, "grad_norm": 1.0255037546157837, "learning_rate": 2.574721528441734e-05, "loss": 0.05071808397769928, "step": 2205 }, { "epoch": 0.2982365714961681, "grad_norm": 0.8949851393699646, "learning_rate": 2.5742439447409545e-05, "loss": 0.05579739063978195, "step": 2206 }, { "epoch": 0.2983717648649334, "grad_norm": 0.3716931641101837, "learning_rate": 2.5737661373768568e-05, "loss": 0.06341147422790527, "step": 2207 }, { "epoch": 0.29850695823369866, "grad_norm": 0.523248553276062, "learning_rate": 2.5732881064489237e-05, "loss": 0.04970260709524155, "step": 2208 }, { "epoch": 0.2986421516024639, "grad_norm": 0.4543715715408325, "learning_rate": 2.572809852056683e-05, "loss": 0.04435265064239502, "step": 2209 }, { "epoch": 0.29877734497122915, "grad_norm": 0.36390364170074463, "learning_rate": 2.572331374299711e-05, "loss": 0.060278236865997314, "step": 2210 }, { "epoch": 0.2989125383399944, "grad_norm": 2.006216049194336, "learning_rate": 2.5718526732776276e-05, "loss": 0.06773050129413605, "step": 2211 }, { "epoch": 0.2990477317087597, "grad_norm": 0.7467211484909058, "learning_rate": 2.5713737490901023e-05, "loss": 0.05065278708934784, "step": 2212 }, { "epoch": 0.299182925077525, "grad_norm": 1.6297553777694702, "learning_rate": 2.570894601836849e-05, "loss": 0.07146915793418884, "step": 2213 }, { "epoch": 0.2993181184462902, "grad_norm": 0.7554977536201477, "learning_rate": 2.5704152316176287e-05, "loss": 0.06410757452249527, "step": 2214 }, { "epoch": 0.29945331181505547, "grad_norm": 1.449791669845581, "learning_rate": 2.5699356385322487e-05, "loss": 0.06757006049156189, "step": 2215 }, { "epoch": 0.29958850518382074, "grad_norm": 0.5825949311256409, "learning_rate": 2.5694558226805643e-05, "loss": 0.06181889772415161, "step": 2216 }, { "epoch": 0.299723698552586, "grad_norm": 0.8457855582237244, "learning_rate": 2.568975784162474e-05, "loss": 0.04547801613807678, "step": 2217 }, { "epoch": 0.29985889192135123, "grad_norm": 0.6279097199440002, "learning_rate": 2.5684955230779245e-05, "loss": 0.05461207777261734, "step": 2218 }, { "epoch": 0.2999940852901165, "grad_norm": 0.3246513605117798, "learning_rate": 2.5680150395269096e-05, "loss": 0.04975739121437073, "step": 2219 }, { "epoch": 0.3001292786588818, "grad_norm": 0.2790122330188751, "learning_rate": 2.5675343336094683e-05, "loss": 0.04424440115690231, "step": 2220 }, { "epoch": 0.30026447202764706, "grad_norm": 0.6023923754692078, "learning_rate": 2.5670534054256855e-05, "loss": 0.06825404614210129, "step": 2221 }, { "epoch": 0.30039966539641233, "grad_norm": 0.4277121424674988, "learning_rate": 2.5665722550756937e-05, "loss": 0.05392634496092796, "step": 2222 }, { "epoch": 0.30053485876517755, "grad_norm": 0.534571647644043, "learning_rate": 2.5660908826596707e-05, "loss": 0.0506836473941803, "step": 2223 }, { "epoch": 0.3006700521339428, "grad_norm": 0.8310690522193909, "learning_rate": 2.5656092882778413e-05, "loss": 0.05335317552089691, "step": 2224 }, { "epoch": 0.3008052455027081, "grad_norm": 1.2586736679077148, "learning_rate": 2.565127472030475e-05, "loss": 0.06451964378356934, "step": 2225 }, { "epoch": 0.30094043887147337, "grad_norm": 0.854870080947876, "learning_rate": 2.5646454340178894e-05, "loss": 0.06391061097383499, "step": 2226 }, { "epoch": 0.3010756322402386, "grad_norm": 0.41664719581604004, "learning_rate": 2.564163174340447e-05, "loss": 0.0637710690498352, "step": 2227 }, { "epoch": 0.30121082560900386, "grad_norm": 0.9564629197120667, "learning_rate": 2.5636806930985565e-05, "loss": 0.06166139245033264, "step": 2228 }, { "epoch": 0.30134601897776914, "grad_norm": 1.286916971206665, "learning_rate": 2.5631979903926738e-05, "loss": 0.05765240266919136, "step": 2229 }, { "epoch": 0.3014812123465344, "grad_norm": 0.39217379689216614, "learning_rate": 2.5627150663233e-05, "loss": 0.07160265743732452, "step": 2230 }, { "epoch": 0.3016164057152997, "grad_norm": 0.5052042603492737, "learning_rate": 2.5622319209909817e-05, "loss": 0.060192883014678955, "step": 2231 }, { "epoch": 0.3017515990840649, "grad_norm": 1.1523587703704834, "learning_rate": 2.5617485544963135e-05, "loss": 0.0801660567522049, "step": 2232 }, { "epoch": 0.3018867924528302, "grad_norm": 0.3566419184207916, "learning_rate": 2.561264966939934e-05, "loss": 0.05440396070480347, "step": 2233 }, { "epoch": 0.30202198582159545, "grad_norm": 0.657753586769104, "learning_rate": 2.5607811584225294e-05, "loss": 0.06699851155281067, "step": 2234 }, { "epoch": 0.3021571791903607, "grad_norm": 0.5132169723510742, "learning_rate": 2.5602971290448305e-05, "loss": 0.0679093524813652, "step": 2235 }, { "epoch": 0.302292372559126, "grad_norm": 0.614510715007782, "learning_rate": 2.5598128789076152e-05, "loss": 0.0653022825717926, "step": 2236 }, { "epoch": 0.3024275659278912, "grad_norm": 1.667647361755371, "learning_rate": 2.559328408111707e-05, "loss": 0.06635583937168121, "step": 2237 }, { "epoch": 0.3025627592966565, "grad_norm": 0.348704993724823, "learning_rate": 2.5588437167579755e-05, "loss": 0.04212060943245888, "step": 2238 }, { "epoch": 0.30269795266542177, "grad_norm": 0.5667931437492371, "learning_rate": 2.558358804947335e-05, "loss": 0.054676353931427, "step": 2239 }, { "epoch": 0.30283314603418704, "grad_norm": 2.80304217338562, "learning_rate": 2.557873672780748e-05, "loss": 0.07425614446401596, "step": 2240 }, { "epoch": 0.30296833940295226, "grad_norm": 0.4265073835849762, "learning_rate": 2.557388320359221e-05, "loss": 0.04567320644855499, "step": 2241 }, { "epoch": 0.30310353277171753, "grad_norm": 0.47821781039237976, "learning_rate": 2.5569027477838068e-05, "loss": 0.05417218804359436, "step": 2242 }, { "epoch": 0.3032387261404828, "grad_norm": 1.1858329772949219, "learning_rate": 2.5564169551556044e-05, "loss": 0.05075885355472565, "step": 2243 }, { "epoch": 0.3033739195092481, "grad_norm": 0.6808141469955444, "learning_rate": 2.5559309425757586e-05, "loss": 0.06217425316572189, "step": 2244 }, { "epoch": 0.30350911287801335, "grad_norm": 1.2015057802200317, "learning_rate": 2.5554447101454597e-05, "loss": 0.05612587183713913, "step": 2245 }, { "epoch": 0.3036443062467786, "grad_norm": 1.3352810144424438, "learning_rate": 2.554958257965944e-05, "loss": 0.07060706615447998, "step": 2246 }, { "epoch": 0.30377949961554385, "grad_norm": 1.2676361799240112, "learning_rate": 2.554471586138493e-05, "loss": 0.053166020661592484, "step": 2247 }, { "epoch": 0.3039146929843091, "grad_norm": 1.0703836679458618, "learning_rate": 2.5539846947644342e-05, "loss": 0.059078484773635864, "step": 2248 }, { "epoch": 0.3040498863530744, "grad_norm": 0.7100827097892761, "learning_rate": 2.5534975839451416e-05, "loss": 0.059625498950481415, "step": 2249 }, { "epoch": 0.30418507972183967, "grad_norm": 1.0836572647094727, "learning_rate": 2.5530102537820348e-05, "loss": 0.06885454803705215, "step": 2250 }, { "epoch": 0.3043202730906049, "grad_norm": 0.6359640955924988, "learning_rate": 2.5525227043765774e-05, "loss": 0.04971761628985405, "step": 2251 }, { "epoch": 0.30445546645937016, "grad_norm": 0.6875571012496948, "learning_rate": 2.55203493583028e-05, "loss": 0.0724475234746933, "step": 2252 }, { "epoch": 0.30459065982813543, "grad_norm": 0.9050090312957764, "learning_rate": 2.551546948244699e-05, "loss": 0.046121858060359955, "step": 2253 }, { "epoch": 0.3047258531969007, "grad_norm": 0.32779523730278015, "learning_rate": 2.551058741721436e-05, "loss": 0.05801308900117874, "step": 2254 }, { "epoch": 0.3048610465656659, "grad_norm": 0.30548548698425293, "learning_rate": 2.550570316362138e-05, "loss": 0.047217901796102524, "step": 2255 }, { "epoch": 0.3049962399344312, "grad_norm": 1.3277095556259155, "learning_rate": 2.5500816722684975e-05, "loss": 0.06256397813558578, "step": 2256 }, { "epoch": 0.3051314333031965, "grad_norm": 0.7082242369651794, "learning_rate": 2.549592809542253e-05, "loss": 0.06021872162818909, "step": 2257 }, { "epoch": 0.30526662667196175, "grad_norm": 1.5869684219360352, "learning_rate": 2.549103728285189e-05, "loss": 0.055726319551467896, "step": 2258 }, { "epoch": 0.305401820040727, "grad_norm": 0.4927757680416107, "learning_rate": 2.548614428599134e-05, "loss": 0.048781394958496094, "step": 2259 }, { "epoch": 0.30553701340949224, "grad_norm": 0.48715919256210327, "learning_rate": 2.5481249105859633e-05, "loss": 0.039811015129089355, "step": 2260 }, { "epoch": 0.3056722067782575, "grad_norm": 0.5090923309326172, "learning_rate": 2.5476351743475964e-05, "loss": 0.06070142984390259, "step": 2261 }, { "epoch": 0.3058074001470228, "grad_norm": 1.5543603897094727, "learning_rate": 2.547145219986e-05, "loss": 0.054766714572906494, "step": 2262 }, { "epoch": 0.30594259351578806, "grad_norm": 0.726177990436554, "learning_rate": 2.5466550476031846e-05, "loss": 0.037304095923900604, "step": 2263 }, { "epoch": 0.30607778688455334, "grad_norm": 1.7814775705337524, "learning_rate": 2.5461646573012072e-05, "loss": 0.06137313321232796, "step": 2264 }, { "epoch": 0.30621298025331856, "grad_norm": 0.7154324054718018, "learning_rate": 2.5456740491821687e-05, "loss": 0.06463988125324249, "step": 2265 }, { "epoch": 0.30634817362208383, "grad_norm": 0.40536412596702576, "learning_rate": 2.5451832233482172e-05, "loss": 0.05746598541736603, "step": 2266 }, { "epoch": 0.3064833669908491, "grad_norm": 0.451678067445755, "learning_rate": 2.544692179901545e-05, "loss": 0.044037047773599625, "step": 2267 }, { "epoch": 0.3066185603596144, "grad_norm": 0.40946945548057556, "learning_rate": 2.5442009189443902e-05, "loss": 0.052533067762851715, "step": 2268 }, { "epoch": 0.3067537537283796, "grad_norm": 0.37241262197494507, "learning_rate": 2.5437094405790355e-05, "loss": 0.06470764428377151, "step": 2269 }, { "epoch": 0.30688894709714487, "grad_norm": 0.559974730014801, "learning_rate": 2.5432177449078096e-05, "loss": 0.05968666449189186, "step": 2270 }, { "epoch": 0.30702414046591014, "grad_norm": 0.6734687685966492, "learning_rate": 2.5427258320330857e-05, "loss": 0.056102193892002106, "step": 2271 }, { "epoch": 0.3071593338346754, "grad_norm": 0.4163251519203186, "learning_rate": 2.5422337020572835e-05, "loss": 0.05599336698651314, "step": 2272 }, { "epoch": 0.3072945272034407, "grad_norm": 1.0217441320419312, "learning_rate": 2.5417413550828667e-05, "loss": 0.06663127988576889, "step": 2273 }, { "epoch": 0.3074297205722059, "grad_norm": 0.23586855828762054, "learning_rate": 2.5412487912123444e-05, "loss": 0.03475649654865265, "step": 2274 }, { "epoch": 0.3075649139409712, "grad_norm": 2.002098321914673, "learning_rate": 2.5407560105482708e-05, "loss": 0.06683699041604996, "step": 2275 }, { "epoch": 0.30770010730973646, "grad_norm": 0.3159419894218445, "learning_rate": 2.540263013193246e-05, "loss": 0.038786984980106354, "step": 2276 }, { "epoch": 0.30783530067850173, "grad_norm": 0.8217936754226685, "learning_rate": 2.539769799249915e-05, "loss": 0.07454089820384979, "step": 2277 }, { "epoch": 0.307970494047267, "grad_norm": 0.9815629720687866, "learning_rate": 2.5392763688209666e-05, "loss": 0.06661137193441391, "step": 2278 }, { "epoch": 0.3081056874160322, "grad_norm": 1.1893088817596436, "learning_rate": 2.5387827220091362e-05, "loss": 0.05191679298877716, "step": 2279 }, { "epoch": 0.3082408807847975, "grad_norm": 0.6995902061462402, "learning_rate": 2.538288858917204e-05, "loss": 0.05519968271255493, "step": 2280 }, { "epoch": 0.3083760741535628, "grad_norm": 1.0510425567626953, "learning_rate": 2.5377947796479936e-05, "loss": 0.05884440243244171, "step": 2281 }, { "epoch": 0.30851126752232805, "grad_norm": 0.5182132124900818, "learning_rate": 2.537300484304377e-05, "loss": 0.05221012979745865, "step": 2282 }, { "epoch": 0.30864646089109327, "grad_norm": 0.3225332498550415, "learning_rate": 2.536805972989267e-05, "loss": 0.03802039474248886, "step": 2283 }, { "epoch": 0.30878165425985854, "grad_norm": 0.7955539226531982, "learning_rate": 2.5363112458056252e-05, "loss": 0.051208097487688065, "step": 2284 }, { "epoch": 0.3089168476286238, "grad_norm": 1.0788735151290894, "learning_rate": 2.5358163028564552e-05, "loss": 0.07112982869148254, "step": 2285 }, { "epoch": 0.3090520409973891, "grad_norm": 0.375968873500824, "learning_rate": 2.535321144244808e-05, "loss": 0.06154714897274971, "step": 2286 }, { "epoch": 0.30918723436615436, "grad_norm": 1.073516607284546, "learning_rate": 2.534825770073777e-05, "loss": 0.04923577234148979, "step": 2287 }, { "epoch": 0.3093224277349196, "grad_norm": 0.5616546869277954, "learning_rate": 2.5343301804465026e-05, "loss": 0.058992281556129456, "step": 2288 }, { "epoch": 0.30945762110368485, "grad_norm": 0.6748343706130981, "learning_rate": 2.533834375466169e-05, "loss": 0.042425546795129776, "step": 2289 }, { "epoch": 0.30959281447245013, "grad_norm": 0.910538911819458, "learning_rate": 2.533338355236005e-05, "loss": 0.043878909200429916, "step": 2290 }, { "epoch": 0.3097280078412154, "grad_norm": 1.8196405172348022, "learning_rate": 2.532842119859285e-05, "loss": 0.05300411581993103, "step": 2291 }, { "epoch": 0.3098632012099807, "grad_norm": 0.3632139265537262, "learning_rate": 2.532345669439328e-05, "loss": 0.04527851939201355, "step": 2292 }, { "epoch": 0.3099983945787459, "grad_norm": 0.6463128328323364, "learning_rate": 2.5318490040794975e-05, "loss": 0.06771989166736603, "step": 2293 }, { "epoch": 0.31013358794751117, "grad_norm": 1.31852126121521, "learning_rate": 2.531352123883202e-05, "loss": 0.06382325291633606, "step": 2294 }, { "epoch": 0.31026878131627644, "grad_norm": 0.793941080570221, "learning_rate": 2.530855028953894e-05, "loss": 0.06026911735534668, "step": 2295 }, { "epoch": 0.3104039746850417, "grad_norm": 0.7083117365837097, "learning_rate": 2.5303577193950724e-05, "loss": 0.0671968162059784, "step": 2296 }, { "epoch": 0.31053916805380694, "grad_norm": 0.7987262606620789, "learning_rate": 2.5298601953102785e-05, "loss": 0.06650563329458237, "step": 2297 }, { "epoch": 0.3106743614225722, "grad_norm": 0.36385875940322876, "learning_rate": 2.5293624568031008e-05, "loss": 0.057412274181842804, "step": 2298 }, { "epoch": 0.3108095547913375, "grad_norm": 0.8501790761947632, "learning_rate": 2.5288645039771697e-05, "loss": 0.04437267780303955, "step": 2299 }, { "epoch": 0.31094474816010276, "grad_norm": 1.2511833906173706, "learning_rate": 2.5283663369361624e-05, "loss": 0.07001717388629913, "step": 2300 }, { "epoch": 0.31107994152886803, "grad_norm": 0.4312867522239685, "learning_rate": 2.5278679557837998e-05, "loss": 0.04859645664691925, "step": 2301 }, { "epoch": 0.31121513489763325, "grad_norm": 0.6450917720794678, "learning_rate": 2.5273693606238474e-05, "loss": 0.04278920590877533, "step": 2302 }, { "epoch": 0.3113503282663985, "grad_norm": 0.2746388912200928, "learning_rate": 2.5268705515601164e-05, "loss": 0.05155005306005478, "step": 2303 }, { "epoch": 0.3114855216351638, "grad_norm": 0.3895500898361206, "learning_rate": 2.5263715286964596e-05, "loss": 0.059623122215270996, "step": 2304 }, { "epoch": 0.31162071500392907, "grad_norm": 0.6174415946006775, "learning_rate": 2.525872292136778e-05, "loss": 0.06191803514957428, "step": 2305 }, { "epoch": 0.31175590837269435, "grad_norm": 0.6154640316963196, "learning_rate": 2.525372841985014e-05, "loss": 0.03563769906759262, "step": 2306 }, { "epoch": 0.31189110174145956, "grad_norm": 0.699211835861206, "learning_rate": 2.5248731783451567e-05, "loss": 0.05020824447274208, "step": 2307 }, { "epoch": 0.31202629511022484, "grad_norm": 0.3884332478046417, "learning_rate": 2.524373301321238e-05, "loss": 0.046756379306316376, "step": 2308 }, { "epoch": 0.3121614884789901, "grad_norm": 0.5133849382400513, "learning_rate": 2.5238732110173356e-05, "loss": 0.07218366861343384, "step": 2309 }, { "epoch": 0.3122966818477554, "grad_norm": 0.4838017225265503, "learning_rate": 2.5233729075375708e-05, "loss": 0.0739641785621643, "step": 2310 }, { "epoch": 0.3124318752165206, "grad_norm": 0.41110891103744507, "learning_rate": 2.522872390986109e-05, "loss": 0.04120361804962158, "step": 2311 }, { "epoch": 0.3125670685852859, "grad_norm": 0.7661836743354797, "learning_rate": 2.522371661467161e-05, "loss": 0.04032690078020096, "step": 2312 }, { "epoch": 0.31270226195405115, "grad_norm": 0.5305105447769165, "learning_rate": 2.521870719084981e-05, "loss": 0.05806836485862732, "step": 2313 }, { "epoch": 0.3128374553228164, "grad_norm": 0.47661060094833374, "learning_rate": 2.5213695639438686e-05, "loss": 0.07388916611671448, "step": 2314 }, { "epoch": 0.3129726486915817, "grad_norm": 0.8824151158332825, "learning_rate": 2.5208681961481657e-05, "loss": 0.07833479344844818, "step": 2315 }, { "epoch": 0.3131078420603469, "grad_norm": 1.5235333442687988, "learning_rate": 2.5203666158022607e-05, "loss": 0.05425835773348808, "step": 2316 }, { "epoch": 0.3132430354291122, "grad_norm": 1.2147819995880127, "learning_rate": 2.519864823010585e-05, "loss": 0.05932708457112312, "step": 2317 }, { "epoch": 0.31337822879787747, "grad_norm": 0.73869389295578, "learning_rate": 2.5193628178776148e-05, "loss": 0.06545159220695496, "step": 2318 }, { "epoch": 0.31351342216664274, "grad_norm": 0.3748724162578583, "learning_rate": 2.5188606005078695e-05, "loss": 0.05377781391143799, "step": 2319 }, { "epoch": 0.313648615535408, "grad_norm": 1.026969313621521, "learning_rate": 2.518358171005914e-05, "loss": 0.07322761416435242, "step": 2320 }, { "epoch": 0.31378380890417323, "grad_norm": 0.8221989274024963, "learning_rate": 2.517855529476357e-05, "loss": 0.0666864737868309, "step": 2321 }, { "epoch": 0.3139190022729385, "grad_norm": 0.8605358004570007, "learning_rate": 2.517352676023851e-05, "loss": 0.04999266564846039, "step": 2322 }, { "epoch": 0.3140541956417038, "grad_norm": 0.8627109527587891, "learning_rate": 2.5168496107530925e-05, "loss": 0.08818463981151581, "step": 2323 }, { "epoch": 0.31418938901046906, "grad_norm": 0.5400346517562866, "learning_rate": 2.5163463337688224e-05, "loss": 0.05868957191705704, "step": 2324 }, { "epoch": 0.3143245823792343, "grad_norm": 1.3682141304016113, "learning_rate": 2.515842845175826e-05, "loss": 0.05405328795313835, "step": 2325 }, { "epoch": 0.31445977574799955, "grad_norm": 0.6122546792030334, "learning_rate": 2.5153391450789326e-05, "loss": 0.06808657944202423, "step": 2326 }, { "epoch": 0.3145949691167648, "grad_norm": 0.6714510321617126, "learning_rate": 2.514835233583014e-05, "loss": 0.05392294377088547, "step": 2327 }, { "epoch": 0.3147301624855301, "grad_norm": 0.5753044486045837, "learning_rate": 2.514331110792988e-05, "loss": 0.0617678165435791, "step": 2328 }, { "epoch": 0.31486535585429537, "grad_norm": 0.8453408479690552, "learning_rate": 2.513826776813816e-05, "loss": 0.0437600314617157, "step": 2329 }, { "epoch": 0.3150005492230606, "grad_norm": 1.5375425815582275, "learning_rate": 2.5133222317505024e-05, "loss": 0.058279894292354584, "step": 2330 }, { "epoch": 0.31513574259182586, "grad_norm": 0.22465813159942627, "learning_rate": 2.5128174757080965e-05, "loss": 0.0558398962020874, "step": 2331 }, { "epoch": 0.31527093596059114, "grad_norm": 0.7125957608222961, "learning_rate": 2.5123125087916916e-05, "loss": 0.06689736247062683, "step": 2332 }, { "epoch": 0.3154061293293564, "grad_norm": 0.7507221698760986, "learning_rate": 2.5118073311064236e-05, "loss": 0.06189854443073273, "step": 2333 }, { "epoch": 0.3155413226981217, "grad_norm": 0.7014270424842834, "learning_rate": 2.5113019427574734e-05, "loss": 0.04525640979409218, "step": 2334 }, { "epoch": 0.3156765160668869, "grad_norm": 0.692531943321228, "learning_rate": 2.5107963438500666e-05, "loss": 0.04493245482444763, "step": 2335 }, { "epoch": 0.3158117094356522, "grad_norm": 0.8417144417762756, "learning_rate": 2.51029053448947e-05, "loss": 0.055364251136779785, "step": 2336 }, { "epoch": 0.31594690280441745, "grad_norm": 1.539441466331482, "learning_rate": 2.509784514780997e-05, "loss": 0.06964397430419922, "step": 2337 }, { "epoch": 0.3160820961731827, "grad_norm": 0.40144404768943787, "learning_rate": 2.5092782848300033e-05, "loss": 0.05018016695976257, "step": 2338 }, { "epoch": 0.31621728954194794, "grad_norm": 0.47542232275009155, "learning_rate": 2.5087718447418886e-05, "loss": 0.05702904611825943, "step": 2339 }, { "epoch": 0.3163524829107132, "grad_norm": 0.4485418200492859, "learning_rate": 2.5082651946220958e-05, "loss": 0.06316009163856506, "step": 2340 }, { "epoch": 0.3164876762794785, "grad_norm": 1.029420018196106, "learning_rate": 2.507758334576113e-05, "loss": 0.06851062178611755, "step": 2341 }, { "epoch": 0.31662286964824377, "grad_norm": 1.2221734523773193, "learning_rate": 2.5072512647094713e-05, "loss": 0.05392313003540039, "step": 2342 }, { "epoch": 0.31675806301700904, "grad_norm": 0.9792153239250183, "learning_rate": 2.506743985127745e-05, "loss": 0.07204374670982361, "step": 2343 }, { "epoch": 0.31689325638577426, "grad_norm": 0.5060032606124878, "learning_rate": 2.506236495936552e-05, "loss": 0.055379390716552734, "step": 2344 }, { "epoch": 0.31702844975453953, "grad_norm": 0.3366352319717407, "learning_rate": 2.5057287972415547e-05, "loss": 0.0541258230805397, "step": 2345 }, { "epoch": 0.3171636431233048, "grad_norm": 0.3888050317764282, "learning_rate": 2.5052208891484588e-05, "loss": 0.05308638513088226, "step": 2346 }, { "epoch": 0.3172988364920701, "grad_norm": 1.1438707113265991, "learning_rate": 2.504712771763013e-05, "loss": 0.06358611583709717, "step": 2347 }, { "epoch": 0.3174340298608353, "grad_norm": 0.8864173293113708, "learning_rate": 2.5042044451910108e-05, "loss": 0.06125408038496971, "step": 2348 }, { "epoch": 0.3175692232296006, "grad_norm": 0.3055857717990875, "learning_rate": 2.5036959095382875e-05, "loss": 0.06520285457372665, "step": 2349 }, { "epoch": 0.31770441659836585, "grad_norm": 1.0140036344528198, "learning_rate": 2.5031871649107233e-05, "loss": 0.03953090310096741, "step": 2350 }, { "epoch": 0.3178396099671311, "grad_norm": 1.01449716091156, "learning_rate": 2.5026782114142426e-05, "loss": 0.05470992624759674, "step": 2351 }, { "epoch": 0.3179748033358964, "grad_norm": 0.47396790981292725, "learning_rate": 2.5021690491548107e-05, "loss": 0.05226321518421173, "step": 2352 }, { "epoch": 0.3181099967046616, "grad_norm": 1.720629334449768, "learning_rate": 2.5016596782384387e-05, "loss": 0.06110893189907074, "step": 2353 }, { "epoch": 0.3182451900734269, "grad_norm": 0.32993414998054504, "learning_rate": 2.5011500987711804e-05, "loss": 0.041723668575286865, "step": 2354 }, { "epoch": 0.31838038344219216, "grad_norm": 0.8112986087799072, "learning_rate": 2.5006403108591325e-05, "loss": 0.061730362474918365, "step": 2355 }, { "epoch": 0.31851557681095743, "grad_norm": 0.878517210483551, "learning_rate": 2.500130314608436e-05, "loss": 0.059821128845214844, "step": 2356 }, { "epoch": 0.3186507701797227, "grad_norm": 0.5326772332191467, "learning_rate": 2.4996201101252742e-05, "loss": 0.043070752173662186, "step": 2357 }, { "epoch": 0.3187859635484879, "grad_norm": 0.2386907935142517, "learning_rate": 2.4991096975158757e-05, "loss": 0.050444453954696655, "step": 2358 }, { "epoch": 0.3189211569172532, "grad_norm": 0.3969920873641968, "learning_rate": 2.4985990768865095e-05, "loss": 0.060389913618564606, "step": 2359 }, { "epoch": 0.3190563502860185, "grad_norm": 0.7786385416984558, "learning_rate": 2.4980882483434904e-05, "loss": 0.048563480377197266, "step": 2360 }, { "epoch": 0.31919154365478375, "grad_norm": 0.7249764800071716, "learning_rate": 2.497577211993176e-05, "loss": 0.07420365512371063, "step": 2361 }, { "epoch": 0.31932673702354897, "grad_norm": 2.392258644104004, "learning_rate": 2.4970659679419658e-05, "loss": 0.07418099045753479, "step": 2362 }, { "epoch": 0.31946193039231424, "grad_norm": 0.2727169692516327, "learning_rate": 2.496554516296304e-05, "loss": 0.055803120136260986, "step": 2363 }, { "epoch": 0.3195971237610795, "grad_norm": 0.8510539531707764, "learning_rate": 2.4960428571626784e-05, "loss": 0.06812772154808044, "step": 2364 }, { "epoch": 0.3197323171298448, "grad_norm": 0.3870421350002289, "learning_rate": 2.4955309906476177e-05, "loss": 0.06500202417373657, "step": 2365 }, { "epoch": 0.31986751049861006, "grad_norm": 0.33770906925201416, "learning_rate": 2.495018916857696e-05, "loss": 0.05120396241545677, "step": 2366 }, { "epoch": 0.3200027038673753, "grad_norm": 0.5311005711555481, "learning_rate": 2.4945066358995304e-05, "loss": 0.04961118474602699, "step": 2367 }, { "epoch": 0.32013789723614056, "grad_norm": 0.5402520895004272, "learning_rate": 2.493994147879779e-05, "loss": 0.0555969774723053, "step": 2368 }, { "epoch": 0.32027309060490583, "grad_norm": 0.8443195223808289, "learning_rate": 2.4934814529051458e-05, "loss": 0.05740940570831299, "step": 2369 }, { "epoch": 0.3204082839736711, "grad_norm": 0.2561611831188202, "learning_rate": 2.4929685510823763e-05, "loss": 0.05358386039733887, "step": 2370 }, { "epoch": 0.3205434773424364, "grad_norm": 0.8193274736404419, "learning_rate": 2.492455442518259e-05, "loss": 0.05188105255365372, "step": 2371 }, { "epoch": 0.3206786707112016, "grad_norm": 0.6803062558174133, "learning_rate": 2.4919421273196262e-05, "loss": 0.05494639277458191, "step": 2372 }, { "epoch": 0.32081386407996687, "grad_norm": 0.5245711207389832, "learning_rate": 2.4914286055933527e-05, "loss": 0.05685272812843323, "step": 2373 }, { "epoch": 0.32094905744873214, "grad_norm": 1.1571366786956787, "learning_rate": 2.4909148774463572e-05, "loss": 0.04307565093040466, "step": 2374 }, { "epoch": 0.3210842508174974, "grad_norm": 0.43709444999694824, "learning_rate": 2.4904009429855992e-05, "loss": 0.06487181782722473, "step": 2375 }, { "epoch": 0.32121944418626264, "grad_norm": 0.8173585534095764, "learning_rate": 2.4898868023180844e-05, "loss": 0.048045456409454346, "step": 2376 }, { "epoch": 0.3213546375550279, "grad_norm": 0.45800986886024475, "learning_rate": 2.4893724555508575e-05, "loss": 0.06433533132076263, "step": 2377 }, { "epoch": 0.3214898309237932, "grad_norm": 0.3575770854949951, "learning_rate": 2.4888579027910105e-05, "loss": 0.05344129353761673, "step": 2378 }, { "epoch": 0.32162502429255846, "grad_norm": 0.4732600748538971, "learning_rate": 2.4883431441456738e-05, "loss": 0.07580220699310303, "step": 2379 }, { "epoch": 0.32176021766132373, "grad_norm": 0.508769154548645, "learning_rate": 2.4878281797220244e-05, "loss": 0.04080207645893097, "step": 2380 }, { "epoch": 0.32189541103008895, "grad_norm": 0.6944779753684998, "learning_rate": 2.4873130096272805e-05, "loss": 0.07719346880912781, "step": 2381 }, { "epoch": 0.3220306043988542, "grad_norm": 0.3551993668079376, "learning_rate": 2.4867976339687026e-05, "loss": 0.043351590633392334, "step": 2382 }, { "epoch": 0.3221657977676195, "grad_norm": 0.4997990131378174, "learning_rate": 2.4862820528535955e-05, "loss": 0.04125227406620979, "step": 2383 }, { "epoch": 0.3223009911363848, "grad_norm": 0.27644866704940796, "learning_rate": 2.4857662663893054e-05, "loss": 0.03502889722585678, "step": 2384 }, { "epoch": 0.32243618450515005, "grad_norm": 0.8085530400276184, "learning_rate": 2.485250274683222e-05, "loss": 0.05728539824485779, "step": 2385 }, { "epoch": 0.32257137787391527, "grad_norm": 1.5582599639892578, "learning_rate": 2.4847340778427772e-05, "loss": 0.05420401692390442, "step": 2386 }, { "epoch": 0.32270657124268054, "grad_norm": 0.4201025366783142, "learning_rate": 2.484217675975446e-05, "loss": 0.06086397171020508, "step": 2387 }, { "epoch": 0.3228417646114458, "grad_norm": 0.5157440304756165, "learning_rate": 2.4837010691887466e-05, "loss": 0.0627305805683136, "step": 2388 }, { "epoch": 0.3229769579802111, "grad_norm": 0.7646204233169556, "learning_rate": 2.4831842575902383e-05, "loss": 0.046876609325408936, "step": 2389 }, { "epoch": 0.3231121513489763, "grad_norm": 0.8501638770103455, "learning_rate": 2.482667241287525e-05, "loss": 0.06470996141433716, "step": 2390 }, { "epoch": 0.3232473447177416, "grad_norm": 1.5645791292190552, "learning_rate": 2.4821500203882517e-05, "loss": 0.06021870672702789, "step": 2391 }, { "epoch": 0.32338253808650685, "grad_norm": 1.2629863023757935, "learning_rate": 2.4816325950001067e-05, "loss": 0.04962673783302307, "step": 2392 }, { "epoch": 0.32351773145527213, "grad_norm": 0.4654386341571808, "learning_rate": 2.4811149652308205e-05, "loss": 0.04819222912192345, "step": 2393 }, { "epoch": 0.3236529248240374, "grad_norm": 0.5356674790382385, "learning_rate": 2.480597131188167e-05, "loss": 0.06122288107872009, "step": 2394 }, { "epoch": 0.3237881181928026, "grad_norm": 0.6159305572509766, "learning_rate": 2.4800790929799614e-05, "loss": 0.06149455904960632, "step": 2395 }, { "epoch": 0.3239233115615679, "grad_norm": 0.7850677371025085, "learning_rate": 2.4795608507140623e-05, "loss": 0.0695309042930603, "step": 2396 }, { "epoch": 0.32405850493033317, "grad_norm": 1.8402515649795532, "learning_rate": 2.4790424044983705e-05, "loss": 0.07553311437368393, "step": 2397 }, { "epoch": 0.32419369829909844, "grad_norm": 0.8507335186004639, "learning_rate": 2.4785237544408288e-05, "loss": 0.041149843484163284, "step": 2398 }, { "epoch": 0.3243288916678637, "grad_norm": 1.0684906244277954, "learning_rate": 2.478004900649424e-05, "loss": 0.05163618177175522, "step": 2399 }, { "epoch": 0.32446408503662894, "grad_norm": 0.7404788136482239, "learning_rate": 2.477485843232183e-05, "loss": 0.058647915720939636, "step": 2400 }, { "epoch": 0.3245992784053942, "grad_norm": 0.1775343120098114, "learning_rate": 2.476966582297177e-05, "loss": 0.04130779951810837, "step": 2401 }, { "epoch": 0.3247344717741595, "grad_norm": 0.3759896159172058, "learning_rate": 2.4764471179525188e-05, "loss": 0.04953281581401825, "step": 2402 }, { "epoch": 0.32486966514292476, "grad_norm": 1.2053723335266113, "learning_rate": 2.4759274503063632e-05, "loss": 0.05366501957178116, "step": 2403 }, { "epoch": 0.32500485851169, "grad_norm": 0.45025211572647095, "learning_rate": 2.4754075794669088e-05, "loss": 0.05164482444524765, "step": 2404 }, { "epoch": 0.32514005188045525, "grad_norm": 0.6710517406463623, "learning_rate": 2.4748875055423942e-05, "loss": 0.056668512523174286, "step": 2405 }, { "epoch": 0.3252752452492205, "grad_norm": 0.8575512766838074, "learning_rate": 2.4743672286411027e-05, "loss": 0.057905301451683044, "step": 2406 }, { "epoch": 0.3254104386179858, "grad_norm": 0.5478147864341736, "learning_rate": 2.4738467488713582e-05, "loss": 0.07564461976289749, "step": 2407 }, { "epoch": 0.32554563198675107, "grad_norm": 0.3981148302555084, "learning_rate": 2.473326066341527e-05, "loss": 0.06725983321666718, "step": 2408 }, { "epoch": 0.3256808253555163, "grad_norm": 0.6456949710845947, "learning_rate": 2.4728051811600184e-05, "loss": 0.07121357321739197, "step": 2409 }, { "epoch": 0.32581601872428156, "grad_norm": 1.2709277868270874, "learning_rate": 2.4722840934352838e-05, "loss": 0.08546609431505203, "step": 2410 }, { "epoch": 0.32595121209304684, "grad_norm": 0.30143308639526367, "learning_rate": 2.471762803275816e-05, "loss": 0.043651144951581955, "step": 2411 }, { "epoch": 0.3260864054618121, "grad_norm": 1.11068594455719, "learning_rate": 2.4712413107901504e-05, "loss": 0.050117939710617065, "step": 2412 }, { "epoch": 0.3262215988305774, "grad_norm": 0.5859435200691223, "learning_rate": 2.470719616086865e-05, "loss": 0.04886026680469513, "step": 2413 }, { "epoch": 0.3263567921993426, "grad_norm": 0.6050263047218323, "learning_rate": 2.4701977192745785e-05, "loss": 0.04854444041848183, "step": 2414 }, { "epoch": 0.3264919855681079, "grad_norm": 0.542130708694458, "learning_rate": 2.4696756204619535e-05, "loss": 0.07222840189933777, "step": 2415 }, { "epoch": 0.32662717893687315, "grad_norm": 0.4350861608982086, "learning_rate": 2.469153319757693e-05, "loss": 0.04858303815126419, "step": 2416 }, { "epoch": 0.3267623723056384, "grad_norm": 0.4467565417289734, "learning_rate": 2.4686308172705433e-05, "loss": 0.05715404450893402, "step": 2417 }, { "epoch": 0.32689756567440365, "grad_norm": 0.5131983757019043, "learning_rate": 2.4681081131092926e-05, "loss": 0.07129460573196411, "step": 2418 }, { "epoch": 0.3270327590431689, "grad_norm": 0.9890090823173523, "learning_rate": 2.467585207382769e-05, "loss": 0.08803065121173859, "step": 2419 }, { "epoch": 0.3271679524119342, "grad_norm": 1.0912023782730103, "learning_rate": 2.4670621001998467e-05, "loss": 0.049446359276771545, "step": 2420 }, { "epoch": 0.32730314578069947, "grad_norm": 0.37686675786972046, "learning_rate": 2.466538791669437e-05, "loss": 0.05590030550956726, "step": 2421 }, { "epoch": 0.32743833914946474, "grad_norm": 0.7751613855361938, "learning_rate": 2.4660152819004973e-05, "loss": 0.04032851755619049, "step": 2422 }, { "epoch": 0.32757353251822996, "grad_norm": 0.6678184866905212, "learning_rate": 2.4654915710020246e-05, "loss": 0.06188032776117325, "step": 2423 }, { "epoch": 0.32770872588699523, "grad_norm": 1.0484058856964111, "learning_rate": 2.464967659083058e-05, "loss": 0.06437119841575623, "step": 2424 }, { "epoch": 0.3278439192557605, "grad_norm": 0.4111539125442505, "learning_rate": 2.464443546252679e-05, "loss": 0.05515044182538986, "step": 2425 }, { "epoch": 0.3279791126245258, "grad_norm": 0.36039870977401733, "learning_rate": 2.4639192326200104e-05, "loss": 0.06106668710708618, "step": 2426 }, { "epoch": 0.32811430599329106, "grad_norm": 0.332709401845932, "learning_rate": 2.463394718294218e-05, "loss": 0.044600386172533035, "step": 2427 }, { "epoch": 0.3282494993620563, "grad_norm": 0.2628311514854431, "learning_rate": 2.4628700033845072e-05, "loss": 0.04399694502353668, "step": 2428 }, { "epoch": 0.32838469273082155, "grad_norm": 0.7415270209312439, "learning_rate": 2.4623450880001268e-05, "loss": 0.057974159717559814, "step": 2429 }, { "epoch": 0.3285198860995868, "grad_norm": 0.8735581636428833, "learning_rate": 2.4618199722503676e-05, "loss": 0.07110685110092163, "step": 2430 }, { "epoch": 0.3286550794683521, "grad_norm": 0.38375768065452576, "learning_rate": 2.4612946562445613e-05, "loss": 0.04996195808053017, "step": 2431 }, { "epoch": 0.3287902728371173, "grad_norm": 0.39115580916404724, "learning_rate": 2.460769140092081e-05, "loss": 0.06056630611419678, "step": 2432 }, { "epoch": 0.3289254662058826, "grad_norm": 1.1033483743667603, "learning_rate": 2.460243423902342e-05, "loss": 0.0673246830701828, "step": 2433 }, { "epoch": 0.32906065957464786, "grad_norm": 0.6083263158798218, "learning_rate": 2.459717507784802e-05, "loss": 0.0530889630317688, "step": 2434 }, { "epoch": 0.32919585294341314, "grad_norm": 0.5067211985588074, "learning_rate": 2.459191391848959e-05, "loss": 0.04698304086923599, "step": 2435 }, { "epoch": 0.3293310463121784, "grad_norm": 0.6329554915428162, "learning_rate": 2.4586650762043538e-05, "loss": 0.0533183254301548, "step": 2436 }, { "epoch": 0.32946623968094363, "grad_norm": 0.9279079437255859, "learning_rate": 2.4581385609605665e-05, "loss": 0.05001988261938095, "step": 2437 }, { "epoch": 0.3296014330497089, "grad_norm": 0.842490017414093, "learning_rate": 2.4576118462272218e-05, "loss": 0.05696358531713486, "step": 2438 }, { "epoch": 0.3297366264184742, "grad_norm": 1.5504297018051147, "learning_rate": 2.4570849321139836e-05, "loss": 0.06988736987113953, "step": 2439 }, { "epoch": 0.32987181978723945, "grad_norm": 0.7859694957733154, "learning_rate": 2.4565578187305596e-05, "loss": 0.048929743468761444, "step": 2440 }, { "epoch": 0.3300070131560047, "grad_norm": 0.8285436630249023, "learning_rate": 2.456030506186696e-05, "loss": 0.05581335723400116, "step": 2441 }, { "epoch": 0.33014220652476994, "grad_norm": 0.9999210238456726, "learning_rate": 2.4555029945921832e-05, "loss": 0.07747752964496613, "step": 2442 }, { "epoch": 0.3302773998935352, "grad_norm": 0.7843573689460754, "learning_rate": 2.4549752840568516e-05, "loss": 0.07872691750526428, "step": 2443 }, { "epoch": 0.3304125932623005, "grad_norm": 0.3269481956958771, "learning_rate": 2.4544473746905733e-05, "loss": 0.04337403178215027, "step": 2444 }, { "epoch": 0.33054778663106577, "grad_norm": 0.678648054599762, "learning_rate": 2.4539192666032617e-05, "loss": 0.05248695984482765, "step": 2445 }, { "epoch": 0.330682979999831, "grad_norm": 0.27597638964653015, "learning_rate": 2.4533909599048718e-05, "loss": 0.04707809537649155, "step": 2446 }, { "epoch": 0.33081817336859626, "grad_norm": 0.7942515015602112, "learning_rate": 2.4528624547054003e-05, "loss": 0.05427852272987366, "step": 2447 }, { "epoch": 0.33095336673736153, "grad_norm": 0.8145894408226013, "learning_rate": 2.4523337511148843e-05, "loss": 0.08071470260620117, "step": 2448 }, { "epoch": 0.3310885601061268, "grad_norm": 0.579809844493866, "learning_rate": 2.4518048492434028e-05, "loss": 0.051956817507743835, "step": 2449 }, { "epoch": 0.3312237534748921, "grad_norm": 1.0467652082443237, "learning_rate": 2.4512757492010762e-05, "loss": 0.0688856691122055, "step": 2450 }, { "epoch": 0.3313589468436573, "grad_norm": 0.26254987716674805, "learning_rate": 2.4507464510980652e-05, "loss": 0.03904147073626518, "step": 2451 }, { "epoch": 0.33149414021242257, "grad_norm": 1.1274890899658203, "learning_rate": 2.450216955044574e-05, "loss": 0.06311538070440292, "step": 2452 }, { "epoch": 0.33162933358118785, "grad_norm": 0.32946228981018066, "learning_rate": 2.449687261150845e-05, "loss": 0.05260097607970238, "step": 2453 }, { "epoch": 0.3317645269499531, "grad_norm": 0.4412408471107483, "learning_rate": 2.449157369527164e-05, "loss": 0.05872887000441551, "step": 2454 }, { "epoch": 0.3318997203187184, "grad_norm": 0.3664141297340393, "learning_rate": 2.448627280283857e-05, "loss": 0.058825358748435974, "step": 2455 }, { "epoch": 0.3320349136874836, "grad_norm": 0.5627807378768921, "learning_rate": 2.4480969935312917e-05, "loss": 0.06658937782049179, "step": 2456 }, { "epoch": 0.3321701070562489, "grad_norm": 0.6522341966629028, "learning_rate": 2.4475665093798766e-05, "loss": 0.04885073006153107, "step": 2457 }, { "epoch": 0.33230530042501416, "grad_norm": 0.6704056262969971, "learning_rate": 2.447035827940061e-05, "loss": 0.051568403840065, "step": 2458 }, { "epoch": 0.33244049379377943, "grad_norm": 0.5867211818695068, "learning_rate": 2.4465049493223356e-05, "loss": 0.05047309398651123, "step": 2459 }, { "epoch": 0.33257568716254465, "grad_norm": 0.6995148062705994, "learning_rate": 2.4459738736372327e-05, "loss": 0.05371546745300293, "step": 2460 }, { "epoch": 0.3327108805313099, "grad_norm": 0.3484623432159424, "learning_rate": 2.4454426009953252e-05, "loss": 0.04776644706726074, "step": 2461 }, { "epoch": 0.3328460739000752, "grad_norm": 0.8619898557662964, "learning_rate": 2.4449111315072254e-05, "loss": 0.07687251269817352, "step": 2462 }, { "epoch": 0.3329812672688405, "grad_norm": 0.7768975496292114, "learning_rate": 2.44437946528359e-05, "loss": 0.07029340416193008, "step": 2463 }, { "epoch": 0.33311646063760575, "grad_norm": 0.30667319893836975, "learning_rate": 2.4438476024351138e-05, "loss": 0.0642610639333725, "step": 2464 }, { "epoch": 0.33325165400637097, "grad_norm": 0.37118950486183167, "learning_rate": 2.4433155430725333e-05, "loss": 0.04756537824869156, "step": 2465 }, { "epoch": 0.33338684737513624, "grad_norm": 1.2667423486709595, "learning_rate": 2.4427832873066262e-05, "loss": 0.06080685555934906, "step": 2466 }, { "epoch": 0.3335220407439015, "grad_norm": 0.5492689609527588, "learning_rate": 2.4422508352482113e-05, "loss": 0.07088005542755127, "step": 2467 }, { "epoch": 0.3336572341126668, "grad_norm": 0.3840407431125641, "learning_rate": 2.441718187008148e-05, "loss": 0.06379091739654541, "step": 2468 }, { "epoch": 0.333792427481432, "grad_norm": 0.5432056784629822, "learning_rate": 2.441185342697336e-05, "loss": 0.03955751657485962, "step": 2469 }, { "epoch": 0.3339276208501973, "grad_norm": 1.3248522281646729, "learning_rate": 2.440652302426717e-05, "loss": 0.06881004571914673, "step": 2470 }, { "epoch": 0.33406281421896256, "grad_norm": 0.5760420560836792, "learning_rate": 2.440119066307272e-05, "loss": 0.07075256109237671, "step": 2471 }, { "epoch": 0.33419800758772783, "grad_norm": 1.4234977960586548, "learning_rate": 2.4395856344500244e-05, "loss": 0.06021004915237427, "step": 2472 }, { "epoch": 0.3343332009564931, "grad_norm": 0.3846748173236847, "learning_rate": 2.4390520069660377e-05, "loss": 0.04977214336395264, "step": 2473 }, { "epoch": 0.3344683943252583, "grad_norm": 0.5696228742599487, "learning_rate": 2.4385181839664146e-05, "loss": 0.0500163808465004, "step": 2474 }, { "epoch": 0.3346035876940236, "grad_norm": 0.8239949941635132, "learning_rate": 2.437984165562301e-05, "loss": 0.05204898118972778, "step": 2475 }, { "epoch": 0.33473878106278887, "grad_norm": 0.8784533739089966, "learning_rate": 2.4374499518648827e-05, "loss": 0.07036906480789185, "step": 2476 }, { "epoch": 0.33487397443155414, "grad_norm": 0.3877400755882263, "learning_rate": 2.436915542985385e-05, "loss": 0.047933436930179596, "step": 2477 }, { "epoch": 0.3350091678003194, "grad_norm": 1.5199875831604004, "learning_rate": 2.436380939035075e-05, "loss": 0.06770068407058716, "step": 2478 }, { "epoch": 0.33514436116908464, "grad_norm": 0.9085196852684021, "learning_rate": 2.43584614012526e-05, "loss": 0.06982289254665375, "step": 2479 }, { "epoch": 0.3352795545378499, "grad_norm": 0.47763633728027344, "learning_rate": 2.4353111463672882e-05, "loss": 0.059069663286209106, "step": 2480 }, { "epoch": 0.3354147479066152, "grad_norm": 0.803889274597168, "learning_rate": 2.4347759578725482e-05, "loss": 0.062149446457624435, "step": 2481 }, { "epoch": 0.33554994127538046, "grad_norm": 0.48583748936653137, "learning_rate": 2.4342405747524685e-05, "loss": 0.05786242336034775, "step": 2482 }, { "epoch": 0.3356851346441457, "grad_norm": 0.26858198642730713, "learning_rate": 2.4337049971185194e-05, "loss": 0.05728241801261902, "step": 2483 }, { "epoch": 0.33582032801291095, "grad_norm": 0.26471465826034546, "learning_rate": 2.433169225082211e-05, "loss": 0.057354867458343506, "step": 2484 }, { "epoch": 0.3359555213816762, "grad_norm": 0.5075029730796814, "learning_rate": 2.432633258755093e-05, "loss": 0.06345418840646744, "step": 2485 }, { "epoch": 0.3360907147504415, "grad_norm": 0.6484138369560242, "learning_rate": 2.432097098248758e-05, "loss": 0.06858913600444794, "step": 2486 }, { "epoch": 0.3362259081192068, "grad_norm": 1.5889604091644287, "learning_rate": 2.4315607436748362e-05, "loss": 0.05519643425941467, "step": 2487 }, { "epoch": 0.336361101487972, "grad_norm": 0.627388060092926, "learning_rate": 2.4310241951449997e-05, "loss": 0.05793236941099167, "step": 2488 }, { "epoch": 0.33649629485673727, "grad_norm": 0.4585854411125183, "learning_rate": 2.4304874527709614e-05, "loss": 0.039617158472537994, "step": 2489 }, { "epoch": 0.33663148822550254, "grad_norm": 0.5693581700325012, "learning_rate": 2.429950516664473e-05, "loss": 0.05294934660196304, "step": 2490 }, { "epoch": 0.3367666815942678, "grad_norm": 1.1733219623565674, "learning_rate": 2.4294133869373284e-05, "loss": 0.07497140765190125, "step": 2491 }, { "epoch": 0.3369018749630331, "grad_norm": 0.5426054000854492, "learning_rate": 2.42887606370136e-05, "loss": 0.05442477762699127, "step": 2492 }, { "epoch": 0.3370370683317983, "grad_norm": 0.41151532530784607, "learning_rate": 2.428338547068442e-05, "loss": 0.047723107039928436, "step": 2493 }, { "epoch": 0.3371722617005636, "grad_norm": 0.699028491973877, "learning_rate": 2.427800837150488e-05, "loss": 0.054842229932546616, "step": 2494 }, { "epoch": 0.33730745506932885, "grad_norm": 0.600603461265564, "learning_rate": 2.4272629340594518e-05, "loss": 0.06443243473768234, "step": 2495 }, { "epoch": 0.33744264843809413, "grad_norm": 0.587218701839447, "learning_rate": 2.426724837907328e-05, "loss": 0.05162160098552704, "step": 2496 }, { "epoch": 0.33757784180685935, "grad_norm": 0.3580301105976105, "learning_rate": 2.4261865488061512e-05, "loss": 0.041045911610126495, "step": 2497 }, { "epoch": 0.3377130351756246, "grad_norm": 0.7507015466690063, "learning_rate": 2.4256480668679958e-05, "loss": 0.04999418556690216, "step": 2498 }, { "epoch": 0.3378482285443899, "grad_norm": 1.4667502641677856, "learning_rate": 2.4251093922049766e-05, "loss": 0.0518585704267025, "step": 2499 }, { "epoch": 0.33798342191315517, "grad_norm": 2.0040831565856934, "learning_rate": 2.4245705249292494e-05, "loss": 0.05939832329750061, "step": 2500 }, { "epoch": 0.33811861528192044, "grad_norm": 0.571308434009552, "learning_rate": 2.4240314651530073e-05, "loss": 0.049097444862127304, "step": 2501 }, { "epoch": 0.33825380865068566, "grad_norm": 0.3827081322669983, "learning_rate": 2.4234922129884873e-05, "loss": 0.04473471641540527, "step": 2502 }, { "epoch": 0.33838900201945094, "grad_norm": 0.7014095187187195, "learning_rate": 2.4229527685479644e-05, "loss": 0.08057114481925964, "step": 2503 }, { "epoch": 0.3385241953882162, "grad_norm": 1.4268348217010498, "learning_rate": 2.4224131319437523e-05, "loss": 0.05230224132537842, "step": 2504 }, { "epoch": 0.3386593887569815, "grad_norm": 1.3716235160827637, "learning_rate": 2.421873303288208e-05, "loss": 0.0688956081867218, "step": 2505 }, { "epoch": 0.33879458212574676, "grad_norm": 0.8329153060913086, "learning_rate": 2.4213332826937255e-05, "loss": 0.04252082109451294, "step": 2506 }, { "epoch": 0.338929775494512, "grad_norm": 0.7675012946128845, "learning_rate": 2.4207930702727404e-05, "loss": 0.06650793552398682, "step": 2507 }, { "epoch": 0.33906496886327725, "grad_norm": 0.7769269943237305, "learning_rate": 2.420252666137728e-05, "loss": 0.0697876513004303, "step": 2508 }, { "epoch": 0.3392001622320425, "grad_norm": 0.3359142541885376, "learning_rate": 2.419712070401203e-05, "loss": 0.047617167234420776, "step": 2509 }, { "epoch": 0.3393353556008078, "grad_norm": 0.5980057716369629, "learning_rate": 2.4191712831757203e-05, "loss": 0.05942881107330322, "step": 2510 }, { "epoch": 0.339470548969573, "grad_norm": 0.4190271496772766, "learning_rate": 2.418630304573875e-05, "loss": 0.0539388507604599, "step": 2511 }, { "epoch": 0.3396057423383383, "grad_norm": 0.43068206310272217, "learning_rate": 2.418089134708302e-05, "loss": 0.061523064970970154, "step": 2512 }, { "epoch": 0.33974093570710356, "grad_norm": 0.5115730166435242, "learning_rate": 2.4175477736916743e-05, "loss": 0.06761537492275238, "step": 2513 }, { "epoch": 0.33987612907586884, "grad_norm": 1.7118775844573975, "learning_rate": 2.4170062216367082e-05, "loss": 0.053287506103515625, "step": 2514 }, { "epoch": 0.3400113224446341, "grad_norm": 0.6027407050132751, "learning_rate": 2.416464478656156e-05, "loss": 0.06524059176445007, "step": 2515 }, { "epoch": 0.34014651581339933, "grad_norm": 1.1736122369766235, "learning_rate": 2.4159225448628123e-05, "loss": 0.06616820394992828, "step": 2516 }, { "epoch": 0.3402817091821646, "grad_norm": 1.180332064628601, "learning_rate": 2.4153804203695103e-05, "loss": 0.05459309369325638, "step": 2517 }, { "epoch": 0.3404169025509299, "grad_norm": 0.42003291845321655, "learning_rate": 2.4148381052891236e-05, "loss": 0.04738977551460266, "step": 2518 }, { "epoch": 0.34055209591969515, "grad_norm": 0.3957957923412323, "learning_rate": 2.4142955997345648e-05, "loss": 0.053268320858478546, "step": 2519 }, { "epoch": 0.3406872892884604, "grad_norm": 0.49949464201927185, "learning_rate": 2.4137529038187864e-05, "loss": 0.06918475776910782, "step": 2520 }, { "epoch": 0.34082248265722564, "grad_norm": 1.229874849319458, "learning_rate": 2.413210017654781e-05, "loss": 0.07177431881427765, "step": 2521 }, { "epoch": 0.3409576760259909, "grad_norm": 0.9811297655105591, "learning_rate": 2.4126669413555802e-05, "loss": 0.05535629391670227, "step": 2522 }, { "epoch": 0.3410928693947562, "grad_norm": 0.46806463599205017, "learning_rate": 2.4121236750342548e-05, "loss": 0.045290857553482056, "step": 2523 }, { "epoch": 0.34122806276352147, "grad_norm": 0.5662136673927307, "learning_rate": 2.4115802188039165e-05, "loss": 0.07392966747283936, "step": 2524 }, { "epoch": 0.3413632561322867, "grad_norm": 1.0050934553146362, "learning_rate": 2.4110365727777156e-05, "loss": 0.07055419683456421, "step": 2525 }, { "epoch": 0.34149844950105196, "grad_norm": 0.765142023563385, "learning_rate": 2.410492737068842e-05, "loss": 0.0515645295381546, "step": 2526 }, { "epoch": 0.34163364286981723, "grad_norm": 0.5425080060958862, "learning_rate": 2.409948711790525e-05, "loss": 0.057616740465164185, "step": 2527 }, { "epoch": 0.3417688362385825, "grad_norm": 0.6883987784385681, "learning_rate": 2.4094044970560336e-05, "loss": 0.066932812333107, "step": 2528 }, { "epoch": 0.3419040296073478, "grad_norm": 0.5013260245323181, "learning_rate": 2.4088600929786767e-05, "loss": 0.06805744767189026, "step": 2529 }, { "epoch": 0.342039222976113, "grad_norm": 0.5903222560882568, "learning_rate": 2.408315499671802e-05, "loss": 0.07115764915943146, "step": 2530 }, { "epoch": 0.3421744163448783, "grad_norm": 0.34074294567108154, "learning_rate": 2.407770717248796e-05, "loss": 0.06422656774520874, "step": 2531 }, { "epoch": 0.34230960971364355, "grad_norm": 0.6268665194511414, "learning_rate": 2.407225745823086e-05, "loss": 0.05897696316242218, "step": 2532 }, { "epoch": 0.3424448030824088, "grad_norm": 0.8784014582633972, "learning_rate": 2.4066805855081378e-05, "loss": 0.059277549386024475, "step": 2533 }, { "epoch": 0.3425799964511741, "grad_norm": 0.8250194787979126, "learning_rate": 2.406135236417457e-05, "loss": 0.04818663001060486, "step": 2534 }, { "epoch": 0.3427151898199393, "grad_norm": 0.39451804757118225, "learning_rate": 2.4055896986645875e-05, "loss": 0.06961485743522644, "step": 2535 }, { "epoch": 0.3428503831887046, "grad_norm": 0.3706168830394745, "learning_rate": 2.4050439723631136e-05, "loss": 0.039464518427848816, "step": 2536 }, { "epoch": 0.34298557655746986, "grad_norm": 0.27246546745300293, "learning_rate": 2.404498057626659e-05, "loss": 0.053841859102249146, "step": 2537 }, { "epoch": 0.34312076992623514, "grad_norm": 0.5579825043678284, "learning_rate": 2.4039519545688848e-05, "loss": 0.06240345537662506, "step": 2538 }, { "epoch": 0.34325596329500035, "grad_norm": 0.2667763829231262, "learning_rate": 2.4034056633034932e-05, "loss": 0.055660054087638855, "step": 2539 }, { "epoch": 0.34339115666376563, "grad_norm": 0.4128856360912323, "learning_rate": 2.402859183944225e-05, "loss": 0.045962586998939514, "step": 2540 }, { "epoch": 0.3435263500325309, "grad_norm": 0.7272021770477295, "learning_rate": 2.4023125166048597e-05, "loss": 0.06458261609077454, "step": 2541 }, { "epoch": 0.3436615434012962, "grad_norm": 0.6898627877235413, "learning_rate": 2.401765661399218e-05, "loss": 0.0651286393404007, "step": 2542 }, { "epoch": 0.34379673677006145, "grad_norm": 0.4911874830722809, "learning_rate": 2.4012186184411556e-05, "loss": 0.0685330182313919, "step": 2543 }, { "epoch": 0.34393193013882667, "grad_norm": 0.7272791862487793, "learning_rate": 2.400671387844571e-05, "loss": 0.050691794604063034, "step": 2544 }, { "epoch": 0.34406712350759194, "grad_norm": 1.5425516366958618, "learning_rate": 2.4001239697234008e-05, "loss": 0.06000924110412598, "step": 2545 }, { "epoch": 0.3442023168763572, "grad_norm": 0.47782620787620544, "learning_rate": 2.3995763641916205e-05, "loss": 0.05121324956417084, "step": 2546 }, { "epoch": 0.3443375102451225, "grad_norm": 1.0811127424240112, "learning_rate": 2.3990285713632436e-05, "loss": 0.054869458079338074, "step": 2547 }, { "epoch": 0.34447270361388777, "grad_norm": 0.7305948734283447, "learning_rate": 2.398480591352324e-05, "loss": 0.05475684627890587, "step": 2548 }, { "epoch": 0.344607896982653, "grad_norm": 0.2501955032348633, "learning_rate": 2.3979324242729537e-05, "loss": 0.05399274826049805, "step": 2549 }, { "epoch": 0.34474309035141826, "grad_norm": 0.5100407600402832, "learning_rate": 2.3973840702392646e-05, "loss": 0.04482141137123108, "step": 2550 }, { "epoch": 0.34487828372018353, "grad_norm": 0.7808458209037781, "learning_rate": 2.3968355293654267e-05, "loss": 0.060536690056324005, "step": 2551 }, { "epoch": 0.3450134770889488, "grad_norm": 0.45987311005592346, "learning_rate": 2.396286801765649e-05, "loss": 0.05622217059135437, "step": 2552 }, { "epoch": 0.345148670457714, "grad_norm": 0.44538676738739014, "learning_rate": 2.3957378875541795e-05, "loss": 0.048855721950531006, "step": 2553 }, { "epoch": 0.3452838638264793, "grad_norm": 0.47922489047050476, "learning_rate": 2.395188786845305e-05, "loss": 0.05611765757203102, "step": 2554 }, { "epoch": 0.34541905719524457, "grad_norm": 0.7451590895652771, "learning_rate": 2.3946394997533516e-05, "loss": 0.0712137222290039, "step": 2555 }, { "epoch": 0.34555425056400985, "grad_norm": 1.1291475296020508, "learning_rate": 2.3940900263926833e-05, "loss": 0.04754869267344475, "step": 2556 }, { "epoch": 0.3456894439327751, "grad_norm": 0.8264445662498474, "learning_rate": 2.393540366877704e-05, "loss": 0.05984792858362198, "step": 2557 }, { "epoch": 0.34582463730154034, "grad_norm": 0.7591851353645325, "learning_rate": 2.392990521322855e-05, "loss": 0.058432627469301224, "step": 2558 }, { "epoch": 0.3459598306703056, "grad_norm": 0.4125320613384247, "learning_rate": 2.392440489842618e-05, "loss": 0.03875409811735153, "step": 2559 }, { "epoch": 0.3460950240390709, "grad_norm": 0.3128974139690399, "learning_rate": 2.3918902725515118e-05, "loss": 0.038277775049209595, "step": 2560 }, { "epoch": 0.34623021740783616, "grad_norm": 0.5362597703933716, "learning_rate": 2.391339869564094e-05, "loss": 0.07293155789375305, "step": 2561 }, { "epoch": 0.34636541077660143, "grad_norm": 0.6536089777946472, "learning_rate": 2.3907892809949628e-05, "loss": 0.06180323660373688, "step": 2562 }, { "epoch": 0.34650060414536665, "grad_norm": 0.8963726162910461, "learning_rate": 2.390238506958753e-05, "loss": 0.0651576817035675, "step": 2563 }, { "epoch": 0.3466357975141319, "grad_norm": 0.300552099943161, "learning_rate": 2.3896875475701387e-05, "loss": 0.04789632558822632, "step": 2564 }, { "epoch": 0.3467709908828972, "grad_norm": 0.8877284526824951, "learning_rate": 2.3891364029438323e-05, "loss": 0.06549087911844254, "step": 2565 }, { "epoch": 0.3469061842516625, "grad_norm": 0.5803410410881042, "learning_rate": 2.3885850731945857e-05, "loss": 0.05586116015911102, "step": 2566 }, { "epoch": 0.3470413776204277, "grad_norm": 1.0514369010925293, "learning_rate": 2.3880335584371884e-05, "loss": 0.06165571138262749, "step": 2567 }, { "epoch": 0.34717657098919297, "grad_norm": 0.5227709412574768, "learning_rate": 2.387481858786468e-05, "loss": 0.04741154611110687, "step": 2568 }, { "epoch": 0.34731176435795824, "grad_norm": 1.0869237184524536, "learning_rate": 2.386929974357293e-05, "loss": 0.0511382594704628, "step": 2569 }, { "epoch": 0.3474469577267235, "grad_norm": 0.79176265001297, "learning_rate": 2.386377905264567e-05, "loss": 0.06429392099380493, "step": 2570 }, { "epoch": 0.3475821510954888, "grad_norm": 0.6106847524642944, "learning_rate": 2.3858256516232346e-05, "loss": 0.06466436386108398, "step": 2571 }, { "epoch": 0.347717344464254, "grad_norm": 0.4373892545700073, "learning_rate": 2.3852732135482775e-05, "loss": 0.04785441234707832, "step": 2572 }, { "epoch": 0.3478525378330193, "grad_norm": 0.7229081988334656, "learning_rate": 2.3847205911547166e-05, "loss": 0.05422341823577881, "step": 2573 }, { "epoch": 0.34798773120178456, "grad_norm": 1.1516376733779907, "learning_rate": 2.3841677845576108e-05, "loss": 0.062233179807662964, "step": 2574 }, { "epoch": 0.34812292457054983, "grad_norm": 0.7101799249649048, "learning_rate": 2.383614793872057e-05, "loss": 0.061386242508888245, "step": 2575 }, { "epoch": 0.3482581179393151, "grad_norm": 0.2678720951080322, "learning_rate": 2.3830616192131913e-05, "loss": 0.03412139043211937, "step": 2576 }, { "epoch": 0.3483933113080803, "grad_norm": 0.4181729555130005, "learning_rate": 2.3825082606961876e-05, "loss": 0.05492118000984192, "step": 2577 }, { "epoch": 0.3485285046768456, "grad_norm": 0.582635760307312, "learning_rate": 2.3819547184362575e-05, "loss": 0.060674965381622314, "step": 2578 }, { "epoch": 0.34866369804561087, "grad_norm": 0.420721173286438, "learning_rate": 2.3814009925486522e-05, "loss": 0.05094034969806671, "step": 2579 }, { "epoch": 0.34879889141437614, "grad_norm": 0.9418121576309204, "learning_rate": 2.38084708314866e-05, "loss": 0.04535314440727234, "step": 2580 }, { "epoch": 0.34893408478314136, "grad_norm": 0.3692968487739563, "learning_rate": 2.380292990351608e-05, "loss": 0.04885189235210419, "step": 2581 }, { "epoch": 0.34906927815190664, "grad_norm": 0.7663249373435974, "learning_rate": 2.3797387142728607e-05, "loss": 0.046023085713386536, "step": 2582 }, { "epoch": 0.3492044715206719, "grad_norm": 0.5300248265266418, "learning_rate": 2.379184255027822e-05, "loss": 0.054738618433475494, "step": 2583 }, { "epoch": 0.3493396648894372, "grad_norm": 0.5208472013473511, "learning_rate": 2.378629612731933e-05, "loss": 0.06298881024122238, "step": 2584 }, { "epoch": 0.34947485825820246, "grad_norm": 0.559243381023407, "learning_rate": 2.3780747875006735e-05, "loss": 0.04366272687911987, "step": 2585 }, { "epoch": 0.3496100516269677, "grad_norm": 0.29652270674705505, "learning_rate": 2.37751977944956e-05, "loss": 0.05863502621650696, "step": 2586 }, { "epoch": 0.34974524499573295, "grad_norm": 0.5760659575462341, "learning_rate": 2.3769645886941497e-05, "loss": 0.0788021981716156, "step": 2587 }, { "epoch": 0.3498804383644982, "grad_norm": 0.27831214666366577, "learning_rate": 2.376409215350035e-05, "loss": 0.043708428740501404, "step": 2588 }, { "epoch": 0.3500156317332635, "grad_norm": 0.7583713531494141, "learning_rate": 2.3758536595328486e-05, "loss": 0.06890417635440826, "step": 2589 }, { "epoch": 0.3501508251020288, "grad_norm": 0.3065214157104492, "learning_rate": 2.375297921358259e-05, "loss": 0.05722136050462723, "step": 2590 }, { "epoch": 0.350286018470794, "grad_norm": 0.6269283890724182, "learning_rate": 2.3747420009419745e-05, "loss": 0.05212374031543732, "step": 2591 }, { "epoch": 0.35042121183955927, "grad_norm": 0.3478796184062958, "learning_rate": 2.3741858983997415e-05, "loss": 0.0655936747789383, "step": 2592 }, { "epoch": 0.35055640520832454, "grad_norm": 0.2571185231208801, "learning_rate": 2.373629613847342e-05, "loss": 0.046925801783800125, "step": 2593 }, { "epoch": 0.3506915985770898, "grad_norm": 0.4292133152484894, "learning_rate": 2.3730731474005988e-05, "loss": 0.03944733738899231, "step": 2594 }, { "epoch": 0.35082679194585503, "grad_norm": 0.4198874831199646, "learning_rate": 2.37251649917537e-05, "loss": 0.04771158844232559, "step": 2595 }, { "epoch": 0.3509619853146203, "grad_norm": 0.4447716176509857, "learning_rate": 2.3719596692875534e-05, "loss": 0.057852745056152344, "step": 2596 }, { "epoch": 0.3510971786833856, "grad_norm": 0.7505528330802917, "learning_rate": 2.3714026578530836e-05, "loss": 0.06255149841308594, "step": 2597 }, { "epoch": 0.35123237205215085, "grad_norm": 0.31214281916618347, "learning_rate": 2.370845464987934e-05, "loss": 0.04574630409479141, "step": 2598 }, { "epoch": 0.35136756542091613, "grad_norm": 0.3981100618839264, "learning_rate": 2.370288090808114e-05, "loss": 0.0443689338862896, "step": 2599 }, { "epoch": 0.35150275878968135, "grad_norm": 0.48687514662742615, "learning_rate": 2.369730535429673e-05, "loss": 0.05019274353981018, "step": 2600 }, { "epoch": 0.3516379521584466, "grad_norm": 0.38240477442741394, "learning_rate": 2.369172798968697e-05, "loss": 0.06334640830755234, "step": 2601 }, { "epoch": 0.3517731455272119, "grad_norm": 0.3267514109611511, "learning_rate": 2.3686148815413083e-05, "loss": 0.050984565168619156, "step": 2602 }, { "epoch": 0.35190833889597717, "grad_norm": 0.557716429233551, "learning_rate": 2.3680567832636695e-05, "loss": 0.05017661675810814, "step": 2603 }, { "epoch": 0.3520435322647424, "grad_norm": 0.29590439796447754, "learning_rate": 2.3674985042519795e-05, "loss": 0.06020501255989075, "step": 2604 }, { "epoch": 0.35217872563350766, "grad_norm": 0.2776332199573517, "learning_rate": 2.366940044622475e-05, "loss": 0.04737381637096405, "step": 2605 }, { "epoch": 0.35231391900227294, "grad_norm": 1.3460701704025269, "learning_rate": 2.3663814044914302e-05, "loss": 0.050212420523166656, "step": 2606 }, { "epoch": 0.3524491123710382, "grad_norm": 0.32826700806617737, "learning_rate": 2.3658225839751566e-05, "loss": 0.05376420542597771, "step": 2607 }, { "epoch": 0.3525843057398035, "grad_norm": 0.604899525642395, "learning_rate": 2.3652635831900043e-05, "loss": 0.07410729676485062, "step": 2608 }, { "epoch": 0.3527194991085687, "grad_norm": 1.2091988325119019, "learning_rate": 2.3647044022523595e-05, "loss": 0.0536273717880249, "step": 2609 }, { "epoch": 0.352854692477334, "grad_norm": 0.8003138303756714, "learning_rate": 2.364145041278647e-05, "loss": 0.0515962690114975, "step": 2610 }, { "epoch": 0.35298988584609925, "grad_norm": 0.2733721137046814, "learning_rate": 2.3635855003853287e-05, "loss": 0.03086373209953308, "step": 2611 }, { "epoch": 0.3531250792148645, "grad_norm": 1.9662023782730103, "learning_rate": 2.363025779688904e-05, "loss": 0.07728084921836853, "step": 2612 }, { "epoch": 0.3532602725836298, "grad_norm": 0.4124818444252014, "learning_rate": 2.3624658793059103e-05, "loss": 0.07044723629951477, "step": 2613 }, { "epoch": 0.353395465952395, "grad_norm": 1.3796569108963013, "learning_rate": 2.3619057993529204e-05, "loss": 0.07363824546337128, "step": 2614 }, { "epoch": 0.3535306593211603, "grad_norm": 1.6578117609024048, "learning_rate": 2.3613455399465475e-05, "loss": 0.048297226428985596, "step": 2615 }, { "epoch": 0.35366585268992556, "grad_norm": 0.39314690232276917, "learning_rate": 2.3607851012034394e-05, "loss": 0.057796917855739594, "step": 2616 }, { "epoch": 0.35380104605869084, "grad_norm": 0.8022534251213074, "learning_rate": 2.3602244832402838e-05, "loss": 0.043085239827632904, "step": 2617 }, { "epoch": 0.35393623942745606, "grad_norm": 2.165907382965088, "learning_rate": 2.3596636861738024e-05, "loss": 0.05915340781211853, "step": 2618 }, { "epoch": 0.35407143279622133, "grad_norm": 0.5089679956436157, "learning_rate": 2.3591027101207578e-05, "loss": 0.05461547523736954, "step": 2619 }, { "epoch": 0.3542066261649866, "grad_norm": 0.3693690001964569, "learning_rate": 2.3585415551979476e-05, "loss": 0.041728127747774124, "step": 2620 }, { "epoch": 0.3543418195337519, "grad_norm": 0.3994734287261963, "learning_rate": 2.3579802215222076e-05, "loss": 0.06538271903991699, "step": 2621 }, { "epoch": 0.35447701290251715, "grad_norm": 0.8325362801551819, "learning_rate": 2.35741870921041e-05, "loss": 0.08045431226491928, "step": 2622 }, { "epoch": 0.35461220627128237, "grad_norm": 0.28094246983528137, "learning_rate": 2.3568570183794645e-05, "loss": 0.05441633239388466, "step": 2623 }, { "epoch": 0.35474739964004764, "grad_norm": 1.7707818746566772, "learning_rate": 2.356295149146319e-05, "loss": 0.048590898513793945, "step": 2624 }, { "epoch": 0.3548825930088129, "grad_norm": 1.2356181144714355, "learning_rate": 2.3557331016279567e-05, "loss": 0.05864347517490387, "step": 2625 }, { "epoch": 0.3550177863775782, "grad_norm": 0.47317442297935486, "learning_rate": 2.3551708759413998e-05, "loss": 0.05378357693552971, "step": 2626 }, { "epoch": 0.35515297974634347, "grad_norm": 0.9529665112495422, "learning_rate": 2.354608472203706e-05, "loss": 0.0599437952041626, "step": 2627 }, { "epoch": 0.3552881731151087, "grad_norm": 0.6597905158996582, "learning_rate": 2.3540458905319705e-05, "loss": 0.05852166563272476, "step": 2628 }, { "epoch": 0.35542336648387396, "grad_norm": 0.6731898784637451, "learning_rate": 2.3534831310433264e-05, "loss": 0.061750784516334534, "step": 2629 }, { "epoch": 0.35555855985263923, "grad_norm": 0.6791136264801025, "learning_rate": 2.3529201938549434e-05, "loss": 0.06681104004383087, "step": 2630 }, { "epoch": 0.3556937532214045, "grad_norm": 0.5720198750495911, "learning_rate": 2.3523570790840274e-05, "loss": 0.06063258647918701, "step": 2631 }, { "epoch": 0.3558289465901697, "grad_norm": 0.73337322473526, "learning_rate": 2.3517937868478228e-05, "loss": 0.04149096831679344, "step": 2632 }, { "epoch": 0.355964139958935, "grad_norm": 0.27330920100212097, "learning_rate": 2.3512303172636092e-05, "loss": 0.04757782071828842, "step": 2633 }, { "epoch": 0.3560993333277003, "grad_norm": 0.28042715787887573, "learning_rate": 2.3506666704487033e-05, "loss": 0.060640573501586914, "step": 2634 }, { "epoch": 0.35623452669646555, "grad_norm": 0.6427616477012634, "learning_rate": 2.3501028465204614e-05, "loss": 0.06296513974666595, "step": 2635 }, { "epoch": 0.3563697200652308, "grad_norm": 0.47043800354003906, "learning_rate": 2.3495388455962734e-05, "loss": 0.06619837880134583, "step": 2636 }, { "epoch": 0.35650491343399604, "grad_norm": 1.122891902923584, "learning_rate": 2.3489746677935673e-05, "loss": 0.07286417484283447, "step": 2637 }, { "epoch": 0.3566401068027613, "grad_norm": 0.5454763174057007, "learning_rate": 2.3484103132298082e-05, "loss": 0.06258266419172287, "step": 2638 }, { "epoch": 0.3567753001715266, "grad_norm": 1.1158266067504883, "learning_rate": 2.347845782022497e-05, "loss": 0.05772816389799118, "step": 2639 }, { "epoch": 0.35691049354029186, "grad_norm": 0.47275757789611816, "learning_rate": 2.3472810742891734e-05, "loss": 0.05300324410200119, "step": 2640 }, { "epoch": 0.35704568690905714, "grad_norm": 2.019820213317871, "learning_rate": 2.3467161901474118e-05, "loss": 0.06003296375274658, "step": 2641 }, { "epoch": 0.35718088027782235, "grad_norm": 1.201438307762146, "learning_rate": 2.346151129714824e-05, "loss": 0.0818963423371315, "step": 2642 }, { "epoch": 0.35731607364658763, "grad_norm": 0.5656431317329407, "learning_rate": 2.3455858931090588e-05, "loss": 0.05859818309545517, "step": 2643 }, { "epoch": 0.3574512670153529, "grad_norm": 0.9253957867622375, "learning_rate": 2.3450204804478014e-05, "loss": 0.0758921205997467, "step": 2644 }, { "epoch": 0.3575864603841182, "grad_norm": 0.4945479929447174, "learning_rate": 2.344454891848774e-05, "loss": 0.06418024003505707, "step": 2645 }, { "epoch": 0.3577216537528834, "grad_norm": 0.21417228877544403, "learning_rate": 2.3438891274297348e-05, "loss": 0.045081257820129395, "step": 2646 }, { "epoch": 0.35785684712164867, "grad_norm": 1.4353468418121338, "learning_rate": 2.343323187308479e-05, "loss": 0.0547470897436142, "step": 2647 }, { "epoch": 0.35799204049041394, "grad_norm": 0.4400724470615387, "learning_rate": 2.342757071602839e-05, "loss": 0.05882125720381737, "step": 2648 }, { "epoch": 0.3581272338591792, "grad_norm": 1.221472144126892, "learning_rate": 2.3421907804306816e-05, "loss": 0.058724045753479004, "step": 2649 }, { "epoch": 0.3582624272279445, "grad_norm": 0.5090343952178955, "learning_rate": 2.341624313909913e-05, "loss": 0.04208928346633911, "step": 2650 }, { "epoch": 0.3583976205967097, "grad_norm": 0.3714367151260376, "learning_rate": 2.3410576721584742e-05, "loss": 0.04616578668355942, "step": 2651 }, { "epoch": 0.358532813965475, "grad_norm": 0.8840522766113281, "learning_rate": 2.3404908552943435e-05, "loss": 0.04824555665254593, "step": 2652 }, { "epoch": 0.35866800733424026, "grad_norm": 0.5322878956794739, "learning_rate": 2.339923863435534e-05, "loss": 0.048618778586387634, "step": 2653 }, { "epoch": 0.35880320070300553, "grad_norm": 0.8479301929473877, "learning_rate": 2.3393566967000974e-05, "loss": 0.06648284196853638, "step": 2654 }, { "epoch": 0.3589383940717708, "grad_norm": 1.0377086400985718, "learning_rate": 2.3387893552061202e-05, "loss": 0.06648188829421997, "step": 2655 }, { "epoch": 0.359073587440536, "grad_norm": 0.5264196395874023, "learning_rate": 2.3382218390717268e-05, "loss": 0.06007471680641174, "step": 2656 }, { "epoch": 0.3592087808093013, "grad_norm": 1.1664226055145264, "learning_rate": 2.3376541484150762e-05, "loss": 0.05334685742855072, "step": 2657 }, { "epoch": 0.35934397417806657, "grad_norm": 0.4176519513130188, "learning_rate": 2.3370862833543652e-05, "loss": 0.05965322256088257, "step": 2658 }, { "epoch": 0.35947916754683185, "grad_norm": 0.46354198455810547, "learning_rate": 2.336518244007826e-05, "loss": 0.05492527782917023, "step": 2659 }, { "epoch": 0.35961436091559706, "grad_norm": 0.6516409516334534, "learning_rate": 2.3359500304937274e-05, "loss": 0.05586463212966919, "step": 2660 }, { "epoch": 0.35974955428436234, "grad_norm": 0.6066004633903503, "learning_rate": 2.335381642930375e-05, "loss": 0.07232370972633362, "step": 2661 }, { "epoch": 0.3598847476531276, "grad_norm": 0.9156617522239685, "learning_rate": 2.3348130814361094e-05, "loss": 0.03705447167158127, "step": 2662 }, { "epoch": 0.3600199410218929, "grad_norm": 0.6057007908821106, "learning_rate": 2.334244346129309e-05, "loss": 0.04913758113980293, "step": 2663 }, { "epoch": 0.36015513439065816, "grad_norm": 0.31930455565452576, "learning_rate": 2.3336754371283862e-05, "loss": 0.0419168621301651, "step": 2664 }, { "epoch": 0.3602903277594234, "grad_norm": 0.30003830790519714, "learning_rate": 2.333106354551792e-05, "loss": 0.05088382586836815, "step": 2665 }, { "epoch": 0.36042552112818865, "grad_norm": 0.9144757390022278, "learning_rate": 2.332537098518012e-05, "loss": 0.07023753225803375, "step": 2666 }, { "epoch": 0.3605607144969539, "grad_norm": 0.21278853714466095, "learning_rate": 2.3319676691455686e-05, "loss": 0.03908340632915497, "step": 2667 }, { "epoch": 0.3606959078657192, "grad_norm": 0.425176203250885, "learning_rate": 2.3313980665530205e-05, "loss": 0.052151650190353394, "step": 2668 }, { "epoch": 0.3608311012344845, "grad_norm": 0.7476831078529358, "learning_rate": 2.3308282908589606e-05, "loss": 0.04653248190879822, "step": 2669 }, { "epoch": 0.3609662946032497, "grad_norm": 1.47437584400177, "learning_rate": 2.330258342182021e-05, "loss": 0.05309110879898071, "step": 2670 }, { "epoch": 0.36110148797201497, "grad_norm": 1.4785507917404175, "learning_rate": 2.329688220640866e-05, "loss": 0.05830750614404678, "step": 2671 }, { "epoch": 0.36123668134078024, "grad_norm": 1.1409943103790283, "learning_rate": 2.329117926354199e-05, "loss": 0.0646219328045845, "step": 2672 }, { "epoch": 0.3613718747095455, "grad_norm": 0.2731613516807556, "learning_rate": 2.3285474594407588e-05, "loss": 0.03335670754313469, "step": 2673 }, { "epoch": 0.36150706807831073, "grad_norm": 1.2858467102050781, "learning_rate": 2.327976820019319e-05, "loss": 0.06651624292135239, "step": 2674 }, { "epoch": 0.361642261447076, "grad_norm": 0.5883856415748596, "learning_rate": 2.32740600820869e-05, "loss": 0.0514206625521183, "step": 2675 }, { "epoch": 0.3617774548158413, "grad_norm": 1.06332266330719, "learning_rate": 2.326835024127718e-05, "loss": 0.07948241382837296, "step": 2676 }, { "epoch": 0.36191264818460656, "grad_norm": 0.47783055901527405, "learning_rate": 2.326263867895285e-05, "loss": 0.07001806795597076, "step": 2677 }, { "epoch": 0.36204784155337183, "grad_norm": 0.7671249508857727, "learning_rate": 2.3256925396303076e-05, "loss": 0.04274945706129074, "step": 2678 }, { "epoch": 0.36218303492213705, "grad_norm": 0.8156877160072327, "learning_rate": 2.3251210394517412e-05, "loss": 0.05127374827861786, "step": 2679 }, { "epoch": 0.3623182282909023, "grad_norm": 0.48961812257766724, "learning_rate": 2.3245493674785742e-05, "loss": 0.06561869382858276, "step": 2680 }, { "epoch": 0.3624534216596676, "grad_norm": 0.4243928790092468, "learning_rate": 2.3239775238298316e-05, "loss": 0.06535175442695618, "step": 2681 }, { "epoch": 0.36258861502843287, "grad_norm": 1.0964494943618774, "learning_rate": 2.3234055086245744e-05, "loss": 0.06075618043541908, "step": 2682 }, { "epoch": 0.36272380839719814, "grad_norm": 0.7016053795814514, "learning_rate": 2.3228333219818998e-05, "loss": 0.051271483302116394, "step": 2683 }, { "epoch": 0.36285900176596336, "grad_norm": 0.358658105134964, "learning_rate": 2.3222609640209397e-05, "loss": 0.05346973240375519, "step": 2684 }, { "epoch": 0.36299419513472864, "grad_norm": 0.361572265625, "learning_rate": 2.3216884348608614e-05, "loss": 0.06042298674583435, "step": 2685 }, { "epoch": 0.3631293885034939, "grad_norm": 0.8352283835411072, "learning_rate": 2.32111573462087e-05, "loss": 0.06254858523607254, "step": 2686 }, { "epoch": 0.3632645818722592, "grad_norm": 0.563700258731842, "learning_rate": 2.3205428634202028e-05, "loss": 0.043987635523080826, "step": 2687 }, { "epoch": 0.3633997752410244, "grad_norm": 0.49695372581481934, "learning_rate": 2.3199698213781367e-05, "loss": 0.053213298320770264, "step": 2688 }, { "epoch": 0.3635349686097897, "grad_norm": 0.6971237659454346, "learning_rate": 2.319396608613981e-05, "loss": 0.06444203853607178, "step": 2689 }, { "epoch": 0.36367016197855495, "grad_norm": 0.3077828288078308, "learning_rate": 2.318823225247082e-05, "loss": 0.0511898472905159, "step": 2690 }, { "epoch": 0.3638053553473202, "grad_norm": 0.2585951089859009, "learning_rate": 2.3182496713968208e-05, "loss": 0.050716765224933624, "step": 2691 }, { "epoch": 0.3639405487160855, "grad_norm": 0.4897538721561432, "learning_rate": 2.3176759471826143e-05, "loss": 0.0444374606013298, "step": 2692 }, { "epoch": 0.3640757420848507, "grad_norm": 0.469706654548645, "learning_rate": 2.3171020527239155e-05, "loss": 0.04763749986886978, "step": 2693 }, { "epoch": 0.364210935453616, "grad_norm": 0.27218949794769287, "learning_rate": 2.316527988140212e-05, "loss": 0.031900662928819656, "step": 2694 }, { "epoch": 0.36434612882238127, "grad_norm": 0.5049083232879639, "learning_rate": 2.315953753551027e-05, "loss": 0.04299360513687134, "step": 2695 }, { "epoch": 0.36448132219114654, "grad_norm": 0.3708631992340088, "learning_rate": 2.3153793490759197e-05, "loss": 0.048834264278411865, "step": 2696 }, { "epoch": 0.3646165155599118, "grad_norm": 1.4542580842971802, "learning_rate": 2.3148047748344835e-05, "loss": 0.05752076208591461, "step": 2697 }, { "epoch": 0.36475170892867703, "grad_norm": 0.33774030208587646, "learning_rate": 2.314230030946348e-05, "loss": 0.04336026310920715, "step": 2698 }, { "epoch": 0.3648869022974423, "grad_norm": 1.5721356868743896, "learning_rate": 2.3136551175311782e-05, "loss": 0.06467732042074203, "step": 2699 }, { "epoch": 0.3650220956662076, "grad_norm": 0.2294367402791977, "learning_rate": 2.313080034708674e-05, "loss": 0.04604820907115936, "step": 2700 }, { "epoch": 0.36515728903497285, "grad_norm": 0.3811178505420685, "learning_rate": 2.312504782598571e-05, "loss": 0.05043311044573784, "step": 2701 }, { "epoch": 0.3652924824037381, "grad_norm": 0.3909724950790405, "learning_rate": 2.311929361320639e-05, "loss": 0.061378300189971924, "step": 2702 }, { "epoch": 0.36542767577250335, "grad_norm": 0.7049826979637146, "learning_rate": 2.311353770994684e-05, "loss": 0.04515687748789787, "step": 2703 }, { "epoch": 0.3655628691412686, "grad_norm": 0.2656460106372833, "learning_rate": 2.310778011740548e-05, "loss": 0.048185400664806366, "step": 2704 }, { "epoch": 0.3656980625100339, "grad_norm": 1.106172800064087, "learning_rate": 2.310202083678106e-05, "loss": 0.06623083353042603, "step": 2705 }, { "epoch": 0.36583325587879917, "grad_norm": 1.0797213315963745, "learning_rate": 2.3096259869272694e-05, "loss": 0.05998314544558525, "step": 2706 }, { "epoch": 0.3659684492475644, "grad_norm": 1.055147647857666, "learning_rate": 2.309049721607985e-05, "loss": 0.07395990192890167, "step": 2707 }, { "epoch": 0.36610364261632966, "grad_norm": 0.5194681286811829, "learning_rate": 2.3084732878402342e-05, "loss": 0.05055910348892212, "step": 2708 }, { "epoch": 0.36623883598509493, "grad_norm": 0.5933238863945007, "learning_rate": 2.307896685744034e-05, "loss": 0.06610037386417389, "step": 2709 }, { "epoch": 0.3663740293538602, "grad_norm": 0.25912216305732727, "learning_rate": 2.3073199154394352e-05, "loss": 0.037207845598459244, "step": 2710 }, { "epoch": 0.3665092227226255, "grad_norm": 0.8808997273445129, "learning_rate": 2.3067429770465246e-05, "loss": 0.065489262342453, "step": 2711 }, { "epoch": 0.3666444160913907, "grad_norm": 0.8706968426704407, "learning_rate": 2.3061658706854244e-05, "loss": 0.06867433339357376, "step": 2712 }, { "epoch": 0.366779609460156, "grad_norm": 0.5530458688735962, "learning_rate": 2.3055885964762907e-05, "loss": 0.059696100652217865, "step": 2713 }, { "epoch": 0.36691480282892125, "grad_norm": 0.8945398330688477, "learning_rate": 2.3050111545393156e-05, "loss": 0.05344860255718231, "step": 2714 }, { "epoch": 0.3670499961976865, "grad_norm": 1.4480206966400146, "learning_rate": 2.304433544994725e-05, "loss": 0.057323891669511795, "step": 2715 }, { "epoch": 0.36718518956645174, "grad_norm": 0.6804830431938171, "learning_rate": 2.303855767962781e-05, "loss": 0.05345511436462402, "step": 2716 }, { "epoch": 0.367320382935217, "grad_norm": 0.5865732431411743, "learning_rate": 2.303277823563779e-05, "loss": 0.060651302337646484, "step": 2717 }, { "epoch": 0.3674555763039823, "grad_norm": 0.46498337388038635, "learning_rate": 2.3026997119180507e-05, "loss": 0.04442206025123596, "step": 2718 }, { "epoch": 0.36759076967274756, "grad_norm": 0.7256549596786499, "learning_rate": 2.3021214331459616e-05, "loss": 0.04756008833646774, "step": 2719 }, { "epoch": 0.36772596304151284, "grad_norm": 1.356463074684143, "learning_rate": 2.301542987367913e-05, "loss": 0.0645064115524292, "step": 2720 }, { "epoch": 0.36786115641027806, "grad_norm": 0.3430469036102295, "learning_rate": 2.3009643747043403e-05, "loss": 0.03879815340042114, "step": 2721 }, { "epoch": 0.36799634977904333, "grad_norm": 0.5895841717720032, "learning_rate": 2.3003855952757132e-05, "loss": 0.0735158622264862, "step": 2722 }, { "epoch": 0.3681315431478086, "grad_norm": 1.2479759454727173, "learning_rate": 2.2998066492025372e-05, "loss": 0.05335110425949097, "step": 2723 }, { "epoch": 0.3682667365165739, "grad_norm": 0.5266788601875305, "learning_rate": 2.2992275366053513e-05, "loss": 0.06457316875457764, "step": 2724 }, { "epoch": 0.3684019298853391, "grad_norm": 0.6879621148109436, "learning_rate": 2.2986482576047305e-05, "loss": 0.07312683761119843, "step": 2725 }, { "epoch": 0.36853712325410437, "grad_norm": 0.4298947751522064, "learning_rate": 2.298068812321284e-05, "loss": 0.03696560487151146, "step": 2726 }, { "epoch": 0.36867231662286964, "grad_norm": 0.4071572422981262, "learning_rate": 2.297489200875654e-05, "loss": 0.036071836948394775, "step": 2727 }, { "epoch": 0.3688075099916349, "grad_norm": 0.8441872000694275, "learning_rate": 2.2969094233885204e-05, "loss": 0.048501357436180115, "step": 2728 }, { "epoch": 0.3689427033604002, "grad_norm": 0.9283362627029419, "learning_rate": 2.296329479980595e-05, "loss": 0.06408321857452393, "step": 2729 }, { "epoch": 0.3690778967291654, "grad_norm": 0.7360424995422363, "learning_rate": 2.2957493707726252e-05, "loss": 0.049999088048934937, "step": 2730 }, { "epoch": 0.3692130900979307, "grad_norm": 0.39487510919570923, "learning_rate": 2.2951690958853932e-05, "loss": 0.05361481010913849, "step": 2731 }, { "epoch": 0.36934828346669596, "grad_norm": 0.5209821462631226, "learning_rate": 2.2945886554397154e-05, "loss": 0.05601753294467926, "step": 2732 }, { "epoch": 0.36948347683546123, "grad_norm": 0.42719781398773193, "learning_rate": 2.294008049556441e-05, "loss": 0.056699272245168686, "step": 2733 }, { "epoch": 0.3696186702042265, "grad_norm": 0.38892847299575806, "learning_rate": 2.2934272783564577e-05, "loss": 0.06433428823947906, "step": 2734 }, { "epoch": 0.3697538635729917, "grad_norm": 0.29517319798469543, "learning_rate": 2.2928463419606835e-05, "loss": 0.03910673037171364, "step": 2735 }, { "epoch": 0.369889056941757, "grad_norm": 1.056670904159546, "learning_rate": 2.292265240490073e-05, "loss": 0.05807200074195862, "step": 2736 }, { "epoch": 0.3700242503105223, "grad_norm": 0.22617408633232117, "learning_rate": 2.2916839740656154e-05, "loss": 0.03555326163768768, "step": 2737 }, { "epoch": 0.37015944367928755, "grad_norm": 1.2022522687911987, "learning_rate": 2.2911025428083316e-05, "loss": 0.0521751344203949, "step": 2738 }, { "epoch": 0.37029463704805277, "grad_norm": 0.2654719948768616, "learning_rate": 2.2905209468392798e-05, "loss": 0.051092736423015594, "step": 2739 }, { "epoch": 0.37042983041681804, "grad_norm": 0.5073826909065247, "learning_rate": 2.2899391862795514e-05, "loss": 0.04168633744120598, "step": 2740 }, { "epoch": 0.3705650237855833, "grad_norm": 0.35489004850387573, "learning_rate": 2.2893572612502718e-05, "loss": 0.06316696852445602, "step": 2741 }, { "epoch": 0.3707002171543486, "grad_norm": 0.6103214621543884, "learning_rate": 2.2887751718726013e-05, "loss": 0.04942135512828827, "step": 2742 }, { "epoch": 0.37083541052311386, "grad_norm": 0.3600391447544098, "learning_rate": 2.288192918267734e-05, "loss": 0.05429067462682724, "step": 2743 }, { "epoch": 0.3709706038918791, "grad_norm": 0.295093297958374, "learning_rate": 2.2876105005568974e-05, "loss": 0.047676101326942444, "step": 2744 }, { "epoch": 0.37110579726064435, "grad_norm": 1.0416713953018188, "learning_rate": 2.287027918861355e-05, "loss": 0.05760412663221359, "step": 2745 }, { "epoch": 0.37124099062940963, "grad_norm": 0.42710763216018677, "learning_rate": 2.2864451733024024e-05, "loss": 0.06148383766412735, "step": 2746 }, { "epoch": 0.3713761839981749, "grad_norm": 0.5819581747055054, "learning_rate": 2.2858622640013716e-05, "loss": 0.06356078386306763, "step": 2747 }, { "epoch": 0.3715113773669402, "grad_norm": 1.125916838645935, "learning_rate": 2.285279191079626e-05, "loss": 0.05261853337287903, "step": 2748 }, { "epoch": 0.3716465707357054, "grad_norm": 1.5052939653396606, "learning_rate": 2.2846959546585656e-05, "loss": 0.04465719312429428, "step": 2749 }, { "epoch": 0.37178176410447067, "grad_norm": 1.099485993385315, "learning_rate": 2.2841125548596225e-05, "loss": 0.06966456770896912, "step": 2750 }, { "epoch": 0.37191695747323594, "grad_norm": 0.36587652564048767, "learning_rate": 2.2835289918042648e-05, "loss": 0.06398959457874298, "step": 2751 }, { "epoch": 0.3720521508420012, "grad_norm": 0.3534739315509796, "learning_rate": 2.282945265613992e-05, "loss": 0.05443921685218811, "step": 2752 }, { "epoch": 0.37218734421076644, "grad_norm": 0.21372392773628235, "learning_rate": 2.2823613764103406e-05, "loss": 0.044846728444099426, "step": 2753 }, { "epoch": 0.3723225375795317, "grad_norm": 0.2837085723876953, "learning_rate": 2.2817773243148776e-05, "loss": 0.03718673065304756, "step": 2754 }, { "epoch": 0.372457730948297, "grad_norm": 0.5193917155265808, "learning_rate": 2.2811931094492074e-05, "loss": 0.07798799872398376, "step": 2755 }, { "epoch": 0.37259292431706226, "grad_norm": 0.48120924830436707, "learning_rate": 2.280608731934966e-05, "loss": 0.054799821227788925, "step": 2756 }, { "epoch": 0.37272811768582753, "grad_norm": 0.4023662507534027, "learning_rate": 2.280024191893823e-05, "loss": 0.05645638704299927, "step": 2757 }, { "epoch": 0.37286331105459275, "grad_norm": 0.6345804333686829, "learning_rate": 2.279439489447485e-05, "loss": 0.06636759638786316, "step": 2758 }, { "epoch": 0.372998504423358, "grad_norm": 0.7730368971824646, "learning_rate": 2.278854624717688e-05, "loss": 0.04965287446975708, "step": 2759 }, { "epoch": 0.3731336977921233, "grad_norm": 0.39420077204704285, "learning_rate": 2.2782695978262045e-05, "loss": 0.06774768233299255, "step": 2760 }, { "epoch": 0.37326889116088857, "grad_norm": 0.9592914581298828, "learning_rate": 2.2776844088948406e-05, "loss": 0.06119055300951004, "step": 2761 }, { "epoch": 0.37340408452965385, "grad_norm": 1.4865642786026, "learning_rate": 2.2770990580454364e-05, "loss": 0.06855255365371704, "step": 2762 }, { "epoch": 0.37353927789841906, "grad_norm": 1.741552472114563, "learning_rate": 2.276513545399864e-05, "loss": 0.059029772877693176, "step": 2763 }, { "epoch": 0.37367447126718434, "grad_norm": 0.21837197244167328, "learning_rate": 2.2759278710800306e-05, "loss": 0.03799697756767273, "step": 2764 }, { "epoch": 0.3738096646359496, "grad_norm": 0.784138560295105, "learning_rate": 2.275342035207876e-05, "loss": 0.05388563871383667, "step": 2765 }, { "epoch": 0.3739448580047149, "grad_norm": 1.3761948347091675, "learning_rate": 2.2747560379053752e-05, "loss": 0.0802559107542038, "step": 2766 }, { "epoch": 0.3740800513734801, "grad_norm": 0.3484854996204376, "learning_rate": 2.2741698792945364e-05, "loss": 0.0629880428314209, "step": 2767 }, { "epoch": 0.3742152447422454, "grad_norm": 0.3504173457622528, "learning_rate": 2.2735835594974003e-05, "loss": 0.05580052733421326, "step": 2768 }, { "epoch": 0.37435043811101065, "grad_norm": 0.7178674340248108, "learning_rate": 2.272997078636042e-05, "loss": 0.04396190121769905, "step": 2769 }, { "epoch": 0.3744856314797759, "grad_norm": 0.7006298303604126, "learning_rate": 2.272410436832569e-05, "loss": 0.06192396208643913, "step": 2770 }, { "epoch": 0.3746208248485412, "grad_norm": 0.6319068074226379, "learning_rate": 2.2718236342091248e-05, "loss": 0.04144385829567909, "step": 2771 }, { "epoch": 0.3747560182173064, "grad_norm": 1.289096713066101, "learning_rate": 2.2712366708878838e-05, "loss": 0.06428465247154236, "step": 2772 }, { "epoch": 0.3748912115860717, "grad_norm": 0.6738072633743286, "learning_rate": 2.2706495469910552e-05, "loss": 0.05183836817741394, "step": 2773 }, { "epoch": 0.37502640495483697, "grad_norm": 0.5354626178741455, "learning_rate": 2.2700622626408814e-05, "loss": 0.046249933540821075, "step": 2774 }, { "epoch": 0.37516159832360224, "grad_norm": 0.5030537843704224, "learning_rate": 2.2694748179596375e-05, "loss": 0.05862545967102051, "step": 2775 }, { "epoch": 0.3752967916923675, "grad_norm": 0.4092203676700592, "learning_rate": 2.2688872130696342e-05, "loss": 0.05227592587471008, "step": 2776 }, { "epoch": 0.37543198506113273, "grad_norm": 0.3558029532432556, "learning_rate": 2.268299448093212e-05, "loss": 0.05536097288131714, "step": 2777 }, { "epoch": 0.375567178429898, "grad_norm": 0.6395118832588196, "learning_rate": 2.2677115231527482e-05, "loss": 0.049967266619205475, "step": 2778 }, { "epoch": 0.3757023717986633, "grad_norm": 0.6145211458206177, "learning_rate": 2.267123438370651e-05, "loss": 0.07054963707923889, "step": 2779 }, { "epoch": 0.37583756516742856, "grad_norm": 0.6234350800514221, "learning_rate": 2.266535193869363e-05, "loss": 0.056785374879837036, "step": 2780 }, { "epoch": 0.3759727585361938, "grad_norm": 0.3293749988079071, "learning_rate": 2.2659467897713604e-05, "loss": 0.053153231739997864, "step": 2781 }, { "epoch": 0.37610795190495905, "grad_norm": 0.6051040291786194, "learning_rate": 2.2653582261991516e-05, "loss": 0.05777176097035408, "step": 2782 }, { "epoch": 0.3762431452737243, "grad_norm": 0.5731443166732788, "learning_rate": 2.2647695032752785e-05, "loss": 0.05151727795600891, "step": 2783 }, { "epoch": 0.3763783386424896, "grad_norm": 0.5319263935089111, "learning_rate": 2.264180621122317e-05, "loss": 0.04741548374295235, "step": 2784 }, { "epoch": 0.37651353201125487, "grad_norm": 0.3491629958152771, "learning_rate": 2.2635915798628747e-05, "loss": 0.05430998653173447, "step": 2785 }, { "epoch": 0.3766487253800201, "grad_norm": 0.5214946269989014, "learning_rate": 2.2630023796195932e-05, "loss": 0.05097288638353348, "step": 2786 }, { "epoch": 0.37678391874878536, "grad_norm": 0.4239116609096527, "learning_rate": 2.262413020515148e-05, "loss": 0.047918349504470825, "step": 2787 }, { "epoch": 0.37691911211755064, "grad_norm": 0.7723445296287537, "learning_rate": 2.261823502672246e-05, "loss": 0.06741046905517578, "step": 2788 }, { "epoch": 0.3770543054863159, "grad_norm": 0.6508707404136658, "learning_rate": 2.261233826213628e-05, "loss": 0.05089424550533295, "step": 2789 }, { "epoch": 0.3771894988550812, "grad_norm": 0.9169923663139343, "learning_rate": 2.2606439912620688e-05, "loss": 0.05232696235179901, "step": 2790 }, { "epoch": 0.3773246922238464, "grad_norm": 0.7651610970497131, "learning_rate": 2.2600539979403734e-05, "loss": 0.0608372688293457, "step": 2791 }, { "epoch": 0.3774598855926117, "grad_norm": 0.9743182063102722, "learning_rate": 2.259463846371383e-05, "loss": 0.0558069609105587, "step": 2792 }, { "epoch": 0.37759507896137695, "grad_norm": 0.3944699764251709, "learning_rate": 2.2588735366779698e-05, "loss": 0.0599839985370636, "step": 2793 }, { "epoch": 0.3777302723301422, "grad_norm": 0.4863327741622925, "learning_rate": 2.2582830689830394e-05, "loss": 0.059852585196495056, "step": 2794 }, { "epoch": 0.37786546569890744, "grad_norm": 0.30877846479415894, "learning_rate": 2.2576924434095305e-05, "loss": 0.03760230168700218, "step": 2795 }, { "epoch": 0.3780006590676727, "grad_norm": 0.4678315222263336, "learning_rate": 2.257101660080414e-05, "loss": 0.047534674406051636, "step": 2796 }, { "epoch": 0.378135852436438, "grad_norm": 0.38959020376205444, "learning_rate": 2.256510719118695e-05, "loss": 0.03578207641839981, "step": 2797 }, { "epoch": 0.37827104580520327, "grad_norm": 0.9588425159454346, "learning_rate": 2.2559196206474094e-05, "loss": 0.04739844799041748, "step": 2798 }, { "epoch": 0.37840623917396854, "grad_norm": 0.807678759098053, "learning_rate": 2.2553283647896287e-05, "loss": 0.0760999321937561, "step": 2799 }, { "epoch": 0.37854143254273376, "grad_norm": 1.932219386100769, "learning_rate": 2.254736951668454e-05, "loss": 0.048617780208587646, "step": 2800 }, { "epoch": 0.37867662591149903, "grad_norm": 0.5309522747993469, "learning_rate": 2.2541453814070212e-05, "loss": 0.06753760576248169, "step": 2801 }, { "epoch": 0.3788118192802643, "grad_norm": 0.8778548240661621, "learning_rate": 2.2535536541284983e-05, "loss": 0.06830979883670807, "step": 2802 }, { "epoch": 0.3789470126490296, "grad_norm": 0.6769487857818604, "learning_rate": 2.2529617699560857e-05, "loss": 0.05845741555094719, "step": 2803 }, { "epoch": 0.37908220601779485, "grad_norm": 0.326483815908432, "learning_rate": 2.2523697290130185e-05, "loss": 0.061376750469207764, "step": 2804 }, { "epoch": 0.3792173993865601, "grad_norm": 0.8158133029937744, "learning_rate": 2.251777531422561e-05, "loss": 0.04257579892873764, "step": 2805 }, { "epoch": 0.37935259275532535, "grad_norm": 1.3088444471359253, "learning_rate": 2.2511851773080127e-05, "loss": 0.05869834125041962, "step": 2806 }, { "epoch": 0.3794877861240906, "grad_norm": 0.33457887172698975, "learning_rate": 2.2505926667927043e-05, "loss": 0.05326327681541443, "step": 2807 }, { "epoch": 0.3796229794928559, "grad_norm": 0.21494072675704956, "learning_rate": 2.25e-05, "loss": 0.035292450338602066, "step": 2808 }, { "epoch": 0.3797581728616211, "grad_norm": 0.6951077580451965, "learning_rate": 2.2494071770532966e-05, "loss": 0.06634253263473511, "step": 2809 }, { "epoch": 0.3798933662303864, "grad_norm": 0.7285453081130981, "learning_rate": 2.2488141980760223e-05, "loss": 0.07704959809780121, "step": 2810 }, { "epoch": 0.38002855959915166, "grad_norm": 0.21081511676311493, "learning_rate": 2.248221063191639e-05, "loss": 0.05316847562789917, "step": 2811 }, { "epoch": 0.38016375296791693, "grad_norm": 0.9092246890068054, "learning_rate": 2.24762777252364e-05, "loss": 0.06442821025848389, "step": 2812 }, { "epoch": 0.3802989463366822, "grad_norm": 0.2412085086107254, "learning_rate": 2.2470343261955525e-05, "loss": 0.060990944504737854, "step": 2813 }, { "epoch": 0.3804341397054474, "grad_norm": 0.4725414514541626, "learning_rate": 2.246440724330934e-05, "loss": 0.06503316015005112, "step": 2814 }, { "epoch": 0.3805693330742127, "grad_norm": 0.8974441289901733, "learning_rate": 2.2458469670533765e-05, "loss": 0.06016567349433899, "step": 2815 }, { "epoch": 0.380704526442978, "grad_norm": 0.2822750210762024, "learning_rate": 2.2452530544865034e-05, "loss": 0.04852447658777237, "step": 2816 }, { "epoch": 0.38083971981174325, "grad_norm": 0.8395393490791321, "learning_rate": 2.24465898675397e-05, "loss": 0.0711175799369812, "step": 2817 }, { "epoch": 0.3809749131805085, "grad_norm": 0.6369546055793762, "learning_rate": 2.244064763979464e-05, "loss": 0.061800286173820496, "step": 2818 }, { "epoch": 0.38111010654927374, "grad_norm": 0.6756783723831177, "learning_rate": 2.2434703862867068e-05, "loss": 0.05368724465370178, "step": 2819 }, { "epoch": 0.381245299918039, "grad_norm": 0.4739999771118164, "learning_rate": 2.2428758537994504e-05, "loss": 0.061122551560401917, "step": 2820 }, { "epoch": 0.3813804932868043, "grad_norm": 0.3748827278614044, "learning_rate": 2.24228116664148e-05, "loss": 0.03840610384941101, "step": 2821 }, { "epoch": 0.38151568665556956, "grad_norm": 0.4113057255744934, "learning_rate": 2.2416863249366125e-05, "loss": 0.058167681097984314, "step": 2822 }, { "epoch": 0.3816508800243348, "grad_norm": 0.5908141732215881, "learning_rate": 2.241091328808696e-05, "loss": 0.06661194562911987, "step": 2823 }, { "epoch": 0.38178607339310006, "grad_norm": 1.5720164775848389, "learning_rate": 2.240496178381614e-05, "loss": 0.06552646309137344, "step": 2824 }, { "epoch": 0.38192126676186533, "grad_norm": 0.783643364906311, "learning_rate": 2.239900873779278e-05, "loss": 0.06343169510364532, "step": 2825 }, { "epoch": 0.3820564601306306, "grad_norm": 0.40840062499046326, "learning_rate": 2.2393054151256352e-05, "loss": 0.0626457929611206, "step": 2826 }, { "epoch": 0.3821916534993959, "grad_norm": 0.290694922208786, "learning_rate": 2.238709802544662e-05, "loss": 0.04983705282211304, "step": 2827 }, { "epoch": 0.3823268468681611, "grad_norm": 0.38798582553863525, "learning_rate": 2.2381140361603686e-05, "loss": 0.0732077807188034, "step": 2828 }, { "epoch": 0.38246204023692637, "grad_norm": 0.5534157156944275, "learning_rate": 2.237518116096797e-05, "loss": 0.053943514823913574, "step": 2829 }, { "epoch": 0.38259723360569164, "grad_norm": 0.34634479880332947, "learning_rate": 2.2369220424780203e-05, "loss": 0.060630276799201965, "step": 2830 }, { "epoch": 0.3827324269744569, "grad_norm": 0.561322808265686, "learning_rate": 2.2363258154281452e-05, "loss": 0.046656541526317596, "step": 2831 }, { "epoch": 0.3828676203432222, "grad_norm": 0.3773760199546814, "learning_rate": 2.2357294350713088e-05, "loss": 0.057856976985931396, "step": 2832 }, { "epoch": 0.3830028137119874, "grad_norm": 0.31006112694740295, "learning_rate": 2.2351329015316802e-05, "loss": 0.05619370937347412, "step": 2833 }, { "epoch": 0.3831380070807527, "grad_norm": 0.4072780907154083, "learning_rate": 2.2345362149334613e-05, "loss": 0.05969315767288208, "step": 2834 }, { "epoch": 0.38327320044951796, "grad_norm": 0.6256647706031799, "learning_rate": 2.2339393754008854e-05, "loss": 0.06885421276092529, "step": 2835 }, { "epoch": 0.38340839381828323, "grad_norm": 0.5506196618080139, "learning_rate": 2.233342383058218e-05, "loss": 0.058861277997493744, "step": 2836 }, { "epoch": 0.38354358718704845, "grad_norm": 0.5706689953804016, "learning_rate": 2.2327452380297554e-05, "loss": 0.05928713083267212, "step": 2837 }, { "epoch": 0.3836787805558137, "grad_norm": 0.6916266083717346, "learning_rate": 2.232147940439827e-05, "loss": 0.054866909980773926, "step": 2838 }, { "epoch": 0.383813973924579, "grad_norm": 1.3635517358779907, "learning_rate": 2.2315504904127936e-05, "loss": 0.06967507302761078, "step": 2839 }, { "epoch": 0.3839491672933443, "grad_norm": 0.40929463505744934, "learning_rate": 2.2309528880730463e-05, "loss": 0.057257890701293945, "step": 2840 }, { "epoch": 0.38408436066210955, "grad_norm": 1.0062419176101685, "learning_rate": 2.2303551335450096e-05, "loss": 0.04601931944489479, "step": 2841 }, { "epoch": 0.38421955403087477, "grad_norm": 0.3769949674606323, "learning_rate": 2.2297572269531398e-05, "loss": 0.05190584063529968, "step": 2842 }, { "epoch": 0.38435474739964004, "grad_norm": 0.32087311148643494, "learning_rate": 2.2291591684219243e-05, "loss": 0.041286490857601166, "step": 2843 }, { "epoch": 0.3844899407684053, "grad_norm": 0.292425274848938, "learning_rate": 2.2285609580758806e-05, "loss": 0.05107489973306656, "step": 2844 }, { "epoch": 0.3846251341371706, "grad_norm": 0.5732579827308655, "learning_rate": 2.227962596039561e-05, "loss": 0.07750353217124939, "step": 2845 }, { "epoch": 0.3847603275059358, "grad_norm": 0.4695257842540741, "learning_rate": 2.2273640824375462e-05, "loss": 0.0394764244556427, "step": 2846 }, { "epoch": 0.3848955208747011, "grad_norm": 0.25707632303237915, "learning_rate": 2.2267654173944515e-05, "loss": 0.04923805594444275, "step": 2847 }, { "epoch": 0.38503071424346635, "grad_norm": 0.4969066381454468, "learning_rate": 2.2261666010349212e-05, "loss": 0.05975627154111862, "step": 2848 }, { "epoch": 0.38516590761223163, "grad_norm": 0.5696791410446167, "learning_rate": 2.2255676334836317e-05, "loss": 0.06848347187042236, "step": 2849 }, { "epoch": 0.3853011009809969, "grad_norm": 0.2937922179698944, "learning_rate": 2.2249685148652917e-05, "loss": 0.05625009536743164, "step": 2850 }, { "epoch": 0.3854362943497621, "grad_norm": 0.5420044660568237, "learning_rate": 2.224369245304641e-05, "loss": 0.056445397436618805, "step": 2851 }, { "epoch": 0.3855714877185274, "grad_norm": 0.3149201571941376, "learning_rate": 2.2237698249264507e-05, "loss": 0.0482441782951355, "step": 2852 }, { "epoch": 0.38570668108729267, "grad_norm": 0.9132434129714966, "learning_rate": 2.2231702538555235e-05, "loss": 0.05407146364450455, "step": 2853 }, { "epoch": 0.38584187445605794, "grad_norm": 0.4237323999404907, "learning_rate": 2.2225705322166928e-05, "loss": 0.04787974804639816, "step": 2854 }, { "epoch": 0.3859770678248232, "grad_norm": 0.4087650179862976, "learning_rate": 2.2219706601348242e-05, "loss": 0.041061609983444214, "step": 2855 }, { "epoch": 0.38611226119358844, "grad_norm": 1.379231572151184, "learning_rate": 2.221370637734814e-05, "loss": 0.07291083037853241, "step": 2856 }, { "epoch": 0.3862474545623537, "grad_norm": 0.4396313726902008, "learning_rate": 2.22077046514159e-05, "loss": 0.05511271953582764, "step": 2857 }, { "epoch": 0.386382647931119, "grad_norm": 0.3383207321166992, "learning_rate": 2.220170142480112e-05, "loss": 0.055924803018569946, "step": 2858 }, { "epoch": 0.38651784129988426, "grad_norm": 0.22424940764904022, "learning_rate": 2.2195696698753695e-05, "loss": 0.036877959966659546, "step": 2859 }, { "epoch": 0.3866530346686495, "grad_norm": 0.6279323101043701, "learning_rate": 2.2189690474523844e-05, "loss": 0.06415797024965286, "step": 2860 }, { "epoch": 0.38678822803741475, "grad_norm": 0.4282463490962982, "learning_rate": 2.21836827533621e-05, "loss": 0.05457483232021332, "step": 2861 }, { "epoch": 0.38692342140618, "grad_norm": 0.3078089654445648, "learning_rate": 2.2177673536519297e-05, "loss": 0.04797707498073578, "step": 2862 }, { "epoch": 0.3870586147749453, "grad_norm": 0.2630147635936737, "learning_rate": 2.217166282524659e-05, "loss": 0.05074311047792435, "step": 2863 }, { "epoch": 0.38719380814371057, "grad_norm": 0.6083945631980896, "learning_rate": 2.216565062079544e-05, "loss": 0.04444461315870285, "step": 2864 }, { "epoch": 0.3873290015124758, "grad_norm": 0.9929003119468689, "learning_rate": 2.2159636924417612e-05, "loss": 0.06157923489809036, "step": 2865 }, { "epoch": 0.38746419488124106, "grad_norm": 0.6618122458457947, "learning_rate": 2.2153621737365205e-05, "loss": 0.04268050193786621, "step": 2866 }, { "epoch": 0.38759938825000634, "grad_norm": 0.424345999956131, "learning_rate": 2.2147605060890598e-05, "loss": 0.040484413504600525, "step": 2867 }, { "epoch": 0.3877345816187716, "grad_norm": 1.0123282670974731, "learning_rate": 2.2141586896246503e-05, "loss": 0.061379581689834595, "step": 2868 }, { "epoch": 0.3878697749875369, "grad_norm": 0.46783512830734253, "learning_rate": 2.2135567244685933e-05, "loss": 0.047980695962905884, "step": 2869 }, { "epoch": 0.3880049683563021, "grad_norm": 0.5564154982566833, "learning_rate": 2.2129546107462214e-05, "loss": 0.05251924321055412, "step": 2870 }, { "epoch": 0.3881401617250674, "grad_norm": 0.6100112795829773, "learning_rate": 2.212352348582897e-05, "loss": 0.06808388233184814, "step": 2871 }, { "epoch": 0.38827535509383265, "grad_norm": 0.32874441146850586, "learning_rate": 2.2117499381040157e-05, "loss": 0.06659463047981262, "step": 2872 }, { "epoch": 0.3884105484625979, "grad_norm": 0.5706319212913513, "learning_rate": 2.211147379435001e-05, "loss": 0.04702724516391754, "step": 2873 }, { "epoch": 0.38854574183136315, "grad_norm": 1.2208558320999146, "learning_rate": 2.2105446727013098e-05, "loss": 0.06388106197118759, "step": 2874 }, { "epoch": 0.3886809352001284, "grad_norm": 0.40987250208854675, "learning_rate": 2.209941818028429e-05, "loss": 0.059205979108810425, "step": 2875 }, { "epoch": 0.3888161285688937, "grad_norm": 0.4029499590396881, "learning_rate": 2.2093388155418757e-05, "loss": 0.05426451563835144, "step": 2876 }, { "epoch": 0.38895132193765897, "grad_norm": 0.621417224407196, "learning_rate": 2.2087356653671982e-05, "loss": 0.05353108048439026, "step": 2877 }, { "epoch": 0.38908651530642424, "grad_norm": 0.24934527277946472, "learning_rate": 2.2081323676299756e-05, "loss": 0.046502117067575455, "step": 2878 }, { "epoch": 0.38922170867518946, "grad_norm": 0.34832093119621277, "learning_rate": 2.207528922455818e-05, "loss": 0.05424058437347412, "step": 2879 }, { "epoch": 0.38935690204395473, "grad_norm": 0.46701887249946594, "learning_rate": 2.206925329970366e-05, "loss": 0.047336872667074203, "step": 2880 }, { "epoch": 0.38949209541272, "grad_norm": 1.636969804763794, "learning_rate": 2.20632159029929e-05, "loss": 0.08789052069187164, "step": 2881 }, { "epoch": 0.3896272887814853, "grad_norm": 1.1907824277877808, "learning_rate": 2.2057177035682926e-05, "loss": 0.07766807079315186, "step": 2882 }, { "epoch": 0.38976248215025056, "grad_norm": 0.5773687362670898, "learning_rate": 2.2051136699031058e-05, "loss": 0.07759416103363037, "step": 2883 }, { "epoch": 0.3898976755190158, "grad_norm": 0.2676785886287689, "learning_rate": 2.2045094894294933e-05, "loss": 0.046090394258499146, "step": 2884 }, { "epoch": 0.39003286888778105, "grad_norm": 0.4202217161655426, "learning_rate": 2.203905162273248e-05, "loss": 0.04131387919187546, "step": 2885 }, { "epoch": 0.3901680622565463, "grad_norm": 1.154231309890747, "learning_rate": 2.203300688560194e-05, "loss": 0.06303060054779053, "step": 2886 }, { "epoch": 0.3903032556253116, "grad_norm": 0.3597363531589508, "learning_rate": 2.2026960684161862e-05, "loss": 0.05045539140701294, "step": 2887 }, { "epoch": 0.3904384489940768, "grad_norm": 0.9783898591995239, "learning_rate": 2.2020913019671097e-05, "loss": 0.06813019514083862, "step": 2888 }, { "epoch": 0.3905736423628421, "grad_norm": 0.726236879825592, "learning_rate": 2.20148638933888e-05, "loss": 0.06245800107717514, "step": 2889 }, { "epoch": 0.39070883573160736, "grad_norm": 1.0685704946517944, "learning_rate": 2.2008813306574438e-05, "loss": 0.05793141573667526, "step": 2890 }, { "epoch": 0.39084402910037264, "grad_norm": 0.4038330316543579, "learning_rate": 2.200276126048777e-05, "loss": 0.05636018514633179, "step": 2891 }, { "epoch": 0.3909792224691379, "grad_norm": 1.0796375274658203, "learning_rate": 2.199670775638886e-05, "loss": 0.07029972970485687, "step": 2892 }, { "epoch": 0.39111441583790313, "grad_norm": 0.5935426354408264, "learning_rate": 2.1990652795538085e-05, "loss": 0.08655223995447159, "step": 2893 }, { "epoch": 0.3912496092066684, "grad_norm": 0.42973801493644714, "learning_rate": 2.1984596379196117e-05, "loss": 0.05218249931931496, "step": 2894 }, { "epoch": 0.3913848025754337, "grad_norm": 0.36252933740615845, "learning_rate": 2.1978538508623942e-05, "loss": 0.04683312773704529, "step": 2895 }, { "epoch": 0.39151999594419895, "grad_norm": 0.8237360119819641, "learning_rate": 2.197247918508283e-05, "loss": 0.04931851103901863, "step": 2896 }, { "epoch": 0.3916551893129642, "grad_norm": 0.5000747442245483, "learning_rate": 2.1966418409834374e-05, "loss": 0.046158745884895325, "step": 2897 }, { "epoch": 0.39179038268172944, "grad_norm": 0.4377847909927368, "learning_rate": 2.1960356184140453e-05, "loss": 0.0648607611656189, "step": 2898 }, { "epoch": 0.3919255760504947, "grad_norm": 1.9298532009124756, "learning_rate": 2.1954292509263258e-05, "loss": 0.06234771013259888, "step": 2899 }, { "epoch": 0.39206076941926, "grad_norm": 0.48706138134002686, "learning_rate": 2.194822738646528e-05, "loss": 0.06517994403839111, "step": 2900 }, { "epoch": 0.39219596278802527, "grad_norm": 0.3350776731967926, "learning_rate": 2.1942160817009304e-05, "loss": 0.05278658866882324, "step": 2901 }, { "epoch": 0.3923311561567905, "grad_norm": 0.8154242634773254, "learning_rate": 2.193609280215843e-05, "loss": 0.03802967816591263, "step": 2902 }, { "epoch": 0.39246634952555576, "grad_norm": 0.45581865310668945, "learning_rate": 2.1930023343176044e-05, "loss": 0.05451982468366623, "step": 2903 }, { "epoch": 0.39260154289432103, "grad_norm": 0.533525288105011, "learning_rate": 2.1923952441325837e-05, "loss": 0.05818824842572212, "step": 2904 }, { "epoch": 0.3927367362630863, "grad_norm": 0.4630977213382721, "learning_rate": 2.191788009787182e-05, "loss": 0.04634997993707657, "step": 2905 }, { "epoch": 0.3928719296318516, "grad_norm": 0.8678225874900818, "learning_rate": 2.1911806314078267e-05, "loss": 0.08176819980144501, "step": 2906 }, { "epoch": 0.3930071230006168, "grad_norm": 0.7259595990180969, "learning_rate": 2.1905731091209786e-05, "loss": 0.06727480888366699, "step": 2907 }, { "epoch": 0.39314231636938207, "grad_norm": 0.3742668926715851, "learning_rate": 2.1899654430531262e-05, "loss": 0.049308210611343384, "step": 2908 }, { "epoch": 0.39327750973814735, "grad_norm": 0.699688732624054, "learning_rate": 2.18935763333079e-05, "loss": 0.051163263618946075, "step": 2909 }, { "epoch": 0.3934127031069126, "grad_norm": 1.3867462873458862, "learning_rate": 2.1887496800805175e-05, "loss": 0.06994909048080444, "step": 2910 }, { "epoch": 0.3935478964756779, "grad_norm": 0.29912394285202026, "learning_rate": 2.188141583428889e-05, "loss": 0.050768643617630005, "step": 2911 }, { "epoch": 0.3936830898444431, "grad_norm": 0.4691031873226166, "learning_rate": 2.1875333435025138e-05, "loss": 0.04402311146259308, "step": 2912 }, { "epoch": 0.3938182832132084, "grad_norm": 0.3649757504463196, "learning_rate": 2.1869249604280296e-05, "loss": 0.06400299072265625, "step": 2913 }, { "epoch": 0.39395347658197366, "grad_norm": 0.43007636070251465, "learning_rate": 2.1863164343321057e-05, "loss": 0.04669588804244995, "step": 2914 }, { "epoch": 0.39408866995073893, "grad_norm": 0.6839202642440796, "learning_rate": 2.1857077653414397e-05, "loss": 0.07176864147186279, "step": 2915 }, { "epoch": 0.39422386331950415, "grad_norm": 1.8159350156784058, "learning_rate": 2.185098953582761e-05, "loss": 0.07163586467504501, "step": 2916 }, { "epoch": 0.3943590566882694, "grad_norm": 0.7029412984848022, "learning_rate": 2.1844899991828265e-05, "loss": 0.05822014808654785, "step": 2917 }, { "epoch": 0.3944942500570347, "grad_norm": 0.7634820342063904, "learning_rate": 2.1838809022684247e-05, "loss": 0.04471708834171295, "step": 2918 }, { "epoch": 0.3946294434258, "grad_norm": 0.8820807933807373, "learning_rate": 2.1832716629663712e-05, "loss": 0.052506059408187866, "step": 2919 }, { "epoch": 0.39476463679456525, "grad_norm": 0.4695444107055664, "learning_rate": 2.1826622814035138e-05, "loss": 0.050924718379974365, "step": 2920 }, { "epoch": 0.39489983016333047, "grad_norm": 0.5966514945030212, "learning_rate": 2.1820527577067293e-05, "loss": 0.06248673424124718, "step": 2921 }, { "epoch": 0.39503502353209574, "grad_norm": 0.2945012152194977, "learning_rate": 2.1814430920029238e-05, "loss": 0.04624895006418228, "step": 2922 }, { "epoch": 0.395170216900861, "grad_norm": 0.681483805179596, "learning_rate": 2.1808332844190325e-05, "loss": 0.07906293869018555, "step": 2923 }, { "epoch": 0.3953054102696263, "grad_norm": 0.32935208082199097, "learning_rate": 2.1802233350820203e-05, "loss": 0.04863521456718445, "step": 2924 }, { "epoch": 0.39544060363839156, "grad_norm": 0.662350058555603, "learning_rate": 2.179613244118883e-05, "loss": 0.058676302433013916, "step": 2925 }, { "epoch": 0.3955757970071568, "grad_norm": 0.4267386198043823, "learning_rate": 2.1790030116566436e-05, "loss": 0.07477055490016937, "step": 2926 }, { "epoch": 0.39571099037592206, "grad_norm": 0.48754751682281494, "learning_rate": 2.1783926378223563e-05, "loss": 0.06251576542854309, "step": 2927 }, { "epoch": 0.39584618374468733, "grad_norm": 0.9842320680618286, "learning_rate": 2.1777821227431048e-05, "loss": 0.0857323408126831, "step": 2928 }, { "epoch": 0.3959813771134526, "grad_norm": 0.7058376669883728, "learning_rate": 2.1771714665460005e-05, "loss": 0.04794486612081528, "step": 2929 }, { "epoch": 0.3961165704822178, "grad_norm": 0.7239179015159607, "learning_rate": 2.1765606693581857e-05, "loss": 0.06123724579811096, "step": 2930 }, { "epoch": 0.3962517638509831, "grad_norm": 0.5751566290855408, "learning_rate": 2.1759497313068316e-05, "loss": 0.06578932702541351, "step": 2931 }, { "epoch": 0.39638695721974837, "grad_norm": 0.36836645007133484, "learning_rate": 2.175338652519139e-05, "loss": 0.054364919662475586, "step": 2932 }, { "epoch": 0.39652215058851364, "grad_norm": 0.8371866345405579, "learning_rate": 2.1747274331223377e-05, "loss": 0.041383594274520874, "step": 2933 }, { "epoch": 0.3966573439572789, "grad_norm": 0.8707664608955383, "learning_rate": 2.1741160732436865e-05, "loss": 0.051559921354055405, "step": 2934 }, { "epoch": 0.39679253732604414, "grad_norm": 0.6024278998374939, "learning_rate": 2.1735045730104746e-05, "loss": 0.049465976655483246, "step": 2935 }, { "epoch": 0.3969277306948094, "grad_norm": 0.39770883321762085, "learning_rate": 2.1728929325500183e-05, "loss": 0.04017620533704758, "step": 2936 }, { "epoch": 0.3970629240635747, "grad_norm": 0.33352670073509216, "learning_rate": 2.1722811519896654e-05, "loss": 0.04881981760263443, "step": 2937 }, { "epoch": 0.39719811743233996, "grad_norm": 1.5417475700378418, "learning_rate": 2.171669231456792e-05, "loss": 0.07164902985095978, "step": 2938 }, { "epoch": 0.39733331080110523, "grad_norm": 1.3256629705429077, "learning_rate": 2.1710571710788025e-05, "loss": 0.06817503273487091, "step": 2939 }, { "epoch": 0.39746850416987045, "grad_norm": 0.73344886302948, "learning_rate": 2.1704449709831312e-05, "loss": 0.061049096286296844, "step": 2940 }, { "epoch": 0.3976036975386357, "grad_norm": 1.999131679534912, "learning_rate": 2.1698326312972423e-05, "loss": 0.05661129951477051, "step": 2941 }, { "epoch": 0.397738890907401, "grad_norm": 0.4522429406642914, "learning_rate": 2.1692201521486268e-05, "loss": 0.04034745693206787, "step": 2942 }, { "epoch": 0.3978740842761663, "grad_norm": 1.2400321960449219, "learning_rate": 2.1686075336648075e-05, "loss": 0.05283085256814957, "step": 2943 }, { "epoch": 0.3980092776449315, "grad_norm": 0.31111636757850647, "learning_rate": 2.167994775973334e-05, "loss": 0.04952539503574371, "step": 2944 }, { "epoch": 0.39814447101369677, "grad_norm": 1.8355605602264404, "learning_rate": 2.167381879201786e-05, "loss": 0.06255283951759338, "step": 2945 }, { "epoch": 0.39827966438246204, "grad_norm": 0.5530511736869812, "learning_rate": 2.166768843477772e-05, "loss": 0.05090506374835968, "step": 2946 }, { "epoch": 0.3984148577512273, "grad_norm": 0.7286508679389954, "learning_rate": 2.166155668928929e-05, "loss": 0.06268691271543503, "step": 2947 }, { "epoch": 0.3985500511199926, "grad_norm": 0.24765899777412415, "learning_rate": 2.1655423556829233e-05, "loss": 0.050964921712875366, "step": 2948 }, { "epoch": 0.3986852444887578, "grad_norm": 0.25484296679496765, "learning_rate": 2.1649289038674504e-05, "loss": 0.050675757229328156, "step": 2949 }, { "epoch": 0.3988204378575231, "grad_norm": 1.1110823154449463, "learning_rate": 2.1643153136102333e-05, "loss": 0.0936761274933815, "step": 2950 }, { "epoch": 0.39895563122628835, "grad_norm": 2.2881293296813965, "learning_rate": 2.1637015850390255e-05, "loss": 0.0666525661945343, "step": 2951 }, { "epoch": 0.39909082459505363, "grad_norm": 1.5375266075134277, "learning_rate": 2.1630877182816087e-05, "loss": 0.05764663964509964, "step": 2952 }, { "epoch": 0.3992260179638189, "grad_norm": 0.5055779218673706, "learning_rate": 2.162473713465793e-05, "loss": 0.050413694232702255, "step": 2953 }, { "epoch": 0.3993612113325841, "grad_norm": 0.6004561185836792, "learning_rate": 2.161859570719417e-05, "loss": 0.04441293329000473, "step": 2954 }, { "epoch": 0.3994964047013494, "grad_norm": 0.9383441209793091, "learning_rate": 2.161245290170349e-05, "loss": 0.05407094955444336, "step": 2955 }, { "epoch": 0.39963159807011467, "grad_norm": 1.520416021347046, "learning_rate": 2.1606308719464858e-05, "loss": 0.05162256956100464, "step": 2956 }, { "epoch": 0.39976679143887994, "grad_norm": 0.6215782761573792, "learning_rate": 2.160016316175752e-05, "loss": 0.06491442024707794, "step": 2957 }, { "epoch": 0.39990198480764516, "grad_norm": 0.4168660044670105, "learning_rate": 2.159401622986101e-05, "loss": 0.039890021085739136, "step": 2958 }, { "epoch": 0.40003717817641044, "grad_norm": 1.2815821170806885, "learning_rate": 2.1587867925055165e-05, "loss": 0.054726384580135345, "step": 2959 }, { "epoch": 0.4001723715451757, "grad_norm": 0.6798976063728333, "learning_rate": 2.158171824862008e-05, "loss": 0.04645705223083496, "step": 2960 }, { "epoch": 0.400307564913941, "grad_norm": 1.2501940727233887, "learning_rate": 2.157556720183616e-05, "loss": 0.04481580853462219, "step": 2961 }, { "epoch": 0.40044275828270626, "grad_norm": 0.9489818215370178, "learning_rate": 2.156941478598409e-05, "loss": 0.08229565620422363, "step": 2962 }, { "epoch": 0.4005779516514715, "grad_norm": 0.469913512468338, "learning_rate": 2.156326100234482e-05, "loss": 0.05371943116188049, "step": 2963 }, { "epoch": 0.40071314502023675, "grad_norm": 0.4225693941116333, "learning_rate": 2.1557105852199612e-05, "loss": 0.03603549301624298, "step": 2964 }, { "epoch": 0.400848338389002, "grad_norm": 0.4249083697795868, "learning_rate": 2.155094933683e-05, "loss": 0.05573997646570206, "step": 2965 }, { "epoch": 0.4009835317577673, "grad_norm": 1.0302414894104004, "learning_rate": 2.1544791457517802e-05, "loss": 0.07400310784578323, "step": 2966 }, { "epoch": 0.4011187251265325, "grad_norm": 0.5317590236663818, "learning_rate": 2.1538632215545126e-05, "loss": 0.04713648557662964, "step": 2967 }, { "epoch": 0.4012539184952978, "grad_norm": 0.7051037549972534, "learning_rate": 2.153247161219435e-05, "loss": 0.04982832074165344, "step": 2968 }, { "epoch": 0.40138911186406306, "grad_norm": 1.0028332471847534, "learning_rate": 2.1526309648748147e-05, "loss": 0.07157133519649506, "step": 2969 }, { "epoch": 0.40152430523282834, "grad_norm": 0.7827900052070618, "learning_rate": 2.1520146326489476e-05, "loss": 0.06770962476730347, "step": 2970 }, { "epoch": 0.4016594986015936, "grad_norm": 0.4194885194301605, "learning_rate": 2.151398164670157e-05, "loss": 0.06479065865278244, "step": 2971 }, { "epoch": 0.40179469197035883, "grad_norm": 0.6523520350456238, "learning_rate": 2.1507815610667948e-05, "loss": 0.06123092770576477, "step": 2972 }, { "epoch": 0.4019298853391241, "grad_norm": 0.4864334762096405, "learning_rate": 2.1501648219672407e-05, "loss": 0.0620855987071991, "step": 2973 }, { "epoch": 0.4020650787078894, "grad_norm": 1.055147409439087, "learning_rate": 2.149547947499904e-05, "loss": 0.06187228113412857, "step": 2974 }, { "epoch": 0.40220027207665465, "grad_norm": 0.6128971576690674, "learning_rate": 2.1489309377932212e-05, "loss": 0.05860280245542526, "step": 2975 }, { "epoch": 0.4023354654454199, "grad_norm": 0.544592022895813, "learning_rate": 2.1483137929756562e-05, "loss": 0.03803078830242157, "step": 2976 }, { "epoch": 0.40247065881418514, "grad_norm": 1.7454712390899658, "learning_rate": 2.147696513175702e-05, "loss": 0.06334815919399261, "step": 2977 }, { "epoch": 0.4026058521829504, "grad_norm": 0.2908405065536499, "learning_rate": 2.1470790985218804e-05, "loss": 0.047992438077926636, "step": 2978 }, { "epoch": 0.4027410455517157, "grad_norm": 0.2873890697956085, "learning_rate": 2.1464615491427393e-05, "loss": 0.042032837867736816, "step": 2979 }, { "epoch": 0.40287623892048097, "grad_norm": 0.26165837049484253, "learning_rate": 2.1458438651668567e-05, "loss": 0.04348848760128021, "step": 2980 }, { "epoch": 0.4030114322892462, "grad_norm": 0.549848198890686, "learning_rate": 2.1452260467228376e-05, "loss": 0.06827035546302795, "step": 2981 }, { "epoch": 0.40314662565801146, "grad_norm": 0.2638908624649048, "learning_rate": 2.144608093939314e-05, "loss": 0.06703531742095947, "step": 2982 }, { "epoch": 0.40328181902677673, "grad_norm": 0.34968826174736023, "learning_rate": 2.1439900069449483e-05, "loss": 0.042341575026512146, "step": 2983 }, { "epoch": 0.403417012395542, "grad_norm": 0.4925195872783661, "learning_rate": 2.1433717858684286e-05, "loss": 0.04880249500274658, "step": 2984 }, { "epoch": 0.4035522057643073, "grad_norm": 0.4777568280696869, "learning_rate": 2.1427534308384724e-05, "loss": 0.061606962233781815, "step": 2985 }, { "epoch": 0.4036873991330725, "grad_norm": 0.45032647252082825, "learning_rate": 2.1421349419838245e-05, "loss": 0.069297194480896, "step": 2986 }, { "epoch": 0.4038225925018378, "grad_norm": 0.6713958382606506, "learning_rate": 2.1415163194332574e-05, "loss": 0.059538375586271286, "step": 2987 }, { "epoch": 0.40395778587060305, "grad_norm": 0.37850481271743774, "learning_rate": 2.1408975633155715e-05, "loss": 0.03341542184352875, "step": 2988 }, { "epoch": 0.4040929792393683, "grad_norm": 0.8188773989677429, "learning_rate": 2.140278673759595e-05, "loss": 0.04894550144672394, "step": 2989 }, { "epoch": 0.4042281726081336, "grad_norm": 0.5429801344871521, "learning_rate": 2.1396596508941847e-05, "loss": 0.05429689586162567, "step": 2990 }, { "epoch": 0.4043633659768988, "grad_norm": 0.42324841022491455, "learning_rate": 2.1390404948482238e-05, "loss": 0.054476361721754074, "step": 2991 }, { "epoch": 0.4044985593456641, "grad_norm": 0.3851200342178345, "learning_rate": 2.1384212057506243e-05, "loss": 0.05033630132675171, "step": 2992 }, { "epoch": 0.40463375271442936, "grad_norm": 0.35794851183891296, "learning_rate": 2.137801783730325e-05, "loss": 0.04697723686695099, "step": 2993 }, { "epoch": 0.40476894608319464, "grad_norm": 0.4880930483341217, "learning_rate": 2.137182228916293e-05, "loss": 0.030466772615909576, "step": 2994 }, { "epoch": 0.40490413945195985, "grad_norm": 0.24870483577251434, "learning_rate": 2.136562541437523e-05, "loss": 0.04872870445251465, "step": 2995 }, { "epoch": 0.40503933282072513, "grad_norm": 0.3435058891773224, "learning_rate": 2.135942721423038e-05, "loss": 0.04835168272256851, "step": 2996 }, { "epoch": 0.4051745261894904, "grad_norm": 0.9559407234191895, "learning_rate": 2.1353227690018865e-05, "loss": 0.05477004870772362, "step": 2997 }, { "epoch": 0.4053097195582557, "grad_norm": 0.6487329006195068, "learning_rate": 2.1347026843031467e-05, "loss": 0.07681387662887573, "step": 2998 }, { "epoch": 0.40544491292702095, "grad_norm": 0.9528166055679321, "learning_rate": 2.1340824674559238e-05, "loss": 0.059322237968444824, "step": 2999 }, { "epoch": 0.40558010629578617, "grad_norm": 0.4483122229576111, "learning_rate": 2.133462118589349e-05, "loss": 0.05703192576766014, "step": 3000 }, { "epoch": 0.40571529966455144, "grad_norm": 0.6086049675941467, "learning_rate": 2.1328416378325837e-05, "loss": 0.05557991564273834, "step": 3001 }, { "epoch": 0.4058504930333167, "grad_norm": 0.39140862226486206, "learning_rate": 2.1322210253148144e-05, "loss": 0.0404512844979763, "step": 3002 }, { "epoch": 0.405985686402082, "grad_norm": 0.3951510787010193, "learning_rate": 2.131600281165257e-05, "loss": 0.05150478705763817, "step": 3003 }, { "epoch": 0.40612087977084727, "grad_norm": 0.48007631301879883, "learning_rate": 2.130979405513152e-05, "loss": 0.056524619460105896, "step": 3004 }, { "epoch": 0.4062560731396125, "grad_norm": 0.39255964756011963, "learning_rate": 2.1303583984877697e-05, "loss": 0.060637444257736206, "step": 3005 }, { "epoch": 0.40639126650837776, "grad_norm": 0.49237433075904846, "learning_rate": 2.1297372602184085e-05, "loss": 0.057009633630514145, "step": 3006 }, { "epoch": 0.40652645987714303, "grad_norm": 1.7834043502807617, "learning_rate": 2.1291159908343907e-05, "loss": 0.051647309213876724, "step": 3007 }, { "epoch": 0.4066616532459083, "grad_norm": 1.6294023990631104, "learning_rate": 2.1284945904650693e-05, "loss": 0.0586942583322525, "step": 3008 }, { "epoch": 0.4067968466146735, "grad_norm": 0.6993623971939087, "learning_rate": 2.127873059239822e-05, "loss": 0.07575511932373047, "step": 3009 }, { "epoch": 0.4069320399834388, "grad_norm": 0.3398081362247467, "learning_rate": 2.127251397288056e-05, "loss": 0.05911538004875183, "step": 3010 }, { "epoch": 0.40706723335220407, "grad_norm": 0.9419555068016052, "learning_rate": 2.126629604739204e-05, "loss": 0.06722565740346909, "step": 3011 }, { "epoch": 0.40720242672096935, "grad_norm": 0.7504845261573792, "learning_rate": 2.1260076817227268e-05, "loss": 0.048842914402484894, "step": 3012 }, { "epoch": 0.4073376200897346, "grad_norm": 0.9056908488273621, "learning_rate": 2.1253856283681122e-05, "loss": 0.05707564949989319, "step": 3013 }, { "epoch": 0.40747281345849984, "grad_norm": 0.3084135949611664, "learning_rate": 2.1247634448048743e-05, "loss": 0.05317723751068115, "step": 3014 }, { "epoch": 0.4076080068272651, "grad_norm": 0.7882542014122009, "learning_rate": 2.1241411311625562e-05, "loss": 0.05224326625466347, "step": 3015 }, { "epoch": 0.4077432001960304, "grad_norm": 0.3396093547344208, "learning_rate": 2.1235186875707257e-05, "loss": 0.06457221508026123, "step": 3016 }, { "epoch": 0.40787839356479566, "grad_norm": 0.35858070850372314, "learning_rate": 2.1228961141589797e-05, "loss": 0.050357699394226074, "step": 3017 }, { "epoch": 0.40801358693356093, "grad_norm": 0.35762953758239746, "learning_rate": 2.122273411056941e-05, "loss": 0.07766478508710861, "step": 3018 }, { "epoch": 0.40814878030232615, "grad_norm": 1.071114182472229, "learning_rate": 2.1216505783942592e-05, "loss": 0.06400462985038757, "step": 3019 }, { "epoch": 0.4082839736710914, "grad_norm": 1.0491416454315186, "learning_rate": 2.121027616300613e-05, "loss": 0.05831335112452507, "step": 3020 }, { "epoch": 0.4084191670398567, "grad_norm": 0.5437660217285156, "learning_rate": 2.1204045249057043e-05, "loss": 0.06762094795703888, "step": 3021 }, { "epoch": 0.408554360408622, "grad_norm": 1.0614601373672485, "learning_rate": 2.119781304339266e-05, "loss": 0.05835258960723877, "step": 3022 }, { "epoch": 0.4086895537773872, "grad_norm": 0.7614403367042542, "learning_rate": 2.1191579547310547e-05, "loss": 0.04494254291057587, "step": 3023 }, { "epoch": 0.40882474714615247, "grad_norm": 0.3509546220302582, "learning_rate": 2.1185344762108556e-05, "loss": 0.06634199619293213, "step": 3024 }, { "epoch": 0.40895994051491774, "grad_norm": 0.24866631627082825, "learning_rate": 2.11791086890848e-05, "loss": 0.03005295991897583, "step": 3025 }, { "epoch": 0.409095133883683, "grad_norm": 1.4082615375518799, "learning_rate": 2.1172871329537662e-05, "loss": 0.056225575506687164, "step": 3026 }, { "epoch": 0.4092303272524483, "grad_norm": 0.5511521100997925, "learning_rate": 2.1166632684765794e-05, "loss": 0.055938757956027985, "step": 3027 }, { "epoch": 0.4093655206212135, "grad_norm": 0.7599920034408569, "learning_rate": 2.1160392756068124e-05, "loss": 0.05547342449426651, "step": 3028 }, { "epoch": 0.4095007139899788, "grad_norm": 0.8268606662750244, "learning_rate": 2.1154151544743826e-05, "loss": 0.05882567539811134, "step": 3029 }, { "epoch": 0.40963590735874406, "grad_norm": 0.23169812560081482, "learning_rate": 2.114790905209236e-05, "loss": 0.04227090999484062, "step": 3030 }, { "epoch": 0.40977110072750933, "grad_norm": 0.6046125888824463, "learning_rate": 2.1141665279413444e-05, "loss": 0.05067253112792969, "step": 3031 }, { "epoch": 0.4099062940962746, "grad_norm": 0.6065956950187683, "learning_rate": 2.1135420228007062e-05, "loss": 0.052970755845308304, "step": 3032 }, { "epoch": 0.4100414874650398, "grad_norm": 0.35070839524269104, "learning_rate": 2.1129173899173474e-05, "loss": 0.054492343217134476, "step": 3033 }, { "epoch": 0.4101766808338051, "grad_norm": 0.6365925073623657, "learning_rate": 2.11229262942132e-05, "loss": 0.05697426199913025, "step": 3034 }, { "epoch": 0.41031187420257037, "grad_norm": 1.2109260559082031, "learning_rate": 2.1116677414427008e-05, "loss": 0.06884992122650146, "step": 3035 }, { "epoch": 0.41044706757133564, "grad_norm": 0.396894633769989, "learning_rate": 2.1110427261115972e-05, "loss": 0.0466761589050293, "step": 3036 }, { "epoch": 0.41058226094010086, "grad_norm": 1.1930204629898071, "learning_rate": 2.1104175835581386e-05, "loss": 0.07297670841217041, "step": 3037 }, { "epoch": 0.41071745430886614, "grad_norm": 0.46178895235061646, "learning_rate": 2.1097923139124846e-05, "loss": 0.05735020712018013, "step": 3038 }, { "epoch": 0.4108526476776314, "grad_norm": 0.7437040209770203, "learning_rate": 2.109166917304819e-05, "loss": 0.04401330649852753, "step": 3039 }, { "epoch": 0.4109878410463967, "grad_norm": 2.015864849090576, "learning_rate": 2.1085413938653532e-05, "loss": 0.07502947747707367, "step": 3040 }, { "epoch": 0.41112303441516196, "grad_norm": 1.0889643430709839, "learning_rate": 2.107915743724323e-05, "loss": 0.04603596031665802, "step": 3041 }, { "epoch": 0.4112582277839272, "grad_norm": 1.5796411037445068, "learning_rate": 2.1072899670119935e-05, "loss": 0.04325581341981888, "step": 3042 }, { "epoch": 0.41139342115269245, "grad_norm": 0.6258715987205505, "learning_rate": 2.1066640638586543e-05, "loss": 0.06860720366239548, "step": 3043 }, { "epoch": 0.4115286145214577, "grad_norm": 0.32061874866485596, "learning_rate": 2.1060380343946223e-05, "loss": 0.06643766164779663, "step": 3044 }, { "epoch": 0.411663807890223, "grad_norm": 0.42691174149513245, "learning_rate": 2.10541187875024e-05, "loss": 0.05105191469192505, "step": 3045 }, { "epoch": 0.4117990012589883, "grad_norm": 1.1686564683914185, "learning_rate": 2.1047855970558753e-05, "loss": 0.06568668782711029, "step": 3046 }, { "epoch": 0.4119341946277535, "grad_norm": 0.44537875056266785, "learning_rate": 2.1041591894419244e-05, "loss": 0.05183150991797447, "step": 3047 }, { "epoch": 0.41206938799651877, "grad_norm": 0.47970667481422424, "learning_rate": 2.1035326560388087e-05, "loss": 0.0559847354888916, "step": 3048 }, { "epoch": 0.41220458136528404, "grad_norm": 0.3690253794193268, "learning_rate": 2.1029059969769756e-05, "loss": 0.044215962290763855, "step": 3049 }, { "epoch": 0.4123397747340493, "grad_norm": 0.47140994668006897, "learning_rate": 2.1022792123868986e-05, "loss": 0.04593484103679657, "step": 3050 }, { "epoch": 0.41247496810281453, "grad_norm": 1.8404030799865723, "learning_rate": 2.1016523023990783e-05, "loss": 0.05107954517006874, "step": 3051 }, { "epoch": 0.4126101614715798, "grad_norm": 0.5912984013557434, "learning_rate": 2.1010252671440398e-05, "loss": 0.04349742829799652, "step": 3052 }, { "epoch": 0.4127453548403451, "grad_norm": 0.6136721968650818, "learning_rate": 2.1003981067523358e-05, "loss": 0.06135822832584381, "step": 3053 }, { "epoch": 0.41288054820911035, "grad_norm": 0.936673641204834, "learning_rate": 2.099770821354544e-05, "loss": 0.05388724058866501, "step": 3054 }, { "epoch": 0.41301574157787563, "grad_norm": 0.3524787425994873, "learning_rate": 2.0991434110812692e-05, "loss": 0.06820368766784668, "step": 3055 }, { "epoch": 0.41315093494664085, "grad_norm": 0.29848381876945496, "learning_rate": 2.0985158760631415e-05, "loss": 0.05465012788772583, "step": 3056 }, { "epoch": 0.4132861283154061, "grad_norm": 0.4333903193473816, "learning_rate": 2.0978882164308157e-05, "loss": 0.05514828860759735, "step": 3057 }, { "epoch": 0.4134213216841714, "grad_norm": 0.49294960498809814, "learning_rate": 2.0972604323149755e-05, "loss": 0.052114322781562805, "step": 3058 }, { "epoch": 0.41355651505293667, "grad_norm": 0.3174901604652405, "learning_rate": 2.0966325238463283e-05, "loss": 0.04909542202949524, "step": 3059 }, { "epoch": 0.41369170842170194, "grad_norm": 0.814630925655365, "learning_rate": 2.096004491155608e-05, "loss": 0.06041857972741127, "step": 3060 }, { "epoch": 0.41382690179046716, "grad_norm": 0.7371432781219482, "learning_rate": 2.0953763343735746e-05, "loss": 0.05977824330329895, "step": 3061 }, { "epoch": 0.41396209515923243, "grad_norm": 0.521529495716095, "learning_rate": 2.0947480536310133e-05, "loss": 0.06525170803070068, "step": 3062 }, { "epoch": 0.4140972885279977, "grad_norm": 0.4722524881362915, "learning_rate": 2.0941196490587352e-05, "loss": 0.05617760121822357, "step": 3063 }, { "epoch": 0.414232481896763, "grad_norm": 0.3377145826816559, "learning_rate": 2.0934911207875782e-05, "loss": 0.0448685958981514, "step": 3064 }, { "epoch": 0.4143676752655282, "grad_norm": 0.5349330306053162, "learning_rate": 2.092862468948405e-05, "loss": 0.04958082735538483, "step": 3065 }, { "epoch": 0.4145028686342935, "grad_norm": 1.1507644653320312, "learning_rate": 2.0922336936721044e-05, "loss": 0.06242275983095169, "step": 3066 }, { "epoch": 0.41463806200305875, "grad_norm": 0.3838142454624176, "learning_rate": 2.0916047950895907e-05, "loss": 0.05453038215637207, "step": 3067 }, { "epoch": 0.414773255371824, "grad_norm": 0.3850749433040619, "learning_rate": 2.0909757733318035e-05, "loss": 0.06268344819545746, "step": 3068 }, { "epoch": 0.4149084487405893, "grad_norm": 0.4310440719127655, "learning_rate": 2.090346628529709e-05, "loss": 0.05703897774219513, "step": 3069 }, { "epoch": 0.4150436421093545, "grad_norm": 0.41503193974494934, "learning_rate": 2.089717360814298e-05, "loss": 0.03965657949447632, "step": 3070 }, { "epoch": 0.4151788354781198, "grad_norm": 0.5820544362068176, "learning_rate": 2.0890879703165885e-05, "loss": 0.04829868674278259, "step": 3071 }, { "epoch": 0.41531402884688506, "grad_norm": 0.5983740091323853, "learning_rate": 2.0884584571676217e-05, "loss": 0.061264362186193466, "step": 3072 }, { "epoch": 0.41544922221565034, "grad_norm": 0.8227573037147522, "learning_rate": 2.0878288214984657e-05, "loss": 0.06077104061841965, "step": 3073 }, { "epoch": 0.4155844155844156, "grad_norm": 0.8223791122436523, "learning_rate": 2.0871990634402147e-05, "loss": 0.05666433274745941, "step": 3074 }, { "epoch": 0.41571960895318083, "grad_norm": 0.6710504293441772, "learning_rate": 2.0865691831239877e-05, "loss": 0.07451266050338745, "step": 3075 }, { "epoch": 0.4158548023219461, "grad_norm": 0.6195180416107178, "learning_rate": 2.0859391806809285e-05, "loss": 0.07262474298477173, "step": 3076 }, { "epoch": 0.4159899956907114, "grad_norm": 0.4847974181175232, "learning_rate": 2.0853090562422072e-05, "loss": 0.058167219161987305, "step": 3077 }, { "epoch": 0.41612518905947665, "grad_norm": 1.5001903772354126, "learning_rate": 2.084678809939019e-05, "loss": 0.05016686022281647, "step": 3078 }, { "epoch": 0.41626038242824187, "grad_norm": 2.3016607761383057, "learning_rate": 2.084048441902585e-05, "loss": 0.069181427359581, "step": 3079 }, { "epoch": 0.41639557579700714, "grad_norm": 1.4211180210113525, "learning_rate": 2.0834179522641508e-05, "loss": 0.06011167913675308, "step": 3080 }, { "epoch": 0.4165307691657724, "grad_norm": 0.2641412615776062, "learning_rate": 2.0827873411549877e-05, "loss": 0.06048768758773804, "step": 3081 }, { "epoch": 0.4166659625345377, "grad_norm": 1.3323535919189453, "learning_rate": 2.0821566087063926e-05, "loss": 0.07238896191120148, "step": 3082 }, { "epoch": 0.41680115590330297, "grad_norm": 0.356070339679718, "learning_rate": 2.081525755049687e-05, "loss": 0.059626445174217224, "step": 3083 }, { "epoch": 0.4169363492720682, "grad_norm": 0.48678216338157654, "learning_rate": 2.0808947803162182e-05, "loss": 0.06464888155460358, "step": 3084 }, { "epoch": 0.41707154264083346, "grad_norm": 0.3271723985671997, "learning_rate": 2.0802636846373578e-05, "loss": 0.04913259297609329, "step": 3085 }, { "epoch": 0.41720673600959873, "grad_norm": 0.8940332531929016, "learning_rate": 2.0796324681445045e-05, "loss": 0.054951827973127365, "step": 3086 }, { "epoch": 0.417341929378364, "grad_norm": 0.6586296558380127, "learning_rate": 2.0790011309690806e-05, "loss": 0.04194846749305725, "step": 3087 }, { "epoch": 0.4174771227471292, "grad_norm": 0.40827396512031555, "learning_rate": 2.0783696732425332e-05, "loss": 0.055915653705596924, "step": 3088 }, { "epoch": 0.4176123161158945, "grad_norm": 0.5980594754219055, "learning_rate": 2.0777380950963355e-05, "loss": 0.06726858019828796, "step": 3089 }, { "epoch": 0.4177475094846598, "grad_norm": 0.42285361886024475, "learning_rate": 2.0771063966619854e-05, "loss": 0.026932690292596817, "step": 3090 }, { "epoch": 0.41788270285342505, "grad_norm": 0.3072812259197235, "learning_rate": 2.0764745780710065e-05, "loss": 0.04799140244722366, "step": 3091 }, { "epoch": 0.4180178962221903, "grad_norm": 0.7011281251907349, "learning_rate": 2.075842639454946e-05, "loss": 0.044330716133117676, "step": 3092 }, { "epoch": 0.41815308959095554, "grad_norm": 0.9490211009979248, "learning_rate": 2.075210580945378e-05, "loss": 0.05724972486495972, "step": 3093 }, { "epoch": 0.4182882829597208, "grad_norm": 0.5081564784049988, "learning_rate": 2.0745784026738984e-05, "loss": 0.05207130312919617, "step": 3094 }, { "epoch": 0.4184234763284861, "grad_norm": 1.4715677499771118, "learning_rate": 2.073946104772132e-05, "loss": 0.06876707077026367, "step": 3095 }, { "epoch": 0.41855866969725136, "grad_norm": 1.1546006202697754, "learning_rate": 2.0733136873717258e-05, "loss": 0.0642404556274414, "step": 3096 }, { "epoch": 0.41869386306601664, "grad_norm": 0.49467188119888306, "learning_rate": 2.0726811506043527e-05, "loss": 0.04943768307566643, "step": 3097 }, { "epoch": 0.41882905643478185, "grad_norm": 0.4919363260269165, "learning_rate": 2.0720484946017104e-05, "loss": 0.03366868942975998, "step": 3098 }, { "epoch": 0.41896424980354713, "grad_norm": 0.9652361273765564, "learning_rate": 2.0714157194955202e-05, "loss": 0.07263242453336716, "step": 3099 }, { "epoch": 0.4190994431723124, "grad_norm": 0.3756640553474426, "learning_rate": 2.070782825417531e-05, "loss": 0.045490097254514694, "step": 3100 }, { "epoch": 0.4192346365410777, "grad_norm": 0.5862973928451538, "learning_rate": 2.0701498124995127e-05, "loss": 0.0809047669172287, "step": 3101 }, { "epoch": 0.4193698299098429, "grad_norm": 0.4543468654155731, "learning_rate": 2.069516680873264e-05, "loss": 0.0488109290599823, "step": 3102 }, { "epoch": 0.41950502327860817, "grad_norm": 0.47270965576171875, "learning_rate": 2.0688834306706047e-05, "loss": 0.06073812395334244, "step": 3103 }, { "epoch": 0.41964021664737344, "grad_norm": 1.1174681186676025, "learning_rate": 2.0682500620233815e-05, "loss": 0.05444493889808655, "step": 3104 }, { "epoch": 0.4197754100161387, "grad_norm": 0.4161030054092407, "learning_rate": 2.0676165750634656e-05, "loss": 0.061250392347574234, "step": 3105 }, { "epoch": 0.419910603384904, "grad_norm": 0.8984009027481079, "learning_rate": 2.0669829699227513e-05, "loss": 0.037275247275829315, "step": 3106 }, { "epoch": 0.4200457967536692, "grad_norm": 0.8720649480819702, "learning_rate": 2.06634924673316e-05, "loss": 0.05299574136734009, "step": 3107 }, { "epoch": 0.4201809901224345, "grad_norm": 0.44392624497413635, "learning_rate": 2.0657154056266346e-05, "loss": 0.04778685420751572, "step": 3108 }, { "epoch": 0.42031618349119976, "grad_norm": 0.4944213628768921, "learning_rate": 2.0650814467351452e-05, "loss": 0.05455660820007324, "step": 3109 }, { "epoch": 0.42045137685996503, "grad_norm": 1.4108939170837402, "learning_rate": 2.064447370190685e-05, "loss": 0.06568697094917297, "step": 3110 }, { "epoch": 0.4205865702287303, "grad_norm": 1.3381396532058716, "learning_rate": 2.0638131761252724e-05, "loss": 0.07705916464328766, "step": 3111 }, { "epoch": 0.4207217635974955, "grad_norm": 0.5389488339424133, "learning_rate": 2.06317886467095e-05, "loss": 0.05240902304649353, "step": 3112 }, { "epoch": 0.4208569569662608, "grad_norm": 0.9664044380187988, "learning_rate": 2.0625444359597847e-05, "loss": 0.07163377106189728, "step": 3113 }, { "epoch": 0.42099215033502607, "grad_norm": 0.28362175822257996, "learning_rate": 2.0619098901238684e-05, "loss": 0.05485422909259796, "step": 3114 }, { "epoch": 0.42112734370379135, "grad_norm": 0.7811862826347351, "learning_rate": 2.0612752272953158e-05, "loss": 0.06677161902189255, "step": 3115 }, { "epoch": 0.42126253707255656, "grad_norm": 0.8218448758125305, "learning_rate": 2.060640447606268e-05, "loss": 0.037672460079193115, "step": 3116 }, { "epoch": 0.42139773044132184, "grad_norm": 1.2673053741455078, "learning_rate": 2.0600055511888895e-05, "loss": 0.04295028746128082, "step": 3117 }, { "epoch": 0.4215329238100871, "grad_norm": 1.0854992866516113, "learning_rate": 2.059370538175369e-05, "loss": 0.06312769651412964, "step": 3118 }, { "epoch": 0.4216681171788524, "grad_norm": 0.5508562326431274, "learning_rate": 2.0587354086979194e-05, "loss": 0.040297526866197586, "step": 3119 }, { "epoch": 0.42180331054761766, "grad_norm": 0.3192689120769501, "learning_rate": 2.0581001628887785e-05, "loss": 0.05371555685997009, "step": 3120 }, { "epoch": 0.4219385039163829, "grad_norm": 1.5956919193267822, "learning_rate": 2.057464800880207e-05, "loss": 0.07366383075714111, "step": 3121 }, { "epoch": 0.42207369728514815, "grad_norm": 0.721843421459198, "learning_rate": 2.0568293228044914e-05, "loss": 0.05905497819185257, "step": 3122 }, { "epoch": 0.4222088906539134, "grad_norm": 0.3545355796813965, "learning_rate": 2.0561937287939413e-05, "loss": 0.03873894736170769, "step": 3123 }, { "epoch": 0.4223440840226787, "grad_norm": 0.4149668514728546, "learning_rate": 2.055558018980891e-05, "loss": 0.054845765233039856, "step": 3124 }, { "epoch": 0.422479277391444, "grad_norm": 0.7542999386787415, "learning_rate": 2.0549221934976987e-05, "loss": 0.05623306334018707, "step": 3125 }, { "epoch": 0.4226144707602092, "grad_norm": 0.5587112903594971, "learning_rate": 2.054286252476746e-05, "loss": 0.05437682569026947, "step": 3126 }, { "epoch": 0.42274966412897447, "grad_norm": 0.6315963268280029, "learning_rate": 2.05365019605044e-05, "loss": 0.046305060386657715, "step": 3127 }, { "epoch": 0.42288485749773974, "grad_norm": 0.4447070360183716, "learning_rate": 2.053014024351211e-05, "loss": 0.05291140079498291, "step": 3128 }, { "epoch": 0.423020050866505, "grad_norm": 1.1890922784805298, "learning_rate": 2.0523777375115133e-05, "loss": 0.054853226989507675, "step": 3129 }, { "epoch": 0.42315524423527023, "grad_norm": 0.19163751602172852, "learning_rate": 2.0517413356638245e-05, "loss": 0.03453580290079117, "step": 3130 }, { "epoch": 0.4232904376040355, "grad_norm": 0.860350489616394, "learning_rate": 2.0511048189406472e-05, "loss": 0.07074567675590515, "step": 3131 }, { "epoch": 0.4234256309728008, "grad_norm": 0.3687298595905304, "learning_rate": 2.0504681874745082e-05, "loss": 0.05705578625202179, "step": 3132 }, { "epoch": 0.42356082434156606, "grad_norm": 0.5984498858451843, "learning_rate": 2.049831441397957e-05, "loss": 0.0578913688659668, "step": 3133 }, { "epoch": 0.42369601771033133, "grad_norm": 0.5607897639274597, "learning_rate": 2.0491945808435674e-05, "loss": 0.049040380865335464, "step": 3134 }, { "epoch": 0.42383121107909655, "grad_norm": 0.4735546410083771, "learning_rate": 2.048557605943938e-05, "loss": 0.07487677037715912, "step": 3135 }, { "epoch": 0.4239664044478618, "grad_norm": 0.4827166497707367, "learning_rate": 2.047920516831689e-05, "loss": 0.05880272388458252, "step": 3136 }, { "epoch": 0.4241015978166271, "grad_norm": 0.29004523158073425, "learning_rate": 2.047283313639467e-05, "loss": 0.040888816118240356, "step": 3137 }, { "epoch": 0.42423679118539237, "grad_norm": 0.87261563539505, "learning_rate": 2.0466459964999408e-05, "loss": 0.049823611974716187, "step": 3138 }, { "epoch": 0.42437198455415764, "grad_norm": 0.9923464059829712, "learning_rate": 2.0460085655458025e-05, "loss": 0.05934444069862366, "step": 3139 }, { "epoch": 0.42450717792292286, "grad_norm": 0.6945972442626953, "learning_rate": 2.0453710209097697e-05, "loss": 0.04255807772278786, "step": 3140 }, { "epoch": 0.42464237129168814, "grad_norm": 0.22872036695480347, "learning_rate": 2.044733362724582e-05, "loss": 0.050856903195381165, "step": 3141 }, { "epoch": 0.4247775646604534, "grad_norm": 1.0634028911590576, "learning_rate": 2.0440955911230028e-05, "loss": 0.08041512966156006, "step": 3142 }, { "epoch": 0.4249127580292187, "grad_norm": 0.3631167411804199, "learning_rate": 2.0434577062378203e-05, "loss": 0.049963533878326416, "step": 3143 }, { "epoch": 0.4250479513979839, "grad_norm": 0.7671000361442566, "learning_rate": 2.0428197082018458e-05, "loss": 0.06532436609268188, "step": 3144 }, { "epoch": 0.4251831447667492, "grad_norm": 0.37645432353019714, "learning_rate": 2.042181597147913e-05, "loss": 0.05664876103401184, "step": 3145 }, { "epoch": 0.42531833813551445, "grad_norm": 0.38391217589378357, "learning_rate": 2.0415433732088806e-05, "loss": 0.05824176222085953, "step": 3146 }, { "epoch": 0.4254535315042797, "grad_norm": 1.1771982908248901, "learning_rate": 2.0409050365176294e-05, "loss": 0.05723366141319275, "step": 3147 }, { "epoch": 0.425588724873045, "grad_norm": 0.8841889500617981, "learning_rate": 2.0402665872070656e-05, "loss": 0.059717755764722824, "step": 3148 }, { "epoch": 0.4257239182418102, "grad_norm": 0.2481062412261963, "learning_rate": 2.0396280254101172e-05, "loss": 0.04214269667863846, "step": 3149 }, { "epoch": 0.4258591116105755, "grad_norm": 0.5287272930145264, "learning_rate": 2.0389893512597364e-05, "loss": 0.05356164276599884, "step": 3150 }, { "epoch": 0.42599430497934077, "grad_norm": 0.8665952682495117, "learning_rate": 2.0383505648888986e-05, "loss": 0.04244658350944519, "step": 3151 }, { "epoch": 0.42612949834810604, "grad_norm": 0.9140850305557251, "learning_rate": 2.037711666430602e-05, "loss": 0.07658889889717102, "step": 3152 }, { "epoch": 0.4262646917168713, "grad_norm": 1.0958154201507568, "learning_rate": 2.0370726560178693e-05, "loss": 0.05771195888519287, "step": 3153 }, { "epoch": 0.42639988508563653, "grad_norm": 0.3769657611846924, "learning_rate": 2.036433533783745e-05, "loss": 0.039417192339897156, "step": 3154 }, { "epoch": 0.4265350784544018, "grad_norm": 0.9372339248657227, "learning_rate": 2.0357942998612988e-05, "loss": 0.05889472737908363, "step": 3155 }, { "epoch": 0.4266702718231671, "grad_norm": 0.41475072503089905, "learning_rate": 2.0351549543836224e-05, "loss": 0.05053633823990822, "step": 3156 }, { "epoch": 0.42680546519193235, "grad_norm": 0.9089570045471191, "learning_rate": 2.0345154974838307e-05, "loss": 0.04572737216949463, "step": 3157 }, { "epoch": 0.4269406585606976, "grad_norm": 0.34928643703460693, "learning_rate": 2.0338759292950618e-05, "loss": 0.044844917953014374, "step": 3158 }, { "epoch": 0.42707585192946285, "grad_norm": 0.38216763734817505, "learning_rate": 2.033236249950477e-05, "loss": 0.05039975047111511, "step": 3159 }, { "epoch": 0.4272110452982281, "grad_norm": 1.0492303371429443, "learning_rate": 2.0325964595832618e-05, "loss": 0.050439879298210144, "step": 3160 }, { "epoch": 0.4273462386669934, "grad_norm": 0.29913753271102905, "learning_rate": 2.031956558326624e-05, "loss": 0.06860119104385376, "step": 3161 }, { "epoch": 0.42748143203575867, "grad_norm": 1.319534182548523, "learning_rate": 2.0313165463137935e-05, "loss": 0.06233935058116913, "step": 3162 }, { "epoch": 0.4276166254045239, "grad_norm": 0.9906255602836609, "learning_rate": 2.030676423678025e-05, "loss": 0.06712990254163742, "step": 3163 }, { "epoch": 0.42775181877328916, "grad_norm": 0.25455281138420105, "learning_rate": 2.030036190552595e-05, "loss": 0.05154913663864136, "step": 3164 }, { "epoch": 0.42788701214205443, "grad_norm": 0.5683528184890747, "learning_rate": 2.029395847070803e-05, "loss": 0.0626387894153595, "step": 3165 }, { "epoch": 0.4280222055108197, "grad_norm": 0.8501094579696655, "learning_rate": 2.0287553933659735e-05, "loss": 0.05703309178352356, "step": 3166 }, { "epoch": 0.428157398879585, "grad_norm": 0.44154056906700134, "learning_rate": 2.0281148295714512e-05, "loss": 0.04259533807635307, "step": 3167 }, { "epoch": 0.4282925922483502, "grad_norm": 1.5962021350860596, "learning_rate": 2.027474155820605e-05, "loss": 0.05909880995750427, "step": 3168 }, { "epoch": 0.4284277856171155, "grad_norm": 0.6748774647712708, "learning_rate": 2.026833372246827e-05, "loss": 0.057523906230926514, "step": 3169 }, { "epoch": 0.42856297898588075, "grad_norm": 0.5485327839851379, "learning_rate": 2.026192478983531e-05, "loss": 0.055931925773620605, "step": 3170 }, { "epoch": 0.428698172354646, "grad_norm": 1.0570600032806396, "learning_rate": 2.0255514761641555e-05, "loss": 0.06017902493476868, "step": 3171 }, { "epoch": 0.42883336572341124, "grad_norm": 0.7914089560508728, "learning_rate": 2.0249103639221597e-05, "loss": 0.05615999549627304, "step": 3172 }, { "epoch": 0.4289685590921765, "grad_norm": 0.26349112391471863, "learning_rate": 2.024269142391027e-05, "loss": 0.04027664661407471, "step": 3173 }, { "epoch": 0.4291037524609418, "grad_norm": 0.3956128656864166, "learning_rate": 2.023627811704263e-05, "loss": 0.061393264681100845, "step": 3174 }, { "epoch": 0.42923894582970706, "grad_norm": 0.5028055310249329, "learning_rate": 2.0229863719953963e-05, "loss": 0.05934426188468933, "step": 3175 }, { "epoch": 0.42937413919847234, "grad_norm": 0.6211058497428894, "learning_rate": 2.0223448233979785e-05, "loss": 0.05135301500558853, "step": 3176 }, { "epoch": 0.42950933256723756, "grad_norm": 0.7907568216323853, "learning_rate": 2.0217031660455825e-05, "loss": 0.0515744686126709, "step": 3177 }, { "epoch": 0.42964452593600283, "grad_norm": 0.3090481162071228, "learning_rate": 2.0210614000718054e-05, "loss": 0.047113820910453796, "step": 3178 }, { "epoch": 0.4297797193047681, "grad_norm": 0.4285459518432617, "learning_rate": 2.020419525610266e-05, "loss": 0.05762678012251854, "step": 3179 }, { "epoch": 0.4299149126735334, "grad_norm": 0.7162730693817139, "learning_rate": 2.0197775427946066e-05, "loss": 0.061169058084487915, "step": 3180 }, { "epoch": 0.43005010604229865, "grad_norm": 0.3998583257198334, "learning_rate": 2.0191354517584902e-05, "loss": 0.061767011880874634, "step": 3181 }, { "epoch": 0.43018529941106387, "grad_norm": 0.34154802560806274, "learning_rate": 2.018493252635605e-05, "loss": 0.05337631702423096, "step": 3182 }, { "epoch": 0.43032049277982914, "grad_norm": 0.34869688749313354, "learning_rate": 2.0178509455596598e-05, "loss": 0.05695056915283203, "step": 3183 }, { "epoch": 0.4304556861485944, "grad_norm": 0.5041409730911255, "learning_rate": 2.017208530664386e-05, "loss": 0.047867417335510254, "step": 3184 }, { "epoch": 0.4305908795173597, "grad_norm": 1.3873205184936523, "learning_rate": 2.016566008083538e-05, "loss": 0.06679412722587585, "step": 3185 }, { "epoch": 0.4307260728861249, "grad_norm": 0.38294702768325806, "learning_rate": 2.0159233779508923e-05, "loss": 0.060565900057554245, "step": 3186 }, { "epoch": 0.4308612662548902, "grad_norm": 0.6644356846809387, "learning_rate": 2.0152806404002482e-05, "loss": 0.06483253836631775, "step": 3187 }, { "epoch": 0.43099645962365546, "grad_norm": 0.7891088724136353, "learning_rate": 2.014637795565427e-05, "loss": 0.053404733538627625, "step": 3188 }, { "epoch": 0.43113165299242073, "grad_norm": 0.4857129156589508, "learning_rate": 2.0139948435802722e-05, "loss": 0.05069521814584732, "step": 3189 }, { "epoch": 0.431266846361186, "grad_norm": 0.40805938839912415, "learning_rate": 2.0133517845786504e-05, "loss": 0.05410213768482208, "step": 3190 }, { "epoch": 0.4314020397299512, "grad_norm": 0.8711510300636292, "learning_rate": 2.012708618694449e-05, "loss": 0.04502767324447632, "step": 3191 }, { "epoch": 0.4315372330987165, "grad_norm": 0.9506723880767822, "learning_rate": 2.0120653460615795e-05, "loss": 0.06844263523817062, "step": 3192 }, { "epoch": 0.4316724264674818, "grad_norm": 0.6595968008041382, "learning_rate": 2.011421966813974e-05, "loss": 0.05935592204332352, "step": 3193 }, { "epoch": 0.43180761983624705, "grad_norm": 0.60198575258255, "learning_rate": 2.0107784810855882e-05, "loss": 0.04870133101940155, "step": 3194 }, { "epoch": 0.4319428132050123, "grad_norm": 0.7255297303199768, "learning_rate": 2.0101348890103985e-05, "loss": 0.05051903426647186, "step": 3195 }, { "epoch": 0.43207800657377754, "grad_norm": 0.3370662033557892, "learning_rate": 2.0094911907224043e-05, "loss": 0.05318271368741989, "step": 3196 }, { "epoch": 0.4322131999425428, "grad_norm": 0.7351927757263184, "learning_rate": 2.008847386355628e-05, "loss": 0.0428081750869751, "step": 3197 }, { "epoch": 0.4323483933113081, "grad_norm": 0.4200049340724945, "learning_rate": 2.008203476044112e-05, "loss": 0.052372708916664124, "step": 3198 }, { "epoch": 0.43248358668007336, "grad_norm": 2.193608283996582, "learning_rate": 2.007559459921922e-05, "loss": 0.0705721452832222, "step": 3199 }, { "epoch": 0.4326187800488386, "grad_norm": 1.3866478204727173, "learning_rate": 2.0069153381231456e-05, "loss": 0.05817686766386032, "step": 3200 }, { "epoch": 0.43275397341760385, "grad_norm": 1.2520829439163208, "learning_rate": 2.0062711107818933e-05, "loss": 0.06678328663110733, "step": 3201 }, { "epoch": 0.43288916678636913, "grad_norm": 0.2560953199863434, "learning_rate": 2.0056267780322953e-05, "loss": 0.04996124655008316, "step": 3202 }, { "epoch": 0.4330243601551344, "grad_norm": 0.5135100483894348, "learning_rate": 2.004982340008506e-05, "loss": 0.053074657917022705, "step": 3203 }, { "epoch": 0.4331595535238997, "grad_norm": 0.4020565450191498, "learning_rate": 2.004337796844701e-05, "loss": 0.0586613267660141, "step": 3204 }, { "epoch": 0.4332947468926649, "grad_norm": 0.45359864830970764, "learning_rate": 2.003693148675077e-05, "loss": 0.04980979859828949, "step": 3205 }, { "epoch": 0.43342994026143017, "grad_norm": 0.36846843361854553, "learning_rate": 2.003048395633853e-05, "loss": 0.06583218276500702, "step": 3206 }, { "epoch": 0.43356513363019544, "grad_norm": 0.3749634921550751, "learning_rate": 2.0024035378552708e-05, "loss": 0.061269551515579224, "step": 3207 }, { "epoch": 0.4337003269989607, "grad_norm": 0.431803435087204, "learning_rate": 2.001758575473593e-05, "loss": 0.06340453028678894, "step": 3208 }, { "epoch": 0.43383552036772594, "grad_norm": 0.3598921000957489, "learning_rate": 2.0011135086231042e-05, "loss": 0.06528577208518982, "step": 3209 }, { "epoch": 0.4339707137364912, "grad_norm": 0.4522221088409424, "learning_rate": 2.0004683374381104e-05, "loss": 0.0628328025341034, "step": 3210 }, { "epoch": 0.4341059071052565, "grad_norm": 0.5399999618530273, "learning_rate": 1.9998230620529395e-05, "loss": 0.06149455904960632, "step": 3211 }, { "epoch": 0.43424110047402176, "grad_norm": 0.7971583008766174, "learning_rate": 1.999177682601942e-05, "loss": 0.06106923520565033, "step": 3212 }, { "epoch": 0.43437629384278703, "grad_norm": 0.40637439489364624, "learning_rate": 1.9985321992194896e-05, "loss": 0.04652516171336174, "step": 3213 }, { "epoch": 0.43451148721155225, "grad_norm": 0.4229534864425659, "learning_rate": 1.9978866120399746e-05, "loss": 0.058541879057884216, "step": 3214 }, { "epoch": 0.4346466805803175, "grad_norm": 0.38915035128593445, "learning_rate": 1.9972409211978116e-05, "loss": 0.06971043348312378, "step": 3215 }, { "epoch": 0.4347818739490828, "grad_norm": 0.7337201833724976, "learning_rate": 1.9965951268274373e-05, "loss": 0.06125558912754059, "step": 3216 }, { "epoch": 0.43491706731784807, "grad_norm": 0.4810763895511627, "learning_rate": 1.9959492290633093e-05, "loss": 0.061926454305648804, "step": 3217 }, { "epoch": 0.43505226068661335, "grad_norm": 0.8450936675071716, "learning_rate": 1.995303228039907e-05, "loss": 0.09613863378763199, "step": 3218 }, { "epoch": 0.43518745405537856, "grad_norm": 0.5844223499298096, "learning_rate": 1.994657123891732e-05, "loss": 0.07898298650979996, "step": 3219 }, { "epoch": 0.43532264742414384, "grad_norm": 0.9888449311256409, "learning_rate": 1.9940109167533055e-05, "loss": 0.0579981803894043, "step": 3220 }, { "epoch": 0.4354578407929091, "grad_norm": 0.44750744104385376, "learning_rate": 1.9933646067591716e-05, "loss": 0.054648369550704956, "step": 3221 }, { "epoch": 0.4355930341616744, "grad_norm": 0.5578551292419434, "learning_rate": 1.992718194043896e-05, "loss": 0.0631774291396141, "step": 3222 }, { "epoch": 0.4357282275304396, "grad_norm": 0.556036651134491, "learning_rate": 1.9920716787420643e-05, "loss": 0.057975560426712036, "step": 3223 }, { "epoch": 0.4358634208992049, "grad_norm": 0.520154595375061, "learning_rate": 1.9914250609882858e-05, "loss": 0.062328506261110306, "step": 3224 }, { "epoch": 0.43599861426797015, "grad_norm": 0.33388620615005493, "learning_rate": 1.9907783409171885e-05, "loss": 0.06294068694114685, "step": 3225 }, { "epoch": 0.4361338076367354, "grad_norm": 0.8506790995597839, "learning_rate": 1.990131518663424e-05, "loss": 0.04879724979400635, "step": 3226 }, { "epoch": 0.4362690010055007, "grad_norm": 0.34767946600914, "learning_rate": 1.9894845943616632e-05, "loss": 0.0620148591697216, "step": 3227 }, { "epoch": 0.4364041943742659, "grad_norm": 0.36129623651504517, "learning_rate": 1.988837568146599e-05, "loss": 0.047696761786937714, "step": 3228 }, { "epoch": 0.4365393877430312, "grad_norm": 0.3131442666053772, "learning_rate": 1.988190440152947e-05, "loss": 0.05027863383293152, "step": 3229 }, { "epoch": 0.43667458111179647, "grad_norm": 0.28045254945755005, "learning_rate": 1.9875432105154424e-05, "loss": 0.048961855471134186, "step": 3230 }, { "epoch": 0.43680977448056174, "grad_norm": 0.33952921628952026, "learning_rate": 1.9868958793688412e-05, "loss": 0.0586174875497818, "step": 3231 }, { "epoch": 0.436944967849327, "grad_norm": 0.4964987635612488, "learning_rate": 1.9862484468479213e-05, "loss": 0.04754757881164551, "step": 3232 }, { "epoch": 0.43708016121809223, "grad_norm": 1.9296250343322754, "learning_rate": 1.985600913087482e-05, "loss": 0.07404591143131256, "step": 3233 }, { "epoch": 0.4372153545868575, "grad_norm": 0.638374388217926, "learning_rate": 1.9849532782223425e-05, "loss": 0.048216745257377625, "step": 3234 }, { "epoch": 0.4373505479556228, "grad_norm": 0.4848732054233551, "learning_rate": 1.9843055423873447e-05, "loss": 0.05438791215419769, "step": 3235 }, { "epoch": 0.43748574132438806, "grad_norm": 0.3670552670955658, "learning_rate": 1.9836577057173507e-05, "loss": 0.04548480361700058, "step": 3236 }, { "epoch": 0.4376209346931533, "grad_norm": 0.6398751139640808, "learning_rate": 1.9830097683472427e-05, "loss": 0.06153556704521179, "step": 3237 }, { "epoch": 0.43775612806191855, "grad_norm": 0.31612691283226013, "learning_rate": 1.9823617304119252e-05, "loss": 0.05666649341583252, "step": 3238 }, { "epoch": 0.4378913214306838, "grad_norm": 0.41127637028694153, "learning_rate": 1.9817135920463232e-05, "loss": 0.05100924149155617, "step": 3239 }, { "epoch": 0.4380265147994491, "grad_norm": 0.3253238797187805, "learning_rate": 1.9810653533853826e-05, "loss": 0.04596482589840889, "step": 3240 }, { "epoch": 0.43816170816821437, "grad_norm": 0.3820272982120514, "learning_rate": 1.9804170145640706e-05, "loss": 0.04634726047515869, "step": 3241 }, { "epoch": 0.4382969015369796, "grad_norm": 0.5377208590507507, "learning_rate": 1.9797685757173737e-05, "loss": 0.04338394105434418, "step": 3242 }, { "epoch": 0.43843209490574486, "grad_norm": 0.7749507427215576, "learning_rate": 1.979120036980301e-05, "loss": 0.05895250290632248, "step": 3243 }, { "epoch": 0.43856728827451014, "grad_norm": 0.7766172885894775, "learning_rate": 1.9784713984878814e-05, "loss": 0.06698055565357208, "step": 3244 }, { "epoch": 0.4387024816432754, "grad_norm": 0.652805507183075, "learning_rate": 1.9778226603751652e-05, "loss": 0.06592923402786255, "step": 3245 }, { "epoch": 0.4388376750120407, "grad_norm": 0.512524425983429, "learning_rate": 1.9771738227772235e-05, "loss": 0.047188758850097656, "step": 3246 }, { "epoch": 0.4389728683808059, "grad_norm": 0.41331592202186584, "learning_rate": 1.976524885829147e-05, "loss": 0.050759319216012955, "step": 3247 }, { "epoch": 0.4391080617495712, "grad_norm": 0.17818917334079742, "learning_rate": 1.975875849666048e-05, "loss": 0.03525908291339874, "step": 3248 }, { "epoch": 0.43924325511833645, "grad_norm": 1.3401192426681519, "learning_rate": 1.9752267144230595e-05, "loss": 0.09731544554233551, "step": 3249 }, { "epoch": 0.4393784484871017, "grad_norm": 0.4023383855819702, "learning_rate": 1.9745774802353347e-05, "loss": 0.06433025747537613, "step": 3250 }, { "epoch": 0.43951364185586694, "grad_norm": 1.0541208982467651, "learning_rate": 1.973928147238048e-05, "loss": 0.05375806987285614, "step": 3251 }, { "epoch": 0.4396488352246322, "grad_norm": 0.3709910809993744, "learning_rate": 1.973278715566394e-05, "loss": 0.047917380928993225, "step": 3252 }, { "epoch": 0.4397840285933975, "grad_norm": 0.4579212963581085, "learning_rate": 1.972629185355587e-05, "loss": 0.05296555161476135, "step": 3253 }, { "epoch": 0.43991922196216277, "grad_norm": 0.38086408376693726, "learning_rate": 1.971979556740864e-05, "loss": 0.04802680015563965, "step": 3254 }, { "epoch": 0.44005441533092804, "grad_norm": 0.7378490567207336, "learning_rate": 1.971329829857479e-05, "loss": 0.09295317530632019, "step": 3255 }, { "epoch": 0.44018960869969326, "grad_norm": 0.3409130871295929, "learning_rate": 1.9706800048407112e-05, "loss": 0.06915012001991272, "step": 3256 }, { "epoch": 0.44032480206845853, "grad_norm": 0.5004827380180359, "learning_rate": 1.9700300818258566e-05, "loss": 0.05587489530444145, "step": 3257 }, { "epoch": 0.4404599954372238, "grad_norm": 0.4734703004360199, "learning_rate": 1.9693800609482318e-05, "loss": 0.08199352025985718, "step": 3258 }, { "epoch": 0.4405951888059891, "grad_norm": 1.2099796533584595, "learning_rate": 1.9687299423431754e-05, "loss": 0.047711823135614395, "step": 3259 }, { "epoch": 0.44073038217475435, "grad_norm": 0.4385868310928345, "learning_rate": 1.968079726146045e-05, "loss": 0.07175090909004211, "step": 3260 }, { "epoch": 0.44086557554351957, "grad_norm": 0.9640140533447266, "learning_rate": 1.9674294124922204e-05, "loss": 0.05886758118867874, "step": 3261 }, { "epoch": 0.44100076891228485, "grad_norm": 0.8408608436584473, "learning_rate": 1.966779001517099e-05, "loss": 0.056329671293497086, "step": 3262 }, { "epoch": 0.4411359622810501, "grad_norm": 0.9086351990699768, "learning_rate": 1.9661284933561006e-05, "loss": 0.06688427925109863, "step": 3263 }, { "epoch": 0.4412711556498154, "grad_norm": 0.3593735098838806, "learning_rate": 1.9654778881446636e-05, "loss": 0.06216779351234436, "step": 3264 }, { "epoch": 0.4414063490185806, "grad_norm": 0.5820820331573486, "learning_rate": 1.9648271860182487e-05, "loss": 0.05620887503027916, "step": 3265 }, { "epoch": 0.4415415423873459, "grad_norm": 0.3773776590824127, "learning_rate": 1.9641763871123345e-05, "loss": 0.05723300576210022, "step": 3266 }, { "epoch": 0.44167673575611116, "grad_norm": 0.6403797268867493, "learning_rate": 1.963525491562421e-05, "loss": 0.06503838300704956, "step": 3267 }, { "epoch": 0.44181192912487643, "grad_norm": 0.6258330941200256, "learning_rate": 1.9628744995040287e-05, "loss": 0.04666862636804581, "step": 3268 }, { "epoch": 0.4419471224936417, "grad_norm": 0.5811514258384705, "learning_rate": 1.9622234110726976e-05, "loss": 0.06009425222873688, "step": 3269 }, { "epoch": 0.4420823158624069, "grad_norm": 0.42902350425720215, "learning_rate": 1.9615722264039868e-05, "loss": 0.05902581661939621, "step": 3270 }, { "epoch": 0.4422175092311722, "grad_norm": 0.24103787541389465, "learning_rate": 1.9609209456334772e-05, "loss": 0.03415320813655853, "step": 3271 }, { "epoch": 0.4423527025999375, "grad_norm": 0.40127962827682495, "learning_rate": 1.960269568896769e-05, "loss": 0.03563021123409271, "step": 3272 }, { "epoch": 0.44248789596870275, "grad_norm": 0.558793306350708, "learning_rate": 1.9596180963294822e-05, "loss": 0.02755964919924736, "step": 3273 }, { "epoch": 0.442623089337468, "grad_norm": 0.7912986278533936, "learning_rate": 1.9589665280672564e-05, "loss": 0.04704117029905319, "step": 3274 }, { "epoch": 0.44275828270623324, "grad_norm": 0.46691492199897766, "learning_rate": 1.958314864245752e-05, "loss": 0.060979537665843964, "step": 3275 }, { "epoch": 0.4428934760749985, "grad_norm": 0.45566725730895996, "learning_rate": 1.957663105000649e-05, "loss": 0.06779834628105164, "step": 3276 }, { "epoch": 0.4430286694437638, "grad_norm": 0.38593584299087524, "learning_rate": 1.957011250467647e-05, "loss": 0.05130264535546303, "step": 3277 }, { "epoch": 0.44316386281252906, "grad_norm": 0.41180941462516785, "learning_rate": 1.9563593007824658e-05, "loss": 0.04429095238447189, "step": 3278 }, { "epoch": 0.4432990561812943, "grad_norm": 0.36301419138908386, "learning_rate": 1.9557072560808442e-05, "loss": 0.059429511427879333, "step": 3279 }, { "epoch": 0.44343424955005956, "grad_norm": 0.5429235696792603, "learning_rate": 1.9550551164985418e-05, "loss": 0.057512715458869934, "step": 3280 }, { "epoch": 0.44356944291882483, "grad_norm": 0.41305485367774963, "learning_rate": 1.9544028821713372e-05, "loss": 0.05373512953519821, "step": 3281 }, { "epoch": 0.4437046362875901, "grad_norm": 0.8366455435752869, "learning_rate": 1.9537505532350298e-05, "loss": 0.06713327765464783, "step": 3282 }, { "epoch": 0.4438398296563554, "grad_norm": 0.5665419697761536, "learning_rate": 1.9530981298254376e-05, "loss": 0.048474207520484924, "step": 3283 }, { "epoch": 0.4439750230251206, "grad_norm": 0.8575229644775391, "learning_rate": 1.9524456120783983e-05, "loss": 0.04002584517002106, "step": 3284 }, { "epoch": 0.44411021639388587, "grad_norm": 1.1227996349334717, "learning_rate": 1.95179300012977e-05, "loss": 0.06277889758348465, "step": 3285 }, { "epoch": 0.44424540976265114, "grad_norm": 0.4908751845359802, "learning_rate": 1.9511402941154296e-05, "loss": 0.03839091211557388, "step": 3286 }, { "epoch": 0.4443806031314164, "grad_norm": 0.701174259185791, "learning_rate": 1.950487494171274e-05, "loss": 0.057075630873441696, "step": 3287 }, { "epoch": 0.4445157965001817, "grad_norm": 0.2505267858505249, "learning_rate": 1.9498346004332203e-05, "loss": 0.04473879933357239, "step": 3288 }, { "epoch": 0.4446509898689469, "grad_norm": 0.5283285975456238, "learning_rate": 1.949181613037204e-05, "loss": 0.06413161754608154, "step": 3289 }, { "epoch": 0.4447861832377122, "grad_norm": 0.29598984122276306, "learning_rate": 1.9485285321191804e-05, "loss": 0.03635593503713608, "step": 3290 }, { "epoch": 0.44492137660647746, "grad_norm": 0.8156299591064453, "learning_rate": 1.9478753578151244e-05, "loss": 0.06624409556388855, "step": 3291 }, { "epoch": 0.44505656997524273, "grad_norm": 0.5123705267906189, "learning_rate": 1.9472220902610304e-05, "loss": 0.05679801106452942, "step": 3292 }, { "epoch": 0.44519176334400795, "grad_norm": 0.5711596012115479, "learning_rate": 1.9465687295929127e-05, "loss": 0.04937441647052765, "step": 3293 }, { "epoch": 0.4453269567127732, "grad_norm": 0.7444480061531067, "learning_rate": 1.945915275946804e-05, "loss": 0.05021470785140991, "step": 3294 }, { "epoch": 0.4454621500815385, "grad_norm": 0.6331806182861328, "learning_rate": 1.9452617294587573e-05, "loss": 0.042013660073280334, "step": 3295 }, { "epoch": 0.4455973434503038, "grad_norm": 0.29099100828170776, "learning_rate": 1.9446080902648435e-05, "loss": 0.055853720754384995, "step": 3296 }, { "epoch": 0.44573253681906905, "grad_norm": 0.688414990901947, "learning_rate": 1.943954358501154e-05, "loss": 0.05329141765832901, "step": 3297 }, { "epoch": 0.44586773018783427, "grad_norm": 0.30019357800483704, "learning_rate": 1.943300534303801e-05, "loss": 0.050568610429763794, "step": 3298 }, { "epoch": 0.44600292355659954, "grad_norm": 0.2570483982563019, "learning_rate": 1.9426466178089116e-05, "loss": 0.05790006369352341, "step": 3299 }, { "epoch": 0.4461381169253648, "grad_norm": 0.5182380676269531, "learning_rate": 1.9419926091526367e-05, "loss": 0.0813378095626831, "step": 3300 }, { "epoch": 0.4462733102941301, "grad_norm": 0.3804183900356293, "learning_rate": 1.9413385084711425e-05, "loss": 0.06054868921637535, "step": 3301 }, { "epoch": 0.44640850366289536, "grad_norm": 0.34261995553970337, "learning_rate": 1.9406843159006183e-05, "loss": 0.044449031352996826, "step": 3302 }, { "epoch": 0.4465436970316606, "grad_norm": 0.5972133278846741, "learning_rate": 1.940030031577269e-05, "loss": 0.05956093966960907, "step": 3303 }, { "epoch": 0.44667889040042585, "grad_norm": 0.425552636384964, "learning_rate": 1.9393756556373212e-05, "loss": 0.061608828604221344, "step": 3304 }, { "epoch": 0.44681408376919113, "grad_norm": 0.4237003028392792, "learning_rate": 1.9387211882170184e-05, "loss": 0.0635639876127243, "step": 3305 }, { "epoch": 0.4469492771379564, "grad_norm": 0.6458073258399963, "learning_rate": 1.9380666294526243e-05, "loss": 0.0525660365819931, "step": 3306 }, { "epoch": 0.4470844705067216, "grad_norm": 0.5430435538291931, "learning_rate": 1.9374119794804228e-05, "loss": 0.05350537225604057, "step": 3307 }, { "epoch": 0.4472196638754869, "grad_norm": 0.28671368956565857, "learning_rate": 1.936757238436714e-05, "loss": 0.045153431594371796, "step": 3308 }, { "epoch": 0.44735485724425217, "grad_norm": 0.41515183448791504, "learning_rate": 1.936102406457819e-05, "loss": 0.06470730900764465, "step": 3309 }, { "epoch": 0.44749005061301744, "grad_norm": 0.24309509992599487, "learning_rate": 1.935447483680078e-05, "loss": 0.04342038184404373, "step": 3310 }, { "epoch": 0.4476252439817827, "grad_norm": 0.32473987340927124, "learning_rate": 1.9347924702398484e-05, "loss": 0.05396459996700287, "step": 3311 }, { "epoch": 0.44776043735054794, "grad_norm": 2.080249786376953, "learning_rate": 1.9341373662735075e-05, "loss": 0.07097083330154419, "step": 3312 }, { "epoch": 0.4478956307193132, "grad_norm": 0.371449738740921, "learning_rate": 1.9334821719174524e-05, "loss": 0.056131958961486816, "step": 3313 }, { "epoch": 0.4480308240880785, "grad_norm": 0.6857618689537048, "learning_rate": 1.9328268873080974e-05, "loss": 0.05432959273457527, "step": 3314 }, { "epoch": 0.44816601745684376, "grad_norm": 0.7514235377311707, "learning_rate": 1.9321715125818765e-05, "loss": 0.04985123872756958, "step": 3315 }, { "epoch": 0.44830121082560903, "grad_norm": 0.7892392873764038, "learning_rate": 1.931516047875242e-05, "loss": 0.07212784886360168, "step": 3316 }, { "epoch": 0.44843640419437425, "grad_norm": 0.42287296056747437, "learning_rate": 1.930860493324665e-05, "loss": 0.044905733317136765, "step": 3317 }, { "epoch": 0.4485715975631395, "grad_norm": 0.762205183506012, "learning_rate": 1.9302048490666356e-05, "loss": 0.04661571606993675, "step": 3318 }, { "epoch": 0.4487067909319048, "grad_norm": 1.1957151889801025, "learning_rate": 1.9295491152376623e-05, "loss": 0.054930999875068665, "step": 3319 }, { "epoch": 0.44884198430067007, "grad_norm": 0.6091325879096985, "learning_rate": 1.928893291974273e-05, "loss": 0.0738677978515625, "step": 3320 }, { "epoch": 0.4489771776694353, "grad_norm": 0.5154362916946411, "learning_rate": 1.9282373794130132e-05, "loss": 0.05926453322172165, "step": 3321 }, { "epoch": 0.44911237103820056, "grad_norm": 0.8428158760070801, "learning_rate": 1.9275813776904472e-05, "loss": 0.0578303337097168, "step": 3322 }, { "epoch": 0.44924756440696584, "grad_norm": 1.1031607389450073, "learning_rate": 1.9269252869431582e-05, "loss": 0.05805609002709389, "step": 3323 }, { "epoch": 0.4493827577757311, "grad_norm": 1.2092585563659668, "learning_rate": 1.9262691073077476e-05, "loss": 0.07075157761573792, "step": 3324 }, { "epoch": 0.4495179511444964, "grad_norm": 0.5066566467285156, "learning_rate": 1.9256128389208363e-05, "loss": 0.039241723716259, "step": 3325 }, { "epoch": 0.4496531445132616, "grad_norm": 1.1367995738983154, "learning_rate": 1.924956481919062e-05, "loss": 0.08321493864059448, "step": 3326 }, { "epoch": 0.4497883378820269, "grad_norm": 0.36701610684394836, "learning_rate": 1.9243000364390825e-05, "loss": 0.04881875216960907, "step": 3327 }, { "epoch": 0.44992353125079215, "grad_norm": 0.3730778396129608, "learning_rate": 1.9236435026175717e-05, "loss": 0.05519121512770653, "step": 3328 }, { "epoch": 0.4500587246195574, "grad_norm": 0.877098798751831, "learning_rate": 1.9229868805912248e-05, "loss": 0.04307505488395691, "step": 3329 }, { "epoch": 0.4501939179883227, "grad_norm": 0.5113497376441956, "learning_rate": 1.9223301704967543e-05, "loss": 0.06186281889677048, "step": 3330 }, { "epoch": 0.4503291113570879, "grad_norm": 0.5405557751655579, "learning_rate": 1.92167337247089e-05, "loss": 0.0581301748752594, "step": 3331 }, { "epoch": 0.4504643047258532, "grad_norm": 0.661689281463623, "learning_rate": 1.9210164866503808e-05, "loss": 0.050081897526979446, "step": 3332 }, { "epoch": 0.45059949809461847, "grad_norm": 0.6406334638595581, "learning_rate": 1.9203595131719935e-05, "loss": 0.04544217884540558, "step": 3333 }, { "epoch": 0.45073469146338374, "grad_norm": 0.35986214876174927, "learning_rate": 1.9197024521725148e-05, "loss": 0.031505897641181946, "step": 3334 }, { "epoch": 0.45086988483214896, "grad_norm": 0.8860594034194946, "learning_rate": 1.9190453037887465e-05, "loss": 0.0622507706284523, "step": 3335 }, { "epoch": 0.45100507820091423, "grad_norm": 0.25882935523986816, "learning_rate": 1.918388068157512e-05, "loss": 0.04336508363485336, "step": 3336 }, { "epoch": 0.4511402715696795, "grad_norm": 0.9596101641654968, "learning_rate": 1.9177307454156507e-05, "loss": 0.03922261297702789, "step": 3337 }, { "epoch": 0.4512754649384448, "grad_norm": 0.47697144746780396, "learning_rate": 1.9170733357000202e-05, "loss": 0.06904802471399307, "step": 3338 }, { "epoch": 0.45141065830721006, "grad_norm": 0.4825569987297058, "learning_rate": 1.916415839147497e-05, "loss": 0.06335628032684326, "step": 3339 }, { "epoch": 0.4515458516759753, "grad_norm": 0.6721900701522827, "learning_rate": 1.9157582558949756e-05, "loss": 0.061501145362854004, "step": 3340 }, { "epoch": 0.45168104504474055, "grad_norm": 0.43628573417663574, "learning_rate": 1.9151005860793682e-05, "loss": 0.06130243092775345, "step": 3341 }, { "epoch": 0.4518162384135058, "grad_norm": 0.37040936946868896, "learning_rate": 1.9144428298376056e-05, "loss": 0.05750388652086258, "step": 3342 }, { "epoch": 0.4519514317822711, "grad_norm": 0.185185506939888, "learning_rate": 1.9137849873066355e-05, "loss": 0.030655065551400185, "step": 3343 }, { "epoch": 0.4520866251510363, "grad_norm": 0.9033924341201782, "learning_rate": 1.9131270586234243e-05, "loss": 0.06248645484447479, "step": 3344 }, { "epoch": 0.4522218185198016, "grad_norm": 0.9251096844673157, "learning_rate": 1.9124690439249564e-05, "loss": 0.07921051979064941, "step": 3345 }, { "epoch": 0.45235701188856686, "grad_norm": 0.5152464509010315, "learning_rate": 1.9118109433482342e-05, "loss": 0.06715615093708038, "step": 3346 }, { "epoch": 0.45249220525733214, "grad_norm": 0.5808513760566711, "learning_rate": 1.911152757030278e-05, "loss": 0.06280335783958435, "step": 3347 }, { "epoch": 0.4526273986260974, "grad_norm": 0.2645490765571594, "learning_rate": 1.9104944851081247e-05, "loss": 0.03735634684562683, "step": 3348 }, { "epoch": 0.45276259199486263, "grad_norm": 1.25602126121521, "learning_rate": 1.9098361277188303e-05, "loss": 0.058656878769397736, "step": 3349 }, { "epoch": 0.4528977853636279, "grad_norm": 1.070167064666748, "learning_rate": 1.909177684999469e-05, "loss": 0.06288847327232361, "step": 3350 }, { "epoch": 0.4530329787323932, "grad_norm": 1.1462658643722534, "learning_rate": 1.9085191570871316e-05, "loss": 0.05812841281294823, "step": 3351 }, { "epoch": 0.45316817210115845, "grad_norm": 1.4589474201202393, "learning_rate": 1.9078605441189275e-05, "loss": 0.07801459729671478, "step": 3352 }, { "epoch": 0.4533033654699237, "grad_norm": 0.8323929905891418, "learning_rate": 1.9072018462319828e-05, "loss": 0.04907388612627983, "step": 3353 }, { "epoch": 0.45343855883868894, "grad_norm": 0.56675785779953, "learning_rate": 1.9065430635634422e-05, "loss": 0.05784274637699127, "step": 3354 }, { "epoch": 0.4535737522074542, "grad_norm": 0.314178466796875, "learning_rate": 1.9058841962504677e-05, "loss": 0.05712277814745903, "step": 3355 }, { "epoch": 0.4537089455762195, "grad_norm": 0.542799711227417, "learning_rate": 1.9052252444302394e-05, "loss": 0.06617625057697296, "step": 3356 }, { "epoch": 0.45384413894498477, "grad_norm": 0.19473232328891754, "learning_rate": 1.904566208239954e-05, "loss": 0.04961702227592468, "step": 3357 }, { "epoch": 0.45397933231375, "grad_norm": 0.7159522175788879, "learning_rate": 1.903907087816827e-05, "loss": 0.03728868067264557, "step": 3358 }, { "epoch": 0.45411452568251526, "grad_norm": 1.2334436178207397, "learning_rate": 1.9032478832980902e-05, "loss": 0.05597636103630066, "step": 3359 }, { "epoch": 0.45424971905128053, "grad_norm": 0.3319777250289917, "learning_rate": 1.9025885948209938e-05, "loss": 0.0323946475982666, "step": 3360 }, { "epoch": 0.4543849124200458, "grad_norm": 0.8528616428375244, "learning_rate": 1.901929222522805e-05, "loss": 0.03945460915565491, "step": 3361 }, { "epoch": 0.4545201057888111, "grad_norm": 0.7063406705856323, "learning_rate": 1.901269766540809e-05, "loss": 0.05806471407413483, "step": 3362 }, { "epoch": 0.4546552991575763, "grad_norm": 0.9343569278717041, "learning_rate": 1.9006102270123076e-05, "loss": 0.05490513890981674, "step": 3363 }, { "epoch": 0.45479049252634157, "grad_norm": 0.6323087811470032, "learning_rate": 1.8999506040746208e-05, "loss": 0.06549021601676941, "step": 3364 }, { "epoch": 0.45492568589510685, "grad_norm": 0.35148748755455017, "learning_rate": 1.899290897865085e-05, "loss": 0.051027994602918625, "step": 3365 }, { "epoch": 0.4550608792638721, "grad_norm": 0.3146888315677643, "learning_rate": 1.898631108521055e-05, "loss": 0.05037091672420502, "step": 3366 }, { "epoch": 0.4551960726326374, "grad_norm": 0.8882355093955994, "learning_rate": 1.8979712361799027e-05, "loss": 0.05013510584831238, "step": 3367 }, { "epoch": 0.4553312660014026, "grad_norm": 0.9671123027801514, "learning_rate": 1.8973112809790168e-05, "loss": 0.044209957122802734, "step": 3368 }, { "epoch": 0.4554664593701679, "grad_norm": 0.2821582555770874, "learning_rate": 1.8966512430558036e-05, "loss": 0.05418318510055542, "step": 3369 }, { "epoch": 0.45560165273893316, "grad_norm": 1.2645623683929443, "learning_rate": 1.8959911225476858e-05, "loss": 0.0573011189699173, "step": 3370 }, { "epoch": 0.45573684610769843, "grad_norm": 0.3427494466304779, "learning_rate": 1.895330919592105e-05, "loss": 0.03825434297323227, "step": 3371 }, { "epoch": 0.45587203947646365, "grad_norm": 0.6923047304153442, "learning_rate": 1.8946706343265187e-05, "loss": 0.05962452292442322, "step": 3372 }, { "epoch": 0.4560072328452289, "grad_norm": 0.4523015022277832, "learning_rate": 1.8940102668884016e-05, "loss": 0.060275617986917496, "step": 3373 }, { "epoch": 0.4561424262139942, "grad_norm": 0.3502708673477173, "learning_rate": 1.893349817415246e-05, "loss": 0.04480568319559097, "step": 3374 }, { "epoch": 0.4562776195827595, "grad_norm": 0.31605008244514465, "learning_rate": 1.8926892860445607e-05, "loss": 0.038295455276966095, "step": 3375 }, { "epoch": 0.45641281295152475, "grad_norm": 0.8085044622421265, "learning_rate": 1.8920286729138718e-05, "loss": 0.056893039494752884, "step": 3376 }, { "epoch": 0.45654800632028997, "grad_norm": 0.748875081539154, "learning_rate": 1.8913679781607225e-05, "loss": 0.0810333639383316, "step": 3377 }, { "epoch": 0.45668319968905524, "grad_norm": 0.48029693961143494, "learning_rate": 1.8907072019226734e-05, "loss": 0.06152554228901863, "step": 3378 }, { "epoch": 0.4568183930578205, "grad_norm": 0.6049940586090088, "learning_rate": 1.8900463443373015e-05, "loss": 0.05309459567070007, "step": 3379 }, { "epoch": 0.4569535864265858, "grad_norm": 1.2409980297088623, "learning_rate": 1.889385405542201e-05, "loss": 0.06103135645389557, "step": 3380 }, { "epoch": 0.45708877979535106, "grad_norm": 1.6766926050186157, "learning_rate": 1.8887243856749816e-05, "loss": 0.08241996169090271, "step": 3381 }, { "epoch": 0.4572239731641163, "grad_norm": 0.535280704498291, "learning_rate": 1.8880632848732723e-05, "loss": 0.05949222296476364, "step": 3382 }, { "epoch": 0.45735916653288156, "grad_norm": 0.7128165364265442, "learning_rate": 1.8874021032747185e-05, "loss": 0.05191230773925781, "step": 3383 }, { "epoch": 0.45749435990164683, "grad_norm": 0.6367013454437256, "learning_rate": 1.8867408410169803e-05, "loss": 0.07156538963317871, "step": 3384 }, { "epoch": 0.4576295532704121, "grad_norm": 0.6274704933166504, "learning_rate": 1.886079498237737e-05, "loss": 0.06594263017177582, "step": 3385 }, { "epoch": 0.4577647466391773, "grad_norm": 0.8115509748458862, "learning_rate": 1.885418075074683e-05, "loss": 0.04770982265472412, "step": 3386 }, { "epoch": 0.4578999400079426, "grad_norm": 0.8404709696769714, "learning_rate": 1.884756571665531e-05, "loss": 0.05110614746809006, "step": 3387 }, { "epoch": 0.45803513337670787, "grad_norm": 0.7161521315574646, "learning_rate": 1.8840949881480085e-05, "loss": 0.052481502294540405, "step": 3388 }, { "epoch": 0.45817032674547314, "grad_norm": 1.0244470834732056, "learning_rate": 1.8834333246598613e-05, "loss": 0.05016744136810303, "step": 3389 }, { "epoch": 0.4583055201142384, "grad_norm": 0.2579854130744934, "learning_rate": 1.8827715813388514e-05, "loss": 0.04288007318973541, "step": 3390 }, { "epoch": 0.45844071348300364, "grad_norm": 0.25869378447532654, "learning_rate": 1.8821097583227572e-05, "loss": 0.04056933522224426, "step": 3391 }, { "epoch": 0.4585759068517689, "grad_norm": 0.2815791368484497, "learning_rate": 1.8814478557493732e-05, "loss": 0.04701721668243408, "step": 3392 }, { "epoch": 0.4587111002205342, "grad_norm": 0.7656209468841553, "learning_rate": 1.8807858737565118e-05, "loss": 0.05907118320465088, "step": 3393 }, { "epoch": 0.45884629358929946, "grad_norm": 0.3615525960922241, "learning_rate": 1.880123812482001e-05, "loss": 0.051198653876781464, "step": 3394 }, { "epoch": 0.45898148695806473, "grad_norm": 0.7177746295928955, "learning_rate": 1.8794616720636853e-05, "loss": 0.05915292724967003, "step": 3395 }, { "epoch": 0.45911668032682995, "grad_norm": 0.37491458654403687, "learning_rate": 1.8787994526394257e-05, "loss": 0.06178012490272522, "step": 3396 }, { "epoch": 0.4592518736955952, "grad_norm": 0.3707510232925415, "learning_rate": 1.8781371543471e-05, "loss": 0.0526086688041687, "step": 3397 }, { "epoch": 0.4593870670643605, "grad_norm": 1.5047873258590698, "learning_rate": 1.8774747773246024e-05, "loss": 0.06546938419342041, "step": 3398 }, { "epoch": 0.4595222604331258, "grad_norm": 0.6238464117050171, "learning_rate": 1.8768123217098438e-05, "loss": 0.06101621687412262, "step": 3399 }, { "epoch": 0.459657453801891, "grad_norm": 0.5374392867088318, "learning_rate": 1.8761497876407496e-05, "loss": 0.0675344169139862, "step": 3400 }, { "epoch": 0.45979264717065627, "grad_norm": 0.621504008769989, "learning_rate": 1.8754871752552646e-05, "loss": 0.07805784791707993, "step": 3401 }, { "epoch": 0.45992784053942154, "grad_norm": 0.28153881430625916, "learning_rate": 1.8748244846913463e-05, "loss": 0.05409560352563858, "step": 3402 }, { "epoch": 0.4600630339081868, "grad_norm": 0.4041364789009094, "learning_rate": 1.874161716086972e-05, "loss": 0.04168283939361572, "step": 3403 }, { "epoch": 0.4601982272769521, "grad_norm": 0.3518540561199188, "learning_rate": 1.8734988695801333e-05, "loss": 0.0461282804608345, "step": 3404 }, { "epoch": 0.4603334206457173, "grad_norm": 0.9236600399017334, "learning_rate": 1.8728359453088382e-05, "loss": 0.055802371352910995, "step": 3405 }, { "epoch": 0.4604686140144826, "grad_norm": 0.4125099182128906, "learning_rate": 1.8721729434111108e-05, "loss": 0.042859822511672974, "step": 3406 }, { "epoch": 0.46060380738324785, "grad_norm": 0.5350891351699829, "learning_rate": 1.871509864024992e-05, "loss": 0.05139743164181709, "step": 3407 }, { "epoch": 0.46073900075201313, "grad_norm": 0.7655153274536133, "learning_rate": 1.8708467072885385e-05, "loss": 0.06916612386703491, "step": 3408 }, { "epoch": 0.4608741941207784, "grad_norm": 0.8144391179084778, "learning_rate": 1.8701834733398227e-05, "loss": 0.04741179570555687, "step": 3409 }, { "epoch": 0.4610093874895436, "grad_norm": 0.24424514174461365, "learning_rate": 1.8695201623169335e-05, "loss": 0.05104641616344452, "step": 3410 }, { "epoch": 0.4611445808583089, "grad_norm": 0.5939989686012268, "learning_rate": 1.868856774357977e-05, "loss": 0.06348701566457748, "step": 3411 }, { "epoch": 0.46127977422707417, "grad_norm": 0.4160653352737427, "learning_rate": 1.868193309601072e-05, "loss": 0.06438948959112167, "step": 3412 }, { "epoch": 0.46141496759583944, "grad_norm": 0.3901027739048004, "learning_rate": 1.867529768184357e-05, "loss": 0.053491801023483276, "step": 3413 }, { "epoch": 0.46155016096460466, "grad_norm": 0.37588658928871155, "learning_rate": 1.8668661502459842e-05, "loss": 0.060191426426172256, "step": 3414 }, { "epoch": 0.46168535433336994, "grad_norm": 0.662421703338623, "learning_rate": 1.866202455924123e-05, "loss": 0.05157659202814102, "step": 3415 }, { "epoch": 0.4618205477021352, "grad_norm": 0.366858571767807, "learning_rate": 1.865538685356957e-05, "loss": 0.06058875471353531, "step": 3416 }, { "epoch": 0.4619557410709005, "grad_norm": 0.48786288499832153, "learning_rate": 1.864874838682688e-05, "loss": 0.061392560601234436, "step": 3417 }, { "epoch": 0.46209093443966576, "grad_norm": 0.2137085497379303, "learning_rate": 1.864210916039531e-05, "loss": 0.04010477662086487, "step": 3418 }, { "epoch": 0.462226127808431, "grad_norm": 0.27187997102737427, "learning_rate": 1.86354691756572e-05, "loss": 0.05676757171750069, "step": 3419 }, { "epoch": 0.46236132117719625, "grad_norm": 0.31700649857521057, "learning_rate": 1.8628828433995013e-05, "loss": 0.04720379412174225, "step": 3420 }, { "epoch": 0.4624965145459615, "grad_norm": 0.3173675835132599, "learning_rate": 1.86221869367914e-05, "loss": 0.04724174737930298, "step": 3421 }, { "epoch": 0.4626317079147268, "grad_norm": 1.117053747177124, "learning_rate": 1.8615544685429153e-05, "loss": 0.05501580983400345, "step": 3422 }, { "epoch": 0.46276690128349207, "grad_norm": 1.2657760381698608, "learning_rate": 1.860890168129122e-05, "loss": 0.06483148038387299, "step": 3423 }, { "epoch": 0.4629020946522573, "grad_norm": 0.3202558159828186, "learning_rate": 1.8602257925760708e-05, "loss": 0.04469441622495651, "step": 3424 }, { "epoch": 0.46303728802102256, "grad_norm": 0.38990518450737, "learning_rate": 1.859561342022089e-05, "loss": 0.07769818603992462, "step": 3425 }, { "epoch": 0.46317248138978784, "grad_norm": 0.9692981243133545, "learning_rate": 1.8588968166055185e-05, "loss": 0.04871663823723793, "step": 3426 }, { "epoch": 0.4633076747585531, "grad_norm": 1.47981858253479, "learning_rate": 1.8582322164647166e-05, "loss": 0.05820249021053314, "step": 3427 }, { "epoch": 0.46344286812731833, "grad_norm": 0.3757861852645874, "learning_rate": 1.8575675417380568e-05, "loss": 0.054184168577194214, "step": 3428 }, { "epoch": 0.4635780614960836, "grad_norm": 0.4767962098121643, "learning_rate": 1.856902792563928e-05, "loss": 0.051788631826639175, "step": 3429 }, { "epoch": 0.4637132548648489, "grad_norm": 0.4673423767089844, "learning_rate": 1.856237969080735e-05, "loss": 0.044467851519584656, "step": 3430 }, { "epoch": 0.46384844823361415, "grad_norm": 0.42419612407684326, "learning_rate": 1.8555730714268967e-05, "loss": 0.034988969564437866, "step": 3431 }, { "epoch": 0.4639836416023794, "grad_norm": 0.6036272644996643, "learning_rate": 1.8549080997408492e-05, "loss": 0.07301966845989227, "step": 3432 }, { "epoch": 0.46411883497114464, "grad_norm": 0.43555545806884766, "learning_rate": 1.8542430541610426e-05, "loss": 0.06862740218639374, "step": 3433 }, { "epoch": 0.4642540283399099, "grad_norm": 0.2437300980091095, "learning_rate": 1.8535779348259428e-05, "loss": 0.05106465518474579, "step": 3434 }, { "epoch": 0.4643892217086752, "grad_norm": 0.8447793126106262, "learning_rate": 1.852912741874032e-05, "loss": 0.06574514508247375, "step": 3435 }, { "epoch": 0.46452441507744047, "grad_norm": 0.44374769926071167, "learning_rate": 1.8522474754438056e-05, "loss": 0.06850980222225189, "step": 3436 }, { "epoch": 0.46465960844620574, "grad_norm": 0.6663258075714111, "learning_rate": 1.851582135673777e-05, "loss": 0.0613822340965271, "step": 3437 }, { "epoch": 0.46479480181497096, "grad_norm": 0.6282140612602234, "learning_rate": 1.850916722702473e-05, "loss": 0.06273164600133896, "step": 3438 }, { "epoch": 0.46492999518373623, "grad_norm": 0.633892834186554, "learning_rate": 1.8502512366684355e-05, "loss": 0.050341397523880005, "step": 3439 }, { "epoch": 0.4650651885525015, "grad_norm": 0.7119957804679871, "learning_rate": 1.8495856777102232e-05, "loss": 0.05206059291958809, "step": 3440 }, { "epoch": 0.4652003819212668, "grad_norm": 0.3833909332752228, "learning_rate": 1.848920045966408e-05, "loss": 0.06406398117542267, "step": 3441 }, { "epoch": 0.465335575290032, "grad_norm": 0.18850788474082947, "learning_rate": 1.8482543415755797e-05, "loss": 0.026808317750692368, "step": 3442 }, { "epoch": 0.4654707686587973, "grad_norm": 0.5003129839897156, "learning_rate": 1.8475885646763394e-05, "loss": 0.053905852138996124, "step": 3443 }, { "epoch": 0.46560596202756255, "grad_norm": 0.9879121780395508, "learning_rate": 1.8469227154073064e-05, "loss": 0.05546775460243225, "step": 3444 }, { "epoch": 0.4657411553963278, "grad_norm": 1.4978803396224976, "learning_rate": 1.8462567939071142e-05, "loss": 0.08119261264801025, "step": 3445 }, { "epoch": 0.4658763487650931, "grad_norm": 0.65383380651474, "learning_rate": 1.845590800314411e-05, "loss": 0.06052582710981369, "step": 3446 }, { "epoch": 0.4660115421338583, "grad_norm": 0.48527494072914124, "learning_rate": 1.8449247347678607e-05, "loss": 0.04265797138214111, "step": 3447 }, { "epoch": 0.4661467355026236, "grad_norm": 0.36451455950737, "learning_rate": 1.8442585974061405e-05, "loss": 0.04752948880195618, "step": 3448 }, { "epoch": 0.46628192887138886, "grad_norm": 0.7075680494308472, "learning_rate": 1.8435923883679452e-05, "loss": 0.05228123068809509, "step": 3449 }, { "epoch": 0.46641712224015414, "grad_norm": 0.5869306921958923, "learning_rate": 1.8429261077919818e-05, "loss": 0.055602557957172394, "step": 3450 }, { "epoch": 0.4665523156089194, "grad_norm": 0.3610169589519501, "learning_rate": 1.8422597558169742e-05, "loss": 0.05988682806491852, "step": 3451 }, { "epoch": 0.46668750897768463, "grad_norm": 0.5981849431991577, "learning_rate": 1.84159333258166e-05, "loss": 0.053812094032764435, "step": 3452 }, { "epoch": 0.4668227023464499, "grad_norm": 0.27451688051223755, "learning_rate": 1.8409268382247933e-05, "loss": 0.0516011118888855, "step": 3453 }, { "epoch": 0.4669578957152152, "grad_norm": 1.1405771970748901, "learning_rate": 1.8402602728851405e-05, "loss": 0.0609176903963089, "step": 3454 }, { "epoch": 0.46709308908398045, "grad_norm": 1.826614260673523, "learning_rate": 1.839593636701484e-05, "loss": 0.058491140604019165, "step": 3455 }, { "epoch": 0.46722828245274567, "grad_norm": 1.3256646394729614, "learning_rate": 1.8389269298126214e-05, "loss": 0.04908771812915802, "step": 3456 }, { "epoch": 0.46736347582151094, "grad_norm": 0.5058795809745789, "learning_rate": 1.838260152357365e-05, "loss": 0.03418835625052452, "step": 3457 }, { "epoch": 0.4674986691902762, "grad_norm": 0.7562401294708252, "learning_rate": 1.837593304474541e-05, "loss": 0.04584929347038269, "step": 3458 }, { "epoch": 0.4676338625590415, "grad_norm": 0.37185177206993103, "learning_rate": 1.836926386302991e-05, "loss": 0.06661425530910492, "step": 3459 }, { "epoch": 0.46776905592780677, "grad_norm": 0.3748548626899719, "learning_rate": 1.8362593979815696e-05, "loss": 0.05052608251571655, "step": 3460 }, { "epoch": 0.467904249296572, "grad_norm": 0.7036848068237305, "learning_rate": 1.8355923396491496e-05, "loss": 0.0682167336344719, "step": 3461 }, { "epoch": 0.46803944266533726, "grad_norm": 1.4316294193267822, "learning_rate": 1.8349252114446138e-05, "loss": 0.048164620995521545, "step": 3462 }, { "epoch": 0.46817463603410253, "grad_norm": 0.3539901673793793, "learning_rate": 1.834258013506864e-05, "loss": 0.05239434540271759, "step": 3463 }, { "epoch": 0.4683098294028678, "grad_norm": 1.9415796995162964, "learning_rate": 1.833590745974813e-05, "loss": 0.062813401222229, "step": 3464 }, { "epoch": 0.468445022771633, "grad_norm": 0.7124485373497009, "learning_rate": 1.8329234089873892e-05, "loss": 0.06759695708751678, "step": 3465 }, { "epoch": 0.4685802161403983, "grad_norm": 1.0345145463943481, "learning_rate": 1.8322560026835366e-05, "loss": 0.051732808351516724, "step": 3466 }, { "epoch": 0.46871540950916357, "grad_norm": 0.7185250520706177, "learning_rate": 1.8315885272022125e-05, "loss": 0.07194608449935913, "step": 3467 }, { "epoch": 0.46885060287792885, "grad_norm": 0.6272517442703247, "learning_rate": 1.830920982682389e-05, "loss": 0.05070233345031738, "step": 3468 }, { "epoch": 0.4689857962466941, "grad_norm": 0.955436110496521, "learning_rate": 1.830253369263052e-05, "loss": 0.04889228194952011, "step": 3469 }, { "epoch": 0.46912098961545934, "grad_norm": 0.5173132419586182, "learning_rate": 1.8295856870832024e-05, "loss": 0.04823637008666992, "step": 3470 }, { "epoch": 0.4692561829842246, "grad_norm": 0.44537919759750366, "learning_rate": 1.828917936281855e-05, "loss": 0.06603503227233887, "step": 3471 }, { "epoch": 0.4693913763529899, "grad_norm": 0.633919894695282, "learning_rate": 1.8282501169980396e-05, "loss": 0.06749454140663147, "step": 3472 }, { "epoch": 0.46952656972175516, "grad_norm": 1.38040292263031, "learning_rate": 1.8275822293707992e-05, "loss": 0.05902627110481262, "step": 3473 }, { "epoch": 0.46966176309052043, "grad_norm": 1.7549225091934204, "learning_rate": 1.8269142735391917e-05, "loss": 0.07592406868934631, "step": 3474 }, { "epoch": 0.46979695645928565, "grad_norm": 1.3159812688827515, "learning_rate": 1.8262462496422893e-05, "loss": 0.04475869983434677, "step": 3475 }, { "epoch": 0.4699321498280509, "grad_norm": 0.3364255130290985, "learning_rate": 1.8255781578191778e-05, "loss": 0.048334427177906036, "step": 3476 }, { "epoch": 0.4700673431968162, "grad_norm": 0.8258396983146667, "learning_rate": 1.824909998208958e-05, "loss": 0.05500677973031998, "step": 3477 }, { "epoch": 0.4702025365655815, "grad_norm": 1.0199753046035767, "learning_rate": 1.8242417709507434e-05, "loss": 0.056221529841423035, "step": 3478 }, { "epoch": 0.4703377299343467, "grad_norm": 0.5179654955863953, "learning_rate": 1.8235734761836637e-05, "loss": 0.06112375110387802, "step": 3479 }, { "epoch": 0.47047292330311197, "grad_norm": 0.5609266757965088, "learning_rate": 1.82290511404686e-05, "loss": 0.04634537547826767, "step": 3480 }, { "epoch": 0.47060811667187724, "grad_norm": 0.456564724445343, "learning_rate": 1.8222366846794904e-05, "loss": 0.044562939554452896, "step": 3481 }, { "epoch": 0.4707433100406425, "grad_norm": 0.6344621181488037, "learning_rate": 1.8215681882207238e-05, "loss": 0.04998070001602173, "step": 3482 }, { "epoch": 0.4708785034094078, "grad_norm": 1.3882066011428833, "learning_rate": 1.8208996248097462e-05, "loss": 0.06087478995323181, "step": 3483 }, { "epoch": 0.471013696778173, "grad_norm": 0.4571479856967926, "learning_rate": 1.8202309945857557e-05, "loss": 0.05453101545572281, "step": 3484 }, { "epoch": 0.4711488901469383, "grad_norm": 0.543473482131958, "learning_rate": 1.8195622976879638e-05, "loss": 0.06498540937900543, "step": 3485 }, { "epoch": 0.47128408351570356, "grad_norm": 0.596011757850647, "learning_rate": 1.8188935342555977e-05, "loss": 0.048884838819503784, "step": 3486 }, { "epoch": 0.47141927688446883, "grad_norm": 0.769769012928009, "learning_rate": 1.818224704427897e-05, "loss": 0.06901255995035172, "step": 3487 }, { "epoch": 0.4715544702532341, "grad_norm": 0.3739570081233978, "learning_rate": 1.8175558083441162e-05, "loss": 0.06934164464473724, "step": 3488 }, { "epoch": 0.4716896636219993, "grad_norm": 0.36289092898368835, "learning_rate": 1.8168868461435225e-05, "loss": 0.03892380744218826, "step": 3489 }, { "epoch": 0.4718248569907646, "grad_norm": 0.5343566536903381, "learning_rate": 1.8162178179653977e-05, "loss": 0.05886805057525635, "step": 3490 }, { "epoch": 0.47196005035952987, "grad_norm": 0.5023666024208069, "learning_rate": 1.815548723949037e-05, "loss": 0.05915558338165283, "step": 3491 }, { "epoch": 0.47209524372829514, "grad_norm": 0.46567168831825256, "learning_rate": 1.814879564233749e-05, "loss": 0.07154445350170135, "step": 3492 }, { "epoch": 0.47223043709706036, "grad_norm": 0.45786207914352417, "learning_rate": 1.8142103389588567e-05, "loss": 0.05651085823774338, "step": 3493 }, { "epoch": 0.47236563046582564, "grad_norm": 1.2955033779144287, "learning_rate": 1.813541048263696e-05, "loss": 0.047192446887493134, "step": 3494 }, { "epoch": 0.4725008238345909, "grad_norm": 0.8994665741920471, "learning_rate": 1.8128716922876178e-05, "loss": 0.04672951251268387, "step": 3495 }, { "epoch": 0.4726360172033562, "grad_norm": 0.39284980297088623, "learning_rate": 1.812202271169984e-05, "loss": 0.047931570559740067, "step": 3496 }, { "epoch": 0.47277121057212146, "grad_norm": 1.5437575578689575, "learning_rate": 1.8115327850501726e-05, "loss": 0.06407858431339264, "step": 3497 }, { "epoch": 0.4729064039408867, "grad_norm": 0.5672667622566223, "learning_rate": 1.8108632340675746e-05, "loss": 0.04306498169898987, "step": 3498 }, { "epoch": 0.47304159730965195, "grad_norm": 1.1058542728424072, "learning_rate": 1.810193618361593e-05, "loss": 0.059718698263168335, "step": 3499 }, { "epoch": 0.4731767906784172, "grad_norm": 0.32095372676849365, "learning_rate": 1.8095239380716464e-05, "loss": 0.043613508343696594, "step": 3500 }, { "epoch": 0.4733119840471825, "grad_norm": 0.5730457305908203, "learning_rate": 1.808854193337165e-05, "loss": 0.0419449508190155, "step": 3501 }, { "epoch": 0.4734471774159478, "grad_norm": 1.6305323839187622, "learning_rate": 1.8081843842975935e-05, "loss": 0.06303280591964722, "step": 3502 }, { "epoch": 0.473582370784713, "grad_norm": 1.2520970106124878, "learning_rate": 1.80751451109239e-05, "loss": 0.0593019537627697, "step": 3503 }, { "epoch": 0.47371756415347827, "grad_norm": 1.0650655031204224, "learning_rate": 1.806844573861025e-05, "loss": 0.04477345570921898, "step": 3504 }, { "epoch": 0.47385275752224354, "grad_norm": 0.3829644024372101, "learning_rate": 1.806174572742984e-05, "loss": 0.06156153231859207, "step": 3505 }, { "epoch": 0.4739879508910088, "grad_norm": 0.6122543811798096, "learning_rate": 1.8055045078777634e-05, "loss": 0.03903737664222717, "step": 3506 }, { "epoch": 0.47412314425977403, "grad_norm": 0.40458688139915466, "learning_rate": 1.8048343794048762e-05, "loss": 0.0539078563451767, "step": 3507 }, { "epoch": 0.4742583376285393, "grad_norm": 0.933499276638031, "learning_rate": 1.8041641874638445e-05, "loss": 0.07325289398431778, "step": 3508 }, { "epoch": 0.4743935309973046, "grad_norm": 0.2896197736263275, "learning_rate": 1.8034939321942077e-05, "loss": 0.047521188855171204, "step": 3509 }, { "epoch": 0.47452872436606985, "grad_norm": 0.46743831038475037, "learning_rate": 1.8028236137355154e-05, "loss": 0.06093984842300415, "step": 3510 }, { "epoch": 0.47466391773483513, "grad_norm": 0.4030322730541229, "learning_rate": 1.8021532322273327e-05, "loss": 0.04286172240972519, "step": 3511 }, { "epoch": 0.47479911110360035, "grad_norm": 1.35879647731781, "learning_rate": 1.8014827878092347e-05, "loss": 0.05102885514497757, "step": 3512 }, { "epoch": 0.4749343044723656, "grad_norm": 0.5118727684020996, "learning_rate": 1.800812280620813e-05, "loss": 0.0731704980134964, "step": 3513 }, { "epoch": 0.4750694978411309, "grad_norm": 0.4939217269420624, "learning_rate": 1.80014171080167e-05, "loss": 0.0499170646071434, "step": 3514 }, { "epoch": 0.47520469120989617, "grad_norm": 0.29561617970466614, "learning_rate": 1.7994710784914227e-05, "loss": 0.051967136561870575, "step": 3515 }, { "epoch": 0.47533988457866144, "grad_norm": 0.5234993696212769, "learning_rate": 1.7988003838297e-05, "loss": 0.05290350317955017, "step": 3516 }, { "epoch": 0.47547507794742666, "grad_norm": 0.415535032749176, "learning_rate": 1.7981296269561438e-05, "loss": 0.05252787843346596, "step": 3517 }, { "epoch": 0.47561027131619193, "grad_norm": 0.31221282482147217, "learning_rate": 1.7974588080104095e-05, "loss": 0.055691197514534, "step": 3518 }, { "epoch": 0.4757454646849572, "grad_norm": 0.2609007954597473, "learning_rate": 1.7967879271321648e-05, "loss": 0.05249927192926407, "step": 3519 }, { "epoch": 0.4758806580537225, "grad_norm": 0.5697064995765686, "learning_rate": 1.7961169844610913e-05, "loss": 0.03712989389896393, "step": 3520 }, { "epoch": 0.4760158514224877, "grad_norm": 0.9957741498947144, "learning_rate": 1.795445980136883e-05, "loss": 0.061709798872470856, "step": 3521 }, { "epoch": 0.476151044791253, "grad_norm": 1.1795940399169922, "learning_rate": 1.794774914299245e-05, "loss": 0.058040887117385864, "step": 3522 }, { "epoch": 0.47628623816001825, "grad_norm": 0.8981585502624512, "learning_rate": 1.794103787087899e-05, "loss": 0.062032561749219894, "step": 3523 }, { "epoch": 0.4764214315287835, "grad_norm": 0.3013242781162262, "learning_rate": 1.7934325986425755e-05, "loss": 0.04807254672050476, "step": 3524 }, { "epoch": 0.4765566248975488, "grad_norm": 0.4229622185230255, "learning_rate": 1.7927613491030204e-05, "loss": 0.05180754512548447, "step": 3525 }, { "epoch": 0.476691818266314, "grad_norm": 0.39684975147247314, "learning_rate": 1.7920900386089913e-05, "loss": 0.042893849313259125, "step": 3526 }, { "epoch": 0.4768270116350793, "grad_norm": 0.374948114156723, "learning_rate": 1.7914186673002588e-05, "loss": 0.04052875190973282, "step": 3527 }, { "epoch": 0.47696220500384456, "grad_norm": 1.1256228685379028, "learning_rate": 1.790747235316605e-05, "loss": 0.06336495280265808, "step": 3528 }, { "epoch": 0.47709739837260984, "grad_norm": 1.4504151344299316, "learning_rate": 1.790075742797827e-05, "loss": 0.04924369975924492, "step": 3529 }, { "epoch": 0.4772325917413751, "grad_norm": 0.3167463541030884, "learning_rate": 1.789404189883732e-05, "loss": 0.05668431520462036, "step": 3530 }, { "epoch": 0.47736778511014033, "grad_norm": 0.6608914136886597, "learning_rate": 1.7887325767141415e-05, "loss": 0.05914923548698425, "step": 3531 }, { "epoch": 0.4775029784789056, "grad_norm": 0.16262947022914886, "learning_rate": 1.7880609034288894e-05, "loss": 0.03452926129102707, "step": 3532 }, { "epoch": 0.4776381718476709, "grad_norm": 0.38108453154563904, "learning_rate": 1.7873891701678208e-05, "loss": 0.0512351468205452, "step": 3533 }, { "epoch": 0.47777336521643615, "grad_norm": 0.43436264991760254, "learning_rate": 1.786717377070794e-05, "loss": 0.05606218799948692, "step": 3534 }, { "epoch": 0.47790855858520137, "grad_norm": 0.5354858040809631, "learning_rate": 1.7860455242776803e-05, "loss": 0.05875754728913307, "step": 3535 }, { "epoch": 0.47804375195396664, "grad_norm": 0.36154821515083313, "learning_rate": 1.7853736119283635e-05, "loss": 0.058581605553627014, "step": 3536 }, { "epoch": 0.4781789453227319, "grad_norm": 0.8720853924751282, "learning_rate": 1.7847016401627388e-05, "loss": 0.037343598902225494, "step": 3537 }, { "epoch": 0.4783141386914972, "grad_norm": 0.288687527179718, "learning_rate": 1.7840296091207144e-05, "loss": 0.04128693416714668, "step": 3538 }, { "epoch": 0.47844933206026247, "grad_norm": 0.23239724338054657, "learning_rate": 1.7833575189422107e-05, "loss": 0.04333505406975746, "step": 3539 }, { "epoch": 0.4785845254290277, "grad_norm": 0.40633338689804077, "learning_rate": 1.7826853697671604e-05, "loss": 0.05566149204969406, "step": 3540 }, { "epoch": 0.47871971879779296, "grad_norm": 0.8720612525939941, "learning_rate": 1.782013161735509e-05, "loss": 0.06611773371696472, "step": 3541 }, { "epoch": 0.47885491216655823, "grad_norm": 0.40848857164382935, "learning_rate": 1.781340894987213e-05, "loss": 0.05184454098343849, "step": 3542 }, { "epoch": 0.4789901055353235, "grad_norm": 0.29652848839759827, "learning_rate": 1.7806685696622426e-05, "loss": 0.04452505707740784, "step": 3543 }, { "epoch": 0.4791252989040888, "grad_norm": 0.7281109690666199, "learning_rate": 1.779996185900579e-05, "loss": 0.05375983566045761, "step": 3544 }, { "epoch": 0.479260492272854, "grad_norm": 0.790040135383606, "learning_rate": 1.7793237438422165e-05, "loss": 0.05115380883216858, "step": 3545 }, { "epoch": 0.4793956856416193, "grad_norm": 0.6497330665588379, "learning_rate": 1.7786512436271617e-05, "loss": 0.04705348610877991, "step": 3546 }, { "epoch": 0.47953087901038455, "grad_norm": 0.842193603515625, "learning_rate": 1.777978685395431e-05, "loss": 0.06490755081176758, "step": 3547 }, { "epoch": 0.4796660723791498, "grad_norm": 0.4795830547809601, "learning_rate": 1.7773060692870564e-05, "loss": 0.05699475109577179, "step": 3548 }, { "epoch": 0.47980126574791504, "grad_norm": 0.6608070135116577, "learning_rate": 1.7766333954420794e-05, "loss": 0.06021735072135925, "step": 3549 }, { "epoch": 0.4799364591166803, "grad_norm": 0.3167383670806885, "learning_rate": 1.775960664000554e-05, "loss": 0.061611637473106384, "step": 3550 }, { "epoch": 0.4800716524854456, "grad_norm": 2.2271599769592285, "learning_rate": 1.7752878751025463e-05, "loss": 0.07767148315906525, "step": 3551 }, { "epoch": 0.48020684585421086, "grad_norm": 0.7745353579521179, "learning_rate": 1.7746150288881352e-05, "loss": 0.057197004556655884, "step": 3552 }, { "epoch": 0.48034203922297614, "grad_norm": 0.6138405203819275, "learning_rate": 1.7739421254974114e-05, "loss": 0.06160023435950279, "step": 3553 }, { "epoch": 0.48047723259174135, "grad_norm": 0.50901859998703, "learning_rate": 1.7732691650704756e-05, "loss": 0.06081277132034302, "step": 3554 }, { "epoch": 0.48061242596050663, "grad_norm": 0.5683940649032593, "learning_rate": 1.7725961477474423e-05, "loss": 0.05153261125087738, "step": 3555 }, { "epoch": 0.4807476193292719, "grad_norm": 0.5863044857978821, "learning_rate": 1.7719230736684375e-05, "loss": 0.05348905920982361, "step": 3556 }, { "epoch": 0.4808828126980372, "grad_norm": 0.4014899730682373, "learning_rate": 1.771249942973599e-05, "loss": 0.03674754127860069, "step": 3557 }, { "epoch": 0.48101800606680245, "grad_norm": 0.3008640706539154, "learning_rate": 1.7705767558030756e-05, "loss": 0.05308730900287628, "step": 3558 }, { "epoch": 0.48115319943556767, "grad_norm": 0.6073523759841919, "learning_rate": 1.769903512297029e-05, "loss": 0.04392029717564583, "step": 3559 }, { "epoch": 0.48128839280433294, "grad_norm": 0.5493504405021667, "learning_rate": 1.7692302125956315e-05, "loss": 0.04186969995498657, "step": 3560 }, { "epoch": 0.4814235861730982, "grad_norm": 0.5778635740280151, "learning_rate": 1.768556856839068e-05, "loss": 0.06150808930397034, "step": 3561 }, { "epoch": 0.4815587795418635, "grad_norm": 0.7606193423271179, "learning_rate": 1.767883445167535e-05, "loss": 0.059654057025909424, "step": 3562 }, { "epoch": 0.4816939729106287, "grad_norm": 0.7920559048652649, "learning_rate": 1.7672099777212398e-05, "loss": 0.0523810088634491, "step": 3563 }, { "epoch": 0.481829166279394, "grad_norm": 1.0121209621429443, "learning_rate": 1.7665364546404034e-05, "loss": 0.061217568814754486, "step": 3564 }, { "epoch": 0.48196435964815926, "grad_norm": 0.7576692700386047, "learning_rate": 1.7658628760652548e-05, "loss": 0.040264010429382324, "step": 3565 }, { "epoch": 0.48209955301692453, "grad_norm": 0.7712067365646362, "learning_rate": 1.765189242136038e-05, "loss": 0.048045091331005096, "step": 3566 }, { "epoch": 0.4822347463856898, "grad_norm": 0.3825013339519501, "learning_rate": 1.7645155529930065e-05, "loss": 0.059248507022857666, "step": 3567 }, { "epoch": 0.482369939754455, "grad_norm": 0.4226016104221344, "learning_rate": 1.763841808776426e-05, "loss": 0.03228508681058884, "step": 3568 }, { "epoch": 0.4825051331232203, "grad_norm": 1.3603301048278809, "learning_rate": 1.763168009626575e-05, "loss": 0.057688385248184204, "step": 3569 }, { "epoch": 0.48264032649198557, "grad_norm": 1.6420178413391113, "learning_rate": 1.7624941556837406e-05, "loss": 0.05993642657995224, "step": 3570 }, { "epoch": 0.48277551986075085, "grad_norm": 0.24214904010295868, "learning_rate": 1.7618202470882233e-05, "loss": 0.054488539695739746, "step": 3571 }, { "epoch": 0.4829107132295161, "grad_norm": 0.23728974163532257, "learning_rate": 1.7611462839803336e-05, "loss": 0.0469437837600708, "step": 3572 }, { "epoch": 0.48304590659828134, "grad_norm": 0.31635594367980957, "learning_rate": 1.760472266500396e-05, "loss": 0.05480599403381348, "step": 3573 }, { "epoch": 0.4831810999670466, "grad_norm": 0.43468227982521057, "learning_rate": 1.759798194788743e-05, "loss": 0.04596111923456192, "step": 3574 }, { "epoch": 0.4833162933358119, "grad_norm": 0.3930162191390991, "learning_rate": 1.75912406898572e-05, "loss": 0.0596642941236496, "step": 3575 }, { "epoch": 0.48345148670457716, "grad_norm": 0.34544721245765686, "learning_rate": 1.758449889231685e-05, "loss": 0.03439716994762421, "step": 3576 }, { "epoch": 0.4835866800733424, "grad_norm": 0.4070488512516022, "learning_rate": 1.757775655667004e-05, "loss": 0.04105016589164734, "step": 3577 }, { "epoch": 0.48372187344210765, "grad_norm": 0.8999466300010681, "learning_rate": 1.757101368432057e-05, "loss": 0.06334449350833893, "step": 3578 }, { "epoch": 0.4838570668108729, "grad_norm": 0.6146352291107178, "learning_rate": 1.7564270276672343e-05, "loss": 0.06370649486780167, "step": 3579 }, { "epoch": 0.4839922601796382, "grad_norm": 0.3868604898452759, "learning_rate": 1.7557526335129372e-05, "loss": 0.05511938035488129, "step": 3580 }, { "epoch": 0.4841274535484035, "grad_norm": 0.4834255278110504, "learning_rate": 1.7550781861095774e-05, "loss": 0.04977622628211975, "step": 3581 }, { "epoch": 0.4842626469171687, "grad_norm": 0.8302196860313416, "learning_rate": 1.754403685597579e-05, "loss": 0.05525088310241699, "step": 3582 }, { "epoch": 0.48439784028593397, "grad_norm": 0.9061380624771118, "learning_rate": 1.7537291321173773e-05, "loss": 0.05217995494604111, "step": 3583 }, { "epoch": 0.48453303365469924, "grad_norm": 0.27826252579689026, "learning_rate": 1.7530545258094165e-05, "loss": 0.044787466526031494, "step": 3584 }, { "epoch": 0.4846682270234645, "grad_norm": 1.808152437210083, "learning_rate": 1.7523798668141548e-05, "loss": 0.0737103819847107, "step": 3585 }, { "epoch": 0.48480342039222973, "grad_norm": 0.47455063462257385, "learning_rate": 1.7517051552720584e-05, "loss": 0.058237701654434204, "step": 3586 }, { "epoch": 0.484938613760995, "grad_norm": 0.47527530789375305, "learning_rate": 1.7510303913236066e-05, "loss": 0.06245797872543335, "step": 3587 }, { "epoch": 0.4850738071297603, "grad_norm": 0.37395185232162476, "learning_rate": 1.7503555751092883e-05, "loss": 0.05553579330444336, "step": 3588 }, { "epoch": 0.48520900049852556, "grad_norm": 0.4764285683631897, "learning_rate": 1.7496807067696046e-05, "loss": 0.0616372674703598, "step": 3589 }, { "epoch": 0.48534419386729083, "grad_norm": 0.3947109282016754, "learning_rate": 1.7490057864450665e-05, "loss": 0.0552312433719635, "step": 3590 }, { "epoch": 0.48547938723605605, "grad_norm": 0.6810263395309448, "learning_rate": 1.748330814276195e-05, "loss": 0.06639324128627777, "step": 3591 }, { "epoch": 0.4856145806048213, "grad_norm": 0.37617674469947815, "learning_rate": 1.7476557904035243e-05, "loss": 0.054632678627967834, "step": 3592 }, { "epoch": 0.4857497739735866, "grad_norm": 0.5433797240257263, "learning_rate": 1.7469807149675973e-05, "loss": 0.060484692454338074, "step": 3593 }, { "epoch": 0.48588496734235187, "grad_norm": 1.147133469581604, "learning_rate": 1.7463055881089685e-05, "loss": 0.061211928725242615, "step": 3594 }, { "epoch": 0.48602016071111714, "grad_norm": 0.4189985692501068, "learning_rate": 1.7456304099682024e-05, "loss": 0.06052279472351074, "step": 3595 }, { "epoch": 0.48615535407988236, "grad_norm": 0.24788349866867065, "learning_rate": 1.7449551806858756e-05, "loss": 0.03750964254140854, "step": 3596 }, { "epoch": 0.48629054744864764, "grad_norm": 0.41173654794692993, "learning_rate": 1.7442799004025733e-05, "loss": 0.05764399468898773, "step": 3597 }, { "epoch": 0.4864257408174129, "grad_norm": 0.2908349633216858, "learning_rate": 1.7436045692588934e-05, "loss": 0.041406065225601196, "step": 3598 }, { "epoch": 0.4865609341861782, "grad_norm": 1.3618172407150269, "learning_rate": 1.742929187395443e-05, "loss": 0.06943368911743164, "step": 3599 }, { "epoch": 0.4866961275549434, "grad_norm": 0.31823453307151794, "learning_rate": 1.7422537549528402e-05, "loss": 0.04396023601293564, "step": 3600 }, { "epoch": 0.4868313209237087, "grad_norm": 0.3217940628528595, "learning_rate": 1.7415782720717147e-05, "loss": 0.05544228479266167, "step": 3601 }, { "epoch": 0.48696651429247395, "grad_norm": 0.4987863600254059, "learning_rate": 1.740902738892704e-05, "loss": 0.04933873564004898, "step": 3602 }, { "epoch": 0.4871017076612392, "grad_norm": 0.4408535361289978, "learning_rate": 1.7402271555564585e-05, "loss": 0.06837975978851318, "step": 3603 }, { "epoch": 0.4872369010300045, "grad_norm": 0.6852837204933167, "learning_rate": 1.739551522203638e-05, "loss": 0.05049797147512436, "step": 3604 }, { "epoch": 0.4873720943987697, "grad_norm": 0.8003811240196228, "learning_rate": 1.738875838974913e-05, "loss": 0.07135294377803802, "step": 3605 }, { "epoch": 0.487507287767535, "grad_norm": 0.27592015266418457, "learning_rate": 1.7382001060109652e-05, "loss": 0.028982965275645256, "step": 3606 }, { "epoch": 0.48764248113630027, "grad_norm": 0.64210444688797, "learning_rate": 1.7375243234524843e-05, "loss": 0.0634416788816452, "step": 3607 }, { "epoch": 0.48777767450506554, "grad_norm": 0.31281939148902893, "learning_rate": 1.736848491440173e-05, "loss": 0.046354249119758606, "step": 3608 }, { "epoch": 0.4879128678738308, "grad_norm": 0.71690833568573, "learning_rate": 1.7361726101147424e-05, "loss": 0.056982751935720444, "step": 3609 }, { "epoch": 0.48804806124259603, "grad_norm": 0.783579409122467, "learning_rate": 1.7354966796169157e-05, "loss": 0.05837477743625641, "step": 3610 }, { "epoch": 0.4881832546113613, "grad_norm": 0.5557472109794617, "learning_rate": 1.734820700087424e-05, "loss": 0.06476366519927979, "step": 3611 }, { "epoch": 0.4883184479801266, "grad_norm": 0.46474185585975647, "learning_rate": 1.7341446716670103e-05, "loss": 0.0403604730963707, "step": 3612 }, { "epoch": 0.48845364134889185, "grad_norm": 0.9985072612762451, "learning_rate": 1.7334685944964272e-05, "loss": 0.05111198127269745, "step": 3613 }, { "epoch": 0.4885888347176571, "grad_norm": 0.4394057095050812, "learning_rate": 1.732792468716438e-05, "loss": 0.058096371591091156, "step": 3614 }, { "epoch": 0.48872402808642235, "grad_norm": 0.5287598371505737, "learning_rate": 1.7321162944678155e-05, "loss": 0.0497131384909153, "step": 3615 }, { "epoch": 0.4888592214551876, "grad_norm": 1.235806941986084, "learning_rate": 1.7314400718913425e-05, "loss": 0.06044672429561615, "step": 3616 }, { "epoch": 0.4889944148239529, "grad_norm": 0.39756840467453003, "learning_rate": 1.7307638011278126e-05, "loss": 0.051515936851501465, "step": 3617 }, { "epoch": 0.48912960819271817, "grad_norm": 0.3148517906665802, "learning_rate": 1.7300874823180284e-05, "loss": 0.048752591013908386, "step": 3618 }, { "epoch": 0.4892648015614834, "grad_norm": 0.7498884797096252, "learning_rate": 1.7294111156028034e-05, "loss": 0.06937260180711746, "step": 3619 }, { "epoch": 0.48939999493024866, "grad_norm": 0.5027851462364197, "learning_rate": 1.7287347011229605e-05, "loss": 0.04623818397521973, "step": 3620 }, { "epoch": 0.48953518829901393, "grad_norm": 0.8526375889778137, "learning_rate": 1.7280582390193333e-05, "loss": 0.05940094590187073, "step": 3621 }, { "epoch": 0.4896703816677792, "grad_norm": 0.5256524085998535, "learning_rate": 1.7273817294327653e-05, "loss": 0.037637438625097275, "step": 3622 }, { "epoch": 0.4898055750365445, "grad_norm": 0.5717564225196838, "learning_rate": 1.726705172504108e-05, "loss": 0.0569169819355011, "step": 3623 }, { "epoch": 0.4899407684053097, "grad_norm": 0.7404683232307434, "learning_rate": 1.7260285683742248e-05, "loss": 0.03827935457229614, "step": 3624 }, { "epoch": 0.490075961774075, "grad_norm": 0.5484645366668701, "learning_rate": 1.7253519171839883e-05, "loss": 0.03865198791027069, "step": 3625 }, { "epoch": 0.49021115514284025, "grad_norm": 1.9374899864196777, "learning_rate": 1.724675219074281e-05, "loss": 0.05823603272438049, "step": 3626 }, { "epoch": 0.4903463485116055, "grad_norm": 0.561532199382782, "learning_rate": 1.7239984741859957e-05, "loss": 0.048579275608062744, "step": 3627 }, { "epoch": 0.49048154188037074, "grad_norm": 0.9890323281288147, "learning_rate": 1.7233216826600324e-05, "loss": 0.05866961181163788, "step": 3628 }, { "epoch": 0.490616735249136, "grad_norm": 0.6588320136070251, "learning_rate": 1.7226448446373047e-05, "loss": 0.06463548541069031, "step": 3629 }, { "epoch": 0.4907519286179013, "grad_norm": 0.3036702573299408, "learning_rate": 1.7219679602587326e-05, "loss": 0.04972761869430542, "step": 3630 }, { "epoch": 0.49088712198666656, "grad_norm": 0.507706880569458, "learning_rate": 1.7212910296652476e-05, "loss": 0.0412788987159729, "step": 3631 }, { "epoch": 0.49102231535543184, "grad_norm": 1.1250947713851929, "learning_rate": 1.7206140529977904e-05, "loss": 0.05426212400197983, "step": 3632 }, { "epoch": 0.49115750872419706, "grad_norm": 0.5611497759819031, "learning_rate": 1.719937030397311e-05, "loss": 0.06120358407497406, "step": 3633 }, { "epoch": 0.49129270209296233, "grad_norm": 1.639595627784729, "learning_rate": 1.7192599620047683e-05, "loss": 0.07729215919971466, "step": 3634 }, { "epoch": 0.4914278954617276, "grad_norm": 1.4780056476593018, "learning_rate": 1.718582847961133e-05, "loss": 0.06893113255500793, "step": 3635 }, { "epoch": 0.4915630888304929, "grad_norm": 0.35439521074295044, "learning_rate": 1.7179056884073826e-05, "loss": 0.0451018288731575, "step": 3636 }, { "epoch": 0.49169828219925815, "grad_norm": 0.9456911683082581, "learning_rate": 1.717228483484506e-05, "loss": 0.06447529792785645, "step": 3637 }, { "epoch": 0.49183347556802337, "grad_norm": 0.49770087003707886, "learning_rate": 1.7165512333335013e-05, "loss": 0.04016174376010895, "step": 3638 }, { "epoch": 0.49196866893678864, "grad_norm": 0.4902314841747284, "learning_rate": 1.715873938095374e-05, "loss": 0.05016827583312988, "step": 3639 }, { "epoch": 0.4921038623055539, "grad_norm": 0.7114960551261902, "learning_rate": 1.7151965979111427e-05, "loss": 0.05821014940738678, "step": 3640 }, { "epoch": 0.4922390556743192, "grad_norm": 0.5455580353736877, "learning_rate": 1.7145192129218313e-05, "loss": 0.0648580938577652, "step": 3641 }, { "epoch": 0.4923742490430844, "grad_norm": 0.5809600949287415, "learning_rate": 1.7138417832684763e-05, "loss": 0.06623226404190063, "step": 3642 }, { "epoch": 0.4925094424118497, "grad_norm": 0.43233659863471985, "learning_rate": 1.7131643090921216e-05, "loss": 0.07102841138839722, "step": 3643 }, { "epoch": 0.49264463578061496, "grad_norm": 0.4675983786582947, "learning_rate": 1.712486790533821e-05, "loss": 0.058873116970062256, "step": 3644 }, { "epoch": 0.49277982914938023, "grad_norm": 0.7671048641204834, "learning_rate": 1.7118092277346372e-05, "loss": 0.06054091453552246, "step": 3645 }, { "epoch": 0.4929150225181455, "grad_norm": 1.271550178527832, "learning_rate": 1.7111316208356428e-05, "loss": 0.05534249544143677, "step": 3646 }, { "epoch": 0.4930502158869107, "grad_norm": 0.302577942609787, "learning_rate": 1.7104539699779192e-05, "loss": 0.04478336125612259, "step": 3647 }, { "epoch": 0.493185409255676, "grad_norm": 0.3643266558647156, "learning_rate": 1.709776275302557e-05, "loss": 0.054196715354919434, "step": 3648 }, { "epoch": 0.4933206026244413, "grad_norm": 0.8454395532608032, "learning_rate": 1.7090985369506555e-05, "loss": 0.06105788052082062, "step": 3649 }, { "epoch": 0.49345579599320655, "grad_norm": 0.4101276695728302, "learning_rate": 1.708420755063323e-05, "loss": 0.03762774169445038, "step": 3650 }, { "epoch": 0.4935909893619718, "grad_norm": 0.3875649869441986, "learning_rate": 1.707742929781678e-05, "loss": 0.05809156596660614, "step": 3651 }, { "epoch": 0.49372618273073704, "grad_norm": 0.40271130204200745, "learning_rate": 1.707065061246848e-05, "loss": 0.05698806047439575, "step": 3652 }, { "epoch": 0.4938613760995023, "grad_norm": 0.8993547558784485, "learning_rate": 1.7063871495999677e-05, "loss": 0.07150521874427795, "step": 3653 }, { "epoch": 0.4939965694682676, "grad_norm": 1.4272431135177612, "learning_rate": 1.705709194982182e-05, "loss": 0.045105963945388794, "step": 3654 }, { "epoch": 0.49413176283703286, "grad_norm": 1.2940682172775269, "learning_rate": 1.7050311975346447e-05, "loss": 0.07005545496940613, "step": 3655 }, { "epoch": 0.4942669562057981, "grad_norm": 1.3660322427749634, "learning_rate": 1.704353157398519e-05, "loss": 0.06419873237609863, "step": 3656 }, { "epoch": 0.49440214957456335, "grad_norm": 0.4984577000141144, "learning_rate": 1.7036750747149764e-05, "loss": 0.06574180722236633, "step": 3657 }, { "epoch": 0.49453734294332863, "grad_norm": 0.4779943525791168, "learning_rate": 1.702996949625197e-05, "loss": 0.06674687564373016, "step": 3658 }, { "epoch": 0.4946725363120939, "grad_norm": 0.3778025805950165, "learning_rate": 1.7023187822703702e-05, "loss": 0.04428689926862717, "step": 3659 }, { "epoch": 0.4948077296808592, "grad_norm": 0.43517693877220154, "learning_rate": 1.7016405727916936e-05, "loss": 0.05122418701648712, "step": 3660 }, { "epoch": 0.4949429230496244, "grad_norm": 0.8824998736381531, "learning_rate": 1.700962321330375e-05, "loss": 0.05420784652233124, "step": 3661 }, { "epoch": 0.49507811641838967, "grad_norm": 0.4531822204589844, "learning_rate": 1.700284028027629e-05, "loss": 0.055697277188301086, "step": 3662 }, { "epoch": 0.49521330978715494, "grad_norm": 0.9987421035766602, "learning_rate": 1.6996056930246807e-05, "loss": 0.06256911158561707, "step": 3663 }, { "epoch": 0.4953485031559202, "grad_norm": 1.1916788816452026, "learning_rate": 1.6989273164627626e-05, "loss": 0.04673158377408981, "step": 3664 }, { "epoch": 0.4954836965246855, "grad_norm": 0.9413378238677979, "learning_rate": 1.6982488984831163e-05, "loss": 0.04367947578430176, "step": 3665 }, { "epoch": 0.4956188898934507, "grad_norm": 0.5563457608222961, "learning_rate": 1.697570439226992e-05, "loss": 0.053668826818466187, "step": 3666 }, { "epoch": 0.495754083262216, "grad_norm": 0.5396509766578674, "learning_rate": 1.6968919388356486e-05, "loss": 0.060034189373254776, "step": 3667 }, { "epoch": 0.49588927663098126, "grad_norm": 0.4821469485759735, "learning_rate": 1.696213397450354e-05, "loss": 0.05835293233394623, "step": 3668 }, { "epoch": 0.49602446999974653, "grad_norm": 0.758277177810669, "learning_rate": 1.695534815212384e-05, "loss": 0.06622427701950073, "step": 3669 }, { "epoch": 0.49615966336851175, "grad_norm": 0.3500867486000061, "learning_rate": 1.6948561922630223e-05, "loss": 0.030244916677474976, "step": 3670 }, { "epoch": 0.496294856737277, "grad_norm": 0.42022255063056946, "learning_rate": 1.694177528743562e-05, "loss": 0.056450068950653076, "step": 3671 }, { "epoch": 0.4964300501060423, "grad_norm": 0.3428938686847687, "learning_rate": 1.6934988247953053e-05, "loss": 0.034330885857343674, "step": 3672 }, { "epoch": 0.49656524347480757, "grad_norm": 0.9088848829269409, "learning_rate": 1.6928200805595606e-05, "loss": 0.049365729093551636, "step": 3673 }, { "epoch": 0.49670043684357285, "grad_norm": 1.6058392524719238, "learning_rate": 1.6921412961776475e-05, "loss": 0.06689521670341492, "step": 3674 }, { "epoch": 0.49683563021233806, "grad_norm": 0.7300386428833008, "learning_rate": 1.6914624717908922e-05, "loss": 0.061847418546676636, "step": 3675 }, { "epoch": 0.49697082358110334, "grad_norm": 1.1861059665679932, "learning_rate": 1.6907836075406288e-05, "loss": 0.04366448521614075, "step": 3676 }, { "epoch": 0.4971060169498686, "grad_norm": 0.352228045463562, "learning_rate": 1.690104703568201e-05, "loss": 0.06551776826381683, "step": 3677 }, { "epoch": 0.4972412103186339, "grad_norm": 0.4220818877220154, "learning_rate": 1.68942576001496e-05, "loss": 0.06559351086616516, "step": 3678 }, { "epoch": 0.49737640368739916, "grad_norm": 0.4009663462638855, "learning_rate": 1.6887467770222658e-05, "loss": 0.04567134380340576, "step": 3679 }, { "epoch": 0.4975115970561644, "grad_norm": 1.063888430595398, "learning_rate": 1.6880677547314865e-05, "loss": 0.05458955094218254, "step": 3680 }, { "epoch": 0.49764679042492965, "grad_norm": 0.7614601254463196, "learning_rate": 1.6873886932839973e-05, "loss": 0.0724869966506958, "step": 3681 }, { "epoch": 0.4977819837936949, "grad_norm": 0.31583043932914734, "learning_rate": 1.686709592821183e-05, "loss": 0.04564934968948364, "step": 3682 }, { "epoch": 0.4979171771624602, "grad_norm": 1.0900447368621826, "learning_rate": 1.6860304534844355e-05, "loss": 0.060673344880342484, "step": 3683 }, { "epoch": 0.4980523705312254, "grad_norm": 0.40941131114959717, "learning_rate": 1.6853512754151556e-05, "loss": 0.04743904620409012, "step": 3684 }, { "epoch": 0.4981875638999907, "grad_norm": 0.26381832361221313, "learning_rate": 1.684672058754752e-05, "loss": 0.05201912671327591, "step": 3685 }, { "epoch": 0.49832275726875597, "grad_norm": 1.4613826274871826, "learning_rate": 1.6839928036446416e-05, "loss": 0.05368952080607414, "step": 3686 }, { "epoch": 0.49845795063752124, "grad_norm": 1.019508957862854, "learning_rate": 1.6833135102262473e-05, "loss": 0.05993691086769104, "step": 3687 }, { "epoch": 0.4985931440062865, "grad_norm": 0.7055081129074097, "learning_rate": 1.682634178641003e-05, "loss": 0.07027196884155273, "step": 3688 }, { "epoch": 0.49872833737505173, "grad_norm": 1.317682147026062, "learning_rate": 1.6819548090303485e-05, "loss": 0.055247001349925995, "step": 3689 }, { "epoch": 0.498863530743817, "grad_norm": 1.383270263671875, "learning_rate": 1.6812754015357328e-05, "loss": 0.059474170207977295, "step": 3690 }, { "epoch": 0.4989987241125823, "grad_norm": 0.5482416152954102, "learning_rate": 1.680595956298612e-05, "loss": 0.056713249534368515, "step": 3691 }, { "epoch": 0.49913391748134756, "grad_norm": 0.8580617904663086, "learning_rate": 1.6799164734604497e-05, "loss": 0.052371665835380554, "step": 3692 }, { "epoch": 0.49926911085011283, "grad_norm": 0.7360714077949524, "learning_rate": 1.6792369531627186e-05, "loss": 0.060470789670944214, "step": 3693 }, { "epoch": 0.49940430421887805, "grad_norm": 0.5085428953170776, "learning_rate": 1.6785573955468974e-05, "loss": 0.035801105201244354, "step": 3694 }, { "epoch": 0.4995394975876433, "grad_norm": 0.5546193718910217, "learning_rate": 1.6778778007544745e-05, "loss": 0.058577146381139755, "step": 3695 }, { "epoch": 0.4996746909564086, "grad_norm": 0.5918977856636047, "learning_rate": 1.6771981689269452e-05, "loss": 0.047676801681518555, "step": 3696 }, { "epoch": 0.49980988432517387, "grad_norm": 0.5465928316116333, "learning_rate": 1.6765185002058123e-05, "loss": 0.05425170063972473, "step": 3697 }, { "epoch": 0.4999450776939391, "grad_norm": 0.28634053468704224, "learning_rate": 1.6758387947325856e-05, "loss": 0.04169359430670738, "step": 3698 }, { "epoch": 0.5000802710627044, "grad_norm": 1.013948917388916, "learning_rate": 1.6751590526487843e-05, "loss": 0.058259516954422, "step": 3699 }, { "epoch": 0.5002154644314697, "grad_norm": 0.4457032084465027, "learning_rate": 1.6744792740959347e-05, "loss": 0.06493023037910461, "step": 3700 }, { "epoch": 0.5003506578002349, "grad_norm": 0.762586772441864, "learning_rate": 1.6737994592155697e-05, "loss": 0.06665882468223572, "step": 3701 }, { "epoch": 0.5004858511690001, "grad_norm": 0.6518793702125549, "learning_rate": 1.6731196081492307e-05, "loss": 0.0613759383559227, "step": 3702 }, { "epoch": 0.5006210445377655, "grad_norm": 0.31194958090782166, "learning_rate": 1.6724397210384655e-05, "loss": 0.044153835624456406, "step": 3703 }, { "epoch": 0.5007562379065307, "grad_norm": 1.887346863746643, "learning_rate": 1.6717597980248308e-05, "loss": 0.06944090127944946, "step": 3704 }, { "epoch": 0.5008914312752959, "grad_norm": 1.396126389503479, "learning_rate": 1.6710798392498904e-05, "loss": 0.06309475004673004, "step": 3705 }, { "epoch": 0.5010266246440612, "grad_norm": 0.3908338248729706, "learning_rate": 1.6703998448552154e-05, "loss": 0.05575186014175415, "step": 3706 }, { "epoch": 0.5011618180128264, "grad_norm": 0.3825208842754364, "learning_rate": 1.669719814982384e-05, "loss": 0.055212318897247314, "step": 3707 }, { "epoch": 0.5012970113815918, "grad_norm": 0.24458405375480652, "learning_rate": 1.6690397497729818e-05, "loss": 0.042881518602371216, "step": 3708 }, { "epoch": 0.501432204750357, "grad_norm": 0.3589680790901184, "learning_rate": 1.6683596493686028e-05, "loss": 0.039042823016643524, "step": 3709 }, { "epoch": 0.5015673981191222, "grad_norm": 0.7032709121704102, "learning_rate": 1.667679513910846e-05, "loss": 0.05372938513755798, "step": 3710 }, { "epoch": 0.5017025914878875, "grad_norm": 0.5207394957542419, "learning_rate": 1.666999343541321e-05, "loss": 0.06177643686532974, "step": 3711 }, { "epoch": 0.5018377848566528, "grad_norm": 1.1613737344741821, "learning_rate": 1.6663191384016422e-05, "loss": 0.05667185038328171, "step": 3712 }, { "epoch": 0.5019729782254181, "grad_norm": 0.5407153964042664, "learning_rate": 1.6656388986334315e-05, "loss": 0.04644760861992836, "step": 3713 }, { "epoch": 0.5021081715941833, "grad_norm": 0.8264970183372498, "learning_rate": 1.6649586243783186e-05, "loss": 0.0530182421207428, "step": 3714 }, { "epoch": 0.5022433649629485, "grad_norm": 0.7608914971351624, "learning_rate": 1.6642783157779405e-05, "loss": 0.05709357559680939, "step": 3715 }, { "epoch": 0.5023785583317139, "grad_norm": 0.48501965403556824, "learning_rate": 1.6635979729739417e-05, "loss": 0.08360063284635544, "step": 3716 }, { "epoch": 0.5025137517004791, "grad_norm": 0.4754328727722168, "learning_rate": 1.662917596107972e-05, "loss": 0.05376851558685303, "step": 3717 }, { "epoch": 0.5026489450692444, "grad_norm": 0.9103453755378723, "learning_rate": 1.6622371853216904e-05, "loss": 0.050147056579589844, "step": 3718 }, { "epoch": 0.5027841384380096, "grad_norm": 1.55764901638031, "learning_rate": 1.661556740756761e-05, "loss": 0.06033648923039436, "step": 3719 }, { "epoch": 0.5029193318067748, "grad_norm": 0.27574923634529114, "learning_rate": 1.6608762625548572e-05, "loss": 0.06433028727769852, "step": 3720 }, { "epoch": 0.5030545251755402, "grad_norm": 0.7348741292953491, "learning_rate": 1.6601957508576573e-05, "loss": 0.04384501278400421, "step": 3721 }, { "epoch": 0.5031897185443054, "grad_norm": 1.7975655794143677, "learning_rate": 1.659515205806848e-05, "loss": 0.04705321788787842, "step": 3722 }, { "epoch": 0.5033249119130707, "grad_norm": 0.2675785422325134, "learning_rate": 1.6588346275441224e-05, "loss": 0.04868397116661072, "step": 3723 }, { "epoch": 0.5034601052818359, "grad_norm": 1.0152156352996826, "learning_rate": 1.65815401621118e-05, "loss": 0.052325956523418427, "step": 3724 }, { "epoch": 0.5035952986506012, "grad_norm": 0.5642540454864502, "learning_rate": 1.657473371949728e-05, "loss": 0.05865258723497391, "step": 3725 }, { "epoch": 0.5037304920193665, "grad_norm": 0.4270670413970947, "learning_rate": 1.6567926949014805e-05, "loss": 0.04334026575088501, "step": 3726 }, { "epoch": 0.5038656853881317, "grad_norm": 0.9907171130180359, "learning_rate": 1.6561119852081574e-05, "loss": 0.056989990174770355, "step": 3727 }, { "epoch": 0.5040008787568969, "grad_norm": 0.619630753993988, "learning_rate": 1.6554312430114868e-05, "loss": 0.06310467422008514, "step": 3728 }, { "epoch": 0.5041360721256622, "grad_norm": 0.35960811376571655, "learning_rate": 1.6547504684532026e-05, "loss": 0.03643840551376343, "step": 3729 }, { "epoch": 0.5042712654944275, "grad_norm": 0.6705995202064514, "learning_rate": 1.6540696616750454e-05, "loss": 0.06269891560077667, "step": 3730 }, { "epoch": 0.5044064588631928, "grad_norm": 0.42261171340942383, "learning_rate": 1.6533888228187628e-05, "loss": 0.04221126437187195, "step": 3731 }, { "epoch": 0.504541652231958, "grad_norm": 0.36732491850852966, "learning_rate": 1.6527079520261103e-05, "loss": 0.05267505347728729, "step": 3732 }, { "epoch": 0.5046768456007232, "grad_norm": 0.43220579624176025, "learning_rate": 1.6520270494388472e-05, "loss": 0.047395966947078705, "step": 3733 }, { "epoch": 0.5048120389694886, "grad_norm": 1.1709681749343872, "learning_rate": 1.6513461151987418e-05, "loss": 0.04951769858598709, "step": 3734 }, { "epoch": 0.5049472323382538, "grad_norm": 0.6413924098014832, "learning_rate": 1.6506651494475678e-05, "loss": 0.06443445384502411, "step": 3735 }, { "epoch": 0.5050824257070191, "grad_norm": 0.7721502780914307, "learning_rate": 1.6499841523271062e-05, "loss": 0.07294841855764389, "step": 3736 }, { "epoch": 0.5052176190757843, "grad_norm": 0.47612032294273376, "learning_rate": 1.649303123979145e-05, "loss": 0.053695209324359894, "step": 3737 }, { "epoch": 0.5053528124445495, "grad_norm": 0.4241267144680023, "learning_rate": 1.648622064545477e-05, "loss": 0.07089121639728546, "step": 3738 }, { "epoch": 0.5054880058133149, "grad_norm": 0.41727402806282043, "learning_rate": 1.6479409741679025e-05, "loss": 0.04553259164094925, "step": 3739 }, { "epoch": 0.5056231991820801, "grad_norm": 0.7261723279953003, "learning_rate": 1.6472598529882277e-05, "loss": 0.04759795963764191, "step": 3740 }, { "epoch": 0.5057583925508454, "grad_norm": 0.4641232490539551, "learning_rate": 1.646578701148267e-05, "loss": 0.05426902323961258, "step": 3741 }, { "epoch": 0.5058935859196106, "grad_norm": 0.331417441368103, "learning_rate": 1.6458975187898384e-05, "loss": 0.04891033470630646, "step": 3742 }, { "epoch": 0.5060287792883759, "grad_norm": 0.8553628325462341, "learning_rate": 1.6452163060547687e-05, "loss": 0.060906924307346344, "step": 3743 }, { "epoch": 0.5061639726571412, "grad_norm": 0.38397231698036194, "learning_rate": 1.64453506308489e-05, "loss": 0.06193387508392334, "step": 3744 }, { "epoch": 0.5062991660259064, "grad_norm": 0.5434215068817139, "learning_rate": 1.64385379002204e-05, "loss": 0.04837833344936371, "step": 3745 }, { "epoch": 0.5064343593946717, "grad_norm": 0.427910715341568, "learning_rate": 1.643172487008064e-05, "loss": 0.05249765142798424, "step": 3746 }, { "epoch": 0.506569552763437, "grad_norm": 0.6468247175216675, "learning_rate": 1.6424911541848124e-05, "loss": 0.06334252655506134, "step": 3747 }, { "epoch": 0.5067047461322022, "grad_norm": 0.33395493030548096, "learning_rate": 1.641809791694143e-05, "loss": 0.039932504296302795, "step": 3748 }, { "epoch": 0.5068399395009675, "grad_norm": 0.9206994771957397, "learning_rate": 1.6411283996779184e-05, "loss": 0.06204662472009659, "step": 3749 }, { "epoch": 0.5069751328697327, "grad_norm": 0.6091291308403015, "learning_rate": 1.6404469782780088e-05, "loss": 0.06372246891260147, "step": 3750 }, { "epoch": 0.507110326238498, "grad_norm": 0.9094211459159851, "learning_rate": 1.639765527636289e-05, "loss": 0.03974538296461105, "step": 3751 }, { "epoch": 0.5072455196072633, "grad_norm": 0.4258108139038086, "learning_rate": 1.639084047894641e-05, "loss": 0.05518403649330139, "step": 3752 }, { "epoch": 0.5073807129760285, "grad_norm": 0.5820168256759644, "learning_rate": 1.638402539194953e-05, "loss": 0.07280158996582031, "step": 3753 }, { "epoch": 0.5075159063447938, "grad_norm": 0.6401300430297852, "learning_rate": 1.6377210016791182e-05, "loss": 0.05840405076742172, "step": 3754 }, { "epoch": 0.507651099713559, "grad_norm": 0.7480988502502441, "learning_rate": 1.6370394354890364e-05, "loss": 0.044561177492141724, "step": 3755 }, { "epoch": 0.5077862930823243, "grad_norm": 0.6101779341697693, "learning_rate": 1.636357840766613e-05, "loss": 0.06543612480163574, "step": 3756 }, { "epoch": 0.5079214864510896, "grad_norm": 0.8819478154182434, "learning_rate": 1.6356762176537606e-05, "loss": 0.05290394276380539, "step": 3757 }, { "epoch": 0.5080566798198548, "grad_norm": 0.39467740058898926, "learning_rate": 1.6349945662923953e-05, "loss": 0.07516656816005707, "step": 3758 }, { "epoch": 0.5081918731886201, "grad_norm": 0.8476990461349487, "learning_rate": 1.634312886824442e-05, "loss": 0.0673803985118866, "step": 3759 }, { "epoch": 0.5083270665573854, "grad_norm": 0.29710081219673157, "learning_rate": 1.6336311793918298e-05, "loss": 0.05188039690256119, "step": 3760 }, { "epoch": 0.5084622599261506, "grad_norm": 0.7095276117324829, "learning_rate": 1.6329494441364925e-05, "loss": 0.06461555510759354, "step": 3761 }, { "epoch": 0.5085974532949159, "grad_norm": 0.3347511291503906, "learning_rate": 1.6322676812003727e-05, "loss": 0.04996323958039284, "step": 3762 }, { "epoch": 0.5087326466636811, "grad_norm": 0.8257862329483032, "learning_rate": 1.631585890725416e-05, "loss": 0.04910095036029816, "step": 3763 }, { "epoch": 0.5088678400324465, "grad_norm": 0.9673036336898804, "learning_rate": 1.630904072853575e-05, "loss": 0.06738243997097015, "step": 3764 }, { "epoch": 0.5090030334012117, "grad_norm": 0.49601346254348755, "learning_rate": 1.6302222277268085e-05, "loss": 0.053567975759506226, "step": 3765 }, { "epoch": 0.5091382267699769, "grad_norm": 0.21795786917209625, "learning_rate": 1.6295403554870794e-05, "loss": 0.051491305232048035, "step": 3766 }, { "epoch": 0.5092734201387422, "grad_norm": 1.084364652633667, "learning_rate": 1.6288584562763572e-05, "loss": 0.05329851806163788, "step": 3767 }, { "epoch": 0.5094086135075074, "grad_norm": 1.3986179828643799, "learning_rate": 1.6281765302366176e-05, "loss": 0.05432478338479996, "step": 3768 }, { "epoch": 0.5095438068762728, "grad_norm": 0.2347116768360138, "learning_rate": 1.6274945775098412e-05, "loss": 0.04109840840101242, "step": 3769 }, { "epoch": 0.509679000245038, "grad_norm": 0.36350691318511963, "learning_rate": 1.6268125982380135e-05, "loss": 0.05259581282734871, "step": 3770 }, { "epoch": 0.5098141936138032, "grad_norm": 0.36298471689224243, "learning_rate": 1.626130592563127e-05, "loss": 0.047557294368743896, "step": 3771 }, { "epoch": 0.5099493869825685, "grad_norm": 0.5269676446914673, "learning_rate": 1.6254485606271778e-05, "loss": 0.04008561372756958, "step": 3772 }, { "epoch": 0.5100845803513337, "grad_norm": 0.4229883551597595, "learning_rate": 1.6247665025721698e-05, "loss": 0.0676676407456398, "step": 3773 }, { "epoch": 0.5102197737200991, "grad_norm": 0.5472242832183838, "learning_rate": 1.62408441854011e-05, "loss": 0.042094528675079346, "step": 3774 }, { "epoch": 0.5103549670888643, "grad_norm": 0.22410422563552856, "learning_rate": 1.6234023086730136e-05, "loss": 0.021529115736484528, "step": 3775 }, { "epoch": 0.5104901604576295, "grad_norm": 0.2958018183708191, "learning_rate": 1.622720173112898e-05, "loss": 0.043539129197597504, "step": 3776 }, { "epoch": 0.5106253538263948, "grad_norm": 0.8274920582771301, "learning_rate": 1.6220380120017874e-05, "loss": 0.03790076822042465, "step": 3777 }, { "epoch": 0.5107605471951601, "grad_norm": 0.2734115421772003, "learning_rate": 1.6213558254817128e-05, "loss": 0.049877118319272995, "step": 3778 }, { "epoch": 0.5108957405639254, "grad_norm": 1.2204430103302002, "learning_rate": 1.6206736136947074e-05, "loss": 0.055194538086652756, "step": 3779 }, { "epoch": 0.5110309339326906, "grad_norm": 0.574268102645874, "learning_rate": 1.6199913767828126e-05, "loss": 0.047274813055992126, "step": 3780 }, { "epoch": 0.5111661273014558, "grad_norm": 1.3793399333953857, "learning_rate": 1.6193091148880733e-05, "loss": 0.06472516059875488, "step": 3781 }, { "epoch": 0.5113013206702212, "grad_norm": 0.5116593241691589, "learning_rate": 1.61862682815254e-05, "loss": 0.048178352415561676, "step": 3782 }, { "epoch": 0.5114365140389864, "grad_norm": 0.45036429166793823, "learning_rate": 1.617944516718268e-05, "loss": 0.05155980587005615, "step": 3783 }, { "epoch": 0.5115717074077516, "grad_norm": 0.8632551431655884, "learning_rate": 1.617262180727319e-05, "loss": 0.061946380883455276, "step": 3784 }, { "epoch": 0.5117069007765169, "grad_norm": 1.622459053993225, "learning_rate": 1.6165798203217588e-05, "loss": 0.04066108167171478, "step": 3785 }, { "epoch": 0.5118420941452821, "grad_norm": 0.623325765132904, "learning_rate": 1.6158974356436585e-05, "loss": 0.04205261170864105, "step": 3786 }, { "epoch": 0.5119772875140475, "grad_norm": 0.44530412554740906, "learning_rate": 1.6152150268350938e-05, "loss": 0.0872809886932373, "step": 3787 }, { "epoch": 0.5121124808828127, "grad_norm": 0.31950002908706665, "learning_rate": 1.6145325940381458e-05, "loss": 0.03133980557322502, "step": 3788 }, { "epoch": 0.5122476742515779, "grad_norm": 0.6281394362449646, "learning_rate": 1.6138501373949018e-05, "loss": 0.05071195960044861, "step": 3789 }, { "epoch": 0.5123828676203432, "grad_norm": 0.9504885077476501, "learning_rate": 1.613167657047451e-05, "loss": 0.044804222881793976, "step": 3790 }, { "epoch": 0.5125180609891085, "grad_norm": 0.7927709817886353, "learning_rate": 1.612485153137891e-05, "loss": 0.0602584145963192, "step": 3791 }, { "epoch": 0.5126532543578738, "grad_norm": 0.20674657821655273, "learning_rate": 1.611802625808323e-05, "loss": 0.03130554407835007, "step": 3792 }, { "epoch": 0.512788447726639, "grad_norm": 0.256592720746994, "learning_rate": 1.611120075200851e-05, "loss": 0.04670822620391846, "step": 3793 }, { "epoch": 0.5129236410954042, "grad_norm": 0.42805352807044983, "learning_rate": 1.610437501457587e-05, "loss": 0.04130428284406662, "step": 3794 }, { "epoch": 0.5130588344641696, "grad_norm": 0.4622546434402466, "learning_rate": 1.6097549047206464e-05, "loss": 0.0534285306930542, "step": 3795 }, { "epoch": 0.5131940278329348, "grad_norm": 0.2508544623851776, "learning_rate": 1.6090722851321497e-05, "loss": 0.05359993875026703, "step": 3796 }, { "epoch": 0.5133292212017001, "grad_norm": 0.7881059050559998, "learning_rate": 1.6083896428342213e-05, "loss": 0.05133604630827904, "step": 3797 }, { "epoch": 0.5134644145704653, "grad_norm": 0.5246431231498718, "learning_rate": 1.6077069779689915e-05, "loss": 0.07454442977905273, "step": 3798 }, { "epoch": 0.5135996079392305, "grad_norm": 0.42925789952278137, "learning_rate": 1.607024290678594e-05, "loss": 0.06013303995132446, "step": 3799 }, { "epoch": 0.5137348013079959, "grad_norm": 0.5546578168869019, "learning_rate": 1.6063415811051686e-05, "loss": 0.056167393922805786, "step": 3800 }, { "epoch": 0.5138699946767611, "grad_norm": 1.0981993675231934, "learning_rate": 1.6056588493908596e-05, "loss": 0.07775872945785522, "step": 3801 }, { "epoch": 0.5140051880455264, "grad_norm": 0.5263516902923584, "learning_rate": 1.604976095677814e-05, "loss": 0.05087617412209511, "step": 3802 }, { "epoch": 0.5141403814142916, "grad_norm": 0.4181288182735443, "learning_rate": 1.604293320108186e-05, "loss": 0.04873044788837433, "step": 3803 }, { "epoch": 0.5142755747830569, "grad_norm": 0.686302900314331, "learning_rate": 1.603610522824132e-05, "loss": 0.05752221494913101, "step": 3804 }, { "epoch": 0.5144107681518222, "grad_norm": 0.828981876373291, "learning_rate": 1.6029277039678153e-05, "loss": 0.05069413781166077, "step": 3805 }, { "epoch": 0.5145459615205874, "grad_norm": 0.5596721172332764, "learning_rate": 1.602244863681401e-05, "loss": 0.056078072637319565, "step": 3806 }, { "epoch": 0.5146811548893527, "grad_norm": 0.8351901769638062, "learning_rate": 1.6015620021070613e-05, "loss": 0.06332506239414215, "step": 3807 }, { "epoch": 0.514816348258118, "grad_norm": 0.535958468914032, "learning_rate": 1.6008791193869714e-05, "loss": 0.0573420524597168, "step": 3808 }, { "epoch": 0.5149515416268832, "grad_norm": 0.3675961196422577, "learning_rate": 1.6001962156633102e-05, "loss": 0.05918343365192413, "step": 3809 }, { "epoch": 0.5150867349956485, "grad_norm": 0.3773360848426819, "learning_rate": 1.5995132910782632e-05, "loss": 0.07070313394069672, "step": 3810 }, { "epoch": 0.5152219283644137, "grad_norm": 0.836632251739502, "learning_rate": 1.5988303457740178e-05, "loss": 0.06749022752046585, "step": 3811 }, { "epoch": 0.5153571217331789, "grad_norm": 0.5288264155387878, "learning_rate": 1.598147379892768e-05, "loss": 0.05578360706567764, "step": 3812 }, { "epoch": 0.5154923151019443, "grad_norm": 0.7572844624519348, "learning_rate": 1.5974643935767098e-05, "loss": 0.060728468000888824, "step": 3813 }, { "epoch": 0.5156275084707095, "grad_norm": 0.4169822037220001, "learning_rate": 1.5967813869680452e-05, "loss": 0.04759075492620468, "step": 3814 }, { "epoch": 0.5157627018394748, "grad_norm": 0.6279019713401794, "learning_rate": 1.59609836020898e-05, "loss": 0.05336674302816391, "step": 3815 }, { "epoch": 0.51589789520824, "grad_norm": 0.5039679408073425, "learning_rate": 1.5954153134417236e-05, "loss": 0.04822143539786339, "step": 3816 }, { "epoch": 0.5160330885770053, "grad_norm": 0.7431056499481201, "learning_rate": 1.59473224680849e-05, "loss": 0.04274154081940651, "step": 3817 }, { "epoch": 0.5161682819457706, "grad_norm": 0.43867871165275574, "learning_rate": 1.5940491604514976e-05, "loss": 0.05871080607175827, "step": 3818 }, { "epoch": 0.5163034753145358, "grad_norm": 0.29136285185813904, "learning_rate": 1.5933660545129683e-05, "loss": 0.04610811918973923, "step": 3819 }, { "epoch": 0.5164386686833011, "grad_norm": 0.37538301944732666, "learning_rate": 1.5926829291351288e-05, "loss": 0.05637273192405701, "step": 3820 }, { "epoch": 0.5165738620520663, "grad_norm": 0.4648245871067047, "learning_rate": 1.591999784460209e-05, "loss": 0.052748456597328186, "step": 3821 }, { "epoch": 0.5167090554208316, "grad_norm": 1.408056616783142, "learning_rate": 1.5913166206304435e-05, "loss": 0.056761935353279114, "step": 3822 }, { "epoch": 0.5168442487895969, "grad_norm": 1.2758344411849976, "learning_rate": 1.5906334377880707e-05, "loss": 0.0536779910326004, "step": 3823 }, { "epoch": 0.5169794421583621, "grad_norm": 1.120057463645935, "learning_rate": 1.589950236075333e-05, "loss": 0.05391278117895126, "step": 3824 }, { "epoch": 0.5171146355271274, "grad_norm": 0.9656091928482056, "learning_rate": 1.5892670156344764e-05, "loss": 0.055062390863895416, "step": 3825 }, { "epoch": 0.5172498288958927, "grad_norm": 0.8462112545967102, "learning_rate": 1.588583776607751e-05, "loss": 0.05114181339740753, "step": 3826 }, { "epoch": 0.5173850222646579, "grad_norm": 0.33182936906814575, "learning_rate": 1.5879005191374106e-05, "loss": 0.06080438196659088, "step": 3827 }, { "epoch": 0.5175202156334232, "grad_norm": 0.7704989314079285, "learning_rate": 1.587217243365714e-05, "loss": 0.0652606412768364, "step": 3828 }, { "epoch": 0.5176554090021884, "grad_norm": 0.9421502947807312, "learning_rate": 1.586533949434922e-05, "loss": 0.048460062593221664, "step": 3829 }, { "epoch": 0.5177906023709538, "grad_norm": 0.8190940022468567, "learning_rate": 1.5858506374872998e-05, "loss": 0.046723030507564545, "step": 3830 }, { "epoch": 0.517925795739719, "grad_norm": 0.9917197227478027, "learning_rate": 1.5851673076651178e-05, "loss": 0.057548344135284424, "step": 3831 }, { "epoch": 0.5180609891084842, "grad_norm": 0.9356064200401306, "learning_rate": 1.5844839601106477e-05, "loss": 0.06234029680490494, "step": 3832 }, { "epoch": 0.5181961824772495, "grad_norm": 0.3192106783390045, "learning_rate": 1.583800594966167e-05, "loss": 0.053430721163749695, "step": 3833 }, { "epoch": 0.5183313758460147, "grad_norm": 0.3324984312057495, "learning_rate": 1.583117212373955e-05, "loss": 0.06126798689365387, "step": 3834 }, { "epoch": 0.51846656921478, "grad_norm": 0.5636252164840698, "learning_rate": 1.5824338124762967e-05, "loss": 0.06817282736301422, "step": 3835 }, { "epoch": 0.5186017625835453, "grad_norm": 0.1898660510778427, "learning_rate": 1.581750395415479e-05, "loss": 0.0354049950838089, "step": 3836 }, { "epoch": 0.5187369559523105, "grad_norm": 0.7402545809745789, "learning_rate": 1.5810669613337922e-05, "loss": 0.053197041153907776, "step": 3837 }, { "epoch": 0.5188721493210758, "grad_norm": 0.2631089985370636, "learning_rate": 1.5803835103735327e-05, "loss": 0.04453294724225998, "step": 3838 }, { "epoch": 0.5190073426898411, "grad_norm": 0.5345631837844849, "learning_rate": 1.5797000426769973e-05, "loss": 0.04821440204977989, "step": 3839 }, { "epoch": 0.5191425360586063, "grad_norm": 0.5558984875679016, "learning_rate": 1.579016558386488e-05, "loss": 0.04820968210697174, "step": 3840 }, { "epoch": 0.5192777294273716, "grad_norm": 0.2662298381328583, "learning_rate": 1.5783330576443096e-05, "loss": 0.041163913905620575, "step": 3841 }, { "epoch": 0.5194129227961368, "grad_norm": 0.2704199552536011, "learning_rate": 1.5776495405927716e-05, "loss": 0.0519847609102726, "step": 3842 }, { "epoch": 0.5195481161649022, "grad_norm": 0.30000296235084534, "learning_rate": 1.5769660073741844e-05, "loss": 0.04474365711212158, "step": 3843 }, { "epoch": 0.5196833095336674, "grad_norm": 0.5938866138458252, "learning_rate": 1.5762824581308645e-05, "loss": 0.06992365419864655, "step": 3844 }, { "epoch": 0.5198185029024326, "grad_norm": 1.4845619201660156, "learning_rate": 1.5755988930051304e-05, "loss": 0.06222233176231384, "step": 3845 }, { "epoch": 0.5199536962711979, "grad_norm": 0.8205815553665161, "learning_rate": 1.5749153121393025e-05, "loss": 0.044046223163604736, "step": 3846 }, { "epoch": 0.5200888896399631, "grad_norm": 1.048462152481079, "learning_rate": 1.574231715675708e-05, "loss": 0.045677050948143005, "step": 3847 }, { "epoch": 0.5202240830087285, "grad_norm": 1.2512325048446655, "learning_rate": 1.573548103756674e-05, "loss": 0.056922078132629395, "step": 3848 }, { "epoch": 0.5203592763774937, "grad_norm": 0.6226654052734375, "learning_rate": 1.572864476524533e-05, "loss": 0.05940178036689758, "step": 3849 }, { "epoch": 0.5204944697462589, "grad_norm": 0.350815087556839, "learning_rate": 1.5721808341216195e-05, "loss": 0.06943237781524658, "step": 3850 }, { "epoch": 0.5206296631150242, "grad_norm": 0.8176279664039612, "learning_rate": 1.571497176690271e-05, "loss": 0.07380060851573944, "step": 3851 }, { "epoch": 0.5207648564837895, "grad_norm": 0.22101128101348877, "learning_rate": 1.570813504372829e-05, "loss": 0.03988974541425705, "step": 3852 }, { "epoch": 0.5209000498525548, "grad_norm": 0.41456660628318787, "learning_rate": 1.570129817311638e-05, "loss": 0.0563424788415432, "step": 3853 }, { "epoch": 0.52103524322132, "grad_norm": 0.328954815864563, "learning_rate": 1.5694461156490452e-05, "loss": 0.04352203756570816, "step": 3854 }, { "epoch": 0.5211704365900852, "grad_norm": 0.8576094508171082, "learning_rate": 1.5687623995274008e-05, "loss": 0.06292326003313065, "step": 3855 }, { "epoch": 0.5213056299588505, "grad_norm": 0.9740639925003052, "learning_rate": 1.568078669089058e-05, "loss": 0.07779178023338318, "step": 3856 }, { "epoch": 0.5214408233276158, "grad_norm": 0.6014882922172546, "learning_rate": 1.567394924476373e-05, "loss": 0.05543476343154907, "step": 3857 }, { "epoch": 0.5215760166963811, "grad_norm": 0.3653249442577362, "learning_rate": 1.5667111658317057e-05, "loss": 0.056862927973270416, "step": 3858 }, { "epoch": 0.5217112100651463, "grad_norm": 0.8777360320091248, "learning_rate": 1.5660273932974177e-05, "loss": 0.06048957258462906, "step": 3859 }, { "epoch": 0.5218464034339115, "grad_norm": 0.21385769546031952, "learning_rate": 1.5653436070158743e-05, "loss": 0.043873440474271774, "step": 3860 }, { "epoch": 0.5219815968026769, "grad_norm": 0.5791447758674622, "learning_rate": 1.564659807129444e-05, "loss": 0.05659090355038643, "step": 3861 }, { "epoch": 0.5221167901714421, "grad_norm": 0.532931387424469, "learning_rate": 1.5639759937804962e-05, "loss": 0.05180169641971588, "step": 3862 }, { "epoch": 0.5222519835402073, "grad_norm": 0.3157457411289215, "learning_rate": 1.5632921671114055e-05, "loss": 0.05484512448310852, "step": 3863 }, { "epoch": 0.5223871769089726, "grad_norm": 1.268995761871338, "learning_rate": 1.5626083272645485e-05, "loss": 0.06786813586950302, "step": 3864 }, { "epoch": 0.5225223702777378, "grad_norm": 0.676856279373169, "learning_rate": 1.5619244743823038e-05, "loss": 0.04725271090865135, "step": 3865 }, { "epoch": 0.5226575636465032, "grad_norm": 0.4009271264076233, "learning_rate": 1.5612406086070534e-05, "loss": 0.07125125080347061, "step": 3866 }, { "epoch": 0.5227927570152684, "grad_norm": 0.46584638953208923, "learning_rate": 1.560556730081181e-05, "loss": 0.06834045052528381, "step": 3867 }, { "epoch": 0.5229279503840336, "grad_norm": 0.38255366683006287, "learning_rate": 1.5598728389470754e-05, "loss": 0.047905176877975464, "step": 3868 }, { "epoch": 0.5230631437527989, "grad_norm": 0.824782133102417, "learning_rate": 1.5591889353471245e-05, "loss": 0.05604368448257446, "step": 3869 }, { "epoch": 0.5231983371215642, "grad_norm": 0.5316340327262878, "learning_rate": 1.5585050194237226e-05, "loss": 0.06252117455005646, "step": 3870 }, { "epoch": 0.5233335304903295, "grad_norm": 0.32712358236312866, "learning_rate": 1.557821091319263e-05, "loss": 0.0662146508693695, "step": 3871 }, { "epoch": 0.5234687238590947, "grad_norm": 0.4667937457561493, "learning_rate": 1.5571371511761446e-05, "loss": 0.053338099271059036, "step": 3872 }, { "epoch": 0.5236039172278599, "grad_norm": 0.6650628447532654, "learning_rate": 1.5564531991367658e-05, "loss": 0.06061176583170891, "step": 3873 }, { "epoch": 0.5237391105966253, "grad_norm": 1.3317973613739014, "learning_rate": 1.5557692353435302e-05, "loss": 0.0495610386133194, "step": 3874 }, { "epoch": 0.5238743039653905, "grad_norm": 0.6013226509094238, "learning_rate": 1.5550852599388424e-05, "loss": 0.0501331090927124, "step": 3875 }, { "epoch": 0.5240094973341558, "grad_norm": 2.2273976802825928, "learning_rate": 1.5544012730651096e-05, "loss": 0.06653538346290588, "step": 3876 }, { "epoch": 0.524144690702921, "grad_norm": 0.46187737584114075, "learning_rate": 1.5537172748647422e-05, "loss": 0.05617896467447281, "step": 3877 }, { "epoch": 0.5242798840716862, "grad_norm": 1.363012433052063, "learning_rate": 1.553033265480151e-05, "loss": 0.06546523422002792, "step": 3878 }, { "epoch": 0.5244150774404516, "grad_norm": 0.5969788432121277, "learning_rate": 1.552349245053752e-05, "loss": 0.05155107378959656, "step": 3879 }, { "epoch": 0.5245502708092168, "grad_norm": 0.8172316551208496, "learning_rate": 1.5516652137279597e-05, "loss": 0.06449680030345917, "step": 3880 }, { "epoch": 0.5246854641779821, "grad_norm": 0.7563415765762329, "learning_rate": 1.5509811716451955e-05, "loss": 0.050470590591430664, "step": 3881 }, { "epoch": 0.5248206575467473, "grad_norm": 0.46673232316970825, "learning_rate": 1.550297118947879e-05, "loss": 0.047976646572351456, "step": 3882 }, { "epoch": 0.5249558509155126, "grad_norm": 0.5018225908279419, "learning_rate": 1.5496130557784343e-05, "loss": 0.047616541385650635, "step": 3883 }, { "epoch": 0.5250910442842779, "grad_norm": 0.4791119396686554, "learning_rate": 1.5489289822792868e-05, "loss": 0.06036540865898132, "step": 3884 }, { "epoch": 0.5252262376530431, "grad_norm": 0.1800708770751953, "learning_rate": 1.5482448985928645e-05, "loss": 0.03271171823143959, "step": 3885 }, { "epoch": 0.5253614310218084, "grad_norm": 0.9041771292686462, "learning_rate": 1.5475608048615964e-05, "loss": 0.05772767215967178, "step": 3886 }, { "epoch": 0.5254966243905737, "grad_norm": 0.7105149626731873, "learning_rate": 1.546876701227916e-05, "loss": 0.05694102868437767, "step": 3887 }, { "epoch": 0.5256318177593389, "grad_norm": 0.34959179162979126, "learning_rate": 1.5461925878342558e-05, "loss": 0.037605538964271545, "step": 3888 }, { "epoch": 0.5257670111281042, "grad_norm": 1.0307186841964722, "learning_rate": 1.5455084648230527e-05, "loss": 0.062323808670043945, "step": 3889 }, { "epoch": 0.5259022044968694, "grad_norm": 0.23543967306613922, "learning_rate": 1.5448243323367438e-05, "loss": 0.044301412999629974, "step": 3890 }, { "epoch": 0.5260373978656346, "grad_norm": 0.3705684244632721, "learning_rate": 1.544140190517771e-05, "loss": 0.03656876087188721, "step": 3891 }, { "epoch": 0.5261725912344, "grad_norm": 0.8742043375968933, "learning_rate": 1.5434560395085745e-05, "loss": 0.05393455922603607, "step": 3892 }, { "epoch": 0.5263077846031652, "grad_norm": 0.8725761771202087, "learning_rate": 1.542771879451599e-05, "loss": 0.04833026975393295, "step": 3893 }, { "epoch": 0.5264429779719305, "grad_norm": 0.4318239092826843, "learning_rate": 1.54208771048929e-05, "loss": 0.04526226222515106, "step": 3894 }, { "epoch": 0.5265781713406957, "grad_norm": 1.3377554416656494, "learning_rate": 1.5414035327640958e-05, "loss": 0.053397081792354584, "step": 3895 }, { "epoch": 0.526713364709461, "grad_norm": 0.7858668565750122, "learning_rate": 1.5407193464184644e-05, "loss": 0.0437445342540741, "step": 3896 }, { "epoch": 0.5268485580782263, "grad_norm": 0.47253498435020447, "learning_rate": 1.5400351515948485e-05, "loss": 0.04661727696657181, "step": 3897 }, { "epoch": 0.5269837514469915, "grad_norm": 0.7696034908294678, "learning_rate": 1.5393509484357006e-05, "loss": 0.04863014817237854, "step": 3898 }, { "epoch": 0.5271189448157568, "grad_norm": 0.43488770723342896, "learning_rate": 1.538666737083475e-05, "loss": 0.0425231009721756, "step": 3899 }, { "epoch": 0.527254138184522, "grad_norm": 0.5269473195075989, "learning_rate": 1.537982517680629e-05, "loss": 0.04516787454485893, "step": 3900 }, { "epoch": 0.5273893315532873, "grad_norm": 0.7998220920562744, "learning_rate": 1.5372982903696196e-05, "loss": 0.05101688206195831, "step": 3901 }, { "epoch": 0.5275245249220526, "grad_norm": 0.9070618152618408, "learning_rate": 1.536614055292908e-05, "loss": 0.06188303232192993, "step": 3902 }, { "epoch": 0.5276597182908178, "grad_norm": 0.7143345475196838, "learning_rate": 1.535929812592955e-05, "loss": 0.06838368624448776, "step": 3903 }, { "epoch": 0.5277949116595831, "grad_norm": 1.5905269384384155, "learning_rate": 1.5352455624122227e-05, "loss": 0.07274457812309265, "step": 3904 }, { "epoch": 0.5279301050283484, "grad_norm": 0.5470003485679626, "learning_rate": 1.5345613048931765e-05, "loss": 0.04788441210985184, "step": 3905 }, { "epoch": 0.5280652983971136, "grad_norm": 0.8070629835128784, "learning_rate": 1.5338770401782822e-05, "loss": 0.05735981464385986, "step": 3906 }, { "epoch": 0.5282004917658789, "grad_norm": 0.3525625467300415, "learning_rate": 1.5331927684100077e-05, "loss": 0.07135386765003204, "step": 3907 }, { "epoch": 0.5283356851346441, "grad_norm": 0.46959879994392395, "learning_rate": 1.5325084897308218e-05, "loss": 0.05447271093726158, "step": 3908 }, { "epoch": 0.5284708785034095, "grad_norm": 0.5100918412208557, "learning_rate": 1.5318242042831952e-05, "loss": 0.05446285381913185, "step": 3909 }, { "epoch": 0.5286060718721747, "grad_norm": 1.1584619283676147, "learning_rate": 1.5311399122095992e-05, "loss": 0.04878786951303482, "step": 3910 }, { "epoch": 0.5287412652409399, "grad_norm": 1.0253782272338867, "learning_rate": 1.5304556136525074e-05, "loss": 0.05952572822570801, "step": 3911 }, { "epoch": 0.5288764586097052, "grad_norm": 0.5966041088104248, "learning_rate": 1.5297713087543948e-05, "loss": 0.03753407299518585, "step": 3912 }, { "epoch": 0.5290116519784704, "grad_norm": 0.8773438930511475, "learning_rate": 1.5290869976577365e-05, "loss": 0.04544057324528694, "step": 3913 }, { "epoch": 0.5291468453472358, "grad_norm": 0.44492125511169434, "learning_rate": 1.5284026805050107e-05, "loss": 0.05192258581519127, "step": 3914 }, { "epoch": 0.529282038716001, "grad_norm": 0.36962461471557617, "learning_rate": 1.5277183574386947e-05, "loss": 0.05602163076400757, "step": 3915 }, { "epoch": 0.5294172320847662, "grad_norm": 0.6350026726722717, "learning_rate": 1.5270340286012694e-05, "loss": 0.04901233687996864, "step": 3916 }, { "epoch": 0.5295524254535315, "grad_norm": 0.38578498363494873, "learning_rate": 1.526349694135215e-05, "loss": 0.054621048271656036, "step": 3917 }, { "epoch": 0.5296876188222968, "grad_norm": 0.3361757695674896, "learning_rate": 1.525665354183014e-05, "loss": 0.05293537676334381, "step": 3918 }, { "epoch": 0.529822812191062, "grad_norm": 0.39618435502052307, "learning_rate": 1.5249810088871493e-05, "loss": 0.06588000804185867, "step": 3919 }, { "epoch": 0.5299580055598273, "grad_norm": 0.5319632887840271, "learning_rate": 1.5242966583901052e-05, "loss": 0.03069375641644001, "step": 3920 }, { "epoch": 0.5300931989285925, "grad_norm": 0.2962198257446289, "learning_rate": 1.523612302834367e-05, "loss": 0.06402204930782318, "step": 3921 }, { "epoch": 0.5302283922973579, "grad_norm": 0.954925537109375, "learning_rate": 1.5229279423624217e-05, "loss": 0.06296830624341965, "step": 3922 }, { "epoch": 0.5303635856661231, "grad_norm": 0.34819495677948, "learning_rate": 1.5222435771167566e-05, "loss": 0.05464529991149902, "step": 3923 }, { "epoch": 0.5304987790348883, "grad_norm": 0.9567537307739258, "learning_rate": 1.5215592072398602e-05, "loss": 0.05670039355754852, "step": 3924 }, { "epoch": 0.5306339724036536, "grad_norm": 0.297484815120697, "learning_rate": 1.520874832874222e-05, "loss": 0.033783864229917526, "step": 3925 }, { "epoch": 0.5307691657724188, "grad_norm": 0.29705309867858887, "learning_rate": 1.5201904541623318e-05, "loss": 0.04822036623954773, "step": 3926 }, { "epoch": 0.5309043591411842, "grad_norm": 0.8676610589027405, "learning_rate": 1.5195060712466817e-05, "loss": 0.07909068465232849, "step": 3927 }, { "epoch": 0.5310395525099494, "grad_norm": 0.4062187373638153, "learning_rate": 1.5188216842697635e-05, "loss": 0.06048871576786041, "step": 3928 }, { "epoch": 0.5311747458787146, "grad_norm": 0.6600865125656128, "learning_rate": 1.5181372933740703e-05, "loss": 0.05761503800749779, "step": 3929 }, { "epoch": 0.5313099392474799, "grad_norm": 0.4390881061553955, "learning_rate": 1.5174528987020958e-05, "loss": 0.058613844215869904, "step": 3930 }, { "epoch": 0.5314451326162452, "grad_norm": 0.9420432448387146, "learning_rate": 1.5167685003963345e-05, "loss": 0.04771383851766586, "step": 3931 }, { "epoch": 0.5315803259850105, "grad_norm": 0.6272133588790894, "learning_rate": 1.5160840985992824e-05, "loss": 0.059007227420806885, "step": 3932 }, { "epoch": 0.5317155193537757, "grad_norm": 0.8019857406616211, "learning_rate": 1.515399693453435e-05, "loss": 0.05623268336057663, "step": 3933 }, { "epoch": 0.5318507127225409, "grad_norm": 0.4517199397087097, "learning_rate": 1.5147152851012894e-05, "loss": 0.04688534513115883, "step": 3934 }, { "epoch": 0.5319859060913062, "grad_norm": 0.5166458487510681, "learning_rate": 1.514030873685343e-05, "loss": 0.05230247601866722, "step": 3935 }, { "epoch": 0.5321210994600715, "grad_norm": 0.9699565172195435, "learning_rate": 1.513346459348094e-05, "loss": 0.06472494453191757, "step": 3936 }, { "epoch": 0.5322562928288368, "grad_norm": 0.8696311712265015, "learning_rate": 1.5126620422320405e-05, "loss": 0.058809250593185425, "step": 3937 }, { "epoch": 0.532391486197602, "grad_norm": 0.8052372336387634, "learning_rate": 1.5119776224796823e-05, "loss": 0.056269899010658264, "step": 3938 }, { "epoch": 0.5325266795663672, "grad_norm": 0.33290237188339233, "learning_rate": 1.5112932002335195e-05, "loss": 0.04272053390741348, "step": 3939 }, { "epoch": 0.5326618729351326, "grad_norm": 0.36302196979522705, "learning_rate": 1.5106087756360524e-05, "loss": 0.046406738460063934, "step": 3940 }, { "epoch": 0.5327970663038978, "grad_norm": 1.064314365386963, "learning_rate": 1.5099243488297816e-05, "loss": 0.05604291334748268, "step": 3941 }, { "epoch": 0.5329322596726631, "grad_norm": 0.49924832582473755, "learning_rate": 1.5092399199572083e-05, "loss": 0.05543386936187744, "step": 3942 }, { "epoch": 0.5330674530414283, "grad_norm": 0.39729470014572144, "learning_rate": 1.5085554891608343e-05, "loss": 0.05348895490169525, "step": 3943 }, { "epoch": 0.5332026464101935, "grad_norm": 0.6028935313224792, "learning_rate": 1.5078710565831616e-05, "loss": 0.05534949526190758, "step": 3944 }, { "epoch": 0.5333378397789589, "grad_norm": 0.5769929885864258, "learning_rate": 1.5071866223666935e-05, "loss": 0.04693952202796936, "step": 3945 }, { "epoch": 0.5334730331477241, "grad_norm": 0.7933577299118042, "learning_rate": 1.5065021866539323e-05, "loss": 0.05774712562561035, "step": 3946 }, { "epoch": 0.5336082265164893, "grad_norm": 0.5810902714729309, "learning_rate": 1.5058177495873805e-05, "loss": 0.04253597557544708, "step": 3947 }, { "epoch": 0.5337434198852546, "grad_norm": 0.3926798403263092, "learning_rate": 1.5051333113095429e-05, "loss": 0.05249135196208954, "step": 3948 }, { "epoch": 0.5338786132540199, "grad_norm": 0.4805140197277069, "learning_rate": 1.5044488719629218e-05, "loss": 0.047764845192432404, "step": 3949 }, { "epoch": 0.5340138066227852, "grad_norm": 0.3548891842365265, "learning_rate": 1.5037644316900227e-05, "loss": 0.051260799169540405, "step": 3950 }, { "epoch": 0.5341489999915504, "grad_norm": 0.37100163102149963, "learning_rate": 1.5030799906333484e-05, "loss": 0.04891455918550491, "step": 3951 }, { "epoch": 0.5342841933603156, "grad_norm": 0.7228818535804749, "learning_rate": 1.5023955489354031e-05, "loss": 0.06493876129388809, "step": 3952 }, { "epoch": 0.534419386729081, "grad_norm": 0.30967745184898376, "learning_rate": 1.5017111067386927e-05, "loss": 0.05843615531921387, "step": 3953 }, { "epoch": 0.5345545800978462, "grad_norm": 0.8512394428253174, "learning_rate": 1.50102666418572e-05, "loss": 0.06520982831716537, "step": 3954 }, { "epoch": 0.5346897734666115, "grad_norm": 0.8132785558700562, "learning_rate": 1.500342221418991e-05, "loss": 0.062483467161655426, "step": 3955 }, { "epoch": 0.5348249668353767, "grad_norm": 0.30524805188179016, "learning_rate": 1.4996577785810094e-05, "loss": 0.047003597021102905, "step": 3956 }, { "epoch": 0.5349601602041419, "grad_norm": 0.39114096760749817, "learning_rate": 1.4989733358142798e-05, "loss": 0.05080056190490723, "step": 3957 }, { "epoch": 0.5350953535729073, "grad_norm": 1.2146002054214478, "learning_rate": 1.498288893261308e-05, "loss": 0.05616268515586853, "step": 3958 }, { "epoch": 0.5352305469416725, "grad_norm": 0.5430983304977417, "learning_rate": 1.497604451064597e-05, "loss": 0.04470429569482803, "step": 3959 }, { "epoch": 0.5353657403104378, "grad_norm": 0.7658754587173462, "learning_rate": 1.496920009366652e-05, "loss": 0.06324173510074615, "step": 3960 }, { "epoch": 0.535500933679203, "grad_norm": 0.657473623752594, "learning_rate": 1.4962355683099777e-05, "loss": 0.05880952626466751, "step": 3961 }, { "epoch": 0.5356361270479683, "grad_norm": 0.4107910692691803, "learning_rate": 1.4955511280370782e-05, "loss": 0.04643833637237549, "step": 3962 }, { "epoch": 0.5357713204167336, "grad_norm": 0.25113463401794434, "learning_rate": 1.4948666886904579e-05, "loss": 0.05110061168670654, "step": 3963 }, { "epoch": 0.5359065137854988, "grad_norm": 0.2991109788417816, "learning_rate": 1.4941822504126199e-05, "loss": 0.04806319624185562, "step": 3964 }, { "epoch": 0.5360417071542641, "grad_norm": 0.47618919610977173, "learning_rate": 1.4934978133460681e-05, "loss": 0.050580233335494995, "step": 3965 }, { "epoch": 0.5361769005230294, "grad_norm": 0.7184830904006958, "learning_rate": 1.4928133776333068e-05, "loss": 0.049489960074424744, "step": 3966 }, { "epoch": 0.5363120938917946, "grad_norm": 0.3012215793132782, "learning_rate": 1.4921289434168386e-05, "loss": 0.043583422899246216, "step": 3967 }, { "epoch": 0.5364472872605599, "grad_norm": 0.8425238728523254, "learning_rate": 1.4914445108391663e-05, "loss": 0.06295725703239441, "step": 3968 }, { "epoch": 0.5365824806293251, "grad_norm": 0.412889301776886, "learning_rate": 1.4907600800427922e-05, "loss": 0.04477260261774063, "step": 3969 }, { "epoch": 0.5367176739980903, "grad_norm": 0.4890596270561218, "learning_rate": 1.4900756511702188e-05, "loss": 0.06844127178192139, "step": 3970 }, { "epoch": 0.5368528673668557, "grad_norm": 0.42125043272972107, "learning_rate": 1.4893912243639479e-05, "loss": 0.05592568218708038, "step": 3971 }, { "epoch": 0.5369880607356209, "grad_norm": 0.7336945533752441, "learning_rate": 1.4887067997664807e-05, "loss": 0.07025749981403351, "step": 3972 }, { "epoch": 0.5371232541043862, "grad_norm": 0.2617323100566864, "learning_rate": 1.488022377520318e-05, "loss": 0.052128762006759644, "step": 3973 }, { "epoch": 0.5372584474731514, "grad_norm": 0.6597862839698792, "learning_rate": 1.4873379577679599e-05, "loss": 0.04164543002843857, "step": 3974 }, { "epoch": 0.5373936408419167, "grad_norm": 0.927537739276886, "learning_rate": 1.4866535406519063e-05, "loss": 0.051501937210559845, "step": 3975 }, { "epoch": 0.537528834210682, "grad_norm": 0.31450849771499634, "learning_rate": 1.4859691263146574e-05, "loss": 0.04818488657474518, "step": 3976 }, { "epoch": 0.5376640275794472, "grad_norm": 0.447421669960022, "learning_rate": 1.485284714898711e-05, "loss": 0.05986437201499939, "step": 3977 }, { "epoch": 0.5377992209482125, "grad_norm": 1.162524700164795, "learning_rate": 1.4846003065465653e-05, "loss": 0.05748840421438217, "step": 3978 }, { "epoch": 0.5379344143169777, "grad_norm": 0.34466612339019775, "learning_rate": 1.4839159014007179e-05, "loss": 0.06284038722515106, "step": 3979 }, { "epoch": 0.538069607685743, "grad_norm": 0.7355208396911621, "learning_rate": 1.4832314996036653e-05, "loss": 0.04395366832613945, "step": 3980 }, { "epoch": 0.5382048010545083, "grad_norm": 0.4821547865867615, "learning_rate": 1.4825471012979048e-05, "loss": 0.05633664131164551, "step": 3981 }, { "epoch": 0.5383399944232735, "grad_norm": 0.6286754608154297, "learning_rate": 1.4818627066259301e-05, "loss": 0.0601787269115448, "step": 3982 }, { "epoch": 0.5384751877920388, "grad_norm": 0.37904462218284607, "learning_rate": 1.481178315730237e-05, "loss": 0.051263660192489624, "step": 3983 }, { "epoch": 0.5386103811608041, "grad_norm": 0.39313796162605286, "learning_rate": 1.4804939287533184e-05, "loss": 0.061426207423210144, "step": 3984 }, { "epoch": 0.5387455745295693, "grad_norm": 0.9317488074302673, "learning_rate": 1.4798095458376682e-05, "loss": 0.04274756461381912, "step": 3985 }, { "epoch": 0.5388807678983346, "grad_norm": 0.48566317558288574, "learning_rate": 1.4791251671257788e-05, "loss": 0.052697841078042984, "step": 3986 }, { "epoch": 0.5390159612670998, "grad_norm": 0.3606877326965332, "learning_rate": 1.4784407927601401e-05, "loss": 0.05537360906600952, "step": 3987 }, { "epoch": 0.5391511546358652, "grad_norm": 0.5394943952560425, "learning_rate": 1.4777564228832436e-05, "loss": 0.05346938967704773, "step": 3988 }, { "epoch": 0.5392863480046304, "grad_norm": 0.35650384426116943, "learning_rate": 1.4770720576375782e-05, "loss": 0.03862364962697029, "step": 3989 }, { "epoch": 0.5394215413733956, "grad_norm": 0.6105693578720093, "learning_rate": 1.4763876971656334e-05, "loss": 0.03420288488268852, "step": 3990 }, { "epoch": 0.5395567347421609, "grad_norm": 0.8025833964347839, "learning_rate": 1.4757033416098953e-05, "loss": 0.06326080858707428, "step": 3991 }, { "epoch": 0.5396919281109261, "grad_norm": 1.293094515800476, "learning_rate": 1.4750189911128511e-05, "loss": 0.06910311430692673, "step": 3992 }, { "epoch": 0.5398271214796915, "grad_norm": 0.5423516035079956, "learning_rate": 1.4743346458169863e-05, "loss": 0.05523611977696419, "step": 3993 }, { "epoch": 0.5399623148484567, "grad_norm": 0.5392700433731079, "learning_rate": 1.473650305864785e-05, "loss": 0.06953607499599457, "step": 3994 }, { "epoch": 0.5400975082172219, "grad_norm": 0.9406944513320923, "learning_rate": 1.472965971398731e-05, "loss": 0.0425386056303978, "step": 3995 }, { "epoch": 0.5402327015859872, "grad_norm": 0.35616856813430786, "learning_rate": 1.4722816425613054e-05, "loss": 0.05764736235141754, "step": 3996 }, { "epoch": 0.5403678949547525, "grad_norm": 0.3001463711261749, "learning_rate": 1.4715973194949895e-05, "loss": 0.03597217798233032, "step": 3997 }, { "epoch": 0.5405030883235177, "grad_norm": 0.31074583530426025, "learning_rate": 1.4709130023422636e-05, "loss": 0.04479756951332092, "step": 3998 }, { "epoch": 0.540638281692283, "grad_norm": 0.8563055396080017, "learning_rate": 1.4702286912456052e-05, "loss": 0.06081032752990723, "step": 3999 }, { "epoch": 0.5407734750610482, "grad_norm": 0.49117857217788696, "learning_rate": 1.4695443863474928e-05, "loss": 0.05906405299901962, "step": 4000 }, { "epoch": 0.5409086684298136, "grad_norm": 1.1349149942398071, "learning_rate": 1.4688600877904012e-05, "loss": 0.05307549238204956, "step": 4001 }, { "epoch": 0.5410438617985788, "grad_norm": 0.9400823712348938, "learning_rate": 1.468175795716805e-05, "loss": 0.06698894500732422, "step": 4002 }, { "epoch": 0.541179055167344, "grad_norm": 0.47587287425994873, "learning_rate": 1.4674915102691783e-05, "loss": 0.043459322303533554, "step": 4003 }, { "epoch": 0.5413142485361093, "grad_norm": 0.4782063663005829, "learning_rate": 1.4668072315899926e-05, "loss": 0.03177499771118164, "step": 4004 }, { "epoch": 0.5414494419048745, "grad_norm": 0.324441522359848, "learning_rate": 1.466122959821718e-05, "loss": 0.05222472548484802, "step": 4005 }, { "epoch": 0.5415846352736399, "grad_norm": 0.3802015781402588, "learning_rate": 1.4654386951068239e-05, "loss": 0.04731915518641472, "step": 4006 }, { "epoch": 0.5417198286424051, "grad_norm": 0.49423736333847046, "learning_rate": 1.4647544375877776e-05, "loss": 0.059997059404850006, "step": 4007 }, { "epoch": 0.5418550220111703, "grad_norm": 0.6311784386634827, "learning_rate": 1.4640701874070457e-05, "loss": 0.04727725684642792, "step": 4008 }, { "epoch": 0.5419902153799356, "grad_norm": 0.487170934677124, "learning_rate": 1.4633859447070922e-05, "loss": 0.04510696232318878, "step": 4009 }, { "epoch": 0.5421254087487009, "grad_norm": 0.6538010239601135, "learning_rate": 1.4627017096303805e-05, "loss": 0.048280373215675354, "step": 4010 }, { "epoch": 0.5422606021174662, "grad_norm": 0.7923305630683899, "learning_rate": 1.4620174823193711e-05, "loss": 0.04995451495051384, "step": 4011 }, { "epoch": 0.5423957954862314, "grad_norm": 0.6805353164672852, "learning_rate": 1.4613332629165249e-05, "loss": 0.05478479713201523, "step": 4012 }, { "epoch": 0.5425309888549966, "grad_norm": 1.2362356185913086, "learning_rate": 1.4606490515642998e-05, "loss": 0.06356494873762131, "step": 4013 }, { "epoch": 0.542666182223762, "grad_norm": 1.2735474109649658, "learning_rate": 1.4599648484051516e-05, "loss": 0.050921857357025146, "step": 4014 }, { "epoch": 0.5428013755925272, "grad_norm": 0.62109375, "learning_rate": 1.4592806535815358e-05, "loss": 0.05046012997627258, "step": 4015 }, { "epoch": 0.5429365689612925, "grad_norm": 0.3577922582626343, "learning_rate": 1.4585964672359045e-05, "loss": 0.06369826942682266, "step": 4016 }, { "epoch": 0.5430717623300577, "grad_norm": 0.5299736261367798, "learning_rate": 1.4579122895107098e-05, "loss": 0.05683884024620056, "step": 4017 }, { "epoch": 0.5432069556988229, "grad_norm": 1.1091710329055786, "learning_rate": 1.4572281205484012e-05, "loss": 0.053758978843688965, "step": 4018 }, { "epoch": 0.5433421490675883, "grad_norm": 1.0474162101745605, "learning_rate": 1.4565439604914256e-05, "loss": 0.05532175302505493, "step": 4019 }, { "epoch": 0.5434773424363535, "grad_norm": 0.9889752864837646, "learning_rate": 1.4558598094822294e-05, "loss": 0.0699634999036789, "step": 4020 }, { "epoch": 0.5436125358051188, "grad_norm": 0.7981903553009033, "learning_rate": 1.455175667663256e-05, "loss": 0.05378991737961769, "step": 4021 }, { "epoch": 0.543747729173884, "grad_norm": 0.4481160342693329, "learning_rate": 1.4544915351769476e-05, "loss": 0.04631450027227402, "step": 4022 }, { "epoch": 0.5438829225426492, "grad_norm": 0.574632465839386, "learning_rate": 1.4538074121657448e-05, "loss": 0.06112110614776611, "step": 4023 }, { "epoch": 0.5440181159114146, "grad_norm": 0.33643460273742676, "learning_rate": 1.4531232987720846e-05, "loss": 0.05754278600215912, "step": 4024 }, { "epoch": 0.5441533092801798, "grad_norm": 0.4023594260215759, "learning_rate": 1.4524391951384037e-05, "loss": 0.050510138273239136, "step": 4025 }, { "epoch": 0.544288502648945, "grad_norm": 0.47239968180656433, "learning_rate": 1.4517551014071358e-05, "loss": 0.053018130362033844, "step": 4026 }, { "epoch": 0.5444236960177103, "grad_norm": 1.0239540338516235, "learning_rate": 1.4510710177207137e-05, "loss": 0.050805479288101196, "step": 4027 }, { "epoch": 0.5445588893864756, "grad_norm": 1.0416115522384644, "learning_rate": 1.450386944221566e-05, "loss": 0.05809459090232849, "step": 4028 }, { "epoch": 0.5446940827552409, "grad_norm": 0.36053913831710815, "learning_rate": 1.449702881052121e-05, "loss": 0.055181846022605896, "step": 4029 }, { "epoch": 0.5448292761240061, "grad_norm": 0.7667855620384216, "learning_rate": 1.4490188283548048e-05, "loss": 0.060374438762664795, "step": 4030 }, { "epoch": 0.5449644694927713, "grad_norm": 0.495286226272583, "learning_rate": 1.44833478627204e-05, "loss": 0.0642457827925682, "step": 4031 }, { "epoch": 0.5450996628615367, "grad_norm": 0.4716796278953552, "learning_rate": 1.447650754946249e-05, "loss": 0.04884020984172821, "step": 4032 }, { "epoch": 0.5452348562303019, "grad_norm": 0.6766912937164307, "learning_rate": 1.4469667345198492e-05, "loss": 0.05094345286488533, "step": 4033 }, { "epoch": 0.5453700495990672, "grad_norm": 0.8531404733657837, "learning_rate": 1.446282725135258e-05, "loss": 0.03966781497001648, "step": 4034 }, { "epoch": 0.5455052429678324, "grad_norm": 0.5359408855438232, "learning_rate": 1.4455987269348904e-05, "loss": 0.056910835206508636, "step": 4035 }, { "epoch": 0.5456404363365976, "grad_norm": 0.49176695942878723, "learning_rate": 1.4449147400611578e-05, "loss": 0.053131937980651855, "step": 4036 }, { "epoch": 0.545775629705363, "grad_norm": 0.8822224140167236, "learning_rate": 1.4442307646564702e-05, "loss": 0.07119302451610565, "step": 4037 }, { "epoch": 0.5459108230741282, "grad_norm": 0.3242127299308777, "learning_rate": 1.4435468008632345e-05, "loss": 0.04253949224948883, "step": 4038 }, { "epoch": 0.5460460164428935, "grad_norm": 0.9133155941963196, "learning_rate": 1.4428628488238557e-05, "loss": 0.057325027883052826, "step": 4039 }, { "epoch": 0.5461812098116587, "grad_norm": 0.8766014575958252, "learning_rate": 1.442178908680737e-05, "loss": 0.06670939922332764, "step": 4040 }, { "epoch": 0.546316403180424, "grad_norm": 0.5208431482315063, "learning_rate": 1.4414949805762779e-05, "loss": 0.0591048002243042, "step": 4041 }, { "epoch": 0.5464515965491893, "grad_norm": 0.5918677449226379, "learning_rate": 1.4408110646528757e-05, "loss": 0.0653669536113739, "step": 4042 }, { "epoch": 0.5465867899179545, "grad_norm": 1.5518032312393188, "learning_rate": 1.440127161052925e-05, "loss": 0.05806749686598778, "step": 4043 }, { "epoch": 0.5467219832867198, "grad_norm": 0.4439472258090973, "learning_rate": 1.4394432699188188e-05, "loss": 0.05808722972869873, "step": 4044 }, { "epoch": 0.546857176655485, "grad_norm": 0.40956586599349976, "learning_rate": 1.4387593913929472e-05, "loss": 0.03346915543079376, "step": 4045 }, { "epoch": 0.5469923700242503, "grad_norm": 0.6225788593292236, "learning_rate": 1.4380755256176968e-05, "loss": 0.045644327998161316, "step": 4046 }, { "epoch": 0.5471275633930156, "grad_norm": 0.4095410108566284, "learning_rate": 1.437391672735452e-05, "loss": 0.05311070382595062, "step": 4047 }, { "epoch": 0.5472627567617808, "grad_norm": 0.4994106888771057, "learning_rate": 1.4367078328885946e-05, "loss": 0.06590622663497925, "step": 4048 }, { "epoch": 0.5473979501305462, "grad_norm": 0.4182433784008026, "learning_rate": 1.4360240062195039e-05, "loss": 0.07823541760444641, "step": 4049 }, { "epoch": 0.5475331434993114, "grad_norm": 0.36532631516456604, "learning_rate": 1.435340192870557e-05, "loss": 0.04127059876918793, "step": 4050 }, { "epoch": 0.5476683368680766, "grad_norm": 0.3086000978946686, "learning_rate": 1.434656392984126e-05, "loss": 0.03841794654726982, "step": 4051 }, { "epoch": 0.5478035302368419, "grad_norm": 0.5577488541603088, "learning_rate": 1.4339726067025828e-05, "loss": 0.05866488441824913, "step": 4052 }, { "epoch": 0.5479387236056071, "grad_norm": 0.4661047160625458, "learning_rate": 1.4332888341682947e-05, "loss": 0.04983644187450409, "step": 4053 }, { "epoch": 0.5480739169743724, "grad_norm": 0.40975701808929443, "learning_rate": 1.432605075523627e-05, "loss": 0.04896017909049988, "step": 4054 }, { "epoch": 0.5482091103431377, "grad_norm": 0.3015366792678833, "learning_rate": 1.4319213309109426e-05, "loss": 0.03304576501250267, "step": 4055 }, { "epoch": 0.5483443037119029, "grad_norm": 0.3199126124382019, "learning_rate": 1.4312376004725996e-05, "loss": 0.06221427768468857, "step": 4056 }, { "epoch": 0.5484794970806682, "grad_norm": 0.2792492210865021, "learning_rate": 1.430553884350955e-05, "loss": 0.05618011951446533, "step": 4057 }, { "epoch": 0.5486146904494335, "grad_norm": 0.23022061586380005, "learning_rate": 1.429870182688362e-05, "loss": 0.03981427103281021, "step": 4058 }, { "epoch": 0.5487498838181987, "grad_norm": 0.49077630043029785, "learning_rate": 1.4291864956271713e-05, "loss": 0.06821377575397491, "step": 4059 }, { "epoch": 0.548885077186964, "grad_norm": 0.6557405591011047, "learning_rate": 1.4285028233097293e-05, "loss": 0.06908242404460907, "step": 4060 }, { "epoch": 0.5490202705557292, "grad_norm": 0.960667073726654, "learning_rate": 1.4278191658783809e-05, "loss": 0.04825650155544281, "step": 4061 }, { "epoch": 0.5491554639244945, "grad_norm": 0.7569752335548401, "learning_rate": 1.427135523475467e-05, "loss": 0.0507570281624794, "step": 4062 }, { "epoch": 0.5492906572932598, "grad_norm": 0.3151237368583679, "learning_rate": 1.4264518962433258e-05, "loss": 0.052069079130887985, "step": 4063 }, { "epoch": 0.549425850662025, "grad_norm": 0.6898719668388367, "learning_rate": 1.4257682843242925e-05, "loss": 0.04433870315551758, "step": 4064 }, { "epoch": 0.5495610440307903, "grad_norm": 0.6530208587646484, "learning_rate": 1.4250846878606974e-05, "loss": 0.06546321511268616, "step": 4065 }, { "epoch": 0.5496962373995555, "grad_norm": 1.4969708919525146, "learning_rate": 1.4244011069948702e-05, "loss": 0.0656256452202797, "step": 4066 }, { "epoch": 0.5498314307683209, "grad_norm": 0.31266719102859497, "learning_rate": 1.4237175418691357e-05, "loss": 0.03966697305440903, "step": 4067 }, { "epoch": 0.5499666241370861, "grad_norm": 0.9247994422912598, "learning_rate": 1.4230339926258153e-05, "loss": 0.04583561420440674, "step": 4068 }, { "epoch": 0.5501018175058513, "grad_norm": 1.2100938558578491, "learning_rate": 1.422350459407229e-05, "loss": 0.04688480496406555, "step": 4069 }, { "epoch": 0.5502370108746166, "grad_norm": 0.440973699092865, "learning_rate": 1.4216669423556903e-05, "loss": 0.04519513621926308, "step": 4070 }, { "epoch": 0.5503722042433818, "grad_norm": 0.34939002990722656, "learning_rate": 1.420983441613512e-05, "loss": 0.042144447565078735, "step": 4071 }, { "epoch": 0.5505073976121472, "grad_norm": 0.7764574885368347, "learning_rate": 1.420299957323003e-05, "loss": 0.04697326570749283, "step": 4072 }, { "epoch": 0.5506425909809124, "grad_norm": 0.5128805041313171, "learning_rate": 1.4196164896264679e-05, "loss": 0.04348839819431305, "step": 4073 }, { "epoch": 0.5507777843496776, "grad_norm": 0.5639934539794922, "learning_rate": 1.418933038666208e-05, "loss": 0.06003371626138687, "step": 4074 }, { "epoch": 0.5509129777184429, "grad_norm": 0.3731740415096283, "learning_rate": 1.4182496045845217e-05, "loss": 0.04616701602935791, "step": 4075 }, { "epoch": 0.5510481710872082, "grad_norm": 0.5516098141670227, "learning_rate": 1.4175661875237036e-05, "loss": 0.05356122553348541, "step": 4076 }, { "epoch": 0.5511833644559734, "grad_norm": 0.7479949593544006, "learning_rate": 1.416882787626045e-05, "loss": 0.054915063083171844, "step": 4077 }, { "epoch": 0.5513185578247387, "grad_norm": 0.2697214186191559, "learning_rate": 1.4161994050338334e-05, "loss": 0.042996957898139954, "step": 4078 }, { "epoch": 0.5514537511935039, "grad_norm": 0.9078285098075867, "learning_rate": 1.4155160398893528e-05, "loss": 0.04638843983411789, "step": 4079 }, { "epoch": 0.5515889445622693, "grad_norm": 0.2647183835506439, "learning_rate": 1.4148326923348824e-05, "loss": 0.05164635181427002, "step": 4080 }, { "epoch": 0.5517241379310345, "grad_norm": 0.4467652440071106, "learning_rate": 1.4141493625127e-05, "loss": 0.06272059679031372, "step": 4081 }, { "epoch": 0.5518593312997997, "grad_norm": 0.24048303067684174, "learning_rate": 1.4134660505650786e-05, "loss": 0.03398372232913971, "step": 4082 }, { "epoch": 0.551994524668565, "grad_norm": 0.6048710942268372, "learning_rate": 1.4127827566342864e-05, "loss": 0.05136989802122116, "step": 4083 }, { "epoch": 0.5521297180373302, "grad_norm": 0.8332582712173462, "learning_rate": 1.4120994808625896e-05, "loss": 0.0650918185710907, "step": 4084 }, { "epoch": 0.5522649114060956, "grad_norm": 0.635187029838562, "learning_rate": 1.4114162233922494e-05, "loss": 0.053848572075366974, "step": 4085 }, { "epoch": 0.5524001047748608, "grad_norm": 0.9322073459625244, "learning_rate": 1.4107329843655238e-05, "loss": 0.07745838165283203, "step": 4086 }, { "epoch": 0.552535298143626, "grad_norm": 0.5019484758377075, "learning_rate": 1.4100497639246675e-05, "loss": 0.04726524278521538, "step": 4087 }, { "epoch": 0.5526704915123913, "grad_norm": 0.7686334252357483, "learning_rate": 1.4093665622119294e-05, "loss": 0.043291181325912476, "step": 4088 }, { "epoch": 0.5528056848811566, "grad_norm": 0.7255486249923706, "learning_rate": 1.4086833793695566e-05, "loss": 0.050574421882629395, "step": 4089 }, { "epoch": 0.5529408782499219, "grad_norm": 1.2916117906570435, "learning_rate": 1.408000215539791e-05, "loss": 0.05484726279973984, "step": 4090 }, { "epoch": 0.5530760716186871, "grad_norm": 0.3538680970668793, "learning_rate": 1.4073170708648711e-05, "loss": 0.06455840170383453, "step": 4091 }, { "epoch": 0.5532112649874523, "grad_norm": 0.8253899216651917, "learning_rate": 1.406633945487032e-05, "loss": 0.049087703227996826, "step": 4092 }, { "epoch": 0.5533464583562177, "grad_norm": 0.4992747902870178, "learning_rate": 1.4059508395485026e-05, "loss": 0.056317687034606934, "step": 4093 }, { "epoch": 0.5534816517249829, "grad_norm": 0.8482896685600281, "learning_rate": 1.4052677531915102e-05, "loss": 0.05254112184047699, "step": 4094 }, { "epoch": 0.5536168450937482, "grad_norm": 0.37596696615219116, "learning_rate": 1.4045846865582765e-05, "loss": 0.0493338480591774, "step": 4095 }, { "epoch": 0.5537520384625134, "grad_norm": 0.41362515091896057, "learning_rate": 1.4039016397910206e-05, "loss": 0.05972599983215332, "step": 4096 }, { "epoch": 0.5538872318312786, "grad_norm": 0.7611149549484253, "learning_rate": 1.403218613031955e-05, "loss": 0.05557525157928467, "step": 4097 }, { "epoch": 0.554022425200044, "grad_norm": 0.324356347322464, "learning_rate": 1.4025356064232903e-05, "loss": 0.036259621381759644, "step": 4098 }, { "epoch": 0.5541576185688092, "grad_norm": 0.5125876665115356, "learning_rate": 1.4018526201072324e-05, "loss": 0.05409299582242966, "step": 4099 }, { "epoch": 0.5542928119375745, "grad_norm": 0.420535147190094, "learning_rate": 1.4011696542259821e-05, "loss": 0.05342576652765274, "step": 4100 }, { "epoch": 0.5544280053063397, "grad_norm": 1.3484312295913696, "learning_rate": 1.4004867089217376e-05, "loss": 0.07324740290641785, "step": 4101 }, { "epoch": 0.554563198675105, "grad_norm": 1.4609699249267578, "learning_rate": 1.39980378433669e-05, "loss": 0.05767257511615753, "step": 4102 }, { "epoch": 0.5546983920438703, "grad_norm": 0.4036857783794403, "learning_rate": 1.399120880613029e-05, "loss": 0.050574153661727905, "step": 4103 }, { "epoch": 0.5548335854126355, "grad_norm": 1.92477548122406, "learning_rate": 1.3984379978929388e-05, "loss": 0.06461501121520996, "step": 4104 }, { "epoch": 0.5549687787814007, "grad_norm": 0.9905716776847839, "learning_rate": 1.3977551363185995e-05, "loss": 0.04796414077281952, "step": 4105 }, { "epoch": 0.555103972150166, "grad_norm": 1.1557170152664185, "learning_rate": 1.3970722960321854e-05, "loss": 0.05784928798675537, "step": 4106 }, { "epoch": 0.5552391655189313, "grad_norm": 0.7333866953849792, "learning_rate": 1.3963894771758682e-05, "loss": 0.0469755083322525, "step": 4107 }, { "epoch": 0.5553743588876966, "grad_norm": 0.7916989922523499, "learning_rate": 1.3957066798918143e-05, "loss": 0.04061264544725418, "step": 4108 }, { "epoch": 0.5555095522564618, "grad_norm": 0.9357433319091797, "learning_rate": 1.3950239043221861e-05, "loss": 0.050264425575733185, "step": 4109 }, { "epoch": 0.555644745625227, "grad_norm": 0.5857399106025696, "learning_rate": 1.3943411506091408e-05, "loss": 0.047240953892469406, "step": 4110 }, { "epoch": 0.5557799389939924, "grad_norm": 1.3813725709915161, "learning_rate": 1.3936584188948313e-05, "loss": 0.0697888657450676, "step": 4111 }, { "epoch": 0.5559151323627576, "grad_norm": 0.35123321413993835, "learning_rate": 1.3929757093214059e-05, "loss": 0.05244212597608566, "step": 4112 }, { "epoch": 0.5560503257315229, "grad_norm": 0.3247286081314087, "learning_rate": 1.3922930220310085e-05, "loss": 0.05216085910797119, "step": 4113 }, { "epoch": 0.5561855191002881, "grad_norm": 0.2647315561771393, "learning_rate": 1.3916103571657786e-05, "loss": 0.050299856811761856, "step": 4114 }, { "epoch": 0.5563207124690533, "grad_norm": 0.8535521626472473, "learning_rate": 1.3909277148678504e-05, "loss": 0.07653629034757614, "step": 4115 }, { "epoch": 0.5564559058378187, "grad_norm": 0.38206785917282104, "learning_rate": 1.3902450952793536e-05, "loss": 0.0732155591249466, "step": 4116 }, { "epoch": 0.5565910992065839, "grad_norm": 0.32354500889778137, "learning_rate": 1.389562498542413e-05, "loss": 0.049967825412750244, "step": 4117 }, { "epoch": 0.5567262925753492, "grad_norm": 0.5827561616897583, "learning_rate": 1.388879924799149e-05, "loss": 0.06330039352178574, "step": 4118 }, { "epoch": 0.5568614859441144, "grad_norm": 0.39095014333724976, "learning_rate": 1.388197374191678e-05, "loss": 0.03535168245434761, "step": 4119 }, { "epoch": 0.5569966793128797, "grad_norm": 0.3703196942806244, "learning_rate": 1.387514846862109e-05, "loss": 0.0664469376206398, "step": 4120 }, { "epoch": 0.557131872681645, "grad_norm": 0.6275187134742737, "learning_rate": 1.3868323429525492e-05, "loss": 0.04316431283950806, "step": 4121 }, { "epoch": 0.5572670660504102, "grad_norm": 0.3375455141067505, "learning_rate": 1.3861498626050986e-05, "loss": 0.05484852194786072, "step": 4122 }, { "epoch": 0.5574022594191755, "grad_norm": 0.6091442704200745, "learning_rate": 1.385467405961854e-05, "loss": 0.06739459931850433, "step": 4123 }, { "epoch": 0.5575374527879408, "grad_norm": 0.5915157794952393, "learning_rate": 1.3847849731649066e-05, "loss": 0.04694291576743126, "step": 4124 }, { "epoch": 0.557672646156706, "grad_norm": 0.2952704131603241, "learning_rate": 1.3841025643563418e-05, "loss": 0.04867354780435562, "step": 4125 }, { "epoch": 0.5578078395254713, "grad_norm": 0.3743056058883667, "learning_rate": 1.3834201796782413e-05, "loss": 0.04590238630771637, "step": 4126 }, { "epoch": 0.5579430328942365, "grad_norm": 0.2780226171016693, "learning_rate": 1.3827378192726808e-05, "loss": 0.04500555992126465, "step": 4127 }, { "epoch": 0.5580782262630019, "grad_norm": 0.626263439655304, "learning_rate": 1.3820554832817324e-05, "loss": 0.064442940056324, "step": 4128 }, { "epoch": 0.5582134196317671, "grad_norm": 0.7143221497535706, "learning_rate": 1.3813731718474606e-05, "loss": 0.05304088816046715, "step": 4129 }, { "epoch": 0.5583486130005323, "grad_norm": 0.44622063636779785, "learning_rate": 1.380690885111927e-05, "loss": 0.03524869680404663, "step": 4130 }, { "epoch": 0.5584838063692976, "grad_norm": 0.9775217771530151, "learning_rate": 1.3800086232171877e-05, "loss": 0.06411316990852356, "step": 4131 }, { "epoch": 0.5586189997380628, "grad_norm": 0.3309904634952545, "learning_rate": 1.3793263863052926e-05, "loss": 0.04349002242088318, "step": 4132 }, { "epoch": 0.5587541931068281, "grad_norm": 0.9526672959327698, "learning_rate": 1.3786441745182881e-05, "loss": 0.060160089284181595, "step": 4133 }, { "epoch": 0.5588893864755934, "grad_norm": 0.3098883628845215, "learning_rate": 1.3779619879982127e-05, "loss": 0.03340590000152588, "step": 4134 }, { "epoch": 0.5590245798443586, "grad_norm": 0.34839949011802673, "learning_rate": 1.3772798268871025e-05, "loss": 0.057577282190322876, "step": 4135 }, { "epoch": 0.5591597732131239, "grad_norm": 0.5307559967041016, "learning_rate": 1.376597691326987e-05, "loss": 0.04966269060969353, "step": 4136 }, { "epoch": 0.5592949665818892, "grad_norm": 0.6379033923149109, "learning_rate": 1.3759155814598898e-05, "loss": 0.06234271824359894, "step": 4137 }, { "epoch": 0.5594301599506544, "grad_norm": 0.7114551067352295, "learning_rate": 1.3752334974278308e-05, "loss": 0.05135638266801834, "step": 4138 }, { "epoch": 0.5595653533194197, "grad_norm": 0.7957108020782471, "learning_rate": 1.3745514393728225e-05, "loss": 0.050775811076164246, "step": 4139 }, { "epoch": 0.5597005466881849, "grad_norm": 0.6730691194534302, "learning_rate": 1.3738694074368735e-05, "loss": 0.05369863659143448, "step": 4140 }, { "epoch": 0.5598357400569502, "grad_norm": 0.772809624671936, "learning_rate": 1.3731874017619868e-05, "loss": 0.06065630167722702, "step": 4141 }, { "epoch": 0.5599709334257155, "grad_norm": 0.5261940360069275, "learning_rate": 1.3725054224901597e-05, "loss": 0.055207014083862305, "step": 4142 }, { "epoch": 0.5601061267944807, "grad_norm": 0.7934703826904297, "learning_rate": 1.3718234697633826e-05, "loss": 0.04808718338608742, "step": 4143 }, { "epoch": 0.560241320163246, "grad_norm": 0.7563589811325073, "learning_rate": 1.3711415437236427e-05, "loss": 0.059947334229946136, "step": 4144 }, { "epoch": 0.5603765135320112, "grad_norm": 0.7628953456878662, "learning_rate": 1.3704596445129207e-05, "loss": 0.057378850877285004, "step": 4145 }, { "epoch": 0.5605117069007766, "grad_norm": 0.3508414924144745, "learning_rate": 1.369777772273192e-05, "loss": 0.03536544740200043, "step": 4146 }, { "epoch": 0.5606469002695418, "grad_norm": 0.35734426975250244, "learning_rate": 1.369095927146425e-05, "loss": 0.0586828775703907, "step": 4147 }, { "epoch": 0.560782093638307, "grad_norm": 0.4771082103252411, "learning_rate": 1.3684141092745846e-05, "loss": 0.05103515088558197, "step": 4148 }, { "epoch": 0.5609172870070723, "grad_norm": 0.5046601295471191, "learning_rate": 1.3677323187996276e-05, "loss": 0.05139821767807007, "step": 4149 }, { "epoch": 0.5610524803758375, "grad_norm": 0.7221264243125916, "learning_rate": 1.3670505558635074e-05, "loss": 0.04857698827981949, "step": 4150 }, { "epoch": 0.5611876737446029, "grad_norm": 1.0628623962402344, "learning_rate": 1.366368820608171e-05, "loss": 0.06827078759670258, "step": 4151 }, { "epoch": 0.5613228671133681, "grad_norm": 0.27733466029167175, "learning_rate": 1.365687113175558e-05, "loss": 0.04822686314582825, "step": 4152 }, { "epoch": 0.5614580604821333, "grad_norm": 0.5919431447982788, "learning_rate": 1.3650054337076049e-05, "loss": 0.05639851093292236, "step": 4153 }, { "epoch": 0.5615932538508986, "grad_norm": 0.9658674001693726, "learning_rate": 1.3643237823462398e-05, "loss": 0.042504988610744476, "step": 4154 }, { "epoch": 0.5617284472196639, "grad_norm": 1.4385346174240112, "learning_rate": 1.363642159233387e-05, "loss": 0.06431318074464798, "step": 4155 }, { "epoch": 0.5618636405884292, "grad_norm": 0.37629932165145874, "learning_rate": 1.3629605645109642e-05, "loss": 0.042373064905405045, "step": 4156 }, { "epoch": 0.5619988339571944, "grad_norm": 0.8330971002578735, "learning_rate": 1.362278998320882e-05, "loss": 0.053013384342193604, "step": 4157 }, { "epoch": 0.5621340273259596, "grad_norm": 0.5067549347877502, "learning_rate": 1.3615974608050472e-05, "loss": 0.049661993980407715, "step": 4158 }, { "epoch": 0.562269220694725, "grad_norm": 1.273038387298584, "learning_rate": 1.3609159521053588e-05, "loss": 0.06212637573480606, "step": 4159 }, { "epoch": 0.5624044140634902, "grad_norm": 0.7922593355178833, "learning_rate": 1.3602344723637107e-05, "loss": 0.06535595655441284, "step": 4160 }, { "epoch": 0.5625396074322554, "grad_norm": 0.3038403391838074, "learning_rate": 1.3595530217219916e-05, "loss": 0.045795708894729614, "step": 4161 }, { "epoch": 0.5626748008010207, "grad_norm": 0.6399083137512207, "learning_rate": 1.3588716003220815e-05, "loss": 0.07586899399757385, "step": 4162 }, { "epoch": 0.5628099941697859, "grad_norm": 0.4581215977668762, "learning_rate": 1.3581902083058574e-05, "loss": 0.05741545557975769, "step": 4163 }, { "epoch": 0.5629451875385513, "grad_norm": 0.9531058669090271, "learning_rate": 1.3575088458151877e-05, "loss": 0.06747080385684967, "step": 4164 }, { "epoch": 0.5630803809073165, "grad_norm": 0.30632415413856506, "learning_rate": 1.3568275129919367e-05, "loss": 0.048171043395996094, "step": 4165 }, { "epoch": 0.5632155742760817, "grad_norm": 0.6023296117782593, "learning_rate": 1.3561462099779604e-05, "loss": 0.04519319534301758, "step": 4166 }, { "epoch": 0.563350767644847, "grad_norm": 0.4950801730155945, "learning_rate": 1.3554649369151104e-05, "loss": 0.05491878092288971, "step": 4167 }, { "epoch": 0.5634859610136123, "grad_norm": 0.6327624320983887, "learning_rate": 1.3547836939452315e-05, "loss": 0.04467090964317322, "step": 4168 }, { "epoch": 0.5636211543823776, "grad_norm": 0.959592342376709, "learning_rate": 1.3541024812101615e-05, "loss": 0.04802456498146057, "step": 4169 }, { "epoch": 0.5637563477511428, "grad_norm": 0.4934079349040985, "learning_rate": 1.3534212988517339e-05, "loss": 0.04421760141849518, "step": 4170 }, { "epoch": 0.563891541119908, "grad_norm": 1.5135420560836792, "learning_rate": 1.3527401470117726e-05, "loss": 0.06407340615987778, "step": 4171 }, { "epoch": 0.5640267344886734, "grad_norm": 0.2907327711582184, "learning_rate": 1.3520590258320981e-05, "loss": 0.04562245309352875, "step": 4172 }, { "epoch": 0.5641619278574386, "grad_norm": 1.0602513551712036, "learning_rate": 1.3513779354545235e-05, "loss": 0.06544183194637299, "step": 4173 }, { "epoch": 0.5642971212262039, "grad_norm": 0.6522163152694702, "learning_rate": 1.3506968760208557e-05, "loss": 0.04684177786111832, "step": 4174 }, { "epoch": 0.5644323145949691, "grad_norm": 0.6463688611984253, "learning_rate": 1.3500158476728938e-05, "loss": 0.07147551327943802, "step": 4175 }, { "epoch": 0.5645675079637343, "grad_norm": 0.585493803024292, "learning_rate": 1.3493348505524325e-05, "loss": 0.05106959864497185, "step": 4176 }, { "epoch": 0.5647027013324997, "grad_norm": 0.34694379568099976, "learning_rate": 1.3486538848012586e-05, "loss": 0.051603272557258606, "step": 4177 }, { "epoch": 0.5648378947012649, "grad_norm": 0.47639647126197815, "learning_rate": 1.3479729505611532e-05, "loss": 0.046169739216566086, "step": 4178 }, { "epoch": 0.5649730880700302, "grad_norm": 1.6975427865982056, "learning_rate": 1.3472920479738906e-05, "loss": 0.05696733295917511, "step": 4179 }, { "epoch": 0.5651082814387954, "grad_norm": 0.2580471932888031, "learning_rate": 1.346611177181237e-05, "loss": 0.050407782196998596, "step": 4180 }, { "epoch": 0.5652434748075607, "grad_norm": 0.3292336165904999, "learning_rate": 1.3459303383249547e-05, "loss": 0.053844138979911804, "step": 4181 }, { "epoch": 0.565378668176326, "grad_norm": 0.8269696831703186, "learning_rate": 1.3452495315467975e-05, "loss": 0.0390135832130909, "step": 4182 }, { "epoch": 0.5655138615450912, "grad_norm": 0.5612631440162659, "learning_rate": 1.3445687569885132e-05, "loss": 0.054399967193603516, "step": 4183 }, { "epoch": 0.5656490549138565, "grad_norm": 0.4437982738018036, "learning_rate": 1.3438880147918429e-05, "loss": 0.04522149637341499, "step": 4184 }, { "epoch": 0.5657842482826217, "grad_norm": 0.661548376083374, "learning_rate": 1.3432073050985201e-05, "loss": 0.05173173546791077, "step": 4185 }, { "epoch": 0.565919441651387, "grad_norm": 1.1240564584732056, "learning_rate": 1.3425266280502721e-05, "loss": 0.06077219173312187, "step": 4186 }, { "epoch": 0.5660546350201523, "grad_norm": 1.235144853591919, "learning_rate": 1.3418459837888202e-05, "loss": 0.06470440328121185, "step": 4187 }, { "epoch": 0.5661898283889175, "grad_norm": 0.2538681626319885, "learning_rate": 1.3411653724558784e-05, "loss": 0.05350903421640396, "step": 4188 }, { "epoch": 0.5663250217576827, "grad_norm": 0.5879151225090027, "learning_rate": 1.3404847941931523e-05, "loss": 0.0470278300344944, "step": 4189 }, { "epoch": 0.5664602151264481, "grad_norm": 1.0408427715301514, "learning_rate": 1.339804249142343e-05, "loss": 0.0490865521132946, "step": 4190 }, { "epoch": 0.5665954084952133, "grad_norm": 0.8385155200958252, "learning_rate": 1.3391237374451429e-05, "loss": 0.045292384922504425, "step": 4191 }, { "epoch": 0.5667306018639786, "grad_norm": 0.19643595814704895, "learning_rate": 1.3384432592432388e-05, "loss": 0.03735601156949997, "step": 4192 }, { "epoch": 0.5668657952327438, "grad_norm": 0.4666518270969391, "learning_rate": 1.3377628146783102e-05, "loss": 0.054945189505815506, "step": 4193 }, { "epoch": 0.567000988601509, "grad_norm": 0.9669063091278076, "learning_rate": 1.3370824038920281e-05, "loss": 0.06678605824708939, "step": 4194 }, { "epoch": 0.5671361819702744, "grad_norm": 0.9080616235733032, "learning_rate": 1.3364020270260586e-05, "loss": 0.06193765252828598, "step": 4195 }, { "epoch": 0.5672713753390396, "grad_norm": 0.6309118270874023, "learning_rate": 1.335721684222059e-05, "loss": 0.04887032508850098, "step": 4196 }, { "epoch": 0.5674065687078049, "grad_norm": 0.5722773671150208, "learning_rate": 1.3350413756216816e-05, "loss": 0.043982818722724915, "step": 4197 }, { "epoch": 0.5675417620765701, "grad_norm": 1.1884140968322754, "learning_rate": 1.334361101366569e-05, "loss": 0.06711280345916748, "step": 4198 }, { "epoch": 0.5676769554453354, "grad_norm": 0.22038215398788452, "learning_rate": 1.3336808615983582e-05, "loss": 0.03942631930112839, "step": 4199 }, { "epoch": 0.5678121488141007, "grad_norm": 1.034661054611206, "learning_rate": 1.3330006564586791e-05, "loss": 0.058673225343227386, "step": 4200 }, { "epoch": 0.5679473421828659, "grad_norm": 0.8482556939125061, "learning_rate": 1.3323204860891539e-05, "loss": 0.04450428485870361, "step": 4201 }, { "epoch": 0.5680825355516312, "grad_norm": 0.31858378648757935, "learning_rate": 1.3316403506313981e-05, "loss": 0.04843887314200401, "step": 4202 }, { "epoch": 0.5682177289203965, "grad_norm": 1.3350555896759033, "learning_rate": 1.3309602502270184e-05, "loss": 0.07182306051254272, "step": 4203 }, { "epoch": 0.5683529222891617, "grad_norm": 0.6230509281158447, "learning_rate": 1.3302801850176161e-05, "loss": 0.060498617589473724, "step": 4204 }, { "epoch": 0.568488115657927, "grad_norm": 0.33955833315849304, "learning_rate": 1.3296001551447848e-05, "loss": 0.060212284326553345, "step": 4205 }, { "epoch": 0.5686233090266922, "grad_norm": 0.33457639813423157, "learning_rate": 1.32892016075011e-05, "loss": 0.05924059823155403, "step": 4206 }, { "epoch": 0.5687585023954576, "grad_norm": 0.43424177169799805, "learning_rate": 1.3282402019751694e-05, "loss": 0.05278228223323822, "step": 4207 }, { "epoch": 0.5688936957642228, "grad_norm": 0.4012652039527893, "learning_rate": 1.327560278961535e-05, "loss": 0.05098462477326393, "step": 4208 }, { "epoch": 0.569028889132988, "grad_norm": 0.365847647190094, "learning_rate": 1.3268803918507699e-05, "loss": 0.06091932952404022, "step": 4209 }, { "epoch": 0.5691640825017533, "grad_norm": 1.8767354488372803, "learning_rate": 1.3262005407844306e-05, "loss": 0.06962120532989502, "step": 4210 }, { "epoch": 0.5692992758705185, "grad_norm": 1.3309123516082764, "learning_rate": 1.325520725904066e-05, "loss": 0.05918598920106888, "step": 4211 }, { "epoch": 0.5694344692392838, "grad_norm": 1.093928337097168, "learning_rate": 1.3248409473512158e-05, "loss": 0.043022722005844116, "step": 4212 }, { "epoch": 0.5695696626080491, "grad_norm": 0.6291282773017883, "learning_rate": 1.3241612052674146e-05, "loss": 0.04735366255044937, "step": 4213 }, { "epoch": 0.5697048559768143, "grad_norm": 0.28831589221954346, "learning_rate": 1.3234814997941883e-05, "loss": 0.04577290266752243, "step": 4214 }, { "epoch": 0.5698400493455796, "grad_norm": 0.2873865067958832, "learning_rate": 1.322801831073055e-05, "loss": 0.05296456813812256, "step": 4215 }, { "epoch": 0.5699752427143449, "grad_norm": 0.3886588215827942, "learning_rate": 1.322122199245526e-05, "loss": 0.05226030945777893, "step": 4216 }, { "epoch": 0.5701104360831101, "grad_norm": 0.3163350522518158, "learning_rate": 1.321442604453103e-05, "loss": 0.04614682495594025, "step": 4217 }, { "epoch": 0.5702456294518754, "grad_norm": 0.6476740837097168, "learning_rate": 1.320763046837282e-05, "loss": 0.0599798858165741, "step": 4218 }, { "epoch": 0.5703808228206406, "grad_norm": 0.6392112374305725, "learning_rate": 1.3200835265395504e-05, "loss": 0.04629442095756531, "step": 4219 }, { "epoch": 0.570516016189406, "grad_norm": 0.8076706528663635, "learning_rate": 1.3194040437013885e-05, "loss": 0.036763809621334076, "step": 4220 }, { "epoch": 0.5706512095581712, "grad_norm": 0.6790736317634583, "learning_rate": 1.3187245984642673e-05, "loss": 0.0466952919960022, "step": 4221 }, { "epoch": 0.5707864029269364, "grad_norm": 0.4466095566749573, "learning_rate": 1.3180451909696517e-05, "loss": 0.0447138249874115, "step": 4222 }, { "epoch": 0.5709215962957017, "grad_norm": 0.4621639847755432, "learning_rate": 1.3173658213589972e-05, "loss": 0.04115643352270126, "step": 4223 }, { "epoch": 0.5710567896644669, "grad_norm": 0.8061466217041016, "learning_rate": 1.3166864897737526e-05, "loss": 0.04671631008386612, "step": 4224 }, { "epoch": 0.5711919830332323, "grad_norm": 0.28725436329841614, "learning_rate": 1.3160071963553593e-05, "loss": 0.043019846081733704, "step": 4225 }, { "epoch": 0.5713271764019975, "grad_norm": 0.4862815737724304, "learning_rate": 1.315327941245248e-05, "loss": 0.0696071982383728, "step": 4226 }, { "epoch": 0.5714623697707627, "grad_norm": 0.5043781995773315, "learning_rate": 1.3146487245848445e-05, "loss": 0.07956242561340332, "step": 4227 }, { "epoch": 0.571597563139528, "grad_norm": 0.4982438087463379, "learning_rate": 1.3139695465155645e-05, "loss": 0.05159628391265869, "step": 4228 }, { "epoch": 0.5717327565082932, "grad_norm": 0.36959385871887207, "learning_rate": 1.3132904071788177e-05, "loss": 0.052197109907865524, "step": 4229 }, { "epoch": 0.5718679498770586, "grad_norm": 0.7914020419120789, "learning_rate": 1.3126113067160031e-05, "loss": 0.052085041999816895, "step": 4230 }, { "epoch": 0.5720031432458238, "grad_norm": 1.8427563905715942, "learning_rate": 1.3119322452685139e-05, "loss": 0.06533059477806091, "step": 4231 }, { "epoch": 0.572138336614589, "grad_norm": 0.3809410631656647, "learning_rate": 1.3112532229777344e-05, "loss": 0.04417727142572403, "step": 4232 }, { "epoch": 0.5722735299833543, "grad_norm": 0.38008368015289307, "learning_rate": 1.3105742399850399e-05, "loss": 0.03816206008195877, "step": 4233 }, { "epoch": 0.5724087233521196, "grad_norm": 0.3189476728439331, "learning_rate": 1.3098952964317996e-05, "loss": 0.06020701676607132, "step": 4234 }, { "epoch": 0.5725439167208849, "grad_norm": 0.47066813707351685, "learning_rate": 1.3092163924593717e-05, "loss": 0.058689720928668976, "step": 4235 }, { "epoch": 0.5726791100896501, "grad_norm": 0.6496806144714355, "learning_rate": 1.308537528209108e-05, "loss": 0.05438286066055298, "step": 4236 }, { "epoch": 0.5728143034584153, "grad_norm": 0.8448660373687744, "learning_rate": 1.3078587038223525e-05, "loss": 0.06135724484920502, "step": 4237 }, { "epoch": 0.5729494968271807, "grad_norm": 0.312496542930603, "learning_rate": 1.3071799194404392e-05, "loss": 0.039947301149368286, "step": 4238 }, { "epoch": 0.5730846901959459, "grad_norm": 0.8599078059196472, "learning_rate": 1.3065011752046955e-05, "loss": 0.0575120747089386, "step": 4239 }, { "epoch": 0.5732198835647111, "grad_norm": 0.3608524203300476, "learning_rate": 1.3058224712564382e-05, "loss": 0.05470991134643555, "step": 4240 }, { "epoch": 0.5733550769334764, "grad_norm": 0.3681836426258087, "learning_rate": 1.305143807736978e-05, "loss": 0.060732871294021606, "step": 4241 }, { "epoch": 0.5734902703022416, "grad_norm": 0.9190942645072937, "learning_rate": 1.3044651847876163e-05, "loss": 0.07144888490438461, "step": 4242 }, { "epoch": 0.573625463671007, "grad_norm": 0.9063126444816589, "learning_rate": 1.3037866025496466e-05, "loss": 0.05000302195549011, "step": 4243 }, { "epoch": 0.5737606570397722, "grad_norm": 0.7755569219589233, "learning_rate": 1.3031080611643514e-05, "loss": 0.05154004693031311, "step": 4244 }, { "epoch": 0.5738958504085374, "grad_norm": 0.7370505332946777, "learning_rate": 1.3024295607730083e-05, "loss": 0.0579124391078949, "step": 4245 }, { "epoch": 0.5740310437773027, "grad_norm": 0.7617559432983398, "learning_rate": 1.301751101516884e-05, "loss": 0.06045614555478096, "step": 4246 }, { "epoch": 0.574166237146068, "grad_norm": 0.9118973612785339, "learning_rate": 1.3010726835372377e-05, "loss": 0.05753954499959946, "step": 4247 }, { "epoch": 0.5743014305148333, "grad_norm": 0.6673192381858826, "learning_rate": 1.30039430697532e-05, "loss": 0.060206253081560135, "step": 4248 }, { "epoch": 0.5744366238835985, "grad_norm": 0.9776385426521301, "learning_rate": 1.2997159719723713e-05, "loss": 0.04279157519340515, "step": 4249 }, { "epoch": 0.5745718172523637, "grad_norm": 0.44115784764289856, "learning_rate": 1.2990376786696254e-05, "loss": 0.06441664695739746, "step": 4250 }, { "epoch": 0.574707010621129, "grad_norm": 0.43690550327301025, "learning_rate": 1.2983594272083063e-05, "loss": 0.04498184099793434, "step": 4251 }, { "epoch": 0.5748422039898943, "grad_norm": 0.834084689617157, "learning_rate": 1.2976812177296307e-05, "loss": 0.03776583820581436, "step": 4252 }, { "epoch": 0.5749773973586596, "grad_norm": 0.5029201507568359, "learning_rate": 1.2970030503748039e-05, "loss": 0.057890862226486206, "step": 4253 }, { "epoch": 0.5751125907274248, "grad_norm": 1.0927622318267822, "learning_rate": 1.2963249252850242e-05, "loss": 0.05455537885427475, "step": 4254 }, { "epoch": 0.57524778409619, "grad_norm": 0.5903801918029785, "learning_rate": 1.295646842601481e-05, "loss": 0.047474633902311325, "step": 4255 }, { "epoch": 0.5753829774649554, "grad_norm": 0.2989392876625061, "learning_rate": 1.294968802465355e-05, "loss": 0.056687116622924805, "step": 4256 }, { "epoch": 0.5755181708337206, "grad_norm": 0.5718480944633484, "learning_rate": 1.2942908050178187e-05, "loss": 0.05198892578482628, "step": 4257 }, { "epoch": 0.5756533642024859, "grad_norm": 0.2312406599521637, "learning_rate": 1.293612850400033e-05, "loss": 0.03864598274230957, "step": 4258 }, { "epoch": 0.5757885575712511, "grad_norm": 0.34641847014427185, "learning_rate": 1.2929349387531525e-05, "loss": 0.04378978908061981, "step": 4259 }, { "epoch": 0.5759237509400164, "grad_norm": 0.42612916231155396, "learning_rate": 1.2922570702183217e-05, "loss": 0.0561370924115181, "step": 4260 }, { "epoch": 0.5760589443087817, "grad_norm": 0.23974616825580597, "learning_rate": 1.2915792449366768e-05, "loss": 0.044177234172821045, "step": 4261 }, { "epoch": 0.5761941376775469, "grad_norm": 0.6883369088172913, "learning_rate": 1.2909014630493451e-05, "loss": 0.06474051624536514, "step": 4262 }, { "epoch": 0.5763293310463122, "grad_norm": 0.3299371302127838, "learning_rate": 1.2902237246974432e-05, "loss": 0.05071800947189331, "step": 4263 }, { "epoch": 0.5764645244150775, "grad_norm": 0.7747350931167603, "learning_rate": 1.289546030022081e-05, "loss": 0.06015794724225998, "step": 4264 }, { "epoch": 0.5765997177838427, "grad_norm": 0.4959127604961395, "learning_rate": 1.2888683791643572e-05, "loss": 0.05921699106693268, "step": 4265 }, { "epoch": 0.576734911152608, "grad_norm": 0.8913729786872864, "learning_rate": 1.2881907722653633e-05, "loss": 0.04792968928813934, "step": 4266 }, { "epoch": 0.5768701045213732, "grad_norm": 0.25339075922966003, "learning_rate": 1.2875132094661796e-05, "loss": 0.03906603902578354, "step": 4267 }, { "epoch": 0.5770052978901384, "grad_norm": 0.5254693031311035, "learning_rate": 1.2868356909078787e-05, "loss": 0.0628429651260376, "step": 4268 }, { "epoch": 0.5771404912589038, "grad_norm": 0.46943530440330505, "learning_rate": 1.286158216731524e-05, "loss": 0.07296016812324524, "step": 4269 }, { "epoch": 0.577275684627669, "grad_norm": 0.4199482798576355, "learning_rate": 1.2854807870781686e-05, "loss": 0.05589793622493744, "step": 4270 }, { "epoch": 0.5774108779964343, "grad_norm": 0.41195833683013916, "learning_rate": 1.284803402088858e-05, "loss": 0.054120901972055435, "step": 4271 }, { "epoch": 0.5775460713651995, "grad_norm": 0.8266782760620117, "learning_rate": 1.284126061904626e-05, "loss": 0.06698405742645264, "step": 4272 }, { "epoch": 0.5776812647339647, "grad_norm": 1.3787145614624023, "learning_rate": 1.283448766666499e-05, "loss": 0.07092546671628952, "step": 4273 }, { "epoch": 0.5778164581027301, "grad_norm": 0.4263189733028412, "learning_rate": 1.282771516515494e-05, "loss": 0.06073032319545746, "step": 4274 }, { "epoch": 0.5779516514714953, "grad_norm": 0.6744718551635742, "learning_rate": 1.282094311592618e-05, "loss": 0.07127252221107483, "step": 4275 }, { "epoch": 0.5780868448402606, "grad_norm": 1.0482310056686401, "learning_rate": 1.2814171520388676e-05, "loss": 0.059532083570957184, "step": 4276 }, { "epoch": 0.5782220382090258, "grad_norm": 0.3588486313819885, "learning_rate": 1.2807400379952318e-05, "loss": 0.04554516077041626, "step": 4277 }, { "epoch": 0.5783572315777911, "grad_norm": 0.25432875752449036, "learning_rate": 1.2800629696026895e-05, "loss": 0.041674643754959106, "step": 4278 }, { "epoch": 0.5784924249465564, "grad_norm": 0.5969200730323792, "learning_rate": 1.2793859470022098e-05, "loss": 0.05866185203194618, "step": 4279 }, { "epoch": 0.5786276183153216, "grad_norm": 0.9372228384017944, "learning_rate": 1.278708970334753e-05, "loss": 0.057602398097515106, "step": 4280 }, { "epoch": 0.5787628116840869, "grad_norm": 0.4330744445323944, "learning_rate": 1.2780320397412678e-05, "loss": 0.05368056893348694, "step": 4281 }, { "epoch": 0.5788980050528522, "grad_norm": 0.3051561415195465, "learning_rate": 1.2773551553626957e-05, "loss": 0.05303791165351868, "step": 4282 }, { "epoch": 0.5790331984216174, "grad_norm": 0.46041205525398254, "learning_rate": 1.2766783173399675e-05, "loss": 0.05813620612025261, "step": 4283 }, { "epoch": 0.5791683917903827, "grad_norm": 0.7252134680747986, "learning_rate": 1.276001525814005e-05, "loss": 0.04161641001701355, "step": 4284 }, { "epoch": 0.5793035851591479, "grad_norm": 0.5711507797241211, "learning_rate": 1.2753247809257192e-05, "loss": 0.05217631161212921, "step": 4285 }, { "epoch": 0.5794387785279133, "grad_norm": 0.3248074948787689, "learning_rate": 1.2746480828160119e-05, "loss": 0.05739147961139679, "step": 4286 }, { "epoch": 0.5795739718966785, "grad_norm": 0.20690594613552094, "learning_rate": 1.2739714316257753e-05, "loss": 0.04190623760223389, "step": 4287 }, { "epoch": 0.5797091652654437, "grad_norm": 0.7761908769607544, "learning_rate": 1.273294827495892e-05, "loss": 0.07080733776092529, "step": 4288 }, { "epoch": 0.579844358634209, "grad_norm": 0.23968324065208435, "learning_rate": 1.2726182705672352e-05, "loss": 0.03853154182434082, "step": 4289 }, { "epoch": 0.5799795520029742, "grad_norm": 0.25534650683403015, "learning_rate": 1.271941760980667e-05, "loss": 0.041600268334150314, "step": 4290 }, { "epoch": 0.5801147453717396, "grad_norm": 0.8996881246566772, "learning_rate": 1.2712652988770396e-05, "loss": 0.04725450277328491, "step": 4291 }, { "epoch": 0.5802499387405048, "grad_norm": 0.3842504620552063, "learning_rate": 1.2705888843971967e-05, "loss": 0.04753348231315613, "step": 4292 }, { "epoch": 0.58038513210927, "grad_norm": 0.4953189194202423, "learning_rate": 1.2699125176819717e-05, "loss": 0.05507143586874008, "step": 4293 }, { "epoch": 0.5805203254780353, "grad_norm": 0.7106717824935913, "learning_rate": 1.269236198872188e-05, "loss": 0.04401867091655731, "step": 4294 }, { "epoch": 0.5806555188468006, "grad_norm": 1.290099024772644, "learning_rate": 1.2685599281086577e-05, "loss": 0.06402313709259033, "step": 4295 }, { "epoch": 0.5807907122155658, "grad_norm": 0.4258081018924713, "learning_rate": 1.2678837055321849e-05, "loss": 0.059646766632795334, "step": 4296 }, { "epoch": 0.5809259055843311, "grad_norm": 0.3928106129169464, "learning_rate": 1.267207531283562e-05, "loss": 0.05644255876541138, "step": 4297 }, { "epoch": 0.5810610989530963, "grad_norm": 0.9362512826919556, "learning_rate": 1.266531405503573e-05, "loss": 0.06461828947067261, "step": 4298 }, { "epoch": 0.5811962923218617, "grad_norm": 0.5836531519889832, "learning_rate": 1.26585532833299e-05, "loss": 0.051962971687316895, "step": 4299 }, { "epoch": 0.5813314856906269, "grad_norm": 0.3997187316417694, "learning_rate": 1.2651792999125763e-05, "loss": 0.05108805000782013, "step": 4300 }, { "epoch": 0.5814666790593921, "grad_norm": 1.0038704872131348, "learning_rate": 1.2645033203830846e-05, "loss": 0.04372485354542732, "step": 4301 }, { "epoch": 0.5816018724281574, "grad_norm": 0.6462306976318359, "learning_rate": 1.2638273898852573e-05, "loss": 0.07015995681285858, "step": 4302 }, { "epoch": 0.5817370657969226, "grad_norm": 0.4095308184623718, "learning_rate": 1.2631515085598275e-05, "loss": 0.05098499357700348, "step": 4303 }, { "epoch": 0.581872259165688, "grad_norm": 0.5673921704292297, "learning_rate": 1.262475676547516e-05, "loss": 0.06684210151433945, "step": 4304 }, { "epoch": 0.5820074525344532, "grad_norm": 0.8474652767181396, "learning_rate": 1.2617998939890352e-05, "loss": 0.06170887127518654, "step": 4305 }, { "epoch": 0.5821426459032184, "grad_norm": 0.4729473888874054, "learning_rate": 1.261124161025087e-05, "loss": 0.04176437854766846, "step": 4306 }, { "epoch": 0.5822778392719837, "grad_norm": 0.36480385065078735, "learning_rate": 1.260448477796362e-05, "loss": 0.05516377091407776, "step": 4307 }, { "epoch": 0.582413032640749, "grad_norm": 0.3644324839115143, "learning_rate": 1.259772844443542e-05, "loss": 0.06253552436828613, "step": 4308 }, { "epoch": 0.5825482260095143, "grad_norm": 0.7161415815353394, "learning_rate": 1.2590972611072964e-05, "loss": 0.060322076082229614, "step": 4309 }, { "epoch": 0.5826834193782795, "grad_norm": 0.5066630840301514, "learning_rate": 1.2584217279282855e-05, "loss": 0.0561978816986084, "step": 4310 }, { "epoch": 0.5828186127470447, "grad_norm": 0.6844581961631775, "learning_rate": 1.2577462450471593e-05, "loss": 0.05494430661201477, "step": 4311 }, { "epoch": 0.58295380611581, "grad_norm": 1.0546153783798218, "learning_rate": 1.2570708126045574e-05, "loss": 0.052705734968185425, "step": 4312 }, { "epoch": 0.5830889994845753, "grad_norm": 0.45255061984062195, "learning_rate": 1.256395430741107e-05, "loss": 0.06387509405612946, "step": 4313 }, { "epoch": 0.5832241928533406, "grad_norm": 0.6577525734901428, "learning_rate": 1.2557200995974268e-05, "loss": 0.038536787033081055, "step": 4314 }, { "epoch": 0.5833593862221058, "grad_norm": 0.42378073930740356, "learning_rate": 1.2550448193141248e-05, "loss": 0.05845659226179123, "step": 4315 }, { "epoch": 0.583494579590871, "grad_norm": 0.9057872295379639, "learning_rate": 1.2543695900317977e-05, "loss": 0.06074342131614685, "step": 4316 }, { "epoch": 0.5836297729596364, "grad_norm": 1.0088768005371094, "learning_rate": 1.2536944118910323e-05, "loss": 0.04993507266044617, "step": 4317 }, { "epoch": 0.5837649663284016, "grad_norm": 1.4090949296951294, "learning_rate": 1.2530192850324032e-05, "loss": 0.0724797248840332, "step": 4318 }, { "epoch": 0.5839001596971669, "grad_norm": 0.49211621284484863, "learning_rate": 1.252344209596476e-05, "loss": 0.055135756731033325, "step": 4319 }, { "epoch": 0.5840353530659321, "grad_norm": 1.1818344593048096, "learning_rate": 1.251669185723805e-05, "loss": 0.08254462480545044, "step": 4320 }, { "epoch": 0.5841705464346973, "grad_norm": 0.421589732170105, "learning_rate": 1.2509942135549344e-05, "loss": 0.06350669264793396, "step": 4321 }, { "epoch": 0.5843057398034627, "grad_norm": 0.26521849632263184, "learning_rate": 1.250319293230396e-05, "loss": 0.04598189890384674, "step": 4322 }, { "epoch": 0.5844409331722279, "grad_norm": 0.8640342354774475, "learning_rate": 1.2496444248907121e-05, "loss": 0.06404251605272293, "step": 4323 }, { "epoch": 0.5845761265409931, "grad_norm": 0.8883600234985352, "learning_rate": 1.2489696086763939e-05, "loss": 0.06144168600440025, "step": 4324 }, { "epoch": 0.5847113199097584, "grad_norm": 0.33867132663726807, "learning_rate": 1.2482948447279417e-05, "loss": 0.041415244340896606, "step": 4325 }, { "epoch": 0.5848465132785237, "grad_norm": 0.3557130992412567, "learning_rate": 1.2476201331858458e-05, "loss": 0.054164670407772064, "step": 4326 }, { "epoch": 0.584981706647289, "grad_norm": 0.28703773021698, "learning_rate": 1.2469454741905839e-05, "loss": 0.04187518358230591, "step": 4327 }, { "epoch": 0.5851169000160542, "grad_norm": 0.45612937211990356, "learning_rate": 1.2462708678826233e-05, "loss": 0.045684874057769775, "step": 4328 }, { "epoch": 0.5852520933848194, "grad_norm": 0.5603744983673096, "learning_rate": 1.245596314402421e-05, "loss": 0.058832503855228424, "step": 4329 }, { "epoch": 0.5853872867535848, "grad_norm": 0.2811681926250458, "learning_rate": 1.2449218138904225e-05, "loss": 0.03117161989212036, "step": 4330 }, { "epoch": 0.58552248012235, "grad_norm": 0.5589156150817871, "learning_rate": 1.2442473664870636e-05, "loss": 0.07267424464225769, "step": 4331 }, { "epoch": 0.5856576734911153, "grad_norm": 0.5839823484420776, "learning_rate": 1.2435729723327661e-05, "loss": 0.051913969218730927, "step": 4332 }, { "epoch": 0.5857928668598805, "grad_norm": 0.42361190915107727, "learning_rate": 1.2428986315679433e-05, "loss": 0.05698402225971222, "step": 4333 }, { "epoch": 0.5859280602286457, "grad_norm": 0.521894097328186, "learning_rate": 1.2422243443329962e-05, "loss": 0.06634476780891418, "step": 4334 }, { "epoch": 0.5860632535974111, "grad_norm": 0.29309552907943726, "learning_rate": 1.241550110768316e-05, "loss": 0.04298561066389084, "step": 4335 }, { "epoch": 0.5861984469661763, "grad_norm": 0.42206260561943054, "learning_rate": 1.2408759310142803e-05, "loss": 0.0429934561252594, "step": 4336 }, { "epoch": 0.5863336403349416, "grad_norm": 0.341665655374527, "learning_rate": 1.2402018052112576e-05, "loss": 0.05137814208865166, "step": 4337 }, { "epoch": 0.5864688337037068, "grad_norm": 0.3751700818538666, "learning_rate": 1.2395277334996045e-05, "loss": 0.05620073154568672, "step": 4338 }, { "epoch": 0.5866040270724721, "grad_norm": 0.37570586800575256, "learning_rate": 1.2388537160196663e-05, "loss": 0.0541437491774559, "step": 4339 }, { "epoch": 0.5867392204412374, "grad_norm": 0.3547845184803009, "learning_rate": 1.2381797529117776e-05, "loss": 0.04071830213069916, "step": 4340 }, { "epoch": 0.5868744138100026, "grad_norm": 0.30910295248031616, "learning_rate": 1.23750584431626e-05, "loss": 0.05100833252072334, "step": 4341 }, { "epoch": 0.5870096071787679, "grad_norm": 0.4811614155769348, "learning_rate": 1.236831990373425e-05, "loss": 0.0544014535844326, "step": 4342 }, { "epoch": 0.5871448005475332, "grad_norm": 0.26779910922050476, "learning_rate": 1.2361581912235736e-05, "loss": 0.041720613837242126, "step": 4343 }, { "epoch": 0.5872799939162984, "grad_norm": 0.4530389904975891, "learning_rate": 1.235484447006994e-05, "loss": 0.05202691629528999, "step": 4344 }, { "epoch": 0.5874151872850637, "grad_norm": 0.8156394362449646, "learning_rate": 1.2348107578639627e-05, "loss": 0.04596954584121704, "step": 4345 }, { "epoch": 0.5875503806538289, "grad_norm": 0.44907045364379883, "learning_rate": 1.2341371239347454e-05, "loss": 0.05828942358493805, "step": 4346 }, { "epoch": 0.5876855740225941, "grad_norm": 1.5142748355865479, "learning_rate": 1.233463545359597e-05, "loss": 0.06395776569843292, "step": 4347 }, { "epoch": 0.5878207673913595, "grad_norm": 0.5775921940803528, "learning_rate": 1.23279002227876e-05, "loss": 0.04298216104507446, "step": 4348 }, { "epoch": 0.5879559607601247, "grad_norm": 0.6789389252662659, "learning_rate": 1.2321165548324655e-05, "loss": 0.05139341950416565, "step": 4349 }, { "epoch": 0.58809115412889, "grad_norm": 0.4374094009399414, "learning_rate": 1.2314431431609323e-05, "loss": 0.03571010380983353, "step": 4350 }, { "epoch": 0.5882263474976552, "grad_norm": 0.5122615098953247, "learning_rate": 1.2307697874043687e-05, "loss": 0.06129905581474304, "step": 4351 }, { "epoch": 0.5883615408664205, "grad_norm": 0.6931524276733398, "learning_rate": 1.2300964877029712e-05, "loss": 0.07287464290857315, "step": 4352 }, { "epoch": 0.5884967342351858, "grad_norm": 0.5996394157409668, "learning_rate": 1.2294232441969246e-05, "loss": 0.0418534129858017, "step": 4353 }, { "epoch": 0.588631927603951, "grad_norm": 0.6493504643440247, "learning_rate": 1.2287500570264017e-05, "loss": 0.062101706862449646, "step": 4354 }, { "epoch": 0.5887671209727163, "grad_norm": 0.6454962491989136, "learning_rate": 1.2280769263315628e-05, "loss": 0.04887927323579788, "step": 4355 }, { "epoch": 0.5889023143414815, "grad_norm": 0.5784348845481873, "learning_rate": 1.2274038522525577e-05, "loss": 0.05716042220592499, "step": 4356 }, { "epoch": 0.5890375077102468, "grad_norm": 0.5934639573097229, "learning_rate": 1.2267308349295246e-05, "loss": 0.04778507351875305, "step": 4357 }, { "epoch": 0.5891727010790121, "grad_norm": 0.338418573141098, "learning_rate": 1.2260578745025892e-05, "loss": 0.06151972711086273, "step": 4358 }, { "epoch": 0.5893078944477773, "grad_norm": 0.3730992078781128, "learning_rate": 1.225384971111865e-05, "loss": 0.03685858100652695, "step": 4359 }, { "epoch": 0.5894430878165426, "grad_norm": 1.1936970949172974, "learning_rate": 1.224712124897454e-05, "loss": 0.05756092816591263, "step": 4360 }, { "epoch": 0.5895782811853079, "grad_norm": 0.39170098304748535, "learning_rate": 1.2240393359994466e-05, "loss": 0.04370509833097458, "step": 4361 }, { "epoch": 0.5897134745540731, "grad_norm": 0.38018980622291565, "learning_rate": 1.2233666045579209e-05, "loss": 0.0440501868724823, "step": 4362 }, { "epoch": 0.5898486679228384, "grad_norm": 0.31766247749328613, "learning_rate": 1.222693930712944e-05, "loss": 0.03685295581817627, "step": 4363 }, { "epoch": 0.5899838612916036, "grad_norm": 1.0282713174819946, "learning_rate": 1.2220213146045691e-05, "loss": 0.059676796197891235, "step": 4364 }, { "epoch": 0.590119054660369, "grad_norm": 0.8460642695426941, "learning_rate": 1.2213487563728389e-05, "loss": 0.05196624994277954, "step": 4365 }, { "epoch": 0.5902542480291342, "grad_norm": 0.591384768486023, "learning_rate": 1.220676256157783e-05, "loss": 0.04675830900669098, "step": 4366 }, { "epoch": 0.5903894413978994, "grad_norm": 0.6578323841094971, "learning_rate": 1.2200038140994212e-05, "loss": 0.04303811490535736, "step": 4367 }, { "epoch": 0.5905246347666647, "grad_norm": 0.27735573053359985, "learning_rate": 1.2193314303377578e-05, "loss": 0.0528721883893013, "step": 4368 }, { "epoch": 0.5906598281354299, "grad_norm": 0.3221501111984253, "learning_rate": 1.2186591050127874e-05, "loss": 0.05126020312309265, "step": 4369 }, { "epoch": 0.5907950215041953, "grad_norm": 1.4605958461761475, "learning_rate": 1.2179868382644916e-05, "loss": 0.056049223989248276, "step": 4370 }, { "epoch": 0.5909302148729605, "grad_norm": 0.7284490466117859, "learning_rate": 1.2173146302328396e-05, "loss": 0.04205405339598656, "step": 4371 }, { "epoch": 0.5910654082417257, "grad_norm": 0.4030453860759735, "learning_rate": 1.21664248105779e-05, "loss": 0.060487955808639526, "step": 4372 }, { "epoch": 0.591200601610491, "grad_norm": 0.36900702118873596, "learning_rate": 1.2159703908792858e-05, "loss": 0.07081760466098785, "step": 4373 }, { "epoch": 0.5913357949792563, "grad_norm": 0.217972993850708, "learning_rate": 1.2152983598372613e-05, "loss": 0.03986116871237755, "step": 4374 }, { "epoch": 0.5914709883480215, "grad_norm": 0.5936857461929321, "learning_rate": 1.2146263880716366e-05, "loss": 0.06678003072738647, "step": 4375 }, { "epoch": 0.5916061817167868, "grad_norm": 0.3019724190235138, "learning_rate": 1.2139544757223194e-05, "loss": 0.050807081162929535, "step": 4376 }, { "epoch": 0.591741375085552, "grad_norm": 0.3493005335330963, "learning_rate": 1.2132826229292066e-05, "loss": 0.057315923273563385, "step": 4377 }, { "epoch": 0.5918765684543174, "grad_norm": 0.5496053099632263, "learning_rate": 1.2126108298321798e-05, "loss": 0.03934304043650627, "step": 4378 }, { "epoch": 0.5920117618230826, "grad_norm": 1.3614407777786255, "learning_rate": 1.2119390965711107e-05, "loss": 0.07513861358165741, "step": 4379 }, { "epoch": 0.5921469551918478, "grad_norm": 0.347663015127182, "learning_rate": 1.2112674232858582e-05, "loss": 0.048264436423778534, "step": 4380 }, { "epoch": 0.5922821485606131, "grad_norm": 0.30053818225860596, "learning_rate": 1.2105958101162684e-05, "loss": 0.061656564474105835, "step": 4381 }, { "epoch": 0.5924173419293783, "grad_norm": 0.4433741569519043, "learning_rate": 1.2099242572021735e-05, "loss": 0.06932751834392548, "step": 4382 }, { "epoch": 0.5925525352981437, "grad_norm": 0.6770027279853821, "learning_rate": 1.209252764683395e-05, "loss": 0.05713781714439392, "step": 4383 }, { "epoch": 0.5926877286669089, "grad_norm": 1.0216777324676514, "learning_rate": 1.2085813326997414e-05, "loss": 0.06954903155565262, "step": 4384 }, { "epoch": 0.5928229220356741, "grad_norm": 0.31264278292655945, "learning_rate": 1.2079099613910088e-05, "loss": 0.06308547407388687, "step": 4385 }, { "epoch": 0.5929581154044394, "grad_norm": 0.28358975052833557, "learning_rate": 1.20723865089698e-05, "loss": 0.046769797801971436, "step": 4386 }, { "epoch": 0.5930933087732047, "grad_norm": 0.4193544089794159, "learning_rate": 1.2065674013574248e-05, "loss": 0.05737566947937012, "step": 4387 }, { "epoch": 0.59322850214197, "grad_norm": 1.1413214206695557, "learning_rate": 1.2058962129121013e-05, "loss": 0.049747034907341, "step": 4388 }, { "epoch": 0.5933636955107352, "grad_norm": 0.47744250297546387, "learning_rate": 1.2052250857007548e-05, "loss": 0.04318264499306679, "step": 4389 }, { "epoch": 0.5934988888795004, "grad_norm": 1.706655502319336, "learning_rate": 1.2045540198631177e-05, "loss": 0.06314040720462799, "step": 4390 }, { "epoch": 0.5936340822482657, "grad_norm": 0.5557811260223389, "learning_rate": 1.2038830155389091e-05, "loss": 0.05621114373207092, "step": 4391 }, { "epoch": 0.593769275617031, "grad_norm": 0.46371743083000183, "learning_rate": 1.2032120728678354e-05, "loss": 0.05500394105911255, "step": 4392 }, { "epoch": 0.5939044689857963, "grad_norm": 1.143871545791626, "learning_rate": 1.2025411919895907e-05, "loss": 0.05228354036808014, "step": 4393 }, { "epoch": 0.5940396623545615, "grad_norm": 0.31671059131622314, "learning_rate": 1.2018703730438561e-05, "loss": 0.04256322234869003, "step": 4394 }, { "epoch": 0.5941748557233267, "grad_norm": 0.3683294951915741, "learning_rate": 1.2011996161703003e-05, "loss": 0.05954208970069885, "step": 4395 }, { "epoch": 0.5943100490920921, "grad_norm": 0.36772671341896057, "learning_rate": 1.2005289215085775e-05, "loss": 0.039159759879112244, "step": 4396 }, { "epoch": 0.5944452424608573, "grad_norm": 0.6880024075508118, "learning_rate": 1.19985828919833e-05, "loss": 0.05449862405657768, "step": 4397 }, { "epoch": 0.5945804358296226, "grad_norm": 0.4047585725784302, "learning_rate": 1.1991877193791872e-05, "loss": 0.05063389241695404, "step": 4398 }, { "epoch": 0.5947156291983878, "grad_norm": 0.40419384837150574, "learning_rate": 1.1985172121907653e-05, "loss": 0.05270978808403015, "step": 4399 }, { "epoch": 0.594850822567153, "grad_norm": 1.3940887451171875, "learning_rate": 1.1978467677726682e-05, "loss": 0.04564329981803894, "step": 4400 }, { "epoch": 0.5949860159359184, "grad_norm": 0.3156428933143616, "learning_rate": 1.197176386264485e-05, "loss": 0.04023021459579468, "step": 4401 }, { "epoch": 0.5951212093046836, "grad_norm": 0.3367822766304016, "learning_rate": 1.1965060678057927e-05, "loss": 0.047062501311302185, "step": 4402 }, { "epoch": 0.5952564026734488, "grad_norm": 1.0438036918640137, "learning_rate": 1.1958358125361554e-05, "loss": 0.04885775223374367, "step": 4403 }, { "epoch": 0.5953915960422141, "grad_norm": 0.30638495087623596, "learning_rate": 1.1951656205951247e-05, "loss": 0.037989918142557144, "step": 4404 }, { "epoch": 0.5955267894109794, "grad_norm": 0.2807495594024658, "learning_rate": 1.1944954921222367e-05, "loss": 0.030818786472082138, "step": 4405 }, { "epoch": 0.5956619827797447, "grad_norm": 0.876422643661499, "learning_rate": 1.1938254272570167e-05, "loss": 0.0718328207731247, "step": 4406 }, { "epoch": 0.5957971761485099, "grad_norm": 0.37925800681114197, "learning_rate": 1.1931554261389751e-05, "loss": 0.046253807842731476, "step": 4407 }, { "epoch": 0.5959323695172751, "grad_norm": 0.5791126489639282, "learning_rate": 1.1924854889076103e-05, "loss": 0.07300731539726257, "step": 4408 }, { "epoch": 0.5960675628860405, "grad_norm": 0.45043909549713135, "learning_rate": 1.191815615702407e-05, "loss": 0.05591295287013054, "step": 4409 }, { "epoch": 0.5962027562548057, "grad_norm": 0.34145453572273254, "learning_rate": 1.1911458066628353e-05, "loss": 0.05854158103466034, "step": 4410 }, { "epoch": 0.596337949623571, "grad_norm": 1.750978708267212, "learning_rate": 1.1904760619283537e-05, "loss": 0.079463891685009, "step": 4411 }, { "epoch": 0.5964731429923362, "grad_norm": 0.8478667736053467, "learning_rate": 1.1898063816384069e-05, "loss": 0.04810492694377899, "step": 4412 }, { "epoch": 0.5966083363611014, "grad_norm": 0.2722764313220978, "learning_rate": 1.189136765932426e-05, "loss": 0.04031342267990112, "step": 4413 }, { "epoch": 0.5967435297298668, "grad_norm": 0.6616509556770325, "learning_rate": 1.1884672149498276e-05, "loss": 0.050533972680568695, "step": 4414 }, { "epoch": 0.596878723098632, "grad_norm": 0.6429870128631592, "learning_rate": 1.187797728830016e-05, "loss": 0.05223057419061661, "step": 4415 }, { "epoch": 0.5970139164673973, "grad_norm": 0.9347872734069824, "learning_rate": 1.1871283077123823e-05, "loss": 0.04920932278037071, "step": 4416 }, { "epoch": 0.5971491098361625, "grad_norm": 0.5087645053863525, "learning_rate": 1.1864589517363038e-05, "loss": 0.05826232209801674, "step": 4417 }, { "epoch": 0.5972843032049278, "grad_norm": 1.1827343702316284, "learning_rate": 1.185789661041144e-05, "loss": 0.05395862087607384, "step": 4418 }, { "epoch": 0.5974194965736931, "grad_norm": 0.6172618269920349, "learning_rate": 1.1851204357662513e-05, "loss": 0.06921996176242828, "step": 4419 }, { "epoch": 0.5975546899424583, "grad_norm": 0.6347182989120483, "learning_rate": 1.1844512760509634e-05, "loss": 0.05589497089385986, "step": 4420 }, { "epoch": 0.5976898833112236, "grad_norm": 0.5600841641426086, "learning_rate": 1.1837821820346022e-05, "loss": 0.056553810834884644, "step": 4421 }, { "epoch": 0.5978250766799889, "grad_norm": 0.7159196734428406, "learning_rate": 1.1831131538564775e-05, "loss": 0.06223991513252258, "step": 4422 }, { "epoch": 0.5979602700487541, "grad_norm": 0.4582863748073578, "learning_rate": 1.1824441916558843e-05, "loss": 0.05742642283439636, "step": 4423 }, { "epoch": 0.5980954634175194, "grad_norm": 0.9712393879890442, "learning_rate": 1.1817752955721031e-05, "loss": 0.06445480138063431, "step": 4424 }, { "epoch": 0.5982306567862846, "grad_norm": 1.0242362022399902, "learning_rate": 1.1811064657444023e-05, "loss": 0.059829238802194595, "step": 4425 }, { "epoch": 0.59836585015505, "grad_norm": 0.3005829155445099, "learning_rate": 1.1804377023120361e-05, "loss": 0.04465368390083313, "step": 4426 }, { "epoch": 0.5985010435238152, "grad_norm": 0.5643232464790344, "learning_rate": 1.1797690054142451e-05, "loss": 0.05843439698219299, "step": 4427 }, { "epoch": 0.5986362368925804, "grad_norm": 1.0243879556655884, "learning_rate": 1.1791003751902542e-05, "loss": 0.05740996450185776, "step": 4428 }, { "epoch": 0.5987714302613457, "grad_norm": 0.41483309864997864, "learning_rate": 1.1784318117792763e-05, "loss": 0.06152985617518425, "step": 4429 }, { "epoch": 0.5989066236301109, "grad_norm": 0.3840685188770294, "learning_rate": 1.17776331532051e-05, "loss": 0.040231138467788696, "step": 4430 }, { "epoch": 0.5990418169988762, "grad_norm": 0.9017181396484375, "learning_rate": 1.1770948859531397e-05, "loss": 0.05257067084312439, "step": 4431 }, { "epoch": 0.5991770103676415, "grad_norm": 0.6576009392738342, "learning_rate": 1.1764265238163369e-05, "loss": 0.06276470422744751, "step": 4432 }, { "epoch": 0.5993122037364067, "grad_norm": 0.4700859785079956, "learning_rate": 1.1757582290492568e-05, "loss": 0.055005013942718506, "step": 4433 }, { "epoch": 0.599447397105172, "grad_norm": 0.9550535082817078, "learning_rate": 1.1750900017910425e-05, "loss": 0.07956385612487793, "step": 4434 }, { "epoch": 0.5995825904739372, "grad_norm": 0.7578852772712708, "learning_rate": 1.1744218421808221e-05, "loss": 0.05361468344926834, "step": 4435 }, { "epoch": 0.5997177838427025, "grad_norm": 0.6122483611106873, "learning_rate": 1.1737537503577112e-05, "loss": 0.06411196291446686, "step": 4436 }, { "epoch": 0.5998529772114678, "grad_norm": 0.7074890732765198, "learning_rate": 1.1730857264608086e-05, "loss": 0.05618126690387726, "step": 4437 }, { "epoch": 0.599988170580233, "grad_norm": 0.6040074229240417, "learning_rate": 1.1724177706292013e-05, "loss": 0.05763174593448639, "step": 4438 }, { "epoch": 0.6001233639489983, "grad_norm": 0.3741949796676636, "learning_rate": 1.1717498830019607e-05, "loss": 0.054901402443647385, "step": 4439 }, { "epoch": 0.6002585573177636, "grad_norm": 0.30857011675834656, "learning_rate": 1.1710820637181449e-05, "loss": 0.041115377098321915, "step": 4440 }, { "epoch": 0.6003937506865288, "grad_norm": 0.5500670671463013, "learning_rate": 1.170414312916798e-05, "loss": 0.0641489326953888, "step": 4441 }, { "epoch": 0.6005289440552941, "grad_norm": 0.3509334325790405, "learning_rate": 1.1697466307369484e-05, "loss": 0.04598594456911087, "step": 4442 }, { "epoch": 0.6006641374240593, "grad_norm": 0.2850635349750519, "learning_rate": 1.1690790173176116e-05, "loss": 0.055777426809072495, "step": 4443 }, { "epoch": 0.6007993307928247, "grad_norm": 0.2754749357700348, "learning_rate": 1.1684114727977876e-05, "loss": 0.039429061114788055, "step": 4444 }, { "epoch": 0.6009345241615899, "grad_norm": 1.281582236289978, "learning_rate": 1.167743997316464e-05, "loss": 0.0517854169011116, "step": 4445 }, { "epoch": 0.6010697175303551, "grad_norm": 0.7395382523536682, "learning_rate": 1.1670765910126112e-05, "loss": 0.05040641129016876, "step": 4446 }, { "epoch": 0.6012049108991204, "grad_norm": 0.5542140603065491, "learning_rate": 1.1664092540251877e-05, "loss": 0.053301576524972916, "step": 4447 }, { "epoch": 0.6013401042678856, "grad_norm": 0.7280396819114685, "learning_rate": 1.1657419864931361e-05, "loss": 0.04925832897424698, "step": 4448 }, { "epoch": 0.601475297636651, "grad_norm": 0.5647919774055481, "learning_rate": 1.165074788555386e-05, "loss": 0.07171936333179474, "step": 4449 }, { "epoch": 0.6016104910054162, "grad_norm": 0.4669032394886017, "learning_rate": 1.1644076603508514e-05, "loss": 0.06894838809967041, "step": 4450 }, { "epoch": 0.6017456843741814, "grad_norm": 0.525761604309082, "learning_rate": 1.1637406020184305e-05, "loss": 0.049926526844501495, "step": 4451 }, { "epoch": 0.6018808777429467, "grad_norm": 0.4559738337993622, "learning_rate": 1.1630736136970097e-05, "loss": 0.04470636323094368, "step": 4452 }, { "epoch": 0.602016071111712, "grad_norm": 0.9113357663154602, "learning_rate": 1.162406695525459e-05, "loss": 0.041763052344322205, "step": 4453 }, { "epoch": 0.6021512644804772, "grad_norm": 0.30894824862480164, "learning_rate": 1.161739847642635e-05, "loss": 0.02997482195496559, "step": 4454 }, { "epoch": 0.6022864578492425, "grad_norm": 0.4740419387817383, "learning_rate": 1.1610730701873788e-05, "loss": 0.0627528578042984, "step": 4455 }, { "epoch": 0.6024216512180077, "grad_norm": 0.5100468993186951, "learning_rate": 1.1604063632985163e-05, "loss": 0.04404717683792114, "step": 4456 }, { "epoch": 0.602556844586773, "grad_norm": 0.37073689699172974, "learning_rate": 1.1597397271148598e-05, "loss": 0.06574007123708725, "step": 4457 }, { "epoch": 0.6026920379555383, "grad_norm": 0.5196016430854797, "learning_rate": 1.1590731617752067e-05, "loss": 0.06108853220939636, "step": 4458 }, { "epoch": 0.6028272313243035, "grad_norm": 0.4110202193260193, "learning_rate": 1.1584066674183398e-05, "loss": 0.06466826796531677, "step": 4459 }, { "epoch": 0.6029624246930688, "grad_norm": 0.2575041651725769, "learning_rate": 1.1577402441830262e-05, "loss": 0.045179583132267, "step": 4460 }, { "epoch": 0.603097618061834, "grad_norm": 0.56150883436203, "learning_rate": 1.1570738922080185e-05, "loss": 0.06690779328346252, "step": 4461 }, { "epoch": 0.6032328114305994, "grad_norm": 0.5520632863044739, "learning_rate": 1.1564076116320552e-05, "loss": 0.06335223466157913, "step": 4462 }, { "epoch": 0.6033680047993646, "grad_norm": 0.44771501421928406, "learning_rate": 1.1557414025938592e-05, "loss": 0.04493759945034981, "step": 4463 }, { "epoch": 0.6035031981681298, "grad_norm": 0.4248267412185669, "learning_rate": 1.15507526523214e-05, "loss": 0.04206670820713043, "step": 4464 }, { "epoch": 0.6036383915368951, "grad_norm": 0.3631657660007477, "learning_rate": 1.1544091996855895e-05, "loss": 0.05404015630483627, "step": 4465 }, { "epoch": 0.6037735849056604, "grad_norm": 0.7033305764198303, "learning_rate": 1.153743206092886e-05, "loss": 0.05844687670469284, "step": 4466 }, { "epoch": 0.6039087782744257, "grad_norm": 0.5848333239555359, "learning_rate": 1.1530772845926936e-05, "loss": 0.05540715903043747, "step": 4467 }, { "epoch": 0.6040439716431909, "grad_norm": 0.4362988770008087, "learning_rate": 1.1524114353236614e-05, "loss": 0.04233485460281372, "step": 4468 }, { "epoch": 0.6041791650119561, "grad_norm": 0.4982118606567383, "learning_rate": 1.151745658424421e-05, "loss": 0.05209067091345787, "step": 4469 }, { "epoch": 0.6043143583807215, "grad_norm": 1.0047283172607422, "learning_rate": 1.151079954033592e-05, "loss": 0.05934375524520874, "step": 4470 }, { "epoch": 0.6044495517494867, "grad_norm": 0.4419027268886566, "learning_rate": 1.150414322289777e-05, "loss": 0.036733437329530716, "step": 4471 }, { "epoch": 0.604584745118252, "grad_norm": 0.6926829814910889, "learning_rate": 1.1497487633315643e-05, "loss": 0.03674779087305069, "step": 4472 }, { "epoch": 0.6047199384870172, "grad_norm": 0.4897218346595764, "learning_rate": 1.1490832772975275e-05, "loss": 0.06402263790369034, "step": 4473 }, { "epoch": 0.6048551318557824, "grad_norm": 0.7173218727111816, "learning_rate": 1.148417864326223e-05, "loss": 0.05959732085466385, "step": 4474 }, { "epoch": 0.6049903252245478, "grad_norm": 0.6146566271781921, "learning_rate": 1.1477525245561944e-05, "loss": 0.04254075512290001, "step": 4475 }, { "epoch": 0.605125518593313, "grad_norm": 0.5662301182746887, "learning_rate": 1.1470872581259684e-05, "loss": 0.05857444554567337, "step": 4476 }, { "epoch": 0.6052607119620783, "grad_norm": 0.2399030327796936, "learning_rate": 1.146422065174057e-05, "loss": 0.04444399103522301, "step": 4477 }, { "epoch": 0.6053959053308435, "grad_norm": 0.8195293545722961, "learning_rate": 1.1457569458389578e-05, "loss": 0.048936132341623306, "step": 4478 }, { "epoch": 0.6055310986996087, "grad_norm": 0.5751801133155823, "learning_rate": 1.145091900259151e-05, "loss": 0.059193696826696396, "step": 4479 }, { "epoch": 0.6056662920683741, "grad_norm": 0.4658619463443756, "learning_rate": 1.1444269285731032e-05, "loss": 0.038571201264858246, "step": 4480 }, { "epoch": 0.6058014854371393, "grad_norm": 0.4931628108024597, "learning_rate": 1.1437620309192652e-05, "loss": 0.05222680792212486, "step": 4481 }, { "epoch": 0.6059366788059045, "grad_norm": 0.582899808883667, "learning_rate": 1.1430972074360722e-05, "loss": 0.05361529067158699, "step": 4482 }, { "epoch": 0.6060718721746698, "grad_norm": 0.890861988067627, "learning_rate": 1.1424324582619435e-05, "loss": 0.05030763894319534, "step": 4483 }, { "epoch": 0.6062070655434351, "grad_norm": 0.26525622606277466, "learning_rate": 1.1417677835352837e-05, "loss": 0.049131568521261215, "step": 4484 }, { "epoch": 0.6063422589122004, "grad_norm": 0.3394700288772583, "learning_rate": 1.1411031833944816e-05, "loss": 0.061053432524204254, "step": 4485 }, { "epoch": 0.6064774522809656, "grad_norm": 0.47498729825019836, "learning_rate": 1.1404386579779111e-05, "loss": 0.05526217818260193, "step": 4486 }, { "epoch": 0.6066126456497308, "grad_norm": 1.155960202217102, "learning_rate": 1.1397742074239296e-05, "loss": 0.05119175463914871, "step": 4487 }, { "epoch": 0.6067478390184962, "grad_norm": 0.3275589048862457, "learning_rate": 1.1391098318708785e-05, "loss": 0.04987310245633125, "step": 4488 }, { "epoch": 0.6068830323872614, "grad_norm": 0.6260046362876892, "learning_rate": 1.1384455314570848e-05, "loss": 0.05643220245838165, "step": 4489 }, { "epoch": 0.6070182257560267, "grad_norm": 0.28886738419532776, "learning_rate": 1.1377813063208596e-05, "loss": 0.061378877609968185, "step": 4490 }, { "epoch": 0.6071534191247919, "grad_norm": 0.25748902559280396, "learning_rate": 1.1371171566004986e-05, "loss": 0.03589729964733124, "step": 4491 }, { "epoch": 0.6072886124935571, "grad_norm": 0.29101109504699707, "learning_rate": 1.1364530824342806e-05, "loss": 0.049356117844581604, "step": 4492 }, { "epoch": 0.6074238058623225, "grad_norm": 0.7595440745353699, "learning_rate": 1.1357890839604688e-05, "loss": 0.061377085745334625, "step": 4493 }, { "epoch": 0.6075589992310877, "grad_norm": 0.7708401679992676, "learning_rate": 1.1351251613173122e-05, "loss": 0.06804318726062775, "step": 4494 }, { "epoch": 0.607694192599853, "grad_norm": 0.5198320150375366, "learning_rate": 1.1344613146430428e-05, "loss": 0.05746417120099068, "step": 4495 }, { "epoch": 0.6078293859686182, "grad_norm": 1.0845645666122437, "learning_rate": 1.1337975440758775e-05, "loss": 0.06742439419031143, "step": 4496 }, { "epoch": 0.6079645793373835, "grad_norm": 0.19988715648651123, "learning_rate": 1.133133849754016e-05, "loss": 0.044403448700904846, "step": 4497 }, { "epoch": 0.6080997727061488, "grad_norm": 0.2857939600944519, "learning_rate": 1.1324702318156431e-05, "loss": 0.0392131507396698, "step": 4498 }, { "epoch": 0.608234966074914, "grad_norm": 0.458284854888916, "learning_rate": 1.1318066903989279e-05, "loss": 0.04939714074134827, "step": 4499 }, { "epoch": 0.6083701594436793, "grad_norm": 0.7546911239624023, "learning_rate": 1.1311432256420232e-05, "loss": 0.06240024045109749, "step": 4500 }, { "epoch": 0.6085053528124446, "grad_norm": 0.40432146191596985, "learning_rate": 1.1304798376830664e-05, "loss": 0.044800613075494766, "step": 4501 }, { "epoch": 0.6086405461812098, "grad_norm": 0.22065140306949615, "learning_rate": 1.1298165266601778e-05, "loss": 0.03623470291495323, "step": 4502 }, { "epoch": 0.6087757395499751, "grad_norm": 1.1977784633636475, "learning_rate": 1.129153292711462e-05, "loss": 0.07958650588989258, "step": 4503 }, { "epoch": 0.6089109329187403, "grad_norm": 0.6214346289634705, "learning_rate": 1.1284901359750082e-05, "loss": 0.039936840534210205, "step": 4504 }, { "epoch": 0.6090461262875057, "grad_norm": 0.5940772294998169, "learning_rate": 1.1278270565888897e-05, "loss": 0.06382440030574799, "step": 4505 }, { "epoch": 0.6091813196562709, "grad_norm": 0.6148561239242554, "learning_rate": 1.1271640546911624e-05, "loss": 0.05178374797105789, "step": 4506 }, { "epoch": 0.6093165130250361, "grad_norm": 0.5272179245948792, "learning_rate": 1.1265011304198672e-05, "loss": 0.05109657719731331, "step": 4507 }, { "epoch": 0.6094517063938014, "grad_norm": 0.37512943148612976, "learning_rate": 1.1258382839130282e-05, "loss": 0.051076628267765045, "step": 4508 }, { "epoch": 0.6095868997625666, "grad_norm": 0.4755030572414398, "learning_rate": 1.1251755153086536e-05, "loss": 0.0556274950504303, "step": 4509 }, { "epoch": 0.6097220931313319, "grad_norm": 0.4107944369316101, "learning_rate": 1.1245128247447362e-05, "loss": 0.05108289420604706, "step": 4510 }, { "epoch": 0.6098572865000972, "grad_norm": 0.5602015852928162, "learning_rate": 1.1238502123592507e-05, "loss": 0.04840603470802307, "step": 4511 }, { "epoch": 0.6099924798688624, "grad_norm": 0.4493732750415802, "learning_rate": 1.1231876782901568e-05, "loss": 0.06225484609603882, "step": 4512 }, { "epoch": 0.6101276732376277, "grad_norm": 0.6337834000587463, "learning_rate": 1.1225252226753975e-05, "loss": 0.06003597378730774, "step": 4513 }, { "epoch": 0.610262866606393, "grad_norm": 0.6227352023124695, "learning_rate": 1.1218628456529005e-05, "loss": 0.04350108653306961, "step": 4514 }, { "epoch": 0.6103980599751582, "grad_norm": 0.49585258960723877, "learning_rate": 1.1212005473605746e-05, "loss": 0.05694460868835449, "step": 4515 }, { "epoch": 0.6105332533439235, "grad_norm": 1.3771907091140747, "learning_rate": 1.120538327936315e-05, "loss": 0.06392525136470795, "step": 4516 }, { "epoch": 0.6106684467126887, "grad_norm": 0.7170501351356506, "learning_rate": 1.1198761875179993e-05, "loss": 0.066231869161129, "step": 4517 }, { "epoch": 0.610803640081454, "grad_norm": 0.6148501038551331, "learning_rate": 1.1192141262434883e-05, "loss": 0.07428601384162903, "step": 4518 }, { "epoch": 0.6109388334502193, "grad_norm": 0.3670237958431244, "learning_rate": 1.1185521442506272e-05, "loss": 0.04119445011019707, "step": 4519 }, { "epoch": 0.6110740268189845, "grad_norm": 0.42440328001976013, "learning_rate": 1.1178902416772432e-05, "loss": 0.04954603314399719, "step": 4520 }, { "epoch": 0.6112092201877498, "grad_norm": 0.334435373544693, "learning_rate": 1.1172284186611485e-05, "loss": 0.04701607674360275, "step": 4521 }, { "epoch": 0.611344413556515, "grad_norm": 0.30091506242752075, "learning_rate": 1.1165666753401384e-05, "loss": 0.051545873284339905, "step": 4522 }, { "epoch": 0.6114796069252804, "grad_norm": 0.32049858570098877, "learning_rate": 1.1159050118519914e-05, "loss": 0.05188825726509094, "step": 4523 }, { "epoch": 0.6116148002940456, "grad_norm": 0.5259817242622375, "learning_rate": 1.1152434283344696e-05, "loss": 0.061009228229522705, "step": 4524 }, { "epoch": 0.6117499936628108, "grad_norm": 0.3324960470199585, "learning_rate": 1.114581924925317e-05, "loss": 0.041055694222450256, "step": 4525 }, { "epoch": 0.6118851870315761, "grad_norm": 0.6279718279838562, "learning_rate": 1.113920501762263e-05, "loss": 0.05985286831855774, "step": 4526 }, { "epoch": 0.6120203804003413, "grad_norm": 1.092221736907959, "learning_rate": 1.1132591589830193e-05, "loss": 0.04456346482038498, "step": 4527 }, { "epoch": 0.6121555737691067, "grad_norm": 0.4922019839286804, "learning_rate": 1.1125978967252818e-05, "loss": 0.049274761229753494, "step": 4528 }, { "epoch": 0.6122907671378719, "grad_norm": 0.5381514430046082, "learning_rate": 1.1119367151267278e-05, "loss": 0.044018328189849854, "step": 4529 }, { "epoch": 0.6124259605066371, "grad_norm": 0.5391473770141602, "learning_rate": 1.1112756143250186e-05, "loss": 0.045425914227962494, "step": 4530 }, { "epoch": 0.6125611538754024, "grad_norm": 0.2799208462238312, "learning_rate": 1.1106145944577995e-05, "loss": 0.03928492218255997, "step": 4531 }, { "epoch": 0.6126963472441677, "grad_norm": 0.17868204414844513, "learning_rate": 1.1099536556626984e-05, "loss": 0.031209055334329605, "step": 4532 }, { "epoch": 0.612831540612933, "grad_norm": 0.346330851316452, "learning_rate": 1.1092927980773269e-05, "loss": 0.05431076139211655, "step": 4533 }, { "epoch": 0.6129667339816982, "grad_norm": 0.4241599440574646, "learning_rate": 1.1086320218392777e-05, "loss": 0.056727904826402664, "step": 4534 }, { "epoch": 0.6131019273504634, "grad_norm": 0.3166915774345398, "learning_rate": 1.1079713270861286e-05, "loss": 0.043045420199632645, "step": 4535 }, { "epoch": 0.6132371207192288, "grad_norm": 0.3482891321182251, "learning_rate": 1.1073107139554395e-05, "loss": 0.06476882845163345, "step": 4536 }, { "epoch": 0.613372314087994, "grad_norm": 0.8104740381240845, "learning_rate": 1.1066501825847545e-05, "loss": 0.062356606125831604, "step": 4537 }, { "epoch": 0.6135075074567592, "grad_norm": 0.42610758543014526, "learning_rate": 1.1059897331115985e-05, "loss": 0.04907872527837753, "step": 4538 }, { "epoch": 0.6136427008255245, "grad_norm": 0.42339351773262024, "learning_rate": 1.1053293656734816e-05, "loss": 0.0518743060529232, "step": 4539 }, { "epoch": 0.6137778941942897, "grad_norm": 0.4038066864013672, "learning_rate": 1.1046690804078949e-05, "loss": 0.04603969305753708, "step": 4540 }, { "epoch": 0.6139130875630551, "grad_norm": 0.32849767804145813, "learning_rate": 1.1040088774523139e-05, "loss": 0.04492694139480591, "step": 4541 }, { "epoch": 0.6140482809318203, "grad_norm": 0.6885644793510437, "learning_rate": 1.1033487569441971e-05, "loss": 0.04000355303287506, "step": 4542 }, { "epoch": 0.6141834743005855, "grad_norm": 0.9018462896347046, "learning_rate": 1.1026887190209834e-05, "loss": 0.05526731163263321, "step": 4543 }, { "epoch": 0.6143186676693508, "grad_norm": 0.27910539507865906, "learning_rate": 1.1020287638200977e-05, "loss": 0.034677326679229736, "step": 4544 }, { "epoch": 0.6144538610381161, "grad_norm": 1.0978777408599854, "learning_rate": 1.1013688914789452e-05, "loss": 0.06008952856063843, "step": 4545 }, { "epoch": 0.6145890544068814, "grad_norm": 0.7051708698272705, "learning_rate": 1.100709102134915e-05, "loss": 0.046445004642009735, "step": 4546 }, { "epoch": 0.6147242477756466, "grad_norm": 0.6244603395462036, "learning_rate": 1.10004939592538e-05, "loss": 0.06161254644393921, "step": 4547 }, { "epoch": 0.6148594411444118, "grad_norm": 0.3483380973339081, "learning_rate": 1.0993897729876927e-05, "loss": 0.05491615831851959, "step": 4548 }, { "epoch": 0.6149946345131772, "grad_norm": 1.7858338356018066, "learning_rate": 1.0987302334591915e-05, "loss": 0.06269332766532898, "step": 4549 }, { "epoch": 0.6151298278819424, "grad_norm": 0.3047940731048584, "learning_rate": 1.098070777477195e-05, "loss": 0.03702007979154587, "step": 4550 }, { "epoch": 0.6152650212507077, "grad_norm": 0.6988071799278259, "learning_rate": 1.0974114051790067e-05, "loss": 0.06821054220199585, "step": 4551 }, { "epoch": 0.6154002146194729, "grad_norm": 0.4086858928203583, "learning_rate": 1.09675211670191e-05, "loss": 0.0581369623541832, "step": 4552 }, { "epoch": 0.6155354079882381, "grad_norm": 0.43806758522987366, "learning_rate": 1.0960929121831732e-05, "loss": 0.040879987180233, "step": 4553 }, { "epoch": 0.6156706013570035, "grad_norm": 1.7018482685089111, "learning_rate": 1.095433791760046e-05, "loss": 0.056650206446647644, "step": 4554 }, { "epoch": 0.6158057947257687, "grad_norm": 0.4267836809158325, "learning_rate": 1.0947747555697609e-05, "loss": 0.06319084763526917, "step": 4555 }, { "epoch": 0.615940988094534, "grad_norm": 0.3949653208255768, "learning_rate": 1.0941158037495328e-05, "loss": 0.0571795254945755, "step": 4556 }, { "epoch": 0.6160761814632992, "grad_norm": 0.3105303943157196, "learning_rate": 1.0934569364365583e-05, "loss": 0.05321167781949043, "step": 4557 }, { "epoch": 0.6162113748320645, "grad_norm": 0.4802440106868744, "learning_rate": 1.0927981537680176e-05, "loss": 0.053725797683000565, "step": 4558 }, { "epoch": 0.6163465682008298, "grad_norm": 0.2842666506767273, "learning_rate": 1.0921394558810726e-05, "loss": 0.03103308379650116, "step": 4559 }, { "epoch": 0.616481761569595, "grad_norm": 0.347064346075058, "learning_rate": 1.0914808429128688e-05, "loss": 0.07509362697601318, "step": 4560 }, { "epoch": 0.6166169549383603, "grad_norm": 0.31892895698547363, "learning_rate": 1.0908223150005315e-05, "loss": 0.04672209173440933, "step": 4561 }, { "epoch": 0.6167521483071255, "grad_norm": 0.6527944803237915, "learning_rate": 1.09016387228117e-05, "loss": 0.0335516631603241, "step": 4562 }, { "epoch": 0.6168873416758908, "grad_norm": 0.7031928300857544, "learning_rate": 1.0895055148918758e-05, "loss": 0.056803472340106964, "step": 4563 }, { "epoch": 0.6170225350446561, "grad_norm": 1.5055878162384033, "learning_rate": 1.0888472429697223e-05, "loss": 0.05277127027511597, "step": 4564 }, { "epoch": 0.6171577284134213, "grad_norm": 0.40353551506996155, "learning_rate": 1.088189056651766e-05, "loss": 0.05125678330659866, "step": 4565 }, { "epoch": 0.6172929217821865, "grad_norm": 0.5008995532989502, "learning_rate": 1.0875309560750438e-05, "loss": 0.05714167654514313, "step": 4566 }, { "epoch": 0.6174281151509519, "grad_norm": 1.4121614694595337, "learning_rate": 1.086872941376576e-05, "loss": 0.07152986526489258, "step": 4567 }, { "epoch": 0.6175633085197171, "grad_norm": 0.2644006907939911, "learning_rate": 1.0862150126933648e-05, "loss": 0.035382434725761414, "step": 4568 }, { "epoch": 0.6176985018884824, "grad_norm": 0.9554007053375244, "learning_rate": 1.0855571701623942e-05, "loss": 0.07176284492015839, "step": 4569 }, { "epoch": 0.6178336952572476, "grad_norm": 0.32135018706321716, "learning_rate": 1.0848994139206317e-05, "loss": 0.041753847151994705, "step": 4570 }, { "epoch": 0.6179688886260128, "grad_norm": 0.46053165197372437, "learning_rate": 1.0842417441050247e-05, "loss": 0.05809721350669861, "step": 4571 }, { "epoch": 0.6181040819947782, "grad_norm": 0.418971449136734, "learning_rate": 1.0835841608525031e-05, "loss": 0.06268781423568726, "step": 4572 }, { "epoch": 0.6182392753635434, "grad_norm": 0.5150498747825623, "learning_rate": 1.08292666429998e-05, "loss": 0.04394363611936569, "step": 4573 }, { "epoch": 0.6183744687323087, "grad_norm": 0.4957038462162018, "learning_rate": 1.08226925458435e-05, "loss": 0.05378865450620651, "step": 4574 }, { "epoch": 0.6185096621010739, "grad_norm": 0.520465612411499, "learning_rate": 1.0816119318424882e-05, "loss": 0.04268661141395569, "step": 4575 }, { "epoch": 0.6186448554698392, "grad_norm": 0.8219638466835022, "learning_rate": 1.0809546962112535e-05, "loss": 0.04037794470787048, "step": 4576 }, { "epoch": 0.6187800488386045, "grad_norm": 0.437211275100708, "learning_rate": 1.0802975478274856e-05, "loss": 0.04212292656302452, "step": 4577 }, { "epoch": 0.6189152422073697, "grad_norm": 0.6094338893890381, "learning_rate": 1.0796404868280062e-05, "loss": 0.046925559639930725, "step": 4578 }, { "epoch": 0.619050435576135, "grad_norm": 0.4211161732673645, "learning_rate": 1.07898351334962e-05, "loss": 0.04569794237613678, "step": 4579 }, { "epoch": 0.6191856289449003, "grad_norm": 0.5433511734008789, "learning_rate": 1.0783266275291103e-05, "loss": 0.04710765182971954, "step": 4580 }, { "epoch": 0.6193208223136655, "grad_norm": 0.2898409962654114, "learning_rate": 1.077669829503246e-05, "loss": 0.03926851600408554, "step": 4581 }, { "epoch": 0.6194560156824308, "grad_norm": 0.29439783096313477, "learning_rate": 1.077013119408775e-05, "loss": 0.047526340931653976, "step": 4582 }, { "epoch": 0.619591209051196, "grad_norm": 0.5454329252243042, "learning_rate": 1.0763564973824289e-05, "loss": 0.05025076866149902, "step": 4583 }, { "epoch": 0.6197264024199614, "grad_norm": 0.3629743158817291, "learning_rate": 1.0756999635609185e-05, "loss": 0.04767164587974548, "step": 4584 }, { "epoch": 0.6198615957887266, "grad_norm": 0.21775631606578827, "learning_rate": 1.0750435180809381e-05, "loss": 0.03224128857254982, "step": 4585 }, { "epoch": 0.6199967891574918, "grad_norm": 0.27913036942481995, "learning_rate": 1.074387161079164e-05, "loss": 0.04899683594703674, "step": 4586 }, { "epoch": 0.6201319825262571, "grad_norm": 0.753542423248291, "learning_rate": 1.0737308926922521e-05, "loss": 0.05364812910556793, "step": 4587 }, { "epoch": 0.6202671758950223, "grad_norm": 0.5278282165527344, "learning_rate": 1.0730747130568424e-05, "loss": 0.03552785515785217, "step": 4588 }, { "epoch": 0.6204023692637876, "grad_norm": 0.6113235354423523, "learning_rate": 1.0724186223095532e-05, "loss": 0.05290553718805313, "step": 4589 }, { "epoch": 0.6205375626325529, "grad_norm": 0.4825994372367859, "learning_rate": 1.071762620586987e-05, "loss": 0.05363726615905762, "step": 4590 }, { "epoch": 0.6206727560013181, "grad_norm": 0.7129722833633423, "learning_rate": 1.0711067080257273e-05, "loss": 0.04131750017404556, "step": 4591 }, { "epoch": 0.6208079493700834, "grad_norm": 0.7018037438392639, "learning_rate": 1.0704508847623374e-05, "loss": 0.04199056699872017, "step": 4592 }, { "epoch": 0.6209431427388487, "grad_norm": 0.36936163902282715, "learning_rate": 1.069795150933365e-05, "loss": 0.04437442123889923, "step": 4593 }, { "epoch": 0.6210783361076139, "grad_norm": 0.39472657442092896, "learning_rate": 1.0691395066753357e-05, "loss": 0.05951548367738724, "step": 4594 }, { "epoch": 0.6212135294763792, "grad_norm": 0.3580392897129059, "learning_rate": 1.0684839521247584e-05, "loss": 0.041038259863853455, "step": 4595 }, { "epoch": 0.6213487228451444, "grad_norm": 0.32261937856674194, "learning_rate": 1.0678284874181234e-05, "loss": 0.05707205832004547, "step": 4596 }, { "epoch": 0.6214839162139097, "grad_norm": 1.4168047904968262, "learning_rate": 1.0671731126919028e-05, "loss": 0.06272955983877182, "step": 4597 }, { "epoch": 0.621619109582675, "grad_norm": 0.8052384853363037, "learning_rate": 1.066517828082548e-05, "loss": 0.06737357378005981, "step": 4598 }, { "epoch": 0.6217543029514402, "grad_norm": 1.3140414953231812, "learning_rate": 1.0658626337264926e-05, "loss": 0.06402401626110077, "step": 4599 }, { "epoch": 0.6218894963202055, "grad_norm": 0.26091107726097107, "learning_rate": 1.0652075297601518e-05, "loss": 0.043483246117830276, "step": 4600 }, { "epoch": 0.6220246896889707, "grad_norm": 0.31169095635414124, "learning_rate": 1.0645525163199222e-05, "loss": 0.037219852209091187, "step": 4601 }, { "epoch": 0.6221598830577361, "grad_norm": 0.46487393975257874, "learning_rate": 1.063897593542181e-05, "loss": 0.03937599062919617, "step": 4602 }, { "epoch": 0.6222950764265013, "grad_norm": 1.5859410762786865, "learning_rate": 1.0632427615632864e-05, "loss": 0.05201828479766846, "step": 4603 }, { "epoch": 0.6224302697952665, "grad_norm": 0.5500233173370361, "learning_rate": 1.0625880205195776e-05, "loss": 0.06203152611851692, "step": 4604 }, { "epoch": 0.6225654631640318, "grad_norm": 0.8580653667449951, "learning_rate": 1.0619333705473754e-05, "loss": 0.05133706331253052, "step": 4605 }, { "epoch": 0.622700656532797, "grad_norm": 0.8754221200942993, "learning_rate": 1.0612788117829821e-05, "loss": 0.057396914809942245, "step": 4606 }, { "epoch": 0.6228358499015624, "grad_norm": 0.23293782770633698, "learning_rate": 1.0606243443626792e-05, "loss": 0.050726406276226044, "step": 4607 }, { "epoch": 0.6229710432703276, "grad_norm": 0.46297305822372437, "learning_rate": 1.0599699684227313e-05, "loss": 0.05203748494386673, "step": 4608 }, { "epoch": 0.6231062366390928, "grad_norm": 0.6241655945777893, "learning_rate": 1.0593156840993818e-05, "loss": 0.04434943199157715, "step": 4609 }, { "epoch": 0.6232414300078581, "grad_norm": 0.5743916630744934, "learning_rate": 1.0586614915288571e-05, "loss": 0.04693271219730377, "step": 4610 }, { "epoch": 0.6233766233766234, "grad_norm": 0.2689257264137268, "learning_rate": 1.0580073908473641e-05, "loss": 0.04017655551433563, "step": 4611 }, { "epoch": 0.6235118167453887, "grad_norm": 0.5774421095848083, "learning_rate": 1.0573533821910885e-05, "loss": 0.06909120082855225, "step": 4612 }, { "epoch": 0.6236470101141539, "grad_norm": 1.0421887636184692, "learning_rate": 1.0566994656961997e-05, "loss": 0.0610082745552063, "step": 4613 }, { "epoch": 0.6237822034829191, "grad_norm": 0.5665892958641052, "learning_rate": 1.0560456414988456e-05, "loss": 0.06724916398525238, "step": 4614 }, { "epoch": 0.6239173968516845, "grad_norm": 0.5606773495674133, "learning_rate": 1.0553919097351564e-05, "loss": 0.0555778369307518, "step": 4615 }, { "epoch": 0.6240525902204497, "grad_norm": 0.5149750709533691, "learning_rate": 1.0547382705412434e-05, "loss": 0.057661134749650955, "step": 4616 }, { "epoch": 0.6241877835892149, "grad_norm": 0.5562970042228699, "learning_rate": 1.054084724053196e-05, "loss": 0.05339311063289642, "step": 4617 }, { "epoch": 0.6243229769579802, "grad_norm": 0.6296172738075256, "learning_rate": 1.0534312704070875e-05, "loss": 0.050513237714767456, "step": 4618 }, { "epoch": 0.6244581703267454, "grad_norm": 0.5522633194923401, "learning_rate": 1.0527779097389695e-05, "loss": 0.04391420632600784, "step": 4619 }, { "epoch": 0.6245933636955108, "grad_norm": 0.5698334574699402, "learning_rate": 1.0521246421848762e-05, "loss": 0.04198046028614044, "step": 4620 }, { "epoch": 0.624728557064276, "grad_norm": 0.6919045448303223, "learning_rate": 1.0514714678808202e-05, "loss": 0.047321055084466934, "step": 4621 }, { "epoch": 0.6248637504330412, "grad_norm": 1.213399887084961, "learning_rate": 1.0508183869627962e-05, "loss": 0.06009151041507721, "step": 4622 }, { "epoch": 0.6249989438018065, "grad_norm": 0.48268258571624756, "learning_rate": 1.0501653995667798e-05, "loss": 0.06529982388019562, "step": 4623 }, { "epoch": 0.6251341371705718, "grad_norm": 0.59559166431427, "learning_rate": 1.0495125058287258e-05, "loss": 0.06405478715896606, "step": 4624 }, { "epoch": 0.6252693305393371, "grad_norm": 0.9471227526664734, "learning_rate": 1.0488597058845708e-05, "loss": 0.058127835392951965, "step": 4625 }, { "epoch": 0.6254045239081023, "grad_norm": 0.40131238102912903, "learning_rate": 1.0482069998702304e-05, "loss": 0.040883246809244156, "step": 4626 }, { "epoch": 0.6255397172768675, "grad_norm": 0.4903443157672882, "learning_rate": 1.0475543879216017e-05, "loss": 0.05748749524354935, "step": 4627 }, { "epoch": 0.6256749106456329, "grad_norm": 0.664860188961029, "learning_rate": 1.0469018701745626e-05, "loss": 0.04879584163427353, "step": 4628 }, { "epoch": 0.6258101040143981, "grad_norm": 0.9136778116226196, "learning_rate": 1.0462494467649704e-05, "loss": 0.04561778903007507, "step": 4629 }, { "epoch": 0.6259452973831634, "grad_norm": 1.187482476234436, "learning_rate": 1.045597117828663e-05, "loss": 0.04310613498091698, "step": 4630 }, { "epoch": 0.6260804907519286, "grad_norm": 0.5049716234207153, "learning_rate": 1.0449448835014586e-05, "loss": 0.06675244867801666, "step": 4631 }, { "epoch": 0.6262156841206938, "grad_norm": 0.41891464591026306, "learning_rate": 1.044292743919156e-05, "loss": 0.046847864985466, "step": 4632 }, { "epoch": 0.6263508774894592, "grad_norm": 0.43414586782455444, "learning_rate": 1.0436406992175343e-05, "loss": 0.06464985758066177, "step": 4633 }, { "epoch": 0.6264860708582244, "grad_norm": 0.5278609991073608, "learning_rate": 1.0429887495323532e-05, "loss": 0.051476627588272095, "step": 4634 }, { "epoch": 0.6266212642269897, "grad_norm": 0.6428268551826477, "learning_rate": 1.0423368949993512e-05, "loss": 0.06466898322105408, "step": 4635 }, { "epoch": 0.6267564575957549, "grad_norm": 0.34842076897621155, "learning_rate": 1.041685135754248e-05, "loss": 0.05078856647014618, "step": 4636 }, { "epoch": 0.6268916509645202, "grad_norm": 0.5631418228149414, "learning_rate": 1.0410334719327435e-05, "loss": 0.060221489518880844, "step": 4637 }, { "epoch": 0.6270268443332855, "grad_norm": 0.8179173469543457, "learning_rate": 1.0403819036705177e-05, "loss": 0.06180504709482193, "step": 4638 }, { "epoch": 0.6271620377020507, "grad_norm": 1.1750370264053345, "learning_rate": 1.0397304311032311e-05, "loss": 0.06148827075958252, "step": 4639 }, { "epoch": 0.627297231070816, "grad_norm": 0.24618911743164062, "learning_rate": 1.039079054366523e-05, "loss": 0.039470549672842026, "step": 4640 }, { "epoch": 0.6274324244395812, "grad_norm": 0.5818641185760498, "learning_rate": 1.0384277735960133e-05, "loss": 0.060404010117053986, "step": 4641 }, { "epoch": 0.6275676178083465, "grad_norm": 0.4214550256729126, "learning_rate": 1.0377765889273025e-05, "loss": 0.04228523373603821, "step": 4642 }, { "epoch": 0.6277028111771118, "grad_norm": 0.984075129032135, "learning_rate": 1.0371255004959715e-05, "loss": 0.07434651255607605, "step": 4643 }, { "epoch": 0.627838004545877, "grad_norm": 0.32798850536346436, "learning_rate": 1.036474508437579e-05, "loss": 0.03830292075872421, "step": 4644 }, { "epoch": 0.6279731979146422, "grad_norm": 0.4325157403945923, "learning_rate": 1.035823612887666e-05, "loss": 0.05093963444232941, "step": 4645 }, { "epoch": 0.6281083912834076, "grad_norm": 0.8938845992088318, "learning_rate": 1.0351728139817517e-05, "loss": 0.06327836215496063, "step": 4646 }, { "epoch": 0.6282435846521728, "grad_norm": 0.7308549880981445, "learning_rate": 1.0345221118553362e-05, "loss": 0.052430376410484314, "step": 4647 }, { "epoch": 0.6283787780209381, "grad_norm": 0.568248450756073, "learning_rate": 1.0338715066439002e-05, "loss": 0.050008200109004974, "step": 4648 }, { "epoch": 0.6285139713897033, "grad_norm": 0.7952597737312317, "learning_rate": 1.0332209984829013e-05, "loss": 0.06555724143981934, "step": 4649 }, { "epoch": 0.6286491647584685, "grad_norm": 0.36300814151763916, "learning_rate": 1.03257058750778e-05, "loss": 0.04265543073415756, "step": 4650 }, { "epoch": 0.6287843581272339, "grad_norm": 0.405932754278183, "learning_rate": 1.0319202738539548e-05, "loss": 0.04134620726108551, "step": 4651 }, { "epoch": 0.6289195514959991, "grad_norm": 0.49853989481925964, "learning_rate": 1.0312700576568253e-05, "loss": 0.05231449007987976, "step": 4652 }, { "epoch": 0.6290547448647644, "grad_norm": 1.1169902086257935, "learning_rate": 1.0306199390517688e-05, "loss": 0.056012049317359924, "step": 4653 }, { "epoch": 0.6291899382335296, "grad_norm": 0.5426188111305237, "learning_rate": 1.0299699181741439e-05, "loss": 0.07195337116718292, "step": 4654 }, { "epoch": 0.6293251316022949, "grad_norm": 1.414876937866211, "learning_rate": 1.0293199951592889e-05, "loss": 0.06186673045158386, "step": 4655 }, { "epoch": 0.6294603249710602, "grad_norm": 0.3539164960384369, "learning_rate": 1.0286701701425206e-05, "loss": 0.05881623178720474, "step": 4656 }, { "epoch": 0.6295955183398254, "grad_norm": 0.7021041512489319, "learning_rate": 1.0280204432591369e-05, "loss": 0.07057327032089233, "step": 4657 }, { "epoch": 0.6297307117085907, "grad_norm": 1.7062206268310547, "learning_rate": 1.0273708146444133e-05, "loss": 0.06410694867372513, "step": 4658 }, { "epoch": 0.629865905077356, "grad_norm": 0.8552284240722656, "learning_rate": 1.0267212844336062e-05, "loss": 0.0669512152671814, "step": 4659 }, { "epoch": 0.6300010984461212, "grad_norm": 0.22374765574932098, "learning_rate": 1.026071852761952e-05, "loss": 0.03679882735013962, "step": 4660 }, { "epoch": 0.6301362918148865, "grad_norm": 0.8955230712890625, "learning_rate": 1.025422519764665e-05, "loss": 0.04569307714700699, "step": 4661 }, { "epoch": 0.6302714851836517, "grad_norm": 1.1031315326690674, "learning_rate": 1.024773285576941e-05, "loss": 0.04860438406467438, "step": 4662 }, { "epoch": 0.630406678552417, "grad_norm": 0.34250757098197937, "learning_rate": 1.0241241503339524e-05, "loss": 0.04318247362971306, "step": 4663 }, { "epoch": 0.6305418719211823, "grad_norm": 0.6776455640792847, "learning_rate": 1.023475114170853e-05, "loss": 0.05851234123110771, "step": 4664 }, { "epoch": 0.6306770652899475, "grad_norm": 0.6967629790306091, "learning_rate": 1.0228261772227768e-05, "loss": 0.0459999218583107, "step": 4665 }, { "epoch": 0.6308122586587128, "grad_norm": 0.7477776408195496, "learning_rate": 1.0221773396248349e-05, "loss": 0.06580895185470581, "step": 4666 }, { "epoch": 0.630947452027478, "grad_norm": 1.4138846397399902, "learning_rate": 1.021528601512119e-05, "loss": 0.06900068372488022, "step": 4667 }, { "epoch": 0.6310826453962434, "grad_norm": 0.9229210615158081, "learning_rate": 1.0208799630196994e-05, "loss": 0.0526098906993866, "step": 4668 }, { "epoch": 0.6312178387650086, "grad_norm": 0.5856477618217468, "learning_rate": 1.0202314242826264e-05, "loss": 0.05838462710380554, "step": 4669 }, { "epoch": 0.6313530321337738, "grad_norm": 0.31988847255706787, "learning_rate": 1.0195829854359299e-05, "loss": 0.0435689240694046, "step": 4670 }, { "epoch": 0.6314882255025391, "grad_norm": 0.37234628200531006, "learning_rate": 1.0189346466146175e-05, "loss": 0.043655455112457275, "step": 4671 }, { "epoch": 0.6316234188713044, "grad_norm": 0.4791053533554077, "learning_rate": 1.018286407953677e-05, "loss": 0.056932225823402405, "step": 4672 }, { "epoch": 0.6317586122400696, "grad_norm": 0.25158461928367615, "learning_rate": 1.017638269588075e-05, "loss": 0.05398217588663101, "step": 4673 }, { "epoch": 0.6318938056088349, "grad_norm": 0.9145091772079468, "learning_rate": 1.0169902316527575e-05, "loss": 0.07113750278949738, "step": 4674 }, { "epoch": 0.6320289989776001, "grad_norm": 0.7961694002151489, "learning_rate": 1.0163422942826502e-05, "loss": 0.06181144714355469, "step": 4675 }, { "epoch": 0.6321641923463654, "grad_norm": 0.5894361734390259, "learning_rate": 1.0156944576126555e-05, "loss": 0.04805958271026611, "step": 4676 }, { "epoch": 0.6322993857151307, "grad_norm": 0.2800850570201874, "learning_rate": 1.0150467217776579e-05, "loss": 0.049652036279439926, "step": 4677 }, { "epoch": 0.6324345790838959, "grad_norm": 0.4588675796985626, "learning_rate": 1.0143990869125185e-05, "loss": 0.053740665316581726, "step": 4678 }, { "epoch": 0.6325697724526612, "grad_norm": 0.4305167496204376, "learning_rate": 1.013751553152079e-05, "loss": 0.06193351745605469, "step": 4679 }, { "epoch": 0.6327049658214264, "grad_norm": 0.4192427694797516, "learning_rate": 1.0131041206311594e-05, "loss": 0.0593588650226593, "step": 4680 }, { "epoch": 0.6328401591901918, "grad_norm": 0.48952728509902954, "learning_rate": 1.0124567894845578e-05, "loss": 0.06378231942653656, "step": 4681 }, { "epoch": 0.632975352558957, "grad_norm": 0.7123900055885315, "learning_rate": 1.0118095598470528e-05, "loss": 0.04663345590233803, "step": 4682 }, { "epoch": 0.6331105459277222, "grad_norm": 0.3992023169994354, "learning_rate": 1.0111624318534006e-05, "loss": 0.04774162545800209, "step": 4683 }, { "epoch": 0.6332457392964875, "grad_norm": 1.074270486831665, "learning_rate": 1.0105154056383377e-05, "loss": 0.04648825526237488, "step": 4684 }, { "epoch": 0.6333809326652527, "grad_norm": 1.688263177871704, "learning_rate": 1.0098684813365764e-05, "loss": 0.058587878942489624, "step": 4685 }, { "epoch": 0.6335161260340181, "grad_norm": 0.4962066113948822, "learning_rate": 1.0092216590828115e-05, "loss": 0.03800930455327034, "step": 4686 }, { "epoch": 0.6336513194027833, "grad_norm": 0.9367076754570007, "learning_rate": 1.0085749390117146e-05, "loss": 0.05098221078515053, "step": 4687 }, { "epoch": 0.6337865127715485, "grad_norm": 0.3751489818096161, "learning_rate": 1.0079283212579354e-05, "loss": 0.04532918334007263, "step": 4688 }, { "epoch": 0.6339217061403138, "grad_norm": 0.5485007166862488, "learning_rate": 1.0072818059561045e-05, "loss": 0.041458070278167725, "step": 4689 }, { "epoch": 0.6340568995090791, "grad_norm": 0.595403254032135, "learning_rate": 1.0066353932408285e-05, "loss": 0.047010332345962524, "step": 4690 }, { "epoch": 0.6341920928778444, "grad_norm": 0.21888503432273865, "learning_rate": 1.0059890832466948e-05, "loss": 0.05118374526500702, "step": 4691 }, { "epoch": 0.6343272862466096, "grad_norm": 0.32095620036125183, "learning_rate": 1.0053428761082684e-05, "loss": 0.04708552360534668, "step": 4692 }, { "epoch": 0.6344624796153748, "grad_norm": 0.23541928827762604, "learning_rate": 1.0046967719600927e-05, "loss": 0.05972860008478165, "step": 4693 }, { "epoch": 0.6345976729841402, "grad_norm": 0.3196291923522949, "learning_rate": 1.0040507709366912e-05, "loss": 0.04170640558004379, "step": 4694 }, { "epoch": 0.6347328663529054, "grad_norm": 1.1211464405059814, "learning_rate": 1.0034048731725631e-05, "loss": 0.08250119537115097, "step": 4695 }, { "epoch": 0.6348680597216706, "grad_norm": 0.5909939408302307, "learning_rate": 1.0027590788021886e-05, "loss": 0.04364566504955292, "step": 4696 }, { "epoch": 0.6350032530904359, "grad_norm": 1.281913161277771, "learning_rate": 1.0021133879600258e-05, "loss": 0.05728508532047272, "step": 4697 }, { "epoch": 0.6351384464592011, "grad_norm": 0.33976858854293823, "learning_rate": 1.0014678007805108e-05, "loss": 0.05651407688856125, "step": 4698 }, { "epoch": 0.6352736398279665, "grad_norm": 0.3187199831008911, "learning_rate": 1.0008223173980579e-05, "loss": 0.05691501498222351, "step": 4699 }, { "epoch": 0.6354088331967317, "grad_norm": 0.29178905487060547, "learning_rate": 1.0001769379470604e-05, "loss": 0.03464853763580322, "step": 4700 }, { "epoch": 0.6355440265654969, "grad_norm": 0.48736095428466797, "learning_rate": 9.995316625618898e-06, "loss": 0.056324444711208344, "step": 4701 }, { "epoch": 0.6356792199342622, "grad_norm": 0.4598604440689087, "learning_rate": 9.988864913768962e-06, "loss": 0.06390278786420822, "step": 4702 }, { "epoch": 0.6358144133030275, "grad_norm": 0.3368128538131714, "learning_rate": 9.982414245264071e-06, "loss": 0.03932724520564079, "step": 4703 }, { "epoch": 0.6359496066717928, "grad_norm": 0.8145586848258972, "learning_rate": 9.975964621447293e-06, "loss": 0.05152442306280136, "step": 4704 }, { "epoch": 0.636084800040558, "grad_norm": 1.0392258167266846, "learning_rate": 9.96951604366147e-06, "loss": 0.04404053837060928, "step": 4705 }, { "epoch": 0.6362199934093232, "grad_norm": 0.2877473533153534, "learning_rate": 9.963068513249233e-06, "loss": 0.055663272738456726, "step": 4706 }, { "epoch": 0.6363551867780886, "grad_norm": 0.18260252475738525, "learning_rate": 9.956622031552996e-06, "loss": 0.03911535441875458, "step": 4707 }, { "epoch": 0.6364903801468538, "grad_norm": 0.6719172596931458, "learning_rate": 9.950176599914942e-06, "loss": 0.044287413358688354, "step": 4708 }, { "epoch": 0.6366255735156191, "grad_norm": 0.2303326278924942, "learning_rate": 9.943732219677048e-06, "loss": 0.03544317185878754, "step": 4709 }, { "epoch": 0.6367607668843843, "grad_norm": 0.5941929817199707, "learning_rate": 9.93728889218107e-06, "loss": 0.03194409981369972, "step": 4710 }, { "epoch": 0.6368959602531495, "grad_norm": 0.40900540351867676, "learning_rate": 9.930846618768543e-06, "loss": 0.05201096832752228, "step": 4711 }, { "epoch": 0.6370311536219149, "grad_norm": 0.6259360909461975, "learning_rate": 9.924405400780784e-06, "loss": 0.046068064868450165, "step": 4712 }, { "epoch": 0.6371663469906801, "grad_norm": 0.5820388793945312, "learning_rate": 9.917965239558885e-06, "loss": 0.03427599370479584, "step": 4713 }, { "epoch": 0.6373015403594454, "grad_norm": 0.33634090423583984, "learning_rate": 9.911526136443726e-06, "loss": 0.05215923488140106, "step": 4714 }, { "epoch": 0.6374367337282106, "grad_norm": 0.8394302129745483, "learning_rate": 9.905088092775956e-06, "loss": 0.046040382236242294, "step": 4715 }, { "epoch": 0.6375719270969759, "grad_norm": 0.8640695214271545, "learning_rate": 9.898651109896015e-06, "loss": 0.05065205693244934, "step": 4716 }, { "epoch": 0.6377071204657412, "grad_norm": 0.9978101253509521, "learning_rate": 9.892215189144123e-06, "loss": 0.06882886588573456, "step": 4717 }, { "epoch": 0.6378423138345064, "grad_norm": 0.4321083724498749, "learning_rate": 9.88578033186026e-06, "loss": 0.04601617157459259, "step": 4718 }, { "epoch": 0.6379775072032717, "grad_norm": 0.35456937551498413, "learning_rate": 9.879346539384207e-06, "loss": 0.03964271396398544, "step": 4719 }, { "epoch": 0.638112700572037, "grad_norm": 0.7505684494972229, "learning_rate": 9.87291381305551e-06, "loss": 0.053060080856084824, "step": 4720 }, { "epoch": 0.6382478939408022, "grad_norm": 0.21678295731544495, "learning_rate": 9.866482154213502e-06, "loss": 0.04147632420063019, "step": 4721 }, { "epoch": 0.6383830873095675, "grad_norm": 0.400951087474823, "learning_rate": 9.86005156419728e-06, "loss": 0.06529366225004196, "step": 4722 }, { "epoch": 0.6385182806783327, "grad_norm": 0.6195996403694153, "learning_rate": 9.853622044345732e-06, "loss": 0.06088293343782425, "step": 4723 }, { "epoch": 0.6386534740470979, "grad_norm": 0.6974532008171082, "learning_rate": 9.847193595997522e-06, "loss": 0.06185964494943619, "step": 4724 }, { "epoch": 0.6387886674158633, "grad_norm": 0.675674319267273, "learning_rate": 9.840766220491078e-06, "loss": 0.047476764768362045, "step": 4725 }, { "epoch": 0.6389238607846285, "grad_norm": 0.5759568214416504, "learning_rate": 9.834339919164625e-06, "loss": 0.05363106727600098, "step": 4726 }, { "epoch": 0.6390590541533938, "grad_norm": 1.741191029548645, "learning_rate": 9.827914693356145e-06, "loss": 0.05436190217733383, "step": 4727 }, { "epoch": 0.639194247522159, "grad_norm": 0.5656270384788513, "learning_rate": 9.821490544403403e-06, "loss": 0.056480370461940765, "step": 4728 }, { "epoch": 0.6393294408909242, "grad_norm": 0.3221462070941925, "learning_rate": 9.815067473643951e-06, "loss": 0.05631650239229202, "step": 4729 }, { "epoch": 0.6394646342596896, "grad_norm": 0.3064517676830292, "learning_rate": 9.808645482415097e-06, "loss": 0.03556741401553154, "step": 4730 }, { "epoch": 0.6395998276284548, "grad_norm": 0.6442540884017944, "learning_rate": 9.80222457205394e-06, "loss": 0.039395034313201904, "step": 4731 }, { "epoch": 0.6397350209972201, "grad_norm": 0.5686182975769043, "learning_rate": 9.795804743897341e-06, "loss": 0.05261033773422241, "step": 4732 }, { "epoch": 0.6398702143659853, "grad_norm": 0.3568746745586395, "learning_rate": 9.789385999281948e-06, "loss": 0.05085264891386032, "step": 4733 }, { "epoch": 0.6400054077347506, "grad_norm": 1.2988499402999878, "learning_rate": 9.782968339544179e-06, "loss": 0.057544857263565063, "step": 4734 }, { "epoch": 0.6401406011035159, "grad_norm": 0.5114401578903198, "learning_rate": 9.776551766020219e-06, "loss": 0.06983036547899246, "step": 4735 }, { "epoch": 0.6402757944722811, "grad_norm": 0.3355632722377777, "learning_rate": 9.77013628004604e-06, "loss": 0.04666588455438614, "step": 4736 }, { "epoch": 0.6404109878410464, "grad_norm": 0.3591151833534241, "learning_rate": 9.763721882957371e-06, "loss": 0.0373477041721344, "step": 4737 }, { "epoch": 0.6405461812098117, "grad_norm": 0.5469942688941956, "learning_rate": 9.757308576089732e-06, "loss": 0.044421806931495667, "step": 4738 }, { "epoch": 0.6406813745785769, "grad_norm": 0.5699624419212341, "learning_rate": 9.750896360778404e-06, "loss": 0.053258977830410004, "step": 4739 }, { "epoch": 0.6408165679473422, "grad_norm": 0.6950234174728394, "learning_rate": 9.744485238358448e-06, "loss": 0.04999391734600067, "step": 4740 }, { "epoch": 0.6409517613161074, "grad_norm": 0.3082069158554077, "learning_rate": 9.73807521016469e-06, "loss": 0.03505382686853409, "step": 4741 }, { "epoch": 0.6410869546848728, "grad_norm": 0.6573958396911621, "learning_rate": 9.731666277531732e-06, "loss": 0.045720405876636505, "step": 4742 }, { "epoch": 0.641222148053638, "grad_norm": 0.31336405873298645, "learning_rate": 9.725258441793947e-06, "loss": 0.040391504764556885, "step": 4743 }, { "epoch": 0.6413573414224032, "grad_norm": 0.3726145923137665, "learning_rate": 9.71885170428549e-06, "loss": 0.062105149030685425, "step": 4744 }, { "epoch": 0.6414925347911685, "grad_norm": 0.24680550396442413, "learning_rate": 9.712446066340265e-06, "loss": 0.041123002767562866, "step": 4745 }, { "epoch": 0.6416277281599337, "grad_norm": 1.074472427368164, "learning_rate": 9.70604152929197e-06, "loss": 0.06705289334058762, "step": 4746 }, { "epoch": 0.6417629215286991, "grad_norm": 1.0206831693649292, "learning_rate": 9.699638094474054e-06, "loss": 0.061657726764678955, "step": 4747 }, { "epoch": 0.6418981148974643, "grad_norm": 0.4847748875617981, "learning_rate": 9.693235763219752e-06, "loss": 0.036822669208049774, "step": 4748 }, { "epoch": 0.6420333082662295, "grad_norm": 0.5083960294723511, "learning_rate": 9.68683453686207e-06, "loss": 0.06037784367799759, "step": 4749 }, { "epoch": 0.6421685016349948, "grad_norm": 0.33866918087005615, "learning_rate": 9.680434416733763e-06, "loss": 0.05275866761803627, "step": 4750 }, { "epoch": 0.6423036950037601, "grad_norm": 0.6149324178695679, "learning_rate": 9.674035404167381e-06, "loss": 0.04908076301217079, "step": 4751 }, { "epoch": 0.6424388883725253, "grad_norm": 0.8515546321868896, "learning_rate": 9.66763750049523e-06, "loss": 0.06292057782411575, "step": 4752 }, { "epoch": 0.6425740817412906, "grad_norm": 1.4440233707427979, "learning_rate": 9.66124070704939e-06, "loss": 0.07689607888460159, "step": 4753 }, { "epoch": 0.6427092751100558, "grad_norm": 0.6262544393539429, "learning_rate": 9.654845025161699e-06, "loss": 0.03946412354707718, "step": 4754 }, { "epoch": 0.6428444684788212, "grad_norm": 0.4633781909942627, "learning_rate": 9.648450456163777e-06, "loss": 0.04835525155067444, "step": 4755 }, { "epoch": 0.6429796618475864, "grad_norm": 0.7444468140602112, "learning_rate": 9.64205700138701e-06, "loss": 0.04709312319755554, "step": 4756 }, { "epoch": 0.6431148552163516, "grad_norm": 0.5160643458366394, "learning_rate": 9.635664662162548e-06, "loss": 0.05223727226257324, "step": 4757 }, { "epoch": 0.6432500485851169, "grad_norm": 0.3359399139881134, "learning_rate": 9.629273439821315e-06, "loss": 0.054152511060237885, "step": 4758 }, { "epoch": 0.6433852419538821, "grad_norm": 0.4551103115081787, "learning_rate": 9.622883335693984e-06, "loss": 0.05163699388504028, "step": 4759 }, { "epoch": 0.6435204353226475, "grad_norm": 0.4248725473880768, "learning_rate": 9.616494351111017e-06, "loss": 0.04353562742471695, "step": 4760 }, { "epoch": 0.6436556286914127, "grad_norm": 0.4266912639141083, "learning_rate": 9.610106487402637e-06, "loss": 0.056361302733421326, "step": 4761 }, { "epoch": 0.6437908220601779, "grad_norm": 0.7169679403305054, "learning_rate": 9.603719745898826e-06, "loss": 0.059100985527038574, "step": 4762 }, { "epoch": 0.6439260154289432, "grad_norm": 0.9839428067207336, "learning_rate": 9.597334127929346e-06, "loss": 0.05191851034760475, "step": 4763 }, { "epoch": 0.6440612087977085, "grad_norm": 0.5494467616081238, "learning_rate": 9.590949634823707e-06, "loss": 0.05256250873208046, "step": 4764 }, { "epoch": 0.6441964021664738, "grad_norm": 0.28033575415611267, "learning_rate": 9.584566267911198e-06, "loss": 0.0662446916103363, "step": 4765 }, { "epoch": 0.644331595535239, "grad_norm": 0.8343197703361511, "learning_rate": 9.578184028520874e-06, "loss": 0.05676156282424927, "step": 4766 }, { "epoch": 0.6444667889040042, "grad_norm": 0.5727573037147522, "learning_rate": 9.571802917981548e-06, "loss": 0.03550533577799797, "step": 4767 }, { "epoch": 0.6446019822727695, "grad_norm": 0.8502346277236938, "learning_rate": 9.565422937621798e-06, "loss": 0.042402856051921844, "step": 4768 }, { "epoch": 0.6447371756415348, "grad_norm": 0.5257234573364258, "learning_rate": 9.559044088769971e-06, "loss": 0.057161688804626465, "step": 4769 }, { "epoch": 0.6448723690103001, "grad_norm": 0.7567318677902222, "learning_rate": 9.552666372754182e-06, "loss": 0.05372577905654907, "step": 4770 }, { "epoch": 0.6450075623790653, "grad_norm": 0.4655241370201111, "learning_rate": 9.546289790902307e-06, "loss": 0.03939799964427948, "step": 4771 }, { "epoch": 0.6451427557478305, "grad_norm": 0.6124745011329651, "learning_rate": 9.539914344541976e-06, "loss": 0.047296732664108276, "step": 4772 }, { "epoch": 0.6452779491165959, "grad_norm": 0.24542281031608582, "learning_rate": 9.533540035000598e-06, "loss": 0.047902628779411316, "step": 4773 }, { "epoch": 0.6454131424853611, "grad_norm": 0.4496416747570038, "learning_rate": 9.52716686360533e-06, "loss": 0.05026061832904816, "step": 4774 }, { "epoch": 0.6455483358541264, "grad_norm": 0.5349769592285156, "learning_rate": 9.520794831683108e-06, "loss": 0.053541723638772964, "step": 4775 }, { "epoch": 0.6456835292228916, "grad_norm": 1.1830024719238281, "learning_rate": 9.514423940560627e-06, "loss": 0.05755196139216423, "step": 4776 }, { "epoch": 0.6458187225916568, "grad_norm": 0.46192553639411926, "learning_rate": 9.508054191564326e-06, "loss": 0.04821890592575073, "step": 4777 }, { "epoch": 0.6459539159604222, "grad_norm": 0.36615103483200073, "learning_rate": 9.501685586020434e-06, "loss": 0.04434036463499069, "step": 4778 }, { "epoch": 0.6460891093291874, "grad_norm": 1.0897423028945923, "learning_rate": 9.495318125254919e-06, "loss": 0.06512558460235596, "step": 4779 }, { "epoch": 0.6462243026979526, "grad_norm": 0.685386598110199, "learning_rate": 9.488951810593527e-06, "loss": 0.06861716508865356, "step": 4780 }, { "epoch": 0.6463594960667179, "grad_norm": 0.6100823879241943, "learning_rate": 9.48258664336176e-06, "loss": 0.0717424675822258, "step": 4781 }, { "epoch": 0.6464946894354832, "grad_norm": 0.46544957160949707, "learning_rate": 9.476222624884873e-06, "loss": 0.055689986795186996, "step": 4782 }, { "epoch": 0.6466298828042485, "grad_norm": 0.4717201590538025, "learning_rate": 9.469859756487893e-06, "loss": 0.05503963679075241, "step": 4783 }, { "epoch": 0.6467650761730137, "grad_norm": 0.6445606350898743, "learning_rate": 9.463498039495598e-06, "loss": 0.08194366097450256, "step": 4784 }, { "epoch": 0.6469002695417789, "grad_norm": 0.6095782518386841, "learning_rate": 9.457137475232537e-06, "loss": 0.060378074645996094, "step": 4785 }, { "epoch": 0.6470354629105443, "grad_norm": 0.4055345058441162, "learning_rate": 9.450778065023019e-06, "loss": 0.05957464128732681, "step": 4786 }, { "epoch": 0.6471706562793095, "grad_norm": 0.547907292842865, "learning_rate": 9.444419810191091e-06, "loss": 0.05349154770374298, "step": 4787 }, { "epoch": 0.6473058496480748, "grad_norm": 0.43033722043037415, "learning_rate": 9.43806271206059e-06, "loss": 0.04731358587741852, "step": 4788 }, { "epoch": 0.64744104301684, "grad_norm": 1.0177619457244873, "learning_rate": 9.431706771955089e-06, "loss": 0.06784875690937042, "step": 4789 }, { "epoch": 0.6475762363856052, "grad_norm": 0.7726863622665405, "learning_rate": 9.425351991197937e-06, "loss": 0.06031767278909683, "step": 4790 }, { "epoch": 0.6477114297543706, "grad_norm": 0.41456326842308044, "learning_rate": 9.418998371112221e-06, "loss": 0.046337857842445374, "step": 4791 }, { "epoch": 0.6478466231231358, "grad_norm": 0.39923757314682007, "learning_rate": 9.412645913020807e-06, "loss": 0.055342741310596466, "step": 4792 }, { "epoch": 0.6479818164919011, "grad_norm": 1.0042386054992676, "learning_rate": 9.406294618246313e-06, "loss": 0.05199577659368515, "step": 4793 }, { "epoch": 0.6481170098606663, "grad_norm": 0.6410531401634216, "learning_rate": 9.399944488111103e-06, "loss": 0.05791462957859039, "step": 4794 }, { "epoch": 0.6482522032294316, "grad_norm": 0.37604424357414246, "learning_rate": 9.39359552393732e-06, "loss": 0.04712918400764465, "step": 4795 }, { "epoch": 0.6483873965981969, "grad_norm": 0.33884933590888977, "learning_rate": 9.387247727046845e-06, "loss": 0.05989651381969452, "step": 4796 }, { "epoch": 0.6485225899669621, "grad_norm": 0.6658311486244202, "learning_rate": 9.380901098761319e-06, "loss": 0.05861292779445648, "step": 4797 }, { "epoch": 0.6486577833357274, "grad_norm": 0.5477061867713928, "learning_rate": 9.374555640402153e-06, "loss": 0.07044249773025513, "step": 4798 }, { "epoch": 0.6487929767044927, "grad_norm": 0.9148610830307007, "learning_rate": 9.368211353290503e-06, "loss": 0.06327980756759644, "step": 4799 }, { "epoch": 0.6489281700732579, "grad_norm": 1.0573673248291016, "learning_rate": 9.36186823874728e-06, "loss": 0.04950319230556488, "step": 4800 }, { "epoch": 0.6490633634420232, "grad_norm": 0.45756450295448303, "learning_rate": 9.355526298093152e-06, "loss": 0.05846443399786949, "step": 4801 }, { "epoch": 0.6491985568107884, "grad_norm": 0.3989078104496002, "learning_rate": 9.34918553264855e-06, "loss": 0.05459968000650406, "step": 4802 }, { "epoch": 0.6493337501795537, "grad_norm": 0.48171260952949524, "learning_rate": 9.342845943733658e-06, "loss": 0.05709845572710037, "step": 4803 }, { "epoch": 0.649468943548319, "grad_norm": 0.8460701704025269, "learning_rate": 9.336507532668407e-06, "loss": 0.04590034484863281, "step": 4804 }, { "epoch": 0.6496041369170842, "grad_norm": 0.6834648847579956, "learning_rate": 9.33017030077249e-06, "loss": 0.05731251835823059, "step": 4805 }, { "epoch": 0.6497393302858495, "grad_norm": 0.4440569579601288, "learning_rate": 9.323834249365346e-06, "loss": 0.039629578590393066, "step": 4806 }, { "epoch": 0.6498745236546147, "grad_norm": 0.35447558760643005, "learning_rate": 9.317499379766183e-06, "loss": 0.04773441702127457, "step": 4807 }, { "epoch": 0.65000971702338, "grad_norm": 0.40892234444618225, "learning_rate": 9.311165693293954e-06, "loss": 0.05303184688091278, "step": 4808 }, { "epoch": 0.6501449103921453, "grad_norm": 0.5478176474571228, "learning_rate": 9.304833191267364e-06, "loss": 0.051091477274894714, "step": 4809 }, { "epoch": 0.6502801037609105, "grad_norm": 0.3483165204524994, "learning_rate": 9.298501875004874e-06, "loss": 0.04721619188785553, "step": 4810 }, { "epoch": 0.6504152971296758, "grad_norm": 0.3029663860797882, "learning_rate": 9.292171745824695e-06, "loss": 0.05366624891757965, "step": 4811 }, { "epoch": 0.650550490498441, "grad_norm": 0.21481314301490784, "learning_rate": 9.285842805044797e-06, "loss": 0.03443337231874466, "step": 4812 }, { "epoch": 0.6506856838672063, "grad_norm": 0.36269816756248474, "learning_rate": 9.279515053982905e-06, "loss": 0.059001073241233826, "step": 4813 }, { "epoch": 0.6508208772359716, "grad_norm": 0.467576801776886, "learning_rate": 9.273188493956476e-06, "loss": 0.0521400161087513, "step": 4814 }, { "epoch": 0.6509560706047368, "grad_norm": 0.4456583559513092, "learning_rate": 9.266863126282746e-06, "loss": 0.05726826190948486, "step": 4815 }, { "epoch": 0.6510912639735021, "grad_norm": 1.0652563571929932, "learning_rate": 9.260538952278683e-06, "loss": 0.06769102811813354, "step": 4816 }, { "epoch": 0.6512264573422674, "grad_norm": 0.3957787752151489, "learning_rate": 9.254215973261014e-06, "loss": 0.0648040771484375, "step": 4817 }, { "epoch": 0.6513616507110326, "grad_norm": 0.7416332364082336, "learning_rate": 9.247894190546228e-06, "loss": 0.04052651673555374, "step": 4818 }, { "epoch": 0.6514968440797979, "grad_norm": 0.7028465270996094, "learning_rate": 9.241573605450539e-06, "loss": 0.04837683215737343, "step": 4819 }, { "epoch": 0.6516320374485631, "grad_norm": 0.6483571529388428, "learning_rate": 9.235254219289937e-06, "loss": 0.05493997037410736, "step": 4820 }, { "epoch": 0.6517672308173285, "grad_norm": 1.38758385181427, "learning_rate": 9.228936033380143e-06, "loss": 0.05247645080089569, "step": 4821 }, { "epoch": 0.6519024241860937, "grad_norm": 0.5732993483543396, "learning_rate": 9.222619049036649e-06, "loss": 0.04802927002310753, "step": 4822 }, { "epoch": 0.6520376175548589, "grad_norm": 0.44083142280578613, "learning_rate": 9.216303267574674e-06, "loss": 0.05586346238851547, "step": 4823 }, { "epoch": 0.6521728109236242, "grad_norm": 0.39318540692329407, "learning_rate": 9.209988690309198e-06, "loss": 0.05186024308204651, "step": 4824 }, { "epoch": 0.6523080042923894, "grad_norm": 0.77596116065979, "learning_rate": 9.203675318554956e-06, "loss": 0.0651119127869606, "step": 4825 }, { "epoch": 0.6524431976611548, "grad_norm": 0.9065393209457397, "learning_rate": 9.19736315362642e-06, "loss": 0.054299451410770416, "step": 4826 }, { "epoch": 0.65257839102992, "grad_norm": 0.5990732312202454, "learning_rate": 9.191052196837825e-06, "loss": 0.06679188460111618, "step": 4827 }, { "epoch": 0.6527135843986852, "grad_norm": 0.8140655159950256, "learning_rate": 9.184742449503135e-06, "loss": 0.03726750984787941, "step": 4828 }, { "epoch": 0.6528487777674505, "grad_norm": 1.070213794708252, "learning_rate": 9.178433912936077e-06, "loss": 0.06304620951414108, "step": 4829 }, { "epoch": 0.6529839711362158, "grad_norm": 1.819206714630127, "learning_rate": 9.172126588450125e-06, "loss": 0.059059225022792816, "step": 4830 }, { "epoch": 0.653119164504981, "grad_norm": 0.282976359128952, "learning_rate": 9.165820477358491e-06, "loss": 0.03074474073946476, "step": 4831 }, { "epoch": 0.6532543578737463, "grad_norm": 0.31543517112731934, "learning_rate": 9.159515580974154e-06, "loss": 0.04291544109582901, "step": 4832 }, { "epoch": 0.6533895512425115, "grad_norm": 0.6946098804473877, "learning_rate": 9.15321190060981e-06, "loss": 0.051056768745183945, "step": 4833 }, { "epoch": 0.6535247446112769, "grad_norm": 0.7878214120864868, "learning_rate": 9.14690943757793e-06, "loss": 0.048043809831142426, "step": 4834 }, { "epoch": 0.6536599379800421, "grad_norm": 0.943025529384613, "learning_rate": 9.14060819319072e-06, "loss": 0.057242460548877716, "step": 4835 }, { "epoch": 0.6537951313488073, "grad_norm": 0.8952677249908447, "learning_rate": 9.134308168760127e-06, "loss": 0.06574608385562897, "step": 4836 }, { "epoch": 0.6539303247175726, "grad_norm": 0.23783697187900543, "learning_rate": 9.128009365597854e-06, "loss": 0.05044171214103699, "step": 4837 }, { "epoch": 0.6540655180863378, "grad_norm": 0.47051429748535156, "learning_rate": 9.121711785015342e-06, "loss": 0.04344892501831055, "step": 4838 }, { "epoch": 0.6542007114551032, "grad_norm": 0.4336937367916107, "learning_rate": 9.115415428323787e-06, "loss": 0.042462307959795, "step": 4839 }, { "epoch": 0.6543359048238684, "grad_norm": 0.4336228668689728, "learning_rate": 9.109120296834118e-06, "loss": 0.048836223781108856, "step": 4840 }, { "epoch": 0.6544710981926336, "grad_norm": 0.520319402217865, "learning_rate": 9.10282639185702e-06, "loss": 0.05925591289997101, "step": 4841 }, { "epoch": 0.6546062915613989, "grad_norm": 0.39654210209846497, "learning_rate": 9.096533714702913e-06, "loss": 0.055843666195869446, "step": 4842 }, { "epoch": 0.6547414849301642, "grad_norm": 0.39380353689193726, "learning_rate": 9.090242266681967e-06, "loss": 0.061240747570991516, "step": 4843 }, { "epoch": 0.6548766782989295, "grad_norm": 0.4190712571144104, "learning_rate": 9.083952049104094e-06, "loss": 0.050237759947776794, "step": 4844 }, { "epoch": 0.6550118716676947, "grad_norm": 0.3751411437988281, "learning_rate": 9.07766306327896e-06, "loss": 0.04893229529261589, "step": 4845 }, { "epoch": 0.6551470650364599, "grad_norm": 0.33267223834991455, "learning_rate": 9.071375310515949e-06, "loss": 0.06221573054790497, "step": 4846 }, { "epoch": 0.6552822584052252, "grad_norm": 0.38737332820892334, "learning_rate": 9.065088792124219e-06, "loss": 0.04428090900182724, "step": 4847 }, { "epoch": 0.6554174517739905, "grad_norm": 0.45950019359588623, "learning_rate": 9.058803509412647e-06, "loss": 0.05439738184213638, "step": 4848 }, { "epoch": 0.6555526451427558, "grad_norm": 0.2903836965560913, "learning_rate": 9.05251946368987e-06, "loss": 0.04227876663208008, "step": 4849 }, { "epoch": 0.655687838511521, "grad_norm": 0.27683648467063904, "learning_rate": 9.046236656264258e-06, "loss": 0.03420381247997284, "step": 4850 }, { "epoch": 0.6558230318802862, "grad_norm": 0.32851892709732056, "learning_rate": 9.03995508844392e-06, "loss": 0.054434239864349365, "step": 4851 }, { "epoch": 0.6559582252490516, "grad_norm": 0.8404786586761475, "learning_rate": 9.033674761536718e-06, "loss": 0.056378528475761414, "step": 4852 }, { "epoch": 0.6560934186178168, "grad_norm": 0.5099725127220154, "learning_rate": 9.027395676850244e-06, "loss": 0.04750455170869827, "step": 4853 }, { "epoch": 0.6562286119865821, "grad_norm": 0.3070297837257385, "learning_rate": 9.02111783569184e-06, "loss": 0.0587763711810112, "step": 4854 }, { "epoch": 0.6563638053553473, "grad_norm": 0.4024368226528168, "learning_rate": 9.014841239368591e-06, "loss": 0.058742910623550415, "step": 4855 }, { "epoch": 0.6564989987241125, "grad_norm": 0.4439047873020172, "learning_rate": 9.008565889187308e-06, "loss": 0.04431654512882233, "step": 4856 }, { "epoch": 0.6566341920928779, "grad_norm": 0.5737723112106323, "learning_rate": 9.00229178645456e-06, "loss": 0.054135292768478394, "step": 4857 }, { "epoch": 0.6567693854616431, "grad_norm": 0.8024532198905945, "learning_rate": 8.996018932476641e-06, "loss": 0.04513950273394585, "step": 4858 }, { "epoch": 0.6569045788304083, "grad_norm": 0.5955286026000977, "learning_rate": 8.989747328559606e-06, "loss": 0.0479813888669014, "step": 4859 }, { "epoch": 0.6570397721991736, "grad_norm": 0.5546364188194275, "learning_rate": 8.98347697600922e-06, "loss": 0.050392478704452515, "step": 4860 }, { "epoch": 0.6571749655679389, "grad_norm": 0.5462738871574402, "learning_rate": 8.977207876131013e-06, "loss": 0.05263403058052063, "step": 4861 }, { "epoch": 0.6573101589367042, "grad_norm": 0.2945302128791809, "learning_rate": 8.970940030230245e-06, "loss": 0.047712430357933044, "step": 4862 }, { "epoch": 0.6574453523054694, "grad_norm": 0.4589152932167053, "learning_rate": 8.96467343961191e-06, "loss": 0.04600256681442261, "step": 4863 }, { "epoch": 0.6575805456742346, "grad_norm": 0.54264235496521, "learning_rate": 8.958408105580759e-06, "loss": 0.04188507795333862, "step": 4864 }, { "epoch": 0.657715739043, "grad_norm": 0.5305918455123901, "learning_rate": 8.952144029441248e-06, "loss": 0.03736427426338196, "step": 4865 }, { "epoch": 0.6578509324117652, "grad_norm": 0.4474351704120636, "learning_rate": 8.945881212497603e-06, "loss": 0.04515926539897919, "step": 4866 }, { "epoch": 0.6579861257805305, "grad_norm": 0.7046924233436584, "learning_rate": 8.939619656053777e-06, "loss": 0.05059683322906494, "step": 4867 }, { "epoch": 0.6581213191492957, "grad_norm": 0.546092689037323, "learning_rate": 8.933359361413456e-06, "loss": 0.046880945563316345, "step": 4868 }, { "epoch": 0.6582565125180609, "grad_norm": 0.5458420515060425, "learning_rate": 8.92710032988007e-06, "loss": 0.04236246645450592, "step": 4869 }, { "epoch": 0.6583917058868263, "grad_norm": 0.6035169363021851, "learning_rate": 8.920842562756773e-06, "loss": 0.05331994593143463, "step": 4870 }, { "epoch": 0.6585268992555915, "grad_norm": 0.38238996267318726, "learning_rate": 8.914586061346474e-06, "loss": 0.04808778315782547, "step": 4871 }, { "epoch": 0.6586620926243568, "grad_norm": 0.5059957504272461, "learning_rate": 8.908330826951811e-06, "loss": 0.05822131037712097, "step": 4872 }, { "epoch": 0.658797285993122, "grad_norm": 0.5722382068634033, "learning_rate": 8.902076860875155e-06, "loss": 0.052761077880859375, "step": 4873 }, { "epoch": 0.6589324793618873, "grad_norm": 0.9064381122589111, "learning_rate": 8.895824164418615e-06, "loss": 0.053103089332580566, "step": 4874 }, { "epoch": 0.6590676727306526, "grad_norm": 0.3827478289604187, "learning_rate": 8.889572738884033e-06, "loss": 0.04915151745080948, "step": 4875 }, { "epoch": 0.6592028660994178, "grad_norm": 1.523366093635559, "learning_rate": 8.88332258557299e-06, "loss": 0.06489886343479156, "step": 4876 }, { "epoch": 0.6593380594681831, "grad_norm": 0.8395458459854126, "learning_rate": 8.877073705786806e-06, "loss": 0.05081567168235779, "step": 4877 }, { "epoch": 0.6594732528369484, "grad_norm": 0.49804267287254333, "learning_rate": 8.870826100826527e-06, "loss": 0.04341159015893936, "step": 4878 }, { "epoch": 0.6596084462057136, "grad_norm": 0.46628883481025696, "learning_rate": 8.86457977199294e-06, "loss": 0.052684515714645386, "step": 4879 }, { "epoch": 0.6597436395744789, "grad_norm": 1.0826929807662964, "learning_rate": 8.85833472058656e-06, "loss": 0.047316402196884155, "step": 4880 }, { "epoch": 0.6598788329432441, "grad_norm": 1.2853542566299438, "learning_rate": 8.852090947907643e-06, "loss": 0.06989351660013199, "step": 4881 }, { "epoch": 0.6600140263120094, "grad_norm": 0.2032260298728943, "learning_rate": 8.84584845525618e-06, "loss": 0.0314270555973053, "step": 4882 }, { "epoch": 0.6601492196807747, "grad_norm": 0.3008894622325897, "learning_rate": 8.83960724393188e-06, "loss": 0.03849566727876663, "step": 4883 }, { "epoch": 0.6602844130495399, "grad_norm": 0.7275771498680115, "learning_rate": 8.833367315234206e-06, "loss": 0.05365827679634094, "step": 4884 }, { "epoch": 0.6604196064183052, "grad_norm": 0.30494412779808044, "learning_rate": 8.82712867046234e-06, "loss": 0.03763725608587265, "step": 4885 }, { "epoch": 0.6605547997870704, "grad_norm": 0.2807183861732483, "learning_rate": 8.820891310915203e-06, "loss": 0.04110005870461464, "step": 4886 }, { "epoch": 0.6606899931558357, "grad_norm": 0.40106067061424255, "learning_rate": 8.81465523789145e-06, "loss": 0.05846823751926422, "step": 4887 }, { "epoch": 0.660825186524601, "grad_norm": 0.8326048254966736, "learning_rate": 8.808420452689455e-06, "loss": 0.047958508133888245, "step": 4888 }, { "epoch": 0.6609603798933662, "grad_norm": 0.7759664058685303, "learning_rate": 8.802186956607344e-06, "loss": 0.04456786811351776, "step": 4889 }, { "epoch": 0.6610955732621315, "grad_norm": 0.5056163668632507, "learning_rate": 8.795954750942954e-06, "loss": 0.04759174585342407, "step": 4890 }, { "epoch": 0.6612307666308967, "grad_norm": 0.40979140996932983, "learning_rate": 8.789723836993878e-06, "loss": 0.05130434036254883, "step": 4891 }, { "epoch": 0.661365959999662, "grad_norm": 1.1193937063217163, "learning_rate": 8.783494216057407e-06, "loss": 0.06788745522499084, "step": 4892 }, { "epoch": 0.6615011533684273, "grad_norm": 0.6738024950027466, "learning_rate": 8.777265889430593e-06, "loss": 0.03771587088704109, "step": 4893 }, { "epoch": 0.6616363467371925, "grad_norm": 0.4568180441856384, "learning_rate": 8.771038858410206e-06, "loss": 0.06290781497955322, "step": 4894 }, { "epoch": 0.6617715401059578, "grad_norm": 0.8085651397705078, "learning_rate": 8.764813124292744e-06, "loss": 0.0503215417265892, "step": 4895 }, { "epoch": 0.6619067334747231, "grad_norm": 0.728347897529602, "learning_rate": 8.758588688374445e-06, "loss": 0.06116729974746704, "step": 4896 }, { "epoch": 0.6620419268434883, "grad_norm": 0.9897721409797668, "learning_rate": 8.752365551951262e-06, "loss": 0.060762468725442886, "step": 4897 }, { "epoch": 0.6621771202122536, "grad_norm": 0.7816612124443054, "learning_rate": 8.74614371631888e-06, "loss": 0.04762818291783333, "step": 4898 }, { "epoch": 0.6623123135810188, "grad_norm": 1.0026624202728271, "learning_rate": 8.739923182772732e-06, "loss": 0.0729488730430603, "step": 4899 }, { "epoch": 0.6624475069497842, "grad_norm": 0.7975327968597412, "learning_rate": 8.733703952607956e-06, "loss": 0.055999740958213806, "step": 4900 }, { "epoch": 0.6625827003185494, "grad_norm": 0.3464163839817047, "learning_rate": 8.727486027119443e-06, "loss": 0.05318213999271393, "step": 4901 }, { "epoch": 0.6627178936873146, "grad_norm": 0.4403265118598938, "learning_rate": 8.721269407601783e-06, "loss": 0.05413217842578888, "step": 4902 }, { "epoch": 0.6628530870560799, "grad_norm": 0.262787401676178, "learning_rate": 8.71505409534931e-06, "loss": 0.03239549696445465, "step": 4903 }, { "epoch": 0.6629882804248451, "grad_norm": 1.250382900238037, "learning_rate": 8.708840091656093e-06, "loss": 0.07249332964420319, "step": 4904 }, { "epoch": 0.6631234737936105, "grad_norm": 0.2127716988325119, "learning_rate": 8.70262739781592e-06, "loss": 0.03368908166885376, "step": 4905 }, { "epoch": 0.6632586671623757, "grad_norm": 0.47799018025398254, "learning_rate": 8.696416015122302e-06, "loss": 0.04219493269920349, "step": 4906 }, { "epoch": 0.6633938605311409, "grad_norm": 0.47827261686325073, "learning_rate": 8.690205944868487e-06, "loss": 0.05766450986266136, "step": 4907 }, { "epoch": 0.6635290538999062, "grad_norm": 0.41965779662132263, "learning_rate": 8.683997188347436e-06, "loss": 0.06484150886535645, "step": 4908 }, { "epoch": 0.6636642472686715, "grad_norm": 0.7600243091583252, "learning_rate": 8.677789746851855e-06, "loss": 0.05288514122366905, "step": 4909 }, { "epoch": 0.6637994406374368, "grad_norm": 0.5731505155563354, "learning_rate": 8.671583621674167e-06, "loss": 0.056962043046951294, "step": 4910 }, { "epoch": 0.663934634006202, "grad_norm": 0.8263890743255615, "learning_rate": 8.665378814106512e-06, "loss": 0.04230496287345886, "step": 4911 }, { "epoch": 0.6640698273749672, "grad_norm": 0.5004463791847229, "learning_rate": 8.65917532544077e-06, "loss": 0.06371943652629852, "step": 4912 }, { "epoch": 0.6642050207437326, "grad_norm": 0.62314373254776, "learning_rate": 8.652973156968532e-06, "loss": 0.046881675720214844, "step": 4913 }, { "epoch": 0.6643402141124978, "grad_norm": 0.8253375887870789, "learning_rate": 8.646772309981141e-06, "loss": 0.04562593251466751, "step": 4914 }, { "epoch": 0.664475407481263, "grad_norm": 0.6556355357170105, "learning_rate": 8.640572785769624e-06, "loss": 0.0736011266708374, "step": 4915 }, { "epoch": 0.6646106008500283, "grad_norm": 0.669047474861145, "learning_rate": 8.63437458562477e-06, "loss": 0.05263333022594452, "step": 4916 }, { "epoch": 0.6647457942187935, "grad_norm": 0.22817447781562805, "learning_rate": 8.628177710837068e-06, "loss": 0.04105766862630844, "step": 4917 }, { "epoch": 0.6648809875875589, "grad_norm": 0.5972623229026794, "learning_rate": 8.621982162696752e-06, "loss": 0.05957232043147087, "step": 4918 }, { "epoch": 0.6650161809563241, "grad_norm": 0.6017746925354004, "learning_rate": 8.615787942493766e-06, "loss": 0.041902463883161545, "step": 4919 }, { "epoch": 0.6651513743250893, "grad_norm": 0.41817542910575867, "learning_rate": 8.609595051517765e-06, "loss": 0.040782541036605835, "step": 4920 }, { "epoch": 0.6652865676938546, "grad_norm": 0.3211895823478699, "learning_rate": 8.603403491058157e-06, "loss": 0.038159385323524475, "step": 4921 }, { "epoch": 0.6654217610626199, "grad_norm": 1.154258131980896, "learning_rate": 8.597213262404046e-06, "loss": 0.056552689522504807, "step": 4922 }, { "epoch": 0.6655569544313852, "grad_norm": 0.20456521213054657, "learning_rate": 8.591024366844291e-06, "loss": 0.0435362309217453, "step": 4923 }, { "epoch": 0.6656921478001504, "grad_norm": 0.8878408670425415, "learning_rate": 8.584836805667434e-06, "loss": 0.044599395245313644, "step": 4924 }, { "epoch": 0.6658273411689156, "grad_norm": 0.32103127241134644, "learning_rate": 8.578650580161754e-06, "loss": 0.0503804013133049, "step": 4925 }, { "epoch": 0.665962534537681, "grad_norm": 0.38880297541618347, "learning_rate": 8.572465691615275e-06, "loss": 0.05300453305244446, "step": 4926 }, { "epoch": 0.6660977279064462, "grad_norm": 0.4174281358718872, "learning_rate": 8.56628214131571e-06, "loss": 0.06797348707914352, "step": 4927 }, { "epoch": 0.6662329212752115, "grad_norm": 0.7094303369522095, "learning_rate": 8.560099930550523e-06, "loss": 0.05302663892507553, "step": 4928 }, { "epoch": 0.6663681146439767, "grad_norm": 0.7040273547172546, "learning_rate": 8.553919060606866e-06, "loss": 0.07105447351932526, "step": 4929 }, { "epoch": 0.6665033080127419, "grad_norm": 0.3127346634864807, "learning_rate": 8.54773953277163e-06, "loss": 0.045730799436569214, "step": 4930 }, { "epoch": 0.6666385013815073, "grad_norm": 0.39840197563171387, "learning_rate": 8.541561348331433e-06, "loss": 0.04773852229118347, "step": 4931 }, { "epoch": 0.6667736947502725, "grad_norm": 0.5970373749732971, "learning_rate": 8.535384508572603e-06, "loss": 0.06129232794046402, "step": 4932 }, { "epoch": 0.6669088881190378, "grad_norm": 0.38890382647514343, "learning_rate": 8.529209014781202e-06, "loss": 0.062496498227119446, "step": 4933 }, { "epoch": 0.667044081487803, "grad_norm": 0.2800302803516388, "learning_rate": 8.523034868242984e-06, "loss": 0.04507967829704285, "step": 4934 }, { "epoch": 0.6671792748565682, "grad_norm": 0.6557746529579163, "learning_rate": 8.51686207024344e-06, "loss": 0.06638331711292267, "step": 4935 }, { "epoch": 0.6673144682253336, "grad_norm": 0.34183457493782043, "learning_rate": 8.510690622067792e-06, "loss": 0.053346022963523865, "step": 4936 }, { "epoch": 0.6674496615940988, "grad_norm": 0.2507877051830292, "learning_rate": 8.50452052500096e-06, "loss": 0.04421476274728775, "step": 4937 }, { "epoch": 0.667584854962864, "grad_norm": 0.7738170027732849, "learning_rate": 8.498351780327594e-06, "loss": 0.05261988192796707, "step": 4938 }, { "epoch": 0.6677200483316293, "grad_norm": 0.4712650179862976, "learning_rate": 8.492184389332061e-06, "loss": 0.05027683824300766, "step": 4939 }, { "epoch": 0.6678552417003946, "grad_norm": 0.3059353530406952, "learning_rate": 8.486018353298432e-06, "loss": 0.06472623348236084, "step": 4940 }, { "epoch": 0.6679904350691599, "grad_norm": 0.711154043674469, "learning_rate": 8.479853673510528e-06, "loss": 0.049798935651779175, "step": 4941 }, { "epoch": 0.6681256284379251, "grad_norm": 0.7332208156585693, "learning_rate": 8.473690351251855e-06, "loss": 0.05915476754307747, "step": 4942 }, { "epoch": 0.6682608218066903, "grad_norm": 0.3910542130470276, "learning_rate": 8.467528387805656e-06, "loss": 0.04300757497549057, "step": 4943 }, { "epoch": 0.6683960151754557, "grad_norm": 0.3801325261592865, "learning_rate": 8.461367784454881e-06, "loss": 0.039240531623363495, "step": 4944 }, { "epoch": 0.6685312085442209, "grad_norm": 0.23581597208976746, "learning_rate": 8.455208542482195e-06, "loss": 0.04541178047657013, "step": 4945 }, { "epoch": 0.6686664019129862, "grad_norm": 0.412605345249176, "learning_rate": 8.449050663170004e-06, "loss": 0.0451117679476738, "step": 4946 }, { "epoch": 0.6688015952817514, "grad_norm": 0.4841384291648865, "learning_rate": 8.442894147800387e-06, "loss": 0.04456118866801262, "step": 4947 }, { "epoch": 0.6689367886505166, "grad_norm": 0.4538283050060272, "learning_rate": 8.436738997655184e-06, "loss": 0.04737669229507446, "step": 4948 }, { "epoch": 0.669071982019282, "grad_norm": 0.4836001694202423, "learning_rate": 8.430585214015918e-06, "loss": 0.05537992715835571, "step": 4949 }, { "epoch": 0.6692071753880472, "grad_norm": 0.385507196187973, "learning_rate": 8.424432798163838e-06, "loss": 0.04484360292553902, "step": 4950 }, { "epoch": 0.6693423687568125, "grad_norm": 1.2376917600631714, "learning_rate": 8.418281751379926e-06, "loss": 0.06745009124279022, "step": 4951 }, { "epoch": 0.6694775621255777, "grad_norm": 0.6492207646369934, "learning_rate": 8.41213207494484e-06, "loss": 0.06594588607549667, "step": 4952 }, { "epoch": 0.669612755494343, "grad_norm": 0.5820128321647644, "learning_rate": 8.405983770138992e-06, "loss": 0.0351836234331131, "step": 4953 }, { "epoch": 0.6697479488631083, "grad_norm": 0.5219746828079224, "learning_rate": 8.399836838242479e-06, "loss": 0.05730397254228592, "step": 4954 }, { "epoch": 0.6698831422318735, "grad_norm": 0.296224445104599, "learning_rate": 8.393691280535143e-06, "loss": 0.05526592954993248, "step": 4955 }, { "epoch": 0.6700183356006388, "grad_norm": 0.6880562901496887, "learning_rate": 8.387547098296516e-06, "loss": 0.05071261152625084, "step": 4956 }, { "epoch": 0.670153528969404, "grad_norm": 0.6879351735115051, "learning_rate": 8.38140429280583e-06, "loss": 0.03831213712692261, "step": 4957 }, { "epoch": 0.6702887223381693, "grad_norm": 0.42694202065467834, "learning_rate": 8.375262865342073e-06, "loss": 0.049122318625450134, "step": 4958 }, { "epoch": 0.6704239157069346, "grad_norm": 0.5570389032363892, "learning_rate": 8.36912281718391e-06, "loss": 0.053875505924224854, "step": 4959 }, { "epoch": 0.6705591090756998, "grad_norm": 0.7798247337341309, "learning_rate": 8.362984149609748e-06, "loss": 0.049223948270082474, "step": 4960 }, { "epoch": 0.6706943024444652, "grad_norm": 0.47196725010871887, "learning_rate": 8.356846863897672e-06, "loss": 0.06143927201628685, "step": 4961 }, { "epoch": 0.6708294958132304, "grad_norm": 0.35774117708206177, "learning_rate": 8.350710961325498e-06, "loss": 0.06524142622947693, "step": 4962 }, { "epoch": 0.6709646891819956, "grad_norm": 0.34857645630836487, "learning_rate": 8.344576443170768e-06, "loss": 0.05976579710841179, "step": 4963 }, { "epoch": 0.6710998825507609, "grad_norm": 0.4516732096672058, "learning_rate": 8.338443310710708e-06, "loss": 0.04969620704650879, "step": 4964 }, { "epoch": 0.6712350759195261, "grad_norm": 1.2974827289581299, "learning_rate": 8.332311565222284e-06, "loss": 0.05954143404960632, "step": 4965 }, { "epoch": 0.6713702692882914, "grad_norm": 0.4259440004825592, "learning_rate": 8.326181207982145e-06, "loss": 0.04360128194093704, "step": 4966 }, { "epoch": 0.6715054626570567, "grad_norm": 0.43535634875297546, "learning_rate": 8.32005224026666e-06, "loss": 0.06057393550872803, "step": 4967 }, { "epoch": 0.6716406560258219, "grad_norm": 0.6019091606140137, "learning_rate": 8.313924663351927e-06, "loss": 0.042790770530700684, "step": 4968 }, { "epoch": 0.6717758493945872, "grad_norm": 0.6441512107849121, "learning_rate": 8.307798478513733e-06, "loss": 0.06718750298023224, "step": 4969 }, { "epoch": 0.6719110427633525, "grad_norm": 0.5350854396820068, "learning_rate": 8.301673687027583e-06, "loss": 0.05609264224767685, "step": 4970 }, { "epoch": 0.6720462361321177, "grad_norm": 1.0968847274780273, "learning_rate": 8.295550290168692e-06, "loss": 0.054544731974601746, "step": 4971 }, { "epoch": 0.672181429500883, "grad_norm": 0.7787512540817261, "learning_rate": 8.289428289211977e-06, "loss": 0.060831211507320404, "step": 4972 }, { "epoch": 0.6723166228696482, "grad_norm": 0.6954808235168457, "learning_rate": 8.283307685432083e-06, "loss": 0.05581638216972351, "step": 4973 }, { "epoch": 0.6724518162384135, "grad_norm": 1.1396392583847046, "learning_rate": 8.277188480103348e-06, "loss": 0.08411954343318939, "step": 4974 }, { "epoch": 0.6725870096071788, "grad_norm": 0.24444527924060822, "learning_rate": 8.271070674499821e-06, "loss": 0.04333586245775223, "step": 4975 }, { "epoch": 0.672722202975944, "grad_norm": 0.30251526832580566, "learning_rate": 8.264954269895262e-06, "loss": 0.06129070743918419, "step": 4976 }, { "epoch": 0.6728573963447093, "grad_norm": 0.4709165692329407, "learning_rate": 8.258839267563134e-06, "loss": 0.06391668319702148, "step": 4977 }, { "epoch": 0.6729925897134745, "grad_norm": 0.7118197679519653, "learning_rate": 8.252725668776623e-06, "loss": 0.05873258784413338, "step": 4978 }, { "epoch": 0.6731277830822399, "grad_norm": 0.9924367666244507, "learning_rate": 8.24661347480861e-06, "loss": 0.038125358521938324, "step": 4979 }, { "epoch": 0.6732629764510051, "grad_norm": 1.1432424783706665, "learning_rate": 8.240502686931686e-06, "loss": 0.05306572467088699, "step": 4980 }, { "epoch": 0.6733981698197703, "grad_norm": 0.39897817373275757, "learning_rate": 8.234393306418148e-06, "loss": 0.035227708518505096, "step": 4981 }, { "epoch": 0.6735333631885356, "grad_norm": 0.5576872229576111, "learning_rate": 8.228285334539995e-06, "loss": 0.05256688594818115, "step": 4982 }, { "epoch": 0.6736685565573008, "grad_norm": 0.5145950317382812, "learning_rate": 8.22217877256896e-06, "loss": 0.0601494237780571, "step": 4983 }, { "epoch": 0.6738037499260662, "grad_norm": 1.218837857246399, "learning_rate": 8.216073621776436e-06, "loss": 0.07031473517417908, "step": 4984 }, { "epoch": 0.6739389432948314, "grad_norm": 0.5454159379005432, "learning_rate": 8.209969883433566e-06, "loss": 0.054414957761764526, "step": 4985 }, { "epoch": 0.6740741366635966, "grad_norm": 0.38681066036224365, "learning_rate": 8.203867558811177e-06, "loss": 0.043965667486190796, "step": 4986 }, { "epoch": 0.6742093300323619, "grad_norm": 0.9242783188819885, "learning_rate": 8.197766649179795e-06, "loss": 0.06138654798269272, "step": 4987 }, { "epoch": 0.6743445234011272, "grad_norm": 0.7326571941375732, "learning_rate": 8.191667155809684e-06, "loss": 0.05227571725845337, "step": 4988 }, { "epoch": 0.6744797167698925, "grad_norm": 0.45089221000671387, "learning_rate": 8.185569079970764e-06, "loss": 0.07390367984771729, "step": 4989 }, { "epoch": 0.6746149101386577, "grad_norm": 0.29904136061668396, "learning_rate": 8.179472422932709e-06, "loss": 0.038001649081707, "step": 4990 }, { "epoch": 0.6747501035074229, "grad_norm": 1.613762378692627, "learning_rate": 8.17337718596486e-06, "loss": 0.06401367485523224, "step": 4991 }, { "epoch": 0.6748852968761883, "grad_norm": 0.8714796900749207, "learning_rate": 8.167283370336295e-06, "loss": 0.06580977141857147, "step": 4992 }, { "epoch": 0.6750204902449535, "grad_norm": 0.274777352809906, "learning_rate": 8.161190977315766e-06, "loss": 0.03572496771812439, "step": 4993 }, { "epoch": 0.6751556836137187, "grad_norm": 0.24242113530635834, "learning_rate": 8.155100008171736e-06, "loss": 0.03542621433734894, "step": 4994 }, { "epoch": 0.675290876982484, "grad_norm": 0.3669450283050537, "learning_rate": 8.149010464172392e-06, "loss": 0.05259627848863602, "step": 4995 }, { "epoch": 0.6754260703512492, "grad_norm": 0.547822892665863, "learning_rate": 8.142922346585597e-06, "loss": 0.04516976699233055, "step": 4996 }, { "epoch": 0.6755612637200146, "grad_norm": 0.8237200379371643, "learning_rate": 8.13683565667895e-06, "loss": 0.061566174030303955, "step": 4997 }, { "epoch": 0.6756964570887798, "grad_norm": 0.19074095785617828, "learning_rate": 8.13075039571971e-06, "loss": 0.035949528217315674, "step": 4998 }, { "epoch": 0.675831650457545, "grad_norm": 0.5671090483665466, "learning_rate": 8.124666564974864e-06, "loss": 0.07176803052425385, "step": 4999 }, { "epoch": 0.6759668438263103, "grad_norm": 0.3654058873653412, "learning_rate": 8.11858416571111e-06, "loss": 0.05109212547540665, "step": 5000 }, { "epoch": 0.6761020371950756, "grad_norm": 0.36814555525779724, "learning_rate": 8.112503199194821e-06, "loss": 0.05827689170837402, "step": 5001 }, { "epoch": 0.6762372305638409, "grad_norm": 0.6010149121284485, "learning_rate": 8.106423666692108e-06, "loss": 0.04330838471651077, "step": 5002 }, { "epoch": 0.6763724239326061, "grad_norm": 0.8253535628318787, "learning_rate": 8.100345569468742e-06, "loss": 0.04694852977991104, "step": 5003 }, { "epoch": 0.6765076173013713, "grad_norm": 0.6037037968635559, "learning_rate": 8.094268908790215e-06, "loss": 0.06027128919959068, "step": 5004 }, { "epoch": 0.6766428106701367, "grad_norm": 0.7514895796775818, "learning_rate": 8.088193685921733e-06, "loss": 0.04608899727463722, "step": 5005 }, { "epoch": 0.6767780040389019, "grad_norm": 0.519709587097168, "learning_rate": 8.082119902128185e-06, "loss": 0.05882525444030762, "step": 5006 }, { "epoch": 0.6769131974076672, "grad_norm": 0.3477354645729065, "learning_rate": 8.076047558674164e-06, "loss": 0.02975265122950077, "step": 5007 }, { "epoch": 0.6770483907764324, "grad_norm": 0.338996022939682, "learning_rate": 8.069976656823964e-06, "loss": 0.045697540044784546, "step": 5008 }, { "epoch": 0.6771835841451976, "grad_norm": 0.4676816463470459, "learning_rate": 8.063907197841574e-06, "loss": 0.03803064674139023, "step": 5009 }, { "epoch": 0.677318777513963, "grad_norm": 0.5746403932571411, "learning_rate": 8.057839182990698e-06, "loss": 0.057335883378982544, "step": 5010 }, { "epoch": 0.6774539708827282, "grad_norm": 0.3573558032512665, "learning_rate": 8.051772613534725e-06, "loss": 0.061646685004234314, "step": 5011 }, { "epoch": 0.6775891642514935, "grad_norm": 1.0433835983276367, "learning_rate": 8.045707490736745e-06, "loss": 0.07262985408306122, "step": 5012 }, { "epoch": 0.6777243576202587, "grad_norm": 0.5547288656234741, "learning_rate": 8.039643815859552e-06, "loss": 0.05821345001459122, "step": 5013 }, { "epoch": 0.677859550989024, "grad_norm": 0.5345027446746826, "learning_rate": 8.033581590165627e-06, "loss": 0.05726383253931999, "step": 5014 }, { "epoch": 0.6779947443577893, "grad_norm": 0.38071736693382263, "learning_rate": 8.027520814917175e-06, "loss": 0.06746025383472443, "step": 5015 }, { "epoch": 0.6781299377265545, "grad_norm": 0.3310386538505554, "learning_rate": 8.021461491376064e-06, "loss": 0.04701864719390869, "step": 5016 }, { "epoch": 0.6782651310953198, "grad_norm": 0.41735756397247314, "learning_rate": 8.015403620803885e-06, "loss": 0.056895673274993896, "step": 5017 }, { "epoch": 0.678400324464085, "grad_norm": 0.2808677852153778, "learning_rate": 8.009347204461922e-06, "loss": 0.040169231593608856, "step": 5018 }, { "epoch": 0.6785355178328503, "grad_norm": 0.31439903378486633, "learning_rate": 8.003292243611143e-06, "loss": 0.051807381212711334, "step": 5019 }, { "epoch": 0.6786707112016156, "grad_norm": 0.4138041138648987, "learning_rate": 7.99723873951224e-06, "loss": 0.06021660566329956, "step": 5020 }, { "epoch": 0.6788059045703808, "grad_norm": 0.6591784358024597, "learning_rate": 7.991186693425563e-06, "loss": 0.07591414451599121, "step": 5021 }, { "epoch": 0.678941097939146, "grad_norm": 0.2700737416744232, "learning_rate": 7.9851361066112e-06, "loss": 0.054768163710832596, "step": 5022 }, { "epoch": 0.6790762913079114, "grad_norm": 0.4354577958583832, "learning_rate": 7.979086980328907e-06, "loss": 0.04215976595878601, "step": 5023 }, { "epoch": 0.6792114846766766, "grad_norm": 0.3354848325252533, "learning_rate": 7.973039315838137e-06, "loss": 0.053591564297676086, "step": 5024 }, { "epoch": 0.6793466780454419, "grad_norm": 0.5004885792732239, "learning_rate": 7.966993114398067e-06, "loss": 0.05722540616989136, "step": 5025 }, { "epoch": 0.6794818714142071, "grad_norm": 0.43668463826179504, "learning_rate": 7.960948377267524e-06, "loss": 0.06277358531951904, "step": 5026 }, { "epoch": 0.6796170647829723, "grad_norm": 0.9970447421073914, "learning_rate": 7.954905105705071e-06, "loss": 0.05152648687362671, "step": 5027 }, { "epoch": 0.6797522581517377, "grad_norm": 0.7500037550926208, "learning_rate": 7.948863300968938e-06, "loss": 0.05576123297214508, "step": 5028 }, { "epoch": 0.6798874515205029, "grad_norm": 0.25165197253227234, "learning_rate": 7.942822964317078e-06, "loss": 0.04088078439235687, "step": 5029 }, { "epoch": 0.6800226448892682, "grad_norm": 0.4969177842140198, "learning_rate": 7.936784097007105e-06, "loss": 0.051823690533638, "step": 5030 }, { "epoch": 0.6801578382580334, "grad_norm": 0.7050125002861023, "learning_rate": 7.930746700296344e-06, "loss": 0.053785525262355804, "step": 5031 }, { "epoch": 0.6802930316267987, "grad_norm": 0.349983811378479, "learning_rate": 7.924710775441822e-06, "loss": 0.04884212464094162, "step": 5032 }, { "epoch": 0.680428224995564, "grad_norm": 0.26775214076042175, "learning_rate": 7.918676323700241e-06, "loss": 0.04708987474441528, "step": 5033 }, { "epoch": 0.6805634183643292, "grad_norm": 0.3257896900177002, "learning_rate": 7.912643346328023e-06, "loss": 0.043380655348300934, "step": 5034 }, { "epoch": 0.6806986117330945, "grad_norm": 0.6439222097396851, "learning_rate": 7.906611844581251e-06, "loss": 0.05791155621409416, "step": 5035 }, { "epoch": 0.6808338051018598, "grad_norm": 1.3756972551345825, "learning_rate": 7.900581819715713e-06, "loss": 0.07098507136106491, "step": 5036 }, { "epoch": 0.680968998470625, "grad_norm": 0.6111595630645752, "learning_rate": 7.894553272986901e-06, "loss": 0.034134574234485626, "step": 5037 }, { "epoch": 0.6811041918393903, "grad_norm": 0.36403319239616394, "learning_rate": 7.888526205649993e-06, "loss": 0.04534803330898285, "step": 5038 }, { "epoch": 0.6812393852081555, "grad_norm": 0.22279338538646698, "learning_rate": 7.882500618959849e-06, "loss": 0.05339765548706055, "step": 5039 }, { "epoch": 0.6813745785769209, "grad_norm": 0.9752552509307861, "learning_rate": 7.876476514171033e-06, "loss": 0.05286046862602234, "step": 5040 }, { "epoch": 0.6815097719456861, "grad_norm": 0.714496910572052, "learning_rate": 7.870453892537788e-06, "loss": 0.04931214451789856, "step": 5041 }, { "epoch": 0.6816449653144513, "grad_norm": 0.6221680045127869, "learning_rate": 7.864432755314068e-06, "loss": 0.08361977338790894, "step": 5042 }, { "epoch": 0.6817801586832166, "grad_norm": 0.4232570230960846, "learning_rate": 7.858413103753499e-06, "loss": 0.0306400153785944, "step": 5043 }, { "epoch": 0.6819153520519818, "grad_norm": 0.7650275230407715, "learning_rate": 7.852394939109408e-06, "loss": 0.06406787782907486, "step": 5044 }, { "epoch": 0.6820505454207472, "grad_norm": 0.3299720287322998, "learning_rate": 7.846378262634803e-06, "loss": 0.0422019436955452, "step": 5045 }, { "epoch": 0.6821857387895124, "grad_norm": 0.2432260811328888, "learning_rate": 7.840363075582385e-06, "loss": 0.0305987149477005, "step": 5046 }, { "epoch": 0.6823209321582776, "grad_norm": 0.507563054561615, "learning_rate": 7.834349379204565e-06, "loss": 0.06139723211526871, "step": 5047 }, { "epoch": 0.6824561255270429, "grad_norm": 0.68167644739151, "learning_rate": 7.828337174753411e-06, "loss": 0.06552176922559738, "step": 5048 }, { "epoch": 0.6825913188958082, "grad_norm": 0.34419867396354675, "learning_rate": 7.822326463480703e-06, "loss": 0.051542606204748154, "step": 5049 }, { "epoch": 0.6827265122645734, "grad_norm": 0.36507555842399597, "learning_rate": 7.816317246637901e-06, "loss": 0.04676346480846405, "step": 5050 }, { "epoch": 0.6828617056333387, "grad_norm": 0.5398001670837402, "learning_rate": 7.810309525476152e-06, "loss": 0.0544717013835907, "step": 5051 }, { "epoch": 0.6829968990021039, "grad_norm": 0.6113669872283936, "learning_rate": 7.804303301246311e-06, "loss": 0.062158867716789246, "step": 5052 }, { "epoch": 0.6831320923708692, "grad_norm": 0.2693955898284912, "learning_rate": 7.798298575198884e-06, "loss": 0.039754901081323624, "step": 5053 }, { "epoch": 0.6832672857396345, "grad_norm": 0.607220470905304, "learning_rate": 7.792295348584103e-06, "loss": 0.0434325709939003, "step": 5054 }, { "epoch": 0.6834024791083997, "grad_norm": 0.3906765282154083, "learning_rate": 7.786293622651866e-06, "loss": 0.056094035506248474, "step": 5055 }, { "epoch": 0.683537672477165, "grad_norm": 0.6982684135437012, "learning_rate": 7.78029339865176e-06, "loss": 0.06186196580529213, "step": 5056 }, { "epoch": 0.6836728658459302, "grad_norm": 0.6677371859550476, "learning_rate": 7.774294677833078e-06, "loss": 0.060383137315511703, "step": 5057 }, { "epoch": 0.6838080592146956, "grad_norm": 0.6132252216339111, "learning_rate": 7.768297461444766e-06, "loss": 0.054885417222976685, "step": 5058 }, { "epoch": 0.6839432525834608, "grad_norm": 0.5411122441291809, "learning_rate": 7.762301750735494e-06, "loss": 0.06308025121688843, "step": 5059 }, { "epoch": 0.684078445952226, "grad_norm": 0.6690797805786133, "learning_rate": 7.756307546953592e-06, "loss": 0.04824995994567871, "step": 5060 }, { "epoch": 0.6842136393209913, "grad_norm": 0.46731314063072205, "learning_rate": 7.750314851347087e-06, "loss": 0.04488636180758476, "step": 5061 }, { "epoch": 0.6843488326897565, "grad_norm": 0.3768135905265808, "learning_rate": 7.74432366516369e-06, "loss": 0.05284181982278824, "step": 5062 }, { "epoch": 0.6844840260585219, "grad_norm": 0.9547297358512878, "learning_rate": 7.738333989650794e-06, "loss": 0.0503455251455307, "step": 5063 }, { "epoch": 0.6846192194272871, "grad_norm": 0.45269399881362915, "learning_rate": 7.732345826055487e-06, "loss": 0.06963029503822327, "step": 5064 }, { "epoch": 0.6847544127960523, "grad_norm": 0.3030494749546051, "learning_rate": 7.726359175624537e-06, "loss": 0.04146261513233185, "step": 5065 }, { "epoch": 0.6848896061648176, "grad_norm": 0.3183102011680603, "learning_rate": 7.720374039604395e-06, "loss": 0.04418213665485382, "step": 5066 }, { "epoch": 0.6850247995335829, "grad_norm": 0.35937291383743286, "learning_rate": 7.714390419241198e-06, "loss": 0.05535361170768738, "step": 5067 }, { "epoch": 0.6851599929023482, "grad_norm": 0.523383617401123, "learning_rate": 7.70840831578076e-06, "loss": 0.05090562626719475, "step": 5068 }, { "epoch": 0.6852951862711134, "grad_norm": 0.7214508056640625, "learning_rate": 7.702427730468601e-06, "loss": 0.06970679759979248, "step": 5069 }, { "epoch": 0.6854303796398786, "grad_norm": 0.1958610862493515, "learning_rate": 7.696448664549898e-06, "loss": 0.04092078655958176, "step": 5070 }, { "epoch": 0.685565573008644, "grad_norm": 0.4784294068813324, "learning_rate": 7.690471119269541e-06, "loss": 0.06622794270515442, "step": 5071 }, { "epoch": 0.6857007663774092, "grad_norm": 0.39987921714782715, "learning_rate": 7.684495095872073e-06, "loss": 0.05952848494052887, "step": 5072 }, { "epoch": 0.6858359597461744, "grad_norm": 0.3583393096923828, "learning_rate": 7.678520595601728e-06, "loss": 0.0378875732421875, "step": 5073 }, { "epoch": 0.6859711531149397, "grad_norm": 0.6153355240821838, "learning_rate": 7.672547619702445e-06, "loss": 0.06293143332004547, "step": 5074 }, { "epoch": 0.6861063464837049, "grad_norm": 0.21090540289878845, "learning_rate": 7.666576169417823e-06, "loss": 0.026582174003124237, "step": 5075 }, { "epoch": 0.6862415398524703, "grad_norm": 0.6142287254333496, "learning_rate": 7.660606245991147e-06, "loss": 0.05657009780406952, "step": 5076 }, { "epoch": 0.6863767332212355, "grad_norm": 1.086599588394165, "learning_rate": 7.654637850665393e-06, "loss": 0.067495197057724, "step": 5077 }, { "epoch": 0.6865119265900007, "grad_norm": 0.5097535252571106, "learning_rate": 7.648670984683199e-06, "loss": 0.0701148509979248, "step": 5078 }, { "epoch": 0.686647119958766, "grad_norm": 0.5392943024635315, "learning_rate": 7.642705649286916e-06, "loss": 0.05803874135017395, "step": 5079 }, { "epoch": 0.6867823133275313, "grad_norm": 0.24042265117168427, "learning_rate": 7.63674184571855e-06, "loss": 0.050581805408000946, "step": 5080 }, { "epoch": 0.6869175066962966, "grad_norm": 0.4314679503440857, "learning_rate": 7.630779575219797e-06, "loss": 0.07898205518722534, "step": 5081 }, { "epoch": 0.6870527000650618, "grad_norm": 0.20744667947292328, "learning_rate": 7.6248188390320344e-06, "loss": 0.047296687960624695, "step": 5082 }, { "epoch": 0.687187893433827, "grad_norm": 0.4768024981021881, "learning_rate": 7.6188596383963135e-06, "loss": 0.05420637130737305, "step": 5083 }, { "epoch": 0.6873230868025924, "grad_norm": 0.28742244839668274, "learning_rate": 7.612901974553388e-06, "loss": 0.04789707064628601, "step": 5084 }, { "epoch": 0.6874582801713576, "grad_norm": 0.3358917236328125, "learning_rate": 7.606945848743653e-06, "loss": 0.04327031970024109, "step": 5085 }, { "epoch": 0.6875934735401229, "grad_norm": 1.0368911027908325, "learning_rate": 7.600991262207221e-06, "loss": 0.0589791014790535, "step": 5086 }, { "epoch": 0.6877286669088881, "grad_norm": 0.335031658411026, "learning_rate": 7.595038216183867e-06, "loss": 0.05144873261451721, "step": 5087 }, { "epoch": 0.6878638602776533, "grad_norm": 0.9198899865150452, "learning_rate": 7.589086711913037e-06, "loss": 0.060086995363235474, "step": 5088 }, { "epoch": 0.6879990536464187, "grad_norm": 0.5198059678077698, "learning_rate": 7.583136750633885e-06, "loss": 0.049209460616111755, "step": 5089 }, { "epoch": 0.6881342470151839, "grad_norm": 0.508225679397583, "learning_rate": 7.577188333585202e-06, "loss": 0.06779351830482483, "step": 5090 }, { "epoch": 0.6882694403839492, "grad_norm": 0.462271511554718, "learning_rate": 7.5712414620054975e-06, "loss": 0.05050325393676758, "step": 5091 }, { "epoch": 0.6884046337527144, "grad_norm": 0.5502487421035767, "learning_rate": 7.565296137132935e-06, "loss": 0.051377713680267334, "step": 5092 }, { "epoch": 0.6885398271214797, "grad_norm": 1.556666374206543, "learning_rate": 7.559352360205357e-06, "loss": 0.05573230981826782, "step": 5093 }, { "epoch": 0.688675020490245, "grad_norm": 0.43901729583740234, "learning_rate": 7.553410132460308e-06, "loss": 0.04742233455181122, "step": 5094 }, { "epoch": 0.6888102138590102, "grad_norm": 0.46726635098457336, "learning_rate": 7.547469455134968e-06, "loss": 0.05612045153975487, "step": 5095 }, { "epoch": 0.6889454072277755, "grad_norm": 0.8445354104042053, "learning_rate": 7.541530329466236e-06, "loss": 0.051213786005973816, "step": 5096 }, { "epoch": 0.6890806005965407, "grad_norm": 0.42055872082710266, "learning_rate": 7.535592756690661e-06, "loss": 0.04920297861099243, "step": 5097 }, { "epoch": 0.689215793965306, "grad_norm": 0.26518017053604126, "learning_rate": 7.52965673804448e-06, "loss": 0.05163450539112091, "step": 5098 }, { "epoch": 0.6893509873340713, "grad_norm": 0.32651418447494507, "learning_rate": 7.5237222747636025e-06, "loss": 0.05538457632064819, "step": 5099 }, { "epoch": 0.6894861807028365, "grad_norm": 0.5737900733947754, "learning_rate": 7.517789368083611e-06, "loss": 0.05658465251326561, "step": 5100 }, { "epoch": 0.6896213740716017, "grad_norm": 0.8376307487487793, "learning_rate": 7.511858019239778e-06, "loss": 0.049514517188072205, "step": 5101 }, { "epoch": 0.6897565674403671, "grad_norm": 0.3505117893218994, "learning_rate": 7.505928229467038e-06, "loss": 0.0646526962518692, "step": 5102 }, { "epoch": 0.6898917608091323, "grad_norm": 0.8085762858390808, "learning_rate": 7.500000000000004e-06, "loss": 0.06394210457801819, "step": 5103 }, { "epoch": 0.6900269541778976, "grad_norm": 0.7817121744155884, "learning_rate": 7.494073332072963e-06, "loss": 0.06659898161888123, "step": 5104 }, { "epoch": 0.6901621475466628, "grad_norm": 0.3992208242416382, "learning_rate": 7.488148226919877e-06, "loss": 0.030706388875842094, "step": 5105 }, { "epoch": 0.690297340915428, "grad_norm": 0.19241000711917877, "learning_rate": 7.482224685774393e-06, "loss": 0.04445412755012512, "step": 5106 }, { "epoch": 0.6904325342841934, "grad_norm": 0.5191207528114319, "learning_rate": 7.4763027098698184e-06, "loss": 0.051409762352705, "step": 5107 }, { "epoch": 0.6905677276529586, "grad_norm": 0.5312533378601074, "learning_rate": 7.470382300439143e-06, "loss": 0.04130159318447113, "step": 5108 }, { "epoch": 0.6907029210217239, "grad_norm": 0.45414721965789795, "learning_rate": 7.4644634587150225e-06, "loss": 0.05870556831359863, "step": 5109 }, { "epoch": 0.6908381143904891, "grad_norm": 0.3957090973854065, "learning_rate": 7.4585461859297906e-06, "loss": 0.04819725453853607, "step": 5110 }, { "epoch": 0.6909733077592544, "grad_norm": 0.5227088928222656, "learning_rate": 7.452630483315463e-06, "loss": 0.04227384924888611, "step": 5111 }, { "epoch": 0.6911085011280197, "grad_norm": 0.29937782883644104, "learning_rate": 7.4467163521037186e-06, "loss": 0.04523632675409317, "step": 5112 }, { "epoch": 0.6912436944967849, "grad_norm": 0.30863818526268005, "learning_rate": 7.440803793525907e-06, "loss": 0.04430852830410004, "step": 5113 }, { "epoch": 0.6913788878655502, "grad_norm": 0.5694001317024231, "learning_rate": 7.434892808813056e-06, "loss": 0.05520200729370117, "step": 5114 }, { "epoch": 0.6915140812343155, "grad_norm": 0.7776246666908264, "learning_rate": 7.42898339919586e-06, "loss": 0.05291074514389038, "step": 5115 }, { "epoch": 0.6916492746030807, "grad_norm": 1.1083894968032837, "learning_rate": 7.423075565904698e-06, "loss": 0.06358221918344498, "step": 5116 }, { "epoch": 0.691784467971846, "grad_norm": 0.6599995493888855, "learning_rate": 7.417169310169609e-06, "loss": 0.06120622158050537, "step": 5117 }, { "epoch": 0.6919196613406112, "grad_norm": 0.9249976873397827, "learning_rate": 7.411264633220305e-06, "loss": 0.051445357501506805, "step": 5118 }, { "epoch": 0.6920548547093766, "grad_norm": 0.4731396734714508, "learning_rate": 7.405361536286174e-06, "loss": 0.051790982484817505, "step": 5119 }, { "epoch": 0.6921900480781418, "grad_norm": 0.4082396328449249, "learning_rate": 7.399460020596266e-06, "loss": 0.06415332853794098, "step": 5120 }, { "epoch": 0.692325241446907, "grad_norm": 0.5769826173782349, "learning_rate": 7.393560087379322e-06, "loss": 0.05486782267689705, "step": 5121 }, { "epoch": 0.6924604348156723, "grad_norm": 0.3882736265659332, "learning_rate": 7.3876617378637195e-06, "loss": 0.04376395046710968, "step": 5122 }, { "epoch": 0.6925956281844375, "grad_norm": 0.3410462737083435, "learning_rate": 7.381764973277543e-06, "loss": 0.04908747225999832, "step": 5123 }, { "epoch": 0.6927308215532029, "grad_norm": 0.5017903447151184, "learning_rate": 7.375869794848525e-06, "loss": 0.057123810052871704, "step": 5124 }, { "epoch": 0.6928660149219681, "grad_norm": 0.5975813269615173, "learning_rate": 7.3699762038040654e-06, "loss": 0.0574532151222229, "step": 5125 }, { "epoch": 0.6930012082907333, "grad_norm": 0.7153971791267395, "learning_rate": 7.364084201371261e-06, "loss": 0.07156383991241455, "step": 5126 }, { "epoch": 0.6931364016594986, "grad_norm": 1.0082093477249146, "learning_rate": 7.3581937887768334e-06, "loss": 0.05073480308055878, "step": 5127 }, { "epoch": 0.6932715950282639, "grad_norm": 1.5162277221679688, "learning_rate": 7.352304967247217e-06, "loss": 0.07791519165039062, "step": 5128 }, { "epoch": 0.6934067883970291, "grad_norm": 0.32575663924217224, "learning_rate": 7.346417738008487e-06, "loss": 0.0532938614487648, "step": 5129 }, { "epoch": 0.6935419817657944, "grad_norm": 0.37175315618515015, "learning_rate": 7.340532102286399e-06, "loss": 0.0561351478099823, "step": 5130 }, { "epoch": 0.6936771751345596, "grad_norm": 0.6612153649330139, "learning_rate": 7.3346480613063725e-06, "loss": 0.061547353863716125, "step": 5131 }, { "epoch": 0.693812368503325, "grad_norm": 0.5450572967529297, "learning_rate": 7.328765616293491e-06, "loss": 0.04134263098239899, "step": 5132 }, { "epoch": 0.6939475618720902, "grad_norm": 0.7792589664459229, "learning_rate": 7.322884768472521e-06, "loss": 0.05712822079658508, "step": 5133 }, { "epoch": 0.6940827552408554, "grad_norm": 1.0409884452819824, "learning_rate": 7.317005519067881e-06, "loss": 0.05768483877182007, "step": 5134 }, { "epoch": 0.6942179486096207, "grad_norm": 0.2901518642902374, "learning_rate": 7.311127869303665e-06, "loss": 0.046974167227745056, "step": 5135 }, { "epoch": 0.6943531419783859, "grad_norm": 0.5461139678955078, "learning_rate": 7.305251820403628e-06, "loss": 0.055159635841846466, "step": 5136 }, { "epoch": 0.6944883353471513, "grad_norm": 0.4399124085903168, "learning_rate": 7.299377373591188e-06, "loss": 0.07501746714115143, "step": 5137 }, { "epoch": 0.6946235287159165, "grad_norm": 1.0696556568145752, "learning_rate": 7.29350453008945e-06, "loss": 0.060276709496974945, "step": 5138 }, { "epoch": 0.6947587220846817, "grad_norm": 0.7495952844619751, "learning_rate": 7.287633291121166e-06, "loss": 0.053207822144031525, "step": 5139 }, { "epoch": 0.694893915453447, "grad_norm": 0.487591028213501, "learning_rate": 7.281763657908756e-06, "loss": 0.05811338499188423, "step": 5140 }, { "epoch": 0.6950291088222122, "grad_norm": 0.38049882650375366, "learning_rate": 7.275895631674313e-06, "loss": 0.05522039532661438, "step": 5141 }, { "epoch": 0.6951643021909776, "grad_norm": 1.051038146018982, "learning_rate": 7.2700292136395826e-06, "loss": 0.04356321692466736, "step": 5142 }, { "epoch": 0.6952994955597428, "grad_norm": 0.38571301102638245, "learning_rate": 7.264164405025997e-06, "loss": 0.05928133428096771, "step": 5143 }, { "epoch": 0.695434688928508, "grad_norm": 0.6754260063171387, "learning_rate": 7.2583012070546364e-06, "loss": 0.07309995591640472, "step": 5144 }, { "epoch": 0.6955698822972733, "grad_norm": 0.4771690368652344, "learning_rate": 7.252439620946247e-06, "loss": 0.04176047444343567, "step": 5145 }, { "epoch": 0.6957050756660386, "grad_norm": 1.246132731437683, "learning_rate": 7.246579647921243e-06, "loss": 0.06695803999900818, "step": 5146 }, { "epoch": 0.6958402690348039, "grad_norm": 0.9741593599319458, "learning_rate": 7.240721289199699e-06, "loss": 0.057047039270401, "step": 5147 }, { "epoch": 0.6959754624035691, "grad_norm": 1.1595772504806519, "learning_rate": 7.234864546001364e-06, "loss": 0.07157918810844421, "step": 5148 }, { "epoch": 0.6961106557723343, "grad_norm": 0.8684871196746826, "learning_rate": 7.229009419545638e-06, "loss": 0.0670970231294632, "step": 5149 }, { "epoch": 0.6962458491410997, "grad_norm": 0.40889328718185425, "learning_rate": 7.223155911051593e-06, "loss": 0.06505045294761658, "step": 5150 }, { "epoch": 0.6963810425098649, "grad_norm": 0.3071252405643463, "learning_rate": 7.2173040217379575e-06, "loss": 0.05174090340733528, "step": 5151 }, { "epoch": 0.6965162358786302, "grad_norm": 0.56471848487854, "learning_rate": 7.211453752823122e-06, "loss": 0.041987404227256775, "step": 5152 }, { "epoch": 0.6966514292473954, "grad_norm": 0.7361147999763489, "learning_rate": 7.205605105525161e-06, "loss": 0.05496387183666229, "step": 5153 }, { "epoch": 0.6967866226161606, "grad_norm": 0.30997464060783386, "learning_rate": 7.19975808106177e-06, "loss": 0.04422182962298393, "step": 5154 }, { "epoch": 0.696921815984926, "grad_norm": 0.3769621253013611, "learning_rate": 7.193912680650346e-06, "loss": 0.06449800729751587, "step": 5155 }, { "epoch": 0.6970570093536912, "grad_norm": 0.44344809651374817, "learning_rate": 7.188068905507931e-06, "loss": 0.056416332721710205, "step": 5156 }, { "epoch": 0.6971922027224564, "grad_norm": 0.7775740027427673, "learning_rate": 7.182226756851223e-06, "loss": 0.06033511087298393, "step": 5157 }, { "epoch": 0.6973273960912217, "grad_norm": 0.2717428207397461, "learning_rate": 7.176386235896603e-06, "loss": 0.04736998677253723, "step": 5158 }, { "epoch": 0.697462589459987, "grad_norm": 0.5783759355545044, "learning_rate": 7.170547343860079e-06, "loss": 0.0531974732875824, "step": 5159 }, { "epoch": 0.6975977828287523, "grad_norm": 1.0207737684249878, "learning_rate": 7.164710081957355e-06, "loss": 0.054501160979270935, "step": 5160 }, { "epoch": 0.6977329761975175, "grad_norm": 0.36375150084495544, "learning_rate": 7.158874451403777e-06, "loss": 0.05721798911690712, "step": 5161 }, { "epoch": 0.6978681695662827, "grad_norm": 1.3351860046386719, "learning_rate": 7.15304045341435e-06, "loss": 0.06120528280735016, "step": 5162 }, { "epoch": 0.698003362935048, "grad_norm": 0.303088515996933, "learning_rate": 7.147208089203745e-06, "loss": 0.05221192166209221, "step": 5163 }, { "epoch": 0.6981385563038133, "grad_norm": 1.5300906896591187, "learning_rate": 7.141377359986288e-06, "loss": 0.06439690291881561, "step": 5164 }, { "epoch": 0.6982737496725786, "grad_norm": 0.3447400629520416, "learning_rate": 7.135548266975978e-06, "loss": 0.039967283606529236, "step": 5165 }, { "epoch": 0.6984089430413438, "grad_norm": 0.665357232093811, "learning_rate": 7.129720811386456e-06, "loss": 0.057531893253326416, "step": 5166 }, { "epoch": 0.698544136410109, "grad_norm": 0.5568000078201294, "learning_rate": 7.12389499443103e-06, "loss": 0.051739323884248734, "step": 5167 }, { "epoch": 0.6986793297788744, "grad_norm": 0.5877439379692078, "learning_rate": 7.118070817322668e-06, "loss": 0.06013950705528259, "step": 5168 }, { "epoch": 0.6988145231476396, "grad_norm": 0.4831162989139557, "learning_rate": 7.1122482812739885e-06, "loss": 0.06758153438568115, "step": 5169 }, { "epoch": 0.6989497165164049, "grad_norm": 0.33590078353881836, "learning_rate": 7.106427387497283e-06, "loss": 0.04840763285756111, "step": 5170 }, { "epoch": 0.6990849098851701, "grad_norm": 0.6922755241394043, "learning_rate": 7.10060813720449e-06, "loss": 0.07645237445831299, "step": 5171 }, { "epoch": 0.6992201032539354, "grad_norm": 0.2970874607563019, "learning_rate": 7.094790531607207e-06, "loss": 0.045455269515514374, "step": 5172 }, { "epoch": 0.6993552966227007, "grad_norm": 0.7809378504753113, "learning_rate": 7.088974571916692e-06, "loss": 0.05412168800830841, "step": 5173 }, { "epoch": 0.6994904899914659, "grad_norm": 0.5885297060012817, "learning_rate": 7.0831602593438515e-06, "loss": 0.04803713411092758, "step": 5174 }, { "epoch": 0.6996256833602312, "grad_norm": 0.4061662554740906, "learning_rate": 7.077347595099269e-06, "loss": 0.03494921326637268, "step": 5175 }, { "epoch": 0.6997608767289965, "grad_norm": 0.6193325519561768, "learning_rate": 7.071536580393166e-06, "loss": 0.05285486578941345, "step": 5176 }, { "epoch": 0.6998960700977617, "grad_norm": 0.35865479707717896, "learning_rate": 7.065727216435426e-06, "loss": 0.04329541325569153, "step": 5177 }, { "epoch": 0.700031263466527, "grad_norm": 0.406389445066452, "learning_rate": 7.05991950443559e-06, "loss": 0.0642390251159668, "step": 5178 }, { "epoch": 0.7001664568352922, "grad_norm": 0.9310538172721863, "learning_rate": 7.05411344560285e-06, "loss": 0.047193270176649094, "step": 5179 }, { "epoch": 0.7003016502040575, "grad_norm": 0.3351069986820221, "learning_rate": 7.048309041146069e-06, "loss": 0.0356617271900177, "step": 5180 }, { "epoch": 0.7004368435728228, "grad_norm": 0.4099974036216736, "learning_rate": 7.0425062922737495e-06, "loss": 0.04956263303756714, "step": 5181 }, { "epoch": 0.700572036941588, "grad_norm": 0.4174969494342804, "learning_rate": 7.036705200194053e-06, "loss": 0.06671006977558136, "step": 5182 }, { "epoch": 0.7007072303103533, "grad_norm": 0.262410044670105, "learning_rate": 7.0309057661148e-06, "loss": 0.03994864597916603, "step": 5183 }, { "epoch": 0.7008424236791185, "grad_norm": 0.31396976113319397, "learning_rate": 7.0251079912434565e-06, "loss": 0.05015129595994949, "step": 5184 }, { "epoch": 0.7009776170478837, "grad_norm": 0.5556950569152832, "learning_rate": 7.019311876787169e-06, "loss": 0.049565330147743225, "step": 5185 }, { "epoch": 0.7011128104166491, "grad_norm": 0.9157549738883972, "learning_rate": 7.013517423952696e-06, "loss": 0.05310907959938049, "step": 5186 }, { "epoch": 0.7012480037854143, "grad_norm": 0.3359881639480591, "learning_rate": 7.0077246339464904e-06, "loss": 0.04812927544116974, "step": 5187 }, { "epoch": 0.7013831971541796, "grad_norm": 0.3641676604747772, "learning_rate": 7.001933507974635e-06, "loss": 0.04901690036058426, "step": 5188 }, { "epoch": 0.7015183905229448, "grad_norm": 0.5193299651145935, "learning_rate": 6.996144047242868e-06, "loss": 0.057293668389320374, "step": 5189 }, { "epoch": 0.7016535838917101, "grad_norm": 0.713130533695221, "learning_rate": 6.9903562529566044e-06, "loss": 0.059603363275527954, "step": 5190 }, { "epoch": 0.7017887772604754, "grad_norm": 0.6724171042442322, "learning_rate": 6.984570126320869e-06, "loss": 0.05180485546588898, "step": 5191 }, { "epoch": 0.7019239706292406, "grad_norm": 0.4026876986026764, "learning_rate": 6.978785668540384e-06, "loss": 0.06488806009292603, "step": 5192 }, { "epoch": 0.7020591639980059, "grad_norm": 0.479975163936615, "learning_rate": 6.973002880819496e-06, "loss": 0.044665493071079254, "step": 5193 }, { "epoch": 0.7021943573667712, "grad_norm": 1.061629295349121, "learning_rate": 6.96722176436221e-06, "loss": 0.0392318069934845, "step": 5194 }, { "epoch": 0.7023295507355364, "grad_norm": 0.34729188680648804, "learning_rate": 6.9614423203721975e-06, "loss": 0.048036955296993256, "step": 5195 }, { "epoch": 0.7024647441043017, "grad_norm": 0.3623282313346863, "learning_rate": 6.955664550052749e-06, "loss": 0.04847604036331177, "step": 5196 }, { "epoch": 0.7025999374730669, "grad_norm": 0.7228285074234009, "learning_rate": 6.949888454606847e-06, "loss": 0.04531398415565491, "step": 5197 }, { "epoch": 0.7027351308418323, "grad_norm": 0.6944763660430908, "learning_rate": 6.944114035237095e-06, "loss": 0.06294602155685425, "step": 5198 }, { "epoch": 0.7028703242105975, "grad_norm": 0.3658258616924286, "learning_rate": 6.93834129314576e-06, "loss": 0.04628367722034454, "step": 5199 }, { "epoch": 0.7030055175793627, "grad_norm": 0.39812472462654114, "learning_rate": 6.932570229534759e-06, "loss": 0.05470454692840576, "step": 5200 }, { "epoch": 0.703140710948128, "grad_norm": 0.3537338078022003, "learning_rate": 6.9268008456056505e-06, "loss": 0.04562537372112274, "step": 5201 }, { "epoch": 0.7032759043168932, "grad_norm": 0.5964049696922302, "learning_rate": 6.921033142559664e-06, "loss": 0.05565641075372696, "step": 5202 }, { "epoch": 0.7034110976856586, "grad_norm": 0.8398289084434509, "learning_rate": 6.915267121597659e-06, "loss": 0.06396631896495819, "step": 5203 }, { "epoch": 0.7035462910544238, "grad_norm": 0.6836197376251221, "learning_rate": 6.909502783920153e-06, "loss": 0.05060915648937225, "step": 5204 }, { "epoch": 0.703681484423189, "grad_norm": 0.7767529487609863, "learning_rate": 6.903740130727312e-06, "loss": 0.04536814987659454, "step": 5205 }, { "epoch": 0.7038166777919543, "grad_norm": 0.8539150953292847, "learning_rate": 6.8979791632189425e-06, "loss": 0.06451554596424103, "step": 5206 }, { "epoch": 0.7039518711607196, "grad_norm": 0.4127066433429718, "learning_rate": 6.892219882594523e-06, "loss": 0.05285489559173584, "step": 5207 }, { "epoch": 0.7040870645294848, "grad_norm": 0.5851449370384216, "learning_rate": 6.886462290053159e-06, "loss": 0.05206361040472984, "step": 5208 }, { "epoch": 0.7042222578982501, "grad_norm": 0.6160390377044678, "learning_rate": 6.880706386793614e-06, "loss": 0.05865874141454697, "step": 5209 }, { "epoch": 0.7043574512670153, "grad_norm": 0.7806863784790039, "learning_rate": 6.874952174014298e-06, "loss": 0.0446733683347702, "step": 5210 }, { "epoch": 0.7044926446357807, "grad_norm": 1.116327166557312, "learning_rate": 6.8691996529132585e-06, "loss": 0.0803598091006279, "step": 5211 }, { "epoch": 0.7046278380045459, "grad_norm": 0.7639861702919006, "learning_rate": 6.863448824688217e-06, "loss": 0.06711077690124512, "step": 5212 }, { "epoch": 0.7047630313733111, "grad_norm": 1.137369990348816, "learning_rate": 6.857699690536521e-06, "loss": 0.061547838151454926, "step": 5213 }, { "epoch": 0.7048982247420764, "grad_norm": 1.2901512384414673, "learning_rate": 6.8519522516551685e-06, "loss": 0.06110933795571327, "step": 5214 }, { "epoch": 0.7050334181108416, "grad_norm": 0.929878830909729, "learning_rate": 6.846206509240807e-06, "loss": 0.0629815012216568, "step": 5215 }, { "epoch": 0.705168611479607, "grad_norm": 0.6455501317977905, "learning_rate": 6.840462464489726e-06, "loss": 0.05758979916572571, "step": 5216 }, { "epoch": 0.7053038048483722, "grad_norm": 0.29729747772216797, "learning_rate": 6.834720118597879e-06, "loss": 0.041485995054244995, "step": 5217 }, { "epoch": 0.7054389982171374, "grad_norm": 1.0058354139328003, "learning_rate": 6.828979472760846e-06, "loss": 0.07757961750030518, "step": 5218 }, { "epoch": 0.7055741915859027, "grad_norm": 0.25626692175865173, "learning_rate": 6.823240528173858e-06, "loss": 0.05111341178417206, "step": 5219 }, { "epoch": 0.705709384954668, "grad_norm": 0.3151022493839264, "learning_rate": 6.817503286031797e-06, "loss": 0.03189639747142792, "step": 5220 }, { "epoch": 0.7058445783234333, "grad_norm": 0.6382620930671692, "learning_rate": 6.811767747529181e-06, "loss": 0.05536612868309021, "step": 5221 }, { "epoch": 0.7059797716921985, "grad_norm": 1.3285009860992432, "learning_rate": 6.806033913860195e-06, "loss": 0.0520717054605484, "step": 5222 }, { "epoch": 0.7061149650609637, "grad_norm": 0.5467832088470459, "learning_rate": 6.800301786218634e-06, "loss": 0.07821998745203018, "step": 5223 }, { "epoch": 0.706250158429729, "grad_norm": 0.32285797595977783, "learning_rate": 6.794571365797971e-06, "loss": 0.047391682863235474, "step": 5224 }, { "epoch": 0.7063853517984943, "grad_norm": 0.620692253112793, "learning_rate": 6.788842653791308e-06, "loss": 0.04966667294502258, "step": 5225 }, { "epoch": 0.7065205451672596, "grad_norm": 1.3107481002807617, "learning_rate": 6.7831156513913864e-06, "loss": 0.048862241208553314, "step": 5226 }, { "epoch": 0.7066557385360248, "grad_norm": 0.8749264478683472, "learning_rate": 6.777390359790614e-06, "loss": 0.06083528697490692, "step": 5227 }, { "epoch": 0.70679093190479, "grad_norm": 0.34699174761772156, "learning_rate": 6.771666780181004e-06, "loss": 0.05428403615951538, "step": 5228 }, { "epoch": 0.7069261252735554, "grad_norm": 0.9546473622322083, "learning_rate": 6.765944913754258e-06, "loss": 0.04775610566139221, "step": 5229 }, { "epoch": 0.7070613186423206, "grad_norm": 0.5657267570495605, "learning_rate": 6.7602247617016885e-06, "loss": 0.05932777374982834, "step": 5230 }, { "epoch": 0.7071965120110859, "grad_norm": 0.40411585569381714, "learning_rate": 6.754506325214265e-06, "loss": 0.053828880190849304, "step": 5231 }, { "epoch": 0.7073317053798511, "grad_norm": 0.5263655781745911, "learning_rate": 6.748789605482593e-06, "loss": 0.055475205183029175, "step": 5232 }, { "epoch": 0.7074668987486163, "grad_norm": 0.4490223526954651, "learning_rate": 6.743074603696922e-06, "loss": 0.048049196600914, "step": 5233 }, { "epoch": 0.7076020921173817, "grad_norm": 0.5384478569030762, "learning_rate": 6.737361321047155e-06, "loss": 0.05094766616821289, "step": 5234 }, { "epoch": 0.7077372854861469, "grad_norm": 0.43504464626312256, "learning_rate": 6.731649758722823e-06, "loss": 0.04241069406270981, "step": 5235 }, { "epoch": 0.7078724788549121, "grad_norm": 0.3702452778816223, "learning_rate": 6.725939917913102e-06, "loss": 0.04524552822113037, "step": 5236 }, { "epoch": 0.7080076722236774, "grad_norm": 0.8604250550270081, "learning_rate": 6.720231799806814e-06, "loss": 0.057267896831035614, "step": 5237 }, { "epoch": 0.7081428655924427, "grad_norm": 0.5495241284370422, "learning_rate": 6.7145254055924136e-06, "loss": 0.05888666212558746, "step": 5238 }, { "epoch": 0.708278058961208, "grad_norm": 0.34099462628364563, "learning_rate": 6.70882073645801e-06, "loss": 0.03822622448205948, "step": 5239 }, { "epoch": 0.7084132523299732, "grad_norm": 0.48479411005973816, "learning_rate": 6.703117793591346e-06, "loss": 0.04635937884449959, "step": 5240 }, { "epoch": 0.7085484456987384, "grad_norm": 0.9159398078918457, "learning_rate": 6.6974165781798e-06, "loss": 0.041539374738931656, "step": 5241 }, { "epoch": 0.7086836390675038, "grad_norm": 0.37235864996910095, "learning_rate": 6.691717091410398e-06, "loss": 0.05956708639860153, "step": 5242 }, { "epoch": 0.708818832436269, "grad_norm": 0.23226448893547058, "learning_rate": 6.686019334469797e-06, "loss": 0.03829336166381836, "step": 5243 }, { "epoch": 0.7089540258050343, "grad_norm": 0.4667544364929199, "learning_rate": 6.680323308544312e-06, "loss": 0.056112732738256454, "step": 5244 }, { "epoch": 0.7090892191737995, "grad_norm": 0.4571991562843323, "learning_rate": 6.674629014819879e-06, "loss": 0.06417109072208405, "step": 5245 }, { "epoch": 0.7092244125425647, "grad_norm": 0.2715124785900116, "learning_rate": 6.668936454482082e-06, "loss": 0.04070019721984863, "step": 5246 }, { "epoch": 0.7093596059113301, "grad_norm": 0.5346907377243042, "learning_rate": 6.6632456287161426e-06, "loss": 0.06418287754058838, "step": 5247 }, { "epoch": 0.7094947992800953, "grad_norm": 1.1269422769546509, "learning_rate": 6.657556538706914e-06, "loss": 0.05592989921569824, "step": 5248 }, { "epoch": 0.7096299926488606, "grad_norm": 0.7274010181427002, "learning_rate": 6.651869185638907e-06, "loss": 0.042320191860198975, "step": 5249 }, { "epoch": 0.7097651860176258, "grad_norm": 0.7881664633750916, "learning_rate": 6.646183570696253e-06, "loss": 0.06317999213933945, "step": 5250 }, { "epoch": 0.7099003793863911, "grad_norm": 0.3684523403644562, "learning_rate": 6.6404996950627275e-06, "loss": 0.04369693249464035, "step": 5251 }, { "epoch": 0.7100355727551564, "grad_norm": 0.39067620038986206, "learning_rate": 6.634817559921744e-06, "loss": 0.06155034154653549, "step": 5252 }, { "epoch": 0.7101707661239216, "grad_norm": 0.21505771577358246, "learning_rate": 6.629137166456348e-06, "loss": 0.03871676325798035, "step": 5253 }, { "epoch": 0.7103059594926869, "grad_norm": 0.9072936773300171, "learning_rate": 6.623458515849244e-06, "loss": 0.06219252943992615, "step": 5254 }, { "epoch": 0.7104411528614522, "grad_norm": 0.764362096786499, "learning_rate": 6.6177816092827354e-06, "loss": 0.06337004899978638, "step": 5255 }, { "epoch": 0.7105763462302174, "grad_norm": 0.58935546875, "learning_rate": 6.6121064479388e-06, "loss": 0.0512816496193409, "step": 5256 }, { "epoch": 0.7107115395989827, "grad_norm": 0.13981947302818298, "learning_rate": 6.606433032999031e-06, "loss": 0.02731890231370926, "step": 5257 }, { "epoch": 0.7108467329677479, "grad_norm": 0.3495858907699585, "learning_rate": 6.60076136564466e-06, "loss": 0.03778968006372452, "step": 5258 }, { "epoch": 0.7109819263365132, "grad_norm": 1.0834962129592896, "learning_rate": 6.595091447056574e-06, "loss": 0.06191082298755646, "step": 5259 }, { "epoch": 0.7111171197052785, "grad_norm": 0.45581018924713135, "learning_rate": 6.589423278415259e-06, "loss": 0.04199296608567238, "step": 5260 }, { "epoch": 0.7112523130740437, "grad_norm": 0.2496291995048523, "learning_rate": 6.583756860900872e-06, "loss": 0.03917001932859421, "step": 5261 }, { "epoch": 0.711387506442809, "grad_norm": 0.4140659272670746, "learning_rate": 6.578092195693187e-06, "loss": 0.0661146342754364, "step": 5262 }, { "epoch": 0.7115226998115742, "grad_norm": 1.0227134227752686, "learning_rate": 6.572429283971614e-06, "loss": 0.044877439737319946, "step": 5263 }, { "epoch": 0.7116578931803395, "grad_norm": 0.615459144115448, "learning_rate": 6.566768126915215e-06, "loss": 0.05712532624602318, "step": 5264 }, { "epoch": 0.7117930865491048, "grad_norm": 0.26087018847465515, "learning_rate": 6.561108725702653e-06, "loss": 0.04123082756996155, "step": 5265 }, { "epoch": 0.71192827991787, "grad_norm": 0.5591263771057129, "learning_rate": 6.555451081512262e-06, "loss": 0.04603567719459534, "step": 5266 }, { "epoch": 0.7120634732866353, "grad_norm": 0.4314917325973511, "learning_rate": 6.549795195521988e-06, "loss": 0.04503660649061203, "step": 5267 }, { "epoch": 0.7121986666554005, "grad_norm": 0.6721104979515076, "learning_rate": 6.544141068909416e-06, "loss": 0.05728599801659584, "step": 5268 }, { "epoch": 0.7123338600241658, "grad_norm": 0.48057857155799866, "learning_rate": 6.5384887028517645e-06, "loss": 0.0481424480676651, "step": 5269 }, { "epoch": 0.7124690533929311, "grad_norm": 0.6880866885185242, "learning_rate": 6.532838098525883e-06, "loss": 0.04324055835604668, "step": 5270 }, { "epoch": 0.7126042467616963, "grad_norm": 0.30941563844680786, "learning_rate": 6.5271892571082655e-06, "loss": 0.040373966097831726, "step": 5271 }, { "epoch": 0.7127394401304616, "grad_norm": 0.28279757499694824, "learning_rate": 6.521542179775029e-06, "loss": 0.043951280415058136, "step": 5272 }, { "epoch": 0.7128746334992269, "grad_norm": 0.553692102432251, "learning_rate": 6.515896867701924e-06, "loss": 0.05222884938120842, "step": 5273 }, { "epoch": 0.7130098268679921, "grad_norm": 0.7409149408340454, "learning_rate": 6.510253322064333e-06, "loss": 0.07178819179534912, "step": 5274 }, { "epoch": 0.7131450202367574, "grad_norm": 0.40606531500816345, "learning_rate": 6.504611544037267e-06, "loss": 0.06418925523757935, "step": 5275 }, { "epoch": 0.7132802136055226, "grad_norm": 0.7129145860671997, "learning_rate": 6.498971534795387e-06, "loss": 0.05149535834789276, "step": 5276 }, { "epoch": 0.713415406974288, "grad_norm": 0.8560460209846497, "learning_rate": 6.493333295512965e-06, "loss": 0.045469388365745544, "step": 5277 }, { "epoch": 0.7135506003430532, "grad_norm": 0.27386897802352905, "learning_rate": 6.487696827363916e-06, "loss": 0.04209578409790993, "step": 5278 }, { "epoch": 0.7136857937118184, "grad_norm": 0.5673085451126099, "learning_rate": 6.48206213152178e-06, "loss": 0.043784499168395996, "step": 5279 }, { "epoch": 0.7138209870805837, "grad_norm": 0.4211233854293823, "learning_rate": 6.476429209159725e-06, "loss": 0.05123225972056389, "step": 5280 }, { "epoch": 0.7139561804493489, "grad_norm": 0.2857317626476288, "learning_rate": 6.470798061450568e-06, "loss": 0.03570246696472168, "step": 5281 }, { "epoch": 0.7140913738181143, "grad_norm": 0.38918983936309814, "learning_rate": 6.465168689566738e-06, "loss": 0.0379403755068779, "step": 5282 }, { "epoch": 0.7142265671868795, "grad_norm": 0.5957227945327759, "learning_rate": 6.4595410946803e-06, "loss": 0.06253184378147125, "step": 5283 }, { "epoch": 0.7143617605556447, "grad_norm": 0.43165212869644165, "learning_rate": 6.453915277962948e-06, "loss": 0.05619300156831741, "step": 5284 }, { "epoch": 0.71449695392441, "grad_norm": 1.7340635061264038, "learning_rate": 6.4482912405860055e-06, "loss": 0.07015684992074966, "step": 5285 }, { "epoch": 0.7146321472931753, "grad_norm": 0.9054259657859802, "learning_rate": 6.442668983720434e-06, "loss": 0.053550295531749725, "step": 5286 }, { "epoch": 0.7147673406619406, "grad_norm": 0.7461398839950562, "learning_rate": 6.437048508536813e-06, "loss": 0.05409380793571472, "step": 5287 }, { "epoch": 0.7149025340307058, "grad_norm": 0.3588312864303589, "learning_rate": 6.431429816205357e-06, "loss": 0.05562335252761841, "step": 5288 }, { "epoch": 0.715037727399471, "grad_norm": 0.19548359513282776, "learning_rate": 6.425812907895904e-06, "loss": 0.038705095648765564, "step": 5289 }, { "epoch": 0.7151729207682364, "grad_norm": 1.8110817670822144, "learning_rate": 6.420197784777925e-06, "loss": 0.07848978042602539, "step": 5290 }, { "epoch": 0.7153081141370016, "grad_norm": 0.19149990379810333, "learning_rate": 6.414584448020528e-06, "loss": 0.03312406688928604, "step": 5291 }, { "epoch": 0.7154433075057668, "grad_norm": 0.27977022528648376, "learning_rate": 6.408972898792423e-06, "loss": 0.04751484841108322, "step": 5292 }, { "epoch": 0.7155785008745321, "grad_norm": 0.27302271127700806, "learning_rate": 6.4033631382619766e-06, "loss": 0.04971819370985031, "step": 5293 }, { "epoch": 0.7157136942432973, "grad_norm": 0.31480973958969116, "learning_rate": 6.397755167597171e-06, "loss": 0.05296483635902405, "step": 5294 }, { "epoch": 0.7158488876120627, "grad_norm": 0.5366674065589905, "learning_rate": 6.392148987965603e-06, "loss": 0.0478631854057312, "step": 5295 }, { "epoch": 0.7159840809808279, "grad_norm": 0.23235802352428436, "learning_rate": 6.386544600534532e-06, "loss": 0.04729503393173218, "step": 5296 }, { "epoch": 0.7161192743495931, "grad_norm": 0.6369978189468384, "learning_rate": 6.3809420064707965e-06, "loss": 0.05295950174331665, "step": 5297 }, { "epoch": 0.7162544677183584, "grad_norm": 0.3334389626979828, "learning_rate": 6.375341206940902e-06, "loss": 0.05260632932186127, "step": 5298 }, { "epoch": 0.7163896610871237, "grad_norm": 0.4831049144268036, "learning_rate": 6.369742203110962e-06, "loss": 0.059853434562683105, "step": 5299 }, { "epoch": 0.716524854455889, "grad_norm": 0.7906429767608643, "learning_rate": 6.364144996146716e-06, "loss": 0.049375832080841064, "step": 5300 }, { "epoch": 0.7166600478246542, "grad_norm": 0.23303119838237762, "learning_rate": 6.358549587213534e-06, "loss": 0.026674766093492508, "step": 5301 }, { "epoch": 0.7167952411934194, "grad_norm": 0.39365312457084656, "learning_rate": 6.352955977476405e-06, "loss": 0.05765704810619354, "step": 5302 }, { "epoch": 0.7169304345621847, "grad_norm": 0.26632797718048096, "learning_rate": 6.347364168099959e-06, "loss": 0.041059210896492004, "step": 5303 }, { "epoch": 0.71706562793095, "grad_norm": 0.5234113931655884, "learning_rate": 6.341774160248435e-06, "loss": 0.041222766041755676, "step": 5304 }, { "epoch": 0.7172008212997153, "grad_norm": 0.3308750092983246, "learning_rate": 6.3361859550857e-06, "loss": 0.06355522572994232, "step": 5305 }, { "epoch": 0.7173360146684805, "grad_norm": 0.8985370993614197, "learning_rate": 6.330599553775252e-06, "loss": 0.05516377091407776, "step": 5306 }, { "epoch": 0.7174712080372457, "grad_norm": 0.4691896140575409, "learning_rate": 6.325014957480203e-06, "loss": 0.05438847839832306, "step": 5307 }, { "epoch": 0.7176064014060111, "grad_norm": 0.31181609630584717, "learning_rate": 6.319432167363305e-06, "loss": 0.05948318541049957, "step": 5308 }, { "epoch": 0.7177415947747763, "grad_norm": 0.599881112575531, "learning_rate": 6.313851184586918e-06, "loss": 0.04611806944012642, "step": 5309 }, { "epoch": 0.7178767881435416, "grad_norm": 0.707398533821106, "learning_rate": 6.308272010313037e-06, "loss": 0.054347846657037735, "step": 5310 }, { "epoch": 0.7180119815123068, "grad_norm": 1.5443148612976074, "learning_rate": 6.302694645703273e-06, "loss": 0.05726616457104683, "step": 5311 }, { "epoch": 0.718147174881072, "grad_norm": 0.3165954649448395, "learning_rate": 6.297119091918857e-06, "loss": 0.05020739138126373, "step": 5312 }, { "epoch": 0.7182823682498374, "grad_norm": 0.3362783193588257, "learning_rate": 6.2915453501206634e-06, "loss": 0.04550901800394058, "step": 5313 }, { "epoch": 0.7184175616186026, "grad_norm": 0.4389294981956482, "learning_rate": 6.285973421469166e-06, "loss": 0.049336910247802734, "step": 5314 }, { "epoch": 0.7185527549873678, "grad_norm": 0.25169846415519714, "learning_rate": 6.28040330712447e-06, "loss": 0.032390594482421875, "step": 5315 }, { "epoch": 0.7186879483561331, "grad_norm": 0.6153716444969177, "learning_rate": 6.274835008246304e-06, "loss": 0.06711088865995407, "step": 5316 }, { "epoch": 0.7188231417248984, "grad_norm": 1.0294628143310547, "learning_rate": 6.269268525994013e-06, "loss": 0.07531948387622833, "step": 5317 }, { "epoch": 0.7189583350936637, "grad_norm": 1.553651213645935, "learning_rate": 6.263703861526578e-06, "loss": 0.05934993922710419, "step": 5318 }, { "epoch": 0.7190935284624289, "grad_norm": 0.3488035500049591, "learning_rate": 6.258141016002587e-06, "loss": 0.05223589390516281, "step": 5319 }, { "epoch": 0.7192287218311941, "grad_norm": 0.4481896162033081, "learning_rate": 6.252579990580254e-06, "loss": 0.05611022561788559, "step": 5320 }, { "epoch": 0.7193639151999595, "grad_norm": 0.5994291305541992, "learning_rate": 6.247020786417412e-06, "loss": 0.05536480247974396, "step": 5321 }, { "epoch": 0.7194991085687247, "grad_norm": 0.7593721151351929, "learning_rate": 6.241463404671516e-06, "loss": 0.04418239742517471, "step": 5322 }, { "epoch": 0.71963430193749, "grad_norm": 0.23984026908874512, "learning_rate": 6.235907846499655e-06, "loss": 0.03307386487722397, "step": 5323 }, { "epoch": 0.7197694953062552, "grad_norm": 0.26421529054641724, "learning_rate": 6.230354113058505e-06, "loss": 0.03956104815006256, "step": 5324 }, { "epoch": 0.7199046886750204, "grad_norm": 0.3595588803291321, "learning_rate": 6.2248022055044e-06, "loss": 0.03947695344686508, "step": 5325 }, { "epoch": 0.7200398820437858, "grad_norm": 0.44018399715423584, "learning_rate": 6.219252124993271e-06, "loss": 0.049884915351867676, "step": 5326 }, { "epoch": 0.720175075412551, "grad_norm": 0.4145936369895935, "learning_rate": 6.213703872680668e-06, "loss": 0.050020769238471985, "step": 5327 }, { "epoch": 0.7203102687813163, "grad_norm": 0.27397283911705017, "learning_rate": 6.208157449721785e-06, "loss": 0.05111359804868698, "step": 5328 }, { "epoch": 0.7204454621500815, "grad_norm": 0.798911988735199, "learning_rate": 6.202612857271393e-06, "loss": 0.056186266243457794, "step": 5329 }, { "epoch": 0.7205806555188468, "grad_norm": 1.0204287767410278, "learning_rate": 6.197070096483923e-06, "loss": 0.06357286870479584, "step": 5330 }, { "epoch": 0.7207158488876121, "grad_norm": 0.359225332736969, "learning_rate": 6.191529168513403e-06, "loss": 0.0479716956615448, "step": 5331 }, { "epoch": 0.7208510422563773, "grad_norm": 0.5838215947151184, "learning_rate": 6.1859900745134755e-06, "loss": 0.06106770038604736, "step": 5332 }, { "epoch": 0.7209862356251426, "grad_norm": 0.6393482089042664, "learning_rate": 6.180452815637429e-06, "loss": 0.05821022391319275, "step": 5333 }, { "epoch": 0.7211214289939079, "grad_norm": 0.39810070395469666, "learning_rate": 6.174917393038126e-06, "loss": 0.04513488709926605, "step": 5334 }, { "epoch": 0.7212566223626731, "grad_norm": 0.8694458603858948, "learning_rate": 6.169383807868088e-06, "loss": 0.0457916259765625, "step": 5335 }, { "epoch": 0.7213918157314384, "grad_norm": 0.3716004490852356, "learning_rate": 6.163852061279432e-06, "loss": 0.04418657720088959, "step": 5336 }, { "epoch": 0.7215270091002036, "grad_norm": 0.44321274757385254, "learning_rate": 6.158322154423897e-06, "loss": 0.05373102426528931, "step": 5337 }, { "epoch": 0.721662202468969, "grad_norm": 0.7104228138923645, "learning_rate": 6.15279408845284e-06, "loss": 0.062415868043899536, "step": 5338 }, { "epoch": 0.7217973958377342, "grad_norm": 1.0298091173171997, "learning_rate": 6.147267864517226e-06, "loss": 0.058851905167102814, "step": 5339 }, { "epoch": 0.7219325892064994, "grad_norm": 0.4179043471813202, "learning_rate": 6.141743483767658e-06, "loss": 0.05466245114803314, "step": 5340 }, { "epoch": 0.7220677825752647, "grad_norm": 0.2920180857181549, "learning_rate": 6.136220947354333e-06, "loss": 0.03467337787151337, "step": 5341 }, { "epoch": 0.7222029759440299, "grad_norm": 1.014169454574585, "learning_rate": 6.130700256427075e-06, "loss": 0.055091291666030884, "step": 5342 }, { "epoch": 0.7223381693127952, "grad_norm": 0.6214808821678162, "learning_rate": 6.1251814121353204e-06, "loss": 0.0512273907661438, "step": 5343 }, { "epoch": 0.7224733626815605, "grad_norm": 0.753602147102356, "learning_rate": 6.1196644156281175e-06, "loss": 0.045717403292655945, "step": 5344 }, { "epoch": 0.7226085560503257, "grad_norm": 0.35925543308258057, "learning_rate": 6.114149268054143e-06, "loss": 0.04642605781555176, "step": 5345 }, { "epoch": 0.722743749419091, "grad_norm": 1.6378378868103027, "learning_rate": 6.108635970561679e-06, "loss": 0.06745411455631256, "step": 5346 }, { "epoch": 0.7228789427878562, "grad_norm": 0.3161318004131317, "learning_rate": 6.103124524298617e-06, "loss": 0.056581318378448486, "step": 5347 }, { "epoch": 0.7230141361566215, "grad_norm": 0.4949035942554474, "learning_rate": 6.097614930412475e-06, "loss": 0.04653365910053253, "step": 5348 }, { "epoch": 0.7231493295253868, "grad_norm": 0.4795159101486206, "learning_rate": 6.092107190050371e-06, "loss": 0.06439661979675293, "step": 5349 }, { "epoch": 0.723284522894152, "grad_norm": 0.2476195991039276, "learning_rate": 6.086601304359059e-06, "loss": 0.037961263209581375, "step": 5350 }, { "epoch": 0.7234197162629173, "grad_norm": 0.9817245602607727, "learning_rate": 6.081097274484887e-06, "loss": 0.05518001317977905, "step": 5351 }, { "epoch": 0.7235549096316826, "grad_norm": 0.415656715631485, "learning_rate": 6.075595101573825e-06, "loss": 0.05471087992191315, "step": 5352 }, { "epoch": 0.7236901030004478, "grad_norm": 0.24990646541118622, "learning_rate": 6.070094786771451e-06, "loss": 0.03266160190105438, "step": 5353 }, { "epoch": 0.7238252963692131, "grad_norm": 0.5751898884773254, "learning_rate": 6.06459633122296e-06, "loss": 0.05331885814666748, "step": 5354 }, { "epoch": 0.7239604897379783, "grad_norm": 0.39063960313796997, "learning_rate": 6.059099736073166e-06, "loss": 0.04876334220170975, "step": 5355 }, { "epoch": 0.7240956831067437, "grad_norm": 0.4047369956970215, "learning_rate": 6.0536050024664865e-06, "loss": 0.05805745720863342, "step": 5356 }, { "epoch": 0.7242308764755089, "grad_norm": 0.6284480690956116, "learning_rate": 6.048112131546953e-06, "loss": 0.046272601932287216, "step": 5357 }, { "epoch": 0.7243660698442741, "grad_norm": 0.24715310335159302, "learning_rate": 6.0426211244582105e-06, "loss": 0.03717603534460068, "step": 5358 }, { "epoch": 0.7245012632130394, "grad_norm": 0.2916569411754608, "learning_rate": 6.03713198234351e-06, "loss": 0.04737754166126251, "step": 5359 }, { "epoch": 0.7246364565818046, "grad_norm": 0.2525986135005951, "learning_rate": 6.0316447063457395e-06, "loss": 0.04116874933242798, "step": 5360 }, { "epoch": 0.72477164995057, "grad_norm": 1.6070128679275513, "learning_rate": 6.026159297607356e-06, "loss": 0.05991578847169876, "step": 5361 }, { "epoch": 0.7249068433193352, "grad_norm": 0.4565029740333557, "learning_rate": 6.020675757270466e-06, "loss": 0.05490928143262863, "step": 5362 }, { "epoch": 0.7250420366881004, "grad_norm": 0.4200201630592346, "learning_rate": 6.015194086476766e-06, "loss": 0.050050586462020874, "step": 5363 }, { "epoch": 0.7251772300568657, "grad_norm": 0.4062773883342743, "learning_rate": 6.009714286367565e-06, "loss": 0.05584326758980751, "step": 5364 }, { "epoch": 0.725312423425631, "grad_norm": 1.1536589860916138, "learning_rate": 6.004236358083802e-06, "loss": 0.07136119902133942, "step": 5365 }, { "epoch": 0.7254476167943963, "grad_norm": 0.45489436388015747, "learning_rate": 5.998760302765989e-06, "loss": 0.05478912591934204, "step": 5366 }, { "epoch": 0.7255828101631615, "grad_norm": 0.5681073665618896, "learning_rate": 5.993286121554289e-06, "loss": 0.06499084830284119, "step": 5367 }, { "epoch": 0.7257180035319267, "grad_norm": 0.3667809069156647, "learning_rate": 5.987813815588447e-06, "loss": 0.05895799398422241, "step": 5368 }, { "epoch": 0.725853196900692, "grad_norm": 0.3511812090873718, "learning_rate": 5.982343386007827e-06, "loss": 0.048358991742134094, "step": 5369 }, { "epoch": 0.7259883902694573, "grad_norm": 0.4450904428958893, "learning_rate": 5.976874833951404e-06, "loss": 0.06092090532183647, "step": 5370 }, { "epoch": 0.7261235836382225, "grad_norm": 1.4003962278366089, "learning_rate": 5.971408160557751e-06, "loss": 0.07164968550205231, "step": 5371 }, { "epoch": 0.7262587770069878, "grad_norm": 0.5836803913116455, "learning_rate": 5.965943366965069e-06, "loss": 0.04724377393722534, "step": 5372 }, { "epoch": 0.726393970375753, "grad_norm": 0.40526553988456726, "learning_rate": 5.960480454311155e-06, "loss": 0.031082840636372566, "step": 5373 }, { "epoch": 0.7265291637445184, "grad_norm": 0.28404921293258667, "learning_rate": 5.955019423733416e-06, "loss": 0.04713664948940277, "step": 5374 }, { "epoch": 0.7266643571132836, "grad_norm": 0.7471832036972046, "learning_rate": 5.949560276368866e-06, "loss": 0.06546878814697266, "step": 5375 }, { "epoch": 0.7267995504820488, "grad_norm": 0.40865886211395264, "learning_rate": 5.9441030133541235e-06, "loss": 0.05339936912059784, "step": 5376 }, { "epoch": 0.7269347438508141, "grad_norm": 0.6689923405647278, "learning_rate": 5.938647635825432e-06, "loss": 0.05192083865404129, "step": 5377 }, { "epoch": 0.7270699372195794, "grad_norm": 0.30280566215515137, "learning_rate": 5.933194144918623e-06, "loss": 0.041438695043325424, "step": 5378 }, { "epoch": 0.7272051305883447, "grad_norm": 0.30500245094299316, "learning_rate": 5.927742541769142e-06, "loss": 0.04250006377696991, "step": 5379 }, { "epoch": 0.7273403239571099, "grad_norm": 0.737541139125824, "learning_rate": 5.9222928275120445e-06, "loss": 0.057598624378442764, "step": 5380 }, { "epoch": 0.7274755173258751, "grad_norm": 1.2554136514663696, "learning_rate": 5.916845003281983e-06, "loss": 0.0700816661119461, "step": 5381 }, { "epoch": 0.7276107106946405, "grad_norm": 0.8446027636528015, "learning_rate": 5.911399070213234e-06, "loss": 0.04976761341094971, "step": 5382 }, { "epoch": 0.7277459040634057, "grad_norm": 0.3828921318054199, "learning_rate": 5.905955029439665e-06, "loss": 0.04100614786148071, "step": 5383 }, { "epoch": 0.727881097432171, "grad_norm": 0.42416954040527344, "learning_rate": 5.900512882094754e-06, "loss": 0.06199878454208374, "step": 5384 }, { "epoch": 0.7280162908009362, "grad_norm": 0.4000873565673828, "learning_rate": 5.8950726293115855e-06, "loss": 0.05113799124956131, "step": 5385 }, { "epoch": 0.7281514841697014, "grad_norm": 0.36619076132774353, "learning_rate": 5.889634272222844e-06, "loss": 0.05405128374695778, "step": 5386 }, { "epoch": 0.7282866775384668, "grad_norm": 0.3242407739162445, "learning_rate": 5.8841978119608345e-06, "loss": 0.028411703184247017, "step": 5387 }, { "epoch": 0.728421870907232, "grad_norm": 0.5705392956733704, "learning_rate": 5.878763249657452e-06, "loss": 0.053586747497320175, "step": 5388 }, { "epoch": 0.7285570642759973, "grad_norm": 0.7255110740661621, "learning_rate": 5.873330586444202e-06, "loss": 0.04739254713058472, "step": 5389 }, { "epoch": 0.7286922576447625, "grad_norm": 0.5211935043334961, "learning_rate": 5.867899823452193e-06, "loss": 0.05576735734939575, "step": 5390 }, { "epoch": 0.7288274510135277, "grad_norm": 0.28238874673843384, "learning_rate": 5.862470961812133e-06, "loss": 0.03129733353853226, "step": 5391 }, { "epoch": 0.7289626443822931, "grad_norm": 0.49288153648376465, "learning_rate": 5.857044002654357e-06, "loss": 0.04071573540568352, "step": 5392 }, { "epoch": 0.7290978377510583, "grad_norm": 0.27442362904548645, "learning_rate": 5.851618947108764e-06, "loss": 0.04104681685566902, "step": 5393 }, { "epoch": 0.7292330311198236, "grad_norm": 1.0412551164627075, "learning_rate": 5.8461957963048984e-06, "loss": 0.06329309940338135, "step": 5394 }, { "epoch": 0.7293682244885888, "grad_norm": 0.862945020198822, "learning_rate": 5.840774551371882e-06, "loss": 0.038138359785079956, "step": 5395 }, { "epoch": 0.7295034178573541, "grad_norm": 1.227352499961853, "learning_rate": 5.8353552134384405e-06, "loss": 0.04771539568901062, "step": 5396 }, { "epoch": 0.7296386112261194, "grad_norm": 0.7169512510299683, "learning_rate": 5.829937783632926e-06, "loss": 0.07296678423881531, "step": 5397 }, { "epoch": 0.7297738045948846, "grad_norm": 0.3882494866847992, "learning_rate": 5.824522263083256e-06, "loss": 0.042264342308044434, "step": 5398 }, { "epoch": 0.7299089979636498, "grad_norm": 0.5537630915641785, "learning_rate": 5.8191086529169855e-06, "loss": 0.06330940872430801, "step": 5399 }, { "epoch": 0.7300441913324152, "grad_norm": 0.5714014768600464, "learning_rate": 5.813696954261253e-06, "loss": 0.048753999173641205, "step": 5400 }, { "epoch": 0.7301793847011804, "grad_norm": 0.6927962303161621, "learning_rate": 5.8082871682428e-06, "loss": 0.07524000108242035, "step": 5401 }, { "epoch": 0.7303145780699457, "grad_norm": 0.41377681493759155, "learning_rate": 5.802879295987975e-06, "loss": 0.05869010090827942, "step": 5402 }, { "epoch": 0.7304497714387109, "grad_norm": 0.6390061378479004, "learning_rate": 5.797473338622722e-06, "loss": 0.056198105216026306, "step": 5403 }, { "epoch": 0.7305849648074761, "grad_norm": 0.33836090564727783, "learning_rate": 5.792069297272599e-06, "loss": 0.03806114196777344, "step": 5404 }, { "epoch": 0.7307201581762415, "grad_norm": 0.44180917739868164, "learning_rate": 5.7866671730627485e-06, "loss": 0.06397104263305664, "step": 5405 }, { "epoch": 0.7308553515450067, "grad_norm": 0.3084310293197632, "learning_rate": 5.781266967117925e-06, "loss": 0.06115628033876419, "step": 5406 }, { "epoch": 0.730990544913772, "grad_norm": 0.6314193606376648, "learning_rate": 5.7758686805624815e-06, "loss": 0.0561547577381134, "step": 5407 }, { "epoch": 0.7311257382825372, "grad_norm": 1.0682895183563232, "learning_rate": 5.7704723145203605e-06, "loss": 0.045564793050289154, "step": 5408 }, { "epoch": 0.7312609316513025, "grad_norm": 0.38729023933410645, "learning_rate": 5.765077870115126e-06, "loss": 0.04134967178106308, "step": 5409 }, { "epoch": 0.7313961250200678, "grad_norm": 0.6590896248817444, "learning_rate": 5.759685348469928e-06, "loss": 0.05257289111614227, "step": 5410 }, { "epoch": 0.731531318388833, "grad_norm": 0.4695395529270172, "learning_rate": 5.754294750707514e-06, "loss": 0.04570161551237106, "step": 5411 }, { "epoch": 0.7316665117575983, "grad_norm": 1.559465765953064, "learning_rate": 5.748906077950237e-06, "loss": 0.04657503589987755, "step": 5412 }, { "epoch": 0.7318017051263636, "grad_norm": 0.7578441500663757, "learning_rate": 5.743519331320042e-06, "loss": 0.053437016904354095, "step": 5413 }, { "epoch": 0.7319368984951288, "grad_norm": 0.7532030940055847, "learning_rate": 5.73813451193849e-06, "loss": 0.04919366538524628, "step": 5414 }, { "epoch": 0.7320720918638941, "grad_norm": 0.6908899545669556, "learning_rate": 5.7327516209267225e-06, "loss": 0.04903233423829079, "step": 5415 }, { "epoch": 0.7322072852326593, "grad_norm": 0.30994343757629395, "learning_rate": 5.727370659405486e-06, "loss": 0.03947867453098297, "step": 5416 }, { "epoch": 0.7323424786014247, "grad_norm": 0.8648495078086853, "learning_rate": 5.7219916284951265e-06, "loss": 0.057804450392723083, "step": 5417 }, { "epoch": 0.7324776719701899, "grad_norm": 0.5805301666259766, "learning_rate": 5.716614529315582e-06, "loss": 0.0589471310377121, "step": 5418 }, { "epoch": 0.7326128653389551, "grad_norm": 0.26550325751304626, "learning_rate": 5.711239362986401e-06, "loss": 0.035116612911224365, "step": 5419 }, { "epoch": 0.7327480587077204, "grad_norm": 0.3858984410762787, "learning_rate": 5.705866130626719e-06, "loss": 0.059017881751060486, "step": 5420 }, { "epoch": 0.7328832520764856, "grad_norm": 0.3619306683540344, "learning_rate": 5.700494833355271e-06, "loss": 0.03745993226766586, "step": 5421 }, { "epoch": 0.733018445445251, "grad_norm": 0.5689400434494019, "learning_rate": 5.69512547229039e-06, "loss": 0.062376707792282104, "step": 5422 }, { "epoch": 0.7331536388140162, "grad_norm": 0.835976243019104, "learning_rate": 5.689758048550001e-06, "loss": 0.06354323029518127, "step": 5423 }, { "epoch": 0.7332888321827814, "grad_norm": 1.202174425125122, "learning_rate": 5.684392563251644e-06, "loss": 0.051449280232191086, "step": 5424 }, { "epoch": 0.7334240255515467, "grad_norm": 0.4447128474712372, "learning_rate": 5.679029017512422e-06, "loss": 0.04517205059528351, "step": 5425 }, { "epoch": 0.733559218920312, "grad_norm": 0.33622944355010986, "learning_rate": 5.6736674124490684e-06, "loss": 0.032995931804180145, "step": 5426 }, { "epoch": 0.7336944122890772, "grad_norm": 0.3162919878959656, "learning_rate": 5.6683077491778935e-06, "loss": 0.04157820716500282, "step": 5427 }, { "epoch": 0.7338296056578425, "grad_norm": 0.3667377829551697, "learning_rate": 5.6629500288148044e-06, "loss": 0.05399259924888611, "step": 5428 }, { "epoch": 0.7339647990266077, "grad_norm": 0.9378178119659424, "learning_rate": 5.657594252475319e-06, "loss": 0.060357775539159775, "step": 5429 }, { "epoch": 0.734099992395373, "grad_norm": 0.38600993156433105, "learning_rate": 5.652240421274521e-06, "loss": 0.046939365565776825, "step": 5430 }, { "epoch": 0.7342351857641383, "grad_norm": 0.6062623858451843, "learning_rate": 5.646888536327121e-06, "loss": 0.05704595893621445, "step": 5431 }, { "epoch": 0.7343703791329035, "grad_norm": 1.3618842363357544, "learning_rate": 5.641538598747403e-06, "loss": 0.0682445615530014, "step": 5432 }, { "epoch": 0.7345055725016688, "grad_norm": 0.37157899141311646, "learning_rate": 5.6361906096492495e-06, "loss": 0.04282565787434578, "step": 5433 }, { "epoch": 0.734640765870434, "grad_norm": 0.5693402290344238, "learning_rate": 5.630844570146157e-06, "loss": 0.07153460383415222, "step": 5434 }, { "epoch": 0.7347759592391994, "grad_norm": 1.0029247999191284, "learning_rate": 5.625500481351176e-06, "loss": 0.06501775234937668, "step": 5435 }, { "epoch": 0.7349111526079646, "grad_norm": 0.43693169951438904, "learning_rate": 5.6201583443769895e-06, "loss": 0.050835371017456055, "step": 5436 }, { "epoch": 0.7350463459767298, "grad_norm": 0.6364457011222839, "learning_rate": 5.614818160335857e-06, "loss": 0.048228323459625244, "step": 5437 }, { "epoch": 0.7351815393454951, "grad_norm": 0.6963417530059814, "learning_rate": 5.6094799303396315e-06, "loss": 0.045394234359264374, "step": 5438 }, { "epoch": 0.7353167327142603, "grad_norm": 1.1840342283248901, "learning_rate": 5.6041436554997595e-06, "loss": 0.047025345265865326, "step": 5439 }, { "epoch": 0.7354519260830257, "grad_norm": 1.1665226221084595, "learning_rate": 5.598809336927278e-06, "loss": 0.040745411068201065, "step": 5440 }, { "epoch": 0.7355871194517909, "grad_norm": 0.7079965472221375, "learning_rate": 5.5934769757328325e-06, "loss": 0.0491214394569397, "step": 5441 }, { "epoch": 0.7357223128205561, "grad_norm": 1.2897837162017822, "learning_rate": 5.588146573026642e-06, "loss": 0.06136471778154373, "step": 5442 }, { "epoch": 0.7358575061893214, "grad_norm": 1.7740062475204468, "learning_rate": 5.582818129918525e-06, "loss": 0.05140011012554169, "step": 5443 }, { "epoch": 0.7359926995580867, "grad_norm": 1.4889432191848755, "learning_rate": 5.5774916475178915e-06, "loss": 0.08720873296260834, "step": 5444 }, { "epoch": 0.736127892926852, "grad_norm": 0.5965356826782227, "learning_rate": 5.572167126933738e-06, "loss": 0.04267352819442749, "step": 5445 }, { "epoch": 0.7362630862956172, "grad_norm": 0.8960931897163391, "learning_rate": 5.566844569274669e-06, "loss": 0.054328806698322296, "step": 5446 }, { "epoch": 0.7363982796643824, "grad_norm": 0.301082044839859, "learning_rate": 5.5615239756488665e-06, "loss": 0.039215702563524246, "step": 5447 }, { "epoch": 0.7365334730331478, "grad_norm": 0.46671172976493835, "learning_rate": 5.556205347164104e-06, "loss": 0.047793835401535034, "step": 5448 }, { "epoch": 0.736668666401913, "grad_norm": 0.6154031157493591, "learning_rate": 5.550888684927746e-06, "loss": 0.06589873135089874, "step": 5449 }, { "epoch": 0.7368038597706782, "grad_norm": 0.28910574316978455, "learning_rate": 5.545573990046752e-06, "loss": 0.04198184609413147, "step": 5450 }, { "epoch": 0.7369390531394435, "grad_norm": 0.4902563989162445, "learning_rate": 5.540261263627672e-06, "loss": 0.053771331906318665, "step": 5451 }, { "epoch": 0.7370742465082087, "grad_norm": 0.48701468110084534, "learning_rate": 5.534950506776644e-06, "loss": 0.041878048330545425, "step": 5452 }, { "epoch": 0.7372094398769741, "grad_norm": 0.4950914978981018, "learning_rate": 5.529641720599393e-06, "loss": 0.06427861750125885, "step": 5453 }, { "epoch": 0.7373446332457393, "grad_norm": 0.11448574811220169, "learning_rate": 5.52433490620124e-06, "loss": 0.025246743112802505, "step": 5454 }, { "epoch": 0.7374798266145045, "grad_norm": 1.035756230354309, "learning_rate": 5.519030064687082e-06, "loss": 0.04920538514852524, "step": 5455 }, { "epoch": 0.7376150199832698, "grad_norm": 1.4957166910171509, "learning_rate": 5.51372719716143e-06, "loss": 0.05387274548411369, "step": 5456 }, { "epoch": 0.7377502133520351, "grad_norm": 0.6066105365753174, "learning_rate": 5.508426304728363e-06, "loss": 0.062377989292144775, "step": 5457 }, { "epoch": 0.7378854067208004, "grad_norm": 0.6234009861946106, "learning_rate": 5.503127388491552e-06, "loss": 0.05049778148531914, "step": 5458 }, { "epoch": 0.7380206000895656, "grad_norm": 0.6380067467689514, "learning_rate": 5.497830449554266e-06, "loss": 0.060820288956165314, "step": 5459 }, { "epoch": 0.7381557934583308, "grad_norm": 0.7095192670822144, "learning_rate": 5.492535489019344e-06, "loss": 0.05636731535196304, "step": 5460 }, { "epoch": 0.7382909868270962, "grad_norm": 0.4989028573036194, "learning_rate": 5.4872425079892454e-06, "loss": 0.05664017051458359, "step": 5461 }, { "epoch": 0.7384261801958614, "grad_norm": 0.41198912262916565, "learning_rate": 5.481951507565973e-06, "loss": 0.05192377418279648, "step": 5462 }, { "epoch": 0.7385613735646267, "grad_norm": 0.4872404932975769, "learning_rate": 5.476662488851159e-06, "loss": 0.04623319208621979, "step": 5463 }, { "epoch": 0.7386965669333919, "grad_norm": 0.6754307150840759, "learning_rate": 5.471375452946e-06, "loss": 0.06387482583522797, "step": 5464 }, { "epoch": 0.7388317603021571, "grad_norm": 0.4037375748157501, "learning_rate": 5.466090400951279e-06, "loss": 0.03927309811115265, "step": 5465 }, { "epoch": 0.7389669536709225, "grad_norm": 0.2377721667289734, "learning_rate": 5.460807333967387e-06, "loss": 0.04169538989663124, "step": 5466 }, { "epoch": 0.7391021470396877, "grad_norm": 0.34345486760139465, "learning_rate": 5.455526253094267e-06, "loss": 0.04141313210129738, "step": 5467 }, { "epoch": 0.739237340408453, "grad_norm": 0.4850858151912689, "learning_rate": 5.450247159431486e-06, "loss": 0.05807226896286011, "step": 5468 }, { "epoch": 0.7393725337772182, "grad_norm": 0.5401327610015869, "learning_rate": 5.44497005407817e-06, "loss": 0.05638475716114044, "step": 5469 }, { "epoch": 0.7395077271459835, "grad_norm": 0.355792373418808, "learning_rate": 5.439694938133042e-06, "loss": 0.05783563852310181, "step": 5470 }, { "epoch": 0.7396429205147488, "grad_norm": 0.5744714736938477, "learning_rate": 5.434421812694409e-06, "loss": 0.05246324837207794, "step": 5471 }, { "epoch": 0.739778113883514, "grad_norm": 0.45978790521621704, "learning_rate": 5.4291506788601624e-06, "loss": 0.04424493759870529, "step": 5472 }, { "epoch": 0.7399133072522793, "grad_norm": 0.31267258524894714, "learning_rate": 5.423881537727785e-06, "loss": 0.05124234780669212, "step": 5473 }, { "epoch": 0.7400485006210445, "grad_norm": 0.4105251431465149, "learning_rate": 5.418614390394338e-06, "loss": 0.055374015122652054, "step": 5474 }, { "epoch": 0.7401836939898098, "grad_norm": 0.23387347161769867, "learning_rate": 5.413349237956469e-06, "loss": 0.041248027235269547, "step": 5475 }, { "epoch": 0.7403188873585751, "grad_norm": 0.6171667575836182, "learning_rate": 5.4080860815104125e-06, "loss": 0.05616402626037598, "step": 5476 }, { "epoch": 0.7404540807273403, "grad_norm": 0.41180717945098877, "learning_rate": 5.402824922151977e-06, "loss": 0.043565187603235245, "step": 5477 }, { "epoch": 0.7405892740961055, "grad_norm": 1.1337555646896362, "learning_rate": 5.397565760976577e-06, "loss": 0.052514851093292236, "step": 5478 }, { "epoch": 0.7407244674648709, "grad_norm": 0.6680711507797241, "learning_rate": 5.392308599079193e-06, "loss": 0.04831764101982117, "step": 5479 }, { "epoch": 0.7408596608336361, "grad_norm": 0.5328572392463684, "learning_rate": 5.3870534375543916e-06, "loss": 0.047862708568573, "step": 5480 }, { "epoch": 0.7409948542024014, "grad_norm": 0.4246765077114105, "learning_rate": 5.381800277496328e-06, "loss": 0.05253169685602188, "step": 5481 }, { "epoch": 0.7411300475711666, "grad_norm": 0.26996853947639465, "learning_rate": 5.376549119998731e-06, "loss": 0.046397864818573, "step": 5482 }, { "epoch": 0.7412652409399318, "grad_norm": 0.5127127170562744, "learning_rate": 5.3712999661549314e-06, "loss": 0.04420596361160278, "step": 5483 }, { "epoch": 0.7414004343086972, "grad_norm": 1.6158188581466675, "learning_rate": 5.366052817057826e-06, "loss": 0.0653582438826561, "step": 5484 }, { "epoch": 0.7415356276774624, "grad_norm": 0.5706088542938232, "learning_rate": 5.360807673799899e-06, "loss": 0.06273028999567032, "step": 5485 }, { "epoch": 0.7416708210462277, "grad_norm": 1.9467107057571411, "learning_rate": 5.355564537473214e-06, "loss": 0.06342384219169617, "step": 5486 }, { "epoch": 0.7418060144149929, "grad_norm": 0.6369274854660034, "learning_rate": 5.35032340916942e-06, "loss": 0.04993153735995293, "step": 5487 }, { "epoch": 0.7419412077837582, "grad_norm": 0.44693541526794434, "learning_rate": 5.345084289979755e-06, "loss": 0.061812981963157654, "step": 5488 }, { "epoch": 0.7420764011525235, "grad_norm": 0.31033197045326233, "learning_rate": 5.339847180995026e-06, "loss": 0.05872730910778046, "step": 5489 }, { "epoch": 0.7422115945212887, "grad_norm": 0.9868068695068359, "learning_rate": 5.33461208330563e-06, "loss": 0.05589687451720238, "step": 5490 }, { "epoch": 0.742346787890054, "grad_norm": 0.5339412093162537, "learning_rate": 5.32937899800154e-06, "loss": 0.057909876108169556, "step": 5491 }, { "epoch": 0.7424819812588193, "grad_norm": 0.29535794258117676, "learning_rate": 5.324147926172307e-06, "loss": 0.03965213522315025, "step": 5492 }, { "epoch": 0.7426171746275845, "grad_norm": 0.3467995822429657, "learning_rate": 5.318918868907084e-06, "loss": 0.04146319627761841, "step": 5493 }, { "epoch": 0.7427523679963498, "grad_norm": 0.32661059498786926, "learning_rate": 5.313691827294568e-06, "loss": 0.04507768154144287, "step": 5494 }, { "epoch": 0.742887561365115, "grad_norm": 0.2553118169307709, "learning_rate": 5.308466802423072e-06, "loss": 0.037207067012786865, "step": 5495 }, { "epoch": 0.7430227547338804, "grad_norm": 0.2132217288017273, "learning_rate": 5.303243795380471e-06, "loss": 0.035676877945661545, "step": 5496 }, { "epoch": 0.7431579481026456, "grad_norm": 0.5859035849571228, "learning_rate": 5.298022807254215e-06, "loss": 0.06544259190559387, "step": 5497 }, { "epoch": 0.7432931414714108, "grad_norm": 0.547611653804779, "learning_rate": 5.292803839131358e-06, "loss": 0.04794768989086151, "step": 5498 }, { "epoch": 0.7434283348401761, "grad_norm": 0.5438745617866516, "learning_rate": 5.287586892098496e-06, "loss": 0.04789666831493378, "step": 5499 }, { "epoch": 0.7435635282089413, "grad_norm": 0.3389785885810852, "learning_rate": 5.282371967241842e-06, "loss": 0.04364372044801712, "step": 5500 }, { "epoch": 0.7436987215777067, "grad_norm": 1.0312086343765259, "learning_rate": 5.277159065647164e-06, "loss": 0.057772405445575714, "step": 5501 }, { "epoch": 0.7438339149464719, "grad_norm": 0.36524340510368347, "learning_rate": 5.271948188399814e-06, "loss": 0.06014310568571091, "step": 5502 }, { "epoch": 0.7439691083152371, "grad_norm": 0.2583935856819153, "learning_rate": 5.266739336584735e-06, "loss": 0.04450896382331848, "step": 5503 }, { "epoch": 0.7441043016840024, "grad_norm": 0.9652280807495117, "learning_rate": 5.261532511286422e-06, "loss": 0.047141849994659424, "step": 5504 }, { "epoch": 0.7442394950527677, "grad_norm": 0.7472803592681885, "learning_rate": 5.256327713588977e-06, "loss": 0.049192577600479126, "step": 5505 }, { "epoch": 0.7443746884215329, "grad_norm": 0.9693590402603149, "learning_rate": 5.25112494457606e-06, "loss": 0.04846014082431793, "step": 5506 }, { "epoch": 0.7445098817902982, "grad_norm": 0.46338823437690735, "learning_rate": 5.245924205330919e-06, "loss": 0.052900612354278564, "step": 5507 }, { "epoch": 0.7446450751590634, "grad_norm": 0.9405046701431274, "learning_rate": 5.240725496936373e-06, "loss": 0.055649518966674805, "step": 5508 }, { "epoch": 0.7447802685278287, "grad_norm": 0.628115713596344, "learning_rate": 5.2355288204748145e-06, "loss": 0.05605718493461609, "step": 5509 }, { "epoch": 0.744915461896594, "grad_norm": 0.4606287181377411, "learning_rate": 5.230334177028233e-06, "loss": 0.029851146042346954, "step": 5510 }, { "epoch": 0.7450506552653592, "grad_norm": 0.7744247317314148, "learning_rate": 5.2251415676781726e-06, "loss": 0.04809856414794922, "step": 5511 }, { "epoch": 0.7451858486341245, "grad_norm": 0.40339159965515137, "learning_rate": 5.2199509935057655e-06, "loss": 0.06350082159042358, "step": 5512 }, { "epoch": 0.7453210420028897, "grad_norm": 1.1671186685562134, "learning_rate": 5.214762455591713e-06, "loss": 0.06291911005973816, "step": 5513 }, { "epoch": 0.7454562353716551, "grad_norm": 0.5708240866661072, "learning_rate": 5.209575955016295e-06, "loss": 0.059872016310691833, "step": 5514 }, { "epoch": 0.7455914287404203, "grad_norm": 0.6044837832450867, "learning_rate": 5.204391492859377e-06, "loss": 0.037675123661756516, "step": 5515 }, { "epoch": 0.7457266221091855, "grad_norm": 0.3899818956851959, "learning_rate": 5.199209070200388e-06, "loss": 0.04870859906077385, "step": 5516 }, { "epoch": 0.7458618154779508, "grad_norm": 0.4968286454677582, "learning_rate": 5.194028688118332e-06, "loss": 0.04478497803211212, "step": 5517 }, { "epoch": 0.745997008846716, "grad_norm": 0.38747715950012207, "learning_rate": 5.188850347691797e-06, "loss": 0.04321112111210823, "step": 5518 }, { "epoch": 0.7461322022154814, "grad_norm": 0.4650703966617584, "learning_rate": 5.183674049998934e-06, "loss": 0.061393894255161285, "step": 5519 }, { "epoch": 0.7462673955842466, "grad_norm": 1.437670111656189, "learning_rate": 5.178499796117485e-06, "loss": 0.05953851342201233, "step": 5520 }, { "epoch": 0.7464025889530118, "grad_norm": 0.7268595695495605, "learning_rate": 5.173327587124753e-06, "loss": 0.06345154345035553, "step": 5521 }, { "epoch": 0.7465377823217771, "grad_norm": 0.6755163669586182, "learning_rate": 5.16815742409762e-06, "loss": 0.07344269752502441, "step": 5522 }, { "epoch": 0.7466729756905424, "grad_norm": 0.2622813880443573, "learning_rate": 5.16298930811254e-06, "loss": 0.03183408081531525, "step": 5523 }, { "epoch": 0.7468081690593077, "grad_norm": 0.4086357355117798, "learning_rate": 5.15782324024554e-06, "loss": 0.047601908445358276, "step": 5524 }, { "epoch": 0.7469433624280729, "grad_norm": 0.5096064209938049, "learning_rate": 5.152659221572231e-06, "loss": 0.04264703765511513, "step": 5525 }, { "epoch": 0.7470785557968381, "grad_norm": 0.3501160144805908, "learning_rate": 5.147497253167784e-06, "loss": 0.049505382776260376, "step": 5526 }, { "epoch": 0.7472137491656035, "grad_norm": 0.29180678725242615, "learning_rate": 5.142337336106948e-06, "loss": 0.04939737915992737, "step": 5527 }, { "epoch": 0.7473489425343687, "grad_norm": 0.3182583451271057, "learning_rate": 5.137179471464047e-06, "loss": 0.03303126245737076, "step": 5528 }, { "epoch": 0.747484135903134, "grad_norm": 0.4041508436203003, "learning_rate": 5.13202366031297e-06, "loss": 0.06243613362312317, "step": 5529 }, { "epoch": 0.7476193292718992, "grad_norm": 0.4116409718990326, "learning_rate": 5.1268699037272e-06, "loss": 0.03966081142425537, "step": 5530 }, { "epoch": 0.7477545226406644, "grad_norm": 0.8240025639533997, "learning_rate": 5.121718202779756e-06, "loss": 0.06657412648200989, "step": 5531 }, { "epoch": 0.7478897160094298, "grad_norm": 0.46376833319664, "learning_rate": 5.116568558543264e-06, "loss": 0.05384666845202446, "step": 5532 }, { "epoch": 0.748024909378195, "grad_norm": 0.8592454791069031, "learning_rate": 5.1114209720899025e-06, "loss": 0.03947149217128754, "step": 5533 }, { "epoch": 0.7481601027469602, "grad_norm": 0.7284401059150696, "learning_rate": 5.106275444491423e-06, "loss": 0.03800439089536667, "step": 5534 }, { "epoch": 0.7482952961157255, "grad_norm": 0.43126651644706726, "learning_rate": 5.101131976819165e-06, "loss": 0.04651375487446785, "step": 5535 }, { "epoch": 0.7484304894844908, "grad_norm": 0.6406553983688354, "learning_rate": 5.095990570144008e-06, "loss": 0.05920161306858063, "step": 5536 }, { "epoch": 0.7485656828532561, "grad_norm": 0.7216412425041199, "learning_rate": 5.090851225536432e-06, "loss": 0.042914777994155884, "step": 5537 }, { "epoch": 0.7487008762220213, "grad_norm": 0.3997083604335785, "learning_rate": 5.085713944066474e-06, "loss": 0.049191270023584366, "step": 5538 }, { "epoch": 0.7488360695907865, "grad_norm": 0.6152087450027466, "learning_rate": 5.080578726803741e-06, "loss": 0.05972042679786682, "step": 5539 }, { "epoch": 0.7489712629595519, "grad_norm": 0.7942944765090942, "learning_rate": 5.075445574817415e-06, "loss": 0.05272885784506798, "step": 5540 }, { "epoch": 0.7491064563283171, "grad_norm": 0.42381125688552856, "learning_rate": 5.07031448917624e-06, "loss": 0.040979981422424316, "step": 5541 }, { "epoch": 0.7492416496970824, "grad_norm": 0.45558756589889526, "learning_rate": 5.065185470948544e-06, "loss": 0.054111696779727936, "step": 5542 }, { "epoch": 0.7493768430658476, "grad_norm": 0.5484164357185364, "learning_rate": 5.060058521202211e-06, "loss": 0.05012672394514084, "step": 5543 }, { "epoch": 0.7495120364346128, "grad_norm": 0.6709420084953308, "learning_rate": 5.054933641004703e-06, "loss": 0.05820755660533905, "step": 5544 }, { "epoch": 0.7496472298033782, "grad_norm": 0.363085001707077, "learning_rate": 5.0498108314230425e-06, "loss": 0.04157222807407379, "step": 5545 }, { "epoch": 0.7497824231721434, "grad_norm": 0.37748420238494873, "learning_rate": 5.044690093523823e-06, "loss": 0.039329893887043, "step": 5546 }, { "epoch": 0.7499176165409087, "grad_norm": 0.7921310663223267, "learning_rate": 5.039571428373219e-06, "loss": 0.05081334710121155, "step": 5547 }, { "epoch": 0.7500528099096739, "grad_norm": 0.5201166868209839, "learning_rate": 5.034454837036959e-06, "loss": 0.054199472069740295, "step": 5548 }, { "epoch": 0.7501880032784392, "grad_norm": 0.47418633103370667, "learning_rate": 5.0293403205803455e-06, "loss": 0.033734939992427826, "step": 5549 }, { "epoch": 0.7503231966472045, "grad_norm": 0.4220098853111267, "learning_rate": 5.024227880068247e-06, "loss": 0.06173555552959442, "step": 5550 }, { "epoch": 0.7504583900159697, "grad_norm": 0.4469703435897827, "learning_rate": 5.019117516565096e-06, "loss": 0.04640002176165581, "step": 5551 }, { "epoch": 0.750593583384735, "grad_norm": 0.7153790593147278, "learning_rate": 5.014009231134908e-06, "loss": 0.07060258090496063, "step": 5552 }, { "epoch": 0.7507287767535002, "grad_norm": 1.1831549406051636, "learning_rate": 5.008903024841248e-06, "loss": 0.053222209215164185, "step": 5553 }, { "epoch": 0.7508639701222655, "grad_norm": 0.7888247966766357, "learning_rate": 5.0037988987472595e-06, "loss": 0.05328439176082611, "step": 5554 }, { "epoch": 0.7509991634910308, "grad_norm": 0.5309898257255554, "learning_rate": 4.998696853915646e-06, "loss": 0.04646754264831543, "step": 5555 }, { "epoch": 0.751134356859796, "grad_norm": 0.5059635043144226, "learning_rate": 4.993596891408676e-06, "loss": 0.043650705367326736, "step": 5556 }, { "epoch": 0.7512695502285612, "grad_norm": 0.4907800853252411, "learning_rate": 4.988499012288198e-06, "loss": 0.057316869497299194, "step": 5557 }, { "epoch": 0.7514047435973266, "grad_norm": 0.9628327488899231, "learning_rate": 4.983403217615614e-06, "loss": 0.045806288719177246, "step": 5558 }, { "epoch": 0.7515399369660918, "grad_norm": 0.989477813243866, "learning_rate": 4.978309508451896e-06, "loss": 0.050319552421569824, "step": 5559 }, { "epoch": 0.7516751303348571, "grad_norm": 0.34060031175613403, "learning_rate": 4.973217885857578e-06, "loss": 0.03778659552335739, "step": 5560 }, { "epoch": 0.7518103237036223, "grad_norm": 0.2964553236961365, "learning_rate": 4.968128350892763e-06, "loss": 0.06380490958690643, "step": 5561 }, { "epoch": 0.7519455170723875, "grad_norm": 0.7513662576675415, "learning_rate": 4.963040904617131e-06, "loss": 0.05820570886135101, "step": 5562 }, { "epoch": 0.7520807104411529, "grad_norm": 0.5941129922866821, "learning_rate": 4.9579555480898955e-06, "loss": 0.04170685261487961, "step": 5563 }, { "epoch": 0.7522159038099181, "grad_norm": 0.333189994096756, "learning_rate": 4.952872282369873e-06, "loss": 0.05616297572851181, "step": 5564 }, { "epoch": 0.7523510971786834, "grad_norm": 0.3728758990764618, "learning_rate": 4.947791108515417e-06, "loss": 0.054476816207170486, "step": 5565 }, { "epoch": 0.7524862905474486, "grad_norm": 0.3514253497123718, "learning_rate": 4.942712027584453e-06, "loss": 0.04808138310909271, "step": 5566 }, { "epoch": 0.7526214839162139, "grad_norm": 1.2773067951202393, "learning_rate": 4.937635040634485e-06, "loss": 0.061209291219711304, "step": 5567 }, { "epoch": 0.7527566772849792, "grad_norm": 1.3021620512008667, "learning_rate": 4.9325601487225545e-06, "loss": 0.07051263004541397, "step": 5568 }, { "epoch": 0.7528918706537444, "grad_norm": 0.6259169578552246, "learning_rate": 4.927487352905289e-06, "loss": 0.06368683278560638, "step": 5569 }, { "epoch": 0.7530270640225097, "grad_norm": 0.5547968745231628, "learning_rate": 4.92241665423887e-06, "loss": 0.05438164249062538, "step": 5570 }, { "epoch": 0.753162257391275, "grad_norm": 0.5523261427879333, "learning_rate": 4.917348053779039e-06, "loss": 0.05029526352882385, "step": 5571 }, { "epoch": 0.7532974507600402, "grad_norm": 0.7147053480148315, "learning_rate": 4.912281552581122e-06, "loss": 0.03933337330818176, "step": 5572 }, { "epoch": 0.7534326441288055, "grad_norm": 0.5926932692527771, "learning_rate": 4.907217151699969e-06, "loss": 0.03968716412782669, "step": 5573 }, { "epoch": 0.7535678374975707, "grad_norm": 0.5737786889076233, "learning_rate": 4.9021548521900305e-06, "loss": 0.055317461490631104, "step": 5574 }, { "epoch": 0.753703030866336, "grad_norm": 0.508925199508667, "learning_rate": 4.8970946551053005e-06, "loss": 0.050451233983039856, "step": 5575 }, { "epoch": 0.7538382242351013, "grad_norm": 0.34880030155181885, "learning_rate": 4.892036561499339e-06, "loss": 0.04335852712392807, "step": 5576 }, { "epoch": 0.7539734176038665, "grad_norm": 0.374421089887619, "learning_rate": 4.8869805724252675e-06, "loss": 0.033583879470825195, "step": 5577 }, { "epoch": 0.7541086109726318, "grad_norm": 0.288886159658432, "learning_rate": 4.8819266889357665e-06, "loss": 0.0498289130628109, "step": 5578 }, { "epoch": 0.754243804341397, "grad_norm": 0.697701632976532, "learning_rate": 4.876874912083088e-06, "loss": 0.05230463296175003, "step": 5579 }, { "epoch": 0.7543789977101624, "grad_norm": 1.3653676509857178, "learning_rate": 4.871825242919037e-06, "loss": 0.05933082103729248, "step": 5580 }, { "epoch": 0.7545141910789276, "grad_norm": 0.3522177040576935, "learning_rate": 4.866777682494978e-06, "loss": 0.06317342817783356, "step": 5581 }, { "epoch": 0.7546493844476928, "grad_norm": 0.3955438733100891, "learning_rate": 4.861732231861845e-06, "loss": 0.044080331921577454, "step": 5582 }, { "epoch": 0.7547845778164581, "grad_norm": 0.46337470412254333, "learning_rate": 4.85668889207012e-06, "loss": 0.045066311955451965, "step": 5583 }, { "epoch": 0.7549197711852234, "grad_norm": 0.60178142786026, "learning_rate": 4.851647664169862e-06, "loss": 0.04821908473968506, "step": 5584 }, { "epoch": 0.7550549645539886, "grad_norm": 0.3464260399341583, "learning_rate": 4.846608549210679e-06, "loss": 0.05057625100016594, "step": 5585 }, { "epoch": 0.7551901579227539, "grad_norm": 0.5496123433113098, "learning_rate": 4.841571548241741e-06, "loss": 0.052937984466552734, "step": 5586 }, { "epoch": 0.7553253512915191, "grad_norm": 0.5719283819198608, "learning_rate": 4.836536662311777e-06, "loss": 0.045376766473054886, "step": 5587 }, { "epoch": 0.7554605446602844, "grad_norm": 0.30261653661727905, "learning_rate": 4.8315038924690745e-06, "loss": 0.04904872179031372, "step": 5588 }, { "epoch": 0.7555957380290497, "grad_norm": 0.5772355198860168, "learning_rate": 4.82647323976149e-06, "loss": 0.059018149971961975, "step": 5589 }, { "epoch": 0.7557309313978149, "grad_norm": 0.37943848967552185, "learning_rate": 4.821444705236429e-06, "loss": 0.041968148201704025, "step": 5590 }, { "epoch": 0.7558661247665802, "grad_norm": 0.44476258754730225, "learning_rate": 4.81641828994086e-06, "loss": 0.05268823355436325, "step": 5591 }, { "epoch": 0.7560013181353454, "grad_norm": 0.3439156413078308, "learning_rate": 4.811393994921308e-06, "loss": 0.047693632543087006, "step": 5592 }, { "epoch": 0.7561365115041108, "grad_norm": 0.28831806778907776, "learning_rate": 4.806371821223854e-06, "loss": 0.05170748382806778, "step": 5593 }, { "epoch": 0.756271704872876, "grad_norm": 0.9020028114318848, "learning_rate": 4.801351769894151e-06, "loss": 0.043787021189928055, "step": 5594 }, { "epoch": 0.7564068982416412, "grad_norm": 0.8622465133666992, "learning_rate": 4.796333841977394e-06, "loss": 0.06588828563690186, "step": 5595 }, { "epoch": 0.7565420916104065, "grad_norm": 0.5422247648239136, "learning_rate": 4.791318038518345e-06, "loss": 0.04037340357899666, "step": 5596 }, { "epoch": 0.7566772849791717, "grad_norm": 1.0936359167099, "learning_rate": 4.7863043605613185e-06, "loss": 0.058387063443660736, "step": 5597 }, { "epoch": 0.7568124783479371, "grad_norm": 0.23124346137046814, "learning_rate": 4.7812928091501865e-06, "loss": 0.04374666512012482, "step": 5598 }, { "epoch": 0.7569476717167023, "grad_norm": 0.36500871181488037, "learning_rate": 4.7762833853283935e-06, "loss": 0.0665564090013504, "step": 5599 }, { "epoch": 0.7570828650854675, "grad_norm": 0.914710283279419, "learning_rate": 4.77127609013891e-06, "loss": 0.05417275428771973, "step": 5600 }, { "epoch": 0.7572180584542328, "grad_norm": 0.4734802544116974, "learning_rate": 4.766270924624295e-06, "loss": 0.058981843292713165, "step": 5601 }, { "epoch": 0.7573532518229981, "grad_norm": 0.8628573417663574, "learning_rate": 4.761267889826647e-06, "loss": 0.06330180168151855, "step": 5602 }, { "epoch": 0.7574884451917634, "grad_norm": 0.7319051027297974, "learning_rate": 4.756266986787619e-06, "loss": 0.0619068518280983, "step": 5603 }, { "epoch": 0.7576236385605286, "grad_norm": 0.6942102909088135, "learning_rate": 4.751268216548439e-06, "loss": 0.05534280836582184, "step": 5604 }, { "epoch": 0.7577588319292938, "grad_norm": 0.1495506763458252, "learning_rate": 4.746271580149861e-06, "loss": 0.029495157301425934, "step": 5605 }, { "epoch": 0.7578940252980592, "grad_norm": 0.6326749920845032, "learning_rate": 4.7412770786322244e-06, "loss": 0.05507656931877136, "step": 5606 }, { "epoch": 0.7580292186668244, "grad_norm": 0.36095789074897766, "learning_rate": 4.736284713035406e-06, "loss": 0.042208775877952576, "step": 5607 }, { "epoch": 0.7581644120355897, "grad_norm": 1.6832358837127686, "learning_rate": 4.731294484398843e-06, "loss": 0.06477434933185577, "step": 5608 }, { "epoch": 0.7582996054043549, "grad_norm": 0.6530440449714661, "learning_rate": 4.726306393761526e-06, "loss": 0.05780060589313507, "step": 5609 }, { "epoch": 0.7584347987731201, "grad_norm": 0.502464234828949, "learning_rate": 4.721320442162001e-06, "loss": 0.05332181602716446, "step": 5610 }, { "epoch": 0.7585699921418855, "grad_norm": 0.44271019101142883, "learning_rate": 4.716336630638378e-06, "loss": 0.0516282320022583, "step": 5611 }, { "epoch": 0.7587051855106507, "grad_norm": 0.6140710711479187, "learning_rate": 4.711354960228306e-06, "loss": 0.05469930171966553, "step": 5612 }, { "epoch": 0.7588403788794159, "grad_norm": 0.4506165087223053, "learning_rate": 4.706375431968998e-06, "loss": 0.042360689491033554, "step": 5613 }, { "epoch": 0.7589755722481812, "grad_norm": 1.0104496479034424, "learning_rate": 4.701398046897218e-06, "loss": 0.05663909390568733, "step": 5614 }, { "epoch": 0.7591107656169465, "grad_norm": 0.4034721851348877, "learning_rate": 4.696422806049277e-06, "loss": 0.04604434594511986, "step": 5615 }, { "epoch": 0.7592459589857118, "grad_norm": 0.5353710651397705, "learning_rate": 4.69144971046106e-06, "loss": 0.0535174161195755, "step": 5616 }, { "epoch": 0.759381152354477, "grad_norm": 0.552902340888977, "learning_rate": 4.686478761167984e-06, "loss": 0.04603348672389984, "step": 5617 }, { "epoch": 0.7595163457232422, "grad_norm": 0.3774576187133789, "learning_rate": 4.681509959205028e-06, "loss": 0.060617804527282715, "step": 5618 }, { "epoch": 0.7596515390920076, "grad_norm": 0.526112973690033, "learning_rate": 4.676543305606724e-06, "loss": 0.03966321796178818, "step": 5619 }, { "epoch": 0.7597867324607728, "grad_norm": 0.32452404499053955, "learning_rate": 4.67157880140715e-06, "loss": 0.03446151316165924, "step": 5620 }, { "epoch": 0.7599219258295381, "grad_norm": 0.8323810696601868, "learning_rate": 4.666616447639952e-06, "loss": 0.054547131061553955, "step": 5621 }, { "epoch": 0.7600571191983033, "grad_norm": 0.9185550212860107, "learning_rate": 4.661656245338314e-06, "loss": 0.07025061547756195, "step": 5622 }, { "epoch": 0.7601923125670685, "grad_norm": 0.5083182454109192, "learning_rate": 4.656698195534978e-06, "loss": 0.05385303497314453, "step": 5623 }, { "epoch": 0.7603275059358339, "grad_norm": 0.2188534438610077, "learning_rate": 4.651742299262233e-06, "loss": 0.03369933366775513, "step": 5624 }, { "epoch": 0.7604626993045991, "grad_norm": 0.4826721251010895, "learning_rate": 4.646788557551921e-06, "loss": 0.06902796775102615, "step": 5625 }, { "epoch": 0.7605978926733644, "grad_norm": 0.5610843300819397, "learning_rate": 4.641836971435445e-06, "loss": 0.05385258048772812, "step": 5626 }, { "epoch": 0.7607330860421296, "grad_norm": 0.3415719270706177, "learning_rate": 4.63688754194375e-06, "loss": 0.05460992455482483, "step": 5627 }, { "epoch": 0.7608682794108949, "grad_norm": 0.5464599132537842, "learning_rate": 4.6319402701073295e-06, "loss": 0.045591652393341064, "step": 5628 }, { "epoch": 0.7610034727796602, "grad_norm": 0.4167063534259796, "learning_rate": 4.6269951569562355e-06, "loss": 0.0458734929561615, "step": 5629 }, { "epoch": 0.7611386661484254, "grad_norm": 0.7480295300483704, "learning_rate": 4.622052203520061e-06, "loss": 0.04887177050113678, "step": 5630 }, { "epoch": 0.7612738595171907, "grad_norm": 0.7025947570800781, "learning_rate": 4.617111410827968e-06, "loss": 0.06044713407754898, "step": 5631 }, { "epoch": 0.761409052885956, "grad_norm": 0.4838401675224304, "learning_rate": 4.612172779908639e-06, "loss": 0.06201377511024475, "step": 5632 }, { "epoch": 0.7615442462547212, "grad_norm": 0.3738311529159546, "learning_rate": 4.607236311790335e-06, "loss": 0.03970567137002945, "step": 5633 }, { "epoch": 0.7616794396234865, "grad_norm": 0.9668474793434143, "learning_rate": 4.602302007500854e-06, "loss": 0.03886076807975769, "step": 5634 }, { "epoch": 0.7618146329922517, "grad_norm": 0.4892688989639282, "learning_rate": 4.597369868067537e-06, "loss": 0.0554858073592186, "step": 5635 }, { "epoch": 0.761949826361017, "grad_norm": 0.4261897802352905, "learning_rate": 4.592439894517296e-06, "loss": 0.059930577874183655, "step": 5636 }, { "epoch": 0.7620850197297823, "grad_norm": 0.8358299732208252, "learning_rate": 4.587512087876559e-06, "loss": 0.05172547698020935, "step": 5637 }, { "epoch": 0.7622202130985475, "grad_norm": 0.2497706413269043, "learning_rate": 4.582586449171336e-06, "loss": 0.041028089821338654, "step": 5638 }, { "epoch": 0.7623554064673128, "grad_norm": 0.7382331490516663, "learning_rate": 4.577662979427168e-06, "loss": 0.07519373297691345, "step": 5639 }, { "epoch": 0.762490599836078, "grad_norm": 0.625405490398407, "learning_rate": 4.572741679669147e-06, "loss": 0.05997096374630928, "step": 5640 }, { "epoch": 0.7626257932048432, "grad_norm": 0.4168659746646881, "learning_rate": 4.567822550921912e-06, "loss": 0.042681142687797546, "step": 5641 }, { "epoch": 0.7627609865736086, "grad_norm": 0.9903636574745178, "learning_rate": 4.562905594209647e-06, "loss": 0.054901592433452606, "step": 5642 }, { "epoch": 0.7628961799423738, "grad_norm": 0.602508008480072, "learning_rate": 4.557990810556102e-06, "loss": 0.06749507784843445, "step": 5643 }, { "epoch": 0.7630313733111391, "grad_norm": 0.7250024080276489, "learning_rate": 4.553078200984553e-06, "loss": 0.07625281065702438, "step": 5644 }, { "epoch": 0.7631665666799043, "grad_norm": 0.3466350734233856, "learning_rate": 4.548167766517832e-06, "loss": 0.027551140636205673, "step": 5645 }, { "epoch": 0.7633017600486696, "grad_norm": 0.46788859367370605, "learning_rate": 4.543259508178318e-06, "loss": 0.04973139241337776, "step": 5646 }, { "epoch": 0.7634369534174349, "grad_norm": 0.3555099666118622, "learning_rate": 4.538353426987931e-06, "loss": 0.04602910578250885, "step": 5647 }, { "epoch": 0.7635721467862001, "grad_norm": 0.5704427361488342, "learning_rate": 4.533449523968154e-06, "loss": 0.051924578845500946, "step": 5648 }, { "epoch": 0.7637073401549654, "grad_norm": 0.814926266670227, "learning_rate": 4.528547800140001e-06, "loss": 0.07376405596733093, "step": 5649 }, { "epoch": 0.7638425335237307, "grad_norm": 0.7312045693397522, "learning_rate": 4.523648256524037e-06, "loss": 0.047536760568618774, "step": 5650 }, { "epoch": 0.7639777268924959, "grad_norm": 0.6229032278060913, "learning_rate": 4.518750894140372e-06, "loss": 0.05637187883257866, "step": 5651 }, { "epoch": 0.7641129202612612, "grad_norm": 1.309388279914856, "learning_rate": 4.513855714008659e-06, "loss": 0.04895530641078949, "step": 5652 }, { "epoch": 0.7642481136300264, "grad_norm": 0.5115934014320374, "learning_rate": 4.508962717148111e-06, "loss": 0.04286891967058182, "step": 5653 }, { "epoch": 0.7643833069987918, "grad_norm": 0.5343486666679382, "learning_rate": 4.504071904577469e-06, "loss": 0.045203179121017456, "step": 5654 }, { "epoch": 0.764518500367557, "grad_norm": 0.6865848898887634, "learning_rate": 4.499183277315027e-06, "loss": 0.05576498061418533, "step": 5655 }, { "epoch": 0.7646536937363222, "grad_norm": 0.5834632515907288, "learning_rate": 4.494296836378625e-06, "loss": 0.04854917898774147, "step": 5656 }, { "epoch": 0.7647888871050875, "grad_norm": 0.44065696001052856, "learning_rate": 4.4894125827856415e-06, "loss": 0.049645356833934784, "step": 5657 }, { "epoch": 0.7649240804738527, "grad_norm": 0.3580378592014313, "learning_rate": 4.4845305175530105e-06, "loss": 0.03369799628853798, "step": 5658 }, { "epoch": 0.7650592738426181, "grad_norm": 0.5414225459098816, "learning_rate": 4.479650641697201e-06, "loss": 0.0440206453204155, "step": 5659 }, { "epoch": 0.7651944672113833, "grad_norm": 0.8068389892578125, "learning_rate": 4.4747729562342305e-06, "loss": 0.0768054723739624, "step": 5660 }, { "epoch": 0.7653296605801485, "grad_norm": 0.508140504360199, "learning_rate": 4.469897462179656e-06, "loss": 0.06765955686569214, "step": 5661 }, { "epoch": 0.7654648539489138, "grad_norm": 0.71355140209198, "learning_rate": 4.46502416054858e-06, "loss": 0.05604205280542374, "step": 5662 }, { "epoch": 0.765600047317679, "grad_norm": 0.8222368955612183, "learning_rate": 4.460153052355663e-06, "loss": 0.05523822084069252, "step": 5663 }, { "epoch": 0.7657352406864444, "grad_norm": 0.8126984238624573, "learning_rate": 4.455284138615074e-06, "loss": 0.04764276742935181, "step": 5664 }, { "epoch": 0.7658704340552096, "grad_norm": 0.6107057332992554, "learning_rate": 4.4504174203405656e-06, "loss": 0.035220615565776825, "step": 5665 }, { "epoch": 0.7660056274239748, "grad_norm": 0.8327341675758362, "learning_rate": 4.445552898545407e-06, "loss": 0.05195525288581848, "step": 5666 }, { "epoch": 0.7661408207927402, "grad_norm": 0.48244842886924744, "learning_rate": 4.440690574242413e-06, "loss": 0.053544119000434875, "step": 5667 }, { "epoch": 0.7662760141615054, "grad_norm": 0.3419904410839081, "learning_rate": 4.435830448443961e-06, "loss": 0.0494111031293869, "step": 5668 }, { "epoch": 0.7664112075302706, "grad_norm": 0.43288305401802063, "learning_rate": 4.430972522161934e-06, "loss": 0.049941033124923706, "step": 5669 }, { "epoch": 0.7665464008990359, "grad_norm": 0.3529101610183716, "learning_rate": 4.426116796407794e-06, "loss": 0.0279175266623497, "step": 5670 }, { "epoch": 0.7666815942678011, "grad_norm": 0.26189494132995605, "learning_rate": 4.421263272192523e-06, "loss": 0.0417625866830349, "step": 5671 }, { "epoch": 0.7668167876365665, "grad_norm": 0.539402425289154, "learning_rate": 4.416411950526648e-06, "loss": 0.05093270540237427, "step": 5672 }, { "epoch": 0.7669519810053317, "grad_norm": 0.8766064643859863, "learning_rate": 4.411562832420252e-06, "loss": 0.04850289598107338, "step": 5673 }, { "epoch": 0.7670871743740969, "grad_norm": 0.3340434730052948, "learning_rate": 4.406715918882929e-06, "loss": 0.043039269745349884, "step": 5674 }, { "epoch": 0.7672223677428622, "grad_norm": 0.49644145369529724, "learning_rate": 4.4018712109238475e-06, "loss": 0.052819907665252686, "step": 5675 }, { "epoch": 0.7673575611116275, "grad_norm": 0.3417988717556, "learning_rate": 4.3970287095516965e-06, "loss": 0.0382411926984787, "step": 5676 }, { "epoch": 0.7674927544803928, "grad_norm": 0.7697397470474243, "learning_rate": 4.39218841577471e-06, "loss": 0.060521915555000305, "step": 5677 }, { "epoch": 0.767627947849158, "grad_norm": 0.7141456604003906, "learning_rate": 4.387350330600662e-06, "loss": 0.048661477863788605, "step": 5678 }, { "epoch": 0.7677631412179232, "grad_norm": 0.9669989347457886, "learning_rate": 4.382514455036864e-06, "loss": 0.04918695241212845, "step": 5679 }, { "epoch": 0.7678983345866885, "grad_norm": 0.5380557775497437, "learning_rate": 4.377680790090182e-06, "loss": 0.051476217806339264, "step": 5680 }, { "epoch": 0.7680335279554538, "grad_norm": 0.5008072853088379, "learning_rate": 4.372849336767004e-06, "loss": 0.044195614755153656, "step": 5681 }, { "epoch": 0.7681687213242191, "grad_norm": 0.5809420943260193, "learning_rate": 4.3680200960732645e-06, "loss": 0.051470693200826645, "step": 5682 }, { "epoch": 0.7683039146929843, "grad_norm": 0.4443592429161072, "learning_rate": 4.363193069014439e-06, "loss": 0.05219653248786926, "step": 5683 }, { "epoch": 0.7684391080617495, "grad_norm": 1.040880799293518, "learning_rate": 4.3583682565955325e-06, "loss": 0.04851394146680832, "step": 5684 }, { "epoch": 0.7685743014305149, "grad_norm": 0.7633553743362427, "learning_rate": 4.3535456598211074e-06, "loss": 0.059917330741882324, "step": 5685 }, { "epoch": 0.7687094947992801, "grad_norm": 0.3302479684352875, "learning_rate": 4.348725279695251e-06, "loss": 0.034069906920194626, "step": 5686 }, { "epoch": 0.7688446881680454, "grad_norm": 0.4986826479434967, "learning_rate": 4.343907117221591e-06, "loss": 0.04978461563587189, "step": 5687 }, { "epoch": 0.7689798815368106, "grad_norm": 0.7870305776596069, "learning_rate": 4.339091173403294e-06, "loss": 0.07076722383499146, "step": 5688 }, { "epoch": 0.7691150749055758, "grad_norm": 0.37520065903663635, "learning_rate": 4.334277449243061e-06, "loss": 0.0574164092540741, "step": 5689 }, { "epoch": 0.7692502682743412, "grad_norm": 1.180557131767273, "learning_rate": 4.329465945743144e-06, "loss": 0.03569352626800537, "step": 5690 }, { "epoch": 0.7693854616431064, "grad_norm": 0.47424882650375366, "learning_rate": 4.32465666390532e-06, "loss": 0.0582297146320343, "step": 5691 }, { "epoch": 0.7695206550118716, "grad_norm": 0.2785654366016388, "learning_rate": 4.319849604730905e-06, "loss": 0.04558062180876732, "step": 5692 }, { "epoch": 0.7696558483806369, "grad_norm": 0.43649712204933167, "learning_rate": 4.315044769220758e-06, "loss": 0.06203489005565643, "step": 5693 }, { "epoch": 0.7697910417494022, "grad_norm": 0.6950480937957764, "learning_rate": 4.310242158375264e-06, "loss": 0.05505382642149925, "step": 5694 }, { "epoch": 0.7699262351181675, "grad_norm": 0.5324304103851318, "learning_rate": 4.30544177319436e-06, "loss": 0.049787141382694244, "step": 5695 }, { "epoch": 0.7700614284869327, "grad_norm": 0.44713354110717773, "learning_rate": 4.300643614677511e-06, "loss": 0.05204059183597565, "step": 5696 }, { "epoch": 0.7701966218556979, "grad_norm": 0.3508920669555664, "learning_rate": 4.2958476838237165e-06, "loss": 0.0416136234998703, "step": 5697 }, { "epoch": 0.7703318152244633, "grad_norm": 0.7075027227401733, "learning_rate": 4.2910539816315166e-06, "loss": 0.04444601386785507, "step": 5698 }, { "epoch": 0.7704670085932285, "grad_norm": 0.790549635887146, "learning_rate": 4.286262509098979e-06, "loss": 0.05803542211651802, "step": 5699 }, { "epoch": 0.7706022019619938, "grad_norm": 1.0832196474075317, "learning_rate": 4.28147326722373e-06, "loss": 0.03805040568113327, "step": 5700 }, { "epoch": 0.770737395330759, "grad_norm": 0.6148602366447449, "learning_rate": 4.2766862570028965e-06, "loss": 0.07428151369094849, "step": 5701 }, { "epoch": 0.7708725886995242, "grad_norm": 0.7821617722511292, "learning_rate": 4.2719014794331715e-06, "loss": 0.04477187618613243, "step": 5702 }, { "epoch": 0.7710077820682896, "grad_norm": 0.6982478499412537, "learning_rate": 4.267118935510767e-06, "loss": 0.06008635461330414, "step": 5703 }, { "epoch": 0.7711429754370548, "grad_norm": 0.43888673186302185, "learning_rate": 4.2623386262314306e-06, "loss": 0.06447956711053848, "step": 5704 }, { "epoch": 0.7712781688058201, "grad_norm": 0.41614997386932373, "learning_rate": 4.257560552590461e-06, "loss": 0.052055105566978455, "step": 5705 }, { "epoch": 0.7714133621745853, "grad_norm": 0.34694284200668335, "learning_rate": 4.252784715582661e-06, "loss": 0.04736727476119995, "step": 5706 }, { "epoch": 0.7715485555433506, "grad_norm": 0.5842350125312805, "learning_rate": 4.2480111162024e-06, "loss": 0.031814415007829666, "step": 5707 }, { "epoch": 0.7716837489121159, "grad_norm": 0.4248739778995514, "learning_rate": 4.243239755443561e-06, "loss": 0.04953247308731079, "step": 5708 }, { "epoch": 0.7718189422808811, "grad_norm": 0.5501111149787903, "learning_rate": 4.238470634299567e-06, "loss": 0.041759759187698364, "step": 5709 }, { "epoch": 0.7719541356496464, "grad_norm": 0.5856011509895325, "learning_rate": 4.233703753763375e-06, "loss": 0.060414426028728485, "step": 5710 }, { "epoch": 0.7720893290184117, "grad_norm": 0.39352521300315857, "learning_rate": 4.228939114827469e-06, "loss": 0.06005595624446869, "step": 5711 }, { "epoch": 0.7722245223871769, "grad_norm": 1.007741093635559, "learning_rate": 4.224176718483881e-06, "loss": 0.04893037676811218, "step": 5712 }, { "epoch": 0.7723597157559422, "grad_norm": 0.6353216171264648, "learning_rate": 4.219416565724165e-06, "loss": 0.05638323351740837, "step": 5713 }, { "epoch": 0.7724949091247074, "grad_norm": 0.47796788811683655, "learning_rate": 4.21465865753941e-06, "loss": 0.05115942656993866, "step": 5714 }, { "epoch": 0.7726301024934727, "grad_norm": 0.7587752938270569, "learning_rate": 4.209902994920236e-06, "loss": 0.051104605197906494, "step": 5715 }, { "epoch": 0.772765295862238, "grad_norm": 0.9474219083786011, "learning_rate": 4.205149578856794e-06, "loss": 0.05527476966381073, "step": 5716 }, { "epoch": 0.7729004892310032, "grad_norm": 1.4634283781051636, "learning_rate": 4.200398410338779e-06, "loss": 0.05003051832318306, "step": 5717 }, { "epoch": 0.7730356825997685, "grad_norm": 0.29603874683380127, "learning_rate": 4.1956494903554056e-06, "loss": 0.045849766582250595, "step": 5718 }, { "epoch": 0.7731708759685337, "grad_norm": 0.5924423336982727, "learning_rate": 4.190902819895425e-06, "loss": 0.04351881891489029, "step": 5719 }, { "epoch": 0.773306069337299, "grad_norm": 0.4455014765262604, "learning_rate": 4.186158399947118e-06, "loss": 0.04066503047943115, "step": 5720 }, { "epoch": 0.7734412627060643, "grad_norm": 0.4561283588409424, "learning_rate": 4.181416231498292e-06, "loss": 0.043446414172649384, "step": 5721 }, { "epoch": 0.7735764560748295, "grad_norm": 0.4668290913105011, "learning_rate": 4.176676315536306e-06, "loss": 0.04991087689995766, "step": 5722 }, { "epoch": 0.7737116494435948, "grad_norm": 0.610687255859375, "learning_rate": 4.171938653048027e-06, "loss": 0.05808749049901962, "step": 5723 }, { "epoch": 0.77384684281236, "grad_norm": 0.5134508013725281, "learning_rate": 4.1672032450198616e-06, "loss": 0.06386997550725937, "step": 5724 }, { "epoch": 0.7739820361811253, "grad_norm": 0.3268800973892212, "learning_rate": 4.16247009243775e-06, "loss": 0.051299914717674255, "step": 5725 }, { "epoch": 0.7741172295498906, "grad_norm": 0.31994378566741943, "learning_rate": 4.1577391962871504e-06, "loss": 0.04202529788017273, "step": 5726 }, { "epoch": 0.7742524229186558, "grad_norm": 1.1951512098312378, "learning_rate": 4.153010557553076e-06, "loss": 0.056732963770627975, "step": 5727 }, { "epoch": 0.7743876162874211, "grad_norm": 0.2234276682138443, "learning_rate": 4.148284177220045e-06, "loss": 0.05105416476726532, "step": 5728 }, { "epoch": 0.7745228096561864, "grad_norm": 0.6800549030303955, "learning_rate": 4.143560056272117e-06, "loss": 0.039719775319099426, "step": 5729 }, { "epoch": 0.7746580030249516, "grad_norm": 0.6751525402069092, "learning_rate": 4.1388381956928796e-06, "loss": 0.052033185958862305, "step": 5730 }, { "epoch": 0.7747931963937169, "grad_norm": 0.39217954874038696, "learning_rate": 4.134118596465443e-06, "loss": 0.049040380865335464, "step": 5731 }, { "epoch": 0.7749283897624821, "grad_norm": 0.5880181193351746, "learning_rate": 4.1294012595724675e-06, "loss": 0.05009182170033455, "step": 5732 }, { "epoch": 0.7750635831312475, "grad_norm": 0.3974986970424652, "learning_rate": 4.1246861859961114e-06, "loss": 0.05804557725787163, "step": 5733 }, { "epoch": 0.7751987765000127, "grad_norm": 0.4695318639278412, "learning_rate": 4.119973376718089e-06, "loss": 0.053791116923093796, "step": 5734 }, { "epoch": 0.7753339698687779, "grad_norm": 0.3485158383846283, "learning_rate": 4.115262832719628e-06, "loss": 0.05303540080785751, "step": 5735 }, { "epoch": 0.7754691632375432, "grad_norm": 0.59285569190979, "learning_rate": 4.110554554981486e-06, "loss": 0.047546081244945526, "step": 5736 }, { "epoch": 0.7756043566063084, "grad_norm": 0.562120258808136, "learning_rate": 4.1058485444839655e-06, "loss": 0.07138626277446747, "step": 5737 }, { "epoch": 0.7757395499750738, "grad_norm": 0.5767315626144409, "learning_rate": 4.101144802206862e-06, "loss": 0.07587608695030212, "step": 5738 }, { "epoch": 0.775874743343839, "grad_norm": 0.5098394155502319, "learning_rate": 4.096443329129535e-06, "loss": 0.03905707970261574, "step": 5739 }, { "epoch": 0.7760099367126042, "grad_norm": 0.6379435658454895, "learning_rate": 4.091744126230853e-06, "loss": 0.06477160006761551, "step": 5740 }, { "epoch": 0.7761451300813695, "grad_norm": 0.42736637592315674, "learning_rate": 4.08704719448921e-06, "loss": 0.04898831248283386, "step": 5741 }, { "epoch": 0.7762803234501348, "grad_norm": 0.4801262319087982, "learning_rate": 4.082352534882543e-06, "loss": 0.043267395347356796, "step": 5742 }, { "epoch": 0.7764155168189001, "grad_norm": 0.3270072340965271, "learning_rate": 4.07766014838829e-06, "loss": 0.05136318877339363, "step": 5743 }, { "epoch": 0.7765507101876653, "grad_norm": 0.46256282925605774, "learning_rate": 4.072970035983443e-06, "loss": 0.051225755363702774, "step": 5744 }, { "epoch": 0.7766859035564305, "grad_norm": 0.7822785377502441, "learning_rate": 4.068282198644505e-06, "loss": 0.05353916436433792, "step": 5745 }, { "epoch": 0.7768210969251959, "grad_norm": 0.40267154574394226, "learning_rate": 4.06359663734751e-06, "loss": 0.03540894016623497, "step": 5746 }, { "epoch": 0.7769562902939611, "grad_norm": 0.2805401086807251, "learning_rate": 4.058913353068013e-06, "loss": 0.04988737404346466, "step": 5747 }, { "epoch": 0.7770914836627263, "grad_norm": 0.654659628868103, "learning_rate": 4.0542323467810985e-06, "loss": 0.0653553307056427, "step": 5748 }, { "epoch": 0.7772266770314916, "grad_norm": 0.57283616065979, "learning_rate": 4.049553619461381e-06, "loss": 0.06992589682340622, "step": 5749 }, { "epoch": 0.7773618704002568, "grad_norm": 0.443215012550354, "learning_rate": 4.044877172082997e-06, "loss": 0.04163704812526703, "step": 5750 }, { "epoch": 0.7774970637690222, "grad_norm": 0.9152970314025879, "learning_rate": 4.040203005619604e-06, "loss": 0.05481569468975067, "step": 5751 }, { "epoch": 0.7776322571377874, "grad_norm": 0.37695738673210144, "learning_rate": 4.035531121044392e-06, "loss": 0.04760066792368889, "step": 5752 }, { "epoch": 0.7777674505065526, "grad_norm": 0.4861292541027069, "learning_rate": 4.030861519330065e-06, "loss": 0.051728539168834686, "step": 5753 }, { "epoch": 0.7779026438753179, "grad_norm": 0.4190061390399933, "learning_rate": 4.026194201448868e-06, "loss": 0.04368724673986435, "step": 5754 }, { "epoch": 0.7780378372440832, "grad_norm": 0.454704612493515, "learning_rate": 4.021529168372558e-06, "loss": 0.0677175521850586, "step": 5755 }, { "epoch": 0.7781730306128485, "grad_norm": 0.8699820041656494, "learning_rate": 4.01686642107242e-06, "loss": 0.04995008930563927, "step": 5756 }, { "epoch": 0.7783082239816137, "grad_norm": 0.5254787802696228, "learning_rate": 4.0122059605192624e-06, "loss": 0.04606795310974121, "step": 5757 }, { "epoch": 0.7784434173503789, "grad_norm": 0.3809521198272705, "learning_rate": 4.007547787683412e-06, "loss": 0.047455623745918274, "step": 5758 }, { "epoch": 0.7785786107191442, "grad_norm": 0.4173734486103058, "learning_rate": 4.002891903534736e-06, "loss": 0.060535043478012085, "step": 5759 }, { "epoch": 0.7787138040879095, "grad_norm": 0.4707096517086029, "learning_rate": 3.998238309042611e-06, "loss": 0.06532557308673859, "step": 5760 }, { "epoch": 0.7788489974566748, "grad_norm": 0.49603208899497986, "learning_rate": 3.993587005175937e-06, "loss": 0.035698167979717255, "step": 5761 }, { "epoch": 0.77898419082544, "grad_norm": 0.6485784649848938, "learning_rate": 3.988937992903144e-06, "loss": 0.07103504240512848, "step": 5762 }, { "epoch": 0.7791193841942052, "grad_norm": 0.8104240894317627, "learning_rate": 3.9842912731921716e-06, "loss": 0.04359731078147888, "step": 5763 }, { "epoch": 0.7792545775629706, "grad_norm": 0.3764703571796417, "learning_rate": 3.979646847010506e-06, "loss": 0.045068494975566864, "step": 5764 }, { "epoch": 0.7793897709317358, "grad_norm": 0.4137573838233948, "learning_rate": 3.975004715325134e-06, "loss": 0.04682117700576782, "step": 5765 }, { "epoch": 0.7795249643005011, "grad_norm": 1.1215702295303345, "learning_rate": 3.970364879102572e-06, "loss": 0.04776846244931221, "step": 5766 }, { "epoch": 0.7796601576692663, "grad_norm": 0.5139415264129639, "learning_rate": 3.96572733930886e-06, "loss": 0.07105226814746857, "step": 5767 }, { "epoch": 0.7797953510380315, "grad_norm": 0.5522721409797668, "learning_rate": 3.961092096909552e-06, "loss": 0.053970180451869965, "step": 5768 }, { "epoch": 0.7799305444067969, "grad_norm": 0.47779467701911926, "learning_rate": 3.9564591528697455e-06, "loss": 0.03991309180855751, "step": 5769 }, { "epoch": 0.7800657377755621, "grad_norm": 0.28264862298965454, "learning_rate": 3.9518285081540275e-06, "loss": 0.04973236471414566, "step": 5770 }, { "epoch": 0.7802009311443274, "grad_norm": 0.4290755093097687, "learning_rate": 3.947200163726534e-06, "loss": 0.05141142010688782, "step": 5771 }, { "epoch": 0.7803361245130926, "grad_norm": 0.614381730556488, "learning_rate": 3.9425741205509055e-06, "loss": 0.06349176168441772, "step": 5772 }, { "epoch": 0.7804713178818579, "grad_norm": 0.5289402604103088, "learning_rate": 3.9379503795903065e-06, "loss": 0.05813773721456528, "step": 5773 }, { "epoch": 0.7806065112506232, "grad_norm": 0.6217710375785828, "learning_rate": 3.933328941807439e-06, "loss": 0.04701855778694153, "step": 5774 }, { "epoch": 0.7807417046193884, "grad_norm": 1.0539789199829102, "learning_rate": 3.928709808164491e-06, "loss": 0.0448966845870018, "step": 5775 }, { "epoch": 0.7808768979881536, "grad_norm": 0.35859671235084534, "learning_rate": 3.924092979623203e-06, "loss": 0.045018672943115234, "step": 5776 }, { "epoch": 0.781012091356919, "grad_norm": 0.5532165169715881, "learning_rate": 3.919478457144824e-06, "loss": 0.054671987891197205, "step": 5777 }, { "epoch": 0.7811472847256842, "grad_norm": 0.28777357935905457, "learning_rate": 3.914866241690115e-06, "loss": 0.04595384746789932, "step": 5778 }, { "epoch": 0.7812824780944495, "grad_norm": 0.3780209720134735, "learning_rate": 3.9102563342193695e-06, "loss": 0.06887669861316681, "step": 5779 }, { "epoch": 0.7814176714632147, "grad_norm": 0.3771103024482727, "learning_rate": 3.905648735692389e-06, "loss": 0.04795943200588226, "step": 5780 }, { "epoch": 0.7815528648319799, "grad_norm": 0.6279870271682739, "learning_rate": 3.901043447068508e-06, "loss": 0.0433628186583519, "step": 5781 }, { "epoch": 0.7816880582007453, "grad_norm": 0.5482854843139648, "learning_rate": 3.896440469306567e-06, "loss": 0.07337145507335663, "step": 5782 }, { "epoch": 0.7818232515695105, "grad_norm": 0.6327605843544006, "learning_rate": 3.891839803364934e-06, "loss": 0.043239280581474304, "step": 5783 }, { "epoch": 0.7819584449382758, "grad_norm": 0.5140485167503357, "learning_rate": 3.887241450201487e-06, "loss": 0.045149900019168854, "step": 5784 }, { "epoch": 0.782093638307041, "grad_norm": 0.703194797039032, "learning_rate": 3.882645410773629e-06, "loss": 0.06559029966592789, "step": 5785 }, { "epoch": 0.7822288316758063, "grad_norm": 0.24512290954589844, "learning_rate": 3.878051686038284e-06, "loss": 0.05819549784064293, "step": 5786 }, { "epoch": 0.7823640250445716, "grad_norm": 0.6596559286117554, "learning_rate": 3.873460276951889e-06, "loss": 0.05555453896522522, "step": 5787 }, { "epoch": 0.7824992184133368, "grad_norm": 0.9214807152748108, "learning_rate": 3.868871184470397e-06, "loss": 0.03861886262893677, "step": 5788 }, { "epoch": 0.7826344117821021, "grad_norm": 0.5653980374336243, "learning_rate": 3.864284409549282e-06, "loss": 0.05683492124080658, "step": 5789 }, { "epoch": 0.7827696051508674, "grad_norm": 0.5974285006523132, "learning_rate": 3.859699953143532e-06, "loss": 0.054988350719213486, "step": 5790 }, { "epoch": 0.7829047985196326, "grad_norm": 0.5913573503494263, "learning_rate": 3.855117816207665e-06, "loss": 0.048264771699905396, "step": 5791 }, { "epoch": 0.7830399918883979, "grad_norm": 0.8017600774765015, "learning_rate": 3.850537999695699e-06, "loss": 0.046860143542289734, "step": 5792 }, { "epoch": 0.7831751852571631, "grad_norm": 0.8446592092514038, "learning_rate": 3.845960504561179e-06, "loss": 0.05040036886930466, "step": 5793 }, { "epoch": 0.7833103786259284, "grad_norm": 0.4981122314929962, "learning_rate": 3.841385331757161e-06, "loss": 0.04143751040101051, "step": 5794 }, { "epoch": 0.7834455719946937, "grad_norm": 0.4162934124469757, "learning_rate": 3.8368124822362184e-06, "loss": 0.05737932026386261, "step": 5795 }, { "epoch": 0.7835807653634589, "grad_norm": 0.661861777305603, "learning_rate": 3.832241956950449e-06, "loss": 0.048417821526527405, "step": 5796 }, { "epoch": 0.7837159587322242, "grad_norm": 0.5370731353759766, "learning_rate": 3.82767375685146e-06, "loss": 0.056625284254550934, "step": 5797 }, { "epoch": 0.7838511521009894, "grad_norm": 0.31087347865104675, "learning_rate": 3.823107882890373e-06, "loss": 0.04839096963405609, "step": 5798 }, { "epoch": 0.7839863454697548, "grad_norm": 0.5822319388389587, "learning_rate": 3.8185443360178265e-06, "loss": 0.0583285391330719, "step": 5799 }, { "epoch": 0.78412153883852, "grad_norm": 0.3733569085597992, "learning_rate": 3.813983117183973e-06, "loss": 0.0516224168241024, "step": 5800 }, { "epoch": 0.7842567322072852, "grad_norm": 0.46516770124435425, "learning_rate": 3.8094242273384932e-06, "loss": 0.04963213950395584, "step": 5801 }, { "epoch": 0.7843919255760505, "grad_norm": 0.36023858189582825, "learning_rate": 3.804867667430555e-06, "loss": 0.0596126914024353, "step": 5802 }, { "epoch": 0.7845271189448157, "grad_norm": 0.5960729122161865, "learning_rate": 3.800313438408874e-06, "loss": 0.06885658204555511, "step": 5803 }, { "epoch": 0.784662312313581, "grad_norm": 0.5354682803153992, "learning_rate": 3.7957615412216582e-06, "loss": 0.05400829389691353, "step": 5804 }, { "epoch": 0.7847975056823463, "grad_norm": 0.8081690073013306, "learning_rate": 3.791211976816634e-06, "loss": 0.047955870628356934, "step": 5805 }, { "epoch": 0.7849326990511115, "grad_norm": 0.7521482706069946, "learning_rate": 3.786664746141057e-06, "loss": 0.07321782410144806, "step": 5806 }, { "epoch": 0.7850678924198768, "grad_norm": 0.656195878982544, "learning_rate": 3.782119850141669e-06, "loss": 0.04018782824277878, "step": 5807 }, { "epoch": 0.7852030857886421, "grad_norm": 0.3345497250556946, "learning_rate": 3.777577289764752e-06, "loss": 0.06700266897678375, "step": 5808 }, { "epoch": 0.7853382791574073, "grad_norm": 0.40408748388290405, "learning_rate": 3.7730370659560904e-06, "loss": 0.04518834874033928, "step": 5809 }, { "epoch": 0.7854734725261726, "grad_norm": 0.5429202318191528, "learning_rate": 3.7684991796609746e-06, "loss": 0.045586466789245605, "step": 5810 }, { "epoch": 0.7856086658949378, "grad_norm": 0.4379957914352417, "learning_rate": 3.7639636318242344e-06, "loss": 0.06153438985347748, "step": 5811 }, { "epoch": 0.7857438592637032, "grad_norm": 0.41803351044654846, "learning_rate": 3.7594304233901738e-06, "loss": 0.06180073320865631, "step": 5812 }, { "epoch": 0.7858790526324684, "grad_norm": 0.37024974822998047, "learning_rate": 3.754899555302645e-06, "loss": 0.05707992613315582, "step": 5813 }, { "epoch": 0.7860142460012336, "grad_norm": 0.9152238965034485, "learning_rate": 3.7503710285049964e-06, "loss": 0.04836093634366989, "step": 5814 }, { "epoch": 0.7861494393699989, "grad_norm": 0.29534226655960083, "learning_rate": 3.7458448439400888e-06, "loss": 0.05663030594587326, "step": 5815 }, { "epoch": 0.7862846327387641, "grad_norm": 0.9970876574516296, "learning_rate": 3.7413210025502985e-06, "loss": 0.055408746004104614, "step": 5816 }, { "epoch": 0.7864198261075295, "grad_norm": 0.9171462655067444, "learning_rate": 3.7367995052775123e-06, "loss": 0.04693305864930153, "step": 5817 }, { "epoch": 0.7865550194762947, "grad_norm": 0.39299800992012024, "learning_rate": 3.732280353063133e-06, "loss": 0.043476566672325134, "step": 5818 }, { "epoch": 0.7866902128450599, "grad_norm": 0.4400605261325836, "learning_rate": 3.727763546848074e-06, "loss": 0.05251064896583557, "step": 5819 }, { "epoch": 0.7868254062138252, "grad_norm": 0.2650921046733856, "learning_rate": 3.7232490875727544e-06, "loss": 0.04420124739408493, "step": 5820 }, { "epoch": 0.7869605995825905, "grad_norm": 0.5161288380622864, "learning_rate": 3.718736976177108e-06, "loss": 0.05436871945858002, "step": 5821 }, { "epoch": 0.7870957929513558, "grad_norm": 0.45217934250831604, "learning_rate": 3.71422721360058e-06, "loss": 0.051149845123291016, "step": 5822 }, { "epoch": 0.787230986320121, "grad_norm": 0.4663630723953247, "learning_rate": 3.709719800782133e-06, "loss": 0.04806032031774521, "step": 5823 }, { "epoch": 0.7873661796888862, "grad_norm": 0.5508677363395691, "learning_rate": 3.7052147386602304e-06, "loss": 0.04836851358413696, "step": 5824 }, { "epoch": 0.7875013730576516, "grad_norm": 0.33200010657310486, "learning_rate": 3.700712028172851e-06, "loss": 0.05179045349359512, "step": 5825 }, { "epoch": 0.7876365664264168, "grad_norm": 0.4456416666507721, "learning_rate": 3.696211670257481e-06, "loss": 0.06013600155711174, "step": 5826 }, { "epoch": 0.787771759795182, "grad_norm": 0.5219780802726746, "learning_rate": 3.691713665851117e-06, "loss": 0.0494326576590538, "step": 5827 }, { "epoch": 0.7879069531639473, "grad_norm": 0.5698608160018921, "learning_rate": 3.6872180158902764e-06, "loss": 0.05211539566516876, "step": 5828 }, { "epoch": 0.7880421465327125, "grad_norm": 0.4790526330471039, "learning_rate": 3.6827247213109705e-06, "loss": 0.05283583700656891, "step": 5829 }, { "epoch": 0.7881773399014779, "grad_norm": 0.3996173143386841, "learning_rate": 3.6782337830487294e-06, "loss": 0.05189451575279236, "step": 5830 }, { "epoch": 0.7883125332702431, "grad_norm": 0.8280686736106873, "learning_rate": 3.6737452020385886e-06, "loss": 0.04844554513692856, "step": 5831 }, { "epoch": 0.7884477266390083, "grad_norm": 0.406904399394989, "learning_rate": 3.6692589792150923e-06, "loss": 0.0553140714764595, "step": 5832 }, { "epoch": 0.7885829200077736, "grad_norm": 0.4483954608440399, "learning_rate": 3.6647751155123026e-06, "loss": 0.04909306764602661, "step": 5833 }, { "epoch": 0.7887181133765389, "grad_norm": 1.0544463396072388, "learning_rate": 3.660293611863782e-06, "loss": 0.049436070024967194, "step": 5834 }, { "epoch": 0.7888533067453042, "grad_norm": 0.558396577835083, "learning_rate": 3.655814469202602e-06, "loss": 0.04861228168010712, "step": 5835 }, { "epoch": 0.7889885001140694, "grad_norm": 0.8497613668441772, "learning_rate": 3.6513376884613446e-06, "loss": 0.06306800991296768, "step": 5836 }, { "epoch": 0.7891236934828346, "grad_norm": 0.47947338223457336, "learning_rate": 3.6468632705720934e-06, "loss": 0.03444236144423485, "step": 5837 }, { "epoch": 0.7892588868516, "grad_norm": 0.7960044741630554, "learning_rate": 3.6423912164664606e-06, "loss": 0.05294312536716461, "step": 5838 }, { "epoch": 0.7893940802203652, "grad_norm": 0.4889637231826782, "learning_rate": 3.637921527075534e-06, "loss": 0.05175085365772247, "step": 5839 }, { "epoch": 0.7895292735891305, "grad_norm": 0.4937731921672821, "learning_rate": 3.63345420332994e-06, "loss": 0.0552225336432457, "step": 5840 }, { "epoch": 0.7896644669578957, "grad_norm": 0.2001790553331375, "learning_rate": 3.628989246159795e-06, "loss": 0.03912050276994705, "step": 5841 }, { "epoch": 0.7897996603266609, "grad_norm": 0.6610744595527649, "learning_rate": 3.6245266564947205e-06, "loss": 0.06487545371055603, "step": 5842 }, { "epoch": 0.7899348536954263, "grad_norm": 0.5012447834014893, "learning_rate": 3.620066435263868e-06, "loss": 0.05035126209259033, "step": 5843 }, { "epoch": 0.7900700470641915, "grad_norm": 0.43423858284950256, "learning_rate": 3.6156085833958596e-06, "loss": 0.05305560678243637, "step": 5844 }, { "epoch": 0.7902052404329568, "grad_norm": 0.3528660833835602, "learning_rate": 3.6111531018188584e-06, "loss": 0.054082803428173065, "step": 5845 }, { "epoch": 0.790340433801722, "grad_norm": 0.4592651128768921, "learning_rate": 3.606699991460513e-06, "loss": 0.04173130542039871, "step": 5846 }, { "epoch": 0.7904756271704872, "grad_norm": 0.4069738984107971, "learning_rate": 3.602249253247986e-06, "loss": 0.05436393618583679, "step": 5847 }, { "epoch": 0.7906108205392526, "grad_norm": 0.375313937664032, "learning_rate": 3.5978008881079445e-06, "loss": 0.0516459122300148, "step": 5848 }, { "epoch": 0.7907460139080178, "grad_norm": 0.3917820155620575, "learning_rate": 3.5933548969665587e-06, "loss": 0.04290139675140381, "step": 5849 }, { "epoch": 0.7908812072767831, "grad_norm": 0.27167415618896484, "learning_rate": 3.5889112807495152e-06, "loss": 0.046996913850307465, "step": 5850 }, { "epoch": 0.7910164006455483, "grad_norm": 0.30987969040870667, "learning_rate": 3.5844700403819935e-06, "loss": 0.05544118583202362, "step": 5851 }, { "epoch": 0.7911515940143136, "grad_norm": 1.262507438659668, "learning_rate": 3.5800311767886847e-06, "loss": 0.06227273866534233, "step": 5852 }, { "epoch": 0.7912867873830789, "grad_norm": 0.5790842771530151, "learning_rate": 3.575594690893784e-06, "loss": 0.041585374623537064, "step": 5853 }, { "epoch": 0.7914219807518441, "grad_norm": 0.7852163910865784, "learning_rate": 3.5711605836209853e-06, "loss": 0.05485051870346069, "step": 5854 }, { "epoch": 0.7915571741206093, "grad_norm": 0.4399736821651459, "learning_rate": 3.566728855893505e-06, "loss": 0.05603671073913574, "step": 5855 }, { "epoch": 0.7916923674893747, "grad_norm": 0.40009763836860657, "learning_rate": 3.5622995086340466e-06, "loss": 0.04773164168000221, "step": 5856 }, { "epoch": 0.7918275608581399, "grad_norm": 0.3449288010597229, "learning_rate": 3.5578725427648233e-06, "loss": 0.048646703362464905, "step": 5857 }, { "epoch": 0.7919627542269052, "grad_norm": 0.28758126497268677, "learning_rate": 3.553447959207553e-06, "loss": 0.041172951459884644, "step": 5858 }, { "epoch": 0.7920979475956704, "grad_norm": 0.44380614161491394, "learning_rate": 3.5490257588834552e-06, "loss": 0.042261168360710144, "step": 5859 }, { "epoch": 0.7922331409644356, "grad_norm": 0.5089505910873413, "learning_rate": 3.5446059427132615e-06, "loss": 0.05467886105179787, "step": 5860 }, { "epoch": 0.792368334333201, "grad_norm": 0.3433547616004944, "learning_rate": 3.5401885116171977e-06, "loss": 0.04381384328007698, "step": 5861 }, { "epoch": 0.7925035277019662, "grad_norm": 0.5133224725723267, "learning_rate": 3.5357734665149983e-06, "loss": 0.057044968008995056, "step": 5862 }, { "epoch": 0.7926387210707315, "grad_norm": 0.5437789559364319, "learning_rate": 3.5313608083258975e-06, "loss": 0.05475904420018196, "step": 5863 }, { "epoch": 0.7927739144394967, "grad_norm": 0.46170106530189514, "learning_rate": 3.526950537968629e-06, "loss": 0.0631060004234314, "step": 5864 }, { "epoch": 0.792909107808262, "grad_norm": 0.4708152413368225, "learning_rate": 3.5225426563614466e-06, "loss": 0.046782486140728, "step": 5865 }, { "epoch": 0.7930443011770273, "grad_norm": 0.5431990027427673, "learning_rate": 3.518137164422088e-06, "loss": 0.0453324019908905, "step": 5866 }, { "epoch": 0.7931794945457925, "grad_norm": 0.8199841976165771, "learning_rate": 3.513734063067799e-06, "loss": 0.0767545998096466, "step": 5867 }, { "epoch": 0.7933146879145578, "grad_norm": 0.3561827540397644, "learning_rate": 3.5093333532153316e-06, "loss": 0.04865230619907379, "step": 5868 }, { "epoch": 0.793449881283323, "grad_norm": 1.4668605327606201, "learning_rate": 3.504935035780931e-06, "loss": 0.0693330466747284, "step": 5869 }, { "epoch": 0.7935850746520883, "grad_norm": 0.6234803199768066, "learning_rate": 3.500539111680364e-06, "loss": 0.05087678134441376, "step": 5870 }, { "epoch": 0.7937202680208536, "grad_norm": 0.4847146272659302, "learning_rate": 3.4961455818288683e-06, "loss": 0.060263291001319885, "step": 5871 }, { "epoch": 0.7938554613896188, "grad_norm": 0.35021916031837463, "learning_rate": 3.491754447141212e-06, "loss": 0.032333433628082275, "step": 5872 }, { "epoch": 0.7939906547583842, "grad_norm": 0.8613430261611938, "learning_rate": 3.4873657085316504e-06, "loss": 0.05604889616370201, "step": 5873 }, { "epoch": 0.7941258481271494, "grad_norm": 0.680248498916626, "learning_rate": 3.482979366913935e-06, "loss": 0.04539275914430618, "step": 5874 }, { "epoch": 0.7942610414959146, "grad_norm": 0.4233610928058624, "learning_rate": 3.4785954232013423e-06, "loss": 0.045897115021944046, "step": 5875 }, { "epoch": 0.7943962348646799, "grad_norm": 1.1722338199615479, "learning_rate": 3.4742138783066122e-06, "loss": 0.06935595721006393, "step": 5876 }, { "epoch": 0.7945314282334451, "grad_norm": 0.495280385017395, "learning_rate": 3.4698347331420206e-06, "loss": 0.048590466380119324, "step": 5877 }, { "epoch": 0.7946666216022105, "grad_norm": 0.6487596035003662, "learning_rate": 3.4654579886193223e-06, "loss": 0.05463692173361778, "step": 5878 }, { "epoch": 0.7948018149709757, "grad_norm": 0.48033493757247925, "learning_rate": 3.461083645649782e-06, "loss": 0.04509492963552475, "step": 5879 }, { "epoch": 0.7949370083397409, "grad_norm": 0.38070330023765564, "learning_rate": 3.4567117051441594e-06, "loss": 0.05485737323760986, "step": 5880 }, { "epoch": 0.7950722017085062, "grad_norm": 0.6430317163467407, "learning_rate": 3.4523421680127115e-06, "loss": 0.056696128100156784, "step": 5881 }, { "epoch": 0.7952073950772715, "grad_norm": 0.5140246748924255, "learning_rate": 3.447975035165209e-06, "loss": 0.04350459575653076, "step": 5882 }, { "epoch": 0.7953425884460367, "grad_norm": 0.6138967871665955, "learning_rate": 3.4436103075109076e-06, "loss": 0.04122386872768402, "step": 5883 }, { "epoch": 0.795477781814802, "grad_norm": 0.4643726944923401, "learning_rate": 3.4392479859585642e-06, "loss": 0.07189544290304184, "step": 5884 }, { "epoch": 0.7956129751835672, "grad_norm": 0.835229754447937, "learning_rate": 3.4348880714164416e-06, "loss": 0.04200848191976547, "step": 5885 }, { "epoch": 0.7957481685523325, "grad_norm": 1.0927886962890625, "learning_rate": 3.430530564792289e-06, "loss": 0.07564973831176758, "step": 5886 }, { "epoch": 0.7958833619210978, "grad_norm": 0.3393499255180359, "learning_rate": 3.426175466993374e-06, "loss": 0.05649684742093086, "step": 5887 }, { "epoch": 0.796018555289863, "grad_norm": 0.723112165927887, "learning_rate": 3.4218227789264468e-06, "loss": 0.07994869351387024, "step": 5888 }, { "epoch": 0.7961537486586283, "grad_norm": 0.47634345293045044, "learning_rate": 3.417472501497758e-06, "loss": 0.057938359677791595, "step": 5889 }, { "epoch": 0.7962889420273935, "grad_norm": 0.27681440114974976, "learning_rate": 3.413124635613061e-06, "loss": 0.03615274280309677, "step": 5890 }, { "epoch": 0.7964241353961589, "grad_norm": 0.2717513144016266, "learning_rate": 3.4087791821775986e-06, "loss": 0.04536506533622742, "step": 5891 }, { "epoch": 0.7965593287649241, "grad_norm": 0.9388107657432556, "learning_rate": 3.4044361420961285e-06, "loss": 0.05544573441147804, "step": 5892 }, { "epoch": 0.7966945221336893, "grad_norm": 0.4876197278499603, "learning_rate": 3.4000955162728866e-06, "loss": 0.06180986016988754, "step": 5893 }, { "epoch": 0.7968297155024546, "grad_norm": 0.5775308012962341, "learning_rate": 3.3957573056116164e-06, "loss": 0.06050202250480652, "step": 5894 }, { "epoch": 0.7969649088712198, "grad_norm": 0.6010247468948364, "learning_rate": 3.391421511015558e-06, "loss": 0.06660735607147217, "step": 5895 }, { "epoch": 0.7971001022399852, "grad_norm": 0.7866582870483398, "learning_rate": 3.38708813338744e-06, "loss": 0.05013039708137512, "step": 5896 }, { "epoch": 0.7972352956087504, "grad_norm": 0.8188081383705139, "learning_rate": 3.382757173629506e-06, "loss": 0.04270391911268234, "step": 5897 }, { "epoch": 0.7973704889775156, "grad_norm": 0.45585179328918457, "learning_rate": 3.378428632643478e-06, "loss": 0.040556058287620544, "step": 5898 }, { "epoch": 0.7975056823462809, "grad_norm": 0.6447826623916626, "learning_rate": 3.3741025113305825e-06, "loss": 0.040891677141189575, "step": 5899 }, { "epoch": 0.7976408757150462, "grad_norm": 0.5508434176445007, "learning_rate": 3.369778810591541e-06, "loss": 0.05295625329017639, "step": 5900 }, { "epoch": 0.7977760690838115, "grad_norm": 0.38005709648132324, "learning_rate": 3.3654575313265664e-06, "loss": 0.044245921075344086, "step": 5901 }, { "epoch": 0.7979112624525767, "grad_norm": 0.2504396140575409, "learning_rate": 3.361138674435386e-06, "loss": 0.03862909600138664, "step": 5902 }, { "epoch": 0.7980464558213419, "grad_norm": 0.38960644602775574, "learning_rate": 3.35682224081719e-06, "loss": 0.04928061366081238, "step": 5903 }, { "epoch": 0.7981816491901073, "grad_norm": 0.7733510136604309, "learning_rate": 3.352508231370699e-06, "loss": 0.03577747941017151, "step": 5904 }, { "epoch": 0.7983168425588725, "grad_norm": 0.2694184184074402, "learning_rate": 3.3481966469941044e-06, "loss": 0.03993189334869385, "step": 5905 }, { "epoch": 0.7984520359276378, "grad_norm": 0.5073070526123047, "learning_rate": 3.3438874885850984e-06, "loss": 0.06413762271404266, "step": 5906 }, { "epoch": 0.798587229296403, "grad_norm": 0.43924349546432495, "learning_rate": 3.3395807570408847e-06, "loss": 0.04331628605723381, "step": 5907 }, { "epoch": 0.7987224226651682, "grad_norm": 0.41936638951301575, "learning_rate": 3.33527645325813e-06, "loss": 0.049984727054834366, "step": 5908 }, { "epoch": 0.7988576160339336, "grad_norm": 0.3911786675453186, "learning_rate": 3.3309745781330247e-06, "loss": 0.05361741781234741, "step": 5909 }, { "epoch": 0.7989928094026988, "grad_norm": 0.35794228315353394, "learning_rate": 3.32667513256124e-06, "loss": 0.04724178463220596, "step": 5910 }, { "epoch": 0.799128002771464, "grad_norm": 0.36457961797714233, "learning_rate": 3.3223781174379375e-06, "loss": 0.05640942603349686, "step": 5911 }, { "epoch": 0.7992631961402293, "grad_norm": 0.7705474495887756, "learning_rate": 3.3180835336577917e-06, "loss": 0.04085811227560043, "step": 5912 }, { "epoch": 0.7993983895089946, "grad_norm": 0.8283811807632446, "learning_rate": 3.313791382114943e-06, "loss": 0.06525485217571259, "step": 5913 }, { "epoch": 0.7995335828777599, "grad_norm": 0.43810907006263733, "learning_rate": 3.3095016637030505e-06, "loss": 0.05487743392586708, "step": 5914 }, { "epoch": 0.7996687762465251, "grad_norm": 0.5616934895515442, "learning_rate": 3.3052143793152524e-06, "loss": 0.04688303917646408, "step": 5915 }, { "epoch": 0.7998039696152903, "grad_norm": 0.6169284582138062, "learning_rate": 3.3009295298441855e-06, "loss": 0.05722212791442871, "step": 5916 }, { "epoch": 0.7999391629840557, "grad_norm": 0.48905086517333984, "learning_rate": 3.2966471161819767e-06, "loss": 0.05633637309074402, "step": 5917 }, { "epoch": 0.8000743563528209, "grad_norm": 0.36363470554351807, "learning_rate": 3.292367139220246e-06, "loss": 0.05375855416059494, "step": 5918 }, { "epoch": 0.8002095497215862, "grad_norm": 0.37660279870033264, "learning_rate": 3.288089599850112e-06, "loss": 0.04628485441207886, "step": 5919 }, { "epoch": 0.8003447430903514, "grad_norm": 0.4696013331413269, "learning_rate": 3.2838144989621795e-06, "loss": 0.03974946588277817, "step": 5920 }, { "epoch": 0.8004799364591166, "grad_norm": 0.6916917562484741, "learning_rate": 3.2795418374465458e-06, "loss": 0.06132357195019722, "step": 5921 }, { "epoch": 0.800615129827882, "grad_norm": 0.3849853575229645, "learning_rate": 3.275271616192803e-06, "loss": 0.051506709307432175, "step": 5922 }, { "epoch": 0.8007503231966472, "grad_norm": 0.5831172466278076, "learning_rate": 3.2710038360900303e-06, "loss": 0.03692181408405304, "step": 5923 }, { "epoch": 0.8008855165654125, "grad_norm": 0.46334871649742126, "learning_rate": 3.266738498026808e-06, "loss": 0.05871987342834473, "step": 5924 }, { "epoch": 0.8010207099341777, "grad_norm": 0.20020557940006256, "learning_rate": 3.2624756028912005e-06, "loss": 0.03192692995071411, "step": 5925 }, { "epoch": 0.801155903302943, "grad_norm": 0.5319243669509888, "learning_rate": 3.2582151515707655e-06, "loss": 0.05883929133415222, "step": 5926 }, { "epoch": 0.8012910966717083, "grad_norm": 0.8536004424095154, "learning_rate": 3.253957144952551e-06, "loss": 0.04183913394808769, "step": 5927 }, { "epoch": 0.8014262900404735, "grad_norm": 0.32520031929016113, "learning_rate": 3.249701583923091e-06, "loss": 0.05815349891781807, "step": 5928 }, { "epoch": 0.8015614834092388, "grad_norm": 0.4152997136116028, "learning_rate": 3.2454484693684257e-06, "loss": 0.04818055406212807, "step": 5929 }, { "epoch": 0.801696676778004, "grad_norm": 0.5073022842407227, "learning_rate": 3.2411978021740727e-06, "loss": 0.05007811263203621, "step": 5930 }, { "epoch": 0.8018318701467693, "grad_norm": 0.34095102548599243, "learning_rate": 3.2369495832250434e-06, "loss": 0.04873757064342499, "step": 5931 }, { "epoch": 0.8019670635155346, "grad_norm": 1.1944684982299805, "learning_rate": 3.2327038134058378e-06, "loss": 0.06941474229097366, "step": 5932 }, { "epoch": 0.8021022568842998, "grad_norm": 0.3666043281555176, "learning_rate": 3.228460493600446e-06, "loss": 0.04918378219008446, "step": 5933 }, { "epoch": 0.802237450253065, "grad_norm": 0.557352602481842, "learning_rate": 3.2242196246923554e-06, "loss": 0.04282465577125549, "step": 5934 }, { "epoch": 0.8023726436218304, "grad_norm": 0.5829573273658752, "learning_rate": 3.2199812075645375e-06, "loss": 0.062043510377407074, "step": 5935 }, { "epoch": 0.8025078369905956, "grad_norm": 0.8321906924247742, "learning_rate": 3.2157452430994487e-06, "loss": 0.059558261185884476, "step": 5936 }, { "epoch": 0.8026430303593609, "grad_norm": 0.55205899477005, "learning_rate": 3.2115117321790427e-06, "loss": 0.05116770789027214, "step": 5937 }, { "epoch": 0.8027782237281261, "grad_norm": 0.4666525721549988, "learning_rate": 3.207280675684754e-06, "loss": 0.05095399171113968, "step": 5938 }, { "epoch": 0.8029134170968913, "grad_norm": 0.34478697180747986, "learning_rate": 3.203052074497523e-06, "loss": 0.04628527909517288, "step": 5939 }, { "epoch": 0.8030486104656567, "grad_norm": 0.7084249258041382, "learning_rate": 3.198825929497752e-06, "loss": 0.04001776874065399, "step": 5940 }, { "epoch": 0.8031838038344219, "grad_norm": 1.0986882448196411, "learning_rate": 3.194602241565357e-06, "loss": 0.05720539018511772, "step": 5941 }, { "epoch": 0.8033189972031872, "grad_norm": 0.4885752499103546, "learning_rate": 3.1903810115797282e-06, "loss": 0.05838749185204506, "step": 5942 }, { "epoch": 0.8034541905719524, "grad_norm": 1.1779735088348389, "learning_rate": 3.1861622404197475e-06, "loss": 0.06367367506027222, "step": 5943 }, { "epoch": 0.8035893839407177, "grad_norm": 0.7139937281608582, "learning_rate": 3.181945928963794e-06, "loss": 0.049517132341861725, "step": 5944 }, { "epoch": 0.803724577309483, "grad_norm": 0.38674503564834595, "learning_rate": 3.1777320780897124e-06, "loss": 0.05659361183643341, "step": 5945 }, { "epoch": 0.8038597706782482, "grad_norm": 0.6774954795837402, "learning_rate": 3.1735206886748602e-06, "loss": 0.04896688461303711, "step": 5946 }, { "epoch": 0.8039949640470135, "grad_norm": 0.5579783916473389, "learning_rate": 3.1693117615960665e-06, "loss": 0.06495684385299683, "step": 5947 }, { "epoch": 0.8041301574157788, "grad_norm": 0.3195686340332031, "learning_rate": 3.1651052977296537e-06, "loss": 0.03254825621843338, "step": 5948 }, { "epoch": 0.804265350784544, "grad_norm": 0.2509482204914093, "learning_rate": 3.1609012979514273e-06, "loss": 0.038103729486465454, "step": 5949 }, { "epoch": 0.8044005441533093, "grad_norm": 0.41426920890808105, "learning_rate": 3.156699763136683e-06, "loss": 0.03943061828613281, "step": 5950 }, { "epoch": 0.8045357375220745, "grad_norm": 0.36887314915657043, "learning_rate": 3.152500694160207e-06, "loss": 0.05029183626174927, "step": 5951 }, { "epoch": 0.8046709308908399, "grad_norm": 1.0817335844039917, "learning_rate": 3.148304091896265e-06, "loss": 0.05729471892118454, "step": 5952 }, { "epoch": 0.8048061242596051, "grad_norm": 0.4055531322956085, "learning_rate": 3.144109957218612e-06, "loss": 0.050674036145210266, "step": 5953 }, { "epoch": 0.8049413176283703, "grad_norm": 0.6952711343765259, "learning_rate": 3.1399182910004893e-06, "loss": 0.06114784628152847, "step": 5954 }, { "epoch": 0.8050765109971356, "grad_norm": 0.3529430329799652, "learning_rate": 3.1357290941146215e-06, "loss": 0.04761424660682678, "step": 5955 }, { "epoch": 0.8052117043659008, "grad_norm": 1.1649551391601562, "learning_rate": 3.1315423674332265e-06, "loss": 0.06620429456233978, "step": 5956 }, { "epoch": 0.8053468977346662, "grad_norm": 0.6548296809196472, "learning_rate": 3.127358111828002e-06, "loss": 0.055577006191015244, "step": 5957 }, { "epoch": 0.8054820911034314, "grad_norm": 0.39968588948249817, "learning_rate": 3.123176328170131e-06, "loss": 0.05089409649372101, "step": 5958 }, { "epoch": 0.8056172844721966, "grad_norm": 0.19584575295448303, "learning_rate": 3.1189970173302816e-06, "loss": 0.032486554235219955, "step": 5959 }, { "epoch": 0.8057524778409619, "grad_norm": 0.4145294427871704, "learning_rate": 3.1148201801786085e-06, "loss": 0.04014986380934715, "step": 5960 }, { "epoch": 0.8058876712097272, "grad_norm": 0.6185129880905151, "learning_rate": 3.1106458175847572e-06, "loss": 0.05759398639202118, "step": 5961 }, { "epoch": 0.8060228645784924, "grad_norm": 0.3994033932685852, "learning_rate": 3.106473930417848e-06, "loss": 0.039771318435668945, "step": 5962 }, { "epoch": 0.8061580579472577, "grad_norm": 0.44405725598335266, "learning_rate": 3.1023045195464903e-06, "loss": 0.05337781459093094, "step": 5963 }, { "epoch": 0.8062932513160229, "grad_norm": 0.36284634470939636, "learning_rate": 3.098137585838779e-06, "loss": 0.04051637649536133, "step": 5964 }, { "epoch": 0.8064284446847882, "grad_norm": 0.2603057026863098, "learning_rate": 3.093973130162286e-06, "loss": 0.036102838814258575, "step": 5965 }, { "epoch": 0.8065636380535535, "grad_norm": 0.7885052561759949, "learning_rate": 3.089811153384083e-06, "loss": 0.051426973193883896, "step": 5966 }, { "epoch": 0.8066988314223187, "grad_norm": 0.8790342211723328, "learning_rate": 3.08565165637071e-06, "loss": 0.04787473380565643, "step": 5967 }, { "epoch": 0.806834024791084, "grad_norm": 0.40639930963516235, "learning_rate": 3.081494639988196e-06, "loss": 0.055586278438568115, "step": 5968 }, { "epoch": 0.8069692181598492, "grad_norm": 0.7877558469772339, "learning_rate": 3.077340105102057e-06, "loss": 0.03896524757146835, "step": 5969 }, { "epoch": 0.8071044115286146, "grad_norm": 0.2484983652830124, "learning_rate": 3.0731880525772817e-06, "loss": 0.04340030997991562, "step": 5970 }, { "epoch": 0.8072396048973798, "grad_norm": 0.4748263359069824, "learning_rate": 3.069038483278364e-06, "loss": 0.04639517143368721, "step": 5971 }, { "epoch": 0.807374798266145, "grad_norm": 0.8538673520088196, "learning_rate": 3.0648913980692505e-06, "loss": 0.06069638952612877, "step": 5972 }, { "epoch": 0.8075099916349103, "grad_norm": 0.4544968008995056, "learning_rate": 3.0607467978133985e-06, "loss": 0.054083552211523056, "step": 5973 }, { "epoch": 0.8076451850036755, "grad_norm": 0.32060369849205017, "learning_rate": 3.0566046833737294e-06, "loss": 0.04942159727215767, "step": 5974 }, { "epoch": 0.8077803783724409, "grad_norm": 0.4709891974925995, "learning_rate": 3.0524650556126517e-06, "loss": 0.0423416793346405, "step": 5975 }, { "epoch": 0.8079155717412061, "grad_norm": 0.48734596371650696, "learning_rate": 3.048327915392069e-06, "loss": 0.053480833768844604, "step": 5976 }, { "epoch": 0.8080507651099713, "grad_norm": 0.3904436528682709, "learning_rate": 3.044193263573341e-06, "loss": 0.051738277077674866, "step": 5977 }, { "epoch": 0.8081859584787366, "grad_norm": 1.3868236541748047, "learning_rate": 3.0400611010173355e-06, "loss": 0.043892428278923035, "step": 5978 }, { "epoch": 0.8083211518475019, "grad_norm": 0.4235009253025055, "learning_rate": 3.0359314285843863e-06, "loss": 0.04972689598798752, "step": 5979 }, { "epoch": 0.8084563452162672, "grad_norm": 0.3021641969680786, "learning_rate": 3.0318042471343104e-06, "loss": 0.03799622505903244, "step": 5980 }, { "epoch": 0.8085915385850324, "grad_norm": 0.6612793207168579, "learning_rate": 3.027679557526422e-06, "loss": 0.05360596999526024, "step": 5981 }, { "epoch": 0.8087267319537976, "grad_norm": 0.1806853860616684, "learning_rate": 3.0235573606194844e-06, "loss": 0.03642776608467102, "step": 5982 }, { "epoch": 0.808861925322563, "grad_norm": 0.35930144786834717, "learning_rate": 3.0194376572717743e-06, "loss": 0.046458661556243896, "step": 5983 }, { "epoch": 0.8089971186913282, "grad_norm": 0.8379183411598206, "learning_rate": 3.0153204483410318e-06, "loss": 0.035362377762794495, "step": 5984 }, { "epoch": 0.8091323120600935, "grad_norm": 0.6114612221717834, "learning_rate": 3.0112057346844834e-06, "loss": 0.07475194334983826, "step": 5985 }, { "epoch": 0.8092675054288587, "grad_norm": 0.3169575333595276, "learning_rate": 3.007093517158832e-06, "loss": 0.04471041262149811, "step": 5986 }, { "epoch": 0.8094026987976239, "grad_norm": 0.25989773869514465, "learning_rate": 3.002983796620261e-06, "loss": 0.03761632740497589, "step": 5987 }, { "epoch": 0.8095378921663893, "grad_norm": 0.8605539798736572, "learning_rate": 2.9988765739244427e-06, "loss": 0.07086487859487534, "step": 5988 }, { "epoch": 0.8096730855351545, "grad_norm": 0.46825146675109863, "learning_rate": 2.9947718499265197e-06, "loss": 0.0600430965423584, "step": 5989 }, { "epoch": 0.8098082789039197, "grad_norm": 0.43604162335395813, "learning_rate": 2.9906696254811184e-06, "loss": 0.07032966613769531, "step": 5990 }, { "epoch": 0.809943472272685, "grad_norm": 0.7523314356803894, "learning_rate": 2.9865699014423404e-06, "loss": 0.06513407081365585, "step": 5991 }, { "epoch": 0.8100786656414503, "grad_norm": 0.39125683903694153, "learning_rate": 2.9824726786637698e-06, "loss": 0.052056439220905304, "step": 5992 }, { "epoch": 0.8102138590102156, "grad_norm": 0.4359718859195709, "learning_rate": 2.978377957998477e-06, "loss": 0.051102060824632645, "step": 5993 }, { "epoch": 0.8103490523789808, "grad_norm": 0.5977135896682739, "learning_rate": 2.974285740299001e-06, "loss": 0.0511966347694397, "step": 5994 }, { "epoch": 0.810484245747746, "grad_norm": 0.7350088357925415, "learning_rate": 2.9701960264173612e-06, "loss": 0.056402504444122314, "step": 5995 }, { "epoch": 0.8106194391165114, "grad_norm": 0.6118856072425842, "learning_rate": 2.96610881720506e-06, "loss": 0.05361482873558998, "step": 5996 }, { "epoch": 0.8107546324852766, "grad_norm": 0.5564828515052795, "learning_rate": 2.9620241135130715e-06, "loss": 0.06175960600376129, "step": 5997 }, { "epoch": 0.8108898258540419, "grad_norm": 0.9695990085601807, "learning_rate": 2.9579419161918607e-06, "loss": 0.07416443526744843, "step": 5998 }, { "epoch": 0.8110250192228071, "grad_norm": 0.34099122881889343, "learning_rate": 2.9538622260913595e-06, "loss": 0.03728532791137695, "step": 5999 }, { "epoch": 0.8111602125915723, "grad_norm": 0.31995654106140137, "learning_rate": 2.9497850440609814e-06, "loss": 0.05529361963272095, "step": 6000 }, { "epoch": 0.8112954059603377, "grad_norm": 0.37622174620628357, "learning_rate": 2.945710370949616e-06, "loss": 0.032759249210357666, "step": 6001 }, { "epoch": 0.8114305993291029, "grad_norm": 0.4569580554962158, "learning_rate": 2.941638207605629e-06, "loss": 0.05487819015979767, "step": 6002 }, { "epoch": 0.8115657926978682, "grad_norm": 0.470242440700531, "learning_rate": 2.937568554876873e-06, "loss": 0.059556424617767334, "step": 6003 }, { "epoch": 0.8117009860666334, "grad_norm": 0.6070911288261414, "learning_rate": 2.9335014136106704e-06, "loss": 0.07529856264591217, "step": 6004 }, { "epoch": 0.8118361794353987, "grad_norm": 0.4584585130214691, "learning_rate": 2.929436784653818e-06, "loss": 0.04798061400651932, "step": 6005 }, { "epoch": 0.811971372804164, "grad_norm": 0.5598738193511963, "learning_rate": 2.925374668852597e-06, "loss": 0.056647613644599915, "step": 6006 }, { "epoch": 0.8121065661729292, "grad_norm": 0.5855300426483154, "learning_rate": 2.921315067052754e-06, "loss": 0.05834600701928139, "step": 6007 }, { "epoch": 0.8122417595416945, "grad_norm": 0.5238317847251892, "learning_rate": 2.917257980099535e-06, "loss": 0.04600805789232254, "step": 6008 }, { "epoch": 0.8123769529104597, "grad_norm": 1.0761687755584717, "learning_rate": 2.913203408837629e-06, "loss": 0.061650726944208145, "step": 6009 }, { "epoch": 0.812512146279225, "grad_norm": 0.408304899930954, "learning_rate": 2.909151354111232e-06, "loss": 0.0393282026052475, "step": 6010 }, { "epoch": 0.8126473396479903, "grad_norm": 0.7101040482521057, "learning_rate": 2.905101816763998e-06, "loss": 0.05413798987865448, "step": 6011 }, { "epoch": 0.8127825330167555, "grad_norm": 0.5555978417396545, "learning_rate": 2.9010547976390617e-06, "loss": 0.048170894384384155, "step": 6012 }, { "epoch": 0.8129177263855208, "grad_norm": 0.4768838882446289, "learning_rate": 2.897010297579042e-06, "loss": 0.04922903701663017, "step": 6013 }, { "epoch": 0.8130529197542861, "grad_norm": 0.7568203210830688, "learning_rate": 2.8929683174260133e-06, "loss": 0.06784403324127197, "step": 6014 }, { "epoch": 0.8131881131230513, "grad_norm": 0.35972142219543457, "learning_rate": 2.8889288580215467e-06, "loss": 0.05512236803770065, "step": 6015 }, { "epoch": 0.8133233064918166, "grad_norm": 0.41098833084106445, "learning_rate": 2.8848919202066752e-06, "loss": 0.05282828211784363, "step": 6016 }, { "epoch": 0.8134584998605818, "grad_norm": 0.2256694734096527, "learning_rate": 2.8808575048219123e-06, "loss": 0.03642760217189789, "step": 6017 }, { "epoch": 0.813593693229347, "grad_norm": 0.7187584638595581, "learning_rate": 2.8768256127072436e-06, "loss": 0.06820935010910034, "step": 6018 }, { "epoch": 0.8137288865981124, "grad_norm": 0.4291542172431946, "learning_rate": 2.872796244702128e-06, "loss": 0.04829160124063492, "step": 6019 }, { "epoch": 0.8138640799668776, "grad_norm": 0.33461013436317444, "learning_rate": 2.8687694016455075e-06, "loss": 0.056207820773124695, "step": 6020 }, { "epoch": 0.8139992733356429, "grad_norm": 0.6442399024963379, "learning_rate": 2.86474508437579e-06, "loss": 0.07606568932533264, "step": 6021 }, { "epoch": 0.8141344667044081, "grad_norm": 0.47667205333709717, "learning_rate": 2.8607232937308587e-06, "loss": 0.06469475477933884, "step": 6022 }, { "epoch": 0.8142696600731734, "grad_norm": 0.6076067686080933, "learning_rate": 2.856704030548072e-06, "loss": 0.054419368505477905, "step": 6023 }, { "epoch": 0.8144048534419387, "grad_norm": 1.0293875932693481, "learning_rate": 2.8526872956642568e-06, "loss": 0.052383869886398315, "step": 6024 }, { "epoch": 0.8145400468107039, "grad_norm": 0.38018274307250977, "learning_rate": 2.84867308991573e-06, "loss": 0.05584040284156799, "step": 6025 }, { "epoch": 0.8146752401794692, "grad_norm": 0.3009570240974426, "learning_rate": 2.8446614141382638e-06, "loss": 0.04549434781074524, "step": 6026 }, { "epoch": 0.8148104335482345, "grad_norm": 0.6425711512565613, "learning_rate": 2.8406522691671104e-06, "loss": 0.04711001366376877, "step": 6027 }, { "epoch": 0.8149456269169997, "grad_norm": 0.4095251262187958, "learning_rate": 2.8366456558369975e-06, "loss": 0.049842387437820435, "step": 6028 }, { "epoch": 0.815080820285765, "grad_norm": 1.0910245180130005, "learning_rate": 2.8326415749821186e-06, "loss": 0.05321177840232849, "step": 6029 }, { "epoch": 0.8152160136545302, "grad_norm": 0.35466185212135315, "learning_rate": 2.828640027436151e-06, "loss": 0.056677818298339844, "step": 6030 }, { "epoch": 0.8153512070232956, "grad_norm": 0.5476468801498413, "learning_rate": 2.824641014032235e-06, "loss": 0.06131923198699951, "step": 6031 }, { "epoch": 0.8154864003920608, "grad_norm": 0.5465874075889587, "learning_rate": 2.820644535602987e-06, "loss": 0.06629450619220734, "step": 6032 }, { "epoch": 0.815621593760826, "grad_norm": 0.37514084577560425, "learning_rate": 2.8166505929804953e-06, "loss": 0.047612980008125305, "step": 6033 }, { "epoch": 0.8157567871295913, "grad_norm": 0.8173540830612183, "learning_rate": 2.8126591869963163e-06, "loss": 0.04677024483680725, "step": 6034 }, { "epoch": 0.8158919804983565, "grad_norm": 0.8117858171463013, "learning_rate": 2.8086703184814887e-06, "loss": 0.05163833126425743, "step": 6035 }, { "epoch": 0.8160271738671219, "grad_norm": 0.6131458878517151, "learning_rate": 2.8046839882665134e-06, "loss": 0.03967595845460892, "step": 6036 }, { "epoch": 0.8161623672358871, "grad_norm": 0.2959958016872406, "learning_rate": 2.800700197181364e-06, "loss": 0.03668498247861862, "step": 6037 }, { "epoch": 0.8162975606046523, "grad_norm": 0.2307182252407074, "learning_rate": 2.7967189460554876e-06, "loss": 0.03201761469244957, "step": 6038 }, { "epoch": 0.8164327539734176, "grad_norm": 0.4182891249656677, "learning_rate": 2.792740235717801e-06, "loss": 0.04801169037818909, "step": 6039 }, { "epoch": 0.8165679473421829, "grad_norm": 0.8194049000740051, "learning_rate": 2.7887640669967e-06, "loss": 0.0409955233335495, "step": 6040 }, { "epoch": 0.8167031407109482, "grad_norm": 0.2697567641735077, "learning_rate": 2.7847904407200327e-06, "loss": 0.03320390731096268, "step": 6041 }, { "epoch": 0.8168383340797134, "grad_norm": 0.3574306070804596, "learning_rate": 2.7808193577151363e-06, "loss": 0.06256964802742004, "step": 6042 }, { "epoch": 0.8169735274484786, "grad_norm": 0.41344887018203735, "learning_rate": 2.776850818808812e-06, "loss": 0.051427166908979416, "step": 6043 }, { "epoch": 0.817108720817244, "grad_norm": 0.5584813952445984, "learning_rate": 2.772884824827325e-06, "loss": 0.04259277135133743, "step": 6044 }, { "epoch": 0.8172439141860092, "grad_norm": 0.49408358335494995, "learning_rate": 2.768921376596429e-06, "loss": 0.057242006063461304, "step": 6045 }, { "epoch": 0.8173791075547744, "grad_norm": 0.2824747860431671, "learning_rate": 2.7649604749413176e-06, "loss": 0.04084141552448273, "step": 6046 }, { "epoch": 0.8175143009235397, "grad_norm": 0.7278298139572144, "learning_rate": 2.7610021206866837e-06, "loss": 0.058346718549728394, "step": 6047 }, { "epoch": 0.8176494942923049, "grad_norm": 0.27571889758110046, "learning_rate": 2.757046314656676e-06, "loss": 0.04878956824541092, "step": 6048 }, { "epoch": 0.8177846876610703, "grad_norm": 0.3876435458660126, "learning_rate": 2.753093057674909e-06, "loss": 0.046020932495594025, "step": 6049 }, { "epoch": 0.8179198810298355, "grad_norm": 1.0057417154312134, "learning_rate": 2.749142350564483e-06, "loss": 0.07525050640106201, "step": 6050 }, { "epoch": 0.8180550743986007, "grad_norm": 1.0023913383483887, "learning_rate": 2.7451941941479414e-06, "loss": 0.04973903298377991, "step": 6051 }, { "epoch": 0.818190267767366, "grad_norm": 1.013225793838501, "learning_rate": 2.741248589247323e-06, "loss": 0.06751750409603119, "step": 6052 }, { "epoch": 0.8183254611361312, "grad_norm": 0.3252927362918854, "learning_rate": 2.73730553668412e-06, "loss": 0.04444827884435654, "step": 6053 }, { "epoch": 0.8184606545048966, "grad_norm": 0.43519914150238037, "learning_rate": 2.7333650372792978e-06, "loss": 0.05890996754169464, "step": 6054 }, { "epoch": 0.8185958478736618, "grad_norm": 0.2914881706237793, "learning_rate": 2.7294270918532876e-06, "loss": 0.04107041284441948, "step": 6055 }, { "epoch": 0.818731041242427, "grad_norm": 0.5293598175048828, "learning_rate": 2.7254917012259882e-06, "loss": 0.06380148231983185, "step": 6056 }, { "epoch": 0.8188662346111923, "grad_norm": 0.8536359667778015, "learning_rate": 2.721558866216776e-06, "loss": 0.056998737156391144, "step": 6057 }, { "epoch": 0.8190014279799576, "grad_norm": 0.7351745367050171, "learning_rate": 2.7176285876444846e-06, "loss": 0.047584764659404755, "step": 6058 }, { "epoch": 0.8191366213487229, "grad_norm": 0.3175927996635437, "learning_rate": 2.713700866327417e-06, "loss": 0.05166204273700714, "step": 6059 }, { "epoch": 0.8192718147174881, "grad_norm": 0.220992311835289, "learning_rate": 2.7097757030833497e-06, "loss": 0.038770660758018494, "step": 6060 }, { "epoch": 0.8194070080862533, "grad_norm": 0.3946404457092285, "learning_rate": 2.705853098729517e-06, "loss": 0.06518112868070602, "step": 6061 }, { "epoch": 0.8195422014550187, "grad_norm": 0.3855142593383789, "learning_rate": 2.7019330540826325e-06, "loss": 0.045286234468221664, "step": 6062 }, { "epoch": 0.8196773948237839, "grad_norm": 0.35774752497673035, "learning_rate": 2.6980155699588666e-06, "loss": 0.05292356014251709, "step": 6063 }, { "epoch": 0.8198125881925492, "grad_norm": 1.1996122598648071, "learning_rate": 2.6941006471738633e-06, "loss": 0.0711471438407898, "step": 6064 }, { "epoch": 0.8199477815613144, "grad_norm": 0.8823511004447937, "learning_rate": 2.690188286542726e-06, "loss": 0.047625068575143814, "step": 6065 }, { "epoch": 0.8200829749300796, "grad_norm": 0.5733317136764526, "learning_rate": 2.686278488880029e-06, "loss": 0.054057564586400986, "step": 6066 }, { "epoch": 0.820218168298845, "grad_norm": 1.0579018592834473, "learning_rate": 2.6823712549998187e-06, "loss": 0.05836092680692673, "step": 6067 }, { "epoch": 0.8203533616676102, "grad_norm": 0.37968772649765015, "learning_rate": 2.678466585715599e-06, "loss": 0.041866935789585114, "step": 6068 }, { "epoch": 0.8204885550363754, "grad_norm": 0.3310588300228119, "learning_rate": 2.6745644818403426e-06, "loss": 0.05284113064408302, "step": 6069 }, { "epoch": 0.8206237484051407, "grad_norm": 0.6838980317115784, "learning_rate": 2.6706649441864883e-06, "loss": 0.046298783272504807, "step": 6070 }, { "epoch": 0.820758941773906, "grad_norm": 0.5420961976051331, "learning_rate": 2.666767973565937e-06, "loss": 0.04984646290540695, "step": 6071 }, { "epoch": 0.8208941351426713, "grad_norm": 1.0090419054031372, "learning_rate": 2.6628735707900653e-06, "loss": 0.06006111577153206, "step": 6072 }, { "epoch": 0.8210293285114365, "grad_norm": 0.8741933703422546, "learning_rate": 2.658981736669707e-06, "loss": 0.04873526841402054, "step": 6073 }, { "epoch": 0.8211645218802017, "grad_norm": 1.103743314743042, "learning_rate": 2.655092472015161e-06, "loss": 0.06706659495830536, "step": 6074 }, { "epoch": 0.821299715248967, "grad_norm": 0.3980848491191864, "learning_rate": 2.6512057776361935e-06, "loss": 0.0645224079489708, "step": 6075 }, { "epoch": 0.8214349086177323, "grad_norm": 0.5778664946556091, "learning_rate": 2.64732165434203e-06, "loss": 0.04888932406902313, "step": 6076 }, { "epoch": 0.8215701019864976, "grad_norm": 1.1038556098937988, "learning_rate": 2.6434401029413792e-06, "loss": 0.05492448806762695, "step": 6077 }, { "epoch": 0.8217052953552628, "grad_norm": 0.45790040493011475, "learning_rate": 2.639561124242385e-06, "loss": 0.053073152899742126, "step": 6078 }, { "epoch": 0.821840488724028, "grad_norm": 0.4845079183578491, "learning_rate": 2.635684719052682e-06, "loss": 0.0440652072429657, "step": 6079 }, { "epoch": 0.8219756820927934, "grad_norm": 0.5969334244728088, "learning_rate": 2.631810888179355e-06, "loss": 0.06416060030460358, "step": 6080 }, { "epoch": 0.8221108754615586, "grad_norm": 0.46688759326934814, "learning_rate": 2.627939632428952e-06, "loss": 0.038395948708057404, "step": 6081 }, { "epoch": 0.8222460688303239, "grad_norm": 0.8864238858222961, "learning_rate": 2.624070952607502e-06, "loss": 0.04338468983769417, "step": 6082 }, { "epoch": 0.8223812621990891, "grad_norm": 0.7390619516372681, "learning_rate": 2.620204849520468e-06, "loss": 0.06229623034596443, "step": 6083 }, { "epoch": 0.8225164555678544, "grad_norm": 0.5434011816978455, "learning_rate": 2.616341323972806e-06, "loss": 0.04871061444282532, "step": 6084 }, { "epoch": 0.8226516489366197, "grad_norm": 0.4083769917488098, "learning_rate": 2.612480376768917e-06, "loss": 0.033784568309783936, "step": 6085 }, { "epoch": 0.8227868423053849, "grad_norm": 0.4263850748538971, "learning_rate": 2.608622008712672e-06, "loss": 0.05414266884326935, "step": 6086 }, { "epoch": 0.8229220356741502, "grad_norm": 0.5287026166915894, "learning_rate": 2.6047662206074034e-06, "loss": 0.04452309384942055, "step": 6087 }, { "epoch": 0.8230572290429155, "grad_norm": 0.36066341400146484, "learning_rate": 2.600913013255904e-06, "loss": 0.05635611712932587, "step": 6088 }, { "epoch": 0.8231924224116807, "grad_norm": 0.6323385238647461, "learning_rate": 2.59706238746044e-06, "loss": 0.055132389068603516, "step": 6089 }, { "epoch": 0.823327615780446, "grad_norm": 0.2682657241821289, "learning_rate": 2.593214344022725e-06, "loss": 0.028467580676078796, "step": 6090 }, { "epoch": 0.8234628091492112, "grad_norm": 0.6920700669288635, "learning_rate": 2.5893688837439474e-06, "loss": 0.06494811177253723, "step": 6091 }, { "epoch": 0.8235980025179765, "grad_norm": 0.49712565541267395, "learning_rate": 2.5855260074247473e-06, "loss": 0.05038559436798096, "step": 6092 }, { "epoch": 0.8237331958867418, "grad_norm": 0.48757442831993103, "learning_rate": 2.581685715865232e-06, "loss": 0.039091311395168304, "step": 6093 }, { "epoch": 0.823868389255507, "grad_norm": 0.8614658713340759, "learning_rate": 2.5778480098649766e-06, "loss": 0.06300951540470123, "step": 6094 }, { "epoch": 0.8240035826242723, "grad_norm": 0.33780935406684875, "learning_rate": 2.5740128902230087e-06, "loss": 0.03624337911605835, "step": 6095 }, { "epoch": 0.8241387759930375, "grad_norm": 0.42048728466033936, "learning_rate": 2.5701803577378214e-06, "loss": 0.054029226303100586, "step": 6096 }, { "epoch": 0.8242739693618027, "grad_norm": 1.1494966745376587, "learning_rate": 2.566350413207366e-06, "loss": 0.07297086715698242, "step": 6097 }, { "epoch": 0.8244091627305681, "grad_norm": 0.5856238603591919, "learning_rate": 2.5625230574290554e-06, "loss": 0.05555553734302521, "step": 6098 }, { "epoch": 0.8245443560993333, "grad_norm": 0.3982914984226227, "learning_rate": 2.558698291199773e-06, "loss": 0.03345699608325958, "step": 6099 }, { "epoch": 0.8246795494680986, "grad_norm": 0.40843665599823, "learning_rate": 2.5548761153158524e-06, "loss": 0.0372922420501709, "step": 6100 }, { "epoch": 0.8248147428368638, "grad_norm": 0.4422677755355835, "learning_rate": 2.55105653057309e-06, "loss": 0.05058560147881508, "step": 6101 }, { "epoch": 0.8249499362056291, "grad_norm": 0.8284547924995422, "learning_rate": 2.547239537766743e-06, "loss": 0.06245347112417221, "step": 6102 }, { "epoch": 0.8250851295743944, "grad_norm": 0.3782658278942108, "learning_rate": 2.543425137691526e-06, "loss": 0.05809428542852402, "step": 6103 }, { "epoch": 0.8252203229431596, "grad_norm": 0.4869481027126312, "learning_rate": 2.5396133311416264e-06, "loss": 0.03994176536798477, "step": 6104 }, { "epoch": 0.8253555163119249, "grad_norm": 0.7174405455589294, "learning_rate": 2.5358041189106784e-06, "loss": 0.05265609174966812, "step": 6105 }, { "epoch": 0.8254907096806902, "grad_norm": 0.548949658870697, "learning_rate": 2.531997501791779e-06, "loss": 0.053619205951690674, "step": 6106 }, { "epoch": 0.8256259030494554, "grad_norm": 0.6912299394607544, "learning_rate": 2.528193480577489e-06, "loss": 0.037102386355400085, "step": 6107 }, { "epoch": 0.8257610964182207, "grad_norm": 0.5926231145858765, "learning_rate": 2.5243920560598186e-06, "loss": 0.07208763062953949, "step": 6108 }, { "epoch": 0.8258962897869859, "grad_norm": 0.6693443655967712, "learning_rate": 2.5205932290302598e-06, "loss": 0.0516146719455719, "step": 6109 }, { "epoch": 0.8260314831557513, "grad_norm": 0.368770569562912, "learning_rate": 2.516797000279729e-06, "loss": 0.04051916301250458, "step": 6110 }, { "epoch": 0.8261666765245165, "grad_norm": 0.4633229076862335, "learning_rate": 2.513003370598637e-06, "loss": 0.058559875935316086, "step": 6111 }, { "epoch": 0.8263018698932817, "grad_norm": 0.4610815644264221, "learning_rate": 2.509212340776832e-06, "loss": 0.057527586817741394, "step": 6112 }, { "epoch": 0.826437063262047, "grad_norm": 0.30278053879737854, "learning_rate": 2.505423911603622e-06, "loss": 0.03435998409986496, "step": 6113 }, { "epoch": 0.8265722566308122, "grad_norm": 0.5012966990470886, "learning_rate": 2.501638083867789e-06, "loss": 0.042091481387615204, "step": 6114 }, { "epoch": 0.8267074499995776, "grad_norm": 0.6133097410202026, "learning_rate": 2.497854858357552e-06, "loss": 0.051196541637182236, "step": 6115 }, { "epoch": 0.8268426433683428, "grad_norm": 0.6618172526359558, "learning_rate": 2.494074235860604e-06, "loss": 0.05097391456365585, "step": 6116 }, { "epoch": 0.826977836737108, "grad_norm": 0.31498780846595764, "learning_rate": 2.4902962171640913e-06, "loss": 0.04439883306622505, "step": 6117 }, { "epoch": 0.8271130301058733, "grad_norm": 0.9481056928634644, "learning_rate": 2.4865208030546167e-06, "loss": 0.0532616525888443, "step": 6118 }, { "epoch": 0.8272482234746386, "grad_norm": 0.4932495355606079, "learning_rate": 2.482747994318239e-06, "loss": 0.04085560515522957, "step": 6119 }, { "epoch": 0.8273834168434039, "grad_norm": 0.49863311648368835, "learning_rate": 2.478977791740477e-06, "loss": 0.06760723143815994, "step": 6120 }, { "epoch": 0.8275186102121691, "grad_norm": 0.5750812888145447, "learning_rate": 2.475210196106313e-06, "loss": 0.06401936709880829, "step": 6121 }, { "epoch": 0.8276538035809343, "grad_norm": 0.8845735788345337, "learning_rate": 2.4714452082001753e-06, "loss": 0.05270498991012573, "step": 6122 }, { "epoch": 0.8277889969496997, "grad_norm": 0.3728500008583069, "learning_rate": 2.467682828805956e-06, "loss": 0.05247960239648819, "step": 6123 }, { "epoch": 0.8279241903184649, "grad_norm": 1.0556254386901855, "learning_rate": 2.4639230587070017e-06, "loss": 0.06627139449119568, "step": 6124 }, { "epoch": 0.8280593836872301, "grad_norm": 1.4212722778320312, "learning_rate": 2.460165898686114e-06, "loss": 0.07075628638267517, "step": 6125 }, { "epoch": 0.8281945770559954, "grad_norm": 0.450106680393219, "learning_rate": 2.4564113495255597e-06, "loss": 0.052923545241355896, "step": 6126 }, { "epoch": 0.8283297704247606, "grad_norm": 1.2226994037628174, "learning_rate": 2.4526594120070545e-06, "loss": 0.06417983770370483, "step": 6127 }, { "epoch": 0.828464963793526, "grad_norm": 0.6285002827644348, "learning_rate": 2.4489100869117686e-06, "loss": 0.0484108105301857, "step": 6128 }, { "epoch": 0.8286001571622912, "grad_norm": 0.5206980109214783, "learning_rate": 2.4451633750203344e-06, "loss": 0.06516458094120026, "step": 6129 }, { "epoch": 0.8287353505310564, "grad_norm": 0.5448839664459229, "learning_rate": 2.441419277112831e-06, "loss": 0.05673800781369209, "step": 6130 }, { "epoch": 0.8288705438998217, "grad_norm": 0.5945592522621155, "learning_rate": 2.4376777939688107e-06, "loss": 0.05581730604171753, "step": 6131 }, { "epoch": 0.829005737268587, "grad_norm": 0.329216867685318, "learning_rate": 2.4339389263672625e-06, "loss": 0.04142392426729202, "step": 6132 }, { "epoch": 0.8291409306373523, "grad_norm": 0.616303026676178, "learning_rate": 2.4302026750866406e-06, "loss": 0.04276688024401665, "step": 6133 }, { "epoch": 0.8292761240061175, "grad_norm": 0.8358241319656372, "learning_rate": 2.4264690409048517e-06, "loss": 0.05740465223789215, "step": 6134 }, { "epoch": 0.8294113173748827, "grad_norm": 1.3148711919784546, "learning_rate": 2.4227380245992555e-06, "loss": 0.057329535484313965, "step": 6135 }, { "epoch": 0.829546510743648, "grad_norm": 0.5319249033927917, "learning_rate": 2.4190096269466767e-06, "loss": 0.044739410281181335, "step": 6136 }, { "epoch": 0.8296817041124133, "grad_norm": 0.5065696239471436, "learning_rate": 2.415283848723383e-06, "loss": 0.04829120635986328, "step": 6137 }, { "epoch": 0.8298168974811786, "grad_norm": 0.7488718032836914, "learning_rate": 2.411560690705101e-06, "loss": 0.054521623998880386, "step": 6138 }, { "epoch": 0.8299520908499438, "grad_norm": 0.23163874447345734, "learning_rate": 2.4078401536670146e-06, "loss": 0.04687364771962166, "step": 6139 }, { "epoch": 0.830087284218709, "grad_norm": 0.9627823233604431, "learning_rate": 2.4041222383837538e-06, "loss": 0.06781119108200073, "step": 6140 }, { "epoch": 0.8302224775874744, "grad_norm": 2.258822441101074, "learning_rate": 2.400406945629418e-06, "loss": 0.07293403148651123, "step": 6141 }, { "epoch": 0.8303576709562396, "grad_norm": 1.251961588859558, "learning_rate": 2.3966942761775396e-06, "loss": 0.06108919158577919, "step": 6142 }, { "epoch": 0.8304928643250049, "grad_norm": 1.0319828987121582, "learning_rate": 2.3929842308011263e-06, "loss": 0.05969039350748062, "step": 6143 }, { "epoch": 0.8306280576937701, "grad_norm": 0.6500529646873474, "learning_rate": 2.3892768102726236e-06, "loss": 0.03957810252904892, "step": 6144 }, { "epoch": 0.8307632510625353, "grad_norm": 0.4057830274105072, "learning_rate": 2.3855720153639344e-06, "loss": 0.06209491193294525, "step": 6145 }, { "epoch": 0.8308984444313007, "grad_norm": 0.8404145836830139, "learning_rate": 2.381869846846428e-06, "loss": 0.05145217478275299, "step": 6146 }, { "epoch": 0.8310336378000659, "grad_norm": 0.3667839467525482, "learning_rate": 2.3781703054908993e-06, "loss": 0.04141956567764282, "step": 6147 }, { "epoch": 0.8311688311688312, "grad_norm": 0.48730531334877014, "learning_rate": 2.374473392067624e-06, "loss": 0.050588108599185944, "step": 6148 }, { "epoch": 0.8313040245375964, "grad_norm": 0.5241788029670715, "learning_rate": 2.370779107346317e-06, "loss": 0.04770607873797417, "step": 6149 }, { "epoch": 0.8314392179063617, "grad_norm": 0.5279482007026672, "learning_rate": 2.3670874520961437e-06, "loss": 0.055301908403635025, "step": 6150 }, { "epoch": 0.831574411275127, "grad_norm": 0.5519096851348877, "learning_rate": 2.3633984270857367e-06, "loss": 0.04346519708633423, "step": 6151 }, { "epoch": 0.8317096046438922, "grad_norm": 0.3969717025756836, "learning_rate": 2.359712033083156e-06, "loss": 0.0380130298435688, "step": 6152 }, { "epoch": 0.8318447980126574, "grad_norm": 0.34964194893836975, "learning_rate": 2.35602827085594e-06, "loss": 0.04213812202215195, "step": 6153 }, { "epoch": 0.8319799913814228, "grad_norm": 0.6710175275802612, "learning_rate": 2.3523471411710644e-06, "loss": 0.05972909927368164, "step": 6154 }, { "epoch": 0.832115184750188, "grad_norm": 0.28571468591690063, "learning_rate": 2.3486686447949585e-06, "loss": 0.043431565165519714, "step": 6155 }, { "epoch": 0.8322503781189533, "grad_norm": 0.6187041401863098, "learning_rate": 2.3449927824935075e-06, "loss": 0.04155975952744484, "step": 6156 }, { "epoch": 0.8323855714877185, "grad_norm": 0.6864646673202515, "learning_rate": 2.3413195550320393e-06, "loss": 0.047659337520599365, "step": 6157 }, { "epoch": 0.8325207648564837, "grad_norm": 0.5197789072990417, "learning_rate": 2.3376489631753474e-06, "loss": 0.06052897870540619, "step": 6158 }, { "epoch": 0.8326559582252491, "grad_norm": 0.4447621703147888, "learning_rate": 2.3339810076876665e-06, "loss": 0.043431296944618225, "step": 6159 }, { "epoch": 0.8327911515940143, "grad_norm": 1.0718086957931519, "learning_rate": 2.3303156893326815e-06, "loss": 0.0609980933368206, "step": 6160 }, { "epoch": 0.8329263449627796, "grad_norm": 0.37900206446647644, "learning_rate": 2.326653008873535e-06, "loss": 0.04300892353057861, "step": 6161 }, { "epoch": 0.8330615383315448, "grad_norm": 0.49139440059661865, "learning_rate": 2.3229929670728085e-06, "loss": 0.05007103830575943, "step": 6162 }, { "epoch": 0.8331967317003101, "grad_norm": 1.430359959602356, "learning_rate": 2.319335564692554e-06, "loss": 0.05631953850388527, "step": 6163 }, { "epoch": 0.8333319250690754, "grad_norm": 0.28598642349243164, "learning_rate": 2.315680802494256e-06, "loss": 0.056486576795578, "step": 6164 }, { "epoch": 0.8334671184378406, "grad_norm": 0.5973681807518005, "learning_rate": 2.312028681238856e-06, "loss": 0.058661580085754395, "step": 6165 }, { "epoch": 0.8336023118066059, "grad_norm": 0.4279343783855438, "learning_rate": 2.3083792016867434e-06, "loss": 0.04404953122138977, "step": 6166 }, { "epoch": 0.8337375051753712, "grad_norm": 0.3314683139324188, "learning_rate": 2.304732364597759e-06, "loss": 0.0457480326294899, "step": 6167 }, { "epoch": 0.8338726985441364, "grad_norm": 0.2974388301372528, "learning_rate": 2.3010881707311994e-06, "loss": 0.04642082750797272, "step": 6168 }, { "epoch": 0.8340078919129017, "grad_norm": 0.3039360046386719, "learning_rate": 2.2974466208458017e-06, "loss": 0.037277087569236755, "step": 6169 }, { "epoch": 0.8341430852816669, "grad_norm": 0.2541879713535309, "learning_rate": 2.293807715699755e-06, "loss": 0.04402574896812439, "step": 6170 }, { "epoch": 0.8342782786504322, "grad_norm": 1.1918162107467651, "learning_rate": 2.2901714560507e-06, "loss": 0.06334730237722397, "step": 6171 }, { "epoch": 0.8344134720191975, "grad_norm": 0.3717571794986725, "learning_rate": 2.286537842655722e-06, "loss": 0.044879794120788574, "step": 6172 }, { "epoch": 0.8345486653879627, "grad_norm": 0.47871437668800354, "learning_rate": 2.2829068762713633e-06, "loss": 0.05879815295338631, "step": 6173 }, { "epoch": 0.834683858756728, "grad_norm": 0.2037031650543213, "learning_rate": 2.279278557653611e-06, "loss": 0.037516482174396515, "step": 6174 }, { "epoch": 0.8348190521254932, "grad_norm": 0.6012883186340332, "learning_rate": 2.2756528875578965e-06, "loss": 0.042451538145542145, "step": 6175 }, { "epoch": 0.8349542454942585, "grad_norm": 0.6637305617332458, "learning_rate": 2.2720298667391067e-06, "loss": 0.04357580468058586, "step": 6176 }, { "epoch": 0.8350894388630238, "grad_norm": 0.4367183744907379, "learning_rate": 2.268409495951568e-06, "loss": 0.04872041940689087, "step": 6177 }, { "epoch": 0.835224632231789, "grad_norm": 0.7653933167457581, "learning_rate": 2.2647917759490723e-06, "loss": 0.05078501999378204, "step": 6178 }, { "epoch": 0.8353598256005543, "grad_norm": 0.5071462392807007, "learning_rate": 2.261176707484834e-06, "loss": 0.047454267740249634, "step": 6179 }, { "epoch": 0.8354950189693195, "grad_norm": 0.5136466026306152, "learning_rate": 2.2575642913115408e-06, "loss": 0.04026796668767929, "step": 6180 }, { "epoch": 0.8356302123380848, "grad_norm": 0.4026106297969818, "learning_rate": 2.253954528181313e-06, "loss": 0.04408586025238037, "step": 6181 }, { "epoch": 0.8357654057068501, "grad_norm": 1.0115156173706055, "learning_rate": 2.2503474188457206e-06, "loss": 0.04224462807178497, "step": 6182 }, { "epoch": 0.8359005990756153, "grad_norm": 0.7170173525810242, "learning_rate": 2.2467429640557903e-06, "loss": 0.04062668979167938, "step": 6183 }, { "epoch": 0.8360357924443806, "grad_norm": 0.32150521874427795, "learning_rate": 2.2431411645619776e-06, "loss": 0.04180286452174187, "step": 6184 }, { "epoch": 0.8361709858131459, "grad_norm": 0.9162803292274475, "learning_rate": 2.239542021114205e-06, "loss": 0.05110998451709747, "step": 6185 }, { "epoch": 0.8363061791819111, "grad_norm": 0.691806435585022, "learning_rate": 2.2359455344618306e-06, "loss": 0.0493222177028656, "step": 6186 }, { "epoch": 0.8364413725506764, "grad_norm": 0.31305885314941406, "learning_rate": 2.232351705353663e-06, "loss": 0.051948729902505875, "step": 6187 }, { "epoch": 0.8365765659194416, "grad_norm": 0.26568931341171265, "learning_rate": 2.228760534537955e-06, "loss": 0.035367682576179504, "step": 6188 }, { "epoch": 0.836711759288207, "grad_norm": 0.3882652521133423, "learning_rate": 2.2251720227624044e-06, "loss": 0.051023002713918686, "step": 6189 }, { "epoch": 0.8368469526569722, "grad_norm": 0.4140540063381195, "learning_rate": 2.2215861707741666e-06, "loss": 0.03535125404596329, "step": 6190 }, { "epoch": 0.8369821460257374, "grad_norm": 0.28733938932418823, "learning_rate": 2.2180029793198313e-06, "loss": 0.042948298156261444, "step": 6191 }, { "epoch": 0.8371173393945027, "grad_norm": 0.3373543620109558, "learning_rate": 2.2144224491454363e-06, "loss": 0.051349759101867676, "step": 6192 }, { "epoch": 0.8372525327632679, "grad_norm": 0.35262036323547363, "learning_rate": 2.2108445809964695e-06, "loss": 0.04738650470972061, "step": 6193 }, { "epoch": 0.8373877261320333, "grad_norm": 0.3689965605735779, "learning_rate": 2.2072693756178567e-06, "loss": 0.042664796113967896, "step": 6194 }, { "epoch": 0.8375229195007985, "grad_norm": 0.9979879856109619, "learning_rate": 2.203696833753983e-06, "loss": 0.05222224444150925, "step": 6195 }, { "epoch": 0.8376581128695637, "grad_norm": 0.35647955536842346, "learning_rate": 2.200126956148668e-06, "loss": 0.04531312361359596, "step": 6196 }, { "epoch": 0.837793306238329, "grad_norm": 0.357084184885025, "learning_rate": 2.196559743545177e-06, "loss": 0.046202752739191055, "step": 6197 }, { "epoch": 0.8379284996070943, "grad_norm": 1.526036262512207, "learning_rate": 2.1929951966862233e-06, "loss": 0.07071888446807861, "step": 6198 }, { "epoch": 0.8380636929758596, "grad_norm": 0.3560903072357178, "learning_rate": 2.1894333163139607e-06, "loss": 0.031010091304779053, "step": 6199 }, { "epoch": 0.8381988863446248, "grad_norm": 0.2677477300167084, "learning_rate": 2.1858741031700015e-06, "loss": 0.03647442162036896, "step": 6200 }, { "epoch": 0.83833407971339, "grad_norm": 1.0474574565887451, "learning_rate": 2.1823175579953856e-06, "loss": 0.07177795469760895, "step": 6201 }, { "epoch": 0.8384692730821554, "grad_norm": 1.179814338684082, "learning_rate": 2.1787636815306065e-06, "loss": 0.05104741454124451, "step": 6202 }, { "epoch": 0.8386044664509206, "grad_norm": 0.4671471118927002, "learning_rate": 2.1752124745156005e-06, "loss": 0.042914584279060364, "step": 6203 }, { "epoch": 0.8387396598196858, "grad_norm": 0.37799084186553955, "learning_rate": 2.171663937689744e-06, "loss": 0.03742415830492973, "step": 6204 }, { "epoch": 0.8388748531884511, "grad_norm": 0.3980090320110321, "learning_rate": 2.168118071791868e-06, "loss": 0.059833161532878876, "step": 6205 }, { "epoch": 0.8390100465572163, "grad_norm": 0.6616215705871582, "learning_rate": 2.164574877560237e-06, "loss": 0.04415488988161087, "step": 6206 }, { "epoch": 0.8391452399259817, "grad_norm": 0.22867345809936523, "learning_rate": 2.161034355732564e-06, "loss": 0.037374675273895264, "step": 6207 }, { "epoch": 0.8392804332947469, "grad_norm": 0.7260293364524841, "learning_rate": 2.1574965070460047e-06, "loss": 0.032028086483478546, "step": 6208 }, { "epoch": 0.8394156266635121, "grad_norm": 0.36259400844573975, "learning_rate": 2.1539613322371527e-06, "loss": 0.04549260810017586, "step": 6209 }, { "epoch": 0.8395508200322774, "grad_norm": 0.4727924168109894, "learning_rate": 2.1504288320420613e-06, "loss": 0.03847412019968033, "step": 6210 }, { "epoch": 0.8396860134010427, "grad_norm": 1.1600937843322754, "learning_rate": 2.1468990071962038e-06, "loss": 0.0547519326210022, "step": 6211 }, { "epoch": 0.839821206769808, "grad_norm": 0.4148150682449341, "learning_rate": 2.143371858434515e-06, "loss": 0.04619329050183296, "step": 6212 }, { "epoch": 0.8399564001385732, "grad_norm": 0.7472928762435913, "learning_rate": 2.139847386491367e-06, "loss": 0.03591097146272659, "step": 6213 }, { "epoch": 0.8400915935073384, "grad_norm": 0.4984283745288849, "learning_rate": 2.1363255921005685e-06, "loss": 0.057893648743629456, "step": 6214 }, { "epoch": 0.8402267868761037, "grad_norm": 0.4796014130115509, "learning_rate": 2.1328064759953853e-06, "loss": 0.05514446645975113, "step": 6215 }, { "epoch": 0.840361980244869, "grad_norm": 0.6901918053627014, "learning_rate": 2.129290038908504e-06, "loss": 0.03864287957549095, "step": 6216 }, { "epoch": 0.8404971736136343, "grad_norm": 0.3759935200214386, "learning_rate": 2.1257762815720745e-06, "loss": 0.05031343176960945, "step": 6217 }, { "epoch": 0.8406323669823995, "grad_norm": 0.801692008972168, "learning_rate": 2.122265204717678e-06, "loss": 0.05881436914205551, "step": 6218 }, { "epoch": 0.8407675603511647, "grad_norm": 0.7840434908866882, "learning_rate": 2.1187568090763328e-06, "loss": 0.07795971632003784, "step": 6219 }, { "epoch": 0.8409027537199301, "grad_norm": 0.6142735481262207, "learning_rate": 2.1152510953785196e-06, "loss": 0.049477964639663696, "step": 6220 }, { "epoch": 0.8410379470886953, "grad_norm": 0.4441145658493042, "learning_rate": 2.1117480643541304e-06, "loss": 0.05546906590461731, "step": 6221 }, { "epoch": 0.8411731404574606, "grad_norm": 0.578981339931488, "learning_rate": 2.1082477167325275e-06, "loss": 0.050517674535512924, "step": 6222 }, { "epoch": 0.8413083338262258, "grad_norm": 0.5397024154663086, "learning_rate": 2.1047500532424968e-06, "loss": 0.05547235906124115, "step": 6223 }, { "epoch": 0.841443527194991, "grad_norm": 1.2395414113998413, "learning_rate": 2.1012550746122705e-06, "loss": 0.056867048144340515, "step": 6224 }, { "epoch": 0.8415787205637564, "grad_norm": 0.7233183979988098, "learning_rate": 2.0977627815695217e-06, "loss": 0.054838210344314575, "step": 6225 }, { "epoch": 0.8417139139325216, "grad_norm": 0.7362427115440369, "learning_rate": 2.094273174841362e-06, "loss": 0.05307919532060623, "step": 6226 }, { "epoch": 0.8418491073012869, "grad_norm": 0.5328913927078247, "learning_rate": 2.0907862551543516e-06, "loss": 0.043626755475997925, "step": 6227 }, { "epoch": 0.8419843006700521, "grad_norm": 0.2717840373516083, "learning_rate": 2.087302023234485e-06, "loss": 0.037222929298877716, "step": 6228 }, { "epoch": 0.8421194940388174, "grad_norm": 0.26456549763679504, "learning_rate": 2.083820479807194e-06, "loss": 0.03553616255521774, "step": 6229 }, { "epoch": 0.8422546874075827, "grad_norm": 0.36989766359329224, "learning_rate": 2.0803416255973585e-06, "loss": 0.041608355939388275, "step": 6230 }, { "epoch": 0.8423898807763479, "grad_norm": 0.3028639853000641, "learning_rate": 2.0768654613292887e-06, "loss": 0.05160258710384369, "step": 6231 }, { "epoch": 0.8425250741451131, "grad_norm": 0.9835982918739319, "learning_rate": 2.0733919877267477e-06, "loss": 0.045955684036016464, "step": 6232 }, { "epoch": 0.8426602675138785, "grad_norm": 0.4995516240596771, "learning_rate": 2.0699212055129268e-06, "loss": 0.048937439918518066, "step": 6233 }, { "epoch": 0.8427954608826437, "grad_norm": 0.32965973019599915, "learning_rate": 2.066453115410463e-06, "loss": 0.041056472808122635, "step": 6234 }, { "epoch": 0.842930654251409, "grad_norm": 0.6493692398071289, "learning_rate": 2.062987718141431e-06, "loss": 0.046764492988586426, "step": 6235 }, { "epoch": 0.8430658476201742, "grad_norm": 0.3514888286590576, "learning_rate": 2.0595250144273423e-06, "loss": 0.03547742962837219, "step": 6236 }, { "epoch": 0.8432010409889394, "grad_norm": 0.2799179255962372, "learning_rate": 2.056065004989155e-06, "loss": 0.04868411645293236, "step": 6237 }, { "epoch": 0.8433362343577048, "grad_norm": 0.4646119475364685, "learning_rate": 2.0526076905472585e-06, "loss": 0.05488266050815582, "step": 6238 }, { "epoch": 0.84347142772647, "grad_norm": 0.7049408555030823, "learning_rate": 2.0491530718214855e-06, "loss": 0.04533543437719345, "step": 6239 }, { "epoch": 0.8436066210952353, "grad_norm": 0.25155574083328247, "learning_rate": 2.0457011495311045e-06, "loss": 0.05469845235347748, "step": 6240 }, { "epoch": 0.8437418144640005, "grad_norm": 0.5906786322593689, "learning_rate": 2.0422519243948232e-06, "loss": 0.07562047243118286, "step": 6241 }, { "epoch": 0.8438770078327658, "grad_norm": 0.6755495071411133, "learning_rate": 2.0388053971307927e-06, "loss": 0.03171085566282272, "step": 6242 }, { "epoch": 0.8440122012015311, "grad_norm": 0.6171776652336121, "learning_rate": 2.0353615684565956e-06, "loss": 0.06633298099040985, "step": 6243 }, { "epoch": 0.8441473945702963, "grad_norm": 0.5486334562301636, "learning_rate": 2.0319204390892566e-06, "loss": 0.06237971782684326, "step": 6244 }, { "epoch": 0.8442825879390616, "grad_norm": 0.7356615662574768, "learning_rate": 2.0284820097452374e-06, "loss": 0.06647787988185883, "step": 6245 }, { "epoch": 0.8444177813078269, "grad_norm": 0.34131887555122375, "learning_rate": 2.02504628114043e-06, "loss": 0.03561331331729889, "step": 6246 }, { "epoch": 0.8445529746765921, "grad_norm": 0.6534212231636047, "learning_rate": 2.0216132539901865e-06, "loss": 0.054908670485019684, "step": 6247 }, { "epoch": 0.8446881680453574, "grad_norm": 0.4225394129753113, "learning_rate": 2.0181829290092663e-06, "loss": 0.0484328418970108, "step": 6248 }, { "epoch": 0.8448233614141226, "grad_norm": 0.4311065375804901, "learning_rate": 2.014755306911891e-06, "loss": 0.07305488735437393, "step": 6249 }, { "epoch": 0.844958554782888, "grad_norm": 1.0475775003433228, "learning_rate": 2.0113303884117057e-06, "loss": 0.049315690994262695, "step": 6250 }, { "epoch": 0.8450937481516532, "grad_norm": 0.3791733980178833, "learning_rate": 2.0079081742217957e-06, "loss": 0.05666741728782654, "step": 6251 }, { "epoch": 0.8452289415204184, "grad_norm": 0.4025326371192932, "learning_rate": 2.0044886650546915e-06, "loss": 0.04880619794130325, "step": 6252 }, { "epoch": 0.8453641348891837, "grad_norm": 0.450590044260025, "learning_rate": 2.0010718616223406e-06, "loss": 0.06794996559619904, "step": 6253 }, { "epoch": 0.8454993282579489, "grad_norm": 0.537303626537323, "learning_rate": 1.9976577646361514e-06, "loss": 0.05239599943161011, "step": 6254 }, { "epoch": 0.8456345216267143, "grad_norm": 0.4125227928161621, "learning_rate": 1.994246374806953e-06, "loss": 0.04312615841627121, "step": 6255 }, { "epoch": 0.8457697149954795, "grad_norm": 0.41425222158432007, "learning_rate": 1.9908376928450128e-06, "loss": 0.036222536116838455, "step": 6256 }, { "epoch": 0.8459049083642447, "grad_norm": 0.43170732259750366, "learning_rate": 1.987431719460039e-06, "loss": 0.04883016273379326, "step": 6257 }, { "epoch": 0.84604010173301, "grad_norm": 0.2581452429294586, "learning_rate": 1.9840284553611706e-06, "loss": 0.04100232198834419, "step": 6258 }, { "epoch": 0.8461752951017752, "grad_norm": 0.7356533408164978, "learning_rate": 1.980627901256989e-06, "loss": 0.062110066413879395, "step": 6259 }, { "epoch": 0.8463104884705405, "grad_norm": 0.6201856136322021, "learning_rate": 1.9772300578555062e-06, "loss": 0.03643447533249855, "step": 6260 }, { "epoch": 0.8464456818393058, "grad_norm": 0.47453224658966064, "learning_rate": 1.973834925864172e-06, "loss": 0.04004233330488205, "step": 6261 }, { "epoch": 0.846580875208071, "grad_norm": 0.9696336388587952, "learning_rate": 1.97044250598987e-06, "loss": 0.050774574279785156, "step": 6262 }, { "epoch": 0.8467160685768363, "grad_norm": 0.8261301517486572, "learning_rate": 1.9670527989389177e-06, "loss": 0.05598049238324165, "step": 6263 }, { "epoch": 0.8468512619456016, "grad_norm": 0.38730388879776, "learning_rate": 1.9636658054170747e-06, "loss": 0.055252887308597565, "step": 6264 }, { "epoch": 0.8469864553143668, "grad_norm": 0.36259526014328003, "learning_rate": 1.960281526129531e-06, "loss": 0.04671737551689148, "step": 6265 }, { "epoch": 0.8471216486831321, "grad_norm": 0.9208915829658508, "learning_rate": 1.9568999617809077e-06, "loss": 0.06472264230251312, "step": 6266 }, { "epoch": 0.8472568420518973, "grad_norm": 0.46292293071746826, "learning_rate": 1.9535211130752676e-06, "loss": 0.04803779721260071, "step": 6267 }, { "epoch": 0.8473920354206627, "grad_norm": 0.6515359878540039, "learning_rate": 1.950144980716101e-06, "loss": 0.06567519903182983, "step": 6268 }, { "epoch": 0.8475272287894279, "grad_norm": 1.0845727920532227, "learning_rate": 1.9467715654063444e-06, "loss": 0.058678675442934036, "step": 6269 }, { "epoch": 0.8476624221581931, "grad_norm": 0.4048415720462799, "learning_rate": 1.9434008678483532e-06, "loss": 0.049621663987636566, "step": 6270 }, { "epoch": 0.8477976155269584, "grad_norm": 0.17586331069469452, "learning_rate": 1.9400328887439295e-06, "loss": 0.027401769533753395, "step": 6271 }, { "epoch": 0.8479328088957236, "grad_norm": 0.6511488556861877, "learning_rate": 1.9366676287943038e-06, "loss": 0.07533954828977585, "step": 6272 }, { "epoch": 0.848068002264489, "grad_norm": 1.032747745513916, "learning_rate": 1.9333050887001337e-06, "loss": 0.05825965106487274, "step": 6273 }, { "epoch": 0.8482031956332542, "grad_norm": 0.867613673210144, "learning_rate": 1.9299452691615293e-06, "loss": 0.07221711426973343, "step": 6274 }, { "epoch": 0.8483383890020194, "grad_norm": 0.1599804013967514, "learning_rate": 1.9265881708780182e-06, "loss": 0.03129618614912033, "step": 6275 }, { "epoch": 0.8484735823707847, "grad_norm": 0.45775869488716125, "learning_rate": 1.9232337945485657e-06, "loss": 0.04149597883224487, "step": 6276 }, { "epoch": 0.84860877573955, "grad_norm": 0.2641725242137909, "learning_rate": 1.91988214087157e-06, "loss": 0.04288870468735695, "step": 6277 }, { "epoch": 0.8487439691083153, "grad_norm": 0.5147126913070679, "learning_rate": 1.9165332105448613e-06, "loss": 0.04254014417529106, "step": 6278 }, { "epoch": 0.8488791624770805, "grad_norm": 0.8537400364875793, "learning_rate": 1.913187004265715e-06, "loss": 0.0676904171705246, "step": 6279 }, { "epoch": 0.8490143558458457, "grad_norm": 0.6239067912101746, "learning_rate": 1.909843522730814e-06, "loss": 0.03847206383943558, "step": 6280 }, { "epoch": 0.849149549214611, "grad_norm": 0.33657991886138916, "learning_rate": 1.9065027666363017e-06, "loss": 0.031904011964797974, "step": 6281 }, { "epoch": 0.8492847425833763, "grad_norm": 1.0744355916976929, "learning_rate": 1.903164736677736e-06, "loss": 0.061805665493011475, "step": 6282 }, { "epoch": 0.8494199359521416, "grad_norm": 0.32329660654067993, "learning_rate": 1.8998294335501082e-06, "loss": 0.05151509493589401, "step": 6283 }, { "epoch": 0.8495551293209068, "grad_norm": 0.6502349376678467, "learning_rate": 1.8964968579478592e-06, "loss": 0.056619733572006226, "step": 6284 }, { "epoch": 0.849690322689672, "grad_norm": 0.845119297504425, "learning_rate": 1.893167010564834e-06, "loss": 0.04350561648607254, "step": 6285 }, { "epoch": 0.8498255160584374, "grad_norm": 0.26181334257125854, "learning_rate": 1.8898398920943349e-06, "loss": 0.04260338097810745, "step": 6286 }, { "epoch": 0.8499607094272026, "grad_norm": 0.5554114580154419, "learning_rate": 1.886515503229081e-06, "loss": 0.04976817965507507, "step": 6287 }, { "epoch": 0.8500959027959678, "grad_norm": 0.38692641258239746, "learning_rate": 1.8831938446612269e-06, "loss": 0.03918194770812988, "step": 6288 }, { "epoch": 0.8502310961647331, "grad_norm": 1.0803706645965576, "learning_rate": 1.8798749170823676e-06, "loss": 0.04637579619884491, "step": 6289 }, { "epoch": 0.8503662895334984, "grad_norm": 0.4428040683269501, "learning_rate": 1.8765587211835089e-06, "loss": 0.05340223386883736, "step": 6290 }, { "epoch": 0.8505014829022637, "grad_norm": 0.47322025895118713, "learning_rate": 1.8732452576551102e-06, "loss": 0.05617991089820862, "step": 6291 }, { "epoch": 0.8506366762710289, "grad_norm": 0.30985206365585327, "learning_rate": 1.8699345271870493e-06, "loss": 0.03756796196103096, "step": 6292 }, { "epoch": 0.8507718696397941, "grad_norm": 0.7566153407096863, "learning_rate": 1.8666265304686387e-06, "loss": 0.06445176899433136, "step": 6293 }, { "epoch": 0.8509070630085594, "grad_norm": 0.530719518661499, "learning_rate": 1.8633212681886203e-06, "loss": 0.05478502810001373, "step": 6294 }, { "epoch": 0.8510422563773247, "grad_norm": 0.34763604402542114, "learning_rate": 1.8600187410351621e-06, "loss": 0.04491907358169556, "step": 6295 }, { "epoch": 0.85117744974609, "grad_norm": 0.35831430554389954, "learning_rate": 1.8567189496958776e-06, "loss": 0.047058895230293274, "step": 6296 }, { "epoch": 0.8513126431148552, "grad_norm": 0.33181583881378174, "learning_rate": 1.853421894857797e-06, "loss": 0.04101666063070297, "step": 6297 }, { "epoch": 0.8514478364836204, "grad_norm": 0.6222859621047974, "learning_rate": 1.8501275772073827e-06, "loss": 0.05943810194730759, "step": 6298 }, { "epoch": 0.8515830298523858, "grad_norm": 0.4641937017440796, "learning_rate": 1.8468359974305315e-06, "loss": 0.045994631946086884, "step": 6299 }, { "epoch": 0.851718223221151, "grad_norm": 0.4015611708164215, "learning_rate": 1.8435471562125633e-06, "loss": 0.05843416973948479, "step": 6300 }, { "epoch": 0.8518534165899163, "grad_norm": 0.35173070430755615, "learning_rate": 1.8402610542382386e-06, "loss": 0.051927611231803894, "step": 6301 }, { "epoch": 0.8519886099586815, "grad_norm": 0.8363332152366638, "learning_rate": 1.836977692191742e-06, "loss": 0.056589026004076004, "step": 6302 }, { "epoch": 0.8521238033274467, "grad_norm": 0.3716745674610138, "learning_rate": 1.8336970707566781e-06, "loss": 0.03616643697023392, "step": 6303 }, { "epoch": 0.8522589966962121, "grad_norm": 0.5818215012550354, "learning_rate": 1.8304191906160973e-06, "loss": 0.047216322273015976, "step": 6304 }, { "epoch": 0.8523941900649773, "grad_norm": 0.7553380131721497, "learning_rate": 1.8271440524524668e-06, "loss": 0.06350641697645187, "step": 6305 }, { "epoch": 0.8525293834337426, "grad_norm": 0.6106654405593872, "learning_rate": 1.8238716569476949e-06, "loss": 0.039121419191360474, "step": 6306 }, { "epoch": 0.8526645768025078, "grad_norm": 0.5820634961128235, "learning_rate": 1.8206020047831078e-06, "loss": 0.05436358228325844, "step": 6307 }, { "epoch": 0.8527997701712731, "grad_norm": 0.4586799442768097, "learning_rate": 1.8173350966394648e-06, "loss": 0.048718370497226715, "step": 6308 }, { "epoch": 0.8529349635400384, "grad_norm": 0.614192545413971, "learning_rate": 1.8140709331969513e-06, "loss": 0.06158027797937393, "step": 6309 }, { "epoch": 0.8530701569088036, "grad_norm": 0.6466459631919861, "learning_rate": 1.810809515135184e-06, "loss": 0.05154663324356079, "step": 6310 }, { "epoch": 0.8532053502775688, "grad_norm": 0.29418498277664185, "learning_rate": 1.8075508431332111e-06, "loss": 0.036776103079319, "step": 6311 }, { "epoch": 0.8533405436463342, "grad_norm": 0.29602229595184326, "learning_rate": 1.8042949178695034e-06, "loss": 0.03308059275150299, "step": 6312 }, { "epoch": 0.8534757370150994, "grad_norm": 0.7528901696205139, "learning_rate": 1.8010417400219636e-06, "loss": 0.057235248386859894, "step": 6313 }, { "epoch": 0.8536109303838647, "grad_norm": 1.0960278511047363, "learning_rate": 1.7977913102679167e-06, "loss": 0.042923420667648315, "step": 6314 }, { "epoch": 0.8537461237526299, "grad_norm": 0.6818878054618835, "learning_rate": 1.7945436292841193e-06, "loss": 0.06785756349563599, "step": 6315 }, { "epoch": 0.8538813171213951, "grad_norm": 0.2882588505744934, "learning_rate": 1.791298697746766e-06, "loss": 0.05655911564826965, "step": 6316 }, { "epoch": 0.8540165104901605, "grad_norm": 0.4256933927536011, "learning_rate": 1.7880565163314545e-06, "loss": 0.04764436185359955, "step": 6317 }, { "epoch": 0.8541517038589257, "grad_norm": 0.6672478318214417, "learning_rate": 1.784817085713233e-06, "loss": 0.04730550944805145, "step": 6318 }, { "epoch": 0.854286897227691, "grad_norm": 0.5017374753952026, "learning_rate": 1.7815804065665669e-06, "loss": 0.0507778599858284, "step": 6319 }, { "epoch": 0.8544220905964562, "grad_norm": 0.46741950511932373, "learning_rate": 1.778346479565346e-06, "loss": 0.05671544373035431, "step": 6320 }, { "epoch": 0.8545572839652215, "grad_norm": 1.0150688886642456, "learning_rate": 1.7751153053829011e-06, "loss": 0.06213996559381485, "step": 6321 }, { "epoch": 0.8546924773339868, "grad_norm": 0.5143606662750244, "learning_rate": 1.7718868846919662e-06, "loss": 0.06776639074087143, "step": 6322 }, { "epoch": 0.854827670702752, "grad_norm": 1.093859314918518, "learning_rate": 1.7686612181647266e-06, "loss": 0.052836500108242035, "step": 6323 }, { "epoch": 0.8549628640715173, "grad_norm": 0.9550508260726929, "learning_rate": 1.7654383064727802e-06, "loss": 0.04823852330446243, "step": 6324 }, { "epoch": 0.8550980574402826, "grad_norm": 0.7983924150466919, "learning_rate": 1.762218150287152e-06, "loss": 0.05271138250827789, "step": 6325 }, { "epoch": 0.8552332508090478, "grad_norm": 0.7791218757629395, "learning_rate": 1.759000750278299e-06, "loss": 0.042531318962574005, "step": 6326 }, { "epoch": 0.8553684441778131, "grad_norm": 0.6210097670555115, "learning_rate": 1.7557861071160953e-06, "loss": 0.05496615171432495, "step": 6327 }, { "epoch": 0.8555036375465783, "grad_norm": 0.20374125242233276, "learning_rate": 1.7525742214698538e-06, "loss": 0.03397899866104126, "step": 6328 }, { "epoch": 0.8556388309153437, "grad_norm": 0.5363911390304565, "learning_rate": 1.7493650940083045e-06, "loss": 0.04610375314950943, "step": 6329 }, { "epoch": 0.8557740242841089, "grad_norm": 1.0519609451293945, "learning_rate": 1.746158725399603e-06, "loss": 0.04173114895820618, "step": 6330 }, { "epoch": 0.8559092176528741, "grad_norm": 0.9720075130462646, "learning_rate": 1.7429551163113322e-06, "loss": 0.05323171615600586, "step": 6331 }, { "epoch": 0.8560444110216394, "grad_norm": 0.5926998853683472, "learning_rate": 1.7397542674105e-06, "loss": 0.06275883316993713, "step": 6332 }, { "epoch": 0.8561796043904046, "grad_norm": 0.4590113162994385, "learning_rate": 1.7365561793635431e-06, "loss": 0.04344320297241211, "step": 6333 }, { "epoch": 0.85631479775917, "grad_norm": 0.28174513578414917, "learning_rate": 1.7333608528363227e-06, "loss": 0.04860040917992592, "step": 6334 }, { "epoch": 0.8564499911279352, "grad_norm": 0.5829482674598694, "learning_rate": 1.7301682884941128e-06, "loss": 0.05380423367023468, "step": 6335 }, { "epoch": 0.8565851844967004, "grad_norm": 0.42195332050323486, "learning_rate": 1.726978487001632e-06, "loss": 0.04403732717037201, "step": 6336 }, { "epoch": 0.8567203778654657, "grad_norm": 0.3213565945625305, "learning_rate": 1.7237914490230072e-06, "loss": 0.03615276515483856, "step": 6337 }, { "epoch": 0.856855571234231, "grad_norm": 1.1198323965072632, "learning_rate": 1.7206071752218027e-06, "loss": 0.07081387937068939, "step": 6338 }, { "epoch": 0.8569907646029962, "grad_norm": 0.3278871774673462, "learning_rate": 1.7174256662610032e-06, "loss": 0.05254681408405304, "step": 6339 }, { "epoch": 0.8571259579717615, "grad_norm": 0.5900607705116272, "learning_rate": 1.714246922803004e-06, "loss": 0.06021782010793686, "step": 6340 }, { "epoch": 0.8572611513405267, "grad_norm": 0.4513183832168579, "learning_rate": 1.7110709455096468e-06, "loss": 0.049845121800899506, "step": 6341 }, { "epoch": 0.857396344709292, "grad_norm": 0.28617408871650696, "learning_rate": 1.7078977350421815e-06, "loss": 0.044528208673000336, "step": 6342 }, { "epoch": 0.8575315380780573, "grad_norm": 0.6556611657142639, "learning_rate": 1.7047272920612926e-06, "loss": 0.038710109889507294, "step": 6343 }, { "epoch": 0.8576667314468225, "grad_norm": 0.3762466311454773, "learning_rate": 1.7015596172270841e-06, "loss": 0.03848453238606453, "step": 6344 }, { "epoch": 0.8578019248155878, "grad_norm": 0.47321709990501404, "learning_rate": 1.6983947111990717e-06, "loss": 0.04214465618133545, "step": 6345 }, { "epoch": 0.857937118184353, "grad_norm": 0.35063162446022034, "learning_rate": 1.695232574636218e-06, "loss": 0.051957957446575165, "step": 6346 }, { "epoch": 0.8580723115531184, "grad_norm": 0.3668923079967499, "learning_rate": 1.6920732081968882e-06, "loss": 0.05051729083061218, "step": 6347 }, { "epoch": 0.8582075049218836, "grad_norm": 0.7809852361679077, "learning_rate": 1.6889166125388878e-06, "loss": 0.04531104117631912, "step": 6348 }, { "epoch": 0.8583426982906488, "grad_norm": 0.735115110874176, "learning_rate": 1.6857627883194277e-06, "loss": 0.05323927849531174, "step": 6349 }, { "epoch": 0.8584778916594141, "grad_norm": 0.2956749200820923, "learning_rate": 1.6826117361951577e-06, "loss": 0.046146489679813385, "step": 6350 }, { "epoch": 0.8586130850281793, "grad_norm": 0.6716263294219971, "learning_rate": 1.6794634568221412e-06, "loss": 0.07926372438669205, "step": 6351 }, { "epoch": 0.8587482783969447, "grad_norm": 0.8734883666038513, "learning_rate": 1.676317950855864e-06, "loss": 0.055575475096702576, "step": 6352 }, { "epoch": 0.8588834717657099, "grad_norm": 0.7916175127029419, "learning_rate": 1.6731752189512456e-06, "loss": 0.0412934236228466, "step": 6353 }, { "epoch": 0.8590186651344751, "grad_norm": 0.5428727269172668, "learning_rate": 1.6700352617626092e-06, "loss": 0.05648193508386612, "step": 6354 }, { "epoch": 0.8591538585032404, "grad_norm": 0.8448138236999512, "learning_rate": 1.6668980799437167e-06, "loss": 0.04699888080358505, "step": 6355 }, { "epoch": 0.8592890518720057, "grad_norm": 0.27350717782974243, "learning_rate": 1.6637636741477458e-06, "loss": 0.047247543931007385, "step": 6356 }, { "epoch": 0.859424245240771, "grad_norm": 0.5769308805465698, "learning_rate": 1.6606320450272943e-06, "loss": 0.05877112224698067, "step": 6357 }, { "epoch": 0.8595594386095362, "grad_norm": 0.3961043655872345, "learning_rate": 1.657503193234386e-06, "loss": 0.045877233147621155, "step": 6358 }, { "epoch": 0.8596946319783014, "grad_norm": 0.647350013256073, "learning_rate": 1.654377119420461e-06, "loss": 0.047591596841812134, "step": 6359 }, { "epoch": 0.8598298253470668, "grad_norm": 0.3304547369480133, "learning_rate": 1.6512538242363889e-06, "loss": 0.04440970718860626, "step": 6360 }, { "epoch": 0.859965018715832, "grad_norm": 0.6236200928688049, "learning_rate": 1.6481333083324563e-06, "loss": 0.062338974326848984, "step": 6361 }, { "epoch": 0.8601002120845973, "grad_norm": 0.4829655885696411, "learning_rate": 1.6450155723583698e-06, "loss": 0.0645047053694725, "step": 6362 }, { "epoch": 0.8602354054533625, "grad_norm": 1.0008617639541626, "learning_rate": 1.6419006169632573e-06, "loss": 0.058346331119537354, "step": 6363 }, { "epoch": 0.8603705988221277, "grad_norm": 0.6507902145385742, "learning_rate": 1.638788442795668e-06, "loss": 0.055436864495277405, "step": 6364 }, { "epoch": 0.8605057921908931, "grad_norm": 1.509135365486145, "learning_rate": 1.6356790505035785e-06, "loss": 0.06659440696239471, "step": 6365 }, { "epoch": 0.8606409855596583, "grad_norm": 0.7501407861709595, "learning_rate": 1.6325724407343795e-06, "loss": 0.05589187145233154, "step": 6366 }, { "epoch": 0.8607761789284235, "grad_norm": 0.7137078642845154, "learning_rate": 1.6294686141348801e-06, "loss": 0.05699652433395386, "step": 6367 }, { "epoch": 0.8609113722971888, "grad_norm": 0.5166784524917603, "learning_rate": 1.626367571351317e-06, "loss": 0.054060764610767365, "step": 6368 }, { "epoch": 0.8610465656659541, "grad_norm": 0.7450024485588074, "learning_rate": 1.6232693130293386e-06, "loss": 0.04736056178808212, "step": 6369 }, { "epoch": 0.8611817590347194, "grad_norm": 0.3280739188194275, "learning_rate": 1.6201738398140254e-06, "loss": 0.04433522745966911, "step": 6370 }, { "epoch": 0.8613169524034846, "grad_norm": 0.5554978847503662, "learning_rate": 1.6170811523498718e-06, "loss": 0.057081907987594604, "step": 6371 }, { "epoch": 0.8614521457722498, "grad_norm": 0.3789294958114624, "learning_rate": 1.613991251280783e-06, "loss": 0.06383635103702545, "step": 6372 }, { "epoch": 0.8615873391410152, "grad_norm": 0.8679389357566833, "learning_rate": 1.6109041372501028e-06, "loss": 0.049386560916900635, "step": 6373 }, { "epoch": 0.8617225325097804, "grad_norm": 0.5927804708480835, "learning_rate": 1.6078198109005766e-06, "loss": 0.06466534733772278, "step": 6374 }, { "epoch": 0.8618577258785457, "grad_norm": 1.431593656539917, "learning_rate": 1.6047382728743843e-06, "loss": 0.0730823203921318, "step": 6375 }, { "epoch": 0.8619929192473109, "grad_norm": 0.36228853464126587, "learning_rate": 1.6016595238131176e-06, "loss": 0.046502485871315, "step": 6376 }, { "epoch": 0.8621281126160761, "grad_norm": 0.4120558798313141, "learning_rate": 1.5985835643577824e-06, "loss": 0.037470173090696335, "step": 6377 }, { "epoch": 0.8622633059848415, "grad_norm": 0.4864238500595093, "learning_rate": 1.5955103951488177e-06, "loss": 0.04541681706905365, "step": 6378 }, { "epoch": 0.8623984993536067, "grad_norm": 0.2943304777145386, "learning_rate": 1.5924400168260666e-06, "loss": 0.0466192290186882, "step": 6379 }, { "epoch": 0.862533692722372, "grad_norm": 1.0102490186691284, "learning_rate": 1.5893724300288064e-06, "loss": 0.05961532145738602, "step": 6380 }, { "epoch": 0.8626688860911372, "grad_norm": 0.5914861559867859, "learning_rate": 1.5863076353957196e-06, "loss": 0.04195981100201607, "step": 6381 }, { "epoch": 0.8628040794599025, "grad_norm": 0.43987593054771423, "learning_rate": 1.5832456335649104e-06, "loss": 0.0477483868598938, "step": 6382 }, { "epoch": 0.8629392728286678, "grad_norm": 0.37542444467544556, "learning_rate": 1.580186425173909e-06, "loss": 0.04148589074611664, "step": 6383 }, { "epoch": 0.863074466197433, "grad_norm": 0.5105854868888855, "learning_rate": 1.5771300108596543e-06, "loss": 0.04227229952812195, "step": 6384 }, { "epoch": 0.8632096595661983, "grad_norm": 0.39576664566993713, "learning_rate": 1.5740763912585171e-06, "loss": 0.06163744255900383, "step": 6385 }, { "epoch": 0.8633448529349635, "grad_norm": 0.43987858295440674, "learning_rate": 1.5710255670062657e-06, "loss": 0.04137536138296127, "step": 6386 }, { "epoch": 0.8634800463037288, "grad_norm": 0.8914985060691833, "learning_rate": 1.567977538738105e-06, "loss": 0.045304179191589355, "step": 6387 }, { "epoch": 0.8636152396724941, "grad_norm": 0.38886401057243347, "learning_rate": 1.5649323070886494e-06, "loss": 0.04219827055931091, "step": 6388 }, { "epoch": 0.8637504330412593, "grad_norm": 0.6032065153121948, "learning_rate": 1.5618898726919284e-06, "loss": 0.07757360488176346, "step": 6389 }, { "epoch": 0.8638856264100246, "grad_norm": 0.6317402124404907, "learning_rate": 1.5588502361814032e-06, "loss": 0.04318195581436157, "step": 6390 }, { "epoch": 0.8640208197787899, "grad_norm": 0.41027575731277466, "learning_rate": 1.5558133981899314e-06, "loss": 0.04398167133331299, "step": 6391 }, { "epoch": 0.8641560131475551, "grad_norm": 0.6707596778869629, "learning_rate": 1.5527793593498053e-06, "loss": 0.053114041686058044, "step": 6392 }, { "epoch": 0.8642912065163204, "grad_norm": 0.6818137168884277, "learning_rate": 1.5497481202927244e-06, "loss": 0.05459202080965042, "step": 6393 }, { "epoch": 0.8644263998850856, "grad_norm": 0.7187246680259705, "learning_rate": 1.5467196816498107e-06, "loss": 0.05118875205516815, "step": 6394 }, { "epoch": 0.8645615932538508, "grad_norm": 0.8068587183952332, "learning_rate": 1.5436940440516018e-06, "loss": 0.056244686245918274, "step": 6395 }, { "epoch": 0.8646967866226162, "grad_norm": 0.5673879981040955, "learning_rate": 1.5406712081280484e-06, "loss": 0.06307181715965271, "step": 6396 }, { "epoch": 0.8648319799913814, "grad_norm": 0.7542120218276978, "learning_rate": 1.5376511745085254e-06, "loss": 0.06109628081321716, "step": 6397 }, { "epoch": 0.8649671733601467, "grad_norm": 0.6209232211112976, "learning_rate": 1.5346339438218181e-06, "loss": 0.04325780272483826, "step": 6398 }, { "epoch": 0.8651023667289119, "grad_norm": 0.5214393734931946, "learning_rate": 1.5316195166961295e-06, "loss": 0.04177108779549599, "step": 6399 }, { "epoch": 0.8652375600976772, "grad_norm": 0.46321913599967957, "learning_rate": 1.5286078937590802e-06, "loss": 0.04379759728908539, "step": 6400 }, { "epoch": 0.8653727534664425, "grad_norm": 0.3907475173473358, "learning_rate": 1.5255990756377025e-06, "loss": 0.05963176488876343, "step": 6401 }, { "epoch": 0.8655079468352077, "grad_norm": 0.32961878180503845, "learning_rate": 1.5225930629584534e-06, "loss": 0.04474177211523056, "step": 6402 }, { "epoch": 0.865643140203973, "grad_norm": 1.0100473165512085, "learning_rate": 1.5195898563472038e-06, "loss": 0.05553389713168144, "step": 6403 }, { "epoch": 0.8657783335727383, "grad_norm": 0.4864867031574249, "learning_rate": 1.5165894564292254e-06, "loss": 0.044846296310424805, "step": 6404 }, { "epoch": 0.8659135269415035, "grad_norm": 0.3903617560863495, "learning_rate": 1.5135918638292269e-06, "loss": 0.049049973487854004, "step": 6405 }, { "epoch": 0.8660487203102688, "grad_norm": 0.393320232629776, "learning_rate": 1.5105970791713186e-06, "loss": 0.04075436294078827, "step": 6406 }, { "epoch": 0.866183913679034, "grad_norm": 0.6641169190406799, "learning_rate": 1.5076051030790355e-06, "loss": 0.03864654526114464, "step": 6407 }, { "epoch": 0.8663191070477994, "grad_norm": 0.3610553741455078, "learning_rate": 1.5046159361753226e-06, "loss": 0.04762904345989227, "step": 6408 }, { "epoch": 0.8664543004165646, "grad_norm": 1.0069036483764648, "learning_rate": 1.5016295790825336e-06, "loss": 0.07318048179149628, "step": 6409 }, { "epoch": 0.8665894937853298, "grad_norm": 0.4771886467933655, "learning_rate": 1.4986460324224493e-06, "loss": 0.0502270944416523, "step": 6410 }, { "epoch": 0.8667246871540951, "grad_norm": 0.47285547852516174, "learning_rate": 1.4956652968162582e-06, "loss": 0.04290420562028885, "step": 6411 }, { "epoch": 0.8668598805228603, "grad_norm": 0.5081998705863953, "learning_rate": 1.492687372884567e-06, "loss": 0.05049280822277069, "step": 6412 }, { "epoch": 0.8669950738916257, "grad_norm": 0.5225005149841309, "learning_rate": 1.4897122612473978e-06, "loss": 0.03781360387802124, "step": 6413 }, { "epoch": 0.8671302672603909, "grad_norm": 0.7593132257461548, "learning_rate": 1.4867399625241772e-06, "loss": 0.05383706092834473, "step": 6414 }, { "epoch": 0.8672654606291561, "grad_norm": 1.5183064937591553, "learning_rate": 1.4837704773337602e-06, "loss": 0.05239257961511612, "step": 6415 }, { "epoch": 0.8674006539979214, "grad_norm": 0.6086822152137756, "learning_rate": 1.4808038062944036e-06, "loss": 0.05151867866516113, "step": 6416 }, { "epoch": 0.8675358473666867, "grad_norm": 1.0132187604904175, "learning_rate": 1.4778399500237933e-06, "loss": 0.051538825035095215, "step": 6417 }, { "epoch": 0.8676710407354519, "grad_norm": 0.4417206943035126, "learning_rate": 1.4748789091390124e-06, "loss": 0.05564595013856888, "step": 6418 }, { "epoch": 0.8678062341042172, "grad_norm": 0.24355541169643402, "learning_rate": 1.471920684256563e-06, "loss": 0.03250390291213989, "step": 6419 }, { "epoch": 0.8679414274729824, "grad_norm": 0.4006502628326416, "learning_rate": 1.4689652759923721e-06, "loss": 0.051907412707805634, "step": 6420 }, { "epoch": 0.8680766208417477, "grad_norm": 0.5877748131752014, "learning_rate": 1.4660126849617645e-06, "loss": 0.04292129725217819, "step": 6421 }, { "epoch": 0.868211814210513, "grad_norm": 0.696816623210907, "learning_rate": 1.4630629117794914e-06, "loss": 0.07989153265953064, "step": 6422 }, { "epoch": 0.8683470075792782, "grad_norm": 0.4741944372653961, "learning_rate": 1.4601159570597033e-06, "loss": 0.057127028703689575, "step": 6423 }, { "epoch": 0.8684822009480435, "grad_norm": 0.4335305690765381, "learning_rate": 1.4571718214159795e-06, "loss": 0.04525130242109299, "step": 6424 }, { "epoch": 0.8686173943168087, "grad_norm": 0.6267746090888977, "learning_rate": 1.454230505461303e-06, "loss": 0.05341595038771629, "step": 6425 }, { "epoch": 0.8687525876855741, "grad_norm": 0.4684426188468933, "learning_rate": 1.4512920098080672e-06, "loss": 0.05786684900522232, "step": 6426 }, { "epoch": 0.8688877810543393, "grad_norm": 0.5344424247741699, "learning_rate": 1.4483563350680878e-06, "loss": 0.0545182041823864, "step": 6427 }, { "epoch": 0.8690229744231045, "grad_norm": 0.33890390396118164, "learning_rate": 1.4454234818525824e-06, "loss": 0.04585573077201843, "step": 6428 }, { "epoch": 0.8691581677918698, "grad_norm": 0.8251439929008484, "learning_rate": 1.4424934507721926e-06, "loss": 0.05194412171840668, "step": 6429 }, { "epoch": 0.869293361160635, "grad_norm": 0.6554752588272095, "learning_rate": 1.4395662424369622e-06, "loss": 0.04080544039607048, "step": 6430 }, { "epoch": 0.8694285545294004, "grad_norm": 0.36110562086105347, "learning_rate": 1.436641857456355e-06, "loss": 0.055810391902923584, "step": 6431 }, { "epoch": 0.8695637478981656, "grad_norm": 0.2818463146686554, "learning_rate": 1.4337202964392409e-06, "loss": 0.03770819306373596, "step": 6432 }, { "epoch": 0.8696989412669308, "grad_norm": 1.177851676940918, "learning_rate": 1.4308015599939033e-06, "loss": 0.06075853109359741, "step": 6433 }, { "epoch": 0.8698341346356961, "grad_norm": 0.5334969758987427, "learning_rate": 1.4278856487280428e-06, "loss": 0.04215973988175392, "step": 6434 }, { "epoch": 0.8699693280044614, "grad_norm": 0.2561131417751312, "learning_rate": 1.4249725632487653e-06, "loss": 0.03477698192000389, "step": 6435 }, { "epoch": 0.8701045213732267, "grad_norm": 0.40459147095680237, "learning_rate": 1.4220623041625924e-06, "loss": 0.06360536068677902, "step": 6436 }, { "epoch": 0.8702397147419919, "grad_norm": 0.41641736030578613, "learning_rate": 1.4191548720754527e-06, "loss": 0.047386303544044495, "step": 6437 }, { "epoch": 0.8703749081107571, "grad_norm": 1.2045555114746094, "learning_rate": 1.4162502675926887e-06, "loss": 0.04919978976249695, "step": 6438 }, { "epoch": 0.8705101014795225, "grad_norm": 0.32616037130355835, "learning_rate": 1.4133484913190596e-06, "loss": 0.03985454887151718, "step": 6439 }, { "epoch": 0.8706452948482877, "grad_norm": 0.6028023362159729, "learning_rate": 1.4104495438587295e-06, "loss": 0.06465119123458862, "step": 6440 }, { "epoch": 0.870780488217053, "grad_norm": 0.649699866771698, "learning_rate": 1.4075534258152667e-06, "loss": 0.04072011262178421, "step": 6441 }, { "epoch": 0.8709156815858182, "grad_norm": 0.3229261636734009, "learning_rate": 1.4046601377916673e-06, "loss": 0.04177705943584442, "step": 6442 }, { "epoch": 0.8710508749545834, "grad_norm": 0.6591719388961792, "learning_rate": 1.4017696803903246e-06, "loss": 0.046736590564250946, "step": 6443 }, { "epoch": 0.8711860683233488, "grad_norm": 0.45171281695365906, "learning_rate": 1.3988820542130504e-06, "loss": 0.05376102030277252, "step": 6444 }, { "epoch": 0.871321261692114, "grad_norm": 0.6127946376800537, "learning_rate": 1.395997259861067e-06, "loss": 0.05691789835691452, "step": 6445 }, { "epoch": 0.8714564550608792, "grad_norm": 0.8426452875137329, "learning_rate": 1.3931152979349926e-06, "loss": 0.051948148757219315, "step": 6446 }, { "epoch": 0.8715916484296445, "grad_norm": 0.42971548438072205, "learning_rate": 1.3902361690348769e-06, "loss": 0.04130706191062927, "step": 6447 }, { "epoch": 0.8717268417984098, "grad_norm": 0.4513098895549774, "learning_rate": 1.3873598737601639e-06, "loss": 0.056921541690826416, "step": 6448 }, { "epoch": 0.8718620351671751, "grad_norm": 0.3034031093120575, "learning_rate": 1.3844864127097229e-06, "loss": 0.05169004574418068, "step": 6449 }, { "epoch": 0.8719972285359403, "grad_norm": 0.3589463531970978, "learning_rate": 1.3816157864818151e-06, "loss": 0.049613580107688904, "step": 6450 }, { "epoch": 0.8721324219047055, "grad_norm": 0.6391415596008301, "learning_rate": 1.3787479956741194e-06, "loss": 0.04300156235694885, "step": 6451 }, { "epoch": 0.8722676152734709, "grad_norm": 0.3037974238395691, "learning_rate": 1.3758830408837314e-06, "loss": 0.03323987126350403, "step": 6452 }, { "epoch": 0.8724028086422361, "grad_norm": 0.7414489984512329, "learning_rate": 1.3730209227071439e-06, "loss": 0.04776036739349365, "step": 6453 }, { "epoch": 0.8725380020110014, "grad_norm": 0.3472529351711273, "learning_rate": 1.3701616417402734e-06, "loss": 0.04723775386810303, "step": 6454 }, { "epoch": 0.8726731953797666, "grad_norm": 0.4649716913700104, "learning_rate": 1.367305198578429e-06, "loss": 0.040052302181720734, "step": 6455 }, { "epoch": 0.8728083887485318, "grad_norm": 0.2935580015182495, "learning_rate": 1.36445159381634e-06, "loss": 0.04241658002138138, "step": 6456 }, { "epoch": 0.8729435821172972, "grad_norm": 0.802302360534668, "learning_rate": 1.361600828048144e-06, "loss": 0.051356494426727295, "step": 6457 }, { "epoch": 0.8730787754860624, "grad_norm": 0.5173913836479187, "learning_rate": 1.3587529018673816e-06, "loss": 0.04657822474837303, "step": 6458 }, { "epoch": 0.8732139688548277, "grad_norm": 0.4374042749404907, "learning_rate": 1.3559078158670152e-06, "loss": 0.051262274384498596, "step": 6459 }, { "epoch": 0.8733491622235929, "grad_norm": 0.8997926115989685, "learning_rate": 1.353065570639394e-06, "loss": 0.06002334505319595, "step": 6460 }, { "epoch": 0.8734843555923582, "grad_norm": 0.5344158411026001, "learning_rate": 1.3502261667763e-06, "loss": 0.05258147791028023, "step": 6461 }, { "epoch": 0.8736195489611235, "grad_norm": 0.42840898036956787, "learning_rate": 1.3473896048689067e-06, "loss": 0.04621404409408569, "step": 6462 }, { "epoch": 0.8737547423298887, "grad_norm": 0.35789334774017334, "learning_rate": 1.3445558855078017e-06, "loss": 0.05047227442264557, "step": 6463 }, { "epoch": 0.873889935698654, "grad_norm": 1.0090200901031494, "learning_rate": 1.3417250092829814e-06, "loss": 0.05209711194038391, "step": 6464 }, { "epoch": 0.8740251290674192, "grad_norm": 0.5702008008956909, "learning_rate": 1.338896976783846e-06, "loss": 0.04754728078842163, "step": 6465 }, { "epoch": 0.8741603224361845, "grad_norm": 0.22192725539207458, "learning_rate": 1.336071788599213e-06, "loss": 0.03884801268577576, "step": 6466 }, { "epoch": 0.8742955158049498, "grad_norm": 0.6642999649047852, "learning_rate": 1.3332494453172982e-06, "loss": 0.04711092263460159, "step": 6467 }, { "epoch": 0.874430709173715, "grad_norm": 0.5399225950241089, "learning_rate": 1.3304299475257287e-06, "loss": 0.048498328775167465, "step": 6468 }, { "epoch": 0.8745659025424803, "grad_norm": 0.5880245566368103, "learning_rate": 1.3276132958115394e-06, "loss": 0.05429648980498314, "step": 6469 }, { "epoch": 0.8747010959112456, "grad_norm": 0.40637123584747314, "learning_rate": 1.32479949076117e-06, "loss": 0.04147633910179138, "step": 6470 }, { "epoch": 0.8748362892800108, "grad_norm": 0.5699285268783569, "learning_rate": 1.3219885329604747e-06, "loss": 0.04441697895526886, "step": 6471 }, { "epoch": 0.8749714826487761, "grad_norm": 0.9453657269477844, "learning_rate": 1.319180422994709e-06, "loss": 0.04900723695755005, "step": 6472 }, { "epoch": 0.8751066760175413, "grad_norm": 0.9205328822135925, "learning_rate": 1.3163751614485287e-06, "loss": 0.059768013656139374, "step": 6473 }, { "epoch": 0.8752418693863065, "grad_norm": 0.6823642253875732, "learning_rate": 1.3135727489060113e-06, "loss": 0.04388117790222168, "step": 6474 }, { "epoch": 0.8753770627550719, "grad_norm": 0.6911574006080627, "learning_rate": 1.3107731859506317e-06, "loss": 0.03704812377691269, "step": 6475 }, { "epoch": 0.8755122561238371, "grad_norm": 0.4610890746116638, "learning_rate": 1.3079764731652772e-06, "loss": 0.04261764511466026, "step": 6476 }, { "epoch": 0.8756474494926024, "grad_norm": 0.36496952176094055, "learning_rate": 1.3051826111322368e-06, "loss": 0.04335753247141838, "step": 6477 }, { "epoch": 0.8757826428613676, "grad_norm": 0.27545037865638733, "learning_rate": 1.3023916004332021e-06, "loss": 0.0426146499812603, "step": 6478 }, { "epoch": 0.8759178362301329, "grad_norm": 0.3519093096256256, "learning_rate": 1.2996034416492847e-06, "loss": 0.042835038155317307, "step": 6479 }, { "epoch": 0.8760530295988982, "grad_norm": 1.1075360774993896, "learning_rate": 1.2968181353609854e-06, "loss": 0.060977503657341, "step": 6480 }, { "epoch": 0.8761882229676634, "grad_norm": 0.6381919384002686, "learning_rate": 1.2940356821482285e-06, "loss": 0.05944786220788956, "step": 6481 }, { "epoch": 0.8763234163364287, "grad_norm": 0.3496914803981781, "learning_rate": 1.291256082590334e-06, "loss": 0.06015033274888992, "step": 6482 }, { "epoch": 0.876458609705194, "grad_norm": 0.657455563545227, "learning_rate": 1.2884793372660208e-06, "loss": 0.054166048765182495, "step": 6483 }, { "epoch": 0.8765938030739592, "grad_norm": 0.5428779125213623, "learning_rate": 1.285705446753433e-06, "loss": 0.050331417471170425, "step": 6484 }, { "epoch": 0.8767289964427245, "grad_norm": 0.5830390453338623, "learning_rate": 1.2829344116301e-06, "loss": 0.054551899433135986, "step": 6485 }, { "epoch": 0.8768641898114897, "grad_norm": 0.6821373701095581, "learning_rate": 1.2801662324729774e-06, "loss": 0.050336308777332306, "step": 6486 }, { "epoch": 0.876999383180255, "grad_norm": 0.3401375114917755, "learning_rate": 1.2774009098584055e-06, "loss": 0.059595443308353424, "step": 6487 }, { "epoch": 0.8771345765490203, "grad_norm": 0.7410391569137573, "learning_rate": 1.274638444362139e-06, "loss": 0.05255543068051338, "step": 6488 }, { "epoch": 0.8772697699177855, "grad_norm": 1.2307811975479126, "learning_rate": 1.2718788365593443e-06, "loss": 0.04362833499908447, "step": 6489 }, { "epoch": 0.8774049632865508, "grad_norm": 0.7255873084068298, "learning_rate": 1.26912208702458e-06, "loss": 0.06386878341436386, "step": 6490 }, { "epoch": 0.877540156655316, "grad_norm": 0.803435206413269, "learning_rate": 1.2663681963318242e-06, "loss": 0.06775794178247452, "step": 6491 }, { "epoch": 0.8776753500240814, "grad_norm": 0.28998297452926636, "learning_rate": 1.2636171650544443e-06, "loss": 0.03879409283399582, "step": 6492 }, { "epoch": 0.8778105433928466, "grad_norm": 1.466269850730896, "learning_rate": 1.260868993765219e-06, "loss": 0.06270934641361237, "step": 6493 }, { "epoch": 0.8779457367616118, "grad_norm": 0.5335553884506226, "learning_rate": 1.258123683036339e-06, "loss": 0.05277072638273239, "step": 6494 }, { "epoch": 0.8780809301303771, "grad_norm": 0.3859676718711853, "learning_rate": 1.2553812334393872e-06, "loss": 0.04895465075969696, "step": 6495 }, { "epoch": 0.8782161234991424, "grad_norm": 0.323969304561615, "learning_rate": 1.2526416455453582e-06, "loss": 0.055321067571640015, "step": 6496 }, { "epoch": 0.8783513168679077, "grad_norm": 0.3847314417362213, "learning_rate": 1.249904919924646e-06, "loss": 0.051676392555236816, "step": 6497 }, { "epoch": 0.8784865102366729, "grad_norm": 0.23739759624004364, "learning_rate": 1.2471710571470579e-06, "loss": 0.040119320154190063, "step": 6498 }, { "epoch": 0.8786217036054381, "grad_norm": 0.3581050932407379, "learning_rate": 1.2444400577817922e-06, "loss": 0.05266615375876427, "step": 6499 }, { "epoch": 0.8787568969742034, "grad_norm": 0.3567295968532562, "learning_rate": 1.2417119223974621e-06, "loss": 0.03939438238739967, "step": 6500 }, { "epoch": 0.8788920903429687, "grad_norm": 0.3675695061683655, "learning_rate": 1.2389866515620768e-06, "loss": 0.04629841446876526, "step": 6501 }, { "epoch": 0.8790272837117339, "grad_norm": 0.3217466175556183, "learning_rate": 1.2362642458430505e-06, "loss": 0.046079039573669434, "step": 6502 }, { "epoch": 0.8791624770804992, "grad_norm": 0.839565098285675, "learning_rate": 1.2335447058072103e-06, "loss": 0.0968470498919487, "step": 6503 }, { "epoch": 0.8792976704492644, "grad_norm": 0.23487775027751923, "learning_rate": 1.230828032020771e-06, "loss": 0.03534689545631409, "step": 6504 }, { "epoch": 0.8794328638180298, "grad_norm": 0.5126654505729675, "learning_rate": 1.2281142250493638e-06, "loss": 0.05204305052757263, "step": 6505 }, { "epoch": 0.879568057186795, "grad_norm": 1.0494384765625, "learning_rate": 1.225403285458015e-06, "loss": 0.06012066453695297, "step": 6506 }, { "epoch": 0.8797032505555602, "grad_norm": 0.41900715231895447, "learning_rate": 1.2226952138111546e-06, "loss": 0.05904790014028549, "step": 6507 }, { "epoch": 0.8798384439243255, "grad_norm": 0.3915879726409912, "learning_rate": 1.219990010672622e-06, "loss": 0.04632105678319931, "step": 6508 }, { "epoch": 0.8799736372930907, "grad_norm": 1.0778716802597046, "learning_rate": 1.2172876766056562e-06, "loss": 0.038495685905218124, "step": 6509 }, { "epoch": 0.8801088306618561, "grad_norm": 0.7172847986221313, "learning_rate": 1.2145882121728906e-06, "loss": 0.042837344110012054, "step": 6510 }, { "epoch": 0.8802440240306213, "grad_norm": 1.080238699913025, "learning_rate": 1.2118916179363727e-06, "loss": 0.07236932218074799, "step": 6511 }, { "epoch": 0.8803792173993865, "grad_norm": 0.26233533024787903, "learning_rate": 1.209197894457546e-06, "loss": 0.03946400433778763, "step": 6512 }, { "epoch": 0.8805144107681518, "grad_norm": 0.4374561309814453, "learning_rate": 1.2065070422972606e-06, "loss": 0.056719616055488586, "step": 6513 }, { "epoch": 0.8806496041369171, "grad_norm": 0.464121550321579, "learning_rate": 1.2038190620157685e-06, "loss": 0.055736273527145386, "step": 6514 }, { "epoch": 0.8807847975056824, "grad_norm": 0.7671204209327698, "learning_rate": 1.2011339541727117e-06, "loss": 0.049051813781261444, "step": 6515 }, { "epoch": 0.8809199908744476, "grad_norm": 0.628445565700531, "learning_rate": 1.198451719327155e-06, "loss": 0.059822455048561096, "step": 6516 }, { "epoch": 0.8810551842432128, "grad_norm": 0.8739883899688721, "learning_rate": 1.1957723580375447e-06, "loss": 0.06787683814764023, "step": 6517 }, { "epoch": 0.8811903776119782, "grad_norm": 0.8604546189308167, "learning_rate": 1.193095870861748e-06, "loss": 0.05168576538562775, "step": 6518 }, { "epoch": 0.8813255709807434, "grad_norm": 0.5475383996963501, "learning_rate": 1.1904222583570156e-06, "loss": 0.05231267958879471, "step": 6519 }, { "epoch": 0.8814607643495087, "grad_norm": 0.43577030301094055, "learning_rate": 1.1877515210800077e-06, "loss": 0.047853536903858185, "step": 6520 }, { "epoch": 0.8815959577182739, "grad_norm": 0.5603119134902954, "learning_rate": 1.1850836595867925e-06, "loss": 0.051129672676324844, "step": 6521 }, { "epoch": 0.8817311510870391, "grad_norm": 0.5466884970664978, "learning_rate": 1.1824186744328259e-06, "loss": 0.05245116353034973, "step": 6522 }, { "epoch": 0.8818663444558045, "grad_norm": 0.2560189664363861, "learning_rate": 1.179756566172982e-06, "loss": 0.03251160308718681, "step": 6523 }, { "epoch": 0.8820015378245697, "grad_norm": 0.32450202107429504, "learning_rate": 1.177097335361516e-06, "loss": 0.059180498123168945, "step": 6524 }, { "epoch": 0.882136731193335, "grad_norm": 0.2926059067249298, "learning_rate": 1.1744409825520969e-06, "loss": 0.041620850563049316, "step": 6525 }, { "epoch": 0.8822719245621002, "grad_norm": 0.39256179332733154, "learning_rate": 1.171787508297792e-06, "loss": 0.04282285273075104, "step": 6526 }, { "epoch": 0.8824071179308655, "grad_norm": 0.3213786780834198, "learning_rate": 1.1691369131510676e-06, "loss": 0.034612298011779785, "step": 6527 }, { "epoch": 0.8825423112996308, "grad_norm": 0.24405010044574738, "learning_rate": 1.1664891976637992e-06, "loss": 0.04390066862106323, "step": 6528 }, { "epoch": 0.882677504668396, "grad_norm": 0.32655808329582214, "learning_rate": 1.1638443623872442e-06, "loss": 0.03866888955235481, "step": 6529 }, { "epoch": 0.8828126980371612, "grad_norm": 0.3981506824493408, "learning_rate": 1.1612024078720752e-06, "loss": 0.04642610251903534, "step": 6530 }, { "epoch": 0.8829478914059266, "grad_norm": 0.9546152353286743, "learning_rate": 1.1585633346683655e-06, "loss": 0.059473518282175064, "step": 6531 }, { "epoch": 0.8830830847746918, "grad_norm": 0.4268510937690735, "learning_rate": 1.155927143325579e-06, "loss": 0.047990262508392334, "step": 6532 }, { "epoch": 0.8832182781434571, "grad_norm": 1.1467900276184082, "learning_rate": 1.1532938343925887e-06, "loss": 0.05960758030414581, "step": 6533 }, { "epoch": 0.8833534715122223, "grad_norm": 0.44528794288635254, "learning_rate": 1.1506634084176587e-06, "loss": 0.062300823628902435, "step": 6534 }, { "epoch": 0.8834886648809875, "grad_norm": 0.3454953134059906, "learning_rate": 1.148035865948463e-06, "loss": 0.05014472082257271, "step": 6535 }, { "epoch": 0.8836238582497529, "grad_norm": 0.6793503165245056, "learning_rate": 1.1454112075320688e-06, "loss": 0.03962131589651108, "step": 6536 }, { "epoch": 0.8837590516185181, "grad_norm": 0.6821101903915405, "learning_rate": 1.1427894337149426e-06, "loss": 0.059002310037612915, "step": 6537 }, { "epoch": 0.8838942449872834, "grad_norm": 0.4089685082435608, "learning_rate": 1.1401705450429506e-06, "loss": 0.03756380081176758, "step": 6538 }, { "epoch": 0.8840294383560486, "grad_norm": 0.8958247900009155, "learning_rate": 1.1375545420613586e-06, "loss": 0.05500996112823486, "step": 6539 }, { "epoch": 0.8841646317248139, "grad_norm": 0.9188216328620911, "learning_rate": 1.1349414253148377e-06, "loss": 0.04294709861278534, "step": 6540 }, { "epoch": 0.8842998250935792, "grad_norm": 1.2239514589309692, "learning_rate": 1.1323311953474524e-06, "loss": 0.060585539788007736, "step": 6541 }, { "epoch": 0.8844350184623444, "grad_norm": 0.5068191885948181, "learning_rate": 1.1297238527026582e-06, "loss": 0.05804656445980072, "step": 6542 }, { "epoch": 0.8845702118311097, "grad_norm": 0.24202004075050354, "learning_rate": 1.1271193979233258e-06, "loss": 0.033794134855270386, "step": 6543 }, { "epoch": 0.884705405199875, "grad_norm": 0.5629062652587891, "learning_rate": 1.1245178315517113e-06, "loss": 0.045803967863321304, "step": 6544 }, { "epoch": 0.8848405985686402, "grad_norm": 0.6934453845024109, "learning_rate": 1.1219191541294798e-06, "loss": 0.05704040825366974, "step": 6545 }, { "epoch": 0.8849757919374055, "grad_norm": 0.4430544078350067, "learning_rate": 1.1193233661976887e-06, "loss": 0.043864525854587555, "step": 6546 }, { "epoch": 0.8851109853061707, "grad_norm": 0.21920977532863617, "learning_rate": 1.1167304682967904e-06, "loss": 0.037121668457984924, "step": 6547 }, { "epoch": 0.885246178674936, "grad_norm": 0.6567691564559937, "learning_rate": 1.114140460966645e-06, "loss": 0.04725874215364456, "step": 6548 }, { "epoch": 0.8853813720437013, "grad_norm": 0.4458910822868347, "learning_rate": 1.111553344746501e-06, "loss": 0.04362950101494789, "step": 6549 }, { "epoch": 0.8855165654124665, "grad_norm": 0.4701639711856842, "learning_rate": 1.1089691201750174e-06, "loss": 0.05949559062719345, "step": 6550 }, { "epoch": 0.8856517587812318, "grad_norm": 0.6279432773590088, "learning_rate": 1.106387787790239e-06, "loss": 0.05217616260051727, "step": 6551 }, { "epoch": 0.885786952149997, "grad_norm": 0.4219288229942322, "learning_rate": 1.1038093481296091e-06, "loss": 0.04507826268672943, "step": 6552 }, { "epoch": 0.8859221455187622, "grad_norm": 0.6901844143867493, "learning_rate": 1.10123380172998e-06, "loss": 0.06525152176618576, "step": 6553 }, { "epoch": 0.8860573388875276, "grad_norm": 0.5920925736427307, "learning_rate": 1.098661149127586e-06, "loss": 0.03667248785495758, "step": 6554 }, { "epoch": 0.8861925322562928, "grad_norm": 0.7695235013961792, "learning_rate": 1.0960913908580788e-06, "loss": 0.07185602188110352, "step": 6555 }, { "epoch": 0.8863277256250581, "grad_norm": 0.5666546821594238, "learning_rate": 1.0935245274564852e-06, "loss": 0.07072822749614716, "step": 6556 }, { "epoch": 0.8864629189938233, "grad_norm": 0.36242297291755676, "learning_rate": 1.0909605594572413e-06, "loss": 0.05141735076904297, "step": 6557 }, { "epoch": 0.8865981123625886, "grad_norm": 0.2691637873649597, "learning_rate": 1.0883994873941816e-06, "loss": 0.03618592023849487, "step": 6558 }, { "epoch": 0.8867333057313539, "grad_norm": 0.40218985080718994, "learning_rate": 1.0858413118005345e-06, "loss": 0.04321213811635971, "step": 6559 }, { "epoch": 0.8868684991001191, "grad_norm": 1.1676558256149292, "learning_rate": 1.0832860332089288e-06, "loss": 0.06056632101535797, "step": 6560 }, { "epoch": 0.8870036924688844, "grad_norm": 0.5845606923103333, "learning_rate": 1.0807336521513828e-06, "loss": 0.0678192749619484, "step": 6561 }, { "epoch": 0.8871388858376497, "grad_norm": 0.42374172806739807, "learning_rate": 1.0781841691593142e-06, "loss": 0.05576469749212265, "step": 6562 }, { "epoch": 0.8872740792064149, "grad_norm": 0.36073818802833557, "learning_rate": 1.0756375847635435e-06, "loss": 0.03613992780447006, "step": 6563 }, { "epoch": 0.8874092725751802, "grad_norm": 0.6461474299430847, "learning_rate": 1.0730938994942818e-06, "loss": 0.06881657987833023, "step": 6564 }, { "epoch": 0.8875444659439454, "grad_norm": 0.395210862159729, "learning_rate": 1.070553113881137e-06, "loss": 0.04604113847017288, "step": 6565 }, { "epoch": 0.8876796593127108, "grad_norm": 0.48403221368789673, "learning_rate": 1.0680152284531158e-06, "loss": 0.05795672535896301, "step": 6566 }, { "epoch": 0.887814852681476, "grad_norm": 0.4987831115722656, "learning_rate": 1.0654802437386157e-06, "loss": 0.059023723006248474, "step": 6567 }, { "epoch": 0.8879500460502412, "grad_norm": 0.4567299485206604, "learning_rate": 1.062948160265438e-06, "loss": 0.04587674140930176, "step": 6568 }, { "epoch": 0.8880852394190065, "grad_norm": 0.5794356465339661, "learning_rate": 1.0604189785607772e-06, "loss": 0.06764669716358185, "step": 6569 }, { "epoch": 0.8882204327877717, "grad_norm": 1.1217278242111206, "learning_rate": 1.0578926991512171e-06, "loss": 0.046643126755952835, "step": 6570 }, { "epoch": 0.8883556261565371, "grad_norm": 0.4777769446372986, "learning_rate": 1.0553693225627458e-06, "loss": 0.05171160399913788, "step": 6571 }, { "epoch": 0.8884908195253023, "grad_norm": 0.19356529414653778, "learning_rate": 1.0528488493207444e-06, "loss": 0.029933147132396698, "step": 6572 }, { "epoch": 0.8886260128940675, "grad_norm": 0.7756878137588501, "learning_rate": 1.0503312799499898e-06, "loss": 0.06609708070755005, "step": 6573 }, { "epoch": 0.8887612062628328, "grad_norm": 0.7496227622032166, "learning_rate": 1.0478166149746476e-06, "loss": 0.055043645203113556, "step": 6574 }, { "epoch": 0.888896399631598, "grad_norm": 0.6214916110038757, "learning_rate": 1.0453048549182892e-06, "loss": 0.05154578387737274, "step": 6575 }, { "epoch": 0.8890315930003634, "grad_norm": 0.4027038514614105, "learning_rate": 1.0427960003038744e-06, "loss": 0.03429114818572998, "step": 6576 }, { "epoch": 0.8891667863691286, "grad_norm": 0.4690026044845581, "learning_rate": 1.040290051653764e-06, "loss": 0.04975851997733116, "step": 6577 }, { "epoch": 0.8893019797378938, "grad_norm": 0.31448978185653687, "learning_rate": 1.0377870094897085e-06, "loss": 0.05061821639537811, "step": 6578 }, { "epoch": 0.8894371731066592, "grad_norm": 0.3992505967617035, "learning_rate": 1.0352868743328497e-06, "loss": 0.04014480113983154, "step": 6579 }, { "epoch": 0.8895723664754244, "grad_norm": 0.39963221549987793, "learning_rate": 1.032789646703733e-06, "loss": 0.04242967069149017, "step": 6580 }, { "epoch": 0.8897075598441896, "grad_norm": 0.5489478707313538, "learning_rate": 1.0302953271222938e-06, "loss": 0.055710405111312866, "step": 6581 }, { "epoch": 0.8898427532129549, "grad_norm": 0.4911177158355713, "learning_rate": 1.0278039161078634e-06, "loss": 0.04291833937168121, "step": 6582 }, { "epoch": 0.8899779465817201, "grad_norm": 0.3042968809604645, "learning_rate": 1.0253154141791705e-06, "loss": 0.05094408988952637, "step": 6583 }, { "epoch": 0.8901131399504855, "grad_norm": 0.7723742127418518, "learning_rate": 1.0228298218543253e-06, "loss": 0.05948770046234131, "step": 6584 }, { "epoch": 0.8902483333192507, "grad_norm": 0.5254697799682617, "learning_rate": 1.020347139650849e-06, "loss": 0.053774673491716385, "step": 6585 }, { "epoch": 0.8903835266880159, "grad_norm": 0.5848549008369446, "learning_rate": 1.0178673680856448e-06, "loss": 0.04300014674663544, "step": 6586 }, { "epoch": 0.8905187200567812, "grad_norm": 0.5533194541931152, "learning_rate": 1.0153905076750196e-06, "loss": 0.043958112597465515, "step": 6587 }, { "epoch": 0.8906539134255465, "grad_norm": 0.6654220819473267, "learning_rate": 1.0129165589346644e-06, "loss": 0.04274154081940651, "step": 6588 }, { "epoch": 0.8907891067943118, "grad_norm": 0.5236038565635681, "learning_rate": 1.0104455223796688e-06, "loss": 0.0551171712577343, "step": 6589 }, { "epoch": 0.890924300163077, "grad_norm": 0.4612497389316559, "learning_rate": 1.0079773985245178e-06, "loss": 0.0630609393119812, "step": 6590 }, { "epoch": 0.8910594935318422, "grad_norm": 0.6932024359703064, "learning_rate": 1.0055121878830837e-06, "loss": 0.07203590124845505, "step": 6591 }, { "epoch": 0.8911946869006075, "grad_norm": 0.6976272463798523, "learning_rate": 1.0030498909686458e-06, "loss": 0.05027402192354202, "step": 6592 }, { "epoch": 0.8913298802693728, "grad_norm": 1.1622633934020996, "learning_rate": 1.0005905082938593e-06, "loss": 0.05766216665506363, "step": 6593 }, { "epoch": 0.8914650736381381, "grad_norm": 0.5363810062408447, "learning_rate": 9.981340403707794e-07, "loss": 0.05812821537256241, "step": 6594 }, { "epoch": 0.8916002670069033, "grad_norm": 0.5079368948936462, "learning_rate": 9.956804877108638e-07, "loss": 0.05828285962343216, "step": 6595 }, { "epoch": 0.8917354603756685, "grad_norm": 0.3242294490337372, "learning_rate": 9.932298508249488e-07, "loss": 0.03499762713909149, "step": 6596 }, { "epoch": 0.8918706537444339, "grad_norm": 0.328448623418808, "learning_rate": 9.907821302232729e-07, "loss": 0.0437769815325737, "step": 6597 }, { "epoch": 0.8920058471131991, "grad_norm": 0.7776864171028137, "learning_rate": 9.883373264154633e-07, "loss": 0.05071611702442169, "step": 6598 }, { "epoch": 0.8921410404819644, "grad_norm": 1.715150237083435, "learning_rate": 9.858954399105397e-07, "loss": 0.07196488976478577, "step": 6599 }, { "epoch": 0.8922762338507296, "grad_norm": 0.566561222076416, "learning_rate": 9.834564712169202e-07, "loss": 0.047789350152015686, "step": 6600 }, { "epoch": 0.8924114272194948, "grad_norm": 0.8820871710777283, "learning_rate": 9.81020420842409e-07, "loss": 0.054968468844890594, "step": 6601 }, { "epoch": 0.8925466205882602, "grad_norm": 0.2990488111972809, "learning_rate": 9.785872892942033e-07, "loss": 0.049929387867450714, "step": 6602 }, { "epoch": 0.8926818139570254, "grad_norm": 0.34779053926467896, "learning_rate": 9.761570770788964e-07, "loss": 0.04305468499660492, "step": 6603 }, { "epoch": 0.8928170073257907, "grad_norm": 0.2624479830265045, "learning_rate": 9.737297847024685e-07, "loss": 0.03659946471452713, "step": 6604 }, { "epoch": 0.8929522006945559, "grad_norm": 0.6984428763389587, "learning_rate": 9.713054126702968e-07, "loss": 0.056578680872917175, "step": 6605 }, { "epoch": 0.8930873940633212, "grad_norm": 0.2594917416572571, "learning_rate": 9.688839614871497e-07, "loss": 0.041214700788259506, "step": 6606 }, { "epoch": 0.8932225874320865, "grad_norm": 0.6019291281700134, "learning_rate": 9.664654316571852e-07, "loss": 0.038591306656599045, "step": 6607 }, { "epoch": 0.8933577808008517, "grad_norm": 0.37569570541381836, "learning_rate": 9.640498236839507e-07, "loss": 0.050752900540828705, "step": 6608 }, { "epoch": 0.8934929741696169, "grad_norm": 0.5007606744766235, "learning_rate": 9.616371380703953e-07, "loss": 0.044734060764312744, "step": 6609 }, { "epoch": 0.8936281675383823, "grad_norm": 0.3374848961830139, "learning_rate": 9.592273753188507e-07, "loss": 0.06034480035305023, "step": 6610 }, { "epoch": 0.8937633609071475, "grad_norm": 0.957298994064331, "learning_rate": 9.568205359310372e-07, "loss": 0.0423714816570282, "step": 6611 }, { "epoch": 0.8938985542759128, "grad_norm": 0.7147104144096375, "learning_rate": 9.544166204080772e-07, "loss": 0.04316665232181549, "step": 6612 }, { "epoch": 0.894033747644678, "grad_norm": 0.3773733973503113, "learning_rate": 9.520156292504739e-07, "loss": 0.03957603871822357, "step": 6613 }, { "epoch": 0.8941689410134432, "grad_norm": 0.5294026136398315, "learning_rate": 9.496175629581322e-07, "loss": 0.047300323843955994, "step": 6614 }, { "epoch": 0.8943041343822086, "grad_norm": 0.40021997690200806, "learning_rate": 9.472224220303427e-07, "loss": 0.04881554841995239, "step": 6615 }, { "epoch": 0.8944393277509738, "grad_norm": 0.9231643676757812, "learning_rate": 9.448302069657799e-07, "loss": 0.04621110111474991, "step": 6616 }, { "epoch": 0.8945745211197391, "grad_norm": 0.40651726722717285, "learning_rate": 9.424409182625205e-07, "loss": 0.05630484223365784, "step": 6617 }, { "epoch": 0.8947097144885043, "grad_norm": 0.686684787273407, "learning_rate": 9.40054556418023e-07, "loss": 0.049347031861543655, "step": 6618 }, { "epoch": 0.8948449078572696, "grad_norm": 0.4266158640384674, "learning_rate": 9.376711219291483e-07, "loss": 0.05713128298521042, "step": 6619 }, { "epoch": 0.8949801012260349, "grad_norm": 0.2672691345214844, "learning_rate": 9.352906152921348e-07, "loss": 0.05233769863843918, "step": 6620 }, { "epoch": 0.8951152945948001, "grad_norm": 0.7209280729293823, "learning_rate": 9.32913037002614e-07, "loss": 0.04346020147204399, "step": 6621 }, { "epoch": 0.8952504879635654, "grad_norm": 0.788275420665741, "learning_rate": 9.30538387555615e-07, "loss": 0.04618893563747406, "step": 6622 }, { "epoch": 0.8953856813323307, "grad_norm": 0.6408319473266602, "learning_rate": 9.281666674455508e-07, "loss": 0.04850313067436218, "step": 6623 }, { "epoch": 0.8955208747010959, "grad_norm": 0.6206988096237183, "learning_rate": 9.257978771662295e-07, "loss": 0.054072365164756775, "step": 6624 }, { "epoch": 0.8956560680698612, "grad_norm": 0.3479498028755188, "learning_rate": 9.234320172108418e-07, "loss": 0.06309705972671509, "step": 6625 }, { "epoch": 0.8957912614386264, "grad_norm": 1.3672723770141602, "learning_rate": 9.210690880719719e-07, "loss": 0.05391441285610199, "step": 6626 }, { "epoch": 0.8959264548073917, "grad_norm": 0.3593200445175171, "learning_rate": 9.187090902415962e-07, "loss": 0.04264727234840393, "step": 6627 }, { "epoch": 0.896061648176157, "grad_norm": 0.5666792988777161, "learning_rate": 9.163520242110784e-07, "loss": 0.04747620224952698, "step": 6628 }, { "epoch": 0.8961968415449222, "grad_norm": 0.4805433452129364, "learning_rate": 9.13997890471176e-07, "loss": 0.04367192089557648, "step": 6629 }, { "epoch": 0.8963320349136875, "grad_norm": 0.607454240322113, "learning_rate": 9.116466895120251e-07, "loss": 0.0377173125743866, "step": 6630 }, { "epoch": 0.8964672282824527, "grad_norm": 0.8191885352134705, "learning_rate": 9.092984218231609e-07, "loss": 0.052013371139764786, "step": 6631 }, { "epoch": 0.8966024216512181, "grad_norm": 0.7560107707977295, "learning_rate": 9.069530878935072e-07, "loss": 0.050231315195560455, "step": 6632 }, { "epoch": 0.8967376150199833, "grad_norm": 0.40043050050735474, "learning_rate": 9.046106882113753e-07, "loss": 0.04714782536029816, "step": 6633 }, { "epoch": 0.8968728083887485, "grad_norm": 0.46625396609306335, "learning_rate": 9.022712232644631e-07, "loss": 0.04685495048761368, "step": 6634 }, { "epoch": 0.8970080017575138, "grad_norm": 0.6381518840789795, "learning_rate": 8.999346935398611e-07, "loss": 0.06336129456758499, "step": 6635 }, { "epoch": 0.897143195126279, "grad_norm": 0.6549854278564453, "learning_rate": 8.976010995240436e-07, "loss": 0.06682717800140381, "step": 6636 }, { "epoch": 0.8972783884950443, "grad_norm": 0.9094131588935852, "learning_rate": 8.952704417028818e-07, "loss": 0.054925426840782166, "step": 6637 }, { "epoch": 0.8974135818638096, "grad_norm": 0.8842827081680298, "learning_rate": 8.929427205616308e-07, "loss": 0.061176784336566925, "step": 6638 }, { "epoch": 0.8975487752325748, "grad_norm": 0.6502466797828674, "learning_rate": 8.906179365849332e-07, "loss": 0.04725296422839165, "step": 6639 }, { "epoch": 0.8976839686013401, "grad_norm": 0.5638745427131653, "learning_rate": 8.882960902568216e-07, "loss": 0.04483944922685623, "step": 6640 }, { "epoch": 0.8978191619701054, "grad_norm": 0.5545969605445862, "learning_rate": 8.85977182060716e-07, "loss": 0.04482412338256836, "step": 6641 }, { "epoch": 0.8979543553388706, "grad_norm": 0.5826829671859741, "learning_rate": 8.836612124794285e-07, "loss": 0.061395928263664246, "step": 6642 }, { "epoch": 0.8980895487076359, "grad_norm": 0.9112701416015625, "learning_rate": 8.813481819951502e-07, "loss": 0.05944044142961502, "step": 6643 }, { "epoch": 0.8982247420764011, "grad_norm": 0.33140549063682556, "learning_rate": 8.790380910894724e-07, "loss": 0.04346524924039841, "step": 6644 }, { "epoch": 0.8983599354451665, "grad_norm": 0.8436567187309265, "learning_rate": 8.767309402433671e-07, "loss": 0.052305594086647034, "step": 6645 }, { "epoch": 0.8984951288139317, "grad_norm": 0.32180801033973694, "learning_rate": 8.744267299371917e-07, "loss": 0.04961639642715454, "step": 6646 }, { "epoch": 0.8986303221826969, "grad_norm": 0.40363961458206177, "learning_rate": 8.721254606507023e-07, "loss": 0.052960917353630066, "step": 6647 }, { "epoch": 0.8987655155514622, "grad_norm": 0.8495690822601318, "learning_rate": 8.698271328630275e-07, "loss": 0.07176923006772995, "step": 6648 }, { "epoch": 0.8989007089202274, "grad_norm": 0.30365681648254395, "learning_rate": 8.675317470526961e-07, "loss": 0.04673558101058006, "step": 6649 }, { "epoch": 0.8990359022889928, "grad_norm": 0.7263216972351074, "learning_rate": 8.652393036976159e-07, "loss": 0.04724418371915817, "step": 6650 }, { "epoch": 0.899171095657758, "grad_norm": 0.47684288024902344, "learning_rate": 8.629498032750916e-07, "loss": 0.03681980073451996, "step": 6651 }, { "epoch": 0.8993062890265232, "grad_norm": 1.1259374618530273, "learning_rate": 8.606632462618069e-07, "loss": 0.07840201258659363, "step": 6652 }, { "epoch": 0.8994414823952885, "grad_norm": 0.8104427456855774, "learning_rate": 8.583796331338311e-07, "loss": 0.058555424213409424, "step": 6653 }, { "epoch": 0.8995766757640538, "grad_norm": 0.6018322706222534, "learning_rate": 8.560989643666306e-07, "loss": 0.05421757698059082, "step": 6654 }, { "epoch": 0.8997118691328191, "grad_norm": 0.478898823261261, "learning_rate": 8.538212404350471e-07, "loss": 0.048683859407901764, "step": 6655 }, { "epoch": 0.8998470625015843, "grad_norm": 0.6437026262283325, "learning_rate": 8.515464618133228e-07, "loss": 0.048388078808784485, "step": 6656 }, { "epoch": 0.8999822558703495, "grad_norm": 0.513083815574646, "learning_rate": 8.492746289750725e-07, "loss": 0.028460489585995674, "step": 6657 }, { "epoch": 0.9001174492391149, "grad_norm": 0.2022758424282074, "learning_rate": 8.470057423933026e-07, "loss": 0.039095520973205566, "step": 6658 }, { "epoch": 0.9002526426078801, "grad_norm": 0.21069441735744476, "learning_rate": 8.447398025404118e-07, "loss": 0.048804871737957, "step": 6659 }, { "epoch": 0.9003878359766454, "grad_norm": 0.31124478578567505, "learning_rate": 8.42476809888178e-07, "loss": 0.04619381204247475, "step": 6660 }, { "epoch": 0.9005230293454106, "grad_norm": 0.6880770921707153, "learning_rate": 8.402167649077725e-07, "loss": 0.05387970805168152, "step": 6661 }, { "epoch": 0.9006582227141758, "grad_norm": 0.9850658178329468, "learning_rate": 8.379596680697454e-07, "loss": 0.058292463421821594, "step": 6662 }, { "epoch": 0.9007934160829412, "grad_norm": 0.7874597311019897, "learning_rate": 8.357055198440328e-07, "loss": 0.0476127564907074, "step": 6663 }, { "epoch": 0.9009286094517064, "grad_norm": 0.9424549341201782, "learning_rate": 8.334543206999673e-07, "loss": 0.06544631719589233, "step": 6664 }, { "epoch": 0.9010638028204716, "grad_norm": 0.8563365340232849, "learning_rate": 8.312060711062558e-07, "loss": 0.07613970339298248, "step": 6665 }, { "epoch": 0.9011989961892369, "grad_norm": 0.9180619120597839, "learning_rate": 8.289607715309988e-07, "loss": 0.058623701333999634, "step": 6666 }, { "epoch": 0.9013341895580022, "grad_norm": 0.31008583307266235, "learning_rate": 8.267184224416791e-07, "loss": 0.06269200891256332, "step": 6667 }, { "epoch": 0.9014693829267675, "grad_norm": 0.631394624710083, "learning_rate": 8.244790243051614e-07, "loss": 0.05464514344930649, "step": 6668 }, { "epoch": 0.9016045762955327, "grad_norm": 0.3147026300430298, "learning_rate": 8.222425775877079e-07, "loss": 0.03973004221916199, "step": 6669 }, { "epoch": 0.9017397696642979, "grad_norm": 1.0921937227249146, "learning_rate": 8.200090827549527e-07, "loss": 0.06183578073978424, "step": 6670 }, { "epoch": 0.9018749630330632, "grad_norm": 0.43494585156440735, "learning_rate": 8.17778540271924e-07, "loss": 0.03694123029708862, "step": 6671 }, { "epoch": 0.9020101564018285, "grad_norm": 0.34973108768463135, "learning_rate": 8.155509506030334e-07, "loss": 0.04598543792963028, "step": 6672 }, { "epoch": 0.9021453497705938, "grad_norm": 0.7542912364006042, "learning_rate": 8.133263142120717e-07, "loss": 0.05929616093635559, "step": 6673 }, { "epoch": 0.902280543139359, "grad_norm": 0.28905144333839417, "learning_rate": 8.111046315622284e-07, "loss": 0.05378742143511772, "step": 6674 }, { "epoch": 0.9024157365081242, "grad_norm": 0.5028477907180786, "learning_rate": 8.088859031160633e-07, "loss": 0.06173387169837952, "step": 6675 }, { "epoch": 0.9025509298768896, "grad_norm": 0.5621204376220703, "learning_rate": 8.066701293355288e-07, "loss": 0.04248432815074921, "step": 6676 }, { "epoch": 0.9026861232456548, "grad_norm": 0.40853118896484375, "learning_rate": 8.044573106819625e-07, "loss": 0.06189492344856262, "step": 6677 }, { "epoch": 0.9028213166144201, "grad_norm": 1.013044834136963, "learning_rate": 8.022474476160824e-07, "loss": 0.05431247875094414, "step": 6678 }, { "epoch": 0.9029565099831853, "grad_norm": 0.3616791069507599, "learning_rate": 8.000405405979988e-07, "loss": 0.040154121816158295, "step": 6679 }, { "epoch": 0.9030917033519505, "grad_norm": 0.3997938334941864, "learning_rate": 7.978365900871943e-07, "loss": 0.042385224252939224, "step": 6680 }, { "epoch": 0.9032268967207159, "grad_norm": 0.6154190301895142, "learning_rate": 7.956355965425482e-07, "loss": 0.04839048534631729, "step": 6681 }, { "epoch": 0.9033620900894811, "grad_norm": 0.43059805035591125, "learning_rate": 7.934375604223193e-07, "loss": 0.05832874774932861, "step": 6682 }, { "epoch": 0.9034972834582464, "grad_norm": 0.7557125091552734, "learning_rate": 7.912424821841463e-07, "loss": 0.050268713384866714, "step": 6683 }, { "epoch": 0.9036324768270116, "grad_norm": 0.39958083629608154, "learning_rate": 7.89050362285062e-07, "loss": 0.046776190400123596, "step": 6684 }, { "epoch": 0.9037676701957769, "grad_norm": 0.45176929235458374, "learning_rate": 7.868612011814713e-07, "loss": 0.06302172690629959, "step": 6685 }, { "epoch": 0.9039028635645422, "grad_norm": 0.8538007140159607, "learning_rate": 7.846749993291746e-07, "loss": 0.03994462639093399, "step": 6686 }, { "epoch": 0.9040380569333074, "grad_norm": 0.4270612299442291, "learning_rate": 7.824917571833445e-07, "loss": 0.0590907484292984, "step": 6687 }, { "epoch": 0.9041732503020726, "grad_norm": 0.5902412533760071, "learning_rate": 7.80311475198554e-07, "loss": 0.048745639622211456, "step": 6688 }, { "epoch": 0.904308443670838, "grad_norm": 0.642102837562561, "learning_rate": 7.781341538287384e-07, "loss": 0.04547517001628876, "step": 6689 }, { "epoch": 0.9044436370396032, "grad_norm": 0.8880202174186707, "learning_rate": 7.759597935272316e-07, "loss": 0.052772752940654755, "step": 6690 }, { "epoch": 0.9045788304083685, "grad_norm": 0.4674089848995209, "learning_rate": 7.7378839474675e-07, "loss": 0.05677032470703125, "step": 6691 }, { "epoch": 0.9047140237771337, "grad_norm": 0.9701254963874817, "learning_rate": 7.716199579393851e-07, "loss": 0.06401762366294861, "step": 6692 }, { "epoch": 0.9048492171458989, "grad_norm": 0.5477458834648132, "learning_rate": 7.694544835566259e-07, "loss": 0.05173102766275406, "step": 6693 }, { "epoch": 0.9049844105146643, "grad_norm": 0.9523248076438904, "learning_rate": 7.672919720493249e-07, "loss": 0.06883459538221359, "step": 6694 }, { "epoch": 0.9051196038834295, "grad_norm": 0.44439083337783813, "learning_rate": 7.651324238677338e-07, "loss": 0.0566340833902359, "step": 6695 }, { "epoch": 0.9052547972521948, "grad_norm": 0.6270071268081665, "learning_rate": 7.629758394614828e-07, "loss": 0.0413985475897789, "step": 6696 }, { "epoch": 0.90538999062096, "grad_norm": 0.5257025361061096, "learning_rate": 7.608222192795794e-07, "loss": 0.051652226597070694, "step": 6697 }, { "epoch": 0.9055251839897253, "grad_norm": 0.5392767786979675, "learning_rate": 7.586715637704284e-07, "loss": 0.051113277673721313, "step": 6698 }, { "epoch": 0.9056603773584906, "grad_norm": 0.5225648283958435, "learning_rate": 7.565238733817998e-07, "loss": 0.05162392556667328, "step": 6699 }, { "epoch": 0.9057955707272558, "grad_norm": 0.845072865486145, "learning_rate": 7.543791485608542e-07, "loss": 0.05560246855020523, "step": 6700 }, { "epoch": 0.9059307640960211, "grad_norm": 0.5046133995056152, "learning_rate": 7.52237389754138e-07, "loss": 0.0519113764166832, "step": 6701 }, { "epoch": 0.9060659574647864, "grad_norm": 0.6372543573379517, "learning_rate": 7.500985974075758e-07, "loss": 0.06785418838262558, "step": 6702 }, { "epoch": 0.9062011508335516, "grad_norm": 0.48460158705711365, "learning_rate": 7.479627719664767e-07, "loss": 0.05035112053155899, "step": 6703 }, { "epoch": 0.9063363442023169, "grad_norm": 0.39503371715545654, "learning_rate": 7.458299138755281e-07, "loss": 0.05464310199022293, "step": 6704 }, { "epoch": 0.9064715375710821, "grad_norm": 0.5971314907073975, "learning_rate": 7.437000235788033e-07, "loss": 0.0529952310025692, "step": 6705 }, { "epoch": 0.9066067309398474, "grad_norm": 0.768519401550293, "learning_rate": 7.415731015197575e-07, "loss": 0.045520246028900146, "step": 6706 }, { "epoch": 0.9067419243086127, "grad_norm": 0.21944670379161835, "learning_rate": 7.39449148141228e-07, "loss": 0.03883674368262291, "step": 6707 }, { "epoch": 0.9068771176773779, "grad_norm": 0.5327578186988831, "learning_rate": 7.373281638854329e-07, "loss": 0.04125240445137024, "step": 6708 }, { "epoch": 0.9070123110461432, "grad_norm": 0.4574945867061615, "learning_rate": 7.352101491939722e-07, "loss": 0.05063946545124054, "step": 6709 }, { "epoch": 0.9071475044149084, "grad_norm": 0.954848051071167, "learning_rate": 7.330951045078249e-07, "loss": 0.07672044634819031, "step": 6710 }, { "epoch": 0.9072826977836738, "grad_norm": 0.28551214933395386, "learning_rate": 7.309830302673621e-07, "loss": 0.04225584492087364, "step": 6711 }, { "epoch": 0.907417891152439, "grad_norm": 0.7695335149765015, "learning_rate": 7.288739269123184e-07, "loss": 0.06044097617268562, "step": 6712 }, { "epoch": 0.9075530845212042, "grad_norm": 0.5745910406112671, "learning_rate": 7.267677948818296e-07, "loss": 0.04879125952720642, "step": 6713 }, { "epoch": 0.9076882778899695, "grad_norm": 0.2723865807056427, "learning_rate": 7.246646346143997e-07, "loss": 0.03120660036802292, "step": 6714 }, { "epoch": 0.9078234712587347, "grad_norm": 0.568640947341919, "learning_rate": 7.225644465479153e-07, "loss": 0.04885981231927872, "step": 6715 }, { "epoch": 0.9079586646275, "grad_norm": 0.988162636756897, "learning_rate": 7.204672311196547e-07, "loss": 0.05720759928226471, "step": 6716 }, { "epoch": 0.9080938579962653, "grad_norm": 0.6753554940223694, "learning_rate": 7.183729887662604e-07, "loss": 0.044138528406620026, "step": 6717 }, { "epoch": 0.9082290513650305, "grad_norm": 0.2969352900981903, "learning_rate": 7.162817199237703e-07, "loss": 0.041479967534542084, "step": 6718 }, { "epoch": 0.9083642447337958, "grad_norm": 0.5062270164489746, "learning_rate": 7.141934250275978e-07, "loss": 0.07195169478654861, "step": 6719 }, { "epoch": 0.9084994381025611, "grad_norm": 1.2973712682724, "learning_rate": 7.121081045125316e-07, "loss": 0.05476060509681702, "step": 6720 }, { "epoch": 0.9086346314713263, "grad_norm": 0.7226802706718445, "learning_rate": 7.100257588127545e-07, "loss": 0.0396604910492897, "step": 6721 }, { "epoch": 0.9087698248400916, "grad_norm": 0.5772003531455994, "learning_rate": 7.079463883618148e-07, "loss": 0.032029759138822556, "step": 6722 }, { "epoch": 0.9089050182088568, "grad_norm": 0.5488506555557251, "learning_rate": 7.058699935926527e-07, "loss": 0.05793087184429169, "step": 6723 }, { "epoch": 0.9090402115776222, "grad_norm": 0.39535918831825256, "learning_rate": 7.037965749375808e-07, "loss": 0.04278813302516937, "step": 6724 }, { "epoch": 0.9091754049463874, "grad_norm": 0.7367969155311584, "learning_rate": 7.017261328283037e-07, "loss": 0.06994271278381348, "step": 6725 }, { "epoch": 0.9093105983151526, "grad_norm": 0.43338915705680847, "learning_rate": 6.996586676958916e-07, "loss": 0.039698973298072815, "step": 6726 }, { "epoch": 0.9094457916839179, "grad_norm": 0.3456937074661255, "learning_rate": 6.975941799708019e-07, "loss": 0.03696325793862343, "step": 6727 }, { "epoch": 0.9095809850526831, "grad_norm": 1.5028387308120728, "learning_rate": 6.955326700828757e-07, "loss": 0.07935422658920288, "step": 6728 }, { "epoch": 0.9097161784214485, "grad_norm": 0.29515862464904785, "learning_rate": 6.934741384613246e-07, "loss": 0.04717147350311279, "step": 6729 }, { "epoch": 0.9098513717902137, "grad_norm": 0.20951049029827118, "learning_rate": 6.91418585534756e-07, "loss": 0.041094474494457245, "step": 6730 }, { "epoch": 0.9099865651589789, "grad_norm": 0.29584041237831116, "learning_rate": 6.893660117311373e-07, "loss": 0.04478374123573303, "step": 6731 }, { "epoch": 0.9101217585277442, "grad_norm": 0.4299290180206299, "learning_rate": 6.873164174778252e-07, "loss": 0.05629752576351166, "step": 6732 }, { "epoch": 0.9102569518965095, "grad_norm": 1.201492190361023, "learning_rate": 6.852698032015631e-07, "loss": 0.0653962567448616, "step": 6733 }, { "epoch": 0.9103921452652748, "grad_norm": 0.7219903469085693, "learning_rate": 6.832261693284636e-07, "loss": 0.06231025606393814, "step": 6734 }, { "epoch": 0.91052733863404, "grad_norm": 1.453986406326294, "learning_rate": 6.811855162840214e-07, "loss": 0.06157355010509491, "step": 6735 }, { "epoch": 0.9106625320028052, "grad_norm": 0.48053452372550964, "learning_rate": 6.791478444931132e-07, "loss": 0.04321175813674927, "step": 6736 }, { "epoch": 0.9107977253715706, "grad_norm": 1.0145496129989624, "learning_rate": 6.77113154379988e-07, "loss": 0.049371764063835144, "step": 6737 }, { "epoch": 0.9109329187403358, "grad_norm": 0.3684634864330292, "learning_rate": 6.75081446368287e-07, "loss": 0.05167236179113388, "step": 6738 }, { "epoch": 0.9110681121091011, "grad_norm": 1.0186936855316162, "learning_rate": 6.730527208810166e-07, "loss": 0.05943696200847626, "step": 6739 }, { "epoch": 0.9112033054778663, "grad_norm": 1.1795024871826172, "learning_rate": 6.710269783405709e-07, "loss": 0.07245517522096634, "step": 6740 }, { "epoch": 0.9113384988466315, "grad_norm": 1.3816347122192383, "learning_rate": 6.690042191687206e-07, "loss": 0.07006622105836868, "step": 6741 }, { "epoch": 0.9114736922153969, "grad_norm": 0.36937466263771057, "learning_rate": 6.669844437866124e-07, "loss": 0.04919500648975372, "step": 6742 }, { "epoch": 0.9116088855841621, "grad_norm": 0.8501532077789307, "learning_rate": 6.649676526147764e-07, "loss": 0.06431706994771957, "step": 6743 }, { "epoch": 0.9117440789529273, "grad_norm": 0.3945702314376831, "learning_rate": 6.629538460731199e-07, "loss": 0.05623989552259445, "step": 6744 }, { "epoch": 0.9118792723216926, "grad_norm": 0.4328218698501587, "learning_rate": 6.609430245809261e-07, "loss": 0.04234835505485535, "step": 6745 }, { "epoch": 0.9120144656904579, "grad_norm": 0.42859089374542236, "learning_rate": 6.589351885568617e-07, "loss": 0.07000689953565598, "step": 6746 }, { "epoch": 0.9121496590592232, "grad_norm": 0.29595860838890076, "learning_rate": 6.569303384189624e-07, "loss": 0.030823007225990295, "step": 6747 }, { "epoch": 0.9122848524279884, "grad_norm": 0.6928752064704895, "learning_rate": 6.54928474584659e-07, "loss": 0.04525993764400482, "step": 6748 }, { "epoch": 0.9124200457967536, "grad_norm": 0.5369593501091003, "learning_rate": 6.5292959747074e-07, "loss": 0.045181408524513245, "step": 6749 }, { "epoch": 0.912555239165519, "grad_norm": 0.24417948722839355, "learning_rate": 6.509337074933891e-07, "loss": 0.04770684242248535, "step": 6750 }, { "epoch": 0.9126904325342842, "grad_norm": 0.6596533060073853, "learning_rate": 6.489408050681589e-07, "loss": 0.05600264295935631, "step": 6751 }, { "epoch": 0.9128256259030495, "grad_norm": 0.49990686774253845, "learning_rate": 6.469508906099792e-07, "loss": 0.05889815092086792, "step": 6752 }, { "epoch": 0.9129608192718147, "grad_norm": 0.7795887589454651, "learning_rate": 6.449639645331684e-07, "loss": 0.05389513075351715, "step": 6753 }, { "epoch": 0.9130960126405799, "grad_norm": 0.3981202244758606, "learning_rate": 6.429800272514058e-07, "loss": 0.04817391186952591, "step": 6754 }, { "epoch": 0.9132312060093453, "grad_norm": 0.2686627209186554, "learning_rate": 6.409990791777659e-07, "loss": 0.03644941747188568, "step": 6755 }, { "epoch": 0.9133663993781105, "grad_norm": 0.5846071243286133, "learning_rate": 6.390211207246888e-07, "loss": 0.06171203777194023, "step": 6756 }, { "epoch": 0.9135015927468758, "grad_norm": 0.24295461177825928, "learning_rate": 6.370461523039967e-07, "loss": 0.038843024522066116, "step": 6757 }, { "epoch": 0.913636786115641, "grad_norm": 0.4861011207103729, "learning_rate": 6.350741743268873e-07, "loss": 0.0579574853181839, "step": 6758 }, { "epoch": 0.9137719794844062, "grad_norm": 0.49936577677726746, "learning_rate": 6.331051872039373e-07, "loss": 0.07107189297676086, "step": 6759 }, { "epoch": 0.9139071728531716, "grad_norm": 0.5456699132919312, "learning_rate": 6.31139191345102e-07, "loss": 0.06423240900039673, "step": 6760 }, { "epoch": 0.9140423662219368, "grad_norm": 0.3858813941478729, "learning_rate": 6.291761871597091e-07, "loss": 0.05905080586671829, "step": 6761 }, { "epoch": 0.9141775595907021, "grad_norm": 0.8678537607192993, "learning_rate": 6.272161750564731e-07, "loss": 0.04089200496673584, "step": 6762 }, { "epoch": 0.9143127529594673, "grad_norm": 0.25096315145492554, "learning_rate": 6.252591554434728e-07, "loss": 0.03662240505218506, "step": 6763 }, { "epoch": 0.9144479463282326, "grad_norm": 0.9206839799880981, "learning_rate": 6.233051287281688e-07, "loss": 0.06119590997695923, "step": 6764 }, { "epoch": 0.9145831396969979, "grad_norm": 0.43097183108329773, "learning_rate": 6.213540953174057e-07, "loss": 0.06014223396778107, "step": 6765 }, { "epoch": 0.9147183330657631, "grad_norm": 0.441585510969162, "learning_rate": 6.194060556173953e-07, "loss": 0.044751644134521484, "step": 6766 }, { "epoch": 0.9148535264345284, "grad_norm": 0.6441513895988464, "learning_rate": 6.17461010033733e-07, "loss": 0.06827010214328766, "step": 6767 }, { "epoch": 0.9149887198032937, "grad_norm": 0.22242850065231323, "learning_rate": 6.155189589713833e-07, "loss": 0.04135541617870331, "step": 6768 }, { "epoch": 0.9151239131720589, "grad_norm": 0.4358827471733093, "learning_rate": 6.135799028346928e-07, "loss": 0.048567309975624084, "step": 6769 }, { "epoch": 0.9152591065408242, "grad_norm": 0.3070225417613983, "learning_rate": 6.116438420273868e-07, "loss": 0.052805133163928986, "step": 6770 }, { "epoch": 0.9153942999095894, "grad_norm": 0.6133350729942322, "learning_rate": 6.097107769525595e-07, "loss": 0.05564849451184273, "step": 6771 }, { "epoch": 0.9155294932783546, "grad_norm": 0.32048967480659485, "learning_rate": 6.077807080126873e-07, "loss": 0.05249929428100586, "step": 6772 }, { "epoch": 0.91566468664712, "grad_norm": 0.4419393241405487, "learning_rate": 6.058536356096206e-07, "loss": 0.05552881956100464, "step": 6773 }, { "epoch": 0.9157998800158852, "grad_norm": 0.4304952323436737, "learning_rate": 6.039295601445833e-07, "loss": 0.059729404747486115, "step": 6774 }, { "epoch": 0.9159350733846505, "grad_norm": 0.6956761479377747, "learning_rate": 6.020084820181831e-07, "loss": 0.051030222326517105, "step": 6775 }, { "epoch": 0.9160702667534157, "grad_norm": 0.27552148699760437, "learning_rate": 6.000904016303971e-07, "loss": 0.03568500652909279, "step": 6776 }, { "epoch": 0.916205460122181, "grad_norm": 0.6241278648376465, "learning_rate": 5.981753193805789e-07, "loss": 0.04082517698407173, "step": 6777 }, { "epoch": 0.9163406534909463, "grad_norm": 0.3620690703392029, "learning_rate": 5.962632356674597e-07, "loss": 0.04518449306488037, "step": 6778 }, { "epoch": 0.9164758468597115, "grad_norm": 0.4352056384086609, "learning_rate": 5.94354150889141e-07, "loss": 0.05573827028274536, "step": 6779 }, { "epoch": 0.9166110402284768, "grad_norm": 0.5349669456481934, "learning_rate": 5.924480654431147e-07, "loss": 0.0477016344666481, "step": 6780 }, { "epoch": 0.916746233597242, "grad_norm": 0.7191454172134399, "learning_rate": 5.905449797262252e-07, "loss": 0.05366852134466171, "step": 6781 }, { "epoch": 0.9168814269660073, "grad_norm": 0.9488183856010437, "learning_rate": 5.886448941347156e-07, "loss": 0.06195848435163498, "step": 6782 }, { "epoch": 0.9170166203347726, "grad_norm": 0.3767930865287781, "learning_rate": 5.867478090641892e-07, "loss": 0.04960257560014725, "step": 6783 }, { "epoch": 0.9171518137035378, "grad_norm": 0.26893851161003113, "learning_rate": 5.848537249096269e-07, "loss": 0.03197500482201576, "step": 6784 }, { "epoch": 0.9172870070723032, "grad_norm": 0.5486786365509033, "learning_rate": 5.829626420653949e-07, "loss": 0.04590778425335884, "step": 6785 }, { "epoch": 0.9174222004410684, "grad_norm": 0.43994349241256714, "learning_rate": 5.810745609252166e-07, "loss": 0.04933256655931473, "step": 6786 }, { "epoch": 0.9175573938098336, "grad_norm": 0.6018949747085571, "learning_rate": 5.791894818822091e-07, "loss": 0.06430856883525848, "step": 6787 }, { "epoch": 0.9176925871785989, "grad_norm": 0.3604245185852051, "learning_rate": 5.773074053288519e-07, "loss": 0.04646119475364685, "step": 6788 }, { "epoch": 0.9178277805473641, "grad_norm": 0.7268880009651184, "learning_rate": 5.75428331657003e-07, "loss": 0.04490852355957031, "step": 6789 }, { "epoch": 0.9179629739161295, "grad_norm": 0.5224449038505554, "learning_rate": 5.735522612578998e-07, "loss": 0.040009189397096634, "step": 6790 }, { "epoch": 0.9180981672848947, "grad_norm": 0.696461021900177, "learning_rate": 5.716791945221444e-07, "loss": 0.042265623807907104, "step": 6791 }, { "epoch": 0.9182333606536599, "grad_norm": 0.45371294021606445, "learning_rate": 5.698091318397219e-07, "loss": 0.035211462527513504, "step": 6792 }, { "epoch": 0.9183685540224252, "grad_norm": 0.6046249866485596, "learning_rate": 5.679420735999908e-07, "loss": 0.04693195968866348, "step": 6793 }, { "epoch": 0.9185037473911905, "grad_norm": 0.25992992520332336, "learning_rate": 5.660780201916799e-07, "loss": 0.041408687829971313, "step": 6794 }, { "epoch": 0.9186389407599557, "grad_norm": 1.2105790376663208, "learning_rate": 5.642169720028973e-07, "loss": 0.062398843467235565, "step": 6795 }, { "epoch": 0.918774134128721, "grad_norm": 0.7734612226486206, "learning_rate": 5.623589294211196e-07, "loss": 0.05671662092208862, "step": 6796 }, { "epoch": 0.9189093274974862, "grad_norm": 0.7736897468566895, "learning_rate": 5.605038928332057e-07, "loss": 0.06395828723907471, "step": 6797 }, { "epoch": 0.9190445208662515, "grad_norm": 1.4590506553649902, "learning_rate": 5.586518626253817e-07, "loss": 0.06466203927993774, "step": 6798 }, { "epoch": 0.9191797142350168, "grad_norm": 0.279774010181427, "learning_rate": 5.568028391832524e-07, "loss": 0.03793752193450928, "step": 6799 }, { "epoch": 0.919314907603782, "grad_norm": 0.6834036111831665, "learning_rate": 5.549568228917917e-07, "loss": 0.05344633385539055, "step": 6800 }, { "epoch": 0.9194501009725473, "grad_norm": 0.437775582075119, "learning_rate": 5.531138141353486e-07, "loss": 0.04165481775999069, "step": 6801 }, { "epoch": 0.9195852943413125, "grad_norm": 0.5745537877082825, "learning_rate": 5.512738132976514e-07, "loss": 0.037696562707424164, "step": 6802 }, { "epoch": 0.9197204877100779, "grad_norm": 0.3792494535446167, "learning_rate": 5.49436820761795e-07, "loss": 0.04074833169579506, "step": 6803 }, { "epoch": 0.9198556810788431, "grad_norm": 0.5810791850090027, "learning_rate": 5.476028369102537e-07, "loss": 0.06212163716554642, "step": 6804 }, { "epoch": 0.9199908744476083, "grad_norm": 1.3440566062927246, "learning_rate": 5.45771862124872e-07, "loss": 0.06217864900827408, "step": 6805 }, { "epoch": 0.9201260678163736, "grad_norm": 0.669025182723999, "learning_rate": 5.439438967868649e-07, "loss": 0.05776963010430336, "step": 6806 }, { "epoch": 0.9202612611851388, "grad_norm": 0.3603413999080658, "learning_rate": 5.421189412768296e-07, "loss": 0.06129508838057518, "step": 6807 }, { "epoch": 0.9203964545539042, "grad_norm": 0.5741078853607178, "learning_rate": 5.402969959747306e-07, "loss": 0.046145543456077576, "step": 6808 }, { "epoch": 0.9205316479226694, "grad_norm": 0.3716714680194855, "learning_rate": 5.384780612599044e-07, "loss": 0.041075631976127625, "step": 6809 }, { "epoch": 0.9206668412914346, "grad_norm": 0.6136707067489624, "learning_rate": 5.366621375110647e-07, "loss": 0.06171710789203644, "step": 6810 }, { "epoch": 0.9208020346601999, "grad_norm": 0.4962771236896515, "learning_rate": 5.348492251062942e-07, "loss": 0.04525885730981827, "step": 6811 }, { "epoch": 0.9209372280289652, "grad_norm": 0.3042999804019928, "learning_rate": 5.330393244230558e-07, "loss": 0.04088454693555832, "step": 6812 }, { "epoch": 0.9210724213977305, "grad_norm": 0.23977521061897278, "learning_rate": 5.312324358381731e-07, "loss": 0.032902006059885025, "step": 6813 }, { "epoch": 0.9212076147664957, "grad_norm": 0.41756364703178406, "learning_rate": 5.29428559727857e-07, "loss": 0.05185117572546005, "step": 6814 }, { "epoch": 0.9213428081352609, "grad_norm": 0.6165286302566528, "learning_rate": 5.276276964676802e-07, "loss": 0.06036829203367233, "step": 6815 }, { "epoch": 0.9214780015040263, "grad_norm": 0.5815603733062744, "learning_rate": 5.258298464325928e-07, "loss": 0.05971301719546318, "step": 6816 }, { "epoch": 0.9216131948727915, "grad_norm": 0.6722524166107178, "learning_rate": 5.240350099969204e-07, "loss": 0.031716879457235336, "step": 6817 }, { "epoch": 0.9217483882415568, "grad_norm": 0.3034622073173523, "learning_rate": 5.222431875343492e-07, "loss": 0.03890479356050491, "step": 6818 }, { "epoch": 0.921883581610322, "grad_norm": 0.39960381388664246, "learning_rate": 5.204543794179539e-07, "loss": 0.042162250727415085, "step": 6819 }, { "epoch": 0.9220187749790872, "grad_norm": 0.3323359191417694, "learning_rate": 5.186685860201717e-07, "loss": 0.04835645854473114, "step": 6820 }, { "epoch": 0.9221539683478526, "grad_norm": 0.5833526849746704, "learning_rate": 5.16885807712812e-07, "loss": 0.05948370322585106, "step": 6821 }, { "epoch": 0.9222891617166178, "grad_norm": 0.2958061397075653, "learning_rate": 5.151060448670625e-07, "loss": 0.03802281990647316, "step": 6822 }, { "epoch": 0.922424355085383, "grad_norm": 0.5264478325843811, "learning_rate": 5.133292978534754e-07, "loss": 0.06669776141643524, "step": 6823 }, { "epoch": 0.9225595484541483, "grad_norm": 0.37217971682548523, "learning_rate": 5.115555670419814e-07, "loss": 0.04286617413163185, "step": 6824 }, { "epoch": 0.9226947418229136, "grad_norm": 0.4074537456035614, "learning_rate": 5.097848528018817e-07, "loss": 0.03774375468492508, "step": 6825 }, { "epoch": 0.9228299351916789, "grad_norm": 0.890584409236908, "learning_rate": 5.080171555018448e-07, "loss": 0.045653946697711945, "step": 6826 }, { "epoch": 0.9229651285604441, "grad_norm": 0.4125596880912781, "learning_rate": 5.06252475509918e-07, "loss": 0.07558906078338623, "step": 6827 }, { "epoch": 0.9231003219292093, "grad_norm": 0.22560857236385345, "learning_rate": 5.044908131935139e-07, "loss": 0.0393093004822731, "step": 6828 }, { "epoch": 0.9232355152979747, "grad_norm": 0.37901556491851807, "learning_rate": 5.027321689194242e-07, "loss": 0.04411853104829788, "step": 6829 }, { "epoch": 0.9233707086667399, "grad_norm": 0.8121209144592285, "learning_rate": 5.009765430538061e-07, "loss": 0.06160545349121094, "step": 6830 }, { "epoch": 0.9235059020355052, "grad_norm": 0.38927438855171204, "learning_rate": 4.992239359621886e-07, "loss": 0.04794158786535263, "step": 6831 }, { "epoch": 0.9236410954042704, "grad_norm": 0.718383252620697, "learning_rate": 4.974743480094767e-07, "loss": 0.05739040672779083, "step": 6832 }, { "epoch": 0.9237762887730356, "grad_norm": 0.5825828313827515, "learning_rate": 4.957277795599407e-07, "loss": 0.054212428629398346, "step": 6833 }, { "epoch": 0.923911482141801, "grad_norm": 0.9849809408187866, "learning_rate": 4.93984230977228e-07, "loss": 0.052510667592287064, "step": 6834 }, { "epoch": 0.9240466755105662, "grad_norm": 0.5457069277763367, "learning_rate": 4.922437026243531e-07, "loss": 0.05190357193350792, "step": 6835 }, { "epoch": 0.9241818688793315, "grad_norm": 0.33216261863708496, "learning_rate": 4.905061948637063e-07, "loss": 0.048580102622509, "step": 6836 }, { "epoch": 0.9243170622480967, "grad_norm": 0.28173625469207764, "learning_rate": 4.887717080570431e-07, "loss": 0.04640397056937218, "step": 6837 }, { "epoch": 0.924452255616862, "grad_norm": 0.5957367420196533, "learning_rate": 4.870402425654913e-07, "loss": 0.044471945613622665, "step": 6838 }, { "epoch": 0.9245874489856273, "grad_norm": 0.29059484601020813, "learning_rate": 4.853117987495542e-07, "loss": 0.04338253289461136, "step": 6839 }, { "epoch": 0.9247226423543925, "grad_norm": 0.333125501871109, "learning_rate": 4.83586376969104e-07, "loss": 0.04301752895116806, "step": 6840 }, { "epoch": 0.9248578357231578, "grad_norm": 0.4836156368255615, "learning_rate": 4.818639775833816e-07, "loss": 0.06459888815879822, "step": 6841 }, { "epoch": 0.924993029091923, "grad_norm": 0.762301504611969, "learning_rate": 4.801446009509969e-07, "loss": 0.0763884037733078, "step": 6842 }, { "epoch": 0.9251282224606883, "grad_norm": 1.4248297214508057, "learning_rate": 4.784282474299367e-07, "loss": 0.07100367546081543, "step": 6843 }, { "epoch": 0.9252634158294536, "grad_norm": 0.7939831018447876, "learning_rate": 4.767149173775537e-07, "loss": 0.04749560356140137, "step": 6844 }, { "epoch": 0.9253986091982188, "grad_norm": 0.36788618564605713, "learning_rate": 4.750046111505724e-07, "loss": 0.04334796592593193, "step": 6845 }, { "epoch": 0.9255338025669841, "grad_norm": 0.8742966055870056, "learning_rate": 4.732973291050896e-07, "loss": 0.05864344537258148, "step": 6846 }, { "epoch": 0.9256689959357494, "grad_norm": 0.6088741421699524, "learning_rate": 4.7159307159656607e-07, "loss": 0.06100534647703171, "step": 6847 }, { "epoch": 0.9258041893045146, "grad_norm": 0.34826967120170593, "learning_rate": 4.6989183897983954e-07, "loss": 0.045671992003917694, "step": 6848 }, { "epoch": 0.9259393826732799, "grad_norm": 1.2595014572143555, "learning_rate": 4.681936316091201e-07, "loss": 0.07075631618499756, "step": 6849 }, { "epoch": 0.9260745760420451, "grad_norm": 0.4086087942123413, "learning_rate": 4.664984498379765e-07, "loss": 0.05260862782597542, "step": 6850 }, { "epoch": 0.9262097694108103, "grad_norm": 0.4885626435279846, "learning_rate": 4.6480629401935814e-07, "loss": 0.036779481917619705, "step": 6851 }, { "epoch": 0.9263449627795757, "grad_norm": 0.5012714862823486, "learning_rate": 4.631171645055815e-07, "loss": 0.06439995765686035, "step": 6852 }, { "epoch": 0.9264801561483409, "grad_norm": 0.4285504221916199, "learning_rate": 4.614310616483286e-07, "loss": 0.04551964998245239, "step": 6853 }, { "epoch": 0.9266153495171062, "grad_norm": 1.070753812789917, "learning_rate": 4.5974798579866193e-07, "loss": 0.05979108065366745, "step": 6854 }, { "epoch": 0.9267505428858714, "grad_norm": 0.7485949993133545, "learning_rate": 4.580679373069996e-07, "loss": 0.055185697972774506, "step": 6855 }, { "epoch": 0.9268857362546367, "grad_norm": 0.34306201338768005, "learning_rate": 4.5639091652314e-07, "loss": 0.051580943167209625, "step": 6856 }, { "epoch": 0.927020929623402, "grad_norm": 0.29891863465309143, "learning_rate": 4.54716923796249e-07, "loss": 0.03212370723485947, "step": 6857 }, { "epoch": 0.9271561229921672, "grad_norm": 0.823025107383728, "learning_rate": 4.5304595947485927e-07, "loss": 0.05347749590873718, "step": 6858 }, { "epoch": 0.9272913163609325, "grad_norm": 0.6613633632659912, "learning_rate": 4.5137802390687433e-07, "loss": 0.05161743983626366, "step": 6859 }, { "epoch": 0.9274265097296978, "grad_norm": 0.6856628656387329, "learning_rate": 4.497131174395663e-07, "loss": 0.06462137401103973, "step": 6860 }, { "epoch": 0.927561703098463, "grad_norm": 0.35582128167152405, "learning_rate": 4.4805124041957967e-07, "loss": 0.051616791635751724, "step": 6861 }, { "epoch": 0.9276968964672283, "grad_norm": 1.0053256750106812, "learning_rate": 4.463923931929259e-07, "loss": 0.04513093829154968, "step": 6862 }, { "epoch": 0.9278320898359935, "grad_norm": 0.2146720439195633, "learning_rate": 4.4473657610498377e-07, "loss": 0.03712264075875282, "step": 6863 }, { "epoch": 0.9279672832047589, "grad_norm": 0.38348299264907837, "learning_rate": 4.430837895005058e-07, "loss": 0.03526635468006134, "step": 6864 }, { "epoch": 0.9281024765735241, "grad_norm": 0.5687447190284729, "learning_rate": 4.4143403372360836e-07, "loss": 0.06411556899547577, "step": 6865 }, { "epoch": 0.9282376699422893, "grad_norm": 0.8411898612976074, "learning_rate": 4.3978730911778176e-07, "loss": 0.041140396147966385, "step": 6866 }, { "epoch": 0.9283728633110546, "grad_norm": 0.3413739502429962, "learning_rate": 4.381436160258834e-07, "loss": 0.053307853639125824, "step": 6867 }, { "epoch": 0.9285080566798198, "grad_norm": 0.593433678150177, "learning_rate": 4.3650295479013615e-07, "loss": 0.04913664609193802, "step": 6868 }, { "epoch": 0.9286432500485852, "grad_norm": 0.9128335118293762, "learning_rate": 4.348653257521351e-07, "loss": 0.086134172976017, "step": 6869 }, { "epoch": 0.9287784434173504, "grad_norm": 1.4322556257247925, "learning_rate": 4.332307292528442e-07, "loss": 0.0616513267159462, "step": 6870 }, { "epoch": 0.9289136367861156, "grad_norm": 0.6071394681930542, "learning_rate": 4.315991656325946e-07, "loss": 0.04312963783740997, "step": 6871 }, { "epoch": 0.9290488301548809, "grad_norm": 0.6859742999076843, "learning_rate": 4.299706352310895e-07, "loss": 0.060514338314533234, "step": 6872 }, { "epoch": 0.9291840235236462, "grad_norm": 0.36952733993530273, "learning_rate": 4.283451383873926e-07, "loss": 0.05173899978399277, "step": 6873 }, { "epoch": 0.9293192168924115, "grad_norm": 1.0553873777389526, "learning_rate": 4.26722675439945e-07, "loss": 0.06947000324726105, "step": 6874 }, { "epoch": 0.9294544102611767, "grad_norm": 0.5726354122161865, "learning_rate": 4.251032467265481e-07, "loss": 0.0537940114736557, "step": 6875 }, { "epoch": 0.9295896036299419, "grad_norm": 0.43401291966438293, "learning_rate": 4.234868525843805e-07, "loss": 0.05457330867648125, "step": 6876 }, { "epoch": 0.9297247969987072, "grad_norm": 0.5494489669799805, "learning_rate": 4.218734933499796e-07, "loss": 0.04692947864532471, "step": 6877 }, { "epoch": 0.9298599903674725, "grad_norm": 0.41966408491134644, "learning_rate": 4.202631693592601e-07, "loss": 0.036715637892484665, "step": 6878 }, { "epoch": 0.9299951837362377, "grad_norm": 0.32652556896209717, "learning_rate": 4.186558809474955e-07, "loss": 0.037756599485874176, "step": 6879 }, { "epoch": 0.930130377105003, "grad_norm": 1.1343594789505005, "learning_rate": 4.170516284493331e-07, "loss": 0.06886908411979675, "step": 6880 }, { "epoch": 0.9302655704737682, "grad_norm": 0.4337814152240753, "learning_rate": 4.1545041219879063e-07, "loss": 0.05352926254272461, "step": 6881 }, { "epoch": 0.9304007638425336, "grad_norm": 0.30627673864364624, "learning_rate": 4.138522325292432e-07, "loss": 0.04135363921523094, "step": 6882 }, { "epoch": 0.9305359572112988, "grad_norm": 0.41036850214004517, "learning_rate": 4.1225708977344457e-07, "loss": 0.04702620208263397, "step": 6883 }, { "epoch": 0.930671150580064, "grad_norm": 0.6200788617134094, "learning_rate": 4.106649842635124e-07, "loss": 0.0588408038020134, "step": 6884 }, { "epoch": 0.9308063439488293, "grad_norm": 0.3638732135295868, "learning_rate": 4.090759163309282e-07, "loss": 0.04563349857926369, "step": 6885 }, { "epoch": 0.9309415373175945, "grad_norm": 0.40585437417030334, "learning_rate": 4.07489886306549e-07, "loss": 0.06302759051322937, "step": 6886 }, { "epoch": 0.9310767306863599, "grad_norm": 1.2502042055130005, "learning_rate": 4.059068945205907e-07, "loss": 0.054131630808115005, "step": 6887 }, { "epoch": 0.9312119240551251, "grad_norm": 0.4585094451904297, "learning_rate": 4.043269413026429e-07, "loss": 0.04849252104759216, "step": 6888 }, { "epoch": 0.9313471174238903, "grad_norm": 0.3046894669532776, "learning_rate": 4.027500269816592e-07, "loss": 0.04442837834358215, "step": 6889 }, { "epoch": 0.9314823107926556, "grad_norm": 0.410313218832016, "learning_rate": 4.011761518859619e-07, "loss": 0.06582614034414291, "step": 6890 }, { "epoch": 0.9316175041614209, "grad_norm": 0.38509640097618103, "learning_rate": 3.996053163432406e-07, "loss": 0.052085891366004944, "step": 6891 }, { "epoch": 0.9317526975301862, "grad_norm": 0.6463513374328613, "learning_rate": 3.980375206805503e-07, "loss": 0.05951409786939621, "step": 6892 }, { "epoch": 0.9318878908989514, "grad_norm": 0.6272176504135132, "learning_rate": 3.9647276522431664e-07, "loss": 0.053722187876701355, "step": 6893 }, { "epoch": 0.9320230842677166, "grad_norm": 0.36715465784072876, "learning_rate": 3.949110503003289e-07, "loss": 0.04019009321928024, "step": 6894 }, { "epoch": 0.932158277636482, "grad_norm": 0.7486395239830017, "learning_rate": 3.9335237623374377e-07, "loss": 0.04625747725367546, "step": 6895 }, { "epoch": 0.9322934710052472, "grad_norm": 0.3732355535030365, "learning_rate": 3.917967433490849e-07, "loss": 0.04526018351316452, "step": 6896 }, { "epoch": 0.9324286643740125, "grad_norm": 0.4516226351261139, "learning_rate": 3.902441519702449e-07, "loss": 0.06396698951721191, "step": 6897 }, { "epoch": 0.9325638577427777, "grad_norm": 0.6238554120063782, "learning_rate": 3.886946024204818e-07, "loss": 0.049856893718242645, "step": 6898 }, { "epoch": 0.9326990511115429, "grad_norm": 0.262699156999588, "learning_rate": 3.871480950224193e-07, "loss": 0.035853028297424316, "step": 6899 }, { "epoch": 0.9328342444803083, "grad_norm": 0.3124576210975647, "learning_rate": 3.856046300980498e-07, "loss": 0.05443498492240906, "step": 6900 }, { "epoch": 0.9329694378490735, "grad_norm": 0.6546632647514343, "learning_rate": 3.8406420796872953e-07, "loss": 0.06108447164297104, "step": 6901 }, { "epoch": 0.9331046312178388, "grad_norm": 0.38207605481147766, "learning_rate": 3.825268289551803e-07, "loss": 0.036447420716285706, "step": 6902 }, { "epoch": 0.933239824586604, "grad_norm": 0.5642752647399902, "learning_rate": 3.8099249337749777e-07, "loss": 0.0575939416885376, "step": 6903 }, { "epoch": 0.9333750179553693, "grad_norm": 0.28798946738243103, "learning_rate": 3.7946120155513465e-07, "loss": 0.03988340497016907, "step": 6904 }, { "epoch": 0.9335102113241346, "grad_norm": 0.5857437252998352, "learning_rate": 3.7793295380691595e-07, "loss": 0.04093194007873535, "step": 6905 }, { "epoch": 0.9336454046928998, "grad_norm": 0.7235367894172668, "learning_rate": 3.7640775045103214e-07, "loss": 0.05671316385269165, "step": 6906 }, { "epoch": 0.933780598061665, "grad_norm": 0.7842723727226257, "learning_rate": 3.7488559180503423e-07, "loss": 0.07119694352149963, "step": 6907 }, { "epoch": 0.9339157914304304, "grad_norm": 0.3284439146518707, "learning_rate": 3.7336647818584866e-07, "loss": 0.052975937724113464, "step": 6908 }, { "epoch": 0.9340509847991956, "grad_norm": 0.7204075455665588, "learning_rate": 3.718504099097625e-07, "loss": 0.04168041795492172, "step": 6909 }, { "epoch": 0.9341861781679609, "grad_norm": 0.2687974274158478, "learning_rate": 3.703373872924265e-07, "loss": 0.03667091578245163, "step": 6910 }, { "epoch": 0.9343213715367261, "grad_norm": 0.4961684048175812, "learning_rate": 3.688274106488604e-07, "loss": 0.05535639822483063, "step": 6911 }, { "epoch": 0.9344565649054913, "grad_norm": 0.7571970820426941, "learning_rate": 3.67320480293451e-07, "loss": 0.04825403541326523, "step": 6912 }, { "epoch": 0.9345917582742567, "grad_norm": 0.4126468300819397, "learning_rate": 3.6581659653994736e-07, "loss": 0.04247164726257324, "step": 6913 }, { "epoch": 0.9347269516430219, "grad_norm": 1.5049036741256714, "learning_rate": 3.64315759701469e-07, "loss": 0.05699065327644348, "step": 6914 }, { "epoch": 0.9348621450117872, "grad_norm": 0.5831983089447021, "learning_rate": 3.6281797009049765e-07, "loss": 0.04462004452943802, "step": 6915 }, { "epoch": 0.9349973383805524, "grad_norm": 0.36182349920272827, "learning_rate": 3.613232280188772e-07, "loss": 0.03545515984296799, "step": 6916 }, { "epoch": 0.9351325317493177, "grad_norm": 0.5539961457252502, "learning_rate": 3.5983153379782363e-07, "loss": 0.06775589287281036, "step": 6917 }, { "epoch": 0.935267725118083, "grad_norm": 0.36537250876426697, "learning_rate": 3.5834288773791854e-07, "loss": 0.03607872873544693, "step": 6918 }, { "epoch": 0.9354029184868482, "grad_norm": 0.3558017611503601, "learning_rate": 3.568572901491007e-07, "loss": 0.045041970908641815, "step": 6919 }, { "epoch": 0.9355381118556135, "grad_norm": 0.9121891856193542, "learning_rate": 3.553747413406827e-07, "loss": 0.04132611304521561, "step": 6920 }, { "epoch": 0.9356733052243787, "grad_norm": 0.3153373599052429, "learning_rate": 3.538952416213376e-07, "loss": 0.03956308960914612, "step": 6921 }, { "epoch": 0.935808498593144, "grad_norm": 0.35840272903442383, "learning_rate": 3.524187912991056e-07, "loss": 0.04421333223581314, "step": 6922 }, { "epoch": 0.9359436919619093, "grad_norm": 0.3468698263168335, "learning_rate": 3.5094539068139254e-07, "loss": 0.049999285489320755, "step": 6923 }, { "epoch": 0.9360788853306745, "grad_norm": 0.6103076934814453, "learning_rate": 3.494750400749663e-07, "loss": 0.041032955050468445, "step": 6924 }, { "epoch": 0.9362140786994398, "grad_norm": 0.4706680178642273, "learning_rate": 3.480077397859638e-07, "loss": 0.056187525391578674, "step": 6925 }, { "epoch": 0.9363492720682051, "grad_norm": 0.3376704156398773, "learning_rate": 3.4654349011988384e-07, "loss": 0.045402638614177704, "step": 6926 }, { "epoch": 0.9364844654369703, "grad_norm": 0.40775826573371887, "learning_rate": 3.4508229138159095e-07, "loss": 0.04560445249080658, "step": 6927 }, { "epoch": 0.9366196588057356, "grad_norm": 0.23570431768894196, "learning_rate": 3.4362414387531516e-07, "loss": 0.04377678781747818, "step": 6928 }, { "epoch": 0.9367548521745008, "grad_norm": 0.5702465772628784, "learning_rate": 3.4216904790464854e-07, "loss": 0.03975013643503189, "step": 6929 }, { "epoch": 0.936890045543266, "grad_norm": 0.6928509473800659, "learning_rate": 3.407170037725521e-07, "loss": 0.062287162989377975, "step": 6930 }, { "epoch": 0.9370252389120314, "grad_norm": 0.22119970619678497, "learning_rate": 3.3926801178134737e-07, "loss": 0.03406835347414017, "step": 6931 }, { "epoch": 0.9371604322807966, "grad_norm": 0.3057532012462616, "learning_rate": 3.3782207223272467e-07, "loss": 0.03304586559534073, "step": 6932 }, { "epoch": 0.9372956256495619, "grad_norm": 0.7759184241294861, "learning_rate": 3.363791854277348e-07, "loss": 0.04740680754184723, "step": 6933 }, { "epoch": 0.9374308190183271, "grad_norm": 0.4912753403186798, "learning_rate": 3.349393516667926e-07, "loss": 0.06619034707546234, "step": 6934 }, { "epoch": 0.9375660123870924, "grad_norm": 0.7480354309082031, "learning_rate": 3.335025712496814e-07, "loss": 0.04873591661453247, "step": 6935 }, { "epoch": 0.9377012057558577, "grad_norm": 1.266300082206726, "learning_rate": 3.320688444755471e-07, "loss": 0.05261167511343956, "step": 6936 }, { "epoch": 0.9378363991246229, "grad_norm": 0.5463661551475525, "learning_rate": 3.306381716428991e-07, "loss": 0.06616979837417603, "step": 6937 }, { "epoch": 0.9379715924933882, "grad_norm": 0.4364592432975769, "learning_rate": 3.2921055304960925e-07, "loss": 0.05242602527141571, "step": 6938 }, { "epoch": 0.9381067858621535, "grad_norm": 0.4664572477340698, "learning_rate": 3.277859889929147e-07, "loss": 0.05553281307220459, "step": 6939 }, { "epoch": 0.9382419792309187, "grad_norm": 0.3258020579814911, "learning_rate": 3.263644797694215e-07, "loss": 0.05469992756843567, "step": 6940 }, { "epoch": 0.938377172599684, "grad_norm": 0.3591028153896332, "learning_rate": 3.2494602567509303e-07, "loss": 0.04743524640798569, "step": 6941 }, { "epoch": 0.9385123659684492, "grad_norm": 0.6162520051002502, "learning_rate": 3.2353062700525794e-07, "loss": 0.05535570904612541, "step": 6942 }, { "epoch": 0.9386475593372146, "grad_norm": 0.9031170010566711, "learning_rate": 3.221182840546122e-07, "loss": 0.046951621770858765, "step": 6943 }, { "epoch": 0.9387827527059798, "grad_norm": 0.8480353355407715, "learning_rate": 3.207089971172089e-07, "loss": 0.06944916397333145, "step": 6944 }, { "epoch": 0.938917946074745, "grad_norm": 0.5644265413284302, "learning_rate": 3.1930276648647504e-07, "loss": 0.05255895107984543, "step": 6945 }, { "epoch": 0.9390531394435103, "grad_norm": 0.5231180191040039, "learning_rate": 3.178995924551914e-07, "loss": 0.05253271386027336, "step": 6946 }, { "epoch": 0.9391883328122755, "grad_norm": 0.3365705609321594, "learning_rate": 3.164994753155059e-07, "loss": 0.03835735097527504, "step": 6947 }, { "epoch": 0.9393235261810409, "grad_norm": 0.6025895476341248, "learning_rate": 3.1510241535893215e-07, "loss": 0.052831098437309265, "step": 6948 }, { "epoch": 0.9394587195498061, "grad_norm": 0.46395939588546753, "learning_rate": 3.1370841287634567e-07, "loss": 0.04790782928466797, "step": 6949 }, { "epoch": 0.9395939129185713, "grad_norm": 0.26668328046798706, "learning_rate": 3.1231746815798436e-07, "loss": 0.036391712725162506, "step": 6950 }, { "epoch": 0.9397291062873366, "grad_norm": 0.882217288017273, "learning_rate": 3.1092958149344985e-07, "loss": 0.06984452903270721, "step": 6951 }, { "epoch": 0.9398642996561019, "grad_norm": 0.6008186340332031, "learning_rate": 3.095447531717077e-07, "loss": 0.05031910538673401, "step": 6952 }, { "epoch": 0.9399994930248672, "grad_norm": 0.4434886574745178, "learning_rate": 3.08162983481089e-07, "loss": 0.04873112589120865, "step": 6953 }, { "epoch": 0.9401346863936324, "grad_norm": 0.7196475267410278, "learning_rate": 3.067842727092801e-07, "loss": 0.04900206997990608, "step": 6954 }, { "epoch": 0.9402698797623976, "grad_norm": 0.26183605194091797, "learning_rate": 3.0540862114334323e-07, "loss": 0.038414888083934784, "step": 6955 }, { "epoch": 0.940405073131163, "grad_norm": 0.35962367057800293, "learning_rate": 3.0403602906969086e-07, "loss": 0.03842412680387497, "step": 6956 }, { "epoch": 0.9405402664999282, "grad_norm": 0.6452633738517761, "learning_rate": 3.0266649677410605e-07, "loss": 0.0571509413421154, "step": 6957 }, { "epoch": 0.9406754598686934, "grad_norm": 0.6688090562820435, "learning_rate": 3.0130002454173243e-07, "loss": 0.04868298023939133, "step": 6958 }, { "epoch": 0.9408106532374587, "grad_norm": 0.6871205568313599, "learning_rate": 2.9993661265707407e-07, "loss": 0.05186744034290314, "step": 6959 }, { "epoch": 0.9409458466062239, "grad_norm": 0.6805575489997864, "learning_rate": 2.985762614040072e-07, "loss": 0.048685796558856964, "step": 6960 }, { "epoch": 0.9410810399749893, "grad_norm": 0.38146278262138367, "learning_rate": 2.972189710657586e-07, "loss": 0.03637781739234924, "step": 6961 }, { "epoch": 0.9412162333437545, "grad_norm": 0.6755660772323608, "learning_rate": 2.958647419249255e-07, "loss": 0.06114444509148598, "step": 6962 }, { "epoch": 0.9413514267125197, "grad_norm": 0.29800012707710266, "learning_rate": 2.9451357426346415e-07, "loss": 0.04383504390716553, "step": 6963 }, { "epoch": 0.941486620081285, "grad_norm": 0.46357131004333496, "learning_rate": 2.9316546836269776e-07, "loss": 0.045333947986364365, "step": 6964 }, { "epoch": 0.9416218134500502, "grad_norm": 1.0406869649887085, "learning_rate": 2.9182042450330516e-07, "loss": 0.04631621390581131, "step": 6965 }, { "epoch": 0.9417570068188156, "grad_norm": 0.6998670697212219, "learning_rate": 2.9047844296533397e-07, "loss": 0.06134534627199173, "step": 6966 }, { "epoch": 0.9418922001875808, "grad_norm": 0.43813422322273254, "learning_rate": 2.8913952402819246e-07, "loss": 0.037293940782547, "step": 6967 }, { "epoch": 0.942027393556346, "grad_norm": 0.44376295804977417, "learning_rate": 2.878036679706492e-07, "loss": 0.06385214626789093, "step": 6968 }, { "epoch": 0.9421625869251113, "grad_norm": 0.4596906900405884, "learning_rate": 2.8647087507083837e-07, "loss": 0.03660021722316742, "step": 6969 }, { "epoch": 0.9422977802938766, "grad_norm": 0.29545512795448303, "learning_rate": 2.8514114560625303e-07, "loss": 0.03944259136915207, "step": 6970 }, { "epoch": 0.9424329736626419, "grad_norm": 0.7356157898902893, "learning_rate": 2.8381447985375007e-07, "loss": 0.04027798771858215, "step": 6971 }, { "epoch": 0.9425681670314071, "grad_norm": 0.6049381494522095, "learning_rate": 2.8249087808954853e-07, "loss": 0.04712092876434326, "step": 6972 }, { "epoch": 0.9427033604001723, "grad_norm": 0.27560052275657654, "learning_rate": 2.811703405892296e-07, "loss": 0.036072008311748505, "step": 6973 }, { "epoch": 0.9428385537689377, "grad_norm": 0.5239424705505371, "learning_rate": 2.798528676277368e-07, "loss": 0.05494248867034912, "step": 6974 }, { "epoch": 0.9429737471377029, "grad_norm": 0.20207205414772034, "learning_rate": 2.785384594793738e-07, "loss": 0.03431403264403343, "step": 6975 }, { "epoch": 0.9431089405064682, "grad_norm": 0.8590747117996216, "learning_rate": 2.772271164178086e-07, "loss": 0.04887323081493378, "step": 6976 }, { "epoch": 0.9432441338752334, "grad_norm": 0.45627400279045105, "learning_rate": 2.759188387160677e-07, "loss": 0.04969898611307144, "step": 6977 }, { "epoch": 0.9433793272439986, "grad_norm": 0.35985031723976135, "learning_rate": 2.746136266465449e-07, "loss": 0.04580122232437134, "step": 6978 }, { "epoch": 0.943514520612764, "grad_norm": 0.5015252232551575, "learning_rate": 2.7331148048098943e-07, "loss": 0.05370482802391052, "step": 6979 }, { "epoch": 0.9436497139815292, "grad_norm": 0.5873124599456787, "learning_rate": 2.7201240049051613e-07, "loss": 0.05057905986905098, "step": 6980 }, { "epoch": 0.9437849073502945, "grad_norm": 0.5648605227470398, "learning_rate": 2.707163869455986e-07, "loss": 0.04481428116559982, "step": 6981 }, { "epoch": 0.9439201007190597, "grad_norm": 0.5584554672241211, "learning_rate": 2.694234401160778e-07, "loss": 0.04871741682291031, "step": 6982 }, { "epoch": 0.944055294087825, "grad_norm": 0.4979788661003113, "learning_rate": 2.6813356027114986e-07, "loss": 0.058538466691970825, "step": 6983 }, { "epoch": 0.9441904874565903, "grad_norm": 0.6593074202537537, "learning_rate": 2.6684674767937346e-07, "loss": 0.06859508156776428, "step": 6984 }, { "epoch": 0.9443256808253555, "grad_norm": 0.5325539708137512, "learning_rate": 2.655630026086708e-07, "loss": 0.053475797176361084, "step": 6985 }, { "epoch": 0.9444608741941207, "grad_norm": 1.0206910371780396, "learning_rate": 2.642823253263249e-07, "loss": 0.06128133833408356, "step": 6986 }, { "epoch": 0.944596067562886, "grad_norm": 0.5368931293487549, "learning_rate": 2.630047160989807e-07, "loss": 0.05463109910488129, "step": 6987 }, { "epoch": 0.9447312609316513, "grad_norm": 0.23954050242900848, "learning_rate": 2.6173017519263875e-07, "loss": 0.04308106005191803, "step": 6988 }, { "epoch": 0.9448664543004166, "grad_norm": 0.8664786219596863, "learning_rate": 2.6045870287267014e-07, "loss": 0.06628285348415375, "step": 6989 }, { "epoch": 0.9450016476691818, "grad_norm": 0.5322293043136597, "learning_rate": 2.5919029940380147e-07, "loss": 0.07109948992729187, "step": 6990 }, { "epoch": 0.945136841037947, "grad_norm": 1.1322778463363647, "learning_rate": 2.5792496505011807e-07, "loss": 0.035068489611148834, "step": 6991 }, { "epoch": 0.9452720344067124, "grad_norm": 0.6810746192932129, "learning_rate": 2.5666270007507266e-07, "loss": 0.07524842023849487, "step": 6992 }, { "epoch": 0.9454072277754776, "grad_norm": 0.7723119854927063, "learning_rate": 2.5540350474147324e-07, "loss": 0.056723274290561676, "step": 6993 }, { "epoch": 0.9455424211442429, "grad_norm": 0.5633419752120972, "learning_rate": 2.5414737931149346e-07, "loss": 0.06861861050128937, "step": 6994 }, { "epoch": 0.9456776145130081, "grad_norm": 0.7289638519287109, "learning_rate": 2.5289432404666246e-07, "loss": 0.06425695866346359, "step": 6995 }, { "epoch": 0.9458128078817734, "grad_norm": 0.7342117428779602, "learning_rate": 2.5164433920787487e-07, "loss": 0.045791588723659515, "step": 6996 }, { "epoch": 0.9459480012505387, "grad_norm": 0.6309106349945068, "learning_rate": 2.503974250553842e-07, "loss": 0.044482797384262085, "step": 6997 }, { "epoch": 0.9460831946193039, "grad_norm": 0.6806005835533142, "learning_rate": 2.491535818488011e-07, "loss": 0.04941936582326889, "step": 6998 }, { "epoch": 0.9462183879880692, "grad_norm": 0.4786623418331146, "learning_rate": 2.479128098471067e-07, "loss": 0.04707799479365349, "step": 6999 }, { "epoch": 0.9463535813568345, "grad_norm": 0.5309879183769226, "learning_rate": 2.466751093086328e-07, "loss": 0.04728540778160095, "step": 7000 }, { "epoch": 0.9464887747255997, "grad_norm": 0.511669397354126, "learning_rate": 2.454404804910748e-07, "loss": 0.03843013942241669, "step": 7001 }, { "epoch": 0.946623968094365, "grad_norm": 0.22840344905853271, "learning_rate": 2.442089236514888e-07, "loss": 0.048430219292640686, "step": 7002 }, { "epoch": 0.9467591614631302, "grad_norm": 0.37283170223236084, "learning_rate": 2.429804390462931e-07, "loss": 0.052125778049230576, "step": 7003 }, { "epoch": 0.9468943548318955, "grad_norm": 0.6303684115409851, "learning_rate": 2.4175502693126293e-07, "loss": 0.03049170784652233, "step": 7004 }, { "epoch": 0.9470295482006608, "grad_norm": 1.169367790222168, "learning_rate": 2.4053268756153933e-07, "loss": 0.05231369659304619, "step": 7005 }, { "epoch": 0.947164741569426, "grad_norm": 0.2846252918243408, "learning_rate": 2.393134211916154e-07, "loss": 0.04040795564651489, "step": 7006 }, { "epoch": 0.9472999349381913, "grad_norm": 0.5095453858375549, "learning_rate": 2.3809722807535128e-07, "loss": 0.05332716554403305, "step": 7007 }, { "epoch": 0.9474351283069565, "grad_norm": 0.5822315216064453, "learning_rate": 2.3688410846596287e-07, "loss": 0.04308965429663658, "step": 7008 }, { "epoch": 0.9475703216757219, "grad_norm": 0.46818676590919495, "learning_rate": 2.3567406261603143e-07, "loss": 0.06459400057792664, "step": 7009 }, { "epoch": 0.9477055150444871, "grad_norm": 0.9598819613456726, "learning_rate": 2.3446709077749206e-07, "loss": 0.07035110890865326, "step": 7010 }, { "epoch": 0.9478407084132523, "grad_norm": 0.8856511116027832, "learning_rate": 2.3326319320164546e-07, "loss": 0.050109125673770905, "step": 7011 }, { "epoch": 0.9479759017820176, "grad_norm": 0.8328272700309753, "learning_rate": 2.320623701391461e-07, "loss": 0.060020022094249725, "step": 7012 }, { "epoch": 0.9481110951507828, "grad_norm": 0.5795353651046753, "learning_rate": 2.30864621840014e-07, "loss": 0.04656301438808441, "step": 7013 }, { "epoch": 0.9482462885195481, "grad_norm": 0.3655354380607605, "learning_rate": 2.2966994855362633e-07, "loss": 0.030804309993982315, "step": 7014 }, { "epoch": 0.9483814818883134, "grad_norm": 0.26527708768844604, "learning_rate": 2.2847835052872079e-07, "loss": 0.03650180250406265, "step": 7015 }, { "epoch": 0.9485166752570786, "grad_norm": 0.4255734086036682, "learning_rate": 2.2728982801339392e-07, "loss": 0.04098595678806305, "step": 7016 }, { "epoch": 0.9486518686258439, "grad_norm": 1.1527483463287354, "learning_rate": 2.261043812551028e-07, "loss": 0.0618458166718483, "step": 7017 }, { "epoch": 0.9487870619946092, "grad_norm": 0.22363893687725067, "learning_rate": 2.249220105006633e-07, "loss": 0.03161557763814926, "step": 7018 }, { "epoch": 0.9489222553633744, "grad_norm": 0.7646569609642029, "learning_rate": 2.2374271599625185e-07, "loss": 0.06502489745616913, "step": 7019 }, { "epoch": 0.9490574487321397, "grad_norm": 0.6240152716636658, "learning_rate": 2.2256649798740204e-07, "loss": 0.048737481236457825, "step": 7020 }, { "epoch": 0.9491926421009049, "grad_norm": 0.3745671510696411, "learning_rate": 2.2139335671901294e-07, "loss": 0.06698073446750641, "step": 7021 }, { "epoch": 0.9493278354696703, "grad_norm": 0.3783004879951477, "learning_rate": 2.2022329243533422e-07, "loss": 0.05842578411102295, "step": 7022 }, { "epoch": 0.9494630288384355, "grad_norm": 0.7516889572143555, "learning_rate": 2.19056305379981e-07, "loss": 0.05865636467933655, "step": 7023 }, { "epoch": 0.9495982222072007, "grad_norm": 0.27407386898994446, "learning_rate": 2.178923957959289e-07, "loss": 0.042855437844991684, "step": 7024 }, { "epoch": 0.949733415575966, "grad_norm": 0.24665077030658722, "learning_rate": 2.1673156392550408e-07, "loss": 0.03831611946225166, "step": 7025 }, { "epoch": 0.9498686089447312, "grad_norm": 0.6068966388702393, "learning_rate": 2.155738100104049e-07, "loss": 0.046118929982185364, "step": 7026 }, { "epoch": 0.9500038023134966, "grad_norm": 0.4965992867946625, "learning_rate": 2.1441913429167682e-07, "loss": 0.057642895728349686, "step": 7027 }, { "epoch": 0.9501389956822618, "grad_norm": 0.24034655094146729, "learning_rate": 2.1326753700973256e-07, "loss": 0.03208189085125923, "step": 7028 }, { "epoch": 0.950274189051027, "grad_norm": 0.8269594311714172, "learning_rate": 2.1211901840434034e-07, "loss": 0.0472969189286232, "step": 7029 }, { "epoch": 0.9504093824197923, "grad_norm": 1.1255297660827637, "learning_rate": 2.1097357871462386e-07, "loss": 0.043352872133255005, "step": 7030 }, { "epoch": 0.9505445757885576, "grad_norm": 0.23174862563610077, "learning_rate": 2.098312181790757e-07, "loss": 0.04218031466007233, "step": 7031 }, { "epoch": 0.9506797691573229, "grad_norm": 0.5022928714752197, "learning_rate": 2.086919370355389e-07, "loss": 0.04457200691103935, "step": 7032 }, { "epoch": 0.9508149625260881, "grad_norm": 1.0669138431549072, "learning_rate": 2.075557355212171e-07, "loss": 0.06347168236970901, "step": 7033 }, { "epoch": 0.9509501558948533, "grad_norm": 0.7271129488945007, "learning_rate": 2.0642261387267268e-07, "loss": 0.060640715062618256, "step": 7034 }, { "epoch": 0.9510853492636187, "grad_norm": 0.8298430442810059, "learning_rate": 2.0529257232583033e-07, "loss": 0.03710779547691345, "step": 7035 }, { "epoch": 0.9512205426323839, "grad_norm": 0.6621572375297546, "learning_rate": 2.0416561111596844e-07, "loss": 0.04770224541425705, "step": 7036 }, { "epoch": 0.9513557360011491, "grad_norm": 0.9314915537834167, "learning_rate": 2.0304173047772933e-07, "loss": 0.04345601797103882, "step": 7037 }, { "epoch": 0.9514909293699144, "grad_norm": 0.4792274534702301, "learning_rate": 2.0192093064510753e-07, "loss": 0.03735507279634476, "step": 7038 }, { "epoch": 0.9516261227386796, "grad_norm": 0.6797763705253601, "learning_rate": 2.0080321185146134e-07, "loss": 0.054027535021305084, "step": 7039 }, { "epoch": 0.951761316107445, "grad_norm": 0.21267877519130707, "learning_rate": 1.996885743295046e-07, "loss": 0.03521095588803291, "step": 7040 }, { "epoch": 0.9518965094762102, "grad_norm": 0.707499086856842, "learning_rate": 1.985770183113117e-07, "loss": 0.06317204236984253, "step": 7041 }, { "epoch": 0.9520317028449754, "grad_norm": 0.3947928547859192, "learning_rate": 1.9746854402831583e-07, "loss": 0.053580671548843384, "step": 7042 }, { "epoch": 0.9521668962137407, "grad_norm": 0.631397008895874, "learning_rate": 1.963631517113057e-07, "loss": 0.049400266259908676, "step": 7043 }, { "epoch": 0.952302089582506, "grad_norm": 0.7041942477226257, "learning_rate": 1.952608415904289e-07, "loss": 0.04562880098819733, "step": 7044 }, { "epoch": 0.9524372829512713, "grad_norm": 0.778236985206604, "learning_rate": 1.9416161389519348e-07, "loss": 0.05960611253976822, "step": 7045 }, { "epoch": 0.9525724763200365, "grad_norm": 0.47910770773887634, "learning_rate": 1.9306546885446475e-07, "loss": 0.04913066327571869, "step": 7046 }, { "epoch": 0.9527076696888017, "grad_norm": 0.3509472906589508, "learning_rate": 1.919724066964651e-07, "loss": 0.04112546890974045, "step": 7047 }, { "epoch": 0.952842863057567, "grad_norm": 0.3091643750667572, "learning_rate": 1.908824276487775e-07, "loss": 0.03803032636642456, "step": 7048 }, { "epoch": 0.9529780564263323, "grad_norm": 0.5412918329238892, "learning_rate": 1.8979553193833876e-07, "loss": 0.053448278456926346, "step": 7049 }, { "epoch": 0.9531132497950976, "grad_norm": 0.3901522755622864, "learning_rate": 1.8871171979144786e-07, "loss": 0.04777824878692627, "step": 7050 }, { "epoch": 0.9532484431638628, "grad_norm": 0.7053197622299194, "learning_rate": 1.8763099143376262e-07, "loss": 0.0679606944322586, "step": 7051 }, { "epoch": 0.953383636532628, "grad_norm": 0.3503236770629883, "learning_rate": 1.8655334709029303e-07, "loss": 0.052497513592243195, "step": 7052 }, { "epoch": 0.9535188299013934, "grad_norm": 0.36755189299583435, "learning_rate": 1.8547878698541132e-07, "loss": 0.038585394620895386, "step": 7053 }, { "epoch": 0.9536540232701586, "grad_norm": 0.4827481210231781, "learning_rate": 1.8440731134284684e-07, "loss": 0.05043557658791542, "step": 7054 }, { "epoch": 0.9537892166389239, "grad_norm": 0.3423571288585663, "learning_rate": 1.833389203856861e-07, "loss": 0.06038116663694382, "step": 7055 }, { "epoch": 0.9539244100076891, "grad_norm": 0.19178050756454468, "learning_rate": 1.8227361433637625e-07, "loss": 0.018407421186566353, "step": 7056 }, { "epoch": 0.9540596033764543, "grad_norm": 0.5763635039329529, "learning_rate": 1.812113934167148e-07, "loss": 0.05605198070406914, "step": 7057 }, { "epoch": 0.9541947967452197, "grad_norm": 0.5263624787330627, "learning_rate": 1.8015225784786483e-07, "loss": 0.0454728789627552, "step": 7058 }, { "epoch": 0.9543299901139849, "grad_norm": 0.32723468542099, "learning_rate": 1.7909620785034663e-07, "loss": 0.04476713389158249, "step": 7059 }, { "epoch": 0.9544651834827502, "grad_norm": 0.3506423234939575, "learning_rate": 1.7804324364402936e-07, "loss": 0.05381958559155464, "step": 7060 }, { "epoch": 0.9546003768515154, "grad_norm": 0.5271151661872864, "learning_rate": 1.769933654481526e-07, "loss": 0.04244597256183624, "step": 7061 }, { "epoch": 0.9547355702202807, "grad_norm": 0.4287923574447632, "learning_rate": 1.7594657348129984e-07, "loss": 0.04769602417945862, "step": 7062 }, { "epoch": 0.954870763589046, "grad_norm": 0.3924559950828552, "learning_rate": 1.749028679614234e-07, "loss": 0.04283842816948891, "step": 7063 }, { "epoch": 0.9550059569578112, "grad_norm": 0.5396997928619385, "learning_rate": 1.7386224910582615e-07, "loss": 0.045995138585567474, "step": 7064 }, { "epoch": 0.9551411503265764, "grad_norm": 0.5138243436813354, "learning_rate": 1.728247171311731e-07, "loss": 0.05613095313310623, "step": 7065 }, { "epoch": 0.9552763436953418, "grad_norm": 0.36675021052360535, "learning_rate": 1.7179027225348142e-07, "loss": 0.049286842346191406, "step": 7066 }, { "epoch": 0.955411537064107, "grad_norm": 0.4823135733604431, "learning_rate": 1.7075891468812722e-07, "loss": 0.0472954660654068, "step": 7067 }, { "epoch": 0.9555467304328723, "grad_norm": 0.27447083592414856, "learning_rate": 1.69730644649847e-07, "loss": 0.04970790445804596, "step": 7068 }, { "epoch": 0.9556819238016375, "grad_norm": 0.2845061719417572, "learning_rate": 1.687054623527312e-07, "loss": 0.043370600789785385, "step": 7069 }, { "epoch": 0.9558171171704027, "grad_norm": 0.7848108410835266, "learning_rate": 1.676833680102291e-07, "loss": 0.04972783476114273, "step": 7070 }, { "epoch": 0.9559523105391681, "grad_norm": 0.3588605225086212, "learning_rate": 1.6666436183514378e-07, "loss": 0.04506923258304596, "step": 7071 }, { "epoch": 0.9560875039079333, "grad_norm": 0.5329291224479675, "learning_rate": 1.6564844403964053e-07, "loss": 0.05122965946793556, "step": 7072 }, { "epoch": 0.9562226972766986, "grad_norm": 0.38017287850379944, "learning_rate": 1.6463561483523682e-07, "loss": 0.04808839410543442, "step": 7073 }, { "epoch": 0.9563578906454638, "grad_norm": 0.31512629985809326, "learning_rate": 1.6362587443281063e-07, "loss": 0.036521174013614655, "step": 7074 }, { "epoch": 0.9564930840142291, "grad_norm": 1.4295697212219238, "learning_rate": 1.626192230425938e-07, "loss": 0.06929187476634979, "step": 7075 }, { "epoch": 0.9566282773829944, "grad_norm": 1.0130048990249634, "learning_rate": 1.6161566087417868e-07, "loss": 0.05602887272834778, "step": 7076 }, { "epoch": 0.9567634707517596, "grad_norm": 0.5397262573242188, "learning_rate": 1.6061518813650977e-07, "loss": 0.054977670311927795, "step": 7077 }, { "epoch": 0.9568986641205249, "grad_norm": 0.6468890905380249, "learning_rate": 1.5961780503789215e-07, "loss": 0.057840749621391296, "step": 7078 }, { "epoch": 0.9570338574892902, "grad_norm": 0.49301132559776306, "learning_rate": 1.5862351178598633e-07, "loss": 0.04878881573677063, "step": 7079 }, { "epoch": 0.9571690508580554, "grad_norm": 0.9630247950553894, "learning_rate": 1.5763230858781008e-07, "loss": 0.0603552907705307, "step": 7080 }, { "epoch": 0.9573042442268207, "grad_norm": 1.3121906518936157, "learning_rate": 1.5664419564973497e-07, "loss": 0.04599609971046448, "step": 7081 }, { "epoch": 0.9574394375955859, "grad_norm": 0.875666618347168, "learning_rate": 1.5565917317749146e-07, "loss": 0.054941367357969284, "step": 7082 }, { "epoch": 0.9575746309643512, "grad_norm": 0.5639085173606873, "learning_rate": 1.5467724137617046e-07, "loss": 0.03386823832988739, "step": 7083 }, { "epoch": 0.9577098243331165, "grad_norm": 0.37282270193099976, "learning_rate": 1.5369840045021178e-07, "loss": 0.0650518536567688, "step": 7084 }, { "epoch": 0.9578450177018817, "grad_norm": 0.6280562281608582, "learning_rate": 1.5272265060341572e-07, "loss": 0.06313121318817139, "step": 7085 }, { "epoch": 0.957980211070647, "grad_norm": 0.6540583968162537, "learning_rate": 1.517499920389398e-07, "loss": 0.0653175413608551, "step": 7086 }, { "epoch": 0.9581154044394122, "grad_norm": 0.8435095548629761, "learning_rate": 1.5078042495929534e-07, "loss": 0.06455972790718079, "step": 7087 }, { "epoch": 0.9582505978081776, "grad_norm": 0.4780527353286743, "learning_rate": 1.498139495663542e-07, "loss": 0.053380683064460754, "step": 7088 }, { "epoch": 0.9583857911769428, "grad_norm": 0.709287166595459, "learning_rate": 1.4885056606133707e-07, "loss": 0.04134759306907654, "step": 7089 }, { "epoch": 0.958520984545708, "grad_norm": 0.820174515247345, "learning_rate": 1.478902746448302e-07, "loss": 0.049540046602487564, "step": 7090 }, { "epoch": 0.9586561779144733, "grad_norm": 0.6051679253578186, "learning_rate": 1.469330755167686e-07, "loss": 0.042211562395095825, "step": 7091 }, { "epoch": 0.9587913712832385, "grad_norm": 0.39660871028900146, "learning_rate": 1.4597896887644458e-07, "loss": 0.0401935800909996, "step": 7092 }, { "epoch": 0.9589265646520038, "grad_norm": 0.529358983039856, "learning_rate": 1.4502795492251418e-07, "loss": 0.06666940450668335, "step": 7093 }, { "epoch": 0.9590617580207691, "grad_norm": 0.45158839225769043, "learning_rate": 1.4408003385297742e-07, "loss": 0.043891970068216324, "step": 7094 }, { "epoch": 0.9591969513895343, "grad_norm": 0.29564619064331055, "learning_rate": 1.4313520586519968e-07, "loss": 0.04253445565700531, "step": 7095 }, { "epoch": 0.9593321447582996, "grad_norm": 0.8895026445388794, "learning_rate": 1.4219347115589863e-07, "loss": 0.04468303546309471, "step": 7096 }, { "epoch": 0.9594673381270649, "grad_norm": 0.3830501437187195, "learning_rate": 1.4125482992114914e-07, "loss": 0.03943002223968506, "step": 7097 }, { "epoch": 0.9596025314958301, "grad_norm": 1.0268203020095825, "learning_rate": 1.403192823563798e-07, "loss": 0.04540792852640152, "step": 7098 }, { "epoch": 0.9597377248645954, "grad_norm": 0.26480627059936523, "learning_rate": 1.3938682865637654e-07, "loss": 0.05046455189585686, "step": 7099 }, { "epoch": 0.9598729182333606, "grad_norm": 0.42334091663360596, "learning_rate": 1.38457469015284e-07, "loss": 0.04434821382164955, "step": 7100 }, { "epoch": 0.960008111602126, "grad_norm": 0.47198688983917236, "learning_rate": 1.3753120362659576e-07, "loss": 0.04567451775074005, "step": 7101 }, { "epoch": 0.9601433049708912, "grad_norm": 0.4214702844619751, "learning_rate": 1.3660803268316925e-07, "loss": 0.052460767328739166, "step": 7102 }, { "epoch": 0.9602784983396564, "grad_norm": 0.3947446048259735, "learning_rate": 1.3568795637721065e-07, "loss": 0.04251347482204437, "step": 7103 }, { "epoch": 0.9604136917084217, "grad_norm": 0.5492876172065735, "learning_rate": 1.347709749002851e-07, "loss": 0.048519060015678406, "step": 7104 }, { "epoch": 0.9605488850771869, "grad_norm": 0.7770055532455444, "learning_rate": 1.338570884433149e-07, "loss": 0.040394216775894165, "step": 7105 }, { "epoch": 0.9606840784459523, "grad_norm": 0.38754794001579285, "learning_rate": 1.3294629719657448e-07, "loss": 0.04493887722492218, "step": 7106 }, { "epoch": 0.9608192718147175, "grad_norm": 0.64454185962677, "learning_rate": 1.3203860134969548e-07, "loss": 0.07401294261217117, "step": 7107 }, { "epoch": 0.9609544651834827, "grad_norm": 0.6045639514923096, "learning_rate": 1.3113400109166508e-07, "loss": 0.03798668086528778, "step": 7108 }, { "epoch": 0.961089658552248, "grad_norm": 0.6573352217674255, "learning_rate": 1.3023249661082592e-07, "loss": 0.052593767642974854, "step": 7109 }, { "epoch": 0.9612248519210133, "grad_norm": 0.7348781824111938, "learning_rate": 1.2933408809487623e-07, "loss": 0.054810114204883575, "step": 7110 }, { "epoch": 0.9613600452897786, "grad_norm": 0.7497594356536865, "learning_rate": 1.2843877573086972e-07, "loss": 0.07394813001155853, "step": 7111 }, { "epoch": 0.9614952386585438, "grad_norm": 0.7536496520042419, "learning_rate": 1.2754655970521556e-07, "loss": 0.0443432480096817, "step": 7112 }, { "epoch": 0.961630432027309, "grad_norm": 0.37718063592910767, "learning_rate": 1.2665744020367686e-07, "loss": 0.04746967554092407, "step": 7113 }, { "epoch": 0.9617656253960744, "grad_norm": 0.4490594267845154, "learning_rate": 1.2577141741137388e-07, "loss": 0.05376119539141655, "step": 7114 }, { "epoch": 0.9619008187648396, "grad_norm": 0.3001408278942108, "learning_rate": 1.248884915127807e-07, "loss": 0.03593166172504425, "step": 7115 }, { "epoch": 0.9620360121336049, "grad_norm": 0.586151123046875, "learning_rate": 1.2400866269172694e-07, "loss": 0.05728622153401375, "step": 7116 }, { "epoch": 0.9621712055023701, "grad_norm": 0.241166353225708, "learning_rate": 1.2313193113139777e-07, "loss": 0.037278253585100174, "step": 7117 }, { "epoch": 0.9623063988711353, "grad_norm": 0.3017977476119995, "learning_rate": 1.2225829701433545e-07, "loss": 0.0425596684217453, "step": 7118 }, { "epoch": 0.9624415922399007, "grad_norm": 0.27473878860473633, "learning_rate": 1.2138776052243116e-07, "loss": 0.046478137373924255, "step": 7119 }, { "epoch": 0.9625767856086659, "grad_norm": 0.616244375705719, "learning_rate": 1.2052032183693996e-07, "loss": 0.05840327590703964, "step": 7120 }, { "epoch": 0.9627119789774311, "grad_norm": 0.43871229887008667, "learning_rate": 1.196559811384623e-07, "loss": 0.05451767519116402, "step": 7121 }, { "epoch": 0.9628471723461964, "grad_norm": 0.3072042465209961, "learning_rate": 1.1879473860696266e-07, "loss": 0.04456089437007904, "step": 7122 }, { "epoch": 0.9629823657149617, "grad_norm": 0.33909162878990173, "learning_rate": 1.179365944217542e-07, "loss": 0.05005289614200592, "step": 7123 }, { "epoch": 0.963117559083727, "grad_norm": 0.5105652213096619, "learning_rate": 1.1708154876150735e-07, "loss": 0.06939264386892319, "step": 7124 }, { "epoch": 0.9632527524524922, "grad_norm": 0.45631834864616394, "learning_rate": 1.1622960180424801e-07, "loss": 0.062086015939712524, "step": 7125 }, { "epoch": 0.9633879458212574, "grad_norm": 0.2777315676212311, "learning_rate": 1.1538075372735435e-07, "loss": 0.04628264904022217, "step": 7126 }, { "epoch": 0.9635231391900227, "grad_norm": 0.4102330207824707, "learning_rate": 1.1453500470756328e-07, "loss": 0.047382205724716187, "step": 7127 }, { "epoch": 0.963658332558788, "grad_norm": 0.4439198672771454, "learning_rate": 1.1369235492096397e-07, "loss": 0.03975337743759155, "step": 7128 }, { "epoch": 0.9637935259275533, "grad_norm": 0.33770933747291565, "learning_rate": 1.1285280454299774e-07, "loss": 0.05452870577573776, "step": 7129 }, { "epoch": 0.9639287192963185, "grad_norm": 0.2832639515399933, "learning_rate": 1.1201635374846808e-07, "loss": 0.05067891627550125, "step": 7130 }, { "epoch": 0.9640639126650837, "grad_norm": 0.5231163501739502, "learning_rate": 1.1118300271152404e-07, "loss": 0.05393754318356514, "step": 7131 }, { "epoch": 0.9641991060338491, "grad_norm": 0.6559244394302368, "learning_rate": 1.1035275160567682e-07, "loss": 0.04904860258102417, "step": 7132 }, { "epoch": 0.9643342994026143, "grad_norm": 0.38227379322052, "learning_rate": 1.0952560060378813e-07, "loss": 0.04571916162967682, "step": 7133 }, { "epoch": 0.9644694927713796, "grad_norm": 0.7259240746498108, "learning_rate": 1.0870154987807523e-07, "loss": 0.04718397557735443, "step": 7134 }, { "epoch": 0.9646046861401448, "grad_norm": 1.2006630897521973, "learning_rate": 1.0788059960010921e-07, "loss": 0.05734813213348389, "step": 7135 }, { "epoch": 0.96473987950891, "grad_norm": 0.5730720162391663, "learning_rate": 1.0706274994081499e-07, "loss": 0.03975367546081543, "step": 7136 }, { "epoch": 0.9648750728776754, "grad_norm": 0.6688952445983887, "learning_rate": 1.0624800107047805e-07, "loss": 0.05123265087604523, "step": 7137 }, { "epoch": 0.9650102662464406, "grad_norm": 0.5579339265823364, "learning_rate": 1.0543635315872934e-07, "loss": 0.056104280054569244, "step": 7138 }, { "epoch": 0.9651454596152059, "grad_norm": 0.38516145944595337, "learning_rate": 1.0462780637455871e-07, "loss": 0.05657939240336418, "step": 7139 }, { "epoch": 0.9652806529839711, "grad_norm": 0.45473095774650574, "learning_rate": 1.0382236088631148e-07, "loss": 0.0546586811542511, "step": 7140 }, { "epoch": 0.9654158463527364, "grad_norm": 0.47887957096099854, "learning_rate": 1.0302001686168349e-07, "loss": 0.0643608570098877, "step": 7141 }, { "epoch": 0.9655510397215017, "grad_norm": 0.4869871139526367, "learning_rate": 1.0222077446772949e-07, "loss": 0.05020198971033096, "step": 7142 }, { "epoch": 0.9656862330902669, "grad_norm": 1.108733892440796, "learning_rate": 1.0142463387085465e-07, "loss": 0.049881063401699066, "step": 7143 }, { "epoch": 0.9658214264590322, "grad_norm": 0.43896281719207764, "learning_rate": 1.0063159523682142e-07, "loss": 0.05548959970474243, "step": 7144 }, { "epoch": 0.9659566198277975, "grad_norm": 0.7983781695365906, "learning_rate": 9.984165873074102e-08, "loss": 0.051128603518009186, "step": 7145 }, { "epoch": 0.9660918131965627, "grad_norm": 1.0796408653259277, "learning_rate": 9.905482451708526e-08, "loss": 0.05442555248737335, "step": 7146 }, { "epoch": 0.966227006565328, "grad_norm": 0.6328137516975403, "learning_rate": 9.827109275967638e-08, "loss": 0.06515852361917496, "step": 7147 }, { "epoch": 0.9663621999340932, "grad_norm": 0.4078194499015808, "learning_rate": 9.749046362169223e-08, "loss": 0.043637827038764954, "step": 7148 }, { "epoch": 0.9664973933028584, "grad_norm": 1.3172117471694946, "learning_rate": 9.671293726566443e-08, "loss": 0.06495068967342377, "step": 7149 }, { "epoch": 0.9666325866716238, "grad_norm": 0.3541133403778076, "learning_rate": 9.593851385347518e-08, "loss": 0.04651781916618347, "step": 7150 }, { "epoch": 0.966767780040389, "grad_norm": 0.3603734076023102, "learning_rate": 9.516719354636716e-08, "loss": 0.05565696954727173, "step": 7151 }, { "epoch": 0.9669029734091543, "grad_norm": 0.39890196919441223, "learning_rate": 9.439897650493024e-08, "loss": 0.05666684731841087, "step": 7152 }, { "epoch": 0.9670381667779195, "grad_norm": 0.28428933024406433, "learning_rate": 9.363386288911313e-08, "loss": 0.04845336079597473, "step": 7153 }, { "epoch": 0.9671733601466848, "grad_norm": 0.36257874965667725, "learning_rate": 9.287185285821675e-08, "loss": 0.04322141036391258, "step": 7154 }, { "epoch": 0.9673085535154501, "grad_norm": 0.7991976141929626, "learning_rate": 9.211294657089587e-08, "loss": 0.06890275329351425, "step": 7155 }, { "epoch": 0.9674437468842153, "grad_norm": 0.44612377882003784, "learning_rate": 9.135714418515573e-08, "loss": 0.050186991691589355, "step": 7156 }, { "epoch": 0.9675789402529806, "grad_norm": 0.645582377910614, "learning_rate": 9.060444585836381e-08, "loss": 0.04344116523861885, "step": 7157 }, { "epoch": 0.9677141336217459, "grad_norm": 0.705514132976532, "learning_rate": 8.985485174722974e-08, "loss": 0.047943323850631714, "step": 7158 }, { "epoch": 0.9678493269905111, "grad_norm": 0.3527999520301819, "learning_rate": 8.910836200782868e-08, "loss": 0.044599514454603195, "step": 7159 }, { "epoch": 0.9679845203592764, "grad_norm": 0.5106353163719177, "learning_rate": 8.836497679557964e-08, "loss": 0.0542168915271759, "step": 7160 }, { "epoch": 0.9681197137280416, "grad_norm": 0.4286550283432007, "learning_rate": 8.762469626526048e-08, "loss": 0.0699646919965744, "step": 7161 }, { "epoch": 0.968254907096807, "grad_norm": 0.41920235753059387, "learning_rate": 8.688752057100457e-08, "loss": 0.03273099660873413, "step": 7162 }, { "epoch": 0.9683901004655722, "grad_norm": 0.38292351365089417, "learning_rate": 8.615344986629082e-08, "loss": 0.03990975767374039, "step": 7163 }, { "epoch": 0.9685252938343374, "grad_norm": 0.4461629092693329, "learning_rate": 8.542248430396027e-08, "loss": 0.04121055454015732, "step": 7164 }, { "epoch": 0.9686604872031027, "grad_norm": 0.3781512975692749, "learning_rate": 8.469462403620287e-08, "loss": 0.04370203614234924, "step": 7165 }, { "epoch": 0.9687956805718679, "grad_norm": 0.470559298992157, "learning_rate": 8.39698692145624e-08, "loss": 0.05675918236374855, "step": 7166 }, { "epoch": 0.9689308739406333, "grad_norm": 0.39212754368782043, "learning_rate": 8.324821998993648e-08, "loss": 0.060417890548706055, "step": 7167 }, { "epoch": 0.9690660673093985, "grad_norm": 0.5165414810180664, "learning_rate": 8.252967651257826e-08, "loss": 0.054235540330410004, "step": 7168 }, { "epoch": 0.9692012606781637, "grad_norm": 0.6558708548545837, "learning_rate": 8.181423893208973e-08, "loss": 0.056982845067977905, "step": 7169 }, { "epoch": 0.969336454046929, "grad_norm": 0.44899582862854004, "learning_rate": 8.110190739743172e-08, "loss": 0.038351260125637054, "step": 7170 }, { "epoch": 0.9694716474156942, "grad_norm": 0.3913290500640869, "learning_rate": 8.03926820569123e-08, "loss": 0.05124525725841522, "step": 7171 }, { "epoch": 0.9696068407844595, "grad_norm": 0.391361266374588, "learning_rate": 7.968656305819833e-08, "loss": 0.04919546842575073, "step": 7172 }, { "epoch": 0.9697420341532248, "grad_norm": 0.48596450686454773, "learning_rate": 7.898355054830719e-08, "loss": 0.05240263417363167, "step": 7173 }, { "epoch": 0.96987722752199, "grad_norm": 0.9218450784683228, "learning_rate": 7.828364467360849e-08, "loss": 0.05866679549217224, "step": 7174 }, { "epoch": 0.9700124208907553, "grad_norm": 0.5449346303939819, "learning_rate": 7.758684557982731e-08, "loss": 0.05991728603839874, "step": 7175 }, { "epoch": 0.9701476142595206, "grad_norm": 0.5859743356704712, "learning_rate": 7.689315341204262e-08, "loss": 0.04650276526808739, "step": 7176 }, { "epoch": 0.9702828076282858, "grad_norm": 1.184984803199768, "learning_rate": 7.62025683146822e-08, "loss": 0.05737435445189476, "step": 7177 }, { "epoch": 0.9704180009970511, "grad_norm": 0.4459211528301239, "learning_rate": 7.551509043152937e-08, "loss": 0.04736095666885376, "step": 7178 }, { "epoch": 0.9705531943658163, "grad_norm": 0.3969394564628601, "learning_rate": 7.483071990572132e-08, "loss": 0.04486561939120293, "step": 7179 }, { "epoch": 0.9706883877345817, "grad_norm": 0.3110262155532837, "learning_rate": 7.414945687975072e-08, "loss": 0.03385123610496521, "step": 7180 }, { "epoch": 0.9708235811033469, "grad_norm": 0.46259990334510803, "learning_rate": 7.347130149545578e-08, "loss": 0.0484342947602272, "step": 7181 }, { "epoch": 0.9709587744721121, "grad_norm": 0.48633572459220886, "learning_rate": 7.279625389403355e-08, "loss": 0.04074862599372864, "step": 7182 }, { "epoch": 0.9710939678408774, "grad_norm": 0.40412136912345886, "learning_rate": 7.212431421603327e-08, "loss": 0.053075142204761505, "step": 7183 }, { "epoch": 0.9712291612096426, "grad_norm": 0.7840222716331482, "learning_rate": 7.145548260135638e-08, "loss": 0.04890257492661476, "step": 7184 }, { "epoch": 0.971364354578408, "grad_norm": 0.6063414216041565, "learning_rate": 7.078975918925645e-08, "loss": 0.051963746547698975, "step": 7185 }, { "epoch": 0.9714995479471732, "grad_norm": 0.6529886722564697, "learning_rate": 7.012714411834098e-08, "loss": 0.056936487555503845, "step": 7186 }, { "epoch": 0.9716347413159384, "grad_norm": 0.3436601161956787, "learning_rate": 6.946763752656959e-08, "loss": 0.03833438456058502, "step": 7187 }, { "epoch": 0.9717699346847037, "grad_norm": 1.0106734037399292, "learning_rate": 6.881123955125579e-08, "loss": 0.0608120858669281, "step": 7188 }, { "epoch": 0.971905128053469, "grad_norm": 0.377293199300766, "learning_rate": 6.815795032906524e-08, "loss": 0.05356192961335182, "step": 7189 }, { "epoch": 0.9720403214222343, "grad_norm": 0.45921361446380615, "learning_rate": 6.750776999601415e-08, "loss": 0.04495931789278984, "step": 7190 }, { "epoch": 0.9721755147909995, "grad_norm": 0.5022160410881042, "learning_rate": 6.68606986874759e-08, "loss": 0.07296773791313171, "step": 7191 }, { "epoch": 0.9723107081597647, "grad_norm": 0.5256549715995789, "learning_rate": 6.62167365381744e-08, "loss": 0.058248236775398254, "step": 7192 }, { "epoch": 0.97244590152853, "grad_norm": 0.44644707441329956, "learning_rate": 6.557588368218237e-08, "loss": 0.05250312760472298, "step": 7193 }, { "epoch": 0.9725810948972953, "grad_norm": 0.546574592590332, "learning_rate": 6.493814025293476e-08, "loss": 0.042590174823999405, "step": 7194 }, { "epoch": 0.9727162882660606, "grad_norm": 0.36539652943611145, "learning_rate": 6.430350638320704e-08, "loss": 0.04494466632604599, "step": 7195 }, { "epoch": 0.9728514816348258, "grad_norm": 0.41228771209716797, "learning_rate": 6.367198220513848e-08, "loss": 0.05681285262107849, "step": 7196 }, { "epoch": 0.972986675003591, "grad_norm": 0.3799850344657898, "learning_rate": 6.304356785021226e-08, "loss": 0.04737436771392822, "step": 7197 }, { "epoch": 0.9731218683723564, "grad_norm": 0.5011929869651794, "learning_rate": 6.241826344926704e-08, "loss": 0.058452725410461426, "step": 7198 }, { "epoch": 0.9732570617411216, "grad_norm": 0.4156959354877472, "learning_rate": 6.17960691324987e-08, "loss": 0.04878932610154152, "step": 7199 }, { "epoch": 0.9733922551098868, "grad_norm": 0.5496264696121216, "learning_rate": 6.117698502944857e-08, "loss": 0.05052957683801651, "step": 7200 }, { "epoch": 0.9735274484786521, "grad_norm": 0.523495078086853, "learning_rate": 6.056101126901358e-08, "loss": 0.05461758375167847, "step": 7201 }, { "epoch": 0.9736626418474174, "grad_norm": 0.6230342984199524, "learning_rate": 5.994814797944281e-08, "loss": 0.0575384721159935, "step": 7202 }, { "epoch": 0.9737978352161827, "grad_norm": 0.3428310453891754, "learning_rate": 5.933839528833751e-08, "loss": 0.04423064738512039, "step": 7203 }, { "epoch": 0.9739330285849479, "grad_norm": 0.2559383809566498, "learning_rate": 5.873175332265279e-08, "loss": 0.039129048585891724, "step": 7204 }, { "epoch": 0.9740682219537131, "grad_norm": 0.8830430507659912, "learning_rate": 5.812822220869096e-08, "loss": 0.04372261092066765, "step": 7205 }, { "epoch": 0.9742034153224784, "grad_norm": 0.4651319682598114, "learning_rate": 5.752780207211483e-08, "loss": 0.05418848991394043, "step": 7206 }, { "epoch": 0.9743386086912437, "grad_norm": 0.6852202415466309, "learning_rate": 5.693049303793274e-08, "loss": 0.054455917328596115, "step": 7207 }, { "epoch": 0.974473802060009, "grad_norm": 0.3617790639400482, "learning_rate": 5.6336295230508536e-08, "loss": 0.05614317208528519, "step": 7208 }, { "epoch": 0.9746089954287742, "grad_norm": 0.4937783181667328, "learning_rate": 5.5745208773558266e-08, "loss": 0.04383295774459839, "step": 7209 }, { "epoch": 0.9747441887975394, "grad_norm": 1.3043458461761475, "learning_rate": 5.515723379014681e-08, "loss": 0.06821008026599884, "step": 7210 }, { "epoch": 0.9748793821663048, "grad_norm": 0.593988835811615, "learning_rate": 5.4572370402694583e-08, "loss": 0.06013453006744385, "step": 7211 }, { "epoch": 0.97501457553507, "grad_norm": 1.361351490020752, "learning_rate": 5.399061873297417e-08, "loss": 0.04968570917844772, "step": 7212 }, { "epoch": 0.9751497689038353, "grad_norm": 0.6637198328971863, "learning_rate": 5.341197890210869e-08, "loss": 0.051711756736040115, "step": 7213 }, { "epoch": 0.9752849622726005, "grad_norm": 0.5340448617935181, "learning_rate": 5.283645103057344e-08, "loss": 0.0720614492893219, "step": 7214 }, { "epoch": 0.9754201556413657, "grad_norm": 0.3676905035972595, "learning_rate": 5.226403523819756e-08, "loss": 0.04148132726550102, "step": 7215 }, { "epoch": 0.9755553490101311, "grad_norm": 0.7414591312408447, "learning_rate": 5.169473164416072e-08, "loss": 0.06510700285434723, "step": 7216 }, { "epoch": 0.9756905423788963, "grad_norm": 0.48349106311798096, "learning_rate": 5.112854036699477e-08, "loss": 0.0573880709707737, "step": 7217 }, { "epoch": 0.9758257357476616, "grad_norm": 0.37112605571746826, "learning_rate": 5.0565461524583745e-08, "loss": 0.053566765040159225, "step": 7218 }, { "epoch": 0.9759609291164268, "grad_norm": 0.5526524186134338, "learning_rate": 5.0005495234163865e-08, "loss": 0.048564400523900986, "step": 7219 }, { "epoch": 0.9760961224851921, "grad_norm": 1.4637776613235474, "learning_rate": 4.9448641612321874e-08, "loss": 0.05839678645133972, "step": 7220 }, { "epoch": 0.9762313158539574, "grad_norm": 0.49976494908332825, "learning_rate": 4.889490077500003e-08, "loss": 0.04154215753078461, "step": 7221 }, { "epoch": 0.9763665092227226, "grad_norm": 0.3476382791996002, "learning_rate": 4.8344272837489434e-08, "loss": 0.05859503149986267, "step": 7222 }, { "epoch": 0.9765017025914879, "grad_norm": 0.4435412585735321, "learning_rate": 4.779675791443172e-08, "loss": 0.044656869024038315, "step": 7223 }, { "epoch": 0.9766368959602532, "grad_norm": 0.6044033169746399, "learning_rate": 4.72523561198257e-08, "loss": 0.04710233211517334, "step": 7224 }, { "epoch": 0.9767720893290184, "grad_norm": 0.3266955316066742, "learning_rate": 4.6711067567014044e-08, "loss": 0.035173751413822174, "step": 7225 }, { "epoch": 0.9769072826977837, "grad_norm": 0.25391367077827454, "learning_rate": 4.6172892368701595e-08, "loss": 0.03522966429591179, "step": 7226 }, { "epoch": 0.9770424760665489, "grad_norm": 0.5141711831092834, "learning_rate": 4.5637830636935385e-08, "loss": 0.037283703684806824, "step": 7227 }, { "epoch": 0.9771776694353141, "grad_norm": 0.4566790461540222, "learning_rate": 4.5105882483119643e-08, "loss": 0.07286709547042847, "step": 7228 }, { "epoch": 0.9773128628040795, "grad_norm": 0.4071083664894104, "learning_rate": 4.4577048018007436e-08, "loss": 0.04396776109933853, "step": 7229 }, { "epoch": 0.9774480561728447, "grad_norm": 0.5844578742980957, "learning_rate": 4.405132735170569e-08, "loss": 0.07535350322723389, "step": 7230 }, { "epoch": 0.97758324954161, "grad_norm": 1.004151463508606, "learning_rate": 4.3528720593675184e-08, "loss": 0.06708019971847534, "step": 7231 }, { "epoch": 0.9777184429103752, "grad_norm": 0.24426449835300446, "learning_rate": 4.300922785271888e-08, "loss": 0.05083953216671944, "step": 7232 }, { "epoch": 0.9778536362791405, "grad_norm": 0.5798628926277161, "learning_rate": 4.249284923700358e-08, "loss": 0.051040276885032654, "step": 7233 }, { "epoch": 0.9779888296479058, "grad_norm": 0.7276411652565002, "learning_rate": 4.197958485404163e-08, "loss": 0.057106249034404755, "step": 7234 }, { "epoch": 0.978124023016671, "grad_norm": 0.36441540718078613, "learning_rate": 4.1469434810694206e-08, "loss": 0.044418517500162125, "step": 7235 }, { "epoch": 0.9782592163854363, "grad_norm": 1.381124496459961, "learning_rate": 4.096239921317968e-08, "loss": 0.05549690127372742, "step": 7236 }, { "epoch": 0.9783944097542016, "grad_norm": 0.371625155210495, "learning_rate": 4.045847816706361e-08, "loss": 0.0550915002822876, "step": 7237 }, { "epoch": 0.9785296031229668, "grad_norm": 0.3654465973377228, "learning_rate": 3.9957671777268724e-08, "loss": 0.03545597568154335, "step": 7238 }, { "epoch": 0.9786647964917321, "grad_norm": 1.3275188207626343, "learning_rate": 3.945998014806163e-08, "loss": 0.056324053555727005, "step": 7239 }, { "epoch": 0.9787999898604973, "grad_norm": 0.6294799447059631, "learning_rate": 3.896540338306609e-08, "loss": 0.05544418841600418, "step": 7240 }, { "epoch": 0.9789351832292627, "grad_norm": 0.429246187210083, "learning_rate": 3.847394158525641e-08, "loss": 0.05259708687663078, "step": 7241 }, { "epoch": 0.9790703765980279, "grad_norm": 0.6167306900024414, "learning_rate": 3.798559485695574e-08, "loss": 0.0666983425617218, "step": 7242 }, { "epoch": 0.9792055699667931, "grad_norm": 0.4300607442855835, "learning_rate": 3.7500363299842746e-08, "loss": 0.02788853645324707, "step": 7243 }, { "epoch": 0.9793407633355584, "grad_norm": 0.3931945264339447, "learning_rate": 3.701824701494327e-08, "loss": 0.022445425391197205, "step": 7244 }, { "epoch": 0.9794759567043236, "grad_norm": 0.3176533281803131, "learning_rate": 3.653924610263703e-08, "loss": 0.03755953907966614, "step": 7245 }, { "epoch": 0.979611150073089, "grad_norm": 0.4524339735507965, "learning_rate": 3.6063360662654255e-08, "loss": 0.04998893290758133, "step": 7246 }, { "epoch": 0.9797463434418542, "grad_norm": 0.85714191198349, "learning_rate": 3.559059079407734e-08, "loss": 0.05953269824385643, "step": 7247 }, { "epoch": 0.9798815368106194, "grad_norm": 0.5563637614250183, "learning_rate": 3.512093659533922e-08, "loss": 0.04398603364825249, "step": 7248 }, { "epoch": 0.9800167301793847, "grad_norm": 0.5393894910812378, "learning_rate": 3.4654398164225e-08, "loss": 0.056876182556152344, "step": 7249 }, { "epoch": 0.98015192354815, "grad_norm": 0.6972790956497192, "learning_rate": 3.4190975597870325e-08, "loss": 0.06028540059924126, "step": 7250 }, { "epoch": 0.9802871169169153, "grad_norm": 0.3229658603668213, "learning_rate": 3.373066899276134e-08, "loss": 0.03550612926483154, "step": 7251 }, { "epoch": 0.9804223102856805, "grad_norm": 0.2454824298620224, "learning_rate": 3.3273478444736386e-08, "loss": 0.03997718542814255, "step": 7252 }, { "epoch": 0.9805575036544457, "grad_norm": 0.39571788907051086, "learning_rate": 3.281940404898764e-08, "loss": 0.05327256768941879, "step": 7253 }, { "epoch": 0.980692697023211, "grad_norm": 0.4503594636917114, "learning_rate": 3.236844590005117e-08, "loss": 0.05812521651387215, "step": 7254 }, { "epoch": 0.9808278903919763, "grad_norm": 0.4966477155685425, "learning_rate": 3.192060409182351e-08, "loss": 0.05891026556491852, "step": 7255 }, { "epoch": 0.9809630837607415, "grad_norm": 0.3571639955043793, "learning_rate": 3.147587871754509e-08, "loss": 0.05026090145111084, "step": 7256 }, { "epoch": 0.9810982771295068, "grad_norm": 0.27719926834106445, "learning_rate": 3.1034269869810174e-08, "loss": 0.03677530586719513, "step": 7257 }, { "epoch": 0.981233470498272, "grad_norm": 0.29793864488601685, "learning_rate": 3.05957776405652e-08, "loss": 0.04622465372085571, "step": 7258 }, { "epoch": 0.9813686638670374, "grad_norm": 0.7389929294586182, "learning_rate": 3.016040212110549e-08, "loss": 0.061945840716362, "step": 7259 }, { "epoch": 0.9815038572358026, "grad_norm": 1.515032172203064, "learning_rate": 2.9728143402078522e-08, "loss": 0.0556643009185791, "step": 7260 }, { "epoch": 0.9816390506045678, "grad_norm": 0.5657045245170593, "learning_rate": 2.9299001573483975e-08, "loss": 0.05606083199381828, "step": 7261 }, { "epoch": 0.9817742439733331, "grad_norm": 0.537739634513855, "learning_rate": 2.8872976724670375e-08, "loss": 0.06261782348155975, "step": 7262 }, { "epoch": 0.9819094373420983, "grad_norm": 0.5054749846458435, "learning_rate": 2.8450068944338436e-08, "loss": 0.04781985282897949, "step": 7263 }, { "epoch": 0.9820446307108637, "grad_norm": 0.5242066383361816, "learning_rate": 2.803027832054106e-08, "loss": 0.06356542557477951, "step": 7264 }, { "epoch": 0.9821798240796289, "grad_norm": 0.4420137107372284, "learning_rate": 2.7613604940679995e-08, "loss": 0.033886268734931946, "step": 7265 }, { "epoch": 0.9823150174483941, "grad_norm": 0.37018951773643494, "learning_rate": 2.7200048891509176e-08, "loss": 0.057096101343631744, "step": 7266 }, { "epoch": 0.9824502108171594, "grad_norm": 0.3043867349624634, "learning_rate": 2.67896102591314e-08, "loss": 0.04797044396400452, "step": 7267 }, { "epoch": 0.9825854041859247, "grad_norm": 0.3050529956817627, "learning_rate": 2.6382289129004978e-08, "loss": 0.057027190923690796, "step": 7268 }, { "epoch": 0.98272059755469, "grad_norm": 0.3412294387817383, "learning_rate": 2.5978085585935395e-08, "loss": 0.040994659066200256, "step": 7269 }, { "epoch": 0.9828557909234552, "grad_norm": 0.43741175532341003, "learning_rate": 2.5576999714078676e-08, "loss": 0.051810264587402344, "step": 7270 }, { "epoch": 0.9829909842922204, "grad_norm": 0.20743736624717712, "learning_rate": 2.517903159694468e-08, "loss": 0.04013640806078911, "step": 7271 }, { "epoch": 0.9831261776609858, "grad_norm": 0.7194942235946655, "learning_rate": 2.4784181317390465e-08, "loss": 0.0506615936756134, "step": 7272 }, { "epoch": 0.983261371029751, "grad_norm": 0.7069981694221497, "learning_rate": 2.4392448957628598e-08, "loss": 0.06042274460196495, "step": 7273 }, { "epoch": 0.9833965643985163, "grad_norm": 0.47161486744880676, "learning_rate": 2.4003834599217177e-08, "loss": 0.05850675702095032, "step": 7274 }, { "epoch": 0.9835317577672815, "grad_norm": 0.4753275513648987, "learning_rate": 2.3618338323071474e-08, "loss": 0.05263178050518036, "step": 7275 }, { "epoch": 0.9836669511360467, "grad_norm": 0.6359841823577881, "learning_rate": 2.3235960209448958e-08, "loss": 0.0439833328127861, "step": 7276 }, { "epoch": 0.9838021445048121, "grad_norm": 0.2887997329235077, "learning_rate": 2.2856700337967606e-08, "loss": 0.03405828773975372, "step": 7277 }, { "epoch": 0.9839373378735773, "grad_norm": 0.3563024699687958, "learning_rate": 2.2480558787587592e-08, "loss": 0.0542268306016922, "step": 7278 }, { "epoch": 0.9840725312423426, "grad_norm": 0.5194105505943298, "learning_rate": 2.2107535636626263e-08, "loss": 0.04978272318840027, "step": 7279 }, { "epoch": 0.9842077246111078, "grad_norm": 0.3321051299571991, "learning_rate": 2.1737630962746502e-08, "loss": 0.03914583474397659, "step": 7280 }, { "epoch": 0.984342917979873, "grad_norm": 0.5193450450897217, "learning_rate": 2.1370844842966696e-08, "loss": 0.05838894844055176, "step": 7281 }, { "epoch": 0.9844781113486384, "grad_norm": 0.4899522066116333, "learning_rate": 2.100717735365243e-08, "loss": 0.05410711467266083, "step": 7282 }, { "epoch": 0.9846133047174036, "grad_norm": 0.998993456363678, "learning_rate": 2.0646628570521464e-08, "loss": 0.04839441180229187, "step": 7283 }, { "epoch": 0.9847484980861688, "grad_norm": 0.34424448013305664, "learning_rate": 2.028919856864375e-08, "loss": 0.03327178955078125, "step": 7284 }, { "epoch": 0.9848836914549342, "grad_norm": 0.7587503790855408, "learning_rate": 1.9934887422434766e-08, "loss": 0.04919227957725525, "step": 7285 }, { "epoch": 0.9850188848236994, "grad_norm": 0.3807278275489807, "learning_rate": 1.9583695205665496e-08, "loss": 0.04222244769334793, "step": 7286 }, { "epoch": 0.9851540781924647, "grad_norm": 0.918523907661438, "learning_rate": 1.9235621991457454e-08, "loss": 0.05059755593538284, "step": 7287 }, { "epoch": 0.9852892715612299, "grad_norm": 0.6230126023292542, "learning_rate": 1.889066785227933e-08, "loss": 0.0633402019739151, "step": 7288 }, { "epoch": 0.9854244649299951, "grad_norm": 0.3672540485858917, "learning_rate": 1.854883285995368e-08, "loss": 0.0501851849257946, "step": 7289 }, { "epoch": 0.9855596582987605, "grad_norm": 0.6481570601463318, "learning_rate": 1.8210117085651902e-08, "loss": 0.04359808564186096, "step": 7290 }, { "epoch": 0.9856948516675257, "grad_norm": 0.344828337430954, "learning_rate": 1.7874520599894252e-08, "loss": 0.03428077697753906, "step": 7291 }, { "epoch": 0.985830045036291, "grad_norm": 0.23351861536502838, "learning_rate": 1.7542043472558166e-08, "loss": 0.038654983043670654, "step": 7292 }, { "epoch": 0.9859652384050562, "grad_norm": 0.39137551188468933, "learning_rate": 1.7212685772864945e-08, "loss": 0.046825893223285675, "step": 7293 }, { "epoch": 0.9861004317738215, "grad_norm": 0.562049388885498, "learning_rate": 1.68864475693864e-08, "loss": 0.0369291752576828, "step": 7294 }, { "epoch": 0.9862356251425868, "grad_norm": 0.30741167068481445, "learning_rate": 1.6563328930051526e-08, "loss": 0.04968493431806564, "step": 7295 }, { "epoch": 0.986370818511352, "grad_norm": 0.5242623090744019, "learning_rate": 1.624332992213151e-08, "loss": 0.03748743236064911, "step": 7296 }, { "epoch": 0.9865060118801173, "grad_norm": 0.499889612197876, "learning_rate": 1.5926450612254728e-08, "loss": 0.044719040393829346, "step": 7297 }, { "epoch": 0.9866412052488825, "grad_norm": 1.1401457786560059, "learning_rate": 1.5612691066395068e-08, "loss": 0.05591359734535217, "step": 7298 }, { "epoch": 0.9867763986176478, "grad_norm": 0.27081045508384705, "learning_rate": 1.530205134987861e-08, "loss": 0.03869227319955826, "step": 7299 }, { "epoch": 0.9869115919864131, "grad_norm": 0.46528851985931396, "learning_rate": 1.499453152738528e-08, "loss": 0.04791422188282013, "step": 7300 }, { "epoch": 0.9870467853551783, "grad_norm": 0.404367595911026, "learning_rate": 1.4690131662938866e-08, "loss": 0.037147656083106995, "step": 7301 }, { "epoch": 0.9871819787239436, "grad_norm": 0.41163381934165955, "learning_rate": 1.438885181991867e-08, "loss": 0.03467421233654022, "step": 7302 }, { "epoch": 0.9873171720927089, "grad_norm": 0.5218137502670288, "learning_rate": 1.4090692061052846e-08, "loss": 0.04829150438308716, "step": 7303 }, { "epoch": 0.9874523654614741, "grad_norm": 0.31034398078918457, "learning_rate": 1.3795652448420071e-08, "loss": 0.03866346925497055, "step": 7304 }, { "epoch": 0.9875875588302394, "grad_norm": 0.34017080068588257, "learning_rate": 1.3503733043447874e-08, "loss": 0.040759533643722534, "step": 7305 }, { "epoch": 0.9877227521990046, "grad_norm": 0.2991945147514343, "learning_rate": 1.3214933906915971e-08, "loss": 0.04314536601305008, "step": 7306 }, { "epoch": 0.9878579455677698, "grad_norm": 0.48050668835639954, "learning_rate": 1.2929255098954596e-08, "loss": 0.05446822568774223, "step": 7307 }, { "epoch": 0.9879931389365352, "grad_norm": 0.6828495264053345, "learning_rate": 1.2646696679042835e-08, "loss": 0.04760288447141647, "step": 7308 }, { "epoch": 0.9881283323053004, "grad_norm": 0.5227488279342651, "learning_rate": 1.2367258706010298e-08, "loss": 0.04110647737979889, "step": 7309 }, { "epoch": 0.9882635256740657, "grad_norm": 0.495807945728302, "learning_rate": 1.2090941238040443e-08, "loss": 0.03955543041229248, "step": 7310 }, { "epoch": 0.9883987190428309, "grad_norm": 0.7100663781166077, "learning_rate": 1.1817744332660584e-08, "loss": 0.032148346304893494, "step": 7311 }, { "epoch": 0.9885339124115962, "grad_norm": 0.7787774205207825, "learning_rate": 1.1547668046751891e-08, "loss": 0.04325483739376068, "step": 7312 }, { "epoch": 0.9886691057803615, "grad_norm": 0.5496942400932312, "learning_rate": 1.1280712436549379e-08, "loss": 0.04591679573059082, "step": 7313 }, { "epoch": 0.9888042991491267, "grad_norm": 0.47885429859161377, "learning_rate": 1.1016877557630257e-08, "loss": 0.05158200114965439, "step": 7314 }, { "epoch": 0.988939492517892, "grad_norm": 0.42680996656417847, "learning_rate": 1.0756163464928915e-08, "loss": 0.05484379827976227, "step": 7315 }, { "epoch": 0.9890746858866573, "grad_norm": 0.7590004205703735, "learning_rate": 1.0498570212726932e-08, "loss": 0.05555178225040436, "step": 7316 }, { "epoch": 0.9892098792554225, "grad_norm": 0.32244136929512024, "learning_rate": 1.024409785465641e-08, "loss": 0.039124730974435806, "step": 7317 }, { "epoch": 0.9893450726241878, "grad_norm": 0.7999218106269836, "learning_rate": 9.992746443699962e-09, "loss": 0.06894140690565109, "step": 7318 }, { "epoch": 0.989480265992953, "grad_norm": 0.7047271728515625, "learning_rate": 9.744516032190731e-09, "loss": 0.06663292646408081, "step": 7319 }, { "epoch": 0.9896154593617184, "grad_norm": 0.92720627784729, "learning_rate": 9.499406671809041e-09, "loss": 0.058796972036361694, "step": 7320 }, { "epoch": 0.9897506527304836, "grad_norm": 0.615882933139801, "learning_rate": 9.2574184135924e-09, "loss": 0.05456940829753876, "step": 7321 }, { "epoch": 0.9898858460992488, "grad_norm": 0.49301859736442566, "learning_rate": 9.018551307920508e-09, "loss": 0.05672220140695572, "step": 7322 }, { "epoch": 0.9900210394680141, "grad_norm": 1.0820434093475342, "learning_rate": 8.782805404526917e-09, "loss": 0.04235593229532242, "step": 7323 }, { "epoch": 0.9901562328367793, "grad_norm": 0.39856624603271484, "learning_rate": 8.55018075249736e-09, "loss": 0.061092808842659, "step": 7324 }, { "epoch": 0.9902914262055447, "grad_norm": 0.46612387895584106, "learning_rate": 8.320677400264764e-09, "loss": 0.049432266503572464, "step": 7325 }, { "epoch": 0.9904266195743099, "grad_norm": 1.114904522895813, "learning_rate": 8.094295395610906e-09, "loss": 0.048036329448223114, "step": 7326 }, { "epoch": 0.9905618129430751, "grad_norm": 0.7147827744483948, "learning_rate": 7.87103478567308e-09, "loss": 0.05065537989139557, "step": 7327 }, { "epoch": 0.9906970063118404, "grad_norm": 0.4050542414188385, "learning_rate": 7.65089561693244e-09, "loss": 0.050760142505168915, "step": 7328 }, { "epoch": 0.9908321996806057, "grad_norm": 0.7346510887145996, "learning_rate": 7.433877935225652e-09, "loss": 0.04969797655940056, "step": 7329 }, { "epoch": 0.990967393049371, "grad_norm": 0.38470184803009033, "learning_rate": 7.219981785733243e-09, "loss": 0.05018511041998863, "step": 7330 }, { "epoch": 0.9911025864181362, "grad_norm": 0.2662709057331085, "learning_rate": 7.009207212992919e-09, "loss": 0.040013380348682404, "step": 7331 }, { "epoch": 0.9912377797869014, "grad_norm": 0.8572955131530762, "learning_rate": 6.801554260889575e-09, "loss": 0.05664151906967163, "step": 7332 }, { "epoch": 0.9913729731556667, "grad_norm": 0.4344453811645508, "learning_rate": 6.5970229726552976e-09, "loss": 0.044277727603912354, "step": 7333 }, { "epoch": 0.991508166524432, "grad_norm": 0.7193469405174255, "learning_rate": 6.3956133908743556e-09, "loss": 0.04339740052819252, "step": 7334 }, { "epoch": 0.9916433598931972, "grad_norm": 0.33341583609580994, "learning_rate": 6.197325557483202e-09, "loss": 0.04601786285638809, "step": 7335 }, { "epoch": 0.9917785532619625, "grad_norm": 0.4186420738697052, "learning_rate": 6.002159513765482e-09, "loss": 0.05796504020690918, "step": 7336 }, { "epoch": 0.9919137466307277, "grad_norm": 0.5352140665054321, "learning_rate": 5.810115300355357e-09, "loss": 0.0758514553308487, "step": 7337 }, { "epoch": 0.9920489399994931, "grad_norm": 0.312301903963089, "learning_rate": 5.621192957239174e-09, "loss": 0.05176236480474472, "step": 7338 }, { "epoch": 0.9921841333682583, "grad_norm": 0.5666396021842957, "learning_rate": 5.435392523748806e-09, "loss": 0.049953341484069824, "step": 7339 }, { "epoch": 0.9923193267370235, "grad_norm": 0.6477637887001038, "learning_rate": 5.252714038571638e-09, "loss": 0.046043433248996735, "step": 7340 }, { "epoch": 0.9924545201057888, "grad_norm": 0.44991573691368103, "learning_rate": 5.073157539742246e-09, "loss": 0.04542979598045349, "step": 7341 }, { "epoch": 0.992589713474554, "grad_norm": 0.42734628915786743, "learning_rate": 4.896723064642394e-09, "loss": 0.05863310396671295, "step": 7342 }, { "epoch": 0.9927249068433194, "grad_norm": 0.7168428301811218, "learning_rate": 4.723410650009363e-09, "loss": 0.03537128120660782, "step": 7343 }, { "epoch": 0.9928601002120846, "grad_norm": 0.33194559812545776, "learning_rate": 4.553220331925956e-09, "loss": 0.044486235827207565, "step": 7344 }, { "epoch": 0.9929952935808498, "grad_norm": 0.7351543307304382, "learning_rate": 4.38615214582716e-09, "loss": 0.045731037855148315, "step": 7345 }, { "epoch": 0.9931304869496151, "grad_norm": 0.40694817900657654, "learning_rate": 4.2222061265001496e-09, "loss": 0.047724440693855286, "step": 7346 }, { "epoch": 0.9932656803183804, "grad_norm": 0.5287313461303711, "learning_rate": 4.0613823080742905e-09, "loss": 0.04364553838968277, "step": 7347 }, { "epoch": 0.9934008736871457, "grad_norm": 0.5800458192825317, "learning_rate": 3.903680724037795e-09, "loss": 0.06558582186698914, "step": 7348 }, { "epoch": 0.9935360670559109, "grad_norm": 0.46178337931632996, "learning_rate": 3.749101407224398e-09, "loss": 0.04394470155239105, "step": 7349 }, { "epoch": 0.9936712604246761, "grad_norm": 0.4155826270580292, "learning_rate": 3.597644389818355e-09, "loss": 0.03946772962808609, "step": 7350 }, { "epoch": 0.9938064537934415, "grad_norm": 0.3430546224117279, "learning_rate": 3.4493097033527767e-09, "loss": 0.04388341307640076, "step": 7351 }, { "epoch": 0.9939416471622067, "grad_norm": 0.7640652060508728, "learning_rate": 3.3040973787112904e-09, "loss": 0.045907557010650635, "step": 7352 }, { "epoch": 0.994076840530972, "grad_norm": 0.5630175471305847, "learning_rate": 3.162007446129711e-09, "loss": 0.06320783495903015, "step": 7353 }, { "epoch": 0.9942120338997372, "grad_norm": 0.5912644863128662, "learning_rate": 3.023039935191041e-09, "loss": 0.047093722969293594, "step": 7354 }, { "epoch": 0.9943472272685024, "grad_norm": 0.46691006422042847, "learning_rate": 2.887194874830468e-09, "loss": 0.04925764724612236, "step": 7355 }, { "epoch": 0.9944824206372678, "grad_norm": 0.7257679104804993, "learning_rate": 2.7544722933287026e-09, "loss": 0.054604671895504, "step": 7356 }, { "epoch": 0.994617614006033, "grad_norm": 0.5620920062065125, "learning_rate": 2.6248722183203066e-09, "loss": 0.045784588903188705, "step": 7357 }, { "epoch": 0.9947528073747983, "grad_norm": 0.2936725616455078, "learning_rate": 2.498394676790361e-09, "loss": 0.039904750883579254, "step": 7358 }, { "epoch": 0.9948880007435635, "grad_norm": 0.34694939851760864, "learning_rate": 2.375039695071135e-09, "loss": 0.052568770945072174, "step": 7359 }, { "epoch": 0.9950231941123288, "grad_norm": 0.4866679310798645, "learning_rate": 2.2548072988454184e-09, "loss": 0.05424186587333679, "step": 7360 }, { "epoch": 0.9951583874810941, "grad_norm": 0.2569344639778137, "learning_rate": 2.1376975131465194e-09, "loss": 0.04274074733257294, "step": 7361 }, { "epoch": 0.9952935808498593, "grad_norm": 0.5606085062026978, "learning_rate": 2.023710362356601e-09, "loss": 0.047488436102867126, "step": 7362 }, { "epoch": 0.9954287742186245, "grad_norm": 1.2286579608917236, "learning_rate": 1.9128458702100117e-09, "loss": 0.05946382135152817, "step": 7363 }, { "epoch": 0.9955639675873899, "grad_norm": 0.8543102741241455, "learning_rate": 1.8051040597882873e-09, "loss": 0.06839191913604736, "step": 7364 }, { "epoch": 0.9956991609561551, "grad_norm": 0.40325114130973816, "learning_rate": 1.70048495352515e-09, "loss": 0.06515640020370483, "step": 7365 }, { "epoch": 0.9958343543249204, "grad_norm": 0.7575832009315491, "learning_rate": 1.5989885731998443e-09, "loss": 0.05808024853467941, "step": 7366 }, { "epoch": 0.9959695476936856, "grad_norm": 0.5866435766220093, "learning_rate": 1.5006149399487966e-09, "loss": 0.054545819759368896, "step": 7367 }, { "epoch": 0.9961047410624508, "grad_norm": 0.41578561067581177, "learning_rate": 1.4053640742489604e-09, "loss": 0.06123358756303787, "step": 7368 }, { "epoch": 0.9962399344312162, "grad_norm": 0.5700839757919312, "learning_rate": 1.3132359959361351e-09, "loss": 0.043250590562820435, "step": 7369 }, { "epoch": 0.9963751277999814, "grad_norm": 0.3385492265224457, "learning_rate": 1.2242307241899787e-09, "loss": 0.05139123648405075, "step": 7370 }, { "epoch": 0.9965103211687467, "grad_norm": 0.3921484053134918, "learning_rate": 1.1383482775406685e-09, "loss": 0.05334556847810745, "step": 7371 }, { "epoch": 0.9966455145375119, "grad_norm": 0.6032009124755859, "learning_rate": 1.0555886738738973e-09, "loss": 0.06666934490203857, "step": 7372 }, { "epoch": 0.9967807079062772, "grad_norm": 0.3122635781764984, "learning_rate": 9.75951930415886e-10, "loss": 0.0473453551530838, "step": 7373 }, { "epoch": 0.9969159012750425, "grad_norm": 0.7638356685638428, "learning_rate": 8.994380637483701e-10, "loss": 0.049236640334129333, "step": 7374 }, { "epoch": 0.9970510946438077, "grad_norm": 0.5076047778129578, "learning_rate": 8.260470898036054e-10, "loss": 0.06502559781074524, "step": 7375 }, { "epoch": 0.997186288012573, "grad_norm": 0.37870505452156067, "learning_rate": 7.557790238627016e-10, "loss": 0.05796322226524353, "step": 7376 }, { "epoch": 0.9973214813813382, "grad_norm": 1.0089482069015503, "learning_rate": 6.886338805522918e-10, "loss": 0.05884271115064621, "step": 7377 }, { "epoch": 0.9974566747501035, "grad_norm": 0.9313562512397766, "learning_rate": 6.246116738561903e-10, "loss": 0.05640513077378273, "step": 7378 }, { "epoch": 0.9975918681188688, "grad_norm": 0.3580704629421234, "learning_rate": 5.637124171004038e-10, "loss": 0.05452676862478256, "step": 7379 }, { "epoch": 0.997727061487634, "grad_norm": 0.5312066078186035, "learning_rate": 5.059361229681203e-10, "loss": 0.05440554767847061, "step": 7380 }, { "epoch": 0.9978622548563993, "grad_norm": 0.5021882653236389, "learning_rate": 4.5128280348638583e-10, "loss": 0.03760679066181183, "step": 7381 }, { "epoch": 0.9979974482251646, "grad_norm": 0.4162086844444275, "learning_rate": 3.9975247003443127e-10, "loss": 0.04282546043395996, "step": 7382 }, { "epoch": 0.9981326415939298, "grad_norm": 0.33936846256256104, "learning_rate": 3.51345133342007e-10, "loss": 0.041175272315740585, "step": 7383 }, { "epoch": 0.9982678349626951, "grad_norm": 0.3153817355632782, "learning_rate": 3.060608034877177e-10, "loss": 0.04674961790442467, "step": 7384 }, { "epoch": 0.9984030283314603, "grad_norm": 0.3183005452156067, "learning_rate": 2.638994898990221e-10, "loss": 0.053490906953811646, "step": 7385 }, { "epoch": 0.9985382217002257, "grad_norm": 0.5567049980163574, "learning_rate": 2.2486120135556398e-10, "loss": 0.060450583696365356, "step": 7386 }, { "epoch": 0.9986734150689909, "grad_norm": 0.5934447050094604, "learning_rate": 1.889459459841758e-10, "loss": 0.05848591774702072, "step": 7387 }, { "epoch": 0.9988086084377561, "grad_norm": 0.319583535194397, "learning_rate": 1.56153731263875e-10, "loss": 0.05173078551888466, "step": 7388 }, { "epoch": 0.9989438018065214, "grad_norm": 0.7217959761619568, "learning_rate": 1.2648456402086784e-10, "loss": 0.05946068465709686, "step": 7389 }, { "epoch": 0.9990789951752866, "grad_norm": 0.866469144821167, "learning_rate": 9.99384504318801e-11, "loss": 0.05586791783571243, "step": 7390 }, { "epoch": 0.9992141885440519, "grad_norm": 1.0748348236083984, "learning_rate": 7.651539602582247e-11, "loss": 0.05418063700199127, "step": 7391 }, { "epoch": 0.9993493819128172, "grad_norm": 0.4735819697380066, "learning_rate": 5.6215405678794464e-11, "loss": 0.056563325226306915, "step": 7392 }, { "epoch": 0.9994845752815824, "grad_norm": 0.43438920378685, "learning_rate": 3.9038483615749795e-11, "loss": 0.0597258135676384, "step": 7393 }, { "epoch": 0.9996197686503477, "grad_norm": 0.21488040685653687, "learning_rate": 2.4984633415492398e-11, "loss": 0.03498855233192444, "step": 7394 }, { "epoch": 0.999754962019113, "grad_norm": 0.5102145671844482, "learning_rate": 1.4053858004015041e-11, "loss": 0.05587794631719589, "step": 7395 }, { "epoch": 0.9998901553878782, "grad_norm": 0.25195667147636414, "learning_rate": 6.246159654499373e-12, "loss": 0.03818102926015854, "step": 7396 }, { "epoch": 1.0, "grad_norm": 0.4720384180545807, "learning_rate": 1.561539995642569e-12, "loss": 0.048416413366794586, "step": 7397 }, { "epoch": 1.0, "step": 7397, "total_flos": 9.247070602262269e+19, "train_loss": 0.06301417348261007, "train_runtime": 121181.5701, "train_samples_per_second": 15.626, "train_steps_per_second": 0.061 } ], "logging_steps": 1.0, "max_steps": 7397, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.247070602262269e+19, "train_batch_size": 2, "trial_name": null, "trial_params": null }