{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 8674, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00023057412958266084, "grad_norm": 0.5456158480642083, "learning_rate": 0.0, "loss": 1.2793785333633423, "step": 1 }, { "epoch": 0.0004611482591653217, "grad_norm": 0.5348414425588685, "learning_rate": 4.6082949308755755e-09, "loss": 1.2810249328613281, "step": 2 }, { "epoch": 0.0006917223887479825, "grad_norm": 0.5742665952103186, "learning_rate": 9.216589861751151e-09, "loss": 1.5180970430374146, "step": 3 }, { "epoch": 0.0009222965183306433, "grad_norm": 0.47570843593061296, "learning_rate": 1.3824884792626728e-08, "loss": 1.2771815061569214, "step": 4 }, { "epoch": 0.001152870647913304, "grad_norm": 0.6179854753010914, "learning_rate": 1.8433179723502302e-08, "loss": 1.6275714635849, "step": 5 }, { "epoch": 0.001383444777495965, "grad_norm": 0.5728287935763549, "learning_rate": 2.304147465437788e-08, "loss": 1.4852838516235352, "step": 6 }, { "epoch": 0.0016140189070786258, "grad_norm": 0.7402806033919309, "learning_rate": 2.7649769585253456e-08, "loss": 1.3845010995864868, "step": 7 }, { "epoch": 0.0018445930366612867, "grad_norm": 0.5357861516775319, "learning_rate": 3.225806451612903e-08, "loss": 1.2716574668884277, "step": 8 }, { "epoch": 0.0020751671662439476, "grad_norm": 0.49378309074438254, "learning_rate": 3.6866359447004604e-08, "loss": 1.4046194553375244, "step": 9 }, { "epoch": 0.002305741295826608, "grad_norm": 0.5231726157264511, "learning_rate": 4.1474654377880186e-08, "loss": 1.4988269805908203, "step": 10 }, { "epoch": 0.002536315425409269, "grad_norm": 0.5469518790093721, "learning_rate": 4.608294930875576e-08, "loss": 1.3523340225219727, "step": 11 }, { "epoch": 0.00276688955499193, "grad_norm": 0.5125117134786147, "learning_rate": 5.069124423963134e-08, "loss": 1.3664941787719727, "step": 12 }, { "epoch": 0.0029974636845745907, "grad_norm": 0.5526794406387441, "learning_rate": 5.529953917050691e-08, "loss": 1.4892609119415283, "step": 13 }, { "epoch": 0.0032280378141572516, "grad_norm": 0.5197262159341672, "learning_rate": 5.990783410138249e-08, "loss": 1.305836796760559, "step": 14 }, { "epoch": 0.0034586119437399125, "grad_norm": 0.5214120337499729, "learning_rate": 6.451612903225806e-08, "loss": 1.3458774089813232, "step": 15 }, { "epoch": 0.0036891860733225734, "grad_norm": 0.5249821302153419, "learning_rate": 6.912442396313364e-08, "loss": 1.4305222034454346, "step": 16 }, { "epoch": 0.003919760202905234, "grad_norm": 0.48597332722440695, "learning_rate": 7.373271889400921e-08, "loss": 1.4247705936431885, "step": 17 }, { "epoch": 0.004150334332487895, "grad_norm": 0.5492563451667527, "learning_rate": 7.834101382488478e-08, "loss": 1.4151098728179932, "step": 18 }, { "epoch": 0.004380908462070556, "grad_norm": 0.4931832122178826, "learning_rate": 8.294930875576037e-08, "loss": 1.4633708000183105, "step": 19 }, { "epoch": 0.004611482591653216, "grad_norm": 0.4601872454406169, "learning_rate": 8.755760368663594e-08, "loss": 1.2271082401275635, "step": 20 }, { "epoch": 0.004842056721235877, "grad_norm": 0.5482366075993729, "learning_rate": 9.216589861751152e-08, "loss": 1.493757724761963, "step": 21 }, { "epoch": 0.005072630850818538, "grad_norm": 0.5190439230451068, "learning_rate": 9.677419354838709e-08, "loss": 1.446916103363037, "step": 22 }, { "epoch": 0.005303204980401199, "grad_norm": 0.5010656217784003, "learning_rate": 1.0138248847926267e-07, "loss": 1.4575269222259521, "step": 23 }, { "epoch": 0.00553377910998386, "grad_norm": 0.5983934917725938, "learning_rate": 1.0599078341013824e-07, "loss": 1.5000505447387695, "step": 24 }, { "epoch": 0.005764353239566521, "grad_norm": 0.5264341016273323, "learning_rate": 1.1059907834101383e-07, "loss": 1.32895827293396, "step": 25 }, { "epoch": 0.005994927369149181, "grad_norm": 0.5507902323042685, "learning_rate": 1.152073732718894e-07, "loss": 1.479337215423584, "step": 26 }, { "epoch": 0.006225501498731842, "grad_norm": 0.4597707182389027, "learning_rate": 1.1981566820276498e-07, "loss": 1.3543293476104736, "step": 27 }, { "epoch": 0.006456075628314503, "grad_norm": 0.4984681813259071, "learning_rate": 1.2442396313364054e-07, "loss": 1.3075106143951416, "step": 28 }, { "epoch": 0.006686649757897164, "grad_norm": 0.540668752320374, "learning_rate": 1.2903225806451611e-07, "loss": 1.2077248096466064, "step": 29 }, { "epoch": 0.006917223887479825, "grad_norm": 0.5053904313535789, "learning_rate": 1.336405529953917e-07, "loss": 1.2841781377792358, "step": 30 }, { "epoch": 0.0071477980170624855, "grad_norm": 0.5007265235886551, "learning_rate": 1.3824884792626728e-07, "loss": 1.4022557735443115, "step": 31 }, { "epoch": 0.007378372146645147, "grad_norm": 0.5376464155945276, "learning_rate": 1.4285714285714285e-07, "loss": 1.4971141815185547, "step": 32 }, { "epoch": 0.007608946276227807, "grad_norm": 0.49485432736210644, "learning_rate": 1.4746543778801842e-07, "loss": 1.3699426651000977, "step": 33 }, { "epoch": 0.007839520405810468, "grad_norm": 0.602690054138726, "learning_rate": 1.52073732718894e-07, "loss": 1.466570258140564, "step": 34 }, { "epoch": 0.008070094535393129, "grad_norm": 0.544784030735669, "learning_rate": 1.5668202764976955e-07, "loss": 1.3031455278396606, "step": 35 }, { "epoch": 0.00830066866497579, "grad_norm": 0.5516628365932859, "learning_rate": 1.6129032258064515e-07, "loss": 1.3989369869232178, "step": 36 }, { "epoch": 0.00853124279455845, "grad_norm": 0.5375908894429152, "learning_rate": 1.6589861751152074e-07, "loss": 1.41139817237854, "step": 37 }, { "epoch": 0.008761816924141111, "grad_norm": 0.4923010186613349, "learning_rate": 1.705069124423963e-07, "loss": 1.305363655090332, "step": 38 }, { "epoch": 0.008992391053723773, "grad_norm": 0.5782996548067549, "learning_rate": 1.7511520737327188e-07, "loss": 1.3931915760040283, "step": 39 }, { "epoch": 0.009222965183306432, "grad_norm": 0.5425552369520273, "learning_rate": 1.7972350230414745e-07, "loss": 1.4728009700775146, "step": 40 }, { "epoch": 0.009453539312889093, "grad_norm": 0.5162050268750099, "learning_rate": 1.8433179723502305e-07, "loss": 1.4165544509887695, "step": 41 }, { "epoch": 0.009684113442471755, "grad_norm": 0.509079818266607, "learning_rate": 1.889400921658986e-07, "loss": 1.3693115711212158, "step": 42 }, { "epoch": 0.009914687572054416, "grad_norm": 0.5804116282906935, "learning_rate": 1.9354838709677418e-07, "loss": 1.468721866607666, "step": 43 }, { "epoch": 0.010145261701637076, "grad_norm": 0.5466645633601509, "learning_rate": 1.9815668202764975e-07, "loss": 1.4732704162597656, "step": 44 }, { "epoch": 0.010375835831219737, "grad_norm": 0.4534942899185725, "learning_rate": 2.0276497695852535e-07, "loss": 1.2579209804534912, "step": 45 }, { "epoch": 0.010606409960802398, "grad_norm": 0.4766380716605293, "learning_rate": 2.073732718894009e-07, "loss": 1.3587429523468018, "step": 46 }, { "epoch": 0.010836984090385058, "grad_norm": 0.5409254453286721, "learning_rate": 2.1198156682027649e-07, "loss": 1.368800401687622, "step": 47 }, { "epoch": 0.01106755821996772, "grad_norm": 0.5103994243466702, "learning_rate": 2.1658986175115208e-07, "loss": 1.2960132360458374, "step": 48 }, { "epoch": 0.01129813234955038, "grad_norm": 0.47493679434319974, "learning_rate": 2.2119815668202765e-07, "loss": 1.3035235404968262, "step": 49 }, { "epoch": 0.011528706479133042, "grad_norm": 0.5271868916321076, "learning_rate": 2.2580645161290322e-07, "loss": 1.5074443817138672, "step": 50 }, { "epoch": 0.011759280608715702, "grad_norm": 0.5381217045242119, "learning_rate": 2.304147465437788e-07, "loss": 1.4689760208129883, "step": 51 }, { "epoch": 0.011989854738298363, "grad_norm": 0.4629483381608022, "learning_rate": 2.3502304147465438e-07, "loss": 1.3542251586914062, "step": 52 }, { "epoch": 0.012220428867881024, "grad_norm": 0.4592532760230554, "learning_rate": 2.3963133640552995e-07, "loss": 1.3521728515625, "step": 53 }, { "epoch": 0.012451002997463684, "grad_norm": 0.5030837073491258, "learning_rate": 2.442396313364055e-07, "loss": 1.3577494621276855, "step": 54 }, { "epoch": 0.012681577127046345, "grad_norm": 0.5438911836333451, "learning_rate": 2.488479262672811e-07, "loss": 1.459476351737976, "step": 55 }, { "epoch": 0.012912151256629006, "grad_norm": 0.52516269169267, "learning_rate": 2.534562211981567e-07, "loss": 1.484410047531128, "step": 56 }, { "epoch": 0.013142725386211668, "grad_norm": 0.5188914022486312, "learning_rate": 2.5806451612903223e-07, "loss": 1.3589065074920654, "step": 57 }, { "epoch": 0.013373299515794327, "grad_norm": 0.5619229477118247, "learning_rate": 2.6267281105990777e-07, "loss": 1.3558262586593628, "step": 58 }, { "epoch": 0.013603873645376989, "grad_norm": 0.5534574014271282, "learning_rate": 2.672811059907834e-07, "loss": 1.5165367126464844, "step": 59 }, { "epoch": 0.01383444777495965, "grad_norm": 0.47598313164662104, "learning_rate": 2.7188940092165896e-07, "loss": 1.3051776885986328, "step": 60 }, { "epoch": 0.01406502190454231, "grad_norm": 0.45011107968146047, "learning_rate": 2.7649769585253456e-07, "loss": 1.2916524410247803, "step": 61 }, { "epoch": 0.014295596034124971, "grad_norm": 0.513792634149487, "learning_rate": 2.8110599078341015e-07, "loss": 1.440261721611023, "step": 62 }, { "epoch": 0.014526170163707632, "grad_norm": 0.5424492375693261, "learning_rate": 2.857142857142857e-07, "loss": 1.3422625064849854, "step": 63 }, { "epoch": 0.014756744293290294, "grad_norm": 0.4598784526258713, "learning_rate": 2.903225806451613e-07, "loss": 1.374439001083374, "step": 64 }, { "epoch": 0.014987318422872953, "grad_norm": 0.5339252174305668, "learning_rate": 2.9493087557603683e-07, "loss": 1.4382294416427612, "step": 65 }, { "epoch": 0.015217892552455614, "grad_norm": 0.5302645203365586, "learning_rate": 2.9953917050691243e-07, "loss": 1.3971002101898193, "step": 66 }, { "epoch": 0.015448466682038276, "grad_norm": 0.5711144083332746, "learning_rate": 3.04147465437788e-07, "loss": 1.376272439956665, "step": 67 }, { "epoch": 0.015679040811620935, "grad_norm": 0.5016109357973636, "learning_rate": 3.0875576036866356e-07, "loss": 1.3135097026824951, "step": 68 }, { "epoch": 0.015909614941203597, "grad_norm": 0.5041882505031982, "learning_rate": 3.133640552995391e-07, "loss": 1.2688875198364258, "step": 69 }, { "epoch": 0.016140189070786258, "grad_norm": 0.544108037399583, "learning_rate": 3.1797235023041476e-07, "loss": 1.4380691051483154, "step": 70 }, { "epoch": 0.01637076320036892, "grad_norm": 0.5634345795303867, "learning_rate": 3.225806451612903e-07, "loss": 1.319260835647583, "step": 71 }, { "epoch": 0.01660133732995158, "grad_norm": 0.5352869486400713, "learning_rate": 3.271889400921659e-07, "loss": 1.4083738327026367, "step": 72 }, { "epoch": 0.01683191145953424, "grad_norm": 0.5524091199068598, "learning_rate": 3.317972350230415e-07, "loss": 1.4904775619506836, "step": 73 }, { "epoch": 0.0170624855891169, "grad_norm": 0.5488563092854116, "learning_rate": 3.3640552995391703e-07, "loss": 1.4534616470336914, "step": 74 }, { "epoch": 0.01729305971869956, "grad_norm": 0.621117268365485, "learning_rate": 3.410138248847926e-07, "loss": 1.6545689105987549, "step": 75 }, { "epoch": 0.017523633848282223, "grad_norm": 0.4834761822798673, "learning_rate": 3.4562211981566817e-07, "loss": 1.2267192602157593, "step": 76 }, { "epoch": 0.017754207977864884, "grad_norm": 0.5801091305703396, "learning_rate": 3.5023041474654376e-07, "loss": 1.4207227230072021, "step": 77 }, { "epoch": 0.017984782107447545, "grad_norm": 0.5253671028782199, "learning_rate": 3.5483870967741936e-07, "loss": 1.4952092170715332, "step": 78 }, { "epoch": 0.018215356237030206, "grad_norm": 0.4832223487637491, "learning_rate": 3.594470046082949e-07, "loss": 1.2932121753692627, "step": 79 }, { "epoch": 0.018445930366612864, "grad_norm": 0.5623376259320272, "learning_rate": 3.6405529953917044e-07, "loss": 1.3855851888656616, "step": 80 }, { "epoch": 0.018676504496195526, "grad_norm": 0.45682252121341854, "learning_rate": 3.686635944700461e-07, "loss": 1.3645650148391724, "step": 81 }, { "epoch": 0.018907078625778187, "grad_norm": 0.49579660369860507, "learning_rate": 3.7327188940092163e-07, "loss": 1.322283387184143, "step": 82 }, { "epoch": 0.01913765275536085, "grad_norm": 0.5177315365924456, "learning_rate": 3.778801843317972e-07, "loss": 1.3363629579544067, "step": 83 }, { "epoch": 0.01936822688494351, "grad_norm": 0.616201260540867, "learning_rate": 3.824884792626728e-07, "loss": 1.553279161453247, "step": 84 }, { "epoch": 0.01959880101452617, "grad_norm": 0.5198473540371843, "learning_rate": 3.8709677419354837e-07, "loss": 1.4434814453125, "step": 85 }, { "epoch": 0.019829375144108832, "grad_norm": 0.5923570018189629, "learning_rate": 3.9170506912442396e-07, "loss": 1.5134285688400269, "step": 86 }, { "epoch": 0.02005994927369149, "grad_norm": 0.5850924486743854, "learning_rate": 3.963133640552995e-07, "loss": 1.4244651794433594, "step": 87 }, { "epoch": 0.02029052340327415, "grad_norm": 0.560105193358992, "learning_rate": 4.009216589861751e-07, "loss": 1.4571855068206787, "step": 88 }, { "epoch": 0.020521097532856813, "grad_norm": 0.48108556089196525, "learning_rate": 4.055299539170507e-07, "loss": 1.2940685749053955, "step": 89 }, { "epoch": 0.020751671662439474, "grad_norm": 0.5203979535892653, "learning_rate": 4.1013824884792624e-07, "loss": 1.3537572622299194, "step": 90 }, { "epoch": 0.020982245792022135, "grad_norm": 0.5791117780548783, "learning_rate": 4.147465437788018e-07, "loss": 1.524500846862793, "step": 91 }, { "epoch": 0.021212819921604797, "grad_norm": 0.4890632694429427, "learning_rate": 4.1935483870967743e-07, "loss": 1.4414368867874146, "step": 92 }, { "epoch": 0.021443394051187458, "grad_norm": 0.49954451696473423, "learning_rate": 4.2396313364055297e-07, "loss": 1.284010887145996, "step": 93 }, { "epoch": 0.021673968180770116, "grad_norm": 0.6088073736973271, "learning_rate": 4.285714285714285e-07, "loss": 1.5901892185211182, "step": 94 }, { "epoch": 0.021904542310352777, "grad_norm": 0.5856129890195899, "learning_rate": 4.3317972350230416e-07, "loss": 1.4408211708068848, "step": 95 }, { "epoch": 0.02213511643993544, "grad_norm": 0.49571353442310634, "learning_rate": 4.377880184331797e-07, "loss": 1.2293554544448853, "step": 96 }, { "epoch": 0.0223656905695181, "grad_norm": 0.570508723127356, "learning_rate": 4.423963133640553e-07, "loss": 1.4144377708435059, "step": 97 }, { "epoch": 0.02259626469910076, "grad_norm": 0.5952794755762669, "learning_rate": 4.4700460829493084e-07, "loss": 1.359034776687622, "step": 98 }, { "epoch": 0.022826838828683423, "grad_norm": 0.5878914385748992, "learning_rate": 4.5161290322580644e-07, "loss": 1.3299517631530762, "step": 99 }, { "epoch": 0.023057412958266084, "grad_norm": 0.5039341997298462, "learning_rate": 4.5622119815668203e-07, "loss": 1.3072423934936523, "step": 100 }, { "epoch": 0.023287987087848742, "grad_norm": 0.6205508042108064, "learning_rate": 4.608294930875576e-07, "loss": 1.5683096647262573, "step": 101 }, { "epoch": 0.023518561217431403, "grad_norm": 0.6300075069307655, "learning_rate": 4.654377880184331e-07, "loss": 1.6294015645980835, "step": 102 }, { "epoch": 0.023749135347014064, "grad_norm": 0.5245849244619794, "learning_rate": 4.7004608294930877e-07, "loss": 1.424511194229126, "step": 103 }, { "epoch": 0.023979709476596726, "grad_norm": 0.5471205081131801, "learning_rate": 4.746543778801843e-07, "loss": 1.4169164896011353, "step": 104 }, { "epoch": 0.024210283606179387, "grad_norm": 0.5854813174619509, "learning_rate": 4.792626728110599e-07, "loss": 1.3933480978012085, "step": 105 }, { "epoch": 0.02444085773576205, "grad_norm": 0.6166413586526565, "learning_rate": 4.838709677419355e-07, "loss": 1.488750696182251, "step": 106 }, { "epoch": 0.02467143186534471, "grad_norm": 0.6052025315612124, "learning_rate": 4.88479262672811e-07, "loss": 1.4852150678634644, "step": 107 }, { "epoch": 0.024902005994927368, "grad_norm": 0.5750922845804657, "learning_rate": 4.930875576036866e-07, "loss": 1.4256765842437744, "step": 108 }, { "epoch": 0.02513258012451003, "grad_norm": 0.5231547313189364, "learning_rate": 4.976958525345622e-07, "loss": 1.3063642978668213, "step": 109 }, { "epoch": 0.02536315425409269, "grad_norm": 0.5734263022927267, "learning_rate": 5.023041474654378e-07, "loss": 1.549802303314209, "step": 110 }, { "epoch": 0.02559372838367535, "grad_norm": 0.5041709928346361, "learning_rate": 5.069124423963134e-07, "loss": 1.301950454711914, "step": 111 }, { "epoch": 0.025824302513258013, "grad_norm": 0.5567596794280206, "learning_rate": 5.11520737327189e-07, "loss": 1.3025325536727905, "step": 112 }, { "epoch": 0.026054876642840674, "grad_norm": 0.5369405016436734, "learning_rate": 5.161290322580645e-07, "loss": 1.40749192237854, "step": 113 }, { "epoch": 0.026285450772423335, "grad_norm": 0.5208396194792263, "learning_rate": 5.2073732718894e-07, "loss": 1.3216793537139893, "step": 114 }, { "epoch": 0.026516024902005993, "grad_norm": 0.5052494958784187, "learning_rate": 5.253456221198155e-07, "loss": 1.3189308643341064, "step": 115 }, { "epoch": 0.026746599031588655, "grad_norm": 0.5632602249643789, "learning_rate": 5.299539170506912e-07, "loss": 1.430384635925293, "step": 116 }, { "epoch": 0.026977173161171316, "grad_norm": 0.5516062364182813, "learning_rate": 5.345622119815668e-07, "loss": 1.4081478118896484, "step": 117 }, { "epoch": 0.027207747290753977, "grad_norm": 0.6385508559977366, "learning_rate": 5.391705069124423e-07, "loss": 1.434388518333435, "step": 118 }, { "epoch": 0.02743832142033664, "grad_norm": 0.6138756203209041, "learning_rate": 5.437788018433179e-07, "loss": 1.4139282703399658, "step": 119 }, { "epoch": 0.0276688955499193, "grad_norm": 0.5683069275087388, "learning_rate": 5.483870967741935e-07, "loss": 1.4511487483978271, "step": 120 }, { "epoch": 0.02789946967950196, "grad_norm": 0.6423215590072974, "learning_rate": 5.529953917050691e-07, "loss": 1.5713481903076172, "step": 121 }, { "epoch": 0.02813004380908462, "grad_norm": 0.5705917499340588, "learning_rate": 5.576036866359447e-07, "loss": 1.4315730333328247, "step": 122 }, { "epoch": 0.02836061793866728, "grad_norm": 0.5316898536625556, "learning_rate": 5.622119815668203e-07, "loss": 1.3283708095550537, "step": 123 }, { "epoch": 0.028591192068249942, "grad_norm": 0.6184222176453401, "learning_rate": 5.668202764976958e-07, "loss": 1.4329016208648682, "step": 124 }, { "epoch": 0.028821766197832603, "grad_norm": 0.5872933055537319, "learning_rate": 5.714285714285714e-07, "loss": 1.444648265838623, "step": 125 }, { "epoch": 0.029052340327415264, "grad_norm": 0.5205647887621043, "learning_rate": 5.760368663594469e-07, "loss": 1.3584785461425781, "step": 126 }, { "epoch": 0.029282914456997926, "grad_norm": 0.5687232002808722, "learning_rate": 5.806451612903226e-07, "loss": 1.2815918922424316, "step": 127 }, { "epoch": 0.029513488586580587, "grad_norm": 0.5252774303203537, "learning_rate": 5.852534562211982e-07, "loss": 1.3332037925720215, "step": 128 }, { "epoch": 0.029744062716163245, "grad_norm": 0.5694649769044726, "learning_rate": 5.898617511520737e-07, "loss": 1.4522390365600586, "step": 129 }, { "epoch": 0.029974636845745906, "grad_norm": 0.5607244925516301, "learning_rate": 5.944700460829493e-07, "loss": 1.4362024068832397, "step": 130 }, { "epoch": 0.030205210975328568, "grad_norm": 0.5432906779366606, "learning_rate": 5.990783410138249e-07, "loss": 1.3271276950836182, "step": 131 }, { "epoch": 0.03043578510491123, "grad_norm": 0.6175056690394787, "learning_rate": 6.036866359447004e-07, "loss": 1.5936369895935059, "step": 132 }, { "epoch": 0.03066635923449389, "grad_norm": 0.5887629397700789, "learning_rate": 6.08294930875576e-07, "loss": 1.4786381721496582, "step": 133 }, { "epoch": 0.03089693336407655, "grad_norm": 0.5490770556101789, "learning_rate": 6.129032258064516e-07, "loss": 1.3499064445495605, "step": 134 }, { "epoch": 0.031127507493659213, "grad_norm": 0.583021664079577, "learning_rate": 6.175115207373271e-07, "loss": 1.4434795379638672, "step": 135 }, { "epoch": 0.03135808162324187, "grad_norm": 0.6037371306112707, "learning_rate": 6.221198156682027e-07, "loss": 1.4064602851867676, "step": 136 }, { "epoch": 0.03158865575282453, "grad_norm": 0.5005511365111003, "learning_rate": 6.267281105990782e-07, "loss": 1.3325507640838623, "step": 137 }, { "epoch": 0.03181922988240719, "grad_norm": 0.516984621863849, "learning_rate": 6.313364055299539e-07, "loss": 1.2584879398345947, "step": 138 }, { "epoch": 0.032049804011989855, "grad_norm": 0.5401703370709408, "learning_rate": 6.359447004608295e-07, "loss": 1.3754582405090332, "step": 139 }, { "epoch": 0.032280378141572516, "grad_norm": 0.5773695778497429, "learning_rate": 6.40552995391705e-07, "loss": 1.2700412273406982, "step": 140 }, { "epoch": 0.03251095227115518, "grad_norm": 0.580045410672373, "learning_rate": 6.451612903225806e-07, "loss": 1.395858645439148, "step": 141 }, { "epoch": 0.03274152640073784, "grad_norm": 0.6146943532430481, "learning_rate": 6.497695852534562e-07, "loss": 1.402890682220459, "step": 142 }, { "epoch": 0.0329721005303205, "grad_norm": 0.5736524878471048, "learning_rate": 6.543778801843318e-07, "loss": 1.5405397415161133, "step": 143 }, { "epoch": 0.03320267465990316, "grad_norm": 0.5418174501474893, "learning_rate": 6.589861751152074e-07, "loss": 1.2394921779632568, "step": 144 }, { "epoch": 0.03343324878948582, "grad_norm": 0.6276742940359161, "learning_rate": 6.63594470046083e-07, "loss": 1.453255295753479, "step": 145 }, { "epoch": 0.03366382291906848, "grad_norm": 0.6191808042065741, "learning_rate": 6.682027649769585e-07, "loss": 1.3661112785339355, "step": 146 }, { "epoch": 0.03389439704865114, "grad_norm": 0.5260230971069313, "learning_rate": 6.728110599078341e-07, "loss": 1.2952282428741455, "step": 147 }, { "epoch": 0.0341249711782338, "grad_norm": 0.6693704726704671, "learning_rate": 6.774193548387096e-07, "loss": 1.396565318107605, "step": 148 }, { "epoch": 0.03435554530781646, "grad_norm": 0.5881355966882998, "learning_rate": 6.820276497695853e-07, "loss": 1.3207082748413086, "step": 149 }, { "epoch": 0.03458611943739912, "grad_norm": 0.5727010424261832, "learning_rate": 6.866359447004608e-07, "loss": 1.4085125923156738, "step": 150 }, { "epoch": 0.034816693566981784, "grad_norm": 0.6667208730018341, "learning_rate": 6.912442396313363e-07, "loss": 1.5698528289794922, "step": 151 }, { "epoch": 0.035047267696564445, "grad_norm": 0.5847511619477141, "learning_rate": 6.958525345622119e-07, "loss": 1.4091004133224487, "step": 152 }, { "epoch": 0.035277841826147106, "grad_norm": 0.5143540253572731, "learning_rate": 7.004608294930875e-07, "loss": 1.2392504215240479, "step": 153 }, { "epoch": 0.03550841595572977, "grad_norm": 0.6061996419355483, "learning_rate": 7.05069124423963e-07, "loss": 1.3355891704559326, "step": 154 }, { "epoch": 0.03573899008531243, "grad_norm": 0.5654677060773288, "learning_rate": 7.096774193548387e-07, "loss": 1.330599308013916, "step": 155 }, { "epoch": 0.03596956421489509, "grad_norm": 0.5625277163359125, "learning_rate": 7.142857142857143e-07, "loss": 1.344653844833374, "step": 156 }, { "epoch": 0.03620013834447775, "grad_norm": 0.5693935421186345, "learning_rate": 7.188940092165898e-07, "loss": 1.341560959815979, "step": 157 }, { "epoch": 0.03643071247406041, "grad_norm": 0.5761507210889462, "learning_rate": 7.235023041474654e-07, "loss": 1.2242077589035034, "step": 158 }, { "epoch": 0.036661286603643074, "grad_norm": 0.61477283253827, "learning_rate": 7.281105990783409e-07, "loss": 1.2858202457427979, "step": 159 }, { "epoch": 0.03689186073322573, "grad_norm": 0.6410836439864531, "learning_rate": 7.327188940092166e-07, "loss": 1.479524850845337, "step": 160 }, { "epoch": 0.03712243486280839, "grad_norm": 0.5918139936623208, "learning_rate": 7.373271889400922e-07, "loss": 1.43915855884552, "step": 161 }, { "epoch": 0.03735300899239105, "grad_norm": 0.6478814183526712, "learning_rate": 7.419354838709677e-07, "loss": 1.3939034938812256, "step": 162 }, { "epoch": 0.03758358312197371, "grad_norm": 0.6065250961726126, "learning_rate": 7.465437788018433e-07, "loss": 1.2733443975448608, "step": 163 }, { "epoch": 0.037814157251556374, "grad_norm": 0.5670760124517911, "learning_rate": 7.511520737327189e-07, "loss": 1.3436474800109863, "step": 164 }, { "epoch": 0.038044731381139035, "grad_norm": 0.622037546591312, "learning_rate": 7.557603686635944e-07, "loss": 1.4250465631484985, "step": 165 }, { "epoch": 0.0382753055107217, "grad_norm": 0.607298640184171, "learning_rate": 7.603686635944701e-07, "loss": 1.4244422912597656, "step": 166 }, { "epoch": 0.03850587964030436, "grad_norm": 0.6986289389542176, "learning_rate": 7.649769585253457e-07, "loss": 1.5487544536590576, "step": 167 }, { "epoch": 0.03873645376988702, "grad_norm": 0.5793907792629099, "learning_rate": 7.695852534562211e-07, "loss": 1.3282281160354614, "step": 168 }, { "epoch": 0.03896702789946968, "grad_norm": 0.5428953608010194, "learning_rate": 7.741935483870967e-07, "loss": 1.2823774814605713, "step": 169 }, { "epoch": 0.03919760202905234, "grad_norm": 0.5889853233557574, "learning_rate": 7.788018433179722e-07, "loss": 1.2402329444885254, "step": 170 }, { "epoch": 0.039428176158635, "grad_norm": 0.6219537569729359, "learning_rate": 7.834101382488479e-07, "loss": 1.3755587339401245, "step": 171 }, { "epoch": 0.039658750288217665, "grad_norm": 0.5509851701904478, "learning_rate": 7.880184331797235e-07, "loss": 1.3403921127319336, "step": 172 }, { "epoch": 0.039889324417800326, "grad_norm": 0.5971512014225002, "learning_rate": 7.92626728110599e-07, "loss": 1.3742129802703857, "step": 173 }, { "epoch": 0.04011989854738298, "grad_norm": 0.7068161569826883, "learning_rate": 7.972350230414746e-07, "loss": 1.6444599628448486, "step": 174 }, { "epoch": 0.04035047267696564, "grad_norm": 0.6019721571978455, "learning_rate": 8.018433179723502e-07, "loss": 1.3891929388046265, "step": 175 }, { "epoch": 0.0405810468065483, "grad_norm": 0.5520157347061957, "learning_rate": 8.064516129032257e-07, "loss": 1.2279409170150757, "step": 176 }, { "epoch": 0.040811620936130964, "grad_norm": 0.6346481492269727, "learning_rate": 8.110599078341014e-07, "loss": 1.4576997756958008, "step": 177 }, { "epoch": 0.041042195065713626, "grad_norm": 0.612489332435889, "learning_rate": 8.15668202764977e-07, "loss": 1.3585199117660522, "step": 178 }, { "epoch": 0.04127276919529629, "grad_norm": 0.5908354773562909, "learning_rate": 8.202764976958525e-07, "loss": 1.3056905269622803, "step": 179 }, { "epoch": 0.04150334332487895, "grad_norm": 0.5749600887070265, "learning_rate": 8.248847926267281e-07, "loss": 1.3029698133468628, "step": 180 }, { "epoch": 0.04173391745446161, "grad_norm": 0.6598409427706357, "learning_rate": 8.294930875576036e-07, "loss": 1.4368736743927002, "step": 181 }, { "epoch": 0.04196449158404427, "grad_norm": 0.5781034108869284, "learning_rate": 8.341013824884793e-07, "loss": 1.3243422508239746, "step": 182 }, { "epoch": 0.04219506571362693, "grad_norm": 0.5206395827762466, "learning_rate": 8.387096774193549e-07, "loss": 1.232081413269043, "step": 183 }, { "epoch": 0.042425639843209594, "grad_norm": 0.656527379150416, "learning_rate": 8.433179723502303e-07, "loss": 1.4601390361785889, "step": 184 }, { "epoch": 0.042656213972792255, "grad_norm": 0.7159376690159417, "learning_rate": 8.479262672811059e-07, "loss": 1.3778860569000244, "step": 185 }, { "epoch": 0.042886788102374916, "grad_norm": 0.590059263278645, "learning_rate": 8.525345622119815e-07, "loss": 1.3235092163085938, "step": 186 }, { "epoch": 0.04311736223195758, "grad_norm": 0.6886704124574455, "learning_rate": 8.57142857142857e-07, "loss": 1.4480581283569336, "step": 187 }, { "epoch": 0.04334793636154023, "grad_norm": 0.6346582437238362, "learning_rate": 8.617511520737327e-07, "loss": 1.4530816078186035, "step": 188 }, { "epoch": 0.04357851049112289, "grad_norm": 0.6767670706852607, "learning_rate": 8.663594470046083e-07, "loss": 1.4447407722473145, "step": 189 }, { "epoch": 0.043809084620705555, "grad_norm": 0.6049885392306779, "learning_rate": 8.709677419354838e-07, "loss": 1.3610244989395142, "step": 190 }, { "epoch": 0.044039658750288216, "grad_norm": 0.6415008170468611, "learning_rate": 8.755760368663594e-07, "loss": 1.4084277153015137, "step": 191 }, { "epoch": 0.04427023287987088, "grad_norm": 0.579530872526008, "learning_rate": 8.801843317972349e-07, "loss": 1.3652758598327637, "step": 192 }, { "epoch": 0.04450080700945354, "grad_norm": 0.7106489880805067, "learning_rate": 8.847926267281106e-07, "loss": 1.4791496992111206, "step": 193 }, { "epoch": 0.0447313811390362, "grad_norm": 0.6211187249917176, "learning_rate": 8.894009216589862e-07, "loss": 1.3958008289337158, "step": 194 }, { "epoch": 0.04496195526861886, "grad_norm": 0.700016972508283, "learning_rate": 8.940092165898617e-07, "loss": 1.4134410619735718, "step": 195 }, { "epoch": 0.04519252939820152, "grad_norm": 0.6911089974612981, "learning_rate": 8.986175115207373e-07, "loss": 1.4062776565551758, "step": 196 }, { "epoch": 0.045423103527784184, "grad_norm": 0.6823334536756955, "learning_rate": 9.032258064516129e-07, "loss": 1.375224232673645, "step": 197 }, { "epoch": 0.045653677657366845, "grad_norm": 0.6003343488972004, "learning_rate": 9.078341013824884e-07, "loss": 1.2440606355667114, "step": 198 }, { "epoch": 0.045884251786949506, "grad_norm": 0.6737684280449967, "learning_rate": 9.124423963133641e-07, "loss": 1.4068349599838257, "step": 199 }, { "epoch": 0.04611482591653217, "grad_norm": 0.6181499859340271, "learning_rate": 9.170506912442397e-07, "loss": 1.3797581195831299, "step": 200 }, { "epoch": 0.04634540004611483, "grad_norm": 0.6445170966825345, "learning_rate": 9.216589861751152e-07, "loss": 1.4441678524017334, "step": 201 }, { "epoch": 0.046575974175697483, "grad_norm": 0.6677276378953197, "learning_rate": 9.262672811059907e-07, "loss": 1.4727370738983154, "step": 202 }, { "epoch": 0.046806548305280145, "grad_norm": 0.7032332117559357, "learning_rate": 9.308755760368662e-07, "loss": 1.448495864868164, "step": 203 }, { "epoch": 0.047037122434862806, "grad_norm": 0.674429398641426, "learning_rate": 9.354838709677418e-07, "loss": 1.3727293014526367, "step": 204 }, { "epoch": 0.04726769656444547, "grad_norm": 0.6701259318687961, "learning_rate": 9.400921658986175e-07, "loss": 1.4234352111816406, "step": 205 }, { "epoch": 0.04749827069402813, "grad_norm": 0.5974678653003657, "learning_rate": 9.44700460829493e-07, "loss": 1.2407056093215942, "step": 206 }, { "epoch": 0.04772884482361079, "grad_norm": 0.672276356974357, "learning_rate": 9.493087557603686e-07, "loss": 1.3502311706542969, "step": 207 }, { "epoch": 0.04795941895319345, "grad_norm": 0.7465400676066979, "learning_rate": 9.539170506912442e-07, "loss": 1.4618254899978638, "step": 208 }, { "epoch": 0.04818999308277611, "grad_norm": 0.681303163705478, "learning_rate": 9.585253456221198e-07, "loss": 1.3624317646026611, "step": 209 }, { "epoch": 0.048420567212358774, "grad_norm": 0.7608712138693399, "learning_rate": 9.631336405529954e-07, "loss": 1.512046456336975, "step": 210 }, { "epoch": 0.048651141341941435, "grad_norm": 0.6018077766578277, "learning_rate": 9.67741935483871e-07, "loss": 1.2896164655685425, "step": 211 }, { "epoch": 0.0488817154715241, "grad_norm": 0.7063578249182565, "learning_rate": 9.723502304147466e-07, "loss": 1.5507850646972656, "step": 212 }, { "epoch": 0.04911228960110676, "grad_norm": 0.7081498572564182, "learning_rate": 9.76958525345622e-07, "loss": 1.425408124923706, "step": 213 }, { "epoch": 0.04934286373068942, "grad_norm": 0.7025877080602252, "learning_rate": 9.815668202764976e-07, "loss": 1.347771406173706, "step": 214 }, { "epoch": 0.04957343786027208, "grad_norm": 0.7201983919068122, "learning_rate": 9.861751152073732e-07, "loss": 1.4044904708862305, "step": 215 }, { "epoch": 0.049804011989854735, "grad_norm": 0.7045020078596302, "learning_rate": 9.907834101382488e-07, "loss": 1.3507332801818848, "step": 216 }, { "epoch": 0.050034586119437396, "grad_norm": 0.6820424993070572, "learning_rate": 9.953917050691244e-07, "loss": 1.3022946119308472, "step": 217 }, { "epoch": 0.05026516024902006, "grad_norm": 0.6561516180690095, "learning_rate": 1e-06, "loss": 1.284754991531372, "step": 218 }, { "epoch": 0.05049573437860272, "grad_norm": 0.6003085662526402, "learning_rate": 1.0046082949308756e-06, "loss": 1.2985923290252686, "step": 219 }, { "epoch": 0.05072630850818538, "grad_norm": 0.6214608767923379, "learning_rate": 1.0092165898617511e-06, "loss": 1.3855717182159424, "step": 220 }, { "epoch": 0.05095688263776804, "grad_norm": 0.675694738994849, "learning_rate": 1.0138248847926267e-06, "loss": 1.357919692993164, "step": 221 }, { "epoch": 0.0511874567673507, "grad_norm": 0.6736529895786637, "learning_rate": 1.0184331797235021e-06, "loss": 1.2818949222564697, "step": 222 }, { "epoch": 0.051418030896933364, "grad_norm": 0.6226203332882617, "learning_rate": 1.023041474654378e-06, "loss": 1.2488511800765991, "step": 223 }, { "epoch": 0.051648605026516026, "grad_norm": 0.7420146271711324, "learning_rate": 1.0276497695852535e-06, "loss": 1.3824148178100586, "step": 224 }, { "epoch": 0.05187917915609869, "grad_norm": 0.6473939851836901, "learning_rate": 1.032258064516129e-06, "loss": 1.3114633560180664, "step": 225 }, { "epoch": 0.05210975328568135, "grad_norm": 0.6372141360329365, "learning_rate": 1.0368663594470047e-06, "loss": 1.272273063659668, "step": 226 }, { "epoch": 0.05234032741526401, "grad_norm": 0.8216490037105428, "learning_rate": 1.04147465437788e-06, "loss": 1.5072649717330933, "step": 227 }, { "epoch": 0.05257090154484667, "grad_norm": 0.7183581578734374, "learning_rate": 1.0460829493087557e-06, "loss": 1.4087142944335938, "step": 228 }, { "epoch": 0.05280147567442933, "grad_norm": 0.8332625481322393, "learning_rate": 1.050691244239631e-06, "loss": 1.4866605997085571, "step": 229 }, { "epoch": 0.05303204980401199, "grad_norm": 0.6315632875144884, "learning_rate": 1.0552995391705069e-06, "loss": 1.3377184867858887, "step": 230 }, { "epoch": 0.05326262393359465, "grad_norm": 0.6695801561741619, "learning_rate": 1.0599078341013825e-06, "loss": 1.4009103775024414, "step": 231 }, { "epoch": 0.05349319806317731, "grad_norm": 0.7832755910275336, "learning_rate": 1.0645161290322579e-06, "loss": 1.4878556728363037, "step": 232 }, { "epoch": 0.05372377219275997, "grad_norm": 0.7218421394327601, "learning_rate": 1.0691244239631337e-06, "loss": 1.4002021551132202, "step": 233 }, { "epoch": 0.05395434632234263, "grad_norm": 0.6918832056192313, "learning_rate": 1.073732718894009e-06, "loss": 1.337146520614624, "step": 234 }, { "epoch": 0.05418492045192529, "grad_norm": 0.7101215642172168, "learning_rate": 1.0783410138248847e-06, "loss": 1.4084792137145996, "step": 235 }, { "epoch": 0.054415494581507955, "grad_norm": 0.8413614642264606, "learning_rate": 1.0829493087557605e-06, "loss": 1.4131449460983276, "step": 236 }, { "epoch": 0.054646068711090616, "grad_norm": 0.6587637953772119, "learning_rate": 1.0875576036866358e-06, "loss": 1.1869292259216309, "step": 237 }, { "epoch": 0.05487664284067328, "grad_norm": 0.7608337119634553, "learning_rate": 1.0921658986175114e-06, "loss": 1.3970961570739746, "step": 238 }, { "epoch": 0.05510721697025594, "grad_norm": 0.7677503323555195, "learning_rate": 1.096774193548387e-06, "loss": 1.2682442665100098, "step": 239 }, { "epoch": 0.0553377910998386, "grad_norm": 0.6546621813731868, "learning_rate": 1.1013824884792626e-06, "loss": 1.2983934879302979, "step": 240 }, { "epoch": 0.05556836522942126, "grad_norm": 0.7451544478647047, "learning_rate": 1.1059907834101382e-06, "loss": 1.3980869054794312, "step": 241 }, { "epoch": 0.05579893935900392, "grad_norm": 0.6116475273591584, "learning_rate": 1.1105990783410138e-06, "loss": 1.3068631887435913, "step": 242 }, { "epoch": 0.056029513488586584, "grad_norm": 0.7974654782353883, "learning_rate": 1.1152073732718894e-06, "loss": 1.5353353023529053, "step": 243 }, { "epoch": 0.05626008761816924, "grad_norm": 0.663054900024182, "learning_rate": 1.1198156682027648e-06, "loss": 1.290163278579712, "step": 244 }, { "epoch": 0.0564906617477519, "grad_norm": 0.6761997400626832, "learning_rate": 1.1244239631336406e-06, "loss": 1.3671848773956299, "step": 245 }, { "epoch": 0.05672123587733456, "grad_norm": 0.6294209937786865, "learning_rate": 1.1290322580645162e-06, "loss": 1.3020408153533936, "step": 246 }, { "epoch": 0.05695181000691722, "grad_norm": 0.7207247726421506, "learning_rate": 1.1336405529953916e-06, "loss": 1.3159775733947754, "step": 247 }, { "epoch": 0.057182384136499884, "grad_norm": 0.6708051542823367, "learning_rate": 1.1382488479262674e-06, "loss": 1.3163995742797852, "step": 248 }, { "epoch": 0.057412958266082545, "grad_norm": 0.8019994049858626, "learning_rate": 1.1428571428571428e-06, "loss": 1.5215930938720703, "step": 249 }, { "epoch": 0.057643532395665206, "grad_norm": 0.6559479072990889, "learning_rate": 1.1474654377880184e-06, "loss": 1.2870161533355713, "step": 250 }, { "epoch": 0.05787410652524787, "grad_norm": 0.7147869966218979, "learning_rate": 1.1520737327188938e-06, "loss": 1.2624198198318481, "step": 251 }, { "epoch": 0.05810468065483053, "grad_norm": 0.7319832858668294, "learning_rate": 1.1566820276497696e-06, "loss": 1.2778981924057007, "step": 252 }, { "epoch": 0.05833525478441319, "grad_norm": 0.6564800467165074, "learning_rate": 1.1612903225806452e-06, "loss": 1.1934442520141602, "step": 253 }, { "epoch": 0.05856582891399585, "grad_norm": 0.7291335446235057, "learning_rate": 1.1658986175115205e-06, "loss": 1.3840088844299316, "step": 254 }, { "epoch": 0.05879640304357851, "grad_norm": 0.7017610521536986, "learning_rate": 1.1705069124423963e-06, "loss": 1.373002290725708, "step": 255 }, { "epoch": 0.059026977173161174, "grad_norm": 0.6853330554611681, "learning_rate": 1.1751152073732717e-06, "loss": 1.3614685535430908, "step": 256 }, { "epoch": 0.059257551302743836, "grad_norm": 0.7170055632885292, "learning_rate": 1.1797235023041473e-06, "loss": 1.3525335788726807, "step": 257 }, { "epoch": 0.05948812543232649, "grad_norm": 0.7471586447698318, "learning_rate": 1.1843317972350231e-06, "loss": 1.3806469440460205, "step": 258 }, { "epoch": 0.05971869956190915, "grad_norm": 0.7262354481718393, "learning_rate": 1.1889400921658985e-06, "loss": 1.372736930847168, "step": 259 }, { "epoch": 0.05994927369149181, "grad_norm": 0.7470794959515278, "learning_rate": 1.1935483870967741e-06, "loss": 1.309061050415039, "step": 260 }, { "epoch": 0.060179847821074474, "grad_norm": 0.7217295951903909, "learning_rate": 1.1981566820276497e-06, "loss": 1.3500525951385498, "step": 261 }, { "epoch": 0.060410421950657135, "grad_norm": 0.7498906773328822, "learning_rate": 1.2027649769585253e-06, "loss": 1.4197357892990112, "step": 262 }, { "epoch": 0.0606409960802398, "grad_norm": 0.9553336191863615, "learning_rate": 1.207373271889401e-06, "loss": 1.6454131603240967, "step": 263 }, { "epoch": 0.06087157020982246, "grad_norm": 0.7361372249879211, "learning_rate": 1.2119815668202765e-06, "loss": 1.269604206085205, "step": 264 }, { "epoch": 0.06110214433940512, "grad_norm": 0.6596823046141973, "learning_rate": 1.216589861751152e-06, "loss": 1.2358057498931885, "step": 265 }, { "epoch": 0.06133271846898778, "grad_norm": 0.7203751630823346, "learning_rate": 1.2211981566820275e-06, "loss": 1.2713422775268555, "step": 266 }, { "epoch": 0.06156329259857044, "grad_norm": 0.7033446179657081, "learning_rate": 1.2258064516129033e-06, "loss": 1.225820779800415, "step": 267 }, { "epoch": 0.0617938667281531, "grad_norm": 0.6900817599997362, "learning_rate": 1.2304147465437787e-06, "loss": 1.279617190361023, "step": 268 }, { "epoch": 0.062024440857735764, "grad_norm": 0.6800159728233099, "learning_rate": 1.2350230414746543e-06, "loss": 1.2081385850906372, "step": 269 }, { "epoch": 0.062255014987318426, "grad_norm": 0.7378639399050563, "learning_rate": 1.23963133640553e-06, "loss": 1.3121249675750732, "step": 270 }, { "epoch": 0.06248558911690109, "grad_norm": 0.7497904685097676, "learning_rate": 1.2442396313364054e-06, "loss": 1.28495454788208, "step": 271 }, { "epoch": 0.06271616324648374, "grad_norm": 0.7749777957183016, "learning_rate": 1.248847926267281e-06, "loss": 1.3837053775787354, "step": 272 }, { "epoch": 0.0629467373760664, "grad_norm": 0.7210838772374344, "learning_rate": 1.2534562211981564e-06, "loss": 1.2119230031967163, "step": 273 }, { "epoch": 0.06317731150564906, "grad_norm": 0.7143072591295863, "learning_rate": 1.2580645161290322e-06, "loss": 1.323190450668335, "step": 274 }, { "epoch": 0.06340788563523173, "grad_norm": 0.7546501032980093, "learning_rate": 1.2626728110599078e-06, "loss": 1.4300715923309326, "step": 275 }, { "epoch": 0.06363845976481439, "grad_norm": 0.7154461007442852, "learning_rate": 1.2672811059907832e-06, "loss": 1.1680996417999268, "step": 276 }, { "epoch": 0.06386903389439705, "grad_norm": 0.8088364505140268, "learning_rate": 1.271889400921659e-06, "loss": 1.3980211019515991, "step": 277 }, { "epoch": 0.06409960802397971, "grad_norm": 0.7801914373505492, "learning_rate": 1.2764976958525344e-06, "loss": 1.40798020362854, "step": 278 }, { "epoch": 0.06433018215356237, "grad_norm": 0.7237186405433459, "learning_rate": 1.28110599078341e-06, "loss": 1.2535033226013184, "step": 279 }, { "epoch": 0.06456075628314503, "grad_norm": 0.7779219570683336, "learning_rate": 1.2857142857142858e-06, "loss": 1.3866907358169556, "step": 280 }, { "epoch": 0.0647913304127277, "grad_norm": 0.7036374523288562, "learning_rate": 1.2903225806451612e-06, "loss": 1.1985647678375244, "step": 281 }, { "epoch": 0.06502190454231035, "grad_norm": 0.8186126171093759, "learning_rate": 1.2949308755760368e-06, "loss": 1.3741936683654785, "step": 282 }, { "epoch": 0.06525247867189302, "grad_norm": 0.7795060457073558, "learning_rate": 1.2995391705069124e-06, "loss": 1.3684422969818115, "step": 283 }, { "epoch": 0.06548305280147568, "grad_norm": 0.7685811594695469, "learning_rate": 1.304147465437788e-06, "loss": 1.3792086839675903, "step": 284 }, { "epoch": 0.06571362693105834, "grad_norm": 0.8541112738893439, "learning_rate": 1.3087557603686636e-06, "loss": 1.3252873420715332, "step": 285 }, { "epoch": 0.065944201060641, "grad_norm": 0.7272989570317888, "learning_rate": 1.3133640552995392e-06, "loss": 1.1918525695800781, "step": 286 }, { "epoch": 0.06617477519022366, "grad_norm": 0.8825171015262823, "learning_rate": 1.3179723502304148e-06, "loss": 1.3760654926300049, "step": 287 }, { "epoch": 0.06640534931980632, "grad_norm": 0.8100539272477522, "learning_rate": 1.3225806451612901e-06, "loss": 1.3452839851379395, "step": 288 }, { "epoch": 0.06663592344938898, "grad_norm": 0.7635396360128843, "learning_rate": 1.327188940092166e-06, "loss": 1.321220874786377, "step": 289 }, { "epoch": 0.06686649757897165, "grad_norm": 0.724002123288283, "learning_rate": 1.3317972350230413e-06, "loss": 1.222012996673584, "step": 290 }, { "epoch": 0.0670970717085543, "grad_norm": 0.7939713970528558, "learning_rate": 1.336405529953917e-06, "loss": 1.3209044933319092, "step": 291 }, { "epoch": 0.06732764583813695, "grad_norm": 0.834643855588948, "learning_rate": 1.3410138248847927e-06, "loss": 1.3250432014465332, "step": 292 }, { "epoch": 0.06755821996771962, "grad_norm": 0.6522445861220314, "learning_rate": 1.3456221198156681e-06, "loss": 1.1738805770874023, "step": 293 }, { "epoch": 0.06778879409730228, "grad_norm": 0.7430324759377445, "learning_rate": 1.3502304147465437e-06, "loss": 1.238675832748413, "step": 294 }, { "epoch": 0.06801936822688494, "grad_norm": 0.6872443402637277, "learning_rate": 1.354838709677419e-06, "loss": 1.2162814140319824, "step": 295 }, { "epoch": 0.0682499423564676, "grad_norm": 0.7451321254668013, "learning_rate": 1.359447004608295e-06, "loss": 1.2087210416793823, "step": 296 }, { "epoch": 0.06848051648605026, "grad_norm": 0.7183129418570579, "learning_rate": 1.3640552995391705e-06, "loss": 1.2657420635223389, "step": 297 }, { "epoch": 0.06871109061563292, "grad_norm": 0.8828866176671843, "learning_rate": 1.3686635944700459e-06, "loss": 1.496249794960022, "step": 298 }, { "epoch": 0.06894166474521558, "grad_norm": 0.7852198432087445, "learning_rate": 1.3732718894009217e-06, "loss": 1.2698930501937866, "step": 299 }, { "epoch": 0.06917223887479824, "grad_norm": 0.723866375282328, "learning_rate": 1.377880184331797e-06, "loss": 1.2088165283203125, "step": 300 }, { "epoch": 0.0694028130043809, "grad_norm": 0.764377981893855, "learning_rate": 1.3824884792626727e-06, "loss": 1.392000436782837, "step": 301 }, { "epoch": 0.06963338713396357, "grad_norm": 0.7252481501169622, "learning_rate": 1.3870967741935485e-06, "loss": 1.366544485092163, "step": 302 }, { "epoch": 0.06986396126354623, "grad_norm": 0.7900814443800929, "learning_rate": 1.3917050691244239e-06, "loss": 1.3276031017303467, "step": 303 }, { "epoch": 0.07009453539312889, "grad_norm": 0.7000339586583599, "learning_rate": 1.3963133640552995e-06, "loss": 1.1413768529891968, "step": 304 }, { "epoch": 0.07032510952271155, "grad_norm": 0.7903483195817192, "learning_rate": 1.400921658986175e-06, "loss": 1.2958520650863647, "step": 305 }, { "epoch": 0.07055568365229421, "grad_norm": 0.7651988170590107, "learning_rate": 1.4055299539170507e-06, "loss": 1.3514549732208252, "step": 306 }, { "epoch": 0.07078625778187687, "grad_norm": 0.767117117462576, "learning_rate": 1.410138248847926e-06, "loss": 1.332120418548584, "step": 307 }, { "epoch": 0.07101683191145954, "grad_norm": 0.8380945550826328, "learning_rate": 1.4147465437788018e-06, "loss": 1.282820463180542, "step": 308 }, { "epoch": 0.0712474060410422, "grad_norm": 0.7478573370757386, "learning_rate": 1.4193548387096774e-06, "loss": 1.3927665948867798, "step": 309 }, { "epoch": 0.07147798017062486, "grad_norm": 0.7471336867744233, "learning_rate": 1.4239631336405528e-06, "loss": 1.2459386587142944, "step": 310 }, { "epoch": 0.07170855430020752, "grad_norm": 0.715680538211599, "learning_rate": 1.4285714285714286e-06, "loss": 1.1996700763702393, "step": 311 }, { "epoch": 0.07193912842979018, "grad_norm": 0.7466366577926873, "learning_rate": 1.433179723502304e-06, "loss": 1.1007883548736572, "step": 312 }, { "epoch": 0.07216970255937284, "grad_norm": 0.6505103448142013, "learning_rate": 1.4377880184331796e-06, "loss": 1.211327314376831, "step": 313 }, { "epoch": 0.0724002766889555, "grad_norm": 0.7475198907178121, "learning_rate": 1.4423963133640554e-06, "loss": 1.314349889755249, "step": 314 }, { "epoch": 0.07263085081853816, "grad_norm": 0.7782372886671983, "learning_rate": 1.4470046082949308e-06, "loss": 1.2270662784576416, "step": 315 }, { "epoch": 0.07286142494812083, "grad_norm": 0.7521500862086049, "learning_rate": 1.4516129032258064e-06, "loss": 1.1802537441253662, "step": 316 }, { "epoch": 0.07309199907770349, "grad_norm": 0.7684137773026678, "learning_rate": 1.4562211981566818e-06, "loss": 1.275806188583374, "step": 317 }, { "epoch": 0.07332257320728615, "grad_norm": 0.789590997753613, "learning_rate": 1.4608294930875576e-06, "loss": 1.2713148593902588, "step": 318 }, { "epoch": 0.07355314733686881, "grad_norm": 0.8345280857312554, "learning_rate": 1.4654377880184332e-06, "loss": 1.3091093301773071, "step": 319 }, { "epoch": 0.07378372146645146, "grad_norm": 0.7108154017524825, "learning_rate": 1.4700460829493086e-06, "loss": 1.1274672746658325, "step": 320 }, { "epoch": 0.07401429559603412, "grad_norm": 0.7137227522476419, "learning_rate": 1.4746543778801844e-06, "loss": 1.236955165863037, "step": 321 }, { "epoch": 0.07424486972561678, "grad_norm": 0.7825967305477171, "learning_rate": 1.4792626728110598e-06, "loss": 1.2561366558074951, "step": 322 }, { "epoch": 0.07447544385519944, "grad_norm": 0.7250730413423113, "learning_rate": 1.4838709677419353e-06, "loss": 1.1229519844055176, "step": 323 }, { "epoch": 0.0747060179847821, "grad_norm": 0.7688658143017724, "learning_rate": 1.4884792626728112e-06, "loss": 1.200115442276001, "step": 324 }, { "epoch": 0.07493659211436476, "grad_norm": 0.7499295220603182, "learning_rate": 1.4930875576036865e-06, "loss": 1.1930850744247437, "step": 325 }, { "epoch": 0.07516716624394743, "grad_norm": 0.8209913282027874, "learning_rate": 1.4976958525345621e-06, "loss": 1.3204331398010254, "step": 326 }, { "epoch": 0.07539774037353009, "grad_norm": 0.7429612395335268, "learning_rate": 1.5023041474654377e-06, "loss": 1.109247088432312, "step": 327 }, { "epoch": 0.07562831450311275, "grad_norm": 0.7097388789784923, "learning_rate": 1.5069124423963133e-06, "loss": 1.1239254474639893, "step": 328 }, { "epoch": 0.07585888863269541, "grad_norm": 0.7867677832004493, "learning_rate": 1.5115207373271887e-06, "loss": 1.22686767578125, "step": 329 }, { "epoch": 0.07608946276227807, "grad_norm": 0.8425243281826544, "learning_rate": 1.5161290322580645e-06, "loss": 1.2846856117248535, "step": 330 }, { "epoch": 0.07632003689186073, "grad_norm": 0.7611030204070008, "learning_rate": 1.5207373271889401e-06, "loss": 1.1720764636993408, "step": 331 }, { "epoch": 0.0765506110214434, "grad_norm": 0.6783089545901869, "learning_rate": 1.5253456221198155e-06, "loss": 1.05867338180542, "step": 332 }, { "epoch": 0.07678118515102605, "grad_norm": 0.781197296597327, "learning_rate": 1.5299539170506913e-06, "loss": 1.2652220726013184, "step": 333 }, { "epoch": 0.07701175928060872, "grad_norm": 0.7674267376615101, "learning_rate": 1.5345622119815667e-06, "loss": 1.1367218494415283, "step": 334 }, { "epoch": 0.07724233341019138, "grad_norm": 0.7149265599125916, "learning_rate": 1.5391705069124423e-06, "loss": 1.169439673423767, "step": 335 }, { "epoch": 0.07747290753977404, "grad_norm": 0.8284832797024527, "learning_rate": 1.543778801843318e-06, "loss": 1.265104055404663, "step": 336 }, { "epoch": 0.0777034816693567, "grad_norm": 0.6605498491920537, "learning_rate": 1.5483870967741935e-06, "loss": 1.059098243713379, "step": 337 }, { "epoch": 0.07793405579893936, "grad_norm": 0.8255024678570093, "learning_rate": 1.552995391705069e-06, "loss": 1.0998419523239136, "step": 338 }, { "epoch": 0.07816462992852202, "grad_norm": 0.8285993940213782, "learning_rate": 1.5576036866359445e-06, "loss": 1.1361349821090698, "step": 339 }, { "epoch": 0.07839520405810468, "grad_norm": 0.7677612111698353, "learning_rate": 1.5622119815668203e-06, "loss": 1.1051890850067139, "step": 340 }, { "epoch": 0.07862577818768735, "grad_norm": 0.8204078401725609, "learning_rate": 1.5668202764976959e-06, "loss": 1.1675043106079102, "step": 341 }, { "epoch": 0.07885635231727, "grad_norm": 0.8428908363907526, "learning_rate": 1.5714285714285712e-06, "loss": 1.180741786956787, "step": 342 }, { "epoch": 0.07908692644685267, "grad_norm": 0.8559354133772745, "learning_rate": 1.576036866359447e-06, "loss": 1.241147518157959, "step": 343 }, { "epoch": 0.07931750057643533, "grad_norm": 0.848204694935563, "learning_rate": 1.5806451612903224e-06, "loss": 1.2831401824951172, "step": 344 }, { "epoch": 0.07954807470601799, "grad_norm": 0.7281233645086155, "learning_rate": 1.585253456221198e-06, "loss": 1.2328094244003296, "step": 345 }, { "epoch": 0.07977864883560065, "grad_norm": 0.7932743453051899, "learning_rate": 1.5898617511520738e-06, "loss": 1.296494960784912, "step": 346 }, { "epoch": 0.08000922296518331, "grad_norm": 0.7368517201206619, "learning_rate": 1.5944700460829492e-06, "loss": 1.1802153587341309, "step": 347 }, { "epoch": 0.08023979709476596, "grad_norm": 0.8829436639082808, "learning_rate": 1.5990783410138248e-06, "loss": 1.2387690544128418, "step": 348 }, { "epoch": 0.08047037122434862, "grad_norm": 0.8002618721063425, "learning_rate": 1.6036866359447004e-06, "loss": 1.1307916641235352, "step": 349 }, { "epoch": 0.08070094535393128, "grad_norm": 0.8185303488247757, "learning_rate": 1.608294930875576e-06, "loss": 1.117497444152832, "step": 350 }, { "epoch": 0.08093151948351394, "grad_norm": 0.7524331692605707, "learning_rate": 1.6129032258064514e-06, "loss": 1.1360805034637451, "step": 351 }, { "epoch": 0.0811620936130966, "grad_norm": 0.7626049955851422, "learning_rate": 1.6175115207373272e-06, "loss": 1.1756231784820557, "step": 352 }, { "epoch": 0.08139266774267927, "grad_norm": 0.7605864356179197, "learning_rate": 1.6221198156682028e-06, "loss": 1.0260417461395264, "step": 353 }, { "epoch": 0.08162324187226193, "grad_norm": 0.6949706544727091, "learning_rate": 1.6267281105990782e-06, "loss": 1.0863536596298218, "step": 354 }, { "epoch": 0.08185381600184459, "grad_norm": 0.7427032746567218, "learning_rate": 1.631336405529954e-06, "loss": 1.0529779195785522, "step": 355 }, { "epoch": 0.08208439013142725, "grad_norm": 0.7626426518406405, "learning_rate": 1.6359447004608294e-06, "loss": 1.0374994277954102, "step": 356 }, { "epoch": 0.08231496426100991, "grad_norm": 0.7762352327056515, "learning_rate": 1.640552995391705e-06, "loss": 1.153419017791748, "step": 357 }, { "epoch": 0.08254553839059257, "grad_norm": 0.7455681546697154, "learning_rate": 1.6451612903225808e-06, "loss": 1.0155376195907593, "step": 358 }, { "epoch": 0.08277611252017524, "grad_norm": 0.779838920397346, "learning_rate": 1.6497695852534561e-06, "loss": 1.1288530826568604, "step": 359 }, { "epoch": 0.0830066866497579, "grad_norm": 0.8920666311969824, "learning_rate": 1.6543778801843317e-06, "loss": 1.1493456363677979, "step": 360 }, { "epoch": 0.08323726077934056, "grad_norm": 0.8383114858680324, "learning_rate": 1.6589861751152071e-06, "loss": 1.1064895391464233, "step": 361 }, { "epoch": 0.08346783490892322, "grad_norm": 0.752156167882629, "learning_rate": 1.663594470046083e-06, "loss": 1.0102828741073608, "step": 362 }, { "epoch": 0.08369840903850588, "grad_norm": 0.8341451005387022, "learning_rate": 1.6682027649769585e-06, "loss": 1.0750138759613037, "step": 363 }, { "epoch": 0.08392898316808854, "grad_norm": 0.8504953523340792, "learning_rate": 1.672811059907834e-06, "loss": 1.1611195802688599, "step": 364 }, { "epoch": 0.0841595572976712, "grad_norm": 0.8228646683486963, "learning_rate": 1.6774193548387097e-06, "loss": 1.2799829244613647, "step": 365 }, { "epoch": 0.08439013142725386, "grad_norm": 0.9626273899315478, "learning_rate": 1.682027649769585e-06, "loss": 1.2427947521209717, "step": 366 }, { "epoch": 0.08462070555683653, "grad_norm": 0.724553415716276, "learning_rate": 1.6866359447004607e-06, "loss": 1.0379959344863892, "step": 367 }, { "epoch": 0.08485127968641919, "grad_norm": 0.7173602639018404, "learning_rate": 1.6912442396313363e-06, "loss": 0.8439304828643799, "step": 368 }, { "epoch": 0.08508185381600185, "grad_norm": 0.8477542480910312, "learning_rate": 1.6958525345622119e-06, "loss": 1.1249288320541382, "step": 369 }, { "epoch": 0.08531242794558451, "grad_norm": 0.8715705993798011, "learning_rate": 1.7004608294930875e-06, "loss": 1.186207890510559, "step": 370 }, { "epoch": 0.08554300207516717, "grad_norm": 0.9990300341847143, "learning_rate": 1.705069124423963e-06, "loss": 1.1181306838989258, "step": 371 }, { "epoch": 0.08577357620474983, "grad_norm": 0.8792678686182055, "learning_rate": 1.7096774193548387e-06, "loss": 0.9828017950057983, "step": 372 }, { "epoch": 0.0860041503343325, "grad_norm": 0.7710250186072433, "learning_rate": 1.714285714285714e-06, "loss": 1.1158804893493652, "step": 373 }, { "epoch": 0.08623472446391516, "grad_norm": 0.9602707019706166, "learning_rate": 1.7188940092165899e-06, "loss": 1.1771481037139893, "step": 374 }, { "epoch": 0.08646529859349782, "grad_norm": 0.8137176951163696, "learning_rate": 1.7235023041474655e-06, "loss": 1.1378540992736816, "step": 375 }, { "epoch": 0.08669587272308046, "grad_norm": 0.819557644912057, "learning_rate": 1.7281105990783408e-06, "loss": 1.2011152505874634, "step": 376 }, { "epoch": 0.08692644685266313, "grad_norm": 0.8779923853134601, "learning_rate": 1.7327188940092167e-06, "loss": 1.0932848453521729, "step": 377 }, { "epoch": 0.08715702098224579, "grad_norm": 0.7579888078286682, "learning_rate": 1.737327188940092e-06, "loss": 1.0530626773834229, "step": 378 }, { "epoch": 0.08738759511182845, "grad_norm": 0.8123881302713649, "learning_rate": 1.7419354838709676e-06, "loss": 1.09238600730896, "step": 379 }, { "epoch": 0.08761816924141111, "grad_norm": 0.8179032370650432, "learning_rate": 1.7465437788018434e-06, "loss": 1.10097336769104, "step": 380 }, { "epoch": 0.08784874337099377, "grad_norm": 0.9066182701404021, "learning_rate": 1.7511520737327188e-06, "loss": 1.1483392715454102, "step": 381 }, { "epoch": 0.08807931750057643, "grad_norm": 0.7929757896387074, "learning_rate": 1.7557603686635944e-06, "loss": 0.9776606559753418, "step": 382 }, { "epoch": 0.08830989163015909, "grad_norm": 0.7070713392242878, "learning_rate": 1.7603686635944698e-06, "loss": 0.9363219738006592, "step": 383 }, { "epoch": 0.08854046575974175, "grad_norm": 0.8829017901239412, "learning_rate": 1.7649769585253456e-06, "loss": 1.1259841918945312, "step": 384 }, { "epoch": 0.08877103988932442, "grad_norm": 0.8379913612296851, "learning_rate": 1.7695852534562212e-06, "loss": 1.0652339458465576, "step": 385 }, { "epoch": 0.08900161401890708, "grad_norm": 0.9016264696692738, "learning_rate": 1.7741935483870966e-06, "loss": 1.1088197231292725, "step": 386 }, { "epoch": 0.08923218814848974, "grad_norm": 0.8434226175443441, "learning_rate": 1.7788018433179724e-06, "loss": 1.0171717405319214, "step": 387 }, { "epoch": 0.0894627622780724, "grad_norm": 0.893116506697827, "learning_rate": 1.7834101382488478e-06, "loss": 1.0391405820846558, "step": 388 }, { "epoch": 0.08969333640765506, "grad_norm": 0.9558704899064524, "learning_rate": 1.7880184331797234e-06, "loss": 0.9970325231552124, "step": 389 }, { "epoch": 0.08992391053723772, "grad_norm": 0.8304308575964876, "learning_rate": 1.792626728110599e-06, "loss": 1.1427147388458252, "step": 390 }, { "epoch": 0.09015448466682038, "grad_norm": 0.8319398781501527, "learning_rate": 1.7972350230414746e-06, "loss": 0.8830767273902893, "step": 391 }, { "epoch": 0.09038505879640304, "grad_norm": 0.8983385232838542, "learning_rate": 1.8018433179723502e-06, "loss": 1.0469788312911987, "step": 392 }, { "epoch": 0.0906156329259857, "grad_norm": 1.0033385350969977, "learning_rate": 1.8064516129032258e-06, "loss": 1.022156834602356, "step": 393 }, { "epoch": 0.09084620705556837, "grad_norm": 0.8626168210196775, "learning_rate": 1.8110599078341013e-06, "loss": 1.0723674297332764, "step": 394 }, { "epoch": 0.09107678118515103, "grad_norm": 0.8060308252194399, "learning_rate": 1.8156682027649767e-06, "loss": 0.9089772701263428, "step": 395 }, { "epoch": 0.09130735531473369, "grad_norm": 0.8875270675183294, "learning_rate": 1.8202764976958525e-06, "loss": 1.1029877662658691, "step": 396 }, { "epoch": 0.09153792944431635, "grad_norm": 0.94113090982248, "learning_rate": 1.8248847926267281e-06, "loss": 0.998812198638916, "step": 397 }, { "epoch": 0.09176850357389901, "grad_norm": 1.0016962443263888, "learning_rate": 1.8294930875576035e-06, "loss": 1.116652250289917, "step": 398 }, { "epoch": 0.09199907770348167, "grad_norm": 0.8575568562545252, "learning_rate": 1.8341013824884793e-06, "loss": 1.0071923732757568, "step": 399 }, { "epoch": 0.09222965183306434, "grad_norm": 0.9758059413772218, "learning_rate": 1.8387096774193547e-06, "loss": 1.0713586807250977, "step": 400 }, { "epoch": 0.092460225962647, "grad_norm": 0.8883854169226675, "learning_rate": 1.8433179723502303e-06, "loss": 1.0897400379180908, "step": 401 }, { "epoch": 0.09269080009222966, "grad_norm": 0.9342253113098401, "learning_rate": 1.8479262672811061e-06, "loss": 0.9571444392204285, "step": 402 }, { "epoch": 0.09292137422181232, "grad_norm": 0.9173411430110425, "learning_rate": 1.8525345622119815e-06, "loss": 0.9822309017181396, "step": 403 }, { "epoch": 0.09315194835139497, "grad_norm": 0.8821702665182305, "learning_rate": 1.857142857142857e-06, "loss": 1.0010900497436523, "step": 404 }, { "epoch": 0.09338252248097763, "grad_norm": 0.8417761058687274, "learning_rate": 1.8617511520737325e-06, "loss": 0.8548961877822876, "step": 405 }, { "epoch": 0.09361309661056029, "grad_norm": 0.9390158571311362, "learning_rate": 1.8663594470046083e-06, "loss": 1.0856781005859375, "step": 406 }, { "epoch": 0.09384367074014295, "grad_norm": 0.9100547740927183, "learning_rate": 1.8709677419354837e-06, "loss": 1.0913856029510498, "step": 407 }, { "epoch": 0.09407424486972561, "grad_norm": 1.0379606890495185, "learning_rate": 1.8755760368663593e-06, "loss": 0.9409916400909424, "step": 408 }, { "epoch": 0.09430481899930827, "grad_norm": 0.9523962354053698, "learning_rate": 1.880184331797235e-06, "loss": 0.9950551390647888, "step": 409 }, { "epoch": 0.09453539312889093, "grad_norm": 0.861704297563458, "learning_rate": 1.8847926267281104e-06, "loss": 0.9915211200714111, "step": 410 }, { "epoch": 0.0947659672584736, "grad_norm": 0.9290893256356082, "learning_rate": 1.889400921658986e-06, "loss": 1.0381574630737305, "step": 411 }, { "epoch": 0.09499654138805626, "grad_norm": 0.9228539253940193, "learning_rate": 1.8940092165898616e-06, "loss": 0.8911284804344177, "step": 412 }, { "epoch": 0.09522711551763892, "grad_norm": 0.9426577567548815, "learning_rate": 1.8986175115207372e-06, "loss": 0.8757172226905823, "step": 413 }, { "epoch": 0.09545768964722158, "grad_norm": 0.7971911677154941, "learning_rate": 1.9032258064516128e-06, "loss": 0.8362075090408325, "step": 414 }, { "epoch": 0.09568826377680424, "grad_norm": 0.9051810749284879, "learning_rate": 1.9078341013824884e-06, "loss": 0.906524658203125, "step": 415 }, { "epoch": 0.0959188379063869, "grad_norm": 0.9304511138009018, "learning_rate": 1.912442396313364e-06, "loss": 1.100447654724121, "step": 416 }, { "epoch": 0.09614941203596956, "grad_norm": 0.8321943001479206, "learning_rate": 1.9170506912442396e-06, "loss": 0.9658455848693848, "step": 417 }, { "epoch": 0.09637998616555223, "grad_norm": 0.9393736008547379, "learning_rate": 1.921658986175115e-06, "loss": 0.971304714679718, "step": 418 }, { "epoch": 0.09661056029513489, "grad_norm": 0.8792304256570437, "learning_rate": 1.926267281105991e-06, "loss": 0.916153073310852, "step": 419 }, { "epoch": 0.09684113442471755, "grad_norm": 0.960700719296913, "learning_rate": 1.930875576036866e-06, "loss": 0.9166572093963623, "step": 420 }, { "epoch": 0.09707170855430021, "grad_norm": 0.8385154496673872, "learning_rate": 1.935483870967742e-06, "loss": 0.8754867315292358, "step": 421 }, { "epoch": 0.09730228268388287, "grad_norm": 0.8951117289542856, "learning_rate": 1.9400921658986174e-06, "loss": 0.9507668018341064, "step": 422 }, { "epoch": 0.09753285681346553, "grad_norm": 1.0251554467069826, "learning_rate": 1.944700460829493e-06, "loss": 0.8977904319763184, "step": 423 }, { "epoch": 0.0977634309430482, "grad_norm": 0.8433365129133346, "learning_rate": 1.9493087557603686e-06, "loss": 0.8359580039978027, "step": 424 }, { "epoch": 0.09799400507263085, "grad_norm": 0.8653781711190967, "learning_rate": 1.953917050691244e-06, "loss": 0.8928875923156738, "step": 425 }, { "epoch": 0.09822457920221352, "grad_norm": 1.016156538051323, "learning_rate": 1.9585253456221198e-06, "loss": 0.9031360149383545, "step": 426 }, { "epoch": 0.09845515333179618, "grad_norm": 0.9535004151409068, "learning_rate": 1.963133640552995e-06, "loss": 0.9135938286781311, "step": 427 }, { "epoch": 0.09868572746137884, "grad_norm": 0.9913179989235431, "learning_rate": 1.967741935483871e-06, "loss": 0.8978056907653809, "step": 428 }, { "epoch": 0.0989163015909615, "grad_norm": 0.7393338474601954, "learning_rate": 1.9723502304147463e-06, "loss": 0.8236517906188965, "step": 429 }, { "epoch": 0.09914687572054416, "grad_norm": 0.9578937542491764, "learning_rate": 1.976958525345622e-06, "loss": 0.8279497027397156, "step": 430 }, { "epoch": 0.09937744985012681, "grad_norm": 0.8687224271614162, "learning_rate": 1.9815668202764975e-06, "loss": 0.9273175001144409, "step": 431 }, { "epoch": 0.09960802397970947, "grad_norm": 0.9008857811722423, "learning_rate": 1.9861751152073733e-06, "loss": 0.8990100622177124, "step": 432 }, { "epoch": 0.09983859810929213, "grad_norm": 0.9051637314581525, "learning_rate": 1.9907834101382487e-06, "loss": 0.9221487045288086, "step": 433 }, { "epoch": 0.10006917223887479, "grad_norm": 0.8468556051112544, "learning_rate": 1.995391705069124e-06, "loss": 0.7376757264137268, "step": 434 }, { "epoch": 0.10029974636845745, "grad_norm": 0.8651656722450953, "learning_rate": 2e-06, "loss": 0.8496265411376953, "step": 435 }, { "epoch": 0.10053032049804012, "grad_norm": 0.8177327534577133, "learning_rate": 1.9999999273199326e-06, "loss": 0.73260897397995, "step": 436 }, { "epoch": 0.10076089462762278, "grad_norm": 1.2545811776233549, "learning_rate": 1.999999709279741e-06, "loss": 0.9583776593208313, "step": 437 }, { "epoch": 0.10099146875720544, "grad_norm": 0.7771019547302918, "learning_rate": 1.9999993458794573e-06, "loss": 0.810507595539093, "step": 438 }, { "epoch": 0.1012220428867881, "grad_norm": 0.8756547566965167, "learning_rate": 1.9999988371191337e-06, "loss": 0.7957329750061035, "step": 439 }, { "epoch": 0.10145261701637076, "grad_norm": 0.8325539024899065, "learning_rate": 1.9999981829988444e-06, "loss": 0.8141027688980103, "step": 440 }, { "epoch": 0.10168319114595342, "grad_norm": 0.9256731752358246, "learning_rate": 1.9999973835186847e-06, "loss": 0.8454669117927551, "step": 441 }, { "epoch": 0.10191376527553608, "grad_norm": 0.9086105801784582, "learning_rate": 1.9999964386787706e-06, "loss": 0.7966687679290771, "step": 442 }, { "epoch": 0.10214433940511874, "grad_norm": 0.8420803725442093, "learning_rate": 1.9999953484792394e-06, "loss": 0.8623852133750916, "step": 443 }, { "epoch": 0.1023749135347014, "grad_norm": 0.976279238987049, "learning_rate": 1.9999941129202494e-06, "loss": 0.9604165554046631, "step": 444 }, { "epoch": 0.10260548766428407, "grad_norm": 0.8427059790049124, "learning_rate": 1.999992732001981e-06, "loss": 0.7461415529251099, "step": 445 }, { "epoch": 0.10283606179386673, "grad_norm": 0.8066869506045082, "learning_rate": 1.9999912057246342e-06, "loss": 0.7243722677230835, "step": 446 }, { "epoch": 0.10306663592344939, "grad_norm": 0.8507773615519725, "learning_rate": 1.999989534088431e-06, "loss": 0.8466402292251587, "step": 447 }, { "epoch": 0.10329721005303205, "grad_norm": 0.9504023717644374, "learning_rate": 1.9999877170936142e-06, "loss": 0.8062578439712524, "step": 448 }, { "epoch": 0.10352778418261471, "grad_norm": 0.8134117517887439, "learning_rate": 1.9999857547404484e-06, "loss": 0.8979625701904297, "step": 449 }, { "epoch": 0.10375835831219737, "grad_norm": 0.7889840834274454, "learning_rate": 1.999983647029219e-06, "loss": 0.7970046401023865, "step": 450 }, { "epoch": 0.10398893244178004, "grad_norm": 0.8933195109789729, "learning_rate": 1.999981393960231e-06, "loss": 0.9027936458587646, "step": 451 }, { "epoch": 0.1042195065713627, "grad_norm": 0.9428128689196352, "learning_rate": 1.9999789955338133e-06, "loss": 0.8347916007041931, "step": 452 }, { "epoch": 0.10445008070094536, "grad_norm": 0.7636783217821816, "learning_rate": 1.9999764517503146e-06, "loss": 0.7856979370117188, "step": 453 }, { "epoch": 0.10468065483052802, "grad_norm": 0.8588750023960529, "learning_rate": 1.9999737626101037e-06, "loss": 0.8370383381843567, "step": 454 }, { "epoch": 0.10491122896011068, "grad_norm": 0.7607065236764231, "learning_rate": 1.9999709281135718e-06, "loss": 0.8629742860794067, "step": 455 }, { "epoch": 0.10514180308969334, "grad_norm": 0.7031266959727278, "learning_rate": 1.9999679482611315e-06, "loss": 0.8187414407730103, "step": 456 }, { "epoch": 0.105372377219276, "grad_norm": 0.7996485745988237, "learning_rate": 1.9999648230532156e-06, "loss": 0.8169279098510742, "step": 457 }, { "epoch": 0.10560295134885866, "grad_norm": 0.7291726430068795, "learning_rate": 1.999961552490278e-06, "loss": 0.7186012268066406, "step": 458 }, { "epoch": 0.10583352547844131, "grad_norm": 0.8814433348597316, "learning_rate": 1.9999581365727947e-06, "loss": 0.8088201284408569, "step": 459 }, { "epoch": 0.10606409960802397, "grad_norm": 0.8945815471698739, "learning_rate": 1.999954575301262e-06, "loss": 0.7067796587944031, "step": 460 }, { "epoch": 0.10629467373760663, "grad_norm": 0.8727386643724712, "learning_rate": 1.9999508686761974e-06, "loss": 0.8839461803436279, "step": 461 }, { "epoch": 0.1065252478671893, "grad_norm": 0.7752145606049893, "learning_rate": 1.99994701669814e-06, "loss": 0.750046968460083, "step": 462 }, { "epoch": 0.10675582199677196, "grad_norm": 0.8246620057663118, "learning_rate": 1.999943019367649e-06, "loss": 0.7954964637756348, "step": 463 }, { "epoch": 0.10698639612635462, "grad_norm": 0.8139454190246876, "learning_rate": 1.9999388766853065e-06, "loss": 0.7178900241851807, "step": 464 }, { "epoch": 0.10721697025593728, "grad_norm": 0.7775108685144316, "learning_rate": 1.999934588651714e-06, "loss": 0.7583869695663452, "step": 465 }, { "epoch": 0.10744754438551994, "grad_norm": 0.7294165374555056, "learning_rate": 1.999930155267495e-06, "loss": 0.8068876266479492, "step": 466 }, { "epoch": 0.1076781185151026, "grad_norm": 0.7396884936816651, "learning_rate": 1.9999255765332946e-06, "loss": 0.7507776021957397, "step": 467 }, { "epoch": 0.10790869264468526, "grad_norm": 0.7418847797451098, "learning_rate": 1.999920852449777e-06, "loss": 0.7719494104385376, "step": 468 }, { "epoch": 0.10813926677426793, "grad_norm": 0.7666886626519035, "learning_rate": 1.99991598301763e-06, "loss": 0.7420990467071533, "step": 469 }, { "epoch": 0.10836984090385059, "grad_norm": 0.7701810012003275, "learning_rate": 1.9999109682375606e-06, "loss": 0.7152374386787415, "step": 470 }, { "epoch": 0.10860041503343325, "grad_norm": 0.6850973266115482, "learning_rate": 1.9999058081102985e-06, "loss": 0.7971220016479492, "step": 471 }, { "epoch": 0.10883098916301591, "grad_norm": 0.7306176016482578, "learning_rate": 1.9999005026365936e-06, "loss": 0.774874746799469, "step": 472 }, { "epoch": 0.10906156329259857, "grad_norm": 0.8957955356096076, "learning_rate": 1.999895051817216e-06, "loss": 0.7567731142044067, "step": 473 }, { "epoch": 0.10929213742218123, "grad_norm": 0.9679087986333686, "learning_rate": 1.99988945565296e-06, "loss": 0.7221060991287231, "step": 474 }, { "epoch": 0.1095227115517639, "grad_norm": 0.7758710632294333, "learning_rate": 1.9998837141446378e-06, "loss": 0.8064852952957153, "step": 475 }, { "epoch": 0.10975328568134655, "grad_norm": 0.7342367942239104, "learning_rate": 1.9998778272930842e-06, "loss": 0.7329462766647339, "step": 476 }, { "epoch": 0.10998385981092922, "grad_norm": 0.6944047501493505, "learning_rate": 1.999871795099155e-06, "loss": 0.715752363204956, "step": 477 }, { "epoch": 0.11021443394051188, "grad_norm": 1.250464562888065, "learning_rate": 1.9998656175637265e-06, "loss": 0.8702882528305054, "step": 478 }, { "epoch": 0.11044500807009454, "grad_norm": 0.9132853105204283, "learning_rate": 1.9998592946876976e-06, "loss": 0.8559622764587402, "step": 479 }, { "epoch": 0.1106755821996772, "grad_norm": 1.0302853941011325, "learning_rate": 1.999852826471987e-06, "loss": 0.910442590713501, "step": 480 }, { "epoch": 0.11090615632925986, "grad_norm": 0.7658983046756905, "learning_rate": 1.9998462129175347e-06, "loss": 0.8159372806549072, "step": 481 }, { "epoch": 0.11113673045884252, "grad_norm": 0.6814545269174561, "learning_rate": 1.9998394540253022e-06, "loss": 0.8120635747909546, "step": 482 }, { "epoch": 0.11136730458842518, "grad_norm": 0.9382461503301303, "learning_rate": 1.999832549796272e-06, "loss": 0.7867682576179504, "step": 483 }, { "epoch": 0.11159787871800785, "grad_norm": 0.7285854274509946, "learning_rate": 1.999825500231448e-06, "loss": 0.695517897605896, "step": 484 }, { "epoch": 0.1118284528475905, "grad_norm": 0.7426222297635688, "learning_rate": 1.999818305331854e-06, "loss": 0.8402971029281616, "step": 485 }, { "epoch": 0.11205902697717317, "grad_norm": 0.9496598665654408, "learning_rate": 1.9998109650985372e-06, "loss": 0.7987074851989746, "step": 486 }, { "epoch": 0.11228960110675582, "grad_norm": 0.7601824170608918, "learning_rate": 1.9998034795325634e-06, "loss": 0.6525362133979797, "step": 487 }, { "epoch": 0.11252017523633848, "grad_norm": 0.6649425764525309, "learning_rate": 1.999795848635021e-06, "loss": 0.6218863725662231, "step": 488 }, { "epoch": 0.11275074936592114, "grad_norm": 0.6793237780262881, "learning_rate": 1.99978807240702e-06, "loss": 0.7225729823112488, "step": 489 }, { "epoch": 0.1129813234955038, "grad_norm": 0.7289774462660574, "learning_rate": 1.9997801508496893e-06, "loss": 0.7553551197052002, "step": 490 }, { "epoch": 0.11321189762508646, "grad_norm": 0.7070554840091658, "learning_rate": 1.999772083964182e-06, "loss": 0.6695772409439087, "step": 491 }, { "epoch": 0.11344247175466912, "grad_norm": 0.7937000317220514, "learning_rate": 1.999763871751669e-06, "loss": 0.7683162689208984, "step": 492 }, { "epoch": 0.11367304588425178, "grad_norm": 0.7958897510308529, "learning_rate": 1.9997555142133457e-06, "loss": 0.7761441469192505, "step": 493 }, { "epoch": 0.11390362001383444, "grad_norm": 0.8391915745578431, "learning_rate": 1.999747011350426e-06, "loss": 0.7204692959785461, "step": 494 }, { "epoch": 0.1141341941434171, "grad_norm": 0.6535908344557003, "learning_rate": 1.999738363164146e-06, "loss": 0.6960519552230835, "step": 495 }, { "epoch": 0.11436476827299977, "grad_norm": 0.669834933810116, "learning_rate": 1.999729569655763e-06, "loss": 0.7502788305282593, "step": 496 }, { "epoch": 0.11459534240258243, "grad_norm": 0.7119093873273127, "learning_rate": 1.999720630826555e-06, "loss": 0.7649067640304565, "step": 497 }, { "epoch": 0.11482591653216509, "grad_norm": 0.865452520980124, "learning_rate": 1.9997115466778214e-06, "loss": 0.6867918968200684, "step": 498 }, { "epoch": 0.11505649066174775, "grad_norm": 0.7725462530919065, "learning_rate": 1.9997023172108828e-06, "loss": 0.7324330806732178, "step": 499 }, { "epoch": 0.11528706479133041, "grad_norm": 0.7493898462804314, "learning_rate": 1.999692942427081e-06, "loss": 0.7452527284622192, "step": 500 }, { "epoch": 0.11551763892091307, "grad_norm": 0.8849003751162662, "learning_rate": 1.9996834223277775e-06, "loss": 0.8311381340026855, "step": 501 }, { "epoch": 0.11574821305049574, "grad_norm": 0.7698737492516583, "learning_rate": 1.999673756914358e-06, "loss": 0.6955340504646301, "step": 502 }, { "epoch": 0.1159787871800784, "grad_norm": 0.9035827861690212, "learning_rate": 1.999663946188226e-06, "loss": 0.802892804145813, "step": 503 }, { "epoch": 0.11620936130966106, "grad_norm": 0.9827928009523055, "learning_rate": 1.9996539901508086e-06, "loss": 0.8307123184204102, "step": 504 }, { "epoch": 0.11643993543924372, "grad_norm": 0.7167523084062808, "learning_rate": 1.9996438888035525e-06, "loss": 0.7604272365570068, "step": 505 }, { "epoch": 0.11667050956882638, "grad_norm": 0.7887244154559485, "learning_rate": 1.9996336421479256e-06, "loss": 0.798006534576416, "step": 506 }, { "epoch": 0.11690108369840904, "grad_norm": 0.9102232519285063, "learning_rate": 1.999623250185418e-06, "loss": 0.7342728972434998, "step": 507 }, { "epoch": 0.1171316578279917, "grad_norm": 0.689331248687117, "learning_rate": 1.9996127129175402e-06, "loss": 0.7659468650817871, "step": 508 }, { "epoch": 0.11736223195757436, "grad_norm": 0.9057052272338976, "learning_rate": 1.999602030345824e-06, "loss": 0.6467913389205933, "step": 509 }, { "epoch": 0.11759280608715703, "grad_norm": 0.9026632882900626, "learning_rate": 1.9995912024718214e-06, "loss": 0.8207371234893799, "step": 510 }, { "epoch": 0.11782338021673969, "grad_norm": 0.6427345565408408, "learning_rate": 1.999580229297108e-06, "loss": 0.6865919232368469, "step": 511 }, { "epoch": 0.11805395434632235, "grad_norm": 0.9123825063372557, "learning_rate": 1.999569110823277e-06, "loss": 0.7367759346961975, "step": 512 }, { "epoch": 0.11828452847590501, "grad_norm": 0.7732312467631449, "learning_rate": 1.9995578470519455e-06, "loss": 0.678460955619812, "step": 513 }, { "epoch": 0.11851510260548767, "grad_norm": 0.9273893139854266, "learning_rate": 1.999546437984751e-06, "loss": 0.7442954182624817, "step": 514 }, { "epoch": 0.11874567673507032, "grad_norm": 0.7064385006159516, "learning_rate": 1.9995348836233515e-06, "loss": 0.6881241798400879, "step": 515 }, { "epoch": 0.11897625086465298, "grad_norm": 0.7494917485319132, "learning_rate": 1.9995231839694267e-06, "loss": 0.6957181692123413, "step": 516 }, { "epoch": 0.11920682499423564, "grad_norm": 1.0228956088069594, "learning_rate": 1.9995113390246773e-06, "loss": 0.655665934085846, "step": 517 }, { "epoch": 0.1194373991238183, "grad_norm": 0.8789756041062182, "learning_rate": 1.9994993487908245e-06, "loss": 0.8156173229217529, "step": 518 }, { "epoch": 0.11966797325340096, "grad_norm": 0.8973364358315123, "learning_rate": 1.9994872132696125e-06, "loss": 0.7063135504722595, "step": 519 }, { "epoch": 0.11989854738298363, "grad_norm": 0.91785396837973, "learning_rate": 1.9994749324628046e-06, "loss": 0.694409966468811, "step": 520 }, { "epoch": 0.12012912151256629, "grad_norm": 0.7331348179727938, "learning_rate": 1.9994625063721852e-06, "loss": 0.8167020082473755, "step": 521 }, { "epoch": 0.12035969564214895, "grad_norm": 0.9326590546614593, "learning_rate": 1.9994499349995615e-06, "loss": 0.7214051485061646, "step": 522 }, { "epoch": 0.12059026977173161, "grad_norm": 0.8993621490561152, "learning_rate": 1.999437218346761e-06, "loss": 0.8798317909240723, "step": 523 }, { "epoch": 0.12082084390131427, "grad_norm": 0.6552492075288662, "learning_rate": 1.9994243564156316e-06, "loss": 0.684230387210846, "step": 524 }, { "epoch": 0.12105141803089693, "grad_norm": 0.9112132053465716, "learning_rate": 1.999411349208043e-06, "loss": 0.7519755363464355, "step": 525 }, { "epoch": 0.1212819921604796, "grad_norm": 0.8052315425352758, "learning_rate": 1.9993981967258857e-06, "loss": 0.8420398235321045, "step": 526 }, { "epoch": 0.12151256629006225, "grad_norm": 0.7105743668928439, "learning_rate": 1.999384898971073e-06, "loss": 0.8349270820617676, "step": 527 }, { "epoch": 0.12174314041964492, "grad_norm": 1.0983006521395142, "learning_rate": 1.999371455945536e-06, "loss": 0.794980525970459, "step": 528 }, { "epoch": 0.12197371454922758, "grad_norm": 1.1816598770476783, "learning_rate": 1.9993578676512294e-06, "loss": 0.666529655456543, "step": 529 }, { "epoch": 0.12220428867881024, "grad_norm": 0.7564948773505585, "learning_rate": 1.999344134090129e-06, "loss": 0.7356991767883301, "step": 530 }, { "epoch": 0.1224348628083929, "grad_norm": 0.8210277180950322, "learning_rate": 1.9993302552642305e-06, "loss": 0.6289858818054199, "step": 531 }, { "epoch": 0.12266543693797556, "grad_norm": 0.7570779839057131, "learning_rate": 1.9993162311755516e-06, "loss": 0.706937313079834, "step": 532 }, { "epoch": 0.12289601106755822, "grad_norm": 0.8676215771749471, "learning_rate": 1.99930206182613e-06, "loss": 0.7265158891677856, "step": 533 }, { "epoch": 0.12312658519714088, "grad_norm": 0.7802472371537522, "learning_rate": 1.999287747218027e-06, "loss": 0.6575910449028015, "step": 534 }, { "epoch": 0.12335715932672355, "grad_norm": 0.6298254280489823, "learning_rate": 1.999273287353322e-06, "loss": 0.6696841716766357, "step": 535 }, { "epoch": 0.1235877334563062, "grad_norm": 1.071079002554872, "learning_rate": 1.9992586822341177e-06, "loss": 0.7749101519584656, "step": 536 }, { "epoch": 0.12381830758588887, "grad_norm": 0.9432884782892066, "learning_rate": 1.9992439318625367e-06, "loss": 0.6880518198013306, "step": 537 }, { "epoch": 0.12404888171547153, "grad_norm": 0.7827285978985046, "learning_rate": 1.999229036240723e-06, "loss": 0.6871178150177002, "step": 538 }, { "epoch": 0.12427945584505419, "grad_norm": 0.7976778538474537, "learning_rate": 1.999213995370842e-06, "loss": 0.5867285132408142, "step": 539 }, { "epoch": 0.12451002997463685, "grad_norm": 0.9357527236724963, "learning_rate": 1.99919880925508e-06, "loss": 0.8276966214179993, "step": 540 }, { "epoch": 0.12474060410421951, "grad_norm": 1.0175450529032033, "learning_rate": 1.9991834778956445e-06, "loss": 0.7710754871368408, "step": 541 }, { "epoch": 0.12497117823380217, "grad_norm": 0.9390745817535735, "learning_rate": 1.9991680012947642e-06, "loss": 0.7753217816352844, "step": 542 }, { "epoch": 0.12520175236338482, "grad_norm": 0.8094522929040034, "learning_rate": 1.9991523794546886e-06, "loss": 0.7906090617179871, "step": 543 }, { "epoch": 0.12543232649296748, "grad_norm": 0.9340000664605023, "learning_rate": 1.9991366123776885e-06, "loss": 0.7199760675430298, "step": 544 }, { "epoch": 0.12566290062255014, "grad_norm": 0.7023452308433018, "learning_rate": 1.9991207000660556e-06, "loss": 0.671667218208313, "step": 545 }, { "epoch": 0.1258934747521328, "grad_norm": 0.8347026711317173, "learning_rate": 1.9991046425221036e-06, "loss": 0.7289182543754578, "step": 546 }, { "epoch": 0.12612404888171547, "grad_norm": 0.7827652568460417, "learning_rate": 1.999088439748166e-06, "loss": 0.6894270181655884, "step": 547 }, { "epoch": 0.12635462301129813, "grad_norm": 0.7280796152072353, "learning_rate": 1.9990720917465983e-06, "loss": 0.5861620306968689, "step": 548 }, { "epoch": 0.1265851971408808, "grad_norm": 0.9057106564897087, "learning_rate": 1.999055598519777e-06, "loss": 0.7082245349884033, "step": 549 }, { "epoch": 0.12681577127046345, "grad_norm": 0.9647506404446157, "learning_rate": 1.999038960070099e-06, "loss": 0.6746149659156799, "step": 550 }, { "epoch": 0.1270463454000461, "grad_norm": 0.8620899067636014, "learning_rate": 1.999022176399983e-06, "loss": 0.7791188955307007, "step": 551 }, { "epoch": 0.12727691952962877, "grad_norm": 0.7157725370776972, "learning_rate": 1.999005247511869e-06, "loss": 0.6371017694473267, "step": 552 }, { "epoch": 0.12750749365921143, "grad_norm": 1.0373263968991309, "learning_rate": 1.9989881734082182e-06, "loss": 0.7006558179855347, "step": 553 }, { "epoch": 0.1277380677887941, "grad_norm": 1.0670128946400503, "learning_rate": 1.9989709540915115e-06, "loss": 0.7011476755142212, "step": 554 }, { "epoch": 0.12796864191837676, "grad_norm": 0.7293348024241428, "learning_rate": 1.998953589564252e-06, "loss": 0.6518280506134033, "step": 555 }, { "epoch": 0.12819921604795942, "grad_norm": 1.013490270581775, "learning_rate": 1.9989360798289646e-06, "loss": 0.703351616859436, "step": 556 }, { "epoch": 0.12842979017754208, "grad_norm": 0.9007382613729068, "learning_rate": 1.998918424888194e-06, "loss": 0.7498817443847656, "step": 557 }, { "epoch": 0.12866036430712474, "grad_norm": 0.7936147649672419, "learning_rate": 1.998900624744507e-06, "loss": 0.647042989730835, "step": 558 }, { "epoch": 0.1288909384367074, "grad_norm": 1.058658035724676, "learning_rate": 1.99888267940049e-06, "loss": 0.7519131898880005, "step": 559 }, { "epoch": 0.12912151256629006, "grad_norm": 0.9392201849899589, "learning_rate": 1.9988645888587524e-06, "loss": 0.8416757583618164, "step": 560 }, { "epoch": 0.12935208669587273, "grad_norm": 0.7856467653874107, "learning_rate": 1.9988463531219238e-06, "loss": 0.7044156193733215, "step": 561 }, { "epoch": 0.1295826608254554, "grad_norm": 0.7712707168267965, "learning_rate": 1.9988279721926547e-06, "loss": 0.5429179668426514, "step": 562 }, { "epoch": 0.12981323495503805, "grad_norm": 0.8186921939471294, "learning_rate": 1.9988094460736173e-06, "loss": 0.6146735548973083, "step": 563 }, { "epoch": 0.1300438090846207, "grad_norm": 0.8439852070799176, "learning_rate": 1.9987907747675038e-06, "loss": 0.7544587850570679, "step": 564 }, { "epoch": 0.13027438321420337, "grad_norm": 0.9760725928946941, "learning_rate": 1.998771958277029e-06, "loss": 0.7344266772270203, "step": 565 }, { "epoch": 0.13050495734378603, "grad_norm": 0.8485941936610121, "learning_rate": 1.9987529966049276e-06, "loss": 0.6952091455459595, "step": 566 }, { "epoch": 0.1307355314733687, "grad_norm": 0.7996168239987546, "learning_rate": 1.9987338897539563e-06, "loss": 0.6164644956588745, "step": 567 }, { "epoch": 0.13096610560295135, "grad_norm": 1.04815525718601, "learning_rate": 1.998714637726892e-06, "loss": 0.7554208636283875, "step": 568 }, { "epoch": 0.13119667973253402, "grad_norm": 0.97358719596577, "learning_rate": 1.9986952405265336e-06, "loss": 0.6640980243682861, "step": 569 }, { "epoch": 0.13142725386211668, "grad_norm": 0.8089360786109361, "learning_rate": 1.9986756981557005e-06, "loss": 0.6947968006134033, "step": 570 }, { "epoch": 0.13165782799169934, "grad_norm": 0.8239726316605849, "learning_rate": 1.9986560106172332e-06, "loss": 0.5987592935562134, "step": 571 }, { "epoch": 0.131888402121282, "grad_norm": 0.709030479654625, "learning_rate": 1.9986361779139944e-06, "loss": 0.5830701589584351, "step": 572 }, { "epoch": 0.13211897625086466, "grad_norm": 1.1719328645727012, "learning_rate": 1.9986162000488655e-06, "loss": 0.6589827537536621, "step": 573 }, { "epoch": 0.13234955038044732, "grad_norm": 0.795778409153881, "learning_rate": 1.9985960770247514e-06, "loss": 0.7761766910552979, "step": 574 }, { "epoch": 0.13258012451002998, "grad_norm": 0.8403074018612, "learning_rate": 1.998575808844577e-06, "loss": 0.6817613244056702, "step": 575 }, { "epoch": 0.13281069863961265, "grad_norm": 0.8817998372104671, "learning_rate": 1.998555395511289e-06, "loss": 0.553085207939148, "step": 576 }, { "epoch": 0.1330412727691953, "grad_norm": 0.6885856342268037, "learning_rate": 1.998534837027854e-06, "loss": 0.6500711441040039, "step": 577 }, { "epoch": 0.13327184689877797, "grad_norm": 1.046231764034874, "learning_rate": 1.9985141333972605e-06, "loss": 0.7818950414657593, "step": 578 }, { "epoch": 0.13350242102836063, "grad_norm": 0.7987907466299384, "learning_rate": 1.9984932846225178e-06, "loss": 0.7030247449874878, "step": 579 }, { "epoch": 0.1337329951579433, "grad_norm": 0.7031460051202854, "learning_rate": 1.9984722907066572e-06, "loss": 0.6336206197738647, "step": 580 }, { "epoch": 0.13396356928752595, "grad_norm": 0.8178681347907562, "learning_rate": 1.9984511516527295e-06, "loss": 0.7483044862747192, "step": 581 }, { "epoch": 0.1341941434171086, "grad_norm": 0.8070808524670383, "learning_rate": 1.9984298674638084e-06, "loss": 0.7124725580215454, "step": 582 }, { "epoch": 0.13442471754669127, "grad_norm": 0.8209937510618921, "learning_rate": 1.998408438142987e-06, "loss": 0.623436450958252, "step": 583 }, { "epoch": 0.1346552916762739, "grad_norm": 0.8592886051949084, "learning_rate": 1.9983868636933804e-06, "loss": 0.646303653717041, "step": 584 }, { "epoch": 0.13488586580585657, "grad_norm": 0.715391883952278, "learning_rate": 1.998365144118125e-06, "loss": 0.6349619626998901, "step": 585 }, { "epoch": 0.13511643993543923, "grad_norm": 0.842094849315078, "learning_rate": 1.9983432794203778e-06, "loss": 0.5222466588020325, "step": 586 }, { "epoch": 0.1353470140650219, "grad_norm": 0.7893129778630776, "learning_rate": 1.998321269603317e-06, "loss": 0.7210453152656555, "step": 587 }, { "epoch": 0.13557758819460455, "grad_norm": 0.8260995902689467, "learning_rate": 1.998299114670142e-06, "loss": 0.6829872131347656, "step": 588 }, { "epoch": 0.13580816232418721, "grad_norm": 0.714861095640182, "learning_rate": 1.998276814624073e-06, "loss": 0.6493744254112244, "step": 589 }, { "epoch": 0.13603873645376988, "grad_norm": 0.8350239344719634, "learning_rate": 1.998254369468352e-06, "loss": 0.6885819435119629, "step": 590 }, { "epoch": 0.13626931058335254, "grad_norm": 0.7070632175859811, "learning_rate": 1.9982317792062415e-06, "loss": 0.6393503546714783, "step": 591 }, { "epoch": 0.1364998847129352, "grad_norm": 1.010551624947432, "learning_rate": 1.998209043841025e-06, "loss": 0.7243417501449585, "step": 592 }, { "epoch": 0.13673045884251786, "grad_norm": 0.693273868923859, "learning_rate": 1.9981861633760073e-06, "loss": 0.5955190658569336, "step": 593 }, { "epoch": 0.13696103297210052, "grad_norm": 0.89841301134605, "learning_rate": 1.9981631378145147e-06, "loss": 0.6907675862312317, "step": 594 }, { "epoch": 0.13719160710168318, "grad_norm": 1.022542216960162, "learning_rate": 1.9981399671598938e-06, "loss": 0.8540418148040771, "step": 595 }, { "epoch": 0.13742218123126584, "grad_norm": 0.850573072747265, "learning_rate": 1.9981166514155128e-06, "loss": 0.6558555364608765, "step": 596 }, { "epoch": 0.1376527553608485, "grad_norm": 0.9448807343375427, "learning_rate": 1.9980931905847607e-06, "loss": 0.6902164220809937, "step": 597 }, { "epoch": 0.13788332949043117, "grad_norm": 1.240663469028779, "learning_rate": 1.9980695846710485e-06, "loss": 0.7090387344360352, "step": 598 }, { "epoch": 0.13811390362001383, "grad_norm": 0.8847772852436644, "learning_rate": 1.9980458336778067e-06, "loss": 0.5913621187210083, "step": 599 }, { "epoch": 0.1383444777495965, "grad_norm": 0.864647475805302, "learning_rate": 1.998021937608488e-06, "loss": 0.6742709279060364, "step": 600 }, { "epoch": 0.13857505187917915, "grad_norm": 0.9253166862332501, "learning_rate": 1.997997896466566e-06, "loss": 0.7156273126602173, "step": 601 }, { "epoch": 0.1388056260087618, "grad_norm": 0.7104566809406643, "learning_rate": 1.9979737102555358e-06, "loss": 0.6039655208587646, "step": 602 }, { "epoch": 0.13903620013834447, "grad_norm": 0.7521323143425293, "learning_rate": 1.9979493789789123e-06, "loss": 0.6437175273895264, "step": 603 }, { "epoch": 0.13926677426792713, "grad_norm": 0.7922747435817725, "learning_rate": 1.9979249026402327e-06, "loss": 0.6037663221359253, "step": 604 }, { "epoch": 0.1394973483975098, "grad_norm": 0.8526913554693543, "learning_rate": 1.9979002812430544e-06, "loss": 0.6014829874038696, "step": 605 }, { "epoch": 0.13972792252709246, "grad_norm": 0.9960319429386536, "learning_rate": 1.9978755147909575e-06, "loss": 0.5644428133964539, "step": 606 }, { "epoch": 0.13995849665667512, "grad_norm": 0.7146930597248379, "learning_rate": 1.997850603287541e-06, "loss": 0.5483256578445435, "step": 607 }, { "epoch": 0.14018907078625778, "grad_norm": 0.941628560636658, "learning_rate": 1.9978255467364264e-06, "loss": 0.6323236227035522, "step": 608 }, { "epoch": 0.14041964491584044, "grad_norm": 0.8661204864695959, "learning_rate": 1.9978003451412563e-06, "loss": 0.677186131477356, "step": 609 }, { "epoch": 0.1406502190454231, "grad_norm": 0.7467694215725664, "learning_rate": 1.9977749985056934e-06, "loss": 0.6768285036087036, "step": 610 }, { "epoch": 0.14088079317500576, "grad_norm": 0.6978429335446755, "learning_rate": 1.997749506833422e-06, "loss": 0.5347047448158264, "step": 611 }, { "epoch": 0.14111136730458843, "grad_norm": 0.8856138167235749, "learning_rate": 1.9977238701281484e-06, "loss": 0.7459336519241333, "step": 612 }, { "epoch": 0.1413419414341711, "grad_norm": 0.7081494897690513, "learning_rate": 1.9976980883935982e-06, "loss": 0.6617337465286255, "step": 613 }, { "epoch": 0.14157251556375375, "grad_norm": 0.766248846701343, "learning_rate": 1.9976721616335197e-06, "loss": 0.6214765310287476, "step": 614 }, { "epoch": 0.1418030896933364, "grad_norm": 0.9664061776833217, "learning_rate": 1.9976460898516814e-06, "loss": 0.7468793392181396, "step": 615 }, { "epoch": 0.14203366382291907, "grad_norm": 0.9401860990707812, "learning_rate": 1.9976198730518733e-06, "loss": 0.676013708114624, "step": 616 }, { "epoch": 0.14226423795250173, "grad_norm": 0.7984359669803877, "learning_rate": 1.9975935112379057e-06, "loss": 0.6350057125091553, "step": 617 }, { "epoch": 0.1424948120820844, "grad_norm": 0.7941645196610473, "learning_rate": 1.997567004413611e-06, "loss": 0.6743426322937012, "step": 618 }, { "epoch": 0.14272538621166705, "grad_norm": 0.9456320720036326, "learning_rate": 1.9975403525828423e-06, "loss": 0.5894836187362671, "step": 619 }, { "epoch": 0.14295596034124972, "grad_norm": 1.1964423414511856, "learning_rate": 1.9975135557494735e-06, "loss": 0.7142415046691895, "step": 620 }, { "epoch": 0.14318653447083238, "grad_norm": 0.7973360588907056, "learning_rate": 1.9974866139174e-06, "loss": 0.6402454972267151, "step": 621 }, { "epoch": 0.14341710860041504, "grad_norm": 0.8197617379148621, "learning_rate": 1.997459527090538e-06, "loss": 0.6870661973953247, "step": 622 }, { "epoch": 0.1436476827299977, "grad_norm": 0.9660987988063562, "learning_rate": 1.9974322952728247e-06, "loss": 0.5526704788208008, "step": 623 }, { "epoch": 0.14387825685958036, "grad_norm": 0.8373386744091922, "learning_rate": 1.9974049184682186e-06, "loss": 0.6712762117385864, "step": 624 }, { "epoch": 0.14410883098916302, "grad_norm": 0.8330659804365839, "learning_rate": 1.997377396680699e-06, "loss": 0.6064080595970154, "step": 625 }, { "epoch": 0.14433940511874568, "grad_norm": 0.7758896299152315, "learning_rate": 1.997349729914267e-06, "loss": 0.5540767908096313, "step": 626 }, { "epoch": 0.14456997924832835, "grad_norm": 0.7444906414234538, "learning_rate": 1.997321918172944e-06, "loss": 0.52143394947052, "step": 627 }, { "epoch": 0.144800553377911, "grad_norm": 0.8091707705607726, "learning_rate": 1.9972939614607723e-06, "loss": 0.7708792686462402, "step": 628 }, { "epoch": 0.14503112750749367, "grad_norm": 1.0019252225174067, "learning_rate": 1.997265859781816e-06, "loss": 0.706872284412384, "step": 629 }, { "epoch": 0.14526170163707633, "grad_norm": 0.7978488701627702, "learning_rate": 1.99723761314016e-06, "loss": 0.6643307209014893, "step": 630 }, { "epoch": 0.145492275766659, "grad_norm": 1.0319728160628425, "learning_rate": 1.9972092215399107e-06, "loss": 0.6582880020141602, "step": 631 }, { "epoch": 0.14572284989624165, "grad_norm": 0.7041979367649327, "learning_rate": 1.997180684985194e-06, "loss": 0.5704749822616577, "step": 632 }, { "epoch": 0.1459534240258243, "grad_norm": 0.9160954038448087, "learning_rate": 1.997152003480159e-06, "loss": 0.6021866798400879, "step": 633 }, { "epoch": 0.14618399815540697, "grad_norm": 1.0186739140184302, "learning_rate": 1.9971231770289745e-06, "loss": 0.6980762481689453, "step": 634 }, { "epoch": 0.14641457228498964, "grad_norm": 0.9102171344238382, "learning_rate": 1.9970942056358307e-06, "loss": 0.6252140998840332, "step": 635 }, { "epoch": 0.1466451464145723, "grad_norm": 0.8257085970836279, "learning_rate": 1.9970650893049384e-06, "loss": 0.5938589572906494, "step": 636 }, { "epoch": 0.14687572054415496, "grad_norm": 0.7561297866548697, "learning_rate": 1.997035828040531e-06, "loss": 0.48420464992523193, "step": 637 }, { "epoch": 0.14710629467373762, "grad_norm": 1.1749911282917564, "learning_rate": 1.997006421846861e-06, "loss": 0.6917499303817749, "step": 638 }, { "epoch": 0.14733686880332028, "grad_norm": 0.9636395596462505, "learning_rate": 1.9969768707282034e-06, "loss": 0.7040522694587708, "step": 639 }, { "epoch": 0.14756744293290291, "grad_norm": 0.7956128694692409, "learning_rate": 1.9969471746888535e-06, "loss": 0.6131860017776489, "step": 640 }, { "epoch": 0.14779801706248558, "grad_norm": 0.8000550155014501, "learning_rate": 1.996917333733128e-06, "loss": 0.7042062282562256, "step": 641 }, { "epoch": 0.14802859119206824, "grad_norm": 0.9440344299424565, "learning_rate": 1.9968873478653647e-06, "loss": 0.6729326844215393, "step": 642 }, { "epoch": 0.1482591653216509, "grad_norm": 0.8065631083250541, "learning_rate": 1.996857217089922e-06, "loss": 0.5801228880882263, "step": 643 }, { "epoch": 0.14848973945123356, "grad_norm": 0.9584481605552773, "learning_rate": 1.99682694141118e-06, "loss": 0.6657989025115967, "step": 644 }, { "epoch": 0.14872031358081622, "grad_norm": 0.8276892521273487, "learning_rate": 1.9967965208335395e-06, "loss": 0.5915562510490417, "step": 645 }, { "epoch": 0.14895088771039888, "grad_norm": 0.8005079741579677, "learning_rate": 1.9967659553614225e-06, "loss": 0.6651759147644043, "step": 646 }, { "epoch": 0.14918146183998154, "grad_norm": 0.785500734493462, "learning_rate": 1.996735244999272e-06, "loss": 0.625860333442688, "step": 647 }, { "epoch": 0.1494120359695642, "grad_norm": 0.891334856659417, "learning_rate": 1.996704389751552e-06, "loss": 0.5731238126754761, "step": 648 }, { "epoch": 0.14964261009914687, "grad_norm": 0.8662032133236818, "learning_rate": 1.996673389622748e-06, "loss": 0.6233615875244141, "step": 649 }, { "epoch": 0.14987318422872953, "grad_norm": 0.7037223780792468, "learning_rate": 1.9966422446173655e-06, "loss": 0.5294947028160095, "step": 650 }, { "epoch": 0.1501037583583122, "grad_norm": 0.8024689158972043, "learning_rate": 1.996610954739932e-06, "loss": 0.6234334707260132, "step": 651 }, { "epoch": 0.15033433248789485, "grad_norm": 0.9863259301950934, "learning_rate": 1.996579519994996e-06, "loss": 0.5800126194953918, "step": 652 }, { "epoch": 0.1505649066174775, "grad_norm": 0.9145794705086053, "learning_rate": 1.9965479403871268e-06, "loss": 0.7072441577911377, "step": 653 }, { "epoch": 0.15079548074706017, "grad_norm": 0.8604804316966843, "learning_rate": 1.996516215920915e-06, "loss": 0.6350210309028625, "step": 654 }, { "epoch": 0.15102605487664283, "grad_norm": 0.8272551438363688, "learning_rate": 1.996484346600971e-06, "loss": 0.6098944544792175, "step": 655 }, { "epoch": 0.1512566290062255, "grad_norm": 0.7942772112843086, "learning_rate": 1.996452332431929e-06, "loss": 0.6593213081359863, "step": 656 }, { "epoch": 0.15148720313580816, "grad_norm": 1.0870788996229426, "learning_rate": 1.9964201734184413e-06, "loss": 0.6997909545898438, "step": 657 }, { "epoch": 0.15171777726539082, "grad_norm": 0.8320533396880808, "learning_rate": 1.996387869565183e-06, "loss": 0.5672277212142944, "step": 658 }, { "epoch": 0.15194835139497348, "grad_norm": 0.8777194103988153, "learning_rate": 1.99635542087685e-06, "loss": 0.5835613012313843, "step": 659 }, { "epoch": 0.15217892552455614, "grad_norm": 1.0025309187744094, "learning_rate": 1.9963228273581587e-06, "loss": 0.6001917123794556, "step": 660 }, { "epoch": 0.1524094996541388, "grad_norm": 0.9582174045063777, "learning_rate": 1.996290089013847e-06, "loss": 0.6421242356300354, "step": 661 }, { "epoch": 0.15264007378372146, "grad_norm": 0.8996449559898986, "learning_rate": 1.996257205848674e-06, "loss": 0.6888365745544434, "step": 662 }, { "epoch": 0.15287064791330413, "grad_norm": 0.8017642329752841, "learning_rate": 1.9962241778674193e-06, "loss": 0.6694042682647705, "step": 663 }, { "epoch": 0.1531012220428868, "grad_norm": 0.8362235694997654, "learning_rate": 1.9961910050748836e-06, "loss": 0.6754042506217957, "step": 664 }, { "epoch": 0.15333179617246945, "grad_norm": 0.9429947161447709, "learning_rate": 1.9961576874758893e-06, "loss": 0.576134979724884, "step": 665 }, { "epoch": 0.1535623703020521, "grad_norm": 0.8634505888713511, "learning_rate": 1.9961242250752796e-06, "loss": 0.6548957824707031, "step": 666 }, { "epoch": 0.15379294443163477, "grad_norm": 0.8494612034918267, "learning_rate": 1.9960906178779183e-06, "loss": 0.553372859954834, "step": 667 }, { "epoch": 0.15402351856121743, "grad_norm": 0.8776559544848238, "learning_rate": 1.9960568658886904e-06, "loss": 0.6749063730239868, "step": 668 }, { "epoch": 0.1542540926908001, "grad_norm": 0.8490449157821316, "learning_rate": 1.9960229691125023e-06, "loss": 0.6083666086196899, "step": 669 }, { "epoch": 0.15448466682038275, "grad_norm": 0.9102216407598661, "learning_rate": 1.995988927554281e-06, "loss": 0.6468017101287842, "step": 670 }, { "epoch": 0.15471524094996542, "grad_norm": 0.9054463862187181, "learning_rate": 1.995954741218976e-06, "loss": 0.7095121145248413, "step": 671 }, { "epoch": 0.15494581507954808, "grad_norm": 0.8984210973740085, "learning_rate": 1.995920410111555e-06, "loss": 0.7167302966117859, "step": 672 }, { "epoch": 0.15517638920913074, "grad_norm": 0.9754903087688545, "learning_rate": 1.995885934237009e-06, "loss": 0.6563462018966675, "step": 673 }, { "epoch": 0.1554069633387134, "grad_norm": 0.7833661271069817, "learning_rate": 1.9958513136003495e-06, "loss": 0.638554573059082, "step": 674 }, { "epoch": 0.15563753746829606, "grad_norm": 1.1119382875058637, "learning_rate": 1.995816548206609e-06, "loss": 0.7051291465759277, "step": 675 }, { "epoch": 0.15586811159787872, "grad_norm": 0.879000690907415, "learning_rate": 1.995781638060841e-06, "loss": 0.6292394399642944, "step": 676 }, { "epoch": 0.15609868572746138, "grad_norm": 0.7328696227145686, "learning_rate": 1.99574658316812e-06, "loss": 0.5266016721725464, "step": 677 }, { "epoch": 0.15632925985704405, "grad_norm": 0.8021809147598078, "learning_rate": 1.9957113835335415e-06, "loss": 0.6059033870697021, "step": 678 }, { "epoch": 0.1565598339866267, "grad_norm": 1.0012445200078677, "learning_rate": 1.995676039162222e-06, "loss": 0.5252447128295898, "step": 679 }, { "epoch": 0.15679040811620937, "grad_norm": 0.9661534967224599, "learning_rate": 1.9956405500593e-06, "loss": 0.5963196754455566, "step": 680 }, { "epoch": 0.15702098224579203, "grad_norm": 1.1191160767100459, "learning_rate": 1.9956049162299322e-06, "loss": 0.7262317538261414, "step": 681 }, { "epoch": 0.1572515563753747, "grad_norm": 0.6929567178003186, "learning_rate": 1.995569137679301e-06, "loss": 0.6701623201370239, "step": 682 }, { "epoch": 0.15748213050495735, "grad_norm": 1.1067508842107727, "learning_rate": 1.9955332144126048e-06, "loss": 0.6201569437980652, "step": 683 }, { "epoch": 0.15771270463454, "grad_norm": 0.8729576302308473, "learning_rate": 1.9954971464350673e-06, "loss": 0.5338399410247803, "step": 684 }, { "epoch": 0.15794327876412267, "grad_norm": 1.0541267316046437, "learning_rate": 1.99546093375193e-06, "loss": 0.6784210205078125, "step": 685 }, { "epoch": 0.15817385289370534, "grad_norm": 0.7386088048688241, "learning_rate": 1.9954245763684574e-06, "loss": 0.6752813458442688, "step": 686 }, { "epoch": 0.158404427023288, "grad_norm": 0.92655840240498, "learning_rate": 1.9953880742899344e-06, "loss": 0.6734355688095093, "step": 687 }, { "epoch": 0.15863500115287066, "grad_norm": 1.0183777461857344, "learning_rate": 1.995351427521667e-06, "loss": 0.4857062101364136, "step": 688 }, { "epoch": 0.15886557528245332, "grad_norm": 1.0292686670210065, "learning_rate": 1.995314636068982e-06, "loss": 0.6014343500137329, "step": 689 }, { "epoch": 0.15909614941203598, "grad_norm": 0.6804392354384567, "learning_rate": 1.995277699937227e-06, "loss": 0.571649432182312, "step": 690 }, { "epoch": 0.15932672354161864, "grad_norm": 0.8504096595688001, "learning_rate": 1.9952406191317717e-06, "loss": 0.5195556879043579, "step": 691 }, { "epoch": 0.1595572976712013, "grad_norm": 1.0458950135227758, "learning_rate": 1.995203393658006e-06, "loss": 0.6520895957946777, "step": 692 }, { "epoch": 0.15978787180078396, "grad_norm": 0.8415432435774023, "learning_rate": 1.995166023521341e-06, "loss": 0.7223460674285889, "step": 693 }, { "epoch": 0.16001844593036663, "grad_norm": 0.9976828679541363, "learning_rate": 1.9951285087272085e-06, "loss": 0.5540120005607605, "step": 694 }, { "epoch": 0.1602490200599493, "grad_norm": 0.9583028785849829, "learning_rate": 1.995090849281062e-06, "loss": 0.6539945602416992, "step": 695 }, { "epoch": 0.16047959418953192, "grad_norm": 0.6996553037894581, "learning_rate": 1.995053045188376e-06, "loss": 0.595169186592102, "step": 696 }, { "epoch": 0.16071016831911458, "grad_norm": 0.7841493951031693, "learning_rate": 1.995015096454645e-06, "loss": 0.564440131187439, "step": 697 }, { "epoch": 0.16094074244869724, "grad_norm": 0.8288568147288248, "learning_rate": 1.9949770030853857e-06, "loss": 0.5934277772903442, "step": 698 }, { "epoch": 0.1611713165782799, "grad_norm": 0.8284586150514878, "learning_rate": 1.9949387650861353e-06, "loss": 0.5645352602005005, "step": 699 }, { "epoch": 0.16140189070786257, "grad_norm": 0.7431587516594325, "learning_rate": 1.9949003824624517e-06, "loss": 0.6437552571296692, "step": 700 }, { "epoch": 0.16163246483744523, "grad_norm": 0.9720884796741701, "learning_rate": 1.9948618552199147e-06, "loss": 0.7052004337310791, "step": 701 }, { "epoch": 0.1618630389670279, "grad_norm": 0.869867046800395, "learning_rate": 1.994823183364124e-06, "loss": 0.6547686457633972, "step": 702 }, { "epoch": 0.16209361309661055, "grad_norm": 0.8852938288883528, "learning_rate": 1.994784366900702e-06, "loss": 0.582744836807251, "step": 703 }, { "epoch": 0.1623241872261932, "grad_norm": 0.9493941174588165, "learning_rate": 1.99474540583529e-06, "loss": 0.6668936014175415, "step": 704 }, { "epoch": 0.16255476135577587, "grad_norm": 0.8294615633120708, "learning_rate": 1.994706300173552e-06, "loss": 0.6076918840408325, "step": 705 }, { "epoch": 0.16278533548535853, "grad_norm": 0.8313694025786441, "learning_rate": 1.994667049921172e-06, "loss": 0.5053621530532837, "step": 706 }, { "epoch": 0.1630159096149412, "grad_norm": 0.7898437620774408, "learning_rate": 1.994627655083856e-06, "loss": 0.5480915904045105, "step": 707 }, { "epoch": 0.16324648374452386, "grad_norm": 0.8758549357955973, "learning_rate": 1.99458811566733e-06, "loss": 0.5851327776908875, "step": 708 }, { "epoch": 0.16347705787410652, "grad_norm": 0.8484239464634123, "learning_rate": 1.9945484316773415e-06, "loss": 0.7058213949203491, "step": 709 }, { "epoch": 0.16370763200368918, "grad_norm": 1.019538936894149, "learning_rate": 1.9945086031196588e-06, "loss": 0.6900246739387512, "step": 710 }, { "epoch": 0.16393820613327184, "grad_norm": 0.9247299002550031, "learning_rate": 1.994468630000072e-06, "loss": 0.6088757514953613, "step": 711 }, { "epoch": 0.1641687802628545, "grad_norm": 0.82117755294185, "learning_rate": 1.9944285123243908e-06, "loss": 0.6167945861816406, "step": 712 }, { "epoch": 0.16439935439243716, "grad_norm": 0.8171354955480022, "learning_rate": 1.994388250098447e-06, "loss": 0.5842427015304565, "step": 713 }, { "epoch": 0.16462992852201982, "grad_norm": 1.0833616769520091, "learning_rate": 1.9943478433280937e-06, "loss": 0.6709132194519043, "step": 714 }, { "epoch": 0.1648605026516025, "grad_norm": 0.9486447603343945, "learning_rate": 1.994307292019204e-06, "loss": 0.5600479245185852, "step": 715 }, { "epoch": 0.16509107678118515, "grad_norm": 0.9425877157645439, "learning_rate": 1.994266596177672e-06, "loss": 0.59420245885849, "step": 716 }, { "epoch": 0.1653216509107678, "grad_norm": 0.8878954538957776, "learning_rate": 1.994225755809414e-06, "loss": 0.6098697185516357, "step": 717 }, { "epoch": 0.16555222504035047, "grad_norm": 0.9792435497913993, "learning_rate": 1.994184770920366e-06, "loss": 0.5626084804534912, "step": 718 }, { "epoch": 0.16578279916993313, "grad_norm": 0.827415177568412, "learning_rate": 1.9941436415164854e-06, "loss": 0.633317232131958, "step": 719 }, { "epoch": 0.1660133732995158, "grad_norm": 0.7458775266643737, "learning_rate": 1.994102367603752e-06, "loss": 0.6629287004470825, "step": 720 }, { "epoch": 0.16624394742909845, "grad_norm": 0.8804838237561229, "learning_rate": 1.994060949188164e-06, "loss": 0.6281176805496216, "step": 721 }, { "epoch": 0.16647452155868112, "grad_norm": 0.7448717784104247, "learning_rate": 1.994019386275743e-06, "loss": 0.49195849895477295, "step": 722 }, { "epoch": 0.16670509568826378, "grad_norm": 0.8001133040698483, "learning_rate": 1.9939776788725295e-06, "loss": 0.5165697932243347, "step": 723 }, { "epoch": 0.16693566981784644, "grad_norm": 0.7747636914973149, "learning_rate": 1.9939358269845867e-06, "loss": 0.6294844150543213, "step": 724 }, { "epoch": 0.1671662439474291, "grad_norm": 0.944854174617811, "learning_rate": 1.9938938306179986e-06, "loss": 0.6117822527885437, "step": 725 }, { "epoch": 0.16739681807701176, "grad_norm": 0.8223415721013929, "learning_rate": 1.9938516897788693e-06, "loss": 0.5904515981674194, "step": 726 }, { "epoch": 0.16762739220659442, "grad_norm": 0.9451811550082199, "learning_rate": 1.9938094044733247e-06, "loss": 0.5453853011131287, "step": 727 }, { "epoch": 0.16785796633617708, "grad_norm": 1.0093698810967915, "learning_rate": 1.9937669747075107e-06, "loss": 0.6724731922149658, "step": 728 }, { "epoch": 0.16808854046575974, "grad_norm": 0.8787203913390783, "learning_rate": 1.993724400487596e-06, "loss": 0.4844778776168823, "step": 729 }, { "epoch": 0.1683191145953424, "grad_norm": 1.0150110817624924, "learning_rate": 1.9936816818197682e-06, "loss": 0.6666063070297241, "step": 730 }, { "epoch": 0.16854968872492507, "grad_norm": 0.8363215992575103, "learning_rate": 1.9936388187102374e-06, "loss": 0.49354803562164307, "step": 731 }, { "epoch": 0.16878026285450773, "grad_norm": 1.011739420494133, "learning_rate": 1.993595811165234e-06, "loss": 0.6587027311325073, "step": 732 }, { "epoch": 0.1690108369840904, "grad_norm": 0.8706809761457309, "learning_rate": 1.9935526591910095e-06, "loss": 0.5618065595626831, "step": 733 }, { "epoch": 0.16924141111367305, "grad_norm": 1.0230867510580486, "learning_rate": 1.993509362793837e-06, "loss": 0.6332052946090698, "step": 734 }, { "epoch": 0.1694719852432557, "grad_norm": 0.8938300688074264, "learning_rate": 1.9934659219800095e-06, "loss": 0.5888797044754028, "step": 735 }, { "epoch": 0.16970255937283837, "grad_norm": 0.9600504381358347, "learning_rate": 1.9934223367558418e-06, "loss": 0.6995177865028381, "step": 736 }, { "epoch": 0.16993313350242104, "grad_norm": 0.8183852978697493, "learning_rate": 1.9933786071276693e-06, "loss": 0.6117641925811768, "step": 737 }, { "epoch": 0.1701637076320037, "grad_norm": 0.8824726889784998, "learning_rate": 1.9933347331018487e-06, "loss": 0.7138235569000244, "step": 738 }, { "epoch": 0.17039428176158636, "grad_norm": 0.9234925675447027, "learning_rate": 1.993290714684758e-06, "loss": 0.6139661073684692, "step": 739 }, { "epoch": 0.17062485589116902, "grad_norm": 0.9457487351494172, "learning_rate": 1.9932465518827945e-06, "loss": 0.6998997926712036, "step": 740 }, { "epoch": 0.17085543002075168, "grad_norm": 0.8625145077640682, "learning_rate": 1.9932022447023787e-06, "loss": 0.5736757516860962, "step": 741 }, { "epoch": 0.17108600415033434, "grad_norm": 0.7768775382949296, "learning_rate": 1.993157793149951e-06, "loss": 0.6069833040237427, "step": 742 }, { "epoch": 0.171316578279917, "grad_norm": 0.9368489446003049, "learning_rate": 1.9931131972319726e-06, "loss": 0.618720531463623, "step": 743 }, { "epoch": 0.17154715240949966, "grad_norm": 1.1182101771495103, "learning_rate": 1.9930684569549263e-06, "loss": 0.6918530464172363, "step": 744 }, { "epoch": 0.17177772653908233, "grad_norm": 0.9107072762217621, "learning_rate": 1.993023572325315e-06, "loss": 0.5303134322166443, "step": 745 }, { "epoch": 0.172008300668665, "grad_norm": 1.163525853024132, "learning_rate": 1.9929785433496637e-06, "loss": 0.5017606019973755, "step": 746 }, { "epoch": 0.17223887479824765, "grad_norm": 0.8248835281602814, "learning_rate": 1.9929333700345176e-06, "loss": 0.5683910846710205, "step": 747 }, { "epoch": 0.1724694489278303, "grad_norm": 1.024957040527593, "learning_rate": 1.992888052386443e-06, "loss": 0.7594112157821655, "step": 748 }, { "epoch": 0.17270002305741297, "grad_norm": 0.8415419064063624, "learning_rate": 1.9928425904120272e-06, "loss": 0.5817109942436218, "step": 749 }, { "epoch": 0.17293059718699563, "grad_norm": 0.9772344685918459, "learning_rate": 1.9927969841178785e-06, "loss": 0.74810391664505, "step": 750 }, { "epoch": 0.17316117131657827, "grad_norm": 0.7709842631317299, "learning_rate": 1.992751233510627e-06, "loss": 0.5620408654212952, "step": 751 }, { "epoch": 0.17339174544616093, "grad_norm": 0.9147017514524429, "learning_rate": 1.9927053385969224e-06, "loss": 0.5661174654960632, "step": 752 }, { "epoch": 0.1736223195757436, "grad_norm": 0.8721149149743948, "learning_rate": 1.992659299383436e-06, "loss": 0.6170656681060791, "step": 753 }, { "epoch": 0.17385289370532625, "grad_norm": 0.8946316220934861, "learning_rate": 1.99261311587686e-06, "loss": 0.6399837136268616, "step": 754 }, { "epoch": 0.1740834678349089, "grad_norm": 0.7741035474142021, "learning_rate": 1.992566788083908e-06, "loss": 0.646568775177002, "step": 755 }, { "epoch": 0.17431404196449157, "grad_norm": 0.8936741351690501, "learning_rate": 1.992520316011314e-06, "loss": 0.6836358904838562, "step": 756 }, { "epoch": 0.17454461609407423, "grad_norm": 0.8304614027509832, "learning_rate": 1.9924736996658327e-06, "loss": 0.7077229619026184, "step": 757 }, { "epoch": 0.1747751902236569, "grad_norm": 0.87551528703017, "learning_rate": 1.9924269390542408e-06, "loss": 0.5127657651901245, "step": 758 }, { "epoch": 0.17500576435323956, "grad_norm": 0.9006786249451013, "learning_rate": 1.992380034183336e-06, "loss": 0.49244552850723267, "step": 759 }, { "epoch": 0.17523633848282222, "grad_norm": 0.8017561502743571, "learning_rate": 1.9923329850599353e-06, "loss": 0.6145986318588257, "step": 760 }, { "epoch": 0.17546691261240488, "grad_norm": 1.0163805424999015, "learning_rate": 1.9922857916908784e-06, "loss": 0.5233397483825684, "step": 761 }, { "epoch": 0.17569748674198754, "grad_norm": 0.9596772303146165, "learning_rate": 1.992238454083025e-06, "loss": 0.6296844482421875, "step": 762 }, { "epoch": 0.1759280608715702, "grad_norm": 0.7860963753584104, "learning_rate": 1.9921909722432565e-06, "loss": 0.5274437665939331, "step": 763 }, { "epoch": 0.17615863500115286, "grad_norm": 0.8930810667791799, "learning_rate": 1.9921433461784744e-06, "loss": 0.6365554332733154, "step": 764 }, { "epoch": 0.17638920913073552, "grad_norm": 0.9611521576454714, "learning_rate": 1.992095575895602e-06, "loss": 0.6256603002548218, "step": 765 }, { "epoch": 0.17661978326031819, "grad_norm": 0.9488006285824869, "learning_rate": 1.9920476614015827e-06, "loss": 0.6914918422698975, "step": 766 }, { "epoch": 0.17685035738990085, "grad_norm": 0.9925839476608436, "learning_rate": 1.9919996027033823e-06, "loss": 0.618436336517334, "step": 767 }, { "epoch": 0.1770809315194835, "grad_norm": 1.0637307823847924, "learning_rate": 1.9919513998079857e-06, "loss": 0.7496027946472168, "step": 768 }, { "epoch": 0.17731150564906617, "grad_norm": 0.873569070894671, "learning_rate": 1.9919030527224e-06, "loss": 0.6188616752624512, "step": 769 }, { "epoch": 0.17754207977864883, "grad_norm": 0.9573370107752551, "learning_rate": 1.991854561453653e-06, "loss": 0.6525505185127258, "step": 770 }, { "epoch": 0.1777726539082315, "grad_norm": 0.8791752874309303, "learning_rate": 1.9918059260087933e-06, "loss": 0.6302521228790283, "step": 771 }, { "epoch": 0.17800322803781415, "grad_norm": 0.7767159097983319, "learning_rate": 1.9917571463948905e-06, "loss": 0.48817628622055054, "step": 772 }, { "epoch": 0.17823380216739682, "grad_norm": 0.9997756560425097, "learning_rate": 1.9917082226190357e-06, "loss": 0.7571396231651306, "step": 773 }, { "epoch": 0.17846437629697948, "grad_norm": 0.9019653117383005, "learning_rate": 1.99165915468834e-06, "loss": 0.6416890025138855, "step": 774 }, { "epoch": 0.17869495042656214, "grad_norm": 0.9030141776784474, "learning_rate": 1.9916099426099357e-06, "loss": 0.5668659210205078, "step": 775 }, { "epoch": 0.1789255245561448, "grad_norm": 0.8616948701360102, "learning_rate": 1.991560586390977e-06, "loss": 0.5491495132446289, "step": 776 }, { "epoch": 0.17915609868572746, "grad_norm": 0.8461739489170892, "learning_rate": 1.991511086038637e-06, "loss": 0.5596655607223511, "step": 777 }, { "epoch": 0.17938667281531012, "grad_norm": 0.948797979696852, "learning_rate": 1.991461441560113e-06, "loss": 0.606618344783783, "step": 778 }, { "epoch": 0.17961724694489278, "grad_norm": 0.8682290862864503, "learning_rate": 1.9914116529626195e-06, "loss": 0.6534444093704224, "step": 779 }, { "epoch": 0.17984782107447544, "grad_norm": 0.7942772802909244, "learning_rate": 1.9913617202533956e-06, "loss": 0.6566994190216064, "step": 780 }, { "epoch": 0.1800783952040581, "grad_norm": 0.8753236598884384, "learning_rate": 1.9913116434396976e-06, "loss": 0.6745898723602295, "step": 781 }, { "epoch": 0.18030896933364077, "grad_norm": 0.8904483654623074, "learning_rate": 1.991261422528806e-06, "loss": 0.6260639429092407, "step": 782 }, { "epoch": 0.18053954346322343, "grad_norm": 1.095081708934966, "learning_rate": 1.9912110575280203e-06, "loss": 0.6937930583953857, "step": 783 }, { "epoch": 0.1807701175928061, "grad_norm": 0.7535766751550929, "learning_rate": 1.991160548444662e-06, "loss": 0.5220614671707153, "step": 784 }, { "epoch": 0.18100069172238875, "grad_norm": 1.0171096783148863, "learning_rate": 1.9911098952860725e-06, "loss": 0.630463719367981, "step": 785 }, { "epoch": 0.1812312658519714, "grad_norm": 0.9064677619585607, "learning_rate": 1.9910590980596154e-06, "loss": 0.5476818084716797, "step": 786 }, { "epoch": 0.18146183998155407, "grad_norm": 0.8827497683061851, "learning_rate": 1.9910081567726745e-06, "loss": 0.619910478591919, "step": 787 }, { "epoch": 0.18169241411113674, "grad_norm": 0.9583246792904453, "learning_rate": 1.990957071432654e-06, "loss": 0.759405255317688, "step": 788 }, { "epoch": 0.1819229882407194, "grad_norm": 0.9249642030902185, "learning_rate": 1.9909058420469808e-06, "loss": 0.6093606948852539, "step": 789 }, { "epoch": 0.18215356237030206, "grad_norm": 1.0777393301256872, "learning_rate": 1.9908544686231e-06, "loss": 0.5358198285102844, "step": 790 }, { "epoch": 0.18238413649988472, "grad_norm": 0.8619190562873736, "learning_rate": 1.9908029511684806e-06, "loss": 0.577926754951477, "step": 791 }, { "epoch": 0.18261471062946738, "grad_norm": 1.0298704295501269, "learning_rate": 1.990751289690611e-06, "loss": 0.6232448816299438, "step": 792 }, { "epoch": 0.18284528475905004, "grad_norm": 0.9837349749201401, "learning_rate": 1.9906994841970005e-06, "loss": 0.5461868047714233, "step": 793 }, { "epoch": 0.1830758588886327, "grad_norm": 0.9430576362377001, "learning_rate": 1.9906475346951793e-06, "loss": 0.6074671745300293, "step": 794 }, { "epoch": 0.18330643301821536, "grad_norm": 0.9936839742941572, "learning_rate": 1.990595441192699e-06, "loss": 0.7101696729660034, "step": 795 }, { "epoch": 0.18353700714779803, "grad_norm": 0.950260898814123, "learning_rate": 1.9905432036971318e-06, "loss": 0.6507722735404968, "step": 796 }, { "epoch": 0.1837675812773807, "grad_norm": 0.8942288113166778, "learning_rate": 1.9904908222160715e-06, "loss": 0.6497524380683899, "step": 797 }, { "epoch": 0.18399815540696335, "grad_norm": 0.9396678930556792, "learning_rate": 1.9904382967571315e-06, "loss": 0.6359415054321289, "step": 798 }, { "epoch": 0.184228729536546, "grad_norm": 0.8070326036364724, "learning_rate": 1.9903856273279475e-06, "loss": 0.6062989234924316, "step": 799 }, { "epoch": 0.18445930366612867, "grad_norm": 0.9626677000162343, "learning_rate": 1.9903328139361753e-06, "loss": 0.5872690677642822, "step": 800 }, { "epoch": 0.18468987779571133, "grad_norm": 0.7985705265040473, "learning_rate": 1.9902798565894917e-06, "loss": 0.541993260383606, "step": 801 }, { "epoch": 0.184920451925294, "grad_norm": 0.9775943406877085, "learning_rate": 1.9902267552955948e-06, "loss": 0.6509004235267639, "step": 802 }, { "epoch": 0.18515102605487666, "grad_norm": 1.032367389635004, "learning_rate": 1.9901735100622034e-06, "loss": 0.6994458436965942, "step": 803 }, { "epoch": 0.18538160018445932, "grad_norm": 0.723727027388961, "learning_rate": 1.9901201208970574e-06, "loss": 0.5426214933395386, "step": 804 }, { "epoch": 0.18561217431404198, "grad_norm": 0.9494744349432898, "learning_rate": 1.9900665878079172e-06, "loss": 0.5889894366264343, "step": 805 }, { "epoch": 0.18584274844362464, "grad_norm": 0.8565255265724333, "learning_rate": 1.990012910802564e-06, "loss": 0.6455902457237244, "step": 806 }, { "epoch": 0.18607332257320727, "grad_norm": 0.8487813974117321, "learning_rate": 1.989959089888801e-06, "loss": 0.6336048245429993, "step": 807 }, { "epoch": 0.18630389670278993, "grad_norm": 0.8414189962242138, "learning_rate": 1.9899051250744517e-06, "loss": 0.6091762781143188, "step": 808 }, { "epoch": 0.1865344708323726, "grad_norm": 0.9439572961008054, "learning_rate": 1.9898510163673594e-06, "loss": 0.5551953315734863, "step": 809 }, { "epoch": 0.18676504496195526, "grad_norm": 1.0494491780231465, "learning_rate": 1.9897967637753907e-06, "loss": 0.6441607475280762, "step": 810 }, { "epoch": 0.18699561909153792, "grad_norm": 0.886313339848662, "learning_rate": 1.989742367306431e-06, "loss": 0.5766205787658691, "step": 811 }, { "epoch": 0.18722619322112058, "grad_norm": 0.8129745295139125, "learning_rate": 1.9896878269683872e-06, "loss": 0.624677836894989, "step": 812 }, { "epoch": 0.18745676735070324, "grad_norm": 1.0883386432883795, "learning_rate": 1.9896331427691878e-06, "loss": 0.5942056775093079, "step": 813 }, { "epoch": 0.1876873414802859, "grad_norm": 0.9421668652395382, "learning_rate": 1.989578314716781e-06, "loss": 0.5194109082221985, "step": 814 }, { "epoch": 0.18791791560986856, "grad_norm": 0.9041080200693152, "learning_rate": 1.9895233428191375e-06, "loss": 0.5851193070411682, "step": 815 }, { "epoch": 0.18814848973945122, "grad_norm": 0.7963655717285544, "learning_rate": 1.989468227084248e-06, "loss": 0.5596088171005249, "step": 816 }, { "epoch": 0.18837906386903389, "grad_norm": 0.9364254304069746, "learning_rate": 1.989412967520123e-06, "loss": 0.608109712600708, "step": 817 }, { "epoch": 0.18860963799861655, "grad_norm": 0.8927696059217924, "learning_rate": 1.9893575641347957e-06, "loss": 0.6488924026489258, "step": 818 }, { "epoch": 0.1888402121281992, "grad_norm": 0.9447086482881396, "learning_rate": 1.9893020169363202e-06, "loss": 0.6668595671653748, "step": 819 }, { "epoch": 0.18907078625778187, "grad_norm": 0.9937318511996248, "learning_rate": 1.9892463259327702e-06, "loss": 0.6516261696815491, "step": 820 }, { "epoch": 0.18930136038736453, "grad_norm": 1.0796549259081865, "learning_rate": 1.9891904911322408e-06, "loss": 0.5960654020309448, "step": 821 }, { "epoch": 0.1895319345169472, "grad_norm": 0.7909478658460368, "learning_rate": 1.989134512542848e-06, "loss": 0.5836078524589539, "step": 822 }, { "epoch": 0.18976250864652985, "grad_norm": 0.8238472267757905, "learning_rate": 1.98907839017273e-06, "loss": 0.6233468651771545, "step": 823 }, { "epoch": 0.18999308277611252, "grad_norm": 0.9807541829716023, "learning_rate": 1.989022124030043e-06, "loss": 0.6228024363517761, "step": 824 }, { "epoch": 0.19022365690569518, "grad_norm": 0.8131035743107407, "learning_rate": 1.9889657141229674e-06, "loss": 0.5549489259719849, "step": 825 }, { "epoch": 0.19045423103527784, "grad_norm": 1.04900407843417, "learning_rate": 1.988909160459703e-06, "loss": 0.572743833065033, "step": 826 }, { "epoch": 0.1906848051648605, "grad_norm": 0.9532449351501632, "learning_rate": 1.988852463048469e-06, "loss": 0.5483371019363403, "step": 827 }, { "epoch": 0.19091537929444316, "grad_norm": 0.8589634934665029, "learning_rate": 1.988795621897508e-06, "loss": 0.6489086151123047, "step": 828 }, { "epoch": 0.19114595342402582, "grad_norm": 0.8093738620503291, "learning_rate": 1.9887386370150823e-06, "loss": 0.5885359644889832, "step": 829 }, { "epoch": 0.19137652755360848, "grad_norm": 1.1233507395706857, "learning_rate": 1.988681508409475e-06, "loss": 0.5725297927856445, "step": 830 }, { "epoch": 0.19160710168319114, "grad_norm": 0.9186016287497916, "learning_rate": 1.9886242360889907e-06, "loss": 0.5165927410125732, "step": 831 }, { "epoch": 0.1918376758127738, "grad_norm": 0.9873812028582082, "learning_rate": 1.988566820061954e-06, "loss": 0.4909062385559082, "step": 832 }, { "epoch": 0.19206824994235647, "grad_norm": 0.8524339429885558, "learning_rate": 1.988509260336711e-06, "loss": 0.6611230373382568, "step": 833 }, { "epoch": 0.19229882407193913, "grad_norm": 0.8054213393470881, "learning_rate": 1.9884515569216296e-06, "loss": 0.5702481269836426, "step": 834 }, { "epoch": 0.1925293982015218, "grad_norm": 1.0204414620630202, "learning_rate": 1.988393709825096e-06, "loss": 0.5923126935958862, "step": 835 }, { "epoch": 0.19275997233110445, "grad_norm": 0.9055032000924194, "learning_rate": 1.98833571905552e-06, "loss": 0.6054497957229614, "step": 836 }, { "epoch": 0.1929905464606871, "grad_norm": 0.9248140875126212, "learning_rate": 1.9882775846213305e-06, "loss": 0.6688513159751892, "step": 837 }, { "epoch": 0.19322112059026977, "grad_norm": 1.0273808455254545, "learning_rate": 1.988219306530978e-06, "loss": 0.5898394584655762, "step": 838 }, { "epoch": 0.19345169471985244, "grad_norm": 0.9751112903331337, "learning_rate": 1.9881608847929345e-06, "loss": 0.575627326965332, "step": 839 }, { "epoch": 0.1936822688494351, "grad_norm": 0.8673669914525766, "learning_rate": 1.9881023194156913e-06, "loss": 0.5392276048660278, "step": 840 }, { "epoch": 0.19391284297901776, "grad_norm": 0.8706508008641746, "learning_rate": 1.9880436104077624e-06, "loss": 0.5464376211166382, "step": 841 }, { "epoch": 0.19414341710860042, "grad_norm": 1.1088629334080236, "learning_rate": 1.9879847577776804e-06, "loss": 0.5483032464981079, "step": 842 }, { "epoch": 0.19437399123818308, "grad_norm": 1.088158010228094, "learning_rate": 1.9879257615340016e-06, "loss": 0.583878219127655, "step": 843 }, { "epoch": 0.19460456536776574, "grad_norm": 0.903659297701254, "learning_rate": 1.9878666216853005e-06, "loss": 0.5646623373031616, "step": 844 }, { "epoch": 0.1948351394973484, "grad_norm": 0.8893037043091606, "learning_rate": 1.9878073382401745e-06, "loss": 0.4785343408584595, "step": 845 }, { "epoch": 0.19506571362693106, "grad_norm": 0.8306997774077053, "learning_rate": 1.987747911207241e-06, "loss": 0.6247695684432983, "step": 846 }, { "epoch": 0.19529628775651373, "grad_norm": 0.8871051444384922, "learning_rate": 1.9876883405951377e-06, "loss": 0.5686244368553162, "step": 847 }, { "epoch": 0.1955268618860964, "grad_norm": 1.0693338597203925, "learning_rate": 1.9876286264125242e-06, "loss": 0.5887250900268555, "step": 848 }, { "epoch": 0.19575743601567905, "grad_norm": 1.009687803574172, "learning_rate": 1.9875687686680808e-06, "loss": 0.6225967407226562, "step": 849 }, { "epoch": 0.1959880101452617, "grad_norm": 0.8424215047754778, "learning_rate": 1.987508767370508e-06, "loss": 0.4695369601249695, "step": 850 }, { "epoch": 0.19621858427484437, "grad_norm": 1.0270923710251258, "learning_rate": 1.9874486225285276e-06, "loss": 0.5248171091079712, "step": 851 }, { "epoch": 0.19644915840442703, "grad_norm": 1.0947189066196994, "learning_rate": 1.9873883341508825e-06, "loss": 0.573886513710022, "step": 852 }, { "epoch": 0.1966797325340097, "grad_norm": 0.980074050730982, "learning_rate": 1.9873279022463365e-06, "loss": 0.5309966802597046, "step": 853 }, { "epoch": 0.19691030666359235, "grad_norm": 1.2273525906968545, "learning_rate": 1.987267326823673e-06, "loss": 0.7115850448608398, "step": 854 }, { "epoch": 0.19714088079317502, "grad_norm": 1.65154587276706, "learning_rate": 1.9872066078916984e-06, "loss": 0.6970044374465942, "step": 855 }, { "epoch": 0.19737145492275768, "grad_norm": 1.0520569639047552, "learning_rate": 1.987145745459238e-06, "loss": 0.5956458449363708, "step": 856 }, { "epoch": 0.19760202905234034, "grad_norm": 0.8621512966256671, "learning_rate": 1.9870847395351395e-06, "loss": 0.6200698614120483, "step": 857 }, { "epoch": 0.197832603181923, "grad_norm": 0.8987981187104104, "learning_rate": 1.98702359012827e-06, "loss": 0.6552712321281433, "step": 858 }, { "epoch": 0.19806317731150566, "grad_norm": 0.8832934653512269, "learning_rate": 1.986962297247519e-06, "loss": 0.5995951294898987, "step": 859 }, { "epoch": 0.19829375144108832, "grad_norm": 1.0415029103173328, "learning_rate": 1.9869008609017946e-06, "loss": 0.5903854966163635, "step": 860 }, { "epoch": 0.19852432557067098, "grad_norm": 0.7946410320386238, "learning_rate": 1.986839281100029e-06, "loss": 0.49756956100463867, "step": 861 }, { "epoch": 0.19875489970025362, "grad_norm": 0.8989937288923138, "learning_rate": 1.986777557851172e-06, "loss": 0.6726386547088623, "step": 862 }, { "epoch": 0.19898547382983628, "grad_norm": 1.066877002121069, "learning_rate": 1.9867156911641963e-06, "loss": 0.5941756963729858, "step": 863 }, { "epoch": 0.19921604795941894, "grad_norm": 1.1426428571577942, "learning_rate": 1.986653681048095e-06, "loss": 0.6148152351379395, "step": 864 }, { "epoch": 0.1994466220890016, "grad_norm": 0.8574337846446602, "learning_rate": 1.9865915275118815e-06, "loss": 0.5484675765037537, "step": 865 }, { "epoch": 0.19967719621858426, "grad_norm": 1.279305752369778, "learning_rate": 1.986529230564591e-06, "loss": 0.5835011601448059, "step": 866 }, { "epoch": 0.19990777034816692, "grad_norm": 1.2828587747963143, "learning_rate": 1.9864667902152785e-06, "loss": 0.5505619049072266, "step": 867 }, { "epoch": 0.20013834447774959, "grad_norm": 0.978792866059614, "learning_rate": 1.986404206473021e-06, "loss": 0.6170759797096252, "step": 868 }, { "epoch": 0.20036891860733225, "grad_norm": 0.9063283607010307, "learning_rate": 1.9863414793469144e-06, "loss": 0.6302823424339294, "step": 869 }, { "epoch": 0.2005994927369149, "grad_norm": 0.9919923586713045, "learning_rate": 1.9862786088460778e-06, "loss": 0.6265357732772827, "step": 870 }, { "epoch": 0.20083006686649757, "grad_norm": 0.8288163853607481, "learning_rate": 1.9862155949796497e-06, "loss": 0.5346760749816895, "step": 871 }, { "epoch": 0.20106064099608023, "grad_norm": 1.0613032711669241, "learning_rate": 1.98615243775679e-06, "loss": 0.5480276346206665, "step": 872 }, { "epoch": 0.2012912151256629, "grad_norm": 1.0504212966242243, "learning_rate": 1.986089137186679e-06, "loss": 0.615007758140564, "step": 873 }, { "epoch": 0.20152178925524555, "grad_norm": 1.0424303204478471, "learning_rate": 1.986025693278518e-06, "loss": 0.598671555519104, "step": 874 }, { "epoch": 0.20175236338482821, "grad_norm": 1.1162570964298844, "learning_rate": 1.98596210604153e-06, "loss": 0.6029553413391113, "step": 875 }, { "epoch": 0.20198293751441088, "grad_norm": 0.9723766835428509, "learning_rate": 1.985898375484957e-06, "loss": 0.6854428052902222, "step": 876 }, { "epoch": 0.20221351164399354, "grad_norm": 0.7502030102171089, "learning_rate": 1.9858345016180636e-06, "loss": 0.5032496452331543, "step": 877 }, { "epoch": 0.2024440857735762, "grad_norm": 0.910423493721141, "learning_rate": 1.9857704844501343e-06, "loss": 0.5521007776260376, "step": 878 }, { "epoch": 0.20267465990315886, "grad_norm": 0.9861926154372014, "learning_rate": 1.9857063239904742e-06, "loss": 0.6473567485809326, "step": 879 }, { "epoch": 0.20290523403274152, "grad_norm": 0.9973567674127126, "learning_rate": 1.9856420202484103e-06, "loss": 0.528810977935791, "step": 880 }, { "epoch": 0.20313580816232418, "grad_norm": 1.0663389238750165, "learning_rate": 1.9855775732332898e-06, "loss": 0.681857705116272, "step": 881 }, { "epoch": 0.20336638229190684, "grad_norm": 0.9199566615284357, "learning_rate": 1.9855129829544805e-06, "loss": 0.6510526537895203, "step": 882 }, { "epoch": 0.2035969564214895, "grad_norm": 1.0847608945381821, "learning_rate": 1.985448249421371e-06, "loss": 0.5690885782241821, "step": 883 }, { "epoch": 0.20382753055107217, "grad_norm": 0.9067033263808438, "learning_rate": 1.985383372643371e-06, "loss": 0.6451331973075867, "step": 884 }, { "epoch": 0.20405810468065483, "grad_norm": 0.7596187493834748, "learning_rate": 1.9853183526299117e-06, "loss": 0.493961900472641, "step": 885 }, { "epoch": 0.2042886788102375, "grad_norm": 1.031307930072274, "learning_rate": 1.9852531893904434e-06, "loss": 0.5390207767486572, "step": 886 }, { "epoch": 0.20451925293982015, "grad_norm": 0.9671201783822709, "learning_rate": 1.9851878829344395e-06, "loss": 0.5976558923721313, "step": 887 }, { "epoch": 0.2047498270694028, "grad_norm": 0.9832697265495778, "learning_rate": 1.9851224332713917e-06, "loss": 0.539776623249054, "step": 888 }, { "epoch": 0.20498040119898547, "grad_norm": 1.1606849770347532, "learning_rate": 1.9850568404108144e-06, "loss": 0.6791383624076843, "step": 889 }, { "epoch": 0.20521097532856813, "grad_norm": 1.1599404347752247, "learning_rate": 1.984991104362242e-06, "loss": 0.6195741891860962, "step": 890 }, { "epoch": 0.2054415494581508, "grad_norm": 1.0295013801913249, "learning_rate": 1.9849252251352303e-06, "loss": 0.5792666673660278, "step": 891 }, { "epoch": 0.20567212358773346, "grad_norm": 0.7871401361859056, "learning_rate": 1.984859202739355e-06, "loss": 0.5633316040039062, "step": 892 }, { "epoch": 0.20590269771731612, "grad_norm": 0.9078754261167402, "learning_rate": 1.9847930371842137e-06, "loss": 0.6152814626693726, "step": 893 }, { "epoch": 0.20613327184689878, "grad_norm": 1.0024181714804654, "learning_rate": 1.9847267284794234e-06, "loss": 0.5584526658058167, "step": 894 }, { "epoch": 0.20636384597648144, "grad_norm": 0.9442571191896375, "learning_rate": 1.9846602766346235e-06, "loss": 0.5526787042617798, "step": 895 }, { "epoch": 0.2065944201060641, "grad_norm": 1.114741515810547, "learning_rate": 1.984593681659473e-06, "loss": 0.6851564049720764, "step": 896 }, { "epoch": 0.20682499423564676, "grad_norm": 0.9529867069899208, "learning_rate": 1.9845269435636524e-06, "loss": 0.6012386083602905, "step": 897 }, { "epoch": 0.20705556836522943, "grad_norm": 0.9587418141612076, "learning_rate": 1.9844600623568626e-06, "loss": 0.5515716075897217, "step": 898 }, { "epoch": 0.2072861424948121, "grad_norm": 1.0489716310270325, "learning_rate": 1.9843930380488255e-06, "loss": 0.6534323692321777, "step": 899 }, { "epoch": 0.20751671662439475, "grad_norm": 0.9795829214559992, "learning_rate": 1.9843258706492836e-06, "loss": 0.726966381072998, "step": 900 }, { "epoch": 0.2077472907539774, "grad_norm": 1.0154014646465384, "learning_rate": 1.984258560168001e-06, "loss": 0.6692399978637695, "step": 901 }, { "epoch": 0.20797786488356007, "grad_norm": 0.8361205321250001, "learning_rate": 1.9841911066147614e-06, "loss": 0.5815941095352173, "step": 902 }, { "epoch": 0.20820843901314273, "grad_norm": 0.8093430372283338, "learning_rate": 1.98412350999937e-06, "loss": 0.4850257933139801, "step": 903 }, { "epoch": 0.2084390131427254, "grad_norm": 0.9321751727050823, "learning_rate": 1.9840557703316524e-06, "loss": 0.7309345006942749, "step": 904 }, { "epoch": 0.20866958727230805, "grad_norm": 0.9487721653557605, "learning_rate": 1.9839878876214556e-06, "loss": 0.6246342658996582, "step": 905 }, { "epoch": 0.20890016140189072, "grad_norm": 0.923401773715514, "learning_rate": 1.983919861878647e-06, "loss": 0.503870964050293, "step": 906 }, { "epoch": 0.20913073553147338, "grad_norm": 0.9277576649885639, "learning_rate": 1.9838516931131147e-06, "loss": 0.5316766500473022, "step": 907 }, { "epoch": 0.20936130966105604, "grad_norm": 0.9488124820166146, "learning_rate": 1.983783381334768e-06, "loss": 0.5707069039344788, "step": 908 }, { "epoch": 0.2095918837906387, "grad_norm": 1.1481758251998657, "learning_rate": 1.983714926553536e-06, "loss": 0.5482156276702881, "step": 909 }, { "epoch": 0.20982245792022136, "grad_norm": 0.8868748652499737, "learning_rate": 1.98364632877937e-06, "loss": 0.45747748017311096, "step": 910 }, { "epoch": 0.21005303204980402, "grad_norm": 1.070435205795932, "learning_rate": 1.9835775880222414e-06, "loss": 0.5599262118339539, "step": 911 }, { "epoch": 0.21028360617938668, "grad_norm": 0.8833178195747919, "learning_rate": 1.9835087042921416e-06, "loss": 0.5115377902984619, "step": 912 }, { "epoch": 0.21051418030896935, "grad_norm": 1.0026720443060566, "learning_rate": 1.9834396775990846e-06, "loss": 0.6577836275100708, "step": 913 }, { "epoch": 0.210744754438552, "grad_norm": 1.0996458728397183, "learning_rate": 1.9833705079531033e-06, "loss": 0.4979211091995239, "step": 914 }, { "epoch": 0.21097532856813467, "grad_norm": 0.9038590231228891, "learning_rate": 1.983301195364252e-06, "loss": 0.5052670240402222, "step": 915 }, { "epoch": 0.21120590269771733, "grad_norm": 0.9375736925419242, "learning_rate": 1.9832317398426076e-06, "loss": 0.5480808019638062, "step": 916 }, { "epoch": 0.2114364768273, "grad_norm": 1.1234174619828885, "learning_rate": 1.983162141398264e-06, "loss": 0.5328841209411621, "step": 917 }, { "epoch": 0.21166705095688262, "grad_norm": 1.0661654042909894, "learning_rate": 1.98309240004134e-06, "loss": 0.5572643280029297, "step": 918 }, { "epoch": 0.21189762508646529, "grad_norm": 0.7370595537346776, "learning_rate": 1.983022515781972e-06, "loss": 0.5180699825286865, "step": 919 }, { "epoch": 0.21212819921604795, "grad_norm": 0.9467461169752135, "learning_rate": 1.9829524886303182e-06, "loss": 0.5031566619873047, "step": 920 }, { "epoch": 0.2123587733456306, "grad_norm": 1.0924744776428812, "learning_rate": 1.9828823185965587e-06, "loss": 0.6579925417900085, "step": 921 }, { "epoch": 0.21258934747521327, "grad_norm": 1.0635734753276387, "learning_rate": 1.982812005690893e-06, "loss": 0.6107230186462402, "step": 922 }, { "epoch": 0.21281992160479593, "grad_norm": 0.8209241554677639, "learning_rate": 1.982741549923542e-06, "loss": 0.5244725942611694, "step": 923 }, { "epoch": 0.2130504957343786, "grad_norm": 0.8970249012108504, "learning_rate": 1.9826709513047466e-06, "loss": 0.5857048630714417, "step": 924 }, { "epoch": 0.21328106986396125, "grad_norm": 1.1702999413512643, "learning_rate": 1.9826002098447694e-06, "loss": 0.6417914628982544, "step": 925 }, { "epoch": 0.21351164399354391, "grad_norm": 1.025740647317304, "learning_rate": 1.982529325553893e-06, "loss": 0.6062248945236206, "step": 926 }, { "epoch": 0.21374221812312658, "grad_norm": 0.8397411976395659, "learning_rate": 1.982458298442422e-06, "loss": 0.4870455265045166, "step": 927 }, { "epoch": 0.21397279225270924, "grad_norm": 0.8931294029793581, "learning_rate": 1.9823871285206802e-06, "loss": 0.6552037000656128, "step": 928 }, { "epoch": 0.2142033663822919, "grad_norm": 0.9703019761386622, "learning_rate": 1.9823158157990133e-06, "loss": 0.531679093837738, "step": 929 }, { "epoch": 0.21443394051187456, "grad_norm": 1.2664544243150397, "learning_rate": 1.982244360287787e-06, "loss": 0.516847550868988, "step": 930 }, { "epoch": 0.21466451464145722, "grad_norm": 0.810392988957607, "learning_rate": 1.982172761997388e-06, "loss": 0.47147709131240845, "step": 931 }, { "epoch": 0.21489508877103988, "grad_norm": 0.8771741979565738, "learning_rate": 1.982101020938224e-06, "loss": 0.627938985824585, "step": 932 }, { "epoch": 0.21512566290062254, "grad_norm": 1.0257080856710215, "learning_rate": 1.9820291371207233e-06, "loss": 0.639348030090332, "step": 933 }, { "epoch": 0.2153562370302052, "grad_norm": 0.9702705556217962, "learning_rate": 1.9819571105553354e-06, "loss": 0.6480363607406616, "step": 934 }, { "epoch": 0.21558681115978787, "grad_norm": 0.9260617050921398, "learning_rate": 1.9818849412525293e-06, "loss": 0.5776711702346802, "step": 935 }, { "epoch": 0.21581738528937053, "grad_norm": 0.9042487017557694, "learning_rate": 1.9818126292227957e-06, "loss": 0.5891472101211548, "step": 936 }, { "epoch": 0.2160479594189532, "grad_norm": 0.8905401941241984, "learning_rate": 1.9817401744766465e-06, "loss": 0.5977755784988403, "step": 937 }, { "epoch": 0.21627853354853585, "grad_norm": 0.8626457448308078, "learning_rate": 1.981667577024613e-06, "loss": 0.5263733863830566, "step": 938 }, { "epoch": 0.2165091076781185, "grad_norm": 1.0627291912482457, "learning_rate": 1.9815948368772484e-06, "loss": 0.5440605878829956, "step": 939 }, { "epoch": 0.21673968180770117, "grad_norm": 0.9629159186929203, "learning_rate": 1.9815219540451263e-06, "loss": 0.5140440464019775, "step": 940 }, { "epoch": 0.21697025593728383, "grad_norm": 1.0494365886675714, "learning_rate": 1.9814489285388402e-06, "loss": 0.6741353273391724, "step": 941 }, { "epoch": 0.2172008300668665, "grad_norm": 1.1329427006993176, "learning_rate": 1.981375760369006e-06, "loss": 0.6243258714675903, "step": 942 }, { "epoch": 0.21743140419644916, "grad_norm": 1.1054961559311265, "learning_rate": 1.981302449546259e-06, "loss": 0.6363699436187744, "step": 943 }, { "epoch": 0.21766197832603182, "grad_norm": 0.9214231813217233, "learning_rate": 1.981228996081256e-06, "loss": 0.5849490165710449, "step": 944 }, { "epoch": 0.21789255245561448, "grad_norm": 0.8824229032075002, "learning_rate": 1.9811553999846736e-06, "loss": 0.43679118156433105, "step": 945 }, { "epoch": 0.21812312658519714, "grad_norm": 0.8524209104471582, "learning_rate": 1.9810816612672104e-06, "loss": 0.5575870275497437, "step": 946 }, { "epoch": 0.2183537007147798, "grad_norm": 1.2313981009960802, "learning_rate": 1.9810077799395846e-06, "loss": 0.5288122296333313, "step": 947 }, { "epoch": 0.21858427484436246, "grad_norm": 0.9413824588491826, "learning_rate": 1.9809337560125357e-06, "loss": 0.5618559718132019, "step": 948 }, { "epoch": 0.21881484897394513, "grad_norm": 0.900237395227137, "learning_rate": 1.980859589496824e-06, "loss": 0.6346654891967773, "step": 949 }, { "epoch": 0.2190454231035278, "grad_norm": 0.7859619018047411, "learning_rate": 1.98078528040323e-06, "loss": 0.5456810593605042, "step": 950 }, { "epoch": 0.21927599723311045, "grad_norm": 1.096845447650345, "learning_rate": 1.980710828742556e-06, "loss": 0.6463650465011597, "step": 951 }, { "epoch": 0.2195065713626931, "grad_norm": 0.8708852946707265, "learning_rate": 1.980636234525624e-06, "loss": 0.5013638734817505, "step": 952 }, { "epoch": 0.21973714549227577, "grad_norm": 1.0813749561311563, "learning_rate": 1.9805614977632763e-06, "loss": 0.6522110104560852, "step": 953 }, { "epoch": 0.21996771962185843, "grad_norm": 1.1282712003155921, "learning_rate": 1.9804866184663775e-06, "loss": 0.5864803791046143, "step": 954 }, { "epoch": 0.2201982937514411, "grad_norm": 1.0131587624930238, "learning_rate": 1.9804115966458116e-06, "loss": 0.5261500477790833, "step": 955 }, { "epoch": 0.22042886788102375, "grad_norm": 0.9727651996633074, "learning_rate": 1.980336432312484e-06, "loss": 0.585462212562561, "step": 956 }, { "epoch": 0.22065944201060642, "grad_norm": 0.913173290527313, "learning_rate": 1.9802611254773207e-06, "loss": 0.5889539122581482, "step": 957 }, { "epoch": 0.22089001614018908, "grad_norm": 0.9844451118331555, "learning_rate": 1.980185676151268e-06, "loss": 0.665162205696106, "step": 958 }, { "epoch": 0.22112059026977174, "grad_norm": 0.9378356304402508, "learning_rate": 1.9801100843452935e-06, "loss": 0.5344980359077454, "step": 959 }, { "epoch": 0.2213511643993544, "grad_norm": 0.9210142542004092, "learning_rate": 1.980034350070385e-06, "loss": 0.6301499009132385, "step": 960 }, { "epoch": 0.22158173852893706, "grad_norm": 1.0404902143094334, "learning_rate": 1.9799584733375512e-06, "loss": 0.5114584565162659, "step": 961 }, { "epoch": 0.22181231265851972, "grad_norm": 1.0168872016124533, "learning_rate": 1.979882454157822e-06, "loss": 0.5199861526489258, "step": 962 }, { "epoch": 0.22204288678810238, "grad_norm": 1.1826380086118446, "learning_rate": 1.9798062925422472e-06, "loss": 0.5336212515830994, "step": 963 }, { "epoch": 0.22227346091768505, "grad_norm": 1.0189277044162137, "learning_rate": 1.9797299885018977e-06, "loss": 0.535847544670105, "step": 964 }, { "epoch": 0.2225040350472677, "grad_norm": 1.1943664941065335, "learning_rate": 1.979653542047865e-06, "loss": 0.6234130859375, "step": 965 }, { "epoch": 0.22273460917685037, "grad_norm": 0.9414245062598806, "learning_rate": 1.979576953191262e-06, "loss": 0.5017205476760864, "step": 966 }, { "epoch": 0.22296518330643303, "grad_norm": 0.8271602877368085, "learning_rate": 1.9795002219432204e-06, "loss": 0.4982973337173462, "step": 967 }, { "epoch": 0.2231957574360157, "grad_norm": 1.0821521338057418, "learning_rate": 1.979423348314895e-06, "loss": 0.47946417331695557, "step": 968 }, { "epoch": 0.22342633156559835, "grad_norm": 0.9333636639659694, "learning_rate": 1.97934633231746e-06, "loss": 0.5431856513023376, "step": 969 }, { "epoch": 0.223656905695181, "grad_norm": 1.010615347342822, "learning_rate": 1.9792691739621097e-06, "loss": 0.5355685949325562, "step": 970 }, { "epoch": 0.22388747982476367, "grad_norm": 0.9115391310212676, "learning_rate": 1.979191873260061e-06, "loss": 0.6103906631469727, "step": 971 }, { "epoch": 0.22411805395434634, "grad_norm": 0.9295016548118124, "learning_rate": 1.9791144302225493e-06, "loss": 0.538421094417572, "step": 972 }, { "epoch": 0.224348628083929, "grad_norm": 1.2200934433979187, "learning_rate": 1.9790368448608322e-06, "loss": 0.6068445444107056, "step": 973 }, { "epoch": 0.22457920221351163, "grad_norm": 0.8606144159525476, "learning_rate": 1.9789591171861874e-06, "loss": 0.463737815618515, "step": 974 }, { "epoch": 0.2248097763430943, "grad_norm": 1.0217946560153375, "learning_rate": 1.9788812472099135e-06, "loss": 0.6588588953018188, "step": 975 }, { "epoch": 0.22504035047267695, "grad_norm": 1.0288343828209117, "learning_rate": 1.9788032349433297e-06, "loss": 0.678712010383606, "step": 976 }, { "epoch": 0.22527092460225961, "grad_norm": 1.1695805252394589, "learning_rate": 1.9787250803977757e-06, "loss": 0.6397948265075684, "step": 977 }, { "epoch": 0.22550149873184228, "grad_norm": 1.029054993282064, "learning_rate": 1.978646783584612e-06, "loss": 0.5422782897949219, "step": 978 }, { "epoch": 0.22573207286142494, "grad_norm": 0.9969509169785887, "learning_rate": 1.9785683445152204e-06, "loss": 0.5314444303512573, "step": 979 }, { "epoch": 0.2259626469910076, "grad_norm": 1.0816366548169771, "learning_rate": 1.9784897632010026e-06, "loss": 0.6260710954666138, "step": 980 }, { "epoch": 0.22619322112059026, "grad_norm": 1.6140506138107567, "learning_rate": 1.9784110396533804e-06, "loss": 0.6765384078025818, "step": 981 }, { "epoch": 0.22642379525017292, "grad_norm": 0.9741870993027198, "learning_rate": 1.9783321738837983e-06, "loss": 0.6716702580451965, "step": 982 }, { "epoch": 0.22665436937975558, "grad_norm": 0.9800524570597025, "learning_rate": 1.978253165903719e-06, "loss": 0.5537375211715698, "step": 983 }, { "epoch": 0.22688494350933824, "grad_norm": 1.2650751897909203, "learning_rate": 1.9781740157246285e-06, "loss": 0.525878369808197, "step": 984 }, { "epoch": 0.2271155176389209, "grad_norm": 1.1285639712327624, "learning_rate": 1.978094723358031e-06, "loss": 0.6349027156829834, "step": 985 }, { "epoch": 0.22734609176850357, "grad_norm": 0.9922350297605812, "learning_rate": 1.9780152888154525e-06, "loss": 0.5777440071105957, "step": 986 }, { "epoch": 0.22757666589808623, "grad_norm": 0.8792919247604332, "learning_rate": 1.9779357121084402e-06, "loss": 0.6181483268737793, "step": 987 }, { "epoch": 0.2278072400276689, "grad_norm": 1.113677830579263, "learning_rate": 1.9778559932485606e-06, "loss": 0.6364198923110962, "step": 988 }, { "epoch": 0.22803781415725155, "grad_norm": 1.0528039871957056, "learning_rate": 1.9777761322474024e-06, "loss": 0.623460054397583, "step": 989 }, { "epoch": 0.2282683882868342, "grad_norm": 1.0042426162492055, "learning_rate": 1.977696129116574e-06, "loss": 0.504749059677124, "step": 990 }, { "epoch": 0.22849896241641687, "grad_norm": 0.9462650071116105, "learning_rate": 1.9776159838677048e-06, "loss": 0.5228890180587769, "step": 991 }, { "epoch": 0.22872953654599953, "grad_norm": 0.983638268661895, "learning_rate": 1.977535696512444e-06, "loss": 0.5765929222106934, "step": 992 }, { "epoch": 0.2289601106755822, "grad_norm": 1.0000819039461677, "learning_rate": 1.977455267062463e-06, "loss": 0.5165348052978516, "step": 993 }, { "epoch": 0.22919068480516486, "grad_norm": 1.0528189784184039, "learning_rate": 1.9773746955294525e-06, "loss": 0.6056735515594482, "step": 994 }, { "epoch": 0.22942125893474752, "grad_norm": 1.0625954437167437, "learning_rate": 1.9772939819251245e-06, "loss": 0.5430403351783752, "step": 995 }, { "epoch": 0.22965183306433018, "grad_norm": 1.2611536344776966, "learning_rate": 1.977213126261212e-06, "loss": 0.5710945129394531, "step": 996 }, { "epoch": 0.22988240719391284, "grad_norm": 0.9590894945496666, "learning_rate": 1.977132128549468e-06, "loss": 0.5189366936683655, "step": 997 }, { "epoch": 0.2301129813234955, "grad_norm": 1.229825794085491, "learning_rate": 1.977050988801666e-06, "loss": 0.6578037738800049, "step": 998 }, { "epoch": 0.23034355545307816, "grad_norm": 1.0761110723698188, "learning_rate": 1.9769697070296006e-06, "loss": 0.5787034034729004, "step": 999 }, { "epoch": 0.23057412958266083, "grad_norm": 1.0414208441736372, "learning_rate": 1.976888283245087e-06, "loss": 0.5169408321380615, "step": 1000 }, { "epoch": 0.2308047037122435, "grad_norm": 1.1228864795023747, "learning_rate": 1.976806717459961e-06, "loss": 0.6326704025268555, "step": 1001 }, { "epoch": 0.23103527784182615, "grad_norm": 1.2998118201322668, "learning_rate": 1.9767250096860785e-06, "loss": 0.5188414454460144, "step": 1002 }, { "epoch": 0.2312658519714088, "grad_norm": 0.9684429634366722, "learning_rate": 1.9766431599353173e-06, "loss": 0.5788798928260803, "step": 1003 }, { "epoch": 0.23149642610099147, "grad_norm": 1.011079377555661, "learning_rate": 1.976561168219575e-06, "loss": 0.5513355731964111, "step": 1004 }, { "epoch": 0.23172700023057413, "grad_norm": 0.9242770139183195, "learning_rate": 1.97647903455077e-06, "loss": 0.5810542106628418, "step": 1005 }, { "epoch": 0.2319575743601568, "grad_norm": 0.9036081245550505, "learning_rate": 1.9763967589408407e-06, "loss": 0.6541746854782104, "step": 1006 }, { "epoch": 0.23218814848973945, "grad_norm": 0.972339176589073, "learning_rate": 1.976314341401747e-06, "loss": 0.48837774991989136, "step": 1007 }, { "epoch": 0.23241872261932212, "grad_norm": 1.0622732331560878, "learning_rate": 1.976231781945469e-06, "loss": 0.514664888381958, "step": 1008 }, { "epoch": 0.23264929674890478, "grad_norm": 1.1476741578183667, "learning_rate": 1.976149080584008e-06, "loss": 0.48295027017593384, "step": 1009 }, { "epoch": 0.23287987087848744, "grad_norm": 0.9532553897028984, "learning_rate": 1.9760662373293847e-06, "loss": 0.5975791811943054, "step": 1010 }, { "epoch": 0.2331104450080701, "grad_norm": 1.0101722687438028, "learning_rate": 1.9759832521936424e-06, "loss": 0.4810718297958374, "step": 1011 }, { "epoch": 0.23334101913765276, "grad_norm": 0.8377461102160731, "learning_rate": 1.9759001251888425e-06, "loss": 0.5984642505645752, "step": 1012 }, { "epoch": 0.23357159326723542, "grad_norm": 1.1428510363276687, "learning_rate": 1.975816856327069e-06, "loss": 0.600128710269928, "step": 1013 }, { "epoch": 0.23380216739681808, "grad_norm": 0.976646115631477, "learning_rate": 1.9757334456204263e-06, "loss": 0.5036175847053528, "step": 1014 }, { "epoch": 0.23403274152640074, "grad_norm": 0.781296299293608, "learning_rate": 1.975649893081038e-06, "loss": 0.49270063638687134, "step": 1015 }, { "epoch": 0.2342633156559834, "grad_norm": 1.0782515218974933, "learning_rate": 1.97556619872105e-06, "loss": 0.5337218642234802, "step": 1016 }, { "epoch": 0.23449388978556607, "grad_norm": 1.279305397178248, "learning_rate": 1.9754823625526277e-06, "loss": 0.5263136625289917, "step": 1017 }, { "epoch": 0.23472446391514873, "grad_norm": 1.1321753640293293, "learning_rate": 1.975398384587958e-06, "loss": 0.6271284818649292, "step": 1018 }, { "epoch": 0.2349550380447314, "grad_norm": 0.9524936816808555, "learning_rate": 1.975314264839248e-06, "loss": 0.7009197473526001, "step": 1019 }, { "epoch": 0.23518561217431405, "grad_norm": 1.0291281498015452, "learning_rate": 1.9752300033187248e-06, "loss": 0.5781605839729309, "step": 1020 }, { "epoch": 0.2354161863038967, "grad_norm": 1.0439195983844425, "learning_rate": 1.9751456000386367e-06, "loss": 0.549934446811676, "step": 1021 }, { "epoch": 0.23564676043347937, "grad_norm": 1.1313488046553661, "learning_rate": 1.9750610550112535e-06, "loss": 0.5856816172599792, "step": 1022 }, { "epoch": 0.23587733456306204, "grad_norm": 1.1355877980298148, "learning_rate": 1.9749763682488638e-06, "loss": 0.6225322484970093, "step": 1023 }, { "epoch": 0.2361079086926447, "grad_norm": 0.8829653489765357, "learning_rate": 1.9748915397637775e-06, "loss": 0.5533155202865601, "step": 1024 }, { "epoch": 0.23633848282222736, "grad_norm": 0.9964032830251005, "learning_rate": 1.974806569568326e-06, "loss": 0.4960908889770508, "step": 1025 }, { "epoch": 0.23656905695181002, "grad_norm": 1.0642112431572752, "learning_rate": 1.97472145767486e-06, "loss": 0.5960450768470764, "step": 1026 }, { "epoch": 0.23679963108139268, "grad_norm": 1.0609331852795814, "learning_rate": 1.9746362040957517e-06, "loss": 0.5653714537620544, "step": 1027 }, { "epoch": 0.23703020521097534, "grad_norm": 0.9636699324332547, "learning_rate": 1.9745508088433936e-06, "loss": 0.6400578022003174, "step": 1028 }, { "epoch": 0.23726077934055798, "grad_norm": 1.0105210896498236, "learning_rate": 1.9744652719301987e-06, "loss": 0.5459057092666626, "step": 1029 }, { "epoch": 0.23749135347014064, "grad_norm": 1.0859828591491134, "learning_rate": 1.9743795933686005e-06, "loss": 0.46735280752182007, "step": 1030 }, { "epoch": 0.2377219275997233, "grad_norm": 0.9440768334185448, "learning_rate": 1.9742937731710533e-06, "loss": 0.526339590549469, "step": 1031 }, { "epoch": 0.23795250172930596, "grad_norm": 1.013077702945683, "learning_rate": 1.9742078113500323e-06, "loss": 0.5976641178131104, "step": 1032 }, { "epoch": 0.23818307585888862, "grad_norm": 0.9655038700233691, "learning_rate": 1.9741217079180325e-06, "loss": 0.5331728458404541, "step": 1033 }, { "epoch": 0.23841364998847128, "grad_norm": 0.9368079955738086, "learning_rate": 1.9740354628875696e-06, "loss": 0.5743261575698853, "step": 1034 }, { "epoch": 0.23864422411805394, "grad_norm": 0.9982653104570526, "learning_rate": 1.973949076271181e-06, "loss": 0.54700767993927, "step": 1035 }, { "epoch": 0.2388747982476366, "grad_norm": 0.8919318869448586, "learning_rate": 1.9738625480814235e-06, "loss": 0.5483411550521851, "step": 1036 }, { "epoch": 0.23910537237721927, "grad_norm": 0.9314153856468148, "learning_rate": 1.973775878330875e-06, "loss": 0.5677193403244019, "step": 1037 }, { "epoch": 0.23933594650680193, "grad_norm": 0.9867371078797748, "learning_rate": 1.973689067032133e-06, "loss": 0.5092767477035522, "step": 1038 }, { "epoch": 0.2395665206363846, "grad_norm": 0.9526587430164372, "learning_rate": 1.973602114197818e-06, "loss": 0.5618614554405212, "step": 1039 }, { "epoch": 0.23979709476596725, "grad_norm": 1.1304270434054837, "learning_rate": 1.9735150198405677e-06, "loss": 0.5601966977119446, "step": 1040 }, { "epoch": 0.2400276688955499, "grad_norm": 1.2376653334727166, "learning_rate": 1.973427783973043e-06, "loss": 0.5945397019386292, "step": 1041 }, { "epoch": 0.24025824302513257, "grad_norm": 1.084452486357135, "learning_rate": 1.9733404066079253e-06, "loss": 0.42448002099990845, "step": 1042 }, { "epoch": 0.24048881715471523, "grad_norm": 1.0671556472806993, "learning_rate": 1.9732528877579146e-06, "loss": 0.5237313508987427, "step": 1043 }, { "epoch": 0.2407193912842979, "grad_norm": 1.085642930506958, "learning_rate": 1.973165227435733e-06, "loss": 0.6006743907928467, "step": 1044 }, { "epoch": 0.24094996541388056, "grad_norm": 0.9267133414742948, "learning_rate": 1.973077425654123e-06, "loss": 0.547584056854248, "step": 1045 }, { "epoch": 0.24118053954346322, "grad_norm": 1.0824218376223906, "learning_rate": 1.972989482425847e-06, "loss": 0.5472346544265747, "step": 1046 }, { "epoch": 0.24141111367304588, "grad_norm": 1.1106806941355478, "learning_rate": 1.972901397763689e-06, "loss": 0.5962260365486145, "step": 1047 }, { "epoch": 0.24164168780262854, "grad_norm": 0.9770536598072448, "learning_rate": 1.9728131716804525e-06, "loss": 0.561386227607727, "step": 1048 }, { "epoch": 0.2418722619322112, "grad_norm": 1.2169602038706573, "learning_rate": 1.9727248041889624e-06, "loss": 0.46618524193763733, "step": 1049 }, { "epoch": 0.24210283606179386, "grad_norm": 0.9641011081185654, "learning_rate": 1.9726362953020643e-06, "loss": 0.4684019088745117, "step": 1050 }, { "epoch": 0.24233341019137652, "grad_norm": 1.1116892767931694, "learning_rate": 1.9725476450326227e-06, "loss": 0.5670303106307983, "step": 1051 }, { "epoch": 0.2425639843209592, "grad_norm": 1.0413794589983083, "learning_rate": 1.9724588533935246e-06, "loss": 0.5451534986495972, "step": 1052 }, { "epoch": 0.24279455845054185, "grad_norm": 1.3028651104025368, "learning_rate": 1.9723699203976766e-06, "loss": 0.578605592250824, "step": 1053 }, { "epoch": 0.2430251325801245, "grad_norm": 1.072521418141734, "learning_rate": 1.972280846058006e-06, "loss": 0.5844857692718506, "step": 1054 }, { "epoch": 0.24325570670970717, "grad_norm": 0.8882845471690917, "learning_rate": 1.9721916303874603e-06, "loss": 0.5152320861816406, "step": 1055 }, { "epoch": 0.24348628083928983, "grad_norm": 0.994596822062513, "learning_rate": 1.9721022733990087e-06, "loss": 0.5108952522277832, "step": 1056 }, { "epoch": 0.2437168549688725, "grad_norm": 1.2179028657479944, "learning_rate": 1.97201277510564e-06, "loss": 0.6345964670181274, "step": 1057 }, { "epoch": 0.24394742909845515, "grad_norm": 1.0322609868377797, "learning_rate": 1.9719231355203627e-06, "loss": 0.6699639558792114, "step": 1058 }, { "epoch": 0.24417800322803782, "grad_norm": 1.0786593444912098, "learning_rate": 1.971833354656208e-06, "loss": 0.5426750779151917, "step": 1059 }, { "epoch": 0.24440857735762048, "grad_norm": 0.9469348439661489, "learning_rate": 1.9717434325262253e-06, "loss": 0.45797908306121826, "step": 1060 }, { "epoch": 0.24463915148720314, "grad_norm": 0.9212142090514559, "learning_rate": 1.9716533691434872e-06, "loss": 0.46754708886146545, "step": 1061 }, { "epoch": 0.2448697256167858, "grad_norm": 1.0419375830533737, "learning_rate": 1.9715631645210838e-06, "loss": 0.6593209505081177, "step": 1062 }, { "epoch": 0.24510029974636846, "grad_norm": 0.8714440933836988, "learning_rate": 1.9714728186721287e-06, "loss": 0.5634866952896118, "step": 1063 }, { "epoch": 0.24533087387595112, "grad_norm": 1.3414429697713321, "learning_rate": 1.971382331609753e-06, "loss": 0.5066277980804443, "step": 1064 }, { "epoch": 0.24556144800553378, "grad_norm": 0.9735373407478976, "learning_rate": 1.9712917033471113e-06, "loss": 0.5721756219863892, "step": 1065 }, { "epoch": 0.24579202213511644, "grad_norm": 0.9116883309182201, "learning_rate": 1.9712009338973765e-06, "loss": 0.5188664197921753, "step": 1066 }, { "epoch": 0.2460225962646991, "grad_norm": 1.1314636983505006, "learning_rate": 1.9711100232737434e-06, "loss": 0.4879762828350067, "step": 1067 }, { "epoch": 0.24625317039428177, "grad_norm": 1.2412816829375237, "learning_rate": 1.971018971489426e-06, "loss": 0.5169111490249634, "step": 1068 }, { "epoch": 0.24648374452386443, "grad_norm": 1.2239551353327036, "learning_rate": 1.9709277785576605e-06, "loss": 0.7341418862342834, "step": 1069 }, { "epoch": 0.2467143186534471, "grad_norm": 0.9353793197150668, "learning_rate": 1.970836444491702e-06, "loss": 0.48676228523254395, "step": 1070 }, { "epoch": 0.24694489278302975, "grad_norm": 1.1049152340951753, "learning_rate": 1.9707449693048277e-06, "loss": 0.5594040751457214, "step": 1071 }, { "epoch": 0.2471754669126124, "grad_norm": 1.1275772388460679, "learning_rate": 1.970653353010334e-06, "loss": 0.575579047203064, "step": 1072 }, { "epoch": 0.24740604104219507, "grad_norm": 0.9990792550863451, "learning_rate": 1.9705615956215375e-06, "loss": 0.5212938189506531, "step": 1073 }, { "epoch": 0.24763661517177774, "grad_norm": 1.2242480620016798, "learning_rate": 1.970469697151777e-06, "loss": 0.49838072061538696, "step": 1074 }, { "epoch": 0.2478671893013604, "grad_norm": 1.0069439526224342, "learning_rate": 1.9703776576144106e-06, "loss": 0.505547285079956, "step": 1075 }, { "epoch": 0.24809776343094306, "grad_norm": 0.9320138812686547, "learning_rate": 1.970285477022817e-06, "loss": 0.5236082077026367, "step": 1076 }, { "epoch": 0.24832833756052572, "grad_norm": 1.1096851604663263, "learning_rate": 1.9701931553903963e-06, "loss": 0.5417677760124207, "step": 1077 }, { "epoch": 0.24855891169010838, "grad_norm": 1.4437484296393372, "learning_rate": 1.9701006927305676e-06, "loss": 0.624547004699707, "step": 1078 }, { "epoch": 0.24878948581969104, "grad_norm": 1.1814609406249081, "learning_rate": 1.9700080890567713e-06, "loss": 0.7127759456634521, "step": 1079 }, { "epoch": 0.2490200599492737, "grad_norm": 1.1432146079503174, "learning_rate": 1.9699153443824686e-06, "loss": 0.44590264558792114, "step": 1080 }, { "epoch": 0.24925063407885636, "grad_norm": 0.9565451374538135, "learning_rate": 1.9698224587211407e-06, "loss": 0.6311746835708618, "step": 1081 }, { "epoch": 0.24948120820843903, "grad_norm": 0.870591902169041, "learning_rate": 1.9697294320862898e-06, "loss": 0.4837970733642578, "step": 1082 }, { "epoch": 0.2497117823380217, "grad_norm": 0.8760016768814028, "learning_rate": 1.969636264491438e-06, "loss": 0.5749634504318237, "step": 1083 }, { "epoch": 0.24994235646760435, "grad_norm": 0.9733867387062589, "learning_rate": 1.9695429559501283e-06, "loss": 0.5002774000167847, "step": 1084 }, { "epoch": 0.250172930597187, "grad_norm": 0.9904270135981337, "learning_rate": 1.9694495064759236e-06, "loss": 0.5407592058181763, "step": 1085 }, { "epoch": 0.25040350472676964, "grad_norm": 0.9112103184885231, "learning_rate": 1.969355916082408e-06, "loss": 0.5557315349578857, "step": 1086 }, { "epoch": 0.2506340788563523, "grad_norm": 1.073902907739282, "learning_rate": 1.9692621847831865e-06, "loss": 0.4710160493850708, "step": 1087 }, { "epoch": 0.25086465298593497, "grad_norm": 0.946965380647112, "learning_rate": 1.969168312591883e-06, "loss": 0.5935187339782715, "step": 1088 }, { "epoch": 0.2510952271155176, "grad_norm": 0.9849357353961209, "learning_rate": 1.969074299522143e-06, "loss": 0.5358916521072388, "step": 1089 }, { "epoch": 0.2513258012451003, "grad_norm": 0.9196749680008564, "learning_rate": 1.968980145587632e-06, "loss": 0.40736621618270874, "step": 1090 }, { "epoch": 0.25155637537468295, "grad_norm": 0.8048789415521217, "learning_rate": 1.968885850802037e-06, "loss": 0.4986698627471924, "step": 1091 }, { "epoch": 0.2517869495042656, "grad_norm": 0.9340127152994311, "learning_rate": 1.968791415179064e-06, "loss": 0.5547258853912354, "step": 1092 }, { "epoch": 0.2520175236338483, "grad_norm": 1.0477998347740531, "learning_rate": 1.96869683873244e-06, "loss": 0.5187167525291443, "step": 1093 }, { "epoch": 0.25224809776343093, "grad_norm": 0.9456931065936238, "learning_rate": 1.9686021214759136e-06, "loss": 0.560575008392334, "step": 1094 }, { "epoch": 0.2524786718930136, "grad_norm": 1.0595767044992972, "learning_rate": 1.968507263423252e-06, "loss": 0.6441233158111572, "step": 1095 }, { "epoch": 0.25270924602259626, "grad_norm": 1.1650850474563572, "learning_rate": 1.9684122645882446e-06, "loss": 0.6693669557571411, "step": 1096 }, { "epoch": 0.2529398201521789, "grad_norm": 0.9107773905688578, "learning_rate": 1.9683171249846992e-06, "loss": 0.4713742434978485, "step": 1097 }, { "epoch": 0.2531703942817616, "grad_norm": 1.0855755163203802, "learning_rate": 1.9682218446264466e-06, "loss": 0.5393046140670776, "step": 1098 }, { "epoch": 0.25340096841134424, "grad_norm": 0.8304628447343301, "learning_rate": 1.968126423527336e-06, "loss": 0.44416874647140503, "step": 1099 }, { "epoch": 0.2536315425409269, "grad_norm": 0.8560775526129268, "learning_rate": 1.9680308617012383e-06, "loss": 0.486186683177948, "step": 1100 }, { "epoch": 0.25386211667050956, "grad_norm": 0.8812542184427957, "learning_rate": 1.9679351591620446e-06, "loss": 0.5523893237113953, "step": 1101 }, { "epoch": 0.2540926908000922, "grad_norm": 0.9964866126205207, "learning_rate": 1.967839315923665e-06, "loss": 0.49889492988586426, "step": 1102 }, { "epoch": 0.2543232649296749, "grad_norm": 1.1438608764608638, "learning_rate": 1.9677433320000325e-06, "loss": 0.6084630489349365, "step": 1103 }, { "epoch": 0.25455383905925755, "grad_norm": 0.9684259335546852, "learning_rate": 1.967647207405099e-06, "loss": 0.5458555221557617, "step": 1104 }, { "epoch": 0.2547844131888402, "grad_norm": 1.3299718075912128, "learning_rate": 1.9675509421528367e-06, "loss": 0.5453877449035645, "step": 1105 }, { "epoch": 0.25501498731842287, "grad_norm": 1.0404901274691463, "learning_rate": 1.9674545362572393e-06, "loss": 0.5226954221725464, "step": 1106 }, { "epoch": 0.25524556144800553, "grad_norm": 1.0740163604419912, "learning_rate": 1.96735798973232e-06, "loss": 0.5736720561981201, "step": 1107 }, { "epoch": 0.2554761355775882, "grad_norm": 0.9184855028566775, "learning_rate": 1.9672613025921135e-06, "loss": 0.5474177598953247, "step": 1108 }, { "epoch": 0.25570670970717085, "grad_norm": 1.2485055919980548, "learning_rate": 1.967164474850673e-06, "loss": 0.5146498084068298, "step": 1109 }, { "epoch": 0.2559372838367535, "grad_norm": 1.1137167951471605, "learning_rate": 1.967067506522075e-06, "loss": 0.6319057941436768, "step": 1110 }, { "epoch": 0.2561678579663362, "grad_norm": 0.9087550652455604, "learning_rate": 1.9669703976204136e-06, "loss": 0.44495588541030884, "step": 1111 }, { "epoch": 0.25639843209591884, "grad_norm": 0.9108509097161608, "learning_rate": 1.9668731481598052e-06, "loss": 0.5331558585166931, "step": 1112 }, { "epoch": 0.2566290062255015, "grad_norm": 0.9795245602848469, "learning_rate": 1.9667757581543856e-06, "loss": 0.5409468412399292, "step": 1113 }, { "epoch": 0.25685958035508416, "grad_norm": 1.054007279778104, "learning_rate": 1.9666782276183112e-06, "loss": 0.5743308663368225, "step": 1114 }, { "epoch": 0.2570901544846668, "grad_norm": 1.004577427685411, "learning_rate": 1.96658055656576e-06, "loss": 0.5612793564796448, "step": 1115 }, { "epoch": 0.2573207286142495, "grad_norm": 0.9750416454144903, "learning_rate": 1.9664827450109285e-06, "loss": 0.554356575012207, "step": 1116 }, { "epoch": 0.25755130274383214, "grad_norm": 0.9682247695156199, "learning_rate": 1.9663847929680352e-06, "loss": 0.5999840497970581, "step": 1117 }, { "epoch": 0.2577818768734148, "grad_norm": 1.0370889815397122, "learning_rate": 1.9662867004513184e-06, "loss": 0.5152497291564941, "step": 1118 }, { "epoch": 0.25801245100299747, "grad_norm": 1.098663296506931, "learning_rate": 1.966188467475036e-06, "loss": 0.6333990097045898, "step": 1119 }, { "epoch": 0.25824302513258013, "grad_norm": 0.9734180757824468, "learning_rate": 1.9660900940534685e-06, "loss": 0.5826340913772583, "step": 1120 }, { "epoch": 0.2584735992621628, "grad_norm": 1.0258650855361047, "learning_rate": 1.965991580200915e-06, "loss": 0.5968586206436157, "step": 1121 }, { "epoch": 0.25870417339174545, "grad_norm": 1.1400845768454182, "learning_rate": 1.9658929259316945e-06, "loss": 0.6164212226867676, "step": 1122 }, { "epoch": 0.2589347475213281, "grad_norm": 0.9979393096335119, "learning_rate": 1.9657941312601487e-06, "loss": 0.6115970611572266, "step": 1123 }, { "epoch": 0.2591653216509108, "grad_norm": 1.0595728674513747, "learning_rate": 1.9656951962006376e-06, "loss": 0.5490012168884277, "step": 1124 }, { "epoch": 0.25939589578049344, "grad_norm": 0.9502072685023252, "learning_rate": 1.9655961207675425e-06, "loss": 0.6350439786911011, "step": 1125 }, { "epoch": 0.2596264699100761, "grad_norm": 1.0657411847577343, "learning_rate": 1.965496904975266e-06, "loss": 0.5667803287506104, "step": 1126 }, { "epoch": 0.25985704403965876, "grad_norm": 1.1821679518558437, "learning_rate": 1.9653975488382287e-06, "loss": 0.6443949937820435, "step": 1127 }, { "epoch": 0.2600876181692414, "grad_norm": 0.9716559479774245, "learning_rate": 1.965298052370874e-06, "loss": 0.6085849404335022, "step": 1128 }, { "epoch": 0.2603181922988241, "grad_norm": 1.0823001356947075, "learning_rate": 1.9651984155876644e-06, "loss": 0.6633332967758179, "step": 1129 }, { "epoch": 0.26054876642840674, "grad_norm": 1.2848504053653516, "learning_rate": 1.965098638503083e-06, "loss": 0.5997219085693359, "step": 1130 }, { "epoch": 0.2607793405579894, "grad_norm": 1.0454096533900064, "learning_rate": 1.9649987211316333e-06, "loss": 0.5425878167152405, "step": 1131 }, { "epoch": 0.26100991468757206, "grad_norm": 1.1511928917305188, "learning_rate": 1.9648986634878397e-06, "loss": 0.5894105434417725, "step": 1132 }, { "epoch": 0.2612404888171547, "grad_norm": 1.0098199878370706, "learning_rate": 1.9647984655862464e-06, "loss": 0.5967395901679993, "step": 1133 }, { "epoch": 0.2614710629467374, "grad_norm": 1.026032503619318, "learning_rate": 1.964698127441418e-06, "loss": 0.5129253268241882, "step": 1134 }, { "epoch": 0.26170163707632005, "grad_norm": 0.8680242413092717, "learning_rate": 1.96459764906794e-06, "loss": 0.4503140449523926, "step": 1135 }, { "epoch": 0.2619322112059027, "grad_norm": 1.3487730716398616, "learning_rate": 1.964497030480418e-06, "loss": 0.5533326864242554, "step": 1136 }, { "epoch": 0.26216278533548537, "grad_norm": 1.020191268815397, "learning_rate": 1.9643962716934776e-06, "loss": 0.695278525352478, "step": 1137 }, { "epoch": 0.26239335946506803, "grad_norm": 1.0637915159693183, "learning_rate": 1.9642953727217654e-06, "loss": 0.5198212265968323, "step": 1138 }, { "epoch": 0.2626239335946507, "grad_norm": 0.8691408428805534, "learning_rate": 1.9641943335799476e-06, "loss": 0.4348503351211548, "step": 1139 }, { "epoch": 0.26285450772423335, "grad_norm": 1.075781292907759, "learning_rate": 1.9640931542827116e-06, "loss": 0.5241343975067139, "step": 1140 }, { "epoch": 0.263085081853816, "grad_norm": 1.1170175690927264, "learning_rate": 1.9639918348447654e-06, "loss": 0.6621984839439392, "step": 1141 }, { "epoch": 0.2633156559833987, "grad_norm": 0.9797970310895017, "learning_rate": 1.9638903752808358e-06, "loss": 0.6091395020484924, "step": 1142 }, { "epoch": 0.26354623011298134, "grad_norm": 1.358580155566318, "learning_rate": 1.963788775605671e-06, "loss": 0.4857162833213806, "step": 1143 }, { "epoch": 0.263776804242564, "grad_norm": 1.155872598215321, "learning_rate": 1.9636870358340408e-06, "loss": 0.5912413597106934, "step": 1144 }, { "epoch": 0.26400737837214666, "grad_norm": 0.9493926626803307, "learning_rate": 1.9635851559807326e-06, "loss": 0.6006268858909607, "step": 1145 }, { "epoch": 0.2642379525017293, "grad_norm": 1.0095494395510323, "learning_rate": 1.9634831360605567e-06, "loss": 0.5580735802650452, "step": 1146 }, { "epoch": 0.264468526631312, "grad_norm": 1.09443652681985, "learning_rate": 1.9633809760883423e-06, "loss": 0.5554602146148682, "step": 1147 }, { "epoch": 0.26469910076089465, "grad_norm": 1.0073361110439816, "learning_rate": 1.9632786760789393e-06, "loss": 0.5648301839828491, "step": 1148 }, { "epoch": 0.2649296748904773, "grad_norm": 0.9958775096480507, "learning_rate": 1.9631762360472186e-06, "loss": 0.5317412614822388, "step": 1149 }, { "epoch": 0.26516024902005997, "grad_norm": 0.8377541227122274, "learning_rate": 1.96307365600807e-06, "loss": 0.5608310699462891, "step": 1150 }, { "epoch": 0.26539082314964263, "grad_norm": 0.9709108194630034, "learning_rate": 1.962970935976405e-06, "loss": 0.49922698736190796, "step": 1151 }, { "epoch": 0.2656213972792253, "grad_norm": 1.0372577064435262, "learning_rate": 1.9628680759671556e-06, "loss": 0.5840054750442505, "step": 1152 }, { "epoch": 0.26585197140880795, "grad_norm": 1.1264168952681184, "learning_rate": 1.9627650759952727e-06, "loss": 0.6038475632667542, "step": 1153 }, { "epoch": 0.2660825455383906, "grad_norm": 0.969212515968761, "learning_rate": 1.9626619360757284e-06, "loss": 0.5923193097114563, "step": 1154 }, { "epoch": 0.2663131196679733, "grad_norm": 1.1606889211687668, "learning_rate": 1.962558656223516e-06, "loss": 0.5278598666191101, "step": 1155 }, { "epoch": 0.26654369379755594, "grad_norm": 0.9873103600473375, "learning_rate": 1.9624552364536472e-06, "loss": 0.47691023349761963, "step": 1156 }, { "epoch": 0.2667742679271386, "grad_norm": 0.9087676067471127, "learning_rate": 1.962351676781156e-06, "loss": 0.5801899433135986, "step": 1157 }, { "epoch": 0.26700484205672126, "grad_norm": 1.253961482177072, "learning_rate": 1.962247977221095e-06, "loss": 0.5170506238937378, "step": 1158 }, { "epoch": 0.2672354161863039, "grad_norm": 1.0951542684812736, "learning_rate": 1.9621441377885387e-06, "loss": 0.6114981174468994, "step": 1159 }, { "epoch": 0.2674659903158866, "grad_norm": 1.0027892727643062, "learning_rate": 1.9620401584985807e-06, "loss": 0.6377004384994507, "step": 1160 }, { "epoch": 0.26769656444546924, "grad_norm": 0.9961094597216124, "learning_rate": 1.9619360393663356e-06, "loss": 0.6177431344985962, "step": 1161 }, { "epoch": 0.2679271385750519, "grad_norm": 1.1384478708718946, "learning_rate": 1.9618317804069384e-06, "loss": 0.579784095287323, "step": 1162 }, { "epoch": 0.26815771270463457, "grad_norm": 0.8744752952973797, "learning_rate": 1.9617273816355444e-06, "loss": 0.6078776121139526, "step": 1163 }, { "epoch": 0.2683882868342172, "grad_norm": 0.9801356210694869, "learning_rate": 1.961622843067328e-06, "loss": 0.5583093166351318, "step": 1164 }, { "epoch": 0.2686188609637999, "grad_norm": 0.8741287294678143, "learning_rate": 1.961518164717486e-06, "loss": 0.46033143997192383, "step": 1165 }, { "epoch": 0.26884943509338255, "grad_norm": 1.250568820610365, "learning_rate": 1.961413346601234e-06, "loss": 0.5637123584747314, "step": 1166 }, { "epoch": 0.2690800092229652, "grad_norm": 1.0360456860810905, "learning_rate": 1.9613083887338085e-06, "loss": 0.5943595170974731, "step": 1167 }, { "epoch": 0.2693105833525478, "grad_norm": 1.0495419121458136, "learning_rate": 1.961203291130466e-06, "loss": 0.5440319776535034, "step": 1168 }, { "epoch": 0.2695411574821305, "grad_norm": 0.9704830315061433, "learning_rate": 1.961098053806484e-06, "loss": 0.5665608048439026, "step": 1169 }, { "epoch": 0.26977173161171314, "grad_norm": 1.0522625707521382, "learning_rate": 1.960992676777159e-06, "loss": 0.5707683563232422, "step": 1170 }, { "epoch": 0.2700023057412958, "grad_norm": 1.034604689259721, "learning_rate": 1.9608871600578093e-06, "loss": 0.5447777509689331, "step": 1171 }, { "epoch": 0.27023287987087846, "grad_norm": 1.1920689559592121, "learning_rate": 1.9607815036637726e-06, "loss": 0.5598857402801514, "step": 1172 }, { "epoch": 0.2704634540004611, "grad_norm": 1.208701571232948, "learning_rate": 1.960675707610407e-06, "loss": 0.558403491973877, "step": 1173 }, { "epoch": 0.2706940281300438, "grad_norm": 1.3006493228897391, "learning_rate": 1.960569771913091e-06, "loss": 0.6696962118148804, "step": 1174 }, { "epoch": 0.27092460225962645, "grad_norm": 1.0597715788538418, "learning_rate": 1.960463696587224e-06, "loss": 0.519884467124939, "step": 1175 }, { "epoch": 0.2711551763892091, "grad_norm": 1.0090714718428708, "learning_rate": 1.9603574816482243e-06, "loss": 0.6440261602401733, "step": 1176 }, { "epoch": 0.27138575051879177, "grad_norm": 1.1163188497552168, "learning_rate": 1.9602511271115317e-06, "loss": 0.48713982105255127, "step": 1177 }, { "epoch": 0.27161632464837443, "grad_norm": 0.9570997011710476, "learning_rate": 1.960144632992606e-06, "loss": 0.5257129073143005, "step": 1178 }, { "epoch": 0.2718468987779571, "grad_norm": 1.3308862733434774, "learning_rate": 1.9600379993069272e-06, "loss": 0.5220426917076111, "step": 1179 }, { "epoch": 0.27207747290753975, "grad_norm": 1.0690404222828096, "learning_rate": 1.9599312260699955e-06, "loss": 0.569817304611206, "step": 1180 }, { "epoch": 0.2723080470371224, "grad_norm": 1.0650857331550394, "learning_rate": 1.9598243132973317e-06, "loss": 0.4370031952857971, "step": 1181 }, { "epoch": 0.2725386211667051, "grad_norm": 1.125403283606087, "learning_rate": 1.959717261004476e-06, "loss": 0.6060882210731506, "step": 1182 }, { "epoch": 0.27276919529628774, "grad_norm": 0.9065361051198069, "learning_rate": 1.9596100692069905e-06, "loss": 0.5830891132354736, "step": 1183 }, { "epoch": 0.2729997694258704, "grad_norm": 1.4570032441462188, "learning_rate": 1.9595027379204556e-06, "loss": 0.5689493417739868, "step": 1184 }, { "epoch": 0.27323034355545306, "grad_norm": 1.3244280690129522, "learning_rate": 1.9593952671604735e-06, "loss": 0.5550887584686279, "step": 1185 }, { "epoch": 0.2734609176850357, "grad_norm": 1.0207521269848765, "learning_rate": 1.9592876569426665e-06, "loss": 0.48127567768096924, "step": 1186 }, { "epoch": 0.2736914918146184, "grad_norm": 1.071211669612227, "learning_rate": 1.9591799072826764e-06, "loss": 0.640753984451294, "step": 1187 }, { "epoch": 0.27392206594420104, "grad_norm": 1.1730143666350425, "learning_rate": 1.959072018196165e-06, "loss": 0.5266000032424927, "step": 1188 }, { "epoch": 0.2741526400737837, "grad_norm": 0.927867514508325, "learning_rate": 1.958963989698817e-06, "loss": 0.5586614608764648, "step": 1189 }, { "epoch": 0.27438321420336637, "grad_norm": 1.1860842675481242, "learning_rate": 1.9588558218063336e-06, "loss": 0.5937967896461487, "step": 1190 }, { "epoch": 0.274613788332949, "grad_norm": 1.3761930600193095, "learning_rate": 1.958747514534439e-06, "loss": 0.5887218713760376, "step": 1191 }, { "epoch": 0.2748443624625317, "grad_norm": 1.0541442430853707, "learning_rate": 1.9586390678988766e-06, "loss": 0.5151614546775818, "step": 1192 }, { "epoch": 0.27507493659211435, "grad_norm": 0.9782419657689414, "learning_rate": 1.95853048191541e-06, "loss": 0.5392748713493347, "step": 1193 }, { "epoch": 0.275305510721697, "grad_norm": 1.330179141409128, "learning_rate": 1.9584217565998237e-06, "loss": 0.5649560689926147, "step": 1194 }, { "epoch": 0.2755360848512797, "grad_norm": 1.0628047614804303, "learning_rate": 1.9583128919679213e-06, "loss": 0.4888305962085724, "step": 1195 }, { "epoch": 0.27576665898086233, "grad_norm": 0.8838567368205815, "learning_rate": 1.9582038880355282e-06, "loss": 0.5026978850364685, "step": 1196 }, { "epoch": 0.275997233110445, "grad_norm": 1.094585503881071, "learning_rate": 1.9580947448184887e-06, "loss": 0.5358047485351562, "step": 1197 }, { "epoch": 0.27622780724002766, "grad_norm": 1.0838231861798517, "learning_rate": 1.957985462332668e-06, "loss": 0.6145739555358887, "step": 1198 }, { "epoch": 0.2764583813696103, "grad_norm": 1.1469394336927528, "learning_rate": 1.957876040593952e-06, "loss": 0.5155332684516907, "step": 1199 }, { "epoch": 0.276688955499193, "grad_norm": 0.9936014396625975, "learning_rate": 1.957766479618245e-06, "loss": 0.48794522881507874, "step": 1200 }, { "epoch": 0.27691952962877564, "grad_norm": 1.135029138979863, "learning_rate": 1.957656779421474e-06, "loss": 0.5851761102676392, "step": 1201 }, { "epoch": 0.2771501037583583, "grad_norm": 1.0236207003793518, "learning_rate": 1.957546940019584e-06, "loss": 0.603874683380127, "step": 1202 }, { "epoch": 0.27738067788794096, "grad_norm": 1.0658787224753152, "learning_rate": 1.9574369614285426e-06, "loss": 0.5022559762001038, "step": 1203 }, { "epoch": 0.2776112520175236, "grad_norm": 1.4179237341040045, "learning_rate": 1.9573268436643347e-06, "loss": 0.6469730138778687, "step": 1204 }, { "epoch": 0.2778418261471063, "grad_norm": 0.9207501665109726, "learning_rate": 1.9572165867429685e-06, "loss": 0.49918532371520996, "step": 1205 }, { "epoch": 0.27807240027668895, "grad_norm": 0.9656836684424259, "learning_rate": 1.95710619068047e-06, "loss": 0.48623788356781006, "step": 1206 }, { "epoch": 0.2783029744062716, "grad_norm": 0.9837814076450196, "learning_rate": 1.956995655492887e-06, "loss": 0.4868438243865967, "step": 1207 }, { "epoch": 0.27853354853585427, "grad_norm": 1.3533879485069031, "learning_rate": 1.9568849811962862e-06, "loss": 0.5989904403686523, "step": 1208 }, { "epoch": 0.27876412266543693, "grad_norm": 1.3345070230968985, "learning_rate": 1.956774167806756e-06, "loss": 0.5125104188919067, "step": 1209 }, { "epoch": 0.2789946967950196, "grad_norm": 1.0305365483781255, "learning_rate": 1.956663215340404e-06, "loss": 0.5126978158950806, "step": 1210 }, { "epoch": 0.27922527092460225, "grad_norm": 0.9524616726362105, "learning_rate": 1.9565521238133576e-06, "loss": 0.5009375810623169, "step": 1211 }, { "epoch": 0.2794558450541849, "grad_norm": 1.0762476710184214, "learning_rate": 1.956440893241766e-06, "loss": 0.5601603984832764, "step": 1212 }, { "epoch": 0.2796864191837676, "grad_norm": 1.2962045971613827, "learning_rate": 1.956329523641797e-06, "loss": 0.6310690641403198, "step": 1213 }, { "epoch": 0.27991699331335024, "grad_norm": 1.0395130987242733, "learning_rate": 1.95621801502964e-06, "loss": 0.498830646276474, "step": 1214 }, { "epoch": 0.2801475674429329, "grad_norm": 1.0547121574701517, "learning_rate": 1.9561063674215036e-06, "loss": 0.6612650156021118, "step": 1215 }, { "epoch": 0.28037814157251556, "grad_norm": 1.0369778810130763, "learning_rate": 1.9559945808336166e-06, "loss": 0.5651615858078003, "step": 1216 }, { "epoch": 0.2806087157020982, "grad_norm": 1.0028009497915646, "learning_rate": 1.955882655282229e-06, "loss": 0.5675203800201416, "step": 1217 }, { "epoch": 0.2808392898316809, "grad_norm": 1.0910384567165883, "learning_rate": 1.9557705907836095e-06, "loss": 0.5691455006599426, "step": 1218 }, { "epoch": 0.28106986396126354, "grad_norm": 1.2440322291047097, "learning_rate": 1.955658387354048e-06, "loss": 0.6018673181533813, "step": 1219 }, { "epoch": 0.2813004380908462, "grad_norm": 0.8594681913500082, "learning_rate": 1.955546045009855e-06, "loss": 0.5188831090927124, "step": 1220 }, { "epoch": 0.28153101222042887, "grad_norm": 0.9611802055135819, "learning_rate": 1.9554335637673596e-06, "loss": 0.5161044597625732, "step": 1221 }, { "epoch": 0.28176158635001153, "grad_norm": 1.0764912433641416, "learning_rate": 1.9553209436429132e-06, "loss": 0.5651452541351318, "step": 1222 }, { "epoch": 0.2819921604795942, "grad_norm": 1.0362033432012678, "learning_rate": 1.9552081846528858e-06, "loss": 0.5763273239135742, "step": 1223 }, { "epoch": 0.28222273460917685, "grad_norm": 1.0512305083546745, "learning_rate": 1.9550952868136677e-06, "loss": 0.6379664540290833, "step": 1224 }, { "epoch": 0.2824533087387595, "grad_norm": 0.966358468685478, "learning_rate": 1.95498225014167e-06, "loss": 0.4021342396736145, "step": 1225 }, { "epoch": 0.2826838828683422, "grad_norm": 1.3065298085361052, "learning_rate": 1.954869074653324e-06, "loss": 0.49230247735977173, "step": 1226 }, { "epoch": 0.28291445699792483, "grad_norm": 0.9198430971109288, "learning_rate": 1.954755760365081e-06, "loss": 0.5921554565429688, "step": 1227 }, { "epoch": 0.2831450311275075, "grad_norm": 1.2338068239582654, "learning_rate": 1.954642307293412e-06, "loss": 0.6495868563652039, "step": 1228 }, { "epoch": 0.28337560525709016, "grad_norm": 1.0310593371372254, "learning_rate": 1.954528715454808e-06, "loss": 0.5699795484542847, "step": 1229 }, { "epoch": 0.2836061793866728, "grad_norm": 1.3462988930710962, "learning_rate": 1.9544149848657816e-06, "loss": 0.582231879234314, "step": 1230 }, { "epoch": 0.2838367535162555, "grad_norm": 1.0033811085419764, "learning_rate": 1.9543011155428647e-06, "loss": 0.5952359437942505, "step": 1231 }, { "epoch": 0.28406732764583814, "grad_norm": 1.150479906025031, "learning_rate": 1.9541871075026092e-06, "loss": 0.646816611289978, "step": 1232 }, { "epoch": 0.2842979017754208, "grad_norm": 1.2509776515814615, "learning_rate": 1.9540729607615866e-06, "loss": 0.5781043767929077, "step": 1233 }, { "epoch": 0.28452847590500346, "grad_norm": 1.1718295930905136, "learning_rate": 1.95395867533639e-06, "loss": 0.609764814376831, "step": 1234 }, { "epoch": 0.2847590500345861, "grad_norm": 1.2826152398089232, "learning_rate": 1.9538442512436325e-06, "loss": 0.4673759341239929, "step": 1235 }, { "epoch": 0.2849896241641688, "grad_norm": 1.1343052125955835, "learning_rate": 1.953729688499946e-06, "loss": 0.6310999393463135, "step": 1236 }, { "epoch": 0.28522019829375145, "grad_norm": 1.075568996273352, "learning_rate": 1.953614987121983e-06, "loss": 0.5103853344917297, "step": 1237 }, { "epoch": 0.2854507724233341, "grad_norm": 1.1329951189185654, "learning_rate": 1.9535001471264178e-06, "loss": 0.5735328197479248, "step": 1238 }, { "epoch": 0.28568134655291677, "grad_norm": 1.010063337652323, "learning_rate": 1.953385168529942e-06, "loss": 0.5617454051971436, "step": 1239 }, { "epoch": 0.28591192068249943, "grad_norm": 1.1392481671873862, "learning_rate": 1.9532700513492705e-06, "loss": 0.49873489141464233, "step": 1240 }, { "epoch": 0.2861424948120821, "grad_norm": 0.9923008758606798, "learning_rate": 1.9531547956011353e-06, "loss": 0.49185073375701904, "step": 1241 }, { "epoch": 0.28637306894166475, "grad_norm": 1.1119890456844754, "learning_rate": 1.9530394013022907e-06, "loss": 0.6016734838485718, "step": 1242 }, { "epoch": 0.2866036430712474, "grad_norm": 0.984310677257317, "learning_rate": 1.9529238684695105e-06, "loss": 0.5922054052352905, "step": 1243 }, { "epoch": 0.2868342172008301, "grad_norm": 1.2933601588161594, "learning_rate": 1.952808197119588e-06, "loss": 0.6498355269432068, "step": 1244 }, { "epoch": 0.28706479133041274, "grad_norm": 1.106145681286101, "learning_rate": 1.9526923872693382e-06, "loss": 0.5564426183700562, "step": 1245 }, { "epoch": 0.2872953654599954, "grad_norm": 1.0410162813090216, "learning_rate": 1.9525764389355945e-06, "loss": 0.6144154071807861, "step": 1246 }, { "epoch": 0.28752593958957806, "grad_norm": 0.9304288925500919, "learning_rate": 1.9524603521352116e-06, "loss": 0.5958914756774902, "step": 1247 }, { "epoch": 0.2877565137191607, "grad_norm": 1.167763375182377, "learning_rate": 1.952344126885063e-06, "loss": 0.5471549034118652, "step": 1248 }, { "epoch": 0.2879870878487434, "grad_norm": 1.0658282088084226, "learning_rate": 1.952227763202044e-06, "loss": 0.5512329936027527, "step": 1249 }, { "epoch": 0.28821766197832605, "grad_norm": 0.9336952567830841, "learning_rate": 1.9521112611030695e-06, "loss": 0.5545130968093872, "step": 1250 }, { "epoch": 0.2884482361079087, "grad_norm": 0.9540157404500241, "learning_rate": 1.9519946206050734e-06, "loss": 0.5409479737281799, "step": 1251 }, { "epoch": 0.28867881023749137, "grad_norm": 1.0425656776824677, "learning_rate": 1.9518778417250114e-06, "loss": 0.5248778462409973, "step": 1252 }, { "epoch": 0.28890938436707403, "grad_norm": 1.1108036883068904, "learning_rate": 1.951760924479858e-06, "loss": 0.4985620975494385, "step": 1253 }, { "epoch": 0.2891399584966567, "grad_norm": 1.1956376798663733, "learning_rate": 1.951643868886608e-06, "loss": 0.5470424890518188, "step": 1254 }, { "epoch": 0.28937053262623935, "grad_norm": 0.830517770820401, "learning_rate": 1.9515266749622776e-06, "loss": 0.5082905292510986, "step": 1255 }, { "epoch": 0.289601106755822, "grad_norm": 1.1321002460273393, "learning_rate": 1.9514093427239013e-06, "loss": 0.5734596252441406, "step": 1256 }, { "epoch": 0.2898316808854047, "grad_norm": 1.133005147672039, "learning_rate": 1.951291872188535e-06, "loss": 0.4727100431919098, "step": 1257 }, { "epoch": 0.29006225501498734, "grad_norm": 1.044180363768592, "learning_rate": 1.951174263373254e-06, "loss": 0.6727551221847534, "step": 1258 }, { "epoch": 0.29029282914457, "grad_norm": 0.9491498247436025, "learning_rate": 1.9510565162951534e-06, "loss": 0.5225725173950195, "step": 1259 }, { "epoch": 0.29052340327415266, "grad_norm": 0.9861385624887246, "learning_rate": 1.95093863097135e-06, "loss": 0.46537530422210693, "step": 1260 }, { "epoch": 0.2907539774037353, "grad_norm": 1.0433291271591505, "learning_rate": 1.950820607418979e-06, "loss": 0.4729498624801636, "step": 1261 }, { "epoch": 0.290984551533318, "grad_norm": 1.0319083654914931, "learning_rate": 1.950702445655196e-06, "loss": 0.519434928894043, "step": 1262 }, { "epoch": 0.29121512566290064, "grad_norm": 1.0839075745171884, "learning_rate": 1.9505841456971784e-06, "loss": 0.5487297177314758, "step": 1263 }, { "epoch": 0.2914456997924833, "grad_norm": 0.9970964597897494, "learning_rate": 1.9504657075621207e-06, "loss": 0.6228574514389038, "step": 1264 }, { "epoch": 0.29167627392206597, "grad_norm": 1.076219157850212, "learning_rate": 1.95034713126724e-06, "loss": 0.486205518245697, "step": 1265 }, { "epoch": 0.2919068480516486, "grad_norm": 1.220321517878089, "learning_rate": 1.950228416829772e-06, "loss": 0.6465567350387573, "step": 1266 }, { "epoch": 0.2921374221812313, "grad_norm": 1.0227736343783316, "learning_rate": 1.9501095642669735e-06, "loss": 0.5160506963729858, "step": 1267 }, { "epoch": 0.29236799631081395, "grad_norm": 1.0494858452172506, "learning_rate": 1.9499905735961206e-06, "loss": 0.47334107756614685, "step": 1268 }, { "epoch": 0.2925985704403966, "grad_norm": 1.1563719640673416, "learning_rate": 1.9498714448345103e-06, "loss": 0.46453380584716797, "step": 1269 }, { "epoch": 0.29282914456997927, "grad_norm": 0.9754273704287023, "learning_rate": 1.9497521779994582e-06, "loss": 0.5617728233337402, "step": 1270 }, { "epoch": 0.29305971869956193, "grad_norm": 1.3129160300173046, "learning_rate": 1.9496327731083026e-06, "loss": 0.6129153966903687, "step": 1271 }, { "epoch": 0.2932902928291446, "grad_norm": 1.2949114738936178, "learning_rate": 1.9495132301783983e-06, "loss": 0.4903183579444885, "step": 1272 }, { "epoch": 0.29352086695872726, "grad_norm": 1.1167146830002543, "learning_rate": 1.9493935492271235e-06, "loss": 0.5087980628013611, "step": 1273 }, { "epoch": 0.2937514410883099, "grad_norm": 1.0447162269466075, "learning_rate": 1.949273730271874e-06, "loss": 0.5102910399436951, "step": 1274 }, { "epoch": 0.2939820152178926, "grad_norm": 1.0971342006057034, "learning_rate": 1.9491537733300674e-06, "loss": 0.5581132769584656, "step": 1275 }, { "epoch": 0.29421258934747524, "grad_norm": 1.0166201989797772, "learning_rate": 1.949033678419141e-06, "loss": 0.5668213367462158, "step": 1276 }, { "epoch": 0.2944431634770579, "grad_norm": 1.1646263878722904, "learning_rate": 1.9489134455565503e-06, "loss": 0.5352080464363098, "step": 1277 }, { "epoch": 0.29467373760664056, "grad_norm": 1.0375138174364513, "learning_rate": 1.948793074759774e-06, "loss": 0.47343915700912476, "step": 1278 }, { "epoch": 0.29490431173622317, "grad_norm": 1.2395532163204355, "learning_rate": 1.9486725660463084e-06, "loss": 0.5169435143470764, "step": 1279 }, { "epoch": 0.29513488586580583, "grad_norm": 1.2035025560649288, "learning_rate": 1.9485519194336707e-06, "loss": 0.4801402688026428, "step": 1280 }, { "epoch": 0.2953654599953885, "grad_norm": 1.2115883619737033, "learning_rate": 1.9484311349393984e-06, "loss": 0.6537381410598755, "step": 1281 }, { "epoch": 0.29559603412497115, "grad_norm": 0.9306094110342265, "learning_rate": 1.9483102125810483e-06, "loss": 0.5160089135169983, "step": 1282 }, { "epoch": 0.2958266082545538, "grad_norm": 1.0525832312633145, "learning_rate": 1.9481891523761985e-06, "loss": 0.5332320332527161, "step": 1283 }, { "epoch": 0.2960571823841365, "grad_norm": 0.9112280719646961, "learning_rate": 1.9480679543424453e-06, "loss": 0.5076215267181396, "step": 1284 }, { "epoch": 0.29628775651371914, "grad_norm": 1.1265706213450601, "learning_rate": 1.947946618497407e-06, "loss": 0.607105016708374, "step": 1285 }, { "epoch": 0.2965183306433018, "grad_norm": 1.076771624610464, "learning_rate": 1.9478251448587203e-06, "loss": 0.6265846490859985, "step": 1286 }, { "epoch": 0.29674890477288446, "grad_norm": 1.164803442921585, "learning_rate": 1.9477035334440426e-06, "loss": 0.5313390493392944, "step": 1287 }, { "epoch": 0.2969794789024671, "grad_norm": 1.0583207692233336, "learning_rate": 1.947581784271052e-06, "loss": 0.5059833526611328, "step": 1288 }, { "epoch": 0.2972100530320498, "grad_norm": 1.171630953302918, "learning_rate": 1.9474598973574455e-06, "loss": 0.5550922155380249, "step": 1289 }, { "epoch": 0.29744062716163244, "grad_norm": 0.9941233964259298, "learning_rate": 1.947337872720941e-06, "loss": 0.5594801306724548, "step": 1290 }, { "epoch": 0.2976712012912151, "grad_norm": 1.1672729516761162, "learning_rate": 1.9472157103792753e-06, "loss": 0.6404933333396912, "step": 1291 }, { "epoch": 0.29790177542079777, "grad_norm": 1.216836258446271, "learning_rate": 1.947093410350206e-06, "loss": 0.5884830355644226, "step": 1292 }, { "epoch": 0.2981323495503804, "grad_norm": 1.313520165154308, "learning_rate": 1.9469709726515114e-06, "loss": 0.5723487138748169, "step": 1293 }, { "epoch": 0.2983629236799631, "grad_norm": 1.047985941483805, "learning_rate": 1.946848397300989e-06, "loss": 0.5298895239830017, "step": 1294 }, { "epoch": 0.29859349780954575, "grad_norm": 1.009793366380185, "learning_rate": 1.9467256843164557e-06, "loss": 0.6118877530097961, "step": 1295 }, { "epoch": 0.2988240719391284, "grad_norm": 1.2369344702112195, "learning_rate": 1.9466028337157498e-06, "loss": 0.6014599800109863, "step": 1296 }, { "epoch": 0.29905464606871107, "grad_norm": 0.9889478752374168, "learning_rate": 1.9464798455167278e-06, "loss": 0.5861071944236755, "step": 1297 }, { "epoch": 0.29928522019829373, "grad_norm": 1.238998066636259, "learning_rate": 1.9463567197372684e-06, "loss": 0.5863409042358398, "step": 1298 }, { "epoch": 0.2995157943278764, "grad_norm": 1.217300214744882, "learning_rate": 1.9462334563952687e-06, "loss": 0.6576352119445801, "step": 1299 }, { "epoch": 0.29974636845745906, "grad_norm": 1.074029788035818, "learning_rate": 1.9461100555086463e-06, "loss": 0.5458395481109619, "step": 1300 }, { "epoch": 0.2999769425870417, "grad_norm": 1.2759220903954522, "learning_rate": 1.945986517095339e-06, "loss": 0.48430997133255005, "step": 1301 }, { "epoch": 0.3002075167166244, "grad_norm": 1.2436119574902915, "learning_rate": 1.945862841173304e-06, "loss": 0.4212522506713867, "step": 1302 }, { "epoch": 0.30043809084620704, "grad_norm": 1.1823128908009017, "learning_rate": 1.9457390277605188e-06, "loss": 0.5671685934066772, "step": 1303 }, { "epoch": 0.3006686649757897, "grad_norm": 1.0831721181422946, "learning_rate": 1.945615076874981e-06, "loss": 0.5350982546806335, "step": 1304 }, { "epoch": 0.30089923910537236, "grad_norm": 0.9247033101108441, "learning_rate": 1.9454909885347088e-06, "loss": 0.45792657136917114, "step": 1305 }, { "epoch": 0.301129813234955, "grad_norm": 1.0473073919925908, "learning_rate": 1.9453667627577387e-06, "loss": 0.5644106864929199, "step": 1306 }, { "epoch": 0.3013603873645377, "grad_norm": 1.3332547603439018, "learning_rate": 1.945242399562129e-06, "loss": 0.554198145866394, "step": 1307 }, { "epoch": 0.30159096149412035, "grad_norm": 0.9232575644574793, "learning_rate": 1.9451178989659565e-06, "loss": 0.5073474049568176, "step": 1308 }, { "epoch": 0.301821535623703, "grad_norm": 1.0206284762622284, "learning_rate": 1.944993260987319e-06, "loss": 0.569359302520752, "step": 1309 }, { "epoch": 0.30205210975328567, "grad_norm": 1.0382686851233573, "learning_rate": 1.944868485644334e-06, "loss": 0.5011791586875916, "step": 1310 }, { "epoch": 0.30228268388286833, "grad_norm": 0.9869955270819804, "learning_rate": 1.9447435729551384e-06, "loss": 0.41121986508369446, "step": 1311 }, { "epoch": 0.302513258012451, "grad_norm": 1.3489170954309295, "learning_rate": 1.9446185229378896e-06, "loss": 0.5615876913070679, "step": 1312 }, { "epoch": 0.30274383214203365, "grad_norm": 1.2244043366760826, "learning_rate": 1.9444933356107652e-06, "loss": 0.5450695157051086, "step": 1313 }, { "epoch": 0.3029744062716163, "grad_norm": 1.0371383598149113, "learning_rate": 1.9443680109919626e-06, "loss": 0.522222101688385, "step": 1314 }, { "epoch": 0.303204980401199, "grad_norm": 0.9638880730108786, "learning_rate": 1.9442425490996984e-06, "loss": 0.5081876516342163, "step": 1315 }, { "epoch": 0.30343555453078164, "grad_norm": 1.1506604859779093, "learning_rate": 1.9441169499522104e-06, "loss": 0.4955870509147644, "step": 1316 }, { "epoch": 0.3036661286603643, "grad_norm": 1.0185303369767542, "learning_rate": 1.9439912135677553e-06, "loss": 0.5098991990089417, "step": 1317 }, { "epoch": 0.30389670278994696, "grad_norm": 0.9949182918503017, "learning_rate": 1.94386533996461e-06, "loss": 0.5686191320419312, "step": 1318 }, { "epoch": 0.3041272769195296, "grad_norm": 1.180090494573931, "learning_rate": 1.943739329161072e-06, "loss": 0.606401264667511, "step": 1319 }, { "epoch": 0.3043578510491123, "grad_norm": 1.0411002752171188, "learning_rate": 1.9436131811754576e-06, "loss": 0.49249163269996643, "step": 1320 }, { "epoch": 0.30458842517869494, "grad_norm": 1.1079741007732102, "learning_rate": 1.9434868960261047e-06, "loss": 0.5373499989509583, "step": 1321 }, { "epoch": 0.3048189993082776, "grad_norm": 1.4236897413447511, "learning_rate": 1.943360473731369e-06, "loss": 0.4568977355957031, "step": 1322 }, { "epoch": 0.30504957343786027, "grad_norm": 1.034905077800575, "learning_rate": 1.943233914309628e-06, "loss": 0.562126636505127, "step": 1323 }, { "epoch": 0.3052801475674429, "grad_norm": 1.343019932527111, "learning_rate": 1.943107217779278e-06, "loss": 0.5795382261276245, "step": 1324 }, { "epoch": 0.3055107216970256, "grad_norm": 0.9852538064889438, "learning_rate": 1.942980384158736e-06, "loss": 0.5671530365943909, "step": 1325 }, { "epoch": 0.30574129582660825, "grad_norm": 0.8981413519731547, "learning_rate": 1.942853413466438e-06, "loss": 0.5511401891708374, "step": 1326 }, { "epoch": 0.3059718699561909, "grad_norm": 1.1491379693233763, "learning_rate": 1.942726305720841e-06, "loss": 0.5712149739265442, "step": 1327 }, { "epoch": 0.3062024440857736, "grad_norm": 1.171535283311252, "learning_rate": 1.9425990609404215e-06, "loss": 0.5181496739387512, "step": 1328 }, { "epoch": 0.30643301821535623, "grad_norm": 1.1968505005842098, "learning_rate": 1.9424716791436753e-06, "loss": 0.5758726596832275, "step": 1329 }, { "epoch": 0.3066635923449389, "grad_norm": 0.9714627365066287, "learning_rate": 1.942344160349119e-06, "loss": 0.5757049322128296, "step": 1330 }, { "epoch": 0.30689416647452156, "grad_norm": 0.9271633895158528, "learning_rate": 1.9422165045752886e-06, "loss": 0.47352534532546997, "step": 1331 }, { "epoch": 0.3071247406041042, "grad_norm": 1.1418817146577889, "learning_rate": 1.94208871184074e-06, "loss": 0.5940845012664795, "step": 1332 }, { "epoch": 0.3073553147336869, "grad_norm": 1.0590875448509756, "learning_rate": 1.9419607821640496e-06, "loss": 0.5225652456283569, "step": 1333 }, { "epoch": 0.30758588886326954, "grad_norm": 1.0803440664833228, "learning_rate": 1.9418327155638126e-06, "loss": 0.5253404378890991, "step": 1334 }, { "epoch": 0.3078164629928522, "grad_norm": 0.9995333811538123, "learning_rate": 1.941704512058646e-06, "loss": 0.5637744665145874, "step": 1335 }, { "epoch": 0.30804703712243486, "grad_norm": 0.9947267518967771, "learning_rate": 1.941576171667184e-06, "loss": 0.48273587226867676, "step": 1336 }, { "epoch": 0.3082776112520175, "grad_norm": 0.9569882979404835, "learning_rate": 1.9414476944080833e-06, "loss": 0.5989019870758057, "step": 1337 }, { "epoch": 0.3085081853816002, "grad_norm": 1.1125936950721667, "learning_rate": 1.9413190803000183e-06, "loss": 0.5231547951698303, "step": 1338 }, { "epoch": 0.30873875951118285, "grad_norm": 1.0300527191348772, "learning_rate": 1.9411903293616853e-06, "loss": 0.5125160217285156, "step": 1339 }, { "epoch": 0.3089693336407655, "grad_norm": 1.251133475270548, "learning_rate": 1.9410614416117993e-06, "loss": 0.50664883852005, "step": 1340 }, { "epoch": 0.30919990777034817, "grad_norm": 1.063411016331963, "learning_rate": 1.9409324170690955e-06, "loss": 0.5555824637413025, "step": 1341 }, { "epoch": 0.30943048189993083, "grad_norm": 0.9621002533491156, "learning_rate": 1.940803255752329e-06, "loss": 0.5182096362113953, "step": 1342 }, { "epoch": 0.3096610560295135, "grad_norm": 1.0359415249922332, "learning_rate": 1.940673957680274e-06, "loss": 0.5202751159667969, "step": 1343 }, { "epoch": 0.30989163015909615, "grad_norm": 0.9908809268815285, "learning_rate": 1.940544522871726e-06, "loss": 0.49791598320007324, "step": 1344 }, { "epoch": 0.3101222042886788, "grad_norm": 0.990495096784543, "learning_rate": 1.9404149513454995e-06, "loss": 0.48691657185554504, "step": 1345 }, { "epoch": 0.3103527784182615, "grad_norm": 1.0649987362093034, "learning_rate": 1.9402852431204293e-06, "loss": 0.5726481676101685, "step": 1346 }, { "epoch": 0.31058335254784414, "grad_norm": 0.9750258824279312, "learning_rate": 1.940155398215369e-06, "loss": 0.5443148016929626, "step": 1347 }, { "epoch": 0.3108139266774268, "grad_norm": 1.1005441671416878, "learning_rate": 1.9400254166491935e-06, "loss": 0.5767767429351807, "step": 1348 }, { "epoch": 0.31104450080700946, "grad_norm": 1.059167179602632, "learning_rate": 1.9398952984407967e-06, "loss": 0.5208882689476013, "step": 1349 }, { "epoch": 0.3112750749365921, "grad_norm": 0.8304820941291429, "learning_rate": 1.939765043609093e-06, "loss": 0.5152548551559448, "step": 1350 }, { "epoch": 0.3115056490661748, "grad_norm": 1.1875548530259965, "learning_rate": 1.939634652173016e-06, "loss": 0.42542198300361633, "step": 1351 }, { "epoch": 0.31173622319575744, "grad_norm": 1.1424220130032787, "learning_rate": 1.9395041241515197e-06, "loss": 0.6471734046936035, "step": 1352 }, { "epoch": 0.3119667973253401, "grad_norm": 1.1191897598164906, "learning_rate": 1.9393734595635767e-06, "loss": 0.6257486343383789, "step": 1353 }, { "epoch": 0.31219737145492277, "grad_norm": 1.1348942815080005, "learning_rate": 1.9392426584281815e-06, "loss": 0.562118649482727, "step": 1354 }, { "epoch": 0.31242794558450543, "grad_norm": 1.223083488663697, "learning_rate": 1.939111720764347e-06, "loss": 0.5602811574935913, "step": 1355 }, { "epoch": 0.3126585197140881, "grad_norm": 1.041642546930775, "learning_rate": 1.9389806465911056e-06, "loss": 0.54469895362854, "step": 1356 }, { "epoch": 0.31288909384367075, "grad_norm": 1.159034123821878, "learning_rate": 1.9388494359275115e-06, "loss": 0.5262914896011353, "step": 1357 }, { "epoch": 0.3131196679732534, "grad_norm": 1.184281074720895, "learning_rate": 1.938718088792637e-06, "loss": 0.6137207746505737, "step": 1358 }, { "epoch": 0.3133502421028361, "grad_norm": 1.0740150522099046, "learning_rate": 1.9385866052055744e-06, "loss": 0.5792986750602722, "step": 1359 }, { "epoch": 0.31358081623241874, "grad_norm": 0.9946259290534466, "learning_rate": 1.938454985185437e-06, "loss": 0.4953799843788147, "step": 1360 }, { "epoch": 0.3138113903620014, "grad_norm": 1.2906978669163651, "learning_rate": 1.938323228751356e-06, "loss": 0.5722379684448242, "step": 1361 }, { "epoch": 0.31404196449158406, "grad_norm": 0.9996513214249106, "learning_rate": 1.938191335922484e-06, "loss": 0.513651967048645, "step": 1362 }, { "epoch": 0.3142725386211667, "grad_norm": 1.0509635344773647, "learning_rate": 1.9380593067179935e-06, "loss": 0.4911235272884369, "step": 1363 }, { "epoch": 0.3145031127507494, "grad_norm": 1.0029036193486218, "learning_rate": 1.9379271411570753e-06, "loss": 0.5478678941726685, "step": 1364 }, { "epoch": 0.31473368688033204, "grad_norm": 0.8901015021428158, "learning_rate": 1.9377948392589417e-06, "loss": 0.46698129177093506, "step": 1365 }, { "epoch": 0.3149642610099147, "grad_norm": 1.3327357773387452, "learning_rate": 1.9376624010428243e-06, "loss": 0.5081343650817871, "step": 1366 }, { "epoch": 0.31519483513949736, "grad_norm": 1.1172038301784757, "learning_rate": 1.9375298265279735e-06, "loss": 0.583903431892395, "step": 1367 }, { "epoch": 0.31542540926908, "grad_norm": 1.0403870552320973, "learning_rate": 1.937397115733661e-06, "loss": 0.5249435901641846, "step": 1368 }, { "epoch": 0.3156559833986627, "grad_norm": 1.184866053048378, "learning_rate": 1.9372642686791777e-06, "loss": 0.5463817119598389, "step": 1369 }, { "epoch": 0.31588655752824535, "grad_norm": 1.2179956171685966, "learning_rate": 1.9371312853838338e-06, "loss": 0.4634520709514618, "step": 1370 }, { "epoch": 0.316117131657828, "grad_norm": 1.2606144259751904, "learning_rate": 1.93699816586696e-06, "loss": 0.6018840074539185, "step": 1371 }, { "epoch": 0.31634770578741067, "grad_norm": 1.1911067691024062, "learning_rate": 1.9368649101479072e-06, "loss": 0.5507885813713074, "step": 1372 }, { "epoch": 0.31657827991699333, "grad_norm": 0.9991148637431415, "learning_rate": 1.9367315182460442e-06, "loss": 0.5520491600036621, "step": 1373 }, { "epoch": 0.316808854046576, "grad_norm": 1.2455223208218802, "learning_rate": 1.936597990180762e-06, "loss": 0.5410347580909729, "step": 1374 }, { "epoch": 0.31703942817615866, "grad_norm": 1.6049117927004484, "learning_rate": 1.9364643259714694e-06, "loss": 0.5771749019622803, "step": 1375 }, { "epoch": 0.3172700023057413, "grad_norm": 1.123905862633382, "learning_rate": 1.9363305256375965e-06, "loss": 0.5071828365325928, "step": 1376 }, { "epoch": 0.317500576435324, "grad_norm": 1.1240180544134455, "learning_rate": 1.936196589198592e-06, "loss": 0.558908224105835, "step": 1377 }, { "epoch": 0.31773115056490664, "grad_norm": 1.1984781772064843, "learning_rate": 1.9360625166739256e-06, "loss": 0.5509803295135498, "step": 1378 }, { "epoch": 0.3179617246944893, "grad_norm": 1.1703050385431384, "learning_rate": 1.935928308083085e-06, "loss": 0.5333945155143738, "step": 1379 }, { "epoch": 0.31819229882407196, "grad_norm": 1.2141630137674275, "learning_rate": 1.93579396344558e-06, "loss": 0.5337819457054138, "step": 1380 }, { "epoch": 0.3184228729536546, "grad_norm": 1.161230429960398, "learning_rate": 1.9356594827809387e-06, "loss": 0.5286899209022522, "step": 1381 }, { "epoch": 0.3186534470832373, "grad_norm": 1.3042082103630104, "learning_rate": 1.9355248661087083e-06, "loss": 0.5915369987487793, "step": 1382 }, { "epoch": 0.31888402121281995, "grad_norm": 1.2725859277548193, "learning_rate": 1.9353901134484575e-06, "loss": 0.5843492746353149, "step": 1383 }, { "epoch": 0.3191145953424026, "grad_norm": 1.0723106790063142, "learning_rate": 1.935255224819774e-06, "loss": 0.5015528202056885, "step": 1384 }, { "epoch": 0.31934516947198527, "grad_norm": 1.2053658641154292, "learning_rate": 1.935120200242265e-06, "loss": 0.5650957822799683, "step": 1385 }, { "epoch": 0.31957574360156793, "grad_norm": 0.9993056241167617, "learning_rate": 1.9349850397355576e-06, "loss": 0.5452740788459778, "step": 1386 }, { "epoch": 0.3198063177311506, "grad_norm": 1.138341645042275, "learning_rate": 1.934849743319299e-06, "loss": 0.5069071054458618, "step": 1387 }, { "epoch": 0.32003689186073325, "grad_norm": 1.3097523217194937, "learning_rate": 1.934714311013156e-06, "loss": 0.5350260734558105, "step": 1388 }, { "epoch": 0.3202674659903159, "grad_norm": 1.065882395696928, "learning_rate": 1.9345787428368146e-06, "loss": 0.6002014875411987, "step": 1389 }, { "epoch": 0.3204980401198986, "grad_norm": 1.0951548438177328, "learning_rate": 1.9344430388099813e-06, "loss": 0.5111383199691772, "step": 1390 }, { "epoch": 0.3207286142494812, "grad_norm": 1.3896947100609738, "learning_rate": 1.934307198952382e-06, "loss": 0.6029741168022156, "step": 1391 }, { "epoch": 0.32095918837906384, "grad_norm": 1.0076386708324083, "learning_rate": 1.9341712232837628e-06, "loss": 0.48339328169822693, "step": 1392 }, { "epoch": 0.3211897625086465, "grad_norm": 1.5017597017671664, "learning_rate": 1.9340351118238882e-06, "loss": 0.6080894470214844, "step": 1393 }, { "epoch": 0.32142033663822916, "grad_norm": 1.1935202429445742, "learning_rate": 1.9338988645925444e-06, "loss": 0.46375036239624023, "step": 1394 }, { "epoch": 0.3216509107678118, "grad_norm": 1.2397479694281224, "learning_rate": 1.9337624816095357e-06, "loss": 0.5974088907241821, "step": 1395 }, { "epoch": 0.3218814848973945, "grad_norm": 1.4525926184759388, "learning_rate": 1.9336259628946865e-06, "loss": 0.5759298801422119, "step": 1396 }, { "epoch": 0.32211205902697715, "grad_norm": 1.0361695525185906, "learning_rate": 1.9334893084678417e-06, "loss": 0.6050859689712524, "step": 1397 }, { "epoch": 0.3223426331565598, "grad_norm": 1.1306650773102374, "learning_rate": 1.9333525183488657e-06, "loss": 0.5879993438720703, "step": 1398 }, { "epoch": 0.32257320728614247, "grad_norm": 1.055350398289763, "learning_rate": 1.933215592557642e-06, "loss": 0.5496323108673096, "step": 1399 }, { "epoch": 0.32280378141572513, "grad_norm": 1.2847712135798797, "learning_rate": 1.9330785311140732e-06, "loss": 0.48447534441947937, "step": 1400 }, { "epoch": 0.3230343555453078, "grad_norm": 1.2583031445613762, "learning_rate": 1.932941334038084e-06, "loss": 0.5687322020530701, "step": 1401 }, { "epoch": 0.32326492967489046, "grad_norm": 1.1545356458260727, "learning_rate": 1.9328040013496166e-06, "loss": 0.4070928990840912, "step": 1402 }, { "epoch": 0.3234955038044731, "grad_norm": 0.9643847324304846, "learning_rate": 1.9326665330686344e-06, "loss": 0.5131539106369019, "step": 1403 }, { "epoch": 0.3237260779340558, "grad_norm": 1.0846567553359194, "learning_rate": 1.932528929215119e-06, "loss": 0.47571802139282227, "step": 1404 }, { "epoch": 0.32395665206363844, "grad_norm": 1.095169764239565, "learning_rate": 1.9323911898090728e-06, "loss": 0.5676391124725342, "step": 1405 }, { "epoch": 0.3241872261932211, "grad_norm": 1.0653010445083047, "learning_rate": 1.9322533148705177e-06, "loss": 0.5464721322059631, "step": 1406 }, { "epoch": 0.32441780032280376, "grad_norm": 1.044728614529827, "learning_rate": 1.9321153044194953e-06, "loss": 0.6130954027175903, "step": 1407 }, { "epoch": 0.3246483744523864, "grad_norm": 1.6513732337511444, "learning_rate": 1.9319771584760666e-06, "loss": 0.6058028936386108, "step": 1408 }, { "epoch": 0.3248789485819691, "grad_norm": 1.1251884535657009, "learning_rate": 1.9318388770603123e-06, "loss": 0.5326286554336548, "step": 1409 }, { "epoch": 0.32510952271155175, "grad_norm": 1.2184625691329178, "learning_rate": 1.9317004601923337e-06, "loss": 0.6046053767204285, "step": 1410 }, { "epoch": 0.3253400968411344, "grad_norm": 1.058617017669887, "learning_rate": 1.931561907892251e-06, "loss": 0.4597975015640259, "step": 1411 }, { "epoch": 0.32557067097071707, "grad_norm": 1.1843983331118075, "learning_rate": 1.9314232201802035e-06, "loss": 0.6024897694587708, "step": 1412 }, { "epoch": 0.32580124510029973, "grad_norm": 1.037552834044261, "learning_rate": 1.9312843970763512e-06, "loss": 0.45463523268699646, "step": 1413 }, { "epoch": 0.3260318192298824, "grad_norm": 0.9412245310618959, "learning_rate": 1.9311454386008736e-06, "loss": 0.512498140335083, "step": 1414 }, { "epoch": 0.32626239335946505, "grad_norm": 0.8929271577435476, "learning_rate": 1.9310063447739695e-06, "loss": 0.4851795434951782, "step": 1415 }, { "epoch": 0.3264929674890477, "grad_norm": 1.1131717345806365, "learning_rate": 1.930867115615858e-06, "loss": 0.5464169979095459, "step": 1416 }, { "epoch": 0.3267235416186304, "grad_norm": 0.9649299588738096, "learning_rate": 1.930727751146777e-06, "loss": 0.5614463090896606, "step": 1417 }, { "epoch": 0.32695411574821304, "grad_norm": 1.1279163828506724, "learning_rate": 1.930588251386985e-06, "loss": 0.635399341583252, "step": 1418 }, { "epoch": 0.3271846898777957, "grad_norm": 1.0116750083389472, "learning_rate": 1.9304486163567588e-06, "loss": 0.4862840175628662, "step": 1419 }, { "epoch": 0.32741526400737836, "grad_norm": 1.3810849020281415, "learning_rate": 1.930308846076397e-06, "loss": 0.6548877954483032, "step": 1420 }, { "epoch": 0.327645838136961, "grad_norm": 0.9726550652757486, "learning_rate": 1.9301689405662154e-06, "loss": 0.5781031250953674, "step": 1421 }, { "epoch": 0.3278764122665437, "grad_norm": 1.0075078554250574, "learning_rate": 1.930028899846552e-06, "loss": 0.4945180118083954, "step": 1422 }, { "epoch": 0.32810698639612634, "grad_norm": 1.1661473529435082, "learning_rate": 1.9298887239377623e-06, "loss": 0.548690915107727, "step": 1423 }, { "epoch": 0.328337560525709, "grad_norm": 1.0120278252177992, "learning_rate": 1.929748412860222e-06, "loss": 0.44515126943588257, "step": 1424 }, { "epoch": 0.32856813465529167, "grad_norm": 0.8968526552864172, "learning_rate": 1.9296079666343273e-06, "loss": 0.433849573135376, "step": 1425 }, { "epoch": 0.3287987087848743, "grad_norm": 1.185097032812299, "learning_rate": 1.9294673852804938e-06, "loss": 0.5600666403770447, "step": 1426 }, { "epoch": 0.329029282914457, "grad_norm": 1.1490365285996864, "learning_rate": 1.9293266688191555e-06, "loss": 0.5302737355232239, "step": 1427 }, { "epoch": 0.32925985704403965, "grad_norm": 1.1854633228597617, "learning_rate": 1.929185817270768e-06, "loss": 0.5590239763259888, "step": 1428 }, { "epoch": 0.3294904311736223, "grad_norm": 0.9322915581005059, "learning_rate": 1.929044830655804e-06, "loss": 0.43225252628326416, "step": 1429 }, { "epoch": 0.329721005303205, "grad_norm": 1.0987581728513967, "learning_rate": 1.9289037089947595e-06, "loss": 0.4932950735092163, "step": 1430 }, { "epoch": 0.32995157943278763, "grad_norm": 1.1539316791656467, "learning_rate": 1.9287624523081457e-06, "loss": 0.48358941078186035, "step": 1431 }, { "epoch": 0.3301821535623703, "grad_norm": 1.1348341469716536, "learning_rate": 1.928621060616497e-06, "loss": 0.48359012603759766, "step": 1432 }, { "epoch": 0.33041272769195296, "grad_norm": 0.9278501695529541, "learning_rate": 1.9284795339403663e-06, "loss": 0.48462390899658203, "step": 1433 }, { "epoch": 0.3306433018215356, "grad_norm": 1.439376655816269, "learning_rate": 1.9283378723003253e-06, "loss": 0.5167088508605957, "step": 1434 }, { "epoch": 0.3308738759511183, "grad_norm": 1.0184323306356053, "learning_rate": 1.928196075716966e-06, "loss": 0.47352856397628784, "step": 1435 }, { "epoch": 0.33110445008070094, "grad_norm": 0.9676467825700396, "learning_rate": 1.9280541442109e-06, "loss": 0.5013144016265869, "step": 1436 }, { "epoch": 0.3313350242102836, "grad_norm": 1.1746874818237374, "learning_rate": 1.927912077802759e-06, "loss": 0.5061586499214172, "step": 1437 }, { "epoch": 0.33156559833986626, "grad_norm": 1.3055289684633111, "learning_rate": 1.9277698765131927e-06, "loss": 0.5718814134597778, "step": 1438 }, { "epoch": 0.3317961724694489, "grad_norm": 1.147604660511156, "learning_rate": 1.9276275403628727e-06, "loss": 0.47547006607055664, "step": 1439 }, { "epoch": 0.3320267465990316, "grad_norm": 1.1585259805283974, "learning_rate": 1.9274850693724884e-06, "loss": 0.5387942790985107, "step": 1440 }, { "epoch": 0.33225732072861425, "grad_norm": 1.013907046172662, "learning_rate": 1.9273424635627494e-06, "loss": 0.524285078048706, "step": 1441 }, { "epoch": 0.3324878948581969, "grad_norm": 1.1737357855070976, "learning_rate": 1.927199722954385e-06, "loss": 0.5073943138122559, "step": 1442 }, { "epoch": 0.33271846898777957, "grad_norm": 1.2047946851654725, "learning_rate": 1.927056847568144e-06, "loss": 0.4609600007534027, "step": 1443 }, { "epoch": 0.33294904311736223, "grad_norm": 1.0416538135601094, "learning_rate": 1.926913837424795e-06, "loss": 0.4861013889312744, "step": 1444 }, { "epoch": 0.3331796172469449, "grad_norm": 1.0835107342484427, "learning_rate": 1.9267706925451253e-06, "loss": 0.5255436897277832, "step": 1445 }, { "epoch": 0.33341019137652755, "grad_norm": 1.4634923921780199, "learning_rate": 1.9266274129499434e-06, "loss": 0.6673840880393982, "step": 1446 }, { "epoch": 0.3336407655061102, "grad_norm": 0.9656915858584796, "learning_rate": 1.9264839986600757e-06, "loss": 0.38582634925842285, "step": 1447 }, { "epoch": 0.3338713396356929, "grad_norm": 0.9567963925410773, "learning_rate": 1.926340449696369e-06, "loss": 0.4597562253475189, "step": 1448 }, { "epoch": 0.33410191376527554, "grad_norm": 1.130778436617546, "learning_rate": 1.92619676607969e-06, "loss": 0.5901148319244385, "step": 1449 }, { "epoch": 0.3343324878948582, "grad_norm": 1.2252206522255358, "learning_rate": 1.9260529478309242e-06, "loss": 0.49872028827667236, "step": 1450 }, { "epoch": 0.33456306202444086, "grad_norm": 0.9242619738807548, "learning_rate": 1.925908994970977e-06, "loss": 0.4611232578754425, "step": 1451 }, { "epoch": 0.3347936361540235, "grad_norm": 1.1122995891321772, "learning_rate": 1.9257649075207738e-06, "loss": 0.5671408176422119, "step": 1452 }, { "epoch": 0.3350242102836062, "grad_norm": 1.2073453603933548, "learning_rate": 1.925620685501259e-06, "loss": 0.4892054498195648, "step": 1453 }, { "epoch": 0.33525478441318884, "grad_norm": 1.1748595063207394, "learning_rate": 1.9254763289333966e-06, "loss": 0.5506503582000732, "step": 1454 }, { "epoch": 0.3354853585427715, "grad_norm": 1.4352362120603241, "learning_rate": 1.9253318378381702e-06, "loss": 0.6233078241348267, "step": 1455 }, { "epoch": 0.33571593267235417, "grad_norm": 1.2159230168553836, "learning_rate": 1.9251872122365835e-06, "loss": 0.5551373958587646, "step": 1456 }, { "epoch": 0.33594650680193683, "grad_norm": 1.0308435059717576, "learning_rate": 1.925042452149659e-06, "loss": 0.5561612844467163, "step": 1457 }, { "epoch": 0.3361770809315195, "grad_norm": 1.0286600789295617, "learning_rate": 1.924897557598439e-06, "loss": 0.613766074180603, "step": 1458 }, { "epoch": 0.33640765506110215, "grad_norm": 1.092154153863493, "learning_rate": 1.9247525286039852e-06, "loss": 0.5767652988433838, "step": 1459 }, { "epoch": 0.3366382291906848, "grad_norm": 1.1221153049255785, "learning_rate": 1.9246073651873795e-06, "loss": 0.49292564392089844, "step": 1460 }, { "epoch": 0.3368688033202675, "grad_norm": 1.2909262812986786, "learning_rate": 1.9244620673697224e-06, "loss": 0.5901867151260376, "step": 1461 }, { "epoch": 0.33709937744985013, "grad_norm": 1.1013040204716718, "learning_rate": 1.924316635172135e-06, "loss": 0.5543808937072754, "step": 1462 }, { "epoch": 0.3373299515794328, "grad_norm": 1.3433064818976315, "learning_rate": 1.9241710686157568e-06, "loss": 0.528805136680603, "step": 1463 }, { "epoch": 0.33756052570901546, "grad_norm": 1.2569454583762516, "learning_rate": 1.924025367721748e-06, "loss": 0.6396733522415161, "step": 1464 }, { "epoch": 0.3377910998385981, "grad_norm": 0.9764691877916688, "learning_rate": 1.9238795325112867e-06, "loss": 0.5558862686157227, "step": 1465 }, { "epoch": 0.3380216739681808, "grad_norm": 1.2329860923893396, "learning_rate": 1.9237335630055724e-06, "loss": 0.5863986015319824, "step": 1466 }, { "epoch": 0.33825224809776344, "grad_norm": 1.0929132974739206, "learning_rate": 1.923587459225823e-06, "loss": 0.5636321306228638, "step": 1467 }, { "epoch": 0.3384828222273461, "grad_norm": 1.1286586205882263, "learning_rate": 1.923441221193276e-06, "loss": 0.6065811514854431, "step": 1468 }, { "epoch": 0.33871339635692876, "grad_norm": 1.4147716425908794, "learning_rate": 1.9232948489291886e-06, "loss": 0.580939769744873, "step": 1469 }, { "epoch": 0.3389439704865114, "grad_norm": 1.1018333541876169, "learning_rate": 1.9231483424548377e-06, "loss": 0.5429994463920593, "step": 1470 }, { "epoch": 0.3391745446160941, "grad_norm": 1.1834314239894592, "learning_rate": 1.92300170179152e-06, "loss": 0.5090892910957336, "step": 1471 }, { "epoch": 0.33940511874567675, "grad_norm": 1.053685812356228, "learning_rate": 1.9228549269605498e-06, "loss": 0.5280312299728394, "step": 1472 }, { "epoch": 0.3396356928752594, "grad_norm": 0.992641626439364, "learning_rate": 1.9227080179832634e-06, "loss": 0.5098810195922852, "step": 1473 }, { "epoch": 0.33986626700484207, "grad_norm": 1.110706876976592, "learning_rate": 1.922560974881015e-06, "loss": 0.4554474353790283, "step": 1474 }, { "epoch": 0.34009684113442473, "grad_norm": 1.042826154870894, "learning_rate": 1.9224137976751793e-06, "loss": 0.4492517113685608, "step": 1475 }, { "epoch": 0.3403274152640074, "grad_norm": 1.3050966518961793, "learning_rate": 1.9222664863871495e-06, "loss": 0.47606343030929565, "step": 1476 }, { "epoch": 0.34055798939359005, "grad_norm": 1.331553847580159, "learning_rate": 1.9221190410383394e-06, "loss": 0.5939435362815857, "step": 1477 }, { "epoch": 0.3407885635231727, "grad_norm": 1.0156905582890146, "learning_rate": 1.921971461650181e-06, "loss": 0.5418350696563721, "step": 1478 }, { "epoch": 0.3410191376527554, "grad_norm": 1.258400628812999, "learning_rate": 1.9218237482441265e-06, "loss": 0.5307733416557312, "step": 1479 }, { "epoch": 0.34124971178233804, "grad_norm": 1.097634429758053, "learning_rate": 1.9216759008416483e-06, "loss": 0.5102016925811768, "step": 1480 }, { "epoch": 0.3414802859119207, "grad_norm": 1.6070497683125828, "learning_rate": 1.9215279194642366e-06, "loss": 0.5043876767158508, "step": 1481 }, { "epoch": 0.34171086004150336, "grad_norm": 1.0925329335071103, "learning_rate": 1.9213798041334025e-06, "loss": 0.5365253686904907, "step": 1482 }, { "epoch": 0.341941434171086, "grad_norm": 1.1923005853358424, "learning_rate": 1.921231554870676e-06, "loss": 0.4938368797302246, "step": 1483 }, { "epoch": 0.3421720083006687, "grad_norm": 1.0865439416616147, "learning_rate": 1.921083171697607e-06, "loss": 0.5274159908294678, "step": 1484 }, { "epoch": 0.34240258243025135, "grad_norm": 1.1913792015364102, "learning_rate": 1.9209346546357637e-06, "loss": 0.4720276892185211, "step": 1485 }, { "epoch": 0.342633156559834, "grad_norm": 0.9383641214181552, "learning_rate": 1.920786003706735e-06, "loss": 0.42276352643966675, "step": 1486 }, { "epoch": 0.34286373068941667, "grad_norm": 1.0581324959121157, "learning_rate": 1.920637218932129e-06, "loss": 0.5319294333457947, "step": 1487 }, { "epoch": 0.34309430481899933, "grad_norm": 1.1819330354237378, "learning_rate": 1.920488300333572e-06, "loss": 0.5197560787200928, "step": 1488 }, { "epoch": 0.343324878948582, "grad_norm": 1.5013538667422215, "learning_rate": 1.9203392479327127e-06, "loss": 0.550025463104248, "step": 1489 }, { "epoch": 0.34355545307816465, "grad_norm": 1.0981284345294107, "learning_rate": 1.920190061751216e-06, "loss": 0.50255286693573, "step": 1490 }, { "epoch": 0.3437860272077473, "grad_norm": 1.1895622589876538, "learning_rate": 1.9200407418107678e-06, "loss": 0.5952906608581543, "step": 1491 }, { "epoch": 0.34401660133733, "grad_norm": 0.9421522918126589, "learning_rate": 1.9198912881330737e-06, "loss": 0.48161056637763977, "step": 1492 }, { "epoch": 0.34424717546691264, "grad_norm": 1.177243819966174, "learning_rate": 1.919741700739858e-06, "loss": 0.5490972995758057, "step": 1493 }, { "epoch": 0.3444777495964953, "grad_norm": 1.4788962836499655, "learning_rate": 1.9195919796528647e-06, "loss": 0.45651519298553467, "step": 1494 }, { "epoch": 0.34470832372607796, "grad_norm": 1.2203060266370191, "learning_rate": 1.919442124893857e-06, "loss": 0.5318460464477539, "step": 1495 }, { "epoch": 0.3449388978556606, "grad_norm": 1.0748079339537138, "learning_rate": 1.9192921364846187e-06, "loss": 0.5052516460418701, "step": 1496 }, { "epoch": 0.3451694719852433, "grad_norm": 1.3171544150804408, "learning_rate": 1.9191420144469515e-06, "loss": 0.6653434038162231, "step": 1497 }, { "epoch": 0.34540004611482594, "grad_norm": 0.962422061512943, "learning_rate": 1.9189917588026774e-06, "loss": 0.47182875871658325, "step": 1498 }, { "epoch": 0.3456306202444086, "grad_norm": 1.0305251609345925, "learning_rate": 1.9188413695736376e-06, "loss": 0.5257801413536072, "step": 1499 }, { "epoch": 0.34586119437399127, "grad_norm": 1.1090254531285808, "learning_rate": 1.918690846781692e-06, "loss": 0.565075695514679, "step": 1500 }, { "epoch": 0.3460917685035739, "grad_norm": 1.1909717210416553, "learning_rate": 1.9185401904487214e-06, "loss": 0.49737876653671265, "step": 1501 }, { "epoch": 0.34632234263315653, "grad_norm": 1.021716441788736, "learning_rate": 1.918389400596625e-06, "loss": 0.5136237144470215, "step": 1502 }, { "epoch": 0.3465529167627392, "grad_norm": 1.011829912931323, "learning_rate": 1.9182384772473216e-06, "loss": 0.5122819542884827, "step": 1503 }, { "epoch": 0.34678349089232186, "grad_norm": 1.1232586653417744, "learning_rate": 1.91808742042275e-06, "loss": 0.4586041271686554, "step": 1504 }, { "epoch": 0.3470140650219045, "grad_norm": 1.0599756649712084, "learning_rate": 1.9179362301448666e-06, "loss": 0.49752146005630493, "step": 1505 }, { "epoch": 0.3472446391514872, "grad_norm": 1.0110535685015802, "learning_rate": 1.917784906435649e-06, "loss": 0.4423530101776123, "step": 1506 }, { "epoch": 0.34747521328106984, "grad_norm": 1.2828635133632034, "learning_rate": 1.9176334493170946e-06, "loss": 0.4979468882083893, "step": 1507 }, { "epoch": 0.3477057874106525, "grad_norm": 1.0086748218378025, "learning_rate": 1.9174818588112178e-06, "loss": 0.5229524374008179, "step": 1508 }, { "epoch": 0.34793636154023516, "grad_norm": 1.006104946386604, "learning_rate": 1.9173301349400546e-06, "loss": 0.47884654998779297, "step": 1509 }, { "epoch": 0.3481669356698178, "grad_norm": 1.161430061405767, "learning_rate": 1.9171782777256594e-06, "loss": 0.5204922556877136, "step": 1510 }, { "epoch": 0.3483975097994005, "grad_norm": 1.1268415177845295, "learning_rate": 1.917026287190106e-06, "loss": 0.5077674984931946, "step": 1511 }, { "epoch": 0.34862808392898315, "grad_norm": 0.9750269271228661, "learning_rate": 1.9168741633554885e-06, "loss": 0.4171299934387207, "step": 1512 }, { "epoch": 0.3488586580585658, "grad_norm": 1.065613083459404, "learning_rate": 1.9167219062439187e-06, "loss": 0.5228694081306458, "step": 1513 }, { "epoch": 0.34908923218814847, "grad_norm": 1.188410464922724, "learning_rate": 1.916569515877529e-06, "loss": 0.5496635437011719, "step": 1514 }, { "epoch": 0.34931980631773113, "grad_norm": 0.969674279609777, "learning_rate": 1.9164169922784716e-06, "loss": 0.5197573900222778, "step": 1515 }, { "epoch": 0.3495503804473138, "grad_norm": 1.3265152215611398, "learning_rate": 1.9162643354689163e-06, "loss": 0.5726813077926636, "step": 1516 }, { "epoch": 0.34978095457689645, "grad_norm": 1.0368094455843846, "learning_rate": 1.916111545471054e-06, "loss": 0.53382408618927, "step": 1517 }, { "epoch": 0.3500115287064791, "grad_norm": 1.0676291023728657, "learning_rate": 1.915958622307094e-06, "loss": 0.5535515546798706, "step": 1518 }, { "epoch": 0.3502421028360618, "grad_norm": 1.183098293067818, "learning_rate": 1.9158055659992648e-06, "loss": 0.5295307040214539, "step": 1519 }, { "epoch": 0.35047267696564444, "grad_norm": 1.3231709310936663, "learning_rate": 1.9156523765698158e-06, "loss": 0.5397933125495911, "step": 1520 }, { "epoch": 0.3507032510952271, "grad_norm": 1.217082341703879, "learning_rate": 1.915499054041014e-06, "loss": 0.5614666938781738, "step": 1521 }, { "epoch": 0.35093382522480976, "grad_norm": 1.155125291987811, "learning_rate": 1.915345598435146e-06, "loss": 0.5321720838546753, "step": 1522 }, { "epoch": 0.3511643993543924, "grad_norm": 1.172353935810673, "learning_rate": 1.9151920097745185e-06, "loss": 0.51869797706604, "step": 1523 }, { "epoch": 0.3513949734839751, "grad_norm": 1.0936179296558388, "learning_rate": 1.9150382880814577e-06, "loss": 0.58238685131073, "step": 1524 }, { "epoch": 0.35162554761355774, "grad_norm": 1.135142968184709, "learning_rate": 1.914884433378308e-06, "loss": 0.5617767572402954, "step": 1525 }, { "epoch": 0.3518561217431404, "grad_norm": 0.9232400306777988, "learning_rate": 1.9147304456874336e-06, "loss": 0.5207428932189941, "step": 1526 }, { "epoch": 0.35208669587272307, "grad_norm": 1.0829138732821308, "learning_rate": 1.914576325031218e-06, "loss": 0.5929840207099915, "step": 1527 }, { "epoch": 0.3523172700023057, "grad_norm": 1.0372438860332964, "learning_rate": 1.914422071432065e-06, "loss": 0.510567307472229, "step": 1528 }, { "epoch": 0.3525478441318884, "grad_norm": 1.2529291445912578, "learning_rate": 1.914267684912397e-06, "loss": 0.5524177551269531, "step": 1529 }, { "epoch": 0.35277841826147105, "grad_norm": 1.0844290023080794, "learning_rate": 1.9141131654946548e-06, "loss": 0.5622289180755615, "step": 1530 }, { "epoch": 0.3530089923910537, "grad_norm": 1.1655531028574153, "learning_rate": 1.9139585132012995e-06, "loss": 0.5085979700088501, "step": 1531 }, { "epoch": 0.35323956652063637, "grad_norm": 1.0367412290626608, "learning_rate": 1.9138037280548117e-06, "loss": 0.47232770919799805, "step": 1532 }, { "epoch": 0.35347014065021903, "grad_norm": 1.3584148636864177, "learning_rate": 1.913648810077691e-06, "loss": 0.535300612449646, "step": 1533 }, { "epoch": 0.3537007147798017, "grad_norm": 1.1457507125445123, "learning_rate": 1.9134937592924562e-06, "loss": 0.4351940155029297, "step": 1534 }, { "epoch": 0.35393128890938436, "grad_norm": 0.9891980196576595, "learning_rate": 1.9133385757216456e-06, "loss": 0.4691917896270752, "step": 1535 }, { "epoch": 0.354161863038967, "grad_norm": 1.03905005054118, "learning_rate": 1.9131832593878167e-06, "loss": 0.4911034107208252, "step": 1536 }, { "epoch": 0.3543924371685497, "grad_norm": 0.9599946260153974, "learning_rate": 1.9130278103135458e-06, "loss": 0.3954068422317505, "step": 1537 }, { "epoch": 0.35462301129813234, "grad_norm": 1.2512488183212185, "learning_rate": 1.9128722285214297e-06, "loss": 0.5541605949401855, "step": 1538 }, { "epoch": 0.354853585427715, "grad_norm": 1.2362059407886639, "learning_rate": 1.9127165140340832e-06, "loss": 0.5719314217567444, "step": 1539 }, { "epoch": 0.35508415955729766, "grad_norm": 1.342530930822934, "learning_rate": 1.9125606668741418e-06, "loss": 0.60889732837677, "step": 1540 }, { "epoch": 0.3553147336868803, "grad_norm": 1.2098741685807175, "learning_rate": 1.9124046870642587e-06, "loss": 0.5247465968132019, "step": 1541 }, { "epoch": 0.355545307816463, "grad_norm": 1.3096766952611592, "learning_rate": 1.912248574627107e-06, "loss": 0.5681591033935547, "step": 1542 }, { "epoch": 0.35577588194604565, "grad_norm": 1.0008372683888578, "learning_rate": 1.91209232958538e-06, "loss": 0.5995845794677734, "step": 1543 }, { "epoch": 0.3560064560756283, "grad_norm": 1.0463229098086306, "learning_rate": 1.9119359519617893e-06, "loss": 0.514456033706665, "step": 1544 }, { "epoch": 0.35623703020521097, "grad_norm": 1.0680000709528683, "learning_rate": 1.9117794417790657e-06, "loss": 0.45192602276802063, "step": 1545 }, { "epoch": 0.35646760433479363, "grad_norm": 1.042670075197141, "learning_rate": 1.911622799059959e-06, "loss": 0.5529573559761047, "step": 1546 }, { "epoch": 0.3566981784643763, "grad_norm": 1.2129822836493795, "learning_rate": 1.9114660238272403e-06, "loss": 0.4544152021408081, "step": 1547 }, { "epoch": 0.35692875259395895, "grad_norm": 1.516629148023364, "learning_rate": 1.9113091161036974e-06, "loss": 0.5676225423812866, "step": 1548 }, { "epoch": 0.3571593267235416, "grad_norm": 1.1320627323756525, "learning_rate": 1.9111520759121384e-06, "loss": 0.5571830868721008, "step": 1549 }, { "epoch": 0.3573899008531243, "grad_norm": 1.1377531274302592, "learning_rate": 1.910994903275391e-06, "loss": 0.5091487765312195, "step": 1550 }, { "epoch": 0.35762047498270694, "grad_norm": 1.107456889270875, "learning_rate": 1.9108375982163015e-06, "loss": 0.5484684705734253, "step": 1551 }, { "epoch": 0.3578510491122896, "grad_norm": 1.261905478374622, "learning_rate": 1.9106801607577364e-06, "loss": 0.49742424488067627, "step": 1552 }, { "epoch": 0.35808162324187226, "grad_norm": 1.2341261046425518, "learning_rate": 1.9105225909225804e-06, "loss": 0.5871520638465881, "step": 1553 }, { "epoch": 0.3583121973714549, "grad_norm": 1.2329576492287886, "learning_rate": 1.910364888733738e-06, "loss": 0.5096076726913452, "step": 1554 }, { "epoch": 0.3585427715010376, "grad_norm": 1.3375416968847058, "learning_rate": 1.910207054214133e-06, "loss": 0.7168693542480469, "step": 1555 }, { "epoch": 0.35877334563062024, "grad_norm": 1.126707169388949, "learning_rate": 1.910049087386707e-06, "loss": 0.5603561997413635, "step": 1556 }, { "epoch": 0.3590039197602029, "grad_norm": 1.299433383477777, "learning_rate": 1.909890988274424e-06, "loss": 0.5857734680175781, "step": 1557 }, { "epoch": 0.35923449388978557, "grad_norm": 1.040543925807462, "learning_rate": 1.9097327569002642e-06, "loss": 0.5612708926200867, "step": 1558 }, { "epoch": 0.35946506801936823, "grad_norm": 1.146949414139332, "learning_rate": 1.909574393287228e-06, "loss": 0.5264564752578735, "step": 1559 }, { "epoch": 0.3596956421489509, "grad_norm": 0.9390137754415148, "learning_rate": 1.9094158974583357e-06, "loss": 0.4163395166397095, "step": 1560 }, { "epoch": 0.35992621627853355, "grad_norm": 1.0884801214343747, "learning_rate": 1.909257269436626e-06, "loss": 0.483236163854599, "step": 1561 }, { "epoch": 0.3601567904081162, "grad_norm": 1.0086049535834347, "learning_rate": 1.9090985092451572e-06, "loss": 0.48892003297805786, "step": 1562 }, { "epoch": 0.3603873645376989, "grad_norm": 1.0090138133688373, "learning_rate": 1.908939616907007e-06, "loss": 0.45310860872268677, "step": 1563 }, { "epoch": 0.36061793866728153, "grad_norm": 1.0130833457744266, "learning_rate": 1.908780592445271e-06, "loss": 0.5242425799369812, "step": 1564 }, { "epoch": 0.3608485127968642, "grad_norm": 1.0425805251353624, "learning_rate": 1.9086214358830663e-06, "loss": 0.47026845812797546, "step": 1565 }, { "epoch": 0.36107908692644686, "grad_norm": 1.2209406413770176, "learning_rate": 1.9084621472435267e-06, "loss": 0.5783924460411072, "step": 1566 }, { "epoch": 0.3613096610560295, "grad_norm": 1.0139793238266448, "learning_rate": 1.9083027265498073e-06, "loss": 0.5534437894821167, "step": 1567 }, { "epoch": 0.3615402351856122, "grad_norm": 1.27522834837266, "learning_rate": 1.9081431738250815e-06, "loss": 0.49131953716278076, "step": 1568 }, { "epoch": 0.36177080931519484, "grad_norm": 1.0466765845853998, "learning_rate": 1.9079834890925412e-06, "loss": 0.4798020124435425, "step": 1569 }, { "epoch": 0.3620013834447775, "grad_norm": 1.1201181573638213, "learning_rate": 1.9078236723753987e-06, "loss": 0.4928893446922302, "step": 1570 }, { "epoch": 0.36223195757436016, "grad_norm": 0.884047440430311, "learning_rate": 1.9076637236968847e-06, "loss": 0.4483630657196045, "step": 1571 }, { "epoch": 0.3624625317039428, "grad_norm": 1.0983581542959335, "learning_rate": 1.90750364308025e-06, "loss": 0.593490481376648, "step": 1572 }, { "epoch": 0.3626931058335255, "grad_norm": 1.1430514811975505, "learning_rate": 1.9073434305487631e-06, "loss": 0.5944634675979614, "step": 1573 }, { "epoch": 0.36292367996310815, "grad_norm": 1.003698560447405, "learning_rate": 1.9071830861257134e-06, "loss": 0.5010452270507812, "step": 1574 }, { "epoch": 0.3631542540926908, "grad_norm": 1.0687566975761509, "learning_rate": 1.9070226098344078e-06, "loss": 0.5128473043441772, "step": 1575 }, { "epoch": 0.36338482822227347, "grad_norm": 1.0854169038402666, "learning_rate": 1.9068620016981733e-06, "loss": 0.6256363987922668, "step": 1576 }, { "epoch": 0.36361540235185613, "grad_norm": 1.0796360454107574, "learning_rate": 1.9067012617403565e-06, "loss": 0.5502322912216187, "step": 1577 }, { "epoch": 0.3638459764814388, "grad_norm": 1.2842731628323776, "learning_rate": 1.906540389984322e-06, "loss": 0.5756800174713135, "step": 1578 }, { "epoch": 0.36407655061102145, "grad_norm": 1.135643566986845, "learning_rate": 1.9063793864534543e-06, "loss": 0.5131359696388245, "step": 1579 }, { "epoch": 0.3643071247406041, "grad_norm": 0.9714084254330834, "learning_rate": 1.9062182511711567e-06, "loss": 0.5776810646057129, "step": 1580 }, { "epoch": 0.3645376988701868, "grad_norm": 1.0973639487789169, "learning_rate": 1.9060569841608523e-06, "loss": 0.49460822343826294, "step": 1581 }, { "epoch": 0.36476827299976944, "grad_norm": 0.942012419923591, "learning_rate": 1.9058955854459823e-06, "loss": 0.5031022429466248, "step": 1582 }, { "epoch": 0.3649988471293521, "grad_norm": 1.2106661637014209, "learning_rate": 1.9057340550500082e-06, "loss": 0.4957816004753113, "step": 1583 }, { "epoch": 0.36522942125893476, "grad_norm": 0.9363710565312526, "learning_rate": 1.9055723929964102e-06, "loss": 0.47861093282699585, "step": 1584 }, { "epoch": 0.3654599953885174, "grad_norm": 1.027272725701274, "learning_rate": 1.9054105993086868e-06, "loss": 0.44517919421195984, "step": 1585 }, { "epoch": 0.3656905695181001, "grad_norm": 1.1724343492985738, "learning_rate": 1.9052486740103568e-06, "loss": 0.46661484241485596, "step": 1586 }, { "epoch": 0.36592114364768275, "grad_norm": 0.9788001147307338, "learning_rate": 1.9050866171249575e-06, "loss": 0.517694890499115, "step": 1587 }, { "epoch": 0.3661517177772654, "grad_norm": 1.1284193922698917, "learning_rate": 1.904924428676046e-06, "loss": 0.49465644359588623, "step": 1588 }, { "epoch": 0.36638229190684807, "grad_norm": 1.0036913999315975, "learning_rate": 1.9047621086871971e-06, "loss": 0.41830652952194214, "step": 1589 }, { "epoch": 0.36661286603643073, "grad_norm": 1.1944977036427056, "learning_rate": 1.9045996571820067e-06, "loss": 0.5540663003921509, "step": 1590 }, { "epoch": 0.3668434401660134, "grad_norm": 1.072580109375711, "learning_rate": 1.9044370741840882e-06, "loss": 0.5619527101516724, "step": 1591 }, { "epoch": 0.36707401429559605, "grad_norm": 1.1509533440805209, "learning_rate": 1.9042743597170746e-06, "loss": 0.5086055994033813, "step": 1592 }, { "epoch": 0.3673045884251787, "grad_norm": 1.050425223739088, "learning_rate": 1.9041115138046183e-06, "loss": 0.5839927196502686, "step": 1593 }, { "epoch": 0.3675351625547614, "grad_norm": 1.0464789939377692, "learning_rate": 1.9039485364703904e-06, "loss": 0.508616030216217, "step": 1594 }, { "epoch": 0.36776573668434404, "grad_norm": 1.15877506638183, "learning_rate": 1.903785427738082e-06, "loss": 0.46514832973480225, "step": 1595 }, { "epoch": 0.3679963108139267, "grad_norm": 1.525284603977575, "learning_rate": 1.9036221876314016e-06, "loss": 0.42142176628112793, "step": 1596 }, { "epoch": 0.36822688494350936, "grad_norm": 1.3114380851226077, "learning_rate": 1.9034588161740786e-06, "loss": 0.42195791006088257, "step": 1597 }, { "epoch": 0.368457459073092, "grad_norm": 1.0276642661247686, "learning_rate": 1.9032953133898601e-06, "loss": 0.46705931425094604, "step": 1598 }, { "epoch": 0.3686880332026747, "grad_norm": 1.1002100436754347, "learning_rate": 1.9031316793025134e-06, "loss": 0.4741164743900299, "step": 1599 }, { "epoch": 0.36891860733225734, "grad_norm": 1.269728601723268, "learning_rate": 1.902967913935824e-06, "loss": 0.49730339646339417, "step": 1600 }, { "epoch": 0.36914918146184, "grad_norm": 0.9594474153361355, "learning_rate": 1.902804017313597e-06, "loss": 0.47678127884864807, "step": 1601 }, { "epoch": 0.36937975559142266, "grad_norm": 1.1964394586929104, "learning_rate": 1.9026399894596565e-06, "loss": 0.4954279661178589, "step": 1602 }, { "epoch": 0.3696103297210053, "grad_norm": 0.9685506818723637, "learning_rate": 1.9024758303978456e-06, "loss": 0.5115381479263306, "step": 1603 }, { "epoch": 0.369840903850588, "grad_norm": 1.0632901548704432, "learning_rate": 1.9023115401520264e-06, "loss": 0.6147117614746094, "step": 1604 }, { "epoch": 0.37007147798017065, "grad_norm": 1.4566806194426465, "learning_rate": 1.9021471187460802e-06, "loss": 0.5334371328353882, "step": 1605 }, { "epoch": 0.3703020521097533, "grad_norm": 1.2820059739478686, "learning_rate": 1.9019825662039073e-06, "loss": 0.4702361226081848, "step": 1606 }, { "epoch": 0.37053262623933597, "grad_norm": 1.1889012346736458, "learning_rate": 1.901817882549427e-06, "loss": 0.5049586892127991, "step": 1607 }, { "epoch": 0.37076320036891863, "grad_norm": 1.2055092488358514, "learning_rate": 1.901653067806578e-06, "loss": 0.5063170194625854, "step": 1608 }, { "epoch": 0.3709937744985013, "grad_norm": 1.1599393359430212, "learning_rate": 1.9014881219993175e-06, "loss": 0.540824294090271, "step": 1609 }, { "epoch": 0.37122434862808396, "grad_norm": 1.372148291928607, "learning_rate": 1.901323045151622e-06, "loss": 0.4744170904159546, "step": 1610 }, { "epoch": 0.3714549227576666, "grad_norm": 1.2144026597364277, "learning_rate": 1.9011578372874876e-06, "loss": 0.5090929269790649, "step": 1611 }, { "epoch": 0.3716854968872493, "grad_norm": 1.0610635938586983, "learning_rate": 1.9009924984309284e-06, "loss": 0.3886772394180298, "step": 1612 }, { "epoch": 0.3719160710168319, "grad_norm": 1.1192663585328575, "learning_rate": 1.9008270286059782e-06, "loss": 0.4976482391357422, "step": 1613 }, { "epoch": 0.37214664514641455, "grad_norm": 1.0577168176218985, "learning_rate": 1.9006614278366898e-06, "loss": 0.4629209041595459, "step": 1614 }, { "epoch": 0.3723772192759972, "grad_norm": 1.0381238100092287, "learning_rate": 1.9004956961471352e-06, "loss": 0.49334412813186646, "step": 1615 }, { "epoch": 0.37260779340557987, "grad_norm": 1.2336018114177745, "learning_rate": 1.9003298335614047e-06, "loss": 0.614592432975769, "step": 1616 }, { "epoch": 0.37283836753516253, "grad_norm": 0.9895019344615126, "learning_rate": 1.9001638401036082e-06, "loss": 0.5339843034744263, "step": 1617 }, { "epoch": 0.3730689416647452, "grad_norm": 0.9743667038154072, "learning_rate": 1.8999977157978749e-06, "loss": 0.5516937375068665, "step": 1618 }, { "epoch": 0.37329951579432785, "grad_norm": 1.2149293301312265, "learning_rate": 1.8998314606683522e-06, "loss": 0.5034124255180359, "step": 1619 }, { "epoch": 0.3735300899239105, "grad_norm": 0.9412969527830801, "learning_rate": 1.8996650747392073e-06, "loss": 0.49766790866851807, "step": 1620 }, { "epoch": 0.3737606640534932, "grad_norm": 1.1063112007683722, "learning_rate": 1.899498558034626e-06, "loss": 0.6662446856498718, "step": 1621 }, { "epoch": 0.37399123818307584, "grad_norm": 1.3692241861945424, "learning_rate": 1.8993319105788129e-06, "loss": 0.5416747331619263, "step": 1622 }, { "epoch": 0.3742218123126585, "grad_norm": 1.2377768970666951, "learning_rate": 1.8991651323959922e-06, "loss": 0.5137313604354858, "step": 1623 }, { "epoch": 0.37445238644224116, "grad_norm": 1.0509326993065755, "learning_rate": 1.8989982235104072e-06, "loss": 0.566002607345581, "step": 1624 }, { "epoch": 0.3746829605718238, "grad_norm": 1.314391237074608, "learning_rate": 1.8988311839463188e-06, "loss": 0.5201380252838135, "step": 1625 }, { "epoch": 0.3749135347014065, "grad_norm": 1.2844709164103703, "learning_rate": 1.8986640137280087e-06, "loss": 0.5103918313980103, "step": 1626 }, { "epoch": 0.37514410883098914, "grad_norm": 1.081063959726764, "learning_rate": 1.8984967128797763e-06, "loss": 0.47900843620300293, "step": 1627 }, { "epoch": 0.3753746829605718, "grad_norm": 1.0524739811683044, "learning_rate": 1.898329281425941e-06, "loss": 0.42991960048675537, "step": 1628 }, { "epoch": 0.37560525709015447, "grad_norm": 1.2087969734027784, "learning_rate": 1.89816171939084e-06, "loss": 0.5707317590713501, "step": 1629 }, { "epoch": 0.3758358312197371, "grad_norm": 1.0714171850017424, "learning_rate": 1.8979940267988309e-06, "loss": 0.565521240234375, "step": 1630 }, { "epoch": 0.3760664053493198, "grad_norm": 1.2721353238917528, "learning_rate": 1.8978262036742888e-06, "loss": 0.6584400534629822, "step": 1631 }, { "epoch": 0.37629697947890245, "grad_norm": 1.1181726564305359, "learning_rate": 1.897658250041609e-06, "loss": 0.4749317169189453, "step": 1632 }, { "epoch": 0.3765275536084851, "grad_norm": 1.3732616000652873, "learning_rate": 1.8974901659252048e-06, "loss": 0.5495604872703552, "step": 1633 }, { "epoch": 0.37675812773806777, "grad_norm": 1.6408199477459455, "learning_rate": 1.8973219513495094e-06, "loss": 0.465708464384079, "step": 1634 }, { "epoch": 0.37698870186765043, "grad_norm": 1.1887777428919946, "learning_rate": 1.8971536063389742e-06, "loss": 0.4599069058895111, "step": 1635 }, { "epoch": 0.3772192759972331, "grad_norm": 1.1348638946303797, "learning_rate": 1.89698513091807e-06, "loss": 0.4716145694255829, "step": 1636 }, { "epoch": 0.37744985012681576, "grad_norm": 0.990973234996169, "learning_rate": 1.8968165251112863e-06, "loss": 0.594079852104187, "step": 1637 }, { "epoch": 0.3776804242563984, "grad_norm": 1.3300173886007076, "learning_rate": 1.8966477889431317e-06, "loss": 0.4588915705680847, "step": 1638 }, { "epoch": 0.3779109983859811, "grad_norm": 1.5111913527277292, "learning_rate": 1.8964789224381337e-06, "loss": 0.5236901044845581, "step": 1639 }, { "epoch": 0.37814157251556374, "grad_norm": 1.067104402214014, "learning_rate": 1.8963099256208388e-06, "loss": 0.4954737424850464, "step": 1640 }, { "epoch": 0.3783721466451464, "grad_norm": 1.066408318154628, "learning_rate": 1.8961407985158125e-06, "loss": 0.4194701910018921, "step": 1641 }, { "epoch": 0.37860272077472906, "grad_norm": 0.9999478144515371, "learning_rate": 1.8959715411476388e-06, "loss": 0.5368303060531616, "step": 1642 }, { "epoch": 0.3788332949043117, "grad_norm": 1.2178837934755509, "learning_rate": 1.8958021535409214e-06, "loss": 0.5181677341461182, "step": 1643 }, { "epoch": 0.3790638690338944, "grad_norm": 1.0342390187480546, "learning_rate": 1.8956326357202821e-06, "loss": 0.4755169749259949, "step": 1644 }, { "epoch": 0.37929444316347705, "grad_norm": 1.1097461588236448, "learning_rate": 1.8954629877103625e-06, "loss": 0.5460895299911499, "step": 1645 }, { "epoch": 0.3795250172930597, "grad_norm": 1.090972908814234, "learning_rate": 1.8952932095358224e-06, "loss": 0.47811684012413025, "step": 1646 }, { "epoch": 0.37975559142264237, "grad_norm": 1.1794844360929688, "learning_rate": 1.8951233012213405e-06, "loss": 0.5791733860969543, "step": 1647 }, { "epoch": 0.37998616555222503, "grad_norm": 1.1163036430533217, "learning_rate": 1.8949532627916151e-06, "loss": 0.4996911585330963, "step": 1648 }, { "epoch": 0.3802167396818077, "grad_norm": 1.3190959058791496, "learning_rate": 1.8947830942713628e-06, "loss": 0.6108353137969971, "step": 1649 }, { "epoch": 0.38044731381139035, "grad_norm": 1.2084081721604487, "learning_rate": 1.8946127956853195e-06, "loss": 0.5303040742874146, "step": 1650 }, { "epoch": 0.380677887940973, "grad_norm": 1.0581391679258725, "learning_rate": 1.8944423670582397e-06, "loss": 0.4651896357536316, "step": 1651 }, { "epoch": 0.3809084620705557, "grad_norm": 1.1464415021916683, "learning_rate": 1.8942718084148969e-06, "loss": 0.6321637630462646, "step": 1652 }, { "epoch": 0.38113903620013834, "grad_norm": 1.1535120052175352, "learning_rate": 1.8941011197800836e-06, "loss": 0.5124787092208862, "step": 1653 }, { "epoch": 0.381369610329721, "grad_norm": 1.2712538370269149, "learning_rate": 1.893930301178611e-06, "loss": 0.5779180526733398, "step": 1654 }, { "epoch": 0.38160018445930366, "grad_norm": 1.2579128550158534, "learning_rate": 1.8937593526353096e-06, "loss": 0.5723867416381836, "step": 1655 }, { "epoch": 0.3818307585888863, "grad_norm": 1.0216965854263103, "learning_rate": 1.8935882741750281e-06, "loss": 0.4312398433685303, "step": 1656 }, { "epoch": 0.382061332718469, "grad_norm": 1.7195703110538068, "learning_rate": 1.893417065822635e-06, "loss": 0.6503756046295166, "step": 1657 }, { "epoch": 0.38229190684805164, "grad_norm": 1.2691180997694498, "learning_rate": 1.8932457276030166e-06, "loss": 0.508478045463562, "step": 1658 }, { "epoch": 0.3825224809776343, "grad_norm": 0.9328619594784499, "learning_rate": 1.8930742595410792e-06, "loss": 0.46552446484565735, "step": 1659 }, { "epoch": 0.38275305510721697, "grad_norm": 0.983497277362264, "learning_rate": 1.8929026616617467e-06, "loss": 0.4739278256893158, "step": 1660 }, { "epoch": 0.3829836292367996, "grad_norm": 1.2642164913655083, "learning_rate": 1.8927309339899634e-06, "loss": 0.5584233403205872, "step": 1661 }, { "epoch": 0.3832142033663823, "grad_norm": 1.0681648876128738, "learning_rate": 1.8925590765506911e-06, "loss": 0.6155074238777161, "step": 1662 }, { "epoch": 0.38344477749596495, "grad_norm": 1.1479148469369402, "learning_rate": 1.8923870893689112e-06, "loss": 0.5253106951713562, "step": 1663 }, { "epoch": 0.3836753516255476, "grad_norm": 1.2179992400932398, "learning_rate": 1.8922149724696238e-06, "loss": 0.4190565347671509, "step": 1664 }, { "epoch": 0.3839059257551303, "grad_norm": 1.124098215736467, "learning_rate": 1.892042725877848e-06, "loss": 0.5263853073120117, "step": 1665 }, { "epoch": 0.38413649988471293, "grad_norm": 1.0385777204325046, "learning_rate": 1.8918703496186214e-06, "loss": 0.4492432773113251, "step": 1666 }, { "epoch": 0.3843670740142956, "grad_norm": 1.3356308613758272, "learning_rate": 1.8916978437170004e-06, "loss": 0.49745023250579834, "step": 1667 }, { "epoch": 0.38459764814387826, "grad_norm": 1.2023114319635457, "learning_rate": 1.891525208198061e-06, "loss": 0.6003707647323608, "step": 1668 }, { "epoch": 0.3848282222734609, "grad_norm": 1.6371184982518272, "learning_rate": 1.8913524430868973e-06, "loss": 0.5430049300193787, "step": 1669 }, { "epoch": 0.3850587964030436, "grad_norm": 1.0715049923324578, "learning_rate": 1.8911795484086222e-06, "loss": 0.5561289191246033, "step": 1670 }, { "epoch": 0.38528937053262624, "grad_norm": 1.1416350409171048, "learning_rate": 1.8910065241883678e-06, "loss": 0.5488184690475464, "step": 1671 }, { "epoch": 0.3855199446622089, "grad_norm": 1.0082475661815067, "learning_rate": 1.890833370451285e-06, "loss": 0.46347010135650635, "step": 1672 }, { "epoch": 0.38575051879179156, "grad_norm": 1.0668592703569681, "learning_rate": 1.8906600872225438e-06, "loss": 0.553687334060669, "step": 1673 }, { "epoch": 0.3859810929213742, "grad_norm": 1.1035800532005071, "learning_rate": 1.8904866745273323e-06, "loss": 0.46162208914756775, "step": 1674 }, { "epoch": 0.3862116670509569, "grad_norm": 1.076914158561248, "learning_rate": 1.8903131323908576e-06, "loss": 0.4478996992111206, "step": 1675 }, { "epoch": 0.38644224118053955, "grad_norm": 1.1488135535707533, "learning_rate": 1.8901394608383463e-06, "loss": 0.5857031345367432, "step": 1676 }, { "epoch": 0.3866728153101222, "grad_norm": 1.5929334393746841, "learning_rate": 1.8899656598950432e-06, "loss": 0.592833399772644, "step": 1677 }, { "epoch": 0.38690338943970487, "grad_norm": 1.0232228390237461, "learning_rate": 1.8897917295862117e-06, "loss": 0.6007786989212036, "step": 1678 }, { "epoch": 0.38713396356928753, "grad_norm": 1.109869111259485, "learning_rate": 1.8896176699371343e-06, "loss": 0.5248164534568787, "step": 1679 }, { "epoch": 0.3873645376988702, "grad_norm": 0.856016560201164, "learning_rate": 1.8894434809731128e-06, "loss": 0.43112409114837646, "step": 1680 }, { "epoch": 0.38759511182845285, "grad_norm": 1.318795823918729, "learning_rate": 1.8892691627194673e-06, "loss": 0.56545090675354, "step": 1681 }, { "epoch": 0.3878256859580355, "grad_norm": 1.1470159881146635, "learning_rate": 1.8890947152015363e-06, "loss": 0.6287904977798462, "step": 1682 }, { "epoch": 0.3880562600876182, "grad_norm": 1.155806897456587, "learning_rate": 1.8889201384446775e-06, "loss": 0.48461633920669556, "step": 1683 }, { "epoch": 0.38828683421720084, "grad_norm": 1.2251476021613918, "learning_rate": 1.888745432474268e-06, "loss": 0.5089331865310669, "step": 1684 }, { "epoch": 0.3885174083467835, "grad_norm": 0.9661641286318025, "learning_rate": 1.8885705973157027e-06, "loss": 0.4805281162261963, "step": 1685 }, { "epoch": 0.38874798247636616, "grad_norm": 1.070887780603473, "learning_rate": 1.8883956329943955e-06, "loss": 0.5243096947669983, "step": 1686 }, { "epoch": 0.3889785566059488, "grad_norm": 1.240979728566986, "learning_rate": 1.8882205395357795e-06, "loss": 0.5808781981468201, "step": 1687 }, { "epoch": 0.3892091307355315, "grad_norm": 1.2574299318006046, "learning_rate": 1.8880453169653063e-06, "loss": 0.5397018194198608, "step": 1688 }, { "epoch": 0.38943970486511414, "grad_norm": 1.182945649827907, "learning_rate": 1.8878699653084462e-06, "loss": 0.4475638270378113, "step": 1689 }, { "epoch": 0.3896702789946968, "grad_norm": 1.3095447574792232, "learning_rate": 1.8876944845906884e-06, "loss": 0.6212958693504333, "step": 1690 }, { "epoch": 0.38990085312427947, "grad_norm": 1.1726349359481907, "learning_rate": 1.8875188748375407e-06, "loss": 0.44465404748916626, "step": 1691 }, { "epoch": 0.39013142725386213, "grad_norm": 1.2650698772045321, "learning_rate": 1.8873431360745297e-06, "loss": 0.5711641311645508, "step": 1692 }, { "epoch": 0.3903620013834448, "grad_norm": 1.2039233000565408, "learning_rate": 1.8871672683272012e-06, "loss": 0.4527866244316101, "step": 1693 }, { "epoch": 0.39059257551302745, "grad_norm": 1.515756125658867, "learning_rate": 1.8869912716211188e-06, "loss": 0.6242899894714355, "step": 1694 }, { "epoch": 0.3908231496426101, "grad_norm": 1.6198907712835393, "learning_rate": 1.8868151459818656e-06, "loss": 0.6294416189193726, "step": 1695 }, { "epoch": 0.3910537237721928, "grad_norm": 1.2238875456694314, "learning_rate": 1.8866388914350435e-06, "loss": 0.49869638681411743, "step": 1696 }, { "epoch": 0.39128429790177544, "grad_norm": 1.1755814842525432, "learning_rate": 1.886462508006273e-06, "loss": 0.5456752777099609, "step": 1697 }, { "epoch": 0.3915148720313581, "grad_norm": 1.0114016306766007, "learning_rate": 1.8862859957211926e-06, "loss": 0.4197172224521637, "step": 1698 }, { "epoch": 0.39174544616094076, "grad_norm": 1.0278658872450297, "learning_rate": 1.8861093546054603e-06, "loss": 0.5012276768684387, "step": 1699 }, { "epoch": 0.3919760202905234, "grad_norm": 1.2065880303446173, "learning_rate": 1.8859325846847531e-06, "loss": 0.48108845949172974, "step": 1700 }, { "epoch": 0.3922065944201061, "grad_norm": 1.1190986847477769, "learning_rate": 1.885755685984766e-06, "loss": 0.48592355847358704, "step": 1701 }, { "epoch": 0.39243716854968874, "grad_norm": 1.136053467553038, "learning_rate": 1.8855786585312132e-06, "loss": 0.5744791030883789, "step": 1702 }, { "epoch": 0.3926677426792714, "grad_norm": 1.1435558229801501, "learning_rate": 1.8854015023498273e-06, "loss": 0.5378769040107727, "step": 1703 }, { "epoch": 0.39289831680885406, "grad_norm": 1.0710678493453967, "learning_rate": 1.8852242174663594e-06, "loss": 0.5630123615264893, "step": 1704 }, { "epoch": 0.3931288909384367, "grad_norm": 1.0913466409725974, "learning_rate": 1.8850468039065806e-06, "loss": 0.5247849225997925, "step": 1705 }, { "epoch": 0.3933594650680194, "grad_norm": 1.282307381217427, "learning_rate": 1.884869261696279e-06, "loss": 0.5679286122322083, "step": 1706 }, { "epoch": 0.39359003919760205, "grad_norm": 1.0140902583392881, "learning_rate": 1.8846915908612622e-06, "loss": 0.4505179524421692, "step": 1707 }, { "epoch": 0.3938206133271847, "grad_norm": 1.233342858229108, "learning_rate": 1.8845137914273566e-06, "loss": 0.6077077388763428, "step": 1708 }, { "epoch": 0.39405118745676737, "grad_norm": 1.1523756442286543, "learning_rate": 1.8843358634204069e-06, "loss": 0.4703037738800049, "step": 1709 }, { "epoch": 0.39428176158635003, "grad_norm": 1.3467147447696661, "learning_rate": 1.8841578068662773e-06, "loss": 0.6085091829299927, "step": 1710 }, { "epoch": 0.3945123357159327, "grad_norm": 1.3769264461225226, "learning_rate": 1.8839796217908498e-06, "loss": 0.6075730919837952, "step": 1711 }, { "epoch": 0.39474290984551536, "grad_norm": 1.4068518720273175, "learning_rate": 1.8838013082200252e-06, "loss": 0.581851601600647, "step": 1712 }, { "epoch": 0.394973483975098, "grad_norm": 0.9365976129961602, "learning_rate": 1.8836228661797234e-06, "loss": 0.555284857749939, "step": 1713 }, { "epoch": 0.3952040581046807, "grad_norm": 1.205134330479215, "learning_rate": 1.8834442956958832e-06, "loss": 0.5342675447463989, "step": 1714 }, { "epoch": 0.39543463223426334, "grad_norm": 1.2329889286532099, "learning_rate": 1.8832655967944605e-06, "loss": 0.47501081228256226, "step": 1715 }, { "epoch": 0.395665206363846, "grad_norm": 1.1350943426800137, "learning_rate": 1.8830867695014323e-06, "loss": 0.592293918132782, "step": 1716 }, { "epoch": 0.39589578049342866, "grad_norm": 1.2591938264724012, "learning_rate": 1.8829078138427921e-06, "loss": 0.5903242826461792, "step": 1717 }, { "epoch": 0.3961263546230113, "grad_norm": 1.203385992389072, "learning_rate": 1.882728729844553e-06, "loss": 0.5292568206787109, "step": 1718 }, { "epoch": 0.396356928752594, "grad_norm": 1.070652075724697, "learning_rate": 1.8825495175327468e-06, "loss": 0.5748786926269531, "step": 1719 }, { "epoch": 0.39658750288217665, "grad_norm": 1.230421737483, "learning_rate": 1.8823701769334242e-06, "loss": 0.6191601753234863, "step": 1720 }, { "epoch": 0.3968180770117593, "grad_norm": 1.180452919869617, "learning_rate": 1.8821907080726535e-06, "loss": 0.5569231510162354, "step": 1721 }, { "epoch": 0.39704865114134197, "grad_norm": 1.291275382361216, "learning_rate": 1.882011110976523e-06, "loss": 0.5103349089622498, "step": 1722 }, { "epoch": 0.39727922527092463, "grad_norm": 1.1952555855906501, "learning_rate": 1.8818313856711382e-06, "loss": 0.4981175363063812, "step": 1723 }, { "epoch": 0.39750979940050724, "grad_norm": 1.5157833486690673, "learning_rate": 1.8816515321826248e-06, "loss": 0.5429514050483704, "step": 1724 }, { "epoch": 0.3977403735300899, "grad_norm": 1.1377768164918185, "learning_rate": 1.8814715505371254e-06, "loss": 0.5318386554718018, "step": 1725 }, { "epoch": 0.39797094765967256, "grad_norm": 1.0451576127270763, "learning_rate": 1.881291440760803e-06, "loss": 0.47451460361480713, "step": 1726 }, { "epoch": 0.3982015217892552, "grad_norm": 1.2815255131055066, "learning_rate": 1.8811112028798384e-06, "loss": 0.5141372680664062, "step": 1727 }, { "epoch": 0.3984320959188379, "grad_norm": 1.0864089006893662, "learning_rate": 1.8809308369204302e-06, "loss": 0.4950217008590698, "step": 1728 }, { "epoch": 0.39866267004842054, "grad_norm": 0.9530925154379366, "learning_rate": 1.880750342908797e-06, "loss": 0.4961693286895752, "step": 1729 }, { "epoch": 0.3988932441780032, "grad_norm": 1.1860643451162984, "learning_rate": 1.8805697208711752e-06, "loss": 0.43443650007247925, "step": 1730 }, { "epoch": 0.39912381830758586, "grad_norm": 1.1332453377909741, "learning_rate": 1.8803889708338203e-06, "loss": 0.6116896867752075, "step": 1731 }, { "epoch": 0.3993543924371685, "grad_norm": 0.9403622624868753, "learning_rate": 1.8802080928230062e-06, "loss": 0.46244728565216064, "step": 1732 }, { "epoch": 0.3995849665667512, "grad_norm": 1.3180964068285155, "learning_rate": 1.880027086865025e-06, "loss": 0.5728162527084351, "step": 1733 }, { "epoch": 0.39981554069633385, "grad_norm": 1.1310284579414278, "learning_rate": 1.8798459529861876e-06, "loss": 0.4472135901451111, "step": 1734 }, { "epoch": 0.4000461148259165, "grad_norm": 1.4100215542732757, "learning_rate": 1.8796646912128246e-06, "loss": 0.5862090587615967, "step": 1735 }, { "epoch": 0.40027668895549917, "grad_norm": 1.428537555998266, "learning_rate": 1.8794833015712831e-06, "loss": 0.6406301259994507, "step": 1736 }, { "epoch": 0.40050726308508183, "grad_norm": 1.3320783455965834, "learning_rate": 1.8793017840879306e-06, "loss": 0.5865743160247803, "step": 1737 }, { "epoch": 0.4007378372146645, "grad_norm": 1.2736301947050057, "learning_rate": 1.8791201387891524e-06, "loss": 0.5521814823150635, "step": 1738 }, { "epoch": 0.40096841134424716, "grad_norm": 0.9710129928143749, "learning_rate": 1.8789383657013522e-06, "loss": 0.40027791261672974, "step": 1739 }, { "epoch": 0.4011989854738298, "grad_norm": 1.213730124395359, "learning_rate": 1.8787564648509528e-06, "loss": 0.5594751238822937, "step": 1740 }, { "epoch": 0.4014295596034125, "grad_norm": 1.2077878384788876, "learning_rate": 1.8785744362643955e-06, "loss": 0.5029730796813965, "step": 1741 }, { "epoch": 0.40166013373299514, "grad_norm": 1.086599940670418, "learning_rate": 1.8783922799681397e-06, "loss": 0.6089034676551819, "step": 1742 }, { "epoch": 0.4018907078625778, "grad_norm": 1.178028157014987, "learning_rate": 1.8782099959886639e-06, "loss": 0.5238372683525085, "step": 1743 }, { "epoch": 0.40212128199216046, "grad_norm": 1.0430681899893623, "learning_rate": 1.8780275843524643e-06, "loss": 0.47281232476234436, "step": 1744 }, { "epoch": 0.4023518561217431, "grad_norm": 1.0603667709126336, "learning_rate": 1.8778450450860571e-06, "loss": 0.44885876774787903, "step": 1745 }, { "epoch": 0.4025824302513258, "grad_norm": 1.1187549409367323, "learning_rate": 1.8776623782159762e-06, "loss": 0.5915139317512512, "step": 1746 }, { "epoch": 0.40281300438090845, "grad_norm": 1.6743224234561098, "learning_rate": 1.8774795837687736e-06, "loss": 0.49341484904289246, "step": 1747 }, { "epoch": 0.4030435785104911, "grad_norm": 1.1133076324661322, "learning_rate": 1.8772966617710205e-06, "loss": 0.43253493309020996, "step": 1748 }, { "epoch": 0.40327415264007377, "grad_norm": 1.2596810310862556, "learning_rate": 1.8771136122493064e-06, "loss": 0.48660045862197876, "step": 1749 }, { "epoch": 0.40350472676965643, "grad_norm": 1.158836920018239, "learning_rate": 1.8769304352302396e-06, "loss": 0.4493838846683502, "step": 1750 }, { "epoch": 0.4037353008992391, "grad_norm": 1.1033409495303377, "learning_rate": 1.8767471307404464e-06, "loss": 0.5656435489654541, "step": 1751 }, { "epoch": 0.40396587502882175, "grad_norm": 1.1945430976561655, "learning_rate": 1.876563698806572e-06, "loss": 0.48047327995300293, "step": 1752 }, { "epoch": 0.4041964491584044, "grad_norm": 1.117811372759575, "learning_rate": 1.8763801394552806e-06, "loss": 0.5314204692840576, "step": 1753 }, { "epoch": 0.4044270232879871, "grad_norm": 1.212293607312766, "learning_rate": 1.876196452713254e-06, "loss": 0.5436627864837646, "step": 1754 }, { "epoch": 0.40465759741756974, "grad_norm": 1.1748084841171984, "learning_rate": 1.8760126386071933e-06, "loss": 0.5383991599082947, "step": 1755 }, { "epoch": 0.4048881715471524, "grad_norm": 1.1737559222863878, "learning_rate": 1.8758286971638171e-06, "loss": 0.48271507024765015, "step": 1756 }, { "epoch": 0.40511874567673506, "grad_norm": 1.0323965631837329, "learning_rate": 1.8756446284098638e-06, "loss": 0.5920745134353638, "step": 1757 }, { "epoch": 0.4053493198063177, "grad_norm": 1.1254236464300211, "learning_rate": 1.875460432372089e-06, "loss": 0.4467526078224182, "step": 1758 }, { "epoch": 0.4055798939359004, "grad_norm": 0.9503211623796617, "learning_rate": 1.875276109077268e-06, "loss": 0.425409734249115, "step": 1759 }, { "epoch": 0.40581046806548304, "grad_norm": 1.1318149217921376, "learning_rate": 1.8750916585521938e-06, "loss": 0.4911944568157196, "step": 1760 }, { "epoch": 0.4060410421950657, "grad_norm": 1.5865124774001016, "learning_rate": 1.8749070808236787e-06, "loss": 0.49605780839920044, "step": 1761 }, { "epoch": 0.40627161632464837, "grad_norm": 1.322640956813398, "learning_rate": 1.874722375918552e-06, "loss": 0.5582889914512634, "step": 1762 }, { "epoch": 0.406502190454231, "grad_norm": 1.0487904765861873, "learning_rate": 1.874537543863663e-06, "loss": 0.4867294132709503, "step": 1763 }, { "epoch": 0.4067327645838137, "grad_norm": 1.062364022734449, "learning_rate": 1.8743525846858787e-06, "loss": 0.5050587058067322, "step": 1764 }, { "epoch": 0.40696333871339635, "grad_norm": 1.0581562602291477, "learning_rate": 1.8741674984120852e-06, "loss": 0.4380977749824524, "step": 1765 }, { "epoch": 0.407193912842979, "grad_norm": 1.326690473297383, "learning_rate": 1.8739822850691865e-06, "loss": 0.5159280300140381, "step": 1766 }, { "epoch": 0.4074244869725617, "grad_norm": 1.3542586293022822, "learning_rate": 1.8737969446841046e-06, "loss": 0.6999780535697937, "step": 1767 }, { "epoch": 0.40765506110214433, "grad_norm": 1.110421221417803, "learning_rate": 1.8736114772837816e-06, "loss": 0.5844931602478027, "step": 1768 }, { "epoch": 0.407885635231727, "grad_norm": 1.2621793403708754, "learning_rate": 1.8734258828951764e-06, "loss": 0.5078610181808472, "step": 1769 }, { "epoch": 0.40811620936130966, "grad_norm": 1.1260800835324682, "learning_rate": 1.8732401615452673e-06, "loss": 0.564793586730957, "step": 1770 }, { "epoch": 0.4083467834908923, "grad_norm": 1.2906459398399637, "learning_rate": 1.8730543132610506e-06, "loss": 0.6145100593566895, "step": 1771 }, { "epoch": 0.408577357620475, "grad_norm": 1.181953537531204, "learning_rate": 1.8728683380695414e-06, "loss": 0.45434027910232544, "step": 1772 }, { "epoch": 0.40880793175005764, "grad_norm": 1.0716516851559217, "learning_rate": 1.872682235997773e-06, "loss": 0.4917553961277008, "step": 1773 }, { "epoch": 0.4090385058796403, "grad_norm": 1.0983534367258283, "learning_rate": 1.872496007072797e-06, "loss": 0.5677252411842346, "step": 1774 }, { "epoch": 0.40926908000922296, "grad_norm": 1.042591224606922, "learning_rate": 1.872309651321684e-06, "loss": 0.5516688823699951, "step": 1775 }, { "epoch": 0.4094996541388056, "grad_norm": 0.9746786592567609, "learning_rate": 1.8721231687715227e-06, "loss": 0.46755337715148926, "step": 1776 }, { "epoch": 0.4097302282683883, "grad_norm": 1.3130136596789415, "learning_rate": 1.8719365594494202e-06, "loss": 0.6575521230697632, "step": 1777 }, { "epoch": 0.40996080239797095, "grad_norm": 1.147271087293654, "learning_rate": 1.8717498233825019e-06, "loss": 0.6088716983795166, "step": 1778 }, { "epoch": 0.4101913765275536, "grad_norm": 0.9692417840942277, "learning_rate": 1.8715629605979118e-06, "loss": 0.39476478099823, "step": 1779 }, { "epoch": 0.41042195065713627, "grad_norm": 1.1915743629339146, "learning_rate": 1.8713759711228123e-06, "loss": 0.4893898665904999, "step": 1780 }, { "epoch": 0.41065252478671893, "grad_norm": 1.298092223223541, "learning_rate": 1.8711888549843842e-06, "loss": 0.5077828764915466, "step": 1781 }, { "epoch": 0.4108830989163016, "grad_norm": 1.0084481520460131, "learning_rate": 1.8710016122098269e-06, "loss": 0.5212582349777222, "step": 1782 }, { "epoch": 0.41111367304588425, "grad_norm": 1.1325685052130308, "learning_rate": 1.870814242826358e-06, "loss": 0.5135321617126465, "step": 1783 }, { "epoch": 0.4113442471754669, "grad_norm": 1.3281766258765773, "learning_rate": 1.8706267468612133e-06, "loss": 0.5398930311203003, "step": 1784 }, { "epoch": 0.4115748213050496, "grad_norm": 1.3736547238310808, "learning_rate": 1.8704391243416477e-06, "loss": 0.49205562472343445, "step": 1785 }, { "epoch": 0.41180539543463224, "grad_norm": 1.1386437791047925, "learning_rate": 1.8702513752949335e-06, "loss": 0.5145718455314636, "step": 1786 }, { "epoch": 0.4120359695642149, "grad_norm": 0.9532031818658743, "learning_rate": 1.8700634997483622e-06, "loss": 0.4868374466896057, "step": 1787 }, { "epoch": 0.41226654369379756, "grad_norm": 1.3881400467911258, "learning_rate": 1.8698754977292435e-06, "loss": 0.5409311652183533, "step": 1788 }, { "epoch": 0.4124971178233802, "grad_norm": 1.307800898328953, "learning_rate": 1.8696873692649052e-06, "loss": 0.5476658344268799, "step": 1789 }, { "epoch": 0.4127276919529629, "grad_norm": 1.251951597359409, "learning_rate": 1.8694991143826937e-06, "loss": 0.5545511245727539, "step": 1790 }, { "epoch": 0.41295826608254554, "grad_norm": 1.1923559975321376, "learning_rate": 1.869310733109974e-06, "loss": 0.5479267835617065, "step": 1791 }, { "epoch": 0.4131888402121282, "grad_norm": 1.1567279350887396, "learning_rate": 1.8691222254741289e-06, "loss": 0.5261585712432861, "step": 1792 }, { "epoch": 0.41341941434171087, "grad_norm": 1.035636889065738, "learning_rate": 1.8689335915025599e-06, "loss": 0.5478091239929199, "step": 1793 }, { "epoch": 0.41364998847129353, "grad_norm": 1.5699808716332777, "learning_rate": 1.8687448312226872e-06, "loss": 0.6739054322242737, "step": 1794 }, { "epoch": 0.4138805626008762, "grad_norm": 1.2236857571837823, "learning_rate": 1.8685559446619487e-06, "loss": 0.613865315914154, "step": 1795 }, { "epoch": 0.41411113673045885, "grad_norm": 1.0357788562325108, "learning_rate": 1.8683669318478012e-06, "loss": 0.3936721384525299, "step": 1796 }, { "epoch": 0.4143417108600415, "grad_norm": 1.2330991076599302, "learning_rate": 1.8681777928077197e-06, "loss": 0.5508556365966797, "step": 1797 }, { "epoch": 0.4145722849896242, "grad_norm": 1.1597942164225867, "learning_rate": 1.867988527569197e-06, "loss": 0.47734567523002625, "step": 1798 }, { "epoch": 0.41480285911920683, "grad_norm": 1.0741273588884312, "learning_rate": 1.8677991361597449e-06, "loss": 0.46847039461135864, "step": 1799 }, { "epoch": 0.4150334332487895, "grad_norm": 1.0364595457718502, "learning_rate": 1.8676096186068937e-06, "loss": 0.5202786326408386, "step": 1800 }, { "epoch": 0.41526400737837216, "grad_norm": 1.2972392907268704, "learning_rate": 1.8674199749381914e-06, "loss": 0.5144700407981873, "step": 1801 }, { "epoch": 0.4154945815079548, "grad_norm": 1.1959128972921023, "learning_rate": 1.8672302051812048e-06, "loss": 0.4394092559814453, "step": 1802 }, { "epoch": 0.4157251556375375, "grad_norm": 1.159378410595036, "learning_rate": 1.8670403093635185e-06, "loss": 0.5017338991165161, "step": 1803 }, { "epoch": 0.41595572976712014, "grad_norm": 1.173120824085894, "learning_rate": 1.8668502875127366e-06, "loss": 0.409381628036499, "step": 1804 }, { "epoch": 0.4161863038967028, "grad_norm": 1.0538601271665184, "learning_rate": 1.8666601396564795e-06, "loss": 0.5193957090377808, "step": 1805 }, { "epoch": 0.41641687802628546, "grad_norm": 1.1338279816499315, "learning_rate": 1.8664698658223882e-06, "loss": 0.5933586359024048, "step": 1806 }, { "epoch": 0.4166474521558681, "grad_norm": 1.1304820859227924, "learning_rate": 1.8662794660381204e-06, "loss": 0.5283366441726685, "step": 1807 }, { "epoch": 0.4168780262854508, "grad_norm": 1.118558214164988, "learning_rate": 1.8660889403313526e-06, "loss": 0.5063748359680176, "step": 1808 }, { "epoch": 0.41710860041503345, "grad_norm": 1.087893149342631, "learning_rate": 1.86589828872978e-06, "loss": 0.6386028528213501, "step": 1809 }, { "epoch": 0.4173391745446161, "grad_norm": 1.0041938541729358, "learning_rate": 1.8657075112611153e-06, "loss": 0.4618440270423889, "step": 1810 }, { "epoch": 0.41756974867419877, "grad_norm": 1.3214046412105014, "learning_rate": 1.8655166079530903e-06, "loss": 0.4523535966873169, "step": 1811 }, { "epoch": 0.41780032280378143, "grad_norm": 1.0747078557029888, "learning_rate": 1.8653255788334544e-06, "loss": 0.501311719417572, "step": 1812 }, { "epoch": 0.4180308969333641, "grad_norm": 1.112333239244982, "learning_rate": 1.865134423929976e-06, "loss": 0.5504614114761353, "step": 1813 }, { "epoch": 0.41826147106294675, "grad_norm": 1.0979124892402103, "learning_rate": 1.864943143270441e-06, "loss": 0.44275063276290894, "step": 1814 }, { "epoch": 0.4184920451925294, "grad_norm": 1.2558217334961832, "learning_rate": 1.8647517368826545e-06, "loss": 0.5628173351287842, "step": 1815 }, { "epoch": 0.4187226193221121, "grad_norm": 1.032119999950418, "learning_rate": 1.864560204794439e-06, "loss": 0.489221453666687, "step": 1816 }, { "epoch": 0.41895319345169474, "grad_norm": 1.2211401188891802, "learning_rate": 1.8643685470336355e-06, "loss": 0.5440137386322021, "step": 1817 }, { "epoch": 0.4191837675812774, "grad_norm": 1.169073111073683, "learning_rate": 1.8641767636281035e-06, "loss": 0.4518952965736389, "step": 1818 }, { "epoch": 0.41941434171086006, "grad_norm": 1.3403542594346476, "learning_rate": 1.8639848546057209e-06, "loss": 0.591090977191925, "step": 1819 }, { "epoch": 0.4196449158404427, "grad_norm": 1.1775626126130905, "learning_rate": 1.8637928199943836e-06, "loss": 0.5622411966323853, "step": 1820 }, { "epoch": 0.4198754899700254, "grad_norm": 1.1913164061698733, "learning_rate": 1.8636006598220052e-06, "loss": 0.5086779594421387, "step": 1821 }, { "epoch": 0.42010606409960805, "grad_norm": 1.1334153574078034, "learning_rate": 1.8634083741165188e-06, "loss": 0.5055384635925293, "step": 1822 }, { "epoch": 0.4203366382291907, "grad_norm": 1.129676706405598, "learning_rate": 1.863215962905875e-06, "loss": 0.5076277852058411, "step": 1823 }, { "epoch": 0.42056721235877337, "grad_norm": 1.2637764937692704, "learning_rate": 1.8630234262180424e-06, "loss": 0.5378403067588806, "step": 1824 }, { "epoch": 0.42079778648835603, "grad_norm": 1.0886873342980177, "learning_rate": 1.8628307640810083e-06, "loss": 0.6133165955543518, "step": 1825 }, { "epoch": 0.4210283606179387, "grad_norm": 1.1726755470049002, "learning_rate": 1.8626379765227782e-06, "loss": 0.4978156089782715, "step": 1826 }, { "epoch": 0.42125893474752135, "grad_norm": 1.0651427070474233, "learning_rate": 1.8624450635713759e-06, "loss": 0.43159037828445435, "step": 1827 }, { "epoch": 0.421489508877104, "grad_norm": 1.0498543002649237, "learning_rate": 1.8622520252548424e-06, "loss": 0.48821642994880676, "step": 1828 }, { "epoch": 0.4217200830066867, "grad_norm": 1.016883491579865, "learning_rate": 1.8620588616012387e-06, "loss": 0.4666696786880493, "step": 1829 }, { "epoch": 0.42195065713626934, "grad_norm": 1.3621906870852534, "learning_rate": 1.8618655726386425e-06, "loss": 0.5278067588806152, "step": 1830 }, { "epoch": 0.422181231265852, "grad_norm": 1.0791230542588068, "learning_rate": 1.8616721583951512e-06, "loss": 0.4357749819755554, "step": 1831 }, { "epoch": 0.42241180539543466, "grad_norm": 1.2299213864410639, "learning_rate": 1.8614786188988782e-06, "loss": 0.5388439893722534, "step": 1832 }, { "epoch": 0.4226423795250173, "grad_norm": 1.4108572710321559, "learning_rate": 1.8612849541779573e-06, "loss": 0.5443956255912781, "step": 1833 }, { "epoch": 0.4228729536546, "grad_norm": 1.2641105463427431, "learning_rate": 1.86109116426054e-06, "loss": 0.5614160895347595, "step": 1834 }, { "epoch": 0.4231035277841826, "grad_norm": 1.2744746751945835, "learning_rate": 1.8608972491747943e-06, "loss": 0.45780229568481445, "step": 1835 }, { "epoch": 0.42333410191376525, "grad_norm": 1.4638598184796152, "learning_rate": 1.8607032089489088e-06, "loss": 0.6354867219924927, "step": 1836 }, { "epoch": 0.4235646760433479, "grad_norm": 1.2548140048045007, "learning_rate": 1.860509043611089e-06, "loss": 0.5172948241233826, "step": 1837 }, { "epoch": 0.42379525017293057, "grad_norm": 1.1235697857312772, "learning_rate": 1.8603147531895586e-06, "loss": 0.4353157877922058, "step": 1838 }, { "epoch": 0.42402582430251323, "grad_norm": 1.1680682893696177, "learning_rate": 1.8601203377125599e-06, "loss": 0.4971036911010742, "step": 1839 }, { "epoch": 0.4242563984320959, "grad_norm": 1.0750331417799794, "learning_rate": 1.859925797208353e-06, "loss": 0.5037736296653748, "step": 1840 }, { "epoch": 0.42448697256167855, "grad_norm": 1.052234823772871, "learning_rate": 1.8597311317052165e-06, "loss": 0.4480808675289154, "step": 1841 }, { "epoch": 0.4247175466912612, "grad_norm": 1.2441100874175304, "learning_rate": 1.8595363412314468e-06, "loss": 0.5102680325508118, "step": 1842 }, { "epoch": 0.4249481208208439, "grad_norm": 1.1806961844163353, "learning_rate": 1.8593414258153585e-06, "loss": 0.5979090929031372, "step": 1843 }, { "epoch": 0.42517869495042654, "grad_norm": 1.0776260642041309, "learning_rate": 1.8591463854852854e-06, "loss": 0.4616047143936157, "step": 1844 }, { "epoch": 0.4254092690800092, "grad_norm": 1.0059742827824252, "learning_rate": 1.8589512202695773e-06, "loss": 0.4893925189971924, "step": 1845 }, { "epoch": 0.42563984320959186, "grad_norm": 1.0527785435538273, "learning_rate": 1.8587559301966045e-06, "loss": 0.49619823694229126, "step": 1846 }, { "epoch": 0.4258704173391745, "grad_norm": 1.0558967393125807, "learning_rate": 1.858560515294754e-06, "loss": 0.5205181837081909, "step": 1847 }, { "epoch": 0.4261009914687572, "grad_norm": 1.3589791827910958, "learning_rate": 1.8583649755924315e-06, "loss": 0.5910394191741943, "step": 1848 }, { "epoch": 0.42633156559833985, "grad_norm": 1.0092224062378152, "learning_rate": 1.8581693111180603e-06, "loss": 0.4916709363460541, "step": 1849 }, { "epoch": 0.4265621397279225, "grad_norm": 1.261654259944108, "learning_rate": 1.8579735219000824e-06, "loss": 0.5728994011878967, "step": 1850 }, { "epoch": 0.42679271385750517, "grad_norm": 1.162885813109175, "learning_rate": 1.857777607966958e-06, "loss": 0.49620527029037476, "step": 1851 }, { "epoch": 0.42702328798708783, "grad_norm": 1.2230754640158692, "learning_rate": 1.8575815693471649e-06, "loss": 0.5100233554840088, "step": 1852 }, { "epoch": 0.4272538621166705, "grad_norm": 1.1713081386962017, "learning_rate": 1.8573854060691994e-06, "loss": 0.48981544375419617, "step": 1853 }, { "epoch": 0.42748443624625315, "grad_norm": 1.0875128431195988, "learning_rate": 1.8571891181615755e-06, "loss": 0.44190293550491333, "step": 1854 }, { "epoch": 0.4277150103758358, "grad_norm": 1.2645757986317834, "learning_rate": 1.8569927056528264e-06, "loss": 0.42867448925971985, "step": 1855 }, { "epoch": 0.4279455845054185, "grad_norm": 1.849182592399251, "learning_rate": 1.8567961685715016e-06, "loss": 0.4873782694339752, "step": 1856 }, { "epoch": 0.42817615863500114, "grad_norm": 1.2007241803680166, "learning_rate": 1.8565995069461706e-06, "loss": 0.4985312819480896, "step": 1857 }, { "epoch": 0.4284067327645838, "grad_norm": 1.2242163730204847, "learning_rate": 1.85640272080542e-06, "loss": 0.5525496006011963, "step": 1858 }, { "epoch": 0.42863730689416646, "grad_norm": 1.293851624108558, "learning_rate": 1.8562058101778547e-06, "loss": 0.5645877122879028, "step": 1859 }, { "epoch": 0.4288678810237491, "grad_norm": 1.0805291431045556, "learning_rate": 1.856008775092097e-06, "loss": 0.4304332137107849, "step": 1860 }, { "epoch": 0.4290984551533318, "grad_norm": 1.14759009112306, "learning_rate": 1.8558116155767888e-06, "loss": 0.4970170259475708, "step": 1861 }, { "epoch": 0.42932902928291444, "grad_norm": 1.344010966492771, "learning_rate": 1.8556143316605888e-06, "loss": 0.5718003511428833, "step": 1862 }, { "epoch": 0.4295596034124971, "grad_norm": 1.3157067542574963, "learning_rate": 1.8554169233721741e-06, "loss": 0.4445415139198303, "step": 1863 }, { "epoch": 0.42979017754207977, "grad_norm": 1.1001033203387223, "learning_rate": 1.8552193907402404e-06, "loss": 0.5297178626060486, "step": 1864 }, { "epoch": 0.4300207516716624, "grad_norm": 0.9618626645905404, "learning_rate": 1.8550217337935013e-06, "loss": 0.4564483165740967, "step": 1865 }, { "epoch": 0.4302513258012451, "grad_norm": 1.2509575429906847, "learning_rate": 1.8548239525606872e-06, "loss": 0.4789202809333801, "step": 1866 }, { "epoch": 0.43048189993082775, "grad_norm": 1.0950598228304256, "learning_rate": 1.8546260470705485e-06, "loss": 0.5240263938903809, "step": 1867 }, { "epoch": 0.4307124740604104, "grad_norm": 1.0326884664902543, "learning_rate": 1.8544280173518523e-06, "loss": 0.4190866947174072, "step": 1868 }, { "epoch": 0.43094304818999307, "grad_norm": 1.098749197470929, "learning_rate": 1.8542298634333844e-06, "loss": 0.502301812171936, "step": 1869 }, { "epoch": 0.43117362231957573, "grad_norm": 1.3711612309046508, "learning_rate": 1.8540315853439488e-06, "loss": 0.5752545595169067, "step": 1870 }, { "epoch": 0.4314041964491584, "grad_norm": 0.9641480143185914, "learning_rate": 1.8538331831123667e-06, "loss": 0.44959962368011475, "step": 1871 }, { "epoch": 0.43163477057874106, "grad_norm": 1.2299121621798328, "learning_rate": 1.8536346567674782e-06, "loss": 0.5320106148719788, "step": 1872 }, { "epoch": 0.4318653447083237, "grad_norm": 1.393182956860924, "learning_rate": 1.8534360063381407e-06, "loss": 0.5981979966163635, "step": 1873 }, { "epoch": 0.4320959188379064, "grad_norm": 1.350381662747622, "learning_rate": 1.8532372318532306e-06, "loss": 0.5567579865455627, "step": 1874 }, { "epoch": 0.43232649296748904, "grad_norm": 1.4350681093951811, "learning_rate": 1.8530383333416415e-06, "loss": 0.5604764223098755, "step": 1875 }, { "epoch": 0.4325570670970717, "grad_norm": 1.4048444099270982, "learning_rate": 1.8528393108322852e-06, "loss": 0.5410721302032471, "step": 1876 }, { "epoch": 0.43278764122665436, "grad_norm": 1.1191045271107989, "learning_rate": 1.852640164354092e-06, "loss": 0.5417271852493286, "step": 1877 }, { "epoch": 0.433018215356237, "grad_norm": 1.1925092385457925, "learning_rate": 1.8524408939360096e-06, "loss": 0.5831471681594849, "step": 1878 }, { "epoch": 0.4332487894858197, "grad_norm": 1.0939224950949575, "learning_rate": 1.8522414996070045e-06, "loss": 0.45030760765075684, "step": 1879 }, { "epoch": 0.43347936361540235, "grad_norm": 1.1520994484307991, "learning_rate": 1.8520419813960596e-06, "loss": 0.44657936692237854, "step": 1880 }, { "epoch": 0.433709937744985, "grad_norm": 1.1691007631884454, "learning_rate": 1.851842339332178e-06, "loss": 0.5472795963287354, "step": 1881 }, { "epoch": 0.43394051187456767, "grad_norm": 1.1388268257083902, "learning_rate": 1.8516425734443786e-06, "loss": 0.4883359968662262, "step": 1882 }, { "epoch": 0.43417108600415033, "grad_norm": 1.0473976151781044, "learning_rate": 1.8514426837617006e-06, "loss": 0.5172675848007202, "step": 1883 }, { "epoch": 0.434401660133733, "grad_norm": 1.2812470936666533, "learning_rate": 1.851242670313199e-06, "loss": 0.5253418684005737, "step": 1884 }, { "epoch": 0.43463223426331565, "grad_norm": 1.2940121862284113, "learning_rate": 1.8510425331279485e-06, "loss": 0.4684918522834778, "step": 1885 }, { "epoch": 0.4348628083928983, "grad_norm": 1.7313907662218715, "learning_rate": 1.8508422722350404e-06, "loss": 0.522485077381134, "step": 1886 }, { "epoch": 0.435093382522481, "grad_norm": 1.0862530759153244, "learning_rate": 1.8506418876635852e-06, "loss": 0.5123787522315979, "step": 1887 }, { "epoch": 0.43532395665206364, "grad_norm": 1.2812741997977775, "learning_rate": 1.8504413794427106e-06, "loss": 0.5195976495742798, "step": 1888 }, { "epoch": 0.4355545307816463, "grad_norm": 1.081503403719265, "learning_rate": 1.8502407476015626e-06, "loss": 0.48394906520843506, "step": 1889 }, { "epoch": 0.43578510491122896, "grad_norm": 1.2031421687566246, "learning_rate": 1.850039992169305e-06, "loss": 0.5083323121070862, "step": 1890 }, { "epoch": 0.4360156790408116, "grad_norm": 1.2379097603599272, "learning_rate": 1.8498391131751196e-06, "loss": 0.5303651094436646, "step": 1891 }, { "epoch": 0.4362462531703943, "grad_norm": 1.010820397187413, "learning_rate": 1.8496381106482062e-06, "loss": 0.49429047107696533, "step": 1892 }, { "epoch": 0.43647682729997694, "grad_norm": 1.2506572926955764, "learning_rate": 1.8494369846177826e-06, "loss": 0.5263347625732422, "step": 1893 }, { "epoch": 0.4367074014295596, "grad_norm": 1.3195849148516783, "learning_rate": 1.8492357351130848e-06, "loss": 0.5332654714584351, "step": 1894 }, { "epoch": 0.43693797555914227, "grad_norm": 1.1692381501686961, "learning_rate": 1.8490343621633657e-06, "loss": 0.5598278045654297, "step": 1895 }, { "epoch": 0.43716854968872493, "grad_norm": 1.0323293964159153, "learning_rate": 1.8488328657978975e-06, "loss": 0.4026976227760315, "step": 1896 }, { "epoch": 0.4373991238183076, "grad_norm": 1.3568102099956687, "learning_rate": 1.8486312460459698e-06, "loss": 0.4277791380882263, "step": 1897 }, { "epoch": 0.43762969794789025, "grad_norm": 1.2550644818276735, "learning_rate": 1.8484295029368896e-06, "loss": 0.49567973613739014, "step": 1898 }, { "epoch": 0.4378602720774729, "grad_norm": 1.3750960531365106, "learning_rate": 1.8482276364999828e-06, "loss": 0.4659258723258972, "step": 1899 }, { "epoch": 0.4380908462070556, "grad_norm": 1.4921650354400726, "learning_rate": 1.8480256467645923e-06, "loss": 0.4950314164161682, "step": 1900 }, { "epoch": 0.43832142033663823, "grad_norm": 1.2407118809889077, "learning_rate": 1.8478235337600796e-06, "loss": 0.5584981441497803, "step": 1901 }, { "epoch": 0.4385519944662209, "grad_norm": 1.4539173472262998, "learning_rate": 1.847621297515824e-06, "loss": 0.6322404146194458, "step": 1902 }, { "epoch": 0.43878256859580356, "grad_norm": 1.6859923054790666, "learning_rate": 1.8474189380612225e-06, "loss": 0.49535471200942993, "step": 1903 }, { "epoch": 0.4390131427253862, "grad_norm": 1.0079272515569784, "learning_rate": 1.8472164554256897e-06, "loss": 0.40703707933425903, "step": 1904 }, { "epoch": 0.4392437168549689, "grad_norm": 1.1125525506446694, "learning_rate": 1.8470138496386588e-06, "loss": 0.4540821313858032, "step": 1905 }, { "epoch": 0.43947429098455154, "grad_norm": 1.1572392182622382, "learning_rate": 1.846811120729581e-06, "loss": 0.45964252948760986, "step": 1906 }, { "epoch": 0.4397048651141342, "grad_norm": 1.018497744556974, "learning_rate": 1.8466082687279244e-06, "loss": 0.4604472517967224, "step": 1907 }, { "epoch": 0.43993543924371686, "grad_norm": 1.114828518838774, "learning_rate": 1.8464052936631758e-06, "loss": 0.44585052132606506, "step": 1908 }, { "epoch": 0.4401660133732995, "grad_norm": 1.2189161284011176, "learning_rate": 1.8462021955648397e-06, "loss": 0.43862414360046387, "step": 1909 }, { "epoch": 0.4403965875028822, "grad_norm": 1.0484346475063675, "learning_rate": 1.8459989744624386e-06, "loss": 0.5148224234580994, "step": 1910 }, { "epoch": 0.44062716163246485, "grad_norm": 1.3041727396087255, "learning_rate": 1.8457956303855124e-06, "loss": 0.6201390027999878, "step": 1911 }, { "epoch": 0.4408577357620475, "grad_norm": 1.322348681007624, "learning_rate": 1.8455921633636196e-06, "loss": 0.5828813314437866, "step": 1912 }, { "epoch": 0.44108830989163017, "grad_norm": 1.2413839772395276, "learning_rate": 1.845388573426336e-06, "loss": 0.5491579174995422, "step": 1913 }, { "epoch": 0.44131888402121283, "grad_norm": 1.135006469141378, "learning_rate": 1.8451848606032554e-06, "loss": 0.4204079508781433, "step": 1914 }, { "epoch": 0.4415494581507955, "grad_norm": 1.3248528862326203, "learning_rate": 1.8449810249239898e-06, "loss": 0.5734649300575256, "step": 1915 }, { "epoch": 0.44178003228037815, "grad_norm": 1.1101812599659409, "learning_rate": 1.8447770664181684e-06, "loss": 0.48931679129600525, "step": 1916 }, { "epoch": 0.4420106064099608, "grad_norm": 1.292831898773596, "learning_rate": 1.8445729851154392e-06, "loss": 0.5206375122070312, "step": 1917 }, { "epoch": 0.4422411805395435, "grad_norm": 1.3590503413541226, "learning_rate": 1.8443687810454666e-06, "loss": 0.4916420578956604, "step": 1918 }, { "epoch": 0.44247175466912614, "grad_norm": 1.0963843972341092, "learning_rate": 1.8441644542379348e-06, "loss": 0.5021753311157227, "step": 1919 }, { "epoch": 0.4427023287987088, "grad_norm": 1.2556127492378621, "learning_rate": 1.8439600047225441e-06, "loss": 0.4615249037742615, "step": 1920 }, { "epoch": 0.44293290292829146, "grad_norm": 1.3251855444784397, "learning_rate": 1.8437554325290133e-06, "loss": 0.4849514365196228, "step": 1921 }, { "epoch": 0.4431634770578741, "grad_norm": 1.3926092312086646, "learning_rate": 1.843550737687079e-06, "loss": 0.5872727632522583, "step": 1922 }, { "epoch": 0.4433940511874568, "grad_norm": 1.1422193923698303, "learning_rate": 1.843345920226496e-06, "loss": 0.48469966650009155, "step": 1923 }, { "epoch": 0.44362462531703944, "grad_norm": 1.1078885152995024, "learning_rate": 1.8431409801770364e-06, "loss": 0.45931774377822876, "step": 1924 }, { "epoch": 0.4438551994466221, "grad_norm": 1.0630184817249293, "learning_rate": 1.8429359175684907e-06, "loss": 0.5138596296310425, "step": 1925 }, { "epoch": 0.44408577357620477, "grad_norm": 1.1576378783801253, "learning_rate": 1.8427307324306661e-06, "loss": 0.5586874485015869, "step": 1926 }, { "epoch": 0.44431634770578743, "grad_norm": 0.9982496919132913, "learning_rate": 1.8425254247933887e-06, "loss": 0.5373901724815369, "step": 1927 }, { "epoch": 0.4445469218353701, "grad_norm": 1.3044317948619655, "learning_rate": 1.8423199946865022e-06, "loss": 0.46104729175567627, "step": 1928 }, { "epoch": 0.44477749596495275, "grad_norm": 1.2637964058278408, "learning_rate": 1.8421144421398678e-06, "loss": 0.4837646782398224, "step": 1929 }, { "epoch": 0.4450080700945354, "grad_norm": 1.0579849017335872, "learning_rate": 1.8419087671833647e-06, "loss": 0.47685718536376953, "step": 1930 }, { "epoch": 0.4452386442241181, "grad_norm": 1.3061309074235694, "learning_rate": 1.8417029698468897e-06, "loss": 0.5904572606086731, "step": 1931 }, { "epoch": 0.44546921835370074, "grad_norm": 1.0698778232309683, "learning_rate": 1.8414970501603577e-06, "loss": 0.5434018969535828, "step": 1932 }, { "epoch": 0.4456997924832834, "grad_norm": 1.0813116335575876, "learning_rate": 1.8412910081537012e-06, "loss": 0.5532705783843994, "step": 1933 }, { "epoch": 0.44593036661286606, "grad_norm": 1.2746241772853588, "learning_rate": 1.8410848438568704e-06, "loss": 0.4900597929954529, "step": 1934 }, { "epoch": 0.4461609407424487, "grad_norm": 1.1321871851277807, "learning_rate": 1.8408785572998334e-06, "loss": 0.40426892042160034, "step": 1935 }, { "epoch": 0.4463915148720314, "grad_norm": 1.2056959007702837, "learning_rate": 1.840672148512576e-06, "loss": 0.48805081844329834, "step": 1936 }, { "epoch": 0.44662208900161404, "grad_norm": 1.247599925173634, "learning_rate": 1.8404656175251019e-06, "loss": 0.4997096657752991, "step": 1937 }, { "epoch": 0.4468526631311967, "grad_norm": 1.1300078883402307, "learning_rate": 1.8402589643674325e-06, "loss": 0.5113422274589539, "step": 1938 }, { "epoch": 0.44708323726077936, "grad_norm": 1.2034211237767165, "learning_rate": 1.8400521890696065e-06, "loss": 0.44080060720443726, "step": 1939 }, { "epoch": 0.447313811390362, "grad_norm": 1.1365386964776252, "learning_rate": 1.8398452916616816e-06, "loss": 0.4477943778038025, "step": 1940 }, { "epoch": 0.4475443855199447, "grad_norm": 1.2171142668463, "learning_rate": 1.8396382721737318e-06, "loss": 0.4597470760345459, "step": 1941 }, { "epoch": 0.44777495964952735, "grad_norm": 1.1079547319265362, "learning_rate": 1.8394311306358494e-06, "loss": 0.4758293628692627, "step": 1942 }, { "epoch": 0.44800553377911, "grad_norm": 1.1579717682654027, "learning_rate": 1.8392238670781453e-06, "loss": 0.4573550224304199, "step": 1943 }, { "epoch": 0.44823610790869267, "grad_norm": 1.318176172591765, "learning_rate": 1.8390164815307465e-06, "loss": 0.504696786403656, "step": 1944 }, { "epoch": 0.44846668203827533, "grad_norm": 1.176904108457006, "learning_rate": 1.8388089740237991e-06, "loss": 0.4936453700065613, "step": 1945 }, { "epoch": 0.448697256167858, "grad_norm": 1.0847569291854338, "learning_rate": 1.8386013445874661e-06, "loss": 0.4851078987121582, "step": 1946 }, { "epoch": 0.4489278302974406, "grad_norm": 1.184810595622898, "learning_rate": 1.8383935932519288e-06, "loss": 0.4881519377231598, "step": 1947 }, { "epoch": 0.44915840442702326, "grad_norm": 1.2389121525709461, "learning_rate": 1.8381857200473859e-06, "loss": 0.5604408979415894, "step": 1948 }, { "epoch": 0.4493889785566059, "grad_norm": 1.2909928460674411, "learning_rate": 1.8379777250040535e-06, "loss": 0.5022269487380981, "step": 1949 }, { "epoch": 0.4496195526861886, "grad_norm": 1.5074815200191058, "learning_rate": 1.8377696081521666e-06, "loss": 0.6519315242767334, "step": 1950 }, { "epoch": 0.44985012681577125, "grad_norm": 1.0636886048128833, "learning_rate": 1.8375613695219766e-06, "loss": 0.3820997476577759, "step": 1951 }, { "epoch": 0.4500807009453539, "grad_norm": 1.2705283632306288, "learning_rate": 1.8373530091437526e-06, "loss": 0.5473283529281616, "step": 1952 }, { "epoch": 0.45031127507493657, "grad_norm": 1.3245130391551474, "learning_rate": 1.8371445270477828e-06, "loss": 0.5835955142974854, "step": 1953 }, { "epoch": 0.45054184920451923, "grad_norm": 0.9645583101230016, "learning_rate": 1.8369359232643716e-06, "loss": 0.5398194789886475, "step": 1954 }, { "epoch": 0.4507724233341019, "grad_norm": 1.363319289299188, "learning_rate": 1.8367271978238418e-06, "loss": 0.36561834812164307, "step": 1955 }, { "epoch": 0.45100299746368455, "grad_norm": 1.212738724980002, "learning_rate": 1.8365183507565342e-06, "loss": 0.319802463054657, "step": 1956 }, { "epoch": 0.4512335715932672, "grad_norm": 1.2303957915062576, "learning_rate": 1.8363093820928063e-06, "loss": 0.46466606855392456, "step": 1957 }, { "epoch": 0.4514641457228499, "grad_norm": 1.0793723825771542, "learning_rate": 1.8361002918630338e-06, "loss": 0.5839806199073792, "step": 1958 }, { "epoch": 0.45169471985243254, "grad_norm": 1.1018651408043991, "learning_rate": 1.8358910800976105e-06, "loss": 0.4472346603870392, "step": 1959 }, { "epoch": 0.4519252939820152, "grad_norm": 1.2384424942976882, "learning_rate": 1.835681746826947e-06, "loss": 0.5191199779510498, "step": 1960 }, { "epoch": 0.45215586811159786, "grad_norm": 1.199344967008703, "learning_rate": 1.8354722920814722e-06, "loss": 0.5832456350326538, "step": 1961 }, { "epoch": 0.4523864422411805, "grad_norm": 1.17539846221013, "learning_rate": 1.8352627158916326e-06, "loss": 0.604708194732666, "step": 1962 }, { "epoch": 0.4526170163707632, "grad_norm": 1.0362921929144542, "learning_rate": 1.8350530182878924e-06, "loss": 0.5640981793403625, "step": 1963 }, { "epoch": 0.45284759050034584, "grad_norm": 1.6578766467164143, "learning_rate": 1.8348431993007326e-06, "loss": 0.4816977381706238, "step": 1964 }, { "epoch": 0.4530781646299285, "grad_norm": 1.1374005988930347, "learning_rate": 1.8346332589606526e-06, "loss": 0.4226726293563843, "step": 1965 }, { "epoch": 0.45330873875951117, "grad_norm": 1.1547528745449813, "learning_rate": 1.8344231972981701e-06, "loss": 0.49635130167007446, "step": 1966 }, { "epoch": 0.4535393128890938, "grad_norm": 1.1372879426647424, "learning_rate": 1.8342130143438193e-06, "loss": 0.5275523662567139, "step": 1967 }, { "epoch": 0.4537698870186765, "grad_norm": 1.202496816282669, "learning_rate": 1.834002710128152e-06, "loss": 0.48517313599586487, "step": 1968 }, { "epoch": 0.45400046114825915, "grad_norm": 1.1968500607132941, "learning_rate": 1.8337922846817388e-06, "loss": 0.4352126717567444, "step": 1969 }, { "epoch": 0.4542310352778418, "grad_norm": 1.116289808278095, "learning_rate": 1.8335817380351668e-06, "loss": 0.48131102323532104, "step": 1970 }, { "epoch": 0.45446160940742447, "grad_norm": 1.1124663257243492, "learning_rate": 1.8333710702190408e-06, "loss": 0.48989611864089966, "step": 1971 }, { "epoch": 0.45469218353700713, "grad_norm": 1.4370850989895667, "learning_rate": 1.8331602812639839e-06, "loss": 0.4841296076774597, "step": 1972 }, { "epoch": 0.4549227576665898, "grad_norm": 1.1830445801916494, "learning_rate": 1.8329493712006364e-06, "loss": 0.5479841232299805, "step": 1973 }, { "epoch": 0.45515333179617246, "grad_norm": 1.1923903658380426, "learning_rate": 1.8327383400596559e-06, "loss": 0.4732212424278259, "step": 1974 }, { "epoch": 0.4553839059257551, "grad_norm": 1.0628413230145501, "learning_rate": 1.8325271878717183e-06, "loss": 0.46675610542297363, "step": 1975 }, { "epoch": 0.4556144800553378, "grad_norm": 1.0416293786228703, "learning_rate": 1.8323159146675163e-06, "loss": 0.5464143753051758, "step": 1976 }, { "epoch": 0.45584505418492044, "grad_norm": 1.0345078154587666, "learning_rate": 1.832104520477761e-06, "loss": 0.3888660669326782, "step": 1977 }, { "epoch": 0.4560756283145031, "grad_norm": 1.4241654424068988, "learning_rate": 1.8318930053331805e-06, "loss": 0.5163271427154541, "step": 1978 }, { "epoch": 0.45630620244408576, "grad_norm": 1.2347472844947731, "learning_rate": 1.8316813692645208e-06, "loss": 0.5471124649047852, "step": 1979 }, { "epoch": 0.4565367765736684, "grad_norm": 1.1473833654009267, "learning_rate": 1.8314696123025452e-06, "loss": 0.5907406210899353, "step": 1980 }, { "epoch": 0.4567673507032511, "grad_norm": 1.298768820373183, "learning_rate": 1.8312577344780346e-06, "loss": 0.5249447226524353, "step": 1981 }, { "epoch": 0.45699792483283375, "grad_norm": 1.2135802460189444, "learning_rate": 1.8310457358217879e-06, "loss": 0.5063247084617615, "step": 1982 }, { "epoch": 0.4572284989624164, "grad_norm": 1.361065103282706, "learning_rate": 1.830833616364621e-06, "loss": 0.4448107182979584, "step": 1983 }, { "epoch": 0.45745907309199907, "grad_norm": 1.1036363497718666, "learning_rate": 1.830621376137368e-06, "loss": 0.5699697732925415, "step": 1984 }, { "epoch": 0.45768964722158173, "grad_norm": 1.246349122018957, "learning_rate": 1.8304090151708794e-06, "loss": 0.5701720118522644, "step": 1985 }, { "epoch": 0.4579202213511644, "grad_norm": 1.2319947144837158, "learning_rate": 1.830196533496025e-06, "loss": 0.4754391014575958, "step": 1986 }, { "epoch": 0.45815079548074705, "grad_norm": 1.3528306833221286, "learning_rate": 1.8299839311436903e-06, "loss": 0.47649019956588745, "step": 1987 }, { "epoch": 0.4583813696103297, "grad_norm": 1.3311097062461437, "learning_rate": 1.8297712081447797e-06, "loss": 0.5524393320083618, "step": 1988 }, { "epoch": 0.4586119437399124, "grad_norm": 1.0762480086961639, "learning_rate": 1.8295583645302144e-06, "loss": 0.45731648802757263, "step": 1989 }, { "epoch": 0.45884251786949504, "grad_norm": 1.130533269973984, "learning_rate": 1.8293454003309336e-06, "loss": 0.4999742805957794, "step": 1990 }, { "epoch": 0.4590730919990777, "grad_norm": 1.1313506863251181, "learning_rate": 1.829132315577894e-06, "loss": 0.49084147810935974, "step": 1991 }, { "epoch": 0.45930366612866036, "grad_norm": 1.2521400943324308, "learning_rate": 1.828919110302069e-06, "loss": 0.45332348346710205, "step": 1992 }, { "epoch": 0.459534240258243, "grad_norm": 1.0776738520694769, "learning_rate": 1.8287057845344504e-06, "loss": 0.5029363632202148, "step": 1993 }, { "epoch": 0.4597648143878257, "grad_norm": 1.1554006749910666, "learning_rate": 1.8284923383060475e-06, "loss": 0.5373274087905884, "step": 1994 }, { "epoch": 0.45999538851740834, "grad_norm": 1.372219905846735, "learning_rate": 1.8282787716478867e-06, "loss": 0.5022158622741699, "step": 1995 }, { "epoch": 0.460225962646991, "grad_norm": 1.5170390306548123, "learning_rate": 1.828065084591012e-06, "loss": 0.5093190670013428, "step": 1996 }, { "epoch": 0.46045653677657367, "grad_norm": 1.1628780385550688, "learning_rate": 1.827851277166485e-06, "loss": 0.5406581163406372, "step": 1997 }, { "epoch": 0.4606871109061563, "grad_norm": 1.0838824930169186, "learning_rate": 1.8276373494053852e-06, "loss": 0.4403364062309265, "step": 1998 }, { "epoch": 0.460917685035739, "grad_norm": 1.0663930849179153, "learning_rate": 1.8274233013388085e-06, "loss": 0.48383134603500366, "step": 1999 }, { "epoch": 0.46114825916532165, "grad_norm": 1.278024022767056, "learning_rate": 1.8272091329978693e-06, "loss": 0.5177836418151855, "step": 2000 }, { "epoch": 0.4613788332949043, "grad_norm": 1.3026255484345248, "learning_rate": 1.8269948444136991e-06, "loss": 0.5699004530906677, "step": 2001 }, { "epoch": 0.461609407424487, "grad_norm": 1.0712598167444656, "learning_rate": 1.826780435617447e-06, "loss": 0.5415153503417969, "step": 2002 }, { "epoch": 0.46183998155406963, "grad_norm": 1.3243429308154806, "learning_rate": 1.8265659066402792e-06, "loss": 0.5521166920661926, "step": 2003 }, { "epoch": 0.4620705556836523, "grad_norm": 1.0401918069659792, "learning_rate": 1.8263512575133802e-06, "loss": 0.4518507122993469, "step": 2004 }, { "epoch": 0.46230112981323496, "grad_norm": 1.4036586027704223, "learning_rate": 1.8261364882679508e-06, "loss": 0.5997140407562256, "step": 2005 }, { "epoch": 0.4625317039428176, "grad_norm": 1.2297832096563293, "learning_rate": 1.8259215989352103e-06, "loss": 0.5105265974998474, "step": 2006 }, { "epoch": 0.4627622780724003, "grad_norm": 1.3620575066378895, "learning_rate": 1.825706589546395e-06, "loss": 0.5229371190071106, "step": 2007 }, { "epoch": 0.46299285220198294, "grad_norm": 1.323713226525437, "learning_rate": 1.825491460132759e-06, "loss": 0.4833800792694092, "step": 2008 }, { "epoch": 0.4632234263315656, "grad_norm": 1.443684310899243, "learning_rate": 1.8252762107255727e-06, "loss": 0.4323253035545349, "step": 2009 }, { "epoch": 0.46345400046114826, "grad_norm": 1.0890999093716327, "learning_rate": 1.8250608413561253e-06, "loss": 0.4563494026660919, "step": 2010 }, { "epoch": 0.4636845745907309, "grad_norm": 1.5474519259744821, "learning_rate": 1.8248453520557228e-06, "loss": 0.5656196475028992, "step": 2011 }, { "epoch": 0.4639151487203136, "grad_norm": 1.4798653425077055, "learning_rate": 1.8246297428556887e-06, "loss": 0.5448226928710938, "step": 2012 }, { "epoch": 0.46414572284989625, "grad_norm": 1.1620535147248132, "learning_rate": 1.8244140137873645e-06, "loss": 0.4692860543727875, "step": 2013 }, { "epoch": 0.4643762969794789, "grad_norm": 1.1643805671555858, "learning_rate": 1.8241981648821079e-06, "loss": 0.5948643088340759, "step": 2014 }, { "epoch": 0.46460687110906157, "grad_norm": 1.1853722372788744, "learning_rate": 1.823982196171295e-06, "loss": 0.54410719871521, "step": 2015 }, { "epoch": 0.46483744523864423, "grad_norm": 1.1149495485691443, "learning_rate": 1.8237661076863192e-06, "loss": 0.430447518825531, "step": 2016 }, { "epoch": 0.4650680193682269, "grad_norm": 1.2520273819748522, "learning_rate": 1.8235498994585913e-06, "loss": 0.5420910716056824, "step": 2017 }, { "epoch": 0.46529859349780955, "grad_norm": 1.119152189162338, "learning_rate": 1.823333571519539e-06, "loss": 0.5140334963798523, "step": 2018 }, { "epoch": 0.4655291676273922, "grad_norm": 1.1399919106847334, "learning_rate": 1.8231171239006075e-06, "loss": 0.5901660323143005, "step": 2019 }, { "epoch": 0.4657597417569749, "grad_norm": 1.174060044130563, "learning_rate": 1.8229005566332603e-06, "loss": 0.5025908350944519, "step": 2020 }, { "epoch": 0.46599031588655754, "grad_norm": 1.3363070549997977, "learning_rate": 1.8226838697489772e-06, "loss": 0.4884544909000397, "step": 2021 }, { "epoch": 0.4662208900161402, "grad_norm": 1.1349219249551332, "learning_rate": 1.822467063279256e-06, "loss": 0.46449869871139526, "step": 2022 }, { "epoch": 0.46645146414572286, "grad_norm": 1.2563720378844234, "learning_rate": 1.8222501372556116e-06, "loss": 0.49463552236557007, "step": 2023 }, { "epoch": 0.4666820382753055, "grad_norm": 1.285405581097111, "learning_rate": 1.8220330917095768e-06, "loss": 0.5027149319648743, "step": 2024 }, { "epoch": 0.4669126124048882, "grad_norm": 1.3048909901236199, "learning_rate": 1.8218159266727007e-06, "loss": 0.564018726348877, "step": 2025 }, { "epoch": 0.46714318653447084, "grad_norm": 1.1965631228875364, "learning_rate": 1.821598642176551e-06, "loss": 0.4235766530036926, "step": 2026 }, { "epoch": 0.4673737606640535, "grad_norm": 1.3354885477125742, "learning_rate": 1.8213812382527118e-06, "loss": 0.5696560144424438, "step": 2027 }, { "epoch": 0.46760433479363617, "grad_norm": 1.2879943344932543, "learning_rate": 1.8211637149327856e-06, "loss": 0.6101738214492798, "step": 2028 }, { "epoch": 0.46783490892321883, "grad_norm": 1.2787382273760666, "learning_rate": 1.820946072248391e-06, "loss": 0.46749603748321533, "step": 2029 }, { "epoch": 0.4680654830528015, "grad_norm": 1.0137433334051962, "learning_rate": 1.8207283102311646e-06, "loss": 0.4713476300239563, "step": 2030 }, { "epoch": 0.46829605718238415, "grad_norm": 1.1924917748606811, "learning_rate": 1.8205104289127607e-06, "loss": 0.5381859540939331, "step": 2031 }, { "epoch": 0.4685266313119668, "grad_norm": 1.1753816722161505, "learning_rate": 1.82029242832485e-06, "loss": 0.4871833324432373, "step": 2032 }, { "epoch": 0.4687572054415495, "grad_norm": 1.2889177236993268, "learning_rate": 1.8200743084991217e-06, "loss": 0.520627498626709, "step": 2033 }, { "epoch": 0.46898777957113214, "grad_norm": 1.1168475824168262, "learning_rate": 1.8198560694672813e-06, "loss": 0.5382364392280579, "step": 2034 }, { "epoch": 0.4692183537007148, "grad_norm": 1.0953401197844614, "learning_rate": 1.8196377112610524e-06, "loss": 0.384588360786438, "step": 2035 }, { "epoch": 0.46944892783029746, "grad_norm": 1.3337847292368636, "learning_rate": 1.8194192339121752e-06, "loss": 0.5515186786651611, "step": 2036 }, { "epoch": 0.4696795019598801, "grad_norm": 1.2634192136555153, "learning_rate": 1.819200637452408e-06, "loss": 0.5405331254005432, "step": 2037 }, { "epoch": 0.4699100760894628, "grad_norm": 1.3408838607377604, "learning_rate": 1.818981921913526e-06, "loss": 0.5565645694732666, "step": 2038 }, { "epoch": 0.47014065021904544, "grad_norm": 1.1845986031026676, "learning_rate": 1.818763087327321e-06, "loss": 0.4856358468532562, "step": 2039 }, { "epoch": 0.4703712243486281, "grad_norm": 1.1018414398540533, "learning_rate": 1.8185441337256035e-06, "loss": 0.5495761632919312, "step": 2040 }, { "epoch": 0.47060179847821076, "grad_norm": 1.1792744067343253, "learning_rate": 1.8183250611402007e-06, "loss": 0.509435772895813, "step": 2041 }, { "epoch": 0.4708323726077934, "grad_norm": 1.0107628293119386, "learning_rate": 1.8181058696029564e-06, "loss": 0.4663920998573303, "step": 2042 }, { "epoch": 0.4710629467373761, "grad_norm": 1.5093599722992523, "learning_rate": 1.817886559145733e-06, "loss": 0.5976128578186035, "step": 2043 }, { "epoch": 0.47129352086695875, "grad_norm": 1.2084791393616294, "learning_rate": 1.817667129800409e-06, "loss": 0.49167966842651367, "step": 2044 }, { "epoch": 0.4715240949965414, "grad_norm": 1.1457657477052965, "learning_rate": 1.817447581598881e-06, "loss": 0.5889153480529785, "step": 2045 }, { "epoch": 0.47175466912612407, "grad_norm": 1.206584712735091, "learning_rate": 1.8172279145730622e-06, "loss": 0.4970330595970154, "step": 2046 }, { "epoch": 0.47198524325570673, "grad_norm": 1.1497751548880843, "learning_rate": 1.817008128754884e-06, "loss": 0.4840531051158905, "step": 2047 }, { "epoch": 0.4722158173852894, "grad_norm": 1.0450687693806986, "learning_rate": 1.816788224176294e-06, "loss": 0.48297861218452454, "step": 2048 }, { "epoch": 0.47244639151487205, "grad_norm": 1.184218710920589, "learning_rate": 1.8165682008692578e-06, "loss": 0.540350079536438, "step": 2049 }, { "epoch": 0.4726769656444547, "grad_norm": 1.0359041945652345, "learning_rate": 1.8163480588657578e-06, "loss": 0.46405351161956787, "step": 2050 }, { "epoch": 0.4729075397740374, "grad_norm": 1.1107404730922064, "learning_rate": 1.816127798197794e-06, "loss": 0.5175468921661377, "step": 2051 }, { "epoch": 0.47313811390362004, "grad_norm": 1.3876726162535544, "learning_rate": 1.8159074188973836e-06, "loss": 0.5923771858215332, "step": 2052 }, { "epoch": 0.4733686880332027, "grad_norm": 1.135618311389398, "learning_rate": 1.815686920996561e-06, "loss": 0.4999024569988251, "step": 2053 }, { "epoch": 0.47359926216278536, "grad_norm": 1.260203747569289, "learning_rate": 1.8154663045273775e-06, "loss": 0.5630939602851868, "step": 2054 }, { "epoch": 0.473829836292368, "grad_norm": 1.0446947469213006, "learning_rate": 1.8152455695219021e-06, "loss": 0.5505836009979248, "step": 2055 }, { "epoch": 0.4740604104219507, "grad_norm": 1.0593378648910954, "learning_rate": 1.8150247160122213e-06, "loss": 0.44550588726997375, "step": 2056 }, { "epoch": 0.47429098455153335, "grad_norm": 1.3784716647825315, "learning_rate": 1.8148037440304375e-06, "loss": 0.5387516021728516, "step": 2057 }, { "epoch": 0.47452155868111595, "grad_norm": 1.2100168024707112, "learning_rate": 1.814582653608672e-06, "loss": 0.5941788554191589, "step": 2058 }, { "epoch": 0.4747521328106986, "grad_norm": 1.3537451578676338, "learning_rate": 1.8143614447790622e-06, "loss": 0.552179217338562, "step": 2059 }, { "epoch": 0.4749827069402813, "grad_norm": 1.4352695047482156, "learning_rate": 1.8141401175737632e-06, "loss": 0.4475885033607483, "step": 2060 }, { "epoch": 0.47521328106986394, "grad_norm": 1.560782042661122, "learning_rate": 1.813918672024947e-06, "loss": 0.5821356773376465, "step": 2061 }, { "epoch": 0.4754438551994466, "grad_norm": 1.0378834941031638, "learning_rate": 1.8136971081648027e-06, "loss": 0.4673501253128052, "step": 2062 }, { "epoch": 0.47567442932902926, "grad_norm": 1.278556049660224, "learning_rate": 1.8134754260255373e-06, "loss": 0.582427978515625, "step": 2063 }, { "epoch": 0.4759050034586119, "grad_norm": 1.050202225169388, "learning_rate": 1.8132536256393744e-06, "loss": 0.4494328498840332, "step": 2064 }, { "epoch": 0.4761355775881946, "grad_norm": 1.2125688329070163, "learning_rate": 1.8130317070385552e-06, "loss": 0.44775205850601196, "step": 2065 }, { "epoch": 0.47636615171777724, "grad_norm": 1.6939798990457848, "learning_rate": 1.8128096702553372e-06, "loss": 0.5456822514533997, "step": 2066 }, { "epoch": 0.4765967258473599, "grad_norm": 1.3273956589633653, "learning_rate": 1.8125875153219963e-06, "loss": 0.46396178007125854, "step": 2067 }, { "epoch": 0.47682729997694256, "grad_norm": 1.1515186039412058, "learning_rate": 1.8123652422708247e-06, "loss": 0.4479365944862366, "step": 2068 }, { "epoch": 0.4770578741065252, "grad_norm": 1.2802069282774096, "learning_rate": 1.8121428511341322e-06, "loss": 0.4633978605270386, "step": 2069 }, { "epoch": 0.4772884482361079, "grad_norm": 1.0517363876370052, "learning_rate": 1.811920341944245e-06, "loss": 0.5190213918685913, "step": 2070 }, { "epoch": 0.47751902236569055, "grad_norm": 1.1502023331468956, "learning_rate": 1.811697714733508e-06, "loss": 0.3900855779647827, "step": 2071 }, { "epoch": 0.4777495964952732, "grad_norm": 1.1255517906685018, "learning_rate": 1.8114749695342816e-06, "loss": 0.5130020380020142, "step": 2072 }, { "epoch": 0.47798017062485587, "grad_norm": 1.181934216759251, "learning_rate": 1.8112521063789444e-06, "loss": 0.5279096364974976, "step": 2073 }, { "epoch": 0.47821074475443853, "grad_norm": 1.1536132669518966, "learning_rate": 1.8110291252998918e-06, "loss": 0.5048732161521912, "step": 2074 }, { "epoch": 0.4784413188840212, "grad_norm": 1.3979756779725594, "learning_rate": 1.8108060263295362e-06, "loss": 0.5410048365592957, "step": 2075 }, { "epoch": 0.47867189301360386, "grad_norm": 1.2583345285712537, "learning_rate": 1.8105828095003073e-06, "loss": 0.5144593715667725, "step": 2076 }, { "epoch": 0.4789024671431865, "grad_norm": 1.427505910251362, "learning_rate": 1.810359474844652e-06, "loss": 0.543846845626831, "step": 2077 }, { "epoch": 0.4791330412727692, "grad_norm": 1.3389957969723305, "learning_rate": 1.8101360223950346e-06, "loss": 0.5628032684326172, "step": 2078 }, { "epoch": 0.47936361540235184, "grad_norm": 1.2233623869672197, "learning_rate": 1.8099124521839358e-06, "loss": 0.5248516201972961, "step": 2079 }, { "epoch": 0.4795941895319345, "grad_norm": 1.1882395736191633, "learning_rate": 1.8096887642438537e-06, "loss": 0.44171589612960815, "step": 2080 }, { "epoch": 0.47982476366151716, "grad_norm": 1.1226478747483744, "learning_rate": 1.809464958607304e-06, "loss": 0.5003859996795654, "step": 2081 }, { "epoch": 0.4800553377910998, "grad_norm": 1.2241972764897475, "learning_rate": 1.8092410353068183e-06, "loss": 0.5271269679069519, "step": 2082 }, { "epoch": 0.4802859119206825, "grad_norm": 1.390627459359596, "learning_rate": 1.8090169943749474e-06, "loss": 0.5191465616226196, "step": 2083 }, { "epoch": 0.48051648605026515, "grad_norm": 1.229186901325219, "learning_rate": 1.8087928358442567e-06, "loss": 0.4569256007671356, "step": 2084 }, { "epoch": 0.4807470601798478, "grad_norm": 1.2586566204343959, "learning_rate": 1.8085685597473307e-06, "loss": 0.521030068397522, "step": 2085 }, { "epoch": 0.48097763430943047, "grad_norm": 1.8616539280014968, "learning_rate": 1.80834416611677e-06, "loss": 0.48959439992904663, "step": 2086 }, { "epoch": 0.48120820843901313, "grad_norm": 1.37464754051939, "learning_rate": 1.8081196549851925e-06, "loss": 0.6536514163017273, "step": 2087 }, { "epoch": 0.4814387825685958, "grad_norm": 1.2292193685806807, "learning_rate": 1.8078950263852327e-06, "loss": 0.5746080875396729, "step": 2088 }, { "epoch": 0.48166935669817845, "grad_norm": 1.244000490897379, "learning_rate": 1.8076702803495437e-06, "loss": 0.5518802404403687, "step": 2089 }, { "epoch": 0.4818999308277611, "grad_norm": 1.0641823457217219, "learning_rate": 1.8074454169107934e-06, "loss": 0.49385470151901245, "step": 2090 }, { "epoch": 0.4821305049573438, "grad_norm": 1.0197781900207734, "learning_rate": 1.8072204361016688e-06, "loss": 0.4488806426525116, "step": 2091 }, { "epoch": 0.48236107908692644, "grad_norm": 1.1424753749617582, "learning_rate": 1.8069953379548727e-06, "loss": 0.4167511761188507, "step": 2092 }, { "epoch": 0.4825916532165091, "grad_norm": 1.0650805504939584, "learning_rate": 1.8067701225031258e-06, "loss": 0.4181321859359741, "step": 2093 }, { "epoch": 0.48282222734609176, "grad_norm": 1.4930083094447149, "learning_rate": 1.806544789779165e-06, "loss": 0.5257805585861206, "step": 2094 }, { "epoch": 0.4830528014756744, "grad_norm": 1.2055270290247748, "learning_rate": 1.806319339815745e-06, "loss": 0.4687056541442871, "step": 2095 }, { "epoch": 0.4832833756052571, "grad_norm": 1.4682007990950796, "learning_rate": 1.8060937726456373e-06, "loss": 0.48070380091667175, "step": 2096 }, { "epoch": 0.48351394973483974, "grad_norm": 1.1555932423285984, "learning_rate": 1.80586808830163e-06, "loss": 0.516263484954834, "step": 2097 }, { "epoch": 0.4837445238644224, "grad_norm": 1.1676344701764343, "learning_rate": 1.805642286816529e-06, "loss": 0.44018858671188354, "step": 2098 }, { "epoch": 0.48397509799400507, "grad_norm": 1.1426045047454896, "learning_rate": 1.8054163682231565e-06, "loss": 0.469373881816864, "step": 2099 }, { "epoch": 0.4842056721235877, "grad_norm": 1.2080131082183756, "learning_rate": 1.8051903325543525e-06, "loss": 0.4759753346443176, "step": 2100 }, { "epoch": 0.4844362462531704, "grad_norm": 1.210070128706108, "learning_rate": 1.804964179842973e-06, "loss": 0.5002714395523071, "step": 2101 }, { "epoch": 0.48466682038275305, "grad_norm": 1.5442585246670464, "learning_rate": 1.804737910121892e-06, "loss": 0.4869537353515625, "step": 2102 }, { "epoch": 0.4848973945123357, "grad_norm": 1.0025531891942765, "learning_rate": 1.804511523424e-06, "loss": 0.4840247929096222, "step": 2103 }, { "epoch": 0.4851279686419184, "grad_norm": 1.2125955941110753, "learning_rate": 1.8042850197822049e-06, "loss": 0.48390740156173706, "step": 2104 }, { "epoch": 0.48535854277150103, "grad_norm": 1.2581816256760507, "learning_rate": 1.8040583992294305e-06, "loss": 0.5875431895256042, "step": 2105 }, { "epoch": 0.4855891169010837, "grad_norm": 1.1530238586197006, "learning_rate": 1.803831661798619e-06, "loss": 0.4599287211894989, "step": 2106 }, { "epoch": 0.48581969103066636, "grad_norm": 1.120967919274212, "learning_rate": 1.803604807522729e-06, "loss": 0.5266382694244385, "step": 2107 }, { "epoch": 0.486050265160249, "grad_norm": 1.6402953005136756, "learning_rate": 1.8033778364347359e-06, "loss": 0.5592058897018433, "step": 2108 }, { "epoch": 0.4862808392898317, "grad_norm": 1.278433491122833, "learning_rate": 1.8031507485676324e-06, "loss": 0.4385683834552765, "step": 2109 }, { "epoch": 0.48651141341941434, "grad_norm": 0.9409152493815139, "learning_rate": 1.8029235439544277e-06, "loss": 0.4205859303474426, "step": 2110 }, { "epoch": 0.486741987548997, "grad_norm": 1.2334271425613326, "learning_rate": 1.8026962226281484e-06, "loss": 0.4179378151893616, "step": 2111 }, { "epoch": 0.48697256167857966, "grad_norm": 1.3018247329424364, "learning_rate": 1.8024687846218382e-06, "loss": 0.5022565126419067, "step": 2112 }, { "epoch": 0.4872031358081623, "grad_norm": 1.092822670373115, "learning_rate": 1.8022412299685574e-06, "loss": 0.4591484069824219, "step": 2113 }, { "epoch": 0.487433709937745, "grad_norm": 1.135644170855214, "learning_rate": 1.8020135587013836e-06, "loss": 0.44381004571914673, "step": 2114 }, { "epoch": 0.48766428406732765, "grad_norm": 1.4882998519827229, "learning_rate": 1.8017857708534106e-06, "loss": 0.5418124198913574, "step": 2115 }, { "epoch": 0.4878948581969103, "grad_norm": 1.1899076485341344, "learning_rate": 1.80155786645775e-06, "loss": 0.45836228132247925, "step": 2116 }, { "epoch": 0.48812543232649297, "grad_norm": 1.0900529156655503, "learning_rate": 1.80132984554753e-06, "loss": 0.6028016805648804, "step": 2117 }, { "epoch": 0.48835600645607563, "grad_norm": 1.2082046720219188, "learning_rate": 1.8011017081558956e-06, "loss": 0.461037814617157, "step": 2118 }, { "epoch": 0.4885865805856583, "grad_norm": 1.2201342507223627, "learning_rate": 1.8008734543160092e-06, "loss": 0.45145073533058167, "step": 2119 }, { "epoch": 0.48881715471524095, "grad_norm": 1.0786402560770025, "learning_rate": 1.8006450840610495e-06, "loss": 0.5074604153633118, "step": 2120 }, { "epoch": 0.4890477288448236, "grad_norm": 1.047533414614444, "learning_rate": 1.8004165974242124e-06, "loss": 0.48518210649490356, "step": 2121 }, { "epoch": 0.4892783029744063, "grad_norm": 1.3858118136014763, "learning_rate": 1.800187994438711e-06, "loss": 0.5427801609039307, "step": 2122 }, { "epoch": 0.48950887710398894, "grad_norm": 1.1550068575676335, "learning_rate": 1.799959275137775e-06, "loss": 0.5002918839454651, "step": 2123 }, { "epoch": 0.4897394512335716, "grad_norm": 1.1639768741422865, "learning_rate": 1.799730439554651e-06, "loss": 0.4417838454246521, "step": 2124 }, { "epoch": 0.48997002536315426, "grad_norm": 1.1441558832004912, "learning_rate": 1.7995014877226024e-06, "loss": 0.4260700047016144, "step": 2125 }, { "epoch": 0.4902005994927369, "grad_norm": 1.2965264900873492, "learning_rate": 1.79927241967491e-06, "loss": 0.5480694770812988, "step": 2126 }, { "epoch": 0.4904311736223196, "grad_norm": 1.1303746553940783, "learning_rate": 1.7990432354448713e-06, "loss": 0.3911926746368408, "step": 2127 }, { "epoch": 0.49066174775190224, "grad_norm": 1.6919718962195622, "learning_rate": 1.7988139350657997e-06, "loss": 0.5269262194633484, "step": 2128 }, { "epoch": 0.4908923218814849, "grad_norm": 1.1850805062858767, "learning_rate": 1.7985845185710272e-06, "loss": 0.47482216358184814, "step": 2129 }, { "epoch": 0.49112289601106757, "grad_norm": 1.1047509042558772, "learning_rate": 1.7983549859939018e-06, "loss": 0.5663374662399292, "step": 2130 }, { "epoch": 0.49135347014065023, "grad_norm": 1.3067402879954033, "learning_rate": 1.7981253373677875e-06, "loss": 0.5322546362876892, "step": 2131 }, { "epoch": 0.4915840442702329, "grad_norm": 1.3127111295082199, "learning_rate": 1.797895572726067e-06, "loss": 0.4238794445991516, "step": 2132 }, { "epoch": 0.49181461839981555, "grad_norm": 1.3803934905983801, "learning_rate": 1.7976656921021384e-06, "loss": 0.49363791942596436, "step": 2133 }, { "epoch": 0.4920451925293982, "grad_norm": 1.2075981604593182, "learning_rate": 1.7974356955294178e-06, "loss": 0.5079565048217773, "step": 2134 }, { "epoch": 0.4922757666589809, "grad_norm": 1.2533809097279895, "learning_rate": 1.7972055830413369e-06, "loss": 0.5259063243865967, "step": 2135 }, { "epoch": 0.49250634078856353, "grad_norm": 1.1936271771370206, "learning_rate": 1.7969753546713448e-06, "loss": 0.49021831154823303, "step": 2136 }, { "epoch": 0.4927369149181462, "grad_norm": 1.1560183810694227, "learning_rate": 1.7967450104529078e-06, "loss": 0.49721387028694153, "step": 2137 }, { "epoch": 0.49296748904772886, "grad_norm": 1.523657234221405, "learning_rate": 1.796514550419509e-06, "loss": 0.6129348278045654, "step": 2138 }, { "epoch": 0.4931980631773115, "grad_norm": 1.245217894172975, "learning_rate": 1.7962839746046479e-06, "loss": 0.5034269094467163, "step": 2139 }, { "epoch": 0.4934286373068942, "grad_norm": 1.2009412202372387, "learning_rate": 1.7960532830418408e-06, "loss": 0.490216463804245, "step": 2140 }, { "epoch": 0.49365921143647684, "grad_norm": 1.3063386967377661, "learning_rate": 1.7958224757646212e-06, "loss": 0.5609744787216187, "step": 2141 }, { "epoch": 0.4938897855660595, "grad_norm": 1.2989425251267097, "learning_rate": 1.7955915528065395e-06, "loss": 0.4438238739967346, "step": 2142 }, { "epoch": 0.49412035969564216, "grad_norm": 1.1724755739495214, "learning_rate": 1.7953605142011626e-06, "loss": 0.4704767167568207, "step": 2143 }, { "epoch": 0.4943509338252248, "grad_norm": 1.0972580275821462, "learning_rate": 1.795129359982074e-06, "loss": 0.44819536805152893, "step": 2144 }, { "epoch": 0.4945815079548075, "grad_norm": 1.4390962273022694, "learning_rate": 1.7948980901828746e-06, "loss": 0.5311752557754517, "step": 2145 }, { "epoch": 0.49481208208439015, "grad_norm": 1.524280309497039, "learning_rate": 1.7946667048371818e-06, "loss": 0.46144258975982666, "step": 2146 }, { "epoch": 0.4950426562139728, "grad_norm": 1.719231407355215, "learning_rate": 1.7944352039786297e-06, "loss": 0.5973725914955139, "step": 2147 }, { "epoch": 0.49527323034355547, "grad_norm": 1.4078850153564488, "learning_rate": 1.7942035876408693e-06, "loss": 0.4930835962295532, "step": 2148 }, { "epoch": 0.49550380447313813, "grad_norm": 1.3404357985733748, "learning_rate": 1.7939718558575685e-06, "loss": 0.39137697219848633, "step": 2149 }, { "epoch": 0.4957343786027208, "grad_norm": 1.364926902591579, "learning_rate": 1.7937400086624117e-06, "loss": 0.47618329524993896, "step": 2150 }, { "epoch": 0.49596495273230345, "grad_norm": 1.1307446090872737, "learning_rate": 1.7935080460891005e-06, "loss": 0.4751483201980591, "step": 2151 }, { "epoch": 0.4961955268618861, "grad_norm": 1.05862482163457, "learning_rate": 1.7932759681713528e-06, "loss": 0.4654052257537842, "step": 2152 }, { "epoch": 0.4964261009914688, "grad_norm": 1.5078817597304273, "learning_rate": 1.7930437749429035e-06, "loss": 0.551579475402832, "step": 2153 }, { "epoch": 0.49665667512105144, "grad_norm": 1.1496698915645684, "learning_rate": 1.792811466437504e-06, "loss": 0.4967789053916931, "step": 2154 }, { "epoch": 0.4968872492506341, "grad_norm": 1.2983844202508301, "learning_rate": 1.7925790426889234e-06, "loss": 0.5826432108879089, "step": 2155 }, { "epoch": 0.49711782338021676, "grad_norm": 1.1680445889037752, "learning_rate": 1.792346503730946e-06, "loss": 0.4260643720626831, "step": 2156 }, { "epoch": 0.4973483975097994, "grad_norm": 1.287300561489553, "learning_rate": 1.7921138495973741e-06, "loss": 0.48679620027542114, "step": 2157 }, { "epoch": 0.4975789716393821, "grad_norm": 1.219223301068072, "learning_rate": 1.7918810803220266e-06, "loss": 0.5048027634620667, "step": 2158 }, { "epoch": 0.49780954576896475, "grad_norm": 1.3507694371861767, "learning_rate": 1.7916481959387384e-06, "loss": 0.5073787569999695, "step": 2159 }, { "epoch": 0.4980401198985474, "grad_norm": 1.1692017846177098, "learning_rate": 1.791415196481362e-06, "loss": 0.47361671924591064, "step": 2160 }, { "epoch": 0.49827069402813007, "grad_norm": 1.2422906508724816, "learning_rate": 1.7911820819837659e-06, "loss": 0.46382519602775574, "step": 2161 }, { "epoch": 0.49850126815771273, "grad_norm": 1.2239936361904968, "learning_rate": 1.7909488524798357e-06, "loss": 0.5167688727378845, "step": 2162 }, { "epoch": 0.4987318422872954, "grad_norm": 1.125831583037744, "learning_rate": 1.7907155080034739e-06, "loss": 0.4486730992794037, "step": 2163 }, { "epoch": 0.49896241641687805, "grad_norm": 1.1343310195374692, "learning_rate": 1.7904820485885991e-06, "loss": 0.508470356464386, "step": 2164 }, { "epoch": 0.4991929905464607, "grad_norm": 1.2928862741310794, "learning_rate": 1.790248474269148e-06, "loss": 0.4752856492996216, "step": 2165 }, { "epoch": 0.4994235646760434, "grad_norm": 1.4158256008874892, "learning_rate": 1.7900147850790713e-06, "loss": 0.47191953659057617, "step": 2166 }, { "epoch": 0.49965413880562604, "grad_norm": 1.2139421208311327, "learning_rate": 1.7897809810523396e-06, "loss": 0.48935621976852417, "step": 2167 }, { "epoch": 0.4998847129352087, "grad_norm": 1.0547512942585364, "learning_rate": 1.789547062222938e-06, "loss": 0.5455219149589539, "step": 2168 }, { "epoch": 0.5001152870647914, "grad_norm": 1.3471138253822197, "learning_rate": 1.789313028624869e-06, "loss": 0.5068193078041077, "step": 2169 }, { "epoch": 0.500345861194374, "grad_norm": 1.354177516749214, "learning_rate": 1.789078880292152e-06, "loss": 0.5868322253227234, "step": 2170 }, { "epoch": 0.5005764353239567, "grad_norm": 1.2474005261331733, "learning_rate": 1.7888446172588222e-06, "loss": 0.5132089853286743, "step": 2171 }, { "epoch": 0.5008070094535393, "grad_norm": 1.6917901077948925, "learning_rate": 1.788610239558933e-06, "loss": 0.5673823356628418, "step": 2172 }, { "epoch": 0.501037583583122, "grad_norm": 1.1902561905753382, "learning_rate": 1.7883757472265533e-06, "loss": 0.47085779905319214, "step": 2173 }, { "epoch": 0.5012681577127046, "grad_norm": 1.38526914772559, "learning_rate": 1.7881411402957685e-06, "loss": 0.5286725163459778, "step": 2174 }, { "epoch": 0.5014987318422873, "grad_norm": 1.1910792946448119, "learning_rate": 1.7879064188006817e-06, "loss": 0.5044010877609253, "step": 2175 }, { "epoch": 0.5017293059718699, "grad_norm": 1.8451305262061892, "learning_rate": 1.7876715827754113e-06, "loss": 0.5329761505126953, "step": 2176 }, { "epoch": 0.5019598801014526, "grad_norm": 1.1057498562542696, "learning_rate": 1.7874366322540937e-06, "loss": 0.5025275349617004, "step": 2177 }, { "epoch": 0.5021904542310353, "grad_norm": 1.1913338911250846, "learning_rate": 1.7872015672708814e-06, "loss": 0.48466378450393677, "step": 2178 }, { "epoch": 0.502421028360618, "grad_norm": 1.298497377256874, "learning_rate": 1.7869663878599427e-06, "loss": 0.505358099937439, "step": 2179 }, { "epoch": 0.5026516024902006, "grad_norm": 1.3974305011742736, "learning_rate": 1.7867310940554643e-06, "loss": 0.4934875965118408, "step": 2180 }, { "epoch": 0.5028821766197833, "grad_norm": 0.9670109365307766, "learning_rate": 1.7864956858916482e-06, "loss": 0.4726678133010864, "step": 2181 }, { "epoch": 0.5031127507493659, "grad_norm": 1.3043022336942207, "learning_rate": 1.786260163402713e-06, "loss": 0.4619986414909363, "step": 2182 }, { "epoch": 0.5033433248789486, "grad_norm": 1.17201330946801, "learning_rate": 1.7860245266228946e-06, "loss": 0.4483926594257355, "step": 2183 }, { "epoch": 0.5035738990085312, "grad_norm": 1.0474549975114675, "learning_rate": 1.7857887755864451e-06, "loss": 0.4756368100643158, "step": 2184 }, { "epoch": 0.5038044731381139, "grad_norm": 1.248404397964203, "learning_rate": 1.7855529103276334e-06, "loss": 0.5610564351081848, "step": 2185 }, { "epoch": 0.5040350472676965, "grad_norm": 1.178944045969772, "learning_rate": 1.7853169308807447e-06, "loss": 0.49948322772979736, "step": 2186 }, { "epoch": 0.5042656213972793, "grad_norm": 1.203613939490818, "learning_rate": 1.7850808372800813e-06, "loss": 0.5023819208145142, "step": 2187 }, { "epoch": 0.5044961955268619, "grad_norm": 1.1738403952666703, "learning_rate": 1.7848446295599617e-06, "loss": 0.45893096923828125, "step": 2188 }, { "epoch": 0.5047267696564446, "grad_norm": 1.2621327179460875, "learning_rate": 1.7846083077547212e-06, "loss": 0.39129459857940674, "step": 2189 }, { "epoch": 0.5049573437860272, "grad_norm": 0.9495823494613052, "learning_rate": 1.784371871898711e-06, "loss": 0.42348673939704895, "step": 2190 }, { "epoch": 0.5051879179156099, "grad_norm": 1.4438634303858584, "learning_rate": 1.7841353220263e-06, "loss": 0.5760704278945923, "step": 2191 }, { "epoch": 0.5054184920451925, "grad_norm": 1.1475240268019702, "learning_rate": 1.7838986581718731e-06, "loss": 0.5281997323036194, "step": 2192 }, { "epoch": 0.5056490661747752, "grad_norm": 1.3139768062702608, "learning_rate": 1.7836618803698315e-06, "loss": 0.543775200843811, "step": 2193 }, { "epoch": 0.5058796403043578, "grad_norm": 1.2497491249667418, "learning_rate": 1.7834249886545934e-06, "loss": 0.4148549437522888, "step": 2194 }, { "epoch": 0.5061102144339406, "grad_norm": 1.183178207015322, "learning_rate": 1.7831879830605936e-06, "loss": 0.5165001153945923, "step": 2195 }, { "epoch": 0.5063407885635232, "grad_norm": 1.0854657175123028, "learning_rate": 1.782950863622283e-06, "loss": 0.4183283746242523, "step": 2196 }, { "epoch": 0.5065713626931059, "grad_norm": 1.2476527930959387, "learning_rate": 1.7827136303741292e-06, "loss": 0.46558016538619995, "step": 2197 }, { "epoch": 0.5068019368226885, "grad_norm": 1.2829595269176914, "learning_rate": 1.782476283350617e-06, "loss": 0.5491806268692017, "step": 2198 }, { "epoch": 0.5070325109522712, "grad_norm": 1.3547672961051511, "learning_rate": 1.7822388225862466e-06, "loss": 0.42999008297920227, "step": 2199 }, { "epoch": 0.5072630850818538, "grad_norm": 1.2776437457035281, "learning_rate": 1.7820012481155358e-06, "loss": 0.42478299140930176, "step": 2200 }, { "epoch": 0.5074936592114365, "grad_norm": 4.51069636831696, "learning_rate": 1.781763559973018e-06, "loss": 0.4175076186656952, "step": 2201 }, { "epoch": 0.5077242333410191, "grad_norm": 1.1985836355289028, "learning_rate": 1.7815257581932439e-06, "loss": 0.42197084426879883, "step": 2202 }, { "epoch": 0.5079548074706018, "grad_norm": 1.2175005553032592, "learning_rate": 1.7812878428107803e-06, "loss": 0.39872926473617554, "step": 2203 }, { "epoch": 0.5081853816001844, "grad_norm": 1.2908474732070376, "learning_rate": 1.7810498138602106e-06, "loss": 0.4572516977787018, "step": 2204 }, { "epoch": 0.5084159557297672, "grad_norm": 1.1254873587347531, "learning_rate": 1.780811671376135e-06, "loss": 0.5261520147323608, "step": 2205 }, { "epoch": 0.5086465298593498, "grad_norm": 1.8336847349223555, "learning_rate": 1.7805734153931696e-06, "loss": 0.4714658260345459, "step": 2206 }, { "epoch": 0.5088771039889325, "grad_norm": 1.0757806041139168, "learning_rate": 1.7803350459459472e-06, "loss": 0.46184858679771423, "step": 2207 }, { "epoch": 0.5091076781185151, "grad_norm": 1.2531712345918984, "learning_rate": 1.7800965630691173e-06, "loss": 0.48189157247543335, "step": 2208 }, { "epoch": 0.5093382522480978, "grad_norm": 1.5363179586848308, "learning_rate": 1.7798579667973463e-06, "loss": 0.47865352034568787, "step": 2209 }, { "epoch": 0.5095688263776804, "grad_norm": 1.1589101806191746, "learning_rate": 1.7796192571653162e-06, "loss": 0.46073317527770996, "step": 2210 }, { "epoch": 0.5097994005072631, "grad_norm": 1.1781605500578527, "learning_rate": 1.7793804342077253e-06, "loss": 0.5099648237228394, "step": 2211 }, { "epoch": 0.5100299746368457, "grad_norm": 1.2319682423717142, "learning_rate": 1.7791414979592903e-06, "loss": 0.5436147451400757, "step": 2212 }, { "epoch": 0.5102605487664285, "grad_norm": 1.2305699349330186, "learning_rate": 1.7789024484547417e-06, "loss": 0.5455893278121948, "step": 2213 }, { "epoch": 0.5104911228960111, "grad_norm": 1.2918560641722026, "learning_rate": 1.7786632857288284e-06, "loss": 0.4886546730995178, "step": 2214 }, { "epoch": 0.5107216970255938, "grad_norm": 1.1611199451436964, "learning_rate": 1.778424009816315e-06, "loss": 0.4793723225593567, "step": 2215 }, { "epoch": 0.5109522711551764, "grad_norm": 1.3312189289078886, "learning_rate": 1.7781846207519826e-06, "loss": 0.5814248323440552, "step": 2216 }, { "epoch": 0.5111828452847591, "grad_norm": 1.1560984097631717, "learning_rate": 1.777945118570629e-06, "loss": 0.5057421326637268, "step": 2217 }, { "epoch": 0.5114134194143417, "grad_norm": 1.3009634347843195, "learning_rate": 1.7777055033070682e-06, "loss": 0.3913435935974121, "step": 2218 }, { "epoch": 0.5116439935439244, "grad_norm": 0.9761581598604525, "learning_rate": 1.7774657749961305e-06, "loss": 0.4450770616531372, "step": 2219 }, { "epoch": 0.511874567673507, "grad_norm": 1.731999332658399, "learning_rate": 1.7772259336726636e-06, "loss": 0.5164940357208252, "step": 2220 }, { "epoch": 0.5121051418030897, "grad_norm": 1.257043827333845, "learning_rate": 1.7769859793715298e-06, "loss": 0.44231802225112915, "step": 2221 }, { "epoch": 0.5123357159326724, "grad_norm": 1.2521439253976214, "learning_rate": 1.7767459121276093e-06, "loss": 0.516791820526123, "step": 2222 }, { "epoch": 0.5125662900622551, "grad_norm": 1.2456616904380073, "learning_rate": 1.7765057319757989e-06, "loss": 0.4180450737476349, "step": 2223 }, { "epoch": 0.5127968641918377, "grad_norm": 1.1350275613249636, "learning_rate": 1.77626543895101e-06, "loss": 0.49246734380722046, "step": 2224 }, { "epoch": 0.5130274383214203, "grad_norm": 1.1582721424765736, "learning_rate": 1.7760250330881728e-06, "loss": 0.5058225393295288, "step": 2225 }, { "epoch": 0.513258012451003, "grad_norm": 1.4118813849041838, "learning_rate": 1.7757845144222321e-06, "loss": 0.4752033054828644, "step": 2226 }, { "epoch": 0.5134885865805856, "grad_norm": 1.2950831387397626, "learning_rate": 1.77554388298815e-06, "loss": 0.45163947343826294, "step": 2227 }, { "epoch": 0.5137191607101683, "grad_norm": 1.387042973653302, "learning_rate": 1.7753031388209044e-06, "loss": 0.46295779943466187, "step": 2228 }, { "epoch": 0.5139497348397509, "grad_norm": 1.2958875463664286, "learning_rate": 1.7750622819554903e-06, "loss": 0.5682947635650635, "step": 2229 }, { "epoch": 0.5141803089693336, "grad_norm": 1.353052791820573, "learning_rate": 1.7748213124269187e-06, "loss": 0.4890878200531006, "step": 2230 }, { "epoch": 0.5144108830989162, "grad_norm": 1.4612536503294715, "learning_rate": 1.7745802302702164e-06, "loss": 0.5952332615852356, "step": 2231 }, { "epoch": 0.514641457228499, "grad_norm": 1.1928368431775584, "learning_rate": 1.7743390355204278e-06, "loss": 0.43224406242370605, "step": 2232 }, { "epoch": 0.5148720313580816, "grad_norm": 1.1851533508030387, "learning_rate": 1.7740977282126122e-06, "loss": 0.5010303258895874, "step": 2233 }, { "epoch": 0.5151026054876643, "grad_norm": 1.105983766082305, "learning_rate": 1.7738563083818469e-06, "loss": 0.5166633725166321, "step": 2234 }, { "epoch": 0.5153331796172469, "grad_norm": 1.0533784617555741, "learning_rate": 1.7736147760632245e-06, "loss": 0.4748263359069824, "step": 2235 }, { "epoch": 0.5155637537468296, "grad_norm": 0.9010011595528595, "learning_rate": 1.773373131291854e-06, "loss": 0.46462053060531616, "step": 2236 }, { "epoch": 0.5157943278764122, "grad_norm": 1.1288843437350349, "learning_rate": 1.7731313741028608e-06, "loss": 0.47799748182296753, "step": 2237 }, { "epoch": 0.5160249020059949, "grad_norm": 1.2958124494051022, "learning_rate": 1.772889504531387e-06, "loss": 0.43448662757873535, "step": 2238 }, { "epoch": 0.5162554761355775, "grad_norm": 1.2781442130344307, "learning_rate": 1.7726475226125905e-06, "loss": 0.4609360098838806, "step": 2239 }, { "epoch": 0.5164860502651603, "grad_norm": 1.123946418980165, "learning_rate": 1.7724054283816463e-06, "loss": 0.505261242389679, "step": 2240 }, { "epoch": 0.5167166243947429, "grad_norm": 1.1143888709548355, "learning_rate": 1.772163221873745e-06, "loss": 0.3812851905822754, "step": 2241 }, { "epoch": 0.5169471985243256, "grad_norm": 1.1698544335678498, "learning_rate": 1.7719209031240938e-06, "loss": 0.42545294761657715, "step": 2242 }, { "epoch": 0.5171777726539082, "grad_norm": 1.3964979839005025, "learning_rate": 1.771678472167916e-06, "loss": 0.45135340094566345, "step": 2243 }, { "epoch": 0.5174083467834909, "grad_norm": 1.1118819857040387, "learning_rate": 1.7714359290404514e-06, "loss": 0.4499250650405884, "step": 2244 }, { "epoch": 0.5176389209130735, "grad_norm": 1.2793420965554383, "learning_rate": 1.7711932737769564e-06, "loss": 0.4355557858943939, "step": 2245 }, { "epoch": 0.5178694950426562, "grad_norm": 1.3068878220482505, "learning_rate": 1.7709505064127036e-06, "loss": 0.4140744209289551, "step": 2246 }, { "epoch": 0.5181000691722388, "grad_norm": 1.2538619837975196, "learning_rate": 1.7707076269829809e-06, "loss": 0.5108504891395569, "step": 2247 }, { "epoch": 0.5183306433018215, "grad_norm": 1.0866593797381727, "learning_rate": 1.7704646355230936e-06, "loss": 0.5064615607261658, "step": 2248 }, { "epoch": 0.5185612174314042, "grad_norm": 1.4034267264652582, "learning_rate": 1.7702215320683636e-06, "loss": 0.5922794342041016, "step": 2249 }, { "epoch": 0.5187917915609869, "grad_norm": 1.236045367714828, "learning_rate": 1.7699783166541279e-06, "loss": 0.3890082836151123, "step": 2250 }, { "epoch": 0.5190223656905695, "grad_norm": 1.1663861833023768, "learning_rate": 1.7697349893157402e-06, "loss": 0.5585668087005615, "step": 2251 }, { "epoch": 0.5192529398201522, "grad_norm": 1.2125542528327162, "learning_rate": 1.7694915500885706e-06, "loss": 0.3904608488082886, "step": 2252 }, { "epoch": 0.5194835139497348, "grad_norm": 1.3213509465151734, "learning_rate": 1.7692479990080056e-06, "loss": 0.4764491617679596, "step": 2253 }, { "epoch": 0.5197140880793175, "grad_norm": 1.3113796870909902, "learning_rate": 1.769004336109448e-06, "loss": 0.49443554878234863, "step": 2254 }, { "epoch": 0.5199446622089001, "grad_norm": 1.2196571448758133, "learning_rate": 1.7687605614283165e-06, "loss": 0.4679003357887268, "step": 2255 }, { "epoch": 0.5201752363384828, "grad_norm": 1.6767016497784393, "learning_rate": 1.7685166750000465e-06, "loss": 0.6968683004379272, "step": 2256 }, { "epoch": 0.5204058104680654, "grad_norm": 1.406455012631932, "learning_rate": 1.7682726768600888e-06, "loss": 0.5688217878341675, "step": 2257 }, { "epoch": 0.5206363845976482, "grad_norm": 1.176050025614157, "learning_rate": 1.7680285670439115e-06, "loss": 0.4688011705875397, "step": 2258 }, { "epoch": 0.5208669587272308, "grad_norm": 1.1772680288415673, "learning_rate": 1.7677843455869984e-06, "loss": 0.6447713971138, "step": 2259 }, { "epoch": 0.5210975328568135, "grad_norm": 1.3187686937196665, "learning_rate": 1.767540012524849e-06, "loss": 0.578650951385498, "step": 2260 }, { "epoch": 0.5213281069863961, "grad_norm": 1.4425748519700892, "learning_rate": 1.76729556789298e-06, "loss": 0.5001357197761536, "step": 2261 }, { "epoch": 0.5215586811159788, "grad_norm": 1.2145912604177214, "learning_rate": 1.7670510117269242e-06, "loss": 0.5336331129074097, "step": 2262 }, { "epoch": 0.5217892552455614, "grad_norm": 1.2105621787494676, "learning_rate": 1.76680634406223e-06, "loss": 0.5628900527954102, "step": 2263 }, { "epoch": 0.5220198293751441, "grad_norm": 1.2476030455409495, "learning_rate": 1.766561564934462e-06, "loss": 0.46497443318367004, "step": 2264 }, { "epoch": 0.5222504035047267, "grad_norm": 1.4921989012106511, "learning_rate": 1.7663166743792019e-06, "loss": 0.617607831954956, "step": 2265 }, { "epoch": 0.5224809776343095, "grad_norm": 1.1582259137476871, "learning_rate": 1.7660716724320468e-06, "loss": 0.5236914157867432, "step": 2266 }, { "epoch": 0.5227115517638921, "grad_norm": 1.2919028654437321, "learning_rate": 1.76582655912861e-06, "loss": 0.5527941584587097, "step": 2267 }, { "epoch": 0.5229421258934748, "grad_norm": 1.208274388494889, "learning_rate": 1.7655813345045218e-06, "loss": 0.5394654273986816, "step": 2268 }, { "epoch": 0.5231727000230574, "grad_norm": 1.1822216818330542, "learning_rate": 1.7653359985954275e-06, "loss": 0.47050246596336365, "step": 2269 }, { "epoch": 0.5234032741526401, "grad_norm": 1.2893306401147882, "learning_rate": 1.7650905514369894e-06, "loss": 0.49413689970970154, "step": 2270 }, { "epoch": 0.5236338482822227, "grad_norm": 1.3086960549802995, "learning_rate": 1.7648449930648856e-06, "loss": 0.5568829774856567, "step": 2271 }, { "epoch": 0.5238644224118054, "grad_norm": 1.2475799557753502, "learning_rate": 1.7645993235148107e-06, "loss": 0.49238815903663635, "step": 2272 }, { "epoch": 0.524094996541388, "grad_norm": 1.16612817534413, "learning_rate": 1.7643535428224752e-06, "loss": 0.5580959320068359, "step": 2273 }, { "epoch": 0.5243255706709707, "grad_norm": 1.4921637909191205, "learning_rate": 1.7641076510236052e-06, "loss": 0.5853499174118042, "step": 2274 }, { "epoch": 0.5245561448005533, "grad_norm": 1.3988944269011947, "learning_rate": 1.7638616481539448e-06, "loss": 0.5638653635978699, "step": 2275 }, { "epoch": 0.5247867189301361, "grad_norm": 1.2859178438597552, "learning_rate": 1.7636155342492521e-06, "loss": 0.5197241306304932, "step": 2276 }, { "epoch": 0.5250172930597187, "grad_norm": 1.1094174928372944, "learning_rate": 1.7633693093453026e-06, "loss": 0.4137725234031677, "step": 2277 }, { "epoch": 0.5252478671893014, "grad_norm": 1.2940062745509122, "learning_rate": 1.7631229734778872e-06, "loss": 0.54244065284729, "step": 2278 }, { "epoch": 0.525478441318884, "grad_norm": 1.1871875469955007, "learning_rate": 1.7628765266828137e-06, "loss": 0.5215432047843933, "step": 2279 }, { "epoch": 0.5257090154484667, "grad_norm": 1.1984410258580116, "learning_rate": 1.7626299689959057e-06, "loss": 0.5559565424919128, "step": 2280 }, { "epoch": 0.5259395895780493, "grad_norm": 1.1663711332671047, "learning_rate": 1.7623833004530026e-06, "loss": 0.5251328945159912, "step": 2281 }, { "epoch": 0.526170163707632, "grad_norm": 1.241523894329925, "learning_rate": 1.7621365210899598e-06, "loss": 0.5351072549819946, "step": 2282 }, { "epoch": 0.5264007378372146, "grad_norm": 1.1901641374825476, "learning_rate": 1.7618896309426504e-06, "loss": 0.46850037574768066, "step": 2283 }, { "epoch": 0.5266313119667974, "grad_norm": 1.1697893294442419, "learning_rate": 1.761642630046961e-06, "loss": 0.5001033544540405, "step": 2284 }, { "epoch": 0.52686188609638, "grad_norm": 0.9279299862604019, "learning_rate": 1.7613955184387968e-06, "loss": 0.47946250438690186, "step": 2285 }, { "epoch": 0.5270924602259627, "grad_norm": 1.0539631796672029, "learning_rate": 1.761148296154077e-06, "loss": 0.4743049144744873, "step": 2286 }, { "epoch": 0.5273230343555453, "grad_norm": 1.154224335020326, "learning_rate": 1.7609009632287389e-06, "loss": 0.4518652558326721, "step": 2287 }, { "epoch": 0.527553608485128, "grad_norm": 1.0859896497705106, "learning_rate": 1.7606535196987338e-06, "loss": 0.5021224617958069, "step": 2288 }, { "epoch": 0.5277841826147106, "grad_norm": 1.4832483769951506, "learning_rate": 1.760405965600031e-06, "loss": 0.4848078489303589, "step": 2289 }, { "epoch": 0.5280147567442933, "grad_norm": 1.22421773905119, "learning_rate": 1.7601583009686142e-06, "loss": 0.49077051877975464, "step": 2290 }, { "epoch": 0.5282453308738759, "grad_norm": 1.2916718452438969, "learning_rate": 1.7599105258404848e-06, "loss": 0.4802943468093872, "step": 2291 }, { "epoch": 0.5284759050034586, "grad_norm": 1.4055248895326071, "learning_rate": 1.7596626402516589e-06, "loss": 0.5397455096244812, "step": 2292 }, { "epoch": 0.5287064791330413, "grad_norm": 1.0497017336135974, "learning_rate": 1.759414644238169e-06, "loss": 0.478559672832489, "step": 2293 }, { "epoch": 0.528937053262624, "grad_norm": 1.112359888255478, "learning_rate": 1.7591665378360644e-06, "loss": 0.5080797672271729, "step": 2294 }, { "epoch": 0.5291676273922066, "grad_norm": 1.0468621326779766, "learning_rate": 1.7589183210814093e-06, "loss": 0.4959479868412018, "step": 2295 }, { "epoch": 0.5293982015217893, "grad_norm": 1.1985868339045591, "learning_rate": 1.7586699940102853e-06, "loss": 0.512288510799408, "step": 2296 }, { "epoch": 0.5296287756513719, "grad_norm": 1.1129893572343195, "learning_rate": 1.7584215566587886e-06, "loss": 0.525113046169281, "step": 2297 }, { "epoch": 0.5298593497809546, "grad_norm": 1.2088844531850982, "learning_rate": 1.7581730090630322e-06, "loss": 0.3715069890022278, "step": 2298 }, { "epoch": 0.5300899239105372, "grad_norm": 1.3852845244524983, "learning_rate": 1.757924351259145e-06, "loss": 0.5833072662353516, "step": 2299 }, { "epoch": 0.5303204980401199, "grad_norm": 1.638098016270419, "learning_rate": 1.7576755832832721e-06, "loss": 0.5942450761795044, "step": 2300 }, { "epoch": 0.5305510721697025, "grad_norm": 1.1523961468173722, "learning_rate": 1.7574267051715745e-06, "loss": 0.4754432737827301, "step": 2301 }, { "epoch": 0.5307816462992853, "grad_norm": 1.3593694553922624, "learning_rate": 1.7571777169602287e-06, "loss": 0.5272700190544128, "step": 2302 }, { "epoch": 0.5310122204288679, "grad_norm": 1.137089307163323, "learning_rate": 1.7569286186854283e-06, "loss": 0.48376554250717163, "step": 2303 }, { "epoch": 0.5312427945584506, "grad_norm": 1.324023805933818, "learning_rate": 1.7566794103833816e-06, "loss": 0.4324077367782593, "step": 2304 }, { "epoch": 0.5314733686880332, "grad_norm": 1.2843168925212602, "learning_rate": 1.7564300920903142e-06, "loss": 0.44939202070236206, "step": 2305 }, { "epoch": 0.5317039428176159, "grad_norm": 1.2413807013846574, "learning_rate": 1.7561806638424662e-06, "loss": 0.5256277322769165, "step": 2306 }, { "epoch": 0.5319345169471985, "grad_norm": 1.0855894350628046, "learning_rate": 1.7559311256760955e-06, "loss": 0.43901991844177246, "step": 2307 }, { "epoch": 0.5321650910767812, "grad_norm": 1.3134089338347328, "learning_rate": 1.7556814776274746e-06, "loss": 0.5256138443946838, "step": 2308 }, { "epoch": 0.5323956652063638, "grad_norm": 1.3769537654510517, "learning_rate": 1.7554317197328922e-06, "loss": 0.4664478600025177, "step": 2309 }, { "epoch": 0.5326262393359465, "grad_norm": 1.1227476903728313, "learning_rate": 1.7551818520286532e-06, "loss": 0.5042726397514343, "step": 2310 }, { "epoch": 0.5328568134655292, "grad_norm": 1.3417267355052607, "learning_rate": 1.754931874551079e-06, "loss": 0.5682350397109985, "step": 2311 }, { "epoch": 0.5330873875951119, "grad_norm": 1.2416043105842551, "learning_rate": 1.754681787336505e-06, "loss": 0.5082807540893555, "step": 2312 }, { "epoch": 0.5333179617246945, "grad_norm": 1.4255568276367208, "learning_rate": 1.754431590421285e-06, "loss": 0.6020215749740601, "step": 2313 }, { "epoch": 0.5335485358542772, "grad_norm": 1.4104154799235167, "learning_rate": 1.7541812838417877e-06, "loss": 0.5004276633262634, "step": 2314 }, { "epoch": 0.5337791099838598, "grad_norm": 1.060415170291065, "learning_rate": 1.753930867634397e-06, "loss": 0.4889993667602539, "step": 2315 }, { "epoch": 0.5340096841134425, "grad_norm": 1.0849217066026469, "learning_rate": 1.7536803418355141e-06, "loss": 0.4179444909095764, "step": 2316 }, { "epoch": 0.5342402582430251, "grad_norm": 1.2618059778728548, "learning_rate": 1.7534297064815554e-06, "loss": 0.46807605028152466, "step": 2317 }, { "epoch": 0.5344708323726078, "grad_norm": 1.2827117317411258, "learning_rate": 1.7531789616089528e-06, "loss": 0.39173221588134766, "step": 2318 }, { "epoch": 0.5347014065021904, "grad_norm": 1.2820357654319097, "learning_rate": 1.7529281072541548e-06, "loss": 0.4290514886379242, "step": 2319 }, { "epoch": 0.5349319806317732, "grad_norm": 1.3778694052072273, "learning_rate": 1.752677143453626e-06, "loss": 0.6052347421646118, "step": 2320 }, { "epoch": 0.5351625547613558, "grad_norm": 1.054542888313722, "learning_rate": 1.752426070243846e-06, "loss": 0.47622209787368774, "step": 2321 }, { "epoch": 0.5353931288909385, "grad_norm": 1.128157779747108, "learning_rate": 1.7521748876613112e-06, "loss": 0.4216923415660858, "step": 2322 }, { "epoch": 0.5356237030205211, "grad_norm": 2.0737049391078384, "learning_rate": 1.751923595742533e-06, "loss": 0.5527430772781372, "step": 2323 }, { "epoch": 0.5358542771501038, "grad_norm": 1.1406433043117166, "learning_rate": 1.75167219452404e-06, "loss": 0.5562101602554321, "step": 2324 }, { "epoch": 0.5360848512796864, "grad_norm": 1.2183539446117024, "learning_rate": 1.7514206840423757e-06, "loss": 0.546181321144104, "step": 2325 }, { "epoch": 0.5363154254092691, "grad_norm": 1.5216852196360238, "learning_rate": 1.7511690643340995e-06, "loss": 0.5883532762527466, "step": 2326 }, { "epoch": 0.5365459995388517, "grad_norm": 1.2667138111118152, "learning_rate": 1.750917335435787e-06, "loss": 0.5231350660324097, "step": 2327 }, { "epoch": 0.5367765736684345, "grad_norm": 1.200525241411545, "learning_rate": 1.7506654973840292e-06, "loss": 0.4846429228782654, "step": 2328 }, { "epoch": 0.5370071477980171, "grad_norm": 1.0815584734915895, "learning_rate": 1.7504135502154335e-06, "loss": 0.43692171573638916, "step": 2329 }, { "epoch": 0.5372377219275998, "grad_norm": 1.0658062374834336, "learning_rate": 1.7501614939666234e-06, "loss": 0.5076167583465576, "step": 2330 }, { "epoch": 0.5374682960571824, "grad_norm": 1.2658937157989252, "learning_rate": 1.7499093286742373e-06, "loss": 0.5302891135215759, "step": 2331 }, { "epoch": 0.5376988701867651, "grad_norm": 1.3200406937261826, "learning_rate": 1.7496570543749303e-06, "loss": 0.5827817916870117, "step": 2332 }, { "epoch": 0.5379294443163477, "grad_norm": 1.3684047155196064, "learning_rate": 1.7494046711053726e-06, "loss": 0.6765470504760742, "step": 2333 }, { "epoch": 0.5381600184459304, "grad_norm": 1.3001315312834418, "learning_rate": 1.7491521789022513e-06, "loss": 0.48666322231292725, "step": 2334 }, { "epoch": 0.538390592575513, "grad_norm": 1.0490910849362622, "learning_rate": 1.7488995778022685e-06, "loss": 0.5163695812225342, "step": 2335 }, { "epoch": 0.5386211667050956, "grad_norm": 1.1765286879203154, "learning_rate": 1.748646867842142e-06, "loss": 0.44487982988357544, "step": 2336 }, { "epoch": 0.5388517408346783, "grad_norm": 1.2992285046307706, "learning_rate": 1.7483940490586058e-06, "loss": 0.5512663722038269, "step": 2337 }, { "epoch": 0.539082314964261, "grad_norm": 1.1533551829707172, "learning_rate": 1.7481411214884098e-06, "loss": 0.461128294467926, "step": 2338 }, { "epoch": 0.5393128890938437, "grad_norm": 1.2239639921661383, "learning_rate": 1.7478880851683197e-06, "loss": 0.47291088104248047, "step": 2339 }, { "epoch": 0.5395434632234263, "grad_norm": 1.1568837363453548, "learning_rate": 1.747634940135117e-06, "loss": 0.5900166034698486, "step": 2340 }, { "epoch": 0.539774037353009, "grad_norm": 1.0385421801821113, "learning_rate": 1.7473816864255983e-06, "loss": 0.3878340721130371, "step": 2341 }, { "epoch": 0.5400046114825916, "grad_norm": 1.442772155197814, "learning_rate": 1.7471283240765775e-06, "loss": 0.5671564340591431, "step": 2342 }, { "epoch": 0.5402351856121743, "grad_norm": 1.1602673867587185, "learning_rate": 1.7468748531248824e-06, "loss": 0.5153918266296387, "step": 2343 }, { "epoch": 0.5404657597417569, "grad_norm": 1.2187996046056446, "learning_rate": 1.7466212736073585e-06, "loss": 0.49520084261894226, "step": 2344 }, { "epoch": 0.5406963338713396, "grad_norm": 1.0955374839449357, "learning_rate": 1.7463675855608654e-06, "loss": 0.4884970784187317, "step": 2345 }, { "epoch": 0.5409269080009222, "grad_norm": 1.401002336922335, "learning_rate": 1.7461137890222798e-06, "loss": 0.5233277678489685, "step": 2346 }, { "epoch": 0.541157482130505, "grad_norm": 1.272363275240415, "learning_rate": 1.7458598840284928e-06, "loss": 0.44011372327804565, "step": 2347 }, { "epoch": 0.5413880562600876, "grad_norm": 1.1593134205382656, "learning_rate": 1.745605870616413e-06, "loss": 0.4833263158798218, "step": 2348 }, { "epoch": 0.5416186303896703, "grad_norm": 1.186578949511732, "learning_rate": 1.7453517488229634e-06, "loss": 0.4852379262447357, "step": 2349 }, { "epoch": 0.5418492045192529, "grad_norm": 1.527590855990685, "learning_rate": 1.7450975186850831e-06, "loss": 0.4710320830345154, "step": 2350 }, { "epoch": 0.5420797786488356, "grad_norm": 1.4382691899722804, "learning_rate": 1.744843180239727e-06, "loss": 0.5144790410995483, "step": 2351 }, { "epoch": 0.5423103527784182, "grad_norm": 1.3784898997392558, "learning_rate": 1.7445887335238663e-06, "loss": 0.5815445184707642, "step": 2352 }, { "epoch": 0.5425409269080009, "grad_norm": 1.1629274836022288, "learning_rate": 1.7443341785744864e-06, "loss": 0.5101407170295715, "step": 2353 }, { "epoch": 0.5427715010375835, "grad_norm": 1.1760272227987194, "learning_rate": 1.7440795154285905e-06, "loss": 0.4584839940071106, "step": 2354 }, { "epoch": 0.5430020751671663, "grad_norm": 1.323122873632264, "learning_rate": 1.743824744123196e-06, "loss": 0.482247531414032, "step": 2355 }, { "epoch": 0.5432326492967489, "grad_norm": 1.1361176263052393, "learning_rate": 1.7435698646953364e-06, "loss": 0.5503325462341309, "step": 2356 }, { "epoch": 0.5434632234263316, "grad_norm": 1.2952580221197654, "learning_rate": 1.7433148771820612e-06, "loss": 0.4803489148616791, "step": 2357 }, { "epoch": 0.5436937975559142, "grad_norm": 1.303291620807208, "learning_rate": 1.7430597816204351e-06, "loss": 0.5388872027397156, "step": 2358 }, { "epoch": 0.5439243716854969, "grad_norm": 1.6209081192397237, "learning_rate": 1.742804578047539e-06, "loss": 0.512636125087738, "step": 2359 }, { "epoch": 0.5441549458150795, "grad_norm": 1.5943501598581358, "learning_rate": 1.7425492665004699e-06, "loss": 0.49154865741729736, "step": 2360 }, { "epoch": 0.5443855199446622, "grad_norm": 1.1498651594774036, "learning_rate": 1.7422938470163389e-06, "loss": 0.5185250639915466, "step": 2361 }, { "epoch": 0.5446160940742448, "grad_norm": 1.5663688017502957, "learning_rate": 1.7420383196322747e-06, "loss": 0.5474511384963989, "step": 2362 }, { "epoch": 0.5448466682038275, "grad_norm": 1.3465441719791955, "learning_rate": 1.7417826843854202e-06, "loss": 0.48212137818336487, "step": 2363 }, { "epoch": 0.5450772423334102, "grad_norm": 1.1320785808666363, "learning_rate": 1.7415269413129348e-06, "loss": 0.47983086109161377, "step": 2364 }, { "epoch": 0.5453078164629929, "grad_norm": 1.1314426678618292, "learning_rate": 1.7412710904519932e-06, "loss": 0.4935225546360016, "step": 2365 }, { "epoch": 0.5455383905925755, "grad_norm": 1.2528535153373956, "learning_rate": 1.7410151318397862e-06, "loss": 0.5167664289474487, "step": 2366 }, { "epoch": 0.5457689647221582, "grad_norm": 1.1782327982922274, "learning_rate": 1.74075906551352e-06, "loss": 0.5116056799888611, "step": 2367 }, { "epoch": 0.5459995388517408, "grad_norm": 1.1184728717072068, "learning_rate": 1.7405028915104158e-06, "loss": 0.4709595739841461, "step": 2368 }, { "epoch": 0.5462301129813235, "grad_norm": 1.560534410686712, "learning_rate": 1.7402466098677118e-06, "loss": 0.3989061117172241, "step": 2369 }, { "epoch": 0.5464606871109061, "grad_norm": 1.1397817693321244, "learning_rate": 1.739990220622661e-06, "loss": 0.45720764994621277, "step": 2370 }, { "epoch": 0.5466912612404888, "grad_norm": 1.6154705847610804, "learning_rate": 1.739733723812532e-06, "loss": 0.5865384936332703, "step": 2371 }, { "epoch": 0.5469218353700714, "grad_norm": 1.3129437136284077, "learning_rate": 1.7394771194746092e-06, "loss": 0.4451501965522766, "step": 2372 }, { "epoch": 0.5471524094996542, "grad_norm": 1.2213938230584949, "learning_rate": 1.7392204076461928e-06, "loss": 0.4628486633300781, "step": 2373 }, { "epoch": 0.5473829836292368, "grad_norm": 1.2854198948482758, "learning_rate": 1.7389635883645984e-06, "loss": 0.4797760248184204, "step": 2374 }, { "epoch": 0.5476135577588195, "grad_norm": 1.2890601616689177, "learning_rate": 1.7387066616671571e-06, "loss": 0.4716770648956299, "step": 2375 }, { "epoch": 0.5478441318884021, "grad_norm": 1.071991179643841, "learning_rate": 1.738449627591216e-06, "loss": 0.504901647567749, "step": 2376 }, { "epoch": 0.5480747060179848, "grad_norm": 1.259141194312177, "learning_rate": 1.7381924861741375e-06, "loss": 0.5248615145683289, "step": 2377 }, { "epoch": 0.5483052801475674, "grad_norm": 1.1551298194401718, "learning_rate": 1.7379352374532998e-06, "loss": 0.41704076528549194, "step": 2378 }, { "epoch": 0.5485358542771501, "grad_norm": 1.1093382819710802, "learning_rate": 1.7376778814660966e-06, "loss": 0.42278197407722473, "step": 2379 }, { "epoch": 0.5487664284067327, "grad_norm": 1.3240414194175114, "learning_rate": 1.7374204182499372e-06, "loss": 0.4104729890823364, "step": 2380 }, { "epoch": 0.5489970025363154, "grad_norm": 1.237574436817826, "learning_rate": 1.7371628478422467e-06, "loss": 0.5205684304237366, "step": 2381 }, { "epoch": 0.549227576665898, "grad_norm": 1.2914374831424469, "learning_rate": 1.7369051702804648e-06, "loss": 0.4743306040763855, "step": 2382 }, { "epoch": 0.5494581507954808, "grad_norm": 1.4263628155545096, "learning_rate": 1.7366473856020486e-06, "loss": 0.6324253678321838, "step": 2383 }, { "epoch": 0.5496887249250634, "grad_norm": 1.2093119037905458, "learning_rate": 1.736389493844469e-06, "loss": 0.46466588973999023, "step": 2384 }, { "epoch": 0.5499192990546461, "grad_norm": 1.257464863029373, "learning_rate": 1.7361314950452136e-06, "loss": 0.4117918014526367, "step": 2385 }, { "epoch": 0.5501498731842287, "grad_norm": 1.0582357147304537, "learning_rate": 1.7358733892417848e-06, "loss": 0.40341615676879883, "step": 2386 }, { "epoch": 0.5503804473138114, "grad_norm": 1.2083128590610215, "learning_rate": 1.735615176471701e-06, "loss": 0.642855167388916, "step": 2387 }, { "epoch": 0.550611021443394, "grad_norm": 1.3821025749968947, "learning_rate": 1.7353568567724959e-06, "loss": 0.5490958094596863, "step": 2388 }, { "epoch": 0.5508415955729767, "grad_norm": 1.0972882559163057, "learning_rate": 1.7350984301817192e-06, "loss": 0.5154834985733032, "step": 2389 }, { "epoch": 0.5510721697025593, "grad_norm": 1.5156914347306212, "learning_rate": 1.7348398967369358e-06, "loss": 0.49488651752471924, "step": 2390 }, { "epoch": 0.5513027438321421, "grad_norm": 1.097164324799634, "learning_rate": 1.7345812564757257e-06, "loss": 0.4211215674877167, "step": 2391 }, { "epoch": 0.5515333179617247, "grad_norm": 1.1060429845011046, "learning_rate": 1.7343225094356855e-06, "loss": 0.41840964555740356, "step": 2392 }, { "epoch": 0.5517638920913074, "grad_norm": 1.1213399734290006, "learning_rate": 1.7340636556544264e-06, "loss": 0.540780782699585, "step": 2393 }, { "epoch": 0.55199446622089, "grad_norm": 1.328334535307567, "learning_rate": 1.7338046951695754e-06, "loss": 0.4967775046825409, "step": 2394 }, { "epoch": 0.5522250403504727, "grad_norm": 1.337457775660936, "learning_rate": 1.733545628018775e-06, "loss": 0.5155577659606934, "step": 2395 }, { "epoch": 0.5524556144800553, "grad_norm": 1.3409169497631646, "learning_rate": 1.7332864542396832e-06, "loss": 0.5106005072593689, "step": 2396 }, { "epoch": 0.552686188609638, "grad_norm": 1.106469342539302, "learning_rate": 1.7330271738699737e-06, "loss": 0.3459712862968445, "step": 2397 }, { "epoch": 0.5529167627392206, "grad_norm": 1.238811250755909, "learning_rate": 1.7327677869473356e-06, "loss": 0.4877927303314209, "step": 2398 }, { "epoch": 0.5531473368688034, "grad_norm": 1.298959309949219, "learning_rate": 1.7325082935094732e-06, "loss": 0.5183857679367065, "step": 2399 }, { "epoch": 0.553377910998386, "grad_norm": 1.1165163437308863, "learning_rate": 1.7322486935941068e-06, "loss": 0.4326491057872772, "step": 2400 }, { "epoch": 0.5536084851279687, "grad_norm": 1.2472729786065346, "learning_rate": 1.7319889872389716e-06, "loss": 0.4688712954521179, "step": 2401 }, { "epoch": 0.5538390592575513, "grad_norm": 1.2787851295656323, "learning_rate": 1.7317291744818184e-06, "loss": 0.4997788071632385, "step": 2402 }, { "epoch": 0.554069633387134, "grad_norm": 1.3085189564145994, "learning_rate": 1.731469255360414e-06, "loss": 0.5271172523498535, "step": 2403 }, { "epoch": 0.5543002075167166, "grad_norm": 1.3689434717845856, "learning_rate": 1.73120922991254e-06, "loss": 0.5339269042015076, "step": 2404 }, { "epoch": 0.5545307816462993, "grad_norm": 1.2181123008680574, "learning_rate": 1.7309490981759938e-06, "loss": 0.47052568197250366, "step": 2405 }, { "epoch": 0.5547613557758819, "grad_norm": 1.2508289898124627, "learning_rate": 1.7306888601885885e-06, "loss": 0.4112280309200287, "step": 2406 }, { "epoch": 0.5549919299054646, "grad_norm": 1.1812487853939355, "learning_rate": 1.730428515988152e-06, "loss": 0.5473710298538208, "step": 2407 }, { "epoch": 0.5552225040350472, "grad_norm": 1.6509587018432181, "learning_rate": 1.7301680656125277e-06, "loss": 0.5079115629196167, "step": 2408 }, { "epoch": 0.55545307816463, "grad_norm": 1.193259996108104, "learning_rate": 1.7299075090995755e-06, "loss": 0.4805012345314026, "step": 2409 }, { "epoch": 0.5556836522942126, "grad_norm": 1.1958830357632493, "learning_rate": 1.729646846487169e-06, "loss": 0.4657474756240845, "step": 2410 }, { "epoch": 0.5559142264237953, "grad_norm": 1.2442110767414496, "learning_rate": 1.729386077813199e-06, "loss": 0.5887978076934814, "step": 2411 }, { "epoch": 0.5561448005533779, "grad_norm": 1.0093517139206267, "learning_rate": 1.7291252031155704e-06, "loss": 0.43841421604156494, "step": 2412 }, { "epoch": 0.5563753746829606, "grad_norm": 1.304380451031228, "learning_rate": 1.728864222432204e-06, "loss": 0.5026551485061646, "step": 2413 }, { "epoch": 0.5566059488125432, "grad_norm": 1.2344100865196312, "learning_rate": 1.728603135801036e-06, "loss": 0.4525277614593506, "step": 2414 }, { "epoch": 0.5568365229421259, "grad_norm": 1.3128956010351178, "learning_rate": 1.7283419432600182e-06, "loss": 0.4095644950866699, "step": 2415 }, { "epoch": 0.5570670970717085, "grad_norm": 1.2351186073808627, "learning_rate": 1.7280806448471173e-06, "loss": 0.5098834037780762, "step": 2416 }, { "epoch": 0.5572976712012913, "grad_norm": 0.9689174321932323, "learning_rate": 1.7278192406003159e-06, "loss": 0.42802777886390686, "step": 2417 }, { "epoch": 0.5575282453308739, "grad_norm": 1.283644069549869, "learning_rate": 1.7275577305576113e-06, "loss": 0.5036378502845764, "step": 2418 }, { "epoch": 0.5577588194604566, "grad_norm": 1.2960652355454445, "learning_rate": 1.7272961147570175e-06, "loss": 0.5324885249137878, "step": 2419 }, { "epoch": 0.5579893935900392, "grad_norm": 1.6334614504341187, "learning_rate": 1.727034393236562e-06, "loss": 0.5763842463493347, "step": 2420 }, { "epoch": 0.5582199677196219, "grad_norm": 1.343133312027108, "learning_rate": 1.7267725660342895e-06, "loss": 0.49291908740997314, "step": 2421 }, { "epoch": 0.5584505418492045, "grad_norm": 1.651006143174213, "learning_rate": 1.7265106331882588e-06, "loss": 0.5114868879318237, "step": 2422 }, { "epoch": 0.5586811159787872, "grad_norm": 1.1152807378164393, "learning_rate": 1.7262485947365449e-06, "loss": 0.42442530393600464, "step": 2423 }, { "epoch": 0.5589116901083698, "grad_norm": 1.1309517905090323, "learning_rate": 1.725986450717237e-06, "loss": 0.3680551052093506, "step": 2424 }, { "epoch": 0.5591422642379525, "grad_norm": 1.2183025106634426, "learning_rate": 1.725724201168441e-06, "loss": 0.5849576592445374, "step": 2425 }, { "epoch": 0.5593728383675352, "grad_norm": 1.3597945996239442, "learning_rate": 1.7254618461282773e-06, "loss": 0.48919233679771423, "step": 2426 }, { "epoch": 0.5596034124971179, "grad_norm": 1.1753552641156777, "learning_rate": 1.7251993856348821e-06, "loss": 0.4857720732688904, "step": 2427 }, { "epoch": 0.5598339866267005, "grad_norm": 1.3324934167522995, "learning_rate": 1.7249368197264062e-06, "loss": 0.5106808543205261, "step": 2428 }, { "epoch": 0.5600645607562832, "grad_norm": 1.305986731975411, "learning_rate": 1.724674148441017e-06, "loss": 0.500100314617157, "step": 2429 }, { "epoch": 0.5602951348858658, "grad_norm": 1.226560051936561, "learning_rate": 1.7244113718168957e-06, "loss": 0.5389110445976257, "step": 2430 }, { "epoch": 0.5605257090154485, "grad_norm": 1.2848731557614161, "learning_rate": 1.72414848989224e-06, "loss": 0.42860496044158936, "step": 2431 }, { "epoch": 0.5607562831450311, "grad_norm": 1.2392935426075953, "learning_rate": 1.723885502705262e-06, "loss": 0.4867728352546692, "step": 2432 }, { "epoch": 0.5609868572746138, "grad_norm": 1.215687300161219, "learning_rate": 1.7236224102941899e-06, "loss": 0.49194633960723877, "step": 2433 }, { "epoch": 0.5612174314041964, "grad_norm": 1.278802988367442, "learning_rate": 1.7233592126972667e-06, "loss": 0.5194358229637146, "step": 2434 }, { "epoch": 0.5614480055337792, "grad_norm": 1.518126298536734, "learning_rate": 1.723095909952751e-06, "loss": 0.4738645553588867, "step": 2435 }, { "epoch": 0.5616785796633618, "grad_norm": 1.1842233457279843, "learning_rate": 1.7228325020989165e-06, "loss": 0.48232927918434143, "step": 2436 }, { "epoch": 0.5619091537929445, "grad_norm": 1.0590325088103263, "learning_rate": 1.7225689891740522e-06, "loss": 0.5192145109176636, "step": 2437 }, { "epoch": 0.5621397279225271, "grad_norm": 1.2756639382228332, "learning_rate": 1.7223053712164621e-06, "loss": 0.4934930205345154, "step": 2438 }, { "epoch": 0.5623703020521098, "grad_norm": 1.294610704846241, "learning_rate": 1.722041648264466e-06, "loss": 0.5022200345993042, "step": 2439 }, { "epoch": 0.5626008761816924, "grad_norm": 1.15319893327068, "learning_rate": 1.7217778203563986e-06, "loss": 0.45300528407096863, "step": 2440 }, { "epoch": 0.5628314503112751, "grad_norm": 1.1335234735988557, "learning_rate": 1.7215138875306103e-06, "loss": 0.4965200126171112, "step": 2441 }, { "epoch": 0.5630620244408577, "grad_norm": 1.3081789750993726, "learning_rate": 1.721249849825466e-06, "loss": 0.4618280231952667, "step": 2442 }, { "epoch": 0.5632925985704405, "grad_norm": 1.255070715358214, "learning_rate": 1.7209857072793464e-06, "loss": 0.42270147800445557, "step": 2443 }, { "epoch": 0.5635231727000231, "grad_norm": 1.0830436199918496, "learning_rate": 1.720721459930647e-06, "loss": 0.5200725793838501, "step": 2444 }, { "epoch": 0.5637537468296058, "grad_norm": 1.1368018551382484, "learning_rate": 1.7204571078177792e-06, "loss": 0.47475337982177734, "step": 2445 }, { "epoch": 0.5639843209591884, "grad_norm": 1.5482537414338693, "learning_rate": 1.7201926509791693e-06, "loss": 0.5493113994598389, "step": 2446 }, { "epoch": 0.564214895088771, "grad_norm": 1.2861044506324582, "learning_rate": 1.719928089453259e-06, "loss": 0.4743562340736389, "step": 2447 }, { "epoch": 0.5644454692183537, "grad_norm": 1.2343956116266135, "learning_rate": 1.7196634232785038e-06, "loss": 0.5145455598831177, "step": 2448 }, { "epoch": 0.5646760433479363, "grad_norm": 1.5340568803714763, "learning_rate": 1.719398652493377e-06, "loss": 0.45072540640830994, "step": 2449 }, { "epoch": 0.564906617477519, "grad_norm": 1.2363775684809537, "learning_rate": 1.7191337771363651e-06, "loss": 0.5150895714759827, "step": 2450 }, { "epoch": 0.5651371916071016, "grad_norm": 1.4238500687035243, "learning_rate": 1.7188687972459705e-06, "loss": 0.5025302171707153, "step": 2451 }, { "epoch": 0.5653677657366843, "grad_norm": 1.2149895801108108, "learning_rate": 1.7186037128607107e-06, "loss": 0.618930459022522, "step": 2452 }, { "epoch": 0.565598339866267, "grad_norm": 1.1681250836374313, "learning_rate": 1.7183385240191183e-06, "loss": 0.5841591358184814, "step": 2453 }, { "epoch": 0.5658289139958497, "grad_norm": 1.2481599814364495, "learning_rate": 1.7180732307597413e-06, "loss": 0.4915233850479126, "step": 2454 }, { "epoch": 0.5660594881254323, "grad_norm": 1.127625184290067, "learning_rate": 1.7178078331211429e-06, "loss": 0.46732476353645325, "step": 2455 }, { "epoch": 0.566290062255015, "grad_norm": 1.1121526599443385, "learning_rate": 1.7175423311419013e-06, "loss": 0.4640737771987915, "step": 2456 }, { "epoch": 0.5665206363845976, "grad_norm": 1.2800685498732043, "learning_rate": 1.7172767248606095e-06, "loss": 0.39535683393478394, "step": 2457 }, { "epoch": 0.5667512105141803, "grad_norm": 1.196636942462094, "learning_rate": 1.7170110143158766e-06, "loss": 0.4782179594039917, "step": 2458 }, { "epoch": 0.5669817846437629, "grad_norm": 1.5731644028680265, "learning_rate": 1.7167451995463258e-06, "loss": 0.6186003684997559, "step": 2459 }, { "epoch": 0.5672123587733456, "grad_norm": 1.3163111292704002, "learning_rate": 1.7164792805905965e-06, "loss": 0.4915347099304199, "step": 2460 }, { "epoch": 0.5674429329029282, "grad_norm": 1.2683630708246802, "learning_rate": 1.7162132574873422e-06, "loss": 0.4789005517959595, "step": 2461 }, { "epoch": 0.567673507032511, "grad_norm": 1.6928847577315913, "learning_rate": 1.7159471302752326e-06, "loss": 0.6307233572006226, "step": 2462 }, { "epoch": 0.5679040811620936, "grad_norm": 1.240574680316347, "learning_rate": 1.7156808989929514e-06, "loss": 0.5278424024581909, "step": 2463 }, { "epoch": 0.5681346552916763, "grad_norm": 1.4388020329709479, "learning_rate": 1.7154145636791988e-06, "loss": 0.48552995920181274, "step": 2464 }, { "epoch": 0.5683652294212589, "grad_norm": 1.3679954470869684, "learning_rate": 1.7151481243726885e-06, "loss": 0.5125370621681213, "step": 2465 }, { "epoch": 0.5685958035508416, "grad_norm": 1.3448408660581435, "learning_rate": 1.7148815811121506e-06, "loss": 0.44231730699539185, "step": 2466 }, { "epoch": 0.5688263776804242, "grad_norm": 1.367567415522102, "learning_rate": 1.7146149339363296e-06, "loss": 0.5593529939651489, "step": 2467 }, { "epoch": 0.5690569518100069, "grad_norm": 1.347377301704866, "learning_rate": 1.714348182883986e-06, "loss": 0.4830925464630127, "step": 2468 }, { "epoch": 0.5692875259395895, "grad_norm": 1.4913136319748062, "learning_rate": 1.714081327993894e-06, "loss": 0.5538743734359741, "step": 2469 }, { "epoch": 0.5695181000691723, "grad_norm": 1.4135532975212044, "learning_rate": 1.7138143693048441e-06, "loss": 0.5145905613899231, "step": 2470 }, { "epoch": 0.5697486741987549, "grad_norm": 1.301183082915478, "learning_rate": 1.713547306855641e-06, "loss": 0.47706612944602966, "step": 2471 }, { "epoch": 0.5699792483283376, "grad_norm": 1.2528774428968483, "learning_rate": 1.7132801406851056e-06, "loss": 0.45162689685821533, "step": 2472 }, { "epoch": 0.5702098224579202, "grad_norm": 1.5721475156494655, "learning_rate": 1.7130128708320727e-06, "loss": 0.5141111612319946, "step": 2473 }, { "epoch": 0.5704403965875029, "grad_norm": 1.0845779630695374, "learning_rate": 1.7127454973353932e-06, "loss": 0.4443173408508301, "step": 2474 }, { "epoch": 0.5706709707170855, "grad_norm": 1.2704796440823871, "learning_rate": 1.7124780202339317e-06, "loss": 0.4162046015262604, "step": 2475 }, { "epoch": 0.5709015448466682, "grad_norm": 1.100254820278883, "learning_rate": 1.7122104395665695e-06, "loss": 0.44526439905166626, "step": 2476 }, { "epoch": 0.5711321189762508, "grad_norm": 1.3237501807128542, "learning_rate": 1.7119427553722016e-06, "loss": 0.5069452524185181, "step": 2477 }, { "epoch": 0.5713626931058335, "grad_norm": 1.2833720010816703, "learning_rate": 1.7116749676897393e-06, "loss": 0.46709829568862915, "step": 2478 }, { "epoch": 0.5715932672354161, "grad_norm": 1.2011083992406753, "learning_rate": 1.7114070765581078e-06, "loss": 0.5443992614746094, "step": 2479 }, { "epoch": 0.5718238413649989, "grad_norm": 1.5805836267397864, "learning_rate": 1.7111390820162477e-06, "loss": 0.4307284653186798, "step": 2480 }, { "epoch": 0.5720544154945815, "grad_norm": 1.272693158326629, "learning_rate": 1.7108709841031148e-06, "loss": 0.4753509759902954, "step": 2481 }, { "epoch": 0.5722849896241642, "grad_norm": 1.3966851487133662, "learning_rate": 1.7106027828576798e-06, "loss": 0.5689436197280884, "step": 2482 }, { "epoch": 0.5725155637537468, "grad_norm": 1.3535603859222731, "learning_rate": 1.710334478318929e-06, "loss": 0.47182410955429077, "step": 2483 }, { "epoch": 0.5727461378833295, "grad_norm": 1.4415402220476166, "learning_rate": 1.7100660705258623e-06, "loss": 0.4418888986110687, "step": 2484 }, { "epoch": 0.5729767120129121, "grad_norm": 1.0842485548099412, "learning_rate": 1.709797559517496e-06, "loss": 0.4315544366836548, "step": 2485 }, { "epoch": 0.5732072861424948, "grad_norm": 1.136143164844157, "learning_rate": 1.709528945332861e-06, "loss": 0.34541741013526917, "step": 2486 }, { "epoch": 0.5734378602720774, "grad_norm": 1.444798755487831, "learning_rate": 1.709260228011003e-06, "loss": 0.5380317568778992, "step": 2487 }, { "epoch": 0.5736684344016602, "grad_norm": 1.1490218932398577, "learning_rate": 1.7089914075909824e-06, "loss": 0.5017478466033936, "step": 2488 }, { "epoch": 0.5738990085312428, "grad_norm": 1.317791376396268, "learning_rate": 1.7087224841118756e-06, "loss": 0.5608090162277222, "step": 2489 }, { "epoch": 0.5741295826608255, "grad_norm": 1.3491498137629283, "learning_rate": 1.708453457612773e-06, "loss": 0.5360782146453857, "step": 2490 }, { "epoch": 0.5743601567904081, "grad_norm": 1.3100243824681166, "learning_rate": 1.7081843281327802e-06, "loss": 0.5638090372085571, "step": 2491 }, { "epoch": 0.5745907309199908, "grad_norm": 1.2532603581217905, "learning_rate": 1.707915095711018e-06, "loss": 0.45777082443237305, "step": 2492 }, { "epoch": 0.5748213050495734, "grad_norm": 1.2028357712850113, "learning_rate": 1.7076457603866224e-06, "loss": 0.5423707962036133, "step": 2493 }, { "epoch": 0.5750518791791561, "grad_norm": 1.3752974790416335, "learning_rate": 1.7073763221987436e-06, "loss": 0.4286508560180664, "step": 2494 }, { "epoch": 0.5752824533087387, "grad_norm": 1.1304014566480758, "learning_rate": 1.7071067811865474e-06, "loss": 0.4197548031806946, "step": 2495 }, { "epoch": 0.5755130274383214, "grad_norm": 1.1820720623961845, "learning_rate": 1.7068371373892142e-06, "loss": 0.47944843769073486, "step": 2496 }, { "epoch": 0.575743601567904, "grad_norm": 1.5454364363464301, "learning_rate": 1.7065673908459396e-06, "loss": 0.49708908796310425, "step": 2497 }, { "epoch": 0.5759741756974868, "grad_norm": 1.2002677488287707, "learning_rate": 1.706297541595934e-06, "loss": 0.46402662992477417, "step": 2498 }, { "epoch": 0.5762047498270694, "grad_norm": 1.2375577528106843, "learning_rate": 1.7060275896784222e-06, "loss": 0.4665846824645996, "step": 2499 }, { "epoch": 0.5764353239566521, "grad_norm": 1.333335025499966, "learning_rate": 1.7057575351326452e-06, "loss": 0.511766791343689, "step": 2500 }, { "epoch": 0.5766658980862347, "grad_norm": 1.3129729051878996, "learning_rate": 1.7054873779978578e-06, "loss": 0.5731323957443237, "step": 2501 }, { "epoch": 0.5768964722158174, "grad_norm": 1.208575824869893, "learning_rate": 1.70521711831333e-06, "loss": 0.43246185779571533, "step": 2502 }, { "epoch": 0.5771270463454, "grad_norm": 1.3743994267646191, "learning_rate": 1.704946756118347e-06, "loss": 0.5062395334243774, "step": 2503 }, { "epoch": 0.5773576204749827, "grad_norm": 1.2169597850499592, "learning_rate": 1.7046762914522087e-06, "loss": 0.5010061264038086, "step": 2504 }, { "epoch": 0.5775881946045653, "grad_norm": 1.1915100175955862, "learning_rate": 1.7044057243542293e-06, "loss": 0.5118759870529175, "step": 2505 }, { "epoch": 0.5778187687341481, "grad_norm": 1.2406153903833703, "learning_rate": 1.7041350548637392e-06, "loss": 0.5796714425086975, "step": 2506 }, { "epoch": 0.5780493428637307, "grad_norm": 1.198072830487735, "learning_rate": 1.7038642830200828e-06, "loss": 0.43587976694107056, "step": 2507 }, { "epoch": 0.5782799169933134, "grad_norm": 1.0836383921827997, "learning_rate": 1.7035934088626193e-06, "loss": 0.4780135154724121, "step": 2508 }, { "epoch": 0.578510491122896, "grad_norm": 1.2949967246283594, "learning_rate": 1.7033224324307232e-06, "loss": 0.48039600253105164, "step": 2509 }, { "epoch": 0.5787410652524787, "grad_norm": 1.4288262034065056, "learning_rate": 1.7030513537637835e-06, "loss": 0.48075419664382935, "step": 2510 }, { "epoch": 0.5789716393820613, "grad_norm": 1.294455603546607, "learning_rate": 1.7027801729012044e-06, "loss": 0.5006246566772461, "step": 2511 }, { "epoch": 0.579202213511644, "grad_norm": 1.3239915881424993, "learning_rate": 1.7025088898824046e-06, "loss": 0.550139307975769, "step": 2512 }, { "epoch": 0.5794327876412266, "grad_norm": 1.273345251271078, "learning_rate": 1.7022375047468178e-06, "loss": 0.5228495001792908, "step": 2513 }, { "epoch": 0.5796633617708093, "grad_norm": 1.223108155250479, "learning_rate": 1.701966017533893e-06, "loss": 0.4783739149570465, "step": 2514 }, { "epoch": 0.579893935900392, "grad_norm": 1.3364695116135945, "learning_rate": 1.701694428283093e-06, "loss": 0.47218769788742065, "step": 2515 }, { "epoch": 0.5801245100299747, "grad_norm": 1.271458214482931, "learning_rate": 1.7014227370338967e-06, "loss": 0.5340671539306641, "step": 2516 }, { "epoch": 0.5803550841595573, "grad_norm": 1.1389068048001012, "learning_rate": 1.7011509438257967e-06, "loss": 0.4629259407520294, "step": 2517 }, { "epoch": 0.58058565828914, "grad_norm": 1.6036419177897663, "learning_rate": 1.7008790486983013e-06, "loss": 0.6334242820739746, "step": 2518 }, { "epoch": 0.5808162324187226, "grad_norm": 1.3328081079482175, "learning_rate": 1.7006070516909327e-06, "loss": 0.544147789478302, "step": 2519 }, { "epoch": 0.5810468065483053, "grad_norm": 1.2269860514972317, "learning_rate": 1.700334952843229e-06, "loss": 0.47045618295669556, "step": 2520 }, { "epoch": 0.5812773806778879, "grad_norm": 1.4613594501045561, "learning_rate": 1.700062752194742e-06, "loss": 0.4582393169403076, "step": 2521 }, { "epoch": 0.5815079548074706, "grad_norm": 1.335231293513905, "learning_rate": 1.699790449785039e-06, "loss": 0.507327139377594, "step": 2522 }, { "epoch": 0.5817385289370532, "grad_norm": 1.3812182502399277, "learning_rate": 1.6995180456537022e-06, "loss": 0.5345891714096069, "step": 2523 }, { "epoch": 0.581969103066636, "grad_norm": 1.3766088909590293, "learning_rate": 1.6992455398403277e-06, "loss": 0.4847550094127655, "step": 2524 }, { "epoch": 0.5821996771962186, "grad_norm": 1.2694420906725428, "learning_rate": 1.6989729323845276e-06, "loss": 0.4472479820251465, "step": 2525 }, { "epoch": 0.5824302513258013, "grad_norm": 1.1676894033843348, "learning_rate": 1.698700223325928e-06, "loss": 0.4426107108592987, "step": 2526 }, { "epoch": 0.5826608254553839, "grad_norm": 1.3669509353012406, "learning_rate": 1.6984274127041696e-06, "loss": 0.4814276099205017, "step": 2527 }, { "epoch": 0.5828913995849666, "grad_norm": 1.3849093780882, "learning_rate": 1.6981545005589084e-06, "loss": 0.5286451578140259, "step": 2528 }, { "epoch": 0.5831219737145492, "grad_norm": 1.3586645163698117, "learning_rate": 1.6978814869298152e-06, "loss": 0.5291767120361328, "step": 2529 }, { "epoch": 0.5833525478441319, "grad_norm": 1.4376369092272532, "learning_rate": 1.6976083718565748e-06, "loss": 0.5807399749755859, "step": 2530 }, { "epoch": 0.5835831219737145, "grad_norm": 1.5620885730430554, "learning_rate": 1.6973351553788878e-06, "loss": 0.5489222407341003, "step": 2531 }, { "epoch": 0.5838136961032973, "grad_norm": 1.5080367455114985, "learning_rate": 1.6970618375364683e-06, "loss": 0.5295521020889282, "step": 2532 }, { "epoch": 0.5840442702328799, "grad_norm": 1.281498688581256, "learning_rate": 1.6967884183690467e-06, "loss": 0.4979495406150818, "step": 2533 }, { "epoch": 0.5842748443624626, "grad_norm": 1.0681769287073983, "learning_rate": 1.6965148979163661e-06, "loss": 0.45667344331741333, "step": 2534 }, { "epoch": 0.5845054184920452, "grad_norm": 1.1552847245372566, "learning_rate": 1.6962412762181866e-06, "loss": 0.42687737941741943, "step": 2535 }, { "epoch": 0.5847359926216279, "grad_norm": 1.2720388462434997, "learning_rate": 1.6959675533142815e-06, "loss": 0.5616278648376465, "step": 2536 }, { "epoch": 0.5849665667512105, "grad_norm": 1.245024966542371, "learning_rate": 1.6956937292444386e-06, "loss": 0.4961121678352356, "step": 2537 }, { "epoch": 0.5851971408807932, "grad_norm": 1.1864554840937962, "learning_rate": 1.6954198040484617e-06, "loss": 0.5115770101547241, "step": 2538 }, { "epoch": 0.5854277150103758, "grad_norm": 1.41778667190123, "learning_rate": 1.6951457777661686e-06, "loss": 0.540202260017395, "step": 2539 }, { "epoch": 0.5856582891399585, "grad_norm": 1.3238570605319384, "learning_rate": 1.6948716504373914e-06, "loss": 0.5312114357948303, "step": 2540 }, { "epoch": 0.5858888632695411, "grad_norm": 1.1842147435507233, "learning_rate": 1.694597422101978e-06, "loss": 0.49323517084121704, "step": 2541 }, { "epoch": 0.5861194373991239, "grad_norm": 1.3138451660312804, "learning_rate": 1.6943230927997894e-06, "loss": 0.42929738759994507, "step": 2542 }, { "epoch": 0.5863500115287065, "grad_norm": 1.2474057622168624, "learning_rate": 1.6940486625707021e-06, "loss": 0.45236462354660034, "step": 2543 }, { "epoch": 0.5865805856582892, "grad_norm": 1.1944700996273265, "learning_rate": 1.6937741314546084e-06, "loss": 0.5129071474075317, "step": 2544 }, { "epoch": 0.5868111597878718, "grad_norm": 1.303867373152147, "learning_rate": 1.693499499491413e-06, "loss": 0.5562577247619629, "step": 2545 }, { "epoch": 0.5870417339174545, "grad_norm": 1.472236761409707, "learning_rate": 1.6932247667210372e-06, "loss": 0.5593177080154419, "step": 2546 }, { "epoch": 0.5872723080470371, "grad_norm": 1.666463518969871, "learning_rate": 1.692949933183416e-06, "loss": 0.5536680221557617, "step": 2547 }, { "epoch": 0.5875028821766198, "grad_norm": 1.552275933236934, "learning_rate": 1.6926749989184993e-06, "loss": 0.5523338317871094, "step": 2548 }, { "epoch": 0.5877334563062024, "grad_norm": 1.3066438958077835, "learning_rate": 1.692399963966251e-06, "loss": 0.41815924644470215, "step": 2549 }, { "epoch": 0.5879640304357852, "grad_norm": 1.1800035534558937, "learning_rate": 1.6921248283666508e-06, "loss": 0.46959248185157776, "step": 2550 }, { "epoch": 0.5881946045653678, "grad_norm": 1.2343992191174948, "learning_rate": 1.6918495921596928e-06, "loss": 0.4748489260673523, "step": 2551 }, { "epoch": 0.5884251786949505, "grad_norm": 1.853505775613954, "learning_rate": 1.6915742553853845e-06, "loss": 0.4541524052619934, "step": 2552 }, { "epoch": 0.5886557528245331, "grad_norm": 1.2688298570187295, "learning_rate": 1.691298818083749e-06, "loss": 0.47106000781059265, "step": 2553 }, { "epoch": 0.5888863269541158, "grad_norm": 1.6112122400264717, "learning_rate": 1.6910232802948246e-06, "loss": 0.5364842414855957, "step": 2554 }, { "epoch": 0.5891169010836984, "grad_norm": 1.402469759006704, "learning_rate": 1.690747642058663e-06, "loss": 0.48388350009918213, "step": 2555 }, { "epoch": 0.5893474752132811, "grad_norm": 1.1992143425994695, "learning_rate": 1.690471903415331e-06, "loss": 0.5075609683990479, "step": 2556 }, { "epoch": 0.5895780493428637, "grad_norm": 1.2039147901396619, "learning_rate": 1.6901960644049102e-06, "loss": 0.45098066329956055, "step": 2557 }, { "epoch": 0.5898086234724463, "grad_norm": 1.1869247135212617, "learning_rate": 1.6899201250674966e-06, "loss": 0.5329077243804932, "step": 2558 }, { "epoch": 0.590039197602029, "grad_norm": 1.2771607201573625, "learning_rate": 1.6896440854432005e-06, "loss": 0.4632904529571533, "step": 2559 }, { "epoch": 0.5902697717316117, "grad_norm": 1.3016593794447966, "learning_rate": 1.6893679455721474e-06, "loss": 0.5302451848983765, "step": 2560 }, { "epoch": 0.5905003458611944, "grad_norm": 1.1349040723062418, "learning_rate": 1.6890917054944768e-06, "loss": 0.45363447070121765, "step": 2561 }, { "epoch": 0.590730919990777, "grad_norm": 1.3869965053274627, "learning_rate": 1.688815365250343e-06, "loss": 0.5103914737701416, "step": 2562 }, { "epoch": 0.5909614941203597, "grad_norm": 1.2859854063949494, "learning_rate": 1.6885389248799152e-06, "loss": 0.45474469661712646, "step": 2563 }, { "epoch": 0.5911920682499423, "grad_norm": 1.3905925832105772, "learning_rate": 1.6882623844233766e-06, "loss": 0.517952024936676, "step": 2564 }, { "epoch": 0.591422642379525, "grad_norm": 1.456181517852448, "learning_rate": 1.6879857439209245e-06, "loss": 0.4872232973575592, "step": 2565 }, { "epoch": 0.5916532165091076, "grad_norm": 1.146992588808451, "learning_rate": 1.6877090034127726e-06, "loss": 0.4938408136367798, "step": 2566 }, { "epoch": 0.5918837906386903, "grad_norm": 0.9819996395503116, "learning_rate": 1.6874321629391469e-06, "loss": 0.42687565088272095, "step": 2567 }, { "epoch": 0.592114364768273, "grad_norm": 1.8882181325825955, "learning_rate": 1.6871552225402896e-06, "loss": 0.5272493362426758, "step": 2568 }, { "epoch": 0.5923449388978557, "grad_norm": 1.265485903227574, "learning_rate": 1.6868781822564565e-06, "loss": 0.4643193185329437, "step": 2569 }, { "epoch": 0.5925755130274383, "grad_norm": 1.5054555077342378, "learning_rate": 1.6866010421279183e-06, "loss": 0.4957782030105591, "step": 2570 }, { "epoch": 0.592806087157021, "grad_norm": 1.2319191303045371, "learning_rate": 1.6863238021949605e-06, "loss": 0.442360520362854, "step": 2571 }, { "epoch": 0.5930366612866036, "grad_norm": 1.365610357460579, "learning_rate": 1.6860464624978824e-06, "loss": 0.5108935832977295, "step": 2572 }, { "epoch": 0.5932672354161863, "grad_norm": 1.1047616502548026, "learning_rate": 1.6857690230769976e-06, "loss": 0.46559715270996094, "step": 2573 }, { "epoch": 0.5934978095457689, "grad_norm": 1.2296310276846145, "learning_rate": 1.6854914839726356e-06, "loss": 0.44752076268196106, "step": 2574 }, { "epoch": 0.5937283836753516, "grad_norm": 1.6735698653712807, "learning_rate": 1.6852138452251387e-06, "loss": 0.4018149971961975, "step": 2575 }, { "epoch": 0.5939589578049342, "grad_norm": 1.407358523561205, "learning_rate": 1.6849361068748652e-06, "loss": 0.47711417078971863, "step": 2576 }, { "epoch": 0.594189531934517, "grad_norm": 1.3386417354625197, "learning_rate": 1.684658268962187e-06, "loss": 0.4671875834465027, "step": 2577 }, { "epoch": 0.5944201060640996, "grad_norm": 1.2780841808458634, "learning_rate": 1.6843803315274906e-06, "loss": 0.48041921854019165, "step": 2578 }, { "epoch": 0.5946506801936823, "grad_norm": 1.105183308056311, "learning_rate": 1.6841022946111772e-06, "loss": 0.3444385528564453, "step": 2579 }, { "epoch": 0.5948812543232649, "grad_norm": 1.3054472047651338, "learning_rate": 1.6838241582536619e-06, "loss": 0.46800029277801514, "step": 2580 }, { "epoch": 0.5951118284528476, "grad_norm": 1.7022638621771704, "learning_rate": 1.683545922495375e-06, "loss": 0.4362339377403259, "step": 2581 }, { "epoch": 0.5953424025824302, "grad_norm": 1.5138702229312708, "learning_rate": 1.6832675873767606e-06, "loss": 0.4818536043167114, "step": 2582 }, { "epoch": 0.5955729767120129, "grad_norm": 1.1464685816902647, "learning_rate": 1.6829891529382775e-06, "loss": 0.47899681329727173, "step": 2583 }, { "epoch": 0.5958035508415955, "grad_norm": 1.028545290493661, "learning_rate": 1.6827106192203995e-06, "loss": 0.4239576458930969, "step": 2584 }, { "epoch": 0.5960341249711782, "grad_norm": 1.299757224081726, "learning_rate": 1.6824319862636136e-06, "loss": 0.545168399810791, "step": 2585 }, { "epoch": 0.5962646991007609, "grad_norm": 1.1433294908143323, "learning_rate": 1.6821532541084228e-06, "loss": 0.4238642156124115, "step": 2586 }, { "epoch": 0.5964952732303436, "grad_norm": 1.1214453575304018, "learning_rate": 1.6818744227953422e-06, "loss": 0.39589810371398926, "step": 2587 }, { "epoch": 0.5967258473599262, "grad_norm": 1.1696584305728281, "learning_rate": 1.6815954923649044e-06, "loss": 0.4358367919921875, "step": 2588 }, { "epoch": 0.5969564214895089, "grad_norm": 1.232714944175718, "learning_rate": 1.6813164628576538e-06, "loss": 0.5012080073356628, "step": 2589 }, { "epoch": 0.5971869956190915, "grad_norm": 1.0762630624781258, "learning_rate": 1.6810373343141503e-06, "loss": 0.4637286365032196, "step": 2590 }, { "epoch": 0.5974175697486742, "grad_norm": 1.4947457348694884, "learning_rate": 1.6807581067749684e-06, "loss": 0.6130828261375427, "step": 2591 }, { "epoch": 0.5976481438782568, "grad_norm": 1.538167494741888, "learning_rate": 1.680478780280696e-06, "loss": 0.5430021286010742, "step": 2592 }, { "epoch": 0.5978787180078395, "grad_norm": 1.4318445545867842, "learning_rate": 1.6801993548719368e-06, "loss": 0.5195741653442383, "step": 2593 }, { "epoch": 0.5981092921374221, "grad_norm": 1.4741188457279395, "learning_rate": 1.6799198305893077e-06, "loss": 0.5452337265014648, "step": 2594 }, { "epoch": 0.5983398662670049, "grad_norm": 1.1858829095847359, "learning_rate": 1.6796402074734402e-06, "loss": 0.4802110493183136, "step": 2595 }, { "epoch": 0.5985704403965875, "grad_norm": 1.114234548006963, "learning_rate": 1.679360485564981e-06, "loss": 0.48554790019989014, "step": 2596 }, { "epoch": 0.5988010145261702, "grad_norm": 1.3519600489481014, "learning_rate": 1.6790806649045896e-06, "loss": 0.5151324272155762, "step": 2597 }, { "epoch": 0.5990315886557528, "grad_norm": 1.4134149785589025, "learning_rate": 1.6788007455329419e-06, "loss": 0.5122699737548828, "step": 2598 }, { "epoch": 0.5992621627853355, "grad_norm": 1.0762809832802989, "learning_rate": 1.6785207274907258e-06, "loss": 0.47776496410369873, "step": 2599 }, { "epoch": 0.5994927369149181, "grad_norm": 1.3625217888513212, "learning_rate": 1.6782406108186455e-06, "loss": 0.5653492212295532, "step": 2600 }, { "epoch": 0.5997233110445008, "grad_norm": 1.2197147141619178, "learning_rate": 1.677960395557419e-06, "loss": 0.44313424825668335, "step": 2601 }, { "epoch": 0.5999538851740834, "grad_norm": 1.137470066753919, "learning_rate": 1.677680081747778e-06, "loss": 0.40465259552001953, "step": 2602 }, { "epoch": 0.6001844593036662, "grad_norm": 1.4481779333184874, "learning_rate": 1.6773996694304687e-06, "loss": 0.5488068461418152, "step": 2603 }, { "epoch": 0.6004150334332488, "grad_norm": 1.2545703783665254, "learning_rate": 1.6771191586462523e-06, "loss": 0.5122859477996826, "step": 2604 }, { "epoch": 0.6006456075628315, "grad_norm": 1.2685821503383574, "learning_rate": 1.6768385494359039e-06, "loss": 0.47173869609832764, "step": 2605 }, { "epoch": 0.6008761816924141, "grad_norm": 1.342808103655164, "learning_rate": 1.6765578418402129e-06, "loss": 0.527764081954956, "step": 2606 }, { "epoch": 0.6011067558219968, "grad_norm": 1.7106657610470863, "learning_rate": 1.6762770358999826e-06, "loss": 0.5399610996246338, "step": 2607 }, { "epoch": 0.6013373299515794, "grad_norm": 1.1677908773060481, "learning_rate": 1.6759961316560314e-06, "loss": 0.3441581428050995, "step": 2608 }, { "epoch": 0.6015679040811621, "grad_norm": 1.2546350672529525, "learning_rate": 1.6757151291491916e-06, "loss": 0.5027580857276917, "step": 2609 }, { "epoch": 0.6017984782107447, "grad_norm": 1.6099655975362483, "learning_rate": 1.6754340284203095e-06, "loss": 0.3898310363292694, "step": 2610 }, { "epoch": 0.6020290523403274, "grad_norm": 1.5075448921993653, "learning_rate": 1.675152829510246e-06, "loss": 0.5577199459075928, "step": 2611 }, { "epoch": 0.60225962646991, "grad_norm": 1.178797634573082, "learning_rate": 1.6748715324598763e-06, "loss": 0.47849035263061523, "step": 2612 }, { "epoch": 0.6024902005994928, "grad_norm": 1.2674537093214957, "learning_rate": 1.6745901373100896e-06, "loss": 0.46845290064811707, "step": 2613 }, { "epoch": 0.6027207747290754, "grad_norm": 1.4078882858329094, "learning_rate": 1.6743086441017899e-06, "loss": 0.46008870005607605, "step": 2614 }, { "epoch": 0.6029513488586581, "grad_norm": 1.3347721564783812, "learning_rate": 1.6740270528758948e-06, "loss": 0.44386154413223267, "step": 2615 }, { "epoch": 0.6031819229882407, "grad_norm": 1.2103476019651458, "learning_rate": 1.6737453636733364e-06, "loss": 0.495368629693985, "step": 2616 }, { "epoch": 0.6034124971178234, "grad_norm": 1.257056760083973, "learning_rate": 1.6734635765350613e-06, "loss": 0.519428551197052, "step": 2617 }, { "epoch": 0.603643071247406, "grad_norm": 1.5181965589957365, "learning_rate": 1.6731816915020302e-06, "loss": 0.49346470832824707, "step": 2618 }, { "epoch": 0.6038736453769887, "grad_norm": 1.3323089431428572, "learning_rate": 1.6728997086152173e-06, "loss": 0.554854691028595, "step": 2619 }, { "epoch": 0.6041042195065713, "grad_norm": 1.503361315997137, "learning_rate": 1.6726176279156125e-06, "loss": 0.4930881857872009, "step": 2620 }, { "epoch": 0.604334793636154, "grad_norm": 1.1576996092953873, "learning_rate": 1.6723354494442186e-06, "loss": 0.4082447588443756, "step": 2621 }, { "epoch": 0.6045653677657367, "grad_norm": 1.2572245396068074, "learning_rate": 1.6720531732420531e-06, "loss": 0.5151821374893188, "step": 2622 }, { "epoch": 0.6047959418953194, "grad_norm": 1.6316483356509275, "learning_rate": 1.671770799350148e-06, "loss": 0.44579264521598816, "step": 2623 }, { "epoch": 0.605026516024902, "grad_norm": 1.5349454914737826, "learning_rate": 1.6714883278095489e-06, "loss": 0.4937717020511627, "step": 2624 }, { "epoch": 0.6052570901544847, "grad_norm": 1.4939841287703146, "learning_rate": 1.671205758661316e-06, "loss": 0.46298685669898987, "step": 2625 }, { "epoch": 0.6054876642840673, "grad_norm": 1.3089529059854432, "learning_rate": 1.6709230919465233e-06, "loss": 0.5535221695899963, "step": 2626 }, { "epoch": 0.60571823841365, "grad_norm": 1.2781536932155106, "learning_rate": 1.6706403277062599e-06, "loss": 0.5289112329483032, "step": 2627 }, { "epoch": 0.6059488125432326, "grad_norm": 1.2619858231183905, "learning_rate": 1.6703574659816285e-06, "loss": 0.506280779838562, "step": 2628 }, { "epoch": 0.6061793866728153, "grad_norm": 1.366142383501645, "learning_rate": 1.6700745068137451e-06, "loss": 0.504257082939148, "step": 2629 }, { "epoch": 0.606409960802398, "grad_norm": 1.2835196483556859, "learning_rate": 1.6697914502437411e-06, "loss": 0.624682605266571, "step": 2630 }, { "epoch": 0.6066405349319807, "grad_norm": 1.1715096985967743, "learning_rate": 1.6695082963127617e-06, "loss": 0.4539645314216614, "step": 2631 }, { "epoch": 0.6068711090615633, "grad_norm": 1.2852717924915888, "learning_rate": 1.6692250450619665e-06, "loss": 0.5461890697479248, "step": 2632 }, { "epoch": 0.607101683191146, "grad_norm": 1.2251930368732282, "learning_rate": 1.6689416965325282e-06, "loss": 0.615606427192688, "step": 2633 }, { "epoch": 0.6073322573207286, "grad_norm": 1.3904526684847855, "learning_rate": 1.668658250765635e-06, "loss": 0.5355387926101685, "step": 2634 }, { "epoch": 0.6075628314503113, "grad_norm": 1.1464900003631002, "learning_rate": 1.6683747078024886e-06, "loss": 0.5804985165596008, "step": 2635 }, { "epoch": 0.6077934055798939, "grad_norm": 1.1983123193544134, "learning_rate": 1.6680910676843042e-06, "loss": 0.4514031410217285, "step": 2636 }, { "epoch": 0.6080239797094766, "grad_norm": 1.3446092692413514, "learning_rate": 1.6678073304523123e-06, "loss": 0.5621001720428467, "step": 2637 }, { "epoch": 0.6082545538390592, "grad_norm": 1.3749875179413227, "learning_rate": 1.667523496147757e-06, "loss": 0.49387669563293457, "step": 2638 }, { "epoch": 0.608485127968642, "grad_norm": 1.0479438264918854, "learning_rate": 1.6672395648118966e-06, "loss": 0.5857938528060913, "step": 2639 }, { "epoch": 0.6087157020982246, "grad_norm": 1.149056345239141, "learning_rate": 1.6669555364860029e-06, "loss": 0.46403199434280396, "step": 2640 }, { "epoch": 0.6089462762278073, "grad_norm": 1.2068025098167319, "learning_rate": 1.6666714112113627e-06, "loss": 0.4998488128185272, "step": 2641 }, { "epoch": 0.6091768503573899, "grad_norm": 1.3686546841392573, "learning_rate": 1.6663871890292765e-06, "loss": 0.6291745901107788, "step": 2642 }, { "epoch": 0.6094074244869726, "grad_norm": 1.7034971765108011, "learning_rate": 1.6661028699810587e-06, "loss": 0.6326058506965637, "step": 2643 }, { "epoch": 0.6096379986165552, "grad_norm": 1.2748339439376004, "learning_rate": 1.6658184541080378e-06, "loss": 0.5737805366516113, "step": 2644 }, { "epoch": 0.6098685727461379, "grad_norm": 1.435593858390691, "learning_rate": 1.6655339414515568e-06, "loss": 0.565047025680542, "step": 2645 }, { "epoch": 0.6100991468757205, "grad_norm": 1.154269897254632, "learning_rate": 1.6652493320529724e-06, "loss": 0.5157296061515808, "step": 2646 }, { "epoch": 0.6103297210053032, "grad_norm": 1.2671967095996914, "learning_rate": 1.6649646259536554e-06, "loss": 0.4475112855434418, "step": 2647 }, { "epoch": 0.6105602951348859, "grad_norm": 1.4397592539357233, "learning_rate": 1.6646798231949911e-06, "loss": 0.5072107315063477, "step": 2648 }, { "epoch": 0.6107908692644686, "grad_norm": 1.3901386223871963, "learning_rate": 1.6643949238183778e-06, "loss": 0.44673952460289, "step": 2649 }, { "epoch": 0.6110214433940512, "grad_norm": 1.4046630639478026, "learning_rate": 1.6641099278652293e-06, "loss": 0.47460734844207764, "step": 2650 }, { "epoch": 0.6112520175236339, "grad_norm": 1.251836663583678, "learning_rate": 1.6638248353769718e-06, "loss": 0.4529770612716675, "step": 2651 }, { "epoch": 0.6114825916532165, "grad_norm": 1.4298404685971746, "learning_rate": 1.6635396463950473e-06, "loss": 0.5200958251953125, "step": 2652 }, { "epoch": 0.6117131657827992, "grad_norm": 1.4871792439140996, "learning_rate": 1.66325436096091e-06, "loss": 0.465969979763031, "step": 2653 }, { "epoch": 0.6119437399123818, "grad_norm": 1.1085493213804483, "learning_rate": 1.6629689791160298e-06, "loss": 0.5173276662826538, "step": 2654 }, { "epoch": 0.6121743140419645, "grad_norm": 1.246647464420017, "learning_rate": 1.6626835009018892e-06, "loss": 0.5539907217025757, "step": 2655 }, { "epoch": 0.6124048881715471, "grad_norm": 1.1686862955670068, "learning_rate": 1.6623979263599857e-06, "loss": 0.5617278814315796, "step": 2656 }, { "epoch": 0.6126354623011299, "grad_norm": 1.3640942620216159, "learning_rate": 1.6621122555318304e-06, "loss": 0.46238285303115845, "step": 2657 }, { "epoch": 0.6128660364307125, "grad_norm": 1.4695540598112733, "learning_rate": 1.6618264884589484e-06, "loss": 0.49247878789901733, "step": 2658 }, { "epoch": 0.6130966105602952, "grad_norm": 1.0811892876151687, "learning_rate": 1.6615406251828793e-06, "loss": 0.4844072163105011, "step": 2659 }, { "epoch": 0.6133271846898778, "grad_norm": 1.2024921886284354, "learning_rate": 1.6612546657451754e-06, "loss": 0.47372323274612427, "step": 2660 }, { "epoch": 0.6135577588194605, "grad_norm": 1.299485129998275, "learning_rate": 1.660968610187404e-06, "loss": 0.5287426114082336, "step": 2661 }, { "epoch": 0.6137883329490431, "grad_norm": 1.4640884136716181, "learning_rate": 1.6606824585511471e-06, "loss": 0.5862994194030762, "step": 2662 }, { "epoch": 0.6140189070786258, "grad_norm": 1.0158009777389652, "learning_rate": 1.6603962108779986e-06, "loss": 0.4866197109222412, "step": 2663 }, { "epoch": 0.6142494812082084, "grad_norm": 1.408246184243547, "learning_rate": 1.660109867209568e-06, "loss": 0.5561861991882324, "step": 2664 }, { "epoch": 0.6144800553377912, "grad_norm": 1.214620364544681, "learning_rate": 1.659823427587478e-06, "loss": 0.4878644645214081, "step": 2665 }, { "epoch": 0.6147106294673738, "grad_norm": 1.3262957238727335, "learning_rate": 1.659536892053366e-06, "loss": 0.5371976494789124, "step": 2666 }, { "epoch": 0.6149412035969565, "grad_norm": 1.2817478175527077, "learning_rate": 1.6592502606488824e-06, "loss": 0.4816581606864929, "step": 2667 }, { "epoch": 0.6151717777265391, "grad_norm": 1.1536826566839264, "learning_rate": 1.6589635334156919e-06, "loss": 0.5105183124542236, "step": 2668 }, { "epoch": 0.6154023518561217, "grad_norm": 1.4584261311401567, "learning_rate": 1.6586767103954737e-06, "loss": 0.5524129271507263, "step": 2669 }, { "epoch": 0.6156329259857044, "grad_norm": 1.3107384301518328, "learning_rate": 1.6583897916299204e-06, "loss": 0.42373913526535034, "step": 2670 }, { "epoch": 0.615863500115287, "grad_norm": 1.3724263799580212, "learning_rate": 1.658102777160738e-06, "loss": 0.5620803833007812, "step": 2671 }, { "epoch": 0.6160940742448697, "grad_norm": 1.3004346965884186, "learning_rate": 1.6578156670296472e-06, "loss": 0.38180166482925415, "step": 2672 }, { "epoch": 0.6163246483744523, "grad_norm": 1.2109058692777805, "learning_rate": 1.6575284612783825e-06, "loss": 0.48596519231796265, "step": 2673 }, { "epoch": 0.616555222504035, "grad_norm": 1.1846928230852602, "learning_rate": 1.657241159948692e-06, "loss": 0.5098127126693726, "step": 2674 }, { "epoch": 0.6167857966336177, "grad_norm": 1.5943292852368571, "learning_rate": 1.6569537630823382e-06, "loss": 0.5650018453598022, "step": 2675 }, { "epoch": 0.6170163707632004, "grad_norm": 1.1501551859696775, "learning_rate": 1.6566662707210967e-06, "loss": 0.45061948895454407, "step": 2676 }, { "epoch": 0.617246944892783, "grad_norm": 1.3028951742766879, "learning_rate": 1.6563786829067576e-06, "loss": 0.4292137622833252, "step": 2677 }, { "epoch": 0.6174775190223657, "grad_norm": 1.269567036808456, "learning_rate": 1.656090999681125e-06, "loss": 0.4837046265602112, "step": 2678 }, { "epoch": 0.6177080931519483, "grad_norm": 1.9486185906204885, "learning_rate": 1.6558032210860162e-06, "loss": 0.43580353260040283, "step": 2679 }, { "epoch": 0.617938667281531, "grad_norm": 1.2529677917985589, "learning_rate": 1.6555153471632628e-06, "loss": 0.47321656346321106, "step": 2680 }, { "epoch": 0.6181692414111136, "grad_norm": 1.1423229113084605, "learning_rate": 1.65522737795471e-06, "loss": 0.47431111335754395, "step": 2681 }, { "epoch": 0.6183998155406963, "grad_norm": 0.9698177160310311, "learning_rate": 1.6549393135022181e-06, "loss": 0.38062599301338196, "step": 2682 }, { "epoch": 0.618630389670279, "grad_norm": 1.2758905094442272, "learning_rate": 1.6546511538476584e-06, "loss": 0.5941839218139648, "step": 2683 }, { "epoch": 0.6188609637998617, "grad_norm": 1.453087551621585, "learning_rate": 1.6543628990329195e-06, "loss": 0.5323158502578735, "step": 2684 }, { "epoch": 0.6190915379294443, "grad_norm": 1.100143863509344, "learning_rate": 1.654074549099901e-06, "loss": 0.3814772367477417, "step": 2685 }, { "epoch": 0.619322112059027, "grad_norm": 1.5499952709692644, "learning_rate": 1.6537861040905181e-06, "loss": 0.5520694255828857, "step": 2686 }, { "epoch": 0.6195526861886096, "grad_norm": 1.297782443862308, "learning_rate": 1.653497564046699e-06, "loss": 0.5514999628067017, "step": 2687 }, { "epoch": 0.6197832603181923, "grad_norm": 1.2170603559624027, "learning_rate": 1.653208929010386e-06, "loss": 0.39057493209838867, "step": 2688 }, { "epoch": 0.6200138344477749, "grad_norm": 1.0224470752428403, "learning_rate": 1.6529201990235352e-06, "loss": 0.4941304922103882, "step": 2689 }, { "epoch": 0.6202444085773576, "grad_norm": 1.2590211215766611, "learning_rate": 1.6526313741281164e-06, "loss": 0.539762020111084, "step": 2690 }, { "epoch": 0.6204749827069402, "grad_norm": 1.3801421787603734, "learning_rate": 1.6523424543661127e-06, "loss": 0.49524787068367004, "step": 2691 }, { "epoch": 0.620705556836523, "grad_norm": 1.2158625492501351, "learning_rate": 1.6520534397795225e-06, "loss": 0.4261528253555298, "step": 2692 }, { "epoch": 0.6209361309661056, "grad_norm": 1.3188986304771895, "learning_rate": 1.6517643304103563e-06, "loss": 0.578548789024353, "step": 2693 }, { "epoch": 0.6211667050956883, "grad_norm": 1.24168526725964, "learning_rate": 1.6514751263006393e-06, "loss": 0.4766680598258972, "step": 2694 }, { "epoch": 0.6213972792252709, "grad_norm": 1.135518406763033, "learning_rate": 1.6511858274924098e-06, "loss": 0.4146459996700287, "step": 2695 }, { "epoch": 0.6216278533548536, "grad_norm": 1.4632792907408574, "learning_rate": 1.650896434027721e-06, "loss": 0.5148390531539917, "step": 2696 }, { "epoch": 0.6218584274844362, "grad_norm": 1.1678475162221296, "learning_rate": 1.6506069459486388e-06, "loss": 0.4830890893936157, "step": 2697 }, { "epoch": 0.6220890016140189, "grad_norm": 1.2027318756470287, "learning_rate": 1.6503173632972434e-06, "loss": 0.4550463557243347, "step": 2698 }, { "epoch": 0.6223195757436015, "grad_norm": 1.3023820822101895, "learning_rate": 1.6500276861156284e-06, "loss": 0.5811448097229004, "step": 2699 }, { "epoch": 0.6225501498731842, "grad_norm": 1.3807858518585416, "learning_rate": 1.6497379144459014e-06, "loss": 0.44733545184135437, "step": 2700 }, { "epoch": 0.6227807240027669, "grad_norm": 1.103384717152327, "learning_rate": 1.6494480483301835e-06, "loss": 0.4379687011241913, "step": 2701 }, { "epoch": 0.6230112981323496, "grad_norm": 1.326644045971959, "learning_rate": 1.6491580878106102e-06, "loss": 0.5163959860801697, "step": 2702 }, { "epoch": 0.6232418722619322, "grad_norm": 1.2037310331107272, "learning_rate": 1.6488680329293297e-06, "loss": 0.5636980533599854, "step": 2703 }, { "epoch": 0.6234724463915149, "grad_norm": 1.1847301227909297, "learning_rate": 1.6485778837285044e-06, "loss": 0.46942776441574097, "step": 2704 }, { "epoch": 0.6237030205210975, "grad_norm": 1.3867166397057658, "learning_rate": 1.6482876402503103e-06, "loss": 0.5104436278343201, "step": 2705 }, { "epoch": 0.6239335946506802, "grad_norm": 1.2701601489299654, "learning_rate": 1.6479973025369379e-06, "loss": 0.4689507484436035, "step": 2706 }, { "epoch": 0.6241641687802628, "grad_norm": 1.2388644364900292, "learning_rate": 1.64770687063059e-06, "loss": 0.4009973406791687, "step": 2707 }, { "epoch": 0.6243947429098455, "grad_norm": 1.4958191711517836, "learning_rate": 1.6474163445734846e-06, "loss": 0.4938286542892456, "step": 2708 }, { "epoch": 0.6246253170394281, "grad_norm": 1.2939637643231117, "learning_rate": 1.6471257244078519e-06, "loss": 0.4756525754928589, "step": 2709 }, { "epoch": 0.6248558911690109, "grad_norm": 1.0308841763344028, "learning_rate": 1.6468350101759366e-06, "loss": 0.4322332739830017, "step": 2710 }, { "epoch": 0.6250864652985935, "grad_norm": 1.381148895283306, "learning_rate": 1.6465442019199972e-06, "loss": 0.4605666995048523, "step": 2711 }, { "epoch": 0.6253170394281762, "grad_norm": 1.3288993921232848, "learning_rate": 1.6462532996823053e-06, "loss": 0.4576036334037781, "step": 2712 }, { "epoch": 0.6255476135577588, "grad_norm": 1.1587792990864858, "learning_rate": 1.645962303505147e-06, "loss": 0.4860233664512634, "step": 2713 }, { "epoch": 0.6257781876873415, "grad_norm": 1.2195714743605923, "learning_rate": 1.6456712134308213e-06, "loss": 0.4717915654182434, "step": 2714 }, { "epoch": 0.6260087618169241, "grad_norm": 1.1008237671202603, "learning_rate": 1.645380029501641e-06, "loss": 0.49637067317962646, "step": 2715 }, { "epoch": 0.6262393359465068, "grad_norm": 1.2218828759453872, "learning_rate": 1.6450887517599326e-06, "loss": 0.45388346910476685, "step": 2716 }, { "epoch": 0.6264699100760894, "grad_norm": 1.6333623536070287, "learning_rate": 1.6447973802480362e-06, "loss": 0.5549031496047974, "step": 2717 }, { "epoch": 0.6267004842056721, "grad_norm": 1.333805192555573, "learning_rate": 1.644505915008306e-06, "loss": 0.39759719371795654, "step": 2718 }, { "epoch": 0.6269310583352548, "grad_norm": 1.2648542744381963, "learning_rate": 1.644214356083109e-06, "loss": 0.5126739740371704, "step": 2719 }, { "epoch": 0.6271616324648375, "grad_norm": 1.1846129595938097, "learning_rate": 1.6439227035148265e-06, "loss": 0.41424083709716797, "step": 2720 }, { "epoch": 0.6273922065944201, "grad_norm": 1.2295786085250646, "learning_rate": 1.643630957345853e-06, "loss": 0.5829803943634033, "step": 2721 }, { "epoch": 0.6276227807240028, "grad_norm": 1.2114307243350246, "learning_rate": 1.6433391176185972e-06, "loss": 0.4736567437648773, "step": 2722 }, { "epoch": 0.6278533548535854, "grad_norm": 1.4670818430092263, "learning_rate": 1.6430471843754804e-06, "loss": 0.41305306553840637, "step": 2723 }, { "epoch": 0.6280839289831681, "grad_norm": 1.5480231340195962, "learning_rate": 1.6427551576589383e-06, "loss": 0.38422563672065735, "step": 2724 }, { "epoch": 0.6283145031127507, "grad_norm": 1.3725795006115715, "learning_rate": 1.6424630375114199e-06, "loss": 0.48302626609802246, "step": 2725 }, { "epoch": 0.6285450772423334, "grad_norm": 1.2880102228926575, "learning_rate": 1.6421708239753875e-06, "loss": 0.4657328128814697, "step": 2726 }, { "epoch": 0.628775651371916, "grad_norm": 1.4057295929235551, "learning_rate": 1.641878517093318e-06, "loss": 0.46126431226730347, "step": 2727 }, { "epoch": 0.6290062255014988, "grad_norm": 1.3246078376538457, "learning_rate": 1.6415861169077007e-06, "loss": 0.5196214914321899, "step": 2728 }, { "epoch": 0.6292367996310814, "grad_norm": 1.4794856753558834, "learning_rate": 1.641293623461039e-06, "loss": 0.5007073879241943, "step": 2729 }, { "epoch": 0.6294673737606641, "grad_norm": 1.1543847272279724, "learning_rate": 1.64100103679585e-06, "loss": 0.4699769616127014, "step": 2730 }, { "epoch": 0.6296979478902467, "grad_norm": 1.3221766888407216, "learning_rate": 1.6407083569546636e-06, "loss": 0.5487842559814453, "step": 2731 }, { "epoch": 0.6299285220198294, "grad_norm": 1.0556125358940756, "learning_rate": 1.6404155839800244e-06, "loss": 0.42733538150787354, "step": 2732 }, { "epoch": 0.630159096149412, "grad_norm": 1.1933689155818472, "learning_rate": 1.64012271791449e-06, "loss": 0.5105363726615906, "step": 2733 }, { "epoch": 0.6303896702789947, "grad_norm": 1.3185367260440977, "learning_rate": 1.6398297588006305e-06, "loss": 0.5836968421936035, "step": 2734 }, { "epoch": 0.6306202444085773, "grad_norm": 1.3830049962050668, "learning_rate": 1.639536706681031e-06, "loss": 0.4350558817386627, "step": 2735 }, { "epoch": 0.63085081853816, "grad_norm": 1.4225393539645832, "learning_rate": 1.63924356159829e-06, "loss": 0.5388341546058655, "step": 2736 }, { "epoch": 0.6310813926677427, "grad_norm": 1.1218759160612528, "learning_rate": 1.6389503235950186e-06, "loss": 0.4576529860496521, "step": 2737 }, { "epoch": 0.6313119667973254, "grad_norm": 1.524583554785293, "learning_rate": 1.6386569927138422e-06, "loss": 0.4525975286960602, "step": 2738 }, { "epoch": 0.631542540926908, "grad_norm": 1.56840988374272, "learning_rate": 1.6383635689973993e-06, "loss": 0.42143142223358154, "step": 2739 }, { "epoch": 0.6317731150564907, "grad_norm": 1.0672209595897675, "learning_rate": 1.6380700524883423e-06, "loss": 0.4440336227416992, "step": 2740 }, { "epoch": 0.6320036891860733, "grad_norm": 1.2412570194863743, "learning_rate": 1.637776443229336e-06, "loss": 0.5009843707084656, "step": 2741 }, { "epoch": 0.632234263315656, "grad_norm": 1.6736573631214935, "learning_rate": 1.6374827412630604e-06, "loss": 0.538151741027832, "step": 2742 }, { "epoch": 0.6324648374452386, "grad_norm": 1.1895254537976463, "learning_rate": 1.6371889466322077e-06, "loss": 0.550201416015625, "step": 2743 }, { "epoch": 0.6326954115748213, "grad_norm": 1.3861259597044466, "learning_rate": 1.6368950593794836e-06, "loss": 0.5707399845123291, "step": 2744 }, { "epoch": 0.632925985704404, "grad_norm": 1.393827128295071, "learning_rate": 1.6366010795476082e-06, "loss": 0.5196787714958191, "step": 2745 }, { "epoch": 0.6331565598339867, "grad_norm": 1.171378891149435, "learning_rate": 1.636307007179314e-06, "loss": 0.5243285894393921, "step": 2746 }, { "epoch": 0.6333871339635693, "grad_norm": 1.249132441469792, "learning_rate": 1.6360128423173473e-06, "loss": 0.4202825427055359, "step": 2747 }, { "epoch": 0.633617708093152, "grad_norm": 1.2547380834154716, "learning_rate": 1.6357185850044681e-06, "loss": 0.49080896377563477, "step": 2748 }, { "epoch": 0.6338482822227346, "grad_norm": 1.2234752623414968, "learning_rate": 1.6354242352834502e-06, "loss": 0.5537371635437012, "step": 2749 }, { "epoch": 0.6340788563523173, "grad_norm": 1.1077493127634728, "learning_rate": 1.6351297931970796e-06, "loss": 0.3744293451309204, "step": 2750 }, { "epoch": 0.6343094304818999, "grad_norm": 1.237975564408939, "learning_rate": 1.634835258788157e-06, "loss": 0.5176748037338257, "step": 2751 }, { "epoch": 0.6345400046114826, "grad_norm": 1.321137847220575, "learning_rate": 1.6345406320994952e-06, "loss": 0.5179395079612732, "step": 2752 }, { "epoch": 0.6347705787410652, "grad_norm": 1.3158476651008661, "learning_rate": 1.634245913173922e-06, "loss": 0.4810818135738373, "step": 2753 }, { "epoch": 0.635001152870648, "grad_norm": 1.2760288557710286, "learning_rate": 1.6339511020542775e-06, "loss": 0.5188307762145996, "step": 2754 }, { "epoch": 0.6352317270002306, "grad_norm": 1.662662743900965, "learning_rate": 1.6336561987834151e-06, "loss": 0.41170865297317505, "step": 2755 }, { "epoch": 0.6354623011298133, "grad_norm": 1.1982414473393, "learning_rate": 1.6333612034042025e-06, "loss": 0.48726415634155273, "step": 2756 }, { "epoch": 0.6356928752593959, "grad_norm": 1.1790415390507374, "learning_rate": 1.63306611595952e-06, "loss": 0.4483524560928345, "step": 2757 }, { "epoch": 0.6359234493889786, "grad_norm": 1.2150870765180466, "learning_rate": 1.6327709364922618e-06, "loss": 0.3979623019695282, "step": 2758 }, { "epoch": 0.6361540235185612, "grad_norm": 1.2093786796022739, "learning_rate": 1.6324756650453346e-06, "loss": 0.461483895778656, "step": 2759 }, { "epoch": 0.6363845976481439, "grad_norm": 1.2350751043575534, "learning_rate": 1.6321803016616598e-06, "loss": 0.40054333209991455, "step": 2760 }, { "epoch": 0.6366151717777265, "grad_norm": 1.1196609017801307, "learning_rate": 1.6318848463841712e-06, "loss": 0.534996747970581, "step": 2761 }, { "epoch": 0.6368457459073092, "grad_norm": 1.260260551672407, "learning_rate": 1.631589299255816e-06, "loss": 0.49408137798309326, "step": 2762 }, { "epoch": 0.6370763200368919, "grad_norm": 1.305230846296416, "learning_rate": 1.6312936603195557e-06, "loss": 0.49098217487335205, "step": 2763 }, { "epoch": 0.6373068941664746, "grad_norm": 1.1344163970655265, "learning_rate": 1.6309979296183636e-06, "loss": 0.4990113377571106, "step": 2764 }, { "epoch": 0.6375374682960572, "grad_norm": 1.2952446438426217, "learning_rate": 1.6307021071952276e-06, "loss": 0.49399930238723755, "step": 2765 }, { "epoch": 0.6377680424256399, "grad_norm": 1.320323762194689, "learning_rate": 1.6304061930931478e-06, "loss": 0.5029928684234619, "step": 2766 }, { "epoch": 0.6379986165552225, "grad_norm": 1.2455728900211775, "learning_rate": 1.6301101873551396e-06, "loss": 0.5732289552688599, "step": 2767 }, { "epoch": 0.6382291906848052, "grad_norm": 1.2965522975146178, "learning_rate": 1.6298140900242293e-06, "loss": 0.47334790229797363, "step": 2768 }, { "epoch": 0.6384597648143878, "grad_norm": 1.2464510374223752, "learning_rate": 1.6295179011434578e-06, "loss": 0.44271016120910645, "step": 2769 }, { "epoch": 0.6386903389439705, "grad_norm": 1.8250225519339747, "learning_rate": 1.6292216207558798e-06, "loss": 0.5768353939056396, "step": 2770 }, { "epoch": 0.6389209130735531, "grad_norm": 1.074704735340539, "learning_rate": 1.6289252489045625e-06, "loss": 0.48315417766571045, "step": 2771 }, { "epoch": 0.6391514872031359, "grad_norm": 1.338382007112913, "learning_rate": 1.6286287856325855e-06, "loss": 0.5745590925216675, "step": 2772 }, { "epoch": 0.6393820613327185, "grad_norm": 1.473033213400145, "learning_rate": 1.6283322309830444e-06, "loss": 0.6084291934967041, "step": 2773 }, { "epoch": 0.6396126354623012, "grad_norm": 1.083816855400547, "learning_rate": 1.6280355849990451e-06, "loss": 0.4995007812976837, "step": 2774 }, { "epoch": 0.6398432095918838, "grad_norm": 1.1962451309299882, "learning_rate": 1.6277388477237084e-06, "loss": 0.45811381936073303, "step": 2775 }, { "epoch": 0.6400737837214665, "grad_norm": 1.448203316971052, "learning_rate": 1.6274420192001689e-06, "loss": 0.5666211247444153, "step": 2776 }, { "epoch": 0.6403043578510491, "grad_norm": 1.3871415999727634, "learning_rate": 1.6271450994715723e-06, "loss": 0.5059396028518677, "step": 2777 }, { "epoch": 0.6405349319806318, "grad_norm": 1.4444216130733851, "learning_rate": 1.6268480885810798e-06, "loss": 0.5418530702590942, "step": 2778 }, { "epoch": 0.6407655061102144, "grad_norm": 1.4034133564890543, "learning_rate": 1.6265509865718647e-06, "loss": 0.5047061443328857, "step": 2779 }, { "epoch": 0.6409960802397972, "grad_norm": 1.6003350461542336, "learning_rate": 1.6262537934871138e-06, "loss": 0.5104432702064514, "step": 2780 }, { "epoch": 0.6412266543693798, "grad_norm": 1.3065683677222188, "learning_rate": 1.625956509370027e-06, "loss": 0.44423484802246094, "step": 2781 }, { "epoch": 0.6414572284989624, "grad_norm": 1.1820302321160245, "learning_rate": 1.6256591342638179e-06, "loss": 0.47618383169174194, "step": 2782 }, { "epoch": 0.6416878026285451, "grad_norm": 1.3796601981562324, "learning_rate": 1.625361668211713e-06, "loss": 0.5423145890235901, "step": 2783 }, { "epoch": 0.6419183767581277, "grad_norm": 1.380895745392916, "learning_rate": 1.6250641112569515e-06, "loss": 0.517102837562561, "step": 2784 }, { "epoch": 0.6421489508877104, "grad_norm": 1.2388489917279923, "learning_rate": 1.6247664634427864e-06, "loss": 0.39601820707321167, "step": 2785 }, { "epoch": 0.642379525017293, "grad_norm": 1.296572577942614, "learning_rate": 1.6244687248124843e-06, "loss": 0.5480250120162964, "step": 2786 }, { "epoch": 0.6426100991468757, "grad_norm": 1.1105051491643492, "learning_rate": 1.624170895409324e-06, "loss": 0.4743092656135559, "step": 2787 }, { "epoch": 0.6428406732764583, "grad_norm": 1.463202362201621, "learning_rate": 1.6238729752765985e-06, "loss": 0.4595726728439331, "step": 2788 }, { "epoch": 0.643071247406041, "grad_norm": 1.2909676791556273, "learning_rate": 1.6235749644576132e-06, "loss": 0.5058779716491699, "step": 2789 }, { "epoch": 0.6433018215356237, "grad_norm": 1.3145538108383794, "learning_rate": 1.623276862995687e-06, "loss": 0.5075543522834778, "step": 2790 }, { "epoch": 0.6435323956652064, "grad_norm": 1.3185436913231439, "learning_rate": 1.622978670934152e-06, "loss": 0.5623351335525513, "step": 2791 }, { "epoch": 0.643762969794789, "grad_norm": 1.1682118545924238, "learning_rate": 1.6226803883163536e-06, "loss": 0.3645760118961334, "step": 2792 }, { "epoch": 0.6439935439243717, "grad_norm": 1.4617740663680228, "learning_rate": 1.6223820151856501e-06, "loss": 0.5666004419326782, "step": 2793 }, { "epoch": 0.6442241180539543, "grad_norm": 1.3342697895697784, "learning_rate": 1.6220835515854133e-06, "loss": 0.6571217775344849, "step": 2794 }, { "epoch": 0.644454692183537, "grad_norm": 1.4229199895470708, "learning_rate": 1.6217849975590271e-06, "loss": 0.5684333443641663, "step": 2795 }, { "epoch": 0.6446852663131196, "grad_norm": 1.5289890556459427, "learning_rate": 1.62148635314989e-06, "loss": 0.43374937772750854, "step": 2796 }, { "epoch": 0.6449158404427023, "grad_norm": 1.1182458179152783, "learning_rate": 1.6211876184014134e-06, "loss": 0.5102420449256897, "step": 2797 }, { "epoch": 0.6451464145722849, "grad_norm": 1.0775475511417847, "learning_rate": 1.6208887933570203e-06, "loss": 0.39345985651016235, "step": 2798 }, { "epoch": 0.6453769887018677, "grad_norm": 1.4503631372644623, "learning_rate": 1.620589878060149e-06, "loss": 0.47554945945739746, "step": 2799 }, { "epoch": 0.6456075628314503, "grad_norm": 1.601431882721041, "learning_rate": 1.6202908725542495e-06, "loss": 0.4385503828525543, "step": 2800 }, { "epoch": 0.645838136961033, "grad_norm": 1.1168858860640334, "learning_rate": 1.619991776882785e-06, "loss": 0.5589696168899536, "step": 2801 }, { "epoch": 0.6460687110906156, "grad_norm": 1.265570460008291, "learning_rate": 1.619692591089232e-06, "loss": 0.4827546179294586, "step": 2802 }, { "epoch": 0.6462992852201983, "grad_norm": 1.3309974001593363, "learning_rate": 1.6193933152170809e-06, "loss": 0.491131067276001, "step": 2803 }, { "epoch": 0.6465298593497809, "grad_norm": 1.2647545815457555, "learning_rate": 1.6190939493098341e-06, "loss": 0.47185173630714417, "step": 2804 }, { "epoch": 0.6467604334793636, "grad_norm": 1.235826049412326, "learning_rate": 1.6187944934110072e-06, "loss": 0.4411182701587677, "step": 2805 }, { "epoch": 0.6469910076089462, "grad_norm": 1.2245067812038697, "learning_rate": 1.6184949475641295e-06, "loss": 0.47243285179138184, "step": 2806 }, { "epoch": 0.647221581738529, "grad_norm": 1.3311536114931484, "learning_rate": 1.6181953118127428e-06, "loss": 0.4449295401573181, "step": 2807 }, { "epoch": 0.6474521558681116, "grad_norm": 1.2292361204281614, "learning_rate": 1.6178955862004024e-06, "loss": 0.5148872137069702, "step": 2808 }, { "epoch": 0.6476827299976943, "grad_norm": 1.2738055603189895, "learning_rate": 1.6175957707706762e-06, "loss": 0.5017277598381042, "step": 2809 }, { "epoch": 0.6479133041272769, "grad_norm": 1.1324070696899262, "learning_rate": 1.6172958655671458e-06, "loss": 0.44220247864723206, "step": 2810 }, { "epoch": 0.6481438782568596, "grad_norm": 1.215492495713019, "learning_rate": 1.6169958706334053e-06, "loss": 0.45421087741851807, "step": 2811 }, { "epoch": 0.6483744523864422, "grad_norm": 1.5167053281985836, "learning_rate": 1.6166957860130618e-06, "loss": 0.4772147536277771, "step": 2812 }, { "epoch": 0.6486050265160249, "grad_norm": 1.1252103890770975, "learning_rate": 1.6163956117497357e-06, "loss": 0.5319628715515137, "step": 2813 }, { "epoch": 0.6488356006456075, "grad_norm": 1.2663721872672429, "learning_rate": 1.6160953478870608e-06, "loss": 0.5109438896179199, "step": 2814 }, { "epoch": 0.6490661747751902, "grad_norm": 1.33543378668276, "learning_rate": 1.6157949944686827e-06, "loss": 0.4417513608932495, "step": 2815 }, { "epoch": 0.6492967489047728, "grad_norm": 1.2535935822359765, "learning_rate": 1.6154945515382616e-06, "loss": 0.5013085007667542, "step": 2816 }, { "epoch": 0.6495273230343556, "grad_norm": 1.1191581438601172, "learning_rate": 1.6151940191394693e-06, "loss": 0.5197368860244751, "step": 2817 }, { "epoch": 0.6497578971639382, "grad_norm": 1.4218758858652996, "learning_rate": 1.6148933973159914e-06, "loss": 0.46540898084640503, "step": 2818 }, { "epoch": 0.6499884712935209, "grad_norm": 1.2080431861739462, "learning_rate": 1.6145926861115268e-06, "loss": 0.4867633581161499, "step": 2819 }, { "epoch": 0.6502190454231035, "grad_norm": 1.1380395234486869, "learning_rate": 1.6142918855697864e-06, "loss": 0.426607221364975, "step": 2820 }, { "epoch": 0.6504496195526862, "grad_norm": 1.2737116095131904, "learning_rate": 1.613990995734495e-06, "loss": 0.5183024406433105, "step": 2821 }, { "epoch": 0.6506801936822688, "grad_norm": 1.3839354752611597, "learning_rate": 1.6136900166493893e-06, "loss": 0.48635101318359375, "step": 2822 }, { "epoch": 0.6509107678118515, "grad_norm": 1.5911912747422927, "learning_rate": 1.6133889483582204e-06, "loss": 0.47468632459640503, "step": 2823 }, { "epoch": 0.6511413419414341, "grad_norm": 1.1598857858501956, "learning_rate": 1.6130877909047515e-06, "loss": 0.4665389358997345, "step": 2824 }, { "epoch": 0.6513719160710169, "grad_norm": 1.1793258331020087, "learning_rate": 1.6127865443327585e-06, "loss": 0.5069966316223145, "step": 2825 }, { "epoch": 0.6516024902005995, "grad_norm": 1.4107626754859688, "learning_rate": 1.612485208686031e-06, "loss": 0.47820740938186646, "step": 2826 }, { "epoch": 0.6518330643301822, "grad_norm": 1.2189859420338702, "learning_rate": 1.612183784008371e-06, "loss": 0.43017104268074036, "step": 2827 }, { "epoch": 0.6520636384597648, "grad_norm": 1.158515500774614, "learning_rate": 1.6118822703435937e-06, "loss": 0.45495298504829407, "step": 2828 }, { "epoch": 0.6522942125893475, "grad_norm": 1.7108375139007879, "learning_rate": 1.6115806677355272e-06, "loss": 0.4624331593513489, "step": 2829 }, { "epoch": 0.6525247867189301, "grad_norm": 1.0788742222165304, "learning_rate": 1.6112789762280125e-06, "loss": 0.39458876848220825, "step": 2830 }, { "epoch": 0.6527553608485128, "grad_norm": 1.4194134450814206, "learning_rate": 1.6109771958649035e-06, "loss": 0.45552846789360046, "step": 2831 }, { "epoch": 0.6529859349780954, "grad_norm": 1.4199555723058743, "learning_rate": 1.6106753266900671e-06, "loss": 0.4579755663871765, "step": 2832 }, { "epoch": 0.6532165091076781, "grad_norm": 1.2589449636358518, "learning_rate": 1.6103733687473823e-06, "loss": 0.5164625644683838, "step": 2833 }, { "epoch": 0.6534470832372608, "grad_norm": 1.3635551079325425, "learning_rate": 1.6100713220807432e-06, "loss": 0.43071237206459045, "step": 2834 }, { "epoch": 0.6536776573668435, "grad_norm": 1.2757429725484968, "learning_rate": 1.6097691867340543e-06, "loss": 0.5174099802970886, "step": 2835 }, { "epoch": 0.6539082314964261, "grad_norm": 1.31351831375575, "learning_rate": 1.609466962751234e-06, "loss": 0.5944932699203491, "step": 2836 }, { "epoch": 0.6541388056260088, "grad_norm": 1.312815606757786, "learning_rate": 1.6091646501762145e-06, "loss": 0.45203912258148193, "step": 2837 }, { "epoch": 0.6543693797555914, "grad_norm": 1.292859531347235, "learning_rate": 1.6088622490529386e-06, "loss": 0.4197826683521271, "step": 2838 }, { "epoch": 0.6545999538851741, "grad_norm": 1.3008648230701247, "learning_rate": 1.6085597594253649e-06, "loss": 0.4806807339191437, "step": 2839 }, { "epoch": 0.6548305280147567, "grad_norm": 1.233893928808971, "learning_rate": 1.608257181337462e-06, "loss": 0.4618797302246094, "step": 2840 }, { "epoch": 0.6550611021443394, "grad_norm": 1.1215282144992917, "learning_rate": 1.6079545148332137e-06, "loss": 0.4901892840862274, "step": 2841 }, { "epoch": 0.655291676273922, "grad_norm": 1.250624448026336, "learning_rate": 1.607651759956615e-06, "loss": 0.44869139790534973, "step": 2842 }, { "epoch": 0.6555222504035048, "grad_norm": 1.1064395173732657, "learning_rate": 1.6073489167516747e-06, "loss": 0.41470903158187866, "step": 2843 }, { "epoch": 0.6557528245330874, "grad_norm": 1.2796938856852533, "learning_rate": 1.6070459852624143e-06, "loss": 0.5498615503311157, "step": 2844 }, { "epoch": 0.6559833986626701, "grad_norm": 1.4741717641783516, "learning_rate": 1.6067429655328675e-06, "loss": 0.5462392568588257, "step": 2845 }, { "epoch": 0.6562139727922527, "grad_norm": 1.5147243124828937, "learning_rate": 1.6064398576070815e-06, "loss": 0.3775100111961365, "step": 2846 }, { "epoch": 0.6564445469218354, "grad_norm": 1.3806942156086204, "learning_rate": 1.6061366615291161e-06, "loss": 0.4712100028991699, "step": 2847 }, { "epoch": 0.656675121051418, "grad_norm": 1.1320542857842297, "learning_rate": 1.6058333773430439e-06, "loss": 0.5152161121368408, "step": 2848 }, { "epoch": 0.6569056951810007, "grad_norm": 1.2222287817453417, "learning_rate": 1.6055300050929502e-06, "loss": 0.46678972244262695, "step": 2849 }, { "epoch": 0.6571362693105833, "grad_norm": 1.1948519980696821, "learning_rate": 1.6052265448229338e-06, "loss": 0.4622490108013153, "step": 2850 }, { "epoch": 0.657366843440166, "grad_norm": 1.2601521252962713, "learning_rate": 1.6049229965771052e-06, "loss": 0.49909311532974243, "step": 2851 }, { "epoch": 0.6575974175697487, "grad_norm": 1.1801405687475501, "learning_rate": 1.6046193603995884e-06, "loss": 0.4428306221961975, "step": 2852 }, { "epoch": 0.6578279916993314, "grad_norm": 1.5295557154716768, "learning_rate": 1.6043156363345196e-06, "loss": 0.5842458009719849, "step": 2853 }, { "epoch": 0.658058565828914, "grad_norm": 1.4945011678677886, "learning_rate": 1.604011824426049e-06, "loss": 0.47183722257614136, "step": 2854 }, { "epoch": 0.6582891399584967, "grad_norm": 1.2843309395390234, "learning_rate": 1.6037079247183379e-06, "loss": 0.44225364923477173, "step": 2855 }, { "epoch": 0.6585197140880793, "grad_norm": 1.3795669225253144, "learning_rate": 1.6034039372555617e-06, "loss": 0.4820272922515869, "step": 2856 }, { "epoch": 0.658750288217662, "grad_norm": 1.6263387244434722, "learning_rate": 1.6030998620819075e-06, "loss": 0.48118168115615845, "step": 2857 }, { "epoch": 0.6589808623472446, "grad_norm": 1.4704169894155685, "learning_rate": 1.6027956992415764e-06, "loss": 0.4386011064052582, "step": 2858 }, { "epoch": 0.6592114364768273, "grad_norm": 1.4148356020107666, "learning_rate": 1.6024914487787814e-06, "loss": 0.48740649223327637, "step": 2859 }, { "epoch": 0.65944201060641, "grad_norm": 1.436235867684013, "learning_rate": 1.602187110737748e-06, "loss": 0.46782761812210083, "step": 2860 }, { "epoch": 0.6596725847359927, "grad_norm": 1.2796166668007127, "learning_rate": 1.6018826851627155e-06, "loss": 0.5086358189582825, "step": 2861 }, { "epoch": 0.6599031588655753, "grad_norm": 1.1582673721463366, "learning_rate": 1.6015781720979344e-06, "loss": 0.5631915330886841, "step": 2862 }, { "epoch": 0.660133732995158, "grad_norm": 1.462417648098582, "learning_rate": 1.6012735715876693e-06, "loss": 0.5134458541870117, "step": 2863 }, { "epoch": 0.6603643071247406, "grad_norm": 1.1268653967137703, "learning_rate": 1.6009688836761969e-06, "loss": 0.4308784008026123, "step": 2864 }, { "epoch": 0.6605948812543233, "grad_norm": 1.3112517816231024, "learning_rate": 1.6006641084078068e-06, "loss": 0.5149765610694885, "step": 2865 }, { "epoch": 0.6608254553839059, "grad_norm": 1.6101510783439525, "learning_rate": 1.6003592458268005e-06, "loss": 0.521892786026001, "step": 2866 }, { "epoch": 0.6610560295134886, "grad_norm": 1.247084334907296, "learning_rate": 1.6000542959774937e-06, "loss": 0.46611008048057556, "step": 2867 }, { "epoch": 0.6612866036430712, "grad_norm": 1.2517698630875118, "learning_rate": 1.5997492589042135e-06, "loss": 0.43080392479896545, "step": 2868 }, { "epoch": 0.661517177772654, "grad_norm": 1.2239680444750303, "learning_rate": 1.5994441346513003e-06, "loss": 0.48026901483535767, "step": 2869 }, { "epoch": 0.6617477519022366, "grad_norm": 1.1948228818170457, "learning_rate": 1.5991389232631068e-06, "loss": 0.48706555366516113, "step": 2870 }, { "epoch": 0.6619783260318193, "grad_norm": 1.205848115890533, "learning_rate": 1.598833624783999e-06, "loss": 0.5093512535095215, "step": 2871 }, { "epoch": 0.6622089001614019, "grad_norm": 1.37517746631934, "learning_rate": 1.5985282392583542e-06, "loss": 0.5197086930274963, "step": 2872 }, { "epoch": 0.6624394742909846, "grad_norm": 1.3389415544634544, "learning_rate": 1.5982227667305646e-06, "loss": 0.497372031211853, "step": 2873 }, { "epoch": 0.6626700484205672, "grad_norm": 1.6851191621911175, "learning_rate": 1.597917207245033e-06, "loss": 0.4746604561805725, "step": 2874 }, { "epoch": 0.6629006225501499, "grad_norm": 1.2864362072574318, "learning_rate": 1.5976115608461755e-06, "loss": 0.5531996488571167, "step": 2875 }, { "epoch": 0.6631311966797325, "grad_norm": 1.2032344825838508, "learning_rate": 1.5973058275784208e-06, "loss": 0.44950544834136963, "step": 2876 }, { "epoch": 0.6633617708093152, "grad_norm": 1.231321509427461, "learning_rate": 1.597000007486211e-06, "loss": 0.45596158504486084, "step": 2877 }, { "epoch": 0.6635923449388978, "grad_norm": 1.1813154846400662, "learning_rate": 1.596694100613999e-06, "loss": 0.5243046879768372, "step": 2878 }, { "epoch": 0.6638229190684806, "grad_norm": 1.2111771126184059, "learning_rate": 1.5963881070062528e-06, "loss": 0.46450644731521606, "step": 2879 }, { "epoch": 0.6640534931980632, "grad_norm": 1.286085494147619, "learning_rate": 1.5960820267074509e-06, "loss": 0.5565767288208008, "step": 2880 }, { "epoch": 0.6642840673276459, "grad_norm": 1.574495375498682, "learning_rate": 1.595775859762085e-06, "loss": 0.4351605176925659, "step": 2881 }, { "epoch": 0.6645146414572285, "grad_norm": 1.3382136213218339, "learning_rate": 1.5954696062146603e-06, "loss": 0.5113346576690674, "step": 2882 }, { "epoch": 0.6647452155868112, "grad_norm": 1.203285083111209, "learning_rate": 1.5951632661096932e-06, "loss": 0.5005035996437073, "step": 2883 }, { "epoch": 0.6649757897163938, "grad_norm": 1.1502074786882042, "learning_rate": 1.5948568394917138e-06, "loss": 0.4539811611175537, "step": 2884 }, { "epoch": 0.6652063638459765, "grad_norm": 1.234546797786613, "learning_rate": 1.5945503264052637e-06, "loss": 0.4519865810871124, "step": 2885 }, { "epoch": 0.6654369379755591, "grad_norm": 1.1932724883335695, "learning_rate": 1.5942437268948985e-06, "loss": 0.5688626766204834, "step": 2886 }, { "epoch": 0.6656675121051419, "grad_norm": 1.1582733834983177, "learning_rate": 1.5939370410051846e-06, "loss": 0.5038400888442993, "step": 2887 }, { "epoch": 0.6658980862347245, "grad_norm": 1.4308591259843988, "learning_rate": 1.5936302687807028e-06, "loss": 0.6332568526268005, "step": 2888 }, { "epoch": 0.6661286603643072, "grad_norm": 1.2020172387992982, "learning_rate": 1.593323410266045e-06, "loss": 0.4994644820690155, "step": 2889 }, { "epoch": 0.6663592344938898, "grad_norm": 1.3423031921779223, "learning_rate": 1.5930164655058165e-06, "loss": 0.4952617883682251, "step": 2890 }, { "epoch": 0.6665898086234725, "grad_norm": 1.1769489968231674, "learning_rate": 1.5927094345446345e-06, "loss": 0.4188910722732544, "step": 2891 }, { "epoch": 0.6668203827530551, "grad_norm": 1.319346697910086, "learning_rate": 1.5924023174271295e-06, "loss": 0.47160637378692627, "step": 2892 }, { "epoch": 0.6670509568826377, "grad_norm": 1.0773369781050426, "learning_rate": 1.592095114197944e-06, "loss": 0.44884049892425537, "step": 2893 }, { "epoch": 0.6672815310122204, "grad_norm": 1.3166895153069564, "learning_rate": 1.5917878249017327e-06, "loss": 0.4105216860771179, "step": 2894 }, { "epoch": 0.667512105141803, "grad_norm": 1.3288589826448391, "learning_rate": 1.5914804495831634e-06, "loss": 0.5000967383384705, "step": 2895 }, { "epoch": 0.6677426792713858, "grad_norm": 1.4772652615504442, "learning_rate": 1.5911729882869163e-06, "loss": 0.45515477657318115, "step": 2896 }, { "epoch": 0.6679732534009684, "grad_norm": 1.2034912342077588, "learning_rate": 1.590865441057684e-06, "loss": 0.4492835998535156, "step": 2897 }, { "epoch": 0.6682038275305511, "grad_norm": 1.5637287950189662, "learning_rate": 1.5905578079401716e-06, "loss": 0.553781270980835, "step": 2898 }, { "epoch": 0.6684344016601337, "grad_norm": 1.235173143749482, "learning_rate": 1.5902500889790967e-06, "loss": 0.5085616111755371, "step": 2899 }, { "epoch": 0.6686649757897164, "grad_norm": 1.2766607551584273, "learning_rate": 1.5899422842191891e-06, "loss": 0.4651145935058594, "step": 2900 }, { "epoch": 0.668895549919299, "grad_norm": 1.3114841240621398, "learning_rate": 1.5896343937051921e-06, "loss": 0.5503841638565063, "step": 2901 }, { "epoch": 0.6691261240488817, "grad_norm": 1.1881721760666544, "learning_rate": 1.5893264174818599e-06, "loss": 0.48213839530944824, "step": 2902 }, { "epoch": 0.6693566981784643, "grad_norm": 1.2726619976847688, "learning_rate": 1.5890183555939604e-06, "loss": 0.4602949023246765, "step": 2903 }, { "epoch": 0.669587272308047, "grad_norm": 1.213092004639277, "learning_rate": 1.5887102080862736e-06, "loss": 0.43991196155548096, "step": 2904 }, { "epoch": 0.6698178464376296, "grad_norm": 1.2472416336517922, "learning_rate": 1.5884019750035914e-06, "loss": 0.48186323046684265, "step": 2905 }, { "epoch": 0.6700484205672124, "grad_norm": 1.3445409358829308, "learning_rate": 1.5880936563907189e-06, "loss": 0.44907671213150024, "step": 2906 }, { "epoch": 0.670278994696795, "grad_norm": 1.874421138474627, "learning_rate": 1.587785252292473e-06, "loss": 0.4475386142730713, "step": 2907 }, { "epoch": 0.6705095688263777, "grad_norm": 1.2649536391923781, "learning_rate": 1.587476762753684e-06, "loss": 0.4504704475402832, "step": 2908 }, { "epoch": 0.6707401429559603, "grad_norm": 2.0624210450483376, "learning_rate": 1.5871681878191937e-06, "loss": 0.5090106129646301, "step": 2909 }, { "epoch": 0.670970717085543, "grad_norm": 1.3010076823717651, "learning_rate": 1.5868595275338561e-06, "loss": 0.46150895953178406, "step": 2910 }, { "epoch": 0.6712012912151256, "grad_norm": 1.2556909013752833, "learning_rate": 1.586550781942539e-06, "loss": 0.5499979257583618, "step": 2911 }, { "epoch": 0.6714318653447083, "grad_norm": 1.2089730243488483, "learning_rate": 1.5862419510901211e-06, "loss": 0.46628689765930176, "step": 2912 }, { "epoch": 0.6716624394742909, "grad_norm": 1.2998808024776154, "learning_rate": 1.5859330350214941e-06, "loss": 0.4517399072647095, "step": 2913 }, { "epoch": 0.6718930136038737, "grad_norm": 1.0879313971673985, "learning_rate": 1.5856240337815621e-06, "loss": 0.4696923792362213, "step": 2914 }, { "epoch": 0.6721235877334563, "grad_norm": 1.5676723620382764, "learning_rate": 1.585314947415242e-06, "loss": 0.41357535123825073, "step": 2915 }, { "epoch": 0.672354161863039, "grad_norm": 1.2988881169526059, "learning_rate": 1.5850057759674621e-06, "loss": 0.5223745107650757, "step": 2916 }, { "epoch": 0.6725847359926216, "grad_norm": 1.5751566352241433, "learning_rate": 1.584696519483164e-06, "loss": 0.48562729358673096, "step": 2917 }, { "epoch": 0.6728153101222043, "grad_norm": 1.147456021361514, "learning_rate": 1.5843871780073009e-06, "loss": 0.3675496280193329, "step": 2918 }, { "epoch": 0.6730458842517869, "grad_norm": 1.4691177353786786, "learning_rate": 1.5840777515848389e-06, "loss": 0.5782667994499207, "step": 2919 }, { "epoch": 0.6732764583813696, "grad_norm": 1.110911745804502, "learning_rate": 1.583768240260756e-06, "loss": 0.419716477394104, "step": 2920 }, { "epoch": 0.6735070325109522, "grad_norm": 1.2625181785612978, "learning_rate": 1.5834586440800434e-06, "loss": 0.4004133939743042, "step": 2921 }, { "epoch": 0.673737606640535, "grad_norm": 1.3860644175168617, "learning_rate": 1.5831489630877037e-06, "loss": 0.4917314350605011, "step": 2922 }, { "epoch": 0.6739681807701176, "grad_norm": 1.3350109690747092, "learning_rate": 1.5828391973287522e-06, "loss": 0.5488141179084778, "step": 2923 }, { "epoch": 0.6741987548997003, "grad_norm": 1.2547850876004316, "learning_rate": 1.5825293468482163e-06, "loss": 0.5047071576118469, "step": 2924 }, { "epoch": 0.6744293290292829, "grad_norm": 1.3178326140677985, "learning_rate": 1.5822194116911364e-06, "loss": 0.4830411672592163, "step": 2925 }, { "epoch": 0.6746599031588656, "grad_norm": 1.2591886503495524, "learning_rate": 1.5819093919025641e-06, "loss": 0.47517114877700806, "step": 2926 }, { "epoch": 0.6748904772884482, "grad_norm": 1.3603729738722081, "learning_rate": 1.5815992875275642e-06, "loss": 0.5617963075637817, "step": 2927 }, { "epoch": 0.6751210514180309, "grad_norm": 1.1752484838801127, "learning_rate": 1.5812890986112137e-06, "loss": 0.4360186457633972, "step": 2928 }, { "epoch": 0.6753516255476135, "grad_norm": 1.5551926866200483, "learning_rate": 1.5809788251986014e-06, "loss": 0.49538636207580566, "step": 2929 }, { "epoch": 0.6755821996771962, "grad_norm": 1.1285780293266063, "learning_rate": 1.5806684673348288e-06, "loss": 0.538766622543335, "step": 2930 }, { "epoch": 0.6758127738067788, "grad_norm": 1.5395880930573347, "learning_rate": 1.5803580250650094e-06, "loss": 0.4113287329673767, "step": 2931 }, { "epoch": 0.6760433479363616, "grad_norm": 1.4441179706006158, "learning_rate": 1.5800474984342698e-06, "loss": 0.5298923254013062, "step": 2932 }, { "epoch": 0.6762739220659442, "grad_norm": 1.2285488161220737, "learning_rate": 1.5797368874877472e-06, "loss": 0.4891100227832794, "step": 2933 }, { "epoch": 0.6765044961955269, "grad_norm": 1.3809520207822814, "learning_rate": 1.579426192270593e-06, "loss": 0.4412326216697693, "step": 2934 }, { "epoch": 0.6767350703251095, "grad_norm": 1.3386538114869513, "learning_rate": 1.5791154128279693e-06, "loss": 0.5514793395996094, "step": 2935 }, { "epoch": 0.6769656444546922, "grad_norm": 1.2065068425398038, "learning_rate": 1.578804549205051e-06, "loss": 0.44050243496894836, "step": 2936 }, { "epoch": 0.6771962185842748, "grad_norm": 1.3084516018872256, "learning_rate": 1.5784936014470256e-06, "loss": 0.47503453493118286, "step": 2937 }, { "epoch": 0.6774267927138575, "grad_norm": 1.445992727647949, "learning_rate": 1.5781825695990922e-06, "loss": 0.524544894695282, "step": 2938 }, { "epoch": 0.6776573668434401, "grad_norm": 1.2672201923678605, "learning_rate": 1.5778714537064628e-06, "loss": 0.4203689694404602, "step": 2939 }, { "epoch": 0.6778879409730229, "grad_norm": 1.255678429788082, "learning_rate": 1.577560253814361e-06, "loss": 0.4305247664451599, "step": 2940 }, { "epoch": 0.6781185151026055, "grad_norm": 1.2383698343036857, "learning_rate": 1.577248969968023e-06, "loss": 0.6129249930381775, "step": 2941 }, { "epoch": 0.6783490892321882, "grad_norm": 1.4217586280781416, "learning_rate": 1.5769376022126969e-06, "loss": 0.44431981444358826, "step": 2942 }, { "epoch": 0.6785796633617708, "grad_norm": 1.2327303005745092, "learning_rate": 1.576626150593643e-06, "loss": 0.4394958019256592, "step": 2943 }, { "epoch": 0.6788102374913535, "grad_norm": 1.2593798978560244, "learning_rate": 1.5763146151561345e-06, "loss": 0.44481268525123596, "step": 2944 }, { "epoch": 0.6790408116209361, "grad_norm": 1.4440486279504336, "learning_rate": 1.5760029959454556e-06, "loss": 0.4251822829246521, "step": 2945 }, { "epoch": 0.6792713857505188, "grad_norm": 1.338830252556874, "learning_rate": 1.575691293006904e-06, "loss": 0.41041696071624756, "step": 2946 }, { "epoch": 0.6795019598801014, "grad_norm": 1.357017341106407, "learning_rate": 1.5753795063857883e-06, "loss": 0.5710239410400391, "step": 2947 }, { "epoch": 0.6797325340096841, "grad_norm": 1.2834985119403657, "learning_rate": 1.57506763612743e-06, "loss": 0.48825210332870483, "step": 2948 }, { "epoch": 0.6799631081392667, "grad_norm": 1.263284608882453, "learning_rate": 1.5747556822771628e-06, "loss": 0.37077784538269043, "step": 2949 }, { "epoch": 0.6801936822688495, "grad_norm": 1.2458271352531185, "learning_rate": 1.5744436448803322e-06, "loss": 0.4618649482727051, "step": 2950 }, { "epoch": 0.6804242563984321, "grad_norm": 1.0624348057433408, "learning_rate": 1.574131523982296e-06, "loss": 0.4415496289730072, "step": 2951 }, { "epoch": 0.6806548305280148, "grad_norm": 1.4732593030941656, "learning_rate": 1.5738193196284239e-06, "loss": 0.440029501914978, "step": 2952 }, { "epoch": 0.6808854046575974, "grad_norm": 1.3992294210480754, "learning_rate": 1.5735070318640986e-06, "loss": 0.5149378776550293, "step": 2953 }, { "epoch": 0.6811159787871801, "grad_norm": 1.3173119180782331, "learning_rate": 1.5731946607347136e-06, "loss": 0.4838085174560547, "step": 2954 }, { "epoch": 0.6813465529167627, "grad_norm": 1.3500402916158631, "learning_rate": 1.5728822062856757e-06, "loss": 0.48472005128860474, "step": 2955 }, { "epoch": 0.6815771270463454, "grad_norm": 1.163167888868214, "learning_rate": 1.572569668562403e-06, "loss": 0.5154656767845154, "step": 2956 }, { "epoch": 0.681807701175928, "grad_norm": 1.1906599654401737, "learning_rate": 1.5722570476103263e-06, "loss": 0.4094988703727722, "step": 2957 }, { "epoch": 0.6820382753055108, "grad_norm": 1.2324943837281264, "learning_rate": 1.5719443434748877e-06, "loss": 0.5125937461853027, "step": 2958 }, { "epoch": 0.6822688494350934, "grad_norm": 1.2538269370063608, "learning_rate": 1.5716315562015428e-06, "loss": 0.4807034730911255, "step": 2959 }, { "epoch": 0.6824994235646761, "grad_norm": 1.3513545314522855, "learning_rate": 1.5713186858357577e-06, "loss": 0.6126741170883179, "step": 2960 }, { "epoch": 0.6827299976942587, "grad_norm": 2.1674593801056887, "learning_rate": 1.5710057324230113e-06, "loss": 0.5450708866119385, "step": 2961 }, { "epoch": 0.6829605718238414, "grad_norm": 1.8355809144200355, "learning_rate": 1.5706926960087948e-06, "loss": 0.47740328311920166, "step": 2962 }, { "epoch": 0.683191145953424, "grad_norm": 1.311529987995532, "learning_rate": 1.5703795766386112e-06, "loss": 0.4731057584285736, "step": 2963 }, { "epoch": 0.6834217200830067, "grad_norm": 1.3162153678952433, "learning_rate": 1.5700663743579754e-06, "loss": 0.49735045433044434, "step": 2964 }, { "epoch": 0.6836522942125893, "grad_norm": 1.2346637447285915, "learning_rate": 1.569753089212415e-06, "loss": 0.5257318019866943, "step": 2965 }, { "epoch": 0.683882868342172, "grad_norm": 1.1458467925306592, "learning_rate": 1.5694397212474685e-06, "loss": 0.3947733938694, "step": 2966 }, { "epoch": 0.6841134424717547, "grad_norm": 1.424176183527685, "learning_rate": 1.5691262705086875e-06, "loss": 0.5078107714653015, "step": 2967 }, { "epoch": 0.6843440166013374, "grad_norm": 1.7316538509871626, "learning_rate": 1.5688127370416351e-06, "loss": 0.5921520590782166, "step": 2968 }, { "epoch": 0.68457459073092, "grad_norm": 1.2277129646213039, "learning_rate": 1.5684991208918866e-06, "loss": 0.45995181798934937, "step": 2969 }, { "epoch": 0.6848051648605027, "grad_norm": 1.1894548452861071, "learning_rate": 1.5681854221050293e-06, "loss": 0.4874386787414551, "step": 2970 }, { "epoch": 0.6850357389900853, "grad_norm": 1.3695475422493124, "learning_rate": 1.5678716407266625e-06, "loss": 0.4522739052772522, "step": 2971 }, { "epoch": 0.685266313119668, "grad_norm": 1.3244142914830208, "learning_rate": 1.5675577768023977e-06, "loss": 0.4596391022205353, "step": 2972 }, { "epoch": 0.6854968872492506, "grad_norm": 1.6847382830263626, "learning_rate": 1.567243830377858e-06, "loss": 0.5391427278518677, "step": 2973 }, { "epoch": 0.6857274613788333, "grad_norm": 1.2164543996098884, "learning_rate": 1.5669298014986786e-06, "loss": 0.5583066940307617, "step": 2974 }, { "epoch": 0.6859580355084159, "grad_norm": 1.3656527800334406, "learning_rate": 1.566615690210507e-06, "loss": 0.5410330295562744, "step": 2975 }, { "epoch": 0.6861886096379987, "grad_norm": 1.2007908045124778, "learning_rate": 1.566301496559002e-06, "loss": 0.5145233273506165, "step": 2976 }, { "epoch": 0.6864191837675813, "grad_norm": 1.4168885241389684, "learning_rate": 1.5659872205898356e-06, "loss": 0.5021970272064209, "step": 2977 }, { "epoch": 0.686649757897164, "grad_norm": 1.0896663307775538, "learning_rate": 1.5656728623486903e-06, "loss": 0.48251593112945557, "step": 2978 }, { "epoch": 0.6868803320267466, "grad_norm": 1.2502610536872558, "learning_rate": 1.5653584218812617e-06, "loss": 0.4228450655937195, "step": 2979 }, { "epoch": 0.6871109061563293, "grad_norm": 1.4048596098114436, "learning_rate": 1.5650438992332567e-06, "loss": 0.3975197374820709, "step": 2980 }, { "epoch": 0.6873414802859119, "grad_norm": 1.386478606714872, "learning_rate": 1.5647292944503945e-06, "loss": 0.5441234707832336, "step": 2981 }, { "epoch": 0.6875720544154946, "grad_norm": 1.3552115877356068, "learning_rate": 1.5644146075784057e-06, "loss": 0.5357148051261902, "step": 2982 }, { "epoch": 0.6878026285450772, "grad_norm": 1.2605289404512496, "learning_rate": 1.5640998386630337e-06, "loss": 0.530154824256897, "step": 2983 }, { "epoch": 0.68803320267466, "grad_norm": 1.3830405468746736, "learning_rate": 1.563784987750033e-06, "loss": 0.480657696723938, "step": 2984 }, { "epoch": 0.6882637768042426, "grad_norm": 1.2595390052779563, "learning_rate": 1.5634700548851712e-06, "loss": 0.4822859764099121, "step": 2985 }, { "epoch": 0.6884943509338253, "grad_norm": 1.4511024891592457, "learning_rate": 1.5631550401142257e-06, "loss": 0.48551490902900696, "step": 2986 }, { "epoch": 0.6887249250634079, "grad_norm": 1.252088599015217, "learning_rate": 1.562839943482988e-06, "loss": 0.43080294132232666, "step": 2987 }, { "epoch": 0.6889554991929906, "grad_norm": 1.1661214157780933, "learning_rate": 1.56252476503726e-06, "loss": 0.42780637741088867, "step": 2988 }, { "epoch": 0.6891860733225732, "grad_norm": 1.3057809079761946, "learning_rate": 1.5622095048228565e-06, "loss": 0.539027214050293, "step": 2989 }, { "epoch": 0.6894166474521559, "grad_norm": 1.2289425463506802, "learning_rate": 1.5618941628856037e-06, "loss": 0.4529460668563843, "step": 2990 }, { "epoch": 0.6896472215817385, "grad_norm": 1.4016140654354556, "learning_rate": 1.5615787392713395e-06, "loss": 0.49724727869033813, "step": 2991 }, { "epoch": 0.6898777957113212, "grad_norm": 1.25157972103927, "learning_rate": 1.5612632340259144e-06, "loss": 0.4711928963661194, "step": 2992 }, { "epoch": 0.6901083698409038, "grad_norm": 1.3707143585352468, "learning_rate": 1.56094764719519e-06, "loss": 0.42258220911026, "step": 2993 }, { "epoch": 0.6903389439704866, "grad_norm": 1.371187363460567, "learning_rate": 1.5606319788250398e-06, "loss": 0.47754064202308655, "step": 2994 }, { "epoch": 0.6905695181000692, "grad_norm": 1.307708883093593, "learning_rate": 1.5603162289613501e-06, "loss": 0.47200560569763184, "step": 2995 }, { "epoch": 0.6908000922296519, "grad_norm": 1.359798809074, "learning_rate": 1.5600003976500173e-06, "loss": 0.5194537043571472, "step": 2996 }, { "epoch": 0.6910306663592345, "grad_norm": 1.707437655194179, "learning_rate": 1.5596844849369518e-06, "loss": 0.4874703586101532, "step": 2997 }, { "epoch": 0.6912612404888172, "grad_norm": 1.262990523197611, "learning_rate": 1.5593684908680738e-06, "loss": 0.5028672218322754, "step": 2998 }, { "epoch": 0.6914918146183998, "grad_norm": 1.2420345591817543, "learning_rate": 1.5590524154893169e-06, "loss": 0.44250521063804626, "step": 2999 }, { "epoch": 0.6917223887479825, "grad_norm": 1.6089998258276121, "learning_rate": 1.5587362588466253e-06, "loss": 0.536510705947876, "step": 3000 }, { "epoch": 0.6919529628775651, "grad_norm": 1.3333649931769909, "learning_rate": 1.5584200209859558e-06, "loss": 0.4514959752559662, "step": 3001 }, { "epoch": 0.6921835370071479, "grad_norm": 1.1923376457733827, "learning_rate": 1.5581037019532773e-06, "loss": 0.4402197301387787, "step": 3002 }, { "epoch": 0.6924141111367305, "grad_norm": 1.1940429657833775, "learning_rate": 1.5577873017945691e-06, "loss": 0.508256196975708, "step": 3003 }, { "epoch": 0.6926446852663131, "grad_norm": 1.2600794916577294, "learning_rate": 1.5574708205558236e-06, "loss": 0.5123175978660583, "step": 3004 }, { "epoch": 0.6928752593958958, "grad_norm": 1.4303227599201425, "learning_rate": 1.5571542582830447e-06, "loss": 0.4874982237815857, "step": 3005 }, { "epoch": 0.6931058335254784, "grad_norm": 1.314228379499143, "learning_rate": 1.556837615022248e-06, "loss": 0.44554391503334045, "step": 3006 }, { "epoch": 0.6933364076550611, "grad_norm": 1.5428941228634732, "learning_rate": 1.5565208908194603e-06, "loss": 0.5899895429611206, "step": 3007 }, { "epoch": 0.6935669817846437, "grad_norm": 1.2685614762262514, "learning_rate": 1.5562040857207208e-06, "loss": 0.5137951374053955, "step": 3008 }, { "epoch": 0.6937975559142264, "grad_norm": 1.2863812659603593, "learning_rate": 1.5558871997720805e-06, "loss": 0.5435892343521118, "step": 3009 }, { "epoch": 0.694028130043809, "grad_norm": 1.4463505314835092, "learning_rate": 1.5555702330196021e-06, "loss": 0.45998525619506836, "step": 3010 }, { "epoch": 0.6942587041733917, "grad_norm": 1.324515476398786, "learning_rate": 1.5552531855093597e-06, "loss": 0.4676332473754883, "step": 3011 }, { "epoch": 0.6944892783029744, "grad_norm": 1.2595225568514163, "learning_rate": 1.5549360572874397e-06, "loss": 0.48250633478164673, "step": 3012 }, { "epoch": 0.6947198524325571, "grad_norm": 1.4537609539003187, "learning_rate": 1.5546188483999396e-06, "loss": 0.4841402769088745, "step": 3013 }, { "epoch": 0.6949504265621397, "grad_norm": 1.401637069375295, "learning_rate": 1.5543015588929688e-06, "loss": 0.4717336893081665, "step": 3014 }, { "epoch": 0.6951810006917224, "grad_norm": 1.3276052543558161, "learning_rate": 1.5539841888126488e-06, "loss": 0.48844897747039795, "step": 3015 }, { "epoch": 0.695411574821305, "grad_norm": 1.539947517538627, "learning_rate": 1.5536667382051127e-06, "loss": 0.5244781970977783, "step": 3016 }, { "epoch": 0.6956421489508877, "grad_norm": 1.2794123200247822, "learning_rate": 1.5533492071165046e-06, "loss": 0.4612278938293457, "step": 3017 }, { "epoch": 0.6958727230804703, "grad_norm": 1.1978546028008836, "learning_rate": 1.5530315955929817e-06, "loss": 0.40461257100105286, "step": 3018 }, { "epoch": 0.696103297210053, "grad_norm": 1.387518032200497, "learning_rate": 1.5527139036807112e-06, "loss": 0.5191174745559692, "step": 3019 }, { "epoch": 0.6963338713396356, "grad_norm": 1.510370534054042, "learning_rate": 1.5523961314258731e-06, "loss": 0.45882558822631836, "step": 3020 }, { "epoch": 0.6965644454692184, "grad_norm": 1.230362803290169, "learning_rate": 1.552078278874659e-06, "loss": 0.4766819477081299, "step": 3021 }, { "epoch": 0.696795019598801, "grad_norm": 1.2822436220739486, "learning_rate": 1.5517603460732724e-06, "loss": 0.4572867751121521, "step": 3022 }, { "epoch": 0.6970255937283837, "grad_norm": 1.5677891937472022, "learning_rate": 1.5514423330679272e-06, "loss": 0.4689183235168457, "step": 3023 }, { "epoch": 0.6972561678579663, "grad_norm": 1.18549719550499, "learning_rate": 1.5511242399048504e-06, "loss": 0.45769914984703064, "step": 3024 }, { "epoch": 0.697486741987549, "grad_norm": 1.3095011770493485, "learning_rate": 1.5508060666302796e-06, "loss": 0.47367236018180847, "step": 3025 }, { "epoch": 0.6977173161171316, "grad_norm": 1.5441644429162589, "learning_rate": 1.550487813290465e-06, "loss": 0.40873080492019653, "step": 3026 }, { "epoch": 0.6979478902467143, "grad_norm": 1.2349195465907241, "learning_rate": 1.5501694799316671e-06, "loss": 0.42366844415664673, "step": 3027 }, { "epoch": 0.6981784643762969, "grad_norm": 1.2587292360565243, "learning_rate": 1.5498510666001602e-06, "loss": 0.3133828043937683, "step": 3028 }, { "epoch": 0.6984090385058797, "grad_norm": 1.5168032500602213, "learning_rate": 1.549532573342228e-06, "loss": 0.5188712477684021, "step": 3029 }, { "epoch": 0.6986396126354623, "grad_norm": 1.2707264640547211, "learning_rate": 1.5492140002041668e-06, "loss": 0.4374960660934448, "step": 3030 }, { "epoch": 0.698870186765045, "grad_norm": 1.6828882278794643, "learning_rate": 1.5488953472322845e-06, "loss": 0.5285592079162598, "step": 3031 }, { "epoch": 0.6991007608946276, "grad_norm": 1.5111090584536853, "learning_rate": 1.5485766144729006e-06, "loss": 0.5331767797470093, "step": 3032 }, { "epoch": 0.6993313350242103, "grad_norm": 1.3626863062762309, "learning_rate": 1.5482578019723462e-06, "loss": 0.4546147584915161, "step": 3033 }, { "epoch": 0.6995619091537929, "grad_norm": 1.2127032724557087, "learning_rate": 1.5479389097769639e-06, "loss": 0.47674182057380676, "step": 3034 }, { "epoch": 0.6997924832833756, "grad_norm": 1.2042624102453106, "learning_rate": 1.5476199379331078e-06, "loss": 0.496138334274292, "step": 3035 }, { "epoch": 0.7000230574129582, "grad_norm": 1.367736432364491, "learning_rate": 1.547300886487144e-06, "loss": 0.4843756854534149, "step": 3036 }, { "epoch": 0.7002536315425409, "grad_norm": 1.5043582093976149, "learning_rate": 1.5469817554854494e-06, "loss": 0.6028264760971069, "step": 3037 }, { "epoch": 0.7004842056721235, "grad_norm": 1.4959257460685322, "learning_rate": 1.5466625449744134e-06, "loss": 0.49528858065605164, "step": 3038 }, { "epoch": 0.7007147798017063, "grad_norm": 1.1403876193260207, "learning_rate": 1.5463432550004358e-06, "loss": 0.466439425945282, "step": 3039 }, { "epoch": 0.7009453539312889, "grad_norm": 1.1012676712945453, "learning_rate": 1.5460238856099292e-06, "loss": 0.4196532368659973, "step": 3040 }, { "epoch": 0.7011759280608716, "grad_norm": 1.40353983379054, "learning_rate": 1.5457044368493173e-06, "loss": 0.47679999470710754, "step": 3041 }, { "epoch": 0.7014065021904542, "grad_norm": 1.2594197008827683, "learning_rate": 1.5453849087650346e-06, "loss": 0.4368046522140503, "step": 3042 }, { "epoch": 0.7016370763200369, "grad_norm": 1.2211703865137815, "learning_rate": 1.5450653014035285e-06, "loss": 0.45165273547172546, "step": 3043 }, { "epoch": 0.7018676504496195, "grad_norm": 1.1456058151260982, "learning_rate": 1.5447456148112563e-06, "loss": 0.44813454151153564, "step": 3044 }, { "epoch": 0.7020982245792022, "grad_norm": 1.269275990698592, "learning_rate": 1.5444258490346882e-06, "loss": 0.44681504368782043, "step": 3045 }, { "epoch": 0.7023287987087848, "grad_norm": 1.3036360811480283, "learning_rate": 1.5441060041203057e-06, "loss": 0.44788169860839844, "step": 3046 }, { "epoch": 0.7025593728383676, "grad_norm": 1.3232925218771132, "learning_rate": 1.5437860801146013e-06, "loss": 0.3754178285598755, "step": 3047 }, { "epoch": 0.7027899469679502, "grad_norm": 1.001044690167693, "learning_rate": 1.5434660770640787e-06, "loss": 0.3582305908203125, "step": 3048 }, { "epoch": 0.7030205210975329, "grad_norm": 1.3449464333610996, "learning_rate": 1.543145995015254e-06, "loss": 0.42649000883102417, "step": 3049 }, { "epoch": 0.7032510952271155, "grad_norm": 1.2880551855073363, "learning_rate": 1.5428258340146543e-06, "loss": 0.5164098143577576, "step": 3050 }, { "epoch": 0.7034816693566982, "grad_norm": 1.2456398303270981, "learning_rate": 1.5425055941088181e-06, "loss": 0.4193584620952606, "step": 3051 }, { "epoch": 0.7037122434862808, "grad_norm": 1.3825374305431077, "learning_rate": 1.5421852753442957e-06, "loss": 0.5230807662010193, "step": 3052 }, { "epoch": 0.7039428176158635, "grad_norm": 1.466681367301644, "learning_rate": 1.5418648777676488e-06, "loss": 0.4573478102684021, "step": 3053 }, { "epoch": 0.7041733917454461, "grad_norm": 1.1343088214156583, "learning_rate": 1.5415444014254503e-06, "loss": 0.47031426429748535, "step": 3054 }, { "epoch": 0.7044039658750288, "grad_norm": 1.3599997528041683, "learning_rate": 1.5412238463642844e-06, "loss": 0.4499198794364929, "step": 3055 }, { "epoch": 0.7046345400046115, "grad_norm": 1.4014132343100743, "learning_rate": 1.5409032126307477e-06, "loss": 0.4775800406932831, "step": 3056 }, { "epoch": 0.7048651141341942, "grad_norm": 1.4264420683743835, "learning_rate": 1.540582500271447e-06, "loss": 0.535969614982605, "step": 3057 }, { "epoch": 0.7050956882637768, "grad_norm": 1.3808494199198469, "learning_rate": 1.5402617093330013e-06, "loss": 0.5358741283416748, "step": 3058 }, { "epoch": 0.7053262623933595, "grad_norm": 1.2492824573732915, "learning_rate": 1.5399408398620406e-06, "loss": 0.5392765998840332, "step": 3059 }, { "epoch": 0.7055568365229421, "grad_norm": 1.275809486426879, "learning_rate": 1.5396198919052066e-06, "loss": 0.47976016998291016, "step": 3060 }, { "epoch": 0.7057874106525248, "grad_norm": 1.2226120465526635, "learning_rate": 1.5392988655091526e-06, "loss": 0.39919328689575195, "step": 3061 }, { "epoch": 0.7060179847821074, "grad_norm": 1.6011371731611943, "learning_rate": 1.538977760720543e-06, "loss": 0.4503553509712219, "step": 3062 }, { "epoch": 0.7062485589116901, "grad_norm": 1.2363983734925073, "learning_rate": 1.5386565775860531e-06, "loss": 0.4570388197898865, "step": 3063 }, { "epoch": 0.7064791330412727, "grad_norm": 1.2640125065615475, "learning_rate": 1.5383353161523706e-06, "loss": 0.54588782787323, "step": 3064 }, { "epoch": 0.7067097071708555, "grad_norm": 1.3495245665399438, "learning_rate": 1.5380139764661945e-06, "loss": 0.40369170904159546, "step": 3065 }, { "epoch": 0.7069402813004381, "grad_norm": 1.40505470554238, "learning_rate": 1.5376925585742341e-06, "loss": 0.5079206228256226, "step": 3066 }, { "epoch": 0.7071708554300208, "grad_norm": 1.2407138703812135, "learning_rate": 1.5373710625232107e-06, "loss": 0.41418159008026123, "step": 3067 }, { "epoch": 0.7074014295596034, "grad_norm": 1.2523103492462024, "learning_rate": 1.5370494883598575e-06, "loss": 0.4546199142932892, "step": 3068 }, { "epoch": 0.7076320036891861, "grad_norm": 1.1794904786936184, "learning_rate": 1.5367278361309183e-06, "loss": 0.48041367530822754, "step": 3069 }, { "epoch": 0.7078625778187687, "grad_norm": 1.3468711432386478, "learning_rate": 1.5364061058831486e-06, "loss": 0.47676384449005127, "step": 3070 }, { "epoch": 0.7080931519483514, "grad_norm": 1.1888236379295274, "learning_rate": 1.5360842976633148e-06, "loss": 0.47341692447662354, "step": 3071 }, { "epoch": 0.708323726077934, "grad_norm": 1.3227579498868685, "learning_rate": 1.5357624115181956e-06, "loss": 0.38436269760131836, "step": 3072 }, { "epoch": 0.7085543002075168, "grad_norm": 1.4827200040386144, "learning_rate": 1.5354404474945798e-06, "loss": 0.5369806289672852, "step": 3073 }, { "epoch": 0.7087848743370994, "grad_norm": 1.404704151375413, "learning_rate": 1.535118405639269e-06, "loss": 0.5314677953720093, "step": 3074 }, { "epoch": 0.7090154484666821, "grad_norm": 1.1927563297298747, "learning_rate": 1.5347962859990742e-06, "loss": 0.49233007431030273, "step": 3075 }, { "epoch": 0.7092460225962647, "grad_norm": 1.3477590726762334, "learning_rate": 1.5344740886208194e-06, "loss": 0.4834766983985901, "step": 3076 }, { "epoch": 0.7094765967258474, "grad_norm": 1.432138793969477, "learning_rate": 1.534151813551339e-06, "loss": 0.505670428276062, "step": 3077 }, { "epoch": 0.70970717085543, "grad_norm": 1.3290190812046396, "learning_rate": 1.533829460837479e-06, "loss": 0.5256010293960571, "step": 3078 }, { "epoch": 0.7099377449850127, "grad_norm": 1.463108893430833, "learning_rate": 1.5335070305260967e-06, "loss": 0.4186098873615265, "step": 3079 }, { "epoch": 0.7101683191145953, "grad_norm": 1.2048981968166306, "learning_rate": 1.5331845226640607e-06, "loss": 0.4034464359283447, "step": 3080 }, { "epoch": 0.710398893244178, "grad_norm": 1.346673761335588, "learning_rate": 1.5328619372982505e-06, "loss": 0.4521537721157074, "step": 3081 }, { "epoch": 0.7106294673737606, "grad_norm": 1.5250190734837208, "learning_rate": 1.5325392744755574e-06, "loss": 0.4919602572917938, "step": 3082 }, { "epoch": 0.7108600415033434, "grad_norm": 1.1734195700346683, "learning_rate": 1.5322165342428835e-06, "loss": 0.4464415907859802, "step": 3083 }, { "epoch": 0.711090615632926, "grad_norm": 1.2610549525832775, "learning_rate": 1.5318937166471427e-06, "loss": 0.47444385290145874, "step": 3084 }, { "epoch": 0.7113211897625087, "grad_norm": 1.1782687896584645, "learning_rate": 1.5315708217352595e-06, "loss": 0.4014730453491211, "step": 3085 }, { "epoch": 0.7115517638920913, "grad_norm": 1.1806273152667501, "learning_rate": 1.5312478495541703e-06, "loss": 0.4528852701187134, "step": 3086 }, { "epoch": 0.711782338021674, "grad_norm": 1.4716504682159035, "learning_rate": 1.5309248001508216e-06, "loss": 0.4919637441635132, "step": 3087 }, { "epoch": 0.7120129121512566, "grad_norm": 1.3824738486934829, "learning_rate": 1.530601673572173e-06, "loss": 0.5630985498428345, "step": 3088 }, { "epoch": 0.7122434862808393, "grad_norm": 1.4462966182250279, "learning_rate": 1.5302784698651935e-06, "loss": 0.3920522630214691, "step": 3089 }, { "epoch": 0.7124740604104219, "grad_norm": 1.3282823423467587, "learning_rate": 1.5299551890768642e-06, "loss": 0.5502145290374756, "step": 3090 }, { "epoch": 0.7127046345400047, "grad_norm": 1.2547204060730106, "learning_rate": 1.5296318312541767e-06, "loss": 0.4839448928833008, "step": 3091 }, { "epoch": 0.7129352086695873, "grad_norm": 1.3486430423834108, "learning_rate": 1.5293083964441355e-06, "loss": 0.5029735565185547, "step": 3092 }, { "epoch": 0.71316578279917, "grad_norm": 1.2299483009823662, "learning_rate": 1.5289848846937544e-06, "loss": 0.4724803566932678, "step": 3093 }, { "epoch": 0.7133963569287526, "grad_norm": 1.1015042263762262, "learning_rate": 1.528661296050059e-06, "loss": 0.4609840512275696, "step": 3094 }, { "epoch": 0.7136269310583353, "grad_norm": 1.4829248198628113, "learning_rate": 1.5283376305600863e-06, "loss": 0.49763959646224976, "step": 3095 }, { "epoch": 0.7138575051879179, "grad_norm": 1.2090810088725865, "learning_rate": 1.5280138882708847e-06, "loss": 0.42384523153305054, "step": 3096 }, { "epoch": 0.7140880793175006, "grad_norm": 1.3550047979469209, "learning_rate": 1.5276900692295134e-06, "loss": 0.5034611225128174, "step": 3097 }, { "epoch": 0.7143186534470832, "grad_norm": 1.3321189275554508, "learning_rate": 1.5273661734830423e-06, "loss": 0.5617417097091675, "step": 3098 }, { "epoch": 0.714549227576666, "grad_norm": 1.320340684589947, "learning_rate": 1.527042201078553e-06, "loss": 0.4562014937400818, "step": 3099 }, { "epoch": 0.7147798017062486, "grad_norm": 1.6932438225785027, "learning_rate": 1.5267181520631386e-06, "loss": 0.5626288056373596, "step": 3100 }, { "epoch": 0.7150103758358313, "grad_norm": 1.4526784651389733, "learning_rate": 1.5263940264839028e-06, "loss": 0.4882054924964905, "step": 3101 }, { "epoch": 0.7152409499654139, "grad_norm": 1.523666745804484, "learning_rate": 1.5260698243879603e-06, "loss": 0.5371058583259583, "step": 3102 }, { "epoch": 0.7154715240949966, "grad_norm": 1.1599798656247362, "learning_rate": 1.5257455458224368e-06, "loss": 0.4683259129524231, "step": 3103 }, { "epoch": 0.7157020982245792, "grad_norm": 1.223986374608111, "learning_rate": 1.5254211908344704e-06, "loss": 0.4894726872444153, "step": 3104 }, { "epoch": 0.7159326723541619, "grad_norm": 1.3226351110788483, "learning_rate": 1.5250967594712089e-06, "loss": 0.4517880082130432, "step": 3105 }, { "epoch": 0.7161632464837445, "grad_norm": 1.162528176566508, "learning_rate": 1.5247722517798118e-06, "loss": 0.5062767267227173, "step": 3106 }, { "epoch": 0.7163938206133272, "grad_norm": 1.6349408984878264, "learning_rate": 1.5244476678074494e-06, "loss": 0.5029302835464478, "step": 3107 }, { "epoch": 0.7166243947429098, "grad_norm": 1.3765367207185526, "learning_rate": 1.5241230076013035e-06, "loss": 0.44112175703048706, "step": 3108 }, { "epoch": 0.7168549688724926, "grad_norm": 1.3847966627377115, "learning_rate": 1.5237982712085665e-06, "loss": 0.43693509697914124, "step": 3109 }, { "epoch": 0.7170855430020752, "grad_norm": 1.3509946026255297, "learning_rate": 1.5234734586764422e-06, "loss": 0.4544166922569275, "step": 3110 }, { "epoch": 0.7173161171316579, "grad_norm": 1.1949924477500942, "learning_rate": 1.5231485700521451e-06, "loss": 0.5470178127288818, "step": 3111 }, { "epoch": 0.7175466912612405, "grad_norm": 1.5007057362656466, "learning_rate": 1.5228236053829017e-06, "loss": 0.5215972065925598, "step": 3112 }, { "epoch": 0.7177772653908232, "grad_norm": 1.1400006826022246, "learning_rate": 1.5224985647159488e-06, "loss": 0.3922381103038788, "step": 3113 }, { "epoch": 0.7180078395204058, "grad_norm": 1.3432802481675237, "learning_rate": 1.5221734480985341e-06, "loss": 0.47455158829689026, "step": 3114 }, { "epoch": 0.7182384136499884, "grad_norm": 1.517078162476979, "learning_rate": 1.5218482555779164e-06, "loss": 0.5776175260543823, "step": 3115 }, { "epoch": 0.7184689877795711, "grad_norm": 1.4757174936390305, "learning_rate": 1.521522987201366e-06, "loss": 0.40414175391197205, "step": 3116 }, { "epoch": 0.7186995619091537, "grad_norm": 1.5441693701407133, "learning_rate": 1.5211976430161643e-06, "loss": 0.44597384333610535, "step": 3117 }, { "epoch": 0.7189301360387365, "grad_norm": 1.6495022083145716, "learning_rate": 1.5208722230696024e-06, "loss": 0.50276118516922, "step": 3118 }, { "epoch": 0.7191607101683191, "grad_norm": 1.255966386168249, "learning_rate": 1.5205467274089844e-06, "loss": 0.43281811475753784, "step": 3119 }, { "epoch": 0.7193912842979018, "grad_norm": 1.196003407991791, "learning_rate": 1.5202211560816243e-06, "loss": 0.3796764016151428, "step": 3120 }, { "epoch": 0.7196218584274844, "grad_norm": 1.1855608567240021, "learning_rate": 1.5198955091348463e-06, "loss": 0.47820231318473816, "step": 3121 }, { "epoch": 0.7198524325570671, "grad_norm": 1.3809241508956476, "learning_rate": 1.5195697866159875e-06, "loss": 0.4737284779548645, "step": 3122 }, { "epoch": 0.7200830066866497, "grad_norm": 1.3019928778593748, "learning_rate": 1.519243988572394e-06, "loss": 0.44652169942855835, "step": 3123 }, { "epoch": 0.7203135808162324, "grad_norm": 1.0393403987452434, "learning_rate": 1.518918115051425e-06, "loss": 0.42702072858810425, "step": 3124 }, { "epoch": 0.720544154945815, "grad_norm": 1.3835329760109338, "learning_rate": 1.5185921661004483e-06, "loss": 0.5003541707992554, "step": 3125 }, { "epoch": 0.7207747290753977, "grad_norm": 1.3444035589789487, "learning_rate": 1.518266141766845e-06, "loss": 0.5045102834701538, "step": 3126 }, { "epoch": 0.7210053032049804, "grad_norm": 1.3069630488439725, "learning_rate": 1.5179400420980052e-06, "loss": 0.46619412302970886, "step": 3127 }, { "epoch": 0.7212358773345631, "grad_norm": 1.7755918931491346, "learning_rate": 1.5176138671413314e-06, "loss": 0.5006855726242065, "step": 3128 }, { "epoch": 0.7214664514641457, "grad_norm": 1.4202077937995432, "learning_rate": 1.5172876169442362e-06, "loss": 0.4394634962081909, "step": 3129 }, { "epoch": 0.7216970255937284, "grad_norm": 1.203576429459206, "learning_rate": 1.5169612915541428e-06, "loss": 0.49311593174934387, "step": 3130 }, { "epoch": 0.721927599723311, "grad_norm": 1.2610358507024448, "learning_rate": 1.5166348910184868e-06, "loss": 0.38406768441200256, "step": 3131 }, { "epoch": 0.7221581738528937, "grad_norm": 1.52088025341024, "learning_rate": 1.5163084153847132e-06, "loss": 0.547613799571991, "step": 3132 }, { "epoch": 0.7223887479824763, "grad_norm": 1.4599825671580298, "learning_rate": 1.515981864700279e-06, "loss": 0.43875589966773987, "step": 3133 }, { "epoch": 0.722619322112059, "grad_norm": 1.3276172293945816, "learning_rate": 1.5156552390126516e-06, "loss": 0.41515982151031494, "step": 3134 }, { "epoch": 0.7228498962416416, "grad_norm": 1.400170522869638, "learning_rate": 1.5153285383693088e-06, "loss": 0.43297481536865234, "step": 3135 }, { "epoch": 0.7230804703712244, "grad_norm": 1.3346402467183769, "learning_rate": 1.5150017628177408e-06, "loss": 0.5059916377067566, "step": 3136 }, { "epoch": 0.723311044500807, "grad_norm": 1.4474439218451525, "learning_rate": 1.514674912405447e-06, "loss": 0.4776325225830078, "step": 3137 }, { "epoch": 0.7235416186303897, "grad_norm": 1.4332410620248028, "learning_rate": 1.5143479871799381e-06, "loss": 0.4925272464752197, "step": 3138 }, { "epoch": 0.7237721927599723, "grad_norm": 0.9806444224416654, "learning_rate": 1.5140209871887368e-06, "loss": 0.3825960159301758, "step": 3139 }, { "epoch": 0.724002766889555, "grad_norm": 1.811554812935443, "learning_rate": 1.513693912479376e-06, "loss": 0.5582098960876465, "step": 3140 }, { "epoch": 0.7242333410191376, "grad_norm": 1.4229587145535472, "learning_rate": 1.5133667630993983e-06, "loss": 0.4079757630825043, "step": 3141 }, { "epoch": 0.7244639151487203, "grad_norm": 1.3307764336864334, "learning_rate": 1.513039539096359e-06, "loss": 0.4996449947357178, "step": 3142 }, { "epoch": 0.7246944892783029, "grad_norm": 1.2360600034220603, "learning_rate": 1.5127122405178233e-06, "loss": 0.4822157323360443, "step": 3143 }, { "epoch": 0.7249250634078857, "grad_norm": 1.2687974509229507, "learning_rate": 1.512384867411367e-06, "loss": 0.43123728036880493, "step": 3144 }, { "epoch": 0.7251556375374683, "grad_norm": 1.2723246094506335, "learning_rate": 1.5120574198245776e-06, "loss": 0.4942808151245117, "step": 3145 }, { "epoch": 0.725386211667051, "grad_norm": 1.1117112525626116, "learning_rate": 1.5117298978050525e-06, "loss": 0.49165093898773193, "step": 3146 }, { "epoch": 0.7256167857966336, "grad_norm": 1.2668452294382095, "learning_rate": 1.5114023014004008e-06, "loss": 0.4700804352760315, "step": 3147 }, { "epoch": 0.7258473599262163, "grad_norm": 1.9638712043686382, "learning_rate": 1.5110746306582413e-06, "loss": 0.4703143835067749, "step": 3148 }, { "epoch": 0.7260779340557989, "grad_norm": 1.2418379131661055, "learning_rate": 1.5107468856262048e-06, "loss": 0.47312211990356445, "step": 3149 }, { "epoch": 0.7263085081853816, "grad_norm": 1.3558937860977873, "learning_rate": 1.5104190663519323e-06, "loss": 0.49607813358306885, "step": 3150 }, { "epoch": 0.7265390823149642, "grad_norm": 1.2747447528869889, "learning_rate": 1.5100911728830754e-06, "loss": 0.4401499629020691, "step": 3151 }, { "epoch": 0.7267696564445469, "grad_norm": 1.3050498169083122, "learning_rate": 1.5097632052672973e-06, "loss": 0.4979579448699951, "step": 3152 }, { "epoch": 0.7270002305741295, "grad_norm": 1.1477032098667286, "learning_rate": 1.5094351635522706e-06, "loss": 0.42917048931121826, "step": 3153 }, { "epoch": 0.7272308047037123, "grad_norm": 1.2688450847611672, "learning_rate": 1.50910704778568e-06, "loss": 0.41664260625839233, "step": 3154 }, { "epoch": 0.7274613788332949, "grad_norm": 1.4083630490412662, "learning_rate": 1.5087788580152206e-06, "loss": 0.5000253915786743, "step": 3155 }, { "epoch": 0.7276919529628776, "grad_norm": 1.2424572303309531, "learning_rate": 1.5084505942885976e-06, "loss": 0.5075093507766724, "step": 3156 }, { "epoch": 0.7279225270924602, "grad_norm": 1.319578470826436, "learning_rate": 1.508122256653528e-06, "loss": 0.44975680112838745, "step": 3157 }, { "epoch": 0.7281531012220429, "grad_norm": 1.1450711263341298, "learning_rate": 1.5077938451577383e-06, "loss": 0.44494926929473877, "step": 3158 }, { "epoch": 0.7283836753516255, "grad_norm": 1.3333716905743178, "learning_rate": 1.5074653598489673e-06, "loss": 0.5664352178573608, "step": 3159 }, { "epoch": 0.7286142494812082, "grad_norm": 1.1840094617058035, "learning_rate": 1.507136800774963e-06, "loss": 0.5694705247879028, "step": 3160 }, { "epoch": 0.7288448236107908, "grad_norm": 1.5658434570152957, "learning_rate": 1.506808167983485e-06, "loss": 0.5121151804924011, "step": 3161 }, { "epoch": 0.7290753977403736, "grad_norm": 1.3559529766390859, "learning_rate": 1.5064794615223034e-06, "loss": 0.45935380458831787, "step": 3162 }, { "epoch": 0.7293059718699562, "grad_norm": 1.2036749528520703, "learning_rate": 1.506150681439199e-06, "loss": 0.517521858215332, "step": 3163 }, { "epoch": 0.7295365459995389, "grad_norm": 1.271352713883254, "learning_rate": 1.5058218277819638e-06, "loss": 0.5078546404838562, "step": 3164 }, { "epoch": 0.7297671201291215, "grad_norm": 1.4877111530715366, "learning_rate": 1.5054929005983992e-06, "loss": 0.47892552614212036, "step": 3165 }, { "epoch": 0.7299976942587042, "grad_norm": 1.5569470487033794, "learning_rate": 1.5051638999363185e-06, "loss": 0.48825597763061523, "step": 3166 }, { "epoch": 0.7302282683882868, "grad_norm": 1.2181600327145499, "learning_rate": 1.5048348258435457e-06, "loss": 0.488031804561615, "step": 3167 }, { "epoch": 0.7304588425178695, "grad_norm": 1.178638754387744, "learning_rate": 1.5045056783679143e-06, "loss": 0.4669504761695862, "step": 3168 }, { "epoch": 0.7306894166474521, "grad_norm": 1.364305786110939, "learning_rate": 1.5041764575572695e-06, "loss": 0.45620614290237427, "step": 3169 }, { "epoch": 0.7309199907770348, "grad_norm": 1.4607481202185084, "learning_rate": 1.5038471634594667e-06, "loss": 0.4271177649497986, "step": 3170 }, { "epoch": 0.7311505649066175, "grad_norm": 1.4441980354968733, "learning_rate": 1.5035177961223726e-06, "loss": 0.5170531272888184, "step": 3171 }, { "epoch": 0.7313811390362002, "grad_norm": 1.046719642579895, "learning_rate": 1.5031883555938638e-06, "loss": 0.4261493682861328, "step": 3172 }, { "epoch": 0.7316117131657828, "grad_norm": 1.4357281868096983, "learning_rate": 1.502858841921828e-06, "loss": 0.4958994686603546, "step": 3173 }, { "epoch": 0.7318422872953655, "grad_norm": 1.631538220078115, "learning_rate": 1.502529255154163e-06, "loss": 0.49798572063446045, "step": 3174 }, { "epoch": 0.7320728614249481, "grad_norm": 1.3524076496726538, "learning_rate": 1.502199595338778e-06, "loss": 0.4067850708961487, "step": 3175 }, { "epoch": 0.7323034355545308, "grad_norm": 1.2000506588677564, "learning_rate": 1.5018698625235916e-06, "loss": 0.4680994153022766, "step": 3176 }, { "epoch": 0.7325340096841134, "grad_norm": 1.3054261583860276, "learning_rate": 1.501540056756535e-06, "loss": 0.49181580543518066, "step": 3177 }, { "epoch": 0.7327645838136961, "grad_norm": 1.485479754545564, "learning_rate": 1.501210178085548e-06, "loss": 0.5425546169281006, "step": 3178 }, { "epoch": 0.7329951579432787, "grad_norm": 1.1514309763496005, "learning_rate": 1.500880226558582e-06, "loss": 0.4869355261325836, "step": 3179 }, { "epoch": 0.7332257320728615, "grad_norm": 1.5737536993523387, "learning_rate": 1.500550202223599e-06, "loss": 0.5157885551452637, "step": 3180 }, { "epoch": 0.7334563062024441, "grad_norm": 1.4471157017235972, "learning_rate": 1.5002201051285707e-06, "loss": 0.528350293636322, "step": 3181 }, { "epoch": 0.7336868803320268, "grad_norm": 1.0924579051997452, "learning_rate": 1.499889935321481e-06, "loss": 0.3963279128074646, "step": 3182 }, { "epoch": 0.7339174544616094, "grad_norm": 1.0536411378011648, "learning_rate": 1.499559692850323e-06, "loss": 0.36777108907699585, "step": 3183 }, { "epoch": 0.7341480285911921, "grad_norm": 1.3572066258310391, "learning_rate": 1.4992293777631004e-06, "loss": 0.4592905044555664, "step": 3184 }, { "epoch": 0.7343786027207747, "grad_norm": 1.3801194879873266, "learning_rate": 1.4988989901078285e-06, "loss": 0.458257257938385, "step": 3185 }, { "epoch": 0.7346091768503574, "grad_norm": 1.2823442631336313, "learning_rate": 1.4985685299325316e-06, "loss": 0.4844989478588104, "step": 3186 }, { "epoch": 0.73483975097994, "grad_norm": 1.3019212093413413, "learning_rate": 1.498237997285247e-06, "loss": 0.381417453289032, "step": 3187 }, { "epoch": 0.7350703251095227, "grad_norm": 1.267517645310936, "learning_rate": 1.4979073922140196e-06, "loss": 0.42452555894851685, "step": 3188 }, { "epoch": 0.7353008992391054, "grad_norm": 1.2143530957836637, "learning_rate": 1.4975767147669063e-06, "loss": 0.4660685956478119, "step": 3189 }, { "epoch": 0.7355314733686881, "grad_norm": 1.243568614271109, "learning_rate": 1.4972459649919748e-06, "loss": 0.4332653880119324, "step": 3190 }, { "epoch": 0.7357620474982707, "grad_norm": 1.4818958085574696, "learning_rate": 1.496915142937303e-06, "loss": 0.5580132007598877, "step": 3191 }, { "epoch": 0.7359926216278534, "grad_norm": 1.102415574688255, "learning_rate": 1.4965842486509792e-06, "loss": 0.43711793422698975, "step": 3192 }, { "epoch": 0.736223195757436, "grad_norm": 1.1786805187530485, "learning_rate": 1.496253282181102e-06, "loss": 0.44969767332077026, "step": 3193 }, { "epoch": 0.7364537698870187, "grad_norm": 1.5017804708887366, "learning_rate": 1.4959222435757809e-06, "loss": 0.5288668870925903, "step": 3194 }, { "epoch": 0.7366843440166013, "grad_norm": 1.2442315862489326, "learning_rate": 1.4955911328831353e-06, "loss": 0.45993220806121826, "step": 3195 }, { "epoch": 0.736914918146184, "grad_norm": 1.6618645292728147, "learning_rate": 1.4952599501512963e-06, "loss": 0.5360512733459473, "step": 3196 }, { "epoch": 0.7371454922757666, "grad_norm": 1.2833906478614454, "learning_rate": 1.4949286954284044e-06, "loss": 0.3923282325267792, "step": 3197 }, { "epoch": 0.7373760664053494, "grad_norm": 1.2830570803742403, "learning_rate": 1.4945973687626103e-06, "loss": 0.5051449537277222, "step": 3198 }, { "epoch": 0.737606640534932, "grad_norm": 1.288727241344276, "learning_rate": 1.4942659702020763e-06, "loss": 0.5035187602043152, "step": 3199 }, { "epoch": 0.7378372146645147, "grad_norm": 1.1929311231536464, "learning_rate": 1.4939344997949742e-06, "loss": 0.4922195076942444, "step": 3200 }, { "epoch": 0.7380677887940973, "grad_norm": 1.1654414900260779, "learning_rate": 1.4936029575894865e-06, "loss": 0.49664247035980225, "step": 3201 }, { "epoch": 0.73829836292368, "grad_norm": 1.2090144084254086, "learning_rate": 1.4932713436338065e-06, "loss": 0.4240155816078186, "step": 3202 }, { "epoch": 0.7385289370532626, "grad_norm": 1.150655085488804, "learning_rate": 1.4929396579761376e-06, "loss": 0.3830781579017639, "step": 3203 }, { "epoch": 0.7387595111828453, "grad_norm": 1.2626520886498587, "learning_rate": 1.4926079006646936e-06, "loss": 0.37983447313308716, "step": 3204 }, { "epoch": 0.7389900853124279, "grad_norm": 1.37294258180721, "learning_rate": 1.4922760717476989e-06, "loss": 0.4680769443511963, "step": 3205 }, { "epoch": 0.7392206594420107, "grad_norm": 1.0992782157194299, "learning_rate": 1.4919441712733878e-06, "loss": 0.3801664710044861, "step": 3206 }, { "epoch": 0.7394512335715933, "grad_norm": 1.2101909370157682, "learning_rate": 1.4916121992900062e-06, "loss": 0.5506627559661865, "step": 3207 }, { "epoch": 0.739681807701176, "grad_norm": 1.4326210599966231, "learning_rate": 1.4912801558458087e-06, "loss": 0.4976215660572052, "step": 3208 }, { "epoch": 0.7399123818307586, "grad_norm": 1.269851030633043, "learning_rate": 1.4909480409890615e-06, "loss": 0.42806485295295715, "step": 3209 }, { "epoch": 0.7401429559603413, "grad_norm": 1.5738327378318604, "learning_rate": 1.4906158547680413e-06, "loss": 0.3850712180137634, "step": 3210 }, { "epoch": 0.7403735300899239, "grad_norm": 1.1706966056418486, "learning_rate": 1.4902835972310342e-06, "loss": 0.4356945753097534, "step": 3211 }, { "epoch": 0.7406041042195066, "grad_norm": 1.3196733008465567, "learning_rate": 1.4899512684263373e-06, "loss": 0.4806904196739197, "step": 3212 }, { "epoch": 0.7408346783490892, "grad_norm": 1.6634902313002624, "learning_rate": 1.489618868402258e-06, "loss": 0.544597327709198, "step": 3213 }, { "epoch": 0.7410652524786719, "grad_norm": 1.2400106880376924, "learning_rate": 1.4892863972071141e-06, "loss": 0.39847469329833984, "step": 3214 }, { "epoch": 0.7412958266082545, "grad_norm": 1.165782132875825, "learning_rate": 1.4889538548892336e-06, "loss": 0.4959847331047058, "step": 3215 }, { "epoch": 0.7415264007378373, "grad_norm": 1.1727701470106202, "learning_rate": 1.488621241496955e-06, "loss": 0.3839089870452881, "step": 3216 }, { "epoch": 0.7417569748674199, "grad_norm": 1.4119004491894294, "learning_rate": 1.4882885570786266e-06, "loss": 0.5187599658966064, "step": 3217 }, { "epoch": 0.7419875489970026, "grad_norm": 1.1715648701346035, "learning_rate": 1.4879558016826082e-06, "loss": 0.45735663175582886, "step": 3218 }, { "epoch": 0.7422181231265852, "grad_norm": 1.2093385209256575, "learning_rate": 1.4876229753572687e-06, "loss": 0.5635267496109009, "step": 3219 }, { "epoch": 0.7424486972561679, "grad_norm": 1.5737635031230153, "learning_rate": 1.4872900781509876e-06, "loss": 0.5255833268165588, "step": 3220 }, { "epoch": 0.7426792713857505, "grad_norm": 1.3608013352784492, "learning_rate": 1.486957110112155e-06, "loss": 0.4563497304916382, "step": 3221 }, { "epoch": 0.7429098455153332, "grad_norm": 1.2494840959741684, "learning_rate": 1.4866240712891714e-06, "loss": 0.3737669885158539, "step": 3222 }, { "epoch": 0.7431404196449158, "grad_norm": 1.3341042787752078, "learning_rate": 1.4862909617304473e-06, "loss": 0.48965659737586975, "step": 3223 }, { "epoch": 0.7433709937744986, "grad_norm": 1.138792861067833, "learning_rate": 1.4859577814844036e-06, "loss": 0.40867483615875244, "step": 3224 }, { "epoch": 0.7436015679040812, "grad_norm": 1.6873709244395776, "learning_rate": 1.4856245305994711e-06, "loss": 0.5870566368103027, "step": 3225 }, { "epoch": 0.7438321420336638, "grad_norm": 1.9479920905112817, "learning_rate": 1.4852912091240914e-06, "loss": 0.5424025654792786, "step": 3226 }, { "epoch": 0.7440627161632465, "grad_norm": 1.3117337551828157, "learning_rate": 1.4849578171067166e-06, "loss": 0.5305285453796387, "step": 3227 }, { "epoch": 0.7442932902928291, "grad_norm": 1.6524409541791285, "learning_rate": 1.4846243545958078e-06, "loss": 0.4189227819442749, "step": 3228 }, { "epoch": 0.7445238644224118, "grad_norm": 1.3163917938675591, "learning_rate": 1.4842908216398379e-06, "loss": 0.44568121433258057, "step": 3229 }, { "epoch": 0.7447544385519944, "grad_norm": 1.57546318763007, "learning_rate": 1.4839572182872883e-06, "loss": 0.5177523493766785, "step": 3230 }, { "epoch": 0.7449850126815771, "grad_norm": 2.0231485633083213, "learning_rate": 1.4836235445866528e-06, "loss": 0.5100630521774292, "step": 3231 }, { "epoch": 0.7452155868111597, "grad_norm": 1.2988766977840327, "learning_rate": 1.4832898005864336e-06, "loss": 0.45731791853904724, "step": 3232 }, { "epoch": 0.7454461609407425, "grad_norm": 1.4418312758556044, "learning_rate": 1.4829559863351437e-06, "loss": 0.5161736011505127, "step": 3233 }, { "epoch": 0.7456767350703251, "grad_norm": 1.2131599613200943, "learning_rate": 1.4826221018813067e-06, "loss": 0.4778611660003662, "step": 3234 }, { "epoch": 0.7459073091999078, "grad_norm": 1.208766404583587, "learning_rate": 1.482288147273456e-06, "loss": 0.467506468296051, "step": 3235 }, { "epoch": 0.7461378833294904, "grad_norm": 1.3564852786094337, "learning_rate": 1.4819541225601352e-06, "loss": 0.5061084032058716, "step": 3236 }, { "epoch": 0.7463684574590731, "grad_norm": 1.3693293129226278, "learning_rate": 1.4816200277898983e-06, "loss": 0.5066365599632263, "step": 3237 }, { "epoch": 0.7465990315886557, "grad_norm": 1.2091939411250054, "learning_rate": 1.4812858630113093e-06, "loss": 0.44285398721694946, "step": 3238 }, { "epoch": 0.7468296057182384, "grad_norm": 1.3395886619598594, "learning_rate": 1.4809516282729426e-06, "loss": 0.5325936079025269, "step": 3239 }, { "epoch": 0.747060179847821, "grad_norm": 1.2575363206535257, "learning_rate": 1.4806173236233818e-06, "loss": 0.37296950817108154, "step": 3240 }, { "epoch": 0.7472907539774037, "grad_norm": 1.3466058050144787, "learning_rate": 1.4802829491112228e-06, "loss": 0.4596887230873108, "step": 3241 }, { "epoch": 0.7475213281069863, "grad_norm": 1.4791727382559166, "learning_rate": 1.4799485047850693e-06, "loss": 0.4344385266304016, "step": 3242 }, { "epoch": 0.7477519022365691, "grad_norm": 1.235031250671636, "learning_rate": 1.4796139906935365e-06, "loss": 0.458631306886673, "step": 3243 }, { "epoch": 0.7479824763661517, "grad_norm": 1.3676048590005543, "learning_rate": 1.4792794068852494e-06, "loss": 0.5425032377243042, "step": 3244 }, { "epoch": 0.7482130504957344, "grad_norm": 1.1764717045773245, "learning_rate": 1.478944753408843e-06, "loss": 0.4240065813064575, "step": 3245 }, { "epoch": 0.748443624625317, "grad_norm": 1.3527342191314002, "learning_rate": 1.478610030312963e-06, "loss": 0.5533365607261658, "step": 3246 }, { "epoch": 0.7486741987548997, "grad_norm": 1.4574041701217884, "learning_rate": 1.4782752376462647e-06, "loss": 0.4089345335960388, "step": 3247 }, { "epoch": 0.7489047728844823, "grad_norm": 1.3793731191813918, "learning_rate": 1.4779403754574131e-06, "loss": 0.5098259449005127, "step": 3248 }, { "epoch": 0.749135347014065, "grad_norm": 1.3041128935188901, "learning_rate": 1.4776054437950842e-06, "loss": 0.4615677297115326, "step": 3249 }, { "epoch": 0.7493659211436476, "grad_norm": 1.3216071057711354, "learning_rate": 1.4772704427079639e-06, "loss": 0.460266649723053, "step": 3250 }, { "epoch": 0.7495964952732304, "grad_norm": 1.4054347579351087, "learning_rate": 1.4769353722447476e-06, "loss": 0.4727064371109009, "step": 3251 }, { "epoch": 0.749827069402813, "grad_norm": 1.3954753679563598, "learning_rate": 1.4766002324541411e-06, "loss": 0.4733152985572815, "step": 3252 }, { "epoch": 0.7500576435323957, "grad_norm": 1.408517900798552, "learning_rate": 1.4762650233848609e-06, "loss": 0.5055218935012817, "step": 3253 }, { "epoch": 0.7502882176619783, "grad_norm": 1.3285058616446128, "learning_rate": 1.4759297450856324e-06, "loss": 0.6129124164581299, "step": 3254 }, { "epoch": 0.750518791791561, "grad_norm": 1.6354094862337523, "learning_rate": 1.4755943976051926e-06, "loss": 0.46197545528411865, "step": 3255 }, { "epoch": 0.7507493659211436, "grad_norm": 1.3239897164772563, "learning_rate": 1.4752589809922868e-06, "loss": 0.5227653980255127, "step": 3256 }, { "epoch": 0.7509799400507263, "grad_norm": 1.4638577740242362, "learning_rate": 1.4749234952956715e-06, "loss": 0.5189518928527832, "step": 3257 }, { "epoch": 0.7512105141803089, "grad_norm": 1.2059107130307087, "learning_rate": 1.474587940564113e-06, "loss": 0.4850584864616394, "step": 3258 }, { "epoch": 0.7514410883098916, "grad_norm": 1.4809027704015267, "learning_rate": 1.4742523168463876e-06, "loss": 0.5218943357467651, "step": 3259 }, { "epoch": 0.7516716624394743, "grad_norm": 1.130064311367936, "learning_rate": 1.4739166241912814e-06, "loss": 0.4311223030090332, "step": 3260 }, { "epoch": 0.751902236569057, "grad_norm": 1.372801682112421, "learning_rate": 1.473580862647591e-06, "loss": 0.525306224822998, "step": 3261 }, { "epoch": 0.7521328106986396, "grad_norm": 1.291063350632538, "learning_rate": 1.4732450322641225e-06, "loss": 0.506609320640564, "step": 3262 }, { "epoch": 0.7523633848282223, "grad_norm": 1.4043846834415283, "learning_rate": 1.4729091330896926e-06, "loss": 0.5477846264839172, "step": 3263 }, { "epoch": 0.7525939589578049, "grad_norm": 1.1342853276703964, "learning_rate": 1.4725731651731268e-06, "loss": 0.48802629113197327, "step": 3264 }, { "epoch": 0.7528245330873876, "grad_norm": 1.5090127096652195, "learning_rate": 1.4722371285632626e-06, "loss": 0.4774906635284424, "step": 3265 }, { "epoch": 0.7530551072169702, "grad_norm": 1.4537920297241385, "learning_rate": 1.4719010233089458e-06, "loss": 0.4220488667488098, "step": 3266 }, { "epoch": 0.7532856813465529, "grad_norm": 1.441465153643324, "learning_rate": 1.4715648494590324e-06, "loss": 0.43912187218666077, "step": 3267 }, { "epoch": 0.7535162554761355, "grad_norm": 1.3653901674246531, "learning_rate": 1.4712286070623892e-06, "loss": 0.5302494764328003, "step": 3268 }, { "epoch": 0.7537468296057183, "grad_norm": 1.3282339539348487, "learning_rate": 1.4708922961678923e-06, "loss": 0.4800306260585785, "step": 3269 }, { "epoch": 0.7539774037353009, "grad_norm": 1.2634165352126685, "learning_rate": 1.4705559168244275e-06, "loss": 0.3993161618709564, "step": 3270 }, { "epoch": 0.7542079778648836, "grad_norm": 1.446141365903489, "learning_rate": 1.4702194690808916e-06, "loss": 0.37037837505340576, "step": 3271 }, { "epoch": 0.7544385519944662, "grad_norm": 1.3105522613811469, "learning_rate": 1.4698829529861898e-06, "loss": 0.44288602471351624, "step": 3272 }, { "epoch": 0.7546691261240489, "grad_norm": 1.542566998549956, "learning_rate": 1.469546368589239e-06, "loss": 0.5480727553367615, "step": 3273 }, { "epoch": 0.7548997002536315, "grad_norm": 1.5093924463506492, "learning_rate": 1.4692097159389649e-06, "loss": 0.4964104890823364, "step": 3274 }, { "epoch": 0.7551302743832142, "grad_norm": 1.5912503319666471, "learning_rate": 1.4688729950843033e-06, "loss": 0.4744144082069397, "step": 3275 }, { "epoch": 0.7553608485127968, "grad_norm": 1.1258853516330976, "learning_rate": 1.4685362060741997e-06, "loss": 0.44675350189208984, "step": 3276 }, { "epoch": 0.7555914226423796, "grad_norm": 1.4768191837188436, "learning_rate": 1.46819934895761e-06, "loss": 0.45261216163635254, "step": 3277 }, { "epoch": 0.7558219967719622, "grad_norm": 1.3183121513891758, "learning_rate": 1.4678624237835005e-06, "loss": 0.4180977940559387, "step": 3278 }, { "epoch": 0.7560525709015449, "grad_norm": 1.34629761070606, "learning_rate": 1.4675254306008456e-06, "loss": 0.39477843046188354, "step": 3279 }, { "epoch": 0.7562831450311275, "grad_norm": 1.439585323315283, "learning_rate": 1.467188369458631e-06, "loss": 0.5033801198005676, "step": 3280 }, { "epoch": 0.7565137191607102, "grad_norm": 1.3522884656136929, "learning_rate": 1.4668512404058527e-06, "loss": 0.5719846487045288, "step": 3281 }, { "epoch": 0.7567442932902928, "grad_norm": 1.6993262990855147, "learning_rate": 1.4665140434915147e-06, "loss": 0.5198945999145508, "step": 3282 }, { "epoch": 0.7569748674198755, "grad_norm": 1.6486008286234453, "learning_rate": 1.4661767787646326e-06, "loss": 0.4641912579536438, "step": 3283 }, { "epoch": 0.7572054415494581, "grad_norm": 1.542363438136225, "learning_rate": 1.4658394462742309e-06, "loss": 0.44070225954055786, "step": 3284 }, { "epoch": 0.7574360156790408, "grad_norm": 1.1923089532877131, "learning_rate": 1.465502046069345e-06, "loss": 0.4324581027030945, "step": 3285 }, { "epoch": 0.7576665898086234, "grad_norm": 1.5168087965785, "learning_rate": 1.4651645781990187e-06, "loss": 0.5789060592651367, "step": 3286 }, { "epoch": 0.7578971639382062, "grad_norm": 1.7886030443223944, "learning_rate": 1.4648270427123068e-06, "loss": 0.45642149448394775, "step": 3287 }, { "epoch": 0.7581277380677888, "grad_norm": 1.222780244920245, "learning_rate": 1.4644894396582732e-06, "loss": 0.4587763547897339, "step": 3288 }, { "epoch": 0.7583583121973715, "grad_norm": 1.570757900264253, "learning_rate": 1.4641517690859924e-06, "loss": 0.5472866892814636, "step": 3289 }, { "epoch": 0.7585888863269541, "grad_norm": 1.4662287757114318, "learning_rate": 1.4638140310445476e-06, "loss": 0.5274207592010498, "step": 3290 }, { "epoch": 0.7588194604565368, "grad_norm": 1.5317060576828687, "learning_rate": 1.4634762255830326e-06, "loss": 0.46280741691589355, "step": 3291 }, { "epoch": 0.7590500345861194, "grad_norm": 1.357303550008307, "learning_rate": 1.4631383527505515e-06, "loss": 0.5395090579986572, "step": 3292 }, { "epoch": 0.7592806087157021, "grad_norm": 1.3556569618907826, "learning_rate": 1.4628004125962168e-06, "loss": 0.49923229217529297, "step": 3293 }, { "epoch": 0.7595111828452847, "grad_norm": 1.437270857620585, "learning_rate": 1.462462405169152e-06, "loss": 0.5414037108421326, "step": 3294 }, { "epoch": 0.7597417569748675, "grad_norm": 1.2450139122326453, "learning_rate": 1.4621243305184895e-06, "loss": 0.4246688485145569, "step": 3295 }, { "epoch": 0.7599723311044501, "grad_norm": 1.2346000309431113, "learning_rate": 1.461786188693372e-06, "loss": 0.4997994005680084, "step": 3296 }, { "epoch": 0.7602029052340328, "grad_norm": 1.2539682682883548, "learning_rate": 1.4614479797429523e-06, "loss": 0.4571123719215393, "step": 3297 }, { "epoch": 0.7604334793636154, "grad_norm": 1.3546747118119653, "learning_rate": 1.4611097037163917e-06, "loss": 0.5178083181381226, "step": 3298 }, { "epoch": 0.7606640534931981, "grad_norm": 1.438807896221459, "learning_rate": 1.4607713606628625e-06, "loss": 0.538001298904419, "step": 3299 }, { "epoch": 0.7608946276227807, "grad_norm": 1.6495208547410056, "learning_rate": 1.4604329506315464e-06, "loss": 0.45941218733787537, "step": 3300 }, { "epoch": 0.7611252017523634, "grad_norm": 1.469904127152949, "learning_rate": 1.4600944736716344e-06, "loss": 0.619648277759552, "step": 3301 }, { "epoch": 0.761355775881946, "grad_norm": 1.3648924598961014, "learning_rate": 1.4597559298323281e-06, "loss": 0.4035170376300812, "step": 3302 }, { "epoch": 0.7615863500115287, "grad_norm": 1.4623041349874883, "learning_rate": 1.4594173191628374e-06, "loss": 0.48657041788101196, "step": 3303 }, { "epoch": 0.7618169241411114, "grad_norm": 1.3486514765257445, "learning_rate": 1.4590786417123838e-06, "loss": 0.43324801325798035, "step": 3304 }, { "epoch": 0.7620474982706941, "grad_norm": 1.3543990457839288, "learning_rate": 1.4587398975301968e-06, "loss": 0.5020644664764404, "step": 3305 }, { "epoch": 0.7622780724002767, "grad_norm": 1.4758408294809282, "learning_rate": 1.4584010866655163e-06, "loss": 0.4123230576515198, "step": 3306 }, { "epoch": 0.7625086465298594, "grad_norm": 1.4629462638568174, "learning_rate": 1.4580622091675925e-06, "loss": 0.5110459327697754, "step": 3307 }, { "epoch": 0.762739220659442, "grad_norm": 1.3128675599733384, "learning_rate": 1.4577232650856842e-06, "loss": 0.3956744074821472, "step": 3308 }, { "epoch": 0.7629697947890247, "grad_norm": 1.028092913473986, "learning_rate": 1.4573842544690602e-06, "loss": 0.44418880343437195, "step": 3309 }, { "epoch": 0.7632003689186073, "grad_norm": 1.2935675774179733, "learning_rate": 1.4570451773669993e-06, "loss": 0.46690821647644043, "step": 3310 }, { "epoch": 0.76343094304819, "grad_norm": 1.7250402170715877, "learning_rate": 1.45670603382879e-06, "loss": 0.5631324052810669, "step": 3311 }, { "epoch": 0.7636615171777726, "grad_norm": 1.3197309301962783, "learning_rate": 1.4563668239037301e-06, "loss": 0.42355209589004517, "step": 3312 }, { "epoch": 0.7638920913073554, "grad_norm": 1.1819135136971526, "learning_rate": 1.4560275476411273e-06, "loss": 0.4509078860282898, "step": 3313 }, { "epoch": 0.764122665436938, "grad_norm": 1.2704317123198696, "learning_rate": 1.4556882050902986e-06, "loss": 0.48707491159439087, "step": 3314 }, { "epoch": 0.7643532395665207, "grad_norm": 1.2817274130067733, "learning_rate": 1.455348796300571e-06, "loss": 0.4768955707550049, "step": 3315 }, { "epoch": 0.7645838136961033, "grad_norm": 1.1995539933150834, "learning_rate": 1.4550093213212812e-06, "loss": 0.44231370091438293, "step": 3316 }, { "epoch": 0.764814387825686, "grad_norm": 1.283098801050818, "learning_rate": 1.4546697802017752e-06, "loss": 0.41919445991516113, "step": 3317 }, { "epoch": 0.7650449619552686, "grad_norm": 1.3370966440445557, "learning_rate": 1.4543301729914086e-06, "loss": 0.5004634857177734, "step": 3318 }, { "epoch": 0.7652755360848513, "grad_norm": 1.3058062554730827, "learning_rate": 1.4539904997395467e-06, "loss": 0.5327651500701904, "step": 3319 }, { "epoch": 0.7655061102144339, "grad_norm": 1.2690140519120048, "learning_rate": 1.4536507604955647e-06, "loss": 0.4571789801120758, "step": 3320 }, { "epoch": 0.7657366843440166, "grad_norm": 1.4712336124149359, "learning_rate": 1.4533109553088474e-06, "loss": 0.3989352583885193, "step": 3321 }, { "epoch": 0.7659672584735993, "grad_norm": 1.390525487190819, "learning_rate": 1.452971084228788e-06, "loss": 0.4661702513694763, "step": 3322 }, { "epoch": 0.766197832603182, "grad_norm": 1.4525582608827485, "learning_rate": 1.4526311473047911e-06, "loss": 0.5007051825523376, "step": 3323 }, { "epoch": 0.7664284067327646, "grad_norm": 1.4087277102322913, "learning_rate": 1.4522911445862697e-06, "loss": 0.44391199946403503, "step": 3324 }, { "epoch": 0.7666589808623473, "grad_norm": 1.5508781982933997, "learning_rate": 1.4519510761226466e-06, "loss": 0.48606377840042114, "step": 3325 }, { "epoch": 0.7668895549919299, "grad_norm": 1.4942248011879364, "learning_rate": 1.4516109419633543e-06, "loss": 0.4831564426422119, "step": 3326 }, { "epoch": 0.7671201291215126, "grad_norm": 1.2492238673667777, "learning_rate": 1.4512707421578344e-06, "loss": 0.5033055543899536, "step": 3327 }, { "epoch": 0.7673507032510952, "grad_norm": 1.268639260981401, "learning_rate": 1.4509304767555385e-06, "loss": 0.40440869331359863, "step": 3328 }, { "epoch": 0.7675812773806779, "grad_norm": 1.154540060885232, "learning_rate": 1.4505901458059282e-06, "loss": 0.4281578063964844, "step": 3329 }, { "epoch": 0.7678118515102605, "grad_norm": 1.2646658661078, "learning_rate": 1.4502497493584735e-06, "loss": 0.45301395654678345, "step": 3330 }, { "epoch": 0.7680424256398433, "grad_norm": 1.2708958618179473, "learning_rate": 1.4499092874626545e-06, "loss": 0.3971232771873474, "step": 3331 }, { "epoch": 0.7682729997694259, "grad_norm": 1.470304815457328, "learning_rate": 1.4495687601679607e-06, "loss": 0.45382559299468994, "step": 3332 }, { "epoch": 0.7685035738990086, "grad_norm": 1.5230375908041864, "learning_rate": 1.4492281675238916e-06, "loss": 0.4101349711418152, "step": 3333 }, { "epoch": 0.7687341480285912, "grad_norm": 1.7708001369907398, "learning_rate": 1.4488875095799555e-06, "loss": 0.5322436690330505, "step": 3334 }, { "epoch": 0.7689647221581739, "grad_norm": 1.4488936734065874, "learning_rate": 1.4485467863856703e-06, "loss": 0.5497866272926331, "step": 3335 }, { "epoch": 0.7691952962877565, "grad_norm": 1.5286830910755105, "learning_rate": 1.4482059979905642e-06, "loss": 0.5088074207305908, "step": 3336 }, { "epoch": 0.7694258704173391, "grad_norm": 1.2530470288119384, "learning_rate": 1.4478651444441736e-06, "loss": 0.4444946050643921, "step": 3337 }, { "epoch": 0.7696564445469218, "grad_norm": 1.1602955966590311, "learning_rate": 1.4475242257960454e-06, "loss": 0.41257357597351074, "step": 3338 }, { "epoch": 0.7698870186765044, "grad_norm": 1.3512416855290101, "learning_rate": 1.4471832420957356e-06, "loss": 0.47933512926101685, "step": 3339 }, { "epoch": 0.7701175928060872, "grad_norm": 1.204411185284335, "learning_rate": 1.4468421933928093e-06, "loss": 0.41331803798675537, "step": 3340 }, { "epoch": 0.7703481669356698, "grad_norm": 1.3617384100749454, "learning_rate": 1.4465010797368416e-06, "loss": 0.5047392845153809, "step": 3341 }, { "epoch": 0.7705787410652525, "grad_norm": 1.2651645489335748, "learning_rate": 1.446159901177417e-06, "loss": 0.5265953540802002, "step": 3342 }, { "epoch": 0.7708093151948351, "grad_norm": 1.5538943468041178, "learning_rate": 1.4458186577641285e-06, "loss": 0.48366689682006836, "step": 3343 }, { "epoch": 0.7710398893244178, "grad_norm": 1.3170443751716914, "learning_rate": 1.4454773495465805e-06, "loss": 0.4303058087825775, "step": 3344 }, { "epoch": 0.7712704634540004, "grad_norm": 1.2782967712931992, "learning_rate": 1.4451359765743845e-06, "loss": 0.44936758279800415, "step": 3345 }, { "epoch": 0.7715010375835831, "grad_norm": 1.1273529926323729, "learning_rate": 1.4447945388971631e-06, "loss": 0.37891095876693726, "step": 3346 }, { "epoch": 0.7717316117131657, "grad_norm": 1.3818395750162065, "learning_rate": 1.4444530365645477e-06, "loss": 0.4958759546279907, "step": 3347 }, { "epoch": 0.7719621858427484, "grad_norm": 1.2809802910956953, "learning_rate": 1.4441114696261791e-06, "loss": 0.5180525183677673, "step": 3348 }, { "epoch": 0.772192759972331, "grad_norm": 1.3137706702012002, "learning_rate": 1.4437698381317076e-06, "loss": 0.4760133624076843, "step": 3349 }, { "epoch": 0.7724233341019138, "grad_norm": 1.6019634089420207, "learning_rate": 1.4434281421307923e-06, "loss": 0.5095269680023193, "step": 3350 }, { "epoch": 0.7726539082314964, "grad_norm": 1.3897770832286553, "learning_rate": 1.443086381673103e-06, "loss": 0.41132962703704834, "step": 3351 }, { "epoch": 0.7728844823610791, "grad_norm": 2.1191686086439687, "learning_rate": 1.442744556808317e-06, "loss": 0.5617398023605347, "step": 3352 }, { "epoch": 0.7731150564906617, "grad_norm": 1.3926070515875653, "learning_rate": 1.4424026675861229e-06, "loss": 0.4421590566635132, "step": 3353 }, { "epoch": 0.7733456306202444, "grad_norm": 1.3079796762796725, "learning_rate": 1.4420607140562175e-06, "loss": 0.5533363223075867, "step": 3354 }, { "epoch": 0.773576204749827, "grad_norm": 1.2259362177236217, "learning_rate": 1.441718696268307e-06, "loss": 0.3703731298446655, "step": 3355 }, { "epoch": 0.7738067788794097, "grad_norm": 1.3132566837825874, "learning_rate": 1.4413766142721074e-06, "loss": 0.4078833758831024, "step": 3356 }, { "epoch": 0.7740373530089923, "grad_norm": 1.3669338987803128, "learning_rate": 1.4410344681173436e-06, "loss": 0.47297823429107666, "step": 3357 }, { "epoch": 0.7742679271385751, "grad_norm": 1.44476399239333, "learning_rate": 1.4406922578537501e-06, "loss": 0.4586789309978485, "step": 3358 }, { "epoch": 0.7744985012681577, "grad_norm": 2.005996053014414, "learning_rate": 1.440349983531071e-06, "loss": 0.5284359455108643, "step": 3359 }, { "epoch": 0.7747290753977404, "grad_norm": 1.453810263762319, "learning_rate": 1.4400076451990585e-06, "loss": 0.47153323888778687, "step": 3360 }, { "epoch": 0.774959649527323, "grad_norm": 1.277395230723769, "learning_rate": 1.4396652429074758e-06, "loss": 0.3862396478652954, "step": 3361 }, { "epoch": 0.7751902236569057, "grad_norm": 1.4585054412515979, "learning_rate": 1.4393227767060938e-06, "loss": 0.48918354511260986, "step": 3362 }, { "epoch": 0.7754207977864883, "grad_norm": 1.2680408475983538, "learning_rate": 1.4389802466446942e-06, "loss": 0.5541480779647827, "step": 3363 }, { "epoch": 0.775651371916071, "grad_norm": 1.3507983643401953, "learning_rate": 1.4386376527730665e-06, "loss": 0.48972445726394653, "step": 3364 }, { "epoch": 0.7758819460456536, "grad_norm": 1.7557497204808084, "learning_rate": 1.4382949951410109e-06, "loss": 0.5016083717346191, "step": 3365 }, { "epoch": 0.7761125201752364, "grad_norm": 1.3196221720148595, "learning_rate": 1.4379522737983351e-06, "loss": 0.40227651596069336, "step": 3366 }, { "epoch": 0.776343094304819, "grad_norm": 1.596207218013102, "learning_rate": 1.4376094887948584e-06, "loss": 0.42994722723960876, "step": 3367 }, { "epoch": 0.7765736684344017, "grad_norm": 1.516975070106083, "learning_rate": 1.4372666401804073e-06, "loss": 0.5087350010871887, "step": 3368 }, { "epoch": 0.7768042425639843, "grad_norm": 1.2618017709219296, "learning_rate": 1.4369237280048186e-06, "loss": 0.39419132471084595, "step": 3369 }, { "epoch": 0.777034816693567, "grad_norm": 1.3456260179482487, "learning_rate": 1.4365807523179376e-06, "loss": 0.500682532787323, "step": 3370 }, { "epoch": 0.7772653908231496, "grad_norm": 1.4316905894274476, "learning_rate": 1.4362377131696198e-06, "loss": 0.49243754148483276, "step": 3371 }, { "epoch": 0.7774959649527323, "grad_norm": 1.4395314935622772, "learning_rate": 1.4358946106097295e-06, "loss": 0.5479283332824707, "step": 3372 }, { "epoch": 0.7777265390823149, "grad_norm": 1.08521870178353, "learning_rate": 1.4355514446881396e-06, "loss": 0.43217700719833374, "step": 3373 }, { "epoch": 0.7779571132118976, "grad_norm": 1.292406809665349, "learning_rate": 1.435208215454733e-06, "loss": 0.5351289510726929, "step": 3374 }, { "epoch": 0.7781876873414802, "grad_norm": 1.2023765125576906, "learning_rate": 1.4348649229594016e-06, "loss": 0.45523375272750854, "step": 3375 }, { "epoch": 0.778418261471063, "grad_norm": 1.1345172738470508, "learning_rate": 1.4345215672520465e-06, "loss": 0.49811118841171265, "step": 3376 }, { "epoch": 0.7786488356006456, "grad_norm": 1.3017016981868919, "learning_rate": 1.434178148382578e-06, "loss": 0.40621131658554077, "step": 3377 }, { "epoch": 0.7788794097302283, "grad_norm": 1.322929743849566, "learning_rate": 1.4338346664009152e-06, "loss": 0.43339842557907104, "step": 3378 }, { "epoch": 0.7791099838598109, "grad_norm": 1.4276417953872829, "learning_rate": 1.433491121356987e-06, "loss": 0.4397253096103668, "step": 3379 }, { "epoch": 0.7793405579893936, "grad_norm": 1.3957946390360352, "learning_rate": 1.433147513300731e-06, "loss": 0.5146217942237854, "step": 3380 }, { "epoch": 0.7795711321189762, "grad_norm": 1.3181842447854462, "learning_rate": 1.432803842282094e-06, "loss": 0.46328768134117126, "step": 3381 }, { "epoch": 0.7798017062485589, "grad_norm": 1.4008272791948313, "learning_rate": 1.432460108351032e-06, "loss": 0.47743386030197144, "step": 3382 }, { "epoch": 0.7800322803781415, "grad_norm": 1.4765555896470939, "learning_rate": 1.4321163115575105e-06, "loss": 0.467747300863266, "step": 3383 }, { "epoch": 0.7802628545077243, "grad_norm": 1.2334202034705792, "learning_rate": 1.431772451951504e-06, "loss": 0.4269976019859314, "step": 3384 }, { "epoch": 0.7804934286373069, "grad_norm": 1.4332482963337814, "learning_rate": 1.4314285295829956e-06, "loss": 0.5440881252288818, "step": 3385 }, { "epoch": 0.7807240027668896, "grad_norm": 1.5634188347498899, "learning_rate": 1.431084544501978e-06, "loss": 0.42413994669914246, "step": 3386 }, { "epoch": 0.7809545768964722, "grad_norm": 1.250472551312306, "learning_rate": 1.4307404967584528e-06, "loss": 0.5563687086105347, "step": 3387 }, { "epoch": 0.7811851510260549, "grad_norm": 1.2530390736213655, "learning_rate": 1.4303963864024314e-06, "loss": 0.4822027087211609, "step": 3388 }, { "epoch": 0.7814157251556375, "grad_norm": 1.265644144731409, "learning_rate": 1.430052213483933e-06, "loss": 0.5267205834388733, "step": 3389 }, { "epoch": 0.7816462992852202, "grad_norm": 1.464631682134491, "learning_rate": 1.4297079780529868e-06, "loss": 0.49257054924964905, "step": 3390 }, { "epoch": 0.7818768734148028, "grad_norm": 1.4967498256417051, "learning_rate": 1.4293636801596314e-06, "loss": 0.45225608348846436, "step": 3391 }, { "epoch": 0.7821074475443855, "grad_norm": 1.3090966398510886, "learning_rate": 1.4290193198539133e-06, "loss": 0.4891412854194641, "step": 3392 }, { "epoch": 0.7823380216739682, "grad_norm": 1.2913501590758174, "learning_rate": 1.4286748971858893e-06, "loss": 0.4411062002182007, "step": 3393 }, { "epoch": 0.7825685958035509, "grad_norm": 1.3634871078304074, "learning_rate": 1.4283304122056242e-06, "loss": 0.4584164619445801, "step": 3394 }, { "epoch": 0.7827991699331335, "grad_norm": 1.2884433704058607, "learning_rate": 1.4279858649631928e-06, "loss": 0.46913737058639526, "step": 3395 }, { "epoch": 0.7830297440627162, "grad_norm": 1.320207574562506, "learning_rate": 1.4276412555086786e-06, "loss": 0.40582767128944397, "step": 3396 }, { "epoch": 0.7832603181922988, "grad_norm": 1.4930886994867976, "learning_rate": 1.4272965838921737e-06, "loss": 0.5089453458786011, "step": 3397 }, { "epoch": 0.7834908923218815, "grad_norm": 1.3151641529095257, "learning_rate": 1.4269518501637798e-06, "loss": 0.4744444489479065, "step": 3398 }, { "epoch": 0.7837214664514641, "grad_norm": 1.3271165993445435, "learning_rate": 1.426607054373608e-06, "loss": 0.49168163537979126, "step": 3399 }, { "epoch": 0.7839520405810468, "grad_norm": 1.4774301348156431, "learning_rate": 1.4262621965717768e-06, "loss": 0.4423940181732178, "step": 3400 }, { "epoch": 0.7841826147106294, "grad_norm": 1.541226385884193, "learning_rate": 1.4259172768084152e-06, "loss": 0.5138403177261353, "step": 3401 }, { "epoch": 0.7844131888402122, "grad_norm": 1.5691210214340656, "learning_rate": 1.425572295133661e-06, "loss": 0.5248140096664429, "step": 3402 }, { "epoch": 0.7846437629697948, "grad_norm": 1.4659537352972094, "learning_rate": 1.4252272515976607e-06, "loss": 0.39161059260368347, "step": 3403 }, { "epoch": 0.7848743370993775, "grad_norm": 1.307338649596764, "learning_rate": 1.4248821462505699e-06, "loss": 0.46826744079589844, "step": 3404 }, { "epoch": 0.7851049112289601, "grad_norm": 1.3428424961182877, "learning_rate": 1.424536979142553e-06, "loss": 0.4329161047935486, "step": 3405 }, { "epoch": 0.7853354853585428, "grad_norm": 1.3831028347986385, "learning_rate": 1.4241917503237834e-06, "loss": 0.4691393971443176, "step": 3406 }, { "epoch": 0.7855660594881254, "grad_norm": 1.819344171969547, "learning_rate": 1.423846459844444e-06, "loss": 0.5130072236061096, "step": 3407 }, { "epoch": 0.7857966336177081, "grad_norm": 1.4381134289937085, "learning_rate": 1.4235011077547264e-06, "loss": 0.37478166818618774, "step": 3408 }, { "epoch": 0.7860272077472907, "grad_norm": 1.1654669583674488, "learning_rate": 1.4231556941048307e-06, "loss": 0.46112769842147827, "step": 3409 }, { "epoch": 0.7862577818768735, "grad_norm": 1.3711520199030207, "learning_rate": 1.422810218944966e-06, "loss": 0.5095282793045044, "step": 3410 }, { "epoch": 0.7864883560064561, "grad_norm": 1.4830709787042864, "learning_rate": 1.422464682325351e-06, "loss": 0.4182342290878296, "step": 3411 }, { "epoch": 0.7867189301360388, "grad_norm": 1.4898619625675633, "learning_rate": 1.422119084296213e-06, "loss": 0.3892830014228821, "step": 3412 }, { "epoch": 0.7869495042656214, "grad_norm": 1.655445800570714, "learning_rate": 1.4217734249077877e-06, "loss": 0.5294528603553772, "step": 3413 }, { "epoch": 0.7871800783952041, "grad_norm": 1.501568458574139, "learning_rate": 1.4214277042103208e-06, "loss": 0.471803218126297, "step": 3414 }, { "epoch": 0.7874106525247867, "grad_norm": 1.2078819401351728, "learning_rate": 1.4210819222540662e-06, "loss": 0.4363842010498047, "step": 3415 }, { "epoch": 0.7876412266543694, "grad_norm": 1.191025232167839, "learning_rate": 1.4207360790892867e-06, "loss": 0.3834928870201111, "step": 3416 }, { "epoch": 0.787871800783952, "grad_norm": 1.342904245190706, "learning_rate": 1.4203901747662539e-06, "loss": 0.4639194905757904, "step": 3417 }, { "epoch": 0.7881023749135347, "grad_norm": 1.4526860275619324, "learning_rate": 1.4200442093352486e-06, "loss": 0.47130632400512695, "step": 3418 }, { "epoch": 0.7883329490431173, "grad_norm": 1.2585342771790389, "learning_rate": 1.4196981828465606e-06, "loss": 0.4848192632198334, "step": 3419 }, { "epoch": 0.7885635231727001, "grad_norm": 1.2424140051596944, "learning_rate": 1.4193520953504884e-06, "loss": 0.5137286186218262, "step": 3420 }, { "epoch": 0.7887940973022827, "grad_norm": 1.4833943072924853, "learning_rate": 1.4190059468973385e-06, "loss": 0.47639960050582886, "step": 3421 }, { "epoch": 0.7890246714318654, "grad_norm": 1.3974399628621321, "learning_rate": 1.418659737537428e-06, "loss": 0.4300975799560547, "step": 3422 }, { "epoch": 0.789255245561448, "grad_norm": 1.6248920549834995, "learning_rate": 1.4183134673210817e-06, "loss": 0.5669160485267639, "step": 3423 }, { "epoch": 0.7894858196910307, "grad_norm": 1.3431432318053507, "learning_rate": 1.4179671362986336e-06, "loss": 0.4113837480545044, "step": 3424 }, { "epoch": 0.7897163938206133, "grad_norm": 1.3611327690280945, "learning_rate": 1.417620744520426e-06, "loss": 0.4992315173149109, "step": 3425 }, { "epoch": 0.789946967950196, "grad_norm": 1.6418572453635272, "learning_rate": 1.417274292036811e-06, "loss": 0.5556696653366089, "step": 3426 }, { "epoch": 0.7901775420797786, "grad_norm": 1.367999541896107, "learning_rate": 1.4169277788981485e-06, "loss": 0.47911009192466736, "step": 3427 }, { "epoch": 0.7904081162093614, "grad_norm": 1.2100320134669527, "learning_rate": 1.416581205154808e-06, "loss": 0.45395466685295105, "step": 3428 }, { "epoch": 0.790638690338944, "grad_norm": 1.5386887400015699, "learning_rate": 1.4162345708571674e-06, "loss": 0.4404561519622803, "step": 3429 }, { "epoch": 0.7908692644685267, "grad_norm": 1.3845404606780534, "learning_rate": 1.4158878760556136e-06, "loss": 0.5541578531265259, "step": 3430 }, { "epoch": 0.7910998385981093, "grad_norm": 1.4234082473199938, "learning_rate": 1.4155411208005422e-06, "loss": 0.5517834424972534, "step": 3431 }, { "epoch": 0.791330412727692, "grad_norm": 1.2851916229874634, "learning_rate": 1.4151943051423574e-06, "loss": 0.42650169134140015, "step": 3432 }, { "epoch": 0.7915609868572746, "grad_norm": 1.7886227172970943, "learning_rate": 1.414847429131472e-06, "loss": 0.42724043130874634, "step": 3433 }, { "epoch": 0.7917915609868573, "grad_norm": 1.3978336018588784, "learning_rate": 1.414500492818309e-06, "loss": 0.41757941246032715, "step": 3434 }, { "epoch": 0.7920221351164399, "grad_norm": 1.4250040620354028, "learning_rate": 1.4141534962532984e-06, "loss": 0.47318267822265625, "step": 3435 }, { "epoch": 0.7922527092460226, "grad_norm": 1.5092267765141392, "learning_rate": 1.41380643948688e-06, "loss": 0.5540967583656311, "step": 3436 }, { "epoch": 0.7924832833756053, "grad_norm": 1.2943595959957308, "learning_rate": 1.4134593225695013e-06, "loss": 0.4459697902202606, "step": 3437 }, { "epoch": 0.792713857505188, "grad_norm": 1.2950911274447663, "learning_rate": 1.41311214555162e-06, "loss": 0.5263698101043701, "step": 3438 }, { "epoch": 0.7929444316347706, "grad_norm": 1.321260987570187, "learning_rate": 1.4127649084837016e-06, "loss": 0.40453940629959106, "step": 3439 }, { "epoch": 0.7931750057643533, "grad_norm": 1.4138023773004598, "learning_rate": 1.412417611416221e-06, "loss": 0.3859207034111023, "step": 3440 }, { "epoch": 0.7934055798939359, "grad_norm": 1.3373104076984894, "learning_rate": 1.4120702543996603e-06, "loss": 0.4604511260986328, "step": 3441 }, { "epoch": 0.7936361540235186, "grad_norm": 1.2912472996688542, "learning_rate": 1.411722837484512e-06, "loss": 0.40292084217071533, "step": 3442 }, { "epoch": 0.7938667281531012, "grad_norm": 1.3099743009304052, "learning_rate": 1.4113753607212766e-06, "loss": 0.40447625517845154, "step": 3443 }, { "epoch": 0.7940973022826839, "grad_norm": 1.1711578682822494, "learning_rate": 1.4110278241604635e-06, "loss": 0.48472997546195984, "step": 3444 }, { "epoch": 0.7943278764122665, "grad_norm": 1.304688924593958, "learning_rate": 1.4106802278525902e-06, "loss": 0.5404670238494873, "step": 3445 }, { "epoch": 0.7945584505418493, "grad_norm": 1.2201185877258616, "learning_rate": 1.4103325718481838e-06, "loss": 0.5885064005851746, "step": 3446 }, { "epoch": 0.7947890246714319, "grad_norm": 1.2045708529585497, "learning_rate": 1.4099848561977794e-06, "loss": 0.47806939482688904, "step": 3447 }, { "epoch": 0.7950195988010145, "grad_norm": 1.2183758256079422, "learning_rate": 1.4096370809519213e-06, "loss": 0.4247834086418152, "step": 3448 }, { "epoch": 0.7952501729305972, "grad_norm": 1.4701805176850054, "learning_rate": 1.409289246161162e-06, "loss": 0.508902370929718, "step": 3449 }, { "epoch": 0.7954807470601798, "grad_norm": 1.3709386014599791, "learning_rate": 1.4089413518760626e-06, "loss": 0.4866124987602234, "step": 3450 }, { "epoch": 0.7957113211897625, "grad_norm": 1.4351510328158692, "learning_rate": 1.408593398147193e-06, "loss": 0.5168731212615967, "step": 3451 }, { "epoch": 0.7959418953193451, "grad_norm": 1.257672253058261, "learning_rate": 1.4082453850251326e-06, "loss": 0.5039271712303162, "step": 3452 }, { "epoch": 0.7961724694489278, "grad_norm": 1.3767040030777011, "learning_rate": 1.4078973125604674e-06, "loss": 0.3660929799079895, "step": 3453 }, { "epoch": 0.7964030435785104, "grad_norm": 1.5330992916300397, "learning_rate": 1.407549180803794e-06, "loss": 0.514503538608551, "step": 3454 }, { "epoch": 0.7966336177080932, "grad_norm": 1.5704286671243526, "learning_rate": 1.4072009898057172e-06, "loss": 0.4803028702735901, "step": 3455 }, { "epoch": 0.7968641918376758, "grad_norm": 1.2332119133725918, "learning_rate": 1.4068527396168492e-06, "loss": 0.43116262555122375, "step": 3456 }, { "epoch": 0.7970947659672585, "grad_norm": 1.522287028583898, "learning_rate": 1.4065044302878125e-06, "loss": 0.5009680986404419, "step": 3457 }, { "epoch": 0.7973253400968411, "grad_norm": 1.1307500814268987, "learning_rate": 1.406156061869237e-06, "loss": 0.4047713875770569, "step": 3458 }, { "epoch": 0.7975559142264238, "grad_norm": 1.348066090689188, "learning_rate": 1.4058076344117615e-06, "loss": 0.5287230014801025, "step": 3459 }, { "epoch": 0.7977864883560064, "grad_norm": 1.7810979263679612, "learning_rate": 1.4054591479660335e-06, "loss": 0.5602750778198242, "step": 3460 }, { "epoch": 0.7980170624855891, "grad_norm": 1.0587308388288128, "learning_rate": 1.4051106025827096e-06, "loss": 0.4178144335746765, "step": 3461 }, { "epoch": 0.7982476366151717, "grad_norm": 1.408691487644406, "learning_rate": 1.4047619983124536e-06, "loss": 0.5061960220336914, "step": 3462 }, { "epoch": 0.7984782107447544, "grad_norm": 1.5043212480263244, "learning_rate": 1.4044133352059392e-06, "loss": 0.5091691017150879, "step": 3463 }, { "epoch": 0.798708784874337, "grad_norm": 1.3793897642043385, "learning_rate": 1.4040646133138478e-06, "loss": 0.5100894570350647, "step": 3464 }, { "epoch": 0.7989393590039198, "grad_norm": 1.2188849241203001, "learning_rate": 1.4037158326868697e-06, "loss": 0.47493505477905273, "step": 3465 }, { "epoch": 0.7991699331335024, "grad_norm": 1.637846674977116, "learning_rate": 1.4033669933757038e-06, "loss": 0.5561350584030151, "step": 3466 }, { "epoch": 0.7994005072630851, "grad_norm": 1.4971197328143675, "learning_rate": 1.4030180954310574e-06, "loss": 0.44552814960479736, "step": 3467 }, { "epoch": 0.7996310813926677, "grad_norm": 1.219192969590734, "learning_rate": 1.4026691389036465e-06, "loss": 0.4624238908290863, "step": 3468 }, { "epoch": 0.7998616555222504, "grad_norm": 1.348458578104898, "learning_rate": 1.4023201238441951e-06, "loss": 0.5424448251724243, "step": 3469 }, { "epoch": 0.800092229651833, "grad_norm": 1.2410568882309463, "learning_rate": 1.4019710503034367e-06, "loss": 0.4629395008087158, "step": 3470 }, { "epoch": 0.8003228037814157, "grad_norm": 1.3564725845833965, "learning_rate": 1.401621918332112e-06, "loss": 0.4375717043876648, "step": 3471 }, { "epoch": 0.8005533779109983, "grad_norm": 1.5212509367699154, "learning_rate": 1.401272727980971e-06, "loss": 0.4419640302658081, "step": 3472 }, { "epoch": 0.8007839520405811, "grad_norm": 1.3621301015547722, "learning_rate": 1.4009234793007724e-06, "loss": 0.42077577114105225, "step": 3473 }, { "epoch": 0.8010145261701637, "grad_norm": 1.394506766094276, "learning_rate": 1.400574172342283e-06, "loss": 0.3735182583332062, "step": 3474 }, { "epoch": 0.8012451002997464, "grad_norm": 1.3325918102604086, "learning_rate": 1.4002248071562778e-06, "loss": 0.4263458251953125, "step": 3475 }, { "epoch": 0.801475674429329, "grad_norm": 1.3278985843191269, "learning_rate": 1.3998753837935406e-06, "loss": 0.42377904057502747, "step": 3476 }, { "epoch": 0.8017062485589117, "grad_norm": 1.4415172635554745, "learning_rate": 1.399525902304864e-06, "loss": 0.5017589330673218, "step": 3477 }, { "epoch": 0.8019368226884943, "grad_norm": 1.2695777372701094, "learning_rate": 1.3991763627410485e-06, "loss": 0.41022592782974243, "step": 3478 }, { "epoch": 0.802167396818077, "grad_norm": 1.6097549722001219, "learning_rate": 1.3988267651529028e-06, "loss": 0.49957793951034546, "step": 3479 }, { "epoch": 0.8023979709476596, "grad_norm": 1.4695518489034636, "learning_rate": 1.398477109591245e-06, "loss": 0.5065722465515137, "step": 3480 }, { "epoch": 0.8026285450772424, "grad_norm": 1.264735145451503, "learning_rate": 1.398127396106901e-06, "loss": 0.4353798031806946, "step": 3481 }, { "epoch": 0.802859119206825, "grad_norm": 1.5800938751579423, "learning_rate": 1.3977776247507049e-06, "loss": 0.41438236832618713, "step": 3482 }, { "epoch": 0.8030896933364077, "grad_norm": 1.2712154799989346, "learning_rate": 1.3974277955734996e-06, "loss": 0.4348248839378357, "step": 3483 }, { "epoch": 0.8033202674659903, "grad_norm": 1.3020033760882643, "learning_rate": 1.3970779086261363e-06, "loss": 0.49369150400161743, "step": 3484 }, { "epoch": 0.803550841595573, "grad_norm": 1.445427514378273, "learning_rate": 1.396727963959475e-06, "loss": 0.5694580078125, "step": 3485 }, { "epoch": 0.8037814157251556, "grad_norm": 1.3859575121879733, "learning_rate": 1.3963779616243834e-06, "loss": 0.5357070565223694, "step": 3486 }, { "epoch": 0.8040119898547383, "grad_norm": 1.3071217267808923, "learning_rate": 1.3960279016717377e-06, "loss": 0.41300907731056213, "step": 3487 }, { "epoch": 0.8042425639843209, "grad_norm": 1.4713226080636248, "learning_rate": 1.395677784152423e-06, "loss": 0.5058030486106873, "step": 3488 }, { "epoch": 0.8044731381139036, "grad_norm": 1.394990226330868, "learning_rate": 1.3953276091173326e-06, "loss": 0.5225522518157959, "step": 3489 }, { "epoch": 0.8047037122434862, "grad_norm": 1.3669211701935395, "learning_rate": 1.3949773766173675e-06, "loss": 0.43893736600875854, "step": 3490 }, { "epoch": 0.804934286373069, "grad_norm": 1.575168458794386, "learning_rate": 1.3946270867034375e-06, "loss": 0.4583659768104553, "step": 3491 }, { "epoch": 0.8051648605026516, "grad_norm": 1.2728568882138123, "learning_rate": 1.394276739426461e-06, "loss": 0.49550747871398926, "step": 3492 }, { "epoch": 0.8053954346322343, "grad_norm": 1.9438900883437185, "learning_rate": 1.3939263348373648e-06, "loss": 0.5637674331665039, "step": 3493 }, { "epoch": 0.8056260087618169, "grad_norm": 1.3206034443977903, "learning_rate": 1.3935758729870835e-06, "loss": 0.4853670299053192, "step": 3494 }, { "epoch": 0.8058565828913996, "grad_norm": 1.479029501570459, "learning_rate": 1.3932253539265603e-06, "loss": 0.4535500407218933, "step": 3495 }, { "epoch": 0.8060871570209822, "grad_norm": 1.4461411101486477, "learning_rate": 1.3928747777067464e-06, "loss": 0.4198870062828064, "step": 3496 }, { "epoch": 0.8063177311505649, "grad_norm": 1.3336585529006162, "learning_rate": 1.392524144378602e-06, "loss": 0.45773670077323914, "step": 3497 }, { "epoch": 0.8065483052801475, "grad_norm": 1.718264798623436, "learning_rate": 1.3921734539930952e-06, "loss": 0.45263248682022095, "step": 3498 }, { "epoch": 0.8067788794097303, "grad_norm": 1.300886470112164, "learning_rate": 1.3918227066012025e-06, "loss": 0.473066508769989, "step": 3499 }, { "epoch": 0.8070094535393129, "grad_norm": 1.1261914460441818, "learning_rate": 1.3914719022539082e-06, "loss": 0.35737159848213196, "step": 3500 }, { "epoch": 0.8072400276688956, "grad_norm": 1.4095537979750905, "learning_rate": 1.3911210410022054e-06, "loss": 0.5162703394889832, "step": 3501 }, { "epoch": 0.8074706017984782, "grad_norm": 1.494617165800155, "learning_rate": 1.3907701228970955e-06, "loss": 0.5347551703453064, "step": 3502 }, { "epoch": 0.8077011759280609, "grad_norm": 1.7642790890319513, "learning_rate": 1.390419147989588e-06, "loss": 0.4889448881149292, "step": 3503 }, { "epoch": 0.8079317500576435, "grad_norm": 1.380092267420659, "learning_rate": 1.3900681163306999e-06, "loss": 0.47468650341033936, "step": 3504 }, { "epoch": 0.8081623241872262, "grad_norm": 1.4749480234582377, "learning_rate": 1.3897170279714585e-06, "loss": 0.43236857652664185, "step": 3505 }, { "epoch": 0.8083928983168088, "grad_norm": 1.4419786763918543, "learning_rate": 1.3893658829628974e-06, "loss": 0.46778976917266846, "step": 3506 }, { "epoch": 0.8086234724463915, "grad_norm": 1.353368455676612, "learning_rate": 1.389014681356059e-06, "loss": 0.49447667598724365, "step": 3507 }, { "epoch": 0.8088540465759742, "grad_norm": 1.3574196281726325, "learning_rate": 1.388663423201994e-06, "loss": 0.5221220254898071, "step": 3508 }, { "epoch": 0.8090846207055569, "grad_norm": 1.8319434066548141, "learning_rate": 1.3883121085517615e-06, "loss": 0.5037325620651245, "step": 3509 }, { "epoch": 0.8093151948351395, "grad_norm": 1.1547190760847952, "learning_rate": 1.387960737456429e-06, "loss": 0.46879589557647705, "step": 3510 }, { "epoch": 0.8095457689647222, "grad_norm": 1.3552976314399992, "learning_rate": 1.387609309967071e-06, "loss": 0.44216716289520264, "step": 3511 }, { "epoch": 0.8097763430943048, "grad_norm": 1.2016377736710804, "learning_rate": 1.3872578261347716e-06, "loss": 0.4525749981403351, "step": 3512 }, { "epoch": 0.8100069172238875, "grad_norm": 1.3138421579944453, "learning_rate": 1.3869062860106224e-06, "loss": 0.44681644439697266, "step": 3513 }, { "epoch": 0.8102374913534701, "grad_norm": 1.5030736189155554, "learning_rate": 1.3865546896457233e-06, "loss": 0.4162617325782776, "step": 3514 }, { "epoch": 0.8104680654830528, "grad_norm": 1.4360914568156404, "learning_rate": 1.3862030370911827e-06, "loss": 0.5262776613235474, "step": 3515 }, { "epoch": 0.8106986396126354, "grad_norm": 1.3010389916824352, "learning_rate": 1.3858513283981163e-06, "loss": 0.48102372884750366, "step": 3516 }, { "epoch": 0.8109292137422182, "grad_norm": 1.41037363508679, "learning_rate": 1.385499563617649e-06, "loss": 0.46166497468948364, "step": 3517 }, { "epoch": 0.8111597878718008, "grad_norm": 1.4145741054815544, "learning_rate": 1.3851477428009133e-06, "loss": 0.43523284792900085, "step": 3518 }, { "epoch": 0.8113903620013835, "grad_norm": 1.3662294611202825, "learning_rate": 1.3847958659990497e-06, "loss": 0.5413048267364502, "step": 3519 }, { "epoch": 0.8116209361309661, "grad_norm": 1.1462124150969017, "learning_rate": 1.3844439332632073e-06, "loss": 0.4257383346557617, "step": 3520 }, { "epoch": 0.8118515102605488, "grad_norm": 1.5928313905350753, "learning_rate": 1.3840919446445427e-06, "loss": 0.4812018871307373, "step": 3521 }, { "epoch": 0.8120820843901314, "grad_norm": 1.5231442697754751, "learning_rate": 1.3837399001942216e-06, "loss": 0.4890254735946655, "step": 3522 }, { "epoch": 0.8123126585197141, "grad_norm": 1.7091323269762855, "learning_rate": 1.3833877999634166e-06, "loss": 0.5079991817474365, "step": 3523 }, { "epoch": 0.8125432326492967, "grad_norm": 1.6148941470526432, "learning_rate": 1.3830356440033096e-06, "loss": 0.44703438878059387, "step": 3524 }, { "epoch": 0.8127738067788794, "grad_norm": 1.4685605039032132, "learning_rate": 1.3826834323650898e-06, "loss": 0.4218645989894867, "step": 3525 }, { "epoch": 0.813004380908462, "grad_norm": 1.585977018929449, "learning_rate": 1.3823311650999547e-06, "loss": 0.4544546902179718, "step": 3526 }, { "epoch": 0.8132349550380448, "grad_norm": 1.2954656146833265, "learning_rate": 1.3819788422591099e-06, "loss": 0.4978422224521637, "step": 3527 }, { "epoch": 0.8134655291676274, "grad_norm": 1.3262250095489831, "learning_rate": 1.3816264638937688e-06, "loss": 0.42122140526771545, "step": 3528 }, { "epoch": 0.8136961032972101, "grad_norm": 1.0995613789441223, "learning_rate": 1.381274030055154e-06, "loss": 0.45674729347229004, "step": 3529 }, { "epoch": 0.8139266774267927, "grad_norm": 1.5614041042611542, "learning_rate": 1.3809215407944947e-06, "loss": 0.5075385570526123, "step": 3530 }, { "epoch": 0.8141572515563754, "grad_norm": 1.4231357002591019, "learning_rate": 1.380568996163029e-06, "loss": 0.45952552556991577, "step": 3531 }, { "epoch": 0.814387825685958, "grad_norm": 1.239122573849665, "learning_rate": 1.3802163962120025e-06, "loss": 0.5062624216079712, "step": 3532 }, { "epoch": 0.8146183998155407, "grad_norm": 1.4910945652834293, "learning_rate": 1.3798637409926698e-06, "loss": 0.49294552206993103, "step": 3533 }, { "epoch": 0.8148489739451233, "grad_norm": 1.347255149566569, "learning_rate": 1.3795110305562926e-06, "loss": 0.4389861822128296, "step": 3534 }, { "epoch": 0.8150795480747061, "grad_norm": 1.5704776908584448, "learning_rate": 1.3791582649541401e-06, "loss": 0.47733181715011597, "step": 3535 }, { "epoch": 0.8153101222042887, "grad_norm": 1.3661823105841888, "learning_rate": 1.3788054442374918e-06, "loss": 0.5007725358009338, "step": 3536 }, { "epoch": 0.8155406963338714, "grad_norm": 1.617600694156108, "learning_rate": 1.378452568457633e-06, "loss": 0.4857913553714752, "step": 3537 }, { "epoch": 0.815771270463454, "grad_norm": 1.4509204702050165, "learning_rate": 1.3780996376658577e-06, "loss": 0.5330549478530884, "step": 3538 }, { "epoch": 0.8160018445930367, "grad_norm": 1.283827597345967, "learning_rate": 1.3777466519134684e-06, "loss": 0.45034217834472656, "step": 3539 }, { "epoch": 0.8162324187226193, "grad_norm": 1.313177908039173, "learning_rate": 1.3773936112517746e-06, "loss": 0.4442213773727417, "step": 3540 }, { "epoch": 0.816462992852202, "grad_norm": 1.479375223581317, "learning_rate": 1.377040515732095e-06, "loss": 0.5000369548797607, "step": 3541 }, { "epoch": 0.8166935669817846, "grad_norm": 1.3177535399447533, "learning_rate": 1.3766873654057551e-06, "loss": 0.5117775797843933, "step": 3542 }, { "epoch": 0.8169241411113674, "grad_norm": 1.4163300067502158, "learning_rate": 1.3763341603240889e-06, "loss": 0.431648850440979, "step": 3543 }, { "epoch": 0.81715471524095, "grad_norm": 1.230235072546183, "learning_rate": 1.3759809005384387e-06, "loss": 0.39463019371032715, "step": 3544 }, { "epoch": 0.8173852893705327, "grad_norm": 1.4412595458793114, "learning_rate": 1.375627586100154e-06, "loss": 0.38739651441574097, "step": 3545 }, { "epoch": 0.8176158635001153, "grad_norm": 1.1409525851258608, "learning_rate": 1.3752742170605927e-06, "loss": 0.3973360061645508, "step": 3546 }, { "epoch": 0.817846437629698, "grad_norm": 1.3276328290635366, "learning_rate": 1.3749207934711207e-06, "loss": 0.4791724383831024, "step": 3547 }, { "epoch": 0.8180770117592806, "grad_norm": 1.2963607541712077, "learning_rate": 1.3745673153831114e-06, "loss": 0.5245905518531799, "step": 3548 }, { "epoch": 0.8183075858888633, "grad_norm": 1.4724838776986868, "learning_rate": 1.3742137828479472e-06, "loss": 0.5507007241249084, "step": 3549 }, { "epoch": 0.8185381600184459, "grad_norm": 1.6416778504866436, "learning_rate": 1.373860195917017e-06, "loss": 0.4555748701095581, "step": 3550 }, { "epoch": 0.8187687341480286, "grad_norm": 1.2633428656921684, "learning_rate": 1.3735065546417182e-06, "loss": 0.39309239387512207, "step": 3551 }, { "epoch": 0.8189993082776112, "grad_norm": 1.205265119124541, "learning_rate": 1.3731528590734564e-06, "loss": 0.4984157681465149, "step": 3552 }, { "epoch": 0.819229882407194, "grad_norm": 1.4373490041823445, "learning_rate": 1.3727991092636448e-06, "loss": 0.45853057503700256, "step": 3553 }, { "epoch": 0.8194604565367766, "grad_norm": 1.427750473352885, "learning_rate": 1.3724453052637043e-06, "loss": 0.47412237524986267, "step": 3554 }, { "epoch": 0.8196910306663593, "grad_norm": 1.5140095273509309, "learning_rate": 1.3720914471250642e-06, "loss": 0.46433544158935547, "step": 3555 }, { "epoch": 0.8199216047959419, "grad_norm": 1.3530305082066354, "learning_rate": 1.3717375348991612e-06, "loss": 0.5773437023162842, "step": 3556 }, { "epoch": 0.8201521789255246, "grad_norm": 1.519657617219548, "learning_rate": 1.37138356863744e-06, "loss": 0.5943500995635986, "step": 3557 }, { "epoch": 0.8203827530551072, "grad_norm": 1.1903323655602067, "learning_rate": 1.3710295483913533e-06, "loss": 0.4970731735229492, "step": 3558 }, { "epoch": 0.8206133271846898, "grad_norm": 1.3936455952745408, "learning_rate": 1.3706754742123611e-06, "loss": 0.44726189970970154, "step": 3559 }, { "epoch": 0.8208439013142725, "grad_norm": 1.257368755928624, "learning_rate": 1.3703213461519325e-06, "loss": 0.3980759382247925, "step": 3560 }, { "epoch": 0.8210744754438551, "grad_norm": 1.510740752003684, "learning_rate": 1.3699671642615434e-06, "loss": 0.5521829724311829, "step": 3561 }, { "epoch": 0.8213050495734379, "grad_norm": 1.4257916187791417, "learning_rate": 1.3696129285926769e-06, "loss": 0.42630624771118164, "step": 3562 }, { "epoch": 0.8215356237030205, "grad_norm": 1.3813571407602123, "learning_rate": 1.3692586391968254e-06, "loss": 0.5060243606567383, "step": 3563 }, { "epoch": 0.8217661978326032, "grad_norm": 1.553405319049413, "learning_rate": 1.3689042961254884e-06, "loss": 0.5803407430648804, "step": 3564 }, { "epoch": 0.8219967719621858, "grad_norm": 1.1610478816524794, "learning_rate": 1.3685498994301735e-06, "loss": 0.4510403871536255, "step": 3565 }, { "epoch": 0.8222273460917685, "grad_norm": 1.668001711945016, "learning_rate": 1.3681954491623953e-06, "loss": 0.5350467562675476, "step": 3566 }, { "epoch": 0.8224579202213511, "grad_norm": 1.4589682016059282, "learning_rate": 1.367840945373677e-06, "loss": 0.5194679498672485, "step": 3567 }, { "epoch": 0.8226884943509338, "grad_norm": 1.5164701950999842, "learning_rate": 1.3674863881155495e-06, "loss": 0.43574345111846924, "step": 3568 }, { "epoch": 0.8229190684805164, "grad_norm": 1.2235692010100727, "learning_rate": 1.367131777439551e-06, "loss": 0.43051451444625854, "step": 3569 }, { "epoch": 0.8231496426100992, "grad_norm": 1.4294583851960962, "learning_rate": 1.3667771133972278e-06, "loss": 0.44449925422668457, "step": 3570 }, { "epoch": 0.8233802167396818, "grad_norm": 1.4281775124274958, "learning_rate": 1.3664223960401342e-06, "loss": 0.4466608464717865, "step": 3571 }, { "epoch": 0.8236107908692645, "grad_norm": 1.506734312309144, "learning_rate": 1.3660676254198318e-06, "loss": 0.6172389984130859, "step": 3572 }, { "epoch": 0.8238413649988471, "grad_norm": 1.3071294444794341, "learning_rate": 1.36571280158789e-06, "loss": 0.3789742588996887, "step": 3573 }, { "epoch": 0.8240719391284298, "grad_norm": 1.2713531694738989, "learning_rate": 1.365357924595886e-06, "loss": 0.3871726095676422, "step": 3574 }, { "epoch": 0.8243025132580124, "grad_norm": 1.3659394637334186, "learning_rate": 1.3650029944954047e-06, "loss": 0.5464534759521484, "step": 3575 }, { "epoch": 0.8245330873875951, "grad_norm": 1.4254183485118588, "learning_rate": 1.3646480113380392e-06, "loss": 0.4924513101577759, "step": 3576 }, { "epoch": 0.8247636615171777, "grad_norm": 1.3350624286567714, "learning_rate": 1.3642929751753896e-06, "loss": 0.39648669958114624, "step": 3577 }, { "epoch": 0.8249942356467604, "grad_norm": 1.155634552535419, "learning_rate": 1.3639378860590642e-06, "loss": 0.44139498472213745, "step": 3578 }, { "epoch": 0.825224809776343, "grad_norm": 1.4016430263315434, "learning_rate": 1.3635827440406784e-06, "loss": 0.4477856159210205, "step": 3579 }, { "epoch": 0.8254553839059258, "grad_norm": 1.2543072909410065, "learning_rate": 1.363227549171856e-06, "loss": 0.48722583055496216, "step": 3580 }, { "epoch": 0.8256859580355084, "grad_norm": 1.5407337854642607, "learning_rate": 1.3628723015042285e-06, "loss": 0.44485795497894287, "step": 3581 }, { "epoch": 0.8259165321650911, "grad_norm": 1.481687909768813, "learning_rate": 1.362517001089434e-06, "loss": 0.510918140411377, "step": 3582 }, { "epoch": 0.8261471062946737, "grad_norm": 1.4714123899535927, "learning_rate": 1.3621616479791196e-06, "loss": 0.5157535076141357, "step": 3583 }, { "epoch": 0.8263776804242564, "grad_norm": 1.601097277197277, "learning_rate": 1.361806242224939e-06, "loss": 0.6120826005935669, "step": 3584 }, { "epoch": 0.826608254553839, "grad_norm": 1.379062804125132, "learning_rate": 1.3614507838785545e-06, "loss": 0.47521674633026123, "step": 3585 }, { "epoch": 0.8268388286834217, "grad_norm": 1.2544051986437676, "learning_rate": 1.3610952729916352e-06, "loss": 0.431441068649292, "step": 3586 }, { "epoch": 0.8270694028130043, "grad_norm": 1.4333858511847595, "learning_rate": 1.3607397096158587e-06, "loss": 0.5168293118476868, "step": 3587 }, { "epoch": 0.8272999769425871, "grad_norm": 1.4075386997192105, "learning_rate": 1.3603840938029092e-06, "loss": 0.47669821977615356, "step": 3588 }, { "epoch": 0.8275305510721697, "grad_norm": 1.6345113020695277, "learning_rate": 1.3600284256044791e-06, "loss": 0.5170806050300598, "step": 3589 }, { "epoch": 0.8277611252017524, "grad_norm": 1.3443972777893194, "learning_rate": 1.359672705072269e-06, "loss": 0.5578932762145996, "step": 3590 }, { "epoch": 0.827991699331335, "grad_norm": 1.2931790064355784, "learning_rate": 1.3593169322579855e-06, "loss": 0.45000678300857544, "step": 3591 }, { "epoch": 0.8282222734609177, "grad_norm": 1.7408157234389992, "learning_rate": 1.3589611072133448e-06, "loss": 0.47859635949134827, "step": 3592 }, { "epoch": 0.8284528475905003, "grad_norm": 1.629320946493551, "learning_rate": 1.3586052299900693e-06, "loss": 0.5373919606208801, "step": 3593 }, { "epoch": 0.828683421720083, "grad_norm": 1.4093194136520946, "learning_rate": 1.3582493006398888e-06, "loss": 0.5461571216583252, "step": 3594 }, { "epoch": 0.8289139958496656, "grad_norm": 1.4221547222488737, "learning_rate": 1.357893319214542e-06, "loss": 0.522891640663147, "step": 3595 }, { "epoch": 0.8291445699792483, "grad_norm": 1.3931497044748549, "learning_rate": 1.3575372857657739e-06, "loss": 0.503441572189331, "step": 3596 }, { "epoch": 0.829375144108831, "grad_norm": 1.4755218467347275, "learning_rate": 1.357181200345338e-06, "loss": 0.45475268363952637, "step": 3597 }, { "epoch": 0.8296057182384137, "grad_norm": 1.3529340787561033, "learning_rate": 1.3568250630049944e-06, "loss": 0.4626728296279907, "step": 3598 }, { "epoch": 0.8298362923679963, "grad_norm": 1.5106243497530205, "learning_rate": 1.3564688737965118e-06, "loss": 0.590618371963501, "step": 3599 }, { "epoch": 0.830066866497579, "grad_norm": 1.1729232075760356, "learning_rate": 1.3561126327716658e-06, "loss": 0.4252029061317444, "step": 3600 }, { "epoch": 0.8302974406271616, "grad_norm": 1.5093126003070163, "learning_rate": 1.3557563399822396e-06, "loss": 0.5741503238677979, "step": 3601 }, { "epoch": 0.8305280147567443, "grad_norm": 1.346541706093541, "learning_rate": 1.3553999954800236e-06, "loss": 0.4591038227081299, "step": 3602 }, { "epoch": 0.8307585888863269, "grad_norm": 1.5342817778823432, "learning_rate": 1.3550435993168164e-06, "loss": 0.5761657953262329, "step": 3603 }, { "epoch": 0.8309891630159096, "grad_norm": 1.4873747737215213, "learning_rate": 1.3546871515444239e-06, "loss": 0.4835323691368103, "step": 3604 }, { "epoch": 0.8312197371454922, "grad_norm": 1.3474153162620106, "learning_rate": 1.3543306522146594e-06, "loss": 0.6152533292770386, "step": 3605 }, { "epoch": 0.831450311275075, "grad_norm": 1.7615931586989606, "learning_rate": 1.3539741013793431e-06, "loss": 0.48106616735458374, "step": 3606 }, { "epoch": 0.8316808854046576, "grad_norm": 1.3977429311647935, "learning_rate": 1.3536174990903042e-06, "loss": 0.48128771781921387, "step": 3607 }, { "epoch": 0.8319114595342403, "grad_norm": 1.5624866131401935, "learning_rate": 1.353260845399378e-06, "loss": 0.4395609498023987, "step": 3608 }, { "epoch": 0.8321420336638229, "grad_norm": 1.6243424583265862, "learning_rate": 1.3529041403584076e-06, "loss": 0.5298231840133667, "step": 3609 }, { "epoch": 0.8323726077934056, "grad_norm": 1.610376085646533, "learning_rate": 1.3525473840192436e-06, "loss": 0.4694434404373169, "step": 3610 }, { "epoch": 0.8326031819229882, "grad_norm": 1.3870293085196028, "learning_rate": 1.3521905764337449e-06, "loss": 0.4264890253543854, "step": 3611 }, { "epoch": 0.8328337560525709, "grad_norm": 1.3900907609641087, "learning_rate": 1.3518337176537762e-06, "loss": 0.3266828656196594, "step": 3612 }, { "epoch": 0.8330643301821535, "grad_norm": 1.548598004244933, "learning_rate": 1.351476807731211e-06, "loss": 0.5554935336112976, "step": 3613 }, { "epoch": 0.8332949043117363, "grad_norm": 1.3139574983210685, "learning_rate": 1.3511198467179295e-06, "loss": 0.4375999867916107, "step": 3614 }, { "epoch": 0.8335254784413189, "grad_norm": 1.3568296792682797, "learning_rate": 1.35076283466582e-06, "loss": 0.564457893371582, "step": 3615 }, { "epoch": 0.8337560525709016, "grad_norm": 1.5648573569840147, "learning_rate": 1.3504057716267776e-06, "loss": 0.5141148567199707, "step": 3616 }, { "epoch": 0.8339866267004842, "grad_norm": 1.2607282701974722, "learning_rate": 1.350048657652705e-06, "loss": 0.45514535903930664, "step": 3617 }, { "epoch": 0.8342172008300669, "grad_norm": 1.298858308641179, "learning_rate": 1.3496914927955122e-06, "loss": 0.5224772691726685, "step": 3618 }, { "epoch": 0.8344477749596495, "grad_norm": 1.3773935543957632, "learning_rate": 1.349334277107117e-06, "loss": 0.45185205340385437, "step": 3619 }, { "epoch": 0.8346783490892322, "grad_norm": 1.3400411570126707, "learning_rate": 1.3489770106394444e-06, "loss": 0.47232794761657715, "step": 3620 }, { "epoch": 0.8349089232188148, "grad_norm": 1.3564585933268873, "learning_rate": 1.3486196934444264e-06, "loss": 0.44031190872192383, "step": 3621 }, { "epoch": 0.8351394973483975, "grad_norm": 1.2921832515242213, "learning_rate": 1.3482623255740028e-06, "loss": 0.4594510793685913, "step": 3622 }, { "epoch": 0.8353700714779801, "grad_norm": 1.3491628541071723, "learning_rate": 1.347904907080121e-06, "loss": 0.38726723194122314, "step": 3623 }, { "epoch": 0.8356006456075629, "grad_norm": 1.4086239991990677, "learning_rate": 1.3475474380147347e-06, "loss": 0.544617772102356, "step": 3624 }, { "epoch": 0.8358312197371455, "grad_norm": 1.5645995914963535, "learning_rate": 1.347189918429806e-06, "loss": 0.503423810005188, "step": 3625 }, { "epoch": 0.8360617938667282, "grad_norm": 1.3950432339665733, "learning_rate": 1.3468323483773038e-06, "loss": 0.4395143985748291, "step": 3626 }, { "epoch": 0.8362923679963108, "grad_norm": 1.6308000434387062, "learning_rate": 1.346474727909205e-06, "loss": 0.41464856266975403, "step": 3627 }, { "epoch": 0.8365229421258935, "grad_norm": 1.4008674771220466, "learning_rate": 1.346117057077493e-06, "loss": 0.4782845079898834, "step": 3628 }, { "epoch": 0.8367535162554761, "grad_norm": 1.2484540580184977, "learning_rate": 1.345759335934159e-06, "loss": 0.48308104276657104, "step": 3629 }, { "epoch": 0.8369840903850588, "grad_norm": 1.3935764281095124, "learning_rate": 1.345401564531201e-06, "loss": 0.5759967565536499, "step": 3630 }, { "epoch": 0.8372146645146414, "grad_norm": 1.421077506310717, "learning_rate": 1.3450437429206256e-06, "loss": 0.5900512337684631, "step": 3631 }, { "epoch": 0.8374452386442242, "grad_norm": 1.3643346247687353, "learning_rate": 1.3446858711544451e-06, "loss": 0.4776286482810974, "step": 3632 }, { "epoch": 0.8376758127738068, "grad_norm": 1.5796891796446009, "learning_rate": 1.34432794928468e-06, "loss": 0.5123563408851624, "step": 3633 }, { "epoch": 0.8379063869033895, "grad_norm": 1.6272139775850447, "learning_rate": 1.3439699773633574e-06, "loss": 0.5505821108818054, "step": 3634 }, { "epoch": 0.8381369610329721, "grad_norm": 1.4456391396483874, "learning_rate": 1.343611955442513e-06, "loss": 0.5525364875793457, "step": 3635 }, { "epoch": 0.8383675351625548, "grad_norm": 1.1644228181066894, "learning_rate": 1.3432538835741884e-06, "loss": 0.44074952602386475, "step": 3636 }, { "epoch": 0.8385981092921374, "grad_norm": 1.3792820862390651, "learning_rate": 1.3428957618104331e-06, "loss": 0.5488649606704712, "step": 3637 }, { "epoch": 0.8388286834217201, "grad_norm": 1.159150884236996, "learning_rate": 1.3425375902033034e-06, "loss": 0.4427725672721863, "step": 3638 }, { "epoch": 0.8390592575513027, "grad_norm": 1.5753495335559473, "learning_rate": 1.3421793688048636e-06, "loss": 0.5244250297546387, "step": 3639 }, { "epoch": 0.8392898316808854, "grad_norm": 1.2853956216426152, "learning_rate": 1.3418210976671845e-06, "loss": 0.4684640169143677, "step": 3640 }, { "epoch": 0.839520405810468, "grad_norm": 1.4767228704961965, "learning_rate": 1.3414627768423449e-06, "loss": 0.4518035054206848, "step": 3641 }, { "epoch": 0.8397509799400508, "grad_norm": 1.5338085000094812, "learning_rate": 1.34110440638243e-06, "loss": 0.47504323720932007, "step": 3642 }, { "epoch": 0.8399815540696334, "grad_norm": 1.7182899921711987, "learning_rate": 1.3407459863395326e-06, "loss": 0.3835057020187378, "step": 3643 }, { "epoch": 0.8402121281992161, "grad_norm": 1.4517538314936977, "learning_rate": 1.3403875167657529e-06, "loss": 0.4103546738624573, "step": 3644 }, { "epoch": 0.8404427023287987, "grad_norm": 1.3338056576205999, "learning_rate": 1.3400289977131974e-06, "loss": 0.48064136505126953, "step": 3645 }, { "epoch": 0.8406732764583814, "grad_norm": 1.5606949897639386, "learning_rate": 1.3396704292339813e-06, "loss": 0.49655234813690186, "step": 3646 }, { "epoch": 0.840903850587964, "grad_norm": 1.3180737586627664, "learning_rate": 1.3393118113802259e-06, "loss": 0.5559303760528564, "step": 3647 }, { "epoch": 0.8411344247175467, "grad_norm": 1.3902505896601203, "learning_rate": 1.3389531442040599e-06, "loss": 0.5173505544662476, "step": 3648 }, { "epoch": 0.8413649988471293, "grad_norm": 1.4997400095057662, "learning_rate": 1.338594427757619e-06, "loss": 0.500524640083313, "step": 3649 }, { "epoch": 0.8415955729767121, "grad_norm": 1.3017945585861477, "learning_rate": 1.3382356620930467e-06, "loss": 0.5167285203933716, "step": 3650 }, { "epoch": 0.8418261471062947, "grad_norm": 1.4661199659605932, "learning_rate": 1.3378768472624929e-06, "loss": 0.5006825923919678, "step": 3651 }, { "epoch": 0.8420567212358774, "grad_norm": 1.5253217794534257, "learning_rate": 1.3375179833181153e-06, "loss": 0.5421864986419678, "step": 3652 }, { "epoch": 0.84228729536546, "grad_norm": 1.5304567180850979, "learning_rate": 1.337159070312078e-06, "loss": 0.4964475929737091, "step": 3653 }, { "epoch": 0.8425178694950427, "grad_norm": 1.2795061721511742, "learning_rate": 1.3368001082965528e-06, "loss": 0.4020928144454956, "step": 3654 }, { "epoch": 0.8427484436246253, "grad_norm": 1.3457912405228358, "learning_rate": 1.3364410973237183e-06, "loss": 0.43009278178215027, "step": 3655 }, { "epoch": 0.842979017754208, "grad_norm": 1.3663101783603413, "learning_rate": 1.3360820374457608e-06, "loss": 0.5939761400222778, "step": 3656 }, { "epoch": 0.8432095918837906, "grad_norm": 1.3723718945789372, "learning_rate": 1.335722928714873e-06, "loss": 0.43889346718788147, "step": 3657 }, { "epoch": 0.8434401660133733, "grad_norm": 1.510811137049935, "learning_rate": 1.335363771183255e-06, "loss": 0.5125945806503296, "step": 3658 }, { "epoch": 0.843670740142956, "grad_norm": 1.2988273180041983, "learning_rate": 1.3350045649031143e-06, "loss": 0.516818642616272, "step": 3659 }, { "epoch": 0.8439013142725387, "grad_norm": 1.2172726171902464, "learning_rate": 1.3346453099266649e-06, "loss": 0.5098299980163574, "step": 3660 }, { "epoch": 0.8441318884021213, "grad_norm": 1.4809835823543989, "learning_rate": 1.334286006306128e-06, "loss": 0.46228134632110596, "step": 3661 }, { "epoch": 0.844362462531704, "grad_norm": 1.518730905252404, "learning_rate": 1.3339266540937324e-06, "loss": 0.38364481925964355, "step": 3662 }, { "epoch": 0.8445930366612866, "grad_norm": 1.2447229933483466, "learning_rate": 1.3335672533417134e-06, "loss": 0.4363073706626892, "step": 3663 }, { "epoch": 0.8448236107908693, "grad_norm": 1.5445839123019949, "learning_rate": 1.3332078041023133e-06, "loss": 0.463603675365448, "step": 3664 }, { "epoch": 0.8450541849204519, "grad_norm": 1.118250112497339, "learning_rate": 1.3328483064277816e-06, "loss": 0.4173084795475006, "step": 3665 }, { "epoch": 0.8452847590500346, "grad_norm": 1.2905398126594152, "learning_rate": 1.3324887603703756e-06, "loss": 0.41451913118362427, "step": 3666 }, { "epoch": 0.8455153331796172, "grad_norm": 1.3301474043831027, "learning_rate": 1.3321291659823587e-06, "loss": 0.49418264627456665, "step": 3667 }, { "epoch": 0.8457459073092, "grad_norm": 1.323747824550861, "learning_rate": 1.3317695233160015e-06, "loss": 0.48787444829940796, "step": 3668 }, { "epoch": 0.8459764814387826, "grad_norm": 1.419516654753041, "learning_rate": 1.3314098324235814e-06, "loss": 0.484865665435791, "step": 3669 }, { "epoch": 0.8462070555683652, "grad_norm": 1.4996660725713626, "learning_rate": 1.3310500933573837e-06, "loss": 0.44162076711654663, "step": 3670 }, { "epoch": 0.8464376296979479, "grad_norm": 1.4496595059902684, "learning_rate": 1.3306903061696999e-06, "loss": 0.39880990982055664, "step": 3671 }, { "epoch": 0.8466682038275305, "grad_norm": 1.596735486600776, "learning_rate": 1.3303304709128288e-06, "loss": 0.4405972957611084, "step": 3672 }, { "epoch": 0.8468987779571132, "grad_norm": 1.8476371944591239, "learning_rate": 1.3299705876390755e-06, "loss": 0.4228917956352234, "step": 3673 }, { "epoch": 0.8471293520866958, "grad_norm": 1.3245854918753257, "learning_rate": 1.3296106564007532e-06, "loss": 0.44533059000968933, "step": 3674 }, { "epoch": 0.8473599262162785, "grad_norm": 1.324480419314636, "learning_rate": 1.3292506772501816e-06, "loss": 0.4672505855560303, "step": 3675 }, { "epoch": 0.8475905003458611, "grad_norm": 1.5345690520656405, "learning_rate": 1.3288906502396873e-06, "loss": 0.5651025772094727, "step": 3676 }, { "epoch": 0.8478210744754439, "grad_norm": 1.4113200785742674, "learning_rate": 1.3285305754216034e-06, "loss": 0.4877372086048126, "step": 3677 }, { "epoch": 0.8480516486050265, "grad_norm": 1.6156626909271148, "learning_rate": 1.3281704528482713e-06, "loss": 0.43767499923706055, "step": 3678 }, { "epoch": 0.8482822227346092, "grad_norm": 1.6309175000442955, "learning_rate": 1.3278102825720376e-06, "loss": 0.5077182650566101, "step": 3679 }, { "epoch": 0.8485127968641918, "grad_norm": 1.5150502093819094, "learning_rate": 1.3274500646452573e-06, "loss": 0.4814456105232239, "step": 3680 }, { "epoch": 0.8487433709937745, "grad_norm": 1.3626740483959299, "learning_rate": 1.3270897991202913e-06, "loss": 0.4454193115234375, "step": 3681 }, { "epoch": 0.8489739451233571, "grad_norm": 1.1173863119708762, "learning_rate": 1.3267294860495084e-06, "loss": 0.3973482549190521, "step": 3682 }, { "epoch": 0.8492045192529398, "grad_norm": 1.5337644837004238, "learning_rate": 1.3263691254852834e-06, "loss": 0.5115909576416016, "step": 3683 }, { "epoch": 0.8494350933825224, "grad_norm": 1.2962888350788886, "learning_rate": 1.3260087174799982e-06, "loss": 0.4217768907546997, "step": 3684 }, { "epoch": 0.8496656675121051, "grad_norm": 1.5676465439666392, "learning_rate": 1.3256482620860414e-06, "loss": 0.4462714195251465, "step": 3685 }, { "epoch": 0.8498962416416878, "grad_norm": 1.278085511550712, "learning_rate": 1.32528775935581e-06, "loss": 0.4617312550544739, "step": 3686 }, { "epoch": 0.8501268157712705, "grad_norm": 1.2760475898780375, "learning_rate": 1.324927209341706e-06, "loss": 0.4774616062641144, "step": 3687 }, { "epoch": 0.8503573899008531, "grad_norm": 1.389927333157612, "learning_rate": 1.3245666120961389e-06, "loss": 0.38730189204216003, "step": 3688 }, { "epoch": 0.8505879640304358, "grad_norm": 1.5164687032364252, "learning_rate": 1.324205967671525e-06, "loss": 0.45189517736434937, "step": 3689 }, { "epoch": 0.8508185381600184, "grad_norm": 1.489462413187487, "learning_rate": 1.3238452761202887e-06, "loss": 0.4965584874153137, "step": 3690 }, { "epoch": 0.8510491122896011, "grad_norm": 1.2283217886481297, "learning_rate": 1.3234845374948591e-06, "loss": 0.4409075975418091, "step": 3691 }, { "epoch": 0.8512796864191837, "grad_norm": 1.3545920303070538, "learning_rate": 1.3231237518476737e-06, "loss": 0.4457218647003174, "step": 3692 }, { "epoch": 0.8515102605487664, "grad_norm": 1.2432481704868787, "learning_rate": 1.3227629192311762e-06, "loss": 0.42810603976249695, "step": 3693 }, { "epoch": 0.851740834678349, "grad_norm": 1.3504737245283156, "learning_rate": 1.3224020396978172e-06, "loss": 0.40753173828125, "step": 3694 }, { "epoch": 0.8519714088079318, "grad_norm": 1.5063309076640758, "learning_rate": 1.3220411133000542e-06, "loss": 0.5057830810546875, "step": 3695 }, { "epoch": 0.8522019829375144, "grad_norm": 1.4625648008354504, "learning_rate": 1.3216801400903515e-06, "loss": 0.42498981952667236, "step": 3696 }, { "epoch": 0.8524325570670971, "grad_norm": 1.736302707969947, "learning_rate": 1.3213191201211806e-06, "loss": 0.44985881447792053, "step": 3697 }, { "epoch": 0.8526631311966797, "grad_norm": 1.5257289791960187, "learning_rate": 1.3209580534450192e-06, "loss": 0.39984816312789917, "step": 3698 }, { "epoch": 0.8528937053262624, "grad_norm": 1.4859934204912078, "learning_rate": 1.3205969401143516e-06, "loss": 0.4773896038532257, "step": 3699 }, { "epoch": 0.853124279455845, "grad_norm": 1.5299580963987478, "learning_rate": 1.3202357801816698e-06, "loss": 0.5699855089187622, "step": 3700 }, { "epoch": 0.8533548535854277, "grad_norm": 1.5124437197630332, "learning_rate": 1.3198745736994714e-06, "loss": 0.4486675262451172, "step": 3701 }, { "epoch": 0.8535854277150103, "grad_norm": 1.3641053506348044, "learning_rate": 1.3195133207202625e-06, "loss": 0.47909995913505554, "step": 3702 }, { "epoch": 0.853816001844593, "grad_norm": 1.3267279385735278, "learning_rate": 1.3191520212965542e-06, "loss": 0.4356222450733185, "step": 3703 }, { "epoch": 0.8540465759741757, "grad_norm": 1.5161594053893233, "learning_rate": 1.3187906754808646e-06, "loss": 0.4734821319580078, "step": 3704 }, { "epoch": 0.8542771501037584, "grad_norm": 1.1414361983546972, "learning_rate": 1.3184292833257197e-06, "loss": 0.4164031744003296, "step": 3705 }, { "epoch": 0.854507724233341, "grad_norm": 1.5194682024268111, "learning_rate": 1.3180678448836516e-06, "loss": 0.505548357963562, "step": 3706 }, { "epoch": 0.8547382983629237, "grad_norm": 1.4180879233512311, "learning_rate": 1.3177063602071985e-06, "loss": 0.4443202316761017, "step": 3707 }, { "epoch": 0.8549688724925063, "grad_norm": 1.4808642334806548, "learning_rate": 1.317344829348906e-06, "loss": 0.4594070017337799, "step": 3708 }, { "epoch": 0.855199446622089, "grad_norm": 1.595149298191138, "learning_rate": 1.3169832523613265e-06, "loss": 0.5346768498420715, "step": 3709 }, { "epoch": 0.8554300207516716, "grad_norm": 1.4211934536480004, "learning_rate": 1.3166216292970185e-06, "loss": 0.44471168518066406, "step": 3710 }, { "epoch": 0.8556605948812543, "grad_norm": 1.3967510109946715, "learning_rate": 1.3162599602085482e-06, "loss": 0.4414154589176178, "step": 3711 }, { "epoch": 0.855891169010837, "grad_norm": 1.2591243363727789, "learning_rate": 1.3158982451484873e-06, "loss": 0.4267842769622803, "step": 3712 }, { "epoch": 0.8561217431404197, "grad_norm": 1.5517519524370356, "learning_rate": 1.315536484169415e-06, "loss": 0.5282812118530273, "step": 3713 }, { "epoch": 0.8563523172700023, "grad_norm": 1.3747848129200213, "learning_rate": 1.3151746773239167e-06, "loss": 0.3831692934036255, "step": 3714 }, { "epoch": 0.856582891399585, "grad_norm": 1.3399055617764033, "learning_rate": 1.3148128246645848e-06, "loss": 0.4714779853820801, "step": 3715 }, { "epoch": 0.8568134655291676, "grad_norm": 1.5957966977407376, "learning_rate": 1.3144509262440185e-06, "loss": 0.515029788017273, "step": 3716 }, { "epoch": 0.8570440396587503, "grad_norm": 1.6565005005078866, "learning_rate": 1.314088982114823e-06, "loss": 0.48407065868377686, "step": 3717 }, { "epoch": 0.8572746137883329, "grad_norm": 1.2250893853794216, "learning_rate": 1.3137269923296111e-06, "loss": 0.4756847620010376, "step": 3718 }, { "epoch": 0.8575051879179156, "grad_norm": 1.4417516161095163, "learning_rate": 1.313364956941001e-06, "loss": 0.47744277119636536, "step": 3719 }, { "epoch": 0.8577357620474982, "grad_norm": 1.4540506451139732, "learning_rate": 1.3130028760016187e-06, "loss": 0.4967440366744995, "step": 3720 }, { "epoch": 0.857966336177081, "grad_norm": 1.5755023694033539, "learning_rate": 1.312640749564096e-06, "loss": 0.44999921321868896, "step": 3721 }, { "epoch": 0.8581969103066636, "grad_norm": 1.1829331105101752, "learning_rate": 1.3122785776810723e-06, "loss": 0.4454652667045593, "step": 3722 }, { "epoch": 0.8584274844362463, "grad_norm": 1.220523426514953, "learning_rate": 1.3119163604051923e-06, "loss": 0.37483078241348267, "step": 3723 }, { "epoch": 0.8586580585658289, "grad_norm": 1.45963624909142, "learning_rate": 1.3115540977891076e-06, "loss": 0.3732140064239502, "step": 3724 }, { "epoch": 0.8588886326954116, "grad_norm": 1.5667872254799649, "learning_rate": 1.3111917898854779e-06, "loss": 0.5709421634674072, "step": 3725 }, { "epoch": 0.8591192068249942, "grad_norm": 2.0482790256244514, "learning_rate": 1.3108294367469677e-06, "loss": 0.5301297307014465, "step": 3726 }, { "epoch": 0.8593497809545769, "grad_norm": 1.2253994153188903, "learning_rate": 1.3104670384262484e-06, "loss": 0.45979735255241394, "step": 3727 }, { "epoch": 0.8595803550841595, "grad_norm": 1.5172885339612137, "learning_rate": 1.3101045949759985e-06, "loss": 0.5051921606063843, "step": 3728 }, { "epoch": 0.8598109292137422, "grad_norm": 1.5432212262669465, "learning_rate": 1.309742106448903e-06, "loss": 0.5057204365730286, "step": 3729 }, { "epoch": 0.8600415033433249, "grad_norm": 1.3029916397805466, "learning_rate": 1.3093795728976535e-06, "loss": 0.4265059530735016, "step": 3730 }, { "epoch": 0.8602720774729076, "grad_norm": 1.2392416355330595, "learning_rate": 1.3090169943749473e-06, "loss": 0.39166492223739624, "step": 3731 }, { "epoch": 0.8605026516024902, "grad_norm": 1.4335892651385718, "learning_rate": 1.308654370933489e-06, "loss": 0.4321832060813904, "step": 3732 }, { "epoch": 0.8607332257320729, "grad_norm": 1.4026009292758175, "learning_rate": 1.3082917026259906e-06, "loss": 0.5028939247131348, "step": 3733 }, { "epoch": 0.8609637998616555, "grad_norm": 1.461263824354524, "learning_rate": 1.3079289895051681e-06, "loss": 0.4642373323440552, "step": 3734 }, { "epoch": 0.8611943739912382, "grad_norm": 1.2616373488525174, "learning_rate": 1.3075662316237464e-06, "loss": 0.416348397731781, "step": 3735 }, { "epoch": 0.8614249481208208, "grad_norm": 1.9156143459520234, "learning_rate": 1.3072034290344556e-06, "loss": 0.48442524671554565, "step": 3736 }, { "epoch": 0.8616555222504035, "grad_norm": 1.4675369296005183, "learning_rate": 1.3068405817900332e-06, "loss": 0.46903935074806213, "step": 3737 }, { "epoch": 0.8618860963799861, "grad_norm": 1.433982633948309, "learning_rate": 1.3064776899432224e-06, "loss": 0.48172008991241455, "step": 3738 }, { "epoch": 0.8621166705095689, "grad_norm": 1.4697783322173945, "learning_rate": 1.3061147535467734e-06, "loss": 0.44460922479629517, "step": 3739 }, { "epoch": 0.8623472446391515, "grad_norm": 1.4552688390934359, "learning_rate": 1.3057517726534423e-06, "loss": 0.4728608727455139, "step": 3740 }, { "epoch": 0.8625778187687342, "grad_norm": 1.2981084774118934, "learning_rate": 1.3053887473159928e-06, "loss": 0.36457544565200806, "step": 3741 }, { "epoch": 0.8628083928983168, "grad_norm": 1.3219603285138386, "learning_rate": 1.3050256775871936e-06, "loss": 0.3753359317779541, "step": 3742 }, { "epoch": 0.8630389670278995, "grad_norm": 1.71764180047156, "learning_rate": 1.304662563519821e-06, "loss": 0.38679057359695435, "step": 3743 }, { "epoch": 0.8632695411574821, "grad_norm": 1.2517686459377946, "learning_rate": 1.304299405166657e-06, "loss": 0.5008635520935059, "step": 3744 }, { "epoch": 0.8635001152870648, "grad_norm": 1.6524585351681906, "learning_rate": 1.3039362025804903e-06, "loss": 0.3723052740097046, "step": 3745 }, { "epoch": 0.8637306894166474, "grad_norm": 1.4101013037777343, "learning_rate": 1.3035729558141166e-06, "loss": 0.4227592945098877, "step": 3746 }, { "epoch": 0.8639612635462302, "grad_norm": 1.2385954175555658, "learning_rate": 1.3032096649203369e-06, "loss": 0.44072139263153076, "step": 3747 }, { "epoch": 0.8641918376758128, "grad_norm": 1.330285491132409, "learning_rate": 1.3028463299519594e-06, "loss": 0.49321871995925903, "step": 3748 }, { "epoch": 0.8644224118053955, "grad_norm": 1.1777120494442346, "learning_rate": 1.3024829509617987e-06, "loss": 0.3751382827758789, "step": 3749 }, { "epoch": 0.8646529859349781, "grad_norm": 1.2092220891938048, "learning_rate": 1.3021195280026755e-06, "loss": 0.43967729806900024, "step": 3750 }, { "epoch": 0.8648835600645608, "grad_norm": 1.2227774970491123, "learning_rate": 1.3017560611274172e-06, "loss": 0.4102880358695984, "step": 3751 }, { "epoch": 0.8651141341941434, "grad_norm": 1.4524327131347594, "learning_rate": 1.301392550388857e-06, "loss": 0.5225233435630798, "step": 3752 }, { "epoch": 0.8653447083237261, "grad_norm": 1.7121734467218848, "learning_rate": 1.3010289958398352e-06, "loss": 0.6021677255630493, "step": 3753 }, { "epoch": 0.8655752824533087, "grad_norm": 1.294116122042798, "learning_rate": 1.300665397533198e-06, "loss": 0.5031560063362122, "step": 3754 }, { "epoch": 0.8658058565828914, "grad_norm": 1.2573123861588813, "learning_rate": 1.300301755521798e-06, "loss": 0.5406110286712646, "step": 3755 }, { "epoch": 0.866036430712474, "grad_norm": 1.3123644187859618, "learning_rate": 1.2999380698584945e-06, "loss": 0.5359587669372559, "step": 3756 }, { "epoch": 0.8662670048420568, "grad_norm": 1.4006997771166723, "learning_rate": 1.2995743405961525e-06, "loss": 0.46089720726013184, "step": 3757 }, { "epoch": 0.8664975789716394, "grad_norm": 1.3064464980724229, "learning_rate": 1.2992105677876444e-06, "loss": 0.4611746668815613, "step": 3758 }, { "epoch": 0.8667281531012221, "grad_norm": 1.3860871410802968, "learning_rate": 1.2988467514858478e-06, "loss": 0.47040778398513794, "step": 3759 }, { "epoch": 0.8669587272308047, "grad_norm": 1.4624604845389892, "learning_rate": 1.2984828917436469e-06, "loss": 0.5118452310562134, "step": 3760 }, { "epoch": 0.8671893013603874, "grad_norm": 1.3248325273306294, "learning_rate": 1.2981189886139326e-06, "loss": 0.42349302768707275, "step": 3761 }, { "epoch": 0.86741987548997, "grad_norm": 1.4983666129317725, "learning_rate": 1.2977550421496022e-06, "loss": 0.4888027310371399, "step": 3762 }, { "epoch": 0.8676504496195527, "grad_norm": 1.5557430857836938, "learning_rate": 1.2973910524035587e-06, "loss": 0.5637897849082947, "step": 3763 }, { "epoch": 0.8678810237491353, "grad_norm": 1.2906063231523421, "learning_rate": 1.2970270194287119e-06, "loss": 0.4159572124481201, "step": 3764 }, { "epoch": 0.868111597878718, "grad_norm": 1.613449710248156, "learning_rate": 1.2966629432779775e-06, "loss": 0.4558612108230591, "step": 3765 }, { "epoch": 0.8683421720083007, "grad_norm": 1.229959300374187, "learning_rate": 1.2962988240042775e-06, "loss": 0.4235115647315979, "step": 3766 }, { "epoch": 0.8685727461378834, "grad_norm": 1.5042750051225975, "learning_rate": 1.2959346616605404e-06, "loss": 0.5096476078033447, "step": 3767 }, { "epoch": 0.868803320267466, "grad_norm": 1.3849812365321899, "learning_rate": 1.2955704562997013e-06, "loss": 0.47097906470298767, "step": 3768 }, { "epoch": 0.8690338943970487, "grad_norm": 1.2057643302548011, "learning_rate": 1.2952062079747008e-06, "loss": 0.4508157968521118, "step": 3769 }, { "epoch": 0.8692644685266313, "grad_norm": 1.3904260388472953, "learning_rate": 1.2948419167384864e-06, "loss": 0.43800675868988037, "step": 3770 }, { "epoch": 0.869495042656214, "grad_norm": 1.3552023829739699, "learning_rate": 1.2944775826440108e-06, "loss": 0.5512480735778809, "step": 3771 }, { "epoch": 0.8697256167857966, "grad_norm": 1.4428129453899297, "learning_rate": 1.2941132057442342e-06, "loss": 0.4654430150985718, "step": 3772 }, { "epoch": 0.8699561909153793, "grad_norm": 1.3297596373891312, "learning_rate": 1.293748786092123e-06, "loss": 0.5429458618164062, "step": 3773 }, { "epoch": 0.870186765044962, "grad_norm": 1.7953090529311853, "learning_rate": 1.2933843237406481e-06, "loss": 0.415671169757843, "step": 3774 }, { "epoch": 0.8704173391745447, "grad_norm": 1.3784118855195835, "learning_rate": 1.2930198187427884e-06, "loss": 0.4347325563430786, "step": 3775 }, { "epoch": 0.8706479133041273, "grad_norm": 1.3858530201589612, "learning_rate": 1.2926552711515287e-06, "loss": 0.41997528076171875, "step": 3776 }, { "epoch": 0.87087848743371, "grad_norm": 1.4475652450278216, "learning_rate": 1.292290681019859e-06, "loss": 0.45956090092658997, "step": 3777 }, { "epoch": 0.8711090615632926, "grad_norm": 1.3318373392521217, "learning_rate": 1.2919260484007767e-06, "loss": 0.4615165889263153, "step": 3778 }, { "epoch": 0.8713396356928753, "grad_norm": 1.5526291007190895, "learning_rate": 1.2915613733472848e-06, "loss": 0.3919866681098938, "step": 3779 }, { "epoch": 0.8715702098224579, "grad_norm": 1.5182901628405527, "learning_rate": 1.2911966559123922e-06, "loss": 0.5324772000312805, "step": 3780 }, { "epoch": 0.8718007839520405, "grad_norm": 1.4899431097732017, "learning_rate": 1.2908318961491147e-06, "loss": 0.4813354015350342, "step": 3781 }, { "epoch": 0.8720313580816232, "grad_norm": 1.6904916219237236, "learning_rate": 1.2904670941104735e-06, "loss": 0.5617851614952087, "step": 3782 }, { "epoch": 0.8722619322112058, "grad_norm": 1.5869523154671146, "learning_rate": 1.2901022498494963e-06, "loss": 0.5369905233383179, "step": 3783 }, { "epoch": 0.8724925063407886, "grad_norm": 1.4103839502113327, "learning_rate": 1.289737363419217e-06, "loss": 0.469723641872406, "step": 3784 }, { "epoch": 0.8727230804703712, "grad_norm": 1.5392452648373567, "learning_rate": 1.2893724348726757e-06, "loss": 0.5100580453872681, "step": 3785 }, { "epoch": 0.8729536545999539, "grad_norm": 1.4522390007049084, "learning_rate": 1.289007464262918e-06, "loss": 0.3959219455718994, "step": 3786 }, { "epoch": 0.8731842287295365, "grad_norm": 1.3370969443139462, "learning_rate": 1.2886424516429967e-06, "loss": 0.4237936735153198, "step": 3787 }, { "epoch": 0.8734148028591192, "grad_norm": 1.6505369649722645, "learning_rate": 1.2882773970659693e-06, "loss": 0.4604552984237671, "step": 3788 }, { "epoch": 0.8736453769887018, "grad_norm": 1.4408188813706955, "learning_rate": 1.287912300584901e-06, "loss": 0.4265769124031067, "step": 3789 }, { "epoch": 0.8738759511182845, "grad_norm": 1.185765484689313, "learning_rate": 1.2875471622528617e-06, "loss": 0.4644312262535095, "step": 3790 }, { "epoch": 0.8741065252478671, "grad_norm": 1.5605966972230738, "learning_rate": 1.2871819821229282e-06, "loss": 0.5520300269126892, "step": 3791 }, { "epoch": 0.8743370993774499, "grad_norm": 1.2172431342127952, "learning_rate": 1.2868167602481831e-06, "loss": 0.42350637912750244, "step": 3792 }, { "epoch": 0.8745676735070325, "grad_norm": 1.3605025828289865, "learning_rate": 1.2864514966817155e-06, "loss": 0.5148683786392212, "step": 3793 }, { "epoch": 0.8747982476366152, "grad_norm": 1.2825363473778824, "learning_rate": 1.2860861914766191e-06, "loss": 0.4506865441799164, "step": 3794 }, { "epoch": 0.8750288217661978, "grad_norm": 1.240014068038836, "learning_rate": 1.2857208446859957e-06, "loss": 0.4042026996612549, "step": 3795 }, { "epoch": 0.8752593958957805, "grad_norm": 1.749789157467437, "learning_rate": 1.2853554563629521e-06, "loss": 0.4601382613182068, "step": 3796 }, { "epoch": 0.8754899700253631, "grad_norm": 1.1956968937229655, "learning_rate": 1.2849900265606007e-06, "loss": 0.3387809097766876, "step": 3797 }, { "epoch": 0.8757205441549458, "grad_norm": 1.3296970918872935, "learning_rate": 1.2846245553320604e-06, "loss": 0.5295180082321167, "step": 3798 }, { "epoch": 0.8759511182845284, "grad_norm": 1.518762035085977, "learning_rate": 1.2842590427304564e-06, "loss": 0.47733891010284424, "step": 3799 }, { "epoch": 0.8761816924141111, "grad_norm": 1.3675518552119075, "learning_rate": 1.2838934888089198e-06, "loss": 0.46294957399368286, "step": 3800 }, { "epoch": 0.8764122665436938, "grad_norm": 1.3892016156570253, "learning_rate": 1.2835278936205877e-06, "loss": 0.4638972580432892, "step": 3801 }, { "epoch": 0.8766428406732765, "grad_norm": 1.2670627732920314, "learning_rate": 1.2831622572186027e-06, "loss": 0.5078087449073792, "step": 3802 }, { "epoch": 0.8768734148028591, "grad_norm": 1.2490466990727205, "learning_rate": 1.2827965796561138e-06, "loss": 0.49626827239990234, "step": 3803 }, { "epoch": 0.8771039889324418, "grad_norm": 1.3784871825818807, "learning_rate": 1.2824308609862758e-06, "loss": 0.4857192635536194, "step": 3804 }, { "epoch": 0.8773345630620244, "grad_norm": 1.5003545684747548, "learning_rate": 1.2820651012622498e-06, "loss": 0.5403131246566772, "step": 3805 }, { "epoch": 0.8775651371916071, "grad_norm": 1.532730699853752, "learning_rate": 1.2816993005372029e-06, "loss": 0.519463837146759, "step": 3806 }, { "epoch": 0.8777957113211897, "grad_norm": 1.648937105926222, "learning_rate": 1.2813334588643077e-06, "loss": 0.6038607954978943, "step": 3807 }, { "epoch": 0.8780262854507724, "grad_norm": 1.5251750284604964, "learning_rate": 1.280967576296743e-06, "loss": 0.4892663359642029, "step": 3808 }, { "epoch": 0.878256859580355, "grad_norm": 1.4437992115831912, "learning_rate": 1.2806016528876934e-06, "loss": 0.47872501611709595, "step": 3809 }, { "epoch": 0.8784874337099378, "grad_norm": 1.401497704596745, "learning_rate": 1.28023568869035e-06, "loss": 0.4863993227481842, "step": 3810 }, { "epoch": 0.8787180078395204, "grad_norm": 1.2319881889422357, "learning_rate": 1.2798696837579088e-06, "loss": 0.45241546630859375, "step": 3811 }, { "epoch": 0.8789485819691031, "grad_norm": 1.26957816055566, "learning_rate": 1.2795036381435728e-06, "loss": 0.48720863461494446, "step": 3812 }, { "epoch": 0.8791791560986857, "grad_norm": 1.4244000796725484, "learning_rate": 1.2791375519005507e-06, "loss": 0.49139827489852905, "step": 3813 }, { "epoch": 0.8794097302282684, "grad_norm": 1.1021730064681352, "learning_rate": 1.278771425082056e-06, "loss": 0.41915225982666016, "step": 3814 }, { "epoch": 0.879640304357851, "grad_norm": 1.164668093587021, "learning_rate": 1.2784052577413095e-06, "loss": 0.41831016540527344, "step": 3815 }, { "epoch": 0.8798708784874337, "grad_norm": 1.392466935090571, "learning_rate": 1.2780390499315374e-06, "loss": 0.49456197023391724, "step": 3816 }, { "epoch": 0.8801014526170163, "grad_norm": 1.4645341817096265, "learning_rate": 1.2776728017059714e-06, "loss": 0.4656866192817688, "step": 3817 }, { "epoch": 0.880332026746599, "grad_norm": 1.375452516729426, "learning_rate": 1.2773065131178494e-06, "loss": 0.449514776468277, "step": 3818 }, { "epoch": 0.8805626008761817, "grad_norm": 1.320026502962018, "learning_rate": 1.2769401842204156e-06, "loss": 0.3762073516845703, "step": 3819 }, { "epoch": 0.8807931750057644, "grad_norm": 1.6471923718834367, "learning_rate": 1.2765738150669192e-06, "loss": 0.5680521130561829, "step": 3820 }, { "epoch": 0.881023749135347, "grad_norm": 1.227867578043664, "learning_rate": 1.276207405710616e-06, "loss": 0.35371482372283936, "step": 3821 }, { "epoch": 0.8812543232649297, "grad_norm": 1.6584454245429339, "learning_rate": 1.2758409562047669e-06, "loss": 0.5145018100738525, "step": 3822 }, { "epoch": 0.8814848973945123, "grad_norm": 1.4264603788288566, "learning_rate": 1.2754744666026392e-06, "loss": 0.5425234436988831, "step": 3823 }, { "epoch": 0.881715471524095, "grad_norm": 1.605664005655016, "learning_rate": 1.275107936957506e-06, "loss": 0.48439931869506836, "step": 3824 }, { "epoch": 0.8819460456536776, "grad_norm": 1.4836193722422002, "learning_rate": 1.2747413673226462e-06, "loss": 0.5177323818206787, "step": 3825 }, { "epoch": 0.8821766197832603, "grad_norm": 1.4672524591279896, "learning_rate": 1.2743747577513437e-06, "loss": 0.4718499779701233, "step": 3826 }, { "epoch": 0.882407193912843, "grad_norm": 1.3580668132517044, "learning_rate": 1.27400810829689e-06, "loss": 0.5140804648399353, "step": 3827 }, { "epoch": 0.8826377680424257, "grad_norm": 1.2476007061260952, "learning_rate": 1.2736414190125805e-06, "loss": 0.4611731767654419, "step": 3828 }, { "epoch": 0.8828683421720083, "grad_norm": 1.3574827964922753, "learning_rate": 1.2732746899517175e-06, "loss": 0.526127815246582, "step": 3829 }, { "epoch": 0.883098916301591, "grad_norm": 1.3368001624765957, "learning_rate": 1.2729079211676085e-06, "loss": 0.4039766192436218, "step": 3830 }, { "epoch": 0.8833294904311736, "grad_norm": 1.5033466347185125, "learning_rate": 1.2725411127135676e-06, "loss": 0.4232807159423828, "step": 3831 }, { "epoch": 0.8835600645607563, "grad_norm": 1.2556638937655993, "learning_rate": 1.2721742646429142e-06, "loss": 0.48490262031555176, "step": 3832 }, { "epoch": 0.8837906386903389, "grad_norm": 1.278298782194165, "learning_rate": 1.2718073770089729e-06, "loss": 0.4664677083492279, "step": 3833 }, { "epoch": 0.8840212128199216, "grad_norm": 1.3387833207328181, "learning_rate": 1.2714404498650742e-06, "loss": 0.4402846097946167, "step": 3834 }, { "epoch": 0.8842517869495042, "grad_norm": 1.195436797590032, "learning_rate": 1.2710734832645555e-06, "loss": 0.45942988991737366, "step": 3835 }, { "epoch": 0.884482361079087, "grad_norm": 1.3235253441897963, "learning_rate": 1.2707064772607587e-06, "loss": 0.45924365520477295, "step": 3836 }, { "epoch": 0.8847129352086696, "grad_norm": 1.2350134713864223, "learning_rate": 1.270339431907032e-06, "loss": 0.3877851963043213, "step": 3837 }, { "epoch": 0.8849435093382523, "grad_norm": 1.381311043724791, "learning_rate": 1.2699723472567288e-06, "loss": 0.45364105701446533, "step": 3838 }, { "epoch": 0.8851740834678349, "grad_norm": 1.2798000201692457, "learning_rate": 1.2696052233632089e-06, "loss": 0.3527877926826477, "step": 3839 }, { "epoch": 0.8854046575974176, "grad_norm": 1.7105597319107566, "learning_rate": 1.2692380602798375e-06, "loss": 0.499268501996994, "step": 3840 }, { "epoch": 0.8856352317270002, "grad_norm": 1.2823188650483364, "learning_rate": 1.2688708580599854e-06, "loss": 0.39443689584732056, "step": 3841 }, { "epoch": 0.8858658058565829, "grad_norm": 1.442355552170661, "learning_rate": 1.268503616757029e-06, "loss": 0.5262328386306763, "step": 3842 }, { "epoch": 0.8860963799861655, "grad_norm": 1.4602798515117177, "learning_rate": 1.2681363364243509e-06, "loss": 0.4761236608028412, "step": 3843 }, { "epoch": 0.8863269541157482, "grad_norm": 1.3806283660695482, "learning_rate": 1.2677690171153391e-06, "loss": 0.5173169374465942, "step": 3844 }, { "epoch": 0.8865575282453309, "grad_norm": 1.4796905287439253, "learning_rate": 1.2674016588833866e-06, "loss": 0.5304574966430664, "step": 3845 }, { "epoch": 0.8867881023749136, "grad_norm": 1.2451043989470143, "learning_rate": 1.2670342617818925e-06, "loss": 0.44707632064819336, "step": 3846 }, { "epoch": 0.8870186765044962, "grad_norm": 1.4327430501013436, "learning_rate": 1.2666668258642628e-06, "loss": 0.44395360350608826, "step": 3847 }, { "epoch": 0.8872492506340789, "grad_norm": 1.5382701800989709, "learning_rate": 1.266299351183907e-06, "loss": 0.4993078112602234, "step": 3848 }, { "epoch": 0.8874798247636615, "grad_norm": 1.447761685140105, "learning_rate": 1.2659318377942418e-06, "loss": 0.4836229681968689, "step": 3849 }, { "epoch": 0.8877103988932442, "grad_norm": 1.1586406035440977, "learning_rate": 1.2655642857486885e-06, "loss": 0.4898098111152649, "step": 3850 }, { "epoch": 0.8879409730228268, "grad_norm": 1.4550595650341691, "learning_rate": 1.2651966951006753e-06, "loss": 0.5117218494415283, "step": 3851 }, { "epoch": 0.8881715471524095, "grad_norm": 1.1751749847019868, "learning_rate": 1.2648290659036347e-06, "loss": 0.3920857906341553, "step": 3852 }, { "epoch": 0.8884021212819921, "grad_norm": 1.2103531492140316, "learning_rate": 1.2644613982110055e-06, "loss": 0.42527467012405396, "step": 3853 }, { "epoch": 0.8886326954115749, "grad_norm": 1.4673474591941762, "learning_rate": 1.2640936920762318e-06, "loss": 0.5283650159835815, "step": 3854 }, { "epoch": 0.8888632695411575, "grad_norm": 1.1384795561192926, "learning_rate": 1.2637259475527634e-06, "loss": 0.3976718783378601, "step": 3855 }, { "epoch": 0.8890938436707402, "grad_norm": 1.3777221980377923, "learning_rate": 1.2633581646940555e-06, "loss": 0.3767106533050537, "step": 3856 }, { "epoch": 0.8893244178003228, "grad_norm": 1.2421308508382682, "learning_rate": 1.2629903435535695e-06, "loss": 0.4002486765384674, "step": 3857 }, { "epoch": 0.8895549919299055, "grad_norm": 1.7761729251417224, "learning_rate": 1.2626224841847718e-06, "loss": 0.3829443156719208, "step": 3858 }, { "epoch": 0.8897855660594881, "grad_norm": 1.6906089339859913, "learning_rate": 1.2622545866411342e-06, "loss": 0.5338312983512878, "step": 3859 }, { "epoch": 0.8900161401890708, "grad_norm": 1.3435755743208722, "learning_rate": 1.2618866509761347e-06, "loss": 0.49615299701690674, "step": 3860 }, { "epoch": 0.8902467143186534, "grad_norm": 1.3772165276715471, "learning_rate": 1.2615186772432562e-06, "loss": 0.5080281496047974, "step": 3861 }, { "epoch": 0.8904772884482361, "grad_norm": 1.3191602759544514, "learning_rate": 1.2611506654959877e-06, "loss": 0.4631335139274597, "step": 3862 }, { "epoch": 0.8907078625778188, "grad_norm": 1.6754337710064344, "learning_rate": 1.2607826157878232e-06, "loss": 0.5179207921028137, "step": 3863 }, { "epoch": 0.8909384367074015, "grad_norm": 1.8689690583071528, "learning_rate": 1.260414528172263e-06, "loss": 0.5107406973838806, "step": 3864 }, { "epoch": 0.8911690108369841, "grad_norm": 1.4263135964434357, "learning_rate": 1.2600464027028112e-06, "loss": 0.3719855844974518, "step": 3865 }, { "epoch": 0.8913995849665668, "grad_norm": 1.2717821474296322, "learning_rate": 1.2596782394329797e-06, "loss": 0.4703129231929779, "step": 3866 }, { "epoch": 0.8916301590961494, "grad_norm": 1.4971801597034615, "learning_rate": 1.2593100384162842e-06, "loss": 0.49239644408226013, "step": 3867 }, { "epoch": 0.8918607332257321, "grad_norm": 1.505796830220407, "learning_rate": 1.2589417997062468e-06, "loss": 0.5194324851036072, "step": 3868 }, { "epoch": 0.8920913073553147, "grad_norm": 1.2722329079463401, "learning_rate": 1.2585735233563943e-06, "loss": 0.4224633574485779, "step": 3869 }, { "epoch": 0.8923218814848974, "grad_norm": 1.7020995758876771, "learning_rate": 1.2582052094202594e-06, "loss": 0.4377749562263489, "step": 3870 }, { "epoch": 0.89255245561448, "grad_norm": 1.2037908365106704, "learning_rate": 1.2578368579513809e-06, "loss": 0.42847269773483276, "step": 3871 }, { "epoch": 0.8927830297440628, "grad_norm": 1.4087908465200083, "learning_rate": 1.2574684690033018e-06, "loss": 0.5194802284240723, "step": 3872 }, { "epoch": 0.8930136038736454, "grad_norm": 1.3553883811442613, "learning_rate": 1.2571000426295716e-06, "loss": 0.4401082396507263, "step": 3873 }, { "epoch": 0.8932441780032281, "grad_norm": 1.5117708123403886, "learning_rate": 1.2567315788837442e-06, "loss": 0.38890570402145386, "step": 3874 }, { "epoch": 0.8934747521328107, "grad_norm": 1.4931972330534145, "learning_rate": 1.2563630778193802e-06, "loss": 0.522612452507019, "step": 3875 }, { "epoch": 0.8937053262623934, "grad_norm": 1.757870637645656, "learning_rate": 1.2559945394900447e-06, "loss": 0.516444981098175, "step": 3876 }, { "epoch": 0.893935900391976, "grad_norm": 1.193092685346779, "learning_rate": 1.255625963949308e-06, "loss": 0.4084436297416687, "step": 3877 }, { "epoch": 0.8941664745215587, "grad_norm": 1.4364911954858623, "learning_rate": 1.2552573512507474e-06, "loss": 0.4561755657196045, "step": 3878 }, { "epoch": 0.8943970486511413, "grad_norm": 1.3498949478529019, "learning_rate": 1.2548887014479435e-06, "loss": 0.44372665882110596, "step": 3879 }, { "epoch": 0.894627622780724, "grad_norm": 1.4181034577590674, "learning_rate": 1.2545200145944837e-06, "loss": 0.4714791774749756, "step": 3880 }, { "epoch": 0.8948581969103067, "grad_norm": 1.506508633299638, "learning_rate": 1.25415129074396e-06, "loss": 0.48050814867019653, "step": 3881 }, { "epoch": 0.8950887710398894, "grad_norm": 1.7788226663138391, "learning_rate": 1.2537825299499708e-06, "loss": 0.4078127145767212, "step": 3882 }, { "epoch": 0.895319345169472, "grad_norm": 1.1273639481853348, "learning_rate": 1.2534137322661187e-06, "loss": 0.41556763648986816, "step": 3883 }, { "epoch": 0.8955499192990547, "grad_norm": 1.2916565664076916, "learning_rate": 1.2530448977460127e-06, "loss": 0.3862306475639343, "step": 3884 }, { "epoch": 0.8957804934286373, "grad_norm": 1.2417402269481763, "learning_rate": 1.2526760264432656e-06, "loss": 0.4071112871170044, "step": 3885 }, { "epoch": 0.89601106755822, "grad_norm": 1.2074121865816745, "learning_rate": 1.2523071184114978e-06, "loss": 0.36956706643104553, "step": 3886 }, { "epoch": 0.8962416416878026, "grad_norm": 1.5187969981751328, "learning_rate": 1.251938173704333e-06, "loss": 0.5087941884994507, "step": 3887 }, { "epoch": 0.8964722158173853, "grad_norm": 1.5300476571906632, "learning_rate": 1.2515691923754017e-06, "loss": 0.5636804103851318, "step": 3888 }, { "epoch": 0.896702789946968, "grad_norm": 1.2028947296679213, "learning_rate": 1.2512001744783383e-06, "loss": 0.40899237990379333, "step": 3889 }, { "epoch": 0.8969333640765507, "grad_norm": 1.2319974158201112, "learning_rate": 1.2508311200667839e-06, "loss": 0.3964187800884247, "step": 3890 }, { "epoch": 0.8971639382061333, "grad_norm": 1.1881521968898023, "learning_rate": 1.2504620291943838e-06, "loss": 0.43190568685531616, "step": 3891 }, { "epoch": 0.897394512335716, "grad_norm": 1.5323277954151004, "learning_rate": 1.25009290191479e-06, "loss": 0.5640079379081726, "step": 3892 }, { "epoch": 0.8976250864652986, "grad_norm": 1.5228387521540339, "learning_rate": 1.2497237382816577e-06, "loss": 0.4969727396965027, "step": 3893 }, { "epoch": 0.8978556605948812, "grad_norm": 1.438395912517929, "learning_rate": 1.2493545383486497e-06, "loss": 0.43710076808929443, "step": 3894 }, { "epoch": 0.8980862347244639, "grad_norm": 1.217545409086522, "learning_rate": 1.248985302169432e-06, "loss": 0.4246212840080261, "step": 3895 }, { "epoch": 0.8983168088540465, "grad_norm": 1.1837244532547113, "learning_rate": 1.2486160297976776e-06, "loss": 0.3812369108200073, "step": 3896 }, { "epoch": 0.8985473829836292, "grad_norm": 2.1554879190255685, "learning_rate": 1.248246721287063e-06, "loss": 0.6407653093338013, "step": 3897 }, { "epoch": 0.8987779571132118, "grad_norm": 1.6947319293322312, "learning_rate": 1.247877376691272e-06, "loss": 0.47748661041259766, "step": 3898 }, { "epoch": 0.8990085312427946, "grad_norm": 1.5504399903750061, "learning_rate": 1.2475079960639922e-06, "loss": 0.5047964453697205, "step": 3899 }, { "epoch": 0.8992391053723772, "grad_norm": 1.1781117181895115, "learning_rate": 1.2471385794589167e-06, "loss": 0.37989485263824463, "step": 3900 }, { "epoch": 0.8994696795019599, "grad_norm": 1.2955755733611327, "learning_rate": 1.2467691269297437e-06, "loss": 0.38857924938201904, "step": 3901 }, { "epoch": 0.8997002536315425, "grad_norm": 1.2312069291338004, "learning_rate": 1.2463996385301776e-06, "loss": 0.45452386140823364, "step": 3902 }, { "epoch": 0.8999308277611252, "grad_norm": 1.5565774035889273, "learning_rate": 1.2460301143139267e-06, "loss": 0.41920900344848633, "step": 3903 }, { "epoch": 0.9001614018907078, "grad_norm": 1.542875547138451, "learning_rate": 1.2456605543347051e-06, "loss": 0.5979125499725342, "step": 3904 }, { "epoch": 0.9003919760202905, "grad_norm": 1.5505304900467811, "learning_rate": 1.2452909586462323e-06, "loss": 0.5517082214355469, "step": 3905 }, { "epoch": 0.9006225501498731, "grad_norm": 1.2381443535248697, "learning_rate": 1.244921327302233e-06, "loss": 0.4558248519897461, "step": 3906 }, { "epoch": 0.9008531242794559, "grad_norm": 1.5503878716470787, "learning_rate": 1.2445516603564362e-06, "loss": 0.5637399554252625, "step": 3907 }, { "epoch": 0.9010836984090385, "grad_norm": 1.2396897738245216, "learning_rate": 1.2441819578625775e-06, "loss": 0.5208043456077576, "step": 3908 }, { "epoch": 0.9013142725386212, "grad_norm": 1.400218770913741, "learning_rate": 1.243812219874396e-06, "loss": 0.3901744484901428, "step": 3909 }, { "epoch": 0.9015448466682038, "grad_norm": 1.4025338042989108, "learning_rate": 1.2434424464456376e-06, "loss": 0.5770972967147827, "step": 3910 }, { "epoch": 0.9017754207977865, "grad_norm": 1.375223010916462, "learning_rate": 1.2430726376300525e-06, "loss": 0.3457295894622803, "step": 3911 }, { "epoch": 0.9020059949273691, "grad_norm": 1.3118554362154196, "learning_rate": 1.242702793481396e-06, "loss": 0.4487595558166504, "step": 3912 }, { "epoch": 0.9022365690569518, "grad_norm": 1.2548104794507453, "learning_rate": 1.2423329140534286e-06, "loss": 0.4369876980781555, "step": 3913 }, { "epoch": 0.9024671431865344, "grad_norm": 1.5693012853497335, "learning_rate": 1.2419629993999165e-06, "loss": 0.43154388666152954, "step": 3914 }, { "epoch": 0.9026977173161171, "grad_norm": 1.313977531855456, "learning_rate": 1.24159304957463e-06, "loss": 0.4528294801712036, "step": 3915 }, { "epoch": 0.9029282914456997, "grad_norm": 1.4152554930408472, "learning_rate": 1.2412230646313452e-06, "loss": 0.4204830527305603, "step": 3916 }, { "epoch": 0.9031588655752825, "grad_norm": 1.3117655747531898, "learning_rate": 1.2408530446238433e-06, "loss": 0.46544623374938965, "step": 3917 }, { "epoch": 0.9033894397048651, "grad_norm": 1.19103055945586, "learning_rate": 1.2404829896059107e-06, "loss": 0.39419203996658325, "step": 3918 }, { "epoch": 0.9036200138344478, "grad_norm": 1.3085505059347724, "learning_rate": 1.240112899631338e-06, "loss": 0.4214451014995575, "step": 3919 }, { "epoch": 0.9038505879640304, "grad_norm": 1.310156094815825, "learning_rate": 1.239742774753922e-06, "loss": 0.42385220527648926, "step": 3920 }, { "epoch": 0.9040811620936131, "grad_norm": 1.4457769612459037, "learning_rate": 1.2393726150274636e-06, "loss": 0.5206592082977295, "step": 3921 }, { "epoch": 0.9043117362231957, "grad_norm": 1.4602545667694231, "learning_rate": 1.23900242050577e-06, "loss": 0.4358803629875183, "step": 3922 }, { "epoch": 0.9045423103527784, "grad_norm": 1.3596132034754325, "learning_rate": 1.2386321912426524e-06, "loss": 0.4525173306465149, "step": 3923 }, { "epoch": 0.904772884482361, "grad_norm": 1.4736466426478543, "learning_rate": 1.2382619272919273e-06, "loss": 0.48877185583114624, "step": 3924 }, { "epoch": 0.9050034586119438, "grad_norm": 1.152358955118646, "learning_rate": 1.2378916287074162e-06, "loss": 0.4401814341545105, "step": 3925 }, { "epoch": 0.9052340327415264, "grad_norm": 1.337265572878916, "learning_rate": 1.2375212955429459e-06, "loss": 0.37818846106529236, "step": 3926 }, { "epoch": 0.9054646068711091, "grad_norm": 1.285760527835995, "learning_rate": 1.2371509278523482e-06, "loss": 0.36472904682159424, "step": 3927 }, { "epoch": 0.9056951810006917, "grad_norm": 1.2999097028645303, "learning_rate": 1.2367805256894596e-06, "loss": 0.5113309025764465, "step": 3928 }, { "epoch": 0.9059257551302744, "grad_norm": 1.2052405163032573, "learning_rate": 1.2364100891081218e-06, "loss": 0.36074432730674744, "step": 3929 }, { "epoch": 0.906156329259857, "grad_norm": 1.3493065976556424, "learning_rate": 1.2360396181621819e-06, "loss": 0.39177048206329346, "step": 3930 }, { "epoch": 0.9063869033894397, "grad_norm": 1.3736058093352046, "learning_rate": 1.2356691129054912e-06, "loss": 0.4758113622665405, "step": 3931 }, { "epoch": 0.9066174775190223, "grad_norm": 1.3614234520329223, "learning_rate": 1.2352985733919065e-06, "loss": 0.3840598464012146, "step": 3932 }, { "epoch": 0.906848051648605, "grad_norm": 1.510763334369694, "learning_rate": 1.2349279996752892e-06, "loss": 0.5103816986083984, "step": 3933 }, { "epoch": 0.9070786257781877, "grad_norm": 1.466046011323441, "learning_rate": 1.234557391809507e-06, "loss": 0.4175255298614502, "step": 3934 }, { "epoch": 0.9073091999077704, "grad_norm": 2.627411026682294, "learning_rate": 1.2341867498484302e-06, "loss": 0.4504377245903015, "step": 3935 }, { "epoch": 0.907539774037353, "grad_norm": 1.2868923632717955, "learning_rate": 1.2338160738459355e-06, "loss": 0.45868122577667236, "step": 3936 }, { "epoch": 0.9077703481669357, "grad_norm": 1.3231771761325972, "learning_rate": 1.2334453638559054e-06, "loss": 0.5161639451980591, "step": 3937 }, { "epoch": 0.9080009222965183, "grad_norm": 1.5486748129834036, "learning_rate": 1.2330746199322257e-06, "loss": 0.44561630487442017, "step": 3938 }, { "epoch": 0.908231496426101, "grad_norm": 1.595486700598371, "learning_rate": 1.2327038421287876e-06, "loss": 0.4780126214027405, "step": 3939 }, { "epoch": 0.9084620705556836, "grad_norm": 1.2226582649026916, "learning_rate": 1.2323330304994877e-06, "loss": 0.505066990852356, "step": 3940 }, { "epoch": 0.9086926446852663, "grad_norm": 1.3041405659013958, "learning_rate": 1.2319621850982274e-06, "loss": 0.5053813457489014, "step": 3941 }, { "epoch": 0.9089232188148489, "grad_norm": 1.178162092657054, "learning_rate": 1.2315913059789125e-06, "loss": 0.3579134941101074, "step": 3942 }, { "epoch": 0.9091537929444317, "grad_norm": 1.4949007072050957, "learning_rate": 1.2312203931954543e-06, "loss": 0.5703507661819458, "step": 3943 }, { "epoch": 0.9093843670740143, "grad_norm": 1.4141867956521472, "learning_rate": 1.2308494468017685e-06, "loss": 0.4972035884857178, "step": 3944 }, { "epoch": 0.909614941203597, "grad_norm": 1.8338477540837272, "learning_rate": 1.230478466851776e-06, "loss": 0.5528955459594727, "step": 3945 }, { "epoch": 0.9098455153331796, "grad_norm": 1.4009292239467905, "learning_rate": 1.2301074533994024e-06, "loss": 0.4099786877632141, "step": 3946 }, { "epoch": 0.9100760894627623, "grad_norm": 1.3414325662099453, "learning_rate": 1.2297364064985786e-06, "loss": 0.41020166873931885, "step": 3947 }, { "epoch": 0.9103066635923449, "grad_norm": 1.4112377219226224, "learning_rate": 1.2293653262032395e-06, "loss": 0.4340355694293976, "step": 3948 }, { "epoch": 0.9105372377219276, "grad_norm": 1.376446280407005, "learning_rate": 1.2289942125673261e-06, "loss": 0.4369847774505615, "step": 3949 }, { "epoch": 0.9107678118515102, "grad_norm": 1.4688076477466663, "learning_rate": 1.228623065644783e-06, "loss": 0.406423956155777, "step": 3950 }, { "epoch": 0.910998385981093, "grad_norm": 1.4230223897567287, "learning_rate": 1.22825188548956e-06, "loss": 0.5081946849822998, "step": 3951 }, { "epoch": 0.9112289601106756, "grad_norm": 1.7017899930713631, "learning_rate": 1.2278806721556124e-06, "loss": 0.43494492769241333, "step": 3952 }, { "epoch": 0.9114595342402583, "grad_norm": 1.348884752431283, "learning_rate": 1.2275094256968996e-06, "loss": 0.35356831550598145, "step": 3953 }, { "epoch": 0.9116901083698409, "grad_norm": 1.2260567341450548, "learning_rate": 1.227138146167386e-06, "loss": 0.36741551756858826, "step": 3954 }, { "epoch": 0.9119206824994236, "grad_norm": 1.4686302016765889, "learning_rate": 1.226766833621041e-06, "loss": 0.491504430770874, "step": 3955 }, { "epoch": 0.9121512566290062, "grad_norm": 1.266294151631501, "learning_rate": 1.2263954881118384e-06, "loss": 0.4558037519454956, "step": 3956 }, { "epoch": 0.9123818307585889, "grad_norm": 1.398276341256052, "learning_rate": 1.2260241096937571e-06, "loss": 0.3941671848297119, "step": 3957 }, { "epoch": 0.9126124048881715, "grad_norm": 1.7133993603535684, "learning_rate": 1.2256526984207809e-06, "loss": 0.40505191683769226, "step": 3958 }, { "epoch": 0.9128429790177542, "grad_norm": 1.3369540241008888, "learning_rate": 1.2252812543468982e-06, "loss": 0.4669588804244995, "step": 3959 }, { "epoch": 0.9130735531473368, "grad_norm": 1.6346862522902008, "learning_rate": 1.2249097775261014e-06, "loss": 0.535057544708252, "step": 3960 }, { "epoch": 0.9133041272769196, "grad_norm": 1.465530924269544, "learning_rate": 1.2245382680123898e-06, "loss": 0.5127478837966919, "step": 3961 }, { "epoch": 0.9135347014065022, "grad_norm": 1.239878706419753, "learning_rate": 1.224166725859765e-06, "loss": 0.5004767179489136, "step": 3962 }, { "epoch": 0.9137652755360849, "grad_norm": 1.3382850542269662, "learning_rate": 1.2237951511222346e-06, "loss": 0.47929924726486206, "step": 3963 }, { "epoch": 0.9139958496656675, "grad_norm": 1.3650943807220162, "learning_rate": 1.2234235438538109e-06, "loss": 0.5619359016418457, "step": 3964 }, { "epoch": 0.9142264237952502, "grad_norm": 2.173999313160228, "learning_rate": 1.223051904108511e-06, "loss": 0.44648507237434387, "step": 3965 }, { "epoch": 0.9144569979248328, "grad_norm": 1.5081082363333118, "learning_rate": 1.2226802319403562e-06, "loss": 0.4451872706413269, "step": 3966 }, { "epoch": 0.9146875720544155, "grad_norm": 1.1999813764066747, "learning_rate": 1.222308527403373e-06, "loss": 0.44295474886894226, "step": 3967 }, { "epoch": 0.9149181461839981, "grad_norm": 1.4510785821223537, "learning_rate": 1.221936790551592e-06, "loss": 0.517430305480957, "step": 3968 }, { "epoch": 0.9151487203135809, "grad_norm": 1.2648448897941866, "learning_rate": 1.2215650214390493e-06, "loss": 0.4819454252719879, "step": 3969 }, { "epoch": 0.9153792944431635, "grad_norm": 1.40726836834287, "learning_rate": 1.2211932201197855e-06, "loss": 0.41739264130592346, "step": 3970 }, { "epoch": 0.9156098685727462, "grad_norm": 1.214750449543567, "learning_rate": 1.2208213866478452e-06, "loss": 0.38833269476890564, "step": 3971 }, { "epoch": 0.9158404427023288, "grad_norm": 1.4780394203565799, "learning_rate": 1.2204495210772784e-06, "loss": 0.48899054527282715, "step": 3972 }, { "epoch": 0.9160710168319115, "grad_norm": 1.4236888721907983, "learning_rate": 1.2200776234621395e-06, "loss": 0.5201622247695923, "step": 3973 }, { "epoch": 0.9163015909614941, "grad_norm": 1.4696703280770271, "learning_rate": 1.219705693856488e-06, "loss": 0.4105098843574524, "step": 3974 }, { "epoch": 0.9165321650910768, "grad_norm": 1.2658629585457457, "learning_rate": 1.2193337323143865e-06, "loss": 0.45458245277404785, "step": 3975 }, { "epoch": 0.9167627392206594, "grad_norm": 1.4906657502786624, "learning_rate": 1.2189617388899049e-06, "loss": 0.5013390779495239, "step": 3976 }, { "epoch": 0.9169933133502421, "grad_norm": 1.3837275498584536, "learning_rate": 1.218589713637115e-06, "loss": 0.37065303325653076, "step": 3977 }, { "epoch": 0.9172238874798248, "grad_norm": 1.4237915808433583, "learning_rate": 1.218217656610095e-06, "loss": 0.45158177614212036, "step": 3978 }, { "epoch": 0.9174544616094075, "grad_norm": 1.3261399530988285, "learning_rate": 1.2178455678629271e-06, "loss": 0.4439426064491272, "step": 3979 }, { "epoch": 0.9176850357389901, "grad_norm": 1.4056969202356144, "learning_rate": 1.217473447449698e-06, "loss": 0.42215704917907715, "step": 3980 }, { "epoch": 0.9179156098685728, "grad_norm": 1.6572776500354818, "learning_rate": 1.2171012954244991e-06, "loss": 0.42273545265197754, "step": 3981 }, { "epoch": 0.9181461839981554, "grad_norm": 1.5659197643503024, "learning_rate": 1.216729111841427e-06, "loss": 0.6045219898223877, "step": 3982 }, { "epoch": 0.9183767581277381, "grad_norm": 1.318642532575583, "learning_rate": 1.216356896754582e-06, "loss": 0.49316874146461487, "step": 3983 }, { "epoch": 0.9186073322573207, "grad_norm": 1.2984174252340932, "learning_rate": 1.2159846502180692e-06, "loss": 0.5222599506378174, "step": 3984 }, { "epoch": 0.9188379063869034, "grad_norm": 1.21924477747188, "learning_rate": 1.2156123722859988e-06, "loss": 0.4513903856277466, "step": 3985 }, { "epoch": 0.919068480516486, "grad_norm": 1.5286242494549134, "learning_rate": 1.2152400630124846e-06, "loss": 0.4946150779724121, "step": 3986 }, { "epoch": 0.9192990546460688, "grad_norm": 1.6287340554518628, "learning_rate": 1.2148677224516458e-06, "loss": 0.5482569336891174, "step": 3987 }, { "epoch": 0.9195296287756514, "grad_norm": 1.4490082622042646, "learning_rate": 1.2144953506576061e-06, "loss": 0.457091361284256, "step": 3988 }, { "epoch": 0.9197602029052341, "grad_norm": 1.378032718586854, "learning_rate": 1.2141229476844933e-06, "loss": 0.4262084364891052, "step": 3989 }, { "epoch": 0.9199907770348167, "grad_norm": 1.2394422456854066, "learning_rate": 1.2137505135864402e-06, "loss": 0.4905529022216797, "step": 3990 }, { "epoch": 0.9202213511643994, "grad_norm": 1.3246738813802295, "learning_rate": 1.2133780484175833e-06, "loss": 0.5001873970031738, "step": 3991 }, { "epoch": 0.920451925293982, "grad_norm": 1.4663495799657225, "learning_rate": 1.2130055522320647e-06, "loss": 0.396418035030365, "step": 3992 }, { "epoch": 0.9206824994235647, "grad_norm": 1.5742445852004807, "learning_rate": 1.2126330250840302e-06, "loss": 0.5743722915649414, "step": 3993 }, { "epoch": 0.9209130735531473, "grad_norm": 1.720134285882963, "learning_rate": 1.212260467027631e-06, "loss": 0.5134707689285278, "step": 3994 }, { "epoch": 0.92114364768273, "grad_norm": 1.2913764867867046, "learning_rate": 1.2118878781170213e-06, "loss": 0.4191853404045105, "step": 3995 }, { "epoch": 0.9213742218123127, "grad_norm": 1.8061166260156263, "learning_rate": 1.2115152584063613e-06, "loss": 0.3430103063583374, "step": 3996 }, { "epoch": 0.9216047959418954, "grad_norm": 1.491788048135039, "learning_rate": 1.2111426079498147e-06, "loss": 0.5229896903038025, "step": 3997 }, { "epoch": 0.921835370071478, "grad_norm": 1.9288487767080142, "learning_rate": 1.2107699268015501e-06, "loss": 0.5028181076049805, "step": 3998 }, { "epoch": 0.9220659442010607, "grad_norm": 1.8323250729268132, "learning_rate": 1.2103972150157407e-06, "loss": 0.4662501811981201, "step": 3999 }, { "epoch": 0.9222965183306433, "grad_norm": 1.7877363086665337, "learning_rate": 1.2100244726465636e-06, "loss": 0.5581385493278503, "step": 4000 }, { "epoch": 0.922527092460226, "grad_norm": 1.5059656153682595, "learning_rate": 1.2096516997482012e-06, "loss": 0.3925841450691223, "step": 4001 }, { "epoch": 0.9227576665898086, "grad_norm": 1.4478402824011334, "learning_rate": 1.2092788963748393e-06, "loss": 0.4021197557449341, "step": 4002 }, { "epoch": 0.9229882407193913, "grad_norm": 1.5875480480080288, "learning_rate": 1.2089060625806683e-06, "loss": 0.5519800186157227, "step": 4003 }, { "epoch": 0.923218814848974, "grad_norm": 1.4740215502095901, "learning_rate": 1.2085331984198847e-06, "loss": 0.4426038861274719, "step": 4004 }, { "epoch": 0.9234493889785566, "grad_norm": 1.3127950735735558, "learning_rate": 1.2081603039466872e-06, "loss": 0.4370608925819397, "step": 4005 }, { "epoch": 0.9236799631081393, "grad_norm": 1.6270244555647773, "learning_rate": 1.2077873792152797e-06, "loss": 0.5535042881965637, "step": 4006 }, { "epoch": 0.9239105372377219, "grad_norm": 1.4254025319676356, "learning_rate": 1.2074144242798708e-06, "loss": 0.45786774158477783, "step": 4007 }, { "epoch": 0.9241411113673046, "grad_norm": 1.305332226115227, "learning_rate": 1.207041439194673e-06, "loss": 0.38189244270324707, "step": 4008 }, { "epoch": 0.9243716854968872, "grad_norm": 1.4825176983109143, "learning_rate": 1.206668424013904e-06, "loss": 0.48782190680503845, "step": 4009 }, { "epoch": 0.9246022596264699, "grad_norm": 1.4182276344304934, "learning_rate": 1.2062953787917852e-06, "loss": 0.46295344829559326, "step": 4010 }, { "epoch": 0.9248328337560525, "grad_norm": 1.370453601452758, "learning_rate": 1.205922303582542e-06, "loss": 0.5205795764923096, "step": 4011 }, { "epoch": 0.9250634078856352, "grad_norm": 1.431830816120071, "learning_rate": 1.205549198440405e-06, "loss": 0.47622987627983093, "step": 4012 }, { "epoch": 0.9252939820152178, "grad_norm": 1.3190370245605134, "learning_rate": 1.2051760634196091e-06, "loss": 0.4826146960258484, "step": 4013 }, { "epoch": 0.9255245561448006, "grad_norm": 1.608771307027525, "learning_rate": 1.2048028985743928e-06, "loss": 0.46193474531173706, "step": 4014 }, { "epoch": 0.9257551302743832, "grad_norm": 1.4926107871852312, "learning_rate": 1.2044297039589996e-06, "loss": 0.523394763469696, "step": 4015 }, { "epoch": 0.9259857044039659, "grad_norm": 1.3096026982819484, "learning_rate": 1.2040564796276773e-06, "loss": 0.3963446617126465, "step": 4016 }, { "epoch": 0.9262162785335485, "grad_norm": 1.3803899653039033, "learning_rate": 1.2036832256346774e-06, "loss": 0.5016456842422485, "step": 4017 }, { "epoch": 0.9264468526631312, "grad_norm": 1.2198633348825472, "learning_rate": 1.2033099420342566e-06, "loss": 0.47298160195350647, "step": 4018 }, { "epoch": 0.9266774267927138, "grad_norm": 1.5448162104307424, "learning_rate": 1.2029366288806748e-06, "loss": 0.387129545211792, "step": 4019 }, { "epoch": 0.9269080009222965, "grad_norm": 1.4210281769521962, "learning_rate": 1.2025632862281976e-06, "loss": 0.46101367473602295, "step": 4020 }, { "epoch": 0.9271385750518791, "grad_norm": 1.364554371793265, "learning_rate": 1.2021899141310938e-06, "loss": 0.4242950677871704, "step": 4021 }, { "epoch": 0.9273691491814618, "grad_norm": 1.5524341283687932, "learning_rate": 1.201816512643637e-06, "loss": 0.45983830094337463, "step": 4022 }, { "epoch": 0.9275997233110445, "grad_norm": 1.3760025635830133, "learning_rate": 1.2014430818201044e-06, "loss": 0.39785802364349365, "step": 4023 }, { "epoch": 0.9278302974406272, "grad_norm": 1.254017871701417, "learning_rate": 1.2010696217147783e-06, "loss": 0.39265739917755127, "step": 4024 }, { "epoch": 0.9280608715702098, "grad_norm": 1.4761130221315304, "learning_rate": 1.2006961323819455e-06, "loss": 0.49783703684806824, "step": 4025 }, { "epoch": 0.9282914456997925, "grad_norm": 1.3764899481486361, "learning_rate": 1.2003226138758953e-06, "loss": 0.4479181170463562, "step": 4026 }, { "epoch": 0.9285220198293751, "grad_norm": 1.4404345233811269, "learning_rate": 1.199949066250923e-06, "loss": 0.5205901265144348, "step": 4027 }, { "epoch": 0.9287525939589578, "grad_norm": 1.3718010528366764, "learning_rate": 1.1995754895613277e-06, "loss": 0.5163009762763977, "step": 4028 }, { "epoch": 0.9289831680885404, "grad_norm": 1.6219891318512447, "learning_rate": 1.1992018838614124e-06, "loss": 0.5746268033981323, "step": 4029 }, { "epoch": 0.9292137422181231, "grad_norm": 1.2896226756922917, "learning_rate": 1.1988282492054844e-06, "loss": 0.5306442975997925, "step": 4030 }, { "epoch": 0.9294443163477057, "grad_norm": 1.1978686339854372, "learning_rate": 1.198454585647855e-06, "loss": 0.4219534993171692, "step": 4031 }, { "epoch": 0.9296748904772885, "grad_norm": 1.3997557750947305, "learning_rate": 1.1980808932428406e-06, "loss": 0.4167936444282532, "step": 4032 }, { "epoch": 0.9299054646068711, "grad_norm": 1.2271684703243566, "learning_rate": 1.197707172044761e-06, "loss": 0.42376089096069336, "step": 4033 }, { "epoch": 0.9301360387364538, "grad_norm": 1.5370602561856461, "learning_rate": 1.1973334221079398e-06, "loss": 0.48729848861694336, "step": 4034 }, { "epoch": 0.9303666128660364, "grad_norm": 1.2353226603771892, "learning_rate": 1.1969596434867062e-06, "loss": 0.45877987146377563, "step": 4035 }, { "epoch": 0.9305971869956191, "grad_norm": 1.2531522631367908, "learning_rate": 1.196585836235392e-06, "loss": 0.504621684551239, "step": 4036 }, { "epoch": 0.9308277611252017, "grad_norm": 1.202880043912139, "learning_rate": 1.1962120004083342e-06, "loss": 0.45170748233795166, "step": 4037 }, { "epoch": 0.9310583352547844, "grad_norm": 1.3604906368473153, "learning_rate": 1.1958381360598737e-06, "loss": 0.3969152569770813, "step": 4038 }, { "epoch": 0.931288909384367, "grad_norm": 1.2718279913855612, "learning_rate": 1.1954642432443553e-06, "loss": 0.4286048412322998, "step": 4039 }, { "epoch": 0.9315194835139498, "grad_norm": 1.4261317138789782, "learning_rate": 1.1950903220161284e-06, "loss": 0.3755400776863098, "step": 4040 }, { "epoch": 0.9317500576435324, "grad_norm": 1.7559058405972485, "learning_rate": 1.1947163724295457e-06, "loss": 0.553135871887207, "step": 4041 }, { "epoch": 0.9319806317731151, "grad_norm": 1.3529681190465184, "learning_rate": 1.194342394538965e-06, "loss": 0.53995281457901, "step": 4042 }, { "epoch": 0.9322112059026977, "grad_norm": 1.3239114086556873, "learning_rate": 1.1939683883987476e-06, "loss": 0.4405739903450012, "step": 4043 }, { "epoch": 0.9324417800322804, "grad_norm": 1.4320084668753248, "learning_rate": 1.1935943540632591e-06, "loss": 0.5046489238739014, "step": 4044 }, { "epoch": 0.932672354161863, "grad_norm": 1.63220562819442, "learning_rate": 1.1932202915868694e-06, "loss": 0.4699453115463257, "step": 4045 }, { "epoch": 0.9329029282914457, "grad_norm": 1.791152379500816, "learning_rate": 1.192846201023952e-06, "loss": 0.5643539428710938, "step": 4046 }, { "epoch": 0.9331335024210283, "grad_norm": 1.3213038373558907, "learning_rate": 1.192472082428885e-06, "loss": 0.4423527121543884, "step": 4047 }, { "epoch": 0.933364076550611, "grad_norm": 1.488626793530787, "learning_rate": 1.1920979358560498e-06, "loss": 0.4446362257003784, "step": 4048 }, { "epoch": 0.9335946506801936, "grad_norm": 1.6284188135746005, "learning_rate": 1.1917237613598332e-06, "loss": 0.48347601294517517, "step": 4049 }, { "epoch": 0.9338252248097764, "grad_norm": 1.339621886087554, "learning_rate": 1.1913495589946243e-06, "loss": 0.4736206531524658, "step": 4050 }, { "epoch": 0.934055798939359, "grad_norm": 1.5821523477294297, "learning_rate": 1.1909753288148181e-06, "loss": 0.4896177053451538, "step": 4051 }, { "epoch": 0.9342863730689417, "grad_norm": 1.3503870180183308, "learning_rate": 1.1906010708748124e-06, "loss": 0.3953405022621155, "step": 4052 }, { "epoch": 0.9345169471985243, "grad_norm": 1.75805064255455, "learning_rate": 1.1902267852290092e-06, "loss": 0.30871689319610596, "step": 4053 }, { "epoch": 0.934747521328107, "grad_norm": 1.4966149449301516, "learning_rate": 1.1898524719318151e-06, "loss": 0.44187474250793457, "step": 4054 }, { "epoch": 0.9349780954576896, "grad_norm": 1.3440011557143472, "learning_rate": 1.1894781310376396e-06, "loss": 0.4069768488407135, "step": 4055 }, { "epoch": 0.9352086695872723, "grad_norm": 1.2938244564986259, "learning_rate": 1.1891037626008982e-06, "loss": 0.36307692527770996, "step": 4056 }, { "epoch": 0.9354392437168549, "grad_norm": 1.2107088826138788, "learning_rate": 1.188729366676008e-06, "loss": 0.38535594940185547, "step": 4057 }, { "epoch": 0.9356698178464377, "grad_norm": 1.416105966319888, "learning_rate": 1.1883549433173916e-06, "loss": 0.46454256772994995, "step": 4058 }, { "epoch": 0.9359003919760203, "grad_norm": 1.5618282514551205, "learning_rate": 1.1879804925794752e-06, "loss": 0.48537465929985046, "step": 4059 }, { "epoch": 0.936130966105603, "grad_norm": 1.4027831120439134, "learning_rate": 1.1876060145166893e-06, "loss": 0.4355062246322632, "step": 4060 }, { "epoch": 0.9363615402351856, "grad_norm": 1.4619447190479122, "learning_rate": 1.1872315091834676e-06, "loss": 0.47248804569244385, "step": 4061 }, { "epoch": 0.9365921143647683, "grad_norm": 1.4336627602293526, "learning_rate": 1.1868569766342488e-06, "loss": 0.4896939992904663, "step": 4062 }, { "epoch": 0.9368226884943509, "grad_norm": 1.7008224797561309, "learning_rate": 1.1864824169234744e-06, "loss": 0.4259600043296814, "step": 4063 }, { "epoch": 0.9370532626239336, "grad_norm": 1.4119659383453314, "learning_rate": 1.186107830105591e-06, "loss": 0.4228817820549011, "step": 4064 }, { "epoch": 0.9372838367535162, "grad_norm": 1.4911543620584802, "learning_rate": 1.1857332162350484e-06, "loss": 0.44750750064849854, "step": 4065 }, { "epoch": 0.937514410883099, "grad_norm": 1.4424129451647476, "learning_rate": 1.1853585753663003e-06, "loss": 0.49125558137893677, "step": 4066 }, { "epoch": 0.9377449850126816, "grad_norm": 1.2540485430842725, "learning_rate": 1.1849839075538048e-06, "loss": 0.446805477142334, "step": 4067 }, { "epoch": 0.9379755591422643, "grad_norm": 1.6527694351266196, "learning_rate": 1.1846092128520235e-06, "loss": 0.4516616463661194, "step": 4068 }, { "epoch": 0.9382061332718469, "grad_norm": 1.2461495462560317, "learning_rate": 1.1842344913154223e-06, "loss": 0.5271207690238953, "step": 4069 }, { "epoch": 0.9384367074014296, "grad_norm": 1.3340471888093621, "learning_rate": 1.1838597429984702e-06, "loss": 0.46718811988830566, "step": 4070 }, { "epoch": 0.9386672815310122, "grad_norm": 1.6970586095771742, "learning_rate": 1.1834849679556416e-06, "loss": 0.4948880672454834, "step": 4071 }, { "epoch": 0.9388978556605949, "grad_norm": 1.570925891079885, "learning_rate": 1.183110166241413e-06, "loss": 0.5141744613647461, "step": 4072 }, { "epoch": 0.9391284297901775, "grad_norm": 1.683475962747206, "learning_rate": 1.1827353379102662e-06, "loss": 0.43921130895614624, "step": 4073 }, { "epoch": 0.9393590039197602, "grad_norm": 1.458461387708897, "learning_rate": 1.182360483016686e-06, "loss": 0.35931193828582764, "step": 4074 }, { "epoch": 0.9395895780493428, "grad_norm": 1.4562814179425503, "learning_rate": 1.1819856016151615e-06, "loss": 0.4376310408115387, "step": 4075 }, { "epoch": 0.9398201521789256, "grad_norm": 1.1615675527476144, "learning_rate": 1.1816106937601856e-06, "loss": 0.45419907569885254, "step": 4076 }, { "epoch": 0.9400507263085082, "grad_norm": 1.447994335613413, "learning_rate": 1.1812357595062545e-06, "loss": 0.4077754616737366, "step": 4077 }, { "epoch": 0.9402813004380909, "grad_norm": 1.4463033622550583, "learning_rate": 1.1808607989078686e-06, "loss": 0.5555585622787476, "step": 4078 }, { "epoch": 0.9405118745676735, "grad_norm": 1.4616481074430372, "learning_rate": 1.1804858120195334e-06, "loss": 0.4566183090209961, "step": 4079 }, { "epoch": 0.9407424486972562, "grad_norm": 1.3314435652232666, "learning_rate": 1.180110798895756e-06, "loss": 0.39149847626686096, "step": 4080 }, { "epoch": 0.9409730228268388, "grad_norm": 1.3122400287018474, "learning_rate": 1.1797357595910485e-06, "loss": 0.42695966362953186, "step": 4081 }, { "epoch": 0.9412035969564215, "grad_norm": 1.4264504061469645, "learning_rate": 1.1793606941599266e-06, "loss": 0.49673956632614136, "step": 4082 }, { "epoch": 0.9414341710860041, "grad_norm": 1.3703442162376693, "learning_rate": 1.17898560265691e-06, "loss": 0.44765836000442505, "step": 4083 }, { "epoch": 0.9416647452155869, "grad_norm": 1.2694691955405566, "learning_rate": 1.1786104851365227e-06, "loss": 0.40580642223358154, "step": 4084 }, { "epoch": 0.9418953193451695, "grad_norm": 1.6554640938571203, "learning_rate": 1.1782353416532907e-06, "loss": 0.5389235019683838, "step": 4085 }, { "epoch": 0.9421258934747522, "grad_norm": 1.4858385739097846, "learning_rate": 1.1778601722617456e-06, "loss": 0.5130764245986938, "step": 4086 }, { "epoch": 0.9423564676043348, "grad_norm": 1.4406092108567712, "learning_rate": 1.1774849770164218e-06, "loss": 0.5031291842460632, "step": 4087 }, { "epoch": 0.9425870417339175, "grad_norm": 1.474863885181778, "learning_rate": 1.1771097559718581e-06, "loss": 0.463434636592865, "step": 4088 }, { "epoch": 0.9428176158635001, "grad_norm": 1.3059771334220434, "learning_rate": 1.1767345091825962e-06, "loss": 0.4249681234359741, "step": 4089 }, { "epoch": 0.9430481899930828, "grad_norm": 1.322875104249168, "learning_rate": 1.176359236703182e-06, "loss": 0.39353805780410767, "step": 4090 }, { "epoch": 0.9432787641226654, "grad_norm": 1.1645299347166784, "learning_rate": 1.1759839385881657e-06, "loss": 0.4554273188114166, "step": 4091 }, { "epoch": 0.9435093382522481, "grad_norm": 1.5935626726835685, "learning_rate": 1.1756086148921005e-06, "loss": 0.6275606155395508, "step": 4092 }, { "epoch": 0.9437399123818307, "grad_norm": 1.40548177481024, "learning_rate": 1.1752332656695432e-06, "loss": 0.5058892965316772, "step": 4093 }, { "epoch": 0.9439704865114135, "grad_norm": 1.4618963991295721, "learning_rate": 1.1748578909750547e-06, "loss": 0.4318118095397949, "step": 4094 }, { "epoch": 0.9442010606409961, "grad_norm": 1.5133013388223657, "learning_rate": 1.1744824908631996e-06, "loss": 0.4873964190483093, "step": 4095 }, { "epoch": 0.9444316347705788, "grad_norm": 1.7199346017960337, "learning_rate": 1.1741070653885467e-06, "loss": 0.5026696920394897, "step": 4096 }, { "epoch": 0.9446622089001614, "grad_norm": 1.1838920009196625, "learning_rate": 1.1737316146056667e-06, "loss": 0.4337490200996399, "step": 4097 }, { "epoch": 0.9448927830297441, "grad_norm": 1.4841621540296046, "learning_rate": 1.173356138569136e-06, "loss": 0.4552634358406067, "step": 4098 }, { "epoch": 0.9451233571593267, "grad_norm": 1.50340660176824, "learning_rate": 1.1729806373335336e-06, "loss": 0.4631303548812866, "step": 4099 }, { "epoch": 0.9453539312889094, "grad_norm": 1.2840677998534646, "learning_rate": 1.1726051109534424e-06, "loss": 0.5004513263702393, "step": 4100 }, { "epoch": 0.945584505418492, "grad_norm": 1.4218926297879624, "learning_rate": 1.172229559483449e-06, "loss": 0.4634668827056885, "step": 4101 }, { "epoch": 0.9458150795480748, "grad_norm": 1.3580815662313042, "learning_rate": 1.171853982978144e-06, "loss": 0.4034295678138733, "step": 4102 }, { "epoch": 0.9460456536776574, "grad_norm": 1.4066326558267837, "learning_rate": 1.1714783814921206e-06, "loss": 0.4981224536895752, "step": 4103 }, { "epoch": 0.9462762278072401, "grad_norm": 1.637441573047362, "learning_rate": 1.1711027550799767e-06, "loss": 0.460249125957489, "step": 4104 }, { "epoch": 0.9465068019368227, "grad_norm": 1.7282687422797383, "learning_rate": 1.170727103796313e-06, "loss": 0.4794936180114746, "step": 4105 }, { "epoch": 0.9467373760664054, "grad_norm": 1.679442128589896, "learning_rate": 1.170351427695735e-06, "loss": 0.42724454402923584, "step": 4106 }, { "epoch": 0.946967950195988, "grad_norm": 1.5092304593591768, "learning_rate": 1.16997572683285e-06, "loss": 0.4612593948841095, "step": 4107 }, { "epoch": 0.9471985243255707, "grad_norm": 1.4462371891962704, "learning_rate": 1.169600001262271e-06, "loss": 0.49512046575546265, "step": 4108 }, { "epoch": 0.9474290984551533, "grad_norm": 1.382963972341291, "learning_rate": 1.1692242510386124e-06, "loss": 0.49438196420669556, "step": 4109 }, { "epoch": 0.947659672584736, "grad_norm": 1.246967438511099, "learning_rate": 1.1688484762164938e-06, "loss": 0.4833865165710449, "step": 4110 }, { "epoch": 0.9478902467143187, "grad_norm": 1.6394354229670154, "learning_rate": 1.1684726768505385e-06, "loss": 0.49647942185401917, "step": 4111 }, { "epoch": 0.9481208208439014, "grad_norm": 1.3141370309593936, "learning_rate": 1.1680968529953718e-06, "loss": 0.4299147129058838, "step": 4112 }, { "epoch": 0.948351394973484, "grad_norm": 1.2751791494481195, "learning_rate": 1.167721004705624e-06, "loss": 0.42613041400909424, "step": 4113 }, { "epoch": 0.9485819691030667, "grad_norm": 1.5850112492057793, "learning_rate": 1.1673451320359284e-06, "loss": 0.3989883065223694, "step": 4114 }, { "epoch": 0.9488125432326493, "grad_norm": 1.6195345588406382, "learning_rate": 1.1669692350409222e-06, "loss": 0.41362684965133667, "step": 4115 }, { "epoch": 0.9490431173622319, "grad_norm": 1.3043186455514282, "learning_rate": 1.1665933137752452e-06, "loss": 0.3807048201560974, "step": 4116 }, { "epoch": 0.9492736914918146, "grad_norm": 1.452270133487064, "learning_rate": 1.1662173682935414e-06, "loss": 0.3440876007080078, "step": 4117 }, { "epoch": 0.9495042656213972, "grad_norm": 1.5051121617765968, "learning_rate": 1.165841398650459e-06, "loss": 0.43534499406814575, "step": 4118 }, { "epoch": 0.9497348397509799, "grad_norm": 1.2124174426672352, "learning_rate": 1.1654654049006484e-06, "loss": 0.4900544285774231, "step": 4119 }, { "epoch": 0.9499654138805625, "grad_norm": 1.4219346573372744, "learning_rate": 1.1650893870987643e-06, "loss": 0.5189288854598999, "step": 4120 }, { "epoch": 0.9501959880101453, "grad_norm": 1.5561303354373495, "learning_rate": 1.1647133452994643e-06, "loss": 0.587873101234436, "step": 4121 }, { "epoch": 0.9504265621397279, "grad_norm": 1.2947612520331362, "learning_rate": 1.1643372795574106e-06, "loss": 0.4367108941078186, "step": 4122 }, { "epoch": 0.9506571362693106, "grad_norm": 1.3855876287330298, "learning_rate": 1.1639611899272679e-06, "loss": 0.4121246635913849, "step": 4123 }, { "epoch": 0.9508877103988932, "grad_norm": 1.371083137252789, "learning_rate": 1.1635850764637042e-06, "loss": 0.4993973672389984, "step": 4124 }, { "epoch": 0.9511182845284759, "grad_norm": 1.3729377845652901, "learning_rate": 1.163208939221392e-06, "loss": 0.39145413041114807, "step": 4125 }, { "epoch": 0.9513488586580585, "grad_norm": 1.5515816392895183, "learning_rate": 1.1628327782550065e-06, "loss": 0.45954760909080505, "step": 4126 }, { "epoch": 0.9515794327876412, "grad_norm": 1.5137997254417062, "learning_rate": 1.1624565936192263e-06, "loss": 0.5159680843353271, "step": 4127 }, { "epoch": 0.9518100069172238, "grad_norm": 1.5429829982679306, "learning_rate": 1.1620803853687337e-06, "loss": 0.4441346228122711, "step": 4128 }, { "epoch": 0.9520405810468066, "grad_norm": 1.1994992888255296, "learning_rate": 1.1617041535582144e-06, "loss": 0.3842248320579529, "step": 4129 }, { "epoch": 0.9522711551763892, "grad_norm": 1.5742838715827387, "learning_rate": 1.1613278982423577e-06, "loss": 0.5332437753677368, "step": 4130 }, { "epoch": 0.9525017293059719, "grad_norm": 1.416443461852387, "learning_rate": 1.160951619475856e-06, "loss": 0.4265931248664856, "step": 4131 }, { "epoch": 0.9527323034355545, "grad_norm": 1.344407559333665, "learning_rate": 1.1605753173134052e-06, "loss": 0.47442418336868286, "step": 4132 }, { "epoch": 0.9529628775651372, "grad_norm": 1.4385000789860496, "learning_rate": 1.1601989918097044e-06, "loss": 0.6128898859024048, "step": 4133 }, { "epoch": 0.9531934516947198, "grad_norm": 1.3167710707989233, "learning_rate": 1.159822643019457e-06, "loss": 0.5347775220870972, "step": 4134 }, { "epoch": 0.9534240258243025, "grad_norm": 1.1478699481046142, "learning_rate": 1.1594462709973682e-06, "loss": 0.39984625577926636, "step": 4135 }, { "epoch": 0.9536545999538851, "grad_norm": 1.411910940206958, "learning_rate": 1.1590698757981483e-06, "loss": 0.5146951675415039, "step": 4136 }, { "epoch": 0.9538851740834678, "grad_norm": 1.4057451726772026, "learning_rate": 1.1586934574765097e-06, "loss": 0.3589641749858856, "step": 4137 }, { "epoch": 0.9541157482130505, "grad_norm": 1.4047870659239305, "learning_rate": 1.1583170160871689e-06, "loss": 0.428930401802063, "step": 4138 }, { "epoch": 0.9543463223426332, "grad_norm": 1.3760779428564116, "learning_rate": 1.1579405516848452e-06, "loss": 0.46921080350875854, "step": 4139 }, { "epoch": 0.9545768964722158, "grad_norm": 1.462957669946579, "learning_rate": 1.1575640643242616e-06, "loss": 0.39079514145851135, "step": 4140 }, { "epoch": 0.9548074706017985, "grad_norm": 1.5322762323160557, "learning_rate": 1.1571875540601443e-06, "loss": 0.4475102424621582, "step": 4141 }, { "epoch": 0.9550380447313811, "grad_norm": 1.3964952325110702, "learning_rate": 1.1568110209472232e-06, "loss": 0.43881016969680786, "step": 4142 }, { "epoch": 0.9552686188609638, "grad_norm": 1.2846843095885363, "learning_rate": 1.156434465040231e-06, "loss": 0.4382214844226837, "step": 4143 }, { "epoch": 0.9554991929905464, "grad_norm": 1.6590322564778253, "learning_rate": 1.1560578863939037e-06, "loss": 0.5390958786010742, "step": 4144 }, { "epoch": 0.9557297671201291, "grad_norm": 1.2966408722030756, "learning_rate": 1.155681285062981e-06, "loss": 0.4276137948036194, "step": 4145 }, { "epoch": 0.9559603412497117, "grad_norm": 1.3756682316204962, "learning_rate": 1.1553046611022058e-06, "loss": 0.4541968107223511, "step": 4146 }, { "epoch": 0.9561909153792945, "grad_norm": 1.4806679512404375, "learning_rate": 1.1549280145663242e-06, "loss": 0.43287473917007446, "step": 4147 }, { "epoch": 0.9564214895088771, "grad_norm": 1.5507500145218385, "learning_rate": 1.1545513455100855e-06, "loss": 0.432822585105896, "step": 4148 }, { "epoch": 0.9566520636384598, "grad_norm": 1.4662390355071035, "learning_rate": 1.1541746539882424e-06, "loss": 0.519271969795227, "step": 4149 }, { "epoch": 0.9568826377680424, "grad_norm": 1.4521470663351335, "learning_rate": 1.1537979400555506e-06, "loss": 0.4158627390861511, "step": 4150 }, { "epoch": 0.9571132118976251, "grad_norm": 1.4834584070713739, "learning_rate": 1.1534212037667698e-06, "loss": 0.42122989892959595, "step": 4151 }, { "epoch": 0.9573437860272077, "grad_norm": 1.696588703842723, "learning_rate": 1.1530444451766623e-06, "loss": 0.4141794443130493, "step": 4152 }, { "epoch": 0.9575743601567904, "grad_norm": 1.3149219500885996, "learning_rate": 1.1526676643399933e-06, "loss": 0.4935780167579651, "step": 4153 }, { "epoch": 0.957804934286373, "grad_norm": 1.3661965645097156, "learning_rate": 1.152290861311532e-06, "loss": 0.5075733661651611, "step": 4154 }, { "epoch": 0.9580355084159557, "grad_norm": 1.37824406851626, "learning_rate": 1.151914036146051e-06, "loss": 0.4852841794490814, "step": 4155 }, { "epoch": 0.9582660825455384, "grad_norm": 1.2576277022731817, "learning_rate": 1.151537188898325e-06, "loss": 0.46114620566368103, "step": 4156 }, { "epoch": 0.9584966566751211, "grad_norm": 1.6662322349225411, "learning_rate": 1.1511603196231327e-06, "loss": 0.519254207611084, "step": 4157 }, { "epoch": 0.9587272308047037, "grad_norm": 1.3283960828325414, "learning_rate": 1.1507834283752562e-06, "loss": 0.43635690212249756, "step": 4158 }, { "epoch": 0.9589578049342864, "grad_norm": 1.3730336798021219, "learning_rate": 1.1504065152094802e-06, "loss": 0.48448023200035095, "step": 4159 }, { "epoch": 0.959188379063869, "grad_norm": 1.320755520801986, "learning_rate": 1.1500295801805927e-06, "loss": 0.4461054801940918, "step": 4160 }, { "epoch": 0.9594189531934517, "grad_norm": 1.3183810948385437, "learning_rate": 1.1496526233433852e-06, "loss": 0.44869595766067505, "step": 4161 }, { "epoch": 0.9596495273230343, "grad_norm": 1.5137169599039804, "learning_rate": 1.1492756447526524e-06, "loss": 0.4592103660106659, "step": 4162 }, { "epoch": 0.959880101452617, "grad_norm": 1.3625000210250673, "learning_rate": 1.1488986444631918e-06, "loss": 0.48352301120758057, "step": 4163 }, { "epoch": 0.9601106755821996, "grad_norm": 1.2039059688900335, "learning_rate": 1.1485216225298043e-06, "loss": 0.44718503952026367, "step": 4164 }, { "epoch": 0.9603412497117824, "grad_norm": 1.7796976813489804, "learning_rate": 1.1481445790072933e-06, "loss": 0.44659486413002014, "step": 4165 }, { "epoch": 0.960571823841365, "grad_norm": 1.464260426957605, "learning_rate": 1.1477675139504665e-06, "loss": 0.5143063068389893, "step": 4166 }, { "epoch": 0.9608023979709477, "grad_norm": 1.825014649582591, "learning_rate": 1.1473904274141344e-06, "loss": 0.6708887815475464, "step": 4167 }, { "epoch": 0.9610329721005303, "grad_norm": 1.4397638416262573, "learning_rate": 1.1470133194531094e-06, "loss": 0.3889666199684143, "step": 4168 }, { "epoch": 0.961263546230113, "grad_norm": 1.2805774485856607, "learning_rate": 1.1466361901222086e-06, "loss": 0.4610622227191925, "step": 4169 }, { "epoch": 0.9614941203596956, "grad_norm": 1.4320030308850267, "learning_rate": 1.1462590394762514e-06, "loss": 0.46372538805007935, "step": 4170 }, { "epoch": 0.9617246944892783, "grad_norm": 1.5638922992309852, "learning_rate": 1.1458818675700607e-06, "loss": 0.5197097063064575, "step": 4171 }, { "epoch": 0.9619552686188609, "grad_norm": 1.2417860513603916, "learning_rate": 1.145504674458462e-06, "loss": 0.3849745988845825, "step": 4172 }, { "epoch": 0.9621858427484437, "grad_norm": 1.5196854039542969, "learning_rate": 1.1451274601962841e-06, "loss": 0.4572817385196686, "step": 4173 }, { "epoch": 0.9624164168780263, "grad_norm": 1.4154832612934123, "learning_rate": 1.1447502248383594e-06, "loss": 0.4383746385574341, "step": 4174 }, { "epoch": 0.962646991007609, "grad_norm": 1.473681287130909, "learning_rate": 1.1443729684395222e-06, "loss": 0.5319672226905823, "step": 4175 }, { "epoch": 0.9628775651371916, "grad_norm": 1.2307542062760268, "learning_rate": 1.143995691054611e-06, "loss": 0.4351249933242798, "step": 4176 }, { "epoch": 0.9631081392667743, "grad_norm": 1.42416527435209, "learning_rate": 1.1436183927384668e-06, "loss": 0.5453774929046631, "step": 4177 }, { "epoch": 0.9633387133963569, "grad_norm": 1.569291329857932, "learning_rate": 1.1432410735459336e-06, "loss": 0.5605905055999756, "step": 4178 }, { "epoch": 0.9635692875259396, "grad_norm": 1.3825364023898294, "learning_rate": 1.1428637335318587e-06, "loss": 0.4556693434715271, "step": 4179 }, { "epoch": 0.9637998616555222, "grad_norm": 1.316766347101971, "learning_rate": 1.142486372751092e-06, "loss": 0.45428892970085144, "step": 4180 }, { "epoch": 0.9640304357851049, "grad_norm": 1.4252168865652697, "learning_rate": 1.142108991258487e-06, "loss": 0.4897412657737732, "step": 4181 }, { "epoch": 0.9642610099146876, "grad_norm": 1.984637391356181, "learning_rate": 1.1417315891089004e-06, "loss": 0.5478836894035339, "step": 4182 }, { "epoch": 0.9644915840442703, "grad_norm": 1.4620834191298895, "learning_rate": 1.1413541663571904e-06, "loss": 0.42394131422042847, "step": 4183 }, { "epoch": 0.9647221581738529, "grad_norm": 1.585175673978148, "learning_rate": 1.1409767230582199e-06, "loss": 0.5047104954719543, "step": 4184 }, { "epoch": 0.9649527323034356, "grad_norm": 1.4749915601759833, "learning_rate": 1.1405992592668538e-06, "loss": 0.43985825777053833, "step": 4185 }, { "epoch": 0.9651833064330182, "grad_norm": 1.3061643078097422, "learning_rate": 1.1402217750379608e-06, "loss": 0.4338407516479492, "step": 4186 }, { "epoch": 0.9654138805626009, "grad_norm": 1.5404850502320075, "learning_rate": 1.1398442704264118e-06, "loss": 0.4532614052295685, "step": 4187 }, { "epoch": 0.9656444546921835, "grad_norm": 1.2345047018331374, "learning_rate": 1.1394667454870802e-06, "loss": 0.4546123445034027, "step": 4188 }, { "epoch": 0.9658750288217662, "grad_norm": 1.5321856096614175, "learning_rate": 1.139089200274844e-06, "loss": 0.44743451476097107, "step": 4189 }, { "epoch": 0.9661056029513488, "grad_norm": 1.3411063865526411, "learning_rate": 1.138711634844583e-06, "loss": 0.4566968083381653, "step": 4190 }, { "epoch": 0.9663361770809316, "grad_norm": 1.481468600614622, "learning_rate": 1.13833404925118e-06, "loss": 0.46385467052459717, "step": 4191 }, { "epoch": 0.9665667512105142, "grad_norm": 1.2411450691863102, "learning_rate": 1.137956443549521e-06, "loss": 0.4614461660385132, "step": 4192 }, { "epoch": 0.9667973253400969, "grad_norm": 1.3326432316915904, "learning_rate": 1.1375788177944945e-06, "loss": 0.4351955056190491, "step": 4193 }, { "epoch": 0.9670278994696795, "grad_norm": 1.368161025215393, "learning_rate": 1.1372011720409927e-06, "loss": 0.4172135591506958, "step": 4194 }, { "epoch": 0.9672584735992622, "grad_norm": 1.6941620223477674, "learning_rate": 1.1368235063439102e-06, "loss": 0.5482916831970215, "step": 4195 }, { "epoch": 0.9674890477288448, "grad_norm": 1.3508434751874687, "learning_rate": 1.136445820758144e-06, "loss": 0.4336891770362854, "step": 4196 }, { "epoch": 0.9677196218584275, "grad_norm": 1.5072664158429512, "learning_rate": 1.1360681153385956e-06, "loss": 0.42612385749816895, "step": 4197 }, { "epoch": 0.9679501959880101, "grad_norm": 1.5000454097568379, "learning_rate": 1.135690390140167e-06, "loss": 0.513736367225647, "step": 4198 }, { "epoch": 0.9681807701175928, "grad_norm": 1.8279069537189752, "learning_rate": 1.1353126452177656e-06, "loss": 0.45551058650016785, "step": 4199 }, { "epoch": 0.9684113442471755, "grad_norm": 1.3479770342549766, "learning_rate": 1.1349348806262994e-06, "loss": 0.45450061559677124, "step": 4200 }, { "epoch": 0.9686419183767582, "grad_norm": 1.5942392384347237, "learning_rate": 1.1345570964206807e-06, "loss": 0.43962353467941284, "step": 4201 }, { "epoch": 0.9688724925063408, "grad_norm": 1.4695533515040724, "learning_rate": 1.1341792926558245e-06, "loss": 0.5304821729660034, "step": 4202 }, { "epoch": 0.9691030666359235, "grad_norm": 1.57215629996827, "learning_rate": 1.1338014693866483e-06, "loss": 0.6079045534133911, "step": 4203 }, { "epoch": 0.9693336407655061, "grad_norm": 1.3451772860900804, "learning_rate": 1.1334236266680724e-06, "loss": 0.39895182847976685, "step": 4204 }, { "epoch": 0.9695642148950888, "grad_norm": 1.4224201035305835, "learning_rate": 1.1330457645550202e-06, "loss": 0.5264945030212402, "step": 4205 }, { "epoch": 0.9697947890246714, "grad_norm": 1.3209691457440123, "learning_rate": 1.1326678831024178e-06, "loss": 0.4794533848762512, "step": 4206 }, { "epoch": 0.9700253631542541, "grad_norm": 1.472204632290126, "learning_rate": 1.1322899823651938e-06, "loss": 0.42917680740356445, "step": 4207 }, { "epoch": 0.9702559372838367, "grad_norm": 1.4163025348687577, "learning_rate": 1.1319120623982804e-06, "loss": 0.42155951261520386, "step": 4208 }, { "epoch": 0.9704865114134195, "grad_norm": 1.455345134423215, "learning_rate": 1.1315341232566121e-06, "loss": 0.5119719505310059, "step": 4209 }, { "epoch": 0.9707170855430021, "grad_norm": 1.4441630965274395, "learning_rate": 1.1311561649951255e-06, "loss": 0.5261529684066772, "step": 4210 }, { "epoch": 0.9709476596725848, "grad_norm": 1.3046857195112773, "learning_rate": 1.1307781876687609e-06, "loss": 0.5133010149002075, "step": 4211 }, { "epoch": 0.9711782338021674, "grad_norm": 1.4061037707348525, "learning_rate": 1.1304001913324617e-06, "loss": 0.5214196443557739, "step": 4212 }, { "epoch": 0.9714088079317501, "grad_norm": 1.4191122003483587, "learning_rate": 1.1300221760411732e-06, "loss": 0.4665095806121826, "step": 4213 }, { "epoch": 0.9716393820613327, "grad_norm": 1.2917310787961995, "learning_rate": 1.1296441418498435e-06, "loss": 0.44912537932395935, "step": 4214 }, { "epoch": 0.9718699561909154, "grad_norm": 1.384060094796334, "learning_rate": 1.1292660888134241e-06, "loss": 0.48622840642929077, "step": 4215 }, { "epoch": 0.972100530320498, "grad_norm": 1.3952506250953003, "learning_rate": 1.128888016986868e-06, "loss": 0.40099745988845825, "step": 4216 }, { "epoch": 0.9723311044500808, "grad_norm": 1.6661609433762745, "learning_rate": 1.1285099264251331e-06, "loss": 0.4981631934642792, "step": 4217 }, { "epoch": 0.9725616785796634, "grad_norm": 1.3061541456837051, "learning_rate": 1.1281318171831778e-06, "loss": 0.3902980387210846, "step": 4218 }, { "epoch": 0.9727922527092461, "grad_norm": 1.646940009523485, "learning_rate": 1.1277536893159641e-06, "loss": 0.5120723843574524, "step": 4219 }, { "epoch": 0.9730228268388287, "grad_norm": 1.4050676349560098, "learning_rate": 1.1273755428784568e-06, "loss": 0.47908157110214233, "step": 4220 }, { "epoch": 0.9732534009684114, "grad_norm": 1.3980215754858654, "learning_rate": 1.126997377925624e-06, "loss": 0.44935697317123413, "step": 4221 }, { "epoch": 0.973483975097994, "grad_norm": 1.7936737063106103, "learning_rate": 1.1266191945124345e-06, "loss": 0.46883124113082886, "step": 4222 }, { "epoch": 0.9737145492275767, "grad_norm": 1.3605023071963889, "learning_rate": 1.1262409926938622e-06, "loss": 0.41385799646377563, "step": 4223 }, { "epoch": 0.9739451233571593, "grad_norm": 1.352097187992639, "learning_rate": 1.1258627725248821e-06, "loss": 0.5450118780136108, "step": 4224 }, { "epoch": 0.974175697486742, "grad_norm": 1.3149598759310381, "learning_rate": 1.1254845340604725e-06, "loss": 0.4728820323944092, "step": 4225 }, { "epoch": 0.9744062716163246, "grad_norm": 1.490906480143449, "learning_rate": 1.1251062773556143e-06, "loss": 0.5111296772956848, "step": 4226 }, { "epoch": 0.9746368457459073, "grad_norm": 1.6529549144482583, "learning_rate": 1.1247280024652908e-06, "loss": 0.4538743793964386, "step": 4227 }, { "epoch": 0.97486741987549, "grad_norm": 1.4130886870951611, "learning_rate": 1.1243497094444877e-06, "loss": 0.4917091131210327, "step": 4228 }, { "epoch": 0.9750979940050726, "grad_norm": 1.387244231549714, "learning_rate": 1.1239713983481945e-06, "loss": 0.40376198291778564, "step": 4229 }, { "epoch": 0.9753285681346553, "grad_norm": 1.4554658551428983, "learning_rate": 1.1235930692314019e-06, "loss": 0.5356566905975342, "step": 4230 }, { "epoch": 0.9755591422642379, "grad_norm": 1.4359135131794967, "learning_rate": 1.123214722149104e-06, "loss": 0.4374624490737915, "step": 4231 }, { "epoch": 0.9757897163938206, "grad_norm": 1.4746549529981767, "learning_rate": 1.1228363571562976e-06, "loss": 0.4225429594516754, "step": 4232 }, { "epoch": 0.9760202905234032, "grad_norm": 1.4500544144002923, "learning_rate": 1.1224579743079819e-06, "loss": 0.5389699935913086, "step": 4233 }, { "epoch": 0.9762508646529859, "grad_norm": 1.39848035447059, "learning_rate": 1.1220795736591584e-06, "loss": 0.4925463795661926, "step": 4234 }, { "epoch": 0.9764814387825685, "grad_norm": 1.2916834361485914, "learning_rate": 1.1217011552648315e-06, "loss": 0.4694328308105469, "step": 4235 }, { "epoch": 0.9767120129121513, "grad_norm": 1.377557176325016, "learning_rate": 1.1213227191800086e-06, "loss": 0.39887624979019165, "step": 4236 }, { "epoch": 0.9769425870417339, "grad_norm": 1.5555659299458584, "learning_rate": 1.120944265459699e-06, "loss": 0.4930388927459717, "step": 4237 }, { "epoch": 0.9771731611713166, "grad_norm": 1.2486101676760866, "learning_rate": 1.1205657941589143e-06, "loss": 0.4595404863357544, "step": 4238 }, { "epoch": 0.9774037353008992, "grad_norm": 1.4574273243269236, "learning_rate": 1.1201873053326695e-06, "loss": 0.44177496433258057, "step": 4239 }, { "epoch": 0.9776343094304819, "grad_norm": 1.4308970126871865, "learning_rate": 1.119808799035982e-06, "loss": 0.47095373272895813, "step": 4240 }, { "epoch": 0.9778648835600645, "grad_norm": 1.4049777741841016, "learning_rate": 1.1194302753238716e-06, "loss": 0.4649583697319031, "step": 4241 }, { "epoch": 0.9780954576896472, "grad_norm": 1.5269711326381101, "learning_rate": 1.1190517342513598e-06, "loss": 0.44815266132354736, "step": 4242 }, { "epoch": 0.9783260318192298, "grad_norm": 1.462868793648971, "learning_rate": 1.118673175873472e-06, "loss": 0.4861665368080139, "step": 4243 }, { "epoch": 0.9785566059488126, "grad_norm": 1.3395897424173215, "learning_rate": 1.1182946002452354e-06, "loss": 0.5196468830108643, "step": 4244 }, { "epoch": 0.9787871800783952, "grad_norm": 1.5910002582718288, "learning_rate": 1.11791600742168e-06, "loss": 0.49746841192245483, "step": 4245 }, { "epoch": 0.9790177542079779, "grad_norm": 1.2919062217717159, "learning_rate": 1.1175373974578377e-06, "loss": 0.4637739956378937, "step": 4246 }, { "epoch": 0.9792483283375605, "grad_norm": 1.228394275609753, "learning_rate": 1.1171587704087434e-06, "loss": 0.46009692549705505, "step": 4247 }, { "epoch": 0.9794789024671432, "grad_norm": 2.1569798034684706, "learning_rate": 1.1167801263294346e-06, "loss": 0.49036258459091187, "step": 4248 }, { "epoch": 0.9797094765967258, "grad_norm": 1.395933426650918, "learning_rate": 1.1164014652749509e-06, "loss": 0.4730580449104309, "step": 4249 }, { "epoch": 0.9799400507263085, "grad_norm": 1.618438538763921, "learning_rate": 1.1160227873003345e-06, "loss": 0.5029968023300171, "step": 4250 }, { "epoch": 0.9801706248558911, "grad_norm": 1.4870951402562973, "learning_rate": 1.1156440924606299e-06, "loss": 0.5149805545806885, "step": 4251 }, { "epoch": 0.9804011989854738, "grad_norm": 1.6248587467562292, "learning_rate": 1.1152653808108845e-06, "loss": 0.5017384886741638, "step": 4252 }, { "epoch": 0.9806317731150564, "grad_norm": 1.486462967422998, "learning_rate": 1.114886652406148e-06, "loss": 0.47569048404693604, "step": 4253 }, { "epoch": 0.9808623472446392, "grad_norm": 1.4476623501612873, "learning_rate": 1.1145079073014722e-06, "loss": 0.5127655863761902, "step": 4254 }, { "epoch": 0.9810929213742218, "grad_norm": 1.4943063660203757, "learning_rate": 1.1141291455519114e-06, "loss": 0.4014360308647156, "step": 4255 }, { "epoch": 0.9813234955038045, "grad_norm": 1.4814879590427052, "learning_rate": 1.1137503672125228e-06, "loss": 0.43737465143203735, "step": 4256 }, { "epoch": 0.9815540696333871, "grad_norm": 1.413525212350489, "learning_rate": 1.1133715723383655e-06, "loss": 0.4389764070510864, "step": 4257 }, { "epoch": 0.9817846437629698, "grad_norm": 1.3532173754404184, "learning_rate": 1.112992760984501e-06, "loss": 0.5105381608009338, "step": 4258 }, { "epoch": 0.9820152178925524, "grad_norm": 1.4052776017835835, "learning_rate": 1.1126139332059937e-06, "loss": 0.4393002688884735, "step": 4259 }, { "epoch": 0.9822457920221351, "grad_norm": 1.3179147448132482, "learning_rate": 1.1122350890579102e-06, "loss": 0.541419267654419, "step": 4260 }, { "epoch": 0.9824763661517177, "grad_norm": 1.5177150542407778, "learning_rate": 1.1118562285953186e-06, "loss": 0.4153546094894409, "step": 4261 }, { "epoch": 0.9827069402813005, "grad_norm": 1.4649176443917427, "learning_rate": 1.1114773518732907e-06, "loss": 0.5060696601867676, "step": 4262 }, { "epoch": 0.9829375144108831, "grad_norm": 1.6266321171712574, "learning_rate": 1.1110984589468998e-06, "loss": 0.5975456237792969, "step": 4263 }, { "epoch": 0.9831680885404658, "grad_norm": 1.4920078622156363, "learning_rate": 1.110719549871222e-06, "loss": 0.5729621648788452, "step": 4264 }, { "epoch": 0.9833986626700484, "grad_norm": 1.3838030985279757, "learning_rate": 1.1103406247013356e-06, "loss": 0.3948165476322174, "step": 4265 }, { "epoch": 0.9836292367996311, "grad_norm": 1.3893062538653607, "learning_rate": 1.1099616834923212e-06, "loss": 0.41744932532310486, "step": 4266 }, { "epoch": 0.9838598109292137, "grad_norm": 1.3638196246051946, "learning_rate": 1.1095827262992611e-06, "loss": 0.4701330065727234, "step": 4267 }, { "epoch": 0.9840903850587964, "grad_norm": 1.4764746527882953, "learning_rate": 1.109203753177242e-06, "loss": 0.4841681718826294, "step": 4268 }, { "epoch": 0.984320959188379, "grad_norm": 1.3604414964396274, "learning_rate": 1.10882476418135e-06, "loss": 0.4180435538291931, "step": 4269 }, { "epoch": 0.9845515333179617, "grad_norm": 1.4211218067668543, "learning_rate": 1.1084457593666758e-06, "loss": 0.39362633228302, "step": 4270 }, { "epoch": 0.9847821074475444, "grad_norm": 1.4239354595534417, "learning_rate": 1.1080667387883116e-06, "loss": 0.5192993879318237, "step": 4271 }, { "epoch": 0.9850126815771271, "grad_norm": 1.5201720088447181, "learning_rate": 1.1076877025013517e-06, "loss": 0.48835504055023193, "step": 4272 }, { "epoch": 0.9852432557067097, "grad_norm": 1.5142338003412266, "learning_rate": 1.1073086505608925e-06, "loss": 0.44442474842071533, "step": 4273 }, { "epoch": 0.9854738298362924, "grad_norm": 1.3436041344969518, "learning_rate": 1.1069295830220339e-06, "loss": 0.4544455409049988, "step": 4274 }, { "epoch": 0.985704403965875, "grad_norm": 1.5833831369807498, "learning_rate": 1.106550499939876e-06, "loss": 0.482341468334198, "step": 4275 }, { "epoch": 0.9859349780954577, "grad_norm": 1.421534858967002, "learning_rate": 1.1061714013695236e-06, "loss": 0.5251357555389404, "step": 4276 }, { "epoch": 0.9861655522250403, "grad_norm": 1.2537356796939523, "learning_rate": 1.1057922873660819e-06, "loss": 0.4538683295249939, "step": 4277 }, { "epoch": 0.986396126354623, "grad_norm": 2.0128553783671728, "learning_rate": 1.105413157984659e-06, "loss": 0.5112448930740356, "step": 4278 }, { "epoch": 0.9866267004842056, "grad_norm": 1.4914994042257563, "learning_rate": 1.1050340132803654e-06, "loss": 0.48863890767097473, "step": 4279 }, { "epoch": 0.9868572746137884, "grad_norm": 1.494741313695512, "learning_rate": 1.1046548533083134e-06, "loss": 0.43637439608573914, "step": 4280 }, { "epoch": 0.987087848743371, "grad_norm": 1.5727176113962202, "learning_rate": 1.104275678123618e-06, "loss": 0.5231983065605164, "step": 4281 }, { "epoch": 0.9873184228729537, "grad_norm": 1.7169447967595874, "learning_rate": 1.1038964877813955e-06, "loss": 0.46838122606277466, "step": 4282 }, { "epoch": 0.9875489970025363, "grad_norm": 1.3537630033218837, "learning_rate": 1.1035172823367658e-06, "loss": 0.4330589473247528, "step": 4283 }, { "epoch": 0.987779571132119, "grad_norm": 1.4178119046272273, "learning_rate": 1.1031380618448501e-06, "loss": 0.44962531328201294, "step": 4284 }, { "epoch": 0.9880101452617016, "grad_norm": 1.3547255909489988, "learning_rate": 1.1027588263607719e-06, "loss": 0.44549795985221863, "step": 4285 }, { "epoch": 0.9882407193912843, "grad_norm": 1.7082954293487662, "learning_rate": 1.1023795759396568e-06, "loss": 0.43510758876800537, "step": 4286 }, { "epoch": 0.9884712935208669, "grad_norm": 1.3135837847563279, "learning_rate": 1.1020003106366324e-06, "loss": 0.4369906187057495, "step": 4287 }, { "epoch": 0.9887018676504497, "grad_norm": 1.416650593568537, "learning_rate": 1.1016210305068296e-06, "loss": 0.42049574851989746, "step": 4288 }, { "epoch": 0.9889324417800323, "grad_norm": 1.6285692706476314, "learning_rate": 1.10124173560538e-06, "loss": 0.449156790971756, "step": 4289 }, { "epoch": 0.989163015909615, "grad_norm": 1.5784410678150576, "learning_rate": 1.1008624259874177e-06, "loss": 0.4736451506614685, "step": 4290 }, { "epoch": 0.9893935900391976, "grad_norm": 1.3029401584123959, "learning_rate": 1.10048310170808e-06, "loss": 0.3988722860813141, "step": 4291 }, { "epoch": 0.9896241641687803, "grad_norm": 1.4221756045070393, "learning_rate": 1.100103762822505e-06, "loss": 0.44330862164497375, "step": 4292 }, { "epoch": 0.9898547382983629, "grad_norm": 1.5471015099626197, "learning_rate": 1.0997244093858336e-06, "loss": 0.5294286608695984, "step": 4293 }, { "epoch": 0.9900853124279456, "grad_norm": 1.3808712553027187, "learning_rate": 1.0993450414532082e-06, "loss": 0.463120698928833, "step": 4294 }, { "epoch": 0.9903158865575282, "grad_norm": 1.294463919332552, "learning_rate": 1.0989656590797747e-06, "loss": 0.4481865167617798, "step": 4295 }, { "epoch": 0.9905464606871109, "grad_norm": 1.4153337646078945, "learning_rate": 1.0985862623206794e-06, "loss": 0.4467630386352539, "step": 4296 }, { "epoch": 0.9907770348166935, "grad_norm": 1.8865527079498654, "learning_rate": 1.0982068512310717e-06, "loss": 0.43485027551651, "step": 4297 }, { "epoch": 0.9910076089462763, "grad_norm": 1.5277390713389145, "learning_rate": 1.0978274258661032e-06, "loss": 0.4556450843811035, "step": 4298 }, { "epoch": 0.9912381830758589, "grad_norm": 1.4768070925377026, "learning_rate": 1.0974479862809268e-06, "loss": 0.48326122760772705, "step": 4299 }, { "epoch": 0.9914687572054416, "grad_norm": 1.1782147993424035, "learning_rate": 1.097068532530698e-06, "loss": 0.42254534363746643, "step": 4300 }, { "epoch": 0.9916993313350242, "grad_norm": 1.3623288149981243, "learning_rate": 1.096689064670574e-06, "loss": 0.4076887369155884, "step": 4301 }, { "epoch": 0.9919299054646069, "grad_norm": 1.4246737986617306, "learning_rate": 1.0963095827557146e-06, "loss": 0.40615612268447876, "step": 4302 }, { "epoch": 0.9921604795941895, "grad_norm": 1.391998245639926, "learning_rate": 1.095930086841281e-06, "loss": 0.47794467210769653, "step": 4303 }, { "epoch": 0.9923910537237722, "grad_norm": 1.479591301344316, "learning_rate": 1.0955505769824375e-06, "loss": 0.4927758574485779, "step": 4304 }, { "epoch": 0.9926216278533548, "grad_norm": 1.1962407216416377, "learning_rate": 1.0951710532343493e-06, "loss": 0.40777790546417236, "step": 4305 }, { "epoch": 0.9928522019829376, "grad_norm": 1.2781565166204398, "learning_rate": 1.0947915156521837e-06, "loss": 0.41996532678604126, "step": 4306 }, { "epoch": 0.9930827761125202, "grad_norm": 1.3495931588969972, "learning_rate": 1.0944119642911107e-06, "loss": 0.4366680383682251, "step": 4307 }, { "epoch": 0.9933133502421029, "grad_norm": 1.4609250216040512, "learning_rate": 1.094032399206302e-06, "loss": 0.5350530743598938, "step": 4308 }, { "epoch": 0.9935439243716855, "grad_norm": 1.5545326791900604, "learning_rate": 1.093652820452931e-06, "loss": 0.5166209936141968, "step": 4309 }, { "epoch": 0.9937744985012682, "grad_norm": 1.3624754056256652, "learning_rate": 1.0932732280861734e-06, "loss": 0.5104992389678955, "step": 4310 }, { "epoch": 0.9940050726308508, "grad_norm": 1.293281056582964, "learning_rate": 1.0928936221612068e-06, "loss": 0.38249820470809937, "step": 4311 }, { "epoch": 0.9942356467604335, "grad_norm": 1.5718744647134053, "learning_rate": 1.0925140027332107e-06, "loss": 0.4930746555328369, "step": 4312 }, { "epoch": 0.9944662208900161, "grad_norm": 1.5006868919231642, "learning_rate": 1.092134369857367e-06, "loss": 0.46536654233932495, "step": 4313 }, { "epoch": 0.9946967950195988, "grad_norm": 1.5384946564391833, "learning_rate": 1.0917547235888582e-06, "loss": 0.4591559171676636, "step": 4314 }, { "epoch": 0.9949273691491815, "grad_norm": 1.609102883203802, "learning_rate": 1.0913750639828709e-06, "loss": 0.5034719705581665, "step": 4315 }, { "epoch": 0.9951579432787642, "grad_norm": 1.3461654572756176, "learning_rate": 1.0909953910945921e-06, "loss": 0.5289135575294495, "step": 4316 }, { "epoch": 0.9953885174083468, "grad_norm": 1.5181970245510374, "learning_rate": 1.090615704979211e-06, "loss": 0.48736900091171265, "step": 4317 }, { "epoch": 0.9956190915379295, "grad_norm": 1.347314123709775, "learning_rate": 1.0902360056919186e-06, "loss": 0.44812899827957153, "step": 4318 }, { "epoch": 0.9958496656675121, "grad_norm": 1.717313100956624, "learning_rate": 1.0898562932879083e-06, "loss": 0.42837953567504883, "step": 4319 }, { "epoch": 0.9960802397970948, "grad_norm": 1.3616068420969312, "learning_rate": 1.089476567822375e-06, "loss": 0.4946538805961609, "step": 4320 }, { "epoch": 0.9963108139266774, "grad_norm": 1.3738772638549184, "learning_rate": 1.089096829350516e-06, "loss": 0.472694993019104, "step": 4321 }, { "epoch": 0.9965413880562601, "grad_norm": 1.51102718471871, "learning_rate": 1.0887170779275297e-06, "loss": 0.546560525894165, "step": 4322 }, { "epoch": 0.9967719621858427, "grad_norm": 1.7144585803126207, "learning_rate": 1.088337313608617e-06, "loss": 0.5098580718040466, "step": 4323 }, { "epoch": 0.9970025363154255, "grad_norm": 1.4511718916783138, "learning_rate": 1.0879575364489807e-06, "loss": 0.4127371907234192, "step": 4324 }, { "epoch": 0.9972331104450081, "grad_norm": 1.361622993253284, "learning_rate": 1.0875777465038249e-06, "loss": 0.41234201192855835, "step": 4325 }, { "epoch": 0.9974636845745908, "grad_norm": 1.334187068919988, "learning_rate": 1.087197943828356e-06, "loss": 0.42657697200775146, "step": 4326 }, { "epoch": 0.9976942587041734, "grad_norm": 1.5731685077464828, "learning_rate": 1.0868181284777825e-06, "loss": 0.5168975591659546, "step": 4327 }, { "epoch": 0.9979248328337561, "grad_norm": 1.3417267376651396, "learning_rate": 1.0864383005073142e-06, "loss": 0.4712294340133667, "step": 4328 }, { "epoch": 0.9981554069633387, "grad_norm": 1.514146578387226, "learning_rate": 1.0860584599721624e-06, "loss": 0.4685649871826172, "step": 4329 }, { "epoch": 0.9983859810929214, "grad_norm": 1.4104009699586146, "learning_rate": 1.0856786069275417e-06, "loss": 0.4699268937110901, "step": 4330 }, { "epoch": 0.998616555222504, "grad_norm": 1.5072273981885642, "learning_rate": 1.0852987414286669e-06, "loss": 0.44216299057006836, "step": 4331 }, { "epoch": 0.9988471293520867, "grad_norm": 1.489870947647978, "learning_rate": 1.0849188635307558e-06, "loss": 0.4374035894870758, "step": 4332 }, { "epoch": 0.9990777034816694, "grad_norm": 1.396380314188184, "learning_rate": 1.0845389732890269e-06, "loss": 0.4538502097129822, "step": 4333 }, { "epoch": 0.9993082776112521, "grad_norm": 1.5201233043344708, "learning_rate": 1.0841590707587017e-06, "loss": 0.4432523250579834, "step": 4334 }, { "epoch": 0.9995388517408347, "grad_norm": 1.3401246835224159, "learning_rate": 1.0837791559950026e-06, "loss": 0.3614054322242737, "step": 4335 }, { "epoch": 0.9997694258704174, "grad_norm": 1.5241184734301618, "learning_rate": 1.0833992290531542e-06, "loss": 0.5412651300430298, "step": 4336 }, { "epoch": 1.0, "grad_norm": 1.3961487739465548, "learning_rate": 1.0830192899883825e-06, "loss": 0.43333327770233154, "step": 4337 }, { "epoch": 1.0002305741295827, "grad_norm": 1.3739097269887006, "learning_rate": 1.0826393388559156e-06, "loss": 0.40433377027511597, "step": 4338 }, { "epoch": 1.0004611482591652, "grad_norm": 1.5246903566917884, "learning_rate": 1.0822593757109835e-06, "loss": 0.49699902534484863, "step": 4339 }, { "epoch": 1.000691722388748, "grad_norm": 1.4093275236950669, "learning_rate": 1.0818794006088174e-06, "loss": 0.4992629289627075, "step": 4340 }, { "epoch": 1.0009222965183306, "grad_norm": 1.546985643456235, "learning_rate": 1.0814994136046503e-06, "loss": 0.39532744884490967, "step": 4341 }, { "epoch": 1.0011528706479134, "grad_norm": 1.4715614082094945, "learning_rate": 1.0811194147537177e-06, "loss": 0.48260024189949036, "step": 4342 }, { "epoch": 1.0013834447774959, "grad_norm": 1.1813818983438111, "learning_rate": 1.0807394041112562e-06, "loss": 0.40896737575531006, "step": 4343 }, { "epoch": 1.0016140189070786, "grad_norm": 1.373003199387245, "learning_rate": 1.0803593817325037e-06, "loss": 0.361757755279541, "step": 4344 }, { "epoch": 1.0018445930366613, "grad_norm": 1.3113582417275997, "learning_rate": 1.0799793476727006e-06, "loss": 0.5524640083312988, "step": 4345 }, { "epoch": 1.002075167166244, "grad_norm": 1.4504745740569693, "learning_rate": 1.0795993019870891e-06, "loss": 0.4798622727394104, "step": 4346 }, { "epoch": 1.0023057412958265, "grad_norm": 1.1125620580650875, "learning_rate": 1.079219244730912e-06, "loss": 0.3408532440662384, "step": 4347 }, { "epoch": 1.0025363154254092, "grad_norm": 1.6198320758392701, "learning_rate": 1.0788391759594152e-06, "loss": 0.4185452461242676, "step": 4348 }, { "epoch": 1.002766889554992, "grad_norm": 1.4569047754589481, "learning_rate": 1.078459095727845e-06, "loss": 0.4656596779823303, "step": 4349 }, { "epoch": 1.0029974636845747, "grad_norm": 1.2861299587948707, "learning_rate": 1.07807900409145e-06, "loss": 0.45649081468582153, "step": 4350 }, { "epoch": 1.0032280378141571, "grad_norm": 1.4368410869138808, "learning_rate": 1.0776989011054806e-06, "loss": 0.4732903242111206, "step": 4351 }, { "epoch": 1.0034586119437399, "grad_norm": 1.4875640347613817, "learning_rate": 1.0773187868251882e-06, "loss": 0.5313757658004761, "step": 4352 }, { "epoch": 1.0036891860733226, "grad_norm": 1.7663418153227872, "learning_rate": 1.0769386613058267e-06, "loss": 0.5373719334602356, "step": 4353 }, { "epoch": 1.0039197602029053, "grad_norm": 1.4108655227977445, "learning_rate": 1.076558524602651e-06, "loss": 0.4530528783798218, "step": 4354 }, { "epoch": 1.0041503343324878, "grad_norm": 2.0172927781638816, "learning_rate": 1.076178376770918e-06, "loss": 0.361511766910553, "step": 4355 }, { "epoch": 1.0043809084620705, "grad_norm": 1.5430566364369291, "learning_rate": 1.0757982178658857e-06, "loss": 0.4260486364364624, "step": 4356 }, { "epoch": 1.0046114825916532, "grad_norm": 1.4352564218347874, "learning_rate": 1.0754180479428142e-06, "loss": 0.4765712320804596, "step": 4357 }, { "epoch": 1.004842056721236, "grad_norm": 1.408849526827852, "learning_rate": 1.0750378670569652e-06, "loss": 0.485443115234375, "step": 4358 }, { "epoch": 1.0050726308508184, "grad_norm": 1.3833154190721015, "learning_rate": 1.074657675263602e-06, "loss": 0.5010418891906738, "step": 4359 }, { "epoch": 1.0053032049804012, "grad_norm": 1.2138138176978153, "learning_rate": 1.074277472617989e-06, "loss": 0.42195719480514526, "step": 4360 }, { "epoch": 1.0055337791099839, "grad_norm": 1.4341592826356415, "learning_rate": 1.073897259175392e-06, "loss": 0.48555606603622437, "step": 4361 }, { "epoch": 1.0057643532395666, "grad_norm": 1.4030257216310642, "learning_rate": 1.07351703499108e-06, "loss": 0.4991112947463989, "step": 4362 }, { "epoch": 1.005994927369149, "grad_norm": 1.365972754336138, "learning_rate": 1.0731368001203217e-06, "loss": 0.43016430735588074, "step": 4363 }, { "epoch": 1.0062255014987318, "grad_norm": 1.635861674358112, "learning_rate": 1.0727565546183883e-06, "loss": 0.47147876024246216, "step": 4364 }, { "epoch": 1.0064560756283145, "grad_norm": 1.4724107461573035, "learning_rate": 1.0723762985405522e-06, "loss": 0.4695407748222351, "step": 4365 }, { "epoch": 1.0066866497578972, "grad_norm": 1.4167512288976294, "learning_rate": 1.0719960319420878e-06, "loss": 0.42666512727737427, "step": 4366 }, { "epoch": 1.0069172238874797, "grad_norm": 1.4965231034133355, "learning_rate": 1.0716157548782705e-06, "loss": 0.5685237050056458, "step": 4367 }, { "epoch": 1.0071477980170624, "grad_norm": 1.2856237164503312, "learning_rate": 1.0712354674043774e-06, "loss": 0.45181894302368164, "step": 4368 }, { "epoch": 1.0073783721466452, "grad_norm": 1.479568259964695, "learning_rate": 1.070855169575687e-06, "loss": 0.4079795479774475, "step": 4369 }, { "epoch": 1.0076089462762279, "grad_norm": 1.196685278300245, "learning_rate": 1.0704748614474798e-06, "loss": 0.4011094570159912, "step": 4370 }, { "epoch": 1.0078395204058104, "grad_norm": 1.5280378960817975, "learning_rate": 1.0700945430750373e-06, "loss": 0.48842671513557434, "step": 4371 }, { "epoch": 1.008070094535393, "grad_norm": 1.237232307792151, "learning_rate": 1.0697142145136425e-06, "loss": 0.5183907151222229, "step": 4372 }, { "epoch": 1.0083006686649758, "grad_norm": 1.4080736997180416, "learning_rate": 1.0693338758185797e-06, "loss": 0.5022784471511841, "step": 4373 }, { "epoch": 1.0085312427945585, "grad_norm": 1.5160750764739457, "learning_rate": 1.0689535270451358e-06, "loss": 0.500054121017456, "step": 4374 }, { "epoch": 1.008761816924141, "grad_norm": 1.331407944528498, "learning_rate": 1.068573168248598e-06, "loss": 0.43674880266189575, "step": 4375 }, { "epoch": 1.0089923910537237, "grad_norm": 1.3441260000045296, "learning_rate": 1.068192799484255e-06, "loss": 0.4272059202194214, "step": 4376 }, { "epoch": 1.0092229651833065, "grad_norm": 1.3188087584834265, "learning_rate": 1.0678124208073972e-06, "loss": 0.41053932905197144, "step": 4377 }, { "epoch": 1.0094535393128892, "grad_norm": 1.3285405544041065, "learning_rate": 1.0674320322733173e-06, "loss": 0.4571593701839447, "step": 4378 }, { "epoch": 1.0096841134424717, "grad_norm": 1.2947195973212757, "learning_rate": 1.0670516339373081e-06, "loss": 0.464965283870697, "step": 4379 }, { "epoch": 1.0099146875720544, "grad_norm": 1.2757697611295247, "learning_rate": 1.0666712258546639e-06, "loss": 0.4086726903915405, "step": 4380 }, { "epoch": 1.010145261701637, "grad_norm": 1.3664230084580502, "learning_rate": 1.0662908080806815e-06, "loss": 0.49988412857055664, "step": 4381 }, { "epoch": 1.0103758358312198, "grad_norm": 1.33263070405775, "learning_rate": 1.0659103806706587e-06, "loss": 0.3976360559463501, "step": 4382 }, { "epoch": 1.0106064099608023, "grad_norm": 1.3554444243435904, "learning_rate": 1.065529943679894e-06, "loss": 0.4500683546066284, "step": 4383 }, { "epoch": 1.010836984090385, "grad_norm": 1.4532099828866123, "learning_rate": 1.0651494971636875e-06, "loss": 0.5617754459381104, "step": 4384 }, { "epoch": 1.0110675582199677, "grad_norm": 1.2285766706051995, "learning_rate": 1.0647690411773414e-06, "loss": 0.4180886745452881, "step": 4385 }, { "epoch": 1.0112981323495505, "grad_norm": 1.3797895213155087, "learning_rate": 1.0643885757761588e-06, "loss": 0.406663179397583, "step": 4386 }, { "epoch": 1.011528706479133, "grad_norm": 1.2899676326462104, "learning_rate": 1.0640081010154443e-06, "loss": 0.4698946475982666, "step": 4387 }, { "epoch": 1.0117592806087157, "grad_norm": 1.2421672055806043, "learning_rate": 1.0636276169505034e-06, "loss": 0.4845995306968689, "step": 4388 }, { "epoch": 1.0119898547382984, "grad_norm": 1.7127723444190444, "learning_rate": 1.0632471236366435e-06, "loss": 0.5065066814422607, "step": 4389 }, { "epoch": 1.012220428867881, "grad_norm": 1.5183614166838566, "learning_rate": 1.0628666211291735e-06, "loss": 0.4302946925163269, "step": 4390 }, { "epoch": 1.0124510029974636, "grad_norm": 1.682116735922279, "learning_rate": 1.0624861094834029e-06, "loss": 0.5772345066070557, "step": 4391 }, { "epoch": 1.0126815771270463, "grad_norm": 1.3399536785573158, "learning_rate": 1.0621055887546425e-06, "loss": 0.5294336080551147, "step": 4392 }, { "epoch": 1.012912151256629, "grad_norm": 1.1967430772955985, "learning_rate": 1.0617250589982059e-06, "loss": 0.5028249621391296, "step": 4393 }, { "epoch": 1.0131427253862118, "grad_norm": 1.3120231857267954, "learning_rate": 1.0613445202694065e-06, "loss": 0.5072348713874817, "step": 4394 }, { "epoch": 1.0133732995157942, "grad_norm": 1.3107230472369709, "learning_rate": 1.060963972623559e-06, "loss": 0.3632262945175171, "step": 4395 }, { "epoch": 1.013603873645377, "grad_norm": 1.4739700660925632, "learning_rate": 1.06058341611598e-06, "loss": 0.419277161359787, "step": 4396 }, { "epoch": 1.0138344477749597, "grad_norm": 1.4201089967708693, "learning_rate": 1.060202850801988e-06, "loss": 0.4056069850921631, "step": 4397 }, { "epoch": 1.0140650219045424, "grad_norm": 1.4908298419223913, "learning_rate": 1.0598222767369014e-06, "loss": 0.5591505765914917, "step": 4398 }, { "epoch": 1.014295596034125, "grad_norm": 1.2646885984398546, "learning_rate": 1.0594416939760408e-06, "loss": 0.38529443740844727, "step": 4399 }, { "epoch": 1.0145261701637076, "grad_norm": 1.3255980825912217, "learning_rate": 1.0590611025747272e-06, "loss": 0.3609437644481659, "step": 4400 }, { "epoch": 1.0147567442932903, "grad_norm": 1.3538282738769345, "learning_rate": 1.058680502588284e-06, "loss": 0.4849050045013428, "step": 4401 }, { "epoch": 1.014987318422873, "grad_norm": 1.4516377120705455, "learning_rate": 1.058299894072035e-06, "loss": 0.39454251527786255, "step": 4402 }, { "epoch": 1.0152178925524555, "grad_norm": 1.5578248119945644, "learning_rate": 1.0579192770813052e-06, "loss": 0.39726459980010986, "step": 4403 }, { "epoch": 1.0154484666820383, "grad_norm": 1.4398814364290877, "learning_rate": 1.0575386516714218e-06, "loss": 0.4730626940727234, "step": 4404 }, { "epoch": 1.015679040811621, "grad_norm": 1.5842749126492264, "learning_rate": 1.0571580178977123e-06, "loss": 0.5436214804649353, "step": 4405 }, { "epoch": 1.0159096149412037, "grad_norm": 1.4188700773135285, "learning_rate": 1.0567773758155055e-06, "loss": 0.4197273850440979, "step": 4406 }, { "epoch": 1.0161401890707862, "grad_norm": 1.2873423308659837, "learning_rate": 1.0563967254801316e-06, "loss": 0.46460944414138794, "step": 4407 }, { "epoch": 1.016370763200369, "grad_norm": 1.3771325056314752, "learning_rate": 1.056016066946922e-06, "loss": 0.3504630923271179, "step": 4408 }, { "epoch": 1.0166013373299516, "grad_norm": 1.3484234762530152, "learning_rate": 1.0556354002712098e-06, "loss": 0.4620180130004883, "step": 4409 }, { "epoch": 1.0168319114595343, "grad_norm": 1.414975730602458, "learning_rate": 1.0552547255083283e-06, "loss": 0.5642764568328857, "step": 4410 }, { "epoch": 1.0170624855891168, "grad_norm": 1.3858649703726607, "learning_rate": 1.054874042713612e-06, "loss": 0.48283201456069946, "step": 4411 }, { "epoch": 1.0172930597186995, "grad_norm": 1.3477248933257546, "learning_rate": 1.0544933519423976e-06, "loss": 0.5346091985702515, "step": 4412 }, { "epoch": 1.0175236338482823, "grad_norm": 1.216774984460132, "learning_rate": 1.0541126532500224e-06, "loss": 0.4710259437561035, "step": 4413 }, { "epoch": 1.017754207977865, "grad_norm": 1.6611025915045114, "learning_rate": 1.0537319466918243e-06, "loss": 0.535955548286438, "step": 4414 }, { "epoch": 1.0179847821074475, "grad_norm": 1.298601209078171, "learning_rate": 1.0533512323231438e-06, "loss": 0.4127902388572693, "step": 4415 }, { "epoch": 1.0182153562370302, "grad_norm": 1.6222892430544704, "learning_rate": 1.0529705101993203e-06, "loss": 0.5209894180297852, "step": 4416 }, { "epoch": 1.018445930366613, "grad_norm": 1.5702821211846574, "learning_rate": 1.0525897803756967e-06, "loss": 0.45600390434265137, "step": 4417 }, { "epoch": 1.0186765044961956, "grad_norm": 1.6858904509627837, "learning_rate": 1.0522090429076155e-06, "loss": 0.5043426156044006, "step": 4418 }, { "epoch": 1.0189070786257781, "grad_norm": 1.8442717417612486, "learning_rate": 1.0518282978504207e-06, "loss": 0.43386173248291016, "step": 4419 }, { "epoch": 1.0191376527553608, "grad_norm": 1.4810433748538916, "learning_rate": 1.0514475452594578e-06, "loss": 0.44956767559051514, "step": 4420 }, { "epoch": 1.0193682268849436, "grad_norm": 1.4162663845873593, "learning_rate": 1.0510667851900726e-06, "loss": 0.47164878249168396, "step": 4421 }, { "epoch": 1.0195988010145263, "grad_norm": 1.3111398742961289, "learning_rate": 1.0506860176976127e-06, "loss": 0.4977136552333832, "step": 4422 }, { "epoch": 1.0198293751441088, "grad_norm": 1.2272027402421368, "learning_rate": 1.0503052428374264e-06, "loss": 0.4344305396080017, "step": 4423 }, { "epoch": 1.0200599492736915, "grad_norm": 1.4594484344103595, "learning_rate": 1.049924460664863e-06, "loss": 0.46536487340927124, "step": 4424 }, { "epoch": 1.0202905234032742, "grad_norm": 1.5676489928965973, "learning_rate": 1.0495436712352733e-06, "loss": 0.4583844840526581, "step": 4425 }, { "epoch": 1.020521097532857, "grad_norm": 1.3353943490467204, "learning_rate": 1.049162874604009e-06, "loss": 0.4098002314567566, "step": 4426 }, { "epoch": 1.0207516716624394, "grad_norm": 1.5212892459953231, "learning_rate": 1.0487820708264227e-06, "loss": 0.48168665170669556, "step": 4427 }, { "epoch": 1.0209822457920221, "grad_norm": 1.575752706874104, "learning_rate": 1.048401259957868e-06, "loss": 0.5517562627792358, "step": 4428 }, { "epoch": 1.0212128199216048, "grad_norm": 1.4762864972879257, "learning_rate": 1.0480204420536998e-06, "loss": 0.5131476521492004, "step": 4429 }, { "epoch": 1.0214433940511876, "grad_norm": 1.3669237261259728, "learning_rate": 1.0476396171692734e-06, "loss": 0.4590519666671753, "step": 4430 }, { "epoch": 1.02167396818077, "grad_norm": 1.6209541549743127, "learning_rate": 1.0472587853599458e-06, "loss": 0.5581461191177368, "step": 4431 }, { "epoch": 1.0219045423103528, "grad_norm": 1.9464318549736228, "learning_rate": 1.046877946681075e-06, "loss": 0.4169657826423645, "step": 4432 }, { "epoch": 1.0221351164399355, "grad_norm": 1.6990409231148407, "learning_rate": 1.0464971011880195e-06, "loss": 0.48135459423065186, "step": 4433 }, { "epoch": 1.0223656905695182, "grad_norm": 1.5888684830629844, "learning_rate": 1.046116248936139e-06, "loss": 0.5116040706634521, "step": 4434 }, { "epoch": 1.0225962646991007, "grad_norm": 1.2239425777755701, "learning_rate": 1.0457353899807946e-06, "loss": 0.4369809329509735, "step": 4435 }, { "epoch": 1.0228268388286834, "grad_norm": 1.3094581394180187, "learning_rate": 1.0453545243773474e-06, "loss": 0.42936772108078003, "step": 4436 }, { "epoch": 1.0230574129582661, "grad_norm": 1.4191745941139933, "learning_rate": 1.0449736521811605e-06, "loss": 0.3614712357521057, "step": 4437 }, { "epoch": 1.0232879870878488, "grad_norm": 1.4958077731615864, "learning_rate": 1.0445927734475977e-06, "loss": 0.40728119015693665, "step": 4438 }, { "epoch": 1.0235185612174313, "grad_norm": 1.6199665099354292, "learning_rate": 1.0442118882320233e-06, "loss": 0.4940561056137085, "step": 4439 }, { "epoch": 1.023749135347014, "grad_norm": 1.5292135898443935, "learning_rate": 1.0438309965898027e-06, "loss": 0.49529674649238586, "step": 4440 }, { "epoch": 1.0239797094765968, "grad_norm": 1.3839632419664316, "learning_rate": 1.0434500985763027e-06, "loss": 0.4849408268928528, "step": 4441 }, { "epoch": 1.0242102836061795, "grad_norm": 1.2306090654878221, "learning_rate": 1.0430691942468903e-06, "loss": 0.4121132791042328, "step": 4442 }, { "epoch": 1.024440857735762, "grad_norm": 1.3788405992777184, "learning_rate": 1.042688283656934e-06, "loss": 0.4348478317260742, "step": 4443 }, { "epoch": 1.0246714318653447, "grad_norm": 1.4946594419770094, "learning_rate": 1.0423073668618033e-06, "loss": 0.46817919611930847, "step": 4444 }, { "epoch": 1.0249020059949274, "grad_norm": 1.4309128927667782, "learning_rate": 1.041926443916868e-06, "loss": 0.4422008991241455, "step": 4445 }, { "epoch": 1.02513258012451, "grad_norm": 1.4766353003575698, "learning_rate": 1.041545514877499e-06, "loss": 0.5108183026313782, "step": 4446 }, { "epoch": 1.0253631542540926, "grad_norm": 1.4287581583003561, "learning_rate": 1.0411645797990685e-06, "loss": 0.4759529232978821, "step": 4447 }, { "epoch": 1.0255937283836754, "grad_norm": 1.4822019265627726, "learning_rate": 1.040783638736949e-06, "loss": 0.44447648525238037, "step": 4448 }, { "epoch": 1.025824302513258, "grad_norm": 1.9820121270715096, "learning_rate": 1.0404026917465144e-06, "loss": 0.4558752477169037, "step": 4449 }, { "epoch": 1.0260548766428408, "grad_norm": 1.5117188074263472, "learning_rate": 1.0400217388831393e-06, "loss": 0.4728459417819977, "step": 4450 }, { "epoch": 1.0262854507724233, "grad_norm": 1.2832295949174854, "learning_rate": 1.0396407802021985e-06, "loss": 0.4815519452095032, "step": 4451 }, { "epoch": 1.026516024902006, "grad_norm": 1.493224641636315, "learning_rate": 1.0392598157590685e-06, "loss": 0.5173656344413757, "step": 4452 }, { "epoch": 1.0267465990315887, "grad_norm": 1.389267472286255, "learning_rate": 1.0388788456091267e-06, "loss": 0.5280762910842896, "step": 4453 }, { "epoch": 1.0269771731611712, "grad_norm": 1.3239342530675255, "learning_rate": 1.0384978698077506e-06, "loss": 0.4524118900299072, "step": 4454 }, { "epoch": 1.027207747290754, "grad_norm": 1.3855017021962426, "learning_rate": 1.0381168884103186e-06, "loss": 0.4011715054512024, "step": 4455 }, { "epoch": 1.0274383214203366, "grad_norm": 1.6664926632341406, "learning_rate": 1.0377359014722108e-06, "loss": 0.518020749092102, "step": 4456 }, { "epoch": 1.0276688955499194, "grad_norm": 1.3443799803410221, "learning_rate": 1.0373549090488073e-06, "loss": 0.44726112484931946, "step": 4457 }, { "epoch": 1.0278994696795019, "grad_norm": 1.5697915792497608, "learning_rate": 1.0369739111954894e-06, "loss": 0.5344264507293701, "step": 4458 }, { "epoch": 1.0281300438090846, "grad_norm": 1.3300732692572412, "learning_rate": 1.0365929079676387e-06, "loss": 0.4902813732624054, "step": 4459 }, { "epoch": 1.0283606179386673, "grad_norm": 1.6676294678142136, "learning_rate": 1.0362118994206378e-06, "loss": 0.38346555829048157, "step": 4460 }, { "epoch": 1.02859119206825, "grad_norm": 1.4992112279059755, "learning_rate": 1.0358308856098705e-06, "loss": 0.4232872724533081, "step": 4461 }, { "epoch": 1.0288217661978325, "grad_norm": 1.4973168899301483, "learning_rate": 1.0354498665907207e-06, "loss": 0.5184470415115356, "step": 4462 }, { "epoch": 1.0290523403274152, "grad_norm": 1.3344202325848402, "learning_rate": 1.0350688424185733e-06, "loss": 0.4989054203033447, "step": 4463 }, { "epoch": 1.029282914456998, "grad_norm": 1.4348006325476266, "learning_rate": 1.0346878131488145e-06, "loss": 0.5204064249992371, "step": 4464 }, { "epoch": 1.0295134885865806, "grad_norm": 1.5066284997527284, "learning_rate": 1.0343067788368307e-06, "loss": 0.47872811555862427, "step": 4465 }, { "epoch": 1.0297440627161631, "grad_norm": 1.4195028916227292, "learning_rate": 1.0339257395380087e-06, "loss": 0.4104915261268616, "step": 4466 }, { "epoch": 1.0299746368457459, "grad_norm": 1.3696214178005537, "learning_rate": 1.0335446953077366e-06, "loss": 0.39327263832092285, "step": 4467 }, { "epoch": 1.0302052109753286, "grad_norm": 1.4702497550106948, "learning_rate": 1.033163646201403e-06, "loss": 0.4395657777786255, "step": 4468 }, { "epoch": 1.0304357851049113, "grad_norm": 1.419425725268843, "learning_rate": 1.0327825922743976e-06, "loss": 0.462537944316864, "step": 4469 }, { "epoch": 1.0306663592344938, "grad_norm": 1.3686105119540095, "learning_rate": 1.03240153358211e-06, "loss": 0.4399976134300232, "step": 4470 }, { "epoch": 1.0308969333640765, "grad_norm": 1.2004518913155955, "learning_rate": 1.0320204701799311e-06, "loss": 0.4289684593677521, "step": 4471 }, { "epoch": 1.0311275074936592, "grad_norm": 1.700414177665105, "learning_rate": 1.0316394021232524e-06, "loss": 0.4771305322647095, "step": 4472 }, { "epoch": 1.031358081623242, "grad_norm": 1.3381367861828992, "learning_rate": 1.031258329467466e-06, "loss": 0.4544849395751953, "step": 4473 }, { "epoch": 1.0315886557528244, "grad_norm": 1.7319531178301495, "learning_rate": 1.0308772522679646e-06, "loss": 0.5362099409103394, "step": 4474 }, { "epoch": 1.0318192298824072, "grad_norm": 1.564907240947497, "learning_rate": 1.0304961705801413e-06, "loss": 0.48966753482818604, "step": 4475 }, { "epoch": 1.0320498040119899, "grad_norm": 1.379783010020372, "learning_rate": 1.0301150844593908e-06, "loss": 0.3750344216823578, "step": 4476 }, { "epoch": 1.0322803781415726, "grad_norm": 1.3651499470494945, "learning_rate": 1.0297339939611076e-06, "loss": 0.453983873128891, "step": 4477 }, { "epoch": 1.032510952271155, "grad_norm": 1.837467998410361, "learning_rate": 1.029352899140687e-06, "loss": 0.5096027255058289, "step": 4478 }, { "epoch": 1.0327415264007378, "grad_norm": 1.395622916901131, "learning_rate": 1.028971800053525e-06, "loss": 0.4387558698654175, "step": 4479 }, { "epoch": 1.0329721005303205, "grad_norm": 1.324708629656248, "learning_rate": 1.0285906967550184e-06, "loss": 0.45710843801498413, "step": 4480 }, { "epoch": 1.0332026746599032, "grad_norm": 1.631576144246761, "learning_rate": 1.0282095893005643e-06, "loss": 0.5258994102478027, "step": 4481 }, { "epoch": 1.0334332487894857, "grad_norm": 1.320456527047697, "learning_rate": 1.0278284777455603e-06, "loss": 0.5037236213684082, "step": 4482 }, { "epoch": 1.0336638229190684, "grad_norm": 1.3671446032683054, "learning_rate": 1.027447362145405e-06, "loss": 0.4730300307273865, "step": 4483 }, { "epoch": 1.0338943970486512, "grad_norm": 1.5284074958618745, "learning_rate": 1.0270662425554974e-06, "loss": 0.4373326301574707, "step": 4484 }, { "epoch": 1.0341249711782339, "grad_norm": 1.379045843622324, "learning_rate": 1.0266851190312373e-06, "loss": 0.3915579319000244, "step": 4485 }, { "epoch": 1.0343555453078164, "grad_norm": 1.3482794503547837, "learning_rate": 1.0263039916280247e-06, "loss": 0.36588191986083984, "step": 4486 }, { "epoch": 1.034586119437399, "grad_norm": 1.2333606023937755, "learning_rate": 1.0259228604012602e-06, "loss": 0.4287286400794983, "step": 4487 }, { "epoch": 1.0348166935669818, "grad_norm": 1.3775270616642934, "learning_rate": 1.0255417254063454e-06, "loss": 0.4405861496925354, "step": 4488 }, { "epoch": 1.0350472676965645, "grad_norm": 1.443831892269548, "learning_rate": 1.0251605866986818e-06, "loss": 0.4859738349914551, "step": 4489 }, { "epoch": 1.035277841826147, "grad_norm": 1.4103288990509777, "learning_rate": 1.0247794443336722e-06, "loss": 0.40879446268081665, "step": 4490 }, { "epoch": 1.0355084159557297, "grad_norm": 1.4900612923986292, "learning_rate": 1.024398298366719e-06, "loss": 0.44872337579727173, "step": 4491 }, { "epoch": 1.0357389900853124, "grad_norm": 1.3707597883324278, "learning_rate": 1.0240171488532258e-06, "loss": 0.41155117750167847, "step": 4492 }, { "epoch": 1.0359695642148952, "grad_norm": 1.4935319402234073, "learning_rate": 1.0236359958485966e-06, "loss": 0.48941487073898315, "step": 4493 }, { "epoch": 1.0362001383444777, "grad_norm": 1.3889526979110256, "learning_rate": 1.0232548394082362e-06, "loss": 0.4462544322013855, "step": 4494 }, { "epoch": 1.0364307124740604, "grad_norm": 1.7635931454030804, "learning_rate": 1.0228736795875487e-06, "loss": 0.3791837692260742, "step": 4495 }, { "epoch": 1.036661286603643, "grad_norm": 1.7988283203699307, "learning_rate": 1.0224925164419404e-06, "loss": 0.5037285685539246, "step": 4496 }, { "epoch": 1.0368918607332258, "grad_norm": 1.5033654685782605, "learning_rate": 1.0221113500268169e-06, "loss": 0.4762890636920929, "step": 4497 }, { "epoch": 1.0371224348628083, "grad_norm": 1.2678994584792878, "learning_rate": 1.0217301803975844e-06, "loss": 0.4673793315887451, "step": 4498 }, { "epoch": 1.037353008992391, "grad_norm": 1.4491139066226089, "learning_rate": 1.0213490076096501e-06, "loss": 0.37522250413894653, "step": 4499 }, { "epoch": 1.0375835831219737, "grad_norm": 1.4197729369573655, "learning_rate": 1.020967831718421e-06, "loss": 0.4986375570297241, "step": 4500 }, { "epoch": 1.0378141572515565, "grad_norm": 1.3424622189818292, "learning_rate": 1.0205866527793053e-06, "loss": 0.488337904214859, "step": 4501 }, { "epoch": 1.038044731381139, "grad_norm": 1.2513264252251595, "learning_rate": 1.0202054708477107e-06, "loss": 0.37420767545700073, "step": 4502 }, { "epoch": 1.0382753055107217, "grad_norm": 1.1901249454864467, "learning_rate": 1.0198242859790465e-06, "loss": 0.42453843355178833, "step": 4503 }, { "epoch": 1.0385058796403044, "grad_norm": 1.5998980096348292, "learning_rate": 1.0194430982287211e-06, "loss": 0.4431978166103363, "step": 4504 }, { "epoch": 1.038736453769887, "grad_norm": 1.2584649975167521, "learning_rate": 1.0190619076521445e-06, "loss": 0.5079195499420166, "step": 4505 }, { "epoch": 1.0389670278994696, "grad_norm": 1.3630757915855334, "learning_rate": 1.0186807143047263e-06, "loss": 0.442915678024292, "step": 4506 }, { "epoch": 1.0391976020290523, "grad_norm": 1.4946032354137926, "learning_rate": 1.018299518241877e-06, "loss": 0.4720972180366516, "step": 4507 }, { "epoch": 1.039428176158635, "grad_norm": 1.407838633939113, "learning_rate": 1.0179183195190073e-06, "loss": 0.4637352526187897, "step": 4508 }, { "epoch": 1.0396587502882177, "grad_norm": 1.3457342565284411, "learning_rate": 1.0175371181915283e-06, "loss": 0.4207759499549866, "step": 4509 }, { "epoch": 1.0398893244178002, "grad_norm": 1.5872196626053143, "learning_rate": 1.0171559143148514e-06, "loss": 0.49227845668792725, "step": 4510 }, { "epoch": 1.040119898547383, "grad_norm": 1.4565076836431372, "learning_rate": 1.0167747079443884e-06, "loss": 0.5006893873214722, "step": 4511 }, { "epoch": 1.0403504726769657, "grad_norm": 1.4618469895611303, "learning_rate": 1.016393499135552e-06, "loss": 0.42048192024230957, "step": 4512 }, { "epoch": 1.0405810468065484, "grad_norm": 1.5634742093932859, "learning_rate": 1.0160122879437538e-06, "loss": 0.5275895595550537, "step": 4513 }, { "epoch": 1.0408116209361309, "grad_norm": 1.1544305266604897, "learning_rate": 1.0156310744244073e-06, "loss": 0.4677985906600952, "step": 4514 }, { "epoch": 1.0410421950657136, "grad_norm": 1.422644417212902, "learning_rate": 1.015249858632926e-06, "loss": 0.5214150547981262, "step": 4515 }, { "epoch": 1.0412727691952963, "grad_norm": 1.2418435857264525, "learning_rate": 1.0148686406247232e-06, "loss": 0.40790024399757385, "step": 4516 }, { "epoch": 1.041503343324879, "grad_norm": 1.6199751141856524, "learning_rate": 1.0144874204552125e-06, "loss": 0.5943785309791565, "step": 4517 }, { "epoch": 1.0417339174544615, "grad_norm": 1.531988684910503, "learning_rate": 1.0141061981798086e-06, "loss": 0.4590263366699219, "step": 4518 }, { "epoch": 1.0419644915840443, "grad_norm": 1.3212940799821826, "learning_rate": 1.0137249738539257e-06, "loss": 0.4106098413467407, "step": 4519 }, { "epoch": 1.042195065713627, "grad_norm": 1.4102973636174063, "learning_rate": 1.013343747532979e-06, "loss": 0.4730203151702881, "step": 4520 }, { "epoch": 1.0424256398432097, "grad_norm": 1.2769276209650842, "learning_rate": 1.0129625192723833e-06, "loss": 0.43245944380760193, "step": 4521 }, { "epoch": 1.0426562139727922, "grad_norm": 1.3088740452256564, "learning_rate": 1.012581289127554e-06, "loss": 0.40828272700309753, "step": 4522 }, { "epoch": 1.042886788102375, "grad_norm": 1.5940499075267438, "learning_rate": 1.0122000571539069e-06, "loss": 0.4232874810695648, "step": 4523 }, { "epoch": 1.0431173622319576, "grad_norm": 1.45477003479617, "learning_rate": 1.0118188234068579e-06, "loss": 0.43044984340667725, "step": 4524 }, { "epoch": 1.0433479363615403, "grad_norm": 1.6545172631907663, "learning_rate": 1.011437587941823e-06, "loss": 0.4502897262573242, "step": 4525 }, { "epoch": 1.0435785104911228, "grad_norm": 2.0995258586192467, "learning_rate": 1.0110563508142185e-06, "loss": 0.5505340099334717, "step": 4526 }, { "epoch": 1.0438090846207055, "grad_norm": 1.5629586322344833, "learning_rate": 1.0106751120794617e-06, "loss": 0.4026086628437042, "step": 4527 }, { "epoch": 1.0440396587502883, "grad_norm": 1.5105039899180257, "learning_rate": 1.0102938717929692e-06, "loss": 0.3910222053527832, "step": 4528 }, { "epoch": 1.044270232879871, "grad_norm": 1.6830902678008934, "learning_rate": 1.009912630010158e-06, "loss": 0.4134068191051483, "step": 4529 }, { "epoch": 1.0445008070094535, "grad_norm": 1.4825250898714368, "learning_rate": 1.0095313867864457e-06, "loss": 0.4801563024520874, "step": 4530 }, { "epoch": 1.0447313811390362, "grad_norm": 1.2424640239796358, "learning_rate": 1.0091501421772495e-06, "loss": 0.4269358515739441, "step": 4531 }, { "epoch": 1.044961955268619, "grad_norm": 1.3485994976026512, "learning_rate": 1.0087688962379877e-06, "loss": 0.5300281047821045, "step": 4532 }, { "epoch": 1.0451925293982016, "grad_norm": 1.6865287595757648, "learning_rate": 1.0083876490240777e-06, "loss": 0.4634189009666443, "step": 4533 }, { "epoch": 1.0454231035277841, "grad_norm": 1.5187760856795984, "learning_rate": 1.0080064005909379e-06, "loss": 0.37037551403045654, "step": 4534 }, { "epoch": 1.0456536776573668, "grad_norm": 1.2977267015714409, "learning_rate": 1.0076251509939867e-06, "loss": 0.4740016460418701, "step": 4535 }, { "epoch": 1.0458842517869495, "grad_norm": 1.4686161726335998, "learning_rate": 1.0072439002886426e-06, "loss": 0.4824775159358978, "step": 4536 }, { "epoch": 1.0461148259165323, "grad_norm": 1.4032368341998698, "learning_rate": 1.0068626485303242e-06, "loss": 0.4891430735588074, "step": 4537 }, { "epoch": 1.0463454000461148, "grad_norm": 1.440410031419601, "learning_rate": 1.00648139577445e-06, "loss": 0.48089975118637085, "step": 4538 }, { "epoch": 1.0465759741756975, "grad_norm": 1.3280505427696812, "learning_rate": 1.0061001420764395e-06, "loss": 0.4353799521923065, "step": 4539 }, { "epoch": 1.0468065483052802, "grad_norm": 1.5425308952951848, "learning_rate": 1.0057188874917117e-06, "loss": 0.4259982705116272, "step": 4540 }, { "epoch": 1.047037122434863, "grad_norm": 1.502788920344227, "learning_rate": 1.0053376320756852e-06, "loss": 0.4400532841682434, "step": 4541 }, { "epoch": 1.0472676965644454, "grad_norm": 1.398609267878258, "learning_rate": 1.00495637588378e-06, "loss": 0.48598533868789673, "step": 4542 }, { "epoch": 1.0474982706940281, "grad_norm": 1.7261761893493324, "learning_rate": 1.0045751189714153e-06, "loss": 0.6310586929321289, "step": 4543 }, { "epoch": 1.0477288448236108, "grad_norm": 1.4822203646620422, "learning_rate": 1.0041938613940108e-06, "loss": 0.49084293842315674, "step": 4544 }, { "epoch": 1.0479594189531936, "grad_norm": 1.6167393331453148, "learning_rate": 1.003812603206986e-06, "loss": 0.5144428014755249, "step": 4545 }, { "epoch": 1.048189993082776, "grad_norm": 1.4962485615696877, "learning_rate": 1.0034313444657605e-06, "loss": 0.4480917155742645, "step": 4546 }, { "epoch": 1.0484205672123588, "grad_norm": 1.4833727438286728, "learning_rate": 1.0030500852257545e-06, "loss": 0.4505491852760315, "step": 4547 }, { "epoch": 1.0486511413419415, "grad_norm": 1.3728340651335322, "learning_rate": 1.0026688255423876e-06, "loss": 0.3344930410385132, "step": 4548 }, { "epoch": 1.0488817154715242, "grad_norm": 1.3493238342876126, "learning_rate": 1.0022875654710801e-06, "loss": 0.4006739854812622, "step": 4549 }, { "epoch": 1.0491122896011067, "grad_norm": 1.4777604777161095, "learning_rate": 1.0019063050672517e-06, "loss": 0.4815717935562134, "step": 4550 }, { "epoch": 1.0493428637306894, "grad_norm": 1.4182246513528267, "learning_rate": 1.0015250443863223e-06, "loss": 0.4660469889640808, "step": 4551 }, { "epoch": 1.0495734378602721, "grad_norm": 1.4298035442899577, "learning_rate": 1.0011437834837125e-06, "loss": 0.5233521461486816, "step": 4552 }, { "epoch": 1.0498040119898548, "grad_norm": 1.7530768174577198, "learning_rate": 1.0007625224148418e-06, "loss": 0.6037864685058594, "step": 4553 }, { "epoch": 1.0500345861194373, "grad_norm": 1.726860458569315, "learning_rate": 1.000381261235131e-06, "loss": 0.469952255487442, "step": 4554 }, { "epoch": 1.05026516024902, "grad_norm": 1.302712404041117, "learning_rate": 1e-06, "loss": 0.4577752649784088, "step": 4555 }, { "epoch": 1.0504957343786028, "grad_norm": 1.537724574807554, "learning_rate": 9.996187387648692e-07, "loss": 0.46796074509620667, "step": 4556 }, { "epoch": 1.0507263085081853, "grad_norm": 1.3633141581703183, "learning_rate": 9.992374775851583e-07, "loss": 0.40709036588668823, "step": 4557 }, { "epoch": 1.050956882637768, "grad_norm": 1.2121351653860253, "learning_rate": 9.988562165162878e-07, "loss": 0.3997795879840851, "step": 4558 }, { "epoch": 1.0511874567673507, "grad_norm": 1.6938685288563167, "learning_rate": 9.984749556136779e-07, "loss": 0.4677845239639282, "step": 4559 }, { "epoch": 1.0514180308969334, "grad_norm": 1.315537055431831, "learning_rate": 9.980936949327487e-07, "loss": 0.40411800146102905, "step": 4560 }, { "epoch": 1.0516486050265161, "grad_norm": 1.3999939149032237, "learning_rate": 9.9771243452892e-07, "loss": 0.50546795129776, "step": 4561 }, { "epoch": 1.0518791791560986, "grad_norm": 1.5468163611837324, "learning_rate": 9.973311744576125e-07, "loss": 0.4116637110710144, "step": 4562 }, { "epoch": 1.0521097532856813, "grad_norm": 1.2997915019544943, "learning_rate": 9.969499147742454e-07, "loss": 0.4271109700202942, "step": 4563 }, { "epoch": 1.052340327415264, "grad_norm": 1.1760164248835672, "learning_rate": 9.965686555342396e-07, "loss": 0.37195074558258057, "step": 4564 }, { "epoch": 1.0525709015448466, "grad_norm": 1.6759945376385115, "learning_rate": 9.96187396793014e-07, "loss": 0.4020707607269287, "step": 4565 }, { "epoch": 1.0528014756744293, "grad_norm": 1.5880882887273124, "learning_rate": 9.95806138605989e-07, "loss": 0.4980151951313019, "step": 4566 }, { "epoch": 1.053032049804012, "grad_norm": 1.419377079967674, "learning_rate": 9.95424881028585e-07, "loss": 0.39553767442703247, "step": 4567 }, { "epoch": 1.0532626239335947, "grad_norm": 1.3361167736969362, "learning_rate": 9.9504362411622e-07, "loss": 0.47618645429611206, "step": 4568 }, { "epoch": 1.0534931980631772, "grad_norm": 1.6469408967264108, "learning_rate": 9.94662367924315e-07, "loss": 0.4613817036151886, "step": 4569 }, { "epoch": 1.05372377219276, "grad_norm": 1.4563205269464143, "learning_rate": 9.942811125082884e-07, "loss": 0.35888034105300903, "step": 4570 }, { "epoch": 1.0539543463223426, "grad_norm": 1.896669698951033, "learning_rate": 9.938998579235606e-07, "loss": 0.45810097455978394, "step": 4571 }, { "epoch": 1.0541849204519254, "grad_norm": 1.4115626759758866, "learning_rate": 9.935186042255499e-07, "loss": 0.5351384878158569, "step": 4572 }, { "epoch": 1.0544154945815079, "grad_norm": 1.4888165757644622, "learning_rate": 9.931373514696759e-07, "loss": 0.5261274576187134, "step": 4573 }, { "epoch": 1.0546460687110906, "grad_norm": 1.368295507669899, "learning_rate": 9.927560997113573e-07, "loss": 0.483295202255249, "step": 4574 }, { "epoch": 1.0548766428406733, "grad_norm": 1.5639325535974613, "learning_rate": 9.923748490060132e-07, "loss": 0.5371580719947815, "step": 4575 }, { "epoch": 1.055107216970256, "grad_norm": 1.8721225876517977, "learning_rate": 9.919935994090622e-07, "loss": 0.4863673746585846, "step": 4576 }, { "epoch": 1.0553377910998385, "grad_norm": 1.5391981555318386, "learning_rate": 9.916123509759224e-07, "loss": 0.47929099202156067, "step": 4577 }, { "epoch": 1.0555683652294212, "grad_norm": 1.3884034720788059, "learning_rate": 9.912311037620126e-07, "loss": 0.4687851667404175, "step": 4578 }, { "epoch": 1.055798939359004, "grad_norm": 1.5841867302150618, "learning_rate": 9.908498578227504e-07, "loss": 0.5308720469474792, "step": 4579 }, { "epoch": 1.0560295134885866, "grad_norm": 1.8691314272616926, "learning_rate": 9.904686132135546e-07, "loss": 0.45900580286979675, "step": 4580 }, { "epoch": 1.0562600876181691, "grad_norm": 1.4586686619480431, "learning_rate": 9.900873699898422e-07, "loss": 0.49392157793045044, "step": 4581 }, { "epoch": 1.0564906617477519, "grad_norm": 1.6139111586944341, "learning_rate": 9.89706128207031e-07, "loss": 0.47190070152282715, "step": 4582 }, { "epoch": 1.0567212358773346, "grad_norm": 1.7781894650458763, "learning_rate": 9.893248879205382e-07, "loss": 0.4431575834751129, "step": 4583 }, { "epoch": 1.0569518100069173, "grad_norm": 1.293421470994464, "learning_rate": 9.889436491857814e-07, "loss": 0.49873441457748413, "step": 4584 }, { "epoch": 1.0571823841364998, "grad_norm": 1.4263954197349762, "learning_rate": 9.885624120581772e-07, "loss": 0.41190844774246216, "step": 4585 }, { "epoch": 1.0574129582660825, "grad_norm": 1.5698735406284627, "learning_rate": 9.881811765931423e-07, "loss": 0.5164123773574829, "step": 4586 }, { "epoch": 1.0576435323956652, "grad_norm": 1.5034141006108586, "learning_rate": 9.877999428460933e-07, "loss": 0.4141567349433899, "step": 4587 }, { "epoch": 1.057874106525248, "grad_norm": 1.557658840701198, "learning_rate": 9.87418710872446e-07, "loss": 0.457628458738327, "step": 4588 }, { "epoch": 1.0581046806548304, "grad_norm": 1.4732865673601758, "learning_rate": 9.870374807276168e-07, "loss": 0.41788995265960693, "step": 4589 }, { "epoch": 1.0583352547844131, "grad_norm": 1.6240063497851516, "learning_rate": 9.866562524670209e-07, "loss": 0.5124667882919312, "step": 4590 }, { "epoch": 1.0585658289139959, "grad_norm": 1.1619873853554898, "learning_rate": 9.862750261460742e-07, "loss": 0.4192196726799011, "step": 4591 }, { "epoch": 1.0587964030435786, "grad_norm": 1.3804521479784477, "learning_rate": 9.858938018201913e-07, "loss": 0.4345153868198395, "step": 4592 }, { "epoch": 1.059026977173161, "grad_norm": 1.3186049119261667, "learning_rate": 9.855125795447874e-07, "loss": 0.391804963350296, "step": 4593 }, { "epoch": 1.0592575513027438, "grad_norm": 1.3394610780120433, "learning_rate": 9.851313593752767e-07, "loss": 0.3904710114002228, "step": 4594 }, { "epoch": 1.0594881254323265, "grad_norm": 1.4234043935357816, "learning_rate": 9.847501413670742e-07, "loss": 0.37314411997795105, "step": 4595 }, { "epoch": 1.0597186995619092, "grad_norm": 1.7572920451540888, "learning_rate": 9.843689255755926e-07, "loss": 0.5402779579162598, "step": 4596 }, { "epoch": 1.0599492736914917, "grad_norm": 1.4688689617213957, "learning_rate": 9.839877120562463e-07, "loss": 0.4243565797805786, "step": 4597 }, { "epoch": 1.0601798478210744, "grad_norm": 1.6330717694890693, "learning_rate": 9.836065008644484e-07, "loss": 0.4504585564136505, "step": 4598 }, { "epoch": 1.0604104219506572, "grad_norm": 1.3073319656874434, "learning_rate": 9.832252920556115e-07, "loss": 0.46487870812416077, "step": 4599 }, { "epoch": 1.0606409960802399, "grad_norm": 1.452752590173503, "learning_rate": 9.828440856851487e-07, "loss": 0.470059871673584, "step": 4600 }, { "epoch": 1.0608715702098224, "grad_norm": 1.4580866952416336, "learning_rate": 9.824628818084716e-07, "loss": 0.4307391047477722, "step": 4601 }, { "epoch": 1.061102144339405, "grad_norm": 1.545423985207434, "learning_rate": 9.820816804809927e-07, "loss": 0.49449142813682556, "step": 4602 }, { "epoch": 1.0613327184689878, "grad_norm": 1.4803985945664777, "learning_rate": 9.817004817581229e-07, "loss": 0.4932701885700226, "step": 4603 }, { "epoch": 1.0615632925985705, "grad_norm": 1.4502372729626234, "learning_rate": 9.813192856952739e-07, "loss": 0.49543553590774536, "step": 4604 }, { "epoch": 1.061793866728153, "grad_norm": 1.1578379554584357, "learning_rate": 9.809380923478554e-07, "loss": 0.3906818926334381, "step": 4605 }, { "epoch": 1.0620244408577357, "grad_norm": 1.4436425775524195, "learning_rate": 9.80556901771279e-07, "loss": 0.41667112708091736, "step": 4606 }, { "epoch": 1.0622550149873184, "grad_norm": 1.475010908303335, "learning_rate": 9.801757140209538e-07, "loss": 0.36195361614227295, "step": 4607 }, { "epoch": 1.0624855891169012, "grad_norm": 1.4053500417900708, "learning_rate": 9.797945291522892e-07, "loss": 0.4056081175804138, "step": 4608 }, { "epoch": 1.0627161632464837, "grad_norm": 1.4310559040175581, "learning_rate": 9.794133472206948e-07, "loss": 0.5048736929893494, "step": 4609 }, { "epoch": 1.0629467373760664, "grad_norm": 1.3896886111265523, "learning_rate": 9.790321682815788e-07, "loss": 0.4846169352531433, "step": 4610 }, { "epoch": 1.063177311505649, "grad_norm": 1.3569892439901554, "learning_rate": 9.7865099239035e-07, "loss": 0.5149316787719727, "step": 4611 }, { "epoch": 1.0634078856352318, "grad_norm": 1.5344870466099163, "learning_rate": 9.782698196024155e-07, "loss": 0.3816874623298645, "step": 4612 }, { "epoch": 1.0636384597648143, "grad_norm": 1.39688044025804, "learning_rate": 9.77888649973183e-07, "loss": 0.5469645261764526, "step": 4613 }, { "epoch": 1.063869033894397, "grad_norm": 1.2954034757094786, "learning_rate": 9.775074835580593e-07, "loss": 0.42796647548675537, "step": 4614 }, { "epoch": 1.0640996080239797, "grad_norm": 1.4924945772778404, "learning_rate": 9.771263204124512e-07, "loss": 0.4931715726852417, "step": 4615 }, { "epoch": 1.0643301821535625, "grad_norm": 1.367565961969811, "learning_rate": 9.767451605917641e-07, "loss": 0.5435268878936768, "step": 4616 }, { "epoch": 1.064560756283145, "grad_norm": 1.6066093331363582, "learning_rate": 9.763640041514033e-07, "loss": 0.46361953020095825, "step": 4617 }, { "epoch": 1.0647913304127277, "grad_norm": 1.240667858579194, "learning_rate": 9.759828511467743e-07, "loss": 0.3742775619029999, "step": 4618 }, { "epoch": 1.0650219045423104, "grad_norm": 1.5520509510364326, "learning_rate": 9.75601701633281e-07, "loss": 0.4060659408569336, "step": 4619 }, { "epoch": 1.065252478671893, "grad_norm": 1.2052909018096978, "learning_rate": 9.75220555666328e-07, "loss": 0.45316505432128906, "step": 4620 }, { "epoch": 1.0654830528014756, "grad_norm": 1.4180749825165042, "learning_rate": 9.748394133013179e-07, "loss": 0.4548850655555725, "step": 4621 }, { "epoch": 1.0657136269310583, "grad_norm": 1.2793215690458788, "learning_rate": 9.744582745936547e-07, "loss": 0.5065705180168152, "step": 4622 }, { "epoch": 1.065944201060641, "grad_norm": 1.4912306578981507, "learning_rate": 9.740771395987395e-07, "loss": 0.4114503860473633, "step": 4623 }, { "epoch": 1.0661747751902237, "grad_norm": 1.4280192292492455, "learning_rate": 9.736960083719752e-07, "loss": 0.4568501114845276, "step": 4624 }, { "epoch": 1.0664053493198062, "grad_norm": 1.2972553921673455, "learning_rate": 9.733148809687624e-07, "loss": 0.49967026710510254, "step": 4625 }, { "epoch": 1.066635923449389, "grad_norm": 1.4642812597554793, "learning_rate": 9.729337574445025e-07, "loss": 0.529681384563446, "step": 4626 }, { "epoch": 1.0668664975789717, "grad_norm": 1.4791668180519966, "learning_rate": 9.72552637854595e-07, "loss": 0.4819791316986084, "step": 4627 }, { "epoch": 1.0670970717085544, "grad_norm": 1.3549019355661691, "learning_rate": 9.721715222544396e-07, "loss": 0.4186001718044281, "step": 4628 }, { "epoch": 1.0673276458381369, "grad_norm": 1.221767945169434, "learning_rate": 9.717904106994359e-07, "loss": 0.4442529082298279, "step": 4629 }, { "epoch": 1.0675582199677196, "grad_norm": 1.886711265076429, "learning_rate": 9.714093032449815e-07, "loss": 0.4655953049659729, "step": 4630 }, { "epoch": 1.0677887940973023, "grad_norm": 1.2641786187672595, "learning_rate": 9.71028199946475e-07, "loss": 0.45248714089393616, "step": 4631 }, { "epoch": 1.068019368226885, "grad_norm": 1.547270813258376, "learning_rate": 9.706471008593128e-07, "loss": 0.4244336485862732, "step": 4632 }, { "epoch": 1.0682499423564675, "grad_norm": 1.441914160495435, "learning_rate": 9.702660060388923e-07, "loss": 0.4396495819091797, "step": 4633 }, { "epoch": 1.0684805164860502, "grad_norm": 1.3832490714301353, "learning_rate": 9.698849155406089e-07, "loss": 0.4504232406616211, "step": 4634 }, { "epoch": 1.068711090615633, "grad_norm": 1.5660708185651993, "learning_rate": 9.695038294198588e-07, "loss": 0.40112000703811646, "step": 4635 }, { "epoch": 1.0689416647452157, "grad_norm": 1.5797332497697052, "learning_rate": 9.691227477320357e-07, "loss": 0.4511067271232605, "step": 4636 }, { "epoch": 1.0691722388747982, "grad_norm": 1.4624732720511697, "learning_rate": 9.687416705325342e-07, "loss": 0.44541406631469727, "step": 4637 }, { "epoch": 1.069402813004381, "grad_norm": 1.3872197811900322, "learning_rate": 9.68360597876748e-07, "loss": 0.5038847327232361, "step": 4638 }, { "epoch": 1.0696333871339636, "grad_norm": 1.2356986255488158, "learning_rate": 9.67979529820069e-07, "loss": 0.41960060596466064, "step": 4639 }, { "epoch": 1.0698639612635463, "grad_norm": 1.6121133741192841, "learning_rate": 9.6759846641789e-07, "loss": 0.49760064482688904, "step": 4640 }, { "epoch": 1.0700945353931288, "grad_norm": 1.7920934015909264, "learning_rate": 9.672174077256023e-07, "loss": 0.46513333916664124, "step": 4641 }, { "epoch": 1.0703251095227115, "grad_norm": 1.5128396951273724, "learning_rate": 9.66836353798597e-07, "loss": 0.41129356622695923, "step": 4642 }, { "epoch": 1.0705556836522943, "grad_norm": 1.1803503202020598, "learning_rate": 9.664553046922634e-07, "loss": 0.5021853446960449, "step": 4643 }, { "epoch": 1.070786257781877, "grad_norm": 1.7444146178498035, "learning_rate": 9.660742604619912e-07, "loss": 0.5184302926063538, "step": 4644 }, { "epoch": 1.0710168319114595, "grad_norm": 1.8278981381437267, "learning_rate": 9.65693221163169e-07, "loss": 0.4793940484523773, "step": 4645 }, { "epoch": 1.0712474060410422, "grad_norm": 1.6157027564363053, "learning_rate": 9.653121868511854e-07, "loss": 0.43454456329345703, "step": 4646 }, { "epoch": 1.071477980170625, "grad_norm": 1.3605748894383922, "learning_rate": 9.649311575814266e-07, "loss": 0.49123185873031616, "step": 4647 }, { "epoch": 1.0717085543002076, "grad_norm": 1.2316654311751212, "learning_rate": 9.645501334092792e-07, "loss": 0.37020617723464966, "step": 4648 }, { "epoch": 1.0719391284297901, "grad_norm": 1.3370776970957903, "learning_rate": 9.641691143901296e-07, "loss": 0.461778849363327, "step": 4649 }, { "epoch": 1.0721697025593728, "grad_norm": 1.7402606402657241, "learning_rate": 9.63788100579362e-07, "loss": 0.46640273928642273, "step": 4650 }, { "epoch": 1.0724002766889555, "grad_norm": 1.543123481033078, "learning_rate": 9.634070920323614e-07, "loss": 0.44978517293930054, "step": 4651 }, { "epoch": 1.0726308508185383, "grad_norm": 1.5280216878422028, "learning_rate": 9.630260888045103e-07, "loss": 0.5070945024490356, "step": 4652 }, { "epoch": 1.0728614249481208, "grad_norm": 1.3361545028178132, "learning_rate": 9.626450909511926e-07, "loss": 0.4513545334339142, "step": 4653 }, { "epoch": 1.0730919990777035, "grad_norm": 1.2352969540055843, "learning_rate": 9.622640985277889e-07, "loss": 0.4430030584335327, "step": 4654 }, { "epoch": 1.0733225732072862, "grad_norm": 1.7185507494111099, "learning_rate": 9.618831115896814e-07, "loss": 0.45619165897369385, "step": 4655 }, { "epoch": 1.073553147336869, "grad_norm": 1.3452693944435885, "learning_rate": 9.615021301922497e-07, "loss": 0.411594033241272, "step": 4656 }, { "epoch": 1.0737837214664514, "grad_norm": 1.696260647190632, "learning_rate": 9.611211543908732e-07, "loss": 0.5230164527893066, "step": 4657 }, { "epoch": 1.0740142955960341, "grad_norm": 1.2546383850728546, "learning_rate": 9.607401842409316e-07, "loss": 0.45379406213760376, "step": 4658 }, { "epoch": 1.0742448697256168, "grad_norm": 1.4465974878955368, "learning_rate": 9.603592197978016e-07, "loss": 0.47254839539527893, "step": 4659 }, { "epoch": 1.0744754438551993, "grad_norm": 1.4899733507525732, "learning_rate": 9.59978261116861e-07, "loss": 0.3990492820739746, "step": 4660 }, { "epoch": 1.074706017984782, "grad_norm": 1.2629235312972213, "learning_rate": 9.595973082534855e-07, "loss": 0.41671720147132874, "step": 4661 }, { "epoch": 1.0749365921143648, "grad_norm": 1.3769486256402874, "learning_rate": 9.59216361263051e-07, "loss": 0.4269324839115143, "step": 4662 }, { "epoch": 1.0751671662439475, "grad_norm": 1.7548425902665015, "learning_rate": 9.588354202009314e-07, "loss": 0.42989516258239746, "step": 4663 }, { "epoch": 1.0753977403735302, "grad_norm": 1.5474664125691167, "learning_rate": 9.584544851225008e-07, "loss": 0.5224605798721313, "step": 4664 }, { "epoch": 1.0756283145031127, "grad_norm": 1.393419713492626, "learning_rate": 9.580735560831318e-07, "loss": 0.3853871524333954, "step": 4665 }, { "epoch": 1.0758588886326954, "grad_norm": 1.360242198109215, "learning_rate": 9.576926331381968e-07, "loss": 0.4460698366165161, "step": 4666 }, { "epoch": 1.0760894627622781, "grad_norm": 1.524802030014046, "learning_rate": 9.57311716343066e-07, "loss": 0.45617812871932983, "step": 4667 }, { "epoch": 1.0763200368918606, "grad_norm": 1.7079854681006486, "learning_rate": 9.569308057531096e-07, "loss": 0.5631355047225952, "step": 4668 }, { "epoch": 1.0765506110214433, "grad_norm": 1.3155596598859882, "learning_rate": 9.565499014236977e-07, "loss": 0.4197179973125458, "step": 4669 }, { "epoch": 1.076781185151026, "grad_norm": 1.5894301477582775, "learning_rate": 9.561690034101973e-07, "loss": 0.4262646436691284, "step": 4670 }, { "epoch": 1.0770117592806088, "grad_norm": 1.4805271814916348, "learning_rate": 9.557881117679768e-07, "loss": 0.42719966173171997, "step": 4671 }, { "epoch": 1.0772423334101915, "grad_norm": 1.3479731294807211, "learning_rate": 9.554072265524022e-07, "loss": 0.4278491735458374, "step": 4672 }, { "epoch": 1.077472907539774, "grad_norm": 1.4324931591130032, "learning_rate": 9.550263478188396e-07, "loss": 0.3915478587150574, "step": 4673 }, { "epoch": 1.0777034816693567, "grad_norm": 1.4807606218185139, "learning_rate": 9.546454756226525e-07, "loss": 0.4391477704048157, "step": 4674 }, { "epoch": 1.0779340557989394, "grad_norm": 1.6230153652074522, "learning_rate": 9.542646100192055e-07, "loss": 0.47325795888900757, "step": 4675 }, { "epoch": 1.078164629928522, "grad_norm": 1.3326185339285364, "learning_rate": 9.538837510638607e-07, "loss": 0.4698373079299927, "step": 4676 }, { "epoch": 1.0783952040581046, "grad_norm": 1.5843176103578385, "learning_rate": 9.535028988119805e-07, "loss": 0.4252272844314575, "step": 4677 }, { "epoch": 1.0786257781876873, "grad_norm": 1.4642476960881914, "learning_rate": 9.531220533189253e-07, "loss": 0.46726179122924805, "step": 4678 }, { "epoch": 1.07885635231727, "grad_norm": 1.3792408296611596, "learning_rate": 9.527412146400542e-07, "loss": 0.46616411209106445, "step": 4679 }, { "epoch": 1.0790869264468528, "grad_norm": 1.3938952826758202, "learning_rate": 9.523603828307268e-07, "loss": 0.5607181787490845, "step": 4680 }, { "epoch": 1.0793175005764353, "grad_norm": 1.6234566687004295, "learning_rate": 9.519795579463002e-07, "loss": 0.5039520859718323, "step": 4681 }, { "epoch": 1.079548074706018, "grad_norm": 1.6358698645091259, "learning_rate": 9.515987400421322e-07, "loss": 0.45532113313674927, "step": 4682 }, { "epoch": 1.0797786488356007, "grad_norm": 1.3987490622653254, "learning_rate": 9.512179291735772e-07, "loss": 0.4198398292064667, "step": 4683 }, { "epoch": 1.0800092229651832, "grad_norm": 2.0745649369110577, "learning_rate": 9.508371253959909e-07, "loss": 0.371380090713501, "step": 4684 }, { "epoch": 1.080239797094766, "grad_norm": 1.6602368865180097, "learning_rate": 9.504563287647265e-07, "loss": 0.44341978430747986, "step": 4685 }, { "epoch": 1.0804703712243486, "grad_norm": 1.3233390600316475, "learning_rate": 9.500755393351372e-07, "loss": 0.4184574484825134, "step": 4686 }, { "epoch": 1.0807009453539314, "grad_norm": 1.554478033670439, "learning_rate": 9.496947571625739e-07, "loss": 0.5584033727645874, "step": 4687 }, { "epoch": 1.0809315194835138, "grad_norm": 1.4303675439776025, "learning_rate": 9.493139823023874e-07, "loss": 0.44405317306518555, "step": 4688 }, { "epoch": 1.0811620936130966, "grad_norm": 1.5109921870756446, "learning_rate": 9.489332148099277e-07, "loss": 0.41137009859085083, "step": 4689 }, { "epoch": 1.0813926677426793, "grad_norm": 1.5933695881826222, "learning_rate": 9.485524547405424e-07, "loss": 0.4831092357635498, "step": 4690 }, { "epoch": 1.081623241872262, "grad_norm": 1.3224307777817799, "learning_rate": 9.481717021495793e-07, "loss": 0.41243845224380493, "step": 4691 }, { "epoch": 1.0818538160018445, "grad_norm": 1.506253034871724, "learning_rate": 9.477909570923844e-07, "loss": 0.33649003505706787, "step": 4692 }, { "epoch": 1.0820843901314272, "grad_norm": 1.3759728989311568, "learning_rate": 9.474102196243033e-07, "loss": 0.4959014654159546, "step": 4693 }, { "epoch": 1.08231496426101, "grad_norm": 1.4717496348190642, "learning_rate": 9.470294898006795e-07, "loss": 0.43924248218536377, "step": 4694 }, { "epoch": 1.0825455383905926, "grad_norm": 1.5425758669304555, "learning_rate": 9.466487676768563e-07, "loss": 0.4777243137359619, "step": 4695 }, { "epoch": 1.0827761125201751, "grad_norm": 1.7258911046059784, "learning_rate": 9.462680533081752e-07, "loss": 0.4488077759742737, "step": 4696 }, { "epoch": 1.0830066866497579, "grad_norm": 1.5375128445555653, "learning_rate": 9.458873467499778e-07, "loss": 0.5058270692825317, "step": 4697 }, { "epoch": 1.0832372607793406, "grad_norm": 1.5052517610014813, "learning_rate": 9.455066480576025e-07, "loss": 0.4537619650363922, "step": 4698 }, { "epoch": 1.0834678349089233, "grad_norm": 1.5194044905455244, "learning_rate": 9.45125957286388e-07, "loss": 0.4725874960422516, "step": 4699 }, { "epoch": 1.0836984090385058, "grad_norm": 1.61840988882087, "learning_rate": 9.447452744916722e-07, "loss": 0.4967196583747864, "step": 4700 }, { "epoch": 1.0839289831680885, "grad_norm": 1.3272496966479597, "learning_rate": 9.443645997287902e-07, "loss": 0.43682345747947693, "step": 4701 }, { "epoch": 1.0841595572976712, "grad_norm": 1.4038050893134464, "learning_rate": 9.439839330530781e-07, "loss": 0.48844271898269653, "step": 4702 }, { "epoch": 1.084390131427254, "grad_norm": 1.3581740542884078, "learning_rate": 9.436032745198682e-07, "loss": 0.43654918670654297, "step": 4703 }, { "epoch": 1.0846207055568364, "grad_norm": 1.6070546851567389, "learning_rate": 9.432226241844947e-07, "loss": 0.5034382939338684, "step": 4704 }, { "epoch": 1.0848512796864191, "grad_norm": 1.9516449815592325, "learning_rate": 9.428419821022877e-07, "loss": 0.5407527089118958, "step": 4705 }, { "epoch": 1.0850818538160019, "grad_norm": 1.3188521673213394, "learning_rate": 9.424613483285783e-07, "loss": 0.4372078478336334, "step": 4706 }, { "epoch": 1.0853124279455846, "grad_norm": 1.3673238165045705, "learning_rate": 9.420807229186949e-07, "loss": 0.5264855623245239, "step": 4707 }, { "epoch": 1.085543002075167, "grad_norm": 1.2884056915833075, "learning_rate": 9.417001059279652e-07, "loss": 0.3810223937034607, "step": 4708 }, { "epoch": 1.0857735762047498, "grad_norm": 1.318670262430079, "learning_rate": 9.413194974117163e-07, "loss": 0.368865430355072, "step": 4709 }, { "epoch": 1.0860041503343325, "grad_norm": 1.3202107346651724, "learning_rate": 9.409388974252729e-07, "loss": 0.41845810413360596, "step": 4710 }, { "epoch": 1.0862347244639152, "grad_norm": 1.4709870024189373, "learning_rate": 9.405583060239594e-07, "loss": 0.5185590982437134, "step": 4711 }, { "epoch": 1.0864652985934977, "grad_norm": 1.7793671382372165, "learning_rate": 9.401777232630983e-07, "loss": 0.4848501682281494, "step": 4712 }, { "epoch": 1.0866958727230804, "grad_norm": 1.5218788678149173, "learning_rate": 9.397971491980119e-07, "loss": 0.5581566691398621, "step": 4713 }, { "epoch": 1.0869264468526632, "grad_norm": 1.475012350727374, "learning_rate": 9.394165838840196e-07, "loss": 0.42043447494506836, "step": 4714 }, { "epoch": 1.0871570209822459, "grad_norm": 1.3731967040929853, "learning_rate": 9.39036027376441e-07, "loss": 0.45076289772987366, "step": 4715 }, { "epoch": 1.0873875951118284, "grad_norm": 1.353578451117457, "learning_rate": 9.386554797305934e-07, "loss": 0.3650796413421631, "step": 4716 }, { "epoch": 1.087618169241411, "grad_norm": 1.436571768450736, "learning_rate": 9.38274941001794e-07, "loss": 0.4837912321090698, "step": 4717 }, { "epoch": 1.0878487433709938, "grad_norm": 1.5272898845570653, "learning_rate": 9.378944112453574e-07, "loss": 0.41277679800987244, "step": 4718 }, { "epoch": 1.0880793175005765, "grad_norm": 1.7344713328668464, "learning_rate": 9.375138905165973e-07, "loss": 0.48409390449523926, "step": 4719 }, { "epoch": 1.088309891630159, "grad_norm": 1.360949967282617, "learning_rate": 9.371333788708268e-07, "loss": 0.3952450752258301, "step": 4720 }, { "epoch": 1.0885404657597417, "grad_norm": 1.6450358552008089, "learning_rate": 9.367528763633563e-07, "loss": 0.42314866185188293, "step": 4721 }, { "epoch": 1.0887710398893244, "grad_norm": 1.492846868063658, "learning_rate": 9.363723830494966e-07, "loss": 0.5322449207305908, "step": 4722 }, { "epoch": 1.0890016140189072, "grad_norm": 1.3552869600155872, "learning_rate": 9.359918989845557e-07, "loss": 0.42307883501052856, "step": 4723 }, { "epoch": 1.0892321881484897, "grad_norm": 1.3481901437941268, "learning_rate": 9.356114242238413e-07, "loss": 0.39321061968803406, "step": 4724 }, { "epoch": 1.0894627622780724, "grad_norm": 1.6333273110158268, "learning_rate": 9.352309588226585e-07, "loss": 0.5064421892166138, "step": 4725 }, { "epoch": 1.089693336407655, "grad_norm": 1.4475724274606394, "learning_rate": 9.348505028363125e-07, "loss": 0.44825220108032227, "step": 4726 }, { "epoch": 1.0899239105372378, "grad_norm": 1.384316241889946, "learning_rate": 9.344700563201065e-07, "loss": 0.4323306679725647, "step": 4727 }, { "epoch": 1.0901544846668203, "grad_norm": 1.3254947105842285, "learning_rate": 9.340896193293414e-07, "loss": 0.44907987117767334, "step": 4728 }, { "epoch": 1.090385058796403, "grad_norm": 1.3161326376052391, "learning_rate": 9.337091919193185e-07, "loss": 0.416559636592865, "step": 4729 }, { "epoch": 1.0906156329259857, "grad_norm": 1.6044534711260028, "learning_rate": 9.33328774145336e-07, "loss": 0.5361836552619934, "step": 4730 }, { "epoch": 1.0908462070555685, "grad_norm": 1.3742080048163032, "learning_rate": 9.329483660626922e-07, "loss": 0.4815465211868286, "step": 4731 }, { "epoch": 1.091076781185151, "grad_norm": 1.4553535934080677, "learning_rate": 9.325679677266826e-07, "loss": 0.5205050110816956, "step": 4732 }, { "epoch": 1.0913073553147337, "grad_norm": 1.9887709257052897, "learning_rate": 9.321875791926028e-07, "loss": 0.4830896258354187, "step": 4733 }, { "epoch": 1.0915379294443164, "grad_norm": 1.3739860439026885, "learning_rate": 9.318072005157451e-07, "loss": 0.4394579827785492, "step": 4734 }, { "epoch": 1.091768503573899, "grad_norm": 1.6664317769247758, "learning_rate": 9.314268317514022e-07, "loss": 0.4614049792289734, "step": 4735 }, { "epoch": 1.0919990777034816, "grad_norm": 1.5989711566807139, "learning_rate": 9.31046472954864e-07, "loss": 0.5123867988586426, "step": 4736 }, { "epoch": 1.0922296518330643, "grad_norm": 1.879970895540274, "learning_rate": 9.306661241814204e-07, "loss": 0.43548035621643066, "step": 4737 }, { "epoch": 1.092460225962647, "grad_norm": 1.4190205685105515, "learning_rate": 9.302857854863579e-07, "loss": 0.4102709889411926, "step": 4738 }, { "epoch": 1.0926908000922297, "grad_norm": 1.7007344632271022, "learning_rate": 9.299054569249628e-07, "loss": 0.46276605129241943, "step": 4739 }, { "epoch": 1.0929213742218122, "grad_norm": 1.5950261365712695, "learning_rate": 9.295251385525204e-07, "loss": 0.47700244188308716, "step": 4740 }, { "epoch": 1.093151948351395, "grad_norm": 1.5081940540312389, "learning_rate": 9.29144830424313e-07, "loss": 0.5492758750915527, "step": 4741 }, { "epoch": 1.0933825224809777, "grad_norm": 1.6521559747103167, "learning_rate": 9.287645325956228e-07, "loss": 0.3846803307533264, "step": 4742 }, { "epoch": 1.0936130966105604, "grad_norm": 1.4300122822608972, "learning_rate": 9.283842451217294e-07, "loss": 0.47237372398376465, "step": 4743 }, { "epoch": 1.0938436707401429, "grad_norm": 1.6996074936661776, "learning_rate": 9.280039680579122e-07, "loss": 0.4651675820350647, "step": 4744 }, { "epoch": 1.0940742448697256, "grad_norm": 1.6397662048344088, "learning_rate": 9.276237014594476e-07, "loss": 0.5472640991210938, "step": 4745 }, { "epoch": 1.0943048189993083, "grad_norm": 1.3158004626748314, "learning_rate": 9.272434453816117e-07, "loss": 0.45672351121902466, "step": 4746 }, { "epoch": 1.094535393128891, "grad_norm": 1.4246135812847533, "learning_rate": 9.268631998796785e-07, "loss": 0.4589729905128479, "step": 4747 }, { "epoch": 1.0947659672584735, "grad_norm": 1.4398967186683822, "learning_rate": 9.264829650089201e-07, "loss": 0.45882588624954224, "step": 4748 }, { "epoch": 1.0949965413880562, "grad_norm": 1.8586265213095916, "learning_rate": 9.26102740824608e-07, "loss": 0.6183863282203674, "step": 4749 }, { "epoch": 1.095227115517639, "grad_norm": 1.4631882562588927, "learning_rate": 9.257225273820112e-07, "loss": 0.4512014389038086, "step": 4750 }, { "epoch": 1.0954576896472217, "grad_norm": 1.5706161838979387, "learning_rate": 9.253423247363983e-07, "loss": 0.5006139874458313, "step": 4751 }, { "epoch": 1.0956882637768042, "grad_norm": 1.4110458948787974, "learning_rate": 9.249621329430346e-07, "loss": 0.5394018888473511, "step": 4752 }, { "epoch": 1.095918837906387, "grad_norm": 1.5150959480945791, "learning_rate": 9.245819520571858e-07, "loss": 0.35523056983947754, "step": 4753 }, { "epoch": 1.0961494120359696, "grad_norm": 1.3819812548856059, "learning_rate": 9.242017821341143e-07, "loss": 0.44379743933677673, "step": 4754 }, { "epoch": 1.0963799861655523, "grad_norm": 1.6129174796361336, "learning_rate": 9.238216232290821e-07, "loss": 0.4190908968448639, "step": 4755 }, { "epoch": 1.0966105602951348, "grad_norm": 1.6222067534589701, "learning_rate": 9.234414753973488e-07, "loss": 0.44818970561027527, "step": 4756 }, { "epoch": 1.0968411344247175, "grad_norm": 1.4925644141379035, "learning_rate": 9.230613386941734e-07, "loss": 0.4134204685688019, "step": 4757 }, { "epoch": 1.0970717085543003, "grad_norm": 1.2148478016107016, "learning_rate": 9.226812131748118e-07, "loss": 0.3554952144622803, "step": 4758 }, { "epoch": 1.097302282683883, "grad_norm": 1.674922299722459, "learning_rate": 9.223010988945194e-07, "loss": 0.522594690322876, "step": 4759 }, { "epoch": 1.0975328568134655, "grad_norm": 1.4320622438584156, "learning_rate": 9.219209959085502e-07, "loss": 0.44814133644104004, "step": 4760 }, { "epoch": 1.0977634309430482, "grad_norm": 1.4723286174250931, "learning_rate": 9.215409042721551e-07, "loss": 0.42479634284973145, "step": 4761 }, { "epoch": 1.097994005072631, "grad_norm": 1.5414891522514993, "learning_rate": 9.211608240405849e-07, "loss": 0.4384934902191162, "step": 4762 }, { "epoch": 1.0982245792022136, "grad_norm": 1.4811013868533904, "learning_rate": 9.207807552690878e-07, "loss": 0.5378658771514893, "step": 4763 }, { "epoch": 1.098455153331796, "grad_norm": 1.4445039209024981, "learning_rate": 9.204006980129111e-07, "loss": 0.5071386694908142, "step": 4764 }, { "epoch": 1.0986857274613788, "grad_norm": 1.5460474623164162, "learning_rate": 9.200206523272992e-07, "loss": 0.46085822582244873, "step": 4765 }, { "epoch": 1.0989163015909615, "grad_norm": 1.544747382675103, "learning_rate": 9.196406182674964e-07, "loss": 0.5083057880401611, "step": 4766 }, { "epoch": 1.0991468757205443, "grad_norm": 1.2845065354356755, "learning_rate": 9.192605958887438e-07, "loss": 0.48307740688323975, "step": 4767 }, { "epoch": 1.0993774498501268, "grad_norm": 1.8405581264672015, "learning_rate": 9.188805852462824e-07, "loss": 0.5195509791374207, "step": 4768 }, { "epoch": 1.0996080239797095, "grad_norm": 1.5537273798526559, "learning_rate": 9.185005863953498e-07, "loss": 0.5161266326904297, "step": 4769 }, { "epoch": 1.0998385981092922, "grad_norm": 1.5985708455901557, "learning_rate": 9.181205993911827e-07, "loss": 0.4757764935493469, "step": 4770 }, { "epoch": 1.1000691722388747, "grad_norm": 1.5307887938016926, "learning_rate": 9.177406242890167e-07, "loss": 0.4071381688117981, "step": 4771 }, { "epoch": 1.1002997463684574, "grad_norm": 1.3525378547606768, "learning_rate": 9.173606611440842e-07, "loss": 0.4794449210166931, "step": 4772 }, { "epoch": 1.1005303204980401, "grad_norm": 1.3205547171467464, "learning_rate": 9.169807100116175e-07, "loss": 0.4678712487220764, "step": 4773 }, { "epoch": 1.1007608946276228, "grad_norm": 1.2863487713029464, "learning_rate": 9.166007709468456e-07, "loss": 0.43200960755348206, "step": 4774 }, { "epoch": 1.1009914687572055, "grad_norm": 1.8114336882311408, "learning_rate": 9.162208440049974e-07, "loss": 0.49283260107040405, "step": 4775 }, { "epoch": 1.101222042886788, "grad_norm": 1.2265456496064566, "learning_rate": 9.158409292412982e-07, "loss": 0.4430215358734131, "step": 4776 }, { "epoch": 1.1014526170163708, "grad_norm": 1.282698473472426, "learning_rate": 9.154610267109731e-07, "loss": 0.4529581069946289, "step": 4777 }, { "epoch": 1.1016831911459535, "grad_norm": 1.3698366211761768, "learning_rate": 9.150811364692446e-07, "loss": 0.3872554302215576, "step": 4778 }, { "epoch": 1.101913765275536, "grad_norm": 1.4034579683870105, "learning_rate": 9.147012585713331e-07, "loss": 0.466983437538147, "step": 4779 }, { "epoch": 1.1021443394051187, "grad_norm": 1.3799350437064777, "learning_rate": 9.143213930724587e-07, "loss": 0.4841456115245819, "step": 4780 }, { "epoch": 1.1023749135347014, "grad_norm": 2.083063073101601, "learning_rate": 9.139415400278376e-07, "loss": 0.4506613612174988, "step": 4781 }, { "epoch": 1.1026054876642841, "grad_norm": 1.5047320834529434, "learning_rate": 9.135616994926861e-07, "loss": 0.428241491317749, "step": 4782 }, { "epoch": 1.1028360617938668, "grad_norm": 1.3329992006000018, "learning_rate": 9.131818715222175e-07, "loss": 0.46940821409225464, "step": 4783 }, { "epoch": 1.1030666359234493, "grad_norm": 1.5416614978551508, "learning_rate": 9.12802056171644e-07, "loss": 0.4527658224105835, "step": 4784 }, { "epoch": 1.103297210053032, "grad_norm": 1.3412511641642377, "learning_rate": 9.124222534961749e-07, "loss": 0.3284989893436432, "step": 4785 }, { "epoch": 1.1035277841826148, "grad_norm": 1.497248247266052, "learning_rate": 9.120424635510193e-07, "loss": 0.448346883058548, "step": 4786 }, { "epoch": 1.1037583583121973, "grad_norm": 1.5413647461227613, "learning_rate": 9.116626863913826e-07, "loss": 0.4625587463378906, "step": 4787 }, { "epoch": 1.10398893244178, "grad_norm": 1.398727589269655, "learning_rate": 9.112829220724703e-07, "loss": 0.37891942262649536, "step": 4788 }, { "epoch": 1.1042195065713627, "grad_norm": 1.510309439727558, "learning_rate": 9.109031706494841e-07, "loss": 0.48719239234924316, "step": 4789 }, { "epoch": 1.1044500807009454, "grad_norm": 1.695631911449914, "learning_rate": 9.105234321776247e-07, "loss": 0.5341615676879883, "step": 4790 }, { "epoch": 1.1046806548305281, "grad_norm": 1.30752453253924, "learning_rate": 9.101437067120918e-07, "loss": 0.36677777767181396, "step": 4791 }, { "epoch": 1.1049112289601106, "grad_norm": 1.3000512165603213, "learning_rate": 9.097639943080813e-07, "loss": 0.4348159432411194, "step": 4792 }, { "epoch": 1.1051418030896933, "grad_norm": 1.3763164723830184, "learning_rate": 9.093842950207891e-07, "loss": 0.44912683963775635, "step": 4793 }, { "epoch": 1.105372377219276, "grad_norm": 1.655048045877048, "learning_rate": 9.090046089054077e-07, "loss": 0.5576057434082031, "step": 4794 }, { "epoch": 1.1056029513488586, "grad_norm": 1.4655907130631036, "learning_rate": 9.08624936017129e-07, "loss": 0.43964770436286926, "step": 4795 }, { "epoch": 1.1058335254784413, "grad_norm": 1.3648059541391266, "learning_rate": 9.082452764111415e-07, "loss": 0.4285386800765991, "step": 4796 }, { "epoch": 1.106064099608024, "grad_norm": 1.6322901017927212, "learning_rate": 9.078656301426332e-07, "loss": 0.4257868230342865, "step": 4797 }, { "epoch": 1.1062946737376067, "grad_norm": 1.9314022304382554, "learning_rate": 9.074859972667895e-07, "loss": 0.4540346562862396, "step": 4798 }, { "epoch": 1.1065252478671892, "grad_norm": 1.6801359554397164, "learning_rate": 9.071063778387933e-07, "loss": 0.5273457765579224, "step": 4799 }, { "epoch": 1.106755821996772, "grad_norm": 1.4107980839711056, "learning_rate": 9.067267719138268e-07, "loss": 0.391310453414917, "step": 4800 }, { "epoch": 1.1069863961263546, "grad_norm": 1.4182050274963418, "learning_rate": 9.063471795470691e-07, "loss": 0.47945383191108704, "step": 4801 }, { "epoch": 1.1072169702559373, "grad_norm": 1.7087277476088294, "learning_rate": 9.05967600793698e-07, "loss": 0.49561476707458496, "step": 4802 }, { "epoch": 1.1074475443855198, "grad_norm": 1.3070252929290396, "learning_rate": 9.05588035708889e-07, "loss": 0.4505256414413452, "step": 4803 }, { "epoch": 1.1076781185151026, "grad_norm": 1.6864844579974707, "learning_rate": 9.052084843478164e-07, "loss": 0.37591490149497986, "step": 4804 }, { "epoch": 1.1079086926446853, "grad_norm": 1.486226704077577, "learning_rate": 9.048289467656508e-07, "loss": 0.478586345911026, "step": 4805 }, { "epoch": 1.108139266774268, "grad_norm": 1.3819959446941394, "learning_rate": 9.044494230175625e-07, "loss": 0.4373725354671478, "step": 4806 }, { "epoch": 1.1083698409038505, "grad_norm": 1.4091791216138099, "learning_rate": 9.040699131587186e-07, "loss": 0.3976345360279083, "step": 4807 }, { "epoch": 1.1086004150334332, "grad_norm": 1.3848852740812903, "learning_rate": 9.036904172442857e-07, "loss": 0.44611310958862305, "step": 4808 }, { "epoch": 1.108830989163016, "grad_norm": 1.3117584806534919, "learning_rate": 9.033109353294262e-07, "loss": 0.40816667675971985, "step": 4809 }, { "epoch": 1.1090615632925986, "grad_norm": 1.359605756890841, "learning_rate": 9.029314674693023e-07, "loss": 0.37462317943573, "step": 4810 }, { "epoch": 1.1092921374221811, "grad_norm": 1.3641846963299056, "learning_rate": 9.025520137190735e-07, "loss": 0.3856509327888489, "step": 4811 }, { "epoch": 1.1095227115517639, "grad_norm": 1.5740711616700624, "learning_rate": 9.021725741338969e-07, "loss": 0.4728443920612335, "step": 4812 }, { "epoch": 1.1097532856813466, "grad_norm": 2.0717537833557773, "learning_rate": 9.017931487689282e-07, "loss": 0.4614938795566559, "step": 4813 }, { "epoch": 1.1099838598109293, "grad_norm": 1.4925546437709947, "learning_rate": 9.014137376793203e-07, "loss": 0.4137331247329712, "step": 4814 }, { "epoch": 1.1102144339405118, "grad_norm": 1.2481779358565226, "learning_rate": 9.010343409202255e-07, "loss": 0.42436620593070984, "step": 4815 }, { "epoch": 1.1104450080700945, "grad_norm": 1.3339513565407848, "learning_rate": 9.006549585467916e-07, "loss": 0.43592822551727295, "step": 4816 }, { "epoch": 1.1106755821996772, "grad_norm": 1.3742872645989155, "learning_rate": 9.002755906141666e-07, "loss": 0.45627349615097046, "step": 4817 }, { "epoch": 1.11090615632926, "grad_norm": 1.819907938722267, "learning_rate": 8.998962371774953e-07, "loss": 0.5103771686553955, "step": 4818 }, { "epoch": 1.1111367304588424, "grad_norm": 1.4418115437773273, "learning_rate": 8.995168982919201e-07, "loss": 0.470276802778244, "step": 4819 }, { "epoch": 1.1113673045884251, "grad_norm": 1.3186176277536419, "learning_rate": 8.991375740125823e-07, "loss": 0.49486416578292847, "step": 4820 }, { "epoch": 1.1115978787180079, "grad_norm": 1.143316450397621, "learning_rate": 8.987582643946201e-07, "loss": 0.338329017162323, "step": 4821 }, { "epoch": 1.1118284528475906, "grad_norm": 1.4885392176771477, "learning_rate": 8.983789694931706e-07, "loss": 0.38252198696136475, "step": 4822 }, { "epoch": 1.112059026977173, "grad_norm": 1.4537319037859584, "learning_rate": 8.979996893633675e-07, "loss": 0.47691571712493896, "step": 4823 }, { "epoch": 1.1122896011067558, "grad_norm": 1.41954873904419, "learning_rate": 8.976204240603433e-07, "loss": 0.40156808495521545, "step": 4824 }, { "epoch": 1.1125201752363385, "grad_norm": 1.312743475511893, "learning_rate": 8.97241173639228e-07, "loss": 0.3837090730667114, "step": 4825 }, { "epoch": 1.1127507493659212, "grad_norm": 1.6300077035939553, "learning_rate": 8.968619381551499e-07, "loss": 0.5094380378723145, "step": 4826 }, { "epoch": 1.1129813234955037, "grad_norm": 1.4389159508234053, "learning_rate": 8.964827176632339e-07, "loss": 0.48674100637435913, "step": 4827 }, { "epoch": 1.1132118976250864, "grad_norm": 1.7742534070601, "learning_rate": 8.961035122186045e-07, "loss": 0.49288761615753174, "step": 4828 }, { "epoch": 1.1134424717546691, "grad_norm": 1.4156686622304593, "learning_rate": 8.957243218763824e-07, "loss": 0.42933952808380127, "step": 4829 }, { "epoch": 1.1136730458842519, "grad_norm": 1.838762036908513, "learning_rate": 8.953451466916866e-07, "loss": 0.39244914054870605, "step": 4830 }, { "epoch": 1.1139036200138344, "grad_norm": 1.3776049792093739, "learning_rate": 8.949659867196348e-07, "loss": 0.44688090682029724, "step": 4831 }, { "epoch": 1.114134194143417, "grad_norm": 1.6923430022628052, "learning_rate": 8.945868420153409e-07, "loss": 0.5388743877410889, "step": 4832 }, { "epoch": 1.1143647682729998, "grad_norm": 1.6108426528928312, "learning_rate": 8.942077126339182e-07, "loss": 0.4320666193962097, "step": 4833 }, { "epoch": 1.1145953424025825, "grad_norm": 1.3700008221476991, "learning_rate": 8.938285986304762e-07, "loss": 0.37623411417007446, "step": 4834 }, { "epoch": 1.114825916532165, "grad_norm": 1.4274453986312428, "learning_rate": 8.93449500060124e-07, "loss": 0.4743962287902832, "step": 4835 }, { "epoch": 1.1150564906617477, "grad_norm": 1.4687481503878526, "learning_rate": 8.930704169779663e-07, "loss": 0.4833221435546875, "step": 4836 }, { "epoch": 1.1152870647913304, "grad_norm": 1.580828459296504, "learning_rate": 8.926913494391074e-07, "loss": 0.48811084032058716, "step": 4837 }, { "epoch": 1.1155176389209132, "grad_norm": 1.4663777441823886, "learning_rate": 8.923122974986487e-07, "loss": 0.42525774240493774, "step": 4838 }, { "epoch": 1.1157482130504957, "grad_norm": 1.4773669175093567, "learning_rate": 8.919332612116884e-07, "loss": 0.4347909688949585, "step": 4839 }, { "epoch": 1.1159787871800784, "grad_norm": 1.9619203877260345, "learning_rate": 8.915542406333241e-07, "loss": 0.5085601806640625, "step": 4840 }, { "epoch": 1.116209361309661, "grad_norm": 1.4214902735687815, "learning_rate": 8.911752358186497e-07, "loss": 0.4620482325553894, "step": 4841 }, { "epoch": 1.1164399354392438, "grad_norm": 1.3147570239530335, "learning_rate": 8.907962468227582e-07, "loss": 0.44923216104507446, "step": 4842 }, { "epoch": 1.1166705095688263, "grad_norm": 1.6422580107908513, "learning_rate": 8.904172737007386e-07, "loss": 0.547439694404602, "step": 4843 }, { "epoch": 1.116901083698409, "grad_norm": 1.7769022711207687, "learning_rate": 8.900383165076789e-07, "loss": 0.4609268307685852, "step": 4844 }, { "epoch": 1.1171316578279917, "grad_norm": 1.4046866803141593, "learning_rate": 8.896593752986642e-07, "loss": 0.41780030727386475, "step": 4845 }, { "epoch": 1.1173622319575744, "grad_norm": 1.3641825367692086, "learning_rate": 8.89280450128778e-07, "loss": 0.506212592124939, "step": 4846 }, { "epoch": 1.117592806087157, "grad_norm": 1.4049897839890735, "learning_rate": 8.889015410531001e-07, "loss": 0.4436545968055725, "step": 4847 }, { "epoch": 1.1178233802167397, "grad_norm": 1.3856199735325436, "learning_rate": 8.885226481267093e-07, "loss": 0.4473826289176941, "step": 4848 }, { "epoch": 1.1180539543463224, "grad_norm": 1.42622736433257, "learning_rate": 8.881437714046815e-07, "loss": 0.43499836325645447, "step": 4849 }, { "epoch": 1.118284528475905, "grad_norm": 1.5927469786677344, "learning_rate": 8.877649109420899e-07, "loss": 0.522705078125, "step": 4850 }, { "epoch": 1.1185151026054876, "grad_norm": 1.5596781330511842, "learning_rate": 8.873860667940064e-07, "loss": 0.42146036028862, "step": 4851 }, { "epoch": 1.1187456767350703, "grad_norm": 1.649425162171124, "learning_rate": 8.870072390154989e-07, "loss": 0.5875130891799927, "step": 4852 }, { "epoch": 1.118976250864653, "grad_norm": 1.6372722830693418, "learning_rate": 8.866284276616345e-07, "loss": 0.5187985301017761, "step": 4853 }, { "epoch": 1.1192068249942357, "grad_norm": 2.6266893474509474, "learning_rate": 8.86249632787477e-07, "loss": 0.46115952730178833, "step": 4854 }, { "epoch": 1.1194373991238182, "grad_norm": 1.4714921061709185, "learning_rate": 8.858708544480886e-07, "loss": 0.4926493167877197, "step": 4855 }, { "epoch": 1.119667973253401, "grad_norm": 1.5525331026142626, "learning_rate": 8.854920926985278e-07, "loss": 0.44512006640434265, "step": 4856 }, { "epoch": 1.1198985473829837, "grad_norm": 1.5145408688074757, "learning_rate": 8.85113347593852e-07, "loss": 0.45973241329193115, "step": 4857 }, { "epoch": 1.1201291215125664, "grad_norm": 1.5400172209521554, "learning_rate": 8.847346191891157e-07, "loss": 0.4915385842323303, "step": 4858 }, { "epoch": 1.1203596956421489, "grad_norm": 1.4900152202768027, "learning_rate": 8.843559075393701e-07, "loss": 0.4457864463329315, "step": 4859 }, { "epoch": 1.1205902697717316, "grad_norm": 1.3414730221020197, "learning_rate": 8.839772126996658e-07, "loss": 0.4782453775405884, "step": 4860 }, { "epoch": 1.1208208439013143, "grad_norm": 1.3591384899787133, "learning_rate": 8.835985347250492e-07, "loss": 0.42789584398269653, "step": 4861 }, { "epoch": 1.121051418030897, "grad_norm": 1.8532602863182117, "learning_rate": 8.832198736705657e-07, "loss": 0.49990910291671753, "step": 4862 }, { "epoch": 1.1212819921604795, "grad_norm": 1.4158258863269764, "learning_rate": 8.828412295912566e-07, "loss": 0.3735005855560303, "step": 4863 }, { "epoch": 1.1215125662900622, "grad_norm": 1.3744374187815367, "learning_rate": 8.824626025421624e-07, "loss": 0.402673602104187, "step": 4864 }, { "epoch": 1.121743140419645, "grad_norm": 1.57241412674585, "learning_rate": 8.820839925783198e-07, "loss": 0.4675491452217102, "step": 4865 }, { "epoch": 1.1219737145492277, "grad_norm": 2.0200104658377254, "learning_rate": 8.817053997547645e-07, "loss": 0.5098662376403809, "step": 4866 }, { "epoch": 1.1222042886788102, "grad_norm": 1.3880207155981488, "learning_rate": 8.813268241265278e-07, "loss": 0.44478029012680054, "step": 4867 }, { "epoch": 1.1224348628083929, "grad_norm": 1.4983402004688406, "learning_rate": 8.809482657486401e-07, "loss": 0.410754919052124, "step": 4868 }, { "epoch": 1.1226654369379756, "grad_norm": 1.193726420763111, "learning_rate": 8.805697246761288e-07, "loss": 0.4198191165924072, "step": 4869 }, { "epoch": 1.1228960110675583, "grad_norm": 1.6015778378598091, "learning_rate": 8.801912009640178e-07, "loss": 0.5399911403656006, "step": 4870 }, { "epoch": 1.1231265851971408, "grad_norm": 1.3209581029003303, "learning_rate": 8.798126946673305e-07, "loss": 0.3879680633544922, "step": 4871 }, { "epoch": 1.1233571593267235, "grad_norm": 1.7893299917127135, "learning_rate": 8.794342058410856e-07, "loss": 0.4629073739051819, "step": 4872 }, { "epoch": 1.1235877334563062, "grad_norm": 1.25180398717926, "learning_rate": 8.790557345403013e-07, "loss": 0.42299884557724, "step": 4873 }, { "epoch": 1.123818307585889, "grad_norm": 1.5467146262725529, "learning_rate": 8.786772808199912e-07, "loss": 0.509437620639801, "step": 4874 }, { "epoch": 1.1240488817154715, "grad_norm": 1.3436359029840506, "learning_rate": 8.782988447351684e-07, "loss": 0.4682687222957611, "step": 4875 }, { "epoch": 1.1242794558450542, "grad_norm": 1.2884743737928093, "learning_rate": 8.779204263408416e-07, "loss": 0.41155606508255005, "step": 4876 }, { "epoch": 1.124510029974637, "grad_norm": 1.6449136860944156, "learning_rate": 8.775420256920182e-07, "loss": 0.4705810844898224, "step": 4877 }, { "epoch": 1.1247406041042196, "grad_norm": 1.4648471947605348, "learning_rate": 8.771636428437022e-07, "loss": 0.36571264266967773, "step": 4878 }, { "epoch": 1.124971178233802, "grad_norm": 1.1768139651906544, "learning_rate": 8.76785277850896e-07, "loss": 0.36618396639823914, "step": 4879 }, { "epoch": 1.1252017523633848, "grad_norm": 1.5334328638730685, "learning_rate": 8.764069307685983e-07, "loss": 0.4861210584640503, "step": 4880 }, { "epoch": 1.1254323264929675, "grad_norm": 1.457839206264918, "learning_rate": 8.760286016518056e-07, "loss": 0.43346846103668213, "step": 4881 }, { "epoch": 1.12566290062255, "grad_norm": 1.28421921022301, "learning_rate": 8.756502905555123e-07, "loss": 0.40088707208633423, "step": 4882 }, { "epoch": 1.1258934747521328, "grad_norm": 1.4643062187844458, "learning_rate": 8.752719975347092e-07, "loss": 0.4088619649410248, "step": 4883 }, { "epoch": 1.1261240488817155, "grad_norm": 1.5527291710325282, "learning_rate": 8.748937226443857e-07, "loss": 0.4988909661769867, "step": 4884 }, { "epoch": 1.1263546230112982, "grad_norm": 1.5377239167998313, "learning_rate": 8.745154659395271e-07, "loss": 0.47022196650505066, "step": 4885 }, { "epoch": 1.126585197140881, "grad_norm": 1.3259626220698026, "learning_rate": 8.741372274751178e-07, "loss": 0.45005398988723755, "step": 4886 }, { "epoch": 1.1268157712704634, "grad_norm": 1.5001674672720546, "learning_rate": 8.737590073061376e-07, "loss": 0.4632537364959717, "step": 4887 }, { "epoch": 1.1270463454000461, "grad_norm": 1.2983235840008036, "learning_rate": 8.733808054875653e-07, "loss": 0.41034963726997375, "step": 4888 }, { "epoch": 1.1272769195296288, "grad_norm": 1.423352740140202, "learning_rate": 8.730026220743765e-07, "loss": 0.5169668793678284, "step": 4889 }, { "epoch": 1.1275074936592113, "grad_norm": 1.46630659535839, "learning_rate": 8.726244571215431e-07, "loss": 0.44972485303878784, "step": 4890 }, { "epoch": 1.127738067788794, "grad_norm": 1.5712937661942725, "learning_rate": 8.722463106840361e-07, "loss": 0.4854368567466736, "step": 4891 }, { "epoch": 1.1279686419183768, "grad_norm": 1.0525840961962005, "learning_rate": 8.718681828168223e-07, "loss": 0.3029147982597351, "step": 4892 }, { "epoch": 1.1281992160479595, "grad_norm": 1.5856241308624208, "learning_rate": 8.714900735748671e-07, "loss": 0.4770504832267761, "step": 4893 }, { "epoch": 1.1284297901775422, "grad_norm": 1.3799690323722245, "learning_rate": 8.711119830131317e-07, "loss": 0.48508110642433167, "step": 4894 }, { "epoch": 1.1286603643071247, "grad_norm": 1.4227656672873528, "learning_rate": 8.707339111865761e-07, "loss": 0.43302488327026367, "step": 4895 }, { "epoch": 1.1288909384367074, "grad_norm": 1.3481652076868464, "learning_rate": 8.703558581501563e-07, "loss": 0.5720575451850891, "step": 4896 }, { "epoch": 1.1291215125662901, "grad_norm": 1.1736572520471924, "learning_rate": 8.69977823958827e-07, "loss": 0.48236098885536194, "step": 4897 }, { "epoch": 1.1293520866958726, "grad_norm": 1.6539784416028527, "learning_rate": 8.69599808667538e-07, "loss": 0.48531901836395264, "step": 4898 }, { "epoch": 1.1295826608254553, "grad_norm": 1.390226643422974, "learning_rate": 8.69221812331239e-07, "loss": 0.4150174856185913, "step": 4899 }, { "epoch": 1.129813234955038, "grad_norm": 1.4594360531114157, "learning_rate": 8.688438350048748e-07, "loss": 0.4729560911655426, "step": 4900 }, { "epoch": 1.1300438090846208, "grad_norm": 1.5805161631694824, "learning_rate": 8.684658767433881e-07, "loss": 0.5081748962402344, "step": 4901 }, { "epoch": 1.1302743832142035, "grad_norm": 1.3577399194161552, "learning_rate": 8.680879376017197e-07, "loss": 0.4552333354949951, "step": 4902 }, { "epoch": 1.130504957343786, "grad_norm": 1.666206186626053, "learning_rate": 8.67710017634806e-07, "loss": 0.4784387946128845, "step": 4903 }, { "epoch": 1.1307355314733687, "grad_norm": 1.7781011363806714, "learning_rate": 8.673321168975823e-07, "loss": 0.46922338008880615, "step": 4904 }, { "epoch": 1.1309661056029514, "grad_norm": 1.414520843561148, "learning_rate": 8.669542354449797e-07, "loss": 0.38181525468826294, "step": 4905 }, { "epoch": 1.131196679732534, "grad_norm": 1.409807627526861, "learning_rate": 8.665763733319278e-07, "loss": 0.4729689359664917, "step": 4906 }, { "epoch": 1.1314272538621166, "grad_norm": 1.3128859029806206, "learning_rate": 8.661985306133517e-07, "loss": 0.3934294581413269, "step": 4907 }, { "epoch": 1.1316578279916993, "grad_norm": 1.1525332387894895, "learning_rate": 8.658207073441754e-07, "loss": 0.40270352363586426, "step": 4908 }, { "epoch": 1.131888402121282, "grad_norm": 1.245477282269021, "learning_rate": 8.654429035793196e-07, "loss": 0.43291163444519043, "step": 4909 }, { "epoch": 1.1321189762508648, "grad_norm": 1.8011937733870678, "learning_rate": 8.650651193737009e-07, "loss": 0.5054877996444702, "step": 4910 }, { "epoch": 1.1323495503804473, "grad_norm": 1.4188548576207016, "learning_rate": 8.646873547822347e-07, "loss": 0.5043776035308838, "step": 4911 }, { "epoch": 1.13258012451003, "grad_norm": 1.511127988179462, "learning_rate": 8.643096098598328e-07, "loss": 0.4246225953102112, "step": 4912 }, { "epoch": 1.1328106986396127, "grad_norm": 1.3198976342579845, "learning_rate": 8.639318846614048e-07, "loss": 0.4514849781990051, "step": 4913 }, { "epoch": 1.1330412727691952, "grad_norm": 1.5409054507370947, "learning_rate": 8.635541792418557e-07, "loss": 0.4780477285385132, "step": 4914 }, { "epoch": 1.133271846898778, "grad_norm": 1.4447509965410514, "learning_rate": 8.631764936560899e-07, "loss": 0.47164270281791687, "step": 4915 }, { "epoch": 1.1335024210283606, "grad_norm": 1.4642572467177732, "learning_rate": 8.62798827959007e-07, "loss": 0.5462276339530945, "step": 4916 }, { "epoch": 1.1337329951579433, "grad_norm": 1.3611348332418316, "learning_rate": 8.624211822055055e-07, "loss": 0.37229591608047485, "step": 4917 }, { "epoch": 1.133963569287526, "grad_norm": 1.6004056206114348, "learning_rate": 8.620435564504791e-07, "loss": 0.46595901250839233, "step": 4918 }, { "epoch": 1.1341941434171086, "grad_norm": 1.899603419019246, "learning_rate": 8.616659507488201e-07, "loss": 0.4645708203315735, "step": 4919 }, { "epoch": 1.1344247175466913, "grad_norm": 1.3014565799840314, "learning_rate": 8.612883651554173e-07, "loss": 0.4309888482093811, "step": 4920 }, { "epoch": 1.134655291676274, "grad_norm": 1.2254662174184374, "learning_rate": 8.60910799725156e-07, "loss": 0.4000548720359802, "step": 4921 }, { "epoch": 1.1348858658058565, "grad_norm": 1.2990272231335294, "learning_rate": 8.6053325451292e-07, "loss": 0.41321274638175964, "step": 4922 }, { "epoch": 1.1351164399354392, "grad_norm": 1.7479036509525407, "learning_rate": 8.601557295735884e-07, "loss": 0.38982951641082764, "step": 4923 }, { "epoch": 1.135347014065022, "grad_norm": 1.3265126570648142, "learning_rate": 8.597782249620394e-07, "loss": 0.44623300433158875, "step": 4924 }, { "epoch": 1.1355775881946046, "grad_norm": 1.6004563551212632, "learning_rate": 8.594007407331458e-07, "loss": 0.46876993775367737, "step": 4925 }, { "epoch": 1.1358081623241871, "grad_norm": 1.4785026933128127, "learning_rate": 8.590232769417803e-07, "loss": 0.41345149278640747, "step": 4926 }, { "epoch": 1.1360387364537698, "grad_norm": 1.6712340860086734, "learning_rate": 8.586458336428095e-07, "loss": 0.4199402332305908, "step": 4927 }, { "epoch": 1.1362693105833526, "grad_norm": 1.5807454346525946, "learning_rate": 8.582684108910998e-07, "loss": 0.4424753785133362, "step": 4928 }, { "epoch": 1.1364998847129353, "grad_norm": 1.5318763722061228, "learning_rate": 8.57891008741513e-07, "loss": 0.5066598057746887, "step": 4929 }, { "epoch": 1.1367304588425178, "grad_norm": 1.409045447069904, "learning_rate": 8.575136272489081e-07, "loss": 0.45959407091140747, "step": 4930 }, { "epoch": 1.1369610329721005, "grad_norm": 1.191773933725539, "learning_rate": 8.571362664681415e-07, "loss": 0.4579051733016968, "step": 4931 }, { "epoch": 1.1371916071016832, "grad_norm": 1.4061203144708347, "learning_rate": 8.567589264540665e-07, "loss": 0.5125559568405151, "step": 4932 }, { "epoch": 1.137422181231266, "grad_norm": 1.484125992313306, "learning_rate": 8.563816072615335e-07, "loss": 0.4236595630645752, "step": 4933 }, { "epoch": 1.1376527553608484, "grad_norm": 1.3909472723060943, "learning_rate": 8.56004308945389e-07, "loss": 0.40187013149261475, "step": 4934 }, { "epoch": 1.1378833294904311, "grad_norm": 1.7306785223672838, "learning_rate": 8.556270315604778e-07, "loss": 0.5069487690925598, "step": 4935 }, { "epoch": 1.1381139036200139, "grad_norm": 1.2666499948179348, "learning_rate": 8.552497751616406e-07, "loss": 0.4032680094242096, "step": 4936 }, { "epoch": 1.1383444777495966, "grad_norm": 1.5147949059405765, "learning_rate": 8.548725398037158e-07, "loss": 0.4745323061943054, "step": 4937 }, { "epoch": 1.138575051879179, "grad_norm": 1.6025857024716508, "learning_rate": 8.544953255415379e-07, "loss": 0.5203470587730408, "step": 4938 }, { "epoch": 1.1388056260087618, "grad_norm": 1.3018365690111693, "learning_rate": 8.541181324299392e-07, "loss": 0.3780457079410553, "step": 4939 }, { "epoch": 1.1390362001383445, "grad_norm": 1.4908739703097478, "learning_rate": 8.537409605237486e-07, "loss": 0.4544069766998291, "step": 4940 }, { "epoch": 1.1392667742679272, "grad_norm": 1.3726631913286653, "learning_rate": 8.533638098777914e-07, "loss": 0.3692469000816345, "step": 4941 }, { "epoch": 1.1394973483975097, "grad_norm": 1.7461198015621147, "learning_rate": 8.529866805468907e-07, "loss": 0.4733508825302124, "step": 4942 }, { "epoch": 1.1397279225270924, "grad_norm": 1.7055847796006547, "learning_rate": 8.526095725858658e-07, "loss": 0.5165152549743652, "step": 4943 }, { "epoch": 1.1399584966566751, "grad_norm": 1.5781652989183093, "learning_rate": 8.522324860495336e-07, "loss": 0.40220290422439575, "step": 4944 }, { "epoch": 1.1401890707862579, "grad_norm": 1.676524129553008, "learning_rate": 8.518554209927066e-07, "loss": 0.511976957321167, "step": 4945 }, { "epoch": 1.1404196449158404, "grad_norm": 1.4578766238891505, "learning_rate": 8.514783774701959e-07, "loss": 0.4472247362136841, "step": 4946 }, { "epoch": 1.140650219045423, "grad_norm": 1.3731717985494665, "learning_rate": 8.51101355536808e-07, "loss": 0.4368797242641449, "step": 4947 }, { "epoch": 1.1408807931750058, "grad_norm": 1.3383514367818596, "learning_rate": 8.507243552473476e-07, "loss": 0.3794320225715637, "step": 4948 }, { "epoch": 1.1411113673045885, "grad_norm": 1.7604514892248042, "learning_rate": 8.50347376656615e-07, "loss": 0.5229817628860474, "step": 4949 }, { "epoch": 1.141341941434171, "grad_norm": 1.4803188344976619, "learning_rate": 8.499704198194075e-07, "loss": 0.4771326780319214, "step": 4950 }, { "epoch": 1.1415725155637537, "grad_norm": 1.406078110966921, "learning_rate": 8.495934847905201e-07, "loss": 0.45151978731155396, "step": 4951 }, { "epoch": 1.1418030896933364, "grad_norm": 1.3579359781108167, "learning_rate": 8.492165716247439e-07, "loss": 0.3963208496570587, "step": 4952 }, { "epoch": 1.1420336638229192, "grad_norm": 1.2797227148111936, "learning_rate": 8.488396803768675e-07, "loss": 0.37465882301330566, "step": 4953 }, { "epoch": 1.1422642379525016, "grad_norm": 1.7257432451816517, "learning_rate": 8.484628111016752e-07, "loss": 0.437372088432312, "step": 4954 }, { "epoch": 1.1424948120820844, "grad_norm": 1.3198726990576308, "learning_rate": 8.480859638539492e-07, "loss": 0.40495651960372925, "step": 4955 }, { "epoch": 1.142725386211667, "grad_norm": 1.5937176142563847, "learning_rate": 8.477091386884677e-07, "loss": 0.5346927642822266, "step": 4956 }, { "epoch": 1.1429559603412498, "grad_norm": 1.7035083737998966, "learning_rate": 8.473323356600068e-07, "loss": 0.42448925971984863, "step": 4957 }, { "epoch": 1.1431865344708323, "grad_norm": 1.4329878189218077, "learning_rate": 8.469555548233378e-07, "loss": 0.4715193808078766, "step": 4958 }, { "epoch": 1.143417108600415, "grad_norm": 1.5249370547485697, "learning_rate": 8.465787962332301e-07, "loss": 0.4721440076828003, "step": 4959 }, { "epoch": 1.1436476827299977, "grad_norm": 1.4963659204960478, "learning_rate": 8.462020599444495e-07, "loss": 0.5478333234786987, "step": 4960 }, { "epoch": 1.1438782568595804, "grad_norm": 1.5534391969085817, "learning_rate": 8.458253460117577e-07, "loss": 0.4005582928657532, "step": 4961 }, { "epoch": 1.144108830989163, "grad_norm": 1.4816205297794078, "learning_rate": 8.454486544899146e-07, "loss": 0.43886178731918335, "step": 4962 }, { "epoch": 1.1443394051187457, "grad_norm": 1.2296294541393762, "learning_rate": 8.450719854336758e-07, "loss": 0.4404095709323883, "step": 4963 }, { "epoch": 1.1445699792483284, "grad_norm": 1.5412493838775327, "learning_rate": 8.446953388977943e-07, "loss": 0.5386335849761963, "step": 4964 }, { "epoch": 1.144800553377911, "grad_norm": 1.5969922474986569, "learning_rate": 8.44318714937019e-07, "loss": 0.4576258659362793, "step": 4965 }, { "epoch": 1.1450311275074936, "grad_norm": 1.2968718824878773, "learning_rate": 8.439421136060964e-07, "loss": 0.4619024991989136, "step": 4966 }, { "epoch": 1.1452617016370763, "grad_norm": 1.4106895392209726, "learning_rate": 8.435655349597689e-07, "loss": 0.4071081876754761, "step": 4967 }, { "epoch": 1.145492275766659, "grad_norm": 1.3534750631649812, "learning_rate": 8.431889790527769e-07, "loss": 0.4605948328971863, "step": 4968 }, { "epoch": 1.1457228498962417, "grad_norm": 1.4715761177473734, "learning_rate": 8.428124459398554e-07, "loss": 0.46706438064575195, "step": 4969 }, { "epoch": 1.1459534240258242, "grad_norm": 1.480784825415981, "learning_rate": 8.424359356757383e-07, "loss": 0.39674657583236694, "step": 4970 }, { "epoch": 1.146183998155407, "grad_norm": 1.4606371633345823, "learning_rate": 8.42059448315155e-07, "loss": 0.4421246647834778, "step": 4971 }, { "epoch": 1.1464145722849897, "grad_norm": 1.6921922922853865, "learning_rate": 8.416829839128312e-07, "loss": 0.5220682621002197, "step": 4972 }, { "epoch": 1.1466451464145724, "grad_norm": 1.338254387958773, "learning_rate": 8.413065425234904e-07, "loss": 0.40189129114151, "step": 4973 }, { "epoch": 1.1468757205441549, "grad_norm": 1.3011913252808138, "learning_rate": 8.409301242018517e-07, "loss": 0.448421835899353, "step": 4974 }, { "epoch": 1.1471062946737376, "grad_norm": 1.5996651322296722, "learning_rate": 8.405537290026318e-07, "loss": 0.49476757645606995, "step": 4975 }, { "epoch": 1.1473368688033203, "grad_norm": 1.4573872381246367, "learning_rate": 8.401773569805431e-07, "loss": 0.3888528347015381, "step": 4976 }, { "epoch": 1.1475674429329028, "grad_norm": 1.4760563096119323, "learning_rate": 8.398010081902956e-07, "loss": 0.49057653546333313, "step": 4977 }, { "epoch": 1.1477980170624855, "grad_norm": 1.3851559333900214, "learning_rate": 8.39424682686595e-07, "loss": 0.41700610518455505, "step": 4978 }, { "epoch": 1.1480285911920682, "grad_norm": 1.5382531029836037, "learning_rate": 8.390483805241441e-07, "loss": 0.4801902770996094, "step": 4979 }, { "epoch": 1.148259165321651, "grad_norm": 1.5691797878096674, "learning_rate": 8.386721017576426e-07, "loss": 0.5438926219940186, "step": 4980 }, { "epoch": 1.1484897394512337, "grad_norm": 1.3886510011393631, "learning_rate": 8.382958464417857e-07, "loss": 0.3991735577583313, "step": 4981 }, { "epoch": 1.1487203135808162, "grad_norm": 1.5064271527131172, "learning_rate": 8.379196146312664e-07, "loss": 0.4918370246887207, "step": 4982 }, { "epoch": 1.1489508877103989, "grad_norm": 1.713149481922198, "learning_rate": 8.375434063807737e-07, "loss": 0.5280467867851257, "step": 4983 }, { "epoch": 1.1491814618399816, "grad_norm": 1.2990876069782782, "learning_rate": 8.371672217449936e-07, "loss": 0.4186179041862488, "step": 4984 }, { "epoch": 1.149412035969564, "grad_norm": 1.3742464834005608, "learning_rate": 8.367910607786079e-07, "loss": 0.3698224723339081, "step": 4985 }, { "epoch": 1.1496426100991468, "grad_norm": 1.4766762383505605, "learning_rate": 8.364149235362956e-07, "loss": 0.45402267575263977, "step": 4986 }, { "epoch": 1.1498731842287295, "grad_norm": 1.530758978566143, "learning_rate": 8.36038810072732e-07, "loss": 0.5145484209060669, "step": 4987 }, { "epoch": 1.1501037583583122, "grad_norm": 1.2257671687651395, "learning_rate": 8.356627204425893e-07, "loss": 0.4293951392173767, "step": 4988 }, { "epoch": 1.150334332487895, "grad_norm": 1.5415847348488914, "learning_rate": 8.352866547005354e-07, "loss": 0.3916272521018982, "step": 4989 }, { "epoch": 1.1505649066174775, "grad_norm": 1.6777087516004896, "learning_rate": 8.349106129012357e-07, "loss": 0.40171611309051514, "step": 4990 }, { "epoch": 1.1507954807470602, "grad_norm": 1.5767244212385862, "learning_rate": 8.345345950993518e-07, "loss": 0.49580252170562744, "step": 4991 }, { "epoch": 1.151026054876643, "grad_norm": 1.491822308561489, "learning_rate": 8.34158601349541e-07, "loss": 0.4521256685256958, "step": 4992 }, { "epoch": 1.1512566290062254, "grad_norm": 1.5317445246777317, "learning_rate": 8.337826317064585e-07, "loss": 0.3920813798904419, "step": 4993 }, { "epoch": 1.151487203135808, "grad_norm": 1.4336055128806646, "learning_rate": 8.334066862247547e-07, "loss": 0.4263145923614502, "step": 4994 }, { "epoch": 1.1517177772653908, "grad_norm": 1.513949850078891, "learning_rate": 8.330307649590779e-07, "loss": 0.4746140241622925, "step": 4995 }, { "epoch": 1.1519483513949735, "grad_norm": 1.6708741885004843, "learning_rate": 8.326548679640713e-07, "loss": 0.37520158290863037, "step": 4996 }, { "epoch": 1.1521789255245563, "grad_norm": 1.4060610690176367, "learning_rate": 8.322789952943759e-07, "loss": 0.4481951892375946, "step": 4997 }, { "epoch": 1.1524094996541387, "grad_norm": 1.4336851088246751, "learning_rate": 8.319031470046281e-07, "loss": 0.40319859981536865, "step": 4998 }, { "epoch": 1.1526400737837215, "grad_norm": 1.805948160607668, "learning_rate": 8.315273231494615e-07, "loss": 0.47720152139663696, "step": 4999 }, { "epoch": 1.1528706479133042, "grad_norm": 1.2994404231083814, "learning_rate": 8.311515237835063e-07, "loss": 0.4027557969093323, "step": 5000 }, { "epoch": 1.1531012220428867, "grad_norm": 1.5346692874582604, "learning_rate": 8.307757489613878e-07, "loss": 0.3939552307128906, "step": 5001 }, { "epoch": 1.1533317961724694, "grad_norm": 1.541801101637957, "learning_rate": 8.303999987377295e-07, "loss": 0.379425585269928, "step": 5002 }, { "epoch": 1.153562370302052, "grad_norm": 1.3222707927494204, "learning_rate": 8.300242731671499e-07, "loss": 0.46231499314308167, "step": 5003 }, { "epoch": 1.1537929444316348, "grad_norm": 1.5623820882279815, "learning_rate": 8.296485723042654e-07, "loss": 0.4639621675014496, "step": 5004 }, { "epoch": 1.1540235185612175, "grad_norm": 1.4577901713449948, "learning_rate": 8.29272896203687e-07, "loss": 0.49264025688171387, "step": 5005 }, { "epoch": 1.1542540926908, "grad_norm": 1.2796677798690286, "learning_rate": 8.288972449200233e-07, "loss": 0.4145156145095825, "step": 5006 }, { "epoch": 1.1544846668203828, "grad_norm": 1.3338594060824709, "learning_rate": 8.285216185078792e-07, "loss": 0.39693811535835266, "step": 5007 }, { "epoch": 1.1547152409499655, "grad_norm": 1.356694069152444, "learning_rate": 8.281460170218561e-07, "loss": 0.46224820613861084, "step": 5008 }, { "epoch": 1.154945815079548, "grad_norm": 1.5380330607680774, "learning_rate": 8.277704405165506e-07, "loss": 0.48868128657341003, "step": 5009 }, { "epoch": 1.1551763892091307, "grad_norm": 1.4024811483349113, "learning_rate": 8.273948890465574e-07, "loss": 0.5127776265144348, "step": 5010 }, { "epoch": 1.1554069633387134, "grad_norm": 1.4092381840768406, "learning_rate": 8.270193626664665e-07, "loss": 0.4039389491081238, "step": 5011 }, { "epoch": 1.1556375374682961, "grad_norm": 1.5807780806971976, "learning_rate": 8.266438614308641e-07, "loss": 0.4224502444267273, "step": 5012 }, { "epoch": 1.1558681115978788, "grad_norm": 1.42726619115002, "learning_rate": 8.262683853943335e-07, "loss": 0.4392918050289154, "step": 5013 }, { "epoch": 1.1560986857274613, "grad_norm": 1.5001771531608157, "learning_rate": 8.258929346114534e-07, "loss": 0.5055289268493652, "step": 5014 }, { "epoch": 1.156329259857044, "grad_norm": 1.3839083181087675, "learning_rate": 8.255175091368003e-07, "loss": 0.43851351737976074, "step": 5015 }, { "epoch": 1.1565598339866268, "grad_norm": 1.576893376736649, "learning_rate": 8.251421090249451e-07, "loss": 0.4557814598083496, "step": 5016 }, { "epoch": 1.1567904081162093, "grad_norm": 1.2994912796642604, "learning_rate": 8.247667343304568e-07, "loss": 0.4288882613182068, "step": 5017 }, { "epoch": 1.157020982245792, "grad_norm": 1.4237104241903844, "learning_rate": 8.243913851078994e-07, "loss": 0.42711886763572693, "step": 5018 }, { "epoch": 1.1572515563753747, "grad_norm": 1.8597293679946851, "learning_rate": 8.240160614118342e-07, "loss": 0.515809953212738, "step": 5019 }, { "epoch": 1.1574821305049574, "grad_norm": 1.828777504717114, "learning_rate": 8.236407632968182e-07, "loss": 0.5754632949829102, "step": 5020 }, { "epoch": 1.1577127046345401, "grad_norm": 1.553176542229762, "learning_rate": 8.232654908174038e-07, "loss": 0.4601830244064331, "step": 5021 }, { "epoch": 1.1579432787641226, "grad_norm": 1.500802040492981, "learning_rate": 8.228902440281422e-07, "loss": 0.4740797281265259, "step": 5022 }, { "epoch": 1.1581738528937053, "grad_norm": 1.688304974088827, "learning_rate": 8.225150229835781e-07, "loss": 0.4066367745399475, "step": 5023 }, { "epoch": 1.158404427023288, "grad_norm": 1.357187761009418, "learning_rate": 8.221398277382546e-07, "loss": 0.4664362668991089, "step": 5024 }, { "epoch": 1.1586350011528705, "grad_norm": 1.3912425171719864, "learning_rate": 8.217646583467093e-07, "loss": 0.5204637050628662, "step": 5025 }, { "epoch": 1.1588655752824533, "grad_norm": 1.4227227145637968, "learning_rate": 8.213895148634775e-07, "loss": 0.4991419017314911, "step": 5026 }, { "epoch": 1.159096149412036, "grad_norm": 1.2844880437163813, "learning_rate": 8.210143973430896e-07, "loss": 0.40420424938201904, "step": 5027 }, { "epoch": 1.1593267235416187, "grad_norm": 1.4946107412544847, "learning_rate": 8.206393058400736e-07, "loss": 0.523331880569458, "step": 5028 }, { "epoch": 1.1595572976712014, "grad_norm": 1.4908780499938201, "learning_rate": 8.202642404089516e-07, "loss": 0.5019216537475586, "step": 5029 }, { "epoch": 1.159787871800784, "grad_norm": 1.6451488656605473, "learning_rate": 8.198892011042442e-07, "loss": 0.522672712802887, "step": 5030 }, { "epoch": 1.1600184459303666, "grad_norm": 1.505727418733034, "learning_rate": 8.195141879804668e-07, "loss": 0.418377548456192, "step": 5031 }, { "epoch": 1.1602490200599493, "grad_norm": 1.5635210393713965, "learning_rate": 8.191392010921312e-07, "loss": 0.4914432764053345, "step": 5032 }, { "epoch": 1.1604795941895318, "grad_norm": 1.3929576184838368, "learning_rate": 8.187642404937459e-07, "loss": 0.42149683833122253, "step": 5033 }, { "epoch": 1.1607101683191146, "grad_norm": 1.6811040317548793, "learning_rate": 8.183893062398145e-07, "loss": 0.5637058019638062, "step": 5034 }, { "epoch": 1.1609407424486973, "grad_norm": 1.2252559322458123, "learning_rate": 8.180143983848387e-07, "loss": 0.49930211901664734, "step": 5035 }, { "epoch": 1.16117131657828, "grad_norm": 1.626369547940987, "learning_rate": 8.176395169833139e-07, "loss": 0.4217071235179901, "step": 5036 }, { "epoch": 1.1614018907078625, "grad_norm": 1.9654976691842632, "learning_rate": 8.172646620897336e-07, "loss": 0.4208733141422272, "step": 5037 }, { "epoch": 1.1616324648374452, "grad_norm": 1.434216808832, "learning_rate": 8.168898337585866e-07, "loss": 0.42970529198646545, "step": 5038 }, { "epoch": 1.161863038967028, "grad_norm": 1.429859410744686, "learning_rate": 8.165150320443584e-07, "loss": 0.49482622742652893, "step": 5039 }, { "epoch": 1.1620936130966106, "grad_norm": 1.2888747437309156, "learning_rate": 8.161402570015297e-07, "loss": 0.4106384217739105, "step": 5040 }, { "epoch": 1.1623241872261931, "grad_norm": 1.8632515092828725, "learning_rate": 8.157655086845778e-07, "loss": 0.4550397992134094, "step": 5041 }, { "epoch": 1.1625547613557758, "grad_norm": 1.4636128502892785, "learning_rate": 8.153907871479768e-07, "loss": 0.5144504308700562, "step": 5042 }, { "epoch": 1.1627853354853586, "grad_norm": 1.4308354935014596, "learning_rate": 8.150160924461953e-07, "loss": 0.3970009684562683, "step": 5043 }, { "epoch": 1.1630159096149413, "grad_norm": 1.4674063038688332, "learning_rate": 8.146414246336998e-07, "loss": 0.45825856924057007, "step": 5044 }, { "epoch": 1.1632464837445238, "grad_norm": 1.6850972190756333, "learning_rate": 8.142667837649515e-07, "loss": 0.4515247344970703, "step": 5045 }, { "epoch": 1.1634770578741065, "grad_norm": 1.347770803032681, "learning_rate": 8.13892169894409e-07, "loss": 0.41265833377838135, "step": 5046 }, { "epoch": 1.1637076320036892, "grad_norm": 1.4117996459358377, "learning_rate": 8.135175830765254e-07, "loss": 0.39820557832717896, "step": 5047 }, { "epoch": 1.163938206133272, "grad_norm": 1.4272016239744356, "learning_rate": 8.131430233657514e-07, "loss": 0.41528987884521484, "step": 5048 }, { "epoch": 1.1641687802628544, "grad_norm": 1.3404996701874776, "learning_rate": 8.127684908165323e-07, "loss": 0.4453636407852173, "step": 5049 }, { "epoch": 1.1643993543924371, "grad_norm": 1.846029547761043, "learning_rate": 8.123939854833107e-07, "loss": 0.45008519291877747, "step": 5050 }, { "epoch": 1.1646299285220199, "grad_norm": 1.7254544812081525, "learning_rate": 8.120195074205249e-07, "loss": 0.456550657749176, "step": 5051 }, { "epoch": 1.1648605026516026, "grad_norm": 1.4455041595835194, "learning_rate": 8.116450566826086e-07, "loss": 0.44465887546539307, "step": 5052 }, { "epoch": 1.165091076781185, "grad_norm": 1.4606872040412728, "learning_rate": 8.112706333239923e-07, "loss": 0.4769172668457031, "step": 5053 }, { "epoch": 1.1653216509107678, "grad_norm": 1.5800176181940382, "learning_rate": 8.108962373991019e-07, "loss": 0.42662739753723145, "step": 5054 }, { "epoch": 1.1655522250403505, "grad_norm": 1.533727299161298, "learning_rate": 8.105218689623603e-07, "loss": 0.4923250079154968, "step": 5055 }, { "epoch": 1.1657827991699332, "grad_norm": 1.5783599756682145, "learning_rate": 8.10147528068185e-07, "loss": 0.42462587356567383, "step": 5056 }, { "epoch": 1.1660133732995157, "grad_norm": 1.3458818448335859, "learning_rate": 8.097732147709908e-07, "loss": 0.47610223293304443, "step": 5057 }, { "epoch": 1.1662439474290984, "grad_norm": 1.6207397386125497, "learning_rate": 8.093989291251875e-07, "loss": 0.47519630193710327, "step": 5058 }, { "epoch": 1.1664745215586811, "grad_norm": 1.3901575117179885, "learning_rate": 8.090246711851819e-07, "loss": 0.38865840435028076, "step": 5059 }, { "epoch": 1.1667050956882639, "grad_norm": 1.271312682478528, "learning_rate": 8.086504410053757e-07, "loss": 0.39990776777267456, "step": 5060 }, { "epoch": 1.1669356698178464, "grad_norm": 1.4665951386644982, "learning_rate": 8.082762386401669e-07, "loss": 0.4330836534500122, "step": 5061 }, { "epoch": 1.167166243947429, "grad_norm": 1.286707043518209, "learning_rate": 8.079020641439504e-07, "loss": 0.4285934865474701, "step": 5062 }, { "epoch": 1.1673968180770118, "grad_norm": 1.7499199825760443, "learning_rate": 8.075279175711152e-07, "loss": 0.3900645077228546, "step": 5063 }, { "epoch": 1.1676273922065945, "grad_norm": 1.3606445329404238, "learning_rate": 8.07153798976048e-07, "loss": 0.48145759105682373, "step": 5064 }, { "epoch": 1.167857966336177, "grad_norm": 1.7592322949259351, "learning_rate": 8.067797084131305e-07, "loss": 0.4239045977592468, "step": 5065 }, { "epoch": 1.1680885404657597, "grad_norm": 1.7501505795878665, "learning_rate": 8.064056459367409e-07, "loss": 0.55517578125, "step": 5066 }, { "epoch": 1.1683191145953424, "grad_norm": 1.588400616006081, "learning_rate": 8.060316116012524e-07, "loss": 0.4956046938896179, "step": 5067 }, { "epoch": 1.1685496887249252, "grad_norm": 1.3607022789051413, "learning_rate": 8.05657605461035e-07, "loss": 0.4051878750324249, "step": 5068 }, { "epoch": 1.1687802628545076, "grad_norm": 1.6471264462607456, "learning_rate": 8.052836275704541e-07, "loss": 0.47389912605285645, "step": 5069 }, { "epoch": 1.1690108369840904, "grad_norm": 1.3462872241997197, "learning_rate": 8.049096779838717e-07, "loss": 0.5023842453956604, "step": 5070 }, { "epoch": 1.169241411113673, "grad_norm": 1.3943514778037218, "learning_rate": 8.045357567556449e-07, "loss": 0.4895137548446655, "step": 5071 }, { "epoch": 1.1694719852432558, "grad_norm": 1.5328176046123796, "learning_rate": 8.041618639401264e-07, "loss": 0.47874224185943604, "step": 5072 }, { "epoch": 1.1697025593728383, "grad_norm": 1.4666773972258982, "learning_rate": 8.037879995916659e-07, "loss": 0.4784395694732666, "step": 5073 }, { "epoch": 1.169933133502421, "grad_norm": 1.4433652991816976, "learning_rate": 8.034141637646079e-07, "loss": 0.45289772748947144, "step": 5074 }, { "epoch": 1.1701637076320037, "grad_norm": 1.931933746015264, "learning_rate": 8.030403565132942e-07, "loss": 0.5375204682350159, "step": 5075 }, { "epoch": 1.1703942817615864, "grad_norm": 1.4956339972756536, "learning_rate": 8.026665778920602e-07, "loss": 0.45003899931907654, "step": 5076 }, { "epoch": 1.170624855891169, "grad_norm": 1.348037979358877, "learning_rate": 8.022928279552392e-07, "loss": 0.4236389994621277, "step": 5077 }, { "epoch": 1.1708554300207517, "grad_norm": 1.3333943245649609, "learning_rate": 8.019191067571592e-07, "loss": 0.43182557821273804, "step": 5078 }, { "epoch": 1.1710860041503344, "grad_norm": 1.7521692166476222, "learning_rate": 8.01545414352145e-07, "loss": 0.5171953439712524, "step": 5079 }, { "epoch": 1.171316578279917, "grad_norm": 1.5319548219026522, "learning_rate": 8.011717507945157e-07, "loss": 0.5084770321846008, "step": 5080 }, { "epoch": 1.1715471524094996, "grad_norm": 1.6342595542262888, "learning_rate": 8.007981161385876e-07, "loss": 0.4685532748699188, "step": 5081 }, { "epoch": 1.1717777265390823, "grad_norm": 1.5086552244362486, "learning_rate": 8.004245104386724e-07, "loss": 0.4647448658943176, "step": 5082 }, { "epoch": 1.172008300668665, "grad_norm": 1.4914913702780284, "learning_rate": 8.000509337490768e-07, "loss": 0.4038098454475403, "step": 5083 }, { "epoch": 1.1722388747982477, "grad_norm": 1.435384500623052, "learning_rate": 7.996773861241047e-07, "loss": 0.4153759479522705, "step": 5084 }, { "epoch": 1.1724694489278302, "grad_norm": 1.5573715225755111, "learning_rate": 7.993038676180545e-07, "loss": 0.4569447636604309, "step": 5085 }, { "epoch": 1.172700023057413, "grad_norm": 1.4307958679817, "learning_rate": 7.989303782852215e-07, "loss": 0.4419426918029785, "step": 5086 }, { "epoch": 1.1729305971869957, "grad_norm": 1.4177391878017933, "learning_rate": 7.985569181798955e-07, "loss": 0.3902894854545593, "step": 5087 }, { "epoch": 1.1731611713165782, "grad_norm": 1.3935681641299988, "learning_rate": 7.981834873563631e-07, "loss": 0.4066358208656311, "step": 5088 }, { "epoch": 1.1733917454461609, "grad_norm": 1.579270038843054, "learning_rate": 7.978100858689059e-07, "loss": 0.4589639902114868, "step": 5089 }, { "epoch": 1.1736223195757436, "grad_norm": 1.5868805646941586, "learning_rate": 7.974367137718024e-07, "loss": 0.4431188106536865, "step": 5090 }, { "epoch": 1.1738528937053263, "grad_norm": 1.3420666663317198, "learning_rate": 7.970633711193252e-07, "loss": 0.43412742018699646, "step": 5091 }, { "epoch": 1.174083467834909, "grad_norm": 1.360898150528172, "learning_rate": 7.966900579657435e-07, "loss": 0.40296387672424316, "step": 5092 }, { "epoch": 1.1743140419644915, "grad_norm": 1.4702894316239854, "learning_rate": 7.963167743653228e-07, "loss": 0.4814741611480713, "step": 5093 }, { "epoch": 1.1745446160940742, "grad_norm": 1.7678935112109417, "learning_rate": 7.959435203723228e-07, "loss": 0.4412423372268677, "step": 5094 }, { "epoch": 1.174775190223657, "grad_norm": 1.698823813376211, "learning_rate": 7.955702960410006e-07, "loss": 0.49773266911506653, "step": 5095 }, { "epoch": 1.1750057643532394, "grad_norm": 1.445996901779518, "learning_rate": 7.951971014256073e-07, "loss": 0.4657529592514038, "step": 5096 }, { "epoch": 1.1752363384828222, "grad_norm": 1.4844953949134, "learning_rate": 7.94823936580391e-07, "loss": 0.4062782824039459, "step": 5097 }, { "epoch": 1.1754669126124049, "grad_norm": 1.3280643963390701, "learning_rate": 7.944508015595948e-07, "loss": 0.4154980182647705, "step": 5098 }, { "epoch": 1.1756974867419876, "grad_norm": 1.3235405382692107, "learning_rate": 7.940776964174582e-07, "loss": 0.4724680185317993, "step": 5099 }, { "epoch": 1.1759280608715703, "grad_norm": 1.4212228031547876, "learning_rate": 7.937046212082149e-07, "loss": 0.48808538913726807, "step": 5100 }, { "epoch": 1.1761586350011528, "grad_norm": 1.3949555418133748, "learning_rate": 7.933315759860959e-07, "loss": 0.4985845983028412, "step": 5101 }, { "epoch": 1.1763892091307355, "grad_norm": 1.2192149824969183, "learning_rate": 7.92958560805327e-07, "loss": 0.3735587000846863, "step": 5102 }, { "epoch": 1.1766197832603182, "grad_norm": 1.3793872147262238, "learning_rate": 7.925855757201294e-07, "loss": 0.4198414385318756, "step": 5103 }, { "epoch": 1.1768503573899007, "grad_norm": 1.7231390796467927, "learning_rate": 7.922126207847204e-07, "loss": 0.41973787546157837, "step": 5104 }, { "epoch": 1.1770809315194835, "grad_norm": 1.8258365265115961, "learning_rate": 7.918396960533128e-07, "loss": 0.5179545283317566, "step": 5105 }, { "epoch": 1.1773115056490662, "grad_norm": 1.5757377934881964, "learning_rate": 7.914668015801153e-07, "loss": 0.4917227625846863, "step": 5106 }, { "epoch": 1.1775420797786489, "grad_norm": 1.5132865673859617, "learning_rate": 7.910939374193312e-07, "loss": 0.41775548458099365, "step": 5107 }, { "epoch": 1.1777726539082316, "grad_norm": 1.484971286444874, "learning_rate": 7.907211036251608e-07, "loss": 0.45468997955322266, "step": 5108 }, { "epoch": 1.178003228037814, "grad_norm": 1.292166499414124, "learning_rate": 7.903483002517988e-07, "loss": 0.3749620318412781, "step": 5109 }, { "epoch": 1.1782338021673968, "grad_norm": 1.3945828421286317, "learning_rate": 7.899755273534365e-07, "loss": 0.48940956592559814, "step": 5110 }, { "epoch": 1.1784643762969795, "grad_norm": 1.3575927994558319, "learning_rate": 7.896027849842594e-07, "loss": 0.4561386704444885, "step": 5111 }, { "epoch": 1.178694950426562, "grad_norm": 1.4968176209501343, "learning_rate": 7.892300731984498e-07, "loss": 0.441898375749588, "step": 5112 }, { "epoch": 1.1789255245561447, "grad_norm": 1.7617220832230103, "learning_rate": 7.888573920501856e-07, "loss": 0.43445056676864624, "step": 5113 }, { "epoch": 1.1791560986857275, "grad_norm": 1.4680500200302005, "learning_rate": 7.884847415936389e-07, "loss": 0.42653167247772217, "step": 5114 }, { "epoch": 1.1793866728153102, "grad_norm": 1.3867120793190437, "learning_rate": 7.881121218829787e-07, "loss": 0.42003321647644043, "step": 5115 }, { "epoch": 1.179617246944893, "grad_norm": 1.613544333660259, "learning_rate": 7.87739532972369e-07, "loss": 0.4920128881931305, "step": 5116 }, { "epoch": 1.1798478210744754, "grad_norm": 1.430783098871577, "learning_rate": 7.873669749159697e-07, "loss": 0.49529707431793213, "step": 5117 }, { "epoch": 1.180078395204058, "grad_norm": 1.4915607575501106, "learning_rate": 7.869944477679351e-07, "loss": 0.4813005328178406, "step": 5118 }, { "epoch": 1.1803089693336408, "grad_norm": 1.4923304237688, "learning_rate": 7.866219515824168e-07, "loss": 0.47239556908607483, "step": 5119 }, { "epoch": 1.1805395434632233, "grad_norm": 1.7203098580351979, "learning_rate": 7.862494864135596e-07, "loss": 0.4808405935764313, "step": 5120 }, { "epoch": 1.180770117592806, "grad_norm": 1.5206410201181635, "learning_rate": 7.858770523155066e-07, "loss": 0.44946521520614624, "step": 5121 }, { "epoch": 1.1810006917223888, "grad_norm": 1.8958199353441048, "learning_rate": 7.85504649342394e-07, "loss": 0.5344874858856201, "step": 5122 }, { "epoch": 1.1812312658519715, "grad_norm": 1.729692211161555, "learning_rate": 7.851322775483542e-07, "loss": 0.49354079365730286, "step": 5123 }, { "epoch": 1.1814618399815542, "grad_norm": 1.6407900723292905, "learning_rate": 7.847599369875155e-07, "loss": 0.414085328578949, "step": 5124 }, { "epoch": 1.1816924141111367, "grad_norm": 1.51838750003237, "learning_rate": 7.843876277140013e-07, "loss": 0.4638150632381439, "step": 5125 }, { "epoch": 1.1819229882407194, "grad_norm": 1.5309477954820934, "learning_rate": 7.84015349781931e-07, "loss": 0.39239877462387085, "step": 5126 }, { "epoch": 1.1821535623703021, "grad_norm": 1.456140160914471, "learning_rate": 7.83643103245418e-07, "loss": 0.46846455335617065, "step": 5127 }, { "epoch": 1.1823841364998846, "grad_norm": 1.7368044200229882, "learning_rate": 7.832708881585729e-07, "loss": 0.5257229804992676, "step": 5128 }, { "epoch": 1.1826147106294673, "grad_norm": 1.246852967804398, "learning_rate": 7.828987045755006e-07, "loss": 0.3858698904514313, "step": 5129 }, { "epoch": 1.18284528475905, "grad_norm": 1.526790126487461, "learning_rate": 7.82526552550302e-07, "loss": 0.48664575815200806, "step": 5130 }, { "epoch": 1.1830758588886328, "grad_norm": 1.4370667079865387, "learning_rate": 7.821544321370731e-07, "loss": 0.5246836543083191, "step": 5131 }, { "epoch": 1.1833064330182155, "grad_norm": 1.6695741670894575, "learning_rate": 7.817823433899049e-07, "loss": 0.5538516640663147, "step": 5132 }, { "epoch": 1.183537007147798, "grad_norm": 1.5154692060299837, "learning_rate": 7.814102863628852e-07, "loss": 0.4563618302345276, "step": 5133 }, { "epoch": 1.1837675812773807, "grad_norm": 1.6013623117191365, "learning_rate": 7.810382611100952e-07, "loss": 0.48093757033348083, "step": 5134 }, { "epoch": 1.1839981554069634, "grad_norm": 1.4079128694512013, "learning_rate": 7.806662676856133e-07, "loss": 0.41152772307395935, "step": 5135 }, { "epoch": 1.184228729536546, "grad_norm": 1.470828934761741, "learning_rate": 7.802943061435121e-07, "loss": 0.4429926574230194, "step": 5136 }, { "epoch": 1.1844593036661286, "grad_norm": 1.6844871985058756, "learning_rate": 7.799223765378604e-07, "loss": 0.5795058012008667, "step": 5137 }, { "epoch": 1.1846898777957113, "grad_norm": 1.3964078038325152, "learning_rate": 7.795504789227214e-07, "loss": 0.43219637870788574, "step": 5138 }, { "epoch": 1.184920451925294, "grad_norm": 1.3120429368988666, "learning_rate": 7.791786133521547e-07, "loss": 0.472915917634964, "step": 5139 }, { "epoch": 1.1851510260548768, "grad_norm": 1.8547533260703066, "learning_rate": 7.788067798802144e-07, "loss": 0.609251081943512, "step": 5140 }, { "epoch": 1.1853816001844593, "grad_norm": 1.5647854614729606, "learning_rate": 7.784349785609506e-07, "loss": 0.5051882266998291, "step": 5141 }, { "epoch": 1.185612174314042, "grad_norm": 1.8256847598733492, "learning_rate": 7.780632094484081e-07, "loss": 0.5062044858932495, "step": 5142 }, { "epoch": 1.1858427484436247, "grad_norm": 1.6792228276022907, "learning_rate": 7.77691472596627e-07, "loss": 0.48717936873435974, "step": 5143 }, { "epoch": 1.1860733225732072, "grad_norm": 1.4962691739334948, "learning_rate": 7.773197680596439e-07, "loss": 0.4755759537220001, "step": 5144 }, { "epoch": 1.18630389670279, "grad_norm": 1.5701944534084074, "learning_rate": 7.769480958914889e-07, "loss": 0.4549487829208374, "step": 5145 }, { "epoch": 1.1865344708323726, "grad_norm": 1.3416043214582947, "learning_rate": 7.765764561461891e-07, "loss": 0.39759546518325806, "step": 5146 }, { "epoch": 1.1867650449619553, "grad_norm": 1.7321999626139561, "learning_rate": 7.762048488777654e-07, "loss": 0.5151915550231934, "step": 5147 }, { "epoch": 1.1869956190915378, "grad_norm": 1.739537041268416, "learning_rate": 7.758332741402351e-07, "loss": 0.4555166959762573, "step": 5148 }, { "epoch": 1.1872261932211206, "grad_norm": 1.246823148309275, "learning_rate": 7.754617319876102e-07, "loss": 0.3639993667602539, "step": 5149 }, { "epoch": 1.1874567673507033, "grad_norm": 1.4228626603425891, "learning_rate": 7.750902224738984e-07, "loss": 0.4158916473388672, "step": 5150 }, { "epoch": 1.187687341480286, "grad_norm": 1.5159845507016538, "learning_rate": 7.747187456531021e-07, "loss": 0.44933754205703735, "step": 5151 }, { "epoch": 1.1879179156098685, "grad_norm": 1.1574431418082898, "learning_rate": 7.74347301579219e-07, "loss": 0.35436397790908813, "step": 5152 }, { "epoch": 1.1881484897394512, "grad_norm": 1.7559371420298944, "learning_rate": 7.73975890306243e-07, "loss": 0.40650928020477295, "step": 5153 }, { "epoch": 1.188379063869034, "grad_norm": 1.655955114095899, "learning_rate": 7.736045118881615e-07, "loss": 0.424211710691452, "step": 5154 }, { "epoch": 1.1886096379986166, "grad_norm": 1.386370427214692, "learning_rate": 7.73233166378959e-07, "loss": 0.38909512758255005, "step": 5155 }, { "epoch": 1.1888402121281991, "grad_norm": 1.6273556393891413, "learning_rate": 7.728618538326139e-07, "loss": 0.4452083110809326, "step": 5156 }, { "epoch": 1.1890707862577818, "grad_norm": 1.7325341862894768, "learning_rate": 7.724905743031005e-07, "loss": 0.45061540603637695, "step": 5157 }, { "epoch": 1.1893013603873646, "grad_norm": 1.875195364158454, "learning_rate": 7.721193278443875e-07, "loss": 0.5301374197006226, "step": 5158 }, { "epoch": 1.1895319345169473, "grad_norm": 1.32653936253781, "learning_rate": 7.717481145104398e-07, "loss": 0.4386521577835083, "step": 5159 }, { "epoch": 1.1897625086465298, "grad_norm": 1.5893013583646332, "learning_rate": 7.713769343552169e-07, "loss": 0.447623074054718, "step": 5160 }, { "epoch": 1.1899930827761125, "grad_norm": 1.4757184491338362, "learning_rate": 7.71005787432674e-07, "loss": 0.44326454401016235, "step": 5161 }, { "epoch": 1.1902236569056952, "grad_norm": 1.4868394681814385, "learning_rate": 7.706346737967603e-07, "loss": 0.564007043838501, "step": 5162 }, { "epoch": 1.190454231035278, "grad_norm": 1.4497565739191507, "learning_rate": 7.702635935014213e-07, "loss": 0.5338540077209473, "step": 5163 }, { "epoch": 1.1906848051648604, "grad_norm": 1.5430964424900424, "learning_rate": 7.698925466005977e-07, "loss": 0.45307862758636475, "step": 5164 }, { "epoch": 1.1909153792944431, "grad_norm": 1.4703583168080245, "learning_rate": 7.69521533148224e-07, "loss": 0.5383142232894897, "step": 5165 }, { "epoch": 1.1911459534240258, "grad_norm": 1.46357622305891, "learning_rate": 7.691505531982316e-07, "loss": 0.3794770836830139, "step": 5166 }, { "epoch": 1.1913765275536086, "grad_norm": 1.73725405615964, "learning_rate": 7.687796068045455e-07, "loss": 0.4633198082447052, "step": 5167 }, { "epoch": 1.191607101683191, "grad_norm": 1.4824242158713679, "learning_rate": 7.684086940210875e-07, "loss": 0.5080294609069824, "step": 5168 }, { "epoch": 1.1918376758127738, "grad_norm": 1.4742940614632714, "learning_rate": 7.680378149017724e-07, "loss": 0.3952289819717407, "step": 5169 }, { "epoch": 1.1920682499423565, "grad_norm": 1.6284523488523228, "learning_rate": 7.676669695005122e-07, "loss": 0.4518551528453827, "step": 5170 }, { "epoch": 1.1922988240719392, "grad_norm": 1.3915500318606786, "learning_rate": 7.672961578712125e-07, "loss": 0.4752943515777588, "step": 5171 }, { "epoch": 1.1925293982015217, "grad_norm": 1.4424968675316805, "learning_rate": 7.669253800677744e-07, "loss": 0.5059680342674255, "step": 5172 }, { "epoch": 1.1927599723311044, "grad_norm": 1.4513506332822887, "learning_rate": 7.665546361440949e-07, "loss": 0.47073960304260254, "step": 5173 }, { "epoch": 1.1929905464606871, "grad_norm": 1.6974826094600077, "learning_rate": 7.661839261540644e-07, "loss": 0.5851496458053589, "step": 5174 }, { "epoch": 1.1932211205902699, "grad_norm": 1.4255244135326766, "learning_rate": 7.658132501515701e-07, "loss": 0.44255387783050537, "step": 5175 }, { "epoch": 1.1934516947198524, "grad_norm": 1.7360033352973823, "learning_rate": 7.654426081904931e-07, "loss": 0.543785810470581, "step": 5176 }, { "epoch": 1.193682268849435, "grad_norm": 1.697289945139709, "learning_rate": 7.650720003247107e-07, "loss": 0.503501296043396, "step": 5177 }, { "epoch": 1.1939128429790178, "grad_norm": 1.6448034142146566, "learning_rate": 7.647014266080935e-07, "loss": 0.43894368410110474, "step": 5178 }, { "epoch": 1.1941434171086005, "grad_norm": 1.9780925681836061, "learning_rate": 7.643308870945088e-07, "loss": 0.5014036297798157, "step": 5179 }, { "epoch": 1.194373991238183, "grad_norm": 1.3813934145743847, "learning_rate": 7.639603818378178e-07, "loss": 0.4859309196472168, "step": 5180 }, { "epoch": 1.1946045653677657, "grad_norm": 1.611175852060371, "learning_rate": 7.635899108918781e-07, "loss": 0.40631920099258423, "step": 5181 }, { "epoch": 1.1948351394973484, "grad_norm": 1.923584573200039, "learning_rate": 7.632194743105405e-07, "loss": 0.5206565856933594, "step": 5182 }, { "epoch": 1.1950657136269311, "grad_norm": 1.659582338573284, "learning_rate": 7.628490721476517e-07, "loss": 0.5052351355552673, "step": 5183 }, { "epoch": 1.1952962877565136, "grad_norm": 1.3967739180573415, "learning_rate": 7.624787044570543e-07, "loss": 0.4921465516090393, "step": 5184 }, { "epoch": 1.1955268618860964, "grad_norm": 1.2706689377506823, "learning_rate": 7.621083712925839e-07, "loss": 0.3307859003543854, "step": 5185 }, { "epoch": 1.195757436015679, "grad_norm": 1.5942715812711645, "learning_rate": 7.617380727080728e-07, "loss": 0.4276743531227112, "step": 5186 }, { "epoch": 1.1959880101452618, "grad_norm": 1.434739100338101, "learning_rate": 7.613678087573475e-07, "loss": 0.5065702795982361, "step": 5187 }, { "epoch": 1.1962185842748443, "grad_norm": 1.2918886211693255, "learning_rate": 7.609975794942301e-07, "loss": 0.3588709533214569, "step": 5188 }, { "epoch": 1.196449158404427, "grad_norm": 1.4907134183008088, "learning_rate": 7.606273849725362e-07, "loss": 0.4296506941318512, "step": 5189 }, { "epoch": 1.1966797325340097, "grad_norm": 1.5501182036176049, "learning_rate": 7.602572252460782e-07, "loss": 0.517792820930481, "step": 5190 }, { "epoch": 1.1969103066635924, "grad_norm": 1.6883448687359832, "learning_rate": 7.598871003686619e-07, "loss": 0.38939881324768066, "step": 5191 }, { "epoch": 1.197140880793175, "grad_norm": 1.5288548185908284, "learning_rate": 7.595170103940896e-07, "loss": 0.5759290456771851, "step": 5192 }, { "epoch": 1.1973714549227576, "grad_norm": 1.975229876516129, "learning_rate": 7.591469553761569e-07, "loss": 0.4705851078033447, "step": 5193 }, { "epoch": 1.1976020290523404, "grad_norm": 1.4820736709912923, "learning_rate": 7.587769353686548e-07, "loss": 0.5137619972229004, "step": 5194 }, { "epoch": 1.197832603181923, "grad_norm": 1.426346211238444, "learning_rate": 7.584069504253701e-07, "loss": 0.43207496404647827, "step": 5195 }, { "epoch": 1.1980631773115056, "grad_norm": 1.7446559629267169, "learning_rate": 7.580370006000835e-07, "loss": 0.3976139426231384, "step": 5196 }, { "epoch": 1.1982937514410883, "grad_norm": 1.3117053560833851, "learning_rate": 7.576670859465715e-07, "loss": 0.41323673725128174, "step": 5197 }, { "epoch": 1.198524325570671, "grad_norm": 1.5110343718270132, "learning_rate": 7.57297206518604e-07, "loss": 0.404024600982666, "step": 5198 }, { "epoch": 1.1987548997002535, "grad_norm": 1.3684281900258655, "learning_rate": 7.569273623699475e-07, "loss": 0.4010540843009949, "step": 5199 }, { "epoch": 1.1989854738298362, "grad_norm": 1.5739020793077496, "learning_rate": 7.565575535543623e-07, "loss": 0.44299256801605225, "step": 5200 }, { "epoch": 1.199216047959419, "grad_norm": 1.5204166282494558, "learning_rate": 7.561877801256041e-07, "loss": 0.5217546820640564, "step": 5201 }, { "epoch": 1.1994466220890017, "grad_norm": 1.868873770331591, "learning_rate": 7.558180421374229e-07, "loss": 0.5192688703536987, "step": 5202 }, { "epoch": 1.1996771962185844, "grad_norm": 1.5743910950617057, "learning_rate": 7.554483396435637e-07, "loss": 0.38272884488105774, "step": 5203 }, { "epoch": 1.1999077703481669, "grad_norm": 1.4246723536184043, "learning_rate": 7.550786726977673e-07, "loss": 0.474464476108551, "step": 5204 }, { "epoch": 1.2001383444777496, "grad_norm": 1.6360159300410695, "learning_rate": 7.547090413537676e-07, "loss": 0.540134072303772, "step": 5205 }, { "epoch": 1.2003689186073323, "grad_norm": 1.4752644193711169, "learning_rate": 7.543394456652948e-07, "loss": 0.4662882089614868, "step": 5206 }, { "epoch": 1.2005994927369148, "grad_norm": 1.6858064119472538, "learning_rate": 7.539698856860732e-07, "loss": 0.440970778465271, "step": 5207 }, { "epoch": 1.2008300668664975, "grad_norm": 1.3786365004169476, "learning_rate": 7.536003614698225e-07, "loss": 0.41787397861480713, "step": 5208 }, { "epoch": 1.2010606409960802, "grad_norm": 1.4726677497641942, "learning_rate": 7.532308730702561e-07, "loss": 0.5503408908843994, "step": 5209 }, { "epoch": 1.201291215125663, "grad_norm": 1.4739960164302617, "learning_rate": 7.528614205410833e-07, "loss": 0.43713903427124023, "step": 5210 }, { "epoch": 1.2015217892552457, "grad_norm": 1.5362481289460599, "learning_rate": 7.524920039360076e-07, "loss": 0.4145667552947998, "step": 5211 }, { "epoch": 1.2017523633848282, "grad_norm": 1.4800845890771783, "learning_rate": 7.521226233087279e-07, "loss": 0.4307587146759033, "step": 5212 }, { "epoch": 1.2019829375144109, "grad_norm": 1.436182742461266, "learning_rate": 7.517532787129369e-07, "loss": 0.43784570693969727, "step": 5213 }, { "epoch": 1.2022135116439936, "grad_norm": 1.3395031095564736, "learning_rate": 7.513839702023226e-07, "loss": 0.40003830194473267, "step": 5214 }, { "epoch": 1.202444085773576, "grad_norm": 1.4786298792735793, "learning_rate": 7.510146978305682e-07, "loss": 0.4880738854408264, "step": 5215 }, { "epoch": 1.2026746599031588, "grad_norm": 1.31895753202322, "learning_rate": 7.506454616513505e-07, "loss": 0.39548349380493164, "step": 5216 }, { "epoch": 1.2029052340327415, "grad_norm": 1.5189592384869435, "learning_rate": 7.502762617183425e-07, "loss": 0.4060090184211731, "step": 5217 }, { "epoch": 1.2031358081623242, "grad_norm": 1.6902238907281657, "learning_rate": 7.499070980852101e-07, "loss": 0.44657808542251587, "step": 5218 }, { "epoch": 1.203366382291907, "grad_norm": 1.553015362629627, "learning_rate": 7.495379708056161e-07, "loss": 0.5283595323562622, "step": 5219 }, { "epoch": 1.2035969564214895, "grad_norm": 1.5940858647104894, "learning_rate": 7.49168879933216e-07, "loss": 0.4424205422401428, "step": 5220 }, { "epoch": 1.2038275305510722, "grad_norm": 1.4929497446465205, "learning_rate": 7.487998255216619e-07, "loss": 0.4998319745063782, "step": 5221 }, { "epoch": 1.2040581046806549, "grad_norm": 1.3437939609448373, "learning_rate": 7.484308076245987e-07, "loss": 0.3821876645088196, "step": 5222 }, { "epoch": 1.2042886788102374, "grad_norm": 1.4227177114495277, "learning_rate": 7.480618262956669e-07, "loss": 0.4567919373512268, "step": 5223 }, { "epoch": 1.20451925293982, "grad_norm": 1.4207326358395804, "learning_rate": 7.476928815885026e-07, "loss": 0.4561428427696228, "step": 5224 }, { "epoch": 1.2047498270694028, "grad_norm": 1.5720016799439587, "learning_rate": 7.473239735567344e-07, "loss": 0.4384823739528656, "step": 5225 }, { "epoch": 1.2049804011989855, "grad_norm": 1.518914607229236, "learning_rate": 7.469551022539877e-07, "loss": 0.42840123176574707, "step": 5226 }, { "epoch": 1.2052109753285682, "grad_norm": 1.4031825092609558, "learning_rate": 7.465862677338812e-07, "loss": 0.39553213119506836, "step": 5227 }, { "epoch": 1.2054415494581507, "grad_norm": 1.521464998921144, "learning_rate": 7.462174700500295e-07, "loss": 0.4325043559074402, "step": 5228 }, { "epoch": 1.2056721235877335, "grad_norm": 1.7451009485961195, "learning_rate": 7.4584870925604e-07, "loss": 0.5004623532295227, "step": 5229 }, { "epoch": 1.2059026977173162, "grad_norm": 1.6975060246760258, "learning_rate": 7.454799854055165e-07, "loss": 0.42296791076660156, "step": 5230 }, { "epoch": 1.2061332718468987, "grad_norm": 1.7859122255595659, "learning_rate": 7.451112985520565e-07, "loss": 0.45638370513916016, "step": 5231 }, { "epoch": 1.2063638459764814, "grad_norm": 1.9018837416313183, "learning_rate": 7.447426487492528e-07, "loss": 0.5134493112564087, "step": 5232 }, { "epoch": 1.206594420106064, "grad_norm": 1.382989024686568, "learning_rate": 7.443740360506918e-07, "loss": 0.4132578372955322, "step": 5233 }, { "epoch": 1.2068249942356468, "grad_norm": 1.321784021070878, "learning_rate": 7.440054605099552e-07, "loss": 0.4363224506378174, "step": 5234 }, { "epoch": 1.2070555683652295, "grad_norm": 1.4395608486144074, "learning_rate": 7.4363692218062e-07, "loss": 0.44970041513442993, "step": 5235 }, { "epoch": 1.207286142494812, "grad_norm": 1.3219627332758312, "learning_rate": 7.432684211162556e-07, "loss": 0.39787235856056213, "step": 5236 }, { "epoch": 1.2075167166243947, "grad_norm": 1.694639970069785, "learning_rate": 7.428999573704284e-07, "loss": 0.46057572960853577, "step": 5237 }, { "epoch": 1.2077472907539775, "grad_norm": 1.3954230269661139, "learning_rate": 7.42531530996698e-07, "loss": 0.46754559874534607, "step": 5238 }, { "epoch": 1.20797786488356, "grad_norm": 1.4060087118514482, "learning_rate": 7.42163142048619e-07, "loss": 0.5072697401046753, "step": 5239 }, { "epoch": 1.2082084390131427, "grad_norm": 1.5355585762921151, "learning_rate": 7.417947905797403e-07, "loss": 0.4691959023475647, "step": 5240 }, { "epoch": 1.2084390131427254, "grad_norm": 1.4596733170422231, "learning_rate": 7.414264766436056e-07, "loss": 0.43248072266578674, "step": 5241 }, { "epoch": 1.208669587272308, "grad_norm": 1.8386458599943265, "learning_rate": 7.410582002937534e-07, "loss": 0.4748457968235016, "step": 5242 }, { "epoch": 1.2089001614018908, "grad_norm": 1.413498638420547, "learning_rate": 7.406899615837157e-07, "loss": 0.4682820439338684, "step": 5243 }, { "epoch": 1.2091307355314733, "grad_norm": 1.3788557575990639, "learning_rate": 7.403217605670205e-07, "loss": 0.41747021675109863, "step": 5244 }, { "epoch": 1.209361309661056, "grad_norm": 1.5523861247321795, "learning_rate": 7.399535972971886e-07, "loss": 0.4968727231025696, "step": 5245 }, { "epoch": 1.2095918837906388, "grad_norm": 1.6255626899279143, "learning_rate": 7.395854718277372e-07, "loss": 0.486778199672699, "step": 5246 }, { "epoch": 1.2098224579202213, "grad_norm": 1.938770231002498, "learning_rate": 7.392173842121765e-07, "loss": 0.5153725147247314, "step": 5247 }, { "epoch": 1.210053032049804, "grad_norm": 1.6258479412197122, "learning_rate": 7.388493345040123e-07, "loss": 0.42352354526519775, "step": 5248 }, { "epoch": 1.2102836061793867, "grad_norm": 1.477454043811349, "learning_rate": 7.384813227567437e-07, "loss": 0.363994300365448, "step": 5249 }, { "epoch": 1.2105141803089694, "grad_norm": 1.3450193947115454, "learning_rate": 7.381133490238654e-07, "loss": 0.44195863604545593, "step": 5250 }, { "epoch": 1.2107447544385521, "grad_norm": 1.6510262733932026, "learning_rate": 7.377454133588657e-07, "loss": 0.5031026601791382, "step": 5251 }, { "epoch": 1.2109753285681346, "grad_norm": 1.1126223170422647, "learning_rate": 7.373775158152284e-07, "loss": 0.3900304436683655, "step": 5252 }, { "epoch": 1.2112059026977173, "grad_norm": 1.4718461813811798, "learning_rate": 7.370096564464308e-07, "loss": 0.406912624835968, "step": 5253 }, { "epoch": 1.2114364768273, "grad_norm": 1.2742945351379469, "learning_rate": 7.366418353059445e-07, "loss": 0.407238632440567, "step": 5254 }, { "epoch": 1.2116670509568825, "grad_norm": 2.3145771276343625, "learning_rate": 7.36274052447237e-07, "loss": 0.5605549216270447, "step": 5255 }, { "epoch": 1.2118976250864653, "grad_norm": 1.7547311772877803, "learning_rate": 7.359063079237684e-07, "loss": 0.5016111731529236, "step": 5256 }, { "epoch": 1.212128199216048, "grad_norm": 1.31999939383151, "learning_rate": 7.355386017889946e-07, "loss": 0.38812315464019775, "step": 5257 }, { "epoch": 1.2123587733456307, "grad_norm": 1.5177330463551633, "learning_rate": 7.35170934096365e-07, "loss": 0.46022963523864746, "step": 5258 }, { "epoch": 1.2125893474752132, "grad_norm": 1.4118628857930515, "learning_rate": 7.348033048993246e-07, "loss": 0.40029624104499817, "step": 5259 }, { "epoch": 1.212819921604796, "grad_norm": 1.4051430521275825, "learning_rate": 7.344357142513111e-07, "loss": 0.4331943392753601, "step": 5260 }, { "epoch": 1.2130504957343786, "grad_norm": 1.565074125850335, "learning_rate": 7.340681622057582e-07, "loss": 0.43757596611976624, "step": 5261 }, { "epoch": 1.2132810698639613, "grad_norm": 1.7743971563599887, "learning_rate": 7.337006488160931e-07, "loss": 0.49733203649520874, "step": 5262 }, { "epoch": 1.2135116439935438, "grad_norm": 1.341577967095045, "learning_rate": 7.333331741357373e-07, "loss": 0.35552018880844116, "step": 5263 }, { "epoch": 1.2137422181231265, "grad_norm": 1.6321675762702066, "learning_rate": 7.329657382181074e-07, "loss": 0.4102798104286194, "step": 5264 }, { "epoch": 1.2139727922527093, "grad_norm": 1.4184297160567871, "learning_rate": 7.325983411166136e-07, "loss": 0.4517349600791931, "step": 5265 }, { "epoch": 1.214203366382292, "grad_norm": 1.6427775893660324, "learning_rate": 7.322309828846613e-07, "loss": 0.48924458026885986, "step": 5266 }, { "epoch": 1.2144339405118745, "grad_norm": 1.4030974508932201, "learning_rate": 7.31863663575649e-07, "loss": 0.38971561193466187, "step": 5267 }, { "epoch": 1.2146645146414572, "grad_norm": 1.6155044970268224, "learning_rate": 7.31496383242971e-07, "loss": 0.6503559350967407, "step": 5268 }, { "epoch": 1.21489508877104, "grad_norm": 1.6905359606856467, "learning_rate": 7.311291419400146e-07, "loss": 0.4615272879600525, "step": 5269 }, { "epoch": 1.2151256629006226, "grad_norm": 1.6629441467357413, "learning_rate": 7.307619397201625e-07, "loss": 0.3793429732322693, "step": 5270 }, { "epoch": 1.2153562370302051, "grad_norm": 1.3076578533376795, "learning_rate": 7.303947766367909e-07, "loss": 0.48186585307121277, "step": 5271 }, { "epoch": 1.2155868111597878, "grad_norm": 1.4243590091370186, "learning_rate": 7.300276527432713e-07, "loss": 0.4051778018474579, "step": 5272 }, { "epoch": 1.2158173852893706, "grad_norm": 1.6820510248806995, "learning_rate": 7.296605680929684e-07, "loss": 0.43364250659942627, "step": 5273 }, { "epoch": 1.2160479594189533, "grad_norm": 1.6130796939421093, "learning_rate": 7.292935227392414e-07, "loss": 0.4893898367881775, "step": 5274 }, { "epoch": 1.2162785335485358, "grad_norm": 1.240780138685616, "learning_rate": 7.289265167354448e-07, "loss": 0.43125462532043457, "step": 5275 }, { "epoch": 1.2165091076781185, "grad_norm": 1.6108443522760163, "learning_rate": 7.285595501349258e-07, "loss": 0.4086509943008423, "step": 5276 }, { "epoch": 1.2167396818077012, "grad_norm": 1.838256686394942, "learning_rate": 7.281926229910274e-07, "loss": 0.5176471471786499, "step": 5277 }, { "epoch": 1.216970255937284, "grad_norm": 1.8145364687667531, "learning_rate": 7.278257353570857e-07, "loss": 0.4783210754394531, "step": 5278 }, { "epoch": 1.2172008300668664, "grad_norm": 1.5012148176529632, "learning_rate": 7.274588872864322e-07, "loss": 0.4847145080566406, "step": 5279 }, { "epoch": 1.2174314041964491, "grad_norm": 1.4076947828029491, "learning_rate": 7.270920788323911e-07, "loss": 0.4691849946975708, "step": 5280 }, { "epoch": 1.2176619783260318, "grad_norm": 1.8729494542899485, "learning_rate": 7.267253100482824e-07, "loss": 0.5755687952041626, "step": 5281 }, { "epoch": 1.2178925524556146, "grad_norm": 1.3639853941099451, "learning_rate": 7.263585809874193e-07, "loss": 0.42995721101760864, "step": 5282 }, { "epoch": 1.218123126585197, "grad_norm": 1.4560966669318844, "learning_rate": 7.259918917031101e-07, "loss": 0.501590371131897, "step": 5283 }, { "epoch": 1.2183537007147798, "grad_norm": 1.5326641731074693, "learning_rate": 7.256252422486563e-07, "loss": 0.5499469041824341, "step": 5284 }, { "epoch": 1.2185842748443625, "grad_norm": 1.7075536366613502, "learning_rate": 7.25258632677354e-07, "loss": 0.4567297399044037, "step": 5285 }, { "epoch": 1.2188148489739452, "grad_norm": 1.3251311548344207, "learning_rate": 7.248920630424942e-07, "loss": 0.4046020805835724, "step": 5286 }, { "epoch": 1.2190454231035277, "grad_norm": 1.4721989927884918, "learning_rate": 7.245255333973608e-07, "loss": 0.3534840941429138, "step": 5287 }, { "epoch": 1.2192759972331104, "grad_norm": 1.4151850401024268, "learning_rate": 7.241590437952331e-07, "loss": 0.45795637369155884, "step": 5288 }, { "epoch": 1.2195065713626931, "grad_norm": 1.4921564176260302, "learning_rate": 7.237925942893839e-07, "loss": 0.3984150290489197, "step": 5289 }, { "epoch": 1.2197371454922759, "grad_norm": 1.5617581917582364, "learning_rate": 7.234261849330807e-07, "loss": 0.46833336353302, "step": 5290 }, { "epoch": 1.2199677196218583, "grad_norm": 1.6200691445613622, "learning_rate": 7.230598157795842e-07, "loss": 0.5395709276199341, "step": 5291 }, { "epoch": 1.220198293751441, "grad_norm": 1.300141768975315, "learning_rate": 7.226934868821505e-07, "loss": 0.4556152820587158, "step": 5292 }, { "epoch": 1.2204288678810238, "grad_norm": 1.5916352600329198, "learning_rate": 7.223271982940287e-07, "loss": 0.49564266204833984, "step": 5293 }, { "epoch": 1.2206594420106065, "grad_norm": 1.5492667362910795, "learning_rate": 7.219609500684625e-07, "loss": 0.5389127731323242, "step": 5294 }, { "epoch": 1.220890016140189, "grad_norm": 1.3125997254034645, "learning_rate": 7.215947422586905e-07, "loss": 0.48815661668777466, "step": 5295 }, { "epoch": 1.2211205902697717, "grad_norm": 1.6576709424363434, "learning_rate": 7.21228574917944e-07, "loss": 0.4204339385032654, "step": 5296 }, { "epoch": 1.2213511643993544, "grad_norm": 1.2807688149232648, "learning_rate": 7.208624480994494e-07, "loss": 0.39993199706077576, "step": 5297 }, { "epoch": 1.2215817385289371, "grad_norm": 1.7420778835945019, "learning_rate": 7.204963618564268e-07, "loss": 0.5679433941841125, "step": 5298 }, { "epoch": 1.2218123126585196, "grad_norm": 1.819503614929131, "learning_rate": 7.201303162420913e-07, "loss": 0.46620815992355347, "step": 5299 }, { "epoch": 1.2220428867881024, "grad_norm": 1.4667553556365653, "learning_rate": 7.1976431130965e-07, "loss": 0.44684547185897827, "step": 5300 }, { "epoch": 1.222273460917685, "grad_norm": 1.6182813529173974, "learning_rate": 7.193983471123066e-07, "loss": 0.4518858790397644, "step": 5301 }, { "epoch": 1.2225040350472678, "grad_norm": 1.497058969625444, "learning_rate": 7.190324237032569e-07, "loss": 0.3966304659843445, "step": 5302 }, { "epoch": 1.2227346091768503, "grad_norm": 1.7688402904846452, "learning_rate": 7.186665411356925e-07, "loss": 0.5541782379150391, "step": 5303 }, { "epoch": 1.222965183306433, "grad_norm": 1.5748150394963076, "learning_rate": 7.183006994627972e-07, "loss": 0.3986799120903015, "step": 5304 }, { "epoch": 1.2231957574360157, "grad_norm": 1.3179167901427211, "learning_rate": 7.1793489873775e-07, "loss": 0.485867977142334, "step": 5305 }, { "epoch": 1.2234263315655984, "grad_norm": 1.6264368495030206, "learning_rate": 7.175691390137244e-07, "loss": 0.40187692642211914, "step": 5306 }, { "epoch": 1.223656905695181, "grad_norm": 1.5085798270078894, "learning_rate": 7.172034203438864e-07, "loss": 0.4679393172264099, "step": 5307 }, { "epoch": 1.2238874798247636, "grad_norm": 1.3178949369734356, "learning_rate": 7.168377427813974e-07, "loss": 0.512301504611969, "step": 5308 }, { "epoch": 1.2241180539543464, "grad_norm": 1.4684075358167812, "learning_rate": 7.164721063794122e-07, "loss": 0.5340646505355835, "step": 5309 }, { "epoch": 1.224348628083929, "grad_norm": 1.6528941936609833, "learning_rate": 7.1610651119108e-07, "loss": 0.4757506847381592, "step": 5310 }, { "epoch": 1.2245792022135116, "grad_norm": 1.5982652868975813, "learning_rate": 7.157409572695434e-07, "loss": 0.5697519779205322, "step": 5311 }, { "epoch": 1.2248097763430943, "grad_norm": 1.4427165421847559, "learning_rate": 7.153754446679395e-07, "loss": 0.47521811723709106, "step": 5312 }, { "epoch": 1.225040350472677, "grad_norm": 1.4092560589123113, "learning_rate": 7.150099734393997e-07, "loss": 0.40484973788261414, "step": 5313 }, { "epoch": 1.2252709246022597, "grad_norm": 1.4095470452598946, "learning_rate": 7.146445436370481e-07, "loss": 0.4465969204902649, "step": 5314 }, { "epoch": 1.2255014987318422, "grad_norm": 1.5543895211488108, "learning_rate": 7.142791553140044e-07, "loss": 0.44878089427948, "step": 5315 }, { "epoch": 1.225732072861425, "grad_norm": 1.657847170962442, "learning_rate": 7.139138085233809e-07, "loss": 0.5049536228179932, "step": 5316 }, { "epoch": 1.2259626469910077, "grad_norm": 1.377588971885486, "learning_rate": 7.135485033182847e-07, "loss": 0.42945951223373413, "step": 5317 }, { "epoch": 1.2261932211205901, "grad_norm": 1.607627236207016, "learning_rate": 7.131832397518167e-07, "loss": 0.4668564200401306, "step": 5318 }, { "epoch": 1.2264237952501729, "grad_norm": 1.640684584420395, "learning_rate": 7.128180178770718e-07, "loss": 0.4691551625728607, "step": 5319 }, { "epoch": 1.2266543693797556, "grad_norm": 1.4653351758865718, "learning_rate": 7.124528377471382e-07, "loss": 0.4306211769580841, "step": 5320 }, { "epoch": 1.2268849435093383, "grad_norm": 1.7130888177954928, "learning_rate": 7.120876994150991e-07, "loss": 0.4986322522163391, "step": 5321 }, { "epoch": 1.227115517638921, "grad_norm": 1.4775997138779564, "learning_rate": 7.117226029340304e-07, "loss": 0.4058566093444824, "step": 5322 }, { "epoch": 1.2273460917685035, "grad_norm": 1.3729187298835452, "learning_rate": 7.113575483570036e-07, "loss": 0.390174925327301, "step": 5323 }, { "epoch": 1.2275766658980862, "grad_norm": 1.3070483816242904, "learning_rate": 7.109925357370821e-07, "loss": 0.38822996616363525, "step": 5324 }, { "epoch": 1.227807240027669, "grad_norm": 1.3599088173875424, "learning_rate": 7.106275651273244e-07, "loss": 0.47792741656303406, "step": 5325 }, { "epoch": 1.2280378141572514, "grad_norm": 1.52666177684785, "learning_rate": 7.102626365807833e-07, "loss": 0.5332789421081543, "step": 5326 }, { "epoch": 1.2282683882868342, "grad_norm": 1.4337525635961101, "learning_rate": 7.098977501505036e-07, "loss": 0.5325096845626831, "step": 5327 }, { "epoch": 1.2284989624164169, "grad_norm": 1.6185088994304762, "learning_rate": 7.095329058895267e-07, "loss": 0.4184231162071228, "step": 5328 }, { "epoch": 1.2287295365459996, "grad_norm": 1.7570013482364435, "learning_rate": 7.091681038508852e-07, "loss": 0.43037641048431396, "step": 5329 }, { "epoch": 1.2289601106755823, "grad_norm": 1.5067774692843796, "learning_rate": 7.088033440876078e-07, "loss": 0.4466821551322937, "step": 5330 }, { "epoch": 1.2291906848051648, "grad_norm": 1.5083021571464743, "learning_rate": 7.084386266527151e-07, "loss": 0.35853004455566406, "step": 5331 }, { "epoch": 1.2294212589347475, "grad_norm": 1.542402337323393, "learning_rate": 7.080739515992231e-07, "loss": 0.44986268877983093, "step": 5332 }, { "epoch": 1.2296518330643302, "grad_norm": 1.7104999289185845, "learning_rate": 7.07709318980141e-07, "loss": 0.3563602566719055, "step": 5333 }, { "epoch": 1.2298824071939127, "grad_norm": 1.5401970805558025, "learning_rate": 7.073447288484715e-07, "loss": 0.4505435824394226, "step": 5334 }, { "epoch": 1.2301129813234954, "grad_norm": 1.3508208021904817, "learning_rate": 7.069801812572116e-07, "loss": 0.4477807283401489, "step": 5335 }, { "epoch": 1.2303435554530782, "grad_norm": 1.5084663891676386, "learning_rate": 7.066156762593518e-07, "loss": 0.4470565915107727, "step": 5336 }, { "epoch": 1.2305741295826609, "grad_norm": 1.4627780913359043, "learning_rate": 7.062512139078773e-07, "loss": 0.4236464500427246, "step": 5337 }, { "epoch": 1.2308047037122436, "grad_norm": 1.3002436810863733, "learning_rate": 7.058867942557655e-07, "loss": 0.3221476376056671, "step": 5338 }, { "epoch": 1.231035277841826, "grad_norm": 1.818660153327524, "learning_rate": 7.055224173559891e-07, "loss": 0.502305269241333, "step": 5339 }, { "epoch": 1.2312658519714088, "grad_norm": 1.655814956644188, "learning_rate": 7.051580832615136e-07, "loss": 0.5121853351593018, "step": 5340 }, { "epoch": 1.2314964261009915, "grad_norm": 1.713071870874518, "learning_rate": 7.047937920252991e-07, "loss": 0.5468438863754272, "step": 5341 }, { "epoch": 1.231727000230574, "grad_norm": 1.2030374980808431, "learning_rate": 7.044295437002985e-07, "loss": 0.5026402473449707, "step": 5342 }, { "epoch": 1.2319575743601567, "grad_norm": 1.9445671085046203, "learning_rate": 7.040653383394596e-07, "loss": 0.5205342173576355, "step": 5343 }, { "epoch": 1.2321881484897395, "grad_norm": 1.5970504229179872, "learning_rate": 7.037011759957228e-07, "loss": 0.5184727311134338, "step": 5344 }, { "epoch": 1.2324187226193222, "grad_norm": 1.3779493729990695, "learning_rate": 7.033370567220227e-07, "loss": 0.414316862821579, "step": 5345 }, { "epoch": 1.2326492967489049, "grad_norm": 1.4260441300832385, "learning_rate": 7.029729805712885e-07, "loss": 0.42133980989456177, "step": 5346 }, { "epoch": 1.2328798708784874, "grad_norm": 1.8139584962445312, "learning_rate": 7.026089475964414e-07, "loss": 0.4888553321361542, "step": 5347 }, { "epoch": 1.23311044500807, "grad_norm": 1.3419182130591616, "learning_rate": 7.022449578503979e-07, "loss": 0.4702431857585907, "step": 5348 }, { "epoch": 1.2333410191376528, "grad_norm": 1.7237576970327266, "learning_rate": 7.018810113860672e-07, "loss": 0.5312628746032715, "step": 5349 }, { "epoch": 1.2335715932672353, "grad_norm": 1.3183810824607851, "learning_rate": 7.015171082563533e-07, "loss": 0.5297777056694031, "step": 5350 }, { "epoch": 1.233802167396818, "grad_norm": 1.4423147751678271, "learning_rate": 7.011532485141524e-07, "loss": 0.5172504782676697, "step": 5351 }, { "epoch": 1.2340327415264007, "grad_norm": 1.4663357988839691, "learning_rate": 7.007894322123556e-07, "loss": 0.4288995862007141, "step": 5352 }, { "epoch": 1.2342633156559835, "grad_norm": 1.373863251988179, "learning_rate": 7.004256594038475e-07, "loss": 0.4194108247756958, "step": 5353 }, { "epoch": 1.2344938897855662, "grad_norm": 1.6567765897983155, "learning_rate": 7.000619301415056e-07, "loss": 0.48825979232788086, "step": 5354 }, { "epoch": 1.2347244639151487, "grad_norm": 1.5674749005570563, "learning_rate": 6.99698244478202e-07, "loss": 0.4721163213253021, "step": 5355 }, { "epoch": 1.2349550380447314, "grad_norm": 1.4292932334311201, "learning_rate": 6.993346024668019e-07, "loss": 0.5104520916938782, "step": 5356 }, { "epoch": 1.235185612174314, "grad_norm": 1.757397862406759, "learning_rate": 6.98971004160165e-07, "loss": 0.5257378816604614, "step": 5357 }, { "epoch": 1.2354161863038966, "grad_norm": 1.5756368498047397, "learning_rate": 6.986074496111429e-07, "loss": 0.5624911785125732, "step": 5358 }, { "epoch": 1.2356467604334793, "grad_norm": 1.4832170020848512, "learning_rate": 6.982439388725828e-07, "loss": 0.5186502933502197, "step": 5359 }, { "epoch": 1.235877334563062, "grad_norm": 1.4333093290057806, "learning_rate": 6.978804719973241e-07, "loss": 0.42711856961250305, "step": 5360 }, { "epoch": 1.2361079086926448, "grad_norm": 1.5710112274218073, "learning_rate": 6.975170490382013e-07, "loss": 0.525848388671875, "step": 5361 }, { "epoch": 1.2363384828222275, "grad_norm": 1.475742371846223, "learning_rate": 6.971536700480405e-07, "loss": 0.41279107332229614, "step": 5362 }, { "epoch": 1.23656905695181, "grad_norm": 1.381610773190275, "learning_rate": 6.967903350796632e-07, "loss": 0.38868075609207153, "step": 5363 }, { "epoch": 1.2367996310813927, "grad_norm": 1.2852056850014901, "learning_rate": 6.964270441858837e-07, "loss": 0.41875284910202026, "step": 5364 }, { "epoch": 1.2370302052109754, "grad_norm": 1.6506819982730945, "learning_rate": 6.960637974195096e-07, "loss": 0.4754808843135834, "step": 5365 }, { "epoch": 1.237260779340558, "grad_norm": 1.367170455716087, "learning_rate": 6.957005948333434e-07, "loss": 0.5073249340057373, "step": 5366 }, { "epoch": 1.2374913534701406, "grad_norm": 1.4682970250918908, "learning_rate": 6.953374364801792e-07, "loss": 0.4545915126800537, "step": 5367 }, { "epoch": 1.2377219275997233, "grad_norm": 1.4664699450973697, "learning_rate": 6.949743224128064e-07, "loss": 0.42797422409057617, "step": 5368 }, { "epoch": 1.237952501729306, "grad_norm": 1.7409270878989862, "learning_rate": 6.946112526840071e-07, "loss": 0.570556104183197, "step": 5369 }, { "epoch": 1.2381830758588885, "grad_norm": 1.21807525986395, "learning_rate": 6.942482273465577e-07, "loss": 0.3866136074066162, "step": 5370 }, { "epoch": 1.2384136499884713, "grad_norm": 1.385922338157159, "learning_rate": 6.938852464532267e-07, "loss": 0.3716529309749603, "step": 5371 }, { "epoch": 1.238644224118054, "grad_norm": 1.5756601150848535, "learning_rate": 6.935223100567776e-07, "loss": 0.4781096577644348, "step": 5372 }, { "epoch": 1.2388747982476367, "grad_norm": 1.5023911555765588, "learning_rate": 6.931594182099671e-07, "loss": 0.4262877106666565, "step": 5373 }, { "epoch": 1.2391053723772192, "grad_norm": 1.6023295142223875, "learning_rate": 6.927965709655444e-07, "loss": 0.49859267473220825, "step": 5374 }, { "epoch": 1.239335946506802, "grad_norm": 1.8550612096678925, "learning_rate": 6.924337683762539e-07, "loss": 0.4710119664669037, "step": 5375 }, { "epoch": 1.2395665206363846, "grad_norm": 1.518585467890365, "learning_rate": 6.92071010494832e-07, "loss": 0.4974974989891052, "step": 5376 }, { "epoch": 1.2397970947659673, "grad_norm": 2.029509938602293, "learning_rate": 6.917082973740098e-07, "loss": 0.4118514657020569, "step": 5377 }, { "epoch": 1.2400276688955498, "grad_norm": 1.391922482329176, "learning_rate": 6.913456290665106e-07, "loss": 0.4223165214061737, "step": 5378 }, { "epoch": 1.2402582430251325, "grad_norm": 1.5760276199817416, "learning_rate": 6.909830056250526e-07, "loss": 0.4896865487098694, "step": 5379 }, { "epoch": 1.2404888171547153, "grad_norm": 1.35318854532684, "learning_rate": 6.906204271023463e-07, "loss": 0.36112266778945923, "step": 5380 }, { "epoch": 1.240719391284298, "grad_norm": 1.4255868593911465, "learning_rate": 6.902578935510969e-07, "loss": 0.4665502905845642, "step": 5381 }, { "epoch": 1.2409499654138805, "grad_norm": 1.6036447338223971, "learning_rate": 6.898954050240013e-07, "loss": 0.46059858798980713, "step": 5382 }, { "epoch": 1.2411805395434632, "grad_norm": 1.4844055015741944, "learning_rate": 6.895329615737515e-07, "loss": 0.46149420738220215, "step": 5383 }, { "epoch": 1.241411113673046, "grad_norm": 1.5602784439666317, "learning_rate": 6.891705632530327e-07, "loss": 0.42226743698120117, "step": 5384 }, { "epoch": 1.2416416878026286, "grad_norm": 1.4308699177023212, "learning_rate": 6.88808210114522e-07, "loss": 0.45789939165115356, "step": 5385 }, { "epoch": 1.2418722619322111, "grad_norm": 1.5754200685163184, "learning_rate": 6.884459022108922e-07, "loss": 0.44569891691207886, "step": 5386 }, { "epoch": 1.2421028360617938, "grad_norm": 1.4099412845136035, "learning_rate": 6.880836395948078e-07, "loss": 0.3971112370491028, "step": 5387 }, { "epoch": 1.2423334101913766, "grad_norm": 1.6636550459216706, "learning_rate": 6.877214223189278e-07, "loss": 0.46052566170692444, "step": 5388 }, { "epoch": 1.2425639843209593, "grad_norm": 1.2735689149473257, "learning_rate": 6.873592504359037e-07, "loss": 0.42730599641799927, "step": 5389 }, { "epoch": 1.2427945584505418, "grad_norm": 1.5806143555224212, "learning_rate": 6.869971239983814e-07, "loss": 0.4391734004020691, "step": 5390 }, { "epoch": 1.2430251325801245, "grad_norm": 1.5314248582389964, "learning_rate": 6.866350430589989e-07, "loss": 0.4523593485355377, "step": 5391 }, { "epoch": 1.2432557067097072, "grad_norm": 1.587550694342246, "learning_rate": 6.86273007670389e-07, "loss": 0.5398315787315369, "step": 5392 }, { "epoch": 1.24348628083929, "grad_norm": 1.2298139407771986, "learning_rate": 6.859110178851767e-07, "loss": 0.40480807423591614, "step": 5393 }, { "epoch": 1.2437168549688724, "grad_norm": 1.4233815325100456, "learning_rate": 6.855490737559816e-07, "loss": 0.42483675479888916, "step": 5394 }, { "epoch": 1.2439474290984551, "grad_norm": 1.611497963721617, "learning_rate": 6.851871753354153e-07, "loss": 0.39951619505882263, "step": 5395 }, { "epoch": 1.2441780032280378, "grad_norm": 1.5084898015563448, "learning_rate": 6.848253226760833e-07, "loss": 0.48650771379470825, "step": 5396 }, { "epoch": 1.2444085773576206, "grad_norm": 1.5899141960647352, "learning_rate": 6.844635158305853e-07, "loss": 0.5377830266952515, "step": 5397 }, { "epoch": 1.244639151487203, "grad_norm": 1.667763606347776, "learning_rate": 6.841017548515127e-07, "loss": 0.4365614950656891, "step": 5398 }, { "epoch": 1.2448697256167858, "grad_norm": 1.2560105349082187, "learning_rate": 6.837400397914519e-07, "loss": 0.39739400148391724, "step": 5399 }, { "epoch": 1.2451002997463685, "grad_norm": 1.3287360038901976, "learning_rate": 6.833783707029812e-07, "loss": 0.4005683660507202, "step": 5400 }, { "epoch": 1.2453308738759512, "grad_norm": 1.6646043641444999, "learning_rate": 6.830167476386737e-07, "loss": 0.5635108351707458, "step": 5401 }, { "epoch": 1.2455614480055337, "grad_norm": 1.6642180514990483, "learning_rate": 6.82655170651094e-07, "loss": 0.4332388639450073, "step": 5402 }, { "epoch": 1.2457920221351164, "grad_norm": 1.525164084943155, "learning_rate": 6.822936397928015e-07, "loss": 0.47506433725357056, "step": 5403 }, { "epoch": 1.2460225962646991, "grad_norm": 1.600563207739989, "learning_rate": 6.819321551163486e-07, "loss": 0.5081777572631836, "step": 5404 }, { "epoch": 1.2462531703942819, "grad_norm": 1.6650056699718765, "learning_rate": 6.815707166742801e-07, "loss": 0.4038957953453064, "step": 5405 }, { "epoch": 1.2464837445238643, "grad_norm": 1.759676797230376, "learning_rate": 6.812093245191354e-07, "loss": 0.4665706753730774, "step": 5406 }, { "epoch": 1.246714318653447, "grad_norm": 1.8957165771048585, "learning_rate": 6.808479787034459e-07, "loss": 0.45610785484313965, "step": 5407 }, { "epoch": 1.2469448927830298, "grad_norm": 1.443572019443965, "learning_rate": 6.804866792797377e-07, "loss": 0.4334958493709564, "step": 5408 }, { "epoch": 1.2471754669126125, "grad_norm": 1.4719822396111175, "learning_rate": 6.801254263005283e-07, "loss": 0.5505996942520142, "step": 5409 }, { "epoch": 1.247406041042195, "grad_norm": 1.5261896109132582, "learning_rate": 6.797642198183303e-07, "loss": 0.5589424967765808, "step": 5410 }, { "epoch": 1.2476366151717777, "grad_norm": 1.892082521677576, "learning_rate": 6.794030598856483e-07, "loss": 0.48142847418785095, "step": 5411 }, { "epoch": 1.2478671893013604, "grad_norm": 1.6606812394072976, "learning_rate": 6.790419465549811e-07, "loss": 0.5549830198287964, "step": 5412 }, { "epoch": 1.2480977634309431, "grad_norm": 1.6097248774465256, "learning_rate": 6.786808798788193e-07, "loss": 0.5974072217941284, "step": 5413 }, { "epoch": 1.2483283375605256, "grad_norm": 1.3333137403479542, "learning_rate": 6.783198599096484e-07, "loss": 0.38189029693603516, "step": 5414 }, { "epoch": 1.2485589116901084, "grad_norm": 1.4543286006354934, "learning_rate": 6.779588866999459e-07, "loss": 0.41150039434432983, "step": 5415 }, { "epoch": 1.248789485819691, "grad_norm": 1.451215833026304, "learning_rate": 6.775979603021828e-07, "loss": 0.4291636645793915, "step": 5416 }, { "epoch": 1.2490200599492738, "grad_norm": 1.2798211834451962, "learning_rate": 6.772370807688242e-07, "loss": 0.45324140787124634, "step": 5417 }, { "epoch": 1.2492506340788563, "grad_norm": 1.3895968147090427, "learning_rate": 6.768762481523262e-07, "loss": 0.4748731851577759, "step": 5418 }, { "epoch": 1.249481208208439, "grad_norm": 1.618628812481624, "learning_rate": 6.765154625051408e-07, "loss": 0.43602505326271057, "step": 5419 }, { "epoch": 1.2497117823380217, "grad_norm": 1.4027608933739075, "learning_rate": 6.761547238797112e-07, "loss": 0.49135684967041016, "step": 5420 }, { "epoch": 1.2499423564676044, "grad_norm": 1.6315360373382408, "learning_rate": 6.757940323284747e-07, "loss": 0.47508272528648376, "step": 5421 }, { "epoch": 1.250172930597187, "grad_norm": 1.612865868213556, "learning_rate": 6.754333879038611e-07, "loss": 0.399259090423584, "step": 5422 }, { "epoch": 1.2504035047267696, "grad_norm": 1.6878741312884291, "learning_rate": 6.750727906582941e-07, "loss": 0.426364004611969, "step": 5423 }, { "epoch": 1.2506340788563524, "grad_norm": 1.4584807010931917, "learning_rate": 6.747122406441903e-07, "loss": 0.4641951322555542, "step": 5424 }, { "epoch": 1.250864652985935, "grad_norm": 1.3880451781756755, "learning_rate": 6.743517379139585e-07, "loss": 0.35008323192596436, "step": 5425 }, { "epoch": 1.2510952271155176, "grad_norm": 1.4485633708895984, "learning_rate": 6.739912825200022e-07, "loss": 0.49627771973609924, "step": 5426 }, { "epoch": 1.2513258012451003, "grad_norm": 1.628398042874366, "learning_rate": 6.736308745147168e-07, "loss": 0.4926851987838745, "step": 5427 }, { "epoch": 1.251556375374683, "grad_norm": 1.622960147434406, "learning_rate": 6.732705139504917e-07, "loss": 0.44777536392211914, "step": 5428 }, { "epoch": 1.2517869495042655, "grad_norm": 1.6523545202218224, "learning_rate": 6.729102008797085e-07, "loss": 0.39160430431365967, "step": 5429 }, { "epoch": 1.2520175236338482, "grad_norm": 1.5184849781676724, "learning_rate": 6.725499353547426e-07, "loss": 0.4585273861885071, "step": 5430 }, { "epoch": 1.252248097763431, "grad_norm": 1.5327675196324342, "learning_rate": 6.721897174279621e-07, "loss": 0.5245224237442017, "step": 5431 }, { "epoch": 1.2524786718930137, "grad_norm": 1.5257069000403813, "learning_rate": 6.718295471517288e-07, "loss": 0.4217349886894226, "step": 5432 }, { "epoch": 1.2527092460225964, "grad_norm": 1.4826939266004133, "learning_rate": 6.714694245783963e-07, "loss": 0.4944193661212921, "step": 5433 }, { "epoch": 1.2529398201521789, "grad_norm": 1.387839760206308, "learning_rate": 6.711093497603127e-07, "loss": 0.5058057904243469, "step": 5434 }, { "epoch": 1.2531703942817616, "grad_norm": 1.381621888753065, "learning_rate": 6.707493227498186e-07, "loss": 0.45669037103652954, "step": 5435 }, { "epoch": 1.2534009684113443, "grad_norm": 1.5997486257834712, "learning_rate": 6.703893435992469e-07, "loss": 0.4248945116996765, "step": 5436 }, { "epoch": 1.2536315425409268, "grad_norm": 1.6056111266165571, "learning_rate": 6.700294123609249e-07, "loss": 0.3984343707561493, "step": 5437 }, { "epoch": 1.2538621166705095, "grad_norm": 1.5349078061254786, "learning_rate": 6.696695290871715e-07, "loss": 0.435299813747406, "step": 5438 }, { "epoch": 1.2540926908000922, "grad_norm": 1.6277363060500583, "learning_rate": 6.693096938303002e-07, "loss": 0.4225304126739502, "step": 5439 }, { "epoch": 1.254323264929675, "grad_norm": 1.6495416759002697, "learning_rate": 6.689499066426161e-07, "loss": 0.4686669111251831, "step": 5440 }, { "epoch": 1.2545538390592577, "grad_norm": 1.5168957851404996, "learning_rate": 6.685901675764186e-07, "loss": 0.45163553953170776, "step": 5441 }, { "epoch": 1.2547844131888402, "grad_norm": 1.3593822737620262, "learning_rate": 6.682304766839986e-07, "loss": 0.44223567843437195, "step": 5442 }, { "epoch": 1.2550149873184229, "grad_norm": 1.5363469724843986, "learning_rate": 6.678708340176413e-07, "loss": 0.4008648991584778, "step": 5443 }, { "epoch": 1.2552455614480056, "grad_norm": 1.4199248627467993, "learning_rate": 6.675112396296245e-07, "loss": 0.4500792324542999, "step": 5444 }, { "epoch": 1.255476135577588, "grad_norm": 1.490145734356762, "learning_rate": 6.671516935722183e-07, "loss": 0.42558690905570984, "step": 5445 }, { "epoch": 1.2557067097071708, "grad_norm": 1.7098682543926618, "learning_rate": 6.667921958976871e-07, "loss": 0.4676043391227722, "step": 5446 }, { "epoch": 1.2559372838367535, "grad_norm": 1.8041492407407758, "learning_rate": 6.664327466582869e-07, "loss": 0.44114184379577637, "step": 5447 }, { "epoch": 1.2561678579663362, "grad_norm": 1.6102069805165957, "learning_rate": 6.660733459062679e-07, "loss": 0.33865463733673096, "step": 5448 }, { "epoch": 1.256398432095919, "grad_norm": 1.8619975614063338, "learning_rate": 6.65713993693872e-07, "loss": 0.5397414565086365, "step": 5449 }, { "epoch": 1.2566290062255014, "grad_norm": 1.4730562973077854, "learning_rate": 6.653546900733352e-07, "loss": 0.49249517917633057, "step": 5450 }, { "epoch": 1.2568595803550842, "grad_norm": 1.5757041605280757, "learning_rate": 6.649954350968855e-07, "loss": 0.5438433885574341, "step": 5451 }, { "epoch": 1.2570901544846669, "grad_norm": 1.4727448576353426, "learning_rate": 6.646362288167448e-07, "loss": 0.43725037574768066, "step": 5452 }, { "epoch": 1.2573207286142494, "grad_norm": 1.5159104216766552, "learning_rate": 6.642770712851269e-07, "loss": 0.5369226336479187, "step": 5453 }, { "epoch": 1.257551302743832, "grad_norm": 1.4915531986930697, "learning_rate": 6.63917962554239e-07, "loss": 0.45022842288017273, "step": 5454 }, { "epoch": 1.2577818768734148, "grad_norm": 1.6219974371712227, "learning_rate": 6.635589026762818e-07, "loss": 0.42483362555503845, "step": 5455 }, { "epoch": 1.2580124510029975, "grad_norm": 1.4115832140490556, "learning_rate": 6.631998917034474e-07, "loss": 0.4909497797489166, "step": 5456 }, { "epoch": 1.2582430251325802, "grad_norm": 1.3159817254483799, "learning_rate": 6.628409296879223e-07, "loss": 0.4927433431148529, "step": 5457 }, { "epoch": 1.2584735992621627, "grad_norm": 1.550356576361105, "learning_rate": 6.624820166818847e-07, "loss": 0.4452761113643646, "step": 5458 }, { "epoch": 1.2587041733917455, "grad_norm": 1.5683413746620685, "learning_rate": 6.62123152737507e-07, "loss": 0.4637982249259949, "step": 5459 }, { "epoch": 1.2589347475213282, "grad_norm": 1.3293268937895057, "learning_rate": 6.617643379069532e-07, "loss": 0.3189438581466675, "step": 5460 }, { "epoch": 1.2591653216509107, "grad_norm": 1.3296675722252447, "learning_rate": 6.614055722423808e-07, "loss": 0.420698881149292, "step": 5461 }, { "epoch": 1.2593958957804934, "grad_norm": 1.5202476608747133, "learning_rate": 6.610468557959398e-07, "loss": 0.5187642574310303, "step": 5462 }, { "epoch": 1.259626469910076, "grad_norm": 1.4954844764147424, "learning_rate": 6.606881886197741e-07, "loss": 0.48519381880760193, "step": 5463 }, { "epoch": 1.2598570440396588, "grad_norm": 1.4755140585184632, "learning_rate": 6.60329570766019e-07, "loss": 0.3930806815624237, "step": 5464 }, { "epoch": 1.2600876181692415, "grad_norm": 1.8617928902566707, "learning_rate": 6.599710022868027e-07, "loss": 0.4890612065792084, "step": 5465 }, { "epoch": 1.260318192298824, "grad_norm": 1.2781262224531547, "learning_rate": 6.596124832342476e-07, "loss": 0.4202774465084076, "step": 5466 }, { "epoch": 1.2605487664284067, "grad_norm": 1.5196012608537903, "learning_rate": 6.592540136604674e-07, "loss": 0.5053761005401611, "step": 5467 }, { "epoch": 1.2607793405579895, "grad_norm": 1.4874107682553572, "learning_rate": 6.588955936175702e-07, "loss": 0.4827175736427307, "step": 5468 }, { "epoch": 1.261009914687572, "grad_norm": 1.4659080652243894, "learning_rate": 6.585372231576551e-07, "loss": 0.45179229974746704, "step": 5469 }, { "epoch": 1.2612404888171547, "grad_norm": 1.3781712796058982, "learning_rate": 6.581789023328155e-07, "loss": 0.4024949073791504, "step": 5470 }, { "epoch": 1.2614710629467374, "grad_norm": 1.7288759385339574, "learning_rate": 6.578206311951363e-07, "loss": 0.48839491605758667, "step": 5471 }, { "epoch": 1.26170163707632, "grad_norm": 1.4778086795689929, "learning_rate": 6.574624097966968e-07, "loss": 0.45897620916366577, "step": 5472 }, { "epoch": 1.2619322112059028, "grad_norm": 1.5548512112712307, "learning_rate": 6.571042381895671e-07, "loss": 0.48471882939338684, "step": 5473 }, { "epoch": 1.2621627853354853, "grad_norm": 2.0045804163216414, "learning_rate": 6.567461164258117e-07, "loss": 0.44159913063049316, "step": 5474 }, { "epoch": 1.262393359465068, "grad_norm": 1.5752243442253915, "learning_rate": 6.563880445574872e-07, "loss": 0.39186012744903564, "step": 5475 }, { "epoch": 1.2626239335946507, "grad_norm": 1.818057995697113, "learning_rate": 6.560300226366425e-07, "loss": 0.5332233905792236, "step": 5476 }, { "epoch": 1.2628545077242332, "grad_norm": 1.350222227503923, "learning_rate": 6.556720507153201e-07, "loss": 0.4252084195613861, "step": 5477 }, { "epoch": 1.263085081853816, "grad_norm": 1.4204993118440263, "learning_rate": 6.553141288455548e-07, "loss": 0.36927711963653564, "step": 5478 }, { "epoch": 1.2633156559833987, "grad_norm": 1.5676826878414558, "learning_rate": 6.549562570793745e-07, "loss": 0.4405602216720581, "step": 5479 }, { "epoch": 1.2635462301129814, "grad_norm": 1.5245742985153417, "learning_rate": 6.545984354687986e-07, "loss": 0.5691590309143066, "step": 5480 }, { "epoch": 1.2637768042425641, "grad_norm": 1.468644623890153, "learning_rate": 6.542406640658411e-07, "loss": 0.3750354051589966, "step": 5481 }, { "epoch": 1.2640073783721466, "grad_norm": 1.5266320276968284, "learning_rate": 6.538829429225068e-07, "loss": 0.47816041111946106, "step": 5482 }, { "epoch": 1.2642379525017293, "grad_norm": 1.4911563737024116, "learning_rate": 6.535252720907951e-07, "loss": 0.42470186948776245, "step": 5483 }, { "epoch": 1.264468526631312, "grad_norm": 1.4256480441382235, "learning_rate": 6.531676516226961e-07, "loss": 0.37356555461883545, "step": 5484 }, { "epoch": 1.2646991007608945, "grad_norm": 1.4604810104028516, "learning_rate": 6.528100815701942e-07, "loss": 0.4895293116569519, "step": 5485 }, { "epoch": 1.2649296748904773, "grad_norm": 1.9575945537740915, "learning_rate": 6.524525619852656e-07, "loss": 0.4963725805282593, "step": 5486 }, { "epoch": 1.26516024902006, "grad_norm": 1.7629474018170985, "learning_rate": 6.520950929198792e-07, "loss": 0.5443764925003052, "step": 5487 }, { "epoch": 1.2653908231496427, "grad_norm": 1.2536482779264142, "learning_rate": 6.517376744259972e-07, "loss": 0.400549054145813, "step": 5488 }, { "epoch": 1.2656213972792254, "grad_norm": 1.8850482793273033, "learning_rate": 6.513803065555736e-07, "loss": 0.46384042501449585, "step": 5489 }, { "epoch": 1.265851971408808, "grad_norm": 1.4893040501119004, "learning_rate": 6.510229893605556e-07, "loss": 0.5044240951538086, "step": 5490 }, { "epoch": 1.2660825455383906, "grad_norm": 1.477450831039122, "learning_rate": 6.506657228928827e-07, "loss": 0.4544214904308319, "step": 5491 }, { "epoch": 1.2663131196679733, "grad_norm": 1.441487086349296, "learning_rate": 6.503085072044878e-07, "loss": 0.36688071489334106, "step": 5492 }, { "epoch": 1.2665436937975558, "grad_norm": 1.4594163949727883, "learning_rate": 6.499513423472951e-07, "loss": 0.4058225154876709, "step": 5493 }, { "epoch": 1.2667742679271385, "grad_norm": 1.4647938941101153, "learning_rate": 6.495942283732225e-07, "loss": 0.36429229378700256, "step": 5494 }, { "epoch": 1.2670048420567213, "grad_norm": 1.7674965095028434, "learning_rate": 6.492371653341802e-07, "loss": 0.47116899490356445, "step": 5495 }, { "epoch": 1.267235416186304, "grad_norm": 1.4923904627456126, "learning_rate": 6.488801532820706e-07, "loss": 0.4437965750694275, "step": 5496 }, { "epoch": 1.2674659903158867, "grad_norm": 1.5533994295939695, "learning_rate": 6.485231922687893e-07, "loss": 0.4810328483581543, "step": 5497 }, { "epoch": 1.2676965644454692, "grad_norm": 1.4632129166419525, "learning_rate": 6.481662823462238e-07, "loss": 0.362907350063324, "step": 5498 }, { "epoch": 1.267927138575052, "grad_norm": 1.375729756251652, "learning_rate": 6.478094235662554e-07, "loss": 0.43647170066833496, "step": 5499 }, { "epoch": 1.2681577127046346, "grad_norm": 1.422215620145209, "learning_rate": 6.474526159807563e-07, "loss": 0.4566631317138672, "step": 5500 }, { "epoch": 1.2683882868342171, "grad_norm": 1.5097982290449063, "learning_rate": 6.470958596415925e-07, "loss": 0.3940081298351288, "step": 5501 }, { "epoch": 1.2686188609637998, "grad_norm": 1.617526881385646, "learning_rate": 6.46739154600622e-07, "loss": 0.5275603532791138, "step": 5502 }, { "epoch": 1.2688494350933825, "grad_norm": 1.846449658895825, "learning_rate": 6.463825009096959e-07, "loss": 0.42546436190605164, "step": 5503 }, { "epoch": 1.2690800092229653, "grad_norm": 1.6068032996774941, "learning_rate": 6.460258986206566e-07, "loss": 0.3833821713924408, "step": 5504 }, { "epoch": 1.2693105833525478, "grad_norm": 1.4806797403979666, "learning_rate": 6.456693477853408e-07, "loss": 0.5056046843528748, "step": 5505 }, { "epoch": 1.2695411574821305, "grad_norm": 1.6345259734279236, "learning_rate": 6.453128484555764e-07, "loss": 0.3544192910194397, "step": 5506 }, { "epoch": 1.2697717316117132, "grad_norm": 1.684231386275673, "learning_rate": 6.449564006831836e-07, "loss": 0.47164130210876465, "step": 5507 }, { "epoch": 1.2700023057412957, "grad_norm": 1.3334241214641123, "learning_rate": 6.446000045199765e-07, "loss": 0.4580638110637665, "step": 5508 }, { "epoch": 1.2702328798708784, "grad_norm": 1.2809631136030655, "learning_rate": 6.442436600177606e-07, "loss": 0.45945844054222107, "step": 5509 }, { "epoch": 1.2704634540004611, "grad_norm": 1.447660138842985, "learning_rate": 6.438873672283343e-07, "loss": 0.5539910793304443, "step": 5510 }, { "epoch": 1.2706940281300438, "grad_norm": 1.6550705344684873, "learning_rate": 6.43531126203488e-07, "loss": 0.4661790132522583, "step": 5511 }, { "epoch": 1.2709246022596266, "grad_norm": 1.7015547164246037, "learning_rate": 6.431749369950057e-07, "loss": 0.3781178891658783, "step": 5512 }, { "epoch": 1.271155176389209, "grad_norm": 1.571227420481097, "learning_rate": 6.428187996546621e-07, "loss": 0.4858461618423462, "step": 5513 }, { "epoch": 1.2713857505187918, "grad_norm": 1.5308384830726272, "learning_rate": 6.424627142342262e-07, "loss": 0.5003963708877563, "step": 5514 }, { "epoch": 1.2716163246483745, "grad_norm": 1.3605664168425382, "learning_rate": 6.421066807854584e-07, "loss": 0.4620795249938965, "step": 5515 }, { "epoch": 1.271846898777957, "grad_norm": 1.385915858471925, "learning_rate": 6.417506993601114e-07, "loss": 0.43998581171035767, "step": 5516 }, { "epoch": 1.2720774729075397, "grad_norm": 1.6777446711260993, "learning_rate": 6.413947700099311e-07, "loss": 0.5204107165336609, "step": 5517 }, { "epoch": 1.2723080470371224, "grad_norm": 1.5515853600398104, "learning_rate": 6.410388927866551e-07, "loss": 0.46675950288772583, "step": 5518 }, { "epoch": 1.2725386211667051, "grad_norm": 1.4020610518461032, "learning_rate": 6.406830677420146e-07, "loss": 0.4002436101436615, "step": 5519 }, { "epoch": 1.2727691952962878, "grad_norm": 1.6847281008342299, "learning_rate": 6.403272949277312e-07, "loss": 0.4051012396812439, "step": 5520 }, { "epoch": 1.2729997694258703, "grad_norm": 1.4780078562694616, "learning_rate": 6.399715743955209e-07, "loss": 0.4847797751426697, "step": 5521 }, { "epoch": 1.273230343555453, "grad_norm": 1.6389704995828815, "learning_rate": 6.396159061970907e-07, "loss": 0.4742053151130676, "step": 5522 }, { "epoch": 1.2734609176850358, "grad_norm": 1.4123933831310747, "learning_rate": 6.392602903841415e-07, "loss": 0.44291001558303833, "step": 5523 }, { "epoch": 1.2736914918146183, "grad_norm": 1.438016627678946, "learning_rate": 6.389047270083646e-07, "loss": 0.38993996381759644, "step": 5524 }, { "epoch": 1.273922065944201, "grad_norm": 1.5621491080936318, "learning_rate": 6.385492161214454e-07, "loss": 0.5045995116233826, "step": 5525 }, { "epoch": 1.2741526400737837, "grad_norm": 1.4769511790871679, "learning_rate": 6.381937577750611e-07, "loss": 0.4377788305282593, "step": 5526 }, { "epoch": 1.2743832142033664, "grad_norm": 1.470801087764595, "learning_rate": 6.378383520208806e-07, "loss": 0.5363353490829468, "step": 5527 }, { "epoch": 1.2746137883329491, "grad_norm": 1.340047582641372, "learning_rate": 6.374829989105661e-07, "loss": 0.42230546474456787, "step": 5528 }, { "epoch": 1.2748443624625316, "grad_norm": 1.2882420810653734, "learning_rate": 6.371276984957715e-07, "loss": 0.39565908908843994, "step": 5529 }, { "epoch": 1.2750749365921143, "grad_norm": 1.3633189139651096, "learning_rate": 6.36772450828144e-07, "loss": 0.4375323951244354, "step": 5530 }, { "epoch": 1.275305510721697, "grad_norm": 1.5028848525750826, "learning_rate": 6.364172559593215e-07, "loss": 0.4901241660118103, "step": 5531 }, { "epoch": 1.2755360848512796, "grad_norm": 1.3653729298225772, "learning_rate": 6.360621139409359e-07, "loss": 0.4108780026435852, "step": 5532 }, { "epoch": 1.2757666589808623, "grad_norm": 1.4800363393725149, "learning_rate": 6.357070248246102e-07, "loss": 0.43631279468536377, "step": 5533 }, { "epoch": 1.275997233110445, "grad_norm": 1.5982504223136969, "learning_rate": 6.353519886619607e-07, "loss": 0.4623757004737854, "step": 5534 }, { "epoch": 1.2762278072400277, "grad_norm": 1.5284512936045929, "learning_rate": 6.349970055045954e-07, "loss": 0.41303062438964844, "step": 5535 }, { "epoch": 1.2764583813696104, "grad_norm": 1.7689201212047627, "learning_rate": 6.34642075404114e-07, "loss": 0.5157878994941711, "step": 5536 }, { "epoch": 1.276688955499193, "grad_norm": 1.6093049161057067, "learning_rate": 6.342871984121103e-07, "loss": 0.41295093297958374, "step": 5537 }, { "epoch": 1.2769195296287756, "grad_norm": 1.4185213028911483, "learning_rate": 6.339323745801682e-07, "loss": 0.4636460542678833, "step": 5538 }, { "epoch": 1.2771501037583584, "grad_norm": 1.44057433861511, "learning_rate": 6.335776039598659e-07, "loss": 0.45273804664611816, "step": 5539 }, { "epoch": 1.2773806778879409, "grad_norm": 1.7212686324453035, "learning_rate": 6.332228866027721e-07, "loss": 0.4562758803367615, "step": 5540 }, { "epoch": 1.2776112520175236, "grad_norm": 1.5821328258880776, "learning_rate": 6.328682225604491e-07, "loss": 0.3162837326526642, "step": 5541 }, { "epoch": 1.2778418261471063, "grad_norm": 1.4226618207277133, "learning_rate": 6.325136118844504e-07, "loss": 0.48594871163368225, "step": 5542 }, { "epoch": 1.278072400276689, "grad_norm": 1.398820126458318, "learning_rate": 6.321590546263231e-07, "loss": 0.4346798360347748, "step": 5543 }, { "epoch": 1.2783029744062717, "grad_norm": 1.7945463027279862, "learning_rate": 6.318045508376046e-07, "loss": 0.5133204460144043, "step": 5544 }, { "epoch": 1.2785335485358542, "grad_norm": 1.6462955147402891, "learning_rate": 6.314501005698266e-07, "loss": 0.40679338574409485, "step": 5545 }, { "epoch": 1.278764122665437, "grad_norm": 1.341754342655084, "learning_rate": 6.310957038745117e-07, "loss": 0.363874614238739, "step": 5546 }, { "epoch": 1.2789946967950196, "grad_norm": 1.3013776361069782, "learning_rate": 6.307413608031746e-07, "loss": 0.43020665645599365, "step": 5547 }, { "epoch": 1.2792252709246021, "grad_norm": 1.301444097702827, "learning_rate": 6.303870714073233e-07, "loss": 0.5280083417892456, "step": 5548 }, { "epoch": 1.2794558450541849, "grad_norm": 1.803757705570539, "learning_rate": 6.300328357384568e-07, "loss": 0.4584185481071472, "step": 5549 }, { "epoch": 1.2796864191837676, "grad_norm": 1.4682285924702114, "learning_rate": 6.296786538480675e-07, "loss": 0.4068162441253662, "step": 5550 }, { "epoch": 1.2799169933133503, "grad_norm": 1.361515758715701, "learning_rate": 6.293245257876387e-07, "loss": 0.4336085915565491, "step": 5551 }, { "epoch": 1.280147567442933, "grad_norm": 1.4906971509519245, "learning_rate": 6.289704516086468e-07, "loss": 0.4932886064052582, "step": 5552 }, { "epoch": 1.2803781415725155, "grad_norm": 1.3660207414526373, "learning_rate": 6.2861643136256e-07, "loss": 0.437292218208313, "step": 5553 }, { "epoch": 1.2806087157020982, "grad_norm": 1.5017461161180483, "learning_rate": 6.28262465100839e-07, "loss": 0.4131085276603699, "step": 5554 }, { "epoch": 1.280839289831681, "grad_norm": 1.441603184912447, "learning_rate": 6.27908552874936e-07, "loss": 0.4146266579627991, "step": 5555 }, { "epoch": 1.2810698639612634, "grad_norm": 1.6115588407174422, "learning_rate": 6.275546947362957e-07, "loss": 0.4778539538383484, "step": 5556 }, { "epoch": 1.2813004380908461, "grad_norm": 1.4722189673341872, "learning_rate": 6.272008907363555e-07, "loss": 0.3989019989967346, "step": 5557 }, { "epoch": 1.2815310122204289, "grad_norm": 1.5188067628601776, "learning_rate": 6.268471409265436e-07, "loss": 0.4433528184890747, "step": 5558 }, { "epoch": 1.2817615863500116, "grad_norm": 1.4551631195697798, "learning_rate": 6.264934453582817e-07, "loss": 0.46929931640625, "step": 5559 }, { "epoch": 1.2819921604795943, "grad_norm": 1.749202490253535, "learning_rate": 6.261398040829829e-07, "loss": 0.4908202886581421, "step": 5560 }, { "epoch": 1.2822227346091768, "grad_norm": 1.766310768413501, "learning_rate": 6.257862171520528e-07, "loss": 0.44195377826690674, "step": 5561 }, { "epoch": 1.2824533087387595, "grad_norm": 1.8716445464357578, "learning_rate": 6.254326846168882e-07, "loss": 0.548696756362915, "step": 5562 }, { "epoch": 1.2826838828683422, "grad_norm": 1.6355324229757326, "learning_rate": 6.250792065288794e-07, "loss": 0.4015994668006897, "step": 5563 }, { "epoch": 1.2829144569979247, "grad_norm": 1.5798153885574688, "learning_rate": 6.247257829394074e-07, "loss": 0.4281688928604126, "step": 5564 }, { "epoch": 1.2831450311275074, "grad_norm": 1.2159971773233473, "learning_rate": 6.243724138998462e-07, "loss": 0.37623634934425354, "step": 5565 }, { "epoch": 1.2833756052570902, "grad_norm": 1.7282596196498647, "learning_rate": 6.240190994615617e-07, "loss": 0.4753819704055786, "step": 5566 }, { "epoch": 1.2836061793866729, "grad_norm": 1.8092084567061366, "learning_rate": 6.236658396759111e-07, "loss": 0.4584893584251404, "step": 5567 }, { "epoch": 1.2838367535162556, "grad_norm": 1.598249680169706, "learning_rate": 6.23312634594245e-07, "loss": 0.445067435503006, "step": 5568 }, { "epoch": 1.284067327645838, "grad_norm": 1.402901275205923, "learning_rate": 6.229594842679049e-07, "loss": 0.4209640920162201, "step": 5569 }, { "epoch": 1.2842979017754208, "grad_norm": 1.3481434606649714, "learning_rate": 6.226063887482254e-07, "loss": 0.34620141983032227, "step": 5570 }, { "epoch": 1.2845284759050035, "grad_norm": 1.2702834444597235, "learning_rate": 6.222533480865315e-07, "loss": 0.43683767318725586, "step": 5571 }, { "epoch": 1.284759050034586, "grad_norm": 1.5394879174992184, "learning_rate": 6.219003623341421e-07, "loss": 0.45881450176239014, "step": 5572 }, { "epoch": 1.2849896241641687, "grad_norm": 1.2015099259152706, "learning_rate": 6.215474315423667e-07, "loss": 0.40115928649902344, "step": 5573 }, { "epoch": 1.2852201982937514, "grad_norm": 1.5480428253925462, "learning_rate": 6.211945557625082e-07, "loss": 0.4181373119354248, "step": 5574 }, { "epoch": 1.2854507724233342, "grad_norm": 1.6874872010842208, "learning_rate": 6.208417350458598e-07, "loss": 0.4743300676345825, "step": 5575 }, { "epoch": 1.2856813465529169, "grad_norm": 1.6331906817141153, "learning_rate": 6.204889694437077e-07, "loss": 0.4236707091331482, "step": 5576 }, { "epoch": 1.2859119206824994, "grad_norm": 1.1887995996963334, "learning_rate": 6.201362590073305e-07, "loss": 0.4105497896671295, "step": 5577 }, { "epoch": 1.286142494812082, "grad_norm": 1.3982883240902815, "learning_rate": 6.197836037879973e-07, "loss": 0.4164474606513977, "step": 5578 }, { "epoch": 1.2863730689416648, "grad_norm": 1.648111600369129, "learning_rate": 6.19431003836971e-07, "loss": 0.49809616804122925, "step": 5579 }, { "epoch": 1.2866036430712473, "grad_norm": 1.608787056057215, "learning_rate": 6.19078459205505e-07, "loss": 0.4902994632720947, "step": 5580 }, { "epoch": 1.28683421720083, "grad_norm": 1.336430500063446, "learning_rate": 6.18725969944846e-07, "loss": 0.3697085380554199, "step": 5581 }, { "epoch": 1.2870647913304127, "grad_norm": 1.353359914681952, "learning_rate": 6.183735361062309e-07, "loss": 0.446627140045166, "step": 5582 }, { "epoch": 1.2872953654599955, "grad_norm": 1.590519620379444, "learning_rate": 6.180211577408901e-07, "loss": 0.39521220326423645, "step": 5583 }, { "epoch": 1.2875259395895782, "grad_norm": 1.7929636253307002, "learning_rate": 6.176688349000452e-07, "loss": 0.6308573484420776, "step": 5584 }, { "epoch": 1.2877565137191607, "grad_norm": 1.5017758457543093, "learning_rate": 6.173165676349102e-07, "loss": 0.4558343291282654, "step": 5585 }, { "epoch": 1.2879870878487434, "grad_norm": 1.4546689222111522, "learning_rate": 6.169643559966906e-07, "loss": 0.5487015247344971, "step": 5586 }, { "epoch": 1.288217661978326, "grad_norm": 1.3949279502201517, "learning_rate": 6.166122000365834e-07, "loss": 0.39074039459228516, "step": 5587 }, { "epoch": 1.2884482361079086, "grad_norm": 1.4687466147876906, "learning_rate": 6.162600998057787e-07, "loss": 0.5136120915412903, "step": 5588 }, { "epoch": 1.2886788102374913, "grad_norm": 1.5457442901158343, "learning_rate": 6.159080553554572e-07, "loss": 0.5344336628913879, "step": 5589 }, { "epoch": 1.288909384367074, "grad_norm": 1.5840783894802135, "learning_rate": 6.15556066736793e-07, "loss": 0.5204205513000488, "step": 5590 }, { "epoch": 1.2891399584966567, "grad_norm": 1.588345092971114, "learning_rate": 6.152041340009504e-07, "loss": 0.4768211245536804, "step": 5591 }, { "epoch": 1.2893705326262395, "grad_norm": 2.0914169507965936, "learning_rate": 6.148522571990868e-07, "loss": 0.44098299741744995, "step": 5592 }, { "epoch": 1.289601106755822, "grad_norm": 1.6411833405865308, "learning_rate": 6.145004363823509e-07, "loss": 0.5038055181503296, "step": 5593 }, { "epoch": 1.2898316808854047, "grad_norm": 1.6256634474518743, "learning_rate": 6.141486716018837e-07, "loss": 0.417998343706131, "step": 5594 }, { "epoch": 1.2900622550149874, "grad_norm": 1.755327490864145, "learning_rate": 6.137969629088174e-07, "loss": 0.48858124017715454, "step": 5595 }, { "epoch": 1.2902928291445699, "grad_norm": 1.6236287189755654, "learning_rate": 6.134453103542765e-07, "loss": 0.46988582611083984, "step": 5596 }, { "epoch": 1.2905234032741526, "grad_norm": 1.4715150644247719, "learning_rate": 6.130937139893779e-07, "loss": 0.5100589394569397, "step": 5597 }, { "epoch": 1.2907539774037353, "grad_norm": 1.861124742863941, "learning_rate": 6.127421738652286e-07, "loss": 0.490558922290802, "step": 5598 }, { "epoch": 1.290984551533318, "grad_norm": 1.624496792014592, "learning_rate": 6.123906900329291e-07, "loss": 0.4749597907066345, "step": 5599 }, { "epoch": 1.2912151256629008, "grad_norm": 1.4155787175262067, "learning_rate": 6.12039262543571e-07, "loss": 0.5006792545318604, "step": 5600 }, { "epoch": 1.2914456997924832, "grad_norm": 1.6772265070157861, "learning_rate": 6.116878914482384e-07, "loss": 0.46902909874916077, "step": 5601 }, { "epoch": 1.291676273922066, "grad_norm": 1.4563548131763813, "learning_rate": 6.113365767980059e-07, "loss": 0.46765559911727905, "step": 5602 }, { "epoch": 1.2919068480516487, "grad_norm": 1.4143636586875892, "learning_rate": 6.10985318643941e-07, "loss": 0.45960646867752075, "step": 5603 }, { "epoch": 1.2921374221812312, "grad_norm": 1.578129032516793, "learning_rate": 6.106341170371024e-07, "loss": 0.4067912697792053, "step": 5604 }, { "epoch": 1.292367996310814, "grad_norm": 1.653263856685772, "learning_rate": 6.102829720285414e-07, "loss": 0.45004114508628845, "step": 5605 }, { "epoch": 1.2925985704403966, "grad_norm": 1.698803058368325, "learning_rate": 6.099318836692999e-07, "loss": 0.5086014270782471, "step": 5606 }, { "epoch": 1.2928291445699793, "grad_norm": 1.5400277013654406, "learning_rate": 6.095808520104122e-07, "loss": 0.49985191226005554, "step": 5607 }, { "epoch": 1.293059718699562, "grad_norm": 1.5622376081366391, "learning_rate": 6.092298771029047e-07, "loss": 0.5066381096839905, "step": 5608 }, { "epoch": 1.2932902928291445, "grad_norm": 1.5786958248418999, "learning_rate": 6.088789589977947e-07, "loss": 0.49626559019088745, "step": 5609 }, { "epoch": 1.2935208669587273, "grad_norm": 1.6542820345168319, "learning_rate": 6.085280977460921e-07, "loss": 0.4837498962879181, "step": 5610 }, { "epoch": 1.29375144108831, "grad_norm": 1.3607897650960659, "learning_rate": 6.081772933987977e-07, "loss": 0.41308102011680603, "step": 5611 }, { "epoch": 1.2939820152178925, "grad_norm": 1.4026215025684987, "learning_rate": 6.078265460069048e-07, "loss": 0.4453086853027344, "step": 5612 }, { "epoch": 1.2942125893474752, "grad_norm": 1.5506248233039113, "learning_rate": 6.074758556213976e-07, "loss": 0.4700174927711487, "step": 5613 }, { "epoch": 1.294443163477058, "grad_norm": 1.6021152444285431, "learning_rate": 6.071252222932537e-07, "loss": 0.578227162361145, "step": 5614 }, { "epoch": 1.2946737376066406, "grad_norm": 1.3711009132002785, "learning_rate": 6.067746460734398e-07, "loss": 0.36468571424484253, "step": 5615 }, { "epoch": 1.2949043117362231, "grad_norm": 1.7197393040240752, "learning_rate": 6.064241270129166e-07, "loss": 0.4793199896812439, "step": 5616 }, { "epoch": 1.2951348858658058, "grad_norm": 1.4731744493442007, "learning_rate": 6.060736651626355e-07, "loss": 0.40342214703559875, "step": 5617 }, { "epoch": 1.2953654599953885, "grad_norm": 1.2868571274228024, "learning_rate": 6.05723260573539e-07, "loss": 0.4212435185909271, "step": 5618 }, { "epoch": 1.295596034124971, "grad_norm": 1.592545901664945, "learning_rate": 6.053729132965626e-07, "loss": 0.44668713212013245, "step": 5619 }, { "epoch": 1.2958266082545538, "grad_norm": 1.3590289444558108, "learning_rate": 6.050226233826326e-07, "loss": 0.5159831643104553, "step": 5620 }, { "epoch": 1.2960571823841365, "grad_norm": 1.792827614220507, "learning_rate": 6.046723908826676e-07, "loss": 0.5091866850852966, "step": 5621 }, { "epoch": 1.2962877565137192, "grad_norm": 1.3636713576072057, "learning_rate": 6.043222158475767e-07, "loss": 0.34838563203811646, "step": 5622 }, { "epoch": 1.296518330643302, "grad_norm": 1.679394698956229, "learning_rate": 6.039720983282621e-07, "loss": 0.46576952934265137, "step": 5623 }, { "epoch": 1.2967489047728844, "grad_norm": 1.5739745386461328, "learning_rate": 6.036220383756163e-07, "loss": 0.4971234202384949, "step": 5624 }, { "epoch": 1.2969794789024671, "grad_norm": 1.3832811037885837, "learning_rate": 6.03272036040525e-07, "loss": 0.4792482256889343, "step": 5625 }, { "epoch": 1.2972100530320498, "grad_norm": 1.5438407741127544, "learning_rate": 6.029220913738636e-07, "loss": 0.45584213733673096, "step": 5626 }, { "epoch": 1.2974406271616323, "grad_norm": 2.1628056802136686, "learning_rate": 6.025722044265004e-07, "loss": 0.5094096064567566, "step": 5627 }, { "epoch": 1.297671201291215, "grad_norm": 1.2707985126710273, "learning_rate": 6.022223752492954e-07, "loss": 0.33178865909576416, "step": 5628 }, { "epoch": 1.2979017754207978, "grad_norm": 1.4977758648466553, "learning_rate": 6.018726038930991e-07, "loss": 0.4955121874809265, "step": 5629 }, { "epoch": 1.2981323495503805, "grad_norm": 1.9087861970540962, "learning_rate": 6.01522890408755e-07, "loss": 0.46253639459609985, "step": 5630 }, { "epoch": 1.2983629236799632, "grad_norm": 1.725580686624441, "learning_rate": 6.011732348470971e-07, "loss": 0.4760236442089081, "step": 5631 }, { "epoch": 1.2985934978095457, "grad_norm": 1.487451213133888, "learning_rate": 6.008236372589516e-07, "loss": 0.44413092732429504, "step": 5632 }, { "epoch": 1.2988240719391284, "grad_norm": 1.5710401716420814, "learning_rate": 6.004740976951358e-07, "loss": 0.5431559681892395, "step": 5633 }, { "epoch": 1.2990546460687111, "grad_norm": 1.448678008923642, "learning_rate": 6.001246162064592e-07, "loss": 0.41276806592941284, "step": 5634 }, { "epoch": 1.2992852201982936, "grad_norm": 1.8698453553316883, "learning_rate": 5.997751928437219e-07, "loss": 0.3998986482620239, "step": 5635 }, { "epoch": 1.2995157943278763, "grad_norm": 1.7019145009400753, "learning_rate": 5.994258276577169e-07, "loss": 0.47741782665252686, "step": 5636 }, { "epoch": 1.299746368457459, "grad_norm": 1.8471752326794122, "learning_rate": 5.990765206992277e-07, "loss": 0.4294115900993347, "step": 5637 }, { "epoch": 1.2999769425870418, "grad_norm": 1.2676173155963009, "learning_rate": 5.987272720190288e-07, "loss": 0.4717773199081421, "step": 5638 }, { "epoch": 1.3002075167166245, "grad_norm": 1.4764264012124577, "learning_rate": 5.983780816678881e-07, "loss": 0.5169499516487122, "step": 5639 }, { "epoch": 1.300438090846207, "grad_norm": 1.3402196455719508, "learning_rate": 5.980289496965634e-07, "loss": 0.3796359598636627, "step": 5640 }, { "epoch": 1.3006686649757897, "grad_norm": 1.439771899645747, "learning_rate": 5.976798761558048e-07, "loss": 0.44377613067626953, "step": 5641 }, { "epoch": 1.3008992391053724, "grad_norm": 1.4787491173073983, "learning_rate": 5.973308610963534e-07, "loss": 0.46863383054733276, "step": 5642 }, { "epoch": 1.301129813234955, "grad_norm": 1.6231703309548882, "learning_rate": 5.969819045689426e-07, "loss": 0.5437184572219849, "step": 5643 }, { "epoch": 1.3013603873645376, "grad_norm": 1.3526724102376106, "learning_rate": 5.96633006624296e-07, "loss": 0.4487720727920532, "step": 5644 }, { "epoch": 1.3015909614941203, "grad_norm": 1.4099594164441491, "learning_rate": 5.962841673131305e-07, "loss": 0.42834270000457764, "step": 5645 }, { "epoch": 1.301821535623703, "grad_norm": 1.6303538612123332, "learning_rate": 5.959353866861525e-07, "loss": 0.5242533087730408, "step": 5646 }, { "epoch": 1.3020521097532858, "grad_norm": 1.467793467454458, "learning_rate": 5.955866647940609e-07, "loss": 0.4529950022697449, "step": 5647 }, { "epoch": 1.3022826838828683, "grad_norm": 1.704233159172443, "learning_rate": 5.952380016875465e-07, "loss": 0.41109561920166016, "step": 5648 }, { "epoch": 1.302513258012451, "grad_norm": 2.1978948521850237, "learning_rate": 5.948893974172904e-07, "loss": 0.5468418598175049, "step": 5649 }, { "epoch": 1.3027438321420337, "grad_norm": 1.6524182777322811, "learning_rate": 5.945408520339663e-07, "loss": 0.4594927430152893, "step": 5650 }, { "epoch": 1.3029744062716162, "grad_norm": 1.8822005278969978, "learning_rate": 5.941923655882383e-07, "loss": 0.5011999011039734, "step": 5651 }, { "epoch": 1.303204980401199, "grad_norm": 1.3940543055361847, "learning_rate": 5.938439381307632e-07, "loss": 0.519101083278656, "step": 5652 }, { "epoch": 1.3034355545307816, "grad_norm": 1.3048743953658823, "learning_rate": 5.934955697121875e-07, "loss": 0.521979570388794, "step": 5653 }, { "epoch": 1.3036661286603644, "grad_norm": 1.5140544105240696, "learning_rate": 5.931472603831507e-07, "loss": 0.5969122648239136, "step": 5654 }, { "epoch": 1.303896702789947, "grad_norm": 1.6283257057537612, "learning_rate": 5.927990101942826e-07, "loss": 0.47013232111930847, "step": 5655 }, { "epoch": 1.3041272769195296, "grad_norm": 1.485470149052559, "learning_rate": 5.924508191962059e-07, "loss": 0.4135271906852722, "step": 5656 }, { "epoch": 1.3043578510491123, "grad_norm": 1.6826248484124529, "learning_rate": 5.921026874395327e-07, "loss": 0.45639151334762573, "step": 5657 }, { "epoch": 1.304588425178695, "grad_norm": 1.4851105420204929, "learning_rate": 5.917546149748676e-07, "loss": 0.4047633409500122, "step": 5658 }, { "epoch": 1.3048189993082775, "grad_norm": 1.470073094956581, "learning_rate": 5.91406601852807e-07, "loss": 0.4352290630340576, "step": 5659 }, { "epoch": 1.3050495734378602, "grad_norm": 1.569723084578139, "learning_rate": 5.910586481239375e-07, "loss": 0.4912130534648895, "step": 5660 }, { "epoch": 1.305280147567443, "grad_norm": 1.4302762159123064, "learning_rate": 5.907107538388383e-07, "loss": 0.4114433526992798, "step": 5661 }, { "epoch": 1.3055107216970256, "grad_norm": 1.6307461117750972, "learning_rate": 5.903629190480786e-07, "loss": 0.4230955243110657, "step": 5662 }, { "epoch": 1.3057412958266084, "grad_norm": 1.525164874833489, "learning_rate": 5.900151438022205e-07, "loss": 0.5020648241043091, "step": 5663 }, { "epoch": 1.3059718699561909, "grad_norm": 1.6834639607808413, "learning_rate": 5.89667428151816e-07, "loss": 0.48636388778686523, "step": 5664 }, { "epoch": 1.3062024440857736, "grad_norm": 1.376635193773143, "learning_rate": 5.893197721474099e-07, "loss": 0.412000447511673, "step": 5665 }, { "epoch": 1.3064330182153563, "grad_norm": 1.8328035722486296, "learning_rate": 5.889721758395369e-07, "loss": 0.3584952652454376, "step": 5666 }, { "epoch": 1.3066635923449388, "grad_norm": 1.599166825150926, "learning_rate": 5.886246392787234e-07, "loss": 0.4538918733596802, "step": 5667 }, { "epoch": 1.3068941664745215, "grad_norm": 1.3551701558323133, "learning_rate": 5.882771625154883e-07, "loss": 0.478498637676239, "step": 5668 }, { "epoch": 1.3071247406041042, "grad_norm": 1.5353917292288828, "learning_rate": 5.879297456003398e-07, "loss": 0.49535906314849854, "step": 5669 }, { "epoch": 1.307355314733687, "grad_norm": 1.4516733372645705, "learning_rate": 5.875823885837793e-07, "loss": 0.48975661396980286, "step": 5670 }, { "epoch": 1.3075858888632697, "grad_norm": 1.675865776424194, "learning_rate": 5.87235091516298e-07, "loss": 0.4870087802410126, "step": 5671 }, { "epoch": 1.3078164629928521, "grad_norm": 1.5358758810801338, "learning_rate": 5.8688785444838e-07, "loss": 0.43411481380462646, "step": 5672 }, { "epoch": 1.3080470371224349, "grad_norm": 1.5956307221574964, "learning_rate": 5.865406774304986e-07, "loss": 0.5108835697174072, "step": 5673 }, { "epoch": 1.3082776112520176, "grad_norm": 1.6165992027891032, "learning_rate": 5.861935605131202e-07, "loss": 0.47449198365211487, "step": 5674 }, { "epoch": 1.3085081853816, "grad_norm": 1.8165499378032328, "learning_rate": 5.858465037467014e-07, "loss": 0.5550234913825989, "step": 5675 }, { "epoch": 1.3087387595111828, "grad_norm": 1.5758581559369806, "learning_rate": 5.854995071816911e-07, "loss": 0.4548208713531494, "step": 5676 }, { "epoch": 1.3089693336407655, "grad_norm": 1.4849539841305146, "learning_rate": 5.851525708685279e-07, "loss": 0.5176935195922852, "step": 5677 }, { "epoch": 1.3091999077703482, "grad_norm": 1.5664760566663032, "learning_rate": 5.848056948576428e-07, "loss": 0.4460016191005707, "step": 5678 }, { "epoch": 1.309430481899931, "grad_norm": 1.808203061607658, "learning_rate": 5.84458879199458e-07, "loss": 0.5344464182853699, "step": 5679 }, { "epoch": 1.3096610560295134, "grad_norm": 1.3109840468073877, "learning_rate": 5.841121239443863e-07, "loss": 0.48601672053337097, "step": 5680 }, { "epoch": 1.3098916301590962, "grad_norm": 1.3467689115963568, "learning_rate": 5.837654291428327e-07, "loss": 0.46849286556243896, "step": 5681 }, { "epoch": 1.3101222042886789, "grad_norm": 1.2665516862618484, "learning_rate": 5.834187948451918e-07, "loss": 0.4353019893169403, "step": 5682 }, { "epoch": 1.3103527784182614, "grad_norm": 1.7099740749541261, "learning_rate": 5.830722211018516e-07, "loss": 0.5345665812492371, "step": 5683 }, { "epoch": 1.310583352547844, "grad_norm": 1.4659221660940824, "learning_rate": 5.827257079631886e-07, "loss": 0.4060036540031433, "step": 5684 }, { "epoch": 1.3108139266774268, "grad_norm": 1.3640742579072, "learning_rate": 5.823792554795738e-07, "loss": 0.43724536895751953, "step": 5685 }, { "epoch": 1.3110445008070095, "grad_norm": 1.550163679413481, "learning_rate": 5.820328637013665e-07, "loss": 0.4600690007209778, "step": 5686 }, { "epoch": 1.3112750749365922, "grad_norm": 1.5199243554334652, "learning_rate": 5.816865326789182e-07, "loss": 0.4352531433105469, "step": 5687 }, { "epoch": 1.3115056490661747, "grad_norm": 1.4575114943022274, "learning_rate": 5.813402624625722e-07, "loss": 0.39384984970092773, "step": 5688 }, { "epoch": 1.3117362231957574, "grad_norm": 1.329194110980277, "learning_rate": 5.809940531026616e-07, "loss": 0.44367098808288574, "step": 5689 }, { "epoch": 1.3119667973253402, "grad_norm": 1.4497223943190725, "learning_rate": 5.806479046495123e-07, "loss": 0.4757416546344757, "step": 5690 }, { "epoch": 1.3121973714549227, "grad_norm": 1.5821654764353048, "learning_rate": 5.803018171534396e-07, "loss": 0.521708607673645, "step": 5691 }, { "epoch": 1.3124279455845054, "grad_norm": 1.3510537988002305, "learning_rate": 5.799557906647514e-07, "loss": 0.4127439260482788, "step": 5692 }, { "epoch": 1.312658519714088, "grad_norm": 1.4570205213875538, "learning_rate": 5.79609825233746e-07, "loss": 0.4809693396091461, "step": 5693 }, { "epoch": 1.3128890938436708, "grad_norm": 1.2590938015478794, "learning_rate": 5.792639209107134e-07, "loss": 0.5075684189796448, "step": 5694 }, { "epoch": 1.3131196679732535, "grad_norm": 1.3738792104421846, "learning_rate": 5.789180777459336e-07, "loss": 0.416393518447876, "step": 5695 }, { "epoch": 1.313350242102836, "grad_norm": 1.4282126857493198, "learning_rate": 5.78572295789679e-07, "loss": 0.4456642270088196, "step": 5696 }, { "epoch": 1.3135808162324187, "grad_norm": 1.327521871832615, "learning_rate": 5.782265750922124e-07, "loss": 0.4757812023162842, "step": 5697 }, { "epoch": 1.3138113903620015, "grad_norm": 1.6103197546493997, "learning_rate": 5.778809157037872e-07, "loss": 0.5081768035888672, "step": 5698 }, { "epoch": 1.314041964491584, "grad_norm": 1.6849043068796357, "learning_rate": 5.775353176746489e-07, "loss": 0.4604584872722626, "step": 5699 }, { "epoch": 1.3142725386211667, "grad_norm": 1.3964100189157245, "learning_rate": 5.771897810550339e-07, "loss": 0.4153773784637451, "step": 5700 }, { "epoch": 1.3145031127507494, "grad_norm": 1.5346514188080242, "learning_rate": 5.768443058951695e-07, "loss": 0.5194085836410522, "step": 5701 }, { "epoch": 1.314733686880332, "grad_norm": 1.6610989574168062, "learning_rate": 5.764988922452733e-07, "loss": 0.4398482143878937, "step": 5702 }, { "epoch": 1.3149642610099148, "grad_norm": 1.747178590910114, "learning_rate": 5.761535401555558e-07, "loss": 0.5148836374282837, "step": 5703 }, { "epoch": 1.3151948351394973, "grad_norm": 1.8977812861580863, "learning_rate": 5.758082496762163e-07, "loss": 0.533142626285553, "step": 5704 }, { "epoch": 1.31542540926908, "grad_norm": 1.3488739739710767, "learning_rate": 5.754630208574473e-07, "loss": 0.4059423804283142, "step": 5705 }, { "epoch": 1.3156559833986627, "grad_norm": 1.3213051571946475, "learning_rate": 5.751178537494302e-07, "loss": 0.4685533940792084, "step": 5706 }, { "epoch": 1.3158865575282452, "grad_norm": 1.5403217644159128, "learning_rate": 5.747727484023392e-07, "loss": 0.4454694986343384, "step": 5707 }, { "epoch": 1.316117131657828, "grad_norm": 1.481350859430692, "learning_rate": 5.74427704866339e-07, "loss": 0.4058796167373657, "step": 5708 }, { "epoch": 1.3163477057874107, "grad_norm": 1.3294270142641733, "learning_rate": 5.740827231915847e-07, "loss": 0.3891766369342804, "step": 5709 }, { "epoch": 1.3165782799169934, "grad_norm": 1.5072356875610937, "learning_rate": 5.737378034282235e-07, "loss": 0.47912657260894775, "step": 5710 }, { "epoch": 1.316808854046576, "grad_norm": 1.5228549079910219, "learning_rate": 5.733929456263922e-07, "loss": 0.4221952557563782, "step": 5711 }, { "epoch": 1.3170394281761586, "grad_norm": 1.5405159904484362, "learning_rate": 5.730481498362202e-07, "loss": 0.39018404483795166, "step": 5712 }, { "epoch": 1.3172700023057413, "grad_norm": 1.6184406292698126, "learning_rate": 5.727034161078262e-07, "loss": 0.5388307571411133, "step": 5713 }, { "epoch": 1.317500576435324, "grad_norm": 1.5278965195377916, "learning_rate": 5.723587444913216e-07, "loss": 0.3243408501148224, "step": 5714 }, { "epoch": 1.3177311505649065, "grad_norm": 1.6496814482710773, "learning_rate": 5.720141350368072e-07, "loss": 0.46480363607406616, "step": 5715 }, { "epoch": 1.3179617246944892, "grad_norm": 1.6265951465013608, "learning_rate": 5.716695877943757e-07, "loss": 0.5286417603492737, "step": 5716 }, { "epoch": 1.318192298824072, "grad_norm": 1.455901542591345, "learning_rate": 5.71325102814111e-07, "loss": 0.4170069694519043, "step": 5717 }, { "epoch": 1.3184228729536547, "grad_norm": 1.5051159019770526, "learning_rate": 5.709806801460867e-07, "loss": 0.5738973617553711, "step": 5718 }, { "epoch": 1.3186534470832374, "grad_norm": 1.4473352410585376, "learning_rate": 5.706363198403689e-07, "loss": 0.5309658050537109, "step": 5719 }, { "epoch": 1.31888402121282, "grad_norm": 1.588487236125564, "learning_rate": 5.70292021947013e-07, "loss": 0.4569379389286041, "step": 5720 }, { "epoch": 1.3191145953424026, "grad_norm": 1.5641598702256398, "learning_rate": 5.699477865160674e-07, "loss": 0.46686258912086487, "step": 5721 }, { "epoch": 1.3193451694719853, "grad_norm": 1.551220703032623, "learning_rate": 5.696036135975688e-07, "loss": 0.5333213806152344, "step": 5722 }, { "epoch": 1.3195757436015678, "grad_norm": 1.6027893782611593, "learning_rate": 5.69259503241547e-07, "loss": 0.3519536256790161, "step": 5723 }, { "epoch": 1.3198063177311505, "grad_norm": 1.5104260104986362, "learning_rate": 5.689154554980218e-07, "loss": 0.4763161242008209, "step": 5724 }, { "epoch": 1.3200368918607333, "grad_norm": 1.5061315373489772, "learning_rate": 5.685714704170044e-07, "loss": 0.43600207567214966, "step": 5725 }, { "epoch": 1.320267465990316, "grad_norm": 1.4992417251350876, "learning_rate": 5.682275480484958e-07, "loss": 0.41991305351257324, "step": 5726 }, { "epoch": 1.3204980401198987, "grad_norm": 1.663551629444692, "learning_rate": 5.678836884424894e-07, "loss": 0.44275131821632385, "step": 5727 }, { "epoch": 1.3207286142494812, "grad_norm": 1.65999947024113, "learning_rate": 5.675398916489682e-07, "loss": 0.4339372515678406, "step": 5728 }, { "epoch": 1.320959188379064, "grad_norm": 1.484455134036602, "learning_rate": 5.671961577179062e-07, "loss": 0.4462248384952545, "step": 5729 }, { "epoch": 1.3211897625086464, "grad_norm": 1.4704913213821902, "learning_rate": 5.668524866992693e-07, "loss": 0.36548441648483276, "step": 5730 }, { "epoch": 1.321420336638229, "grad_norm": 1.5370532211440713, "learning_rate": 5.665088786430129e-07, "loss": 0.4709678888320923, "step": 5731 }, { "epoch": 1.3216509107678118, "grad_norm": 1.4993066403144744, "learning_rate": 5.661653335990848e-07, "loss": 0.40125030279159546, "step": 5732 }, { "epoch": 1.3218814848973945, "grad_norm": 1.8517319571144346, "learning_rate": 5.658218516174218e-07, "loss": 0.5288605690002441, "step": 5733 }, { "epoch": 1.3221120590269773, "grad_norm": 1.2954018601150643, "learning_rate": 5.654784327479534e-07, "loss": 0.41306072473526, "step": 5734 }, { "epoch": 1.3223426331565598, "grad_norm": 1.3199807449430407, "learning_rate": 5.651350770405983e-07, "loss": 0.34327009320259094, "step": 5735 }, { "epoch": 1.3225732072861425, "grad_norm": 1.4524630442098247, "learning_rate": 5.647917845452671e-07, "loss": 0.5055800080299377, "step": 5736 }, { "epoch": 1.3228037814157252, "grad_norm": 1.7153085926535214, "learning_rate": 5.644485553118609e-07, "loss": 0.45496249198913574, "step": 5737 }, { "epoch": 1.3230343555453077, "grad_norm": 1.6142993934275558, "learning_rate": 5.641053893902708e-07, "loss": 0.4626169502735138, "step": 5738 }, { "epoch": 1.3232649296748904, "grad_norm": 1.3569624734396053, "learning_rate": 5.637622868303802e-07, "loss": 0.46621328592300415, "step": 5739 }, { "epoch": 1.3234955038044731, "grad_norm": 1.5833136701466524, "learning_rate": 5.634192476820623e-07, "loss": 0.47793662548065186, "step": 5740 }, { "epoch": 1.3237260779340558, "grad_norm": 1.5367680790773321, "learning_rate": 5.630762719951816e-07, "loss": 0.42578715085983276, "step": 5741 }, { "epoch": 1.3239566520636386, "grad_norm": 1.7421270871218182, "learning_rate": 5.627333598195927e-07, "loss": 0.3146113157272339, "step": 5742 }, { "epoch": 1.324187226193221, "grad_norm": 1.376620002714832, "learning_rate": 5.623905112051417e-07, "loss": 0.39731544256210327, "step": 5743 }, { "epoch": 1.3244178003228038, "grad_norm": 1.6655684412604148, "learning_rate": 5.620477262016647e-07, "loss": 0.3755846619606018, "step": 5744 }, { "epoch": 1.3246483744523865, "grad_norm": 1.5953907301532468, "learning_rate": 5.617050048589896e-07, "loss": 0.43060415983200073, "step": 5745 }, { "epoch": 1.324878948581969, "grad_norm": 1.54564820857706, "learning_rate": 5.613623472269334e-07, "loss": 0.4213481545448303, "step": 5746 }, { "epoch": 1.3251095227115517, "grad_norm": 1.2422408749001486, "learning_rate": 5.610197533553057e-07, "loss": 0.3923456072807312, "step": 5747 }, { "epoch": 1.3253400968411344, "grad_norm": 1.6088447345623693, "learning_rate": 5.606772232939061e-07, "loss": 0.42293328046798706, "step": 5748 }, { "epoch": 1.3255706709707171, "grad_norm": 1.596682526932072, "learning_rate": 5.603347570925242e-07, "loss": 0.4545479118824005, "step": 5749 }, { "epoch": 1.3258012451002998, "grad_norm": 1.4262513090332916, "learning_rate": 5.599923548009416e-07, "loss": 0.3969312310218811, "step": 5750 }, { "epoch": 1.3260318192298823, "grad_norm": 1.687653911460881, "learning_rate": 5.59650016468929e-07, "loss": 0.4296644330024719, "step": 5751 }, { "epoch": 1.326262393359465, "grad_norm": 1.4928189267328964, "learning_rate": 5.5930774214625e-07, "loss": 0.43291348218917847, "step": 5752 }, { "epoch": 1.3264929674890478, "grad_norm": 1.4463941028108167, "learning_rate": 5.589655318826564e-07, "loss": 0.47684454917907715, "step": 5753 }, { "epoch": 1.3267235416186303, "grad_norm": 1.3515496302725483, "learning_rate": 5.586233857278924e-07, "loss": 0.48520004749298096, "step": 5754 }, { "epoch": 1.326954115748213, "grad_norm": 1.6127441732883512, "learning_rate": 5.582813037316926e-07, "loss": 0.4434587359428406, "step": 5755 }, { "epoch": 1.3271846898777957, "grad_norm": 1.7808352880972456, "learning_rate": 5.579392859437825e-07, "loss": 0.47306808829307556, "step": 5756 }, { "epoch": 1.3274152640073784, "grad_norm": 1.5663021335869645, "learning_rate": 5.575973324138772e-07, "loss": 0.4349653720855713, "step": 5757 }, { "epoch": 1.3276458381369611, "grad_norm": 1.2914359149982935, "learning_rate": 5.572554431916829e-07, "loss": 0.31277602910995483, "step": 5758 }, { "epoch": 1.3278764122665436, "grad_norm": 1.5658319454866303, "learning_rate": 5.569136183268974e-07, "loss": 0.4281114637851715, "step": 5759 }, { "epoch": 1.3281069863961263, "grad_norm": 1.2867721627127386, "learning_rate": 5.565718578692076e-07, "loss": 0.45071113109588623, "step": 5760 }, { "epoch": 1.328337560525709, "grad_norm": 1.4460147363867, "learning_rate": 5.562301618682927e-07, "loss": 0.426133394241333, "step": 5761 }, { "epoch": 1.3285681346552916, "grad_norm": 1.3630920926710801, "learning_rate": 5.558885303738209e-07, "loss": 0.3882424235343933, "step": 5762 }, { "epoch": 1.3287987087848743, "grad_norm": 1.3878174095068123, "learning_rate": 5.55546963435452e-07, "loss": 0.4706958532333374, "step": 5763 }, { "epoch": 1.329029282914457, "grad_norm": 1.9122348340273743, "learning_rate": 5.552054611028365e-07, "loss": 0.4868433475494385, "step": 5764 }, { "epoch": 1.3292598570440397, "grad_norm": 1.4411048310630292, "learning_rate": 5.548640234256154e-07, "loss": 0.41839566826820374, "step": 5765 }, { "epoch": 1.3294904311736224, "grad_norm": 1.9627530346102546, "learning_rate": 5.545226504534195e-07, "loss": 0.4088629484176636, "step": 5766 }, { "epoch": 1.329721005303205, "grad_norm": 1.3819218540316194, "learning_rate": 5.541813422358715e-07, "loss": 0.34617769718170166, "step": 5767 }, { "epoch": 1.3299515794327876, "grad_norm": 1.5711021474470717, "learning_rate": 5.538400988225835e-07, "loss": 0.5098900198936462, "step": 5768 }, { "epoch": 1.3301821535623704, "grad_norm": 1.5683015797269382, "learning_rate": 5.534989202631586e-07, "loss": 0.4294108748435974, "step": 5769 }, { "epoch": 1.3304127276919528, "grad_norm": 1.3488716534216894, "learning_rate": 5.531578066071907e-07, "loss": 0.42205139994621277, "step": 5770 }, { "epoch": 1.3306433018215356, "grad_norm": 1.8657910300729754, "learning_rate": 5.528167579042645e-07, "loss": 0.5009530186653137, "step": 5771 }, { "epoch": 1.3308738759511183, "grad_norm": 1.468249228101101, "learning_rate": 5.524757742039545e-07, "loss": 0.554497241973877, "step": 5772 }, { "epoch": 1.331104450080701, "grad_norm": 1.711116822757576, "learning_rate": 5.521348555558263e-07, "loss": 0.3514432907104492, "step": 5773 }, { "epoch": 1.3313350242102837, "grad_norm": 1.4224522574801144, "learning_rate": 5.51794002009436e-07, "loss": 0.4712038040161133, "step": 5774 }, { "epoch": 1.3315655983398662, "grad_norm": 1.6288850118765847, "learning_rate": 5.514532136143295e-07, "loss": 0.48556071519851685, "step": 5775 }, { "epoch": 1.331796172469449, "grad_norm": 1.42798680480441, "learning_rate": 5.511124904200448e-07, "loss": 0.43158456683158875, "step": 5776 }, { "epoch": 1.3320267465990316, "grad_norm": 1.8128360066016722, "learning_rate": 5.507718324761085e-07, "loss": 0.5376255512237549, "step": 5777 }, { "epoch": 1.3322573207286141, "grad_norm": 1.446480187929883, "learning_rate": 5.504312398320392e-07, "loss": 0.3800685405731201, "step": 5778 }, { "epoch": 1.3324878948581969, "grad_norm": 1.3675185316121448, "learning_rate": 5.500907125373458e-07, "loss": 0.4015260338783264, "step": 5779 }, { "epoch": 1.3327184689877796, "grad_norm": 1.7400186621828952, "learning_rate": 5.497502506415266e-07, "loss": 0.42762285470962524, "step": 5780 }, { "epoch": 1.3329490431173623, "grad_norm": 1.4501572722598215, "learning_rate": 5.494098541940719e-07, "loss": 0.4467644691467285, "step": 5781 }, { "epoch": 1.333179617246945, "grad_norm": 1.9298171674754279, "learning_rate": 5.490695232444613e-07, "loss": 0.42699599266052246, "step": 5782 }, { "epoch": 1.3334101913765275, "grad_norm": 1.6654850032985582, "learning_rate": 5.487292578421659e-07, "loss": 0.586537778377533, "step": 5783 }, { "epoch": 1.3336407655061102, "grad_norm": 1.761605169999467, "learning_rate": 5.48389058036646e-07, "loss": 0.4525066018104553, "step": 5784 }, { "epoch": 1.333871339635693, "grad_norm": 1.4697934550209713, "learning_rate": 5.480489238773535e-07, "loss": 0.40520548820495605, "step": 5785 }, { "epoch": 1.3341019137652754, "grad_norm": 1.7127717596843188, "learning_rate": 5.477088554137304e-07, "loss": 0.3910450339317322, "step": 5786 }, { "epoch": 1.3343324878948581, "grad_norm": 1.781985995356997, "learning_rate": 5.473688526952087e-07, "loss": 0.45285511016845703, "step": 5787 }, { "epoch": 1.3345630620244409, "grad_norm": 1.3079701521023397, "learning_rate": 5.47028915771212e-07, "loss": 0.39207279682159424, "step": 5788 }, { "epoch": 1.3347936361540236, "grad_norm": 1.3401224496215014, "learning_rate": 5.466890446911527e-07, "loss": 0.40281063318252563, "step": 5789 }, { "epoch": 1.3350242102836063, "grad_norm": 1.5855589292084546, "learning_rate": 5.463492395044354e-07, "loss": 0.5087814927101135, "step": 5790 }, { "epoch": 1.3352547844131888, "grad_norm": 1.6443172906836578, "learning_rate": 5.460095002604532e-07, "loss": 0.47597891092300415, "step": 5791 }, { "epoch": 1.3354853585427715, "grad_norm": 1.656230003127049, "learning_rate": 5.456698270085917e-07, "loss": 0.5722953677177429, "step": 5792 }, { "epoch": 1.3357159326723542, "grad_norm": 1.6424947586218923, "learning_rate": 5.45330219798225e-07, "loss": 0.5133349299430847, "step": 5793 }, { "epoch": 1.3359465068019367, "grad_norm": 1.5413030595202453, "learning_rate": 5.449906786787187e-07, "loss": 0.46230804920196533, "step": 5794 }, { "epoch": 1.3361770809315194, "grad_norm": 1.6839619437291453, "learning_rate": 5.446512036994286e-07, "loss": 0.42002394795417786, "step": 5795 }, { "epoch": 1.3364076550611022, "grad_norm": 1.46623243210155, "learning_rate": 5.443117949097013e-07, "loss": 0.42281097173690796, "step": 5796 }, { "epoch": 1.3366382291906849, "grad_norm": 1.4476698476010996, "learning_rate": 5.439724523588726e-07, "loss": 0.511898398399353, "step": 5797 }, { "epoch": 1.3368688033202676, "grad_norm": 1.4307520026731049, "learning_rate": 5.4363317609627e-07, "loss": 0.4475559592247009, "step": 5798 }, { "epoch": 1.33709937744985, "grad_norm": 1.509864957359139, "learning_rate": 5.432939661712103e-07, "loss": 0.4872414469718933, "step": 5799 }, { "epoch": 1.3373299515794328, "grad_norm": 1.3480605234272842, "learning_rate": 5.429548226330009e-07, "loss": 0.40401679277420044, "step": 5800 }, { "epoch": 1.3375605257090155, "grad_norm": 2.083088707198395, "learning_rate": 5.426157455309399e-07, "loss": 0.43559926748275757, "step": 5801 }, { "epoch": 1.337791099838598, "grad_norm": 1.6000855398004097, "learning_rate": 5.422767349143158e-07, "loss": 0.44283759593963623, "step": 5802 }, { "epoch": 1.3380216739681807, "grad_norm": 1.310277684226626, "learning_rate": 5.419377908324077e-07, "loss": 0.3770032525062561, "step": 5803 }, { "epoch": 1.3382522480977634, "grad_norm": 1.3856773934136148, "learning_rate": 5.415989133344834e-07, "loss": 0.4497501850128174, "step": 5804 }, { "epoch": 1.3384828222273462, "grad_norm": 1.49195449044666, "learning_rate": 5.412601024698033e-07, "loss": 0.5008253455162048, "step": 5805 }, { "epoch": 1.3387133963569289, "grad_norm": 1.3694796854029274, "learning_rate": 5.409213582876162e-07, "loss": 0.46178537607192993, "step": 5806 }, { "epoch": 1.3389439704865114, "grad_norm": 1.1951838089282807, "learning_rate": 5.405826808371625e-07, "loss": 0.39843931794166565, "step": 5807 }, { "epoch": 1.339174544616094, "grad_norm": 1.4243934050525646, "learning_rate": 5.402440701676724e-07, "loss": 0.4829174280166626, "step": 5808 }, { "epoch": 1.3394051187456768, "grad_norm": 1.0859530853021675, "learning_rate": 5.399055263283656e-07, "loss": 0.36173316836357117, "step": 5809 }, { "epoch": 1.3396356928752593, "grad_norm": 1.5741135880130834, "learning_rate": 5.395670493684536e-07, "loss": 0.400304913520813, "step": 5810 }, { "epoch": 1.339866267004842, "grad_norm": 1.507879612413509, "learning_rate": 5.392286393371372e-07, "loss": 0.4536975622177124, "step": 5811 }, { "epoch": 1.3400968411344247, "grad_norm": 1.7310508291395992, "learning_rate": 5.388902962836084e-07, "loss": 0.6474577188491821, "step": 5812 }, { "epoch": 1.3403274152640074, "grad_norm": 1.6348182443046517, "learning_rate": 5.385520202570477e-07, "loss": 0.48008009791374207, "step": 5813 }, { "epoch": 1.3405579893935902, "grad_norm": 1.6214175923335088, "learning_rate": 5.38213811306628e-07, "loss": 0.4518657326698303, "step": 5814 }, { "epoch": 1.3407885635231727, "grad_norm": 1.280530895656809, "learning_rate": 5.378756694815105e-07, "loss": 0.449008584022522, "step": 5815 }, { "epoch": 1.3410191376527554, "grad_norm": 1.689898643370083, "learning_rate": 5.375375948308483e-07, "loss": 0.5448319315910339, "step": 5816 }, { "epoch": 1.341249711782338, "grad_norm": 1.5166178678578832, "learning_rate": 5.371995874037832e-07, "loss": 0.5078369379043579, "step": 5817 }, { "epoch": 1.3414802859119206, "grad_norm": 1.611364899344997, "learning_rate": 5.368616472494482e-07, "loss": 0.508685290813446, "step": 5818 }, { "epoch": 1.3417108600415033, "grad_norm": 1.3809568946566115, "learning_rate": 5.365237744169672e-07, "loss": 0.4166705012321472, "step": 5819 }, { "epoch": 1.341941434171086, "grad_norm": 1.432431964622234, "learning_rate": 5.361859689554524e-07, "loss": 0.4741361737251282, "step": 5820 }, { "epoch": 1.3421720083006687, "grad_norm": 1.5546451283342237, "learning_rate": 5.358482309140079e-07, "loss": 0.36658185720443726, "step": 5821 }, { "epoch": 1.3424025824302515, "grad_norm": 1.9632157270552801, "learning_rate": 5.355105603417267e-07, "loss": 0.38921263813972473, "step": 5822 }, { "epoch": 1.342633156559834, "grad_norm": 1.9732368197118861, "learning_rate": 5.351729572876935e-07, "loss": 0.5553977489471436, "step": 5823 }, { "epoch": 1.3428637306894167, "grad_norm": 1.4618484003422054, "learning_rate": 5.348354218009813e-07, "loss": 0.3968391418457031, "step": 5824 }, { "epoch": 1.3430943048189994, "grad_norm": 1.4937275325292458, "learning_rate": 5.344979539306549e-07, "loss": 0.4289783239364624, "step": 5825 }, { "epoch": 1.3433248789485819, "grad_norm": 1.313862309148984, "learning_rate": 5.341605537257686e-07, "loss": 0.45359861850738525, "step": 5826 }, { "epoch": 1.3435554530781646, "grad_norm": 1.366684570776694, "learning_rate": 5.338232212353675e-07, "loss": 0.3571642339229584, "step": 5827 }, { "epoch": 1.3437860272077473, "grad_norm": 1.1954938252676188, "learning_rate": 5.334859565084855e-07, "loss": 0.3784096837043762, "step": 5828 }, { "epoch": 1.34401660133733, "grad_norm": 1.5372749019268697, "learning_rate": 5.331487595941475e-07, "loss": 0.44996407628059387, "step": 5829 }, { "epoch": 1.3442471754669127, "grad_norm": 1.4793854978740197, "learning_rate": 5.32811630541369e-07, "loss": 0.4466405510902405, "step": 5830 }, { "epoch": 1.3444777495964952, "grad_norm": 1.3432081322840168, "learning_rate": 5.324745693991545e-07, "loss": 0.34488850831985474, "step": 5831 }, { "epoch": 1.344708323726078, "grad_norm": 1.589654871057016, "learning_rate": 5.321375762164999e-07, "loss": 0.5530165433883667, "step": 5832 }, { "epoch": 1.3449388978556607, "grad_norm": 1.6555576202053326, "learning_rate": 5.318006510423898e-07, "loss": 0.40732342004776, "step": 5833 }, { "epoch": 1.3451694719852432, "grad_norm": 1.5528027430812303, "learning_rate": 5.314637939258002e-07, "loss": 0.3364611566066742, "step": 5834 }, { "epoch": 1.3454000461148259, "grad_norm": 1.4557702222082582, "learning_rate": 5.311270049156966e-07, "loss": 0.43964290618896484, "step": 5835 }, { "epoch": 1.3456306202444086, "grad_norm": 1.5963363545263636, "learning_rate": 5.30790284061035e-07, "loss": 0.5203431844711304, "step": 5836 }, { "epoch": 1.3458611943739913, "grad_norm": 1.356219303149177, "learning_rate": 5.304536314107607e-07, "loss": 0.4779793620109558, "step": 5837 }, { "epoch": 1.346091768503574, "grad_norm": 1.4030454651132978, "learning_rate": 5.301170470138102e-07, "loss": 0.4769410490989685, "step": 5838 }, { "epoch": 1.3463223426331565, "grad_norm": 1.5437367488200047, "learning_rate": 5.297805309191089e-07, "loss": 0.42390304803848267, "step": 5839 }, { "epoch": 1.3465529167627392, "grad_norm": 1.6498587295444291, "learning_rate": 5.294440831755727e-07, "loss": 0.5550302863121033, "step": 5840 }, { "epoch": 1.3467834908923217, "grad_norm": 1.5927381474044073, "learning_rate": 5.291077038321078e-07, "loss": 0.4897978901863098, "step": 5841 }, { "epoch": 1.3470140650219045, "grad_norm": 1.5707311912828865, "learning_rate": 5.287713929376105e-07, "loss": 0.4014284610748291, "step": 5842 }, { "epoch": 1.3472446391514872, "grad_norm": 1.61036503253005, "learning_rate": 5.284351505409675e-07, "loss": 0.4299513101577759, "step": 5843 }, { "epoch": 1.34747521328107, "grad_norm": 1.382725158348277, "learning_rate": 5.280989766910541e-07, "loss": 0.44863104820251465, "step": 5844 }, { "epoch": 1.3477057874106526, "grad_norm": 1.4391517424186664, "learning_rate": 5.277628714367374e-07, "loss": 0.41933274269104004, "step": 5845 }, { "epoch": 1.347936361540235, "grad_norm": 1.5110585127257306, "learning_rate": 5.274268348268729e-07, "loss": 0.48257556557655334, "step": 5846 }, { "epoch": 1.3481669356698178, "grad_norm": 1.6840388322451993, "learning_rate": 5.270908669103078e-07, "loss": 0.435384064912796, "step": 5847 }, { "epoch": 1.3483975097994005, "grad_norm": 1.502056490079635, "learning_rate": 5.267549677358775e-07, "loss": 0.43291670083999634, "step": 5848 }, { "epoch": 1.348628083928983, "grad_norm": 2.07427587572329, "learning_rate": 5.264191373524089e-07, "loss": 0.4584086537361145, "step": 5849 }, { "epoch": 1.3488586580585658, "grad_norm": 1.4212548389061759, "learning_rate": 5.260833758087187e-07, "loss": 0.44879037141799927, "step": 5850 }, { "epoch": 1.3490892321881485, "grad_norm": 1.4876230861981237, "learning_rate": 5.257476831536124e-07, "loss": 0.48467326164245605, "step": 5851 }, { "epoch": 1.3493198063177312, "grad_norm": 1.4803329007154076, "learning_rate": 5.254120594358871e-07, "loss": 0.4126189947128296, "step": 5852 }, { "epoch": 1.349550380447314, "grad_norm": 1.494164620045959, "learning_rate": 5.250765047043284e-07, "loss": 0.5592546463012695, "step": 5853 }, { "epoch": 1.3497809545768964, "grad_norm": 1.2572079660485564, "learning_rate": 5.247410190077134e-07, "loss": 0.3269529342651367, "step": 5854 }, { "epoch": 1.3500115287064791, "grad_norm": 1.4784058003593112, "learning_rate": 5.244056023948075e-07, "loss": 0.42812949419021606, "step": 5855 }, { "epoch": 1.3502421028360618, "grad_norm": 1.643847647603701, "learning_rate": 5.240702549143676e-07, "loss": 0.4266297221183777, "step": 5856 }, { "epoch": 1.3504726769656443, "grad_norm": 1.6490610440384348, "learning_rate": 5.237349766151392e-07, "loss": 0.43848085403442383, "step": 5857 }, { "epoch": 1.350703251095227, "grad_norm": 1.5778355488021025, "learning_rate": 5.233997675458588e-07, "loss": 0.47512906789779663, "step": 5858 }, { "epoch": 1.3509338252248098, "grad_norm": 1.4893970639177625, "learning_rate": 5.230646277552527e-07, "loss": 0.3484492897987366, "step": 5859 }, { "epoch": 1.3511643993543925, "grad_norm": 1.5529244445697006, "learning_rate": 5.227295572920363e-07, "loss": 0.48915669322013855, "step": 5860 }, { "epoch": 1.3513949734839752, "grad_norm": 1.687195391171769, "learning_rate": 5.223945562049159e-07, "loss": 0.415932834148407, "step": 5861 }, { "epoch": 1.3516255476135577, "grad_norm": 1.8036222540660396, "learning_rate": 5.220596245425869e-07, "loss": 0.47945982217788696, "step": 5862 }, { "epoch": 1.3518561217431404, "grad_norm": 1.7032993247582504, "learning_rate": 5.217247623537356e-07, "loss": 0.4322330951690674, "step": 5863 }, { "epoch": 1.3520866958727231, "grad_norm": 1.7271334098970212, "learning_rate": 5.213899696870369e-07, "loss": 0.4608469605445862, "step": 5864 }, { "epoch": 1.3523172700023056, "grad_norm": 1.4726583260713841, "learning_rate": 5.210552465911566e-07, "loss": 0.5108528137207031, "step": 5865 }, { "epoch": 1.3525478441318883, "grad_norm": 1.3172906919344538, "learning_rate": 5.207205931147502e-07, "loss": 0.37947285175323486, "step": 5866 }, { "epoch": 1.352778418261471, "grad_norm": 1.5825329658520386, "learning_rate": 5.203860093064635e-07, "loss": 0.49094486236572266, "step": 5867 }, { "epoch": 1.3530089923910538, "grad_norm": 1.7057097538270483, "learning_rate": 5.200514952149308e-07, "loss": 0.34238702058792114, "step": 5868 }, { "epoch": 1.3532395665206365, "grad_norm": 1.4815052827701158, "learning_rate": 5.197170508887774e-07, "loss": 0.46390393376350403, "step": 5869 }, { "epoch": 1.353470140650219, "grad_norm": 1.517083535949924, "learning_rate": 5.193826763766183e-07, "loss": 0.44219160079956055, "step": 5870 }, { "epoch": 1.3537007147798017, "grad_norm": 1.2444078580604416, "learning_rate": 5.190483717270578e-07, "loss": 0.42801350355148315, "step": 5871 }, { "epoch": 1.3539312889093844, "grad_norm": 1.5276855271974423, "learning_rate": 5.187141369886906e-07, "loss": 0.43861454725265503, "step": 5872 }, { "epoch": 1.354161863038967, "grad_norm": 1.3684710867849712, "learning_rate": 5.183799722101014e-07, "loss": 0.4381449222564697, "step": 5873 }, { "epoch": 1.3543924371685496, "grad_norm": 1.6990772878337996, "learning_rate": 5.180458774398646e-07, "loss": 0.4341619610786438, "step": 5874 }, { "epoch": 1.3546230112981323, "grad_norm": 1.5170997767832792, "learning_rate": 5.177118527265437e-07, "loss": 0.4376588463783264, "step": 5875 }, { "epoch": 1.354853585427715, "grad_norm": 1.4712846387139202, "learning_rate": 5.173778981186932e-07, "loss": 0.38568538427352905, "step": 5876 }, { "epoch": 1.3550841595572978, "grad_norm": 1.4162179235966152, "learning_rate": 5.170440136648561e-07, "loss": 0.44178056716918945, "step": 5877 }, { "epoch": 1.3553147336868803, "grad_norm": 1.434763306400174, "learning_rate": 5.167101994135665e-07, "loss": 0.49847882986068726, "step": 5878 }, { "epoch": 1.355545307816463, "grad_norm": 1.3114035605969607, "learning_rate": 5.163764554133476e-07, "loss": 0.33697545528411865, "step": 5879 }, { "epoch": 1.3557758819460457, "grad_norm": 1.9314852987462174, "learning_rate": 5.160427817127117e-07, "loss": 0.5216578841209412, "step": 5880 }, { "epoch": 1.3560064560756282, "grad_norm": 1.5367735086016923, "learning_rate": 5.157091783601624e-07, "loss": 0.5101301670074463, "step": 5881 }, { "epoch": 1.356237030205211, "grad_norm": 1.4437708354871932, "learning_rate": 5.15375645404192e-07, "loss": 0.47876495122909546, "step": 5882 }, { "epoch": 1.3564676043347936, "grad_norm": 1.413429948502146, "learning_rate": 5.150421828932837e-07, "loss": 0.4656233787536621, "step": 5883 }, { "epoch": 1.3566981784643763, "grad_norm": 1.4503708847221477, "learning_rate": 5.147087908759082e-07, "loss": 0.4392930269241333, "step": 5884 }, { "epoch": 1.356928752593959, "grad_norm": 1.6187538312851866, "learning_rate": 5.143754694005289e-07, "loss": 0.5044047832489014, "step": 5885 }, { "epoch": 1.3571593267235416, "grad_norm": 1.3914560087628793, "learning_rate": 5.140422185155964e-07, "loss": 0.4345476031303406, "step": 5886 }, { "epoch": 1.3573899008531243, "grad_norm": 1.768236932460398, "learning_rate": 5.137090382695528e-07, "loss": 0.49207669496536255, "step": 5887 }, { "epoch": 1.357620474982707, "grad_norm": 1.531417533887488, "learning_rate": 5.133759287108286e-07, "loss": 0.4054356813430786, "step": 5888 }, { "epoch": 1.3578510491122895, "grad_norm": 1.9704323937726442, "learning_rate": 5.130428898878449e-07, "loss": 0.5436004400253296, "step": 5889 }, { "epoch": 1.3580816232418722, "grad_norm": 1.521959500035041, "learning_rate": 5.127099218490127e-07, "loss": 0.4832550287246704, "step": 5890 }, { "epoch": 1.358312197371455, "grad_norm": 1.4438750839498624, "learning_rate": 5.123770246427315e-07, "loss": 0.38890475034713745, "step": 5891 }, { "epoch": 1.3585427715010376, "grad_norm": 1.3028583829520697, "learning_rate": 5.12044198317392e-07, "loss": 0.49784210324287415, "step": 5892 }, { "epoch": 1.3587733456306204, "grad_norm": 1.5058620289816076, "learning_rate": 5.117114429213732e-07, "loss": 0.5033924579620361, "step": 5893 }, { "epoch": 1.3590039197602028, "grad_norm": 1.5069016697055244, "learning_rate": 5.113787585030454e-07, "loss": 0.4857698678970337, "step": 5894 }, { "epoch": 1.3592344938897856, "grad_norm": 1.6430229342698937, "learning_rate": 5.110461451107663e-07, "loss": 0.4269944429397583, "step": 5895 }, { "epoch": 1.3594650680193683, "grad_norm": 1.5554523008644683, "learning_rate": 5.107136027928858e-07, "loss": 0.44045162200927734, "step": 5896 }, { "epoch": 1.3596956421489508, "grad_norm": 1.6719472262672752, "learning_rate": 5.103811315977418e-07, "loss": 0.5223391056060791, "step": 5897 }, { "epoch": 1.3599262162785335, "grad_norm": 1.6234993813736853, "learning_rate": 5.100487315736627e-07, "loss": 0.45988473296165466, "step": 5898 }, { "epoch": 1.3601567904081162, "grad_norm": 1.3494964030299075, "learning_rate": 5.097164027689661e-07, "loss": 0.46342164278030396, "step": 5899 }, { "epoch": 1.360387364537699, "grad_norm": 1.6151646749241875, "learning_rate": 5.093841452319588e-07, "loss": 0.48150479793548584, "step": 5900 }, { "epoch": 1.3606179386672816, "grad_norm": 1.3258214555354595, "learning_rate": 5.090519590109386e-07, "loss": 0.3971351981163025, "step": 5901 }, { "epoch": 1.3608485127968641, "grad_norm": 1.755266254483419, "learning_rate": 5.087198441541914e-07, "loss": 0.44869956374168396, "step": 5902 }, { "epoch": 1.3610790869264469, "grad_norm": 1.4425507935259798, "learning_rate": 5.083878007099943e-07, "loss": 0.3402775526046753, "step": 5903 }, { "epoch": 1.3613096610560296, "grad_norm": 1.3415772700158808, "learning_rate": 5.080558287266119e-07, "loss": 0.4031033515930176, "step": 5904 }, { "epoch": 1.361540235185612, "grad_norm": 1.6435607583739225, "learning_rate": 5.077239282523012e-07, "loss": 0.493259459733963, "step": 5905 }, { "epoch": 1.3617708093151948, "grad_norm": 1.4120722192098578, "learning_rate": 5.073920993353063e-07, "loss": 0.39178919792175293, "step": 5906 }, { "epoch": 1.3620013834447775, "grad_norm": 1.6684880889475469, "learning_rate": 5.070603420238624e-07, "loss": 0.5091253519058228, "step": 5907 }, { "epoch": 1.3622319575743602, "grad_norm": 1.3497137288112562, "learning_rate": 5.067286563661934e-07, "loss": 0.416462779045105, "step": 5908 }, { "epoch": 1.362462531703943, "grad_norm": 1.7821137618482668, "learning_rate": 5.063970424105137e-07, "loss": 0.5018768310546875, "step": 5909 }, { "epoch": 1.3626931058335254, "grad_norm": 1.4656990143163084, "learning_rate": 5.060655002050262e-07, "loss": 0.5512624979019165, "step": 5910 }, { "epoch": 1.3629236799631081, "grad_norm": 1.3507263825947706, "learning_rate": 5.057340297979241e-07, "loss": 0.3953768014907837, "step": 5911 }, { "epoch": 1.3631542540926909, "grad_norm": 1.2807145092132266, "learning_rate": 5.054026312373896e-07, "loss": 0.4355456233024597, "step": 5912 }, { "epoch": 1.3633848282222734, "grad_norm": 1.7515987196576535, "learning_rate": 5.050713045715955e-07, "loss": 0.4826827645301819, "step": 5913 }, { "epoch": 1.363615402351856, "grad_norm": 1.5075633708078446, "learning_rate": 5.047400498487035e-07, "loss": 0.47084230184555054, "step": 5914 }, { "epoch": 1.3638459764814388, "grad_norm": 1.750968751768445, "learning_rate": 5.044088671168644e-07, "loss": 0.5273452997207642, "step": 5915 }, { "epoch": 1.3640765506110215, "grad_norm": 1.484245498844297, "learning_rate": 5.040777564242194e-07, "loss": 0.44878947734832764, "step": 5916 }, { "epoch": 1.3643071247406042, "grad_norm": 1.5815904358854045, "learning_rate": 5.03746717818898e-07, "loss": 0.47986388206481934, "step": 5917 }, { "epoch": 1.3645376988701867, "grad_norm": 1.4148899602283196, "learning_rate": 5.034157513490211e-07, "loss": 0.4807628393173218, "step": 5918 }, { "epoch": 1.3647682729997694, "grad_norm": 1.3747301384734179, "learning_rate": 5.030848570626969e-07, "loss": 0.46027708053588867, "step": 5919 }, { "epoch": 1.3649988471293522, "grad_norm": 1.517934310152821, "learning_rate": 5.027540350080249e-07, "loss": 0.3803088963031769, "step": 5920 }, { "epoch": 1.3652294212589347, "grad_norm": 1.7239494972976075, "learning_rate": 5.024232852330939e-07, "loss": 0.5530920028686523, "step": 5921 }, { "epoch": 1.3654599953885174, "grad_norm": 1.7183928961648565, "learning_rate": 5.020926077859805e-07, "loss": 0.45984846353530884, "step": 5922 }, { "epoch": 1.3656905695181, "grad_norm": 1.5752429840016822, "learning_rate": 5.017620027147533e-07, "loss": 0.4448089301586151, "step": 5923 }, { "epoch": 1.3659211436476828, "grad_norm": 1.713335636587649, "learning_rate": 5.01431470067468e-07, "loss": 0.4226706326007843, "step": 5924 }, { "epoch": 1.3661517177772655, "grad_norm": 1.9953320185051966, "learning_rate": 5.011010098921718e-07, "loss": 0.5243814587593079, "step": 5925 }, { "epoch": 1.366382291906848, "grad_norm": 1.6278540239253128, "learning_rate": 5.007706222368995e-07, "loss": 0.5733383893966675, "step": 5926 }, { "epoch": 1.3666128660364307, "grad_norm": 1.373199955472141, "learning_rate": 5.00440307149677e-07, "loss": 0.4583539366722107, "step": 5927 }, { "epoch": 1.3668434401660134, "grad_norm": 1.5871148090703988, "learning_rate": 5.001100646785186e-07, "loss": 0.474712610244751, "step": 5928 }, { "epoch": 1.367074014295596, "grad_norm": 1.6888872351824356, "learning_rate": 4.997798948714291e-07, "loss": 0.3995950222015381, "step": 5929 }, { "epoch": 1.3673045884251787, "grad_norm": 1.7317310910620232, "learning_rate": 4.994497977764011e-07, "loss": 0.4236767888069153, "step": 5930 }, { "epoch": 1.3675351625547614, "grad_norm": 1.6853541022393534, "learning_rate": 4.991197734414178e-07, "loss": 0.4972396492958069, "step": 5931 }, { "epoch": 1.367765736684344, "grad_norm": 1.503037819471691, "learning_rate": 4.98789821914452e-07, "loss": 0.444613516330719, "step": 5932 }, { "epoch": 1.3679963108139268, "grad_norm": 1.6912958330957677, "learning_rate": 4.984599432434649e-07, "loss": 0.4955690801143646, "step": 5933 }, { "epoch": 1.3682268849435093, "grad_norm": 1.559115794882019, "learning_rate": 4.981301374764084e-07, "loss": 0.4983398914337158, "step": 5934 }, { "epoch": 1.368457459073092, "grad_norm": 1.5588186216828477, "learning_rate": 4.978004046612223e-07, "loss": 0.45190921425819397, "step": 5935 }, { "epoch": 1.3686880332026747, "grad_norm": 1.757499738470118, "learning_rate": 4.974707448458369e-07, "loss": 0.5014151334762573, "step": 5936 }, { "epoch": 1.3689186073322572, "grad_norm": 1.5399509659752455, "learning_rate": 4.971411580781719e-07, "loss": 0.3868405818939209, "step": 5937 }, { "epoch": 1.36914918146184, "grad_norm": 1.42775142494789, "learning_rate": 4.968116444061363e-07, "loss": 0.4093654155731201, "step": 5938 }, { "epoch": 1.3693797555914227, "grad_norm": 1.318689202230345, "learning_rate": 4.964822038776276e-07, "loss": 0.3945506513118744, "step": 5939 }, { "epoch": 1.3696103297210054, "grad_norm": 1.5874458283663229, "learning_rate": 4.961528365405333e-07, "loss": 0.3645547330379486, "step": 5940 }, { "epoch": 1.369840903850588, "grad_norm": 1.760752800086673, "learning_rate": 4.958235424427309e-07, "loss": 0.36679786443710327, "step": 5941 }, { "epoch": 1.3700714779801706, "grad_norm": 1.5458160371079348, "learning_rate": 4.954943216320861e-07, "loss": 0.4892774820327759, "step": 5942 }, { "epoch": 1.3703020521097533, "grad_norm": 1.4817693224477149, "learning_rate": 4.951651741564544e-07, "loss": 0.40406349301338196, "step": 5943 }, { "epoch": 1.370532626239336, "grad_norm": 1.277384097830529, "learning_rate": 4.948361000636812e-07, "loss": 0.4219849407672882, "step": 5944 }, { "epoch": 1.3707632003689185, "grad_norm": 1.7190062313169097, "learning_rate": 4.945070994016008e-07, "loss": 0.5329363346099854, "step": 5945 }, { "epoch": 1.3709937744985012, "grad_norm": 1.5495655705207303, "learning_rate": 4.941781722180361e-07, "loss": 0.42577850818634033, "step": 5946 }, { "epoch": 1.371224348628084, "grad_norm": 1.3916296167797302, "learning_rate": 4.938493185608008e-07, "loss": 0.4157155156135559, "step": 5947 }, { "epoch": 1.3714549227576667, "grad_norm": 1.5016286739703502, "learning_rate": 4.935205384776965e-07, "loss": 0.46491485834121704, "step": 5948 }, { "epoch": 1.3716854968872494, "grad_norm": 1.6766694792768029, "learning_rate": 4.931918320165151e-07, "loss": 0.39582759141921997, "step": 5949 }, { "epoch": 1.3719160710168319, "grad_norm": 1.3277840228822322, "learning_rate": 4.928631992250371e-07, "loss": 0.4380473792552948, "step": 5950 }, { "epoch": 1.3721466451464146, "grad_norm": 1.5358043238579873, "learning_rate": 4.925346401510327e-07, "loss": 0.5044572949409485, "step": 5951 }, { "epoch": 1.372377219275997, "grad_norm": 1.6172521688559274, "learning_rate": 4.922061548422617e-07, "loss": 0.4808889627456665, "step": 5952 }, { "epoch": 1.3726077934055798, "grad_norm": 1.370713689883329, "learning_rate": 4.91877743346472e-07, "loss": 0.4215632677078247, "step": 5953 }, { "epoch": 1.3728383675351625, "grad_norm": 1.4640509349497177, "learning_rate": 4.915494057114025e-07, "loss": 0.4999268651008606, "step": 5954 }, { "epoch": 1.3730689416647452, "grad_norm": 1.593000178254792, "learning_rate": 4.912211419847793e-07, "loss": 0.476152241230011, "step": 5955 }, { "epoch": 1.373299515794328, "grad_norm": 1.5436036358421792, "learning_rate": 4.908929522143201e-07, "loss": 0.4253045320510864, "step": 5956 }, { "epoch": 1.3735300899239105, "grad_norm": 1.6726587032262756, "learning_rate": 4.905648364477293e-07, "loss": 0.4251098036766052, "step": 5957 }, { "epoch": 1.3737606640534932, "grad_norm": 1.5635582188699524, "learning_rate": 4.902367947327029e-07, "loss": 0.3820844888687134, "step": 5958 }, { "epoch": 1.373991238183076, "grad_norm": 1.5563353591748068, "learning_rate": 4.899088271169245e-07, "loss": 0.4725508689880371, "step": 5959 }, { "epoch": 1.3742218123126584, "grad_norm": 1.4545077693536257, "learning_rate": 4.895809336480675e-07, "loss": 0.48313626646995544, "step": 5960 }, { "epoch": 1.374452386442241, "grad_norm": 1.6596316713803083, "learning_rate": 4.892531143737952e-07, "loss": 0.5344939231872559, "step": 5961 }, { "epoch": 1.3746829605718238, "grad_norm": 1.7551620350578117, "learning_rate": 4.889253693417585e-07, "loss": 0.4305552840232849, "step": 5962 }, { "epoch": 1.3749135347014065, "grad_norm": 1.4302106398553562, "learning_rate": 4.885976985995996e-07, "loss": 0.3564034700393677, "step": 5963 }, { "epoch": 1.3751441088309893, "grad_norm": 1.4796542999179279, "learning_rate": 4.882701021949475e-07, "loss": 0.5498751997947693, "step": 5964 }, { "epoch": 1.3753746829605717, "grad_norm": 1.5956710623028654, "learning_rate": 4.879425801754226e-07, "loss": 0.4489964246749878, "step": 5965 }, { "epoch": 1.3756052570901545, "grad_norm": 1.7595842751992934, "learning_rate": 4.87615132588633e-07, "loss": 0.4142688810825348, "step": 5966 }, { "epoch": 1.3758358312197372, "grad_norm": 1.483255834477138, "learning_rate": 4.872877594821767e-07, "loss": 0.3823632597923279, "step": 5967 }, { "epoch": 1.3760664053493197, "grad_norm": 1.603982795420405, "learning_rate": 4.869604609036408e-07, "loss": 0.39014697074890137, "step": 5968 }, { "epoch": 1.3762969794789024, "grad_norm": 1.5363032345717058, "learning_rate": 4.866332369006016e-07, "loss": 0.3907933235168457, "step": 5969 }, { "epoch": 1.376527553608485, "grad_norm": 1.5125931439342233, "learning_rate": 4.863060875206244e-07, "loss": 0.3872087001800537, "step": 5970 }, { "epoch": 1.3767581277380678, "grad_norm": 1.5847290584713085, "learning_rate": 4.85979012811263e-07, "loss": 0.40380537509918213, "step": 5971 }, { "epoch": 1.3769887018676505, "grad_norm": 1.3127541034285726, "learning_rate": 4.856520128200621e-07, "loss": 0.39867663383483887, "step": 5972 }, { "epoch": 1.377219275997233, "grad_norm": 1.7829413941875683, "learning_rate": 4.853250875945534e-07, "loss": 0.5337423086166382, "step": 5973 }, { "epoch": 1.3774498501268158, "grad_norm": 1.4903518724810052, "learning_rate": 4.849982371822593e-07, "loss": 0.3824300765991211, "step": 5974 }, { "epoch": 1.3776804242563985, "grad_norm": 1.4611697760932394, "learning_rate": 4.846714616306907e-07, "loss": 0.3613823652267456, "step": 5975 }, { "epoch": 1.377910998385981, "grad_norm": 1.5701851835478555, "learning_rate": 4.843447609873484e-07, "loss": 0.5040241479873657, "step": 5976 }, { "epoch": 1.3781415725155637, "grad_norm": 1.5801365248176698, "learning_rate": 4.840181352997207e-07, "loss": 0.4639400243759155, "step": 5977 }, { "epoch": 1.3783721466451464, "grad_norm": 1.730401874176074, "learning_rate": 4.836915846152867e-07, "loss": 0.503246009349823, "step": 5978 }, { "epoch": 1.3786027207747291, "grad_norm": 1.6695377873006745, "learning_rate": 4.833651089815135e-07, "loss": 0.3974607586860657, "step": 5979 }, { "epoch": 1.3788332949043118, "grad_norm": 1.556324884896908, "learning_rate": 4.830387084458573e-07, "loss": 0.43200844526290894, "step": 5980 }, { "epoch": 1.3790638690338943, "grad_norm": 1.8355646307086506, "learning_rate": 4.827123830557644e-07, "loss": 0.547272801399231, "step": 5981 }, { "epoch": 1.379294443163477, "grad_norm": 1.5723785141918243, "learning_rate": 4.823861328586688e-07, "loss": 0.4509696960449219, "step": 5982 }, { "epoch": 1.3795250172930598, "grad_norm": 1.53889123165165, "learning_rate": 4.820599579019946e-07, "loss": 0.46022483706474304, "step": 5983 }, { "epoch": 1.3797555914226423, "grad_norm": 1.5251655198087088, "learning_rate": 4.817338582331548e-07, "loss": 0.40973198413848877, "step": 5984 }, { "epoch": 1.379986165552225, "grad_norm": 1.6235538954137896, "learning_rate": 4.814078338995515e-07, "loss": 0.39012736082077026, "step": 5985 }, { "epoch": 1.3802167396818077, "grad_norm": 1.6954879615528178, "learning_rate": 4.810818849485749e-07, "loss": 0.40657323598861694, "step": 5986 }, { "epoch": 1.3804473138113904, "grad_norm": 1.4158383607530642, "learning_rate": 4.80756011427606e-07, "loss": 0.38662189245224, "step": 5987 }, { "epoch": 1.3806778879409731, "grad_norm": 1.629559894183336, "learning_rate": 4.804302133840126e-07, "loss": 0.4888705015182495, "step": 5988 }, { "epoch": 1.3809084620705556, "grad_norm": 1.4732586688358036, "learning_rate": 4.801044908651537e-07, "loss": 0.4559556245803833, "step": 5989 }, { "epoch": 1.3811390362001383, "grad_norm": 1.773370569584542, "learning_rate": 4.797788439183757e-07, "loss": 0.40912386775016785, "step": 5990 }, { "epoch": 1.381369610329721, "grad_norm": 1.3364334005028415, "learning_rate": 4.794532725910152e-07, "loss": 0.3848627209663391, "step": 5991 }, { "epoch": 1.3816001844593035, "grad_norm": 1.3860556916017956, "learning_rate": 4.791277769303975e-07, "loss": 0.4995359778404236, "step": 5992 }, { "epoch": 1.3818307585888863, "grad_norm": 1.3898521995378452, "learning_rate": 4.788023569838356e-07, "loss": 0.38717859983444214, "step": 5993 }, { "epoch": 1.382061332718469, "grad_norm": 1.7766923949498086, "learning_rate": 4.784770127986339e-07, "loss": 0.39855217933654785, "step": 5994 }, { "epoch": 1.3822919068480517, "grad_norm": 1.337680228597258, "learning_rate": 4.781517444220835e-07, "loss": 0.38494858145713806, "step": 5995 }, { "epoch": 1.3825224809776344, "grad_norm": 1.4735802599680248, "learning_rate": 4.778265519014661e-07, "loss": 0.44064784049987793, "step": 5996 }, { "epoch": 1.382753055107217, "grad_norm": 1.8926413264660993, "learning_rate": 4.775014352840512e-07, "loss": 0.39377373456954956, "step": 5997 }, { "epoch": 1.3829836292367996, "grad_norm": 1.5108151654480286, "learning_rate": 4.771763946170979e-07, "loss": 0.45127296447753906, "step": 5998 }, { "epoch": 1.3832142033663823, "grad_norm": 1.4916107560429466, "learning_rate": 4.768514299478545e-07, "loss": 0.4999358654022217, "step": 5999 }, { "epoch": 1.3834447774959648, "grad_norm": 1.7185286370183794, "learning_rate": 4.7652654132355784e-07, "loss": 0.49552851915359497, "step": 6000 }, { "epoch": 1.3836753516255476, "grad_norm": 1.7765151369959267, "learning_rate": 4.762017287914338e-07, "loss": 0.49196135997772217, "step": 6001 }, { "epoch": 1.3839059257551303, "grad_norm": 1.6417248034868954, "learning_rate": 4.758769923986966e-07, "loss": 0.3870600461959839, "step": 6002 }, { "epoch": 1.384136499884713, "grad_norm": 1.6104154654929026, "learning_rate": 4.7555233219255074e-07, "loss": 0.4585425853729248, "step": 6003 }, { "epoch": 1.3843670740142957, "grad_norm": 1.3699827425500786, "learning_rate": 4.752277482201882e-07, "loss": 0.4332588315010071, "step": 6004 }, { "epoch": 1.3845976481438782, "grad_norm": 1.6005942921335146, "learning_rate": 4.749032405287913e-07, "loss": 0.4386274814605713, "step": 6005 }, { "epoch": 1.384828222273461, "grad_norm": 1.430715117905666, "learning_rate": 4.745788091655295e-07, "loss": 0.5064895749092102, "step": 6006 }, { "epoch": 1.3850587964030436, "grad_norm": 1.470846994377081, "learning_rate": 4.7425445417756295e-07, "loss": 0.4441327452659607, "step": 6007 }, { "epoch": 1.3852893705326261, "grad_norm": 1.6191746478584856, "learning_rate": 4.7393017561203965e-07, "loss": 0.4415687918663025, "step": 6008 }, { "epoch": 1.3855199446622088, "grad_norm": 1.4021203224812295, "learning_rate": 4.736059735160973e-07, "loss": 0.4668382704257965, "step": 6009 }, { "epoch": 1.3857505187917916, "grad_norm": 1.6079029250549948, "learning_rate": 4.732818479368615e-07, "loss": 0.3981805443763733, "step": 6010 }, { "epoch": 1.3859810929213743, "grad_norm": 1.4448652226463723, "learning_rate": 4.7295779892144694e-07, "loss": 0.4465348720550537, "step": 6011 }, { "epoch": 1.386211667050957, "grad_norm": 1.7530840597871544, "learning_rate": 4.7263382651695805e-07, "loss": 0.4844682812690735, "step": 6012 }, { "epoch": 1.3864422411805395, "grad_norm": 1.417618664232542, "learning_rate": 4.723099307704868e-07, "loss": 0.4261378347873688, "step": 6013 }, { "epoch": 1.3866728153101222, "grad_norm": 1.4997543603341101, "learning_rate": 4.7198611172911506e-07, "loss": 0.457815945148468, "step": 6014 }, { "epoch": 1.386903389439705, "grad_norm": 1.570655771567204, "learning_rate": 4.7166236943991333e-07, "loss": 0.46352216601371765, "step": 6015 }, { "epoch": 1.3871339635692874, "grad_norm": 1.486567492766103, "learning_rate": 4.7133870394994104e-07, "loss": 0.4166485667228699, "step": 6016 }, { "epoch": 1.3873645376988701, "grad_norm": 1.6982826579565595, "learning_rate": 4.710151153062456e-07, "loss": 0.405789852142334, "step": 6017 }, { "epoch": 1.3875951118284529, "grad_norm": 1.7459761562612983, "learning_rate": 4.7069160355586456e-07, "loss": 0.47718119621276855, "step": 6018 }, { "epoch": 1.3878256859580356, "grad_norm": 1.5824023496617, "learning_rate": 4.7036816874582307e-07, "loss": 0.5040356516838074, "step": 6019 }, { "epoch": 1.3880562600876183, "grad_norm": 1.5657039890557007, "learning_rate": 4.700448109231362e-07, "loss": 0.45093637704849243, "step": 6020 }, { "epoch": 1.3882868342172008, "grad_norm": 1.4929438188817195, "learning_rate": 4.6972153013480666e-07, "loss": 0.5363638997077942, "step": 6021 }, { "epoch": 1.3885174083467835, "grad_norm": 1.6076509313088967, "learning_rate": 4.6939832642782684e-07, "loss": 0.4917050004005432, "step": 6022 }, { "epoch": 1.3887479824763662, "grad_norm": 1.692377103708349, "learning_rate": 4.690751998491782e-07, "loss": 0.43033331632614136, "step": 6023 }, { "epoch": 1.3889785566059487, "grad_norm": 1.5272594017885164, "learning_rate": 4.6875215044582973e-07, "loss": 0.36168330907821655, "step": 6024 }, { "epoch": 1.3892091307355314, "grad_norm": 1.693805471797637, "learning_rate": 4.6842917826474047e-07, "loss": 0.48347967863082886, "step": 6025 }, { "epoch": 1.3894397048651141, "grad_norm": 1.332022962916858, "learning_rate": 4.681062833528572e-07, "loss": 0.4493439495563507, "step": 6026 }, { "epoch": 1.3896702789946969, "grad_norm": 1.4842335012941816, "learning_rate": 4.677834657571165e-07, "loss": 0.385773628950119, "step": 6027 }, { "epoch": 1.3899008531242796, "grad_norm": 1.396017775513053, "learning_rate": 4.674607255244426e-07, "loss": 0.4254469573497772, "step": 6028 }, { "epoch": 1.390131427253862, "grad_norm": 1.6964811881797437, "learning_rate": 4.671380627017497e-07, "loss": 0.5070454478263855, "step": 6029 }, { "epoch": 1.3903620013834448, "grad_norm": 1.4647574188657595, "learning_rate": 4.668154773359394e-07, "loss": 0.44099801778793335, "step": 6030 }, { "epoch": 1.3905925755130275, "grad_norm": 1.6731498815474952, "learning_rate": 4.6649296947390314e-07, "loss": 0.4965481162071228, "step": 6031 }, { "epoch": 1.39082314964261, "grad_norm": 1.6621123973009748, "learning_rate": 4.6617053916252116e-07, "loss": 0.4085753262042999, "step": 6032 }, { "epoch": 1.3910537237721927, "grad_norm": 1.473260966023028, "learning_rate": 4.6584818644866106e-07, "loss": 0.3768424391746521, "step": 6033 }, { "epoch": 1.3912842979017754, "grad_norm": 1.7152094772871185, "learning_rate": 4.6552591137918087e-07, "loss": 0.4330044388771057, "step": 6034 }, { "epoch": 1.3915148720313582, "grad_norm": 1.5907700374750249, "learning_rate": 4.6520371400092584e-07, "loss": 0.4669216275215149, "step": 6035 }, { "epoch": 1.3917454461609409, "grad_norm": 1.8634085835731031, "learning_rate": 4.648815943607314e-07, "loss": 0.5491182208061218, "step": 6036 }, { "epoch": 1.3919760202905234, "grad_norm": 1.439715262819595, "learning_rate": 4.6455955250542e-07, "loss": 0.4842255413532257, "step": 6037 }, { "epoch": 1.392206594420106, "grad_norm": 1.598726710739168, "learning_rate": 4.6423758848180427e-07, "loss": 0.45479631423950195, "step": 6038 }, { "epoch": 1.3924371685496888, "grad_norm": 1.5770365297702393, "learning_rate": 4.6391570233668486e-07, "loss": 0.4209587574005127, "step": 6039 }, { "epoch": 1.3926677426792713, "grad_norm": 1.4722680740741498, "learning_rate": 4.6359389411685145e-07, "loss": 0.5061464905738831, "step": 6040 }, { "epoch": 1.392898316808854, "grad_norm": 1.5166334201375402, "learning_rate": 4.6327216386908196e-07, "loss": 0.39443570375442505, "step": 6041 }, { "epoch": 1.3931288909384367, "grad_norm": 1.6936024892202146, "learning_rate": 4.6295051164014256e-07, "loss": 0.4784463942050934, "step": 6042 }, { "epoch": 1.3933594650680194, "grad_norm": 1.623401531095956, "learning_rate": 4.6262893747678957e-07, "loss": 0.41256606578826904, "step": 6043 }, { "epoch": 1.3935900391976022, "grad_norm": 1.430742297932055, "learning_rate": 4.623074414257662e-07, "loss": 0.4507666230201721, "step": 6044 }, { "epoch": 1.3938206133271847, "grad_norm": 1.4646678303979026, "learning_rate": 4.6198602353380545e-07, "loss": 0.3783376216888428, "step": 6045 }, { "epoch": 1.3940511874567674, "grad_norm": 1.5485119918407955, "learning_rate": 4.616646838476289e-07, "loss": 0.47854840755462646, "step": 6046 }, { "epoch": 1.39428176158635, "grad_norm": 1.506150277535636, "learning_rate": 4.6134342241394685e-07, "loss": 0.47121208906173706, "step": 6047 }, { "epoch": 1.3945123357159326, "grad_norm": 1.4779397331062858, "learning_rate": 4.610222392794569e-07, "loss": 0.5211559534072876, "step": 6048 }, { "epoch": 1.3947429098455153, "grad_norm": 2.0522570691736606, "learning_rate": 4.6070113449084747e-07, "loss": 0.5846370458602905, "step": 6049 }, { "epoch": 1.394973483975098, "grad_norm": 1.6651959806589232, "learning_rate": 4.6038010809479365e-07, "loss": 0.4787401854991913, "step": 6050 }, { "epoch": 1.3952040581046807, "grad_norm": 1.336725780471279, "learning_rate": 4.600591601379596e-07, "loss": 0.36429738998413086, "step": 6051 }, { "epoch": 1.3954346322342635, "grad_norm": 1.606284081701607, "learning_rate": 4.597382906669992e-07, "loss": 0.49923771619796753, "step": 6052 }, { "epoch": 1.395665206363846, "grad_norm": 1.5476584348847333, "learning_rate": 4.5941749972855326e-07, "loss": 0.408005028963089, "step": 6053 }, { "epoch": 1.3958957804934287, "grad_norm": 1.72927604568786, "learning_rate": 4.590967873692523e-07, "loss": 0.4524402618408203, "step": 6054 }, { "epoch": 1.3961263546230114, "grad_norm": 1.5041096845532136, "learning_rate": 4.587761536357152e-07, "loss": 0.5264980792999268, "step": 6055 }, { "epoch": 1.3963569287525939, "grad_norm": 1.6066275699787076, "learning_rate": 4.5845559857454976e-07, "loss": 0.5324279069900513, "step": 6056 }, { "epoch": 1.3965875028821766, "grad_norm": 1.4996065290876746, "learning_rate": 4.581351222323511e-07, "loss": 0.5197574496269226, "step": 6057 }, { "epoch": 1.3968180770117593, "grad_norm": 1.6418756331716369, "learning_rate": 4.578147246557043e-07, "loss": 0.4549001157283783, "step": 6058 }, { "epoch": 1.397048651141342, "grad_norm": 1.374490396915421, "learning_rate": 4.5749440589118183e-07, "loss": 0.38597673177719116, "step": 6059 }, { "epoch": 1.3972792252709247, "grad_norm": 1.3707652210777583, "learning_rate": 4.57174165985346e-07, "loss": 0.4104316532611847, "step": 6060 }, { "epoch": 1.3975097994005072, "grad_norm": 1.7242255092716443, "learning_rate": 4.5685400498474614e-07, "loss": 0.5241787433624268, "step": 6061 }, { "epoch": 1.39774037353009, "grad_norm": 1.668574015144598, "learning_rate": 4.565339229359213e-07, "loss": 0.5033289790153503, "step": 6062 }, { "epoch": 1.3979709476596724, "grad_norm": 1.3309384356199967, "learning_rate": 4.5621391988539894e-07, "loss": 0.436188280582428, "step": 6063 }, { "epoch": 1.3982015217892552, "grad_norm": 1.4783680897212301, "learning_rate": 4.5589399587969414e-07, "loss": 0.3885838985443115, "step": 6064 }, { "epoch": 1.3984320959188379, "grad_norm": 1.6395174483956128, "learning_rate": 4.555741509653116e-07, "loss": 0.5140193104743958, "step": 6065 }, { "epoch": 1.3986626700484206, "grad_norm": 1.360236032045127, "learning_rate": 4.552543851887436e-07, "loss": 0.41084468364715576, "step": 6066 }, { "epoch": 1.3988932441780033, "grad_norm": 1.417896120601143, "learning_rate": 4.549346985964718e-07, "loss": 0.3606417179107666, "step": 6067 }, { "epoch": 1.3991238183075858, "grad_norm": 1.5212574193639694, "learning_rate": 4.546150912349653e-07, "loss": 0.48518556356430054, "step": 6068 }, { "epoch": 1.3993543924371685, "grad_norm": 1.6821671640024862, "learning_rate": 4.5429556315068264e-07, "loss": 0.5394424200057983, "step": 6069 }, { "epoch": 1.3995849665667512, "grad_norm": 1.3734997636022714, "learning_rate": 4.539761143900708e-07, "loss": 0.40272367000579834, "step": 6070 }, { "epoch": 1.3998155406963337, "grad_norm": 1.6175896107942709, "learning_rate": 4.536567449995641e-07, "loss": 0.4279879331588745, "step": 6071 }, { "epoch": 1.4000461148259165, "grad_norm": 1.4620694447822713, "learning_rate": 4.5333745502558695e-07, "loss": 0.48560982942581177, "step": 6072 }, { "epoch": 1.4002766889554992, "grad_norm": 1.7184355426607418, "learning_rate": 4.530182445145506e-07, "loss": 0.49256429076194763, "step": 6073 }, { "epoch": 1.4005072630850819, "grad_norm": 1.4236944961072253, "learning_rate": 4.5269911351285614e-07, "loss": 0.5015553832054138, "step": 6074 }, { "epoch": 1.4007378372146646, "grad_norm": 1.4505255602543088, "learning_rate": 4.5238006206689204e-07, "loss": 0.4313800632953644, "step": 6075 }, { "epoch": 1.400968411344247, "grad_norm": 1.311079736416616, "learning_rate": 4.520610902230363e-07, "loss": 0.3440586030483246, "step": 6076 }, { "epoch": 1.4011989854738298, "grad_norm": 1.4064686390113332, "learning_rate": 4.517421980276538e-07, "loss": 0.43868017196655273, "step": 6077 }, { "epoch": 1.4014295596034125, "grad_norm": 1.6307364330463041, "learning_rate": 4.5142338552709923e-07, "loss": 0.5581029057502747, "step": 6078 }, { "epoch": 1.401660133732995, "grad_norm": 1.6962393590938891, "learning_rate": 4.5110465276771524e-07, "loss": 0.4543154835700989, "step": 6079 }, { "epoch": 1.4018907078625777, "grad_norm": 1.5554679193557313, "learning_rate": 4.507859997958333e-07, "loss": 0.5229466557502747, "step": 6080 }, { "epoch": 1.4021212819921605, "grad_norm": 1.5285075075955497, "learning_rate": 4.504674266577724e-07, "loss": 0.46781739592552185, "step": 6081 }, { "epoch": 1.4023518561217432, "grad_norm": 1.6198419428344395, "learning_rate": 4.5014893339983993e-07, "loss": 0.48040711879730225, "step": 6082 }, { "epoch": 1.402582430251326, "grad_norm": 1.5279313939865138, "learning_rate": 4.49830520068333e-07, "loss": 0.5039708018302917, "step": 6083 }, { "epoch": 1.4028130043809084, "grad_norm": 1.4998739241266676, "learning_rate": 4.495121867095354e-07, "loss": 0.43496155738830566, "step": 6084 }, { "epoch": 1.403043578510491, "grad_norm": 1.3838778339679694, "learning_rate": 4.4919393336972045e-07, "loss": 0.4603109061717987, "step": 6085 }, { "epoch": 1.4032741526400738, "grad_norm": 1.476085268646584, "learning_rate": 4.488757600951496e-07, "loss": 0.4571962356567383, "step": 6086 }, { "epoch": 1.4035047267696563, "grad_norm": 1.4791952167701867, "learning_rate": 4.485576669320729e-07, "loss": 0.46302443742752075, "step": 6087 }, { "epoch": 1.403735300899239, "grad_norm": 1.675302072516594, "learning_rate": 4.482396539267275e-07, "loss": 0.39066869020462036, "step": 6088 }, { "epoch": 1.4039658750288218, "grad_norm": 1.704176039322231, "learning_rate": 4.4792172112534076e-07, "loss": 0.4797130823135376, "step": 6089 }, { "epoch": 1.4041964491584045, "grad_norm": 1.5835144658620484, "learning_rate": 4.4760386857412704e-07, "loss": 0.4578198492527008, "step": 6090 }, { "epoch": 1.4044270232879872, "grad_norm": 1.3987211085891795, "learning_rate": 4.472860963192889e-07, "loss": 0.40768736600875854, "step": 6091 }, { "epoch": 1.4046575974175697, "grad_norm": 1.4530633567004236, "learning_rate": 4.4696840440701846e-07, "loss": 0.4201413094997406, "step": 6092 }, { "epoch": 1.4048881715471524, "grad_norm": 1.3648395822246437, "learning_rate": 4.466507928834951e-07, "loss": 0.45901796221733093, "step": 6093 }, { "epoch": 1.4051187456767351, "grad_norm": 1.6465847208416895, "learning_rate": 4.463332617948874e-07, "loss": 0.4699435830116272, "step": 6094 }, { "epoch": 1.4053493198063176, "grad_norm": 1.4755445259366653, "learning_rate": 4.46015811187351e-07, "loss": 0.4526669383049011, "step": 6095 }, { "epoch": 1.4055798939359003, "grad_norm": 1.5721685230021194, "learning_rate": 4.456984411070313e-07, "loss": 0.46754884719848633, "step": 6096 }, { "epoch": 1.405810468065483, "grad_norm": 2.1874728205075495, "learning_rate": 4.453811516000604e-07, "loss": 0.5119268894195557, "step": 6097 }, { "epoch": 1.4060410421950658, "grad_norm": 2.056110026644097, "learning_rate": 4.4506394271256043e-07, "loss": 0.42980802059173584, "step": 6098 }, { "epoch": 1.4062716163246485, "grad_norm": 1.5339161636381375, "learning_rate": 4.447468144906401e-07, "loss": 0.5895063281059265, "step": 6099 }, { "epoch": 1.406502190454231, "grad_norm": 1.3796241305160553, "learning_rate": 4.4442976698039803e-07, "loss": 0.42768803238868713, "step": 6100 }, { "epoch": 1.4067327645838137, "grad_norm": 1.608854909074267, "learning_rate": 4.4411280022791943e-07, "loss": 0.44234544038772583, "step": 6101 }, { "epoch": 1.4069633387133964, "grad_norm": 1.3028889839673445, "learning_rate": 4.437959142792791e-07, "loss": 0.4382736086845398, "step": 6102 }, { "epoch": 1.407193912842979, "grad_norm": 1.6088674485493302, "learning_rate": 4.4347910918054e-07, "loss": 0.47603681683540344, "step": 6103 }, { "epoch": 1.4074244869725616, "grad_norm": 1.8816511615485159, "learning_rate": 4.431623849777522e-07, "loss": 0.5562035441398621, "step": 6104 }, { "epoch": 1.4076550611021443, "grad_norm": 2.2517510056002763, "learning_rate": 4.4284574171695535e-07, "loss": 0.4153141677379608, "step": 6105 }, { "epoch": 1.407885635231727, "grad_norm": 1.2534764690727898, "learning_rate": 4.425291794441762e-07, "loss": 0.4825887680053711, "step": 6106 }, { "epoch": 1.4081162093613098, "grad_norm": 1.4829126230878127, "learning_rate": 4.4221269820543104e-07, "loss": 0.4853668808937073, "step": 6107 }, { "epoch": 1.4083467834908923, "grad_norm": 1.6140810272295893, "learning_rate": 4.418962980467229e-07, "loss": 0.5615251064300537, "step": 6108 }, { "epoch": 1.408577357620475, "grad_norm": 1.8397680714752904, "learning_rate": 4.4157997901404396e-07, "loss": 0.38605546951293945, "step": 6109 }, { "epoch": 1.4088079317500577, "grad_norm": 1.412066772348378, "learning_rate": 4.412637411533745e-07, "loss": 0.41582173109054565, "step": 6110 }, { "epoch": 1.4090385058796402, "grad_norm": 1.4963267141581975, "learning_rate": 4.4094758451068327e-07, "loss": 0.38091376423835754, "step": 6111 }, { "epoch": 1.409269080009223, "grad_norm": 1.5465721612260863, "learning_rate": 4.4063150913192635e-07, "loss": 0.43319058418273926, "step": 6112 }, { "epoch": 1.4094996541388056, "grad_norm": 1.2123497825560654, "learning_rate": 4.403155150630484e-07, "loss": 0.43207013607025146, "step": 6113 }, { "epoch": 1.4097302282683883, "grad_norm": 1.7217391258871346, "learning_rate": 4.399996023499829e-07, "loss": 0.43750250339508057, "step": 6114 }, { "epoch": 1.409960802397971, "grad_norm": 1.5123653802002535, "learning_rate": 4.3968377103865016e-07, "loss": 0.44084444642066956, "step": 6115 }, { "epoch": 1.4101913765275536, "grad_norm": 1.4135580211481893, "learning_rate": 4.3936802117495997e-07, "loss": 0.4752010405063629, "step": 6116 }, { "epoch": 1.4104219506571363, "grad_norm": 1.384945744446678, "learning_rate": 4.390523528048098e-07, "loss": 0.39239025115966797, "step": 6117 }, { "epoch": 1.410652524786719, "grad_norm": 1.7179287290824201, "learning_rate": 4.387367659740856e-07, "loss": 0.46021080017089844, "step": 6118 }, { "epoch": 1.4108830989163015, "grad_norm": 1.3751290560349647, "learning_rate": 4.3842126072866014e-07, "loss": 0.4079766571521759, "step": 6119 }, { "epoch": 1.4111136730458842, "grad_norm": 1.5182170234243058, "learning_rate": 4.381058371143964e-07, "loss": 0.4922672510147095, "step": 6120 }, { "epoch": 1.411344247175467, "grad_norm": 1.5200373777326295, "learning_rate": 4.377904951771438e-07, "loss": 0.3950929045677185, "step": 6121 }, { "epoch": 1.4115748213050496, "grad_norm": 1.6189013836504815, "learning_rate": 4.374752349627402e-07, "loss": 0.503406286239624, "step": 6122 }, { "epoch": 1.4118053954346323, "grad_norm": 1.724327270706253, "learning_rate": 4.3716005651701215e-07, "loss": 0.49198317527770996, "step": 6123 }, { "epoch": 1.4120359695642148, "grad_norm": 1.424527206510087, "learning_rate": 4.368449598857742e-07, "loss": 0.47396305203437805, "step": 6124 }, { "epoch": 1.4122665436937976, "grad_norm": 1.7537535213801698, "learning_rate": 4.365299451148291e-07, "loss": 0.5248152017593384, "step": 6125 }, { "epoch": 1.4124971178233803, "grad_norm": 1.310814657820865, "learning_rate": 4.362150122499666e-07, "loss": 0.44327419996261597, "step": 6126 }, { "epoch": 1.4127276919529628, "grad_norm": 1.5885906377106098, "learning_rate": 4.3590016133696626e-07, "loss": 0.4628877639770508, "step": 6127 }, { "epoch": 1.4129582660825455, "grad_norm": 1.5166490469327556, "learning_rate": 4.355853924215942e-07, "loss": 0.5277193188667297, "step": 6128 }, { "epoch": 1.4131888402121282, "grad_norm": 1.6202759290555122, "learning_rate": 4.3527070554960577e-07, "loss": 0.4675426781177521, "step": 6129 }, { "epoch": 1.413419414341711, "grad_norm": 1.668904355836008, "learning_rate": 4.349561007667433e-07, "loss": 0.3762160539627075, "step": 6130 }, { "epoch": 1.4136499884712936, "grad_norm": 1.5686457690092273, "learning_rate": 4.346415781187385e-07, "loss": 0.4797256588935852, "step": 6131 }, { "epoch": 1.4138805626008761, "grad_norm": 1.283129438483415, "learning_rate": 4.3432713765130967e-07, "loss": 0.4348931312561035, "step": 6132 }, { "epoch": 1.4141111367304589, "grad_norm": 1.72495987311985, "learning_rate": 4.3401277941016435e-07, "loss": 0.5080585479736328, "step": 6133 }, { "epoch": 1.4143417108600416, "grad_norm": 1.5083246190317607, "learning_rate": 4.33698503440998e-07, "loss": 0.40223604440689087, "step": 6134 }, { "epoch": 1.414572284989624, "grad_norm": 1.5888336584861464, "learning_rate": 4.3338430978949315e-07, "loss": 0.4460202753543854, "step": 6135 }, { "epoch": 1.4148028591192068, "grad_norm": 1.6992292342961226, "learning_rate": 4.3307019850132167e-07, "loss": 0.5814889669418335, "step": 6136 }, { "epoch": 1.4150334332487895, "grad_norm": 1.366462724450419, "learning_rate": 4.3275616962214214e-07, "loss": 0.39237886667251587, "step": 6137 }, { "epoch": 1.4152640073783722, "grad_norm": 1.8844588932900945, "learning_rate": 4.324422231976025e-07, "loss": 0.4621772766113281, "step": 6138 }, { "epoch": 1.415494581507955, "grad_norm": 1.2090393738968102, "learning_rate": 4.3212835927333745e-07, "loss": 0.3722139596939087, "step": 6139 }, { "epoch": 1.4157251556375374, "grad_norm": 1.4849768206374545, "learning_rate": 4.3181457789497055e-07, "loss": 0.5007534623146057, "step": 6140 }, { "epoch": 1.4159557297671201, "grad_norm": 1.603501037396303, "learning_rate": 4.315008791081135e-07, "loss": 0.470672607421875, "step": 6141 }, { "epoch": 1.4161863038967029, "grad_norm": 1.6882048347200689, "learning_rate": 4.3118726295836495e-07, "loss": 0.5196114778518677, "step": 6142 }, { "epoch": 1.4164168780262854, "grad_norm": 1.686399785386393, "learning_rate": 4.3087372949131275e-07, "loss": 0.4606804847717285, "step": 6143 }, { "epoch": 1.416647452155868, "grad_norm": 1.2427386262927842, "learning_rate": 4.3056027875253156e-07, "loss": 0.3926661014556885, "step": 6144 }, { "epoch": 1.4168780262854508, "grad_norm": 1.5075319697699416, "learning_rate": 4.3024691078758536e-07, "loss": 0.4570828080177307, "step": 6145 }, { "epoch": 1.4171086004150335, "grad_norm": 1.4876286685500335, "learning_rate": 4.299336256420245e-07, "loss": 0.398615300655365, "step": 6146 }, { "epoch": 1.4173391745446162, "grad_norm": 1.5413174329970663, "learning_rate": 4.2962042336138873e-07, "loss": 0.47571802139282227, "step": 6147 }, { "epoch": 1.4175697486741987, "grad_norm": 1.5960399575320494, "learning_rate": 4.2930730399120487e-07, "loss": 0.4266431927680969, "step": 6148 }, { "epoch": 1.4178003228037814, "grad_norm": 1.5511638894349447, "learning_rate": 4.289942675769886e-07, "loss": 0.47870057821273804, "step": 6149 }, { "epoch": 1.4180308969333641, "grad_norm": 1.3514029969532406, "learning_rate": 4.2868131416424223e-07, "loss": 0.3947669267654419, "step": 6150 }, { "epoch": 1.4182614710629466, "grad_norm": 1.6045441623823578, "learning_rate": 4.283684437984573e-07, "loss": 0.49074164032936096, "step": 6151 }, { "epoch": 1.4184920451925294, "grad_norm": 1.5267380397937564, "learning_rate": 4.280556565251123e-07, "loss": 0.5540445446968079, "step": 6152 }, { "epoch": 1.418722619322112, "grad_norm": 1.4292058799019856, "learning_rate": 4.2774295238967386e-07, "loss": 0.4898286461830139, "step": 6153 }, { "epoch": 1.4189531934516948, "grad_norm": 1.5872207462828773, "learning_rate": 4.2743033143759733e-07, "loss": 0.5432708859443665, "step": 6154 }, { "epoch": 1.4191837675812775, "grad_norm": 1.811563729099354, "learning_rate": 4.2711779371432445e-07, "loss": 0.4438853859901428, "step": 6155 }, { "epoch": 1.41941434171086, "grad_norm": 1.4197202159023756, "learning_rate": 4.268053392652863e-07, "loss": 0.4885905385017395, "step": 6156 }, { "epoch": 1.4196449158404427, "grad_norm": 2.10234923243058, "learning_rate": 4.264929681359013e-07, "loss": 0.4465547204017639, "step": 6157 }, { "epoch": 1.4198754899700254, "grad_norm": 1.5987256760741122, "learning_rate": 4.2618068037157594e-07, "loss": 0.4392780661582947, "step": 6158 }, { "epoch": 1.420106064099608, "grad_norm": 1.7421664904589054, "learning_rate": 4.258684760177039e-07, "loss": 0.4501269459724426, "step": 6159 }, { "epoch": 1.4203366382291907, "grad_norm": 1.399976858224263, "learning_rate": 4.2555635511966783e-07, "loss": 0.38439738750457764, "step": 6160 }, { "epoch": 1.4205672123587734, "grad_norm": 1.4211214514262747, "learning_rate": 4.2524431772283743e-07, "loss": 0.4679202437400818, "step": 6161 }, { "epoch": 1.420797786488356, "grad_norm": 1.3094843029172225, "learning_rate": 4.2493236387257e-07, "loss": 0.33505773544311523, "step": 6162 }, { "epoch": 1.4210283606179388, "grad_norm": 1.7083049967506945, "learning_rate": 4.246204936142116e-07, "loss": 0.39141514897346497, "step": 6163 }, { "epoch": 1.4212589347475213, "grad_norm": 1.5786326298364493, "learning_rate": 4.243087069930958e-07, "loss": 0.49278295040130615, "step": 6164 }, { "epoch": 1.421489508877104, "grad_norm": 2.2314439595882214, "learning_rate": 4.239970040545442e-07, "loss": 0.44093143939971924, "step": 6165 }, { "epoch": 1.4217200830066867, "grad_norm": 1.5138193694081605, "learning_rate": 4.236853848438654e-07, "loss": 0.3840683102607727, "step": 6166 }, { "epoch": 1.4219506571362692, "grad_norm": 1.7654139979291832, "learning_rate": 4.23373849406357e-07, "loss": 0.49814748764038086, "step": 6167 }, { "epoch": 1.422181231265852, "grad_norm": 1.672205831624779, "learning_rate": 4.2306239778730314e-07, "loss": 0.37481504678726196, "step": 6168 }, { "epoch": 1.4224118053954347, "grad_norm": 1.6089555356775624, "learning_rate": 4.227510300319772e-07, "loss": 0.3936859965324402, "step": 6169 }, { "epoch": 1.4226423795250174, "grad_norm": 1.6958111197730896, "learning_rate": 4.224397461856389e-07, "loss": 0.4448816478252411, "step": 6170 }, { "epoch": 1.4228729536546, "grad_norm": 1.7506080980818486, "learning_rate": 4.22128546293537e-07, "loss": 0.5494886040687561, "step": 6171 }, { "epoch": 1.4231035277841826, "grad_norm": 1.6093955633210433, "learning_rate": 4.218174304009078e-07, "loss": 0.4532161355018616, "step": 6172 }, { "epoch": 1.4233341019137653, "grad_norm": 1.5423276922709723, "learning_rate": 4.215063985529743e-07, "loss": 0.4771450161933899, "step": 6173 }, { "epoch": 1.4235646760433478, "grad_norm": 1.4359456178719159, "learning_rate": 4.211954507949491e-07, "loss": 0.40784329175949097, "step": 6174 }, { "epoch": 1.4237952501729305, "grad_norm": 1.6548161498628766, "learning_rate": 4.208845871720308e-07, "loss": 0.5336268544197083, "step": 6175 }, { "epoch": 1.4240258243025132, "grad_norm": 1.495644640745375, "learning_rate": 4.205738077294072e-07, "loss": 0.44641751050949097, "step": 6176 }, { "epoch": 1.424256398432096, "grad_norm": 1.650188328042211, "learning_rate": 4.2026311251225264e-07, "loss": 0.4370793104171753, "step": 6177 }, { "epoch": 1.4244869725616787, "grad_norm": 1.5423618719597711, "learning_rate": 4.1995250156573046e-07, "loss": 0.4290730953216553, "step": 6178 }, { "epoch": 1.4247175466912612, "grad_norm": 1.8757556733756044, "learning_rate": 4.196419749349904e-07, "loss": 0.5021491646766663, "step": 6179 }, { "epoch": 1.4249481208208439, "grad_norm": 1.4243786827618563, "learning_rate": 4.193315326651711e-07, "loss": 0.3880186080932617, "step": 6180 }, { "epoch": 1.4251786949504266, "grad_norm": 1.6032235222838507, "learning_rate": 4.1902117480139876e-07, "loss": 0.46498721837997437, "step": 6181 }, { "epoch": 1.425409269080009, "grad_norm": 1.6074916356613946, "learning_rate": 4.187109013887863e-07, "loss": 0.45799821615219116, "step": 6182 }, { "epoch": 1.4256398432095918, "grad_norm": 1.7936327965955485, "learning_rate": 4.1840071247243594e-07, "loss": 0.47459733486175537, "step": 6183 }, { "epoch": 1.4258704173391745, "grad_norm": 1.7628830057109544, "learning_rate": 4.18090608097436e-07, "loss": 0.47636276483535767, "step": 6184 }, { "epoch": 1.4261009914687572, "grad_norm": 1.4575388433663756, "learning_rate": 4.17780588308864e-07, "loss": 0.4710165858268738, "step": 6185 }, { "epoch": 1.42633156559834, "grad_norm": 1.6068491390352067, "learning_rate": 4.174706531517836e-07, "loss": 0.4222904443740845, "step": 6186 }, { "epoch": 1.4265621397279225, "grad_norm": 1.6136307494472921, "learning_rate": 4.171608026712476e-07, "loss": 0.43496620655059814, "step": 6187 }, { "epoch": 1.4267927138575052, "grad_norm": 1.6637888441260775, "learning_rate": 4.1685103691229597e-07, "loss": 0.5178344249725342, "step": 6188 }, { "epoch": 1.4270232879870879, "grad_norm": 1.2438461713878222, "learning_rate": 4.1654135591995644e-07, "loss": 0.4033231735229492, "step": 6189 }, { "epoch": 1.4272538621166704, "grad_norm": 1.6711330724791171, "learning_rate": 4.162317597392436e-07, "loss": 0.3368793725967407, "step": 6190 }, { "epoch": 1.427484436246253, "grad_norm": 1.6185157962363963, "learning_rate": 4.159222484151612e-07, "loss": 0.44133609533309937, "step": 6191 }, { "epoch": 1.4277150103758358, "grad_norm": 1.4778493402771002, "learning_rate": 4.1561282199269944e-07, "loss": 0.431888222694397, "step": 6192 }, { "epoch": 1.4279455845054185, "grad_norm": 1.6042487363335018, "learning_rate": 4.1530348051683615e-07, "loss": 0.4319697618484497, "step": 6193 }, { "epoch": 1.4281761586350012, "grad_norm": 2.1012743912812986, "learning_rate": 4.1499422403253783e-07, "loss": 0.5468018054962158, "step": 6194 }, { "epoch": 1.4284067327645837, "grad_norm": 1.5851271799276925, "learning_rate": 4.1468505258475784e-07, "loss": 0.5083246231079102, "step": 6195 }, { "epoch": 1.4286373068941665, "grad_norm": 1.5639019523203612, "learning_rate": 4.1437596621843774e-07, "loss": 0.3767821788787842, "step": 6196 }, { "epoch": 1.4288678810237492, "grad_norm": 1.7459586887034657, "learning_rate": 4.140669649785058e-07, "loss": 0.5210238099098206, "step": 6197 }, { "epoch": 1.4290984551533317, "grad_norm": 1.7429606479800976, "learning_rate": 4.1375804890987907e-07, "loss": 0.4498119354248047, "step": 6198 }, { "epoch": 1.4293290292829144, "grad_norm": 1.8267093368864302, "learning_rate": 4.134492180574609e-07, "loss": 0.5093557238578796, "step": 6199 }, { "epoch": 1.429559603412497, "grad_norm": 1.422406352052411, "learning_rate": 4.131404724661438e-07, "loss": 0.4745742082595825, "step": 6200 }, { "epoch": 1.4297901775420798, "grad_norm": 1.506088588333767, "learning_rate": 4.128318121808068e-07, "loss": 0.45697301626205444, "step": 6201 }, { "epoch": 1.4300207516716625, "grad_norm": 1.7309660786915744, "learning_rate": 4.125232372463161e-07, "loss": 0.4690994918346405, "step": 6202 }, { "epoch": 1.430251325801245, "grad_norm": 1.6241026421208185, "learning_rate": 4.1221474770752696e-07, "loss": 0.49369046092033386, "step": 6203 }, { "epoch": 1.4304818999308277, "grad_norm": 1.573925179309737, "learning_rate": 4.1190634360928113e-07, "loss": 0.5137126445770264, "step": 6204 }, { "epoch": 1.4307124740604105, "grad_norm": 1.492371449937338, "learning_rate": 4.1159802499640883e-07, "loss": 0.43663549423217773, "step": 6205 }, { "epoch": 1.430943048189993, "grad_norm": 1.373244593865611, "learning_rate": 4.112897919137265e-07, "loss": 0.40197718143463135, "step": 6206 }, { "epoch": 1.4311736223195757, "grad_norm": 1.782636444844866, "learning_rate": 4.1098164440603967e-07, "loss": 0.5537480115890503, "step": 6207 }, { "epoch": 1.4314041964491584, "grad_norm": 1.415124349915093, "learning_rate": 4.1067358251814e-07, "loss": 0.36077365279197693, "step": 6208 }, { "epoch": 1.4316347705787411, "grad_norm": 1.8848844116732066, "learning_rate": 4.103656062948081e-07, "loss": 0.5421038866043091, "step": 6209 }, { "epoch": 1.4318653447083238, "grad_norm": 1.5989095555214856, "learning_rate": 4.100577157808107e-07, "loss": 0.4330317974090576, "step": 6210 }, { "epoch": 1.4320959188379063, "grad_norm": 1.5778977933757077, "learning_rate": 4.0974991102090315e-07, "loss": 0.4734618067741394, "step": 6211 }, { "epoch": 1.432326492967489, "grad_norm": 1.7307541730622933, "learning_rate": 4.0944219205982853e-07, "loss": 0.4664125442504883, "step": 6212 }, { "epoch": 1.4325570670970718, "grad_norm": 1.5163510968488794, "learning_rate": 4.09134558942316e-07, "loss": 0.5214053988456726, "step": 6213 }, { "epoch": 1.4327876412266543, "grad_norm": 1.4446024999002893, "learning_rate": 4.08827011713084e-07, "loss": 0.4694370627403259, "step": 6214 }, { "epoch": 1.433018215356237, "grad_norm": 1.4399092047479434, "learning_rate": 4.0851955041683674e-07, "loss": 0.46517378091812134, "step": 6215 }, { "epoch": 1.4332487894858197, "grad_norm": 1.589744461016997, "learning_rate": 4.0821217509826766e-07, "loss": 0.49152523279190063, "step": 6216 }, { "epoch": 1.4334793636154024, "grad_norm": 1.3335404796705832, "learning_rate": 4.0790488580205616e-07, "loss": 0.4272884726524353, "step": 6217 }, { "epoch": 1.4337099377449851, "grad_norm": 1.7167989658225775, "learning_rate": 4.075976825728703e-07, "loss": 0.4585829973220825, "step": 6218 }, { "epoch": 1.4339405118745676, "grad_norm": 1.4284884424474726, "learning_rate": 4.07290565455365e-07, "loss": 0.33463186025619507, "step": 6219 }, { "epoch": 1.4341710860041503, "grad_norm": 1.618873724040505, "learning_rate": 4.0698353449418344e-07, "loss": 0.4228953719139099, "step": 6220 }, { "epoch": 1.434401660133733, "grad_norm": 1.688194150248175, "learning_rate": 4.066765897339547e-07, "loss": 0.5336583256721497, "step": 6221 }, { "epoch": 1.4346322342633155, "grad_norm": 1.590308662997971, "learning_rate": 4.063697312192972e-07, "loss": 0.4779771864414215, "step": 6222 }, { "epoch": 1.4348628083928983, "grad_norm": 1.4786534556099964, "learning_rate": 4.060629589948155e-07, "loss": 0.35226666927337646, "step": 6223 }, { "epoch": 1.435093382522481, "grad_norm": 1.7110004239307235, "learning_rate": 4.0575627310510174e-07, "loss": 0.5006309747695923, "step": 6224 }, { "epoch": 1.4353239566520637, "grad_norm": 1.5102552970375984, "learning_rate": 4.0544967359473645e-07, "loss": 0.3925382196903229, "step": 6225 }, { "epoch": 1.4355545307816464, "grad_norm": 1.4323897305301354, "learning_rate": 4.0514316050828643e-07, "loss": 0.3443659543991089, "step": 6226 }, { "epoch": 1.435785104911229, "grad_norm": 1.3832333833383677, "learning_rate": 4.048367338903067e-07, "loss": 0.35585030913352966, "step": 6227 }, { "epoch": 1.4360156790408116, "grad_norm": 1.551815991519559, "learning_rate": 4.045303937853395e-07, "loss": 0.4147206246852875, "step": 6228 }, { "epoch": 1.4362462531703943, "grad_norm": 1.2817256800052734, "learning_rate": 4.0422414023791486e-07, "loss": 0.4475427567958832, "step": 6229 }, { "epoch": 1.4364768272999768, "grad_norm": 1.3842198366935599, "learning_rate": 4.0391797329254897e-07, "loss": 0.5235386490821838, "step": 6230 }, { "epoch": 1.4367074014295595, "grad_norm": 1.4929978689012695, "learning_rate": 4.036118929937472e-07, "loss": 0.3543087840080261, "step": 6231 }, { "epoch": 1.4369379755591423, "grad_norm": 1.793735853632873, "learning_rate": 4.03305899386001e-07, "loss": 0.4718255400657654, "step": 6232 }, { "epoch": 1.437168549688725, "grad_norm": 1.338180352532036, "learning_rate": 4.0299999251378924e-07, "loss": 0.41239792108535767, "step": 6233 }, { "epoch": 1.4373991238183077, "grad_norm": 1.5900128771725797, "learning_rate": 4.026941724215791e-07, "loss": 0.4241238236427307, "step": 6234 }, { "epoch": 1.4376296979478902, "grad_norm": 1.4625134538700348, "learning_rate": 4.0238843915382435e-07, "loss": 0.43678992986679077, "step": 6235 }, { "epoch": 1.437860272077473, "grad_norm": 1.3845075397304552, "learning_rate": 4.0208279275496706e-07, "loss": 0.4304202198982239, "step": 6236 }, { "epoch": 1.4380908462070556, "grad_norm": 1.4379971371115365, "learning_rate": 4.0177723326943516e-07, "loss": 0.4297143816947937, "step": 6237 }, { "epoch": 1.4383214203366381, "grad_norm": 1.4713452003345164, "learning_rate": 4.0147176074164557e-07, "loss": 0.4823951721191406, "step": 6238 }, { "epoch": 1.4385519944662208, "grad_norm": 1.4766475893290447, "learning_rate": 4.0116637521600104e-07, "loss": 0.41384291648864746, "step": 6239 }, { "epoch": 1.4387825685958036, "grad_norm": 1.4772189735738515, "learning_rate": 4.008610767368933e-07, "loss": 0.5725995898246765, "step": 6240 }, { "epoch": 1.4390131427253863, "grad_norm": 1.580155865045121, "learning_rate": 4.0055586534869976e-07, "loss": 0.5222553014755249, "step": 6241 }, { "epoch": 1.439243716854969, "grad_norm": 1.3886146191032183, "learning_rate": 4.002507410957864e-07, "loss": 0.33871912956237793, "step": 6242 }, { "epoch": 1.4394742909845515, "grad_norm": 1.6215524550661136, "learning_rate": 3.9994570402250647e-07, "loss": 0.423028826713562, "step": 6243 }, { "epoch": 1.4397048651141342, "grad_norm": 1.5682836985778081, "learning_rate": 3.996407541731994e-07, "loss": 0.4235682785511017, "step": 6244 }, { "epoch": 1.439935439243717, "grad_norm": 1.231022526448631, "learning_rate": 3.993358915921936e-07, "loss": 0.43758147954940796, "step": 6245 }, { "epoch": 1.4401660133732994, "grad_norm": 1.4111669631590298, "learning_rate": 3.9903111632380314e-07, "loss": 0.4462485611438751, "step": 6246 }, { "epoch": 1.4403965875028821, "grad_norm": 1.4290246546090093, "learning_rate": 3.9872642841233086e-07, "loss": 0.4650310277938843, "step": 6247 }, { "epoch": 1.4406271616324648, "grad_norm": 1.4998946903017614, "learning_rate": 3.984218279020656e-07, "loss": 0.36653342843055725, "step": 6248 }, { "epoch": 1.4408577357620476, "grad_norm": 1.4936296304301175, "learning_rate": 3.9811731483728483e-07, "loss": 0.4102433919906616, "step": 6249 }, { "epoch": 1.4410883098916303, "grad_norm": 1.6065631349936378, "learning_rate": 3.9781288926225187e-07, "loss": 0.46611371636390686, "step": 6250 }, { "epoch": 1.4413188840212128, "grad_norm": 1.4339333577964222, "learning_rate": 3.9750855122121854e-07, "loss": 0.39757978916168213, "step": 6251 }, { "epoch": 1.4415494581507955, "grad_norm": 1.762654016187883, "learning_rate": 3.972043007584236e-07, "loss": 0.3736093044281006, "step": 6252 }, { "epoch": 1.4417800322803782, "grad_norm": 1.463877920104907, "learning_rate": 3.9690013791809243e-07, "loss": 0.4907599091529846, "step": 6253 }, { "epoch": 1.4420106064099607, "grad_norm": 1.8306810417206691, "learning_rate": 3.965960627444387e-07, "loss": 0.4852679967880249, "step": 6254 }, { "epoch": 1.4422411805395434, "grad_norm": 1.379992571943406, "learning_rate": 3.962920752816622e-07, "loss": 0.3681846261024475, "step": 6255 }, { "epoch": 1.4424717546691261, "grad_norm": 1.3930271555712797, "learning_rate": 3.9598817557395136e-07, "loss": 0.36029407382011414, "step": 6256 }, { "epoch": 1.4427023287987089, "grad_norm": 1.5468752557100751, "learning_rate": 3.9568436366548044e-07, "loss": 0.4156547486782074, "step": 6257 }, { "epoch": 1.4429329029282916, "grad_norm": 1.2893479866141693, "learning_rate": 3.9538063960041155e-07, "loss": 0.417999804019928, "step": 6258 }, { "epoch": 1.443163477057874, "grad_norm": 1.5873772931626444, "learning_rate": 3.9507700342289454e-07, "loss": 0.34347790479660034, "step": 6259 }, { "epoch": 1.4433940511874568, "grad_norm": 1.6747174695424258, "learning_rate": 3.9477345517706606e-07, "loss": 0.5093958973884583, "step": 6260 }, { "epoch": 1.4436246253170395, "grad_norm": 1.3786087360846342, "learning_rate": 3.9446999490704935e-07, "loss": 0.45406264066696167, "step": 6261 }, { "epoch": 1.443855199446622, "grad_norm": 1.4643807349818905, "learning_rate": 3.941666226569561e-07, "loss": 0.35074740648269653, "step": 6262 }, { "epoch": 1.4440857735762047, "grad_norm": 1.9209061652207753, "learning_rate": 3.9386333847088414e-07, "loss": 0.4588093161582947, "step": 6263 }, { "epoch": 1.4443163477057874, "grad_norm": 1.706957598822881, "learning_rate": 3.935601423929187e-07, "loss": 0.5431508421897888, "step": 6264 }, { "epoch": 1.4445469218353701, "grad_norm": 2.1293944579193744, "learning_rate": 3.9325703446713253e-07, "loss": 0.5942284464836121, "step": 6265 }, { "epoch": 1.4447774959649529, "grad_norm": 1.563688512589723, "learning_rate": 3.929540147375856e-07, "loss": 0.45533287525177, "step": 6266 }, { "epoch": 1.4450080700945354, "grad_norm": 1.4069649860322977, "learning_rate": 3.926510832483252e-07, "loss": 0.41154634952545166, "step": 6267 }, { "epoch": 1.445238644224118, "grad_norm": 1.7442081379649044, "learning_rate": 3.923482400433847e-07, "loss": 0.548882246017456, "step": 6268 }, { "epoch": 1.4454692183537008, "grad_norm": 1.6064445647457797, "learning_rate": 3.9204548516678635e-07, "loss": 0.4062466621398926, "step": 6269 }, { "epoch": 1.4456997924832833, "grad_norm": 1.4970160030578672, "learning_rate": 3.917428186625378e-07, "loss": 0.39035165309906006, "step": 6270 }, { "epoch": 1.445930366612866, "grad_norm": 1.647666751716306, "learning_rate": 3.9144024057463545e-07, "loss": 0.44899889826774597, "step": 6271 }, { "epoch": 1.4461609407424487, "grad_norm": 1.6865824844286113, "learning_rate": 3.911377509470616e-07, "loss": 0.5676968097686768, "step": 6272 }, { "epoch": 1.4463915148720314, "grad_norm": 1.5001442753287921, "learning_rate": 3.9083534982378596e-07, "loss": 0.5157150626182556, "step": 6273 }, { "epoch": 1.4466220890016142, "grad_norm": 1.3999116109701921, "learning_rate": 3.9053303724876595e-07, "loss": 0.4405839443206787, "step": 6274 }, { "epoch": 1.4468526631311966, "grad_norm": 1.4027072316284976, "learning_rate": 3.9023081326594564e-07, "loss": 0.4184240400791168, "step": 6275 }, { "epoch": 1.4470832372607794, "grad_norm": 1.4676581347164595, "learning_rate": 3.8992867791925687e-07, "loss": 0.46825113892555237, "step": 6276 }, { "epoch": 1.447313811390362, "grad_norm": 1.5974669468558875, "learning_rate": 3.896266312526174e-07, "loss": 0.39870697259902954, "step": 6277 }, { "epoch": 1.4475443855199446, "grad_norm": 1.5056097224989398, "learning_rate": 3.893246733099332e-07, "loss": 0.5021681785583496, "step": 6278 }, { "epoch": 1.4477749596495273, "grad_norm": 1.6448123845050522, "learning_rate": 3.890228041350966e-07, "loss": 0.5453378558158875, "step": 6279 }, { "epoch": 1.44800553377911, "grad_norm": 1.6411917622938994, "learning_rate": 3.887210237719877e-07, "loss": 0.4488704800605774, "step": 6280 }, { "epoch": 1.4482361079086927, "grad_norm": 1.5018657352386517, "learning_rate": 3.8841933226447274e-07, "loss": 0.45669007301330566, "step": 6281 }, { "epoch": 1.4484666820382754, "grad_norm": 1.704954137797073, "learning_rate": 3.881177296564061e-07, "loss": 0.43954944610595703, "step": 6282 }, { "epoch": 1.448697256167858, "grad_norm": 1.3077525799414271, "learning_rate": 3.8781621599162896e-07, "loss": 0.39490729570388794, "step": 6283 }, { "epoch": 1.4489278302974407, "grad_norm": 1.8875404119821422, "learning_rate": 3.875147913139688e-07, "loss": 0.44206392765045166, "step": 6284 }, { "epoch": 1.4491584044270232, "grad_norm": 1.5003627073617865, "learning_rate": 3.872134556672415e-07, "loss": 0.3874932527542114, "step": 6285 }, { "epoch": 1.4493889785566059, "grad_norm": 1.616983828039009, "learning_rate": 3.8691220909524847e-07, "loss": 0.4762042760848999, "step": 6286 }, { "epoch": 1.4496195526861886, "grad_norm": 1.4983771405139852, "learning_rate": 3.8661105164177955e-07, "loss": 0.45220378041267395, "step": 6287 }, { "epoch": 1.4498501268157713, "grad_norm": 1.5182044259213916, "learning_rate": 3.863099833506105e-07, "loss": 0.48711973428726196, "step": 6288 }, { "epoch": 1.450080700945354, "grad_norm": 1.795485740865634, "learning_rate": 3.8600900426550495e-07, "loss": 0.3985457420349121, "step": 6289 }, { "epoch": 1.4503112750749365, "grad_norm": 1.8111920220274738, "learning_rate": 3.8570811443021324e-07, "loss": 0.4626576006412506, "step": 6290 }, { "epoch": 1.4505418492045192, "grad_norm": 1.3056530217454654, "learning_rate": 3.8540731388847303e-07, "loss": 0.49909156560897827, "step": 6291 }, { "epoch": 1.450772423334102, "grad_norm": 1.6088418800938844, "learning_rate": 3.8510660268400853e-07, "loss": 0.47779160737991333, "step": 6292 }, { "epoch": 1.4510029974636844, "grad_norm": 1.7546373602134575, "learning_rate": 3.8480598086053073e-07, "loss": 0.41273951530456543, "step": 6293 }, { "epoch": 1.4512335715932672, "grad_norm": 1.372334717947673, "learning_rate": 3.8450544846173873e-07, "loss": 0.49659836292266846, "step": 6294 }, { "epoch": 1.4514641457228499, "grad_norm": 1.5745738888755318, "learning_rate": 3.842050055313174e-07, "loss": 0.48864418268203735, "step": 6295 }, { "epoch": 1.4516947198524326, "grad_norm": 1.5511685453466029, "learning_rate": 3.8390465211293964e-07, "loss": 0.4437263011932373, "step": 6296 }, { "epoch": 1.4519252939820153, "grad_norm": 1.425822828962689, "learning_rate": 3.83604388250264e-07, "loss": 0.4785847067832947, "step": 6297 }, { "epoch": 1.4521558681115978, "grad_norm": 1.4667204310824673, "learning_rate": 3.8330421398693815e-07, "loss": 0.4376726746559143, "step": 6298 }, { "epoch": 1.4523864422411805, "grad_norm": 1.3570227959381094, "learning_rate": 3.8300412936659456e-07, "loss": 0.39121049642562866, "step": 6299 }, { "epoch": 1.4526170163707632, "grad_norm": 1.3658035995507571, "learning_rate": 3.827041344328541e-07, "loss": 0.4635738730430603, "step": 6300 }, { "epoch": 1.4528475905003457, "grad_norm": 2.0304852722065068, "learning_rate": 3.8240422922932345e-07, "loss": 0.502306342124939, "step": 6301 }, { "epoch": 1.4530781646299284, "grad_norm": 1.4029845821737765, "learning_rate": 3.8210441379959765e-07, "loss": 0.4401247799396515, "step": 6302 }, { "epoch": 1.4533087387595112, "grad_norm": 1.3861824238158087, "learning_rate": 3.8180468818725744e-07, "loss": 0.5291532874107361, "step": 6303 }, { "epoch": 1.4535393128890939, "grad_norm": 1.6276608547131342, "learning_rate": 3.8150505243587074e-07, "loss": 0.44658181071281433, "step": 6304 }, { "epoch": 1.4537698870186766, "grad_norm": 1.6458326531407963, "learning_rate": 3.8120550658899284e-07, "loss": 0.45127803087234497, "step": 6305 }, { "epoch": 1.454000461148259, "grad_norm": 1.492007208083286, "learning_rate": 3.809060506901659e-07, "loss": 0.42187097668647766, "step": 6306 }, { "epoch": 1.4542310352778418, "grad_norm": 1.5038936507089915, "learning_rate": 3.806066847829191e-07, "loss": 0.3573130667209625, "step": 6307 }, { "epoch": 1.4544616094074245, "grad_norm": 1.9148379623538745, "learning_rate": 3.8030740891076775e-07, "loss": 0.4350733757019043, "step": 6308 }, { "epoch": 1.454692183537007, "grad_norm": 1.541900067739278, "learning_rate": 3.8000822311721526e-07, "loss": 0.48514148592948914, "step": 6309 }, { "epoch": 1.4549227576665897, "grad_norm": 1.4827947959124368, "learning_rate": 3.797091274457507e-07, "loss": 0.41036373376846313, "step": 6310 }, { "epoch": 1.4551533317961725, "grad_norm": 1.494922453363639, "learning_rate": 3.7941012193985113e-07, "loss": 0.4141424298286438, "step": 6311 }, { "epoch": 1.4553839059257552, "grad_norm": 1.273366480801725, "learning_rate": 3.7911120664297947e-07, "loss": 0.4465962052345276, "step": 6312 }, { "epoch": 1.455614480055338, "grad_norm": 1.5781844793110138, "learning_rate": 3.7881238159858653e-07, "loss": 0.42370718717575073, "step": 6313 }, { "epoch": 1.4558450541849204, "grad_norm": 1.5971127849956464, "learning_rate": 3.785136468501098e-07, "loss": 0.5199419260025024, "step": 6314 }, { "epoch": 1.456075628314503, "grad_norm": 1.617344004292436, "learning_rate": 3.782150024409727e-07, "loss": 0.4802842140197754, "step": 6315 }, { "epoch": 1.4563062024440858, "grad_norm": 1.24431475405318, "learning_rate": 3.77916448414587e-07, "loss": 0.4640405476093292, "step": 6316 }, { "epoch": 1.4565367765736683, "grad_norm": 1.4636172678889559, "learning_rate": 3.776179848143497e-07, "loss": 0.4338728189468384, "step": 6317 }, { "epoch": 1.456767350703251, "grad_norm": 2.139264242241595, "learning_rate": 3.7731961168364644e-07, "loss": 0.42709267139434814, "step": 6318 }, { "epoch": 1.4569979248328337, "grad_norm": 1.6617712318798017, "learning_rate": 3.7702132906584784e-07, "loss": 0.4985729455947876, "step": 6319 }, { "epoch": 1.4572284989624165, "grad_norm": 1.441274937368423, "learning_rate": 3.7672313700431277e-07, "loss": 0.46335911750793457, "step": 6320 }, { "epoch": 1.4574590730919992, "grad_norm": 1.416712646344965, "learning_rate": 3.7642503554238657e-07, "loss": 0.39897364377975464, "step": 6321 }, { "epoch": 1.4576896472215817, "grad_norm": 1.7524170106258121, "learning_rate": 3.761270247234014e-07, "loss": 0.4338347017765045, "step": 6322 }, { "epoch": 1.4579202213511644, "grad_norm": 1.5421394568485456, "learning_rate": 3.7582910459067607e-07, "loss": 0.4619752764701843, "step": 6323 }, { "epoch": 1.458150795480747, "grad_norm": 1.6592584693059589, "learning_rate": 3.7553127518751583e-07, "loss": 0.4676104784011841, "step": 6324 }, { "epoch": 1.4583813696103296, "grad_norm": 1.495504668484879, "learning_rate": 3.752335365572138e-07, "loss": 0.37536361813545227, "step": 6325 }, { "epoch": 1.4586119437399123, "grad_norm": 1.5747560176376743, "learning_rate": 3.749358887430487e-07, "loss": 0.4389209449291229, "step": 6326 }, { "epoch": 1.458842517869495, "grad_norm": 1.561809426616513, "learning_rate": 3.746383317882874e-07, "loss": 0.44722115993499756, "step": 6327 }, { "epoch": 1.4590730919990778, "grad_norm": 1.8177515516918266, "learning_rate": 3.743408657361821e-07, "loss": 0.39179277420043945, "step": 6328 }, { "epoch": 1.4593036661286605, "grad_norm": 1.5511886302037754, "learning_rate": 3.7404349062997275e-07, "loss": 0.4704967737197876, "step": 6329 }, { "epoch": 1.459534240258243, "grad_norm": 1.4679557991806869, "learning_rate": 3.737462065128859e-07, "loss": 0.4294360876083374, "step": 6330 }, { "epoch": 1.4597648143878257, "grad_norm": 1.5082268745032619, "learning_rate": 3.734490134281353e-07, "loss": 0.5070170760154724, "step": 6331 }, { "epoch": 1.4599953885174084, "grad_norm": 1.4285887900302483, "learning_rate": 3.7315191141892013e-07, "loss": 0.3670409023761749, "step": 6332 }, { "epoch": 1.460225962646991, "grad_norm": 1.4866250279072872, "learning_rate": 3.7285490052842785e-07, "loss": 0.5043025016784668, "step": 6333 }, { "epoch": 1.4604565367765736, "grad_norm": 1.5557807366245089, "learning_rate": 3.725579807998316e-07, "loss": 0.43942689895629883, "step": 6334 }, { "epoch": 1.4606871109061563, "grad_norm": 1.61242194971354, "learning_rate": 3.7226115227629164e-07, "loss": 0.3444882035255432, "step": 6335 }, { "epoch": 1.460917685035739, "grad_norm": 1.4093154726677697, "learning_rate": 3.71964415000955e-07, "loss": 0.3994483947753906, "step": 6336 }, { "epoch": 1.4611482591653218, "grad_norm": 1.799524270186483, "learning_rate": 3.7166776901695564e-07, "loss": 0.3581928014755249, "step": 6337 }, { "epoch": 1.4613788332949043, "grad_norm": 1.4094806965107296, "learning_rate": 3.7137121436741423e-07, "loss": 0.4068276286125183, "step": 6338 }, { "epoch": 1.461609407424487, "grad_norm": 1.5430920931361498, "learning_rate": 3.710747510954376e-07, "loss": 0.4140080213546753, "step": 6339 }, { "epoch": 1.4618399815540697, "grad_norm": 1.5667918006300834, "learning_rate": 3.707783792441201e-07, "loss": 0.4328460097312927, "step": 6340 }, { "epoch": 1.4620705556836522, "grad_norm": 1.7344820768552758, "learning_rate": 3.704820988565419e-07, "loss": 0.49252209067344666, "step": 6341 }, { "epoch": 1.462301129813235, "grad_norm": 1.4564646974830249, "learning_rate": 3.7018590997577093e-07, "loss": 0.43051671981811523, "step": 6342 }, { "epoch": 1.4625317039428176, "grad_norm": 1.5901870751351228, "learning_rate": 3.698898126448605e-07, "loss": 0.5131059288978577, "step": 6343 }, { "epoch": 1.4627622780724003, "grad_norm": 2.025312431684147, "learning_rate": 3.6959380690685185e-07, "loss": 0.4633597731590271, "step": 6344 }, { "epoch": 1.462992852201983, "grad_norm": 1.5138095102076332, "learning_rate": 3.6929789280477265e-07, "loss": 0.3603428602218628, "step": 6345 }, { "epoch": 1.4632234263315655, "grad_norm": 1.4981993836978438, "learning_rate": 3.6900207038163633e-07, "loss": 0.5337490439414978, "step": 6346 }, { "epoch": 1.4634540004611483, "grad_norm": 1.8305905685338713, "learning_rate": 3.687063396804444e-07, "loss": 0.4940665066242218, "step": 6347 }, { "epoch": 1.463684574590731, "grad_norm": 2.012256207996667, "learning_rate": 3.6841070074418367e-07, "loss": 0.45664387941360474, "step": 6348 }, { "epoch": 1.4639151487203135, "grad_norm": 1.6965611532451377, "learning_rate": 3.681151536158289e-07, "loss": 0.4546254277229309, "step": 6349 }, { "epoch": 1.4641457228498962, "grad_norm": 1.4760234786987596, "learning_rate": 3.6781969833834015e-07, "loss": 0.37474149465560913, "step": 6350 }, { "epoch": 1.464376296979479, "grad_norm": 1.473821341410815, "learning_rate": 3.675243349546655e-07, "loss": 0.38016337156295776, "step": 6351 }, { "epoch": 1.4646068711090616, "grad_norm": 1.3725937182091388, "learning_rate": 3.672290635077384e-07, "loss": 0.46079233288764954, "step": 6352 }, { "epoch": 1.4648374452386443, "grad_norm": 1.754716547965532, "learning_rate": 3.669338840404799e-07, "loss": 0.39382117986679077, "step": 6353 }, { "epoch": 1.4650680193682268, "grad_norm": 1.5018040161914972, "learning_rate": 3.6663879659579766e-07, "loss": 0.4502074718475342, "step": 6354 }, { "epoch": 1.4652985934978096, "grad_norm": 1.4446726503170868, "learning_rate": 3.663438012165848e-07, "loss": 0.38199833035469055, "step": 6355 }, { "epoch": 1.4655291676273923, "grad_norm": 1.4760781012903512, "learning_rate": 3.660488979457228e-07, "loss": 0.4340086579322815, "step": 6356 }, { "epoch": 1.4657597417569748, "grad_norm": 1.7005769563076596, "learning_rate": 3.65754086826078e-07, "loss": 0.5425105094909668, "step": 6357 }, { "epoch": 1.4659903158865575, "grad_norm": 1.4480393161895644, "learning_rate": 3.654593679005048e-07, "loss": 0.4671604633331299, "step": 6358 }, { "epoch": 1.4662208900161402, "grad_norm": 1.6404775976624013, "learning_rate": 3.6516474121184317e-07, "loss": 0.4608290195465088, "step": 6359 }, { "epoch": 1.466451464145723, "grad_norm": 1.9415349791307541, "learning_rate": 3.6487020680292023e-07, "loss": 0.5272650122642517, "step": 6360 }, { "epoch": 1.4666820382753056, "grad_norm": 1.4115666654764834, "learning_rate": 3.645757647165495e-07, "loss": 0.40990152955055237, "step": 6361 }, { "epoch": 1.4669126124048881, "grad_norm": 1.405277693008717, "learning_rate": 3.6428141499553166e-07, "loss": 0.4723639488220215, "step": 6362 }, { "epoch": 1.4671431865344708, "grad_norm": 1.7789473556982454, "learning_rate": 3.639871576826529e-07, "loss": 0.5115963220596313, "step": 6363 }, { "epoch": 1.4673737606640536, "grad_norm": 1.669989973617769, "learning_rate": 3.636929928206862e-07, "loss": 0.44548431038856506, "step": 6364 }, { "epoch": 1.467604334793636, "grad_norm": 1.5904330694852653, "learning_rate": 3.633989204523922e-07, "loss": 0.48599356412887573, "step": 6365 }, { "epoch": 1.4678349089232188, "grad_norm": 1.4664661517676485, "learning_rate": 3.631049406205164e-07, "loss": 0.463236004114151, "step": 6366 }, { "epoch": 1.4680654830528015, "grad_norm": 1.7238002544119735, "learning_rate": 3.6281105336779225e-07, "loss": 0.4840255379676819, "step": 6367 }, { "epoch": 1.4682960571823842, "grad_norm": 1.5727046676978498, "learning_rate": 3.6251725873693926e-07, "loss": 0.39191675186157227, "step": 6368 }, { "epoch": 1.468526631311967, "grad_norm": 1.4333992251496341, "learning_rate": 3.622235567706637e-07, "loss": 0.5161769986152649, "step": 6369 }, { "epoch": 1.4687572054415494, "grad_norm": 1.811820117175508, "learning_rate": 3.6192994751165764e-07, "loss": 0.4579160213470459, "step": 6370 }, { "epoch": 1.4689877795711321, "grad_norm": 1.5348364339019953, "learning_rate": 3.616364310026006e-07, "loss": 0.4254727363586426, "step": 6371 }, { "epoch": 1.4692183537007149, "grad_norm": 1.60846510703603, "learning_rate": 3.613430072861575e-07, "loss": 0.3614911139011383, "step": 6372 }, { "epoch": 1.4694489278302973, "grad_norm": 1.332197813540827, "learning_rate": 3.610496764049814e-07, "loss": 0.4501386284828186, "step": 6373 }, { "epoch": 1.46967950195988, "grad_norm": 1.4207205401720155, "learning_rate": 3.607564384017102e-07, "loss": 0.4988802671432495, "step": 6374 }, { "epoch": 1.4699100760894628, "grad_norm": 1.5751788296655767, "learning_rate": 3.6046329331896907e-07, "loss": 0.4277713894844055, "step": 6375 }, { "epoch": 1.4701406502190455, "grad_norm": 1.5414838298104503, "learning_rate": 3.601702411993697e-07, "loss": 0.5007919073104858, "step": 6376 }, { "epoch": 1.4703712243486282, "grad_norm": 1.5705777345927519, "learning_rate": 3.5987728208551015e-07, "loss": 0.4857282042503357, "step": 6377 }, { "epoch": 1.4706017984782107, "grad_norm": 1.3913774043642957, "learning_rate": 3.595844160199756e-07, "loss": 0.45752188563346863, "step": 6378 }, { "epoch": 1.4708323726077934, "grad_norm": 1.3374827793978188, "learning_rate": 3.592916430453361e-07, "loss": 0.4364059269428253, "step": 6379 }, { "epoch": 1.4710629467373761, "grad_norm": 1.4896729369612345, "learning_rate": 3.589989632041501e-07, "loss": 0.48765695095062256, "step": 6380 }, { "epoch": 1.4712935208669586, "grad_norm": 1.8321401665511103, "learning_rate": 3.5870637653896087e-07, "loss": 0.5505347847938538, "step": 6381 }, { "epoch": 1.4715240949965414, "grad_norm": 1.5940287914496154, "learning_rate": 3.584138830922994e-07, "loss": 0.4468069076538086, "step": 6382 }, { "epoch": 1.471754669126124, "grad_norm": 1.2639532856264213, "learning_rate": 3.5812148290668186e-07, "loss": 0.4050968289375305, "step": 6383 }, { "epoch": 1.4719852432557068, "grad_norm": 1.6709771008348266, "learning_rate": 3.578291760246122e-07, "loss": 0.47324883937835693, "step": 6384 }, { "epoch": 1.4722158173852895, "grad_norm": 1.646291535207369, "learning_rate": 3.5753696248858025e-07, "loss": 0.4431450366973877, "step": 6385 }, { "epoch": 1.472446391514872, "grad_norm": 1.3398593447687968, "learning_rate": 3.5724484234106166e-07, "loss": 0.4599822163581848, "step": 6386 }, { "epoch": 1.4726769656444547, "grad_norm": 1.6764694987177748, "learning_rate": 3.5695281562451964e-07, "loss": 0.3655046224594116, "step": 6387 }, { "epoch": 1.4729075397740374, "grad_norm": 1.925765064850511, "learning_rate": 3.5666088238140267e-07, "loss": 0.4543811082839966, "step": 6388 }, { "epoch": 1.47313811390362, "grad_norm": 1.7682119668466059, "learning_rate": 3.563690426541469e-07, "loss": 0.45380568504333496, "step": 6389 }, { "epoch": 1.4733686880332026, "grad_norm": 1.3928278789748259, "learning_rate": 3.5607729648517336e-07, "loss": 0.3640294373035431, "step": 6390 }, { "epoch": 1.4735992621627854, "grad_norm": 1.4826659174775283, "learning_rate": 3.557856439168907e-07, "loss": 0.39890235662460327, "step": 6391 }, { "epoch": 1.473829836292368, "grad_norm": 1.7657939773449876, "learning_rate": 3.5549408499169374e-07, "loss": 0.47551727294921875, "step": 6392 }, { "epoch": 1.4740604104219508, "grad_norm": 1.5946717850777934, "learning_rate": 3.5520261975196364e-07, "loss": 0.43851834535598755, "step": 6393 }, { "epoch": 1.4742909845515333, "grad_norm": 1.7160257871535318, "learning_rate": 3.549112482400676e-07, "loss": 0.45289307832717896, "step": 6394 }, { "epoch": 1.474521558681116, "grad_norm": 1.660677297447299, "learning_rate": 3.546199704983591e-07, "loss": 0.5229180455207825, "step": 6395 }, { "epoch": 1.4747521328106985, "grad_norm": 1.5089259577077747, "learning_rate": 3.5432878656917884e-07, "loss": 0.47332310676574707, "step": 6396 }, { "epoch": 1.4749827069402812, "grad_norm": 1.402371205517633, "learning_rate": 3.540376964948529e-07, "loss": 0.4079092741012573, "step": 6397 }, { "epoch": 1.475213281069864, "grad_norm": 1.607654850710184, "learning_rate": 3.5374670031769484e-07, "loss": 0.43366020917892456, "step": 6398 }, { "epoch": 1.4754438551994467, "grad_norm": 1.6067458113996615, "learning_rate": 3.5345579808000294e-07, "loss": 0.45040106773376465, "step": 6399 }, { "epoch": 1.4756744293290294, "grad_norm": 1.584960802510298, "learning_rate": 3.531649898240634e-07, "loss": 0.4409756064414978, "step": 6400 }, { "epoch": 1.4759050034586119, "grad_norm": 1.5204759785794038, "learning_rate": 3.528742755921481e-07, "loss": 0.4141521751880646, "step": 6401 }, { "epoch": 1.4761355775881946, "grad_norm": 1.6363482264143396, "learning_rate": 3.525836554265156e-07, "loss": 0.4697296619415283, "step": 6402 }, { "epoch": 1.4763661517177773, "grad_norm": 1.3771953803345143, "learning_rate": 3.5229312936941013e-07, "loss": 0.4369434714317322, "step": 6403 }, { "epoch": 1.4765967258473598, "grad_norm": 1.3415133870830294, "learning_rate": 3.5200269746306224e-07, "loss": 0.4197359085083008, "step": 6404 }, { "epoch": 1.4768272999769425, "grad_norm": 1.8249279231813902, "learning_rate": 3.5171235974968996e-07, "loss": 0.495933473110199, "step": 6405 }, { "epoch": 1.4770578741065252, "grad_norm": 1.3638396377453934, "learning_rate": 3.51422116271496e-07, "loss": 0.4177231192588806, "step": 6406 }, { "epoch": 1.477288448236108, "grad_norm": 1.5336568107147823, "learning_rate": 3.511319670706705e-07, "loss": 0.5366500020027161, "step": 6407 }, { "epoch": 1.4775190223656907, "grad_norm": 1.5479295323166011, "learning_rate": 3.508419121893897e-07, "loss": 0.3900446891784668, "step": 6408 }, { "epoch": 1.4777495964952732, "grad_norm": 1.8223854522009124, "learning_rate": 3.5055195166981646e-07, "loss": 0.40877431631088257, "step": 6409 }, { "epoch": 1.4779801706248559, "grad_norm": 1.3594177124317366, "learning_rate": 3.502620855540985e-07, "loss": 0.4381163716316223, "step": 6410 }, { "epoch": 1.4782107447544386, "grad_norm": 1.2256800281998605, "learning_rate": 3.4997231388437167e-07, "loss": 0.3449817895889282, "step": 6411 }, { "epoch": 1.478441318884021, "grad_norm": 1.4879818959728963, "learning_rate": 3.4968263670275653e-07, "loss": 0.4879523515701294, "step": 6412 }, { "epoch": 1.4786718930136038, "grad_norm": 1.5651020351069762, "learning_rate": 3.493930540513613e-07, "loss": 0.3781365156173706, "step": 6413 }, { "epoch": 1.4789024671431865, "grad_norm": 1.6645622352676888, "learning_rate": 3.49103565972279e-07, "loss": 0.4505656361579895, "step": 6414 }, { "epoch": 1.4791330412727692, "grad_norm": 1.4565716791756764, "learning_rate": 3.4881417250759006e-07, "loss": 0.4285612106323242, "step": 6415 }, { "epoch": 1.479363615402352, "grad_norm": 1.5357416036601346, "learning_rate": 3.48524873699361e-07, "loss": 0.5285177826881409, "step": 6416 }, { "epoch": 1.4795941895319344, "grad_norm": 1.6484784065232339, "learning_rate": 3.482356695896437e-07, "loss": 0.4504782259464264, "step": 6417 }, { "epoch": 1.4798247636615172, "grad_norm": 1.5658620514352724, "learning_rate": 3.4794656022047765e-07, "loss": 0.45295125246047974, "step": 6418 }, { "epoch": 1.4800553377910999, "grad_norm": 1.3627022105594853, "learning_rate": 3.47657545633887e-07, "loss": 0.35889285802841187, "step": 6419 }, { "epoch": 1.4802859119206824, "grad_norm": 1.5560865897069756, "learning_rate": 3.4736862587188384e-07, "loss": 0.49129703640937805, "step": 6420 }, { "epoch": 1.480516486050265, "grad_norm": 1.6626930717329957, "learning_rate": 3.4707980097646474e-07, "loss": 0.5018036365509033, "step": 6421 }, { "epoch": 1.4807470601798478, "grad_norm": 1.6557207215915222, "learning_rate": 3.46791070989614e-07, "loss": 0.48743095993995667, "step": 6422 }, { "epoch": 1.4809776343094305, "grad_norm": 1.5043027194300391, "learning_rate": 3.46502435953301e-07, "loss": 0.4876127243041992, "step": 6423 }, { "epoch": 1.4812082084390132, "grad_norm": 1.971149486413709, "learning_rate": 3.462138959094818e-07, "loss": 0.517420768737793, "step": 6424 }, { "epoch": 1.4814387825685957, "grad_norm": 1.8274785313456325, "learning_rate": 3.4592545090009907e-07, "loss": 0.49587076902389526, "step": 6425 }, { "epoch": 1.4816693566981785, "grad_norm": 1.5362037346917286, "learning_rate": 3.4563710096708063e-07, "loss": 0.43007123470306396, "step": 6426 }, { "epoch": 1.4818999308277612, "grad_norm": 1.358212427456112, "learning_rate": 3.4534884615234163e-07, "loss": 0.41231095790863037, "step": 6427 }, { "epoch": 1.4821305049573437, "grad_norm": 1.6451517308598724, "learning_rate": 3.450606864977822e-07, "loss": 0.4454977512359619, "step": 6428 }, { "epoch": 1.4823610790869264, "grad_norm": 1.3739971676037328, "learning_rate": 3.447726220452899e-07, "loss": 0.4432292878627777, "step": 6429 }, { "epoch": 1.482591653216509, "grad_norm": 1.6222705799101154, "learning_rate": 3.444846528367372e-07, "loss": 0.47547852993011475, "step": 6430 }, { "epoch": 1.4828222273460918, "grad_norm": 1.522255385470065, "learning_rate": 3.441967789139837e-07, "loss": 0.45712774991989136, "step": 6431 }, { "epoch": 1.4830528014756745, "grad_norm": 2.2700209255759107, "learning_rate": 3.439090003188748e-07, "loss": 0.4485551714897156, "step": 6432 }, { "epoch": 1.483283375605257, "grad_norm": 1.4019614855782472, "learning_rate": 3.4362131709324225e-07, "loss": 0.5157139301300049, "step": 6433 }, { "epoch": 1.4835139497348397, "grad_norm": 1.6970431173839349, "learning_rate": 3.4333372927890346e-07, "loss": 0.3786337375640869, "step": 6434 }, { "epoch": 1.4837445238644225, "grad_norm": 1.430215191007922, "learning_rate": 3.430462369176619e-07, "loss": 0.444644033908844, "step": 6435 }, { "epoch": 1.483975097994005, "grad_norm": 1.5213084700296855, "learning_rate": 3.427588400513082e-07, "loss": 0.450777530670166, "step": 6436 }, { "epoch": 1.4842056721235877, "grad_norm": 1.6553650689166306, "learning_rate": 3.424715387216176e-07, "loss": 0.4547499418258667, "step": 6437 }, { "epoch": 1.4844362462531704, "grad_norm": 1.3603667716838959, "learning_rate": 3.4218433297035274e-07, "loss": 0.41394394636154175, "step": 6438 }, { "epoch": 1.484666820382753, "grad_norm": 1.3921623882761025, "learning_rate": 3.4189722283926194e-07, "loss": 0.46392822265625, "step": 6439 }, { "epoch": 1.4848973945123358, "grad_norm": 1.3499969732544597, "learning_rate": 3.416102083700797e-07, "loss": 0.443311870098114, "step": 6440 }, { "epoch": 1.4851279686419183, "grad_norm": 1.3830140570978715, "learning_rate": 3.4132328960452594e-07, "loss": 0.49744826555252075, "step": 6441 }, { "epoch": 1.485358542771501, "grad_norm": 1.5191431970911358, "learning_rate": 3.4103646658430787e-07, "loss": 0.3906005620956421, "step": 6442 }, { "epoch": 1.4855891169010838, "grad_norm": 1.3526583076340324, "learning_rate": 3.407497393511175e-07, "loss": 0.4236280918121338, "step": 6443 }, { "epoch": 1.4858196910306662, "grad_norm": 1.6787824686307624, "learning_rate": 3.4046310794663403e-07, "loss": 0.5457645654678345, "step": 6444 }, { "epoch": 1.486050265160249, "grad_norm": 1.7325001007084588, "learning_rate": 3.4017657241252217e-07, "loss": 0.541573703289032, "step": 6445 }, { "epoch": 1.4862808392898317, "grad_norm": 1.9081537369674455, "learning_rate": 3.398901327904322e-07, "loss": 0.496945858001709, "step": 6446 }, { "epoch": 1.4865114134194144, "grad_norm": 1.5413856714091914, "learning_rate": 3.3960378912200136e-07, "loss": 0.46119701862335205, "step": 6447 }, { "epoch": 1.4867419875489971, "grad_norm": 1.8976464043536114, "learning_rate": 3.3931754144885284e-07, "loss": 0.5169441103935242, "step": 6448 }, { "epoch": 1.4869725616785796, "grad_norm": 1.7130869588848308, "learning_rate": 3.390313898125957e-07, "loss": 0.525173544883728, "step": 6449 }, { "epoch": 1.4872031358081623, "grad_norm": 1.6684348208587065, "learning_rate": 3.3874533425482457e-07, "loss": 0.46877139806747437, "step": 6450 }, { "epoch": 1.487433709937745, "grad_norm": 1.6810644095850389, "learning_rate": 3.3845937481712096e-07, "loss": 0.49436479806900024, "step": 6451 }, { "epoch": 1.4876642840673275, "grad_norm": 1.2950679928032611, "learning_rate": 3.3817351154105145e-07, "loss": 0.40879231691360474, "step": 6452 }, { "epoch": 1.4878948581969103, "grad_norm": 1.5253823933458253, "learning_rate": 3.378877444681697e-07, "loss": 0.5060825347900391, "step": 6453 }, { "epoch": 1.488125432326493, "grad_norm": 1.4561081118713566, "learning_rate": 3.3760207364001434e-07, "loss": 0.4875546097755432, "step": 6454 }, { "epoch": 1.4883560064560757, "grad_norm": 1.5036556031092911, "learning_rate": 3.373164990981108e-07, "loss": 0.3791916072368622, "step": 6455 }, { "epoch": 1.4885865805856584, "grad_norm": 1.4585716739422292, "learning_rate": 3.370310208839704e-07, "loss": 0.46757322549819946, "step": 6456 }, { "epoch": 1.488817154715241, "grad_norm": 1.4061567541704671, "learning_rate": 3.3674563903908994e-07, "loss": 0.4334050416946411, "step": 6457 }, { "epoch": 1.4890477288448236, "grad_norm": 1.4217577265821555, "learning_rate": 3.3646035360495294e-07, "loss": 0.4408720135688782, "step": 6458 }, { "epoch": 1.4892783029744063, "grad_norm": 1.637938092148249, "learning_rate": 3.3617516462302795e-07, "loss": 0.46556228399276733, "step": 6459 }, { "epoch": 1.4895088771039888, "grad_norm": 1.3694379850190115, "learning_rate": 3.3589007213477096e-07, "loss": 0.5212184190750122, "step": 6460 }, { "epoch": 1.4897394512335715, "grad_norm": 1.6425370019041445, "learning_rate": 3.35605076181622e-07, "loss": 0.5340084433555603, "step": 6461 }, { "epoch": 1.4899700253631543, "grad_norm": 1.4674031830711234, "learning_rate": 3.353201768050088e-07, "loss": 0.38049495220184326, "step": 6462 }, { "epoch": 1.490200599492737, "grad_norm": 1.5849611777401629, "learning_rate": 3.350353740463442e-07, "loss": 0.5480734705924988, "step": 6463 }, { "epoch": 1.4904311736223197, "grad_norm": 1.4050939080217109, "learning_rate": 3.3475066794702756e-07, "loss": 0.4179231524467468, "step": 6464 }, { "epoch": 1.4906617477519022, "grad_norm": 1.8331951463468434, "learning_rate": 3.3446605854844335e-07, "loss": 0.5380987524986267, "step": 6465 }, { "epoch": 1.490892321881485, "grad_norm": 1.4221970681414315, "learning_rate": 3.3418154589196226e-07, "loss": 0.41146454215049744, "step": 6466 }, { "epoch": 1.4911228960110676, "grad_norm": 1.5814296524447065, "learning_rate": 3.3389713001894157e-07, "loss": 0.4586387276649475, "step": 6467 }, { "epoch": 1.4913534701406501, "grad_norm": 1.1757977126470995, "learning_rate": 3.336128109707236e-07, "loss": 0.4023931920528412, "step": 6468 }, { "epoch": 1.4915840442702328, "grad_norm": 1.6673237012516164, "learning_rate": 3.333285887886373e-07, "loss": 0.5373448133468628, "step": 6469 }, { "epoch": 1.4918146183998156, "grad_norm": 1.4523946751037105, "learning_rate": 3.330444635139971e-07, "loss": 0.4413643479347229, "step": 6470 }, { "epoch": 1.4920451925293983, "grad_norm": 1.3734904271626787, "learning_rate": 3.3276043518810327e-07, "loss": 0.399494469165802, "step": 6471 }, { "epoch": 1.492275766658981, "grad_norm": 1.4170973987364872, "learning_rate": 3.3247650385224256e-07, "loss": 0.4353644847869873, "step": 6472 }, { "epoch": 1.4925063407885635, "grad_norm": 1.7462483377307876, "learning_rate": 3.3219266954768743e-07, "loss": 0.5231607556343079, "step": 6473 }, { "epoch": 1.4927369149181462, "grad_norm": 1.55800999194994, "learning_rate": 3.3190893231569596e-07, "loss": 0.414408802986145, "step": 6474 }, { "epoch": 1.492967489047729, "grad_norm": 1.6408204727748315, "learning_rate": 3.3162529219751155e-07, "loss": 0.3921009302139282, "step": 6475 }, { "epoch": 1.4931980631773114, "grad_norm": 1.6197044883986413, "learning_rate": 3.3134174923436506e-07, "loss": 0.4317164421081543, "step": 6476 }, { "epoch": 1.4934286373068941, "grad_norm": 1.5697343564549593, "learning_rate": 3.3105830346747175e-07, "loss": 0.46302181482315063, "step": 6477 }, { "epoch": 1.4936592114364768, "grad_norm": 1.464087037907405, "learning_rate": 3.307749549380335e-07, "loss": 0.45704615116119385, "step": 6478 }, { "epoch": 1.4938897855660596, "grad_norm": 1.5032451370482525, "learning_rate": 3.304917036872379e-07, "loss": 0.45455485582351685, "step": 6479 }, { "epoch": 1.4941203596956423, "grad_norm": 1.5465084069557762, "learning_rate": 3.302085497562588e-07, "loss": 0.41939157247543335, "step": 6480 }, { "epoch": 1.4943509338252248, "grad_norm": 1.3682263746176198, "learning_rate": 3.2992549318625487e-07, "loss": 0.4109286367893219, "step": 6481 }, { "epoch": 1.4945815079548075, "grad_norm": 2.0164734849697, "learning_rate": 3.2964253401837173e-07, "loss": 0.44710463285446167, "step": 6482 }, { "epoch": 1.4948120820843902, "grad_norm": 1.6884711291100036, "learning_rate": 3.2935967229373986e-07, "loss": 0.4330691695213318, "step": 6483 }, { "epoch": 1.4950426562139727, "grad_norm": 1.4066891595951536, "learning_rate": 3.2907690805347667e-07, "loss": 0.41174834966659546, "step": 6484 }, { "epoch": 1.4952732303435554, "grad_norm": 1.5235589172624593, "learning_rate": 3.2879424133868406e-07, "loss": 0.4368870258331299, "step": 6485 }, { "epoch": 1.4955038044731381, "grad_norm": 1.581699276196859, "learning_rate": 3.2851167219045107e-07, "loss": 0.5155518651008606, "step": 6486 }, { "epoch": 1.4957343786027208, "grad_norm": 1.4965040692694338, "learning_rate": 3.282292006498522e-07, "loss": 0.47015419602394104, "step": 6487 }, { "epoch": 1.4959649527323036, "grad_norm": 1.4271101962383341, "learning_rate": 3.2794682675794684e-07, "loss": 0.41059884428977966, "step": 6488 }, { "epoch": 1.496195526861886, "grad_norm": 1.7728377181019612, "learning_rate": 3.2766455055578157e-07, "loss": 0.4864136278629303, "step": 6489 }, { "epoch": 1.4964261009914688, "grad_norm": 1.1780419841322618, "learning_rate": 3.2738237208438744e-07, "loss": 0.3599165976047516, "step": 6490 }, { "epoch": 1.4966566751210515, "grad_norm": 1.4373611771192503, "learning_rate": 3.2710029138478267e-07, "loss": 0.4734029769897461, "step": 6491 }, { "epoch": 1.496887249250634, "grad_norm": 1.5053587105753783, "learning_rate": 3.268183084979699e-07, "loss": 0.46739861369132996, "step": 6492 }, { "epoch": 1.4971178233802167, "grad_norm": 1.745789102022849, "learning_rate": 3.265364234649387e-07, "loss": 0.46794670820236206, "step": 6493 }, { "epoch": 1.4973483975097994, "grad_norm": 1.6683012395243093, "learning_rate": 3.262546363266635e-07, "loss": 0.463203489780426, "step": 6494 }, { "epoch": 1.4975789716393821, "grad_norm": 1.4489172807794646, "learning_rate": 3.2597294712410504e-07, "loss": 0.4495059847831726, "step": 6495 }, { "epoch": 1.4978095457689649, "grad_norm": 1.464704014292867, "learning_rate": 3.256913558982101e-07, "loss": 0.43549245595932007, "step": 6496 }, { "epoch": 1.4980401198985474, "grad_norm": 1.552183908593376, "learning_rate": 3.254098626899102e-07, "loss": 0.40582704544067383, "step": 6497 }, { "epoch": 1.49827069402813, "grad_norm": 1.527774566610999, "learning_rate": 3.251284675401238e-07, "loss": 0.3720378279685974, "step": 6498 }, { "epoch": 1.4985012681577128, "grad_norm": 1.4814613073983138, "learning_rate": 3.24847170489754e-07, "loss": 0.42694520950317383, "step": 6499 }, { "epoch": 1.4987318422872953, "grad_norm": 1.4768231117771715, "learning_rate": 3.2456597157969066e-07, "loss": 0.442158043384552, "step": 6500 }, { "epoch": 1.498962416416878, "grad_norm": 1.4765054194953837, "learning_rate": 3.2428487085080846e-07, "loss": 0.44245558977127075, "step": 6501 }, { "epoch": 1.4991929905464607, "grad_norm": 1.3559485373971267, "learning_rate": 3.240038683439684e-07, "loss": 0.4127236008644104, "step": 6502 }, { "epoch": 1.4994235646760434, "grad_norm": 1.4985576311709152, "learning_rate": 3.237229641000171e-07, "loss": 0.4262787103652954, "step": 6503 }, { "epoch": 1.4996541388056261, "grad_norm": 1.6706445028718073, "learning_rate": 3.2344215815978714e-07, "loss": 0.4181264042854309, "step": 6504 }, { "epoch": 1.4998847129352086, "grad_norm": 1.6044294628436637, "learning_rate": 3.2316145056409616e-07, "loss": 0.4416937530040741, "step": 6505 }, { "epoch": 1.5001152870647914, "grad_norm": 1.8850023720212492, "learning_rate": 3.228808413537476e-07, "loss": 0.4901489019393921, "step": 6506 }, { "epoch": 1.5003458611943739, "grad_norm": 1.3996173090866784, "learning_rate": 3.2260033056953153e-07, "loss": 0.37932026386260986, "step": 6507 }, { "epoch": 1.5005764353239566, "grad_norm": 1.649923361135509, "learning_rate": 3.223199182522223e-07, "loss": 0.4680899381637573, "step": 6508 }, { "epoch": 1.5008070094535393, "grad_norm": 1.6955418693371036, "learning_rate": 3.2203960444258105e-07, "loss": 0.508334219455719, "step": 6509 }, { "epoch": 1.501037583583122, "grad_norm": 2.0480591557575685, "learning_rate": 3.2175938918135415e-07, "loss": 0.3386784791946411, "step": 6510 }, { "epoch": 1.5012681577127047, "grad_norm": 1.860117074212897, "learning_rate": 3.214792725092741e-07, "loss": 0.4315892457962036, "step": 6511 }, { "epoch": 1.5014987318422874, "grad_norm": 1.4533616152071933, "learning_rate": 3.211992544670582e-07, "loss": 0.3709627389907837, "step": 6512 }, { "epoch": 1.50172930597187, "grad_norm": 1.6433224440752017, "learning_rate": 3.2091933509541023e-07, "loss": 0.5260987877845764, "step": 6513 }, { "epoch": 1.5019598801014526, "grad_norm": 1.5201640514539732, "learning_rate": 3.20639514435019e-07, "loss": 0.5379073619842529, "step": 6514 }, { "epoch": 1.5021904542310351, "grad_norm": 1.2867052063244526, "learning_rate": 3.2035979252655976e-07, "loss": 0.47530391812324524, "step": 6515 }, { "epoch": 1.5024210283606179, "grad_norm": 1.5201328820105404, "learning_rate": 3.200801694106926e-07, "loss": 0.459227979183197, "step": 6516 }, { "epoch": 1.5026516024902006, "grad_norm": 1.5330729417783509, "learning_rate": 3.19800645128063e-07, "loss": 0.4867238998413086, "step": 6517 }, { "epoch": 1.5028821766197833, "grad_norm": 1.4246709864782185, "learning_rate": 3.195212197193039e-07, "loss": 0.38478928804397583, "step": 6518 }, { "epoch": 1.503112750749366, "grad_norm": 1.625989812299007, "learning_rate": 3.192418932250316e-07, "loss": 0.3938423991203308, "step": 6519 }, { "epoch": 1.5033433248789487, "grad_norm": 1.8227844221564524, "learning_rate": 3.1896266568584975e-07, "loss": 0.457303911447525, "step": 6520 }, { "epoch": 1.5035738990085312, "grad_norm": 1.5422494994233005, "learning_rate": 3.1868353714234607e-07, "loss": 0.5007269382476807, "step": 6521 }, { "epoch": 1.503804473138114, "grad_norm": 1.4891205198132078, "learning_rate": 3.1840450763509576e-07, "loss": 0.3878381848335266, "step": 6522 }, { "epoch": 1.5040350472676964, "grad_norm": 1.798955261342233, "learning_rate": 3.181255772046575e-07, "loss": 0.488269567489624, "step": 6523 }, { "epoch": 1.5042656213972792, "grad_norm": 1.4981578078592954, "learning_rate": 3.1784674589157767e-07, "loss": 0.41664889454841614, "step": 6524 }, { "epoch": 1.5044961955268619, "grad_norm": 1.6014375227212925, "learning_rate": 3.175680137363863e-07, "loss": 0.4862533509731293, "step": 6525 }, { "epoch": 1.5047267696564446, "grad_norm": 1.599713126186934, "learning_rate": 3.172893807796004e-07, "loss": 0.4629037380218506, "step": 6526 }, { "epoch": 1.5049573437860273, "grad_norm": 1.6094632634811818, "learning_rate": 3.1701084706172245e-07, "loss": 0.46300196647644043, "step": 6527 }, { "epoch": 1.50518791791561, "grad_norm": 1.4186362500626026, "learning_rate": 3.1673241262323934e-07, "loss": 0.40698888897895813, "step": 6528 }, { "epoch": 1.5054184920451925, "grad_norm": 1.484473947418196, "learning_rate": 3.1645407750462514e-07, "loss": 0.4344380497932434, "step": 6529 }, { "epoch": 1.5056490661747752, "grad_norm": 1.6200348544461498, "learning_rate": 3.1617584174633806e-07, "loss": 0.49757128953933716, "step": 6530 }, { "epoch": 1.5058796403043577, "grad_norm": 1.6256839483530447, "learning_rate": 3.15897705388823e-07, "loss": 0.4506916105747223, "step": 6531 }, { "epoch": 1.5061102144339404, "grad_norm": 1.5009759227514647, "learning_rate": 3.156196684725093e-07, "loss": 0.3941146731376648, "step": 6532 }, { "epoch": 1.5063407885635232, "grad_norm": 1.9065405733956409, "learning_rate": 3.153417310378127e-07, "loss": 0.5400820374488831, "step": 6533 }, { "epoch": 1.5065713626931059, "grad_norm": 1.774411964329925, "learning_rate": 3.1506389312513435e-07, "loss": 0.4418470859527588, "step": 6534 }, { "epoch": 1.5068019368226886, "grad_norm": 1.3196915654196755, "learning_rate": 3.1478615477486113e-07, "loss": 0.3897334933280945, "step": 6535 }, { "epoch": 1.5070325109522713, "grad_norm": 1.5772083777596413, "learning_rate": 3.145085160273647e-07, "loss": 0.4923437833786011, "step": 6536 }, { "epoch": 1.5072630850818538, "grad_norm": 1.575539005736493, "learning_rate": 3.142309769230025e-07, "loss": 0.41996920108795166, "step": 6537 }, { "epoch": 1.5074936592114365, "grad_norm": 1.5634954618427415, "learning_rate": 3.1395353750211806e-07, "loss": 0.38584667444229126, "step": 6538 }, { "epoch": 1.507724233341019, "grad_norm": 1.5469052539454182, "learning_rate": 3.136761978050395e-07, "loss": 0.5093455910682678, "step": 6539 }, { "epoch": 1.5079548074706017, "grad_norm": 1.8844111555093896, "learning_rate": 3.1339895787208126e-07, "loss": 0.5592935681343079, "step": 6540 }, { "epoch": 1.5081853816001844, "grad_norm": 1.7670191671756568, "learning_rate": 3.1312181774354306e-07, "loss": 0.38311779499053955, "step": 6541 }, { "epoch": 1.5084159557297672, "grad_norm": 1.6894588927823573, "learning_rate": 3.1284477745971025e-07, "loss": 0.4422299265861511, "step": 6542 }, { "epoch": 1.5086465298593499, "grad_norm": 1.5653024747826005, "learning_rate": 3.125678370608528e-07, "loss": 0.5097527503967285, "step": 6543 }, { "epoch": 1.5088771039889326, "grad_norm": 1.4635088499535702, "learning_rate": 3.1229099658722747e-07, "loss": 0.42586642503738403, "step": 6544 }, { "epoch": 1.509107678118515, "grad_norm": 1.7853929312810684, "learning_rate": 3.120142560790755e-07, "loss": 0.5006861686706543, "step": 6545 }, { "epoch": 1.5093382522480978, "grad_norm": 1.292111562170076, "learning_rate": 3.117376155766237e-07, "loss": 0.4361686706542969, "step": 6546 }, { "epoch": 1.5095688263776803, "grad_norm": 1.4890005224956508, "learning_rate": 3.11461075120085e-07, "loss": 0.45466339588165283, "step": 6547 }, { "epoch": 1.509799400507263, "grad_norm": 1.4657261766322067, "learning_rate": 3.1118463474965697e-07, "loss": 0.39591068029403687, "step": 6548 }, { "epoch": 1.5100299746368457, "grad_norm": 1.669083463008409, "learning_rate": 3.1090829450552316e-07, "loss": 0.4672427475452423, "step": 6549 }, { "epoch": 1.5102605487664285, "grad_norm": 1.6273442700037082, "learning_rate": 3.1063205442785234e-07, "loss": 0.4785880148410797, "step": 6550 }, { "epoch": 1.5104911228960112, "grad_norm": 1.3915985235576667, "learning_rate": 3.103559145567994e-07, "loss": 0.441936731338501, "step": 6551 }, { "epoch": 1.510721697025594, "grad_norm": 1.5501390159164539, "learning_rate": 3.1007987493250334e-07, "loss": 0.49719512462615967, "step": 6552 }, { "epoch": 1.5109522711551764, "grad_norm": 1.7806538694012621, "learning_rate": 3.098039355950899e-07, "loss": 0.40702491998672485, "step": 6553 }, { "epoch": 1.511182845284759, "grad_norm": 1.4605232780084745, "learning_rate": 3.0952809658466896e-07, "loss": 0.44754648208618164, "step": 6554 }, { "epoch": 1.5114134194143416, "grad_norm": 1.7119927234849008, "learning_rate": 3.0925235794133717e-07, "loss": 0.5370102524757385, "step": 6555 }, { "epoch": 1.5116439935439243, "grad_norm": 1.4781444883115034, "learning_rate": 3.089767197051755e-07, "loss": 0.46693646907806396, "step": 6556 }, { "epoch": 1.511874567673507, "grad_norm": 1.3940905139236526, "learning_rate": 3.0870118191625084e-07, "loss": 0.3887597322463989, "step": 6557 }, { "epoch": 1.5121051418030897, "grad_norm": 1.509297997221229, "learning_rate": 3.0842574461461577e-07, "loss": 0.4783397912979126, "step": 6558 }, { "epoch": 1.5123357159326725, "grad_norm": 2.254982960205746, "learning_rate": 3.081504078403073e-07, "loss": 0.5305588245391846, "step": 6559 }, { "epoch": 1.5125662900622552, "grad_norm": 1.867807225680096, "learning_rate": 3.078751716333492e-07, "loss": 0.45315784215927124, "step": 6560 }, { "epoch": 1.5127968641918377, "grad_norm": 1.6356411182801975, "learning_rate": 3.0760003603374897e-07, "loss": 0.4805132746696472, "step": 6561 }, { "epoch": 1.5130274383214202, "grad_norm": 1.5579254915377003, "learning_rate": 3.0732500108150104e-07, "loss": 0.4956076145172119, "step": 6562 }, { "epoch": 1.5132580124510029, "grad_norm": 1.6872988549232402, "learning_rate": 3.07050066816584e-07, "loss": 0.3629196882247925, "step": 6563 }, { "epoch": 1.5134885865805856, "grad_norm": 1.4271734684348691, "learning_rate": 3.067752332789626e-07, "loss": 0.43240371346473694, "step": 6564 }, { "epoch": 1.5137191607101683, "grad_norm": 1.4730845718882644, "learning_rate": 3.065005005085869e-07, "loss": 0.4933302402496338, "step": 6565 }, { "epoch": 1.513949734839751, "grad_norm": 1.5594123406832316, "learning_rate": 3.0622586854539155e-07, "loss": 0.47905197739601135, "step": 6566 }, { "epoch": 1.5141803089693338, "grad_norm": 1.3120965583955209, "learning_rate": 3.059513374292978e-07, "loss": 0.4245232343673706, "step": 6567 }, { "epoch": 1.5144108830989162, "grad_norm": 1.6401225191596096, "learning_rate": 3.0567690720021077e-07, "loss": 0.40526312589645386, "step": 6568 }, { "epoch": 1.514641457228499, "grad_norm": 1.7208705138340397, "learning_rate": 3.0540257789802227e-07, "loss": 0.5808804631233215, "step": 6569 }, { "epoch": 1.5148720313580815, "grad_norm": 1.791338069752229, "learning_rate": 3.0512834956260836e-07, "loss": 0.44997286796569824, "step": 6570 }, { "epoch": 1.5151026054876642, "grad_norm": 1.6800897456169108, "learning_rate": 3.048542222338315e-07, "loss": 0.44051581621170044, "step": 6571 }, { "epoch": 1.515333179617247, "grad_norm": 1.525217042834723, "learning_rate": 3.045801959515382e-07, "loss": 0.5113236308097839, "step": 6572 }, { "epoch": 1.5155637537468296, "grad_norm": 1.5439102757372205, "learning_rate": 3.0430627075556125e-07, "loss": 0.554703950881958, "step": 6573 }, { "epoch": 1.5157943278764123, "grad_norm": 1.600156572288611, "learning_rate": 3.0403244668571847e-07, "loss": 0.3819808065891266, "step": 6574 }, { "epoch": 1.516024902005995, "grad_norm": 1.4872928405937125, "learning_rate": 3.037587237818133e-07, "loss": 0.47970864176750183, "step": 6575 }, { "epoch": 1.5162554761355775, "grad_norm": 1.4776778157711579, "learning_rate": 3.0348510208363386e-07, "loss": 0.4296469986438751, "step": 6576 }, { "epoch": 1.5164860502651603, "grad_norm": 1.462836798021035, "learning_rate": 3.032115816309535e-07, "loss": 0.4372752904891968, "step": 6577 }, { "epoch": 1.5167166243947428, "grad_norm": 1.673613757204577, "learning_rate": 3.029381624635318e-07, "loss": 0.4711950719356537, "step": 6578 }, { "epoch": 1.5169471985243255, "grad_norm": 1.3932522433513406, "learning_rate": 3.026648446211124e-07, "loss": 0.4448170065879822, "step": 6579 }, { "epoch": 1.5171777726539082, "grad_norm": 1.6184181695445041, "learning_rate": 3.02391628143425e-07, "loss": 0.4527873992919922, "step": 6580 }, { "epoch": 1.517408346783491, "grad_norm": 1.6799725255249693, "learning_rate": 3.0211851307018463e-07, "loss": 0.453765332698822, "step": 6581 }, { "epoch": 1.5176389209130736, "grad_norm": 1.686193810125547, "learning_rate": 3.018454994410915e-07, "loss": 0.46818265318870544, "step": 6582 }, { "epoch": 1.5178694950426563, "grad_norm": 1.6601834563107158, "learning_rate": 3.0157258729583026e-07, "loss": 0.38551369309425354, "step": 6583 }, { "epoch": 1.5181000691722388, "grad_norm": 1.2759146716130436, "learning_rate": 3.012997766740721e-07, "loss": 0.3651260733604431, "step": 6584 }, { "epoch": 1.5183306433018215, "grad_norm": 1.4942378521466573, "learning_rate": 3.010270676154726e-07, "loss": 0.36894726753234863, "step": 6585 }, { "epoch": 1.518561217431404, "grad_norm": 1.5163949110289714, "learning_rate": 3.007544601596722e-07, "loss": 0.42595791816711426, "step": 6586 }, { "epoch": 1.5187917915609868, "grad_norm": 1.9011368495730705, "learning_rate": 3.004819543462979e-07, "loss": 0.4916795492172241, "step": 6587 }, { "epoch": 1.5190223656905695, "grad_norm": 3.958756092482824, "learning_rate": 3.0020955021496073e-07, "loss": 0.5098932385444641, "step": 6588 }, { "epoch": 1.5192529398201522, "grad_norm": 1.7429564765653418, "learning_rate": 2.9993724780525796e-07, "loss": 0.6336305737495422, "step": 6589 }, { "epoch": 1.519483513949735, "grad_norm": 1.6454779446539551, "learning_rate": 2.996650471567709e-07, "loss": 0.4911893606185913, "step": 6590 }, { "epoch": 1.5197140880793176, "grad_norm": 1.6053455149976412, "learning_rate": 2.9939294830906727e-07, "loss": 0.4388008117675781, "step": 6591 }, { "epoch": 1.5199446622089001, "grad_norm": 1.4960203678707569, "learning_rate": 2.991209513016986e-07, "loss": 0.392263799905777, "step": 6592 }, { "epoch": 1.5201752363384828, "grad_norm": 1.4101720949081316, "learning_rate": 2.988490561742032e-07, "loss": 0.36495402455329895, "step": 6593 }, { "epoch": 1.5204058104680653, "grad_norm": 1.6817212910549741, "learning_rate": 2.985772629661032e-07, "loss": 0.5280855298042297, "step": 6594 }, { "epoch": 1.520636384597648, "grad_norm": 1.4575719708434207, "learning_rate": 2.9830557171690693e-07, "loss": 0.43953752517700195, "step": 6595 }, { "epoch": 1.5208669587272308, "grad_norm": 1.261754251016282, "learning_rate": 2.980339824661071e-07, "loss": 0.41361862421035767, "step": 6596 }, { "epoch": 1.5210975328568135, "grad_norm": 1.4525947923531464, "learning_rate": 2.977624952531821e-07, "loss": 0.39955854415893555, "step": 6597 }, { "epoch": 1.5213281069863962, "grad_norm": 1.664684863463753, "learning_rate": 2.9749111011759565e-07, "loss": 0.505165696144104, "step": 6598 }, { "epoch": 1.521558681115979, "grad_norm": 1.5619432117854901, "learning_rate": 2.9721982709879566e-07, "loss": 0.4388153851032257, "step": 6599 }, { "epoch": 1.5217892552455614, "grad_norm": 1.454152411615684, "learning_rate": 2.969486462362167e-07, "loss": 0.4479100704193115, "step": 6600 }, { "epoch": 1.5220198293751441, "grad_norm": 1.4345831092951191, "learning_rate": 2.9667756756927686e-07, "loss": 0.4005380868911743, "step": 6601 }, { "epoch": 1.5222504035047266, "grad_norm": 1.707280681236192, "learning_rate": 2.9640659113738087e-07, "loss": 0.43774881958961487, "step": 6602 }, { "epoch": 1.5224809776343093, "grad_norm": 1.5608510724785551, "learning_rate": 2.9613571697991725e-07, "loss": 0.4449707865715027, "step": 6603 }, { "epoch": 1.522711551763892, "grad_norm": 1.6567386639534631, "learning_rate": 2.958649451362606e-07, "loss": 0.454499751329422, "step": 6604 }, { "epoch": 1.5229421258934748, "grad_norm": 1.2977143159727098, "learning_rate": 2.955942756457707e-07, "loss": 0.35601305961608887, "step": 6605 }, { "epoch": 1.5231727000230575, "grad_norm": 1.6684183476509384, "learning_rate": 2.9532370854779143e-07, "loss": 0.5252523422241211, "step": 6606 }, { "epoch": 1.5234032741526402, "grad_norm": 1.3731317276647081, "learning_rate": 2.950532438816531e-07, "loss": 0.4311884939670563, "step": 6607 }, { "epoch": 1.5236338482822227, "grad_norm": 1.5784692430456444, "learning_rate": 2.9478288168667e-07, "loss": 0.43956485390663147, "step": 6608 }, { "epoch": 1.5238644224118054, "grad_norm": 1.4213527447836085, "learning_rate": 2.9451262200214235e-07, "loss": 0.400115430355072, "step": 6609 }, { "epoch": 1.524094996541388, "grad_norm": 1.6612091081011793, "learning_rate": 2.942424648673548e-07, "loss": 0.41738802194595337, "step": 6610 }, { "epoch": 1.5243255706709706, "grad_norm": 1.5951584459105572, "learning_rate": 2.939724103215776e-07, "loss": 0.412765771150589, "step": 6611 }, { "epoch": 1.5245561448005533, "grad_norm": 1.6739308031441762, "learning_rate": 2.937024584040659e-07, "loss": 0.44869422912597656, "step": 6612 }, { "epoch": 1.524786718930136, "grad_norm": 1.5443554211834334, "learning_rate": 2.934326091540603e-07, "loss": 0.39191997051239014, "step": 6613 }, { "epoch": 1.5250172930597188, "grad_norm": 1.307209963924962, "learning_rate": 2.9316286261078547e-07, "loss": 0.36575692892074585, "step": 6614 }, { "epoch": 1.5252478671893015, "grad_norm": 1.5775953874602453, "learning_rate": 2.9289321881345254e-07, "loss": 0.49928778409957886, "step": 6615 }, { "epoch": 1.525478441318884, "grad_norm": 1.5029437064522762, "learning_rate": 2.926236778012565e-07, "loss": 0.49619296193122864, "step": 6616 }, { "epoch": 1.5257090154484667, "grad_norm": 1.5175956935877304, "learning_rate": 2.923542396133777e-07, "loss": 0.4614447355270386, "step": 6617 }, { "epoch": 1.5259395895780492, "grad_norm": 1.5326379965687464, "learning_rate": 2.9208490428898213e-07, "loss": 0.43820804357528687, "step": 6618 }, { "epoch": 1.526170163707632, "grad_norm": 1.7297859153701105, "learning_rate": 2.9181567186722e-07, "loss": 0.46856528520584106, "step": 6619 }, { "epoch": 1.5264007378372146, "grad_norm": 1.5560178508678546, "learning_rate": 2.915465423872272e-07, "loss": 0.45428818464279175, "step": 6620 }, { "epoch": 1.5266313119667974, "grad_norm": 1.765757281110695, "learning_rate": 2.912775158881243e-07, "loss": 0.44715386629104614, "step": 6621 }, { "epoch": 1.52686188609638, "grad_norm": 1.845941311143575, "learning_rate": 2.9100859240901764e-07, "loss": 0.537441611289978, "step": 6622 }, { "epoch": 1.5270924602259628, "grad_norm": 2.100811269468338, "learning_rate": 2.9073977198899714e-07, "loss": 0.4430112838745117, "step": 6623 }, { "epoch": 1.5273230343555453, "grad_norm": 1.625928583733216, "learning_rate": 2.904710546671392e-07, "loss": 0.41713255643844604, "step": 6624 }, { "epoch": 1.527553608485128, "grad_norm": 1.639578198355071, "learning_rate": 2.9020244048250396e-07, "loss": 0.4313931465148926, "step": 6625 }, { "epoch": 1.5277841826147105, "grad_norm": 1.617455818460061, "learning_rate": 2.899339294741379e-07, "loss": 0.5038034319877625, "step": 6626 }, { "epoch": 1.5280147567442932, "grad_norm": 1.6017224429954546, "learning_rate": 2.8966552168107127e-07, "loss": 0.45088762044906616, "step": 6627 }, { "epoch": 1.528245330873876, "grad_norm": 1.6027378992570083, "learning_rate": 2.8939721714232e-07, "loss": 0.40857064723968506, "step": 6628 }, { "epoch": 1.5284759050034586, "grad_norm": 1.5432592985198028, "learning_rate": 2.891290158968853e-07, "loss": 0.43766242265701294, "step": 6629 }, { "epoch": 1.5287064791330414, "grad_norm": 1.6663524119863393, "learning_rate": 2.888609179837523e-07, "loss": 0.45986247062683105, "step": 6630 }, { "epoch": 1.528937053262624, "grad_norm": 1.5102818288035118, "learning_rate": 2.8859292344189236e-07, "loss": 0.4681728482246399, "step": 6631 }, { "epoch": 1.5291676273922066, "grad_norm": 1.4009274503220306, "learning_rate": 2.883250323102605e-07, "loss": 0.36730295419692993, "step": 6632 }, { "epoch": 1.5293982015217893, "grad_norm": 1.6785355662696937, "learning_rate": 2.880572446277982e-07, "loss": 0.43494418263435364, "step": 6633 }, { "epoch": 1.5296287756513718, "grad_norm": 1.6257441783659756, "learning_rate": 2.877895604334305e-07, "loss": 0.49145790934562683, "step": 6634 }, { "epoch": 1.5298593497809545, "grad_norm": 1.4638603112091872, "learning_rate": 2.875219797660681e-07, "loss": 0.4166264832019806, "step": 6635 }, { "epoch": 1.5300899239105372, "grad_norm": 1.3504636181719787, "learning_rate": 2.8725450266460704e-07, "loss": 0.4336514472961426, "step": 6636 }, { "epoch": 1.53032049804012, "grad_norm": 1.6796430942391267, "learning_rate": 2.869871291679271e-07, "loss": 0.44186240434646606, "step": 6637 }, { "epoch": 1.5305510721697027, "grad_norm": 1.4751166079505253, "learning_rate": 2.867198593148945e-07, "loss": 0.40619733929634094, "step": 6638 }, { "epoch": 1.5307816462992854, "grad_norm": 1.4034694689938345, "learning_rate": 2.864526931443588e-07, "loss": 0.45552101731300354, "step": 6639 }, { "epoch": 1.5310122204288679, "grad_norm": 1.3563039501008287, "learning_rate": 2.861856306951562e-07, "loss": 0.45153865218162537, "step": 6640 }, { "epoch": 1.5312427945584506, "grad_norm": 1.5793746333655185, "learning_rate": 2.859186720061061e-07, "loss": 0.5146148204803467, "step": 6641 }, { "epoch": 1.531473368688033, "grad_norm": 1.5627792728055054, "learning_rate": 2.856518171160143e-07, "loss": 0.4566080868244171, "step": 6642 }, { "epoch": 1.5317039428176158, "grad_norm": 1.93802928616596, "learning_rate": 2.853850660636703e-07, "loss": 0.4390585124492645, "step": 6643 }, { "epoch": 1.5319345169471985, "grad_norm": 1.7734959004013588, "learning_rate": 2.851184188878493e-07, "loss": 0.5508195757865906, "step": 6644 }, { "epoch": 1.5321650910767812, "grad_norm": 1.6721581584041076, "learning_rate": 2.8485187562731126e-07, "loss": 0.47640183568000793, "step": 6645 }, { "epoch": 1.532395665206364, "grad_norm": 1.421769874384772, "learning_rate": 2.8458543632080123e-07, "loss": 0.4511566758155823, "step": 6646 }, { "epoch": 1.5326262393359467, "grad_norm": 1.5003089507123706, "learning_rate": 2.843191010070486e-07, "loss": 0.414367139339447, "step": 6647 }, { "epoch": 1.5328568134655292, "grad_norm": 1.5192326893049226, "learning_rate": 2.840528697247674e-07, "loss": 0.4611589312553406, "step": 6648 }, { "epoch": 1.5330873875951119, "grad_norm": 1.6397285440449882, "learning_rate": 2.8378674251265787e-07, "loss": 0.4675883948802948, "step": 6649 }, { "epoch": 1.5333179617246944, "grad_norm": 1.6281144487220143, "learning_rate": 2.835207194094036e-07, "loss": 0.49039095640182495, "step": 6650 }, { "epoch": 1.533548535854277, "grad_norm": 1.6636356702139277, "learning_rate": 2.832548004536741e-07, "loss": 0.45641693472862244, "step": 6651 }, { "epoch": 1.5337791099838598, "grad_norm": 1.7323507398911224, "learning_rate": 2.829889856841233e-07, "loss": 0.4858587682247162, "step": 6652 }, { "epoch": 1.5340096841134425, "grad_norm": 1.3640056940377991, "learning_rate": 2.8272327513939055e-07, "loss": 0.3640017807483673, "step": 6653 }, { "epoch": 1.5342402582430252, "grad_norm": 1.5342226074105705, "learning_rate": 2.8245766885809865e-07, "loss": 0.42915207147598267, "step": 6654 }, { "epoch": 1.534470832372608, "grad_norm": 1.5250515427099394, "learning_rate": 2.8219216687885707e-07, "loss": 0.5041407346725464, "step": 6655 }, { "epoch": 1.5347014065021904, "grad_norm": 1.479165849869464, "learning_rate": 2.8192676924025885e-07, "loss": 0.4748334288597107, "step": 6656 }, { "epoch": 1.5349319806317732, "grad_norm": 1.5854109757101433, "learning_rate": 2.8166147598088173e-07, "loss": 0.4745975136756897, "step": 6657 }, { "epoch": 1.5351625547613557, "grad_norm": 1.6430139570672564, "learning_rate": 2.813962871392893e-07, "loss": 0.49246084690093994, "step": 6658 }, { "epoch": 1.5353931288909384, "grad_norm": 1.3796442061928538, "learning_rate": 2.8113120275402936e-07, "loss": 0.47876033186912537, "step": 6659 }, { "epoch": 1.535623703020521, "grad_norm": 1.6460545742229191, "learning_rate": 2.808662228636348e-07, "loss": 0.5244987607002258, "step": 6660 }, { "epoch": 1.5358542771501038, "grad_norm": 1.6433381019004774, "learning_rate": 2.8060134750662277e-07, "loss": 0.44661569595336914, "step": 6661 }, { "epoch": 1.5360848512796865, "grad_norm": 1.4583799872096337, "learning_rate": 2.8033657672149615e-07, "loss": 0.4508060812950134, "step": 6662 }, { "epoch": 1.5363154254092692, "grad_norm": 1.3497148067649773, "learning_rate": 2.8007191054674117e-07, "loss": 0.4657326340675354, "step": 6663 }, { "epoch": 1.5365459995388517, "grad_norm": 1.4227603766742651, "learning_rate": 2.798073490208307e-07, "loss": 0.495077520608902, "step": 6664 }, { "epoch": 1.5367765736684345, "grad_norm": 1.4557135691757939, "learning_rate": 2.795428921822206e-07, "loss": 0.40721309185028076, "step": 6665 }, { "epoch": 1.537007147798017, "grad_norm": 1.4109014285343175, "learning_rate": 2.7927854006935315e-07, "loss": 0.3279367685317993, "step": 6666 }, { "epoch": 1.5372377219275997, "grad_norm": 1.6893419118169095, "learning_rate": 2.790142927206538e-07, "loss": 0.4849242866039276, "step": 6667 }, { "epoch": 1.5374682960571824, "grad_norm": 1.7502055418971636, "learning_rate": 2.7875015017453394e-07, "loss": 0.45151397585868835, "step": 6668 }, { "epoch": 1.537698870186765, "grad_norm": 1.7275509884274352, "learning_rate": 2.784861124693898e-07, "loss": 0.43480992317199707, "step": 6669 }, { "epoch": 1.5379294443163478, "grad_norm": 1.606181868361543, "learning_rate": 2.782221796436012e-07, "loss": 0.48764440417289734, "step": 6670 }, { "epoch": 1.5381600184459305, "grad_norm": 1.5345831310523104, "learning_rate": 2.7795835173553407e-07, "loss": 0.4164161682128906, "step": 6671 }, { "epoch": 1.538390592575513, "grad_norm": 1.8060994369656536, "learning_rate": 2.7769462878353777e-07, "loss": 0.49934858083724976, "step": 6672 }, { "epoch": 1.5386211667050955, "grad_norm": 1.4004311994850918, "learning_rate": 2.77431010825948e-07, "loss": 0.4877321124076843, "step": 6673 }, { "epoch": 1.5388517408346782, "grad_norm": 1.7442704894714258, "learning_rate": 2.771674979010834e-07, "loss": 0.44518858194351196, "step": 6674 }, { "epoch": 1.539082314964261, "grad_norm": 1.4902795732558884, "learning_rate": 2.769040900472488e-07, "loss": 0.4237474203109741, "step": 6675 }, { "epoch": 1.5393128890938437, "grad_norm": 1.8818051716593445, "learning_rate": 2.7664078730273335e-07, "loss": 0.45270341634750366, "step": 6676 }, { "epoch": 1.5395434632234264, "grad_norm": 1.9777420597791724, "learning_rate": 2.7637758970581004e-07, "loss": 0.3866819739341736, "step": 6677 }, { "epoch": 1.539774037353009, "grad_norm": 1.709571144624541, "learning_rate": 2.7611449729473825e-07, "loss": 0.4384220838546753, "step": 6678 }, { "epoch": 1.5400046114825916, "grad_norm": 1.523752237168306, "learning_rate": 2.758515101077602e-07, "loss": 0.4462182819843292, "step": 6679 }, { "epoch": 1.5402351856121743, "grad_norm": 1.6129576485586044, "learning_rate": 2.755886281831046e-07, "loss": 0.3927033245563507, "step": 6680 }, { "epoch": 1.5404657597417568, "grad_norm": 1.7095013933604486, "learning_rate": 2.7532585155898314e-07, "loss": 0.4678634703159332, "step": 6681 }, { "epoch": 1.5406963338713395, "grad_norm": 1.4524055684149206, "learning_rate": 2.750631802735935e-07, "loss": 0.4165131151676178, "step": 6682 }, { "epoch": 1.5409269080009222, "grad_norm": 1.1494402193253566, "learning_rate": 2.748006143651178e-07, "loss": 0.3705793023109436, "step": 6683 }, { "epoch": 1.541157482130505, "grad_norm": 1.5819526439113667, "learning_rate": 2.745381538717226e-07, "loss": 0.5428882837295532, "step": 6684 }, { "epoch": 1.5413880562600877, "grad_norm": 1.6426127293668795, "learning_rate": 2.742757988315589e-07, "loss": 0.4116673171520233, "step": 6685 }, { "epoch": 1.5416186303896704, "grad_norm": 1.4540567592422353, "learning_rate": 2.740135492827631e-07, "loss": 0.4617515802383423, "step": 6686 }, { "epoch": 1.541849204519253, "grad_norm": 1.6140828940427878, "learning_rate": 2.737514052634555e-07, "loss": 0.5002453923225403, "step": 6687 }, { "epoch": 1.5420797786488356, "grad_norm": 1.4130856063185002, "learning_rate": 2.734893668117412e-07, "loss": 0.46029362082481384, "step": 6688 }, { "epoch": 1.542310352778418, "grad_norm": 1.4809565956171882, "learning_rate": 2.732274339657107e-07, "loss": 0.40502026677131653, "step": 6689 }, { "epoch": 1.5425409269080008, "grad_norm": 1.6538580711421296, "learning_rate": 2.7296560676343803e-07, "loss": 0.5267831087112427, "step": 6690 }, { "epoch": 1.5427715010375835, "grad_norm": 1.3087993674480496, "learning_rate": 2.727038852429826e-07, "loss": 0.3464335799217224, "step": 6691 }, { "epoch": 1.5430020751671663, "grad_norm": 1.5384863769893498, "learning_rate": 2.7244226944238847e-07, "loss": 0.36635881662368774, "step": 6692 }, { "epoch": 1.543232649296749, "grad_norm": 1.7314925345176482, "learning_rate": 2.7218075939968435e-07, "loss": 0.4567757844924927, "step": 6693 }, { "epoch": 1.5434632234263317, "grad_norm": 1.9452957704897642, "learning_rate": 2.719193551528827e-07, "loss": 0.539220929145813, "step": 6694 }, { "epoch": 1.5436937975559142, "grad_norm": 1.653206530012829, "learning_rate": 2.71658056739982e-07, "loss": 0.48553818464279175, "step": 6695 }, { "epoch": 1.543924371685497, "grad_norm": 1.5040526715775615, "learning_rate": 2.7139686419896424e-07, "loss": 0.48564499616622925, "step": 6696 }, { "epoch": 1.5441549458150794, "grad_norm": 1.3502417010865393, "learning_rate": 2.7113577756779616e-07, "loss": 0.4163014590740204, "step": 6697 }, { "epoch": 1.544385519944662, "grad_norm": 1.864828438533457, "learning_rate": 2.708747968844296e-07, "loss": 0.5686431527137756, "step": 6698 }, { "epoch": 1.5446160940742448, "grad_norm": 1.8608147536494253, "learning_rate": 2.706139221868008e-07, "loss": 0.5365211963653564, "step": 6699 }, { "epoch": 1.5448466682038275, "grad_norm": 1.5480523179756653, "learning_rate": 2.7035315351283084e-07, "loss": 0.4147397577762604, "step": 6700 }, { "epoch": 1.5450772423334103, "grad_norm": 1.5279455451058772, "learning_rate": 2.7009249090042454e-07, "loss": 0.3938590884208679, "step": 6701 }, { "epoch": 1.545307816462993, "grad_norm": 1.726862148896079, "learning_rate": 2.698319343874722e-07, "loss": 0.3521370589733124, "step": 6702 }, { "epoch": 1.5455383905925755, "grad_norm": 1.6305887024948476, "learning_rate": 2.69571484011848e-07, "loss": 0.430014967918396, "step": 6703 }, { "epoch": 1.5457689647221582, "grad_norm": 1.636933956561892, "learning_rate": 2.6931113981141164e-07, "loss": 0.4697108864784241, "step": 6704 }, { "epoch": 1.5459995388517407, "grad_norm": 1.5552943329509785, "learning_rate": 2.69050901824006e-07, "loss": 0.46567851305007935, "step": 6705 }, { "epoch": 1.5462301129813234, "grad_norm": 1.620367133120872, "learning_rate": 2.6879077008745986e-07, "loss": 0.46061819791793823, "step": 6706 }, { "epoch": 1.5464606871109061, "grad_norm": 1.5411435279833592, "learning_rate": 2.6853074463958614e-07, "loss": 0.568658709526062, "step": 6707 }, { "epoch": 1.5466912612404888, "grad_norm": 1.3834999667432357, "learning_rate": 2.682708255181815e-07, "loss": 0.42816412448883057, "step": 6708 }, { "epoch": 1.5469218353700716, "grad_norm": 1.576410551372393, "learning_rate": 2.6801101276102866e-07, "loss": 0.42515552043914795, "step": 6709 }, { "epoch": 1.5471524094996543, "grad_norm": 1.5447523266389376, "learning_rate": 2.677513064058932e-07, "loss": 0.46513399481773376, "step": 6710 }, { "epoch": 1.5473829836292368, "grad_norm": 1.3853944144224488, "learning_rate": 2.6749170649052675e-07, "loss": 0.4194756746292114, "step": 6711 }, { "epoch": 1.5476135577588195, "grad_norm": 1.4035563039276318, "learning_rate": 2.672322130526643e-07, "loss": 0.4456541836261749, "step": 6712 }, { "epoch": 1.547844131888402, "grad_norm": 1.5113453932130136, "learning_rate": 2.669728261300264e-07, "loss": 0.493444561958313, "step": 6713 }, { "epoch": 1.5480747060179847, "grad_norm": 1.582884732282312, "learning_rate": 2.6671354576031645e-07, "loss": 0.47202616930007935, "step": 6714 }, { "epoch": 1.5483052801475674, "grad_norm": 1.824788636144565, "learning_rate": 2.66454371981225e-07, "loss": 0.4584811329841614, "step": 6715 }, { "epoch": 1.5485358542771501, "grad_norm": 1.3167028831683925, "learning_rate": 2.6619530483042485e-07, "loss": 0.4072091579437256, "step": 6716 }, { "epoch": 1.5487664284067328, "grad_norm": 1.5656021898929726, "learning_rate": 2.6593634434557365e-07, "loss": 0.49742361903190613, "step": 6717 }, { "epoch": 1.5489970025363156, "grad_norm": 1.6686846450785309, "learning_rate": 2.6567749056431467e-07, "loss": 0.49291643500328064, "step": 6718 }, { "epoch": 1.549227576665898, "grad_norm": 1.5234565390584587, "learning_rate": 2.6541874352427427e-07, "loss": 0.5210362076759338, "step": 6719 }, { "epoch": 1.5494581507954808, "grad_norm": 1.523136615036839, "learning_rate": 2.651601032630645e-07, "loss": 0.4489557147026062, "step": 6720 }, { "epoch": 1.5496887249250633, "grad_norm": 1.515706035484409, "learning_rate": 2.649015698182808e-07, "loss": 0.4417908191680908, "step": 6721 }, { "epoch": 1.549919299054646, "grad_norm": 1.5123745571810647, "learning_rate": 2.6464314322750404e-07, "loss": 0.45177266001701355, "step": 6722 }, { "epoch": 1.5501498731842287, "grad_norm": 1.5422888438788165, "learning_rate": 2.6438482352829896e-07, "loss": 0.37720638513565063, "step": 6723 }, { "epoch": 1.5503804473138114, "grad_norm": 1.5572735157633186, "learning_rate": 2.641266107582153e-07, "loss": 0.5108897089958191, "step": 6724 }, { "epoch": 1.5506110214433941, "grad_norm": 1.5098940840101445, "learning_rate": 2.638685049547863e-07, "loss": 0.449248731136322, "step": 6725 }, { "epoch": 1.5508415955729768, "grad_norm": 1.4667668469814954, "learning_rate": 2.636105061555309e-07, "loss": 0.4692652225494385, "step": 6726 }, { "epoch": 1.5510721697025593, "grad_norm": 1.5150559633489926, "learning_rate": 2.6335261439795153e-07, "loss": 0.49128347635269165, "step": 6727 }, { "epoch": 1.551302743832142, "grad_norm": 1.5725646817979666, "learning_rate": 2.630948297195351e-07, "loss": 0.4618053436279297, "step": 6728 }, { "epoch": 1.5515333179617246, "grad_norm": 1.5786249232029208, "learning_rate": 2.6283715215775336e-07, "loss": 0.4342828094959259, "step": 6729 }, { "epoch": 1.5517638920913073, "grad_norm": 1.5592983853420144, "learning_rate": 2.625795817500626e-07, "loss": 0.5214434862136841, "step": 6730 }, { "epoch": 1.55199446622089, "grad_norm": 1.521395946192631, "learning_rate": 2.623221185339034e-07, "loss": 0.4873029589653015, "step": 6731 }, { "epoch": 1.5522250403504727, "grad_norm": 1.5014817933254478, "learning_rate": 2.6206476254670007e-07, "loss": 0.4510548412799835, "step": 6732 }, { "epoch": 1.5524556144800554, "grad_norm": 1.5931454307395074, "learning_rate": 2.6180751382586265e-07, "loss": 0.4832548499107361, "step": 6733 }, { "epoch": 1.5526861886096381, "grad_norm": 1.8273040799326088, "learning_rate": 2.6155037240878406e-07, "loss": 0.5438823699951172, "step": 6734 }, { "epoch": 1.5529167627392206, "grad_norm": 1.488758610712305, "learning_rate": 2.6129333833284315e-07, "loss": 0.4967566728591919, "step": 6735 }, { "epoch": 1.5531473368688034, "grad_norm": 1.419700158234616, "learning_rate": 2.610364116354018e-07, "loss": 0.5187437534332275, "step": 6736 }, { "epoch": 1.5533779109983858, "grad_norm": 1.3624978155475462, "learning_rate": 2.607795923538072e-07, "loss": 0.4199862480163574, "step": 6737 }, { "epoch": 1.5536084851279686, "grad_norm": 1.463828508781327, "learning_rate": 2.6052288052539084e-07, "loss": 0.5009325742721558, "step": 6738 }, { "epoch": 1.5538390592575513, "grad_norm": 1.5361155892650822, "learning_rate": 2.602662761874679e-07, "loss": 0.48698678612709045, "step": 6739 }, { "epoch": 1.554069633387134, "grad_norm": 1.4600353762817446, "learning_rate": 2.6000977937733905e-07, "loss": 0.4845883846282959, "step": 6740 }, { "epoch": 1.5543002075167167, "grad_norm": 1.6153802807658302, "learning_rate": 2.59753390132288e-07, "loss": 0.512161135673523, "step": 6741 }, { "epoch": 1.5545307816462994, "grad_norm": 1.756231295082545, "learning_rate": 2.5949710848958415e-07, "loss": 0.42334964871406555, "step": 6742 }, { "epoch": 1.554761355775882, "grad_norm": 1.2927501946290025, "learning_rate": 2.592409344864801e-07, "loss": 0.3781980276107788, "step": 6743 }, { "epoch": 1.5549919299054646, "grad_norm": 1.5363470406300028, "learning_rate": 2.5898486816021394e-07, "loss": 0.4989853501319885, "step": 6744 }, { "epoch": 1.5552225040350471, "grad_norm": 1.5873964925893267, "learning_rate": 2.5872890954800676e-07, "loss": 0.45715585350990295, "step": 6745 }, { "epoch": 1.5554530781646299, "grad_norm": 1.3499060893753405, "learning_rate": 2.5847305868706515e-07, "loss": 0.5025684833526611, "step": 6746 }, { "epoch": 1.5556836522942126, "grad_norm": 1.5290460697986008, "learning_rate": 2.5821731561457994e-07, "loss": 0.47298115491867065, "step": 6747 }, { "epoch": 1.5559142264237953, "grad_norm": 1.4250590830459762, "learning_rate": 2.5796168036772524e-07, "loss": 0.45412957668304443, "step": 6748 }, { "epoch": 1.556144800553378, "grad_norm": 1.6230149340497857, "learning_rate": 2.5770615298366107e-07, "loss": 0.3958669602870941, "step": 6749 }, { "epoch": 1.5563753746829607, "grad_norm": 1.4992477100706287, "learning_rate": 2.574507334995302e-07, "loss": 0.4748396873474121, "step": 6750 }, { "epoch": 1.5566059488125432, "grad_norm": 2.1473408883216534, "learning_rate": 2.5719542195246093e-07, "loss": 0.4741169810295105, "step": 6751 }, { "epoch": 1.556836522942126, "grad_norm": 1.5072269547692108, "learning_rate": 2.569402183795648e-07, "loss": 0.4362972378730774, "step": 6752 }, { "epoch": 1.5570670970717084, "grad_norm": 1.5695384848079892, "learning_rate": 2.5668512281793873e-07, "loss": 0.48013412952423096, "step": 6753 }, { "epoch": 1.5572976712012911, "grad_norm": 1.4514603270444408, "learning_rate": 2.564301353046634e-07, "loss": 0.4728567600250244, "step": 6754 }, { "epoch": 1.5575282453308739, "grad_norm": 1.7592773476195727, "learning_rate": 2.56175255876804e-07, "loss": 0.4304337501525879, "step": 6755 }, { "epoch": 1.5577588194604566, "grad_norm": 1.5275686028016913, "learning_rate": 2.5592048457140926e-07, "loss": 0.43467870354652405, "step": 6756 }, { "epoch": 1.5579893935900393, "grad_norm": 1.9596482130933712, "learning_rate": 2.556658214255134e-07, "loss": 0.3912844657897949, "step": 6757 }, { "epoch": 1.558219967719622, "grad_norm": 1.5284327791141838, "learning_rate": 2.5541126647613397e-07, "loss": 0.4462862014770508, "step": 6758 }, { "epoch": 1.5584505418492045, "grad_norm": 1.5847675751494867, "learning_rate": 2.551568197602729e-07, "loss": 0.43929487466812134, "step": 6759 }, { "epoch": 1.5586811159787872, "grad_norm": 1.5077581986013873, "learning_rate": 2.549024813149169e-07, "loss": 0.44473958015441895, "step": 6760 }, { "epoch": 1.5589116901083697, "grad_norm": 1.5536876763085832, "learning_rate": 2.546482511770365e-07, "loss": 0.5159727931022644, "step": 6761 }, { "epoch": 1.5591422642379524, "grad_norm": 1.7371461951042986, "learning_rate": 2.5439412938358696e-07, "loss": 0.3975204825401306, "step": 6762 }, { "epoch": 1.5593728383675352, "grad_norm": 1.493493619365051, "learning_rate": 2.54140115971507e-07, "loss": 0.5198286175727844, "step": 6763 }, { "epoch": 1.5596034124971179, "grad_norm": 1.4309109790386, "learning_rate": 2.5388621097772046e-07, "loss": 0.4815763831138611, "step": 6764 }, { "epoch": 1.5598339866267006, "grad_norm": 1.3803469238514527, "learning_rate": 2.5363241443913454e-07, "loss": 0.365215539932251, "step": 6765 }, { "epoch": 1.5600645607562833, "grad_norm": 1.6088793691676593, "learning_rate": 2.533787263926417e-07, "loss": 0.486020028591156, "step": 6766 }, { "epoch": 1.5602951348858658, "grad_norm": 1.5355383857513338, "learning_rate": 2.5312514687511766e-07, "loss": 0.38536715507507324, "step": 6767 }, { "epoch": 1.5605257090154485, "grad_norm": 1.649862765507334, "learning_rate": 2.528716759234227e-07, "loss": 0.44713371992111206, "step": 6768 }, { "epoch": 1.560756283145031, "grad_norm": 1.868794454538197, "learning_rate": 2.5261831357440154e-07, "loss": 0.4122806489467621, "step": 6769 }, { "epoch": 1.5609868572746137, "grad_norm": 1.6234940940069353, "learning_rate": 2.523650598648829e-07, "loss": 0.40514320135116577, "step": 6770 }, { "epoch": 1.5612174314041964, "grad_norm": 1.4417973525561176, "learning_rate": 2.5211191483168027e-07, "loss": 0.4273102283477783, "step": 6771 }, { "epoch": 1.5614480055337792, "grad_norm": 1.4229504510118502, "learning_rate": 2.5185887851159005e-07, "loss": 0.4774209260940552, "step": 6772 }, { "epoch": 1.5616785796633619, "grad_norm": 1.583645566960067, "learning_rate": 2.5160595094139436e-07, "loss": 0.3928600549697876, "step": 6773 }, { "epoch": 1.5619091537929446, "grad_norm": 1.6757793450729852, "learning_rate": 2.5135313215785816e-07, "loss": 0.4414944052696228, "step": 6774 }, { "epoch": 1.562139727922527, "grad_norm": 1.733143939427008, "learning_rate": 2.5110042219773176e-07, "loss": 0.36133646965026855, "step": 6775 }, { "epoch": 1.5623703020521098, "grad_norm": 1.8443586806925936, "learning_rate": 2.508478210977486e-07, "loss": 0.44824904203414917, "step": 6776 }, { "epoch": 1.5626008761816923, "grad_norm": 1.1693439456079453, "learning_rate": 2.5059532889462707e-07, "loss": 0.3699820637702942, "step": 6777 }, { "epoch": 1.562831450311275, "grad_norm": 1.9309547773144982, "learning_rate": 2.5034294562506976e-07, "loss": 0.4809808135032654, "step": 6778 }, { "epoch": 1.5630620244408577, "grad_norm": 1.7665230327633363, "learning_rate": 2.5009067132576256e-07, "loss": 0.487751841545105, "step": 6779 }, { "epoch": 1.5632925985704405, "grad_norm": 1.5839144124062823, "learning_rate": 2.4983850603337675e-07, "loss": 0.47932374477386475, "step": 6780 }, { "epoch": 1.5635231727000232, "grad_norm": 1.4782012523005248, "learning_rate": 2.495864497845663e-07, "loss": 0.42852234840393066, "step": 6781 }, { "epoch": 1.5637537468296059, "grad_norm": 1.4802387383863571, "learning_rate": 2.49334502615971e-07, "loss": 0.4392131567001343, "step": 6782 }, { "epoch": 1.5639843209591884, "grad_norm": 1.5042475261036963, "learning_rate": 2.4908266456421323e-07, "loss": 0.45050233602523804, "step": 6783 }, { "epoch": 1.5642148950887709, "grad_norm": 1.4962883173938244, "learning_rate": 2.488309356659004e-07, "loss": 0.45328110456466675, "step": 6784 }, { "epoch": 1.5644454692183536, "grad_norm": 1.451199382042834, "learning_rate": 2.4857931595762403e-07, "loss": 0.3851325511932373, "step": 6785 }, { "epoch": 1.5646760433479363, "grad_norm": 1.5269726027188475, "learning_rate": 2.4832780547595976e-07, "loss": 0.4096960127353668, "step": 6786 }, { "epoch": 1.564906617477519, "grad_norm": 1.4158017969205454, "learning_rate": 2.480764042574669e-07, "loss": 0.4439825117588043, "step": 6787 }, { "epoch": 1.5651371916071017, "grad_norm": 1.5084778231824414, "learning_rate": 2.4782511233868895e-07, "loss": 0.4259459972381592, "step": 6788 }, { "epoch": 1.5653677657366845, "grad_norm": 1.6383230301383533, "learning_rate": 2.475739297561542e-07, "loss": 0.4701216220855713, "step": 6789 }, { "epoch": 1.565598339866267, "grad_norm": 1.4707071600317903, "learning_rate": 2.473228565463742e-07, "loss": 0.4435737133026123, "step": 6790 }, { "epoch": 1.5658289139958497, "grad_norm": 1.4361527011832544, "learning_rate": 2.4707189274584537e-07, "loss": 0.4476662278175354, "step": 6791 }, { "epoch": 1.5660594881254322, "grad_norm": 1.8319243980176085, "learning_rate": 2.468210383910474e-07, "loss": 0.4399911165237427, "step": 6792 }, { "epoch": 1.5662900622550149, "grad_norm": 1.5617800363149925, "learning_rate": 2.465702935184446e-07, "loss": 0.4206039309501648, "step": 6793 }, { "epoch": 1.5665206363845976, "grad_norm": 1.5998109403316092, "learning_rate": 2.463196581644855e-07, "loss": 0.44936686754226685, "step": 6794 }, { "epoch": 1.5667512105141803, "grad_norm": 1.4750351364947134, "learning_rate": 2.4606913236560277e-07, "loss": 0.39926016330718994, "step": 6795 }, { "epoch": 1.566981784643763, "grad_norm": 1.607414705164721, "learning_rate": 2.4581871615821216e-07, "loss": 0.4338487982749939, "step": 6796 }, { "epoch": 1.5672123587733457, "grad_norm": 1.6693881073802184, "learning_rate": 2.455684095787148e-07, "loss": 0.5047430992126465, "step": 6797 }, { "epoch": 1.5674429329029282, "grad_norm": 1.623571142038879, "learning_rate": 2.4531821266349504e-07, "loss": 0.46082550287246704, "step": 6798 }, { "epoch": 1.567673507032511, "grad_norm": 1.5687485332342288, "learning_rate": 2.450681254489214e-07, "loss": 0.44586509466171265, "step": 6799 }, { "epoch": 1.5679040811620935, "grad_norm": 1.6011741376497353, "learning_rate": 2.4481814797134657e-07, "loss": 0.5167746543884277, "step": 6800 }, { "epoch": 1.5681346552916762, "grad_norm": 1.4074512111564024, "learning_rate": 2.4456828026710753e-07, "loss": 0.44062116742134094, "step": 6801 }, { "epoch": 1.5683652294212589, "grad_norm": 1.718295945554571, "learning_rate": 2.4431852237252524e-07, "loss": 0.5096040368080139, "step": 6802 }, { "epoch": 1.5685958035508416, "grad_norm": 1.3369851313651875, "learning_rate": 2.440688743239042e-07, "loss": 0.44234153628349304, "step": 6803 }, { "epoch": 1.5688263776804243, "grad_norm": 1.7878168925295264, "learning_rate": 2.4381933615753357e-07, "loss": 0.431011825799942, "step": 6804 }, { "epoch": 1.569056951810007, "grad_norm": 1.5221569168970472, "learning_rate": 2.435699079096858e-07, "loss": 0.4903266131877899, "step": 6805 }, { "epoch": 1.5692875259395895, "grad_norm": 1.4830626229942445, "learning_rate": 2.433205896166185e-07, "loss": 0.4698626399040222, "step": 6806 }, { "epoch": 1.5695181000691723, "grad_norm": 1.7678576287420633, "learning_rate": 2.4307138131457184e-07, "loss": 0.37576574087142944, "step": 6807 }, { "epoch": 1.5697486741987547, "grad_norm": 1.442601981615427, "learning_rate": 2.4282228303977113e-07, "loss": 0.47068172693252563, "step": 6808 }, { "epoch": 1.5699792483283375, "grad_norm": 1.5121414961596256, "learning_rate": 2.425732948284257e-07, "loss": 0.45246315002441406, "step": 6809 }, { "epoch": 1.5702098224579202, "grad_norm": 1.670746435704044, "learning_rate": 2.423244167167278e-07, "loss": 0.4746376574039459, "step": 6810 }, { "epoch": 1.570440396587503, "grad_norm": 1.6491072802367082, "learning_rate": 2.420756487408551e-07, "loss": 0.413469135761261, "step": 6811 }, { "epoch": 1.5706709707170856, "grad_norm": 1.4392614299059656, "learning_rate": 2.418269909369678e-07, "loss": 0.3567890226840973, "step": 6812 }, { "epoch": 1.5709015448466683, "grad_norm": 1.9034789277869502, "learning_rate": 2.415784433412116e-07, "loss": 0.4676034450531006, "step": 6813 }, { "epoch": 1.5711321189762508, "grad_norm": 1.5100461636177536, "learning_rate": 2.4133000598971477e-07, "loss": 0.429337739944458, "step": 6814 }, { "epoch": 1.5713626931058335, "grad_norm": 1.657098818036463, "learning_rate": 2.4108167891859065e-07, "loss": 0.35861289501190186, "step": 6815 }, { "epoch": 1.571593267235416, "grad_norm": 1.7985300174152374, "learning_rate": 2.4083346216393564e-07, "loss": 0.43728363513946533, "step": 6816 }, { "epoch": 1.5718238413649988, "grad_norm": 1.6655671112295587, "learning_rate": 2.405853557618308e-07, "loss": 0.44594380259513855, "step": 6817 }, { "epoch": 1.5720544154945815, "grad_norm": 1.430621764890317, "learning_rate": 2.403373597483414e-07, "loss": 0.36871337890625, "step": 6818 }, { "epoch": 1.5722849896241642, "grad_norm": 1.4284927159530842, "learning_rate": 2.400894741595152e-07, "loss": 0.3769477307796478, "step": 6819 }, { "epoch": 1.572515563753747, "grad_norm": 1.6803573488891066, "learning_rate": 2.3984169903138583e-07, "loss": 0.503145694732666, "step": 6820 }, { "epoch": 1.5727461378833296, "grad_norm": 1.552866324250783, "learning_rate": 2.395940343999691e-07, "loss": 0.4082655906677246, "step": 6821 }, { "epoch": 1.5729767120129121, "grad_norm": 1.4215190376699491, "learning_rate": 2.3934648030126625e-07, "loss": 0.4106418192386627, "step": 6822 }, { "epoch": 1.5732072861424948, "grad_norm": 1.663561714777188, "learning_rate": 2.390990367712613e-07, "loss": 0.45363783836364746, "step": 6823 }, { "epoch": 1.5734378602720773, "grad_norm": 1.4253235303875884, "learning_rate": 2.388517038459227e-07, "loss": 0.4416825473308563, "step": 6824 }, { "epoch": 1.57366843440166, "grad_norm": 1.5727508875619094, "learning_rate": 2.3860448156120304e-07, "loss": 0.5106863379478455, "step": 6825 }, { "epoch": 1.5738990085312428, "grad_norm": 1.431151413456896, "learning_rate": 2.3835736995303879e-07, "loss": 0.4618466794490814, "step": 6826 }, { "epoch": 1.5741295826608255, "grad_norm": 1.6611294255159201, "learning_rate": 2.381103690573495e-07, "loss": 0.414678692817688, "step": 6827 }, { "epoch": 1.5743601567904082, "grad_norm": 1.3583782134926532, "learning_rate": 2.3786347891004e-07, "loss": 0.39774662256240845, "step": 6828 }, { "epoch": 1.574590730919991, "grad_norm": 1.3689702631653482, "learning_rate": 2.376166995469977e-07, "loss": 0.4513537287712097, "step": 6829 }, { "epoch": 1.5748213050495734, "grad_norm": 1.5433747348092586, "learning_rate": 2.3737003100409447e-07, "loss": 0.44062697887420654, "step": 6830 }, { "epoch": 1.5750518791791561, "grad_norm": 1.6549219639884087, "learning_rate": 2.3712347331718617e-07, "loss": 0.42305582761764526, "step": 6831 }, { "epoch": 1.5752824533087386, "grad_norm": 1.628456252942963, "learning_rate": 2.3687702652211262e-07, "loss": 0.46731626987457275, "step": 6832 }, { "epoch": 1.5755130274383213, "grad_norm": 1.569042371408869, "learning_rate": 2.3663069065469753e-07, "loss": 0.4926149845123291, "step": 6833 }, { "epoch": 1.575743601567904, "grad_norm": 1.8433451746214373, "learning_rate": 2.3638446575074777e-07, "loss": 0.49002933502197266, "step": 6834 }, { "epoch": 1.5759741756974868, "grad_norm": 1.9286763636552064, "learning_rate": 2.3613835184605523e-07, "loss": 0.47110694646835327, "step": 6835 }, { "epoch": 1.5762047498270695, "grad_norm": 1.7003781450027053, "learning_rate": 2.3589234897639444e-07, "loss": 0.4257816672325134, "step": 6836 }, { "epoch": 1.5764353239566522, "grad_norm": 1.4515610553726317, "learning_rate": 2.3564645717752506e-07, "loss": 0.4031051695346832, "step": 6837 }, { "epoch": 1.5766658980862347, "grad_norm": 1.7208107126331553, "learning_rate": 2.3540067648518957e-07, "loss": 0.5077808499336243, "step": 6838 }, { "epoch": 1.5768964722158174, "grad_norm": 1.4184547433402042, "learning_rate": 2.3515500693511449e-07, "loss": 0.3877585232257843, "step": 6839 }, { "epoch": 1.5771270463454, "grad_norm": 1.6806127701824354, "learning_rate": 2.3490944856301064e-07, "loss": 0.4356805682182312, "step": 6840 }, { "epoch": 1.5773576204749826, "grad_norm": 1.5102184976880006, "learning_rate": 2.346640014045723e-07, "loss": 0.46679362654685974, "step": 6841 }, { "epoch": 1.5775881946045653, "grad_norm": 1.4361079018846885, "learning_rate": 2.3441866549547817e-07, "loss": 0.4837648272514343, "step": 6842 }, { "epoch": 1.577818768734148, "grad_norm": 1.5395603940472438, "learning_rate": 2.341734408713897e-07, "loss": 0.42723533511161804, "step": 6843 }, { "epoch": 1.5780493428637308, "grad_norm": 1.7296429757269751, "learning_rate": 2.3392832756795322e-07, "loss": 0.3680928647518158, "step": 6844 }, { "epoch": 1.5782799169933135, "grad_norm": 1.3398871717628533, "learning_rate": 2.3368332562079797e-07, "loss": 0.434980571269989, "step": 6845 }, { "epoch": 1.578510491122896, "grad_norm": 1.5976407072584213, "learning_rate": 2.3343843506553805e-07, "loss": 0.45552271604537964, "step": 6846 }, { "epoch": 1.5787410652524787, "grad_norm": 1.5496903398620734, "learning_rate": 2.331936559377702e-07, "loss": 0.4292616844177246, "step": 6847 }, { "epoch": 1.5789716393820612, "grad_norm": 1.6907239258434268, "learning_rate": 2.3294898827307573e-07, "loss": 0.5025339126586914, "step": 6848 }, { "epoch": 1.579202213511644, "grad_norm": 1.434142265629081, "learning_rate": 2.3270443210701996e-07, "loss": 0.47567370533943176, "step": 6849 }, { "epoch": 1.5794327876412266, "grad_norm": 1.9792768486961878, "learning_rate": 2.3245998747515095e-07, "loss": 0.5435467958450317, "step": 6850 }, { "epoch": 1.5796633617708093, "grad_norm": 1.2141081677893035, "learning_rate": 2.3221565441300194e-07, "loss": 0.4409145712852478, "step": 6851 }, { "epoch": 1.579893935900392, "grad_norm": 1.3643265195449554, "learning_rate": 2.3197143295608845e-07, "loss": 0.40482181310653687, "step": 6852 }, { "epoch": 1.5801245100299748, "grad_norm": 1.8983898955785605, "learning_rate": 2.317273231399113e-07, "loss": 0.40231794118881226, "step": 6853 }, { "epoch": 1.5803550841595573, "grad_norm": 1.3860542767537625, "learning_rate": 2.314833249999535e-07, "loss": 0.43245166540145874, "step": 6854 }, { "epoch": 1.58058565828914, "grad_norm": 1.5386782332278715, "learning_rate": 2.3123943857168315e-07, "loss": 0.40237659215927124, "step": 6855 }, { "epoch": 1.5808162324187225, "grad_norm": 1.7869361833965254, "learning_rate": 2.309956638905517e-07, "loss": 0.48900318145751953, "step": 6856 }, { "epoch": 1.5810468065483052, "grad_norm": 1.482622476685355, "learning_rate": 2.3075200099199422e-07, "loss": 0.42364567518234253, "step": 6857 }, { "epoch": 1.581277380677888, "grad_norm": 1.6159587255295897, "learning_rate": 2.3050844991142958e-07, "loss": 0.4658735990524292, "step": 6858 }, { "epoch": 1.5815079548074706, "grad_norm": 1.4775627716781476, "learning_rate": 2.3026501068426007e-07, "loss": 0.42268991470336914, "step": 6859 }, { "epoch": 1.5817385289370534, "grad_norm": 1.4348002511722773, "learning_rate": 2.3002168334587247e-07, "loss": 0.44876742362976074, "step": 6860 }, { "epoch": 1.581969103066636, "grad_norm": 1.5171591869453156, "learning_rate": 2.2977846793163646e-07, "loss": 0.42540132999420166, "step": 6861 }, { "epoch": 1.5821996771962186, "grad_norm": 1.4296859038074168, "learning_rate": 2.2953536447690636e-07, "loss": 0.48768138885498047, "step": 6862 }, { "epoch": 1.5824302513258013, "grad_norm": 1.5445046236967466, "learning_rate": 2.292923730170192e-07, "loss": 0.42905953526496887, "step": 6863 }, { "epoch": 1.5826608254553838, "grad_norm": 1.4472242985886439, "learning_rate": 2.2904949358729653e-07, "loss": 0.4103778004646301, "step": 6864 }, { "epoch": 1.5828913995849665, "grad_norm": 1.5180272333652802, "learning_rate": 2.2880672622304331e-07, "loss": 0.39303290843963623, "step": 6865 }, { "epoch": 1.5831219737145492, "grad_norm": 1.4702183686842207, "learning_rate": 2.2856407095954843e-07, "loss": 0.5087130069732666, "step": 6866 }, { "epoch": 1.583352547844132, "grad_norm": 1.5644640444387603, "learning_rate": 2.283215278320839e-07, "loss": 0.33117055892944336, "step": 6867 }, { "epoch": 1.5835831219737146, "grad_norm": 1.7090383225203818, "learning_rate": 2.280790968759063e-07, "loss": 0.41781488060951233, "step": 6868 }, { "epoch": 1.5838136961032974, "grad_norm": 1.4121975925065597, "learning_rate": 2.2783677812625523e-07, "loss": 0.5104382634162903, "step": 6869 }, { "epoch": 1.5840442702328799, "grad_norm": 1.5723614045021508, "learning_rate": 2.2759457161835372e-07, "loss": 0.3987969160079956, "step": 6870 }, { "epoch": 1.5842748443624626, "grad_norm": 1.705658009146651, "learning_rate": 2.2735247738740936e-07, "loss": 0.4723064601421356, "step": 6871 }, { "epoch": 1.584505418492045, "grad_norm": 1.707721278006975, "learning_rate": 2.2711049546861293e-07, "loss": 0.3942141830921173, "step": 6872 }, { "epoch": 1.5847359926216278, "grad_norm": 1.5657011191058785, "learning_rate": 2.268686258971393e-07, "loss": 0.38271787762641907, "step": 6873 }, { "epoch": 1.5849665667512105, "grad_norm": 1.3977071321322045, "learning_rate": 2.2662686870814607e-07, "loss": 0.4944665729999542, "step": 6874 }, { "epoch": 1.5851971408807932, "grad_norm": 1.7910306093530013, "learning_rate": 2.2638522393677562e-07, "loss": 0.46695005893707275, "step": 6875 }, { "epoch": 1.585427715010376, "grad_norm": 1.7074115790208728, "learning_rate": 2.2614369161815295e-07, "loss": 0.4620080888271332, "step": 6876 }, { "epoch": 1.5856582891399587, "grad_norm": 1.6877087434684872, "learning_rate": 2.2590227178738776e-07, "loss": 0.5650279521942139, "step": 6877 }, { "epoch": 1.5858888632695411, "grad_norm": 1.3471081039016284, "learning_rate": 2.2566096447957227e-07, "loss": 0.3556622564792633, "step": 6878 }, { "epoch": 1.5861194373991239, "grad_norm": 1.3889188451731431, "learning_rate": 2.254197697297834e-07, "loss": 0.4978718161582947, "step": 6879 }, { "epoch": 1.5863500115287064, "grad_norm": 1.375490517958548, "learning_rate": 2.2517868757308146e-07, "loss": 0.4759003520011902, "step": 6880 }, { "epoch": 1.586580585658289, "grad_norm": 1.579013983466932, "learning_rate": 2.2493771804450945e-07, "loss": 0.5078370571136475, "step": 6881 }, { "epoch": 1.5868111597878718, "grad_norm": 1.3607586792133322, "learning_rate": 2.2469686117909547e-07, "loss": 0.4188239276409149, "step": 6882 }, { "epoch": 1.5870417339174545, "grad_norm": 1.3488510335317552, "learning_rate": 2.2445611701184997e-07, "loss": 0.4075232744216919, "step": 6883 }, { "epoch": 1.5872723080470372, "grad_norm": 1.5004910712339554, "learning_rate": 2.2421548557776794e-07, "loss": 0.3643442988395691, "step": 6884 }, { "epoch": 1.58750288217662, "grad_norm": 1.4193604715362476, "learning_rate": 2.2397496691182716e-07, "loss": 0.38767147064208984, "step": 6885 }, { "epoch": 1.5877334563062024, "grad_norm": 1.6373352976605955, "learning_rate": 2.2373456104899e-07, "loss": 0.4874354600906372, "step": 6886 }, { "epoch": 1.5879640304357852, "grad_norm": 1.5573200679287742, "learning_rate": 2.2349426802420134e-07, "loss": 0.46412762999534607, "step": 6887 }, { "epoch": 1.5881946045653677, "grad_norm": 1.3720639419051985, "learning_rate": 2.2325408787239054e-07, "loss": 0.4299372434616089, "step": 6888 }, { "epoch": 1.5884251786949504, "grad_norm": 1.6309152140238423, "learning_rate": 2.230140206284703e-07, "loss": 0.3962220549583435, "step": 6889 }, { "epoch": 1.588655752824533, "grad_norm": 1.617512400235996, "learning_rate": 2.2277406632733653e-07, "loss": 0.5048998594284058, "step": 6890 }, { "epoch": 1.5888863269541158, "grad_norm": 2.0443646004817024, "learning_rate": 2.2253422500386932e-07, "loss": 0.35463857650756836, "step": 6891 }, { "epoch": 1.5891169010836985, "grad_norm": 1.5696832175175914, "learning_rate": 2.2229449669293165e-07, "loss": 0.3969672620296478, "step": 6892 }, { "epoch": 1.5893474752132812, "grad_norm": 1.5166803382402412, "learning_rate": 2.22054881429371e-07, "loss": 0.36300575733184814, "step": 6893 }, { "epoch": 1.5895780493428637, "grad_norm": 1.41057555150973, "learning_rate": 2.2181537924801729e-07, "loss": 0.45796507596969604, "step": 6894 }, { "epoch": 1.5898086234724462, "grad_norm": 1.556089643432737, "learning_rate": 2.2157599018368488e-07, "loss": 0.42725688219070435, "step": 6895 }, { "epoch": 1.590039197602029, "grad_norm": 1.8436048050065164, "learning_rate": 2.213367142711714e-07, "loss": 0.4959419369697571, "step": 6896 }, { "epoch": 1.5902697717316117, "grad_norm": 1.6607109480306586, "learning_rate": 2.2109755154525821e-07, "loss": 0.3707115948200226, "step": 6897 }, { "epoch": 1.5905003458611944, "grad_norm": 1.4025605906760028, "learning_rate": 2.2085850204070989e-07, "loss": 0.3647577166557312, "step": 6898 }, { "epoch": 1.590730919990777, "grad_norm": 1.505368584241417, "learning_rate": 2.2061956579227447e-07, "loss": 0.42227697372436523, "step": 6899 }, { "epoch": 1.5909614941203598, "grad_norm": 1.508703122498175, "learning_rate": 2.2038074283468412e-07, "loss": 0.41736292839050293, "step": 6900 }, { "epoch": 1.5911920682499423, "grad_norm": 1.6418039973045746, "learning_rate": 2.201420332026538e-07, "loss": 0.46005967259407043, "step": 6901 }, { "epoch": 1.591422642379525, "grad_norm": 1.4328523009517202, "learning_rate": 2.1990343693088243e-07, "loss": 0.3572643995285034, "step": 6902 }, { "epoch": 1.5916532165091075, "grad_norm": 1.744760153255399, "learning_rate": 2.196649540540527e-07, "loss": 0.5321012735366821, "step": 6903 }, { "epoch": 1.5918837906386902, "grad_norm": 1.5415731453823578, "learning_rate": 2.194265846068305e-07, "loss": 0.4913836419582367, "step": 6904 }, { "epoch": 1.592114364768273, "grad_norm": 1.7016363411577065, "learning_rate": 2.1918832862386493e-07, "loss": 0.37674903869628906, "step": 6905 }, { "epoch": 1.5923449388978557, "grad_norm": 1.5772289300833298, "learning_rate": 2.1895018613978934e-07, "loss": 0.4385930001735687, "step": 6906 }, { "epoch": 1.5925755130274384, "grad_norm": 2.224743671968565, "learning_rate": 2.1871215718921964e-07, "loss": 0.5219674706459045, "step": 6907 }, { "epoch": 1.592806087157021, "grad_norm": 1.5215408344839954, "learning_rate": 2.1847424180675622e-07, "loss": 0.4241113066673279, "step": 6908 }, { "epoch": 1.5930366612866036, "grad_norm": 1.4296843598144484, "learning_rate": 2.1823644002698237e-07, "loss": 0.4008786082267761, "step": 6909 }, { "epoch": 1.5932672354161863, "grad_norm": 1.5021365471039205, "learning_rate": 2.179987518844645e-07, "loss": 0.3333933651447296, "step": 6910 }, { "epoch": 1.5934978095457688, "grad_norm": 1.652596855301234, "learning_rate": 2.1776117741375343e-07, "loss": 0.48857730627059937, "step": 6911 }, { "epoch": 1.5937283836753515, "grad_norm": 1.4724322236306013, "learning_rate": 2.1752371664938306e-07, "loss": 0.37393617630004883, "step": 6912 }, { "epoch": 1.5939589578049342, "grad_norm": 1.4102085657254086, "learning_rate": 2.172863696258709e-07, "loss": 0.5365080833435059, "step": 6913 }, { "epoch": 1.594189531934517, "grad_norm": 1.7683912421422305, "learning_rate": 2.1704913637771705e-07, "loss": 0.49318936467170715, "step": 6914 }, { "epoch": 1.5944201060640997, "grad_norm": 1.8200372673393599, "learning_rate": 2.1681201693940666e-07, "loss": 0.37682920694351196, "step": 6915 }, { "epoch": 1.5946506801936824, "grad_norm": 1.4120260343966702, "learning_rate": 2.1657501134540657e-07, "loss": 0.4894877076148987, "step": 6916 }, { "epoch": 1.5948812543232649, "grad_norm": 1.5895963005275906, "learning_rate": 2.1633811963016869e-07, "loss": 0.4200783967971802, "step": 6917 }, { "epoch": 1.5951118284528476, "grad_norm": 1.7361608161591027, "learning_rate": 2.1610134182812702e-07, "loss": 0.3953052759170532, "step": 6918 }, { "epoch": 1.59534240258243, "grad_norm": 1.4727518091374385, "learning_rate": 2.158646779736999e-07, "loss": 0.4006558656692505, "step": 6919 }, { "epoch": 1.5955729767120128, "grad_norm": 1.7355475804217702, "learning_rate": 2.1562812810128906e-07, "loss": 0.3749210238456726, "step": 6920 }, { "epoch": 1.5958035508415955, "grad_norm": 1.5378158592599445, "learning_rate": 2.1539169224527887e-07, "loss": 0.4688538610935211, "step": 6921 }, { "epoch": 1.5960341249711782, "grad_norm": 1.590308500795848, "learning_rate": 2.151553704400383e-07, "loss": 0.4483727216720581, "step": 6922 }, { "epoch": 1.596264699100761, "grad_norm": 1.589431373760787, "learning_rate": 2.149191627199185e-07, "loss": 0.5118253827095032, "step": 6923 }, { "epoch": 1.5964952732303437, "grad_norm": 1.644731800905039, "learning_rate": 2.1468306911925525e-07, "loss": 0.43641170859336853, "step": 6924 }, { "epoch": 1.5967258473599262, "grad_norm": 1.4755114053374785, "learning_rate": 2.1444708967236657e-07, "loss": 0.38253384828567505, "step": 6925 }, { "epoch": 1.596956421489509, "grad_norm": 1.5638213373412855, "learning_rate": 2.1421122441355476e-07, "loss": 0.43674635887145996, "step": 6926 }, { "epoch": 1.5971869956190914, "grad_norm": 1.3940207891491625, "learning_rate": 2.1397547337710519e-07, "loss": 0.37392908334732056, "step": 6927 }, { "epoch": 1.597417569748674, "grad_norm": 1.5097907813025324, "learning_rate": 2.13739836597287e-07, "loss": 0.4531250298023224, "step": 6928 }, { "epoch": 1.5976481438782568, "grad_norm": 1.3308296891253455, "learning_rate": 2.13504314108352e-07, "loss": 0.38579899072647095, "step": 6929 }, { "epoch": 1.5978787180078395, "grad_norm": 1.8618083111554995, "learning_rate": 2.1326890594453563e-07, "loss": 0.5215288400650024, "step": 6930 }, { "epoch": 1.5981092921374223, "grad_norm": 1.6019249166669218, "learning_rate": 2.130336121400572e-07, "loss": 0.4396743178367615, "step": 6931 }, { "epoch": 1.598339866267005, "grad_norm": 1.5371889029106374, "learning_rate": 2.127984327291188e-07, "loss": 0.5068432688713074, "step": 6932 }, { "epoch": 1.5985704403965875, "grad_norm": 1.7855756215277538, "learning_rate": 2.1256336774590643e-07, "loss": 0.48809194564819336, "step": 6933 }, { "epoch": 1.5988010145261702, "grad_norm": 1.4166815561679078, "learning_rate": 2.123284172245885e-07, "loss": 0.4191613793373108, "step": 6934 }, { "epoch": 1.5990315886557527, "grad_norm": 1.5763678308245206, "learning_rate": 2.1209358119931843e-07, "loss": 0.41901010274887085, "step": 6935 }, { "epoch": 1.5992621627853354, "grad_norm": 1.8296822391624505, "learning_rate": 2.1185885970423133e-07, "loss": 0.5046913623809814, "step": 6936 }, { "epoch": 1.5994927369149181, "grad_norm": 2.1559492699976492, "learning_rate": 2.1162425277344675e-07, "loss": 0.5113730430603027, "step": 6937 }, { "epoch": 1.5997233110445008, "grad_norm": 1.520077424866564, "learning_rate": 2.1138976044106672e-07, "loss": 0.34129637479782104, "step": 6938 }, { "epoch": 1.5999538851740835, "grad_norm": 1.5890047902961466, "learning_rate": 2.1115538274117762e-07, "loss": 0.4492289423942566, "step": 6939 }, { "epoch": 1.6001844593036663, "grad_norm": 1.5532375131614289, "learning_rate": 2.1092111970784833e-07, "loss": 0.41002708673477173, "step": 6940 }, { "epoch": 1.6004150334332488, "grad_norm": 1.887817008406582, "learning_rate": 2.1068697137513113e-07, "loss": 0.5444740056991577, "step": 6941 }, { "epoch": 1.6006456075628315, "grad_norm": 1.518981510824895, "learning_rate": 2.1045293777706196e-07, "loss": 0.3489699959754944, "step": 6942 }, { "epoch": 1.600876181692414, "grad_norm": 1.5115486172446684, "learning_rate": 2.1021901894766025e-07, "loss": 0.41807419061660767, "step": 6943 }, { "epoch": 1.6011067558219967, "grad_norm": 1.7376028221450257, "learning_rate": 2.0998521492092857e-07, "loss": 0.41074657440185547, "step": 6944 }, { "epoch": 1.6013373299515794, "grad_norm": 1.370751011576157, "learning_rate": 2.097515257308521e-07, "loss": 0.4085312485694885, "step": 6945 }, { "epoch": 1.6015679040811621, "grad_norm": 1.6632563260665783, "learning_rate": 2.095179514114006e-07, "loss": 0.42699170112609863, "step": 6946 }, { "epoch": 1.6017984782107448, "grad_norm": 1.6347540938108835, "learning_rate": 2.0928449199652597e-07, "loss": 0.40041583776474, "step": 6947 }, { "epoch": 1.6020290523403276, "grad_norm": 1.385214375087801, "learning_rate": 2.090511475201643e-07, "loss": 0.47465208172798157, "step": 6948 }, { "epoch": 1.60225962646991, "grad_norm": 1.5233208405026366, "learning_rate": 2.0881791801623405e-07, "loss": 0.4338058829307556, "step": 6949 }, { "epoch": 1.6024902005994928, "grad_norm": 1.857588116409586, "learning_rate": 2.0858480351863794e-07, "loss": 0.5398772954940796, "step": 6950 }, { "epoch": 1.6027207747290753, "grad_norm": 1.41461865858101, "learning_rate": 2.0835180406126151e-07, "loss": 0.40750259160995483, "step": 6951 }, { "epoch": 1.602951348858658, "grad_norm": 1.6330208123854022, "learning_rate": 2.0811891967797336e-07, "loss": 0.4365716278553009, "step": 6952 }, { "epoch": 1.6031819229882407, "grad_norm": 1.395812913626374, "learning_rate": 2.078861504026258e-07, "loss": 0.41537174582481384, "step": 6953 }, { "epoch": 1.6034124971178234, "grad_norm": 1.331855885968294, "learning_rate": 2.0765349626905394e-07, "loss": 0.3687853217124939, "step": 6954 }, { "epoch": 1.6036430712474061, "grad_norm": 1.4291699726024594, "learning_rate": 2.074209573110769e-07, "loss": 0.48866790533065796, "step": 6955 }, { "epoch": 1.6038736453769888, "grad_norm": 1.7541297686576787, "learning_rate": 2.0718853356249588e-07, "loss": 0.4618760347366333, "step": 6956 }, { "epoch": 1.6041042195065713, "grad_norm": 1.820272898606224, "learning_rate": 2.0695622505709654e-07, "loss": 0.365873247385025, "step": 6957 }, { "epoch": 1.604334793636154, "grad_norm": 1.7127779412462347, "learning_rate": 2.0672403182864706e-07, "loss": 0.4346495270729065, "step": 6958 }, { "epoch": 1.6045653677657365, "grad_norm": 1.4385774019168192, "learning_rate": 2.0649195391089935e-07, "loss": 0.3995724618434906, "step": 6959 }, { "epoch": 1.6047959418953193, "grad_norm": 1.890499669463449, "learning_rate": 2.062599913375882e-07, "loss": 0.4628515839576721, "step": 6960 }, { "epoch": 1.605026516024902, "grad_norm": 1.8491035226730044, "learning_rate": 2.060281441424314e-07, "loss": 0.39776262640953064, "step": 6961 }, { "epoch": 1.6052570901544847, "grad_norm": 1.6838333142700899, "learning_rate": 2.057964123591307e-07, "loss": 0.4622994065284729, "step": 6962 }, { "epoch": 1.6054876642840674, "grad_norm": 1.3806987670969462, "learning_rate": 2.0556479602137033e-07, "loss": 0.4028933048248291, "step": 6963 }, { "epoch": 1.6057182384136501, "grad_norm": 1.592137730506949, "learning_rate": 2.0533329516281838e-07, "loss": 0.46639660000801086, "step": 6964 }, { "epoch": 1.6059488125432326, "grad_norm": 1.3243378898371028, "learning_rate": 2.0510190981712537e-07, "loss": 0.4063863158226013, "step": 6965 }, { "epoch": 1.6061793866728153, "grad_norm": 1.6927530193908227, "learning_rate": 2.0487064001792586e-07, "loss": 0.471376895904541, "step": 6966 }, { "epoch": 1.6064099608023978, "grad_norm": 1.5262354616100662, "learning_rate": 2.0463948579883727e-07, "loss": 0.5094102025032043, "step": 6967 }, { "epoch": 1.6066405349319806, "grad_norm": 1.613731344454896, "learning_rate": 2.0440844719346039e-07, "loss": 0.3922441005706787, "step": 6968 }, { "epoch": 1.6068711090615633, "grad_norm": 1.7524315605420397, "learning_rate": 2.0417752423537882e-07, "loss": 0.47777149081230164, "step": 6969 }, { "epoch": 1.607101683191146, "grad_norm": 2.2487851564601065, "learning_rate": 2.0394671695815924e-07, "loss": 0.5780138969421387, "step": 6970 }, { "epoch": 1.6073322573207287, "grad_norm": 1.6028588432287403, "learning_rate": 2.0371602539535237e-07, "loss": 0.43968862295150757, "step": 6971 }, { "epoch": 1.6075628314503114, "grad_norm": 1.877374036184133, "learning_rate": 2.0348544958049096e-07, "loss": 0.5204722881317139, "step": 6972 }, { "epoch": 1.607793405579894, "grad_norm": 1.5207193577135807, "learning_rate": 2.0325498954709198e-07, "loss": 0.3944805860519409, "step": 6973 }, { "epoch": 1.6080239797094766, "grad_norm": 1.454235622222141, "learning_rate": 2.0302464532865505e-07, "loss": 0.42686349153518677, "step": 6974 }, { "epoch": 1.6082545538390591, "grad_norm": 1.5958289830519565, "learning_rate": 2.027944169586633e-07, "loss": 0.3860762119293213, "step": 6975 }, { "epoch": 1.6084851279686418, "grad_norm": 1.880005605643703, "learning_rate": 2.0256430447058215e-07, "loss": 0.5570458769798279, "step": 6976 }, { "epoch": 1.6087157020982246, "grad_norm": 1.8351241687154358, "learning_rate": 2.0233430789786132e-07, "loss": 0.4556728005409241, "step": 6977 }, { "epoch": 1.6089462762278073, "grad_norm": 1.4746534507162423, "learning_rate": 2.0210442727393285e-07, "loss": 0.48365700244903564, "step": 6978 }, { "epoch": 1.60917685035739, "grad_norm": 1.7835628524046172, "learning_rate": 2.018746626322124e-07, "loss": 0.4456971287727356, "step": 6979 }, { "epoch": 1.6094074244869727, "grad_norm": 1.6700237073697568, "learning_rate": 2.0164501400609835e-07, "loss": 0.41877123713493347, "step": 6980 }, { "epoch": 1.6096379986165552, "grad_norm": 1.3803715462197303, "learning_rate": 2.0141548142897246e-07, "loss": 0.4073547124862671, "step": 6981 }, { "epoch": 1.609868572746138, "grad_norm": 1.5181775501419725, "learning_rate": 2.0118606493420021e-07, "loss": 0.4987693727016449, "step": 6982 }, { "epoch": 1.6100991468757204, "grad_norm": 1.603543806365415, "learning_rate": 2.0095676455512878e-07, "loss": 0.4391751289367676, "step": 6983 }, { "epoch": 1.6103297210053031, "grad_norm": 1.4062982467603231, "learning_rate": 2.0072758032508996e-07, "loss": 0.409262478351593, "step": 6984 }, { "epoch": 1.6105602951348859, "grad_norm": 1.353394057864669, "learning_rate": 2.0049851227739744e-07, "loss": 0.38653457164764404, "step": 6985 }, { "epoch": 1.6107908692644686, "grad_norm": 1.9189325963312815, "learning_rate": 2.0026956044534914e-07, "loss": 0.4824348986148834, "step": 6986 }, { "epoch": 1.6110214433940513, "grad_norm": 1.7037748706735498, "learning_rate": 2.00040724862225e-07, "loss": 0.45774850249290466, "step": 6987 }, { "epoch": 1.611252017523634, "grad_norm": 1.5419477618151842, "learning_rate": 1.9981200556128906e-07, "loss": 0.45437830686569214, "step": 6988 }, { "epoch": 1.6114825916532165, "grad_norm": 1.4581568342693196, "learning_rate": 1.9958340257578753e-07, "loss": 0.4563155770301819, "step": 6989 }, { "epoch": 1.6117131657827992, "grad_norm": 1.7363246075229848, "learning_rate": 1.9935491593895048e-07, "loss": 0.5786794424057007, "step": 6990 }, { "epoch": 1.6119437399123817, "grad_norm": 1.6120161181322603, "learning_rate": 1.991265456839909e-07, "loss": 0.5290218591690063, "step": 6991 }, { "epoch": 1.6121743140419644, "grad_norm": 1.607774677113548, "learning_rate": 1.9889829184410434e-07, "loss": 0.3456650376319885, "step": 6992 }, { "epoch": 1.6124048881715471, "grad_norm": 1.414142582496391, "learning_rate": 1.9867015445247015e-07, "loss": 0.40869832038879395, "step": 6993 }, { "epoch": 1.6126354623011299, "grad_norm": 2.3563881452147992, "learning_rate": 1.9844213354225004e-07, "loss": 0.49926644563674927, "step": 6994 }, { "epoch": 1.6128660364307126, "grad_norm": 1.904270429684393, "learning_rate": 1.9821422914658957e-07, "loss": 0.4874018132686615, "step": 6995 }, { "epoch": 1.6130966105602953, "grad_norm": 1.872252891476363, "learning_rate": 1.9798644129861654e-07, "loss": 0.4228810667991638, "step": 6996 }, { "epoch": 1.6133271846898778, "grad_norm": 1.4437194678200662, "learning_rate": 1.9775877003144237e-07, "loss": 0.4309043884277344, "step": 6997 }, { "epoch": 1.6135577588194605, "grad_norm": 1.6133739556944033, "learning_rate": 1.9753121537816142e-07, "loss": 0.3917756676673889, "step": 6998 }, { "epoch": 1.613788332949043, "grad_norm": 1.492105866056543, "learning_rate": 1.9730377737185145e-07, "loss": 0.4074435830116272, "step": 6999 }, { "epoch": 1.6140189070786257, "grad_norm": 1.7474889804918834, "learning_rate": 1.9707645604557243e-07, "loss": 0.4581322968006134, "step": 7000 }, { "epoch": 1.6142494812082084, "grad_norm": 1.5240615238309698, "learning_rate": 1.9684925143236776e-07, "loss": 0.4479151666164398, "step": 7001 }, { "epoch": 1.6144800553377912, "grad_norm": 1.4379805154063257, "learning_rate": 1.966221635652643e-07, "loss": 0.3378838300704956, "step": 7002 }, { "epoch": 1.6147106294673739, "grad_norm": 1.6755517427089033, "learning_rate": 1.96395192477271e-07, "loss": 0.3383278250694275, "step": 7003 }, { "epoch": 1.6149412035969566, "grad_norm": 1.5430108527415651, "learning_rate": 1.9616833820138091e-07, "loss": 0.5164717435836792, "step": 7004 }, { "epoch": 1.615171777726539, "grad_norm": 1.6927378959186403, "learning_rate": 1.9594160077056932e-07, "loss": 0.4548792243003845, "step": 7005 }, { "epoch": 1.6154023518561216, "grad_norm": 1.608730816141968, "learning_rate": 1.9571498021779531e-07, "loss": 0.41074928641319275, "step": 7006 }, { "epoch": 1.6156329259857043, "grad_norm": 1.5384399915677613, "learning_rate": 1.9548847657599976e-07, "loss": 0.4156193137168884, "step": 7007 }, { "epoch": 1.615863500115287, "grad_norm": 1.742725966102226, "learning_rate": 1.95262089878108e-07, "loss": 0.4602770209312439, "step": 7008 }, { "epoch": 1.6160940742448697, "grad_norm": 1.5880816009582301, "learning_rate": 1.9503582015702713e-07, "loss": 0.4911346733570099, "step": 7009 }, { "epoch": 1.6163246483744524, "grad_norm": 1.5007140709934312, "learning_rate": 1.9480966744564764e-07, "loss": 0.394087553024292, "step": 7010 }, { "epoch": 1.6165552225040352, "grad_norm": 1.5836059389854649, "learning_rate": 1.9458363177684367e-07, "loss": 0.4845706820487976, "step": 7011 }, { "epoch": 1.6167857966336177, "grad_norm": 1.7088454795128305, "learning_rate": 1.9435771318347116e-07, "loss": 0.49142736196517944, "step": 7012 }, { "epoch": 1.6170163707632004, "grad_norm": 1.3798831769041013, "learning_rate": 1.9413191169836996e-07, "loss": 0.4408283829689026, "step": 7013 }, { "epoch": 1.6172469448927829, "grad_norm": 1.6476950016993046, "learning_rate": 1.9390622735436268e-07, "loss": 0.6088640689849854, "step": 7014 }, { "epoch": 1.6174775190223656, "grad_norm": 1.912745817268737, "learning_rate": 1.93680660184255e-07, "loss": 0.5208842158317566, "step": 7015 }, { "epoch": 1.6177080931519483, "grad_norm": 1.7742607180865566, "learning_rate": 1.9345521022083488e-07, "loss": 0.5652821660041809, "step": 7016 }, { "epoch": 1.617938667281531, "grad_norm": 1.5895189074949856, "learning_rate": 1.9322987749687437e-07, "loss": 0.4861832857131958, "step": 7017 }, { "epoch": 1.6181692414111137, "grad_norm": 1.5693969535816144, "learning_rate": 1.930046620451272e-07, "loss": 0.39583832025527954, "step": 7018 }, { "epoch": 1.6183998155406965, "grad_norm": 1.6283824576887038, "learning_rate": 1.927795638983313e-07, "loss": 0.5638653039932251, "step": 7019 }, { "epoch": 1.618630389670279, "grad_norm": 1.7595661530223012, "learning_rate": 1.9255458308920648e-07, "loss": 0.4737275242805481, "step": 7020 }, { "epoch": 1.6188609637998617, "grad_norm": 1.3807112997659796, "learning_rate": 1.923297196504563e-07, "loss": 0.4526802897453308, "step": 7021 }, { "epoch": 1.6190915379294442, "grad_norm": 1.5519742811018764, "learning_rate": 1.9210497361476708e-07, "loss": 0.40800565481185913, "step": 7022 }, { "epoch": 1.6193221120590269, "grad_norm": 1.3169867108502276, "learning_rate": 1.9188034501480744e-07, "loss": 0.39532414078712463, "step": 7023 }, { "epoch": 1.6195526861886096, "grad_norm": 1.3982522966659368, "learning_rate": 1.9165583388322993e-07, "loss": 0.40236538648605347, "step": 7024 }, { "epoch": 1.6197832603181923, "grad_norm": 1.4838960013292628, "learning_rate": 1.91431440252669e-07, "loss": 0.4421047866344452, "step": 7025 }, { "epoch": 1.620013834447775, "grad_norm": 1.5688320926864374, "learning_rate": 1.9120716415574322e-07, "loss": 0.4149084687232971, "step": 7026 }, { "epoch": 1.6202444085773577, "grad_norm": 1.8747733544619556, "learning_rate": 1.9098300562505264e-07, "loss": 0.4186127185821533, "step": 7027 }, { "epoch": 1.6204749827069402, "grad_norm": 1.5276498671204974, "learning_rate": 1.9075896469318132e-07, "loss": 0.4649406671524048, "step": 7028 }, { "epoch": 1.620705556836523, "grad_norm": 1.5217002126023946, "learning_rate": 1.9053504139269593e-07, "loss": 0.43240052461624146, "step": 7029 }, { "epoch": 1.6209361309661054, "grad_norm": 1.7731525747902717, "learning_rate": 1.9031123575614628e-07, "loss": 0.4874862730503082, "step": 7030 }, { "epoch": 1.6211667050956882, "grad_norm": 1.6133636879972175, "learning_rate": 1.900875478160644e-07, "loss": 0.3771815896034241, "step": 7031 }, { "epoch": 1.6213972792252709, "grad_norm": 1.548316338784864, "learning_rate": 1.898639776049653e-07, "loss": 0.49882376194000244, "step": 7032 }, { "epoch": 1.6216278533548536, "grad_norm": 1.5189621230999546, "learning_rate": 1.896405251553479e-07, "loss": 0.3813830614089966, "step": 7033 }, { "epoch": 1.6218584274844363, "grad_norm": 1.588790821712345, "learning_rate": 1.8941719049969272e-07, "loss": 0.41883599758148193, "step": 7034 }, { "epoch": 1.622089001614019, "grad_norm": 1.4271058877816405, "learning_rate": 1.8919397367046409e-07, "loss": 0.42194586992263794, "step": 7035 }, { "epoch": 1.6223195757436015, "grad_norm": 1.5957469997065072, "learning_rate": 1.889708747001084e-07, "loss": 0.36967700719833374, "step": 7036 }, { "epoch": 1.6225501498731842, "grad_norm": 1.4373460175753532, "learning_rate": 1.887478936210556e-07, "loss": 0.4493946433067322, "step": 7037 }, { "epoch": 1.6227807240027667, "grad_norm": 1.6526676224310628, "learning_rate": 1.8852503046571833e-07, "loss": 0.42121458053588867, "step": 7038 }, { "epoch": 1.6230112981323495, "grad_norm": 1.430632776113786, "learning_rate": 1.8830228526649207e-07, "loss": 0.4529588222503662, "step": 7039 }, { "epoch": 1.6232418722619322, "grad_norm": 1.537552702708545, "learning_rate": 1.88079658055755e-07, "loss": 0.387844443321228, "step": 7040 }, { "epoch": 1.623472446391515, "grad_norm": 1.4872655198554567, "learning_rate": 1.8785714886586802e-07, "loss": 0.49954158067703247, "step": 7041 }, { "epoch": 1.6237030205210976, "grad_norm": 1.3845875929093436, "learning_rate": 1.8763475772917548e-07, "loss": 0.4016296863555908, "step": 7042 }, { "epoch": 1.6239335946506803, "grad_norm": 1.5208389143205874, "learning_rate": 1.8741248467800362e-07, "loss": 0.358657568693161, "step": 7043 }, { "epoch": 1.6241641687802628, "grad_norm": 1.471037478852436, "learning_rate": 1.8719032974466264e-07, "loss": 0.434385746717453, "step": 7044 }, { "epoch": 1.6243947429098455, "grad_norm": 1.4705602216948914, "learning_rate": 1.8696829296144466e-07, "loss": 0.4658992886543274, "step": 7045 }, { "epoch": 1.624625317039428, "grad_norm": 1.8724382429627917, "learning_rate": 1.8674637436062545e-07, "loss": 0.5438188910484314, "step": 7046 }, { "epoch": 1.6248558911690107, "grad_norm": 1.9024479318941907, "learning_rate": 1.8652457397446254e-07, "loss": 0.47364577651023865, "step": 7047 }, { "epoch": 1.6250864652985935, "grad_norm": 1.386287471529149, "learning_rate": 1.8630289183519733e-07, "loss": 0.3664509654045105, "step": 7048 }, { "epoch": 1.6253170394281762, "grad_norm": 1.5676786934992741, "learning_rate": 1.8608132797505317e-07, "loss": 0.4226282835006714, "step": 7049 }, { "epoch": 1.625547613557759, "grad_norm": 1.4581751590991685, "learning_rate": 1.8585988242623706e-07, "loss": 0.47477972507476807, "step": 7050 }, { "epoch": 1.6257781876873416, "grad_norm": 2.082606809210874, "learning_rate": 1.8563855522093786e-07, "loss": 0.5372269749641418, "step": 7051 }, { "epoch": 1.626008761816924, "grad_norm": 1.3565872618977541, "learning_rate": 1.8541734639132788e-07, "loss": 0.37929385900497437, "step": 7052 }, { "epoch": 1.6262393359465068, "grad_norm": 1.5119164625864447, "learning_rate": 1.8519625596956244e-07, "loss": 0.4029538631439209, "step": 7053 }, { "epoch": 1.6264699100760893, "grad_norm": 1.5739338248608081, "learning_rate": 1.8497528398777874e-07, "loss": 0.3932439982891083, "step": 7054 }, { "epoch": 1.626700484205672, "grad_norm": 1.5806776566898322, "learning_rate": 1.847544304780978e-07, "loss": 0.45190152525901794, "step": 7055 }, { "epoch": 1.6269310583352548, "grad_norm": 1.8629994959724827, "learning_rate": 1.8453369547262242e-07, "loss": 0.4852195382118225, "step": 7056 }, { "epoch": 1.6271616324648375, "grad_norm": 1.608209634523461, "learning_rate": 1.8431307900343918e-07, "loss": 0.41676801443099976, "step": 7057 }, { "epoch": 1.6273922065944202, "grad_norm": 1.388166685170728, "learning_rate": 1.8409258110261626e-07, "loss": 0.44374561309814453, "step": 7058 }, { "epoch": 1.627622780724003, "grad_norm": 1.5975340281654677, "learning_rate": 1.838722018022061e-07, "loss": 0.4348192811012268, "step": 7059 }, { "epoch": 1.6278533548535854, "grad_norm": 1.626194256762104, "learning_rate": 1.836519411342422e-07, "loss": 0.46572640538215637, "step": 7060 }, { "epoch": 1.6280839289831681, "grad_norm": 1.4985871084379754, "learning_rate": 1.8343179913074214e-07, "loss": 0.4633631408214569, "step": 7061 }, { "epoch": 1.6283145031127506, "grad_norm": 1.3260867645697678, "learning_rate": 1.8321177582370605e-07, "loss": 0.44420552253723145, "step": 7062 }, { "epoch": 1.6285450772423333, "grad_norm": 1.8207040168707305, "learning_rate": 1.8299187124511594e-07, "loss": 0.5628370046615601, "step": 7063 }, { "epoch": 1.628775651371916, "grad_norm": 1.7448936691285617, "learning_rate": 1.8277208542693778e-07, "loss": 0.5342314839363098, "step": 7064 }, { "epoch": 1.6290062255014988, "grad_norm": 1.529076197622531, "learning_rate": 1.82552418401119e-07, "loss": 0.440934419631958, "step": 7065 }, { "epoch": 1.6292367996310815, "grad_norm": 1.4532572456773438, "learning_rate": 1.823328701995912e-07, "loss": 0.45218637585639954, "step": 7066 }, { "epoch": 1.6294673737606642, "grad_norm": 1.456173637640115, "learning_rate": 1.8211344085426716e-07, "loss": 0.4059211015701294, "step": 7067 }, { "epoch": 1.6296979478902467, "grad_norm": 2.0474805024349876, "learning_rate": 1.818941303970435e-07, "loss": 0.5036444067955017, "step": 7068 }, { "epoch": 1.6299285220198294, "grad_norm": 1.6421868165266436, "learning_rate": 1.8167493885979935e-07, "loss": 0.5034196972846985, "step": 7069 }, { "epoch": 1.630159096149412, "grad_norm": 1.5247456374523982, "learning_rate": 1.8145586627439645e-07, "loss": 0.4199259281158447, "step": 7070 }, { "epoch": 1.6303896702789946, "grad_norm": 1.5913722133067008, "learning_rate": 1.8123691267267915e-07, "loss": 0.5439015626907349, "step": 7071 }, { "epoch": 1.6306202444085773, "grad_norm": 1.6181852234306913, "learning_rate": 1.810180780864743e-07, "loss": 0.4349868893623352, "step": 7072 }, { "epoch": 1.63085081853816, "grad_norm": 1.5299206997440553, "learning_rate": 1.807993625475921e-07, "loss": 0.39939552545547485, "step": 7073 }, { "epoch": 1.6310813926677428, "grad_norm": 1.575600412629914, "learning_rate": 1.8058076608782468e-07, "loss": 0.43073540925979614, "step": 7074 }, { "epoch": 1.6313119667973255, "grad_norm": 1.6461603718238804, "learning_rate": 1.8036228873894744e-07, "loss": 0.4735824465751648, "step": 7075 }, { "epoch": 1.631542540926908, "grad_norm": 1.466337846989889, "learning_rate": 1.8014393053271836e-07, "loss": 0.42971551418304443, "step": 7076 }, { "epoch": 1.6317731150564907, "grad_norm": 1.694502155411865, "learning_rate": 1.7992569150087823e-07, "loss": 0.48593759536743164, "step": 7077 }, { "epoch": 1.6320036891860732, "grad_norm": 1.55292324755966, "learning_rate": 1.7970757167514973e-07, "loss": 0.530194878578186, "step": 7078 }, { "epoch": 1.632234263315656, "grad_norm": 1.7324585048939796, "learning_rate": 1.794895710872394e-07, "loss": 0.43393629789352417, "step": 7079 }, { "epoch": 1.6324648374452386, "grad_norm": 1.5827349286667418, "learning_rate": 1.7927168976883556e-07, "loss": 0.4211798906326294, "step": 7080 }, { "epoch": 1.6326954115748213, "grad_norm": 1.5939322533043618, "learning_rate": 1.790539277516091e-07, "loss": 0.39001476764678955, "step": 7081 }, { "epoch": 1.632925985704404, "grad_norm": 1.6028280785725797, "learning_rate": 1.788362850672146e-07, "loss": 0.4360283613204956, "step": 7082 }, { "epoch": 1.6331565598339868, "grad_norm": 1.6516207153980025, "learning_rate": 1.7861876174728807e-07, "loss": 0.47754842042922974, "step": 7083 }, { "epoch": 1.6333871339635693, "grad_norm": 1.634690883802538, "learning_rate": 1.7840135782344888e-07, "loss": 0.35193490982055664, "step": 7084 }, { "epoch": 1.633617708093152, "grad_norm": 1.2825662437681398, "learning_rate": 1.7818407332729912e-07, "loss": 0.39997392892837524, "step": 7085 }, { "epoch": 1.6338482822227345, "grad_norm": 1.324570823301632, "learning_rate": 1.7796690829042328e-07, "loss": 0.3255331218242645, "step": 7086 }, { "epoch": 1.6340788563523172, "grad_norm": 1.424074701555127, "learning_rate": 1.777498627443882e-07, "loss": 0.47072282433509827, "step": 7087 }, { "epoch": 1.6343094304819, "grad_norm": 1.5293726959445282, "learning_rate": 1.775329367207441e-07, "loss": 0.4231484830379486, "step": 7088 }, { "epoch": 1.6345400046114826, "grad_norm": 1.4406985915809287, "learning_rate": 1.7731613025102276e-07, "loss": 0.37112197279930115, "step": 7089 }, { "epoch": 1.6347705787410653, "grad_norm": 1.5117815815493545, "learning_rate": 1.7709944336673986e-07, "loss": 0.5772623419761658, "step": 7090 }, { "epoch": 1.635001152870648, "grad_norm": 1.4205344879838042, "learning_rate": 1.7688287609939244e-07, "loss": 0.45922917127609253, "step": 7091 }, { "epoch": 1.6352317270002306, "grad_norm": 1.6262912271430976, "learning_rate": 1.7666642848046098e-07, "loss": 0.42784950137138367, "step": 7092 }, { "epoch": 1.6354623011298133, "grad_norm": 1.585709168390131, "learning_rate": 1.7645010054140873e-07, "loss": 0.4676967263221741, "step": 7093 }, { "epoch": 1.6356928752593958, "grad_norm": 1.4782811209898545, "learning_rate": 1.7623389231368046e-07, "loss": 0.434337317943573, "step": 7094 }, { "epoch": 1.6359234493889785, "grad_norm": 1.512954791126533, "learning_rate": 1.760178038287048e-07, "loss": 0.4667350947856903, "step": 7095 }, { "epoch": 1.6361540235185612, "grad_norm": 1.3397712801467159, "learning_rate": 1.7580183511789204e-07, "loss": 0.42233705520629883, "step": 7096 }, { "epoch": 1.636384597648144, "grad_norm": 1.5093056460018237, "learning_rate": 1.7558598621263565e-07, "loss": 0.4488460421562195, "step": 7097 }, { "epoch": 1.6366151717777266, "grad_norm": 1.6708888950919063, "learning_rate": 1.753702571443112e-07, "loss": 0.4264194667339325, "step": 7098 }, { "epoch": 1.6368457459073094, "grad_norm": 1.414729354018089, "learning_rate": 1.7515464794427715e-07, "loss": 0.32695144414901733, "step": 7099 }, { "epoch": 1.6370763200368919, "grad_norm": 2.0744464699438825, "learning_rate": 1.7493915864387487e-07, "loss": 0.3573018014431, "step": 7100 }, { "epoch": 1.6373068941664746, "grad_norm": 1.4506197336511393, "learning_rate": 1.7472378927442732e-07, "loss": 0.4545198082923889, "step": 7101 }, { "epoch": 1.637537468296057, "grad_norm": 1.59875503504847, "learning_rate": 1.7450853986724123e-07, "loss": 0.42589202523231506, "step": 7102 }, { "epoch": 1.6377680424256398, "grad_norm": 1.5169081767342318, "learning_rate": 1.742934104536048e-07, "loss": 0.4403502345085144, "step": 7103 }, { "epoch": 1.6379986165552225, "grad_norm": 1.7606747961526963, "learning_rate": 1.7407840106478955e-07, "loss": 0.4262208938598633, "step": 7104 }, { "epoch": 1.6382291906848052, "grad_norm": 1.6000265796951778, "learning_rate": 1.7386351173204905e-07, "loss": 0.4706578254699707, "step": 7105 }, { "epoch": 1.638459764814388, "grad_norm": 1.4657752166922586, "learning_rate": 1.7364874248661986e-07, "loss": 0.4526079297065735, "step": 7106 }, { "epoch": 1.6386903389439706, "grad_norm": 1.7833403214487409, "learning_rate": 1.734340933597207e-07, "loss": 0.42836326360702515, "step": 7107 }, { "epoch": 1.6389209130735531, "grad_norm": 1.4453465477500804, "learning_rate": 1.7321956438255292e-07, "loss": 0.42680823802948, "step": 7108 }, { "epoch": 1.6391514872031359, "grad_norm": 1.3964828689114657, "learning_rate": 1.7300515558630068e-07, "loss": 0.38365036249160767, "step": 7109 }, { "epoch": 1.6393820613327184, "grad_norm": 1.4748773918598719, "learning_rate": 1.7279086700213063e-07, "loss": 0.4153991937637329, "step": 7110 }, { "epoch": 1.639612635462301, "grad_norm": 1.5777502702437645, "learning_rate": 1.7257669866119163e-07, "loss": 0.42257291078567505, "step": 7111 }, { "epoch": 1.6398432095918838, "grad_norm": 1.7309640190055833, "learning_rate": 1.7236265059461498e-07, "loss": 0.34990063309669495, "step": 7112 }, { "epoch": 1.6400737837214665, "grad_norm": 1.3939407429934887, "learning_rate": 1.72148722833515e-07, "loss": 0.44848760962486267, "step": 7113 }, { "epoch": 1.6403043578510492, "grad_norm": 1.4649667660689574, "learning_rate": 1.7193491540898808e-07, "loss": 0.4649186134338379, "step": 7114 }, { "epoch": 1.640534931980632, "grad_norm": 1.5050161434573055, "learning_rate": 1.7172122835211333e-07, "loss": 0.480952650308609, "step": 7115 }, { "epoch": 1.6407655061102144, "grad_norm": 1.6101365826637175, "learning_rate": 1.7150766169395235e-07, "loss": 0.4669501483440399, "step": 7116 }, { "epoch": 1.6409960802397972, "grad_norm": 1.486994174732026, "learning_rate": 1.7129421546554957e-07, "loss": 0.4273250102996826, "step": 7117 }, { "epoch": 1.6412266543693796, "grad_norm": 1.8106380448833757, "learning_rate": 1.71080889697931e-07, "loss": 0.47923076152801514, "step": 7118 }, { "epoch": 1.6414572284989624, "grad_norm": 1.5033931180120297, "learning_rate": 1.708676844221061e-07, "loss": 0.42801159620285034, "step": 7119 }, { "epoch": 1.641687802628545, "grad_norm": 1.4792875147029159, "learning_rate": 1.7065459966906636e-07, "loss": 0.39929044246673584, "step": 7120 }, { "epoch": 1.6419183767581278, "grad_norm": 1.4727601001923896, "learning_rate": 1.7044163546978553e-07, "loss": 0.4919764995574951, "step": 7121 }, { "epoch": 1.6421489508877105, "grad_norm": 1.5018740505050776, "learning_rate": 1.702287918552202e-07, "loss": 0.45943617820739746, "step": 7122 }, { "epoch": 1.642379525017293, "grad_norm": 1.5202994857697039, "learning_rate": 1.7001606885630948e-07, "loss": 0.48078954219818115, "step": 7123 }, { "epoch": 1.6426100991468757, "grad_norm": 1.406204806461001, "learning_rate": 1.6980346650397505e-07, "loss": 0.4217113256454468, "step": 7124 }, { "epoch": 1.6428406732764582, "grad_norm": 1.479814078881505, "learning_rate": 1.6959098482912037e-07, "loss": 0.4643937051296234, "step": 7125 }, { "epoch": 1.643071247406041, "grad_norm": 1.6157838326637273, "learning_rate": 1.6937862386263212e-07, "loss": 0.43977001309394836, "step": 7126 }, { "epoch": 1.6433018215356237, "grad_norm": 1.4653862858165947, "learning_rate": 1.6916638363537882e-07, "loss": 0.3872392177581787, "step": 7127 }, { "epoch": 1.6435323956652064, "grad_norm": 1.4668608493131068, "learning_rate": 1.6895426417821213e-07, "loss": 0.44625502824783325, "step": 7128 }, { "epoch": 1.643762969794789, "grad_norm": 1.6445652935798991, "learning_rate": 1.6874226552196523e-07, "loss": 0.36836186051368713, "step": 7129 }, { "epoch": 1.6439935439243718, "grad_norm": 1.5181829131466213, "learning_rate": 1.6853038769745465e-07, "loss": 0.35491907596588135, "step": 7130 }, { "epoch": 1.6442241180539543, "grad_norm": 1.5107933584098798, "learning_rate": 1.6831863073547913e-07, "loss": 0.5210527181625366, "step": 7131 }, { "epoch": 1.644454692183537, "grad_norm": 1.5854667470103982, "learning_rate": 1.6810699466681932e-07, "loss": 0.3805693984031677, "step": 7132 }, { "epoch": 1.6446852663131195, "grad_norm": 1.8089883418272688, "learning_rate": 1.6789547952223893e-07, "loss": 0.5768346786499023, "step": 7133 }, { "epoch": 1.6449158404427022, "grad_norm": 1.8423402992377882, "learning_rate": 1.6768408533248356e-07, "loss": 0.46465635299682617, "step": 7134 }, { "epoch": 1.645146414572285, "grad_norm": 1.8710111931219464, "learning_rate": 1.674728121282819e-07, "loss": 0.43119215965270996, "step": 7135 }, { "epoch": 1.6453769887018677, "grad_norm": 1.4436891948188744, "learning_rate": 1.6726165994034402e-07, "loss": 0.42814093828201294, "step": 7136 }, { "epoch": 1.6456075628314504, "grad_norm": 1.5822684467576347, "learning_rate": 1.6705062879936382e-07, "loss": 0.41762328147888184, "step": 7137 }, { "epoch": 1.645838136961033, "grad_norm": 2.059560914873905, "learning_rate": 1.668397187360161e-07, "loss": 0.42717012763023376, "step": 7138 }, { "epoch": 1.6460687110906156, "grad_norm": 1.3692759576709286, "learning_rate": 1.666289297809591e-07, "loss": 0.37660926580429077, "step": 7139 }, { "epoch": 1.6462992852201983, "grad_norm": 1.689926156627043, "learning_rate": 1.664182619648331e-07, "loss": 0.3905887007713318, "step": 7140 }, { "epoch": 1.6465298593497808, "grad_norm": 1.5648955881343065, "learning_rate": 1.6620771531826117e-07, "loss": 0.4848547577857971, "step": 7141 }, { "epoch": 1.6467604334793635, "grad_norm": 1.5642509939041707, "learning_rate": 1.659972898718479e-07, "loss": 0.37895438075065613, "step": 7142 }, { "epoch": 1.6469910076089462, "grad_norm": 1.6050388867308452, "learning_rate": 1.6578698565618075e-07, "loss": 0.46770527958869934, "step": 7143 }, { "epoch": 1.647221581738529, "grad_norm": 1.705579614415488, "learning_rate": 1.6557680270182995e-07, "loss": 0.44138044118881226, "step": 7144 }, { "epoch": 1.6474521558681117, "grad_norm": 1.7922951246817975, "learning_rate": 1.6536674103934734e-07, "loss": 0.3681126832962036, "step": 7145 }, { "epoch": 1.6476827299976944, "grad_norm": 1.454313444949356, "learning_rate": 1.651568006992675e-07, "loss": 0.4410884380340576, "step": 7146 }, { "epoch": 1.6479133041272769, "grad_norm": 1.444668904765709, "learning_rate": 1.6494698171210776e-07, "loss": 0.4161960482597351, "step": 7147 }, { "epoch": 1.6481438782568596, "grad_norm": 1.6873012096950248, "learning_rate": 1.647372841083674e-07, "loss": 0.4912784695625305, "step": 7148 }, { "epoch": 1.648374452386442, "grad_norm": 1.8457570973340096, "learning_rate": 1.6452770791852766e-07, "loss": 0.5137985944747925, "step": 7149 }, { "epoch": 1.6486050265160248, "grad_norm": 1.845102008062213, "learning_rate": 1.6431825317305303e-07, "loss": 0.43644070625305176, "step": 7150 }, { "epoch": 1.6488356006456075, "grad_norm": 1.508191131690363, "learning_rate": 1.6410891990238973e-07, "loss": 0.4319378733634949, "step": 7151 }, { "epoch": 1.6490661747751902, "grad_norm": 1.6137067673031091, "learning_rate": 1.6389970813696619e-07, "loss": 0.474090039730072, "step": 7152 }, { "epoch": 1.649296748904773, "grad_norm": 1.656766330100741, "learning_rate": 1.6369061790719375e-07, "loss": 0.40291503071784973, "step": 7153 }, { "epoch": 1.6495273230343557, "grad_norm": 1.5434308580585603, "learning_rate": 1.6348164924346562e-07, "loss": 0.51482754945755, "step": 7154 }, { "epoch": 1.6497578971639382, "grad_norm": 1.421069671161851, "learning_rate": 1.632728021761579e-07, "loss": 0.35308974981307983, "step": 7155 }, { "epoch": 1.6499884712935209, "grad_norm": 1.7501565194944115, "learning_rate": 1.6306407673562815e-07, "loss": 0.5269055366516113, "step": 7156 }, { "epoch": 1.6502190454231034, "grad_norm": 1.4775332310798848, "learning_rate": 1.6285547295221724e-07, "loss": 0.41290512681007385, "step": 7157 }, { "epoch": 1.650449619552686, "grad_norm": 1.4513808656924674, "learning_rate": 1.6264699085624721e-07, "loss": 0.39930522441864014, "step": 7158 }, { "epoch": 1.6506801936822688, "grad_norm": 1.475028134913826, "learning_rate": 1.6243863047802365e-07, "loss": 0.4617648422718048, "step": 7159 }, { "epoch": 1.6509107678118515, "grad_norm": 1.6583284073308129, "learning_rate": 1.6223039184783337e-07, "loss": 0.4618498980998993, "step": 7160 }, { "epoch": 1.6511413419414342, "grad_norm": 1.5177380348824272, "learning_rate": 1.6202227499594635e-07, "loss": 0.43138834834098816, "step": 7161 }, { "epoch": 1.651371916071017, "grad_norm": 1.9944130162827052, "learning_rate": 1.618142799526141e-07, "loss": 0.5330632925033569, "step": 7162 }, { "epoch": 1.6516024902005995, "grad_norm": 1.4381555357456468, "learning_rate": 1.6160640674807103e-07, "loss": 0.45410698652267456, "step": 7163 }, { "epoch": 1.6518330643301822, "grad_norm": 1.52256812211894, "learning_rate": 1.6139865541253384e-07, "loss": 0.4216715693473816, "step": 7164 }, { "epoch": 1.6520636384597647, "grad_norm": 1.6818151368938485, "learning_rate": 1.6119102597620083e-07, "loss": 0.3738868832588196, "step": 7165 }, { "epoch": 1.6522942125893474, "grad_norm": 1.587335339212439, "learning_rate": 1.609835184692535e-07, "loss": 0.44595998525619507, "step": 7166 }, { "epoch": 1.65252478671893, "grad_norm": 1.8461813575956394, "learning_rate": 1.6077613292185466e-07, "loss": 0.5446096062660217, "step": 7167 }, { "epoch": 1.6527553608485128, "grad_norm": 1.5661326715584178, "learning_rate": 1.605688693641505e-07, "loss": 0.47280746698379517, "step": 7168 }, { "epoch": 1.6529859349780955, "grad_norm": 1.6260653553703972, "learning_rate": 1.6036172782626823e-07, "loss": 0.5280133485794067, "step": 7169 }, { "epoch": 1.6532165091076783, "grad_norm": 1.6507744528919734, "learning_rate": 1.6015470833831835e-07, "loss": 0.4659959375858307, "step": 7170 }, { "epoch": 1.6534470832372608, "grad_norm": 1.5548632331284282, "learning_rate": 1.5994781093039335e-07, "loss": 0.5196797251701355, "step": 7171 }, { "epoch": 1.6536776573668435, "grad_norm": 1.298650586457363, "learning_rate": 1.597410356325676e-07, "loss": 0.41855669021606445, "step": 7172 }, { "epoch": 1.653908231496426, "grad_norm": 1.6301682003715197, "learning_rate": 1.5953438247489814e-07, "loss": 0.43063706159591675, "step": 7173 }, { "epoch": 1.6541388056260087, "grad_norm": 1.556025937846025, "learning_rate": 1.59327851487424e-07, "loss": 0.3954850435256958, "step": 7174 }, { "epoch": 1.6543693797555914, "grad_norm": 1.6096102290125367, "learning_rate": 1.591214427001667e-07, "loss": 0.4497464895248413, "step": 7175 }, { "epoch": 1.6545999538851741, "grad_norm": 1.573427243133678, "learning_rate": 1.5891515614312967e-07, "loss": 0.47012704610824585, "step": 7176 }, { "epoch": 1.6548305280147568, "grad_norm": 1.345166831078004, "learning_rate": 1.5870899184629872e-07, "loss": 0.399054616689682, "step": 7177 }, { "epoch": 1.6550611021443395, "grad_norm": 1.68897296856965, "learning_rate": 1.5850294983964208e-07, "loss": 0.41277164220809937, "step": 7178 }, { "epoch": 1.655291676273922, "grad_norm": 1.6410807386564468, "learning_rate": 1.5829703015311013e-07, "loss": 0.4735640287399292, "step": 7179 }, { "epoch": 1.6555222504035048, "grad_norm": 1.5414168893805387, "learning_rate": 1.5809123281663516e-07, "loss": 0.4244140386581421, "step": 7180 }, { "epoch": 1.6557528245330873, "grad_norm": 1.6196858148033184, "learning_rate": 1.5788555786013212e-07, "loss": 0.4291320741176605, "step": 7181 }, { "epoch": 1.65598339866267, "grad_norm": 1.8656270771434302, "learning_rate": 1.576800053134979e-07, "loss": 0.3965643048286438, "step": 7182 }, { "epoch": 1.6562139727922527, "grad_norm": 1.5939688831505687, "learning_rate": 1.5747457520661123e-07, "loss": 0.4087764620780945, "step": 7183 }, { "epoch": 1.6564445469218354, "grad_norm": 1.523375144006796, "learning_rate": 1.5726926756933411e-07, "loss": 0.4207920432090759, "step": 7184 }, { "epoch": 1.6566751210514181, "grad_norm": 1.757376584691626, "learning_rate": 1.570640824315095e-07, "loss": 0.34311753511428833, "step": 7185 }, { "epoch": 1.6569056951810008, "grad_norm": 2.079059544313622, "learning_rate": 1.5685901982296345e-07, "loss": 0.44728145003318787, "step": 7186 }, { "epoch": 1.6571362693105833, "grad_norm": 1.6933442739443483, "learning_rate": 1.5665407977350386e-07, "loss": 0.38300156593322754, "step": 7187 }, { "epoch": 1.657366843440166, "grad_norm": 1.4613322908312483, "learning_rate": 1.56449262312921e-07, "loss": 0.32724204659461975, "step": 7188 }, { "epoch": 1.6575974175697485, "grad_norm": 1.5277123552551555, "learning_rate": 1.562445674709868e-07, "loss": 0.4812743067741394, "step": 7189 }, { "epoch": 1.6578279916993313, "grad_norm": 1.279031260784297, "learning_rate": 1.5603999527745615e-07, "loss": 0.3974485397338867, "step": 7190 }, { "epoch": 1.658058565828914, "grad_norm": 1.729819799365075, "learning_rate": 1.5583554576206536e-07, "loss": 0.5058138370513916, "step": 7191 }, { "epoch": 1.6582891399584967, "grad_norm": 1.451214505055382, "learning_rate": 1.5563121895453323e-07, "loss": 0.4442358613014221, "step": 7192 }, { "epoch": 1.6585197140880794, "grad_norm": 1.6317499919466611, "learning_rate": 1.5542701488456077e-07, "loss": 0.35400623083114624, "step": 7193 }, { "epoch": 1.6587502882176621, "grad_norm": 1.8335890419904581, "learning_rate": 1.5522293358183125e-07, "loss": 0.5046352744102478, "step": 7194 }, { "epoch": 1.6589808623472446, "grad_norm": 1.8150914477063191, "learning_rate": 1.5501897507601015e-07, "loss": 0.45344769954681396, "step": 7195 }, { "epoch": 1.6592114364768273, "grad_norm": 1.7111771949579255, "learning_rate": 1.548151393967444e-07, "loss": 0.4251500368118286, "step": 7196 }, { "epoch": 1.6594420106064098, "grad_norm": 1.4323459769713944, "learning_rate": 1.5461142657366399e-07, "loss": 0.3728788495063782, "step": 7197 }, { "epoch": 1.6596725847359926, "grad_norm": 1.5246938682723656, "learning_rate": 1.5440783663638036e-07, "loss": 0.3143829107284546, "step": 7198 }, { "epoch": 1.6599031588655753, "grad_norm": 1.3416076020806418, "learning_rate": 1.5420436961448758e-07, "loss": 0.5070813894271851, "step": 7199 }, { "epoch": 1.660133732995158, "grad_norm": 1.2380684135092845, "learning_rate": 1.5400102553756145e-07, "loss": 0.3644014000892639, "step": 7200 }, { "epoch": 1.6603643071247407, "grad_norm": 2.973338937285917, "learning_rate": 1.5379780443516023e-07, "loss": 0.4120270609855652, "step": 7201 }, { "epoch": 1.6605948812543234, "grad_norm": 1.6150469405356445, "learning_rate": 1.5359470633682425e-07, "loss": 0.4327865242958069, "step": 7202 }, { "epoch": 1.660825455383906, "grad_norm": 2.011470811225138, "learning_rate": 1.5339173127207562e-07, "loss": 0.626624584197998, "step": 7203 }, { "epoch": 1.6610560295134886, "grad_norm": 1.6601868604564274, "learning_rate": 1.5318887927041913e-07, "loss": 0.45536088943481445, "step": 7204 }, { "epoch": 1.6612866036430711, "grad_norm": 1.6789895391694964, "learning_rate": 1.52986150361341e-07, "loss": 0.5306276082992554, "step": 7205 }, { "epoch": 1.6615171777726538, "grad_norm": 1.5374267124283623, "learning_rate": 1.5278354457431043e-07, "loss": 0.4263244867324829, "step": 7206 }, { "epoch": 1.6617477519022366, "grad_norm": 1.5390387444640852, "learning_rate": 1.5258106193877762e-07, "loss": 0.4578266143798828, "step": 7207 }, { "epoch": 1.6619783260318193, "grad_norm": 1.4963429405053086, "learning_rate": 1.5237870248417605e-07, "loss": 0.5120365619659424, "step": 7208 }, { "epoch": 1.662208900161402, "grad_norm": 1.7987725718508283, "learning_rate": 1.521764662399202e-07, "loss": 0.4491463005542755, "step": 7209 }, { "epoch": 1.6624394742909847, "grad_norm": 1.588713571736857, "learning_rate": 1.5197435323540752e-07, "loss": 0.4810635447502136, "step": 7210 }, { "epoch": 1.6626700484205672, "grad_norm": 1.549550087406024, "learning_rate": 1.5177236350001722e-07, "loss": 0.4250200390815735, "step": 7211 }, { "epoch": 1.66290062255015, "grad_norm": 1.8619243359226805, "learning_rate": 1.515704970631102e-07, "loss": 0.49981385469436646, "step": 7212 }, { "epoch": 1.6631311966797324, "grad_norm": 1.621928409701738, "learning_rate": 1.5136875395403027e-07, "loss": 0.40204358100891113, "step": 7213 }, { "epoch": 1.6633617708093151, "grad_norm": 1.504987607563178, "learning_rate": 1.5116713420210236e-07, "loss": 0.514127254486084, "step": 7214 }, { "epoch": 1.6635923449388978, "grad_norm": 1.8745773841611948, "learning_rate": 1.509656378366343e-07, "loss": 0.5119338631629944, "step": 7215 }, { "epoch": 1.6638229190684806, "grad_norm": 1.6137446017437618, "learning_rate": 1.507642648869153e-07, "loss": 0.45031970739364624, "step": 7216 }, { "epoch": 1.6640534931980633, "grad_norm": 1.427878863576358, "learning_rate": 1.5056301538221716e-07, "loss": 0.4503582715988159, "step": 7217 }, { "epoch": 1.664284067327646, "grad_norm": 1.4651953746761925, "learning_rate": 1.503618893517935e-07, "loss": 0.38793227076530457, "step": 7218 }, { "epoch": 1.6645146414572285, "grad_norm": 1.4683280962315126, "learning_rate": 1.5016088682488026e-07, "loss": 0.4446987211704254, "step": 7219 }, { "epoch": 1.6647452155868112, "grad_norm": 1.7835855909787117, "learning_rate": 1.4996000783069485e-07, "loss": 0.4687119722366333, "step": 7220 }, { "epoch": 1.6649757897163937, "grad_norm": 1.6205230957470973, "learning_rate": 1.4975925239843734e-07, "loss": 0.48283010721206665, "step": 7221 }, { "epoch": 1.6652063638459764, "grad_norm": 1.630894562773258, "learning_rate": 1.4955862055728941e-07, "loss": 0.510201632976532, "step": 7222 }, { "epoch": 1.6654369379755591, "grad_norm": 1.4932233099831633, "learning_rate": 1.4935811233641471e-07, "loss": 0.4070482850074768, "step": 7223 }, { "epoch": 1.6656675121051419, "grad_norm": 1.5683915035975688, "learning_rate": 1.4915772776495948e-07, "loss": 0.44347989559173584, "step": 7224 }, { "epoch": 1.6658980862347246, "grad_norm": 1.6817444257008654, "learning_rate": 1.4895746687205147e-07, "loss": 0.4160166382789612, "step": 7225 }, { "epoch": 1.6661286603643073, "grad_norm": 1.5428277862719844, "learning_rate": 1.4875732968680098e-07, "loss": 0.39939236640930176, "step": 7226 }, { "epoch": 1.6663592344938898, "grad_norm": 1.8461591057744162, "learning_rate": 1.4855731623829936e-07, "loss": 0.4604174494743347, "step": 7227 }, { "epoch": 1.6665898086234725, "grad_norm": 1.5963571116977615, "learning_rate": 1.4835742655562134e-07, "loss": 0.4691208004951477, "step": 7228 }, { "epoch": 1.666820382753055, "grad_norm": 1.358957710417088, "learning_rate": 1.481576606678222e-07, "loss": 0.4146147668361664, "step": 7229 }, { "epoch": 1.6670509568826377, "grad_norm": 1.4681059084163257, "learning_rate": 1.4795801860394041e-07, "loss": 0.4064391255378723, "step": 7230 }, { "epoch": 1.6672815310122204, "grad_norm": 1.233349352710464, "learning_rate": 1.4775850039299587e-07, "loss": 0.3696960210800171, "step": 7231 }, { "epoch": 1.6675121051418031, "grad_norm": 1.763624641268307, "learning_rate": 1.4755910606399023e-07, "loss": 0.4356287121772766, "step": 7232 }, { "epoch": 1.6677426792713859, "grad_norm": 1.6119962512147328, "learning_rate": 1.473598356459078e-07, "loss": 0.39327436685562134, "step": 7233 }, { "epoch": 1.6679732534009684, "grad_norm": 1.4528281796334948, "learning_rate": 1.4716068916771452e-07, "loss": 0.4722225069999695, "step": 7234 }, { "epoch": 1.668203827530551, "grad_norm": 1.3954919737652625, "learning_rate": 1.4696166665835852e-07, "loss": 0.3645583987236023, "step": 7235 }, { "epoch": 1.6684344016601336, "grad_norm": 1.628738998914794, "learning_rate": 1.4676276814676935e-07, "loss": 0.4153117537498474, "step": 7236 }, { "epoch": 1.6686649757897163, "grad_norm": 1.2987847859472657, "learning_rate": 1.4656399366185933e-07, "loss": 0.3470612168312073, "step": 7237 }, { "epoch": 1.668895549919299, "grad_norm": 1.424067964832139, "learning_rate": 1.4636534323252203e-07, "loss": 0.3934207260608673, "step": 7238 }, { "epoch": 1.6691261240488817, "grad_norm": 1.6191654953115664, "learning_rate": 1.4616681688763355e-07, "loss": 0.35530412197113037, "step": 7239 }, { "epoch": 1.6693566981784644, "grad_norm": 1.5867473768730196, "learning_rate": 1.4596841465605136e-07, "loss": 0.5218726396560669, "step": 7240 }, { "epoch": 1.6695872723080472, "grad_norm": 1.9070671037743527, "learning_rate": 1.4577013656661542e-07, "loss": 0.4287494421005249, "step": 7241 }, { "epoch": 1.6698178464376296, "grad_norm": 2.099754040079973, "learning_rate": 1.4557198264814775e-07, "loss": 0.5161805152893066, "step": 7242 }, { "epoch": 1.6700484205672124, "grad_norm": 1.485709070131558, "learning_rate": 1.4537395292945153e-07, "loss": 0.4843006730079651, "step": 7243 }, { "epoch": 1.6702789946967949, "grad_norm": 1.416657421952009, "learning_rate": 1.4517604743931288e-07, "loss": 0.526993989944458, "step": 7244 }, { "epoch": 1.6705095688263776, "grad_norm": 1.318696888956493, "learning_rate": 1.4497826620649888e-07, "loss": 0.43705734610557556, "step": 7245 }, { "epoch": 1.6707401429559603, "grad_norm": 1.626300355229789, "learning_rate": 1.4478060925975942e-07, "loss": 0.6001747846603394, "step": 7246 }, { "epoch": 1.670970717085543, "grad_norm": 1.6701240840694564, "learning_rate": 1.4458307662782564e-07, "loss": 0.4041635990142822, "step": 7247 }, { "epoch": 1.6712012912151257, "grad_norm": 1.6291301094782007, "learning_rate": 1.4438566833941112e-07, "loss": 0.4425908923149109, "step": 7248 }, { "epoch": 1.6714318653447084, "grad_norm": 1.8234242321709921, "learning_rate": 1.4418838442321102e-07, "loss": 0.5202267169952393, "step": 7249 }, { "epoch": 1.671662439474291, "grad_norm": 1.3646967283137599, "learning_rate": 1.4399122490790293e-07, "loss": 0.44352006912231445, "step": 7250 }, { "epoch": 1.6718930136038737, "grad_norm": 1.5745296606833632, "learning_rate": 1.4379418982214542e-07, "loss": 0.4757179021835327, "step": 7251 }, { "epoch": 1.6721235877334562, "grad_norm": 2.0125776677757825, "learning_rate": 1.4359727919457998e-07, "loss": 0.4748988747596741, "step": 7252 }, { "epoch": 1.6723541618630389, "grad_norm": 1.4390886859105494, "learning_rate": 1.434004930538294e-07, "loss": 0.4280398190021515, "step": 7253 }, { "epoch": 1.6725847359926216, "grad_norm": 1.5844583735943714, "learning_rate": 1.4320383142849834e-07, "loss": 0.4959871172904968, "step": 7254 }, { "epoch": 1.6728153101222043, "grad_norm": 1.6551218088905322, "learning_rate": 1.4300729434717396e-07, "loss": 0.506413996219635, "step": 7255 }, { "epoch": 1.673045884251787, "grad_norm": 1.5894513628120581, "learning_rate": 1.4281088183842448e-07, "loss": 0.4723675847053528, "step": 7256 }, { "epoch": 1.6732764583813697, "grad_norm": 1.5735532616627814, "learning_rate": 1.4261459393080076e-07, "loss": 0.41801339387893677, "step": 7257 }, { "epoch": 1.6735070325109522, "grad_norm": 1.651784117733762, "learning_rate": 1.424184306528351e-07, "loss": 0.4463369846343994, "step": 7258 }, { "epoch": 1.673737606640535, "grad_norm": 1.6205372576102755, "learning_rate": 1.422223920330421e-07, "loss": 0.4167429506778717, "step": 7259 }, { "epoch": 1.6739681807701174, "grad_norm": 1.448285732733219, "learning_rate": 1.420264780999174e-07, "loss": 0.48808401823043823, "step": 7260 }, { "epoch": 1.6741987548997002, "grad_norm": 1.7994342785579152, "learning_rate": 1.4183068888193973e-07, "loss": 0.515659749507904, "step": 7261 }, { "epoch": 1.6744293290292829, "grad_norm": 1.6582236339460064, "learning_rate": 1.416350244075688e-07, "loss": 0.4393026530742645, "step": 7262 }, { "epoch": 1.6746599031588656, "grad_norm": 1.6750398739214198, "learning_rate": 1.4143948470524602e-07, "loss": 0.35053056478500366, "step": 7263 }, { "epoch": 1.6748904772884483, "grad_norm": 1.1872706234379884, "learning_rate": 1.4124406980339532e-07, "loss": 0.35598453879356384, "step": 7264 }, { "epoch": 1.675121051418031, "grad_norm": 1.747342634360751, "learning_rate": 1.410487797304224e-07, "loss": 0.47989165782928467, "step": 7265 }, { "epoch": 1.6753516255476135, "grad_norm": 1.4767801179152846, "learning_rate": 1.408536145147148e-07, "loss": 0.4621499180793762, "step": 7266 }, { "epoch": 1.6755821996771962, "grad_norm": 1.4469255776490486, "learning_rate": 1.4065857418464122e-07, "loss": 0.40567925572395325, "step": 7267 }, { "epoch": 1.6758127738067787, "grad_norm": 2.121901896007684, "learning_rate": 1.4046365876855326e-07, "loss": 0.38889849185943604, "step": 7268 }, { "epoch": 1.6760433479363614, "grad_norm": 1.8036845925466258, "learning_rate": 1.4026886829478345e-07, "loss": 0.516187846660614, "step": 7269 }, { "epoch": 1.6762739220659442, "grad_norm": 1.3670995724086425, "learning_rate": 1.4007420279164706e-07, "loss": 0.4007910192012787, "step": 7270 }, { "epoch": 1.6765044961955269, "grad_norm": 1.4513245632029468, "learning_rate": 1.3987966228744007e-07, "loss": 0.4426886737346649, "step": 7271 }, { "epoch": 1.6767350703251096, "grad_norm": 1.7767592903800882, "learning_rate": 1.3968524681044114e-07, "loss": 0.46890369057655334, "step": 7272 }, { "epoch": 1.6769656444546923, "grad_norm": 1.714201330640179, "learning_rate": 1.3949095638891096e-07, "loss": 0.510369598865509, "step": 7273 }, { "epoch": 1.6771962185842748, "grad_norm": 1.697492362317676, "learning_rate": 1.3929679105109106e-07, "loss": 0.47810226678848267, "step": 7274 }, { "epoch": 1.6774267927138575, "grad_norm": 1.6234301902278867, "learning_rate": 1.3910275082520572e-07, "loss": 0.48592591285705566, "step": 7275 }, { "epoch": 1.67765736684344, "grad_norm": 1.5107060260742486, "learning_rate": 1.3890883573946021e-07, "loss": 0.4664943814277649, "step": 7276 }, { "epoch": 1.6778879409730227, "grad_norm": 1.6514095493299281, "learning_rate": 1.3871504582204263e-07, "loss": 0.47146645188331604, "step": 7277 }, { "epoch": 1.6781185151026055, "grad_norm": 1.615997642769361, "learning_rate": 1.3852138110112166e-07, "loss": 0.5171671509742737, "step": 7278 }, { "epoch": 1.6783490892321882, "grad_norm": 1.8275491234958787, "learning_rate": 1.3832784160484913e-07, "loss": 0.45887336134910583, "step": 7279 }, { "epoch": 1.678579663361771, "grad_norm": 1.494861700798582, "learning_rate": 1.3813442736135728e-07, "loss": 0.4363539516925812, "step": 7280 }, { "epoch": 1.6788102374913536, "grad_norm": 2.0171892009876147, "learning_rate": 1.379411383987612e-07, "loss": 0.4626097083091736, "step": 7281 }, { "epoch": 1.679040811620936, "grad_norm": 1.8196525383976765, "learning_rate": 1.3774797474515766e-07, "loss": 0.5939204096794128, "step": 7282 }, { "epoch": 1.6792713857505188, "grad_norm": 1.6878435890648014, "learning_rate": 1.3755493642862437e-07, "loss": 0.5463666915893555, "step": 7283 }, { "epoch": 1.6795019598801013, "grad_norm": 1.622691460463702, "learning_rate": 1.3736202347722182e-07, "loss": 0.3634001910686493, "step": 7284 }, { "epoch": 1.679732534009684, "grad_norm": 1.6327202188647956, "learning_rate": 1.3716923591899166e-07, "loss": 0.39512360095977783, "step": 7285 }, { "epoch": 1.6799631081392667, "grad_norm": 1.3361978857608434, "learning_rate": 1.3697657378195772e-07, "loss": 0.3858473300933838, "step": 7286 }, { "epoch": 1.6801936822688495, "grad_norm": 1.4527844976472322, "learning_rate": 1.36784037094125e-07, "loss": 0.473757266998291, "step": 7287 }, { "epoch": 1.6804242563984322, "grad_norm": 1.410877918262981, "learning_rate": 1.3659162588348107e-07, "loss": 0.41679126024246216, "step": 7288 }, { "epoch": 1.680654830528015, "grad_norm": 1.7135792249847552, "learning_rate": 1.363993401779946e-07, "loss": 0.4267998933792114, "step": 7289 }, { "epoch": 1.6808854046575974, "grad_norm": 1.6476835268765473, "learning_rate": 1.3620718000561648e-07, "loss": 0.5453667044639587, "step": 7290 }, { "epoch": 1.68111597878718, "grad_norm": 1.4347316593862658, "learning_rate": 1.3601514539427895e-07, "loss": 0.3882933259010315, "step": 7291 }, { "epoch": 1.6813465529167626, "grad_norm": 1.7177796725752086, "learning_rate": 1.3582323637189653e-07, "loss": 0.5565635561943054, "step": 7292 }, { "epoch": 1.6815771270463453, "grad_norm": 1.448665873125515, "learning_rate": 1.356314529663647e-07, "loss": 0.49807024002075195, "step": 7293 }, { "epoch": 1.681807701175928, "grad_norm": 1.5449122885779156, "learning_rate": 1.3543979520556116e-07, "loss": 0.40868130326271057, "step": 7294 }, { "epoch": 1.6820382753055108, "grad_norm": 1.4045709349742252, "learning_rate": 1.352482631173455e-07, "loss": 0.46088406443595886, "step": 7295 }, { "epoch": 1.6822688494350935, "grad_norm": 1.7658846162202777, "learning_rate": 1.3505685672955869e-07, "loss": 0.44346722960472107, "step": 7296 }, { "epoch": 1.6824994235646762, "grad_norm": 1.3703801713050607, "learning_rate": 1.348655760700239e-07, "loss": 0.36585044860839844, "step": 7297 }, { "epoch": 1.6827299976942587, "grad_norm": 1.8199719530329925, "learning_rate": 1.3467442116654536e-07, "loss": 0.46082472801208496, "step": 7298 }, { "epoch": 1.6829605718238414, "grad_norm": 1.8043564550526412, "learning_rate": 1.3448339204690974e-07, "loss": 0.5011709928512573, "step": 7299 }, { "epoch": 1.683191145953424, "grad_norm": 2.1355217293891378, "learning_rate": 1.3429248873888454e-07, "loss": 0.4382838010787964, "step": 7300 }, { "epoch": 1.6834217200830066, "grad_norm": 1.4118543770807777, "learning_rate": 1.3410171127022008e-07, "loss": 0.35204610228538513, "step": 7301 }, { "epoch": 1.6836522942125893, "grad_norm": 1.3718001359049319, "learning_rate": 1.3391105966864745e-07, "loss": 0.3915257453918457, "step": 7302 }, { "epoch": 1.683882868342172, "grad_norm": 1.4102637825932318, "learning_rate": 1.3372053396187967e-07, "loss": 0.3945339322090149, "step": 7303 }, { "epoch": 1.6841134424717548, "grad_norm": 1.7911618298179695, "learning_rate": 1.335301341776117e-07, "loss": 0.48783642053604126, "step": 7304 }, { "epoch": 1.6843440166013375, "grad_norm": 1.745012134293522, "learning_rate": 1.333398603435203e-07, "loss": 0.49026161432266235, "step": 7305 }, { "epoch": 1.68457459073092, "grad_norm": 1.9699708710220791, "learning_rate": 1.3314971248726358e-07, "loss": 0.5035061836242676, "step": 7306 }, { "epoch": 1.6848051648605027, "grad_norm": 1.7602149086036532, "learning_rate": 1.3295969063648126e-07, "loss": 0.5452826023101807, "step": 7307 }, { "epoch": 1.6850357389900852, "grad_norm": 1.7088858518580703, "learning_rate": 1.3276979481879524e-07, "loss": 0.4609105885028839, "step": 7308 }, { "epoch": 1.685266313119668, "grad_norm": 1.6869514802612067, "learning_rate": 1.3258002506180855e-07, "loss": 0.5799046754837036, "step": 7309 }, { "epoch": 1.6854968872492506, "grad_norm": 1.6691103426337504, "learning_rate": 1.3239038139310644e-07, "loss": 0.42096465826034546, "step": 7310 }, { "epoch": 1.6857274613788333, "grad_norm": 1.9781377178498367, "learning_rate": 1.3220086384025508e-07, "loss": 0.4741813540458679, "step": 7311 }, { "epoch": 1.685958035508416, "grad_norm": 1.5972207301313162, "learning_rate": 1.3201147243080302e-07, "loss": 0.4872191250324249, "step": 7312 }, { "epoch": 1.6861886096379988, "grad_norm": 1.7767879845396581, "learning_rate": 1.3182220719228054e-07, "loss": 0.5210198163986206, "step": 7313 }, { "epoch": 1.6864191837675813, "grad_norm": 1.932834262840403, "learning_rate": 1.3163306815219878e-07, "loss": 0.4873948395252228, "step": 7314 }, { "epoch": 1.686649757897164, "grad_norm": 1.723686253702064, "learning_rate": 1.3144405533805136e-07, "loss": 0.46856212615966797, "step": 7315 }, { "epoch": 1.6868803320267465, "grad_norm": 1.549399332710726, "learning_rate": 1.3125516877731279e-07, "loss": 0.3931645154953003, "step": 7316 }, { "epoch": 1.6871109061563292, "grad_norm": 1.5988122745666866, "learning_rate": 1.3106640849744023e-07, "loss": 0.4473317861557007, "step": 7317 }, { "epoch": 1.687341480285912, "grad_norm": 1.5841372684708825, "learning_rate": 1.3087777452587124e-07, "loss": 0.4499043822288513, "step": 7318 }, { "epoch": 1.6875720544154946, "grad_norm": 1.6054649930580802, "learning_rate": 1.30689266890026e-07, "loss": 0.4992508292198181, "step": 7319 }, { "epoch": 1.6878026285450773, "grad_norm": 1.426896936128743, "learning_rate": 1.305008856173061e-07, "loss": 0.4684743583202362, "step": 7320 }, { "epoch": 1.68803320267466, "grad_norm": 1.7876602073965717, "learning_rate": 1.303126307350948e-07, "loss": 0.5543930530548096, "step": 7321 }, { "epoch": 1.6882637768042426, "grad_norm": 1.3482084944505501, "learning_rate": 1.3012450227075655e-07, "loss": 0.3812211751937866, "step": 7322 }, { "epoch": 1.6884943509338253, "grad_norm": 2.079165248146425, "learning_rate": 1.299365002516377e-07, "loss": 0.5455845594406128, "step": 7323 }, { "epoch": 1.6887249250634078, "grad_norm": 1.3768890960712863, "learning_rate": 1.2974862470506654e-07, "loss": 0.4256778657436371, "step": 7324 }, { "epoch": 1.6889554991929905, "grad_norm": 1.9468423520002898, "learning_rate": 1.2956087565835228e-07, "loss": 0.4973354637622833, "step": 7325 }, { "epoch": 1.6891860733225732, "grad_norm": 1.5779840439512345, "learning_rate": 1.2937325313878666e-07, "loss": 0.5141343474388123, "step": 7326 }, { "epoch": 1.689416647452156, "grad_norm": 1.5179632497576485, "learning_rate": 1.2918575717364178e-07, "loss": 0.3872978687286377, "step": 7327 }, { "epoch": 1.6896472215817386, "grad_norm": 1.3857087225021212, "learning_rate": 1.2899838779017292e-07, "loss": 0.4333486557006836, "step": 7328 }, { "epoch": 1.6898777957113214, "grad_norm": 1.5624646221048997, "learning_rate": 1.2881114501561553e-07, "loss": 0.42979496717453003, "step": 7329 }, { "epoch": 1.6901083698409038, "grad_norm": 1.6512939392276094, "learning_rate": 1.2862402887718771e-07, "loss": 0.43296414613723755, "step": 7330 }, { "epoch": 1.6903389439704866, "grad_norm": 1.4822998528875215, "learning_rate": 1.2843703940208816e-07, "loss": 0.41763681173324585, "step": 7331 }, { "epoch": 1.690569518100069, "grad_norm": 1.4433304691783968, "learning_rate": 1.2825017661749814e-07, "loss": 0.4531592130661011, "step": 7332 }, { "epoch": 1.6908000922296518, "grad_norm": 1.5515786608723572, "learning_rate": 1.2806344055057995e-07, "loss": 0.4608149826526642, "step": 7333 }, { "epoch": 1.6910306663592345, "grad_norm": 1.5678716271625897, "learning_rate": 1.2787683122847726e-07, "loss": 0.4298786520957947, "step": 7334 }, { "epoch": 1.6912612404888172, "grad_norm": 1.5882305453896473, "learning_rate": 1.2769034867831586e-07, "loss": 0.4404297471046448, "step": 7335 }, { "epoch": 1.6914918146184, "grad_norm": 1.590662947019878, "learning_rate": 1.2750399292720281e-07, "loss": 0.3857702910900116, "step": 7336 }, { "epoch": 1.6917223887479826, "grad_norm": 1.5092920813034143, "learning_rate": 1.2731776400222716e-07, "loss": 0.351214200258255, "step": 7337 }, { "epoch": 1.6919529628775651, "grad_norm": 1.6618460717985095, "learning_rate": 1.2713166193045854e-07, "loss": 0.4711484909057617, "step": 7338 }, { "epoch": 1.6921835370071479, "grad_norm": 1.605912014604012, "learning_rate": 1.2694568673894946e-07, "loss": 0.4819946587085724, "step": 7339 }, { "epoch": 1.6924141111367303, "grad_norm": 1.5366035327097678, "learning_rate": 1.267598384547327e-07, "loss": 0.39870262145996094, "step": 7340 }, { "epoch": 1.692644685266313, "grad_norm": 1.410709311062986, "learning_rate": 1.265741171048237e-07, "loss": 0.4775997996330261, "step": 7341 }, { "epoch": 1.6928752593958958, "grad_norm": 1.5031428119722987, "learning_rate": 1.2638852271621836e-07, "loss": 0.4166836738586426, "step": 7342 }, { "epoch": 1.6931058335254785, "grad_norm": 1.362546283009112, "learning_rate": 1.2620305531589514e-07, "loss": 0.396761953830719, "step": 7343 }, { "epoch": 1.6933364076550612, "grad_norm": 1.5811036971551204, "learning_rate": 1.260177149308136e-07, "loss": 0.36929184198379517, "step": 7344 }, { "epoch": 1.6935669817846437, "grad_norm": 1.6142308009439483, "learning_rate": 1.2583250158791459e-07, "loss": 0.4664369821548462, "step": 7345 }, { "epoch": 1.6937975559142264, "grad_norm": 1.4490673957983151, "learning_rate": 1.2564741531412115e-07, "loss": 0.40877625346183777, "step": 7346 }, { "epoch": 1.694028130043809, "grad_norm": 1.3363670323915413, "learning_rate": 1.254624561363369e-07, "loss": 0.4282684922218323, "step": 7347 }, { "epoch": 1.6942587041733916, "grad_norm": 1.7781191335343183, "learning_rate": 1.2527762408144805e-07, "loss": 0.5430412292480469, "step": 7348 }, { "epoch": 1.6944892783029744, "grad_norm": 1.7384245962384524, "learning_rate": 1.2509291917632147e-07, "loss": 0.45990923047065735, "step": 7349 }, { "epoch": 1.694719852432557, "grad_norm": 1.5699544039589348, "learning_rate": 1.2490834144780593e-07, "loss": 0.38062262535095215, "step": 7350 }, { "epoch": 1.6949504265621398, "grad_norm": 1.5427808320923257, "learning_rate": 1.2472389092273172e-07, "loss": 0.4704701900482178, "step": 7351 }, { "epoch": 1.6951810006917225, "grad_norm": 1.3215044901700805, "learning_rate": 1.2453956762791084e-07, "loss": 0.4439951181411743, "step": 7352 }, { "epoch": 1.695411574821305, "grad_norm": 1.6827848110964911, "learning_rate": 1.2435537159013632e-07, "loss": 0.49405014514923096, "step": 7353 }, { "epoch": 1.6956421489508877, "grad_norm": 1.4071924274505998, "learning_rate": 1.2417130283618282e-07, "loss": 0.4282076060771942, "step": 7354 }, { "epoch": 1.6958727230804702, "grad_norm": 1.4129187553888694, "learning_rate": 1.2398736139280687e-07, "loss": 0.43492811918258667, "step": 7355 }, { "epoch": 1.696103297210053, "grad_norm": 1.550272919478409, "learning_rate": 1.238035472867458e-07, "loss": 0.37239378690719604, "step": 7356 }, { "epoch": 1.6963338713396356, "grad_norm": 1.2721176079849843, "learning_rate": 1.236198605447194e-07, "loss": 0.39911961555480957, "step": 7357 }, { "epoch": 1.6965644454692184, "grad_norm": 1.911188398718987, "learning_rate": 1.2343630119342786e-07, "loss": 0.4962255656719208, "step": 7358 }, { "epoch": 1.696795019598801, "grad_norm": 1.3131623819116638, "learning_rate": 1.2325286925955358e-07, "loss": 0.37414759397506714, "step": 7359 }, { "epoch": 1.6970255937283838, "grad_norm": 1.5092759235813635, "learning_rate": 1.230695647697604e-07, "loss": 0.41224929690361023, "step": 7360 }, { "epoch": 1.6972561678579663, "grad_norm": 1.3964295729715615, "learning_rate": 1.228863877506936e-07, "loss": 0.43184489011764526, "step": 7361 }, { "epoch": 1.697486741987549, "grad_norm": 1.6991026917946972, "learning_rate": 1.227033382289795e-07, "loss": 0.4741829037666321, "step": 7362 }, { "epoch": 1.6977173161171315, "grad_norm": 1.677947901828469, "learning_rate": 1.2252041623122643e-07, "loss": 0.43224620819091797, "step": 7363 }, { "epoch": 1.6979478902467142, "grad_norm": 1.678576477296345, "learning_rate": 1.2233762178402386e-07, "loss": 0.46645525097846985, "step": 7364 }, { "epoch": 1.698178464376297, "grad_norm": 1.4201537921120515, "learning_rate": 1.2215495491394256e-07, "loss": 0.4237707555294037, "step": 7365 }, { "epoch": 1.6984090385058797, "grad_norm": 1.3069690432597363, "learning_rate": 1.2197241564753535e-07, "loss": 0.36378395557403564, "step": 7366 }, { "epoch": 1.6986396126354624, "grad_norm": 1.6387935949488672, "learning_rate": 1.21790004011336e-07, "loss": 0.4564269185066223, "step": 7367 }, { "epoch": 1.698870186765045, "grad_norm": 1.3009015849639454, "learning_rate": 1.2160772003186027e-07, "loss": 0.4492420256137848, "step": 7368 }, { "epoch": 1.6991007608946276, "grad_norm": 1.6097888974991954, "learning_rate": 1.214255637356043e-07, "loss": 0.515146017074585, "step": 7369 }, { "epoch": 1.6993313350242103, "grad_norm": 1.5565943453492384, "learning_rate": 1.2124353514904707e-07, "loss": 0.41473329067230225, "step": 7370 }, { "epoch": 1.6995619091537928, "grad_norm": 1.6571527829218886, "learning_rate": 1.210616342986477e-07, "loss": 0.4408412575721741, "step": 7371 }, { "epoch": 1.6997924832833755, "grad_norm": 1.6546450900594125, "learning_rate": 1.208798612108477e-07, "loss": 0.5370820760726929, "step": 7372 }, { "epoch": 1.7000230574129582, "grad_norm": 1.502975927661507, "learning_rate": 1.206982159120693e-07, "loss": 0.46518170833587646, "step": 7373 }, { "epoch": 1.700253631542541, "grad_norm": 1.5801444025292624, "learning_rate": 1.205166984287167e-07, "loss": 0.45063477754592896, "step": 7374 }, { "epoch": 1.7004842056721237, "grad_norm": 1.4109266758667123, "learning_rate": 1.2033530878717546e-07, "loss": 0.47391965985298157, "step": 7375 }, { "epoch": 1.7007147798017064, "grad_norm": 1.680591382104731, "learning_rate": 1.2015404701381205e-07, "loss": 0.45812156796455383, "step": 7376 }, { "epoch": 1.7009453539312889, "grad_norm": 1.7661450796417113, "learning_rate": 1.1997291313497503e-07, "loss": 0.5174708366394043, "step": 7377 }, { "epoch": 1.7011759280608716, "grad_norm": 1.2379321910437706, "learning_rate": 1.1979190717699373e-07, "loss": 0.3412814736366272, "step": 7378 }, { "epoch": 1.701406502190454, "grad_norm": 1.6619687091053885, "learning_rate": 1.196110291661796e-07, "loss": 0.41912171244621277, "step": 7379 }, { "epoch": 1.7016370763200368, "grad_norm": 1.7384039938738447, "learning_rate": 1.1943027912882464e-07, "loss": 0.5569772720336914, "step": 7380 }, { "epoch": 1.7018676504496195, "grad_norm": 1.309448309717786, "learning_rate": 1.1924965709120304e-07, "loss": 0.40875375270843506, "step": 7381 }, { "epoch": 1.7020982245792022, "grad_norm": 1.5803953469974217, "learning_rate": 1.1906916307956983e-07, "loss": 0.46906760334968567, "step": 7382 }, { "epoch": 1.702328798708785, "grad_norm": 1.2850228520937832, "learning_rate": 1.1888879712016165e-07, "loss": 0.40830397605895996, "step": 7383 }, { "epoch": 1.7025593728383677, "grad_norm": 1.4770811279187035, "learning_rate": 1.1870855923919687e-07, "loss": 0.4051646590232849, "step": 7384 }, { "epoch": 1.7027899469679502, "grad_norm": 1.696009847928002, "learning_rate": 1.1852844946287432e-07, "loss": 0.5042610764503479, "step": 7385 }, { "epoch": 1.7030205210975329, "grad_norm": 1.6262740295484197, "learning_rate": 1.183484678173754e-07, "loss": 0.5304923057556152, "step": 7386 }, { "epoch": 1.7032510952271154, "grad_norm": 1.2604579461831944, "learning_rate": 1.1816861432886171e-07, "loss": 0.443366676568985, "step": 7387 }, { "epoch": 1.703481669356698, "grad_norm": 1.3836719865657088, "learning_rate": 1.1798888902347714e-07, "loss": 0.4527779817581177, "step": 7388 }, { "epoch": 1.7037122434862808, "grad_norm": 1.3616715508883823, "learning_rate": 1.1780929192734634e-07, "loss": 0.4277183413505554, "step": 7389 }, { "epoch": 1.7039428176158635, "grad_norm": 1.3714415020573154, "learning_rate": 1.1762982306657577e-07, "loss": 0.4908677637577057, "step": 7390 }, { "epoch": 1.7041733917454462, "grad_norm": 1.4373179697113392, "learning_rate": 1.1745048246725286e-07, "loss": 0.398892879486084, "step": 7391 }, { "epoch": 1.704403965875029, "grad_norm": 1.801155926723525, "learning_rate": 1.1727127015544691e-07, "loss": 0.4654615521430969, "step": 7392 }, { "epoch": 1.7046345400046115, "grad_norm": 1.6258673974312492, "learning_rate": 1.1709218615720806e-07, "loss": 0.4850313663482666, "step": 7393 }, { "epoch": 1.7048651141341942, "grad_norm": 1.3854283292952871, "learning_rate": 1.1691323049856772e-07, "loss": 0.4036976099014282, "step": 7394 }, { "epoch": 1.7050956882637767, "grad_norm": 1.6824325261066553, "learning_rate": 1.167344032055394e-07, "loss": 0.39174383878707886, "step": 7395 }, { "epoch": 1.7053262623933594, "grad_norm": 1.49190685623753, "learning_rate": 1.1655570430411699e-07, "loss": 0.44915109872817993, "step": 7396 }, { "epoch": 1.705556836522942, "grad_norm": 1.4487302731781821, "learning_rate": 1.1637713382027636e-07, "loss": 0.4720522165298462, "step": 7397 }, { "epoch": 1.7057874106525248, "grad_norm": 1.5236154065511855, "learning_rate": 1.1619869177997455e-07, "loss": 0.4452325105667114, "step": 7398 }, { "epoch": 1.7060179847821075, "grad_norm": 1.489108876491428, "learning_rate": 1.1602037820915023e-07, "loss": 0.4009271562099457, "step": 7399 }, { "epoch": 1.7062485589116902, "grad_norm": 1.3320502296097492, "learning_rate": 1.1584219313372257e-07, "loss": 0.37518051266670227, "step": 7400 }, { "epoch": 1.7064791330412727, "grad_norm": 1.5361245639590775, "learning_rate": 1.1566413657959295e-07, "loss": 0.42883241176605225, "step": 7401 }, { "epoch": 1.7067097071708555, "grad_norm": 1.5311391941499002, "learning_rate": 1.1548620857264346e-07, "loss": 0.4597551226615906, "step": 7402 }, { "epoch": 1.706940281300438, "grad_norm": 1.4815045613998048, "learning_rate": 1.1530840913873797e-07, "loss": 0.5491876006126404, "step": 7403 }, { "epoch": 1.7071708554300207, "grad_norm": 1.8810828492754625, "learning_rate": 1.1513073830372122e-07, "loss": 0.5632074475288391, "step": 7404 }, { "epoch": 1.7074014295596034, "grad_norm": 1.557196455612015, "learning_rate": 1.1495319609341947e-07, "loss": 0.5251858234405518, "step": 7405 }, { "epoch": 1.707632003689186, "grad_norm": 1.7979639485315768, "learning_rate": 1.1477578253364028e-07, "loss": 0.5388965606689453, "step": 7406 }, { "epoch": 1.7078625778187688, "grad_norm": 1.7322317596816112, "learning_rate": 1.145984976501726e-07, "loss": 0.4429551959037781, "step": 7407 }, { "epoch": 1.7080931519483515, "grad_norm": 1.5048923212213088, "learning_rate": 1.144213414687868e-07, "loss": 0.4702358841896057, "step": 7408 }, { "epoch": 1.708323726077934, "grad_norm": 1.616629635802576, "learning_rate": 1.1424431401523382e-07, "loss": 0.4506569504737854, "step": 7409 }, { "epoch": 1.7085543002075168, "grad_norm": 1.5722880063833475, "learning_rate": 1.1406741531524689e-07, "loss": 0.384244441986084, "step": 7410 }, { "epoch": 1.7087848743370992, "grad_norm": 1.6254816299222574, "learning_rate": 1.1389064539453952e-07, "loss": 0.4642629027366638, "step": 7411 }, { "epoch": 1.709015448466682, "grad_norm": 1.5180284715923413, "learning_rate": 1.1371400427880761e-07, "loss": 0.4568482041358948, "step": 7412 }, { "epoch": 1.7092460225962647, "grad_norm": 1.6058744016500281, "learning_rate": 1.135374919937272e-07, "loss": 0.536895215511322, "step": 7413 }, { "epoch": 1.7094765967258474, "grad_norm": 1.6944575711634469, "learning_rate": 1.1336110856495628e-07, "loss": 0.49696239829063416, "step": 7414 }, { "epoch": 1.7097071708554301, "grad_norm": 1.802031783829704, "learning_rate": 1.1318485401813438e-07, "loss": 0.3857358694076538, "step": 7415 }, { "epoch": 1.7099377449850128, "grad_norm": 1.5410848248596472, "learning_rate": 1.1300872837888121e-07, "loss": 0.38111335039138794, "step": 7416 }, { "epoch": 1.7101683191145953, "grad_norm": 1.6014644101172142, "learning_rate": 1.1283273167279906e-07, "loss": 0.4255755543708801, "step": 7417 }, { "epoch": 1.710398893244178, "grad_norm": 1.6646696692039435, "learning_rate": 1.1265686392547024e-07, "loss": 0.5048757791519165, "step": 7418 }, { "epoch": 1.7106294673737605, "grad_norm": 1.6262992093918878, "learning_rate": 1.1248112516245944e-07, "loss": 0.5402916073799133, "step": 7419 }, { "epoch": 1.7108600415033433, "grad_norm": 1.6105931834922984, "learning_rate": 1.1230551540931165e-07, "loss": 0.3617591857910156, "step": 7420 }, { "epoch": 1.711090615632926, "grad_norm": 1.584818843359006, "learning_rate": 1.1213003469155369e-07, "loss": 0.4636116921901703, "step": 7421 }, { "epoch": 1.7113211897625087, "grad_norm": 1.7626797404606351, "learning_rate": 1.1195468303469346e-07, "loss": 0.4675198495388031, "step": 7422 }, { "epoch": 1.7115517638920914, "grad_norm": 1.6024517382949015, "learning_rate": 1.1177946046422038e-07, "loss": 0.48491787910461426, "step": 7423 }, { "epoch": 1.7117823380216741, "grad_norm": 1.5413352133121294, "learning_rate": 1.1160436700560449e-07, "loss": 0.3898283839225769, "step": 7424 }, { "epoch": 1.7120129121512566, "grad_norm": 1.5514584947710022, "learning_rate": 1.1142940268429735e-07, "loss": 0.41522908210754395, "step": 7425 }, { "epoch": 1.7122434862808393, "grad_norm": 1.430903522239028, "learning_rate": 1.1125456752573215e-07, "loss": 0.4681985378265381, "step": 7426 }, { "epoch": 1.7124740604104218, "grad_norm": 1.8962296460852388, "learning_rate": 1.1107986155532245e-07, "loss": 0.4788553714752197, "step": 7427 }, { "epoch": 1.7127046345400045, "grad_norm": 1.5072364623848036, "learning_rate": 1.1090528479846406e-07, "loss": 0.43853843212127686, "step": 7428 }, { "epoch": 1.7129352086695873, "grad_norm": 1.542463594674994, "learning_rate": 1.107308372805329e-07, "loss": 0.3736591637134552, "step": 7429 }, { "epoch": 1.71316578279917, "grad_norm": 1.8237435289536401, "learning_rate": 1.1055651902688712e-07, "loss": 0.5770819783210754, "step": 7430 }, { "epoch": 1.7133963569287527, "grad_norm": 1.7972828104133267, "learning_rate": 1.1038233006286558e-07, "loss": 0.5906555652618408, "step": 7431 }, { "epoch": 1.7136269310583354, "grad_norm": 1.396062928601261, "learning_rate": 1.1020827041378844e-07, "loss": 0.4621407389640808, "step": 7432 }, { "epoch": 1.713857505187918, "grad_norm": 1.6487194571266346, "learning_rate": 1.1003434010495705e-07, "loss": 0.4203164279460907, "step": 7433 }, { "epoch": 1.7140880793175006, "grad_norm": 1.59720117870823, "learning_rate": 1.0986053916165373e-07, "loss": 0.4607565104961395, "step": 7434 }, { "epoch": 1.7143186534470831, "grad_norm": 1.4411738322949479, "learning_rate": 1.0968686760914248e-07, "loss": 0.47256794571876526, "step": 7435 }, { "epoch": 1.7145492275766658, "grad_norm": 2.1203032230505414, "learning_rate": 1.0951332547266778e-07, "loss": 0.479513943195343, "step": 7436 }, { "epoch": 1.7147798017062486, "grad_norm": 1.7633354860000339, "learning_rate": 1.0933991277745614e-07, "loss": 0.47687965631484985, "step": 7437 }, { "epoch": 1.7150103758358313, "grad_norm": 1.6696730348311766, "learning_rate": 1.091666295487147e-07, "loss": 0.45799845457077026, "step": 7438 }, { "epoch": 1.715240949965414, "grad_norm": 1.4765505689651048, "learning_rate": 1.089934758116322e-07, "loss": 0.43398863077163696, "step": 7439 }, { "epoch": 1.7154715240949967, "grad_norm": 1.627580558092534, "learning_rate": 1.0882045159137788e-07, "loss": 0.4098217189311981, "step": 7440 }, { "epoch": 1.7157020982245792, "grad_norm": 1.8062601643320504, "learning_rate": 1.086475569131029e-07, "loss": 0.49889707565307617, "step": 7441 }, { "epoch": 1.715932672354162, "grad_norm": 1.4613353368332702, "learning_rate": 1.0847479180193897e-07, "loss": 0.4187192916870117, "step": 7442 }, { "epoch": 1.7161632464837444, "grad_norm": 2.068945016126778, "learning_rate": 1.0830215628299954e-07, "loss": 0.44331133365631104, "step": 7443 }, { "epoch": 1.7163938206133271, "grad_norm": 1.6773749938074582, "learning_rate": 1.0812965038137856e-07, "loss": 0.4888196587562561, "step": 7444 }, { "epoch": 1.7166243947429098, "grad_norm": 1.6578617629701122, "learning_rate": 1.0795727412215183e-07, "loss": 0.4884798228740692, "step": 7445 }, { "epoch": 1.7168549688724926, "grad_norm": 1.5723023883356735, "learning_rate": 1.07785027530376e-07, "loss": 0.45655232667922974, "step": 7446 }, { "epoch": 1.7170855430020753, "grad_norm": 1.685893884498356, "learning_rate": 1.0761291063108857e-07, "loss": 0.3086237907409668, "step": 7447 }, { "epoch": 1.717316117131658, "grad_norm": 1.5738053973393145, "learning_rate": 1.0744092344930888e-07, "loss": 0.4279823899269104, "step": 7448 }, { "epoch": 1.7175466912612405, "grad_norm": 1.7221029802689058, "learning_rate": 1.072690660100366e-07, "loss": 0.4241681396961212, "step": 7449 }, { "epoch": 1.7177772653908232, "grad_norm": 1.7874830878272077, "learning_rate": 1.070973383382533e-07, "loss": 0.47086501121520996, "step": 7450 }, { "epoch": 1.7180078395204057, "grad_norm": 1.3780373187479635, "learning_rate": 1.0692574045892099e-07, "loss": 0.43798619508743286, "step": 7451 }, { "epoch": 1.7182384136499884, "grad_norm": 1.7289936352675708, "learning_rate": 1.0675427239698354e-07, "loss": 0.5781964659690857, "step": 7452 }, { "epoch": 1.7184689877795711, "grad_norm": 1.4621228929512655, "learning_rate": 1.0658293417736508e-07, "loss": 0.4850879907608032, "step": 7453 }, { "epoch": 1.7186995619091539, "grad_norm": 1.3236244677460836, "learning_rate": 1.064117258249717e-07, "loss": 0.40468811988830566, "step": 7454 }, { "epoch": 1.7189301360387366, "grad_norm": 1.7069112900372936, "learning_rate": 1.0624064736469052e-07, "loss": 0.4054880142211914, "step": 7455 }, { "epoch": 1.719160710168319, "grad_norm": 1.7589002706519377, "learning_rate": 1.0606969882138894e-07, "loss": 0.38633522391319275, "step": 7456 }, { "epoch": 1.7193912842979018, "grad_norm": 1.6917357500409704, "learning_rate": 1.0589888021991644e-07, "loss": 0.4287499785423279, "step": 7457 }, { "epoch": 1.7196218584274843, "grad_norm": 1.613018561241669, "learning_rate": 1.0572819158510316e-07, "loss": 0.49269533157348633, "step": 7458 }, { "epoch": 1.719852432557067, "grad_norm": 1.4600608769783265, "learning_rate": 1.0555763294176045e-07, "loss": 0.38874679803848267, "step": 7459 }, { "epoch": 1.7200830066866497, "grad_norm": 1.5663184097893508, "learning_rate": 1.0538720431468051e-07, "loss": 0.4381089508533478, "step": 7460 }, { "epoch": 1.7203135808162324, "grad_norm": 1.6242553694361792, "learning_rate": 1.0521690572863706e-07, "loss": 0.4550422430038452, "step": 7461 }, { "epoch": 1.7205441549458151, "grad_norm": 1.5017985009159773, "learning_rate": 1.0504673720838476e-07, "loss": 0.5173785090446472, "step": 7462 }, { "epoch": 1.7207747290753979, "grad_norm": 1.4906138636113029, "learning_rate": 1.0487669877865945e-07, "loss": 0.5082184076309204, "step": 7463 }, { "epoch": 1.7210053032049804, "grad_norm": 1.7383580581523643, "learning_rate": 1.0470679046417786e-07, "loss": 0.49810969829559326, "step": 7464 }, { "epoch": 1.721235877334563, "grad_norm": 1.7302456540952424, "learning_rate": 1.0453701228963751e-07, "loss": 0.47808337211608887, "step": 7465 }, { "epoch": 1.7214664514641456, "grad_norm": 1.6093569631380469, "learning_rate": 1.0436736427971782e-07, "loss": 0.5100537538528442, "step": 7466 }, { "epoch": 1.7216970255937283, "grad_norm": 1.5019138408689112, "learning_rate": 1.0419784645907858e-07, "loss": 0.44948023557662964, "step": 7467 }, { "epoch": 1.721927599723311, "grad_norm": 1.3792836042899619, "learning_rate": 1.040284588523611e-07, "loss": 0.4653180241584778, "step": 7468 }, { "epoch": 1.7221581738528937, "grad_norm": 1.901421358760061, "learning_rate": 1.0385920148418737e-07, "loss": 0.4930723309516907, "step": 7469 }, { "epoch": 1.7223887479824764, "grad_norm": 1.5964124799736943, "learning_rate": 1.036900743791611e-07, "loss": 0.48883867263793945, "step": 7470 }, { "epoch": 1.7226193221120591, "grad_norm": 1.27924002772244, "learning_rate": 1.0352107756186624e-07, "loss": 0.4030319154262543, "step": 7471 }, { "epoch": 1.7228498962416416, "grad_norm": 1.8060139526740588, "learning_rate": 1.033522110568683e-07, "loss": 0.4174875319004059, "step": 7472 }, { "epoch": 1.7230804703712244, "grad_norm": 1.731157383735833, "learning_rate": 1.0318347488871371e-07, "loss": 0.5152361392974854, "step": 7473 }, { "epoch": 1.7233110445008069, "grad_norm": 1.3983774946509473, "learning_rate": 1.0301486908193014e-07, "loss": 0.43221428990364075, "step": 7474 }, { "epoch": 1.7235416186303896, "grad_norm": 1.6931290113673243, "learning_rate": 1.0284639366102598e-07, "loss": 0.4239969849586487, "step": 7475 }, { "epoch": 1.7237721927599723, "grad_norm": 1.5094560861426634, "learning_rate": 1.0267804865049068e-07, "loss": 0.5171400904655457, "step": 7476 }, { "epoch": 1.724002766889555, "grad_norm": 1.3913671775557208, "learning_rate": 1.0250983407479518e-07, "loss": 0.45670178532600403, "step": 7477 }, { "epoch": 1.7242333410191377, "grad_norm": 1.3489970844922, "learning_rate": 1.0234174995839107e-07, "loss": 0.36458373069763184, "step": 7478 }, { "epoch": 1.7244639151487204, "grad_norm": 1.6926167509742018, "learning_rate": 1.0217379632571122e-07, "loss": 0.4940750002861023, "step": 7479 }, { "epoch": 1.724694489278303, "grad_norm": 1.3742895139526408, "learning_rate": 1.0200597320116911e-07, "loss": 0.43453872203826904, "step": 7480 }, { "epoch": 1.7249250634078857, "grad_norm": 1.4325916198137496, "learning_rate": 1.0183828060915989e-07, "loss": 0.49255162477493286, "step": 7481 }, { "epoch": 1.7251556375374681, "grad_norm": 1.5551839406586245, "learning_rate": 1.0167071857405906e-07, "loss": 0.46221014857292175, "step": 7482 }, { "epoch": 1.7253862116670509, "grad_norm": 1.6044214909369097, "learning_rate": 1.015032871202236e-07, "loss": 0.43426087498664856, "step": 7483 }, { "epoch": 1.7256167857966336, "grad_norm": 1.3471292376409894, "learning_rate": 1.0133598627199136e-07, "loss": 0.45327985286712646, "step": 7484 }, { "epoch": 1.7258473599262163, "grad_norm": 1.7300792096053668, "learning_rate": 1.011688160536811e-07, "loss": 0.4691676199436188, "step": 7485 }, { "epoch": 1.726077934055799, "grad_norm": 1.7168424748125397, "learning_rate": 1.0100177648959296e-07, "loss": 0.5080254077911377, "step": 7486 }, { "epoch": 1.7263085081853817, "grad_norm": 1.3360541862160926, "learning_rate": 1.008348676040075e-07, "loss": 0.34122025966644287, "step": 7487 }, { "epoch": 1.7265390823149642, "grad_norm": 1.650892930499383, "learning_rate": 1.0066808942118699e-07, "loss": 0.44408074021339417, "step": 7488 }, { "epoch": 1.726769656444547, "grad_norm": 1.4603224951411022, "learning_rate": 1.0050144196537402e-07, "loss": 0.3777790665626526, "step": 7489 }, { "epoch": 1.7270002305741294, "grad_norm": 1.6365267437093343, "learning_rate": 1.0033492526079279e-07, "loss": 0.48730146884918213, "step": 7490 }, { "epoch": 1.7272308047037122, "grad_norm": 1.5792338555913825, "learning_rate": 1.001685393316477e-07, "loss": 0.35903626680374146, "step": 7491 }, { "epoch": 1.7274613788332949, "grad_norm": 1.3953813288199584, "learning_rate": 1.0000228420212509e-07, "loss": 0.37729373574256897, "step": 7492 }, { "epoch": 1.7276919529628776, "grad_norm": 1.6314801226105193, "learning_rate": 9.98361598963916e-08, "loss": 0.4388326406478882, "step": 7493 }, { "epoch": 1.7279225270924603, "grad_norm": 1.4829220781258674, "learning_rate": 9.967016643859527e-08, "loss": 0.45095232129096985, "step": 7494 }, { "epoch": 1.728153101222043, "grad_norm": 1.5130736602015042, "learning_rate": 9.95043038528649e-08, "loss": 0.4736475944519043, "step": 7495 }, { "epoch": 1.7283836753516255, "grad_norm": 1.6393405202034401, "learning_rate": 9.933857216330999e-08, "loss": 0.2984190285205841, "step": 7496 }, { "epoch": 1.7286142494812082, "grad_norm": 1.5993261500159095, "learning_rate": 9.91729713940218e-08, "loss": 0.45391780138015747, "step": 7497 }, { "epoch": 1.7288448236107907, "grad_norm": 1.732905558263472, "learning_rate": 9.900750156907157e-08, "loss": 0.5150727033615112, "step": 7498 }, { "epoch": 1.7290753977403734, "grad_norm": 1.372519788443724, "learning_rate": 9.884216271251256e-08, "loss": 0.41298598051071167, "step": 7499 }, { "epoch": 1.7293059718699562, "grad_norm": 1.5310483983437806, "learning_rate": 9.86769548483779e-08, "loss": 0.4820541441440582, "step": 7500 }, { "epoch": 1.7295365459995389, "grad_norm": 1.4103659952581913, "learning_rate": 9.85118780006825e-08, "loss": 0.4148511290550232, "step": 7501 }, { "epoch": 1.7297671201291216, "grad_norm": 1.535383378975012, "learning_rate": 9.834693219342183e-08, "loss": 0.39676210284233093, "step": 7502 }, { "epoch": 1.7299976942587043, "grad_norm": 1.3969764743432636, "learning_rate": 9.818211745057292e-08, "loss": 0.3665908873081207, "step": 7503 }, { "epoch": 1.7302282683882868, "grad_norm": 1.5255452230855382, "learning_rate": 9.801743379609274e-08, "loss": 0.39340025186538696, "step": 7504 }, { "epoch": 1.7304588425178695, "grad_norm": 1.4673439514671116, "learning_rate": 9.785288125391977e-08, "loss": 0.4677412807941437, "step": 7505 }, { "epoch": 1.730689416647452, "grad_norm": 1.8421716352805986, "learning_rate": 9.768845984797369e-08, "loss": 0.49413764476776123, "step": 7506 }, { "epoch": 1.7309199907770347, "grad_norm": 2.1097980684598223, "learning_rate": 9.752416960215437e-08, "loss": 0.5312438607215881, "step": 7507 }, { "epoch": 1.7311505649066175, "grad_norm": 1.408973464564324, "learning_rate": 9.736001054034338e-08, "loss": 0.38522863388061523, "step": 7508 }, { "epoch": 1.7313811390362002, "grad_norm": 1.4496862609377634, "learning_rate": 9.719598268640283e-08, "loss": 0.49167078733444214, "step": 7509 }, { "epoch": 1.7316117131657829, "grad_norm": 1.7071655256469307, "learning_rate": 9.7032086064176e-08, "loss": 0.4465949535369873, "step": 7510 }, { "epoch": 1.7318422872953656, "grad_norm": 1.580755639233498, "learning_rate": 9.686832069748663e-08, "loss": 0.4627634882926941, "step": 7511 }, { "epoch": 1.732072861424948, "grad_norm": 1.5945960217093318, "learning_rate": 9.670468661013998e-08, "loss": 0.4188409447669983, "step": 7512 }, { "epoch": 1.7323034355545308, "grad_norm": 1.6767285085334622, "learning_rate": 9.654118382592146e-08, "loss": 0.5775213241577148, "step": 7513 }, { "epoch": 1.7325340096841133, "grad_norm": 1.4889326648746473, "learning_rate": 9.637781236859843e-08, "loss": 0.43912672996520996, "step": 7514 }, { "epoch": 1.732764583813696, "grad_norm": 1.677177851910315, "learning_rate": 9.62145722619182e-08, "loss": 0.5364755392074585, "step": 7515 }, { "epoch": 1.7329951579432787, "grad_norm": 1.5135890648676678, "learning_rate": 9.605146352960935e-08, "loss": 0.4832648038864136, "step": 7516 }, { "epoch": 1.7332257320728615, "grad_norm": 1.640472153194824, "learning_rate": 9.588848619538182e-08, "loss": 0.36932459473609924, "step": 7517 }, { "epoch": 1.7334563062024442, "grad_norm": 1.4731235594964114, "learning_rate": 9.57256402829254e-08, "loss": 0.43458276987075806, "step": 7518 }, { "epoch": 1.733686880332027, "grad_norm": 1.457966513875051, "learning_rate": 9.556292581591196e-08, "loss": 0.41533568501472473, "step": 7519 }, { "epoch": 1.7339174544616094, "grad_norm": 1.4363289807621746, "learning_rate": 9.540034281799325e-08, "loss": 0.45898690819740295, "step": 7520 }, { "epoch": 1.734148028591192, "grad_norm": 1.610315429506808, "learning_rate": 9.523789131280279e-08, "loss": 0.3321181535720825, "step": 7521 }, { "epoch": 1.7343786027207746, "grad_norm": 1.5824862936232118, "learning_rate": 9.507557132395416e-08, "loss": 0.3926161229610443, "step": 7522 }, { "epoch": 1.7346091768503573, "grad_norm": 1.264710302836967, "learning_rate": 9.491338287504247e-08, "loss": 0.41051846742630005, "step": 7523 }, { "epoch": 1.73483975097994, "grad_norm": 1.3604853902379428, "learning_rate": 9.47513259896432e-08, "loss": 0.4440652132034302, "step": 7524 }, { "epoch": 1.7350703251095227, "grad_norm": 1.5933781203678954, "learning_rate": 9.458940069131304e-08, "loss": 0.5175125598907471, "step": 7525 }, { "epoch": 1.7353008992391055, "grad_norm": 1.4535445480892137, "learning_rate": 9.442760700358987e-08, "loss": 0.45521751046180725, "step": 7526 }, { "epoch": 1.7355314733686882, "grad_norm": 1.5707484811695662, "learning_rate": 9.426594494999151e-08, "loss": 0.5133911967277527, "step": 7527 }, { "epoch": 1.7357620474982707, "grad_norm": 1.8770278394623805, "learning_rate": 9.410441455401752e-08, "loss": 0.4397609233856201, "step": 7528 }, { "epoch": 1.7359926216278534, "grad_norm": 3.7292879258339693, "learning_rate": 9.394301583914765e-08, "loss": 0.4503510594367981, "step": 7529 }, { "epoch": 1.7362231957574359, "grad_norm": 1.5909450336667472, "learning_rate": 9.378174882884327e-08, "loss": 0.44119834899902344, "step": 7530 }, { "epoch": 1.7364537698870186, "grad_norm": 1.5959659498105105, "learning_rate": 9.362061354654583e-08, "loss": 0.46257996559143066, "step": 7531 }, { "epoch": 1.7366843440166013, "grad_norm": 1.4727698319610416, "learning_rate": 9.345961001567792e-08, "loss": 0.4468308687210083, "step": 7532 }, { "epoch": 1.736914918146184, "grad_norm": 1.329652616869682, "learning_rate": 9.32987382596433e-08, "loss": 0.3837989568710327, "step": 7533 }, { "epoch": 1.7371454922757668, "grad_norm": 1.7149798865191848, "learning_rate": 9.313799830182644e-08, "loss": 0.4224961996078491, "step": 7534 }, { "epoch": 1.7373760664053495, "grad_norm": 1.3527154365554523, "learning_rate": 9.297739016559225e-08, "loss": 0.37379956245422363, "step": 7535 }, { "epoch": 1.737606640534932, "grad_norm": 1.3983736958193809, "learning_rate": 9.281691387428658e-08, "loss": 0.4204242527484894, "step": 7536 }, { "epoch": 1.7378372146645147, "grad_norm": 1.550547566194999, "learning_rate": 9.265656945123678e-08, "loss": 0.5270572900772095, "step": 7537 }, { "epoch": 1.7380677887940972, "grad_norm": 1.6826850331086136, "learning_rate": 9.249635691975e-08, "loss": 0.44208282232284546, "step": 7538 }, { "epoch": 1.73829836292368, "grad_norm": 1.158547237110862, "learning_rate": 9.233627630311502e-08, "loss": 0.32514283061027527, "step": 7539 }, { "epoch": 1.7385289370532626, "grad_norm": 1.42135951118167, "learning_rate": 9.217632762460126e-08, "loss": 0.35472434759140015, "step": 7540 }, { "epoch": 1.7387595111828453, "grad_norm": 1.9134735814581072, "learning_rate": 9.201651090745888e-08, "loss": 0.5034215450286865, "step": 7541 }, { "epoch": 1.738990085312428, "grad_norm": 1.4950522917395752, "learning_rate": 9.185682617491863e-08, "loss": 0.4779762029647827, "step": 7542 }, { "epoch": 1.7392206594420108, "grad_norm": 1.7544463226218252, "learning_rate": 9.169727345019263e-08, "loss": 0.4964079260826111, "step": 7543 }, { "epoch": 1.7394512335715933, "grad_norm": 1.8208500448761544, "learning_rate": 9.153785275647319e-08, "loss": 0.5125068426132202, "step": 7544 }, { "epoch": 1.739681807701176, "grad_norm": 1.369096268264849, "learning_rate": 9.13785641169339e-08, "loss": 0.39051756262779236, "step": 7545 }, { "epoch": 1.7399123818307585, "grad_norm": 1.6132499721446665, "learning_rate": 9.121940755472901e-08, "loss": 0.45951950550079346, "step": 7546 }, { "epoch": 1.7401429559603412, "grad_norm": 1.402513218333582, "learning_rate": 9.106038309299302e-08, "loss": 0.42676979303359985, "step": 7547 }, { "epoch": 1.740373530089924, "grad_norm": 1.6248647623340229, "learning_rate": 9.090149075484255e-08, "loss": 0.3585033416748047, "step": 7548 }, { "epoch": 1.7406041042195066, "grad_norm": 1.5204418845888263, "learning_rate": 9.074273056337366e-08, "loss": 0.4613775312900543, "step": 7549 }, { "epoch": 1.7408346783490893, "grad_norm": 1.5756472296671777, "learning_rate": 9.058410254166415e-08, "loss": 0.48934412002563477, "step": 7550 }, { "epoch": 1.741065252478672, "grad_norm": 2.3682357853653895, "learning_rate": 9.042560671277177e-08, "loss": 0.5749069452285767, "step": 7551 }, { "epoch": 1.7412958266082545, "grad_norm": 1.4990310296288942, "learning_rate": 9.026724309973588e-08, "loss": 0.4760423004627228, "step": 7552 }, { "epoch": 1.7415264007378373, "grad_norm": 1.38070744019409, "learning_rate": 9.010901172557594e-08, "loss": 0.43080049753189087, "step": 7553 }, { "epoch": 1.7417569748674198, "grad_norm": 1.4636238536042068, "learning_rate": 8.99509126132928e-08, "loss": 0.44850271940231323, "step": 7554 }, { "epoch": 1.7419875489970025, "grad_norm": 1.5357653243690434, "learning_rate": 8.979294578586738e-08, "loss": 0.34593498706817627, "step": 7555 }, { "epoch": 1.7422181231265852, "grad_norm": 1.3635590695208566, "learning_rate": 8.963511126626188e-08, "loss": 0.3738324046134949, "step": 7556 }, { "epoch": 1.742448697256168, "grad_norm": 1.6262402635208488, "learning_rate": 8.947740907741952e-08, "loss": 0.47988662123680115, "step": 7557 }, { "epoch": 1.7426792713857506, "grad_norm": 1.904530616299084, "learning_rate": 8.931983924226338e-08, "loss": 0.5863034725189209, "step": 7558 }, { "epoch": 1.7429098455153333, "grad_norm": 1.497315511162884, "learning_rate": 8.916240178369827e-08, "loss": 0.38455232977867126, "step": 7559 }, { "epoch": 1.7431404196449158, "grad_norm": 1.711133818053075, "learning_rate": 8.900509672460899e-08, "loss": 0.3919760584831238, "step": 7560 }, { "epoch": 1.7433709937744986, "grad_norm": 1.8876361089943499, "learning_rate": 8.884792408786169e-08, "loss": 0.4090653657913208, "step": 7561 }, { "epoch": 1.743601567904081, "grad_norm": 1.458591423296693, "learning_rate": 8.869088389630264e-08, "loss": 0.42597073316574097, "step": 7562 }, { "epoch": 1.7438321420336638, "grad_norm": 1.4410906971279085, "learning_rate": 8.853397617275959e-08, "loss": 0.38760805130004883, "step": 7563 }, { "epoch": 1.7440627161632465, "grad_norm": 1.3930314463175644, "learning_rate": 8.837720094004042e-08, "loss": 0.3753165900707245, "step": 7564 }, { "epoch": 1.7442932902928292, "grad_norm": 1.4708100181524995, "learning_rate": 8.822055822093432e-08, "loss": 0.5169536471366882, "step": 7565 }, { "epoch": 1.744523864422412, "grad_norm": 1.436339252382814, "learning_rate": 8.806404803821077e-08, "loss": 0.3886902332305908, "step": 7566 }, { "epoch": 1.7447544385519944, "grad_norm": 1.7378167101447366, "learning_rate": 8.790767041461977e-08, "loss": 0.48971402645111084, "step": 7567 }, { "epoch": 1.7449850126815771, "grad_norm": 1.3555756556469605, "learning_rate": 8.775142537289282e-08, "loss": 0.4656449556350708, "step": 7568 }, { "epoch": 1.7452155868111596, "grad_norm": 1.24689144854066, "learning_rate": 8.75953129357414e-08, "loss": 0.43197786808013916, "step": 7569 }, { "epoch": 1.7454461609407423, "grad_norm": 1.6584429086506909, "learning_rate": 8.743933312585816e-08, "loss": 0.5062606930732727, "step": 7570 }, { "epoch": 1.745676735070325, "grad_norm": 1.714345013647294, "learning_rate": 8.728348596591639e-08, "loss": 0.5489983558654785, "step": 7571 }, { "epoch": 1.7459073091999078, "grad_norm": 1.4457283500823468, "learning_rate": 8.712777147857031e-08, "loss": 0.4351652264595032, "step": 7572 }, { "epoch": 1.7461378833294905, "grad_norm": 2.160367880410759, "learning_rate": 8.697218968645403e-08, "loss": 0.5096884965896606, "step": 7573 }, { "epoch": 1.7463684574590732, "grad_norm": 1.2837319415683648, "learning_rate": 8.681674061218347e-08, "loss": 0.3127269744873047, "step": 7574 }, { "epoch": 1.7465990315886557, "grad_norm": 1.8378362837335938, "learning_rate": 8.666142427835443e-08, "loss": 0.4738629460334778, "step": 7575 }, { "epoch": 1.7468296057182384, "grad_norm": 1.5090024147723615, "learning_rate": 8.650624070754375e-08, "loss": 0.46921902894973755, "step": 7576 }, { "epoch": 1.747060179847821, "grad_norm": 1.578667567709185, "learning_rate": 8.635118992230906e-08, "loss": 0.5296987891197205, "step": 7577 }, { "epoch": 1.7472907539774036, "grad_norm": 1.1732895039201416, "learning_rate": 8.619627194518819e-08, "loss": 0.3522387742996216, "step": 7578 }, { "epoch": 1.7475213281069863, "grad_norm": 1.550879536093582, "learning_rate": 8.604148679870049e-08, "loss": 0.42747724056243896, "step": 7579 }, { "epoch": 1.747751902236569, "grad_norm": 1.535695568842986, "learning_rate": 8.588683450534528e-08, "loss": 0.399990439414978, "step": 7580 }, { "epoch": 1.7479824763661518, "grad_norm": 1.688266581429453, "learning_rate": 8.573231508760315e-08, "loss": 0.48220518231391907, "step": 7581 }, { "epoch": 1.7482130504957345, "grad_norm": 1.8452105924711204, "learning_rate": 8.557792856793455e-08, "loss": 0.5227106213569641, "step": 7582 }, { "epoch": 1.748443624625317, "grad_norm": 1.596076015195143, "learning_rate": 8.542367496878178e-08, "loss": 0.5436732769012451, "step": 7583 }, { "epoch": 1.7486741987548997, "grad_norm": 1.5781135040763308, "learning_rate": 8.526955431256644e-08, "loss": 0.48398053646087646, "step": 7584 }, { "epoch": 1.7489047728844822, "grad_norm": 1.8109008330023073, "learning_rate": 8.511556662169217e-08, "loss": 0.5727924108505249, "step": 7585 }, { "epoch": 1.749135347014065, "grad_norm": 1.7451913815699138, "learning_rate": 8.496171191854229e-08, "loss": 0.48077693581581116, "step": 7586 }, { "epoch": 1.7493659211436476, "grad_norm": 1.4513314868999736, "learning_rate": 8.480799022548113e-08, "loss": 0.45447635650634766, "step": 7587 }, { "epoch": 1.7495964952732304, "grad_norm": 1.7305734402801412, "learning_rate": 8.465440156485392e-08, "loss": 0.4605486989021301, "step": 7588 }, { "epoch": 1.749827069402813, "grad_norm": 1.6087138586576477, "learning_rate": 8.450094595898604e-08, "loss": 0.4229927062988281, "step": 7589 }, { "epoch": 1.7500576435323958, "grad_norm": 1.371495589643338, "learning_rate": 8.434762343018408e-08, "loss": 0.43005260825157166, "step": 7590 }, { "epoch": 1.7502882176619783, "grad_norm": 1.739761797548497, "learning_rate": 8.41944340007349e-08, "loss": 0.47446098923683167, "step": 7591 }, { "epoch": 1.750518791791561, "grad_norm": 1.6084919754115274, "learning_rate": 8.40413776929062e-08, "loss": 0.40554216504096985, "step": 7592 }, { "epoch": 1.7507493659211435, "grad_norm": 1.2363538330087616, "learning_rate": 8.38884545289461e-08, "loss": 0.4144189953804016, "step": 7593 }, { "epoch": 1.7509799400507262, "grad_norm": 1.6677815347140812, "learning_rate": 8.373566453108361e-08, "loss": 0.449351966381073, "step": 7594 }, { "epoch": 1.751210514180309, "grad_norm": 1.8357616333643774, "learning_rate": 8.358300772152849e-08, "loss": 0.4584103226661682, "step": 7595 }, { "epoch": 1.7514410883098916, "grad_norm": 1.6545876792386258, "learning_rate": 8.343048412247066e-08, "loss": 0.4739362895488739, "step": 7596 }, { "epoch": 1.7516716624394744, "grad_norm": 1.3684829539670578, "learning_rate": 8.327809375608131e-08, "loss": 0.3970356583595276, "step": 7597 }, { "epoch": 1.751902236569057, "grad_norm": 1.390074068538192, "learning_rate": 8.312583664451157e-08, "loss": 0.4298238754272461, "step": 7598 }, { "epoch": 1.7521328106986396, "grad_norm": 1.5218432452457022, "learning_rate": 8.297371280989385e-08, "loss": 0.4920361340045929, "step": 7599 }, { "epoch": 1.7523633848282223, "grad_norm": 1.6001856104794878, "learning_rate": 8.282172227434059e-08, "loss": 0.5035870671272278, "step": 7600 }, { "epoch": 1.7525939589578048, "grad_norm": 1.8053658495544915, "learning_rate": 8.266986505994555e-08, "loss": 0.373248815536499, "step": 7601 }, { "epoch": 1.7528245330873875, "grad_norm": 2.0338367024251345, "learning_rate": 8.25181411887822e-08, "loss": 0.48491543531417847, "step": 7602 }, { "epoch": 1.7530551072169702, "grad_norm": 1.6403088167242337, "learning_rate": 8.236655068290554e-08, "loss": 0.4298476576805115, "step": 7603 }, { "epoch": 1.753285681346553, "grad_norm": 1.5503246605292686, "learning_rate": 8.221509356435064e-08, "loss": 0.48804932832717896, "step": 7604 }, { "epoch": 1.7535162554761357, "grad_norm": 1.595278442494436, "learning_rate": 8.206376985513353e-08, "loss": 0.467857301235199, "step": 7605 }, { "epoch": 1.7537468296057184, "grad_norm": 1.8978537163965867, "learning_rate": 8.19125795772504e-08, "loss": 0.48995548486709595, "step": 7606 }, { "epoch": 1.7539774037353009, "grad_norm": 1.488521983097995, "learning_rate": 8.176152275267823e-08, "loss": 0.4459487795829773, "step": 7607 }, { "epoch": 1.7542079778648836, "grad_norm": 1.4326042778667836, "learning_rate": 8.1610599403375e-08, "loss": 0.5054866671562195, "step": 7608 }, { "epoch": 1.754438551994466, "grad_norm": 1.4563884146816763, "learning_rate": 8.145980955127862e-08, "loss": 0.46223869919776917, "step": 7609 }, { "epoch": 1.7546691261240488, "grad_norm": 1.696768225081691, "learning_rate": 8.1309153218308e-08, "loss": 0.4743426442146301, "step": 7610 }, { "epoch": 1.7548997002536315, "grad_norm": 1.7623915082520603, "learning_rate": 8.115863042636262e-08, "loss": 0.40808072686195374, "step": 7611 }, { "epoch": 1.7551302743832142, "grad_norm": 1.3859431275297254, "learning_rate": 8.100824119732263e-08, "loss": 0.4452321231365204, "step": 7612 }, { "epoch": 1.755360848512797, "grad_norm": 1.556764426976114, "learning_rate": 8.085798555304824e-08, "loss": 0.4211857318878174, "step": 7613 }, { "epoch": 1.7555914226423797, "grad_norm": 1.5080375348033017, "learning_rate": 8.070786351538117e-08, "loss": 0.3356667757034302, "step": 7614 }, { "epoch": 1.7558219967719622, "grad_norm": 1.7842469682737618, "learning_rate": 8.055787510614287e-08, "loss": 0.4636021852493286, "step": 7615 }, { "epoch": 1.7560525709015449, "grad_norm": 1.624229543588168, "learning_rate": 8.040802034713546e-08, "loss": 0.4066168963909149, "step": 7616 }, { "epoch": 1.7562831450311274, "grad_norm": 1.4896510438449921, "learning_rate": 8.025829926014216e-08, "loss": 0.426937460899353, "step": 7617 }, { "epoch": 1.75651371916071, "grad_norm": 1.838065393231424, "learning_rate": 8.010871186692625e-08, "loss": 0.464493989944458, "step": 7618 }, { "epoch": 1.7567442932902928, "grad_norm": 1.7522078931434732, "learning_rate": 7.995925818923222e-08, "loss": 0.44130605459213257, "step": 7619 }, { "epoch": 1.7569748674198755, "grad_norm": 1.6877219329526134, "learning_rate": 7.980993824878402e-08, "loss": 0.5241909027099609, "step": 7620 }, { "epoch": 1.7572054415494582, "grad_norm": 1.605603526262718, "learning_rate": 7.96607520672874e-08, "loss": 0.45450860261917114, "step": 7621 }, { "epoch": 1.757436015679041, "grad_norm": 1.6393742771356723, "learning_rate": 7.951169966642757e-08, "loss": 0.443767786026001, "step": 7622 }, { "epoch": 1.7576665898086234, "grad_norm": 1.5258486167332923, "learning_rate": 7.936278106787131e-08, "loss": 0.3951075077056885, "step": 7623 }, { "epoch": 1.7578971639382062, "grad_norm": 1.8216713225734935, "learning_rate": 7.921399629326509e-08, "loss": 0.44628477096557617, "step": 7624 }, { "epoch": 1.7581277380677887, "grad_norm": 1.7421703870668572, "learning_rate": 7.906534536423648e-08, "loss": 0.38743889331817627, "step": 7625 }, { "epoch": 1.7583583121973714, "grad_norm": 1.4726686928375068, "learning_rate": 7.891682830239311e-08, "loss": 0.4338032007217407, "step": 7626 }, { "epoch": 1.758588886326954, "grad_norm": 1.7605246972541082, "learning_rate": 7.876844512932367e-08, "loss": 0.47387874126434326, "step": 7627 }, { "epoch": 1.7588194604565368, "grad_norm": 1.6222674378421518, "learning_rate": 7.86201958665973e-08, "loss": 0.4082717299461365, "step": 7628 }, { "epoch": 1.7590500345861195, "grad_norm": 1.462169761343313, "learning_rate": 7.847208053576326e-08, "loss": 0.4254682958126068, "step": 7629 }, { "epoch": 1.7592806087157022, "grad_norm": 1.319688989297758, "learning_rate": 7.832409915835181e-08, "loss": 0.3572045564651489, "step": 7630 }, { "epoch": 1.7595111828452847, "grad_norm": 1.398732808330898, "learning_rate": 7.817625175587328e-08, "loss": 0.39110279083251953, "step": 7631 }, { "epoch": 1.7597417569748675, "grad_norm": 2.455493892116574, "learning_rate": 7.802853834981926e-08, "loss": 0.49292176961898804, "step": 7632 }, { "epoch": 1.75997233110445, "grad_norm": 1.460109162216243, "learning_rate": 7.78809589616608e-08, "loss": 0.4271275997161865, "step": 7633 }, { "epoch": 1.7602029052340327, "grad_norm": 1.5973984242111468, "learning_rate": 7.77335136128503e-08, "loss": 0.470772922039032, "step": 7634 }, { "epoch": 1.7604334793636154, "grad_norm": 1.5415713448452681, "learning_rate": 7.758620232482083e-08, "loss": 0.4872988760471344, "step": 7635 }, { "epoch": 1.760664053493198, "grad_norm": 1.2959777480648245, "learning_rate": 7.743902511898492e-08, "loss": 0.4300990104675293, "step": 7636 }, { "epoch": 1.7608946276227808, "grad_norm": 1.4331560277043864, "learning_rate": 7.729198201673682e-08, "loss": 0.4524795711040497, "step": 7637 }, { "epoch": 1.7611252017523635, "grad_norm": 1.580884966063861, "learning_rate": 7.714507303945028e-08, "loss": 0.4673241376876831, "step": 7638 }, { "epoch": 1.761355775881946, "grad_norm": 1.7656151539321776, "learning_rate": 7.699829820848048e-08, "loss": 0.5171443223953247, "step": 7639 }, { "epoch": 1.7615863500115287, "grad_norm": 1.5721911288259287, "learning_rate": 7.68516575451621e-08, "loss": 0.44416171312332153, "step": 7640 }, { "epoch": 1.7618169241411112, "grad_norm": 1.8596688405579505, "learning_rate": 7.670515107081122e-08, "loss": 0.4456225633621216, "step": 7641 }, { "epoch": 1.762047498270694, "grad_norm": 1.427384194238264, "learning_rate": 7.65587788067239e-08, "loss": 0.5235984921455383, "step": 7642 }, { "epoch": 1.7622780724002767, "grad_norm": 1.5098894741733768, "learning_rate": 7.641254077417702e-08, "loss": 0.4957311749458313, "step": 7643 }, { "epoch": 1.7625086465298594, "grad_norm": 1.9524483698152115, "learning_rate": 7.626643699442748e-08, "loss": 0.48401015996932983, "step": 7644 }, { "epoch": 1.762739220659442, "grad_norm": 1.5925905896008645, "learning_rate": 7.612046748871326e-08, "loss": 0.5440249443054199, "step": 7645 }, { "epoch": 1.7629697947890248, "grad_norm": 1.5363697612706335, "learning_rate": 7.597463227825229e-08, "loss": 0.3922181725502014, "step": 7646 }, { "epoch": 1.7632003689186073, "grad_norm": 1.7121602067196948, "learning_rate": 7.582893138424318e-08, "loss": 0.4679541289806366, "step": 7647 }, { "epoch": 1.76343094304819, "grad_norm": 1.63738592997542, "learning_rate": 7.568336482786508e-08, "loss": 0.4461076557636261, "step": 7648 }, { "epoch": 1.7636615171777725, "grad_norm": 1.769800706819883, "learning_rate": 7.553793263027752e-08, "loss": 0.4028201997280121, "step": 7649 }, { "epoch": 1.7638920913073552, "grad_norm": 1.6924130336118084, "learning_rate": 7.53926348126206e-08, "loss": 0.47307640314102173, "step": 7650 }, { "epoch": 1.764122665436938, "grad_norm": 1.7236868707009407, "learning_rate": 7.524747139601473e-08, "loss": 0.4763333201408386, "step": 7651 }, { "epoch": 1.7643532395665207, "grad_norm": 1.5475351462285587, "learning_rate": 7.510244240156127e-08, "loss": 0.5062815546989441, "step": 7652 }, { "epoch": 1.7645838136961034, "grad_norm": 1.4648234779945293, "learning_rate": 7.495754785034114e-08, "loss": 0.38344740867614746, "step": 7653 }, { "epoch": 1.7648143878256861, "grad_norm": 1.5630602768230752, "learning_rate": 7.48127877634166e-08, "loss": 0.36255425214767456, "step": 7654 }, { "epoch": 1.7650449619552686, "grad_norm": 1.4144647369682326, "learning_rate": 7.466816216182969e-08, "loss": 0.4136468172073364, "step": 7655 }, { "epoch": 1.7652755360848513, "grad_norm": 1.5589028620208925, "learning_rate": 7.452367106660351e-08, "loss": 0.4294041395187378, "step": 7656 }, { "epoch": 1.7655061102144338, "grad_norm": 1.5271012787948486, "learning_rate": 7.437931449874101e-08, "loss": 0.3865356147289276, "step": 7657 }, { "epoch": 1.7657366843440165, "grad_norm": 1.5355711497321805, "learning_rate": 7.42350924792261e-08, "loss": 0.44538289308547974, "step": 7658 }, { "epoch": 1.7659672584735993, "grad_norm": 1.6285566114230512, "learning_rate": 7.409100502902299e-08, "loss": 0.4943844676017761, "step": 7659 }, { "epoch": 1.766197832603182, "grad_norm": 1.759721404059002, "learning_rate": 7.394705216907582e-08, "loss": 0.41705092787742615, "step": 7660 }, { "epoch": 1.7664284067327647, "grad_norm": 1.4175389623557053, "learning_rate": 7.380323392031018e-08, "loss": 0.4304206967353821, "step": 7661 }, { "epoch": 1.7666589808623474, "grad_norm": 1.3933381760031749, "learning_rate": 7.365955030363102e-08, "loss": 0.4830179214477539, "step": 7662 }, { "epoch": 1.76688955499193, "grad_norm": 1.51616499834235, "learning_rate": 7.351600133992452e-08, "loss": 0.47749078273773193, "step": 7663 }, { "epoch": 1.7671201291215126, "grad_norm": 1.4074934707168656, "learning_rate": 7.337258705005667e-08, "loss": 0.3899204730987549, "step": 7664 }, { "epoch": 1.7673507032510951, "grad_norm": 1.4123867126002758, "learning_rate": 7.322930745487443e-08, "loss": 0.4621524214744568, "step": 7665 }, { "epoch": 1.7675812773806778, "grad_norm": 1.725639837898645, "learning_rate": 7.308616257520506e-08, "loss": 0.5305047035217285, "step": 7666 }, { "epoch": 1.7678118515102605, "grad_norm": 2.1356750734168646, "learning_rate": 7.294315243185578e-08, "loss": 0.5894631147384644, "step": 7667 }, { "epoch": 1.7680424256398433, "grad_norm": 1.5389151696841823, "learning_rate": 7.280027704561498e-08, "loss": 0.38509970903396606, "step": 7668 }, { "epoch": 1.768272999769426, "grad_norm": 1.7309245548099654, "learning_rate": 7.265753643725048e-08, "loss": 0.45494410395622253, "step": 7669 }, { "epoch": 1.7685035738990087, "grad_norm": 1.7035489800713894, "learning_rate": 7.251493062751169e-08, "loss": 0.4819248914718628, "step": 7670 }, { "epoch": 1.7687341480285912, "grad_norm": 1.4325571648838293, "learning_rate": 7.237245963712724e-08, "loss": 0.43286386132240295, "step": 7671 }, { "epoch": 1.768964722158174, "grad_norm": 1.3036122364237743, "learning_rate": 7.223012348680724e-08, "loss": 0.4285479187965393, "step": 7672 }, { "epoch": 1.7691952962877564, "grad_norm": 1.6598071005655777, "learning_rate": 7.208792219724124e-08, "loss": 0.42678505182266235, "step": 7673 }, { "epoch": 1.7694258704173391, "grad_norm": 1.647090361621967, "learning_rate": 7.194585578909995e-08, "loss": 0.47091686725616455, "step": 7674 }, { "epoch": 1.7696564445469218, "grad_norm": 1.5115484466399114, "learning_rate": 7.180392428303394e-08, "loss": 0.41932445764541626, "step": 7675 }, { "epoch": 1.7698870186765046, "grad_norm": 1.2463006271885857, "learning_rate": 7.166212769967483e-08, "loss": 0.4043616056442261, "step": 7676 }, { "epoch": 1.7701175928060873, "grad_norm": 1.5310666660883137, "learning_rate": 7.15204660596338e-08, "loss": 0.395826518535614, "step": 7677 }, { "epoch": 1.7703481669356698, "grad_norm": 1.4874807127430703, "learning_rate": 7.13789393835027e-08, "loss": 0.4684498906135559, "step": 7678 }, { "epoch": 1.7705787410652525, "grad_norm": 1.8560085011670902, "learning_rate": 7.12375476918542e-08, "loss": 0.4713285565376282, "step": 7679 }, { "epoch": 1.770809315194835, "grad_norm": 1.487262641155755, "learning_rate": 7.109629100524073e-08, "loss": 0.47559499740600586, "step": 7680 }, { "epoch": 1.7710398893244177, "grad_norm": 1.5741914036439861, "learning_rate": 7.095516934419554e-08, "loss": 0.5364210605621338, "step": 7681 }, { "epoch": 1.7712704634540004, "grad_norm": 1.942648846069337, "learning_rate": 7.081418272923212e-08, "loss": 0.5731894969940186, "step": 7682 }, { "epoch": 1.7715010375835831, "grad_norm": 1.7006107903804015, "learning_rate": 7.067333118084428e-08, "loss": 0.4287458062171936, "step": 7683 }, { "epoch": 1.7717316117131658, "grad_norm": 1.5575643616743255, "learning_rate": 7.053261471950612e-08, "loss": 0.3849913775920868, "step": 7684 }, { "epoch": 1.7719621858427486, "grad_norm": 1.4243498094919005, "learning_rate": 7.039203336567245e-08, "loss": 0.4933156371116638, "step": 7685 }, { "epoch": 1.772192759972331, "grad_norm": 1.897795122632639, "learning_rate": 7.025158713977808e-08, "loss": 0.5185002088546753, "step": 7686 }, { "epoch": 1.7724233341019138, "grad_norm": 1.634847266537775, "learning_rate": 7.011127606223799e-08, "loss": 0.514995276927948, "step": 7687 }, { "epoch": 1.7726539082314963, "grad_norm": 1.5845868665458605, "learning_rate": 6.99711001534481e-08, "loss": 0.4362761676311493, "step": 7688 }, { "epoch": 1.772884482361079, "grad_norm": 1.699858455397738, "learning_rate": 6.983105943378431e-08, "loss": 0.44117432832717896, "step": 7689 }, { "epoch": 1.7731150564906617, "grad_norm": 1.5875521204144505, "learning_rate": 6.969115392360325e-08, "loss": 0.4940808415412903, "step": 7690 }, { "epoch": 1.7733456306202444, "grad_norm": 1.9046624573594293, "learning_rate": 6.955138364324109e-08, "loss": 0.4322758913040161, "step": 7691 }, { "epoch": 1.7735762047498271, "grad_norm": 1.467450936859881, "learning_rate": 6.941174861301536e-08, "loss": 0.3867933750152588, "step": 7692 }, { "epoch": 1.7738067788794099, "grad_norm": 1.6321329987514115, "learning_rate": 6.927224885322302e-08, "loss": 0.4380000829696655, "step": 7693 }, { "epoch": 1.7740373530089923, "grad_norm": 1.7183023620516549, "learning_rate": 6.913288438414222e-08, "loss": 0.46499723196029663, "step": 7694 }, { "epoch": 1.774267927138575, "grad_norm": 1.6625572218896962, "learning_rate": 6.89936552260304e-08, "loss": 0.4845675230026245, "step": 7695 }, { "epoch": 1.7744985012681576, "grad_norm": 1.3920222388819354, "learning_rate": 6.88545613991266e-08, "loss": 0.3755526542663574, "step": 7696 }, { "epoch": 1.7747290753977403, "grad_norm": 1.358162383242242, "learning_rate": 6.871560292364887e-08, "loss": 0.4765484929084778, "step": 7697 }, { "epoch": 1.774959649527323, "grad_norm": 1.5701618596645643, "learning_rate": 6.857677981979659e-08, "loss": 0.4176154136657715, "step": 7698 }, { "epoch": 1.7751902236569057, "grad_norm": 1.5881043143352427, "learning_rate": 6.84380921077492e-08, "loss": 0.410483717918396, "step": 7699 }, { "epoch": 1.7754207977864884, "grad_norm": 1.876508092569716, "learning_rate": 6.829953980766612e-08, "loss": 0.5188060998916626, "step": 7700 }, { "epoch": 1.7756513719160711, "grad_norm": 1.5514145308665186, "learning_rate": 6.816112293968745e-08, "loss": 0.47039783000946045, "step": 7701 }, { "epoch": 1.7758819460456536, "grad_norm": 1.6296649452825585, "learning_rate": 6.802284152393345e-08, "loss": 0.5367648601531982, "step": 7702 }, { "epoch": 1.7761125201752364, "grad_norm": 1.55513001656084, "learning_rate": 6.78846955805048e-08, "loss": 0.500449538230896, "step": 7703 }, { "epoch": 1.7763430943048188, "grad_norm": 1.5060722099238588, "learning_rate": 6.774668512948234e-08, "loss": 0.4579819440841675, "step": 7704 }, { "epoch": 1.7765736684344016, "grad_norm": 1.7824280377613644, "learning_rate": 6.760881019092712e-08, "loss": 0.41459107398986816, "step": 7705 }, { "epoch": 1.7768042425639843, "grad_norm": 1.7900526752813857, "learning_rate": 6.747107078488112e-08, "loss": 0.46020573377609253, "step": 7706 }, { "epoch": 1.777034816693567, "grad_norm": 1.7709884076088374, "learning_rate": 6.733346693136566e-08, "loss": 0.48069459199905396, "step": 7707 }, { "epoch": 1.7772653908231497, "grad_norm": 1.4499402707441236, "learning_rate": 6.719599865038328e-08, "loss": 0.3514458239078522, "step": 7708 }, { "epoch": 1.7774959649527324, "grad_norm": 1.7044500533180955, "learning_rate": 6.705866596191601e-08, "loss": 0.4696041941642761, "step": 7709 }, { "epoch": 1.777726539082315, "grad_norm": 1.6058185659780073, "learning_rate": 6.692146888592675e-08, "loss": 0.45286083221435547, "step": 7710 }, { "epoch": 1.7779571132118976, "grad_norm": 1.8525271361461533, "learning_rate": 6.678440744235848e-08, "loss": 0.4659677743911743, "step": 7711 }, { "epoch": 1.7781876873414801, "grad_norm": 1.5770202034991272, "learning_rate": 6.664748165113432e-08, "loss": 0.4030906558036804, "step": 7712 }, { "epoch": 1.7784182614710629, "grad_norm": 1.4781448065809968, "learning_rate": 6.651069153215804e-08, "loss": 0.4878493547439575, "step": 7713 }, { "epoch": 1.7786488356006456, "grad_norm": 2.5716911461046115, "learning_rate": 6.637403710531352e-08, "loss": 0.4651924669742584, "step": 7714 }, { "epoch": 1.7788794097302283, "grad_norm": 1.5268258649377473, "learning_rate": 6.623751839046455e-08, "loss": 0.37795954942703247, "step": 7715 }, { "epoch": 1.779109983859811, "grad_norm": 1.8617699048987524, "learning_rate": 6.610113540745577e-08, "loss": 0.5722923278808594, "step": 7716 }, { "epoch": 1.7793405579893937, "grad_norm": 2.039919155814789, "learning_rate": 6.59648881761118e-08, "loss": 0.46933984756469727, "step": 7717 }, { "epoch": 1.7795711321189762, "grad_norm": 1.7692714186594531, "learning_rate": 6.582877671623732e-08, "loss": 0.5066707134246826, "step": 7718 }, { "epoch": 1.779801706248559, "grad_norm": 1.5518843020711044, "learning_rate": 6.569280104761787e-08, "loss": 0.5064150094985962, "step": 7719 }, { "epoch": 1.7800322803781414, "grad_norm": 1.4858522723338492, "learning_rate": 6.555696119001853e-08, "loss": 0.408633828163147, "step": 7720 }, { "epoch": 1.7802628545077241, "grad_norm": 1.9460802080180855, "learning_rate": 6.542125716318514e-08, "loss": 0.4960691034793854, "step": 7721 }, { "epoch": 1.7804934286373069, "grad_norm": 1.609433139750494, "learning_rate": 6.528568898684373e-08, "loss": 0.4275667071342468, "step": 7722 }, { "epoch": 1.7807240027668896, "grad_norm": 1.5242191505097453, "learning_rate": 6.515025668070062e-08, "loss": 0.5309962630271912, "step": 7723 }, { "epoch": 1.7809545768964723, "grad_norm": 1.3218748644597216, "learning_rate": 6.501496026444197e-08, "loss": 0.42067253589630127, "step": 7724 }, { "epoch": 1.781185151026055, "grad_norm": 1.5205678956011466, "learning_rate": 6.487979975773484e-08, "loss": 0.43419337272644043, "step": 7725 }, { "epoch": 1.7814157251556375, "grad_norm": 1.728456021255068, "learning_rate": 6.474477518022592e-08, "loss": 0.46563541889190674, "step": 7726 }, { "epoch": 1.7816462992852202, "grad_norm": 1.2994636821353438, "learning_rate": 6.460988655154232e-08, "loss": 0.4233010411262512, "step": 7727 }, { "epoch": 1.7818768734148027, "grad_norm": 1.5541073736247684, "learning_rate": 6.447513389129155e-08, "loss": 0.47119754552841187, "step": 7728 }, { "epoch": 1.7821074475443854, "grad_norm": 1.7457851161988949, "learning_rate": 6.434051721906142e-08, "loss": 0.5227707624435425, "step": 7729 }, { "epoch": 1.7823380216739682, "grad_norm": 1.6453844551794445, "learning_rate": 6.42060365544198e-08, "loss": 0.4521239399909973, "step": 7730 }, { "epoch": 1.7825685958035509, "grad_norm": 1.5739071323130231, "learning_rate": 6.407169191691464e-08, "loss": 0.36693084239959717, "step": 7731 }, { "epoch": 1.7827991699331336, "grad_norm": 1.9032214424835083, "learning_rate": 6.393748332607463e-08, "loss": 0.43610745668411255, "step": 7732 }, { "epoch": 1.7830297440627163, "grad_norm": 1.4784257370105836, "learning_rate": 6.380341080140794e-08, "loss": 0.4471576511859894, "step": 7733 }, { "epoch": 1.7832603181922988, "grad_norm": 1.61284007349941, "learning_rate": 6.366947436240367e-08, "loss": 0.48119011521339417, "step": 7734 }, { "epoch": 1.7834908923218815, "grad_norm": 1.4393647934894105, "learning_rate": 6.353567402853055e-08, "loss": 0.44503623247146606, "step": 7735 }, { "epoch": 1.783721466451464, "grad_norm": 1.3430253886827939, "learning_rate": 6.340200981923804e-08, "loss": 0.3350965678691864, "step": 7736 }, { "epoch": 1.7839520405810467, "grad_norm": 1.4031838686370632, "learning_rate": 6.326848175395572e-08, "loss": 0.4814649224281311, "step": 7737 }, { "epoch": 1.7841826147106294, "grad_norm": 1.3042254858214102, "learning_rate": 6.313508985209281e-08, "loss": 0.42114442586898804, "step": 7738 }, { "epoch": 1.7844131888402122, "grad_norm": 1.4924201661244643, "learning_rate": 6.30018341330396e-08, "loss": 0.5044004917144775, "step": 7739 }, { "epoch": 1.7846437629697949, "grad_norm": 1.7211591431218773, "learning_rate": 6.286871461616594e-08, "loss": 0.46084678173065186, "step": 7740 }, { "epoch": 1.7848743370993776, "grad_norm": 1.8074380950640034, "learning_rate": 6.273573132082222e-08, "loss": 0.5159536600112915, "step": 7741 }, { "epoch": 1.78510491122896, "grad_norm": 2.6340339816007394, "learning_rate": 6.260288426633875e-08, "loss": 0.4394105076789856, "step": 7742 }, { "epoch": 1.7853354853585428, "grad_norm": 1.415651636415873, "learning_rate": 6.247017347202643e-08, "loss": 0.39798909425735474, "step": 7743 }, { "epoch": 1.7855660594881253, "grad_norm": 1.439083218855293, "learning_rate": 6.23375989571756e-08, "loss": 0.3865649104118347, "step": 7744 }, { "epoch": 1.785796633617708, "grad_norm": 1.3172940172138528, "learning_rate": 6.220516074105808e-08, "loss": 0.3641304671764374, "step": 7745 }, { "epoch": 1.7860272077472907, "grad_norm": 1.7148086023867872, "learning_rate": 6.207285884292468e-08, "loss": 0.5025773644447327, "step": 7746 }, { "epoch": 1.7862577818768735, "grad_norm": 1.5237733931532715, "learning_rate": 6.194069328200669e-08, "loss": 0.4289078414440155, "step": 7747 }, { "epoch": 1.7864883560064562, "grad_norm": 1.5368409458369108, "learning_rate": 6.180866407751595e-08, "loss": 0.37442147731781006, "step": 7748 }, { "epoch": 1.7867189301360389, "grad_norm": 1.6962674881863276, "learning_rate": 6.167677124864412e-08, "loss": 0.4975471794605255, "step": 7749 }, { "epoch": 1.7869495042656214, "grad_norm": 1.7290797112616507, "learning_rate": 6.154501481456331e-08, "loss": 0.42754751443862915, "step": 7750 }, { "epoch": 1.787180078395204, "grad_norm": 1.508949301788889, "learning_rate": 6.141339479442542e-08, "loss": 0.40203964710235596, "step": 7751 }, { "epoch": 1.7874106525247866, "grad_norm": 1.6453479393381845, "learning_rate": 6.128191120736293e-08, "loss": 0.46465349197387695, "step": 7752 }, { "epoch": 1.7876412266543693, "grad_norm": 1.527112166022553, "learning_rate": 6.11505640724882e-08, "loss": 0.43915730714797974, "step": 7753 }, { "epoch": 1.787871800783952, "grad_norm": 1.6855929805801586, "learning_rate": 6.101935340889419e-08, "loss": 0.5205652713775635, "step": 7754 }, { "epoch": 1.7881023749135347, "grad_norm": 1.8024849017160496, "learning_rate": 6.088827923565321e-08, "loss": 0.39400190114974976, "step": 7755 }, { "epoch": 1.7883329490431175, "grad_norm": 1.585632228373493, "learning_rate": 6.075734157181855e-08, "loss": 0.48021531105041504, "step": 7756 }, { "epoch": 1.7885635231727002, "grad_norm": 1.313118747015303, "learning_rate": 6.062654043642334e-08, "loss": 0.42780327796936035, "step": 7757 }, { "epoch": 1.7887940973022827, "grad_norm": 1.5444008946931698, "learning_rate": 6.049587584848059e-08, "loss": 0.4307866096496582, "step": 7758 }, { "epoch": 1.7890246714318654, "grad_norm": 1.8803266889221286, "learning_rate": 6.036534782698377e-08, "loss": 0.4258533716201782, "step": 7759 }, { "epoch": 1.7892552455614479, "grad_norm": 1.7033971690196206, "learning_rate": 6.02349563909067e-08, "loss": 0.5159060955047607, "step": 7760 }, { "epoch": 1.7894858196910306, "grad_norm": 1.4016246032179807, "learning_rate": 6.0104701559203e-08, "loss": 0.4407171308994293, "step": 7761 }, { "epoch": 1.7897163938206133, "grad_norm": 1.4060175796774192, "learning_rate": 5.99745833508063e-08, "loss": 0.40273964405059814, "step": 7762 }, { "epoch": 1.789946967950196, "grad_norm": 1.5929040194351833, "learning_rate": 5.984460178463102e-08, "loss": 0.42018163204193115, "step": 7763 }, { "epoch": 1.7901775420797787, "grad_norm": 1.5421517490968868, "learning_rate": 5.971475687957084e-08, "loss": 0.519807755947113, "step": 7764 }, { "epoch": 1.7904081162093615, "grad_norm": 1.4320196013314206, "learning_rate": 5.9585048654500535e-08, "loss": 0.42557477951049805, "step": 7765 }, { "epoch": 1.790638690338944, "grad_norm": 1.520426042431449, "learning_rate": 5.9455477128273924e-08, "loss": 0.39568305015563965, "step": 7766 }, { "epoch": 1.7908692644685267, "grad_norm": 1.566797519717712, "learning_rate": 5.932604231972593e-08, "loss": 0.43125781416893005, "step": 7767 }, { "epoch": 1.7910998385981092, "grad_norm": 1.5764190405770546, "learning_rate": 5.919674424767129e-08, "loss": 0.46194958686828613, "step": 7768 }, { "epoch": 1.791330412727692, "grad_norm": 1.3811294262508054, "learning_rate": 5.906758293090441e-08, "loss": 0.40115779638290405, "step": 7769 }, { "epoch": 1.7915609868572746, "grad_norm": 1.4511176958262644, "learning_rate": 5.893855838820061e-08, "loss": 0.46589648723602295, "step": 7770 }, { "epoch": 1.7917915609868573, "grad_norm": 1.4613820552852321, "learning_rate": 5.880967063831455e-08, "loss": 0.3540228605270386, "step": 7771 }, { "epoch": 1.79202213511644, "grad_norm": 1.3900736631273891, "learning_rate": 5.868091969998168e-08, "loss": 0.4324638545513153, "step": 7772 }, { "epoch": 1.7922527092460228, "grad_norm": 1.426811730253004, "learning_rate": 5.855230559191693e-08, "loss": 0.4301075339317322, "step": 7773 }, { "epoch": 1.7924832833756053, "grad_norm": 1.4903234676277026, "learning_rate": 5.842382833281612e-08, "loss": 0.4496096670627594, "step": 7774 }, { "epoch": 1.792713857505188, "grad_norm": 1.7119132871592322, "learning_rate": 5.8295487941354195e-08, "loss": 0.4554907977581024, "step": 7775 }, { "epoch": 1.7929444316347705, "grad_norm": 1.6357284914311145, "learning_rate": 5.816728443618701e-08, "loss": 0.5020148158073425, "step": 7776 }, { "epoch": 1.7931750057643532, "grad_norm": 1.5886767874513543, "learning_rate": 5.803921783595045e-08, "loss": 0.4073353409767151, "step": 7777 }, { "epoch": 1.793405579893936, "grad_norm": 1.7806143022342438, "learning_rate": 5.791128815925983e-08, "loss": 0.4995894432067871, "step": 7778 }, { "epoch": 1.7936361540235186, "grad_norm": 1.4290018525481676, "learning_rate": 5.778349542471139e-08, "loss": 0.5383706092834473, "step": 7779 }, { "epoch": 1.7938667281531013, "grad_norm": 1.5928372327878688, "learning_rate": 5.765583965088083e-08, "loss": 0.4206235408782959, "step": 7780 }, { "epoch": 1.794097302282684, "grad_norm": 1.516533597399375, "learning_rate": 5.752832085632453e-08, "loss": 0.49053555727005005, "step": 7781 }, { "epoch": 1.7943278764122665, "grad_norm": 1.4761016261714877, "learning_rate": 5.740093905957832e-08, "loss": 0.4372660517692566, "step": 7782 }, { "epoch": 1.7945584505418493, "grad_norm": 1.364372499711938, "learning_rate": 5.727369427915851e-08, "loss": 0.40125733613967896, "step": 7783 }, { "epoch": 1.7947890246714318, "grad_norm": 1.5421908029736124, "learning_rate": 5.714658653356153e-08, "loss": 0.3595162034034729, "step": 7784 }, { "epoch": 1.7950195988010145, "grad_norm": 1.4909078230640012, "learning_rate": 5.7019615841263915e-08, "loss": 0.42618101835250854, "step": 7785 }, { "epoch": 1.7952501729305972, "grad_norm": 1.2890347032019704, "learning_rate": 5.6892782220721694e-08, "loss": 0.39135509729385376, "step": 7786 }, { "epoch": 1.79548074706018, "grad_norm": 1.2930421412734876, "learning_rate": 5.6766085690372004e-08, "loss": 0.3792929947376251, "step": 7787 }, { "epoch": 1.7957113211897626, "grad_norm": 2.137954515105217, "learning_rate": 5.6639526268631e-08, "loss": 0.5193231105804443, "step": 7788 }, { "epoch": 1.7959418953193451, "grad_norm": 1.3992061535387368, "learning_rate": 5.6513103973895415e-08, "loss": 0.3896862268447876, "step": 7789 }, { "epoch": 1.7961724694489278, "grad_norm": 1.6107653457361368, "learning_rate": 5.638681882454211e-08, "loss": 0.5345273017883301, "step": 7790 }, { "epoch": 1.7964030435785103, "grad_norm": 1.597285051654587, "learning_rate": 5.626067083892794e-08, "loss": 0.4297627806663513, "step": 7791 }, { "epoch": 1.796633617708093, "grad_norm": 1.8890048408663909, "learning_rate": 5.6134660035389914e-08, "loss": 0.3176969587802887, "step": 7792 }, { "epoch": 1.7968641918376758, "grad_norm": 1.684652354437091, "learning_rate": 5.600878643224471e-08, "loss": 0.5449323654174805, "step": 7793 }, { "epoch": 1.7970947659672585, "grad_norm": 1.3924882582172304, "learning_rate": 5.588305004778959e-08, "loss": 0.38096293807029724, "step": 7794 }, { "epoch": 1.7973253400968412, "grad_norm": 1.6284420500901806, "learning_rate": 5.575745090030137e-08, "loss": 0.3917475938796997, "step": 7795 }, { "epoch": 1.797555914226424, "grad_norm": 1.8012275849309003, "learning_rate": 5.563198900803734e-08, "loss": 0.41522616147994995, "step": 7796 }, { "epoch": 1.7977864883560064, "grad_norm": 1.4000666419018515, "learning_rate": 5.550666438923468e-08, "loss": 0.46558207273483276, "step": 7797 }, { "epoch": 1.7980170624855891, "grad_norm": 1.4562091239424864, "learning_rate": 5.538147706211038e-08, "loss": 0.43256324529647827, "step": 7798 }, { "epoch": 1.7982476366151716, "grad_norm": 1.5167378404298808, "learning_rate": 5.5256427044861666e-08, "loss": 0.37302178144454956, "step": 7799 }, { "epoch": 1.7984782107447543, "grad_norm": 1.7103098772379584, "learning_rate": 5.5131514355666095e-08, "loss": 0.5247504711151123, "step": 7800 }, { "epoch": 1.798708784874337, "grad_norm": 1.3345270008803303, "learning_rate": 5.5006739012680934e-08, "loss": 0.3906348943710327, "step": 7801 }, { "epoch": 1.7989393590039198, "grad_norm": 1.863821074304618, "learning_rate": 5.488210103404345e-08, "loss": 0.5293325185775757, "step": 7802 }, { "epoch": 1.7991699331335025, "grad_norm": 1.8021445170106478, "learning_rate": 5.4757600437871146e-08, "loss": 0.4189381003379822, "step": 7803 }, { "epoch": 1.7994005072630852, "grad_norm": 1.4161978936431723, "learning_rate": 5.4633237242261207e-08, "loss": 0.40476128458976746, "step": 7804 }, { "epoch": 1.7996310813926677, "grad_norm": 1.6288403815954717, "learning_rate": 5.45090114652913e-08, "loss": 0.3908376097679138, "step": 7805 }, { "epoch": 1.7998616555222504, "grad_norm": 1.4731211435711635, "learning_rate": 5.438492312501885e-08, "loss": 0.42332786321640015, "step": 7806 }, { "epoch": 1.800092229651833, "grad_norm": 1.2492034971721793, "learning_rate": 5.426097223948123e-08, "loss": 0.3398321866989136, "step": 7807 }, { "epoch": 1.8003228037814156, "grad_norm": 1.410970674481118, "learning_rate": 5.413715882669623e-08, "loss": 0.4610673189163208, "step": 7808 }, { "epoch": 1.8005533779109983, "grad_norm": 1.4416956666235687, "learning_rate": 5.401348290466112e-08, "loss": 0.4149124026298523, "step": 7809 }, { "epoch": 1.800783952040581, "grad_norm": 1.4475278396115219, "learning_rate": 5.388994449135376e-08, "loss": 0.47464168071746826, "step": 7810 }, { "epoch": 1.8010145261701638, "grad_norm": 1.4581354291230397, "learning_rate": 5.376654360473121e-08, "loss": 0.4530913829803467, "step": 7811 }, { "epoch": 1.8012451002997465, "grad_norm": 1.7198902838066041, "learning_rate": 5.364328026273157e-08, "loss": 0.5577078461647034, "step": 7812 }, { "epoch": 1.801475674429329, "grad_norm": 1.828526033611825, "learning_rate": 5.3520154483272075e-08, "loss": 0.4772539436817169, "step": 7813 }, { "epoch": 1.8017062485589117, "grad_norm": 1.690066578469317, "learning_rate": 5.339716628425039e-08, "loss": 0.5387610197067261, "step": 7814 }, { "epoch": 1.8019368226884942, "grad_norm": 1.7130913599502742, "learning_rate": 5.327431568354401e-08, "loss": 0.4505125880241394, "step": 7815 }, { "epoch": 1.802167396818077, "grad_norm": 1.5145450098970203, "learning_rate": 5.3151602699010867e-08, "loss": 0.43021589517593384, "step": 7816 }, { "epoch": 1.8023979709476596, "grad_norm": 1.6184493194868252, "learning_rate": 5.3029027348488244e-08, "loss": 0.44107457995414734, "step": 7817 }, { "epoch": 1.8026285450772424, "grad_norm": 1.6224833006548345, "learning_rate": 5.2906589649793666e-08, "loss": 0.42265504598617554, "step": 7818 }, { "epoch": 1.802859119206825, "grad_norm": 1.3828256021454344, "learning_rate": 5.2784289620724895e-08, "loss": 0.4814263582229614, "step": 7819 }, { "epoch": 1.8030896933364078, "grad_norm": 1.3840958899744187, "learning_rate": 5.2662127279059275e-08, "loss": 0.4255106747150421, "step": 7820 }, { "epoch": 1.8033202674659903, "grad_norm": 1.3789211684549096, "learning_rate": 5.2540102642554593e-08, "loss": 0.43405312299728394, "step": 7821 }, { "epoch": 1.803550841595573, "grad_norm": 1.5062041567676776, "learning_rate": 5.2418215728948004e-08, "loss": 0.3986097574234009, "step": 7822 }, { "epoch": 1.8037814157251555, "grad_norm": 1.7653469724585684, "learning_rate": 5.2296466555957205e-08, "loss": 0.4988093972206116, "step": 7823 }, { "epoch": 1.8040119898547382, "grad_norm": 1.6382094442265007, "learning_rate": 5.217485514127973e-08, "loss": 0.5290527939796448, "step": 7824 }, { "epoch": 1.804242563984321, "grad_norm": 1.4794199807921353, "learning_rate": 5.205338150259308e-08, "loss": 0.3705815076828003, "step": 7825 }, { "epoch": 1.8044731381139036, "grad_norm": 1.3872232407887637, "learning_rate": 5.193204565755449e-08, "loss": 0.37735384702682495, "step": 7826 }, { "epoch": 1.8047037122434864, "grad_norm": 1.38875357732027, "learning_rate": 5.1810847623801504e-08, "loss": 0.39033758640289307, "step": 7827 }, { "epoch": 1.804934286373069, "grad_norm": 1.5105458662939806, "learning_rate": 5.168978741895147e-08, "loss": 0.4669237732887268, "step": 7828 }, { "epoch": 1.8051648605026516, "grad_norm": 1.6910832171163468, "learning_rate": 5.156886506060154e-08, "loss": 0.5178482532501221, "step": 7829 }, { "epoch": 1.8053954346322343, "grad_norm": 1.4473544670706617, "learning_rate": 5.14480805663291e-08, "loss": 0.44134122133255005, "step": 7830 }, { "epoch": 1.8056260087618168, "grad_norm": 1.5836257156251672, "learning_rate": 5.132743395369144e-08, "loss": 0.44371920824050903, "step": 7831 }, { "epoch": 1.8058565828913995, "grad_norm": 1.513244295553376, "learning_rate": 5.1206925240225964e-08, "loss": 0.43268662691116333, "step": 7832 }, { "epoch": 1.8060871570209822, "grad_norm": 1.736730853895812, "learning_rate": 5.1086554443449445e-08, "loss": 0.5035665035247803, "step": 7833 }, { "epoch": 1.806317731150565, "grad_norm": 1.3694047806165788, "learning_rate": 5.0966321580859336e-08, "loss": 0.4987141191959381, "step": 7834 }, { "epoch": 1.8065483052801476, "grad_norm": 1.816085685560109, "learning_rate": 5.0846226669932437e-08, "loss": 0.5951617956161499, "step": 7835 }, { "epoch": 1.8067788794097304, "grad_norm": 1.464038827862328, "learning_rate": 5.072626972812599e-08, "loss": 0.4710814654827118, "step": 7836 }, { "epoch": 1.8070094535393129, "grad_norm": 1.6196482413694708, "learning_rate": 5.060645077287662e-08, "loss": 0.5173348188400269, "step": 7837 }, { "epoch": 1.8072400276688956, "grad_norm": 1.4170272466334293, "learning_rate": 5.048676982160161e-08, "loss": 0.49508416652679443, "step": 7838 }, { "epoch": 1.807470601798478, "grad_norm": 1.7639395740589152, "learning_rate": 5.03672268916977e-08, "loss": 0.4535290598869324, "step": 7839 }, { "epoch": 1.8077011759280608, "grad_norm": 1.7696762607003815, "learning_rate": 5.024782200054145e-08, "loss": 0.5337553024291992, "step": 7840 }, { "epoch": 1.8079317500576435, "grad_norm": 1.6346280356935987, "learning_rate": 5.012855516548986e-08, "loss": 0.47118210792541504, "step": 7841 }, { "epoch": 1.8081623241872262, "grad_norm": 1.504680600844573, "learning_rate": 5.0009426403879283e-08, "loss": 0.4458848237991333, "step": 7842 }, { "epoch": 1.808392898316809, "grad_norm": 1.5297682575974059, "learning_rate": 4.9890435733026536e-08, "loss": 0.5055558681488037, "step": 7843 }, { "epoch": 1.8086234724463917, "grad_norm": 1.4365609441585347, "learning_rate": 4.9771583170228006e-08, "loss": 0.43715038895606995, "step": 7844 }, { "epoch": 1.8088540465759742, "grad_norm": 1.545411862707653, "learning_rate": 4.96528687327602e-08, "loss": 0.427906334400177, "step": 7845 }, { "epoch": 1.8090846207055569, "grad_norm": 1.6703597275780244, "learning_rate": 4.953429243787932e-08, "loss": 0.48160994052886963, "step": 7846 }, { "epoch": 1.8093151948351394, "grad_norm": 1.3261658854233023, "learning_rate": 4.941585430282158e-08, "loss": 0.40856754779815674, "step": 7847 }, { "epoch": 1.809545768964722, "grad_norm": 1.3569384823756985, "learning_rate": 4.929755434480354e-08, "loss": 0.40482330322265625, "step": 7848 }, { "epoch": 1.8097763430943048, "grad_norm": 1.530544362283251, "learning_rate": 4.9179392581021e-08, "loss": 0.4286755323410034, "step": 7849 }, { "epoch": 1.8100069172238875, "grad_norm": 1.5805205551700128, "learning_rate": 4.906136902864999e-08, "loss": 0.4436051547527313, "step": 7850 }, { "epoch": 1.8102374913534702, "grad_norm": 1.5320309451669083, "learning_rate": 4.8943483704846465e-08, "loss": 0.41794437170028687, "step": 7851 }, { "epoch": 1.810468065483053, "grad_norm": 1.4506407579843814, "learning_rate": 4.8825736626746384e-08, "loss": 0.4308912754058838, "step": 7852 }, { "epoch": 1.8106986396126354, "grad_norm": 1.5274898640972132, "learning_rate": 4.870812781146516e-08, "loss": 0.43090081214904785, "step": 7853 }, { "epoch": 1.8109292137422182, "grad_norm": 1.3117483081436436, "learning_rate": 4.859065727609857e-08, "loss": 0.4329320192337036, "step": 7854 }, { "epoch": 1.8111597878718007, "grad_norm": 1.266199300666261, "learning_rate": 4.8473325037722276e-08, "loss": 0.3162953853607178, "step": 7855 }, { "epoch": 1.8113903620013834, "grad_norm": 1.4534333887380995, "learning_rate": 4.835613111339165e-08, "loss": 0.37513065338134766, "step": 7856 }, { "epoch": 1.811620936130966, "grad_norm": 1.494207838495638, "learning_rate": 4.823907552014195e-08, "loss": 0.4120938181877136, "step": 7857 }, { "epoch": 1.8118515102605488, "grad_norm": 1.555741011782435, "learning_rate": 4.8122158274988555e-08, "loss": 0.4295421242713928, "step": 7858 }, { "epoch": 1.8120820843901315, "grad_norm": 1.4697042695976983, "learning_rate": 4.8005379394926435e-08, "loss": 0.44738203287124634, "step": 7859 }, { "epoch": 1.8123126585197142, "grad_norm": 1.7388489283467792, "learning_rate": 4.7888738896930456e-08, "loss": 0.447609007358551, "step": 7860 }, { "epoch": 1.8125432326492967, "grad_norm": 1.6367328188270214, "learning_rate": 4.777223679795561e-08, "loss": 0.38288167119026184, "step": 7861 }, { "epoch": 1.8127738067788794, "grad_norm": 1.5566909994885838, "learning_rate": 4.765587311493668e-08, "loss": 0.5003981590270996, "step": 7862 }, { "epoch": 1.813004380908462, "grad_norm": 1.5140425774804767, "learning_rate": 4.7539647864788476e-08, "loss": 0.5244492888450623, "step": 7863 }, { "epoch": 1.8132349550380447, "grad_norm": 1.4098788698269693, "learning_rate": 4.742356106440526e-08, "loss": 0.505184531211853, "step": 7864 }, { "epoch": 1.8134655291676274, "grad_norm": 2.493869291024891, "learning_rate": 4.7307612730661636e-08, "loss": 0.5364291071891785, "step": 7865 }, { "epoch": 1.81369610329721, "grad_norm": 1.5655893218937025, "learning_rate": 4.719180288041158e-08, "loss": 0.4370742738246918, "step": 7866 }, { "epoch": 1.8139266774267928, "grad_norm": 1.3233268572547954, "learning_rate": 4.7076131530489505e-08, "loss": 0.37784355878829956, "step": 7867 }, { "epoch": 1.8141572515563755, "grad_norm": 1.6040150628213576, "learning_rate": 4.6960598697709294e-08, "loss": 0.5184513330459595, "step": 7868 }, { "epoch": 1.814387825685958, "grad_norm": 1.6174173359265467, "learning_rate": 4.6845204398864743e-08, "loss": 0.41221511363983154, "step": 7869 }, { "epoch": 1.8146183998155407, "grad_norm": 1.960596641519608, "learning_rate": 4.672994865072965e-08, "loss": 0.43040651082992554, "step": 7870 }, { "epoch": 1.8148489739451232, "grad_norm": 1.887961823292038, "learning_rate": 4.6614831470057625e-08, "loss": 0.4681999385356903, "step": 7871 }, { "epoch": 1.815079548074706, "grad_norm": 1.5463001442495705, "learning_rate": 4.649985287358227e-08, "loss": 0.49752098321914673, "step": 7872 }, { "epoch": 1.8153101222042887, "grad_norm": 1.4528059880154254, "learning_rate": 4.6385012878016663e-08, "loss": 0.4621706008911133, "step": 7873 }, { "epoch": 1.8155406963338714, "grad_norm": 1.339046035541834, "learning_rate": 4.627031150005401e-08, "loss": 0.4359724521636963, "step": 7874 }, { "epoch": 1.815771270463454, "grad_norm": 1.4288119410903932, "learning_rate": 4.6155748756367294e-08, "loss": 0.4901214838027954, "step": 7875 }, { "epoch": 1.8160018445930368, "grad_norm": 1.7234395975437273, "learning_rate": 4.604132466360955e-08, "loss": 0.5012428760528564, "step": 7876 }, { "epoch": 1.8162324187226193, "grad_norm": 1.6768636456338364, "learning_rate": 4.592703923841323e-08, "loss": 0.5048446655273438, "step": 7877 }, { "epoch": 1.816462992852202, "grad_norm": 1.5761086054200695, "learning_rate": 4.5812892497390955e-08, "loss": 0.5025140047073364, "step": 7878 }, { "epoch": 1.8166935669817845, "grad_norm": 1.5593886228823222, "learning_rate": 4.5698884457135324e-08, "loss": 0.4456709623336792, "step": 7879 }, { "epoch": 1.8169241411113672, "grad_norm": 1.4583950124069596, "learning_rate": 4.5585015134218196e-08, "loss": 0.38283586502075195, "step": 7880 }, { "epoch": 1.81715471524095, "grad_norm": 1.5479198908902716, "learning_rate": 4.5471284545192004e-08, "loss": 0.3458648920059204, "step": 7881 }, { "epoch": 1.8173852893705327, "grad_norm": 1.7126815699296334, "learning_rate": 4.53576927065884e-08, "loss": 0.4609532952308655, "step": 7882 }, { "epoch": 1.8176158635001154, "grad_norm": 1.238404719965568, "learning_rate": 4.524423963491919e-08, "loss": 0.4250793159008026, "step": 7883 }, { "epoch": 1.817846437629698, "grad_norm": 1.7276559977997992, "learning_rate": 4.513092534667584e-08, "loss": 0.41343796253204346, "step": 7884 }, { "epoch": 1.8180770117592806, "grad_norm": 1.5863495927207087, "learning_rate": 4.5017749858329736e-08, "loss": 0.46575528383255005, "step": 7885 }, { "epoch": 1.8183075858888633, "grad_norm": 1.7387493602059383, "learning_rate": 4.4904713186332156e-08, "loss": 0.47052180767059326, "step": 7886 }, { "epoch": 1.8185381600184458, "grad_norm": 1.4938009961123744, "learning_rate": 4.479181534711429e-08, "loss": 0.42979568243026733, "step": 7887 }, { "epoch": 1.8187687341480285, "grad_norm": 1.4298617258142596, "learning_rate": 4.46790563570868e-08, "loss": 0.4278537929058075, "step": 7888 }, { "epoch": 1.8189993082776112, "grad_norm": 1.6571154898401685, "learning_rate": 4.456643623264022e-08, "loss": 0.45380616188049316, "step": 7889 }, { "epoch": 1.819229882407194, "grad_norm": 1.6141969165708208, "learning_rate": 4.445395499014526e-08, "loss": 0.46085125207901, "step": 7890 }, { "epoch": 1.8194604565367767, "grad_norm": 1.7363894486391924, "learning_rate": 4.434161264595204e-08, "loss": 0.47558531165122986, "step": 7891 }, { "epoch": 1.8196910306663594, "grad_norm": 1.552212209885486, "learning_rate": 4.4229409216390845e-08, "loss": 0.42082321643829346, "step": 7892 }, { "epoch": 1.819921604795942, "grad_norm": 1.6844917452185877, "learning_rate": 4.411734471777129e-08, "loss": 0.40222978591918945, "step": 7893 }, { "epoch": 1.8201521789255246, "grad_norm": 1.7385505168528088, "learning_rate": 4.400541916638323e-08, "loss": 0.39737701416015625, "step": 7894 }, { "epoch": 1.820382753055107, "grad_norm": 1.6976347614290264, "learning_rate": 4.389363257849632e-08, "loss": 0.46538835763931274, "step": 7895 }, { "epoch": 1.8206133271846898, "grad_norm": 2.034464057065236, "learning_rate": 4.378198497035979e-08, "loss": 0.4994567036628723, "step": 7896 }, { "epoch": 1.8208439013142725, "grad_norm": 1.517699554285521, "learning_rate": 4.367047635820264e-08, "loss": 0.4574298858642578, "step": 7897 }, { "epoch": 1.8210744754438553, "grad_norm": 1.7361916973448048, "learning_rate": 4.3559106758234044e-08, "loss": 0.4716116786003113, "step": 7898 }, { "epoch": 1.821305049573438, "grad_norm": 1.7495776361282012, "learning_rate": 4.344787618664247e-08, "loss": 0.35549741983413696, "step": 7899 }, { "epoch": 1.8215356237030205, "grad_norm": 1.673931935617008, "learning_rate": 4.3336784659596226e-08, "loss": 0.44955599308013916, "step": 7900 }, { "epoch": 1.8217661978326032, "grad_norm": 1.2588104675314307, "learning_rate": 4.322583219324394e-08, "loss": 0.4047467112541199, "step": 7901 }, { "epoch": 1.8219967719621857, "grad_norm": 1.3892625958432285, "learning_rate": 4.3115018803713596e-08, "loss": 0.40367889404296875, "step": 7902 }, { "epoch": 1.8222273460917684, "grad_norm": 1.3189968956301878, "learning_rate": 4.3004344507113096e-08, "loss": 0.32705235481262207, "step": 7903 }, { "epoch": 1.8224579202213511, "grad_norm": 1.3777118561947166, "learning_rate": 4.2893809319529794e-08, "loss": 0.3845488727092743, "step": 7904 }, { "epoch": 1.8226884943509338, "grad_norm": 1.4977030222677208, "learning_rate": 4.2783413257031495e-08, "loss": 0.49070197343826294, "step": 7905 }, { "epoch": 1.8229190684805165, "grad_norm": 1.729181630904155, "learning_rate": 4.267315633566493e-08, "loss": 0.550437867641449, "step": 7906 }, { "epoch": 1.8231496426100993, "grad_norm": 1.6119404797366197, "learning_rate": 4.25630385714576e-08, "loss": 0.5042926073074341, "step": 7907 }, { "epoch": 1.8233802167396818, "grad_norm": 1.5956788246532367, "learning_rate": 4.245305998041571e-08, "loss": 0.48839205503463745, "step": 7908 }, { "epoch": 1.8236107908692645, "grad_norm": 1.6028821186444346, "learning_rate": 4.234322057852602e-08, "loss": 0.4754030108451843, "step": 7909 }, { "epoch": 1.823841364998847, "grad_norm": 1.5406282114264656, "learning_rate": 4.223352038175487e-08, "loss": 0.394174188375473, "step": 7910 }, { "epoch": 1.8240719391284297, "grad_norm": 1.3144512253416945, "learning_rate": 4.2123959406048183e-08, "loss": 0.39882469177246094, "step": 7911 }, { "epoch": 1.8243025132580124, "grad_norm": 1.3036980510979261, "learning_rate": 4.201453766733176e-08, "loss": 0.4611927270889282, "step": 7912 }, { "epoch": 1.8245330873875951, "grad_norm": 1.3717750651706109, "learning_rate": 4.190525518151122e-08, "loss": 0.4164184331893921, "step": 7913 }, { "epoch": 1.8247636615171778, "grad_norm": 1.7048234275294294, "learning_rate": 4.179611196447186e-08, "loss": 0.41586828231811523, "step": 7914 }, { "epoch": 1.8249942356467606, "grad_norm": 1.486464242852147, "learning_rate": 4.168710803207864e-08, "loss": 0.4707748591899872, "step": 7915 }, { "epoch": 1.825224809776343, "grad_norm": 1.6925426332325308, "learning_rate": 4.157824340017657e-08, "loss": 0.4235571622848511, "step": 7916 }, { "epoch": 1.8254553839059258, "grad_norm": 1.5746767320284107, "learning_rate": 4.146951808458998e-08, "loss": 0.3761681914329529, "step": 7917 }, { "epoch": 1.8256859580355083, "grad_norm": 1.9541083814793623, "learning_rate": 4.136093210112346e-08, "loss": 0.45545494556427, "step": 7918 }, { "epoch": 1.825916532165091, "grad_norm": 1.4946968371557119, "learning_rate": 4.1252485465561035e-08, "loss": 0.4154251515865326, "step": 7919 }, { "epoch": 1.8261471062946737, "grad_norm": 1.4442817043721163, "learning_rate": 4.114417819366633e-08, "loss": 0.3664330244064331, "step": 7920 }, { "epoch": 1.8263776804242564, "grad_norm": 1.4915985489350694, "learning_rate": 4.10360103011832e-08, "loss": 0.4527730643749237, "step": 7921 }, { "epoch": 1.8266082545538391, "grad_norm": 1.6683615123339999, "learning_rate": 4.092798180383461e-08, "loss": 0.5245767831802368, "step": 7922 }, { "epoch": 1.8268388286834218, "grad_norm": 1.6122193238326974, "learning_rate": 4.0820092717323894e-08, "loss": 0.39781343936920166, "step": 7923 }, { "epoch": 1.8270694028130043, "grad_norm": 1.592304216861808, "learning_rate": 4.071234305733362e-08, "loss": 0.4173957109451294, "step": 7924 }, { "epoch": 1.827299976942587, "grad_norm": 1.7592031102615102, "learning_rate": 4.0604732839526256e-08, "loss": 0.38840869069099426, "step": 7925 }, { "epoch": 1.8275305510721696, "grad_norm": 1.777360398097105, "learning_rate": 4.0497262079544294e-08, "loss": 0.4107547998428345, "step": 7926 }, { "epoch": 1.8277611252017523, "grad_norm": 1.5475583296259725, "learning_rate": 4.038993079300956e-08, "loss": 0.41102874279022217, "step": 7927 }, { "epoch": 1.827991699331335, "grad_norm": 1.4229533643496446, "learning_rate": 4.028273899552381e-08, "loss": 0.3393939733505249, "step": 7928 }, { "epoch": 1.8282222734609177, "grad_norm": 1.4844610719466356, "learning_rate": 4.017568670266835e-08, "loss": 0.42469024658203125, "step": 7929 }, { "epoch": 1.8284528475905004, "grad_norm": 1.316542585504155, "learning_rate": 4.006877393000441e-08, "loss": 0.4869099259376526, "step": 7930 }, { "epoch": 1.8286834217200831, "grad_norm": 1.3905230120628338, "learning_rate": 3.996200069307265e-08, "loss": 0.4463779926300049, "step": 7931 }, { "epoch": 1.8289139958496656, "grad_norm": 1.908726864953878, "learning_rate": 3.985536700739378e-08, "loss": 0.429579496383667, "step": 7932 }, { "epoch": 1.8291445699792483, "grad_norm": 1.555687929117211, "learning_rate": 3.9748872888468065e-08, "loss": 0.38837558031082153, "step": 7933 }, { "epoch": 1.8293751441088308, "grad_norm": 1.467502995951613, "learning_rate": 3.964251835177568e-08, "loss": 0.4444499909877777, "step": 7934 }, { "epoch": 1.8296057182384136, "grad_norm": 1.5836026531003116, "learning_rate": 3.953630341277603e-08, "loss": 0.5216259360313416, "step": 7935 }, { "epoch": 1.8298362923679963, "grad_norm": 1.316614330242316, "learning_rate": 3.943022808690888e-08, "loss": 0.46454817056655884, "step": 7936 }, { "epoch": 1.830066866497579, "grad_norm": 1.5390661326727673, "learning_rate": 3.9324292389593005e-08, "loss": 0.38960570096969604, "step": 7937 }, { "epoch": 1.8302974406271617, "grad_norm": 1.2960127878271992, "learning_rate": 3.9218496336227426e-08, "loss": 0.3318006992340088, "step": 7938 }, { "epoch": 1.8305280147567444, "grad_norm": 1.501585055160058, "learning_rate": 3.9112839942190725e-08, "loss": 0.41555076837539673, "step": 7939 }, { "epoch": 1.830758588886327, "grad_norm": 1.4035625255113318, "learning_rate": 3.900732322284095e-08, "loss": 0.4296320080757141, "step": 7940 }, { "epoch": 1.8309891630159096, "grad_norm": 1.6738155247978692, "learning_rate": 3.8901946193516055e-08, "loss": 0.4416658282279968, "step": 7941 }, { "epoch": 1.8312197371454921, "grad_norm": 1.885789179393057, "learning_rate": 3.8796708869533676e-08, "loss": 0.4539029598236084, "step": 7942 }, { "epoch": 1.8314503112750748, "grad_norm": 1.4867619575158202, "learning_rate": 3.869161126619136e-08, "loss": 0.4526992440223694, "step": 7943 }, { "epoch": 1.8316808854046576, "grad_norm": 1.5927522884216676, "learning_rate": 3.8586653398765766e-08, "loss": 0.3991963863372803, "step": 7944 }, { "epoch": 1.8319114595342403, "grad_norm": 1.4460483349984772, "learning_rate": 3.848183528251381e-08, "loss": 0.44474589824676514, "step": 7945 }, { "epoch": 1.832142033663823, "grad_norm": 1.7969739964524274, "learning_rate": 3.837715693267174e-08, "loss": 0.5022028684616089, "step": 7946 }, { "epoch": 1.8323726077934057, "grad_norm": 1.6274178723126447, "learning_rate": 3.8272618364455836e-08, "loss": 0.4839058518409729, "step": 7947 }, { "epoch": 1.8326031819229882, "grad_norm": 1.7924980398771633, "learning_rate": 3.8168219593061376e-08, "loss": 0.3580874800682068, "step": 7948 }, { "epoch": 1.832833756052571, "grad_norm": 1.6096517551702718, "learning_rate": 3.806396063366424e-08, "loss": 0.4350799024105072, "step": 7949 }, { "epoch": 1.8330643301821534, "grad_norm": 1.3546161389632028, "learning_rate": 3.79598415014194e-08, "loss": 0.4386145770549774, "step": 7950 }, { "epoch": 1.8332949043117361, "grad_norm": 1.4421267919386862, "learning_rate": 3.785586221146142e-08, "loss": 0.5122627019882202, "step": 7951 }, { "epoch": 1.8335254784413189, "grad_norm": 1.3507016201924953, "learning_rate": 3.77520227789051e-08, "loss": 0.41197121143341064, "step": 7952 }, { "epoch": 1.8337560525709016, "grad_norm": 1.7729553069577912, "learning_rate": 3.764832321884426e-08, "loss": 0.5508084297180176, "step": 7953 }, { "epoch": 1.8339866267004843, "grad_norm": 1.3788371713361898, "learning_rate": 3.754476354635283e-08, "loss": 0.40791934728622437, "step": 7954 }, { "epoch": 1.834217200830067, "grad_norm": 1.4693932480728087, "learning_rate": 3.7441343776484113e-08, "loss": 0.3880457878112793, "step": 7955 }, { "epoch": 1.8344477749596495, "grad_norm": 1.4561569110121497, "learning_rate": 3.7338063924271304e-08, "loss": 0.40519118309020996, "step": 7956 }, { "epoch": 1.8346783490892322, "grad_norm": 1.4799489730655653, "learning_rate": 3.723492400472716e-08, "loss": 0.46081095933914185, "step": 7957 }, { "epoch": 1.8349089232188147, "grad_norm": 1.3167338346767847, "learning_rate": 3.713192403284438e-08, "loss": 0.3946321904659271, "step": 7958 }, { "epoch": 1.8351394973483974, "grad_norm": 1.743632986191688, "learning_rate": 3.702906402359474e-08, "loss": 0.4699859023094177, "step": 7959 }, { "epoch": 1.8353700714779801, "grad_norm": 1.4691817330554993, "learning_rate": 3.692634399192995e-08, "loss": 0.43031781911849976, "step": 7960 }, { "epoch": 1.8356006456075629, "grad_norm": 1.5694622813964751, "learning_rate": 3.6823763952781636e-08, "loss": 0.4072418212890625, "step": 7961 }, { "epoch": 1.8358312197371456, "grad_norm": 1.7009922761684866, "learning_rate": 3.672132392106053e-08, "loss": 0.40659528970718384, "step": 7962 }, { "epoch": 1.8360617938667283, "grad_norm": 1.2845193385628964, "learning_rate": 3.661902391165772e-08, "loss": 0.41279205679893494, "step": 7963 }, { "epoch": 1.8362923679963108, "grad_norm": 1.407521764327922, "learning_rate": 3.65168639394432e-08, "loss": 0.43887826800346375, "step": 7964 }, { "epoch": 1.8365229421258935, "grad_norm": 1.585883988281566, "learning_rate": 3.6414844019267196e-08, "loss": 0.46111762523651123, "step": 7965 }, { "epoch": 1.836753516255476, "grad_norm": 1.5089060420061358, "learning_rate": 3.63129641659593e-08, "loss": 0.42694801092147827, "step": 7966 }, { "epoch": 1.8369840903850587, "grad_norm": 1.563222995065882, "learning_rate": 3.6211224394328775e-08, "loss": 0.4674855172634125, "step": 7967 }, { "epoch": 1.8372146645146414, "grad_norm": 1.6612957725595774, "learning_rate": 3.610962471916435e-08, "loss": 0.48998844623565674, "step": 7968 }, { "epoch": 1.8374452386442242, "grad_norm": 1.517118505836267, "learning_rate": 3.600816515523486e-08, "loss": 0.4162273406982422, "step": 7969 }, { "epoch": 1.8376758127738069, "grad_norm": 1.6498845355681542, "learning_rate": 3.5906845717288304e-08, "loss": 0.4446166753768921, "step": 7970 }, { "epoch": 1.8379063869033896, "grad_norm": 1.6723175784368125, "learning_rate": 3.580566642005245e-08, "loss": 0.4782527983188629, "step": 7971 }, { "epoch": 1.838136961032972, "grad_norm": 1.667138689471541, "learning_rate": 3.570462727823476e-08, "loss": 0.43014609813690186, "step": 7972 }, { "epoch": 1.8383675351625548, "grad_norm": 1.5808858327085533, "learning_rate": 3.560372830652225e-08, "loss": 0.5155357122421265, "step": 7973 }, { "epoch": 1.8385981092921373, "grad_norm": 1.4181681095350445, "learning_rate": 3.5502969519581984e-08, "loss": 0.4231104254722595, "step": 7974 }, { "epoch": 1.83882868342172, "grad_norm": 1.8426199170185766, "learning_rate": 3.540235093205979e-08, "loss": 0.529877245426178, "step": 7975 }, { "epoch": 1.8390592575513027, "grad_norm": 1.5632800597633676, "learning_rate": 3.530187255858186e-08, "loss": 0.4841991662979126, "step": 7976 }, { "epoch": 1.8392898316808854, "grad_norm": 1.5770240615602402, "learning_rate": 3.520153441375362e-08, "loss": 0.40202534198760986, "step": 7977 }, { "epoch": 1.8395204058104682, "grad_norm": 1.4104759549786023, "learning_rate": 3.51013365121603e-08, "loss": 0.398551344871521, "step": 7978 }, { "epoch": 1.8397509799400509, "grad_norm": 1.5102819529399165, "learning_rate": 3.500127886836668e-08, "loss": 0.49139225482940674, "step": 7979 }, { "epoch": 1.8399815540696334, "grad_norm": 1.7659081046335245, "learning_rate": 3.4901361496917135e-08, "loss": 0.4708287715911865, "step": 7980 }, { "epoch": 1.840212128199216, "grad_norm": 1.3491474153090526, "learning_rate": 3.4801584412335714e-08, "loss": 0.4174381494522095, "step": 7981 }, { "epoch": 1.8404427023287986, "grad_norm": 1.6453019064878467, "learning_rate": 3.470194762912593e-08, "loss": 0.535778284072876, "step": 7982 }, { "epoch": 1.8406732764583813, "grad_norm": 1.7228199406120377, "learning_rate": 3.4602451161771186e-08, "loss": 0.540034294128418, "step": 7983 }, { "epoch": 1.840903850587964, "grad_norm": 1.794022377740068, "learning_rate": 3.450309502473403e-08, "loss": 0.4399121403694153, "step": 7984 }, { "epoch": 1.8411344247175467, "grad_norm": 1.6932512977389786, "learning_rate": 3.4403879232457134e-08, "loss": 0.5011022686958313, "step": 7985 }, { "epoch": 1.8413649988471295, "grad_norm": 1.580497796669037, "learning_rate": 3.4304803799362405e-08, "loss": 0.392477810382843, "step": 7986 }, { "epoch": 1.8415955729767122, "grad_norm": 1.5439573803469637, "learning_rate": 3.420586873985132e-08, "loss": 0.4734686315059662, "step": 7987 }, { "epoch": 1.8418261471062947, "grad_norm": 1.3285059669744466, "learning_rate": 3.410707406830537e-08, "loss": 0.37347573041915894, "step": 7988 }, { "epoch": 1.8420567212358774, "grad_norm": 1.6328708193086845, "learning_rate": 3.400841979908531e-08, "loss": 0.38837599754333496, "step": 7989 }, { "epoch": 1.8422872953654599, "grad_norm": 1.6277616294407593, "learning_rate": 3.390990594653142e-08, "loss": 0.38598424196243286, "step": 7990 }, { "epoch": 1.8425178694950426, "grad_norm": 1.584379501910531, "learning_rate": 3.381153252496371e-08, "loss": 0.48508739471435547, "step": 7991 }, { "epoch": 1.8427484436246253, "grad_norm": 1.609395355542375, "learning_rate": 3.3713299548681736e-08, "loss": 0.41946491599082947, "step": 7992 }, { "epoch": 1.842979017754208, "grad_norm": 1.4959274640542461, "learning_rate": 3.3615207031964744e-08, "loss": 0.4803915023803711, "step": 7993 }, { "epoch": 1.8432095918837907, "grad_norm": 1.3835076847275678, "learning_rate": 3.351725498907143e-08, "loss": 0.39463797211647034, "step": 7994 }, { "epoch": 1.8434401660133735, "grad_norm": 1.5742658557245284, "learning_rate": 3.341944343424008e-08, "loss": 0.43345123529434204, "step": 7995 }, { "epoch": 1.843670740142956, "grad_norm": 1.7826616989180466, "learning_rate": 3.332177238168854e-08, "loss": 0.5164570212364197, "step": 7996 }, { "epoch": 1.8439013142725387, "grad_norm": 1.71354580792071, "learning_rate": 3.322424184561445e-08, "loss": 0.5313355922698975, "step": 7997 }, { "epoch": 1.8441318884021212, "grad_norm": 1.901316143248936, "learning_rate": 3.3126851840194815e-08, "loss": 0.4488258361816406, "step": 7998 }, { "epoch": 1.8443624625317039, "grad_norm": 1.479116299891256, "learning_rate": 3.30296023795863e-08, "loss": 0.5122581720352173, "step": 7999 }, { "epoch": 1.8445930366612866, "grad_norm": 1.4735639536720297, "learning_rate": 3.293249347792493e-08, "loss": 0.4619610905647278, "step": 8000 }, { "epoch": 1.8448236107908693, "grad_norm": 1.3540260330438945, "learning_rate": 3.2835525149326636e-08, "loss": 0.4214603006839752, "step": 8001 }, { "epoch": 1.845054184920452, "grad_norm": 1.4074387483331625, "learning_rate": 3.2738697407886485e-08, "loss": 0.40279510617256165, "step": 8002 }, { "epoch": 1.8452847590500348, "grad_norm": 1.4474967943141424, "learning_rate": 3.264201026767977e-08, "loss": 0.4797242283821106, "step": 8003 }, { "epoch": 1.8455153331796172, "grad_norm": 1.3554973222515974, "learning_rate": 3.254546374276057e-08, "loss": 0.3833237588405609, "step": 8004 }, { "epoch": 1.8457459073092, "grad_norm": 1.4594426546625732, "learning_rate": 3.244905784716323e-08, "loss": 0.41461342573165894, "step": 8005 }, { "epoch": 1.8459764814387825, "grad_norm": 1.5177617199741877, "learning_rate": 3.235279259490109e-08, "loss": 0.592107892036438, "step": 8006 }, { "epoch": 1.8462070555683652, "grad_norm": 1.684042887917187, "learning_rate": 3.2256667999967405e-08, "loss": 0.39025670289993286, "step": 8007 }, { "epoch": 1.846437629697948, "grad_norm": 1.286539298720562, "learning_rate": 3.2160684076334766e-08, "loss": 0.40197378396987915, "step": 8008 }, { "epoch": 1.8466682038275306, "grad_norm": 1.8155125046022762, "learning_rate": 3.206484083795558e-08, "loss": 0.4013815224170685, "step": 8009 }, { "epoch": 1.8468987779571133, "grad_norm": 1.5762142363003944, "learning_rate": 3.1969138298761356e-08, "loss": 0.45386412739753723, "step": 8010 }, { "epoch": 1.8471293520866958, "grad_norm": 1.8756892627173425, "learning_rate": 3.187357647266353e-08, "loss": 0.43034985661506653, "step": 8011 }, { "epoch": 1.8473599262162785, "grad_norm": 1.6730495727197179, "learning_rate": 3.177815537355322e-08, "loss": 0.4346637725830078, "step": 8012 }, { "epoch": 1.847590500345861, "grad_norm": 1.8461631710642654, "learning_rate": 3.1682875015300535e-08, "loss": 0.5203511118888855, "step": 8013 }, { "epoch": 1.8478210744754437, "grad_norm": 1.5817324628827356, "learning_rate": 3.1587735411755636e-08, "loss": 0.37658393383026123, "step": 8014 }, { "epoch": 1.8480516486050265, "grad_norm": 1.6304961028131815, "learning_rate": 3.149273657674789e-08, "loss": 0.5473518371582031, "step": 8015 }, { "epoch": 1.8482822227346092, "grad_norm": 1.800633884327913, "learning_rate": 3.1397878524086484e-08, "loss": 0.5171597599983215, "step": 8016 }, { "epoch": 1.848512796864192, "grad_norm": 1.585245081928725, "learning_rate": 3.130316126755983e-08, "loss": 0.46588706970214844, "step": 8017 }, { "epoch": 1.8487433709937746, "grad_norm": 1.496582071882617, "learning_rate": 3.1208584820936244e-08, "loss": 0.5571366548538208, "step": 8018 }, { "epoch": 1.848973945123357, "grad_norm": 1.5249372170069353, "learning_rate": 3.111414919796318e-08, "loss": 0.45803195238113403, "step": 8019 }, { "epoch": 1.8492045192529398, "grad_norm": 1.4834943043987898, "learning_rate": 3.1019854412367875e-08, "loss": 0.4732629060745239, "step": 8020 }, { "epoch": 1.8494350933825223, "grad_norm": 1.7625144420898597, "learning_rate": 3.092570047785714e-08, "loss": 0.5268767476081848, "step": 8021 }, { "epoch": 1.849665667512105, "grad_norm": 1.5017810734056087, "learning_rate": 3.0831687408117035e-08, "loss": 0.5179537534713745, "step": 8022 }, { "epoch": 1.8498962416416878, "grad_norm": 1.7406452748153565, "learning_rate": 3.073781521681351e-08, "loss": 0.5110389590263367, "step": 8023 }, { "epoch": 1.8501268157712705, "grad_norm": 1.442631804804713, "learning_rate": 3.064408391759154e-08, "loss": 0.4078633189201355, "step": 8024 }, { "epoch": 1.8503573899008532, "grad_norm": 1.6619024740283894, "learning_rate": 3.055049352407624e-08, "loss": 0.4632648229598999, "step": 8025 }, { "epoch": 1.850587964030436, "grad_norm": 1.577432813868154, "learning_rate": 3.0457044049871705e-08, "loss": 0.41569265723228455, "step": 8026 }, { "epoch": 1.8508185381600184, "grad_norm": 1.3795657287644, "learning_rate": 3.036373550856186e-08, "loss": 0.4105853736400604, "step": 8027 }, { "epoch": 1.8510491122896011, "grad_norm": 1.6584799060214424, "learning_rate": 3.027056791370996e-08, "loss": 0.4415978789329529, "step": 8028 }, { "epoch": 1.8512796864191836, "grad_norm": 1.571030596092026, "learning_rate": 3.017754127885908e-08, "loss": 0.3990614414215088, "step": 8029 }, { "epoch": 1.8515102605487663, "grad_norm": 1.5323241652532567, "learning_rate": 3.0084655617531376e-08, "loss": 0.42349040508270264, "step": 8030 }, { "epoch": 1.851740834678349, "grad_norm": 1.4436112405033301, "learning_rate": 2.9991910943228725e-08, "loss": 0.4687228798866272, "step": 8031 }, { "epoch": 1.8519714088079318, "grad_norm": 1.91227305815919, "learning_rate": 2.989930726943268e-08, "loss": 0.6091229915618896, "step": 8032 }, { "epoch": 1.8522019829375145, "grad_norm": 1.527659992048368, "learning_rate": 2.980684460960381e-08, "loss": 0.43401795625686646, "step": 8033 }, { "epoch": 1.8524325570670972, "grad_norm": 1.521615388244922, "learning_rate": 2.9714522977182688e-08, "loss": 0.47280481457710266, "step": 8034 }, { "epoch": 1.8526631311966797, "grad_norm": 1.6019291161476, "learning_rate": 2.962234238558925e-08, "loss": 0.5078729391098022, "step": 8035 }, { "epoch": 1.8528937053262624, "grad_norm": 1.8353491661496104, "learning_rate": 2.9530302848223e-08, "loss": 0.4279085695743561, "step": 8036 }, { "epoch": 1.853124279455845, "grad_norm": 1.4587208506754334, "learning_rate": 2.9438404378462455e-08, "loss": 0.3720093369483948, "step": 8037 }, { "epoch": 1.8533548535854276, "grad_norm": 1.810026420285634, "learning_rate": 2.934664698966627e-08, "loss": 0.26778513193130493, "step": 8038 }, { "epoch": 1.8535854277150103, "grad_norm": 1.569617242169025, "learning_rate": 2.9255030695172324e-08, "loss": 0.47606828808784485, "step": 8039 }, { "epoch": 1.853816001844593, "grad_norm": 1.8330928647910023, "learning_rate": 2.9163555508297632e-08, "loss": 0.437153160572052, "step": 8040 }, { "epoch": 1.8540465759741758, "grad_norm": 1.3219241142527494, "learning_rate": 2.907222144233945e-08, "loss": 0.408009797334671, "step": 8041 }, { "epoch": 1.8542771501037585, "grad_norm": 1.3761080217774861, "learning_rate": 2.8981028510573824e-08, "loss": 0.3435688018798828, "step": 8042 }, { "epoch": 1.854507724233341, "grad_norm": 1.881646492298394, "learning_rate": 2.8889976726256705e-08, "loss": 0.4829018712043762, "step": 8043 }, { "epoch": 1.8547382983629237, "grad_norm": 1.5758694223281, "learning_rate": 2.879906610262339e-08, "loss": 0.44579288363456726, "step": 8044 }, { "epoch": 1.8549688724925062, "grad_norm": 1.3922554430382053, "learning_rate": 2.8708296652888764e-08, "loss": 0.4952869415283203, "step": 8045 }, { "epoch": 1.855199446622089, "grad_norm": 1.4450922871815606, "learning_rate": 2.8617668390246818e-08, "loss": 0.4870997965335846, "step": 8046 }, { "epoch": 1.8554300207516716, "grad_norm": 1.5651252792966914, "learning_rate": 2.8527181327871465e-08, "loss": 0.5009135603904724, "step": 8047 }, { "epoch": 1.8556605948812543, "grad_norm": 1.3977550991376733, "learning_rate": 2.8436835478915954e-08, "loss": 0.4837114214897156, "step": 8048 }, { "epoch": 1.855891169010837, "grad_norm": 1.6474653449248091, "learning_rate": 2.8346630856512897e-08, "loss": 0.47955578565597534, "step": 8049 }, { "epoch": 1.8561217431404198, "grad_norm": 1.705788106947518, "learning_rate": 2.8256567473774363e-08, "loss": 0.4882965385913849, "step": 8050 }, { "epoch": 1.8563523172700023, "grad_norm": 1.5940097685845425, "learning_rate": 2.8166645343792094e-08, "loss": 0.4542367458343506, "step": 8051 }, { "epoch": 1.856582891399585, "grad_norm": 1.5880265061576002, "learning_rate": 2.8076864479637198e-08, "loss": 0.4506416916847229, "step": 8052 }, { "epoch": 1.8568134655291675, "grad_norm": 1.699970116686096, "learning_rate": 2.798722489436012e-08, "loss": 0.5043084025382996, "step": 8053 }, { "epoch": 1.8570440396587502, "grad_norm": 1.397398070036947, "learning_rate": 2.78977266009911e-08, "loss": 0.3711032271385193, "step": 8054 }, { "epoch": 1.857274613788333, "grad_norm": 1.3008294527362816, "learning_rate": 2.7808369612539405e-08, "loss": 0.33371198177337646, "step": 8055 }, { "epoch": 1.8575051879179156, "grad_norm": 1.7364482681056421, "learning_rate": 2.771915394199409e-08, "loss": 0.5328178405761719, "step": 8056 }, { "epoch": 1.8577357620474984, "grad_norm": 1.925308909381556, "learning_rate": 2.7630079602323443e-08, "loss": 0.4615975618362427, "step": 8057 }, { "epoch": 1.857966336177081, "grad_norm": 1.506605490676224, "learning_rate": 2.754114660647533e-08, "loss": 0.4667460024356842, "step": 8058 }, { "epoch": 1.8581969103066636, "grad_norm": 1.7246190337812906, "learning_rate": 2.745235496737719e-08, "loss": 0.483825147151947, "step": 8059 }, { "epoch": 1.8584274844362463, "grad_norm": 1.7802094460466942, "learning_rate": 2.736370469793592e-08, "loss": 0.4376814365386963, "step": 8060 }, { "epoch": 1.8586580585658288, "grad_norm": 1.4605341926622646, "learning_rate": 2.7275195811037432e-08, "loss": 0.4862465262413025, "step": 8061 }, { "epoch": 1.8588886326954115, "grad_norm": 1.6497121576486102, "learning_rate": 2.718682831954744e-08, "loss": 0.48104172945022583, "step": 8062 }, { "epoch": 1.8591192068249942, "grad_norm": 1.3643295104524422, "learning_rate": 2.709860223631122e-08, "loss": 0.43358030915260315, "step": 8063 }, { "epoch": 1.859349780954577, "grad_norm": 1.3052220670178016, "learning_rate": 2.701051757415307e-08, "loss": 0.44614607095718384, "step": 8064 }, { "epoch": 1.8595803550841596, "grad_norm": 1.8220525339474862, "learning_rate": 2.6922574345877303e-08, "loss": 0.49824249744415283, "step": 8065 }, { "epoch": 1.8598109292137424, "grad_norm": 1.3314333068504594, "learning_rate": 2.683477256426714e-08, "loss": 0.39621901512145996, "step": 8066 }, { "epoch": 1.8600415033433249, "grad_norm": 1.3391032368154236, "learning_rate": 2.6747112242085478e-08, "loss": 0.40166205167770386, "step": 8067 }, { "epoch": 1.8602720774729076, "grad_norm": 1.720101921843303, "learning_rate": 2.6659593392074575e-08, "loss": 0.4249534606933594, "step": 8068 }, { "epoch": 1.86050265160249, "grad_norm": 1.3203085704476971, "learning_rate": 2.6572216026956473e-08, "loss": 0.4015510678291321, "step": 8069 }, { "epoch": 1.8607332257320728, "grad_norm": 1.8982655978960439, "learning_rate": 2.6484980159432236e-08, "loss": 0.4691264033317566, "step": 8070 }, { "epoch": 1.8609637998616555, "grad_norm": 1.6363630573411998, "learning_rate": 2.639788580218216e-08, "loss": 0.5095053315162659, "step": 8071 }, { "epoch": 1.8611943739912382, "grad_norm": 1.707433776183968, "learning_rate": 2.6310932967866794e-08, "loss": 0.4658794403076172, "step": 8072 }, { "epoch": 1.861424948120821, "grad_norm": 1.7622547433521365, "learning_rate": 2.622412166912513e-08, "loss": 0.495827853679657, "step": 8073 }, { "epoch": 1.8616555222504036, "grad_norm": 1.6584095706736666, "learning_rate": 2.6137451918576413e-08, "loss": 0.43652772903442383, "step": 8074 }, { "epoch": 1.8618860963799861, "grad_norm": 1.410927084601702, "learning_rate": 2.6050923728818784e-08, "loss": 0.4636423587799072, "step": 8075 }, { "epoch": 1.8621166705095689, "grad_norm": 1.6137478822178715, "learning_rate": 2.5964537112430186e-08, "loss": 0.4572441577911377, "step": 8076 }, { "epoch": 1.8623472446391514, "grad_norm": 1.5268149737583054, "learning_rate": 2.587829208196757e-08, "loss": 0.4549320340156555, "step": 8077 }, { "epoch": 1.862577818768734, "grad_norm": 1.4757300368438027, "learning_rate": 2.5792188649967795e-08, "loss": 0.46412795782089233, "step": 8078 }, { "epoch": 1.8628083928983168, "grad_norm": 1.566100546942984, "learning_rate": 2.570622682894652e-08, "loss": 0.40059781074523926, "step": 8079 }, { "epoch": 1.8630389670278995, "grad_norm": 1.8382248312833556, "learning_rate": 2.5620406631399416e-08, "loss": 0.5396246910095215, "step": 8080 }, { "epoch": 1.8632695411574822, "grad_norm": 1.630240250521673, "learning_rate": 2.553472806980128e-08, "loss": 0.4793856143951416, "step": 8081 }, { "epoch": 1.863500115287065, "grad_norm": 1.7081981493499068, "learning_rate": 2.5449191156606264e-08, "loss": 0.4428815543651581, "step": 8082 }, { "epoch": 1.8637306894166474, "grad_norm": 1.3161952024113066, "learning_rate": 2.5363795904248086e-08, "loss": 0.4024256467819214, "step": 8083 }, { "epoch": 1.8639612635462302, "grad_norm": 1.7334425937535092, "learning_rate": 2.5278542325139818e-08, "loss": 0.4868123531341553, "step": 8084 }, { "epoch": 1.8641918376758126, "grad_norm": 1.8199560965911645, "learning_rate": 2.519343043167399e-08, "loss": 0.602108359336853, "step": 8085 }, { "epoch": 1.8644224118053954, "grad_norm": 1.8527423308196338, "learning_rate": 2.510846023622237e-08, "loss": 0.4500008225440979, "step": 8086 }, { "epoch": 1.864652985934978, "grad_norm": 1.4521386296534855, "learning_rate": 2.502363175113642e-08, "loss": 0.3894640803337097, "step": 8087 }, { "epoch": 1.8648835600645608, "grad_norm": 1.471988486213167, "learning_rate": 2.493894498874649e-08, "loss": 0.4525550305843353, "step": 8088 }, { "epoch": 1.8651141341941435, "grad_norm": 1.362693221908779, "learning_rate": 2.485439996136296e-08, "loss": 0.3908608555793762, "step": 8089 }, { "epoch": 1.8653447083237262, "grad_norm": 1.5537540661666722, "learning_rate": 2.4769996681275106e-08, "loss": 0.4551984667778015, "step": 8090 }, { "epoch": 1.8655752824533087, "grad_norm": 1.3331466559033927, "learning_rate": 2.468573516075201e-08, "loss": 0.34474045038223267, "step": 8091 }, { "epoch": 1.8658058565828914, "grad_norm": 1.675344505563735, "learning_rate": 2.4601615412041755e-08, "loss": 0.41480594873428345, "step": 8092 }, { "epoch": 1.866036430712474, "grad_norm": 1.6368782805002868, "learning_rate": 2.4517637447372007e-08, "loss": 0.5043104887008667, "step": 8093 }, { "epoch": 1.8662670048420567, "grad_norm": 1.7139805676568358, "learning_rate": 2.4433801278950007e-08, "loss": 0.4467152953147888, "step": 8094 }, { "epoch": 1.8664975789716394, "grad_norm": 1.5274424401661542, "learning_rate": 2.4350106918962e-08, "loss": 0.454445481300354, "step": 8095 }, { "epoch": 1.866728153101222, "grad_norm": 1.5661075903861215, "learning_rate": 2.426655437957392e-08, "loss": 0.4639291763305664, "step": 8096 }, { "epoch": 1.8669587272308048, "grad_norm": 1.6251687636184629, "learning_rate": 2.418314367293084e-08, "loss": 0.46178731322288513, "step": 8097 }, { "epoch": 1.8671893013603875, "grad_norm": 1.5047265923361783, "learning_rate": 2.4099874811157383e-08, "loss": 0.43832290172576904, "step": 8098 }, { "epoch": 1.86741987548997, "grad_norm": 1.569040322283118, "learning_rate": 2.4016747806357652e-08, "loss": 0.4586114287376404, "step": 8099 }, { "epoch": 1.8676504496195527, "grad_norm": 1.403368540081911, "learning_rate": 2.3933762670614978e-08, "loss": 0.37975889444351196, "step": 8100 }, { "epoch": 1.8678810237491352, "grad_norm": 1.6666819300781532, "learning_rate": 2.3850919415992042e-08, "loss": 0.4579748511314392, "step": 8101 }, { "epoch": 1.868111597878718, "grad_norm": 1.5976733248377182, "learning_rate": 2.3768218054530775e-08, "loss": 0.5120238661766052, "step": 8102 }, { "epoch": 1.8683421720083007, "grad_norm": 1.47865092584181, "learning_rate": 2.3685658598253e-08, "loss": 0.41514822840690613, "step": 8103 }, { "epoch": 1.8685727461378834, "grad_norm": 1.6132937806442644, "learning_rate": 2.360324105915934e-08, "loss": 0.49480026960372925, "step": 8104 }, { "epoch": 1.868803320267466, "grad_norm": 1.516759878457302, "learning_rate": 2.352096544922999e-08, "loss": 0.41115111112594604, "step": 8105 }, { "epoch": 1.8690338943970488, "grad_norm": 1.8593225608723183, "learning_rate": 2.3438831780424607e-08, "loss": 0.44793501496315, "step": 8106 }, { "epoch": 1.8692644685266313, "grad_norm": 2.087747863463927, "learning_rate": 2.3356840064682305e-08, "loss": 0.4197582006454468, "step": 8107 }, { "epoch": 1.869495042656214, "grad_norm": 1.3708560469219937, "learning_rate": 2.3274990313921218e-08, "loss": 0.3654597997665405, "step": 8108 }, { "epoch": 1.8697256167857965, "grad_norm": 1.6733057347639861, "learning_rate": 2.319328254003927e-08, "loss": 0.5105487704277039, "step": 8109 }, { "epoch": 1.8699561909153792, "grad_norm": 1.6787548385436994, "learning_rate": 2.3111716754913192e-08, "loss": 0.5202287435531616, "step": 8110 }, { "epoch": 1.870186765044962, "grad_norm": 1.5305524386936447, "learning_rate": 2.303029297039949e-08, "loss": 0.4475836753845215, "step": 8111 }, { "epoch": 1.8704173391745447, "grad_norm": 1.579007380002247, "learning_rate": 2.2949011198334144e-08, "loss": 0.5010285973548889, "step": 8112 }, { "epoch": 1.8706479133041274, "grad_norm": 1.4473541177707174, "learning_rate": 2.286787145053204e-08, "loss": 0.41949477791786194, "step": 8113 }, { "epoch": 1.87087848743371, "grad_norm": 1.3276801089952157, "learning_rate": 2.2786873738787738e-08, "loss": 0.38505449891090393, "step": 8114 }, { "epoch": 1.8711090615632926, "grad_norm": 1.8776948972547884, "learning_rate": 2.2706018074875043e-08, "loss": 0.4854990839958191, "step": 8115 }, { "epoch": 1.8713396356928753, "grad_norm": 1.3982424394333428, "learning_rate": 2.2625304470547336e-08, "loss": 0.3846585154533386, "step": 8116 }, { "epoch": 1.8715702098224578, "grad_norm": 1.7499321509858707, "learning_rate": 2.2544732937537003e-08, "loss": 0.48948657512664795, "step": 8117 }, { "epoch": 1.8718007839520405, "grad_norm": 2.062408637955344, "learning_rate": 2.2464303487555902e-08, "loss": 0.5571197867393494, "step": 8118 }, { "epoch": 1.8720313580816232, "grad_norm": 1.6301482456607912, "learning_rate": 2.2384016132295345e-08, "loss": 0.514819324016571, "step": 8119 }, { "epoch": 1.872261932211206, "grad_norm": 1.5677432247071832, "learning_rate": 2.230387088342589e-08, "loss": 0.4411713182926178, "step": 8120 }, { "epoch": 1.8724925063407887, "grad_norm": 1.4508146354194726, "learning_rate": 2.2223867752597437e-08, "loss": 0.4494340717792511, "step": 8121 }, { "epoch": 1.8727230804703712, "grad_norm": 1.6205003929883524, "learning_rate": 2.2144006751439236e-08, "loss": 0.4186316132545471, "step": 8122 }, { "epoch": 1.8729536545999539, "grad_norm": 1.5017815147990925, "learning_rate": 2.2064287891560007e-08, "loss": 0.45932692289352417, "step": 8123 }, { "epoch": 1.8731842287295364, "grad_norm": 1.475598332139336, "learning_rate": 2.1984711184547477e-08, "loss": 0.4095005989074707, "step": 8124 }, { "epoch": 1.873414802859119, "grad_norm": 1.4633944208901333, "learning_rate": 2.1905276641969284e-08, "loss": 0.3822292685508728, "step": 8125 }, { "epoch": 1.8736453769887018, "grad_norm": 1.5993925787143786, "learning_rate": 2.1825984275371633e-08, "loss": 0.41837501525878906, "step": 8126 }, { "epoch": 1.8738759511182845, "grad_norm": 1.6176173713553115, "learning_rate": 2.1746834096280752e-08, "loss": 0.3903341591358185, "step": 8127 }, { "epoch": 1.8741065252478672, "grad_norm": 1.4079834631265329, "learning_rate": 2.166782611620177e-08, "loss": 0.4760533571243286, "step": 8128 }, { "epoch": 1.87433709937745, "grad_norm": 1.4208864897990974, "learning_rate": 2.1588960346619388e-08, "loss": 0.43960827589035034, "step": 8129 }, { "epoch": 1.8745676735070325, "grad_norm": 1.7654096006141957, "learning_rate": 2.151023679899755e-08, "loss": 0.47941142320632935, "step": 8130 }, { "epoch": 1.8747982476366152, "grad_norm": 1.41048993466122, "learning_rate": 2.143165548477943e-08, "loss": 0.4467000961303711, "step": 8131 }, { "epoch": 1.8750288217661977, "grad_norm": 1.4796609851220597, "learning_rate": 2.1353216415387788e-08, "loss": 0.42472416162490845, "step": 8132 }, { "epoch": 1.8752593958957804, "grad_norm": 1.9200971165248846, "learning_rate": 2.1274919602224273e-08, "loss": 0.5127208232879639, "step": 8133 }, { "epoch": 1.875489970025363, "grad_norm": 1.8325759046238386, "learning_rate": 2.119676505667045e-08, "loss": 0.5362575650215149, "step": 8134 }, { "epoch": 1.8757205441549458, "grad_norm": 1.2983178226172876, "learning_rate": 2.111875279008657e-08, "loss": 0.4025413990020752, "step": 8135 }, { "epoch": 1.8759511182845285, "grad_norm": 1.5647543555868217, "learning_rate": 2.1040882813812667e-08, "loss": 0.49126237630844116, "step": 8136 }, { "epoch": 1.8761816924141113, "grad_norm": 1.64373423682739, "learning_rate": 2.096315513916791e-08, "loss": 0.40609198808670044, "step": 8137 }, { "epoch": 1.8764122665436938, "grad_norm": 1.4881317882345182, "learning_rate": 2.0885569777450707e-08, "loss": 0.47826945781707764, "step": 8138 }, { "epoch": 1.8766428406732765, "grad_norm": 1.4578062807690564, "learning_rate": 2.0808126739939035e-08, "loss": 0.39987948536872864, "step": 8139 }, { "epoch": 1.876873414802859, "grad_norm": 1.6010627164873539, "learning_rate": 2.0730826037890003e-08, "loss": 0.5727471113204956, "step": 8140 }, { "epoch": 1.8771039889324417, "grad_norm": 1.3737495035065335, "learning_rate": 2.0653667682540066e-08, "loss": 0.4772847294807434, "step": 8141 }, { "epoch": 1.8773345630620244, "grad_norm": 1.54097710668183, "learning_rate": 2.0576651685104697e-08, "loss": 0.3258974552154541, "step": 8142 }, { "epoch": 1.8775651371916071, "grad_norm": 1.4067173519179077, "learning_rate": 2.049977805677938e-08, "loss": 0.5220766067504883, "step": 8143 }, { "epoch": 1.8777957113211898, "grad_norm": 1.2918102910413813, "learning_rate": 2.0423046808738077e-08, "loss": 0.39550334215164185, "step": 8144 }, { "epoch": 1.8780262854507725, "grad_norm": 2.3983596335767334, "learning_rate": 2.034645795213463e-08, "loss": 0.4487137198448181, "step": 8145 }, { "epoch": 1.878256859580355, "grad_norm": 1.3947776950768658, "learning_rate": 2.0270011498102147e-08, "loss": 0.3363339304924011, "step": 8146 }, { "epoch": 1.8784874337099378, "grad_norm": 1.5333942075668883, "learning_rate": 2.019370745775273e-08, "loss": 0.5161975026130676, "step": 8147 }, { "epoch": 1.8787180078395203, "grad_norm": 1.4587907721196531, "learning_rate": 2.011754584217784e-08, "loss": 0.359643816947937, "step": 8148 }, { "epoch": 1.878948581969103, "grad_norm": 1.3696377552673178, "learning_rate": 2.0041526662448625e-08, "loss": 0.4472349286079407, "step": 8149 }, { "epoch": 1.8791791560986857, "grad_norm": 1.6693442042315434, "learning_rate": 1.9965649929615135e-08, "loss": 0.40363550186157227, "step": 8150 }, { "epoch": 1.8794097302282684, "grad_norm": 1.7598833036688746, "learning_rate": 1.9889915654706656e-08, "loss": 0.46063172817230225, "step": 8151 }, { "epoch": 1.8796403043578511, "grad_norm": 1.6348416553504144, "learning_rate": 1.981432384873205e-08, "loss": 0.4478832483291626, "step": 8152 }, { "epoch": 1.8798708784874338, "grad_norm": 1.7016857171242656, "learning_rate": 1.9738874522679304e-08, "loss": 0.3438538908958435, "step": 8153 }, { "epoch": 1.8801014526170163, "grad_norm": 2.2031337611169435, "learning_rate": 1.966356768751598e-08, "loss": 0.6035101413726807, "step": 8154 }, { "epoch": 1.880332026746599, "grad_norm": 1.6642481554824737, "learning_rate": 1.958840335418832e-08, "loss": 0.42533814907073975, "step": 8155 }, { "epoch": 1.8805626008761815, "grad_norm": 1.5825430260849223, "learning_rate": 1.9513381533622587e-08, "loss": 0.4117417633533478, "step": 8156 }, { "epoch": 1.8807931750057643, "grad_norm": 1.6218701576707837, "learning_rate": 1.943850223672361e-08, "loss": 0.4353973865509033, "step": 8157 }, { "epoch": 1.881023749135347, "grad_norm": 1.5613174256794196, "learning_rate": 1.9363765474376125e-08, "loss": 0.46115410327911377, "step": 8158 }, { "epoch": 1.8812543232649297, "grad_norm": 1.4415196194001674, "learning_rate": 1.9289171257443782e-08, "loss": 0.3851476311683655, "step": 8159 }, { "epoch": 1.8814848973945124, "grad_norm": 1.5586436794771006, "learning_rate": 1.921471959676957e-08, "loss": 0.4786919355392456, "step": 8160 }, { "epoch": 1.8817154715240951, "grad_norm": 1.6398537249529117, "learning_rate": 1.914041050317583e-08, "loss": 0.4427906274795532, "step": 8161 }, { "epoch": 1.8819460456536776, "grad_norm": 1.495606046913042, "learning_rate": 1.906624398746415e-08, "loss": 0.37774696946144104, "step": 8162 }, { "epoch": 1.8821766197832603, "grad_norm": 1.5733237369323263, "learning_rate": 1.8992220060415343e-08, "loss": 0.43793195486068726, "step": 8163 }, { "epoch": 1.8824071939128428, "grad_norm": 1.2904039749569203, "learning_rate": 1.8918338732789587e-08, "loss": 0.3869394063949585, "step": 8164 }, { "epoch": 1.8826377680424256, "grad_norm": 1.9325019962539283, "learning_rate": 1.8844600015326283e-08, "loss": 0.4963928461074829, "step": 8165 }, { "epoch": 1.8828683421720083, "grad_norm": 1.5945637624217548, "learning_rate": 1.8771003918743978e-08, "loss": 0.45727187395095825, "step": 8166 }, { "epoch": 1.883098916301591, "grad_norm": 1.8455372682093192, "learning_rate": 1.8697550453740884e-08, "loss": 0.4878919720649719, "step": 8167 }, { "epoch": 1.8833294904311737, "grad_norm": 1.7826396913976752, "learning_rate": 1.862423963099391e-08, "loss": 0.5376998782157898, "step": 8168 }, { "epoch": 1.8835600645607564, "grad_norm": 1.4765870494853872, "learning_rate": 1.8551071461159638e-08, "loss": 0.4534180760383606, "step": 8169 }, { "epoch": 1.883790638690339, "grad_norm": 1.561114582514347, "learning_rate": 1.847804595487379e-08, "loss": 0.43389183282852173, "step": 8170 }, { "epoch": 1.8840212128199216, "grad_norm": 1.535519375075225, "learning_rate": 1.8405163122751532e-08, "loss": 0.4833742678165436, "step": 8171 }, { "epoch": 1.8842517869495041, "grad_norm": 1.622186588307033, "learning_rate": 1.833242297538695e-08, "loss": 0.49344220757484436, "step": 8172 }, { "epoch": 1.8844823610790868, "grad_norm": 1.4984978840285303, "learning_rate": 1.8259825523353478e-08, "loss": 0.49290287494659424, "step": 8173 }, { "epoch": 1.8847129352086696, "grad_norm": 1.3380486770022888, "learning_rate": 1.8187370777204115e-08, "loss": 0.3971661627292633, "step": 8174 }, { "epoch": 1.8849435093382523, "grad_norm": 1.5640300636460862, "learning_rate": 1.811505874747066e-08, "loss": 0.4984559416770935, "step": 8175 }, { "epoch": 1.885174083467835, "grad_norm": 1.5865101985098036, "learning_rate": 1.804288944466459e-08, "loss": 0.38448822498321533, "step": 8176 }, { "epoch": 1.8854046575974177, "grad_norm": 1.9477188873182039, "learning_rate": 1.7970862879276406e-08, "loss": 0.5468838214874268, "step": 8177 }, { "epoch": 1.8856352317270002, "grad_norm": 1.4768596083300787, "learning_rate": 1.7898979061775844e-08, "loss": 0.46132227778434753, "step": 8178 }, { "epoch": 1.885865805856583, "grad_norm": 1.436520509516384, "learning_rate": 1.782723800261199e-08, "loss": 0.4636603593826294, "step": 8179 }, { "epoch": 1.8860963799861654, "grad_norm": 1.5429934177783204, "learning_rate": 1.7755639712213057e-08, "loss": 0.5302075147628784, "step": 8180 }, { "epoch": 1.8863269541157481, "grad_norm": 1.6563780466455296, "learning_rate": 1.7684184200986718e-08, "loss": 0.4817178249359131, "step": 8181 }, { "epoch": 1.8865575282453309, "grad_norm": 1.4897334937072715, "learning_rate": 1.7612871479319668e-08, "loss": 0.4535263180732727, "step": 8182 }, { "epoch": 1.8867881023749136, "grad_norm": 1.6029244875460678, "learning_rate": 1.7541701557577837e-08, "loss": 0.5260534286499023, "step": 8183 }, { "epoch": 1.8870186765044963, "grad_norm": 1.4065276330082377, "learning_rate": 1.7470674446106614e-08, "loss": 0.4526366591453552, "step": 8184 }, { "epoch": 1.887249250634079, "grad_norm": 1.663451618032215, "learning_rate": 1.7399790155230632e-08, "loss": 0.4721973240375519, "step": 8185 }, { "epoch": 1.8874798247636615, "grad_norm": 1.6510288712519465, "learning_rate": 1.7329048695253422e-08, "loss": 0.4331268072128296, "step": 8186 }, { "epoch": 1.8877103988932442, "grad_norm": 1.9623503418050199, "learning_rate": 1.7258450076458097e-08, "loss": 0.5175650119781494, "step": 8187 }, { "epoch": 1.8879409730228267, "grad_norm": 1.3640756960267433, "learning_rate": 1.718799430910678e-08, "loss": 0.45537033677101135, "step": 8188 }, { "epoch": 1.8881715471524094, "grad_norm": 1.540072753548263, "learning_rate": 1.7117681403441054e-08, "loss": 0.5055547952651978, "step": 8189 }, { "epoch": 1.8884021212819921, "grad_norm": 1.5849214553434074, "learning_rate": 1.7047511369681522e-08, "loss": 0.45514553785324097, "step": 8190 }, { "epoch": 1.8886326954115749, "grad_norm": 1.4821599822935887, "learning_rate": 1.6977484218028136e-08, "loss": 0.44227129220962524, "step": 8191 }, { "epoch": 1.8888632695411576, "grad_norm": 1.7163429603820965, "learning_rate": 1.690759995866009e-08, "loss": 0.4916682839393616, "step": 8192 }, { "epoch": 1.8890938436707403, "grad_norm": 1.8219225402151713, "learning_rate": 1.683785860173559e-08, "loss": 0.48626652359962463, "step": 8193 }, { "epoch": 1.8893244178003228, "grad_norm": 1.491517373721971, "learning_rate": 1.676826015739252e-08, "loss": 0.39982378482818604, "step": 8194 }, { "epoch": 1.8895549919299055, "grad_norm": 1.8710391095575285, "learning_rate": 1.6698804635747576e-08, "loss": 0.49218645691871643, "step": 8195 }, { "epoch": 1.889785566059488, "grad_norm": 1.5127362254029266, "learning_rate": 1.6629492046896897e-08, "loss": 0.38896578550338745, "step": 8196 }, { "epoch": 1.8900161401890707, "grad_norm": 1.5870268370960243, "learning_rate": 1.6560322400915538e-08, "loss": 0.4217762053012848, "step": 8197 }, { "epoch": 1.8902467143186534, "grad_norm": 1.5231528042475502, "learning_rate": 1.6491295707858343e-08, "loss": 0.4020112156867981, "step": 8198 }, { "epoch": 1.8904772884482361, "grad_norm": 2.1189678944561954, "learning_rate": 1.6422411977758843e-08, "loss": 0.4630794823169708, "step": 8199 }, { "epoch": 1.8907078625778189, "grad_norm": 1.526138087578761, "learning_rate": 1.6353671220629917e-08, "loss": 0.3673272132873535, "step": 8200 }, { "epoch": 1.8909384367074016, "grad_norm": 1.4930616058109705, "learning_rate": 1.6285073446463903e-08, "loss": 0.4677228331565857, "step": 8201 }, { "epoch": 1.891169010836984, "grad_norm": 1.718939922651036, "learning_rate": 1.621661866523216e-08, "loss": 0.4532579183578491, "step": 8202 }, { "epoch": 1.8913995849665668, "grad_norm": 1.4990742550855458, "learning_rate": 1.6148306886885287e-08, "loss": 0.3011256456375122, "step": 8203 }, { "epoch": 1.8916301590961493, "grad_norm": 1.731114486954807, "learning_rate": 1.6080138121352892e-08, "loss": 0.43071651458740234, "step": 8204 }, { "epoch": 1.891860733225732, "grad_norm": 1.4183554819693576, "learning_rate": 1.6012112378544272e-08, "loss": 0.3180675506591797, "step": 8205 }, { "epoch": 1.8920913073553147, "grad_norm": 1.6038525214828652, "learning_rate": 1.594422966834741e-08, "loss": 0.35130774974823, "step": 8206 }, { "epoch": 1.8923218814848974, "grad_norm": 1.388613528735296, "learning_rate": 1.587649000062996e-08, "loss": 0.4953269958496094, "step": 8207 }, { "epoch": 1.8925524556144802, "grad_norm": 1.5668590048532676, "learning_rate": 1.5808893385238388e-08, "loss": 0.3713166415691376, "step": 8208 }, { "epoch": 1.8927830297440629, "grad_norm": 1.4824855259294067, "learning_rate": 1.5741439831998827e-08, "loss": 0.4273546040058136, "step": 8209 }, { "epoch": 1.8930136038736454, "grad_norm": 1.8212221910711959, "learning_rate": 1.5674129350715994e-08, "loss": 0.45312386751174927, "step": 8210 }, { "epoch": 1.893244178003228, "grad_norm": 1.4687276423683582, "learning_rate": 1.560696195117439e-08, "loss": 0.40246695280075073, "step": 8211 }, { "epoch": 1.8934747521328106, "grad_norm": 1.9323139227263069, "learning_rate": 1.5539937643137325e-08, "loss": 0.5229366421699524, "step": 8212 }, { "epoch": 1.8937053262623933, "grad_norm": 1.4419033757005335, "learning_rate": 1.5473056436347554e-08, "loss": 0.43834251165390015, "step": 8213 }, { "epoch": 1.893935900391976, "grad_norm": 1.5176292463299432, "learning_rate": 1.540631834052697e-08, "loss": 0.4423528015613556, "step": 8214 }, { "epoch": 1.8941664745215587, "grad_norm": 1.6176606345399394, "learning_rate": 1.5339723365376478e-08, "loss": 0.49888452887535095, "step": 8215 }, { "epoch": 1.8943970486511414, "grad_norm": 1.7422668701695732, "learning_rate": 1.5273271520576448e-08, "loss": 0.44023919105529785, "step": 8216 }, { "epoch": 1.8946276227807242, "grad_norm": 1.5430241161700802, "learning_rate": 1.5206962815786262e-08, "loss": 0.4733201861381531, "step": 8217 }, { "epoch": 1.8948581969103067, "grad_norm": 1.992567039765999, "learning_rate": 1.5140797260644768e-08, "loss": 0.5393285751342773, "step": 8218 }, { "epoch": 1.8950887710398894, "grad_norm": 1.5439154792235448, "learning_rate": 1.507477486476949e-08, "loss": 0.4240071773529053, "step": 8219 }, { "epoch": 1.8953193451694719, "grad_norm": 1.4272355688005478, "learning_rate": 1.5008895637757647e-08, "loss": 0.42983078956604004, "step": 8220 }, { "epoch": 1.8955499192990546, "grad_norm": 1.470069283076572, "learning_rate": 1.4943159589185462e-08, "loss": 0.47513502836227417, "step": 8221 }, { "epoch": 1.8957804934286373, "grad_norm": 1.49966428795426, "learning_rate": 1.4877566728608293e-08, "loss": 0.41938167810440063, "step": 8222 }, { "epoch": 1.89601106755822, "grad_norm": 1.513306290399523, "learning_rate": 1.4812117065560625e-08, "loss": 0.44817137718200684, "step": 8223 }, { "epoch": 1.8962416416878027, "grad_norm": 1.6563869108965783, "learning_rate": 1.4746810609556292e-08, "loss": 0.46840909123420715, "step": 8224 }, { "epoch": 1.8964722158173855, "grad_norm": 1.4822882914533433, "learning_rate": 1.4681647370088369e-08, "loss": 0.377409964799881, "step": 8225 }, { "epoch": 1.896702789946968, "grad_norm": 1.595495246407856, "learning_rate": 1.4616627356628831e-08, "loss": 0.41149425506591797, "step": 8226 }, { "epoch": 1.8969333640765507, "grad_norm": 1.548113444870098, "learning_rate": 1.455175057862923e-08, "loss": 0.39183878898620605, "step": 8227 }, { "epoch": 1.8971639382061332, "grad_norm": 1.3643453838150799, "learning_rate": 1.448701704551969e-08, "loss": 0.3629387617111206, "step": 8228 }, { "epoch": 1.8973945123357159, "grad_norm": 1.6546771139251113, "learning_rate": 1.4422426766710239e-08, "loss": 0.4007713794708252, "step": 8229 }, { "epoch": 1.8976250864652986, "grad_norm": 1.648419698601457, "learning_rate": 1.4357979751589477e-08, "loss": 0.42354586720466614, "step": 8230 }, { "epoch": 1.8978556605948813, "grad_norm": 1.9683167812350795, "learning_rate": 1.429367600952558e-08, "loss": 0.5321829319000244, "step": 8231 }, { "epoch": 1.898086234724464, "grad_norm": 1.5240649560541817, "learning_rate": 1.4229515549865845e-08, "loss": 0.4840988218784332, "step": 8232 }, { "epoch": 1.8983168088540465, "grad_norm": 1.6587626955063286, "learning_rate": 1.4165498381936369e-08, "loss": 0.5006803870201111, "step": 8233 }, { "epoch": 1.8985473829836292, "grad_norm": 1.855334923621547, "learning_rate": 1.4101624515042821e-08, "loss": 0.40582865476608276, "step": 8234 }, { "epoch": 1.8987779571132117, "grad_norm": 1.6458084674224973, "learning_rate": 1.4037893958469993e-08, "loss": 0.38199514150619507, "step": 8235 }, { "epoch": 1.8990085312427945, "grad_norm": 1.4513711417071327, "learning_rate": 1.3974306721481699e-08, "loss": 0.39234936237335205, "step": 8236 }, { "epoch": 1.8992391053723772, "grad_norm": 1.661857153956049, "learning_rate": 1.391086281332099e-08, "loss": 0.42211759090423584, "step": 8237 }, { "epoch": 1.8994696795019599, "grad_norm": 1.5171507269414566, "learning_rate": 1.3847562243210043e-08, "loss": 0.4519961476325989, "step": 8238 }, { "epoch": 1.8997002536315426, "grad_norm": 1.618394005210342, "learning_rate": 1.3784405020350276e-08, "loss": 0.4795762896537781, "step": 8239 }, { "epoch": 1.8999308277611253, "grad_norm": 1.5749927795923588, "learning_rate": 1.3721391153922235e-08, "loss": 0.4549542963504791, "step": 8240 }, { "epoch": 1.9001614018907078, "grad_norm": 1.759482125374446, "learning_rate": 1.3658520653085703e-08, "loss": 0.5253233313560486, "step": 8241 }, { "epoch": 1.9003919760202905, "grad_norm": 1.4274315163192688, "learning_rate": 1.3595793526979371e-08, "loss": 0.44850921630859375, "step": 8242 }, { "epoch": 1.900622550149873, "grad_norm": 1.5448941620644567, "learning_rate": 1.35332097847215e-08, "loss": 0.4416281580924988, "step": 8243 }, { "epoch": 1.9008531242794557, "grad_norm": 1.932595440608825, "learning_rate": 1.3470769435409036e-08, "loss": 0.5567417740821838, "step": 8244 }, { "epoch": 1.9010836984090385, "grad_norm": 1.4810071060864598, "learning_rate": 1.3408472488118383e-08, "loss": 0.43554848432540894, "step": 8245 }, { "epoch": 1.9013142725386212, "grad_norm": 1.6729713604736038, "learning_rate": 1.3346318951905077e-08, "loss": 0.4219995141029358, "step": 8246 }, { "epoch": 1.901544846668204, "grad_norm": 1.5600368865419485, "learning_rate": 1.328430883580367e-08, "loss": 0.45862913131713867, "step": 8247 }, { "epoch": 1.9017754207977866, "grad_norm": 1.5932092717655322, "learning_rate": 1.3222442148828172e-08, "loss": 0.5026064515113831, "step": 8248 }, { "epoch": 1.902005994927369, "grad_norm": 1.6308659122795583, "learning_rate": 1.316071889997139e-08, "loss": 0.46948713064193726, "step": 8249 }, { "epoch": 1.9022365690569518, "grad_norm": 1.5718314790268124, "learning_rate": 1.3099139098205258e-08, "loss": 0.4263686537742615, "step": 8250 }, { "epoch": 1.9024671431865343, "grad_norm": 1.516002170215572, "learning_rate": 1.3037702752481394e-08, "loss": 0.4652191400527954, "step": 8251 }, { "epoch": 1.902697717316117, "grad_norm": 1.553138573631746, "learning_rate": 1.2976409871729987e-08, "loss": 0.4918743371963501, "step": 8252 }, { "epoch": 1.9029282914456997, "grad_norm": 1.4916920711393407, "learning_rate": 1.2915260464860466e-08, "loss": 0.5297696590423584, "step": 8253 }, { "epoch": 1.9031588655752825, "grad_norm": 1.7049232652010609, "learning_rate": 1.2854254540761722e-08, "loss": 0.5320281982421875, "step": 8254 }, { "epoch": 1.9033894397048652, "grad_norm": 1.6403951625522013, "learning_rate": 1.2793392108301437e-08, "loss": 0.4424601197242737, "step": 8255 }, { "epoch": 1.903620013834448, "grad_norm": 1.7301429652605729, "learning_rate": 1.2732673176326758e-08, "loss": 0.4811365008354187, "step": 8256 }, { "epoch": 1.9038505879640304, "grad_norm": 1.4707627617860477, "learning_rate": 1.2672097753663624e-08, "loss": 0.3744504451751709, "step": 8257 }, { "epoch": 1.904081162093613, "grad_norm": 1.4178929694153364, "learning_rate": 1.2611665849117326e-08, "loss": 0.4703986644744873, "step": 8258 }, { "epoch": 1.9043117362231956, "grad_norm": 1.7267205141598052, "learning_rate": 1.255137747147228e-08, "loss": 0.5431181192398071, "step": 8259 }, { "epoch": 1.9045423103527783, "grad_norm": 1.8088892551764337, "learning_rate": 1.2491232629492143e-08, "loss": 0.5066450238227844, "step": 8260 }, { "epoch": 1.904772884482361, "grad_norm": 1.4945728049455276, "learning_rate": 1.2431231331919368e-08, "loss": 0.4374620020389557, "step": 8261 }, { "epoch": 1.9050034586119438, "grad_norm": 1.5574450804582989, "learning_rate": 1.2371373587475753e-08, "loss": 0.3628976345062256, "step": 8262 }, { "epoch": 1.9052340327415265, "grad_norm": 1.6159357629155715, "learning_rate": 1.231165940486234e-08, "loss": 0.43471890687942505, "step": 8263 }, { "epoch": 1.9054646068711092, "grad_norm": 1.4892272896008858, "learning_rate": 1.2252088792759074e-08, "loss": 0.5038785934448242, "step": 8264 }, { "epoch": 1.9056951810006917, "grad_norm": 1.388813738509663, "learning_rate": 1.2192661759825363e-08, "loss": 0.44022035598754883, "step": 8265 }, { "epoch": 1.9059257551302744, "grad_norm": 1.8473214990080156, "learning_rate": 1.2133378314699294e-08, "loss": 0.4924722909927368, "step": 8266 }, { "epoch": 1.906156329259857, "grad_norm": 1.525292247487046, "learning_rate": 1.2074238465998532e-08, "loss": 0.3824247121810913, "step": 8267 }, { "epoch": 1.9063869033894396, "grad_norm": 1.821466956277618, "learning_rate": 1.2015242222319422e-08, "loss": 0.47094473242759705, "step": 8268 }, { "epoch": 1.9066174775190223, "grad_norm": 1.7313158547849, "learning_rate": 1.1956389592237881e-08, "loss": 0.5653735399246216, "step": 8269 }, { "epoch": 1.906848051648605, "grad_norm": 1.7620428814203788, "learning_rate": 1.1897680584308512e-08, "loss": 0.4763476848602295, "step": 8270 }, { "epoch": 1.9070786257781878, "grad_norm": 1.5194232107831984, "learning_rate": 1.1839115207065487e-08, "loss": 0.3845449686050415, "step": 8271 }, { "epoch": 1.9073091999077705, "grad_norm": 1.5881713237890829, "learning_rate": 1.1780693469021775e-08, "loss": 0.43071988224983215, "step": 8272 }, { "epoch": 1.907539774037353, "grad_norm": 1.4466344827167648, "learning_rate": 1.172241537866947e-08, "loss": 0.43860751390457153, "step": 8273 }, { "epoch": 1.9077703481669357, "grad_norm": 1.7623171007667486, "learning_rate": 1.1664280944480132e-08, "loss": 0.5077678561210632, "step": 8274 }, { "epoch": 1.9080009222965182, "grad_norm": 1.4297374268054954, "learning_rate": 1.1606290174903888e-08, "loss": 0.3832993805408478, "step": 8275 }, { "epoch": 1.908231496426101, "grad_norm": 1.629527864713481, "learning_rate": 1.1548443078370551e-08, "loss": 0.48003530502319336, "step": 8276 }, { "epoch": 1.9084620705556836, "grad_norm": 1.5503547776003848, "learning_rate": 1.1490739663288618e-08, "loss": 0.6109439134597778, "step": 8277 }, { "epoch": 1.9086926446852663, "grad_norm": 1.9064677948637023, "learning_rate": 1.1433179938045823e-08, "loss": 0.4559859037399292, "step": 8278 }, { "epoch": 1.908923218814849, "grad_norm": 1.4670877218502, "learning_rate": 1.137576391100925e-08, "loss": 0.3935600221157074, "step": 8279 }, { "epoch": 1.9091537929444318, "grad_norm": 1.6460426557554972, "learning_rate": 1.1318491590524782e-08, "loss": 0.44477611780166626, "step": 8280 }, { "epoch": 1.9093843670740143, "grad_norm": 1.652813391764361, "learning_rate": 1.1261362984917533e-08, "loss": 0.47065627574920654, "step": 8281 }, { "epoch": 1.909614941203597, "grad_norm": 1.567401132156008, "learning_rate": 1.1204378102491862e-08, "loss": 0.44851434230804443, "step": 8282 }, { "epoch": 1.9098455153331795, "grad_norm": 1.6119259284309502, "learning_rate": 1.1147536951530923e-08, "loss": 0.38606488704681396, "step": 8283 }, { "epoch": 1.9100760894627622, "grad_norm": 1.7145601291142103, "learning_rate": 1.1090839540297103e-08, "loss": 0.5400182008743286, "step": 8284 }, { "epoch": 1.910306663592345, "grad_norm": 1.5193110263706777, "learning_rate": 1.1034285877032146e-08, "loss": 0.4225059449672699, "step": 8285 }, { "epoch": 1.9105372377219276, "grad_norm": 1.8787563951518915, "learning_rate": 1.0977875969956584e-08, "loss": 0.5111556649208069, "step": 8286 }, { "epoch": 1.9107678118515103, "grad_norm": 1.583999151547768, "learning_rate": 1.0921609827270196e-08, "loss": 0.40596213936805725, "step": 8287 }, { "epoch": 1.910998385981093, "grad_norm": 1.619272502884341, "learning_rate": 1.0865487457151768e-08, "loss": 0.47917360067367554, "step": 8288 }, { "epoch": 1.9112289601106756, "grad_norm": 1.8556422558472565, "learning_rate": 1.0809508867759331e-08, "loss": 0.45154574513435364, "step": 8289 }, { "epoch": 1.9114595342402583, "grad_norm": 1.7391028962680364, "learning_rate": 1.0753674067229935e-08, "loss": 0.5024373531341553, "step": 8290 }, { "epoch": 1.9116901083698408, "grad_norm": 1.6003253992080113, "learning_rate": 1.069798306367975e-08, "loss": 0.5084686875343323, "step": 8291 }, { "epoch": 1.9119206824994235, "grad_norm": 1.5906220140950642, "learning_rate": 1.064243586520408e-08, "loss": 0.3947920501232147, "step": 8292 }, { "epoch": 1.9121512566290062, "grad_norm": 1.5037329879323602, "learning_rate": 1.0587032479877023e-08, "loss": 0.5011960864067078, "step": 8293 }, { "epoch": 1.912381830758589, "grad_norm": 1.6116996984750152, "learning_rate": 1.0531772915752247e-08, "loss": 0.43622612953186035, "step": 8294 }, { "epoch": 1.9126124048881716, "grad_norm": 1.664400790122745, "learning_rate": 1.0476657180862325e-08, "loss": 0.380764365196228, "step": 8295 }, { "epoch": 1.9128429790177544, "grad_norm": 1.59176785573853, "learning_rate": 1.042168528321874e-08, "loss": 0.4183109700679779, "step": 8296 }, { "epoch": 1.9130735531473368, "grad_norm": 1.7993335153125511, "learning_rate": 1.036685723081221e-08, "loss": 0.4221222698688507, "step": 8297 }, { "epoch": 1.9133041272769196, "grad_norm": 1.7816315005923467, "learning_rate": 1.0312173031612804e-08, "loss": 0.543656051158905, "step": 8298 }, { "epoch": 1.913534701406502, "grad_norm": 1.5681621709441897, "learning_rate": 1.0257632693569052e-08, "loss": 0.48872441053390503, "step": 8299 }, { "epoch": 1.9137652755360848, "grad_norm": 1.5640812032082956, "learning_rate": 1.0203236224609169e-08, "loss": 0.5447995662689209, "step": 8300 }, { "epoch": 1.9139958496656675, "grad_norm": 1.4954141524676323, "learning_rate": 1.0148983632640162e-08, "loss": 0.39448055624961853, "step": 8301 }, { "epoch": 1.9142264237952502, "grad_norm": 1.755968676337724, "learning_rate": 1.009487492554828e-08, "loss": 0.44735193252563477, "step": 8302 }, { "epoch": 1.914456997924833, "grad_norm": 1.6151813931913763, "learning_rate": 1.0040910111198786e-08, "loss": 0.4747859537601471, "step": 8303 }, { "epoch": 1.9146875720544156, "grad_norm": 1.6130507888649155, "learning_rate": 9.987089197435739e-09, "loss": 0.5120220184326172, "step": 8304 }, { "epoch": 1.9149181461839981, "grad_norm": 1.6267491510418168, "learning_rate": 9.933412192082991e-09, "loss": 0.3889455795288086, "step": 8305 }, { "epoch": 1.9151487203135809, "grad_norm": 1.497355606160038, "learning_rate": 9.879879102942635e-09, "loss": 0.36584073305130005, "step": 8306 }, { "epoch": 1.9153792944431633, "grad_norm": 2.0010610263228643, "learning_rate": 9.826489937796556e-09, "loss": 0.6259280443191528, "step": 8307 }, { "epoch": 1.915609868572746, "grad_norm": 1.780257440356438, "learning_rate": 9.773244704405104e-09, "loss": 0.45160970091819763, "step": 8308 }, { "epoch": 1.9158404427023288, "grad_norm": 1.559258218463348, "learning_rate": 9.720143410508309e-09, "loss": 0.47028589248657227, "step": 8309 }, { "epoch": 1.9160710168319115, "grad_norm": 1.7146410364961069, "learning_rate": 9.667186063824773e-09, "loss": 0.3850802183151245, "step": 8310 }, { "epoch": 1.9163015909614942, "grad_norm": 1.69252010891113, "learning_rate": 9.614372672052451e-09, "loss": 0.4134417772293091, "step": 8311 }, { "epoch": 1.916532165091077, "grad_norm": 1.4197660481073355, "learning_rate": 9.561703242868425e-09, "loss": 0.5340328216552734, "step": 8312 }, { "epoch": 1.9167627392206594, "grad_norm": 1.5089395557239718, "learning_rate": 9.509177783928569e-09, "loss": 0.4580942392349243, "step": 8313 }, { "epoch": 1.9169933133502421, "grad_norm": 1.559427035261756, "learning_rate": 9.45679630286811e-09, "loss": 0.4227365553379059, "step": 8314 }, { "epoch": 1.9172238874798246, "grad_norm": 1.462151537342571, "learning_rate": 9.404558807301065e-09, "loss": 0.42711400985717773, "step": 8315 }, { "epoch": 1.9174544616094074, "grad_norm": 1.6466969798320865, "learning_rate": 9.352465304820811e-09, "loss": 0.41088467836380005, "step": 8316 }, { "epoch": 1.91768503573899, "grad_norm": 1.7161905508950221, "learning_rate": 9.30051580299962e-09, "loss": 0.4669058918952942, "step": 8317 }, { "epoch": 1.9179156098685728, "grad_norm": 1.8956617878589224, "learning_rate": 9.248710309388896e-09, "loss": 0.34129124879837036, "step": 8318 }, { "epoch": 1.9181461839981555, "grad_norm": 1.6346151888813216, "learning_rate": 9.19704883151906e-09, "loss": 0.5538367033004761, "step": 8319 }, { "epoch": 1.9183767581277382, "grad_norm": 1.8993289351204807, "learning_rate": 9.145531376899773e-09, "loss": 0.4591939151287079, "step": 8320 }, { "epoch": 1.9186073322573207, "grad_norm": 1.531598340011727, "learning_rate": 9.094157953019376e-09, "loss": 0.38709723949432373, "step": 8321 }, { "epoch": 1.9188379063869034, "grad_norm": 1.7947823187484588, "learning_rate": 9.042928567345787e-09, "loss": 0.503919780254364, "step": 8322 }, { "epoch": 1.919068480516486, "grad_norm": 1.6367087262197295, "learning_rate": 8.991843227325491e-09, "loss": 0.510110080242157, "step": 8323 }, { "epoch": 1.9192990546460686, "grad_norm": 1.6066272425773898, "learning_rate": 8.940901940384437e-09, "loss": 0.5100687146186829, "step": 8324 }, { "epoch": 1.9195296287756514, "grad_norm": 1.513750458500578, "learning_rate": 8.89010471392726e-09, "loss": 0.44701308012008667, "step": 8325 }, { "epoch": 1.919760202905234, "grad_norm": 1.563320875474341, "learning_rate": 8.83945155533794e-09, "loss": 0.4657078981399536, "step": 8326 }, { "epoch": 1.9199907770348168, "grad_norm": 1.9297827676028427, "learning_rate": 8.788942471979588e-09, "loss": 0.510329008102417, "step": 8327 }, { "epoch": 1.9202213511643995, "grad_norm": 1.471307451139604, "learning_rate": 8.738577471193997e-09, "loss": 0.5373008847236633, "step": 8328 }, { "epoch": 1.920451925293982, "grad_norm": 1.9012550118721963, "learning_rate": 8.688356560302313e-09, "loss": 0.46517014503479004, "step": 8329 }, { "epoch": 1.9206824994235647, "grad_norm": 1.6705233787528915, "learning_rate": 8.638279746604582e-09, "loss": 0.3993692398071289, "step": 8330 }, { "epoch": 1.9209130735531472, "grad_norm": 1.366585505535673, "learning_rate": 8.588347037380095e-09, "loss": 0.42480504512786865, "step": 8331 }, { "epoch": 1.92114364768273, "grad_norm": 1.7413386006663227, "learning_rate": 8.538558439887044e-09, "loss": 0.44433218240737915, "step": 8332 }, { "epoch": 1.9213742218123127, "grad_norm": 1.59463524320548, "learning_rate": 8.488913961362643e-09, "loss": 0.4645090103149414, "step": 8333 }, { "epoch": 1.9216047959418954, "grad_norm": 1.7690127959905497, "learning_rate": 8.439413609023227e-09, "loss": 0.47265806794166565, "step": 8334 }, { "epoch": 1.921835370071478, "grad_norm": 1.6930025984848287, "learning_rate": 8.390057390064265e-09, "loss": 0.46389561891555786, "step": 8335 }, { "epoch": 1.9220659442010608, "grad_norm": 1.8286869444988214, "learning_rate": 8.340845311660127e-09, "loss": 0.45355337858200073, "step": 8336 }, { "epoch": 1.9222965183306433, "grad_norm": 1.6861508362464954, "learning_rate": 8.291777380964315e-09, "loss": 0.47136229276657104, "step": 8337 }, { "epoch": 1.922527092460226, "grad_norm": 1.7162470073135112, "learning_rate": 8.242853605109234e-09, "loss": 0.4914461374282837, "step": 8338 }, { "epoch": 1.9227576665898085, "grad_norm": 1.5896610300054894, "learning_rate": 8.194073991206641e-09, "loss": 0.48298412561416626, "step": 8339 }, { "epoch": 1.9229882407193912, "grad_norm": 1.591559243664797, "learning_rate": 8.145438546346971e-09, "loss": 0.5316052436828613, "step": 8340 }, { "epoch": 1.923218814848974, "grad_norm": 1.530763445371585, "learning_rate": 8.09694727760002e-09, "loss": 0.45742303133010864, "step": 8341 }, { "epoch": 1.9234493889785567, "grad_norm": 1.800664891434664, "learning_rate": 8.048600192014365e-09, "loss": 0.41579365730285645, "step": 8342 }, { "epoch": 1.9236799631081394, "grad_norm": 1.4284255731817002, "learning_rate": 8.000397296617834e-09, "loss": 0.37775835394859314, "step": 8343 }, { "epoch": 1.9239105372377219, "grad_norm": 1.7051685129810905, "learning_rate": 7.95233859841704e-09, "loss": 0.4720783531665802, "step": 8344 }, { "epoch": 1.9241411113673046, "grad_norm": 1.608380789109436, "learning_rate": 7.904424104398067e-09, "loss": 0.5015095472335815, "step": 8345 }, { "epoch": 1.924371685496887, "grad_norm": 1.5886093342032406, "learning_rate": 7.856653821525672e-09, "loss": 0.6053783893585205, "step": 8346 }, { "epoch": 1.9246022596264698, "grad_norm": 1.71106607476921, "learning_rate": 7.809027756743635e-09, "loss": 0.47775521874427795, "step": 8347 }, { "epoch": 1.9248328337560525, "grad_norm": 1.559597916397487, "learning_rate": 7.761545916974976e-09, "loss": 0.36487245559692383, "step": 8348 }, { "epoch": 1.9250634078856352, "grad_norm": 1.6596969619350017, "learning_rate": 7.714208309121617e-09, "loss": 0.48085975646972656, "step": 8349 }, { "epoch": 1.925293982015218, "grad_norm": 1.6156245324091865, "learning_rate": 7.667014940064609e-09, "loss": 0.48800790309906006, "step": 8350 }, { "epoch": 1.9255245561448007, "grad_norm": 1.654653168113963, "learning_rate": 7.61996581666402e-09, "loss": 0.5294181704521179, "step": 8351 }, { "epoch": 1.9257551302743832, "grad_norm": 1.4725020612800932, "learning_rate": 7.573060945758936e-09, "loss": 0.44024503231048584, "step": 8352 }, { "epoch": 1.9259857044039659, "grad_norm": 1.8377372608503795, "learning_rate": 7.526300334167235e-09, "loss": 0.4359186887741089, "step": 8353 }, { "epoch": 1.9262162785335484, "grad_norm": 1.6594669465231893, "learning_rate": 7.479683988686259e-09, "loss": 0.4803895652294159, "step": 8354 }, { "epoch": 1.926446852663131, "grad_norm": 1.5824042504509404, "learning_rate": 7.433211916092141e-09, "loss": 0.43153274059295654, "step": 8355 }, { "epoch": 1.9266774267927138, "grad_norm": 1.812737055881384, "learning_rate": 7.386884123140036e-09, "loss": 0.38263070583343506, "step": 8356 }, { "epoch": 1.9269080009222965, "grad_norm": 1.42789662226475, "learning_rate": 7.340700616564e-09, "loss": 0.42121192812919617, "step": 8357 }, { "epoch": 1.9271385750518792, "grad_norm": 1.6902764865159838, "learning_rate": 7.294661403077662e-09, "loss": 0.46008965373039246, "step": 8358 }, { "epoch": 1.927369149181462, "grad_norm": 1.5923895901686829, "learning_rate": 7.248766489372893e-09, "loss": 0.48495203256607056, "step": 8359 }, { "epoch": 1.9275997233110445, "grad_norm": 1.6833123633851883, "learning_rate": 7.203015882121244e-09, "loss": 0.5004169940948486, "step": 8360 }, { "epoch": 1.9278302974406272, "grad_norm": 1.4732497687996942, "learning_rate": 7.15740958797284e-09, "loss": 0.5660319328308105, "step": 8361 }, { "epoch": 1.9280608715702097, "grad_norm": 1.588922332622674, "learning_rate": 7.111947613557268e-09, "loss": 0.43854010105133057, "step": 8362 }, { "epoch": 1.9282914456997924, "grad_norm": 2.093362311602714, "learning_rate": 7.066629965482574e-09, "loss": 0.44730937480926514, "step": 8363 }, { "epoch": 1.928522019829375, "grad_norm": 1.6568658526601971, "learning_rate": 7.021456650336377e-09, "loss": 0.45642590522766113, "step": 8364 }, { "epoch": 1.9287525939589578, "grad_norm": 1.9173353497487595, "learning_rate": 6.976427674684871e-09, "loss": 0.5613523721694946, "step": 8365 }, { "epoch": 1.9289831680885405, "grad_norm": 1.7976713831697748, "learning_rate": 6.931543045073706e-09, "loss": 0.4231454133987427, "step": 8366 }, { "epoch": 1.9292137422181233, "grad_norm": 1.9184335289270926, "learning_rate": 6.886802768027223e-09, "loss": 0.464144766330719, "step": 8367 }, { "epoch": 1.9294443163477057, "grad_norm": 1.6282751196601715, "learning_rate": 6.8422068500487705e-09, "loss": 0.4303344488143921, "step": 8368 }, { "epoch": 1.9296748904772885, "grad_norm": 1.5717538042291814, "learning_rate": 6.797755297620944e-09, "loss": 0.4333549737930298, "step": 8369 }, { "epoch": 1.929905464606871, "grad_norm": 1.5673646456508366, "learning_rate": 6.753448117205241e-09, "loss": 0.4656146466732025, "step": 8370 }, { "epoch": 1.9301360387364537, "grad_norm": 2.0556236314521077, "learning_rate": 6.709285315242063e-09, "loss": 0.3823866844177246, "step": 8371 }, { "epoch": 1.9303666128660364, "grad_norm": 1.5412445917312292, "learning_rate": 6.665266898150946e-09, "loss": 0.4552363157272339, "step": 8372 }, { "epoch": 1.930597186995619, "grad_norm": 1.5304233694461045, "learning_rate": 6.6213928723304335e-09, "loss": 0.48757460713386536, "step": 8373 }, { "epoch": 1.9308277611252018, "grad_norm": 1.0877844091844102, "learning_rate": 6.577663244158094e-09, "loss": 0.3263235092163086, "step": 8374 }, { "epoch": 1.9310583352547845, "grad_norm": 1.6065207890727204, "learning_rate": 6.534078019990397e-09, "loss": 0.510450541973114, "step": 8375 }, { "epoch": 1.931288909384367, "grad_norm": 1.4737968731950963, "learning_rate": 6.490637206162941e-09, "loss": 0.37407904863357544, "step": 8376 }, { "epoch": 1.9315194835139498, "grad_norm": 1.5691906942234775, "learning_rate": 6.4473408089902315e-09, "loss": 0.4216376543045044, "step": 8377 }, { "epoch": 1.9317500576435322, "grad_norm": 1.647678033925203, "learning_rate": 6.404188834766011e-09, "loss": 0.41611379384994507, "step": 8378 }, { "epoch": 1.931980631773115, "grad_norm": 1.6406917387427478, "learning_rate": 6.361181289762596e-09, "loss": 0.5301774740219116, "step": 8379 }, { "epoch": 1.9322112059026977, "grad_norm": 1.457780743812755, "learning_rate": 6.3183181802317635e-09, "loss": 0.43767407536506653, "step": 8380 }, { "epoch": 1.9324417800322804, "grad_norm": 1.5497586314138279, "learning_rate": 6.275599512404084e-09, "loss": 0.417082279920578, "step": 8381 }, { "epoch": 1.9326723541618631, "grad_norm": 1.646560289289956, "learning_rate": 6.233025292489147e-09, "loss": 0.41670864820480347, "step": 8382 }, { "epoch": 1.9329029282914458, "grad_norm": 1.4085441335066406, "learning_rate": 6.190595526675446e-09, "loss": 0.48778587579727173, "step": 8383 }, { "epoch": 1.9331335024210283, "grad_norm": 1.39299487584749, "learning_rate": 6.148310221130604e-09, "loss": 0.44433802366256714, "step": 8384 }, { "epoch": 1.933364076550611, "grad_norm": 1.7057166388160585, "learning_rate": 6.106169382001369e-09, "loss": 0.46826764941215515, "step": 8385 }, { "epoch": 1.9335946506801935, "grad_norm": 1.6832081073908207, "learning_rate": 6.064173015413177e-09, "loss": 0.5509334802627563, "step": 8386 }, { "epoch": 1.9338252248097763, "grad_norm": 1.4200036599053338, "learning_rate": 6.022321127470698e-09, "loss": 0.4436245560646057, "step": 8387 }, { "epoch": 1.934055798939359, "grad_norm": 1.4658061886752614, "learning_rate": 5.9806137242574e-09, "loss": 0.3577145040035248, "step": 8388 }, { "epoch": 1.9342863730689417, "grad_norm": 1.3485508447539643, "learning_rate": 5.939050811835988e-09, "loss": 0.39893999695777893, "step": 8389 }, { "epoch": 1.9345169471985244, "grad_norm": 1.4373848732418595, "learning_rate": 5.897632396248075e-09, "loss": 0.4109868109226227, "step": 8390 }, { "epoch": 1.9347475213281071, "grad_norm": 1.6148537069486861, "learning_rate": 5.85635848351429e-09, "loss": 0.4193134307861328, "step": 8391 }, { "epoch": 1.9349780954576896, "grad_norm": 1.774944389887914, "learning_rate": 5.8152290796340545e-09, "loss": 0.44189178943634033, "step": 8392 }, { "epoch": 1.9352086695872723, "grad_norm": 1.7653802191556502, "learning_rate": 5.774244190586141e-09, "loss": 0.5014302730560303, "step": 8393 }, { "epoch": 1.9354392437168548, "grad_norm": 1.5565367331009852, "learning_rate": 5.733403822328009e-09, "loss": 0.4962024688720703, "step": 8394 }, { "epoch": 1.9356698178464375, "grad_norm": 1.585877874844532, "learning_rate": 5.69270798079613e-09, "loss": 0.45495474338531494, "step": 8395 }, { "epoch": 1.9359003919760203, "grad_norm": 1.4665884192601668, "learning_rate": 5.652156671906105e-09, "loss": 0.49062758684158325, "step": 8396 }, { "epoch": 1.936130966105603, "grad_norm": 1.6573434385643893, "learning_rate": 5.611749901552554e-09, "loss": 0.45899879932403564, "step": 8397 }, { "epoch": 1.9363615402351857, "grad_norm": 1.511951038657192, "learning_rate": 5.57148767560911e-09, "loss": 0.47287002205848694, "step": 8398 }, { "epoch": 1.9365921143647684, "grad_norm": 1.5970704539129832, "learning_rate": 5.531369999927982e-09, "loss": 0.439136266708374, "step": 8399 }, { "epoch": 1.936822688494351, "grad_norm": 1.2795152915391526, "learning_rate": 5.4913968803410594e-09, "loss": 0.3920954465866089, "step": 8400 }, { "epoch": 1.9370532626239336, "grad_norm": 1.254790295470771, "learning_rate": 5.451568322658473e-09, "loss": 0.4608895480632782, "step": 8401 }, { "epoch": 1.9372838367535161, "grad_norm": 1.4389672316514175, "learning_rate": 5.4118843326699246e-09, "loss": 0.4617875814437866, "step": 8402 }, { "epoch": 1.9375144108830988, "grad_norm": 1.8398027260263112, "learning_rate": 5.372344916143912e-09, "loss": 0.5293254852294922, "step": 8403 }, { "epoch": 1.9377449850126816, "grad_norm": 1.2603762011573385, "learning_rate": 5.332950078827725e-09, "loss": 0.3935343623161316, "step": 8404 }, { "epoch": 1.9379755591422643, "grad_norm": 1.3159194137267558, "learning_rate": 5.293699826447895e-09, "loss": 0.4612414240837097, "step": 8405 }, { "epoch": 1.938206133271847, "grad_norm": 1.5616222982589931, "learning_rate": 5.254594164709858e-09, "loss": 0.4779428243637085, "step": 8406 }, { "epoch": 1.9384367074014297, "grad_norm": 1.3393838173044101, "learning_rate": 5.215633099298067e-09, "loss": 0.37436819076538086, "step": 8407 }, { "epoch": 1.9386672815310122, "grad_norm": 1.5367283978531407, "learning_rate": 5.1768166358757695e-09, "loss": 0.458698570728302, "step": 8408 }, { "epoch": 1.938897855660595, "grad_norm": 1.52395102556278, "learning_rate": 5.1381447800854515e-09, "loss": 0.39365172386169434, "step": 8409 }, { "epoch": 1.9391284297901774, "grad_norm": 1.6915141620999796, "learning_rate": 5.099617537548284e-09, "loss": 0.46358722448349, "step": 8410 }, { "epoch": 1.9393590039197601, "grad_norm": 1.4920931037664487, "learning_rate": 5.061234913864898e-09, "loss": 0.4286697506904602, "step": 8411 }, { "epoch": 1.9395895780493428, "grad_norm": 1.2865245997479036, "learning_rate": 5.022996914614275e-09, "loss": 0.4925898015499115, "step": 8412 }, { "epoch": 1.9398201521789256, "grad_norm": 1.5226712255874009, "learning_rate": 4.984903545354857e-09, "loss": 0.46924275159835815, "step": 8413 }, { "epoch": 1.9400507263085083, "grad_norm": 1.5857623247989538, "learning_rate": 4.946954811623994e-09, "loss": 0.5326268672943115, "step": 8414 }, { "epoch": 1.940281300438091, "grad_norm": 1.5901041586459477, "learning_rate": 4.909150718937716e-09, "loss": 0.4367690682411194, "step": 8415 }, { "epoch": 1.9405118745676735, "grad_norm": 1.5390541996103484, "learning_rate": 4.8714912727914055e-09, "loss": 0.45579224824905396, "step": 8416 }, { "epoch": 1.9407424486972562, "grad_norm": 1.5246826105956603, "learning_rate": 4.8339764786590186e-09, "loss": 0.4420431852340698, "step": 8417 }, { "epoch": 1.9409730228268387, "grad_norm": 1.7713819487127218, "learning_rate": 4.79660634199397e-09, "loss": 0.4175274670124054, "step": 8418 }, { "epoch": 1.9412035969564214, "grad_norm": 1.4046803968065067, "learning_rate": 4.759380868228246e-09, "loss": 0.41451364755630493, "step": 8419 }, { "epoch": 1.9414341710860041, "grad_norm": 1.5394804899846177, "learning_rate": 4.722300062772966e-09, "loss": 0.4211805462837219, "step": 8420 }, { "epoch": 1.9416647452155869, "grad_norm": 1.5805052208208792, "learning_rate": 4.68536393101826e-09, "loss": 0.4458296000957489, "step": 8421 }, { "epoch": 1.9418953193451696, "grad_norm": 1.8263114249420374, "learning_rate": 4.648572478333057e-09, "loss": 0.6226488351821899, "step": 8422 }, { "epoch": 1.9421258934747523, "grad_norm": 1.467298573422793, "learning_rate": 4.611925710065523e-09, "loss": 0.343037486076355, "step": 8423 }, { "epoch": 1.9423564676043348, "grad_norm": 1.4279799784372957, "learning_rate": 4.575423631542397e-09, "loss": 0.42478299140930176, "step": 8424 }, { "epoch": 1.9425870417339175, "grad_norm": 1.4809253602160373, "learning_rate": 4.539066248069878e-09, "loss": 0.4467424750328064, "step": 8425 }, { "epoch": 1.9428176158635, "grad_norm": 1.5230213064501263, "learning_rate": 4.50285356493274e-09, "loss": 0.4598960876464844, "step": 8426 }, { "epoch": 1.9430481899930827, "grad_norm": 1.767389183054306, "learning_rate": 4.466785587394883e-09, "loss": 0.43005913496017456, "step": 8427 }, { "epoch": 1.9432787641226654, "grad_norm": 1.6819998310369073, "learning_rate": 4.430862320699114e-09, "loss": 0.4259253740310669, "step": 8428 }, { "epoch": 1.9435093382522481, "grad_norm": 1.4809575809160866, "learning_rate": 4.395083770067476e-09, "loss": 0.4275285601615906, "step": 8429 }, { "epoch": 1.9437399123818309, "grad_norm": 1.5009509074634573, "learning_rate": 4.3594499407003656e-09, "loss": 0.42151302099227905, "step": 8430 }, { "epoch": 1.9439704865114136, "grad_norm": 1.2121055184272223, "learning_rate": 4.3239608377778625e-09, "loss": 0.41727957129478455, "step": 8431 }, { "epoch": 1.944201060640996, "grad_norm": 1.6993320655678226, "learning_rate": 4.288616466458395e-09, "loss": 0.5026905536651611, "step": 8432 }, { "epoch": 1.9444316347705788, "grad_norm": 1.7732059667125062, "learning_rate": 4.2534168318798524e-09, "loss": 0.5170408487319946, "step": 8433 }, { "epoch": 1.9446622089001613, "grad_norm": 1.4027101607713113, "learning_rate": 4.21836193915881e-09, "loss": 0.3918447196483612, "step": 8434 }, { "epoch": 1.944892783029744, "grad_norm": 1.6652823795220828, "learning_rate": 4.183451793390747e-09, "loss": 0.49871906638145447, "step": 8435 }, { "epoch": 1.9451233571593267, "grad_norm": 1.4696705484226025, "learning_rate": 4.1486863996502694e-09, "loss": 0.43729400634765625, "step": 8436 }, { "epoch": 1.9453539312889094, "grad_norm": 1.6971586346839116, "learning_rate": 4.114065762990781e-09, "loss": 0.49198442697525024, "step": 8437 }, { "epoch": 1.9455845054184921, "grad_norm": 1.7555960999646751, "learning_rate": 4.079589888444923e-09, "loss": 0.48610788583755493, "step": 8438 }, { "epoch": 1.9458150795480749, "grad_norm": 1.4385738810997333, "learning_rate": 4.045258781024019e-09, "loss": 0.43962734937667847, "step": 8439 }, { "epoch": 1.9460456536776574, "grad_norm": 1.5800303425440292, "learning_rate": 4.011072445718522e-09, "loss": 0.3320704400539398, "step": 8440 }, { "epoch": 1.94627622780724, "grad_norm": 1.6634559640737916, "learning_rate": 3.977030887497568e-09, "loss": 0.4773918092250824, "step": 8441 }, { "epoch": 1.9465068019368226, "grad_norm": 1.6386159776295786, "learning_rate": 3.9431341113096425e-09, "loss": 0.424363911151886, "step": 8442 }, { "epoch": 1.9467373760664053, "grad_norm": 1.9939094308024221, "learning_rate": 3.9093821220818055e-09, "loss": 0.5321601033210754, "step": 8443 }, { "epoch": 1.946967950195988, "grad_norm": 1.7091737329216896, "learning_rate": 3.875774924720465e-09, "loss": 0.48579344153404236, "step": 8444 }, { "epoch": 1.9471985243255707, "grad_norm": 1.4617398717494952, "learning_rate": 3.842312524110603e-09, "loss": 0.39313316345214844, "step": 8445 }, { "epoch": 1.9474290984551534, "grad_norm": 1.6233833617742501, "learning_rate": 3.8089949251163264e-09, "loss": 0.522427499294281, "step": 8446 }, { "epoch": 1.9476596725847362, "grad_norm": 1.601217744469266, "learning_rate": 3.775822132580875e-09, "loss": 0.3822653889656067, "step": 8447 }, { "epoch": 1.9478902467143187, "grad_norm": 1.5787465509087006, "learning_rate": 3.7427941513259454e-09, "loss": 0.4322483241558075, "step": 8448 }, { "epoch": 1.9481208208439014, "grad_norm": 1.6934897718136162, "learning_rate": 3.7099109861528087e-09, "loss": 0.4862939715385437, "step": 8449 }, { "epoch": 1.9483513949734839, "grad_norm": 1.5875963080752307, "learning_rate": 3.6771726418410863e-09, "loss": 0.45388323068618774, "step": 8450 }, { "epoch": 1.9485819691030666, "grad_norm": 1.5187043160616758, "learning_rate": 3.644579123149749e-09, "loss": 0.3937215805053711, "step": 8451 }, { "epoch": 1.9488125432326493, "grad_norm": 1.5446261991465484, "learning_rate": 3.6121304348165628e-09, "loss": 0.46887993812561035, "step": 8452 }, { "epoch": 1.949043117362232, "grad_norm": 1.763834546986469, "learning_rate": 3.5798265815584204e-09, "loss": 0.4444226026535034, "step": 8453 }, { "epoch": 1.9492736914918147, "grad_norm": 1.639572253352884, "learning_rate": 3.5476675680709e-09, "loss": 0.4938625991344452, "step": 8454 }, { "epoch": 1.9495042656213972, "grad_norm": 1.456362188758518, "learning_rate": 3.5156533990285953e-09, "loss": 0.37632471323013306, "step": 8455 }, { "epoch": 1.94973483975098, "grad_norm": 1.8608548289842328, "learning_rate": 3.483784079085117e-09, "loss": 0.4345025420188904, "step": 8456 }, { "epoch": 1.9499654138805624, "grad_norm": 1.4598938490767328, "learning_rate": 3.4520596128729818e-09, "loss": 0.3721727132797241, "step": 8457 }, { "epoch": 1.9501959880101452, "grad_norm": 1.6409042038383927, "learning_rate": 3.4204800050037232e-09, "loss": 0.4871670603752136, "step": 8458 }, { "epoch": 1.9504265621397279, "grad_norm": 1.8307964169711943, "learning_rate": 3.38904526006778e-09, "loss": 0.578133225440979, "step": 8459 }, { "epoch": 1.9506571362693106, "grad_norm": 1.5202457315236042, "learning_rate": 3.357755382634386e-09, "loss": 0.4721870422363281, "step": 8460 }, { "epoch": 1.9508877103988933, "grad_norm": 1.798795599183991, "learning_rate": 3.3266103772519037e-09, "loss": 0.4569184184074402, "step": 8461 }, { "epoch": 1.951118284528476, "grad_norm": 1.7311036262190431, "learning_rate": 3.2956102484477112e-09, "loss": 0.48763811588287354, "step": 8462 }, { "epoch": 1.9513488586580585, "grad_norm": 1.5898725581558353, "learning_rate": 3.264755000727759e-09, "loss": 0.45957818627357483, "step": 8463 }, { "epoch": 1.9515794327876412, "grad_norm": 1.661536076059429, "learning_rate": 3.234044638577238e-09, "loss": 0.49398598074913025, "step": 8464 }, { "epoch": 1.9518100069172237, "grad_norm": 1.8367269278410805, "learning_rate": 3.2034791664603544e-09, "loss": 0.48884931206703186, "step": 8465 }, { "epoch": 1.9520405810468064, "grad_norm": 1.4322798652039197, "learning_rate": 3.173058588819999e-09, "loss": 0.45171886682510376, "step": 8466 }, { "epoch": 1.9522711551763892, "grad_norm": 1.7896431151356735, "learning_rate": 3.142782910077968e-09, "loss": 0.45110028982162476, "step": 8467 }, { "epoch": 1.9525017293059719, "grad_norm": 1.6339596386172939, "learning_rate": 3.1126521346354074e-09, "loss": 0.4602523446083069, "step": 8468 }, { "epoch": 1.9527323034355546, "grad_norm": 1.4993439724695443, "learning_rate": 3.082666266872036e-09, "loss": 0.3908727169036865, "step": 8469 }, { "epoch": 1.9529628775651373, "grad_norm": 1.6588394319404383, "learning_rate": 3.0528253111464786e-09, "loss": 0.4886831045150757, "step": 8470 }, { "epoch": 1.9531934516947198, "grad_norm": 1.8142188930520524, "learning_rate": 3.023129271796598e-09, "loss": 0.4407721161842346, "step": 8471 }, { "epoch": 1.9534240258243025, "grad_norm": 1.545809203271424, "learning_rate": 2.9935781531389425e-09, "loss": 0.46958622336387634, "step": 8472 }, { "epoch": 1.953654599953885, "grad_norm": 1.5632050072309709, "learning_rate": 2.964171959469075e-09, "loss": 0.4642796516418457, "step": 8473 }, { "epoch": 1.9538851740834677, "grad_norm": 1.5522529280671595, "learning_rate": 2.9349106950613545e-09, "loss": 0.5124588012695312, "step": 8474 }, { "epoch": 1.9541157482130505, "grad_norm": 1.7441462887025347, "learning_rate": 2.9057943641693784e-09, "loss": 0.516730546951294, "step": 8475 }, { "epoch": 1.9543463223426332, "grad_norm": 1.6015713883307108, "learning_rate": 2.876822971025428e-09, "loss": 0.47847944498062134, "step": 8476 }, { "epoch": 1.9545768964722159, "grad_norm": 1.9133896423438201, "learning_rate": 2.8479965198408007e-09, "loss": 0.5167095065116882, "step": 8477 }, { "epoch": 1.9548074706017986, "grad_norm": 1.4489948600651796, "learning_rate": 2.819315014805812e-09, "loss": 0.40728163719177246, "step": 8478 }, { "epoch": 1.955038044731381, "grad_norm": 1.4413821780207463, "learning_rate": 2.790778460089349e-09, "loss": 0.49741852283477783, "step": 8479 }, { "epoch": 1.9552686188609638, "grad_norm": 1.3759130199865537, "learning_rate": 2.7623868598397603e-09, "loss": 0.33847475051879883, "step": 8480 }, { "epoch": 1.9554991929905463, "grad_norm": 1.6995475203184411, "learning_rate": 2.734140218183856e-09, "loss": 0.39727652072906494, "step": 8481 }, { "epoch": 1.955729767120129, "grad_norm": 1.7012108842781224, "learning_rate": 2.706038539227795e-09, "loss": 0.40332260727882385, "step": 8482 }, { "epoch": 1.9559603412497117, "grad_norm": 1.3388931691886075, "learning_rate": 2.6780818270562e-09, "loss": 0.40296924114227295, "step": 8483 }, { "epoch": 1.9561909153792945, "grad_norm": 1.4889010944404621, "learning_rate": 2.650270085732931e-09, "loss": 0.4253476858139038, "step": 8484 }, { "epoch": 1.9564214895088772, "grad_norm": 1.5794301308382195, "learning_rate": 2.6226033193007535e-09, "loss": 0.448941171169281, "step": 8485 }, { "epoch": 1.95665206363846, "grad_norm": 1.9411463996799059, "learning_rate": 2.59508153178134e-09, "loss": 0.48213180899620056, "step": 8486 }, { "epoch": 1.9568826377680424, "grad_norm": 1.6243019689896288, "learning_rate": 2.5677047271752683e-09, "loss": 0.48886558413505554, "step": 8487 }, { "epoch": 1.957113211897625, "grad_norm": 1.4212209484619593, "learning_rate": 2.5404729094619103e-09, "loss": 0.49786341190338135, "step": 8488 }, { "epoch": 1.9573437860272076, "grad_norm": 2.1312601270605365, "learning_rate": 2.5133860825997667e-09, "loss": 0.4487866163253784, "step": 8489 }, { "epoch": 1.9575743601567903, "grad_norm": 1.7672945087914924, "learning_rate": 2.486444250526243e-09, "loss": 0.46193206310272217, "step": 8490 }, { "epoch": 1.957804934286373, "grad_norm": 1.5923899778865398, "learning_rate": 2.459647417157429e-09, "loss": 0.44729042053222656, "step": 8491 }, { "epoch": 1.9580355084159557, "grad_norm": 1.8298057614969963, "learning_rate": 2.432995586388764e-09, "loss": 0.4646851718425751, "step": 8492 }, { "epoch": 1.9582660825455385, "grad_norm": 1.6514495959092017, "learning_rate": 2.40648876209415e-09, "loss": 0.49538400769233704, "step": 8493 }, { "epoch": 1.9584966566751212, "grad_norm": 1.7330889796307278, "learning_rate": 2.3801269481267262e-09, "loss": 0.5548783540725708, "step": 8494 }, { "epoch": 1.9587272308047037, "grad_norm": 1.65108674708811, "learning_rate": 2.3539101483184277e-09, "loss": 0.4390280544757843, "step": 8495 }, { "epoch": 1.9589578049342864, "grad_norm": 1.323831070791993, "learning_rate": 2.327838366480095e-09, "loss": 0.3079942464828491, "step": 8496 }, { "epoch": 1.959188379063869, "grad_norm": 2.030408303723105, "learning_rate": 2.301911606401585e-09, "loss": 0.5199894309043884, "step": 8497 }, { "epoch": 1.9594189531934516, "grad_norm": 1.6402740340647268, "learning_rate": 2.276129871851662e-09, "loss": 0.3403523564338684, "step": 8498 }, { "epoch": 1.9596495273230343, "grad_norm": 1.785907762491574, "learning_rate": 2.2504931665777714e-09, "loss": 0.49699991941452026, "step": 8499 }, { "epoch": 1.959880101452617, "grad_norm": 1.5969429106714301, "learning_rate": 2.2250014943066e-09, "loss": 0.4178547263145447, "step": 8500 }, { "epoch": 1.9601106755821998, "grad_norm": 1.8924231136601524, "learning_rate": 2.199654858743627e-09, "loss": 0.5622760057449341, "step": 8501 }, { "epoch": 1.9603412497117825, "grad_norm": 1.4610200259542554, "learning_rate": 2.1744532635733505e-09, "loss": 0.4072464406490326, "step": 8502 }, { "epoch": 1.960571823841365, "grad_norm": 1.5401248564682235, "learning_rate": 2.1493967124587287e-09, "loss": 0.475033164024353, "step": 8503 }, { "epoch": 1.9608023979709477, "grad_norm": 1.7291130993603476, "learning_rate": 2.1244852090424035e-09, "loss": 0.4734419584274292, "step": 8504 }, { "epoch": 1.9610329721005302, "grad_norm": 1.7230208360471804, "learning_rate": 2.099718756945257e-09, "loss": 0.42523911595344543, "step": 8505 }, { "epoch": 1.961263546230113, "grad_norm": 1.510126016418521, "learning_rate": 2.075097359767297e-09, "loss": 0.5085049867630005, "step": 8506 }, { "epoch": 1.9614941203596956, "grad_norm": 1.6269226735706044, "learning_rate": 2.0506210210877728e-09, "loss": 0.5682120323181152, "step": 8507 }, { "epoch": 1.9617246944892783, "grad_norm": 1.5852715445159862, "learning_rate": 2.0262897444642823e-09, "loss": 0.4550264775753021, "step": 8508 }, { "epoch": 1.961955268618861, "grad_norm": 1.560540594785291, "learning_rate": 2.0021035334337745e-09, "loss": 0.43745940923690796, "step": 8509 }, { "epoch": 1.9621858427484438, "grad_norm": 1.421824915655791, "learning_rate": 1.9780623915118812e-09, "loss": 0.4523237347602844, "step": 8510 }, { "epoch": 1.9624164168780263, "grad_norm": 1.354930266701351, "learning_rate": 1.9541663221933623e-09, "loss": 0.43080687522888184, "step": 8511 }, { "epoch": 1.962646991007609, "grad_norm": 1.6208010256189354, "learning_rate": 1.930415328951551e-09, "loss": 0.5265613794326782, "step": 8512 }, { "epoch": 1.9628775651371915, "grad_norm": 1.6858160892782517, "learning_rate": 1.906809415239019e-09, "loss": 0.5482667684555054, "step": 8513 }, { "epoch": 1.9631081392667742, "grad_norm": 1.8258400073226166, "learning_rate": 1.8833485844871322e-09, "loss": 0.43548330664634705, "step": 8514 }, { "epoch": 1.963338713396357, "grad_norm": 1.4726232338870595, "learning_rate": 1.8600328401061627e-09, "loss": 0.45715010166168213, "step": 8515 }, { "epoch": 1.9635692875259396, "grad_norm": 1.4143739917928304, "learning_rate": 1.8368621854852884e-09, "loss": 0.48137760162353516, "step": 8516 }, { "epoch": 1.9637998616555223, "grad_norm": 1.5443669851131265, "learning_rate": 1.8138366239924818e-09, "loss": 0.4607926607131958, "step": 8517 }, { "epoch": 1.964030435785105, "grad_norm": 1.2018843862548443, "learning_rate": 1.7909561589749545e-09, "loss": 0.3551321029663086, "step": 8518 }, { "epoch": 1.9642610099146876, "grad_norm": 1.4318523604861806, "learning_rate": 1.7682207937583792e-09, "loss": 0.4075126647949219, "step": 8519 }, { "epoch": 1.9644915840442703, "grad_norm": 1.5238435411050293, "learning_rate": 1.7456305316477793e-09, "loss": 0.4470815658569336, "step": 8520 }, { "epoch": 1.9647221581738528, "grad_norm": 1.7248235582994178, "learning_rate": 1.72318537592675e-09, "loss": 0.5074938535690308, "step": 8521 }, { "epoch": 1.9649527323034355, "grad_norm": 1.684987227657268, "learning_rate": 1.700885329857904e-09, "loss": 0.4799109697341919, "step": 8522 }, { "epoch": 1.9651833064330182, "grad_norm": 1.6217891186344597, "learning_rate": 1.6787303966828703e-09, "loss": 0.5603263974189758, "step": 8523 }, { "epoch": 1.965413880562601, "grad_norm": 1.386089333333111, "learning_rate": 1.656720579622073e-09, "loss": 0.45492851734161377, "step": 8524 }, { "epoch": 1.9656444546921836, "grad_norm": 1.9563157820273458, "learning_rate": 1.6348558818748414e-09, "loss": 0.47700050473213196, "step": 8525 }, { "epoch": 1.9658750288217663, "grad_norm": 1.7426284772598926, "learning_rate": 1.6131363066194115e-09, "loss": 0.5105462074279785, "step": 8526 }, { "epoch": 1.9661056029513488, "grad_norm": 1.6514750536849407, "learning_rate": 1.5915618570130351e-09, "loss": 0.47818124294281006, "step": 8527 }, { "epoch": 1.9663361770809316, "grad_norm": 1.7136861974622173, "learning_rate": 1.5701325361916484e-09, "loss": 0.4549172520637512, "step": 8528 }, { "epoch": 1.966566751210514, "grad_norm": 1.7152545383952742, "learning_rate": 1.5488483472703151e-09, "loss": 0.406271755695343, "step": 8529 }, { "epoch": 1.9667973253400968, "grad_norm": 1.772427841344589, "learning_rate": 1.5277092933427827e-09, "loss": 0.4452788829803467, "step": 8530 }, { "epoch": 1.9670278994696795, "grad_norm": 1.7369674304649072, "learning_rate": 1.5067153774820374e-09, "loss": 0.46621495485305786, "step": 8531 }, { "epoch": 1.9672584735992622, "grad_norm": 1.294422205793256, "learning_rate": 1.4858666027395272e-09, "loss": 0.47837382555007935, "step": 8532 }, { "epoch": 1.967489047728845, "grad_norm": 1.754058349269308, "learning_rate": 1.4651629721460501e-09, "loss": 0.5690933465957642, "step": 8533 }, { "epoch": 1.9677196218584276, "grad_norm": 1.7627173783003411, "learning_rate": 1.4446044887109764e-09, "loss": 0.478906512260437, "step": 8534 }, { "epoch": 1.9679501959880101, "grad_norm": 1.7296669537147416, "learning_rate": 1.4241911554225827e-09, "loss": 0.5024028420448303, "step": 8535 }, { "epoch": 1.9681807701175928, "grad_norm": 1.6971062366905785, "learning_rate": 1.4039229752483839e-09, "loss": 0.4430769979953766, "step": 8536 }, { "epoch": 1.9684113442471753, "grad_norm": 1.5177256060076265, "learning_rate": 1.3837999511343567e-09, "loss": 0.34506234526634216, "step": 8537 }, { "epoch": 1.968641918376758, "grad_norm": 1.6051884301428612, "learning_rate": 1.363822086005717e-09, "loss": 0.47483426332473755, "step": 8538 }, { "epoch": 1.9688724925063408, "grad_norm": 1.4685071017788778, "learning_rate": 1.343989382766475e-09, "loss": 0.3902367651462555, "step": 8539 }, { "epoch": 1.9691030666359235, "grad_norm": 1.5919563191923878, "learning_rate": 1.3243018442994358e-09, "loss": 0.5114254951477051, "step": 8540 }, { "epoch": 1.9693336407655062, "grad_norm": 1.6064476628756739, "learning_rate": 1.3047594734663104e-09, "loss": 0.4048948287963867, "step": 8541 }, { "epoch": 1.969564214895089, "grad_norm": 1.3533697409791567, "learning_rate": 1.2853622731079372e-09, "loss": 0.4168536067008972, "step": 8542 }, { "epoch": 1.9697947890246714, "grad_norm": 1.459175077584749, "learning_rate": 1.2661102460437279e-09, "loss": 0.38410186767578125, "step": 8543 }, { "epoch": 1.9700253631542541, "grad_norm": 1.5096843994913236, "learning_rate": 1.2470033950724435e-09, "loss": 0.4931117296218872, "step": 8544 }, { "epoch": 1.9702559372838366, "grad_norm": 1.863771997387379, "learning_rate": 1.228041722971085e-09, "loss": 0.41142135858535767, "step": 8545 }, { "epoch": 1.9704865114134194, "grad_norm": 1.7868633908108185, "learning_rate": 1.209225232496225e-09, "loss": 0.5165313482284546, "step": 8546 }, { "epoch": 1.970717085543002, "grad_norm": 1.284821780038077, "learning_rate": 1.190553926382898e-09, "loss": 0.3330427408218384, "step": 8547 }, { "epoch": 1.9709476596725848, "grad_norm": 1.5242411906386457, "learning_rate": 1.172027807345155e-09, "loss": 0.43116509914398193, "step": 8548 }, { "epoch": 1.9711782338021675, "grad_norm": 1.8011852071569119, "learning_rate": 1.1536468780760643e-09, "loss": 0.43564409017562866, "step": 8549 }, { "epoch": 1.9714088079317502, "grad_norm": 1.7422483041269035, "learning_rate": 1.1354111412472666e-09, "loss": 0.5361013412475586, "step": 8550 }, { "epoch": 1.9716393820613327, "grad_norm": 1.6110906687473352, "learning_rate": 1.1173205995097524e-09, "loss": 0.4049466550350189, "step": 8551 }, { "epoch": 1.9718699561909154, "grad_norm": 1.6636539568656024, "learning_rate": 1.0993752554930847e-09, "loss": 0.45090144872665405, "step": 8552 }, { "epoch": 1.972100530320498, "grad_norm": 1.5627616190247176, "learning_rate": 1.0815751118057326e-09, "loss": 0.43933606147766113, "step": 8553 }, { "epoch": 1.9723311044500806, "grad_norm": 1.672183185343667, "learning_rate": 1.063920171035182e-09, "loss": 0.5254300832748413, "step": 8554 }, { "epoch": 1.9725616785796634, "grad_norm": 1.4309558177904258, "learning_rate": 1.0464104357477132e-09, "loss": 0.45544567704200745, "step": 8555 }, { "epoch": 1.972792252709246, "grad_norm": 1.9479324504983593, "learning_rate": 1.0290459084886238e-09, "loss": 0.5177001357078552, "step": 8556 }, { "epoch": 1.9730228268388288, "grad_norm": 1.585288183336846, "learning_rate": 1.0118265917818946e-09, "loss": 0.4669674038887024, "step": 8557 }, { "epoch": 1.9732534009684115, "grad_norm": 1.5203759714638625, "learning_rate": 9.947524881307456e-10, "loss": 0.4244263172149658, "step": 8558 }, { "epoch": 1.973483975097994, "grad_norm": 1.810087521792982, "learning_rate": 9.778236000168583e-10, "loss": 0.44121527671813965, "step": 8559 }, { "epoch": 1.9737145492275767, "grad_norm": 1.59326202559186, "learning_rate": 9.610399299010418e-10, "loss": 0.44209837913513184, "step": 8560 }, { "epoch": 1.9739451233571592, "grad_norm": 1.5399236076354037, "learning_rate": 9.444014802231226e-10, "loss": 0.4036273956298828, "step": 8561 }, { "epoch": 1.974175697486742, "grad_norm": 1.5589230288439277, "learning_rate": 9.279082534014992e-10, "loss": 0.47106266021728516, "step": 8562 }, { "epoch": 1.9744062716163246, "grad_norm": 1.6389105898260865, "learning_rate": 9.115602518338095e-10, "loss": 0.41080260276794434, "step": 8563 }, { "epoch": 1.9746368457459074, "grad_norm": 2.0418613187292918, "learning_rate": 8.953574778962635e-10, "loss": 0.4333069920539856, "step": 8564 }, { "epoch": 1.97486741987549, "grad_norm": 1.4286669807437469, "learning_rate": 8.792999339440887e-10, "loss": 0.3939141631126404, "step": 8565 }, { "epoch": 1.9750979940050726, "grad_norm": 1.7648959719228037, "learning_rate": 8.633876223114178e-10, "loss": 0.4202404022216797, "step": 8566 }, { "epoch": 1.9753285681346553, "grad_norm": 1.6239377555078118, "learning_rate": 8.476205453114005e-10, "loss": 0.44722893834114075, "step": 8567 }, { "epoch": 1.9755591422642378, "grad_norm": 1.6159852265335335, "learning_rate": 8.319987052357591e-10, "loss": 0.4095258414745331, "step": 8568 }, { "epoch": 1.9757897163938205, "grad_norm": 1.359270850467109, "learning_rate": 8.165221043553439e-10, "loss": 0.43372297286987305, "step": 8569 }, { "epoch": 1.9760202905234032, "grad_norm": 1.7602005237852472, "learning_rate": 8.011907449199106e-10, "loss": 0.4697731137275696, "step": 8570 }, { "epoch": 1.976250864652986, "grad_norm": 1.759646277514859, "learning_rate": 7.860046291580103e-10, "loss": 0.49179136753082275, "step": 8571 }, { "epoch": 1.9764814387825687, "grad_norm": 1.5966011788910657, "learning_rate": 7.70963759277099e-10, "loss": 0.35898157954216003, "step": 8572 }, { "epoch": 1.9767120129121514, "grad_norm": 1.5427594087958296, "learning_rate": 7.560681374634282e-10, "loss": 0.48293429613113403, "step": 8573 }, { "epoch": 1.9769425870417339, "grad_norm": 1.4911498565229593, "learning_rate": 7.413177658822656e-10, "loss": 0.39636045694351196, "step": 8574 }, { "epoch": 1.9771731611713166, "grad_norm": 1.294544438076297, "learning_rate": 7.267126466777851e-10, "loss": 0.375876784324646, "step": 8575 }, { "epoch": 1.977403735300899, "grad_norm": 1.438449662082489, "learning_rate": 7.122527819729551e-10, "loss": 0.4064311385154724, "step": 8576 }, { "epoch": 1.9776343094304818, "grad_norm": 1.3024542737808098, "learning_rate": 6.979381738696499e-10, "loss": 0.4373857378959656, "step": 8577 }, { "epoch": 1.9778648835600645, "grad_norm": 2.013857406007071, "learning_rate": 6.837688244486494e-10, "loss": 0.5008025765419006, "step": 8578 }, { "epoch": 1.9780954576896472, "grad_norm": 1.5523385427514034, "learning_rate": 6.697447357695285e-10, "loss": 0.4286271035671234, "step": 8579 }, { "epoch": 1.97832603181923, "grad_norm": 1.6941567857927917, "learning_rate": 6.558659098711006e-10, "loss": 0.4420759081840515, "step": 8580 }, { "epoch": 1.9785566059488127, "grad_norm": 1.314306142904572, "learning_rate": 6.421323487705299e-10, "loss": 0.3946709632873535, "step": 8581 }, { "epoch": 1.9787871800783952, "grad_norm": 1.6731376396011677, "learning_rate": 6.285440544641085e-10, "loss": 0.42874544858932495, "step": 8582 }, { "epoch": 1.9790177542079779, "grad_norm": 1.5147129393930194, "learning_rate": 6.151010289272563e-10, "loss": 0.4728921055793762, "step": 8583 }, { "epoch": 1.9792483283375604, "grad_norm": 1.4681942656331504, "learning_rate": 6.018032741139656e-10, "loss": 0.3756295442581177, "step": 8584 }, { "epoch": 1.979478902467143, "grad_norm": 1.5314225760860438, "learning_rate": 5.886507919570239e-10, "loss": 0.48663657903671265, "step": 8585 }, { "epoch": 1.9797094765967258, "grad_norm": 2.0571870297763377, "learning_rate": 5.756435843685681e-10, "loss": 0.46127766370773315, "step": 8586 }, { "epoch": 1.9799400507263085, "grad_norm": 1.4783867212667936, "learning_rate": 5.627816532390862e-10, "loss": 0.493796169757843, "step": 8587 }, { "epoch": 1.9801706248558912, "grad_norm": 1.2639174296233155, "learning_rate": 5.500650004383045e-10, "loss": 0.3703004717826843, "step": 8588 }, { "epoch": 1.980401198985474, "grad_norm": 1.6202036973245495, "learning_rate": 5.374936278146336e-10, "loss": 0.5385284423828125, "step": 8589 }, { "epoch": 1.9806317731150564, "grad_norm": 1.5325088206554112, "learning_rate": 5.250675371956115e-10, "loss": 0.3996584713459015, "step": 8590 }, { "epoch": 1.9808623472446392, "grad_norm": 1.6001328200790206, "learning_rate": 5.12786730387349e-10, "loss": 0.4513227641582489, "step": 8591 }, { "epoch": 1.9810929213742217, "grad_norm": 1.5317035339628575, "learning_rate": 5.006512091750848e-10, "loss": 0.46632474660873413, "step": 8592 }, { "epoch": 1.9813234955038044, "grad_norm": 1.5599775050602098, "learning_rate": 4.886609753227411e-10, "loss": 0.5379712581634521, "step": 8593 }, { "epoch": 1.981554069633387, "grad_norm": 1.6572300992446405, "learning_rate": 4.768160305732572e-10, "loss": 0.3606422543525696, "step": 8594 }, { "epoch": 1.9817846437629698, "grad_norm": 1.927352159029303, "learning_rate": 4.651163766484778e-10, "loss": 0.39339596033096313, "step": 8595 }, { "epoch": 1.9820152178925525, "grad_norm": 1.5930436461957604, "learning_rate": 4.535620152489317e-10, "loss": 0.4606707692146301, "step": 8596 }, { "epoch": 1.9822457920221352, "grad_norm": 1.484957242621252, "learning_rate": 4.421529480543862e-10, "loss": 0.4234154522418976, "step": 8597 }, { "epoch": 1.9824763661517177, "grad_norm": 1.3985130447330405, "learning_rate": 4.308891767229594e-10, "loss": 0.49317437410354614, "step": 8598 }, { "epoch": 1.9827069402813005, "grad_norm": 1.5795407686648721, "learning_rate": 4.197707028922304e-10, "loss": 0.47756847739219666, "step": 8599 }, { "epoch": 1.982937514410883, "grad_norm": 1.437347041692997, "learning_rate": 4.0879752817823963e-10, "loss": 0.37664321064949036, "step": 8600 }, { "epoch": 1.9831680885404657, "grad_norm": 1.4684607347638514, "learning_rate": 3.9796965417604465e-10, "loss": 0.3927830457687378, "step": 8601 }, { "epoch": 1.9833986626700484, "grad_norm": 1.5410832268522827, "learning_rate": 3.8728708245971966e-10, "loss": 0.41071420907974243, "step": 8602 }, { "epoch": 1.983629236799631, "grad_norm": 1.7060421891461264, "learning_rate": 3.7674981458191145e-10, "loss": 0.49516505002975464, "step": 8603 }, { "epoch": 1.9838598109292138, "grad_norm": 1.451667871155561, "learning_rate": 3.6635785207439486e-10, "loss": 0.474129855632782, "step": 8604 }, { "epoch": 1.9840903850587965, "grad_norm": 1.6840089122105588, "learning_rate": 3.5611119644773923e-10, "loss": 0.4445813298225403, "step": 8605 }, { "epoch": 1.984320959188379, "grad_norm": 2.027307915892804, "learning_rate": 3.4600984919141987e-10, "loss": 0.46165329217910767, "step": 8606 }, { "epoch": 1.9845515333179617, "grad_norm": 1.3540207698004456, "learning_rate": 3.3605381177381764e-10, "loss": 0.4073392152786255, "step": 8607 }, { "epoch": 1.9847821074475442, "grad_norm": 1.5051036984917558, "learning_rate": 3.262430856419973e-10, "loss": 0.46712470054626465, "step": 8608 }, { "epoch": 1.985012681577127, "grad_norm": 1.4968737511198085, "learning_rate": 3.165776722222624e-10, "loss": 0.49993449449539185, "step": 8609 }, { "epoch": 1.9852432557067097, "grad_norm": 1.67576101698744, "learning_rate": 3.0705757291926705e-10, "loss": 0.40737634897232056, "step": 8610 }, { "epoch": 1.9854738298362924, "grad_norm": 1.5973815539324434, "learning_rate": 2.976827891172373e-10, "loss": 0.3714853823184967, "step": 8611 }, { "epoch": 1.985704403965875, "grad_norm": 1.425745294363986, "learning_rate": 2.884533221785279e-10, "loss": 0.3818984925746918, "step": 8612 }, { "epoch": 1.9859349780954578, "grad_norm": 1.892989564850047, "learning_rate": 2.7936917344495435e-10, "loss": 0.4529988765716553, "step": 8613 }, { "epoch": 1.9861655522250403, "grad_norm": 1.3609709522865416, "learning_rate": 2.7043034423701595e-10, "loss": 0.44964706897735596, "step": 8614 }, { "epoch": 1.986396126354623, "grad_norm": 1.5417500191784284, "learning_rate": 2.616368358538956e-10, "loss": 0.49079659581184387, "step": 8615 }, { "epoch": 1.9866267004842055, "grad_norm": 1.5113331636323986, "learning_rate": 2.529886495739042e-10, "loss": 0.4411408305168152, "step": 8616 }, { "epoch": 1.9868572746137882, "grad_norm": 1.5784526966638346, "learning_rate": 2.444857866541472e-10, "loss": 0.4386615455150604, "step": 8617 }, { "epoch": 1.987087848743371, "grad_norm": 1.4030710400001012, "learning_rate": 2.3612824833063594e-10, "loss": 0.4545249342918396, "step": 8618 }, { "epoch": 1.9873184228729537, "grad_norm": 1.3707438995019952, "learning_rate": 2.2791603581817643e-10, "loss": 0.40094703435897827, "step": 8619 }, { "epoch": 1.9875489970025364, "grad_norm": 1.3947569997576104, "learning_rate": 2.1984915031048047e-10, "loss": 0.40233147144317627, "step": 8620 }, { "epoch": 1.9877795711321191, "grad_norm": 1.6068677090202075, "learning_rate": 2.1192759298016562e-10, "loss": 0.460537314414978, "step": 8621 }, { "epoch": 1.9880101452617016, "grad_norm": 1.5790092282402457, "learning_rate": 2.0415136497875518e-10, "loss": 0.4602966904640198, "step": 8622 }, { "epoch": 1.9882407193912843, "grad_norm": 1.3484786116390262, "learning_rate": 1.9652046743656724e-10, "loss": 0.5004392266273499, "step": 8623 }, { "epoch": 1.9884712935208668, "grad_norm": 1.6014865414140482, "learning_rate": 1.8903490146282564e-10, "loss": 0.48196107149124146, "step": 8624 }, { "epoch": 1.9887018676504495, "grad_norm": 1.6074894882455422, "learning_rate": 1.8169466814565992e-10, "loss": 0.45684510469436646, "step": 8625 }, { "epoch": 1.9889324417800323, "grad_norm": 1.60134146592956, "learning_rate": 1.7449976855199444e-10, "loss": 0.44381850957870483, "step": 8626 }, { "epoch": 1.989163015909615, "grad_norm": 2.135748914298638, "learning_rate": 1.674502037277703e-10, "loss": 0.5301632881164551, "step": 8627 }, { "epoch": 1.9893935900391977, "grad_norm": 1.6146386939845652, "learning_rate": 1.6054597469761233e-10, "loss": 0.5154398679733276, "step": 8628 }, { "epoch": 1.9896241641687804, "grad_norm": 1.9992239097696207, "learning_rate": 1.5378708246516215e-10, "loss": 0.4334644079208374, "step": 8629 }, { "epoch": 1.989854738298363, "grad_norm": 1.615721145436376, "learning_rate": 1.4717352801296713e-10, "loss": 0.45578733086586, "step": 8630 }, { "epoch": 1.9900853124279456, "grad_norm": 1.9680117779038706, "learning_rate": 1.4070531230225834e-10, "loss": 0.48997777700424194, "step": 8631 }, { "epoch": 1.9903158865575281, "grad_norm": 1.6305724090422111, "learning_rate": 1.3438243627328371e-10, "loss": 0.4760161340236664, "step": 8632 }, { "epoch": 1.9905464606871108, "grad_norm": 1.628677759157358, "learning_rate": 1.2820490084508583e-10, "loss": 0.43040308356285095, "step": 8633 }, { "epoch": 1.9907770348166935, "grad_norm": 1.4320674775365163, "learning_rate": 1.2217270691583514e-10, "loss": 0.4588020443916321, "step": 8634 }, { "epoch": 1.9910076089462763, "grad_norm": 1.562424742526405, "learning_rate": 1.1628585536216374e-10, "loss": 0.46267229318618774, "step": 8635 }, { "epoch": 1.991238183075859, "grad_norm": 1.5109131359979342, "learning_rate": 1.1054434703994253e-10, "loss": 0.4159420132637024, "step": 8636 }, { "epoch": 1.9914687572054417, "grad_norm": 1.5987294041380085, "learning_rate": 1.0494818278361518e-10, "loss": 0.47950947284698486, "step": 8637 }, { "epoch": 1.9916993313350242, "grad_norm": 1.6664716034008127, "learning_rate": 9.949736340664206e-11, "loss": 0.4912334680557251, "step": 8638 }, { "epoch": 1.991929905464607, "grad_norm": 1.5249112719703917, "learning_rate": 9.419188970150038e-11, "loss": 0.4895044267177582, "step": 8639 }, { "epoch": 1.9921604795941894, "grad_norm": 1.6059730233512621, "learning_rate": 8.903176243935106e-11, "loss": 0.4822810888290405, "step": 8640 }, { "epoch": 1.9923910537237721, "grad_norm": 1.6775671432311143, "learning_rate": 8.401698237014975e-11, "loss": 0.4739280045032501, "step": 8641 }, { "epoch": 1.9926216278533548, "grad_norm": 1.5254015473001428, "learning_rate": 7.91475502228689e-11, "loss": 0.5394953489303589, "step": 8642 }, { "epoch": 1.9928522019829376, "grad_norm": 1.5656411080833423, "learning_rate": 7.44234667054977e-11, "loss": 0.38446712493896484, "step": 8643 }, { "epoch": 1.9930827761125203, "grad_norm": 1.817887515771179, "learning_rate": 6.98447325045981e-11, "loss": 0.46814244985580444, "step": 8644 }, { "epoch": 1.993313350242103, "grad_norm": 2.1046790616702284, "learning_rate": 6.541134828574879e-11, "loss": 0.5420444011688232, "step": 8645 }, { "epoch": 1.9935439243716855, "grad_norm": 1.7622016760188661, "learning_rate": 6.112331469332321e-11, "loss": 0.45574939250946045, "step": 8646 }, { "epoch": 1.9937744985012682, "grad_norm": 1.3457673361522478, "learning_rate": 5.69806323507116e-11, "loss": 0.37707841396331787, "step": 8647 }, { "epoch": 1.9940050726308507, "grad_norm": 1.713931828869125, "learning_rate": 5.298330186020994e-11, "loss": 0.5139172077178955, "step": 8648 }, { "epoch": 1.9942356467604334, "grad_norm": 1.5184794936547403, "learning_rate": 4.913132380268692e-11, "loss": 0.5251332521438599, "step": 8649 }, { "epoch": 1.9944662208900161, "grad_norm": 1.603884960010875, "learning_rate": 4.542469873802801e-11, "loss": 0.38396936655044556, "step": 8650 }, { "epoch": 1.9946967950195988, "grad_norm": 1.3451562633349459, "learning_rate": 4.1863427205246495e-11, "loss": 0.42507076263427734, "step": 8651 }, { "epoch": 1.9949273691491816, "grad_norm": 1.3938730213086719, "learning_rate": 3.8447509721817316e-11, "loss": 0.3914533257484436, "step": 8652 }, { "epoch": 1.9951579432787643, "grad_norm": 1.8085258279642746, "learning_rate": 3.5176946784343245e-11, "loss": 0.46923860907554626, "step": 8653 }, { "epoch": 1.9953885174083468, "grad_norm": 1.5078315206639539, "learning_rate": 3.205173886822177e-11, "loss": 0.35363346338272095, "step": 8654 }, { "epoch": 1.9956190915379295, "grad_norm": 1.8022263810516201, "learning_rate": 2.9071886427867175e-11, "loss": 0.4142746925354004, "step": 8655 }, { "epoch": 1.995849665667512, "grad_norm": 1.4186888258792274, "learning_rate": 2.623738989626645e-11, "loss": 0.34989133477211, "step": 8656 }, { "epoch": 1.9960802397970947, "grad_norm": 1.6690344619586774, "learning_rate": 2.354824968542335e-11, "loss": 0.5059055089950562, "step": 8657 }, { "epoch": 1.9963108139266774, "grad_norm": 1.6307598945991617, "learning_rate": 2.1004466186358426e-11, "loss": 0.4772738516330719, "step": 8658 }, { "epoch": 1.9965413880562601, "grad_norm": 2.0155808953661456, "learning_rate": 1.860603976877595e-11, "loss": 0.5055459141731262, "step": 8659 }, { "epoch": 1.9967719621858429, "grad_norm": 1.634582725028991, "learning_rate": 1.6352970781285946e-11, "loss": 0.3764510154724121, "step": 8660 }, { "epoch": 1.9970025363154256, "grad_norm": 1.5414383762022799, "learning_rate": 1.424525955140421e-11, "loss": 0.42315495014190674, "step": 8661 }, { "epoch": 1.997233110445008, "grad_norm": 1.435478088309439, "learning_rate": 1.2282906385552295e-11, "loss": 0.3647070527076721, "step": 8662 }, { "epoch": 1.9974636845745908, "grad_norm": 1.5518093691270274, "learning_rate": 1.0465911568946495e-11, "loss": 0.3832179307937622, "step": 8663 }, { "epoch": 1.9976942587041733, "grad_norm": 1.6197061226224263, "learning_rate": 8.79427536570887e-12, "loss": 0.46649307012557983, "step": 8664 }, { "epoch": 1.997924832833756, "grad_norm": 1.4914895158884427, "learning_rate": 7.267998018867238e-12, "loss": 0.5101447701454163, "step": 8665 }, { "epoch": 1.9981554069633387, "grad_norm": 1.7518527885996649, "learning_rate": 5.8870797502441615e-12, "loss": 0.48426300287246704, "step": 8666 }, { "epoch": 1.9983859810929214, "grad_norm": 1.74396723859127, "learning_rate": 4.65152076045694e-12, "loss": 0.5109666585922241, "step": 8667 }, { "epoch": 1.9986165552225041, "grad_norm": 1.5553329658335424, "learning_rate": 3.5613212293617023e-12, "loss": 0.36605560779571533, "step": 8668 }, { "epoch": 1.9988471293520869, "grad_norm": 1.635805382712207, "learning_rate": 2.6164813152762533e-12, "loss": 0.515751302242279, "step": 8669 }, { "epoch": 1.9990777034816694, "grad_norm": 1.530462345782049, "learning_rate": 1.8170011554241582e-12, "loss": 0.48570311069488525, "step": 8670 }, { "epoch": 1.999308277611252, "grad_norm": 1.643766587262656, "learning_rate": 1.1628808662678124e-12, "loss": 0.5033636093139648, "step": 8671 }, { "epoch": 1.9995388517408346, "grad_norm": 1.839455005664103, "learning_rate": 6.541205427312846e-13, "loss": 0.4581984281539917, "step": 8672 }, { "epoch": 1.9997694258704173, "grad_norm": 1.6255864134270288, "learning_rate": 2.9072025886645037e-13, "loss": 0.4574134945869446, "step": 8673 }, { "epoch": 2.0, "grad_norm": 1.4400793609212648, "learning_rate": 7.268006729788112e-14, "loss": 0.39279258251190186, "step": 8674 }, { "epoch": 2.0, "step": 8674, "total_flos": 2994036868841472.0, "train_loss": 0.5227575608908595, "train_runtime": 21685.2, "train_samples_per_second": 1.6, "train_steps_per_second": 0.4 } ], "logging_steps": 1, "max_steps": 8674, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2994036868841472.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }