| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 56591, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0008835327172165185, |
| "grad_norm": 5.665971279144287, |
| "learning_rate": 4.3286219081272084e-07, |
| "loss": 1.3738, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.001767065434433037, |
| "grad_norm": 5.6161651611328125, |
| "learning_rate": 8.745583038869259e-07, |
| "loss": 1.1661, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0026505981516495554, |
| "grad_norm": 7.866199970245361, |
| "learning_rate": 1.3162544169611309e-06, |
| "loss": 1.2107, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.003534130868866074, |
| "grad_norm": 5.07379674911499, |
| "learning_rate": 1.7579505300353357e-06, |
| "loss": 0.9855, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.004417663586082593, |
| "grad_norm": 3.2607851028442383, |
| "learning_rate": 2.199646643109541e-06, |
| "loss": 0.9431, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.005301196303299111, |
| "grad_norm": 6.517599105834961, |
| "learning_rate": 2.6413427561837457e-06, |
| "loss": 0.8566, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.00618472902051563, |
| "grad_norm": 2.8523333072662354, |
| "learning_rate": 3.0830388692579506e-06, |
| "loss": 0.8697, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.007068261737732148, |
| "grad_norm": 3.460226058959961, |
| "learning_rate": 3.5247349823321555e-06, |
| "loss": 0.8099, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.007951794454948667, |
| "grad_norm": 3.2528891563415527, |
| "learning_rate": 3.966431095406361e-06, |
| "loss": 0.766, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.008835327172165185, |
| "grad_norm": 4.1086039543151855, |
| "learning_rate": 4.408127208480566e-06, |
| "loss": 0.7402, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.009718859889381704, |
| "grad_norm": 3.8160510063171387, |
| "learning_rate": 4.849823321554771e-06, |
| "loss": 0.8769, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.010602392606598222, |
| "grad_norm": 2.901653289794922, |
| "learning_rate": 5.291519434628975e-06, |
| "loss": 0.6827, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.011485925323814742, |
| "grad_norm": 2.5824739933013916, |
| "learning_rate": 5.73321554770318e-06, |
| "loss": 0.7252, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.01236945804103126, |
| "grad_norm": 2.586138963699341, |
| "learning_rate": 6.174911660777385e-06, |
| "loss": 0.7701, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.013252990758247778, |
| "grad_norm": 2.3450210094451904, |
| "learning_rate": 6.6166077738515904e-06, |
| "loss": 0.7525, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.014136523475464296, |
| "grad_norm": 2.7902042865753174, |
| "learning_rate": 7.058303886925795e-06, |
| "loss": 0.7097, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.015020056192680814, |
| "grad_norm": 3.297929286956787, |
| "learning_rate": 7.5e-06, |
| "loss": 0.7575, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.015903588909897334, |
| "grad_norm": 4.028406143188477, |
| "learning_rate": 7.941696113074205e-06, |
| "loss": 0.6899, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.016787121627113853, |
| "grad_norm": 2.2513041496276855, |
| "learning_rate": 8.38339222614841e-06, |
| "loss": 0.6655, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.01767065434433037, |
| "grad_norm": 2.402355670928955, |
| "learning_rate": 8.825088339222614e-06, |
| "loss": 0.6601, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.01855418706154689, |
| "grad_norm": 4.492621898651123, |
| "learning_rate": 9.26678445229682e-06, |
| "loss": 0.6925, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.019437719778763407, |
| "grad_norm": 3.8099517822265625, |
| "learning_rate": 9.708480565371025e-06, |
| "loss": 0.6169, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.020321252495979925, |
| "grad_norm": 4.58193826675415, |
| "learning_rate": 1.0150176678445231e-05, |
| "loss": 0.6367, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.021204785213196443, |
| "grad_norm": 4.745123863220215, |
| "learning_rate": 1.0591872791519434e-05, |
| "loss": 0.615, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.02208831793041296, |
| "grad_norm": 3.260239601135254, |
| "learning_rate": 1.103356890459364e-05, |
| "loss": 0.6869, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.022971850647629483, |
| "grad_norm": 2.485383987426758, |
| "learning_rate": 1.1475265017667845e-05, |
| "loss": 0.7527, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.023855383364846, |
| "grad_norm": 2.26680326461792, |
| "learning_rate": 1.191696113074205e-05, |
| "loss": 0.6124, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.02473891608206252, |
| "grad_norm": 2.348688840866089, |
| "learning_rate": 1.2358657243816255e-05, |
| "loss": 0.6511, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.025622448799279038, |
| "grad_norm": 2.770859956741333, |
| "learning_rate": 1.280035335689046e-05, |
| "loss": 0.7047, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.026505981516495556, |
| "grad_norm": 3.188656806945801, |
| "learning_rate": 1.3242049469964666e-05, |
| "loss": 0.6639, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.027389514233712074, |
| "grad_norm": 2.7158899307250977, |
| "learning_rate": 1.368374558303887e-05, |
| "loss": 0.6795, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.028273046950928592, |
| "grad_norm": 2.7986080646514893, |
| "learning_rate": 1.4125441696113076e-05, |
| "loss": 0.6341, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.02915657966814511, |
| "grad_norm": 1.9698214530944824, |
| "learning_rate": 1.456713780918728e-05, |
| "loss": 0.6031, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.03004011238536163, |
| "grad_norm": 2.495985507965088, |
| "learning_rate": 1.5008833922261484e-05, |
| "loss": 0.5959, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.030923645102578147, |
| "grad_norm": 2.990360975265503, |
| "learning_rate": 1.545053003533569e-05, |
| "loss": 0.6412, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.03180717781979467, |
| "grad_norm": 3.658212184906006, |
| "learning_rate": 1.5892226148409894e-05, |
| "loss": 0.5065, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.03269071053701118, |
| "grad_norm": 2.010875940322876, |
| "learning_rate": 1.63339222614841e-05, |
| "loss": 0.5611, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.033574243254227705, |
| "grad_norm": 2.408937692642212, |
| "learning_rate": 1.6775618374558306e-05, |
| "loss": 0.5298, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.03445777597144422, |
| "grad_norm": 2.3144407272338867, |
| "learning_rate": 1.721731448763251e-05, |
| "loss": 0.5759, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.03534130868866074, |
| "grad_norm": 2.944115400314331, |
| "learning_rate": 1.7659010600706715e-05, |
| "loss": 0.5782, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.03622484140587726, |
| "grad_norm": 2.3239428997039795, |
| "learning_rate": 1.810070671378092e-05, |
| "loss": 0.5221, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.03710837412309378, |
| "grad_norm": 4.565939426422119, |
| "learning_rate": 1.8542402826855124e-05, |
| "loss": 0.5966, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.0379919068403103, |
| "grad_norm": 2.6089091300964355, |
| "learning_rate": 1.898409893992933e-05, |
| "loss": 0.5989, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.038875439557526814, |
| "grad_norm": 2.4395945072174072, |
| "learning_rate": 1.9425795053003533e-05, |
| "loss": 0.5097, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.039758972274743336, |
| "grad_norm": 2.274600028991699, |
| "learning_rate": 1.986749116607774e-05, |
| "loss": 0.4934, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.04064250499195985, |
| "grad_norm": 2.393251895904541, |
| "learning_rate": 2.0309187279151945e-05, |
| "loss": 0.5354, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.04152603770917637, |
| "grad_norm": 2.613900899887085, |
| "learning_rate": 2.075088339222615e-05, |
| "loss": 0.5236, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.04240957042639289, |
| "grad_norm": 2.233302116394043, |
| "learning_rate": 2.1192579505300354e-05, |
| "loss": 0.5057, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.04329310314360941, |
| "grad_norm": 2.2634503841400146, |
| "learning_rate": 2.163427561837456e-05, |
| "loss": 0.5448, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.04417663586082592, |
| "grad_norm": 1.6744658946990967, |
| "learning_rate": 2.2075971731448763e-05, |
| "loss": 0.5418, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.045060168578042445, |
| "grad_norm": 2.9320178031921387, |
| "learning_rate": 2.2517667844522968e-05, |
| "loss": 0.5944, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.04594370129525897, |
| "grad_norm": 2.2643797397613525, |
| "learning_rate": 2.2959363957597176e-05, |
| "loss": 0.4945, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.04682723401247548, |
| "grad_norm": 2.389902114868164, |
| "learning_rate": 2.340106007067138e-05, |
| "loss": 0.5225, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.047710766729692, |
| "grad_norm": 2.2676665782928467, |
| "learning_rate": 2.3842756183745584e-05, |
| "loss": 0.5661, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.04859429944690852, |
| "grad_norm": 2.340926170349121, |
| "learning_rate": 2.428445229681979e-05, |
| "loss": 0.6125, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.04947783216412504, |
| "grad_norm": 1.925943374633789, |
| "learning_rate": 2.4726148409893997e-05, |
| "loss": 0.5105, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.050361364881341554, |
| "grad_norm": 3.1281192302703857, |
| "learning_rate": 2.5167844522968198e-05, |
| "loss": 0.5893, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.051244897598558076, |
| "grad_norm": 2.345649242401123, |
| "learning_rate": 2.5609540636042406e-05, |
| "loss": 0.545, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.05212843031577459, |
| "grad_norm": 2.9023561477661133, |
| "learning_rate": 2.605123674911661e-05, |
| "loss": 0.5299, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.05301196303299111, |
| "grad_norm": 2.491269588470459, |
| "learning_rate": 2.649293286219081e-05, |
| "loss": 0.5186, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.05389549575020763, |
| "grad_norm": 1.842517375946045, |
| "learning_rate": 2.693462897526502e-05, |
| "loss": 0.5259, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.05477902846742415, |
| "grad_norm": 3.319514274597168, |
| "learning_rate": 2.7376325088339223e-05, |
| "loss": 0.6663, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.05566256118464067, |
| "grad_norm": 2.7143654823303223, |
| "learning_rate": 2.781802120141343e-05, |
| "loss": 0.5152, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.056546093901857185, |
| "grad_norm": 2.8187732696533203, |
| "learning_rate": 2.8259717314487632e-05, |
| "loss": 0.5417, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.057429626619073706, |
| "grad_norm": 2.8348097801208496, |
| "learning_rate": 2.870141342756184e-05, |
| "loss": 0.5039, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.05831315933629022, |
| "grad_norm": 3.6297833919525146, |
| "learning_rate": 2.9143109540636045e-05, |
| "loss": 0.4647, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.05919669205350674, |
| "grad_norm": 2.6729063987731934, |
| "learning_rate": 2.9584805653710253e-05, |
| "loss": 0.4652, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.06008022477072326, |
| "grad_norm": 3.030548572540283, |
| "learning_rate": 3.0026501766784454e-05, |
| "loss": 0.4914, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.06096375748793978, |
| "grad_norm": 1.844643235206604, |
| "learning_rate": 3.0468197879858658e-05, |
| "loss": 0.5449, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.061847290205156294, |
| "grad_norm": 1.6973118782043457, |
| "learning_rate": 3.090989399293286e-05, |
| "loss": 0.5072, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.06273082292237281, |
| "grad_norm": 2.626692295074463, |
| "learning_rate": 3.135159010600707e-05, |
| "loss": 0.5639, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.06361435563958934, |
| "grad_norm": 2.971773624420166, |
| "learning_rate": 3.179328621908128e-05, |
| "loss": 0.4729, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.06449788835680585, |
| "grad_norm": 2.134610414505005, |
| "learning_rate": 3.2234982332155476e-05, |
| "loss": 0.6047, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.06538142107402237, |
| "grad_norm": 1.8596552610397339, |
| "learning_rate": 3.267667844522969e-05, |
| "loss": 0.5369, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.0662649537912389, |
| "grad_norm": 2.5137698650360107, |
| "learning_rate": 3.311837455830389e-05, |
| "loss": 0.5014, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.06714848650845541, |
| "grad_norm": 2.8211522102355957, |
| "learning_rate": 3.356007067137809e-05, |
| "loss": 0.5128, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.06803201922567192, |
| "grad_norm": 2.095426559448242, |
| "learning_rate": 3.40017667844523e-05, |
| "loss": 0.5345, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.06891555194288844, |
| "grad_norm": 2.1965081691741943, |
| "learning_rate": 3.4443462897526505e-05, |
| "loss": 0.479, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.06979908466010497, |
| "grad_norm": 2.1722958087921143, |
| "learning_rate": 3.488515901060071e-05, |
| "loss": 0.5652, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.07068261737732148, |
| "grad_norm": 2.7183449268341064, |
| "learning_rate": 3.5326855123674914e-05, |
| "loss": 0.5272, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.071566150094538, |
| "grad_norm": 2.356076717376709, |
| "learning_rate": 3.576855123674912e-05, |
| "loss": 0.4904, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.07244968281175453, |
| "grad_norm": 1.7549006938934326, |
| "learning_rate": 3.621024734982332e-05, |
| "loss": 0.4755, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.07333321552897104, |
| "grad_norm": 2.0377912521362305, |
| "learning_rate": 3.665194346289753e-05, |
| "loss": 0.4897, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.07421674824618756, |
| "grad_norm": 2.4711716175079346, |
| "learning_rate": 3.709363957597173e-05, |
| "loss": 0.4679, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.07510028096340407, |
| "grad_norm": 2.700162649154663, |
| "learning_rate": 3.7535335689045936e-05, |
| "loss": 0.4712, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.0759838136806206, |
| "grad_norm": 1.9648590087890625, |
| "learning_rate": 3.797703180212015e-05, |
| "loss": 0.4779, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.07686734639783711, |
| "grad_norm": 2.4238970279693604, |
| "learning_rate": 3.8418727915194345e-05, |
| "loss": 0.4463, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.07775087911505363, |
| "grad_norm": 1.745356798171997, |
| "learning_rate": 3.8860424028268556e-05, |
| "loss": 0.4917, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.07863441183227014, |
| "grad_norm": 5.889612197875977, |
| "learning_rate": 3.930212014134276e-05, |
| "loss": 0.5572, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.07951794454948667, |
| "grad_norm": 2.7529609203338623, |
| "learning_rate": 3.9743816254416965e-05, |
| "loss": 0.4553, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.08040147726670319, |
| "grad_norm": 2.4175944328308105, |
| "learning_rate": 4.018551236749117e-05, |
| "loss": 0.4598, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.0812850099839197, |
| "grad_norm": 2.2330217361450195, |
| "learning_rate": 4.0627208480565374e-05, |
| "loss": 0.5445, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.08216854270113623, |
| "grad_norm": 2.4177329540252686, |
| "learning_rate": 4.106890459363958e-05, |
| "loss": 0.4537, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.08305207541835274, |
| "grad_norm": 2.6188764572143555, |
| "learning_rate": 4.151060070671378e-05, |
| "loss": 0.5158, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.08393560813556926, |
| "grad_norm": 3.5044455528259277, |
| "learning_rate": 4.195229681978799e-05, |
| "loss": 0.4598, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.08481914085278577, |
| "grad_norm": 2.2751505374908447, |
| "learning_rate": 4.239399293286219e-05, |
| "loss": 0.4662, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.0857026735700023, |
| "grad_norm": 2.0289080142974854, |
| "learning_rate": 4.28356890459364e-05, |
| "loss": 0.459, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.08658620628721882, |
| "grad_norm": 2.6102516651153564, |
| "learning_rate": 4.32773851590106e-05, |
| "loss": 0.4275, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.08746973900443533, |
| "grad_norm": 2.5842251777648926, |
| "learning_rate": 4.3719081272084805e-05, |
| "loss": 0.5575, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.08835327172165185, |
| "grad_norm": 3.6427652835845947, |
| "learning_rate": 4.4160777385159016e-05, |
| "loss": 0.4197, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.08923680443886838, |
| "grad_norm": 1.8962676525115967, |
| "learning_rate": 4.4602473498233214e-05, |
| "loss": 0.4525, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.09012033715608489, |
| "grad_norm": 2.1373822689056396, |
| "learning_rate": 4.5044169611307425e-05, |
| "loss": 0.4469, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.0910038698733014, |
| "grad_norm": 5.542126178741455, |
| "learning_rate": 4.548586572438163e-05, |
| "loss": 0.5283, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.09188740259051793, |
| "grad_norm": 2.4414310455322266, |
| "learning_rate": 4.5927561837455834e-05, |
| "loss": 0.4826, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.09277093530773445, |
| "grad_norm": 3.52422833442688, |
| "learning_rate": 4.636925795053004e-05, |
| "loss": 0.3895, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.09365446802495096, |
| "grad_norm": 2.1975631713867188, |
| "learning_rate": 4.681095406360424e-05, |
| "loss": 0.4873, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.09453800074216748, |
| "grad_norm": 3.4910616874694824, |
| "learning_rate": 4.725265017667845e-05, |
| "loss": 0.4895, |
| "step": 5350 |
| }, |
| { |
| "epoch": 0.095421533459384, |
| "grad_norm": 2.1225690841674805, |
| "learning_rate": 4.769434628975265e-05, |
| "loss": 0.4686, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.09630506617660052, |
| "grad_norm": 2.2319257259368896, |
| "learning_rate": 4.8136042402826856e-05, |
| "loss": 0.4723, |
| "step": 5450 |
| }, |
| { |
| "epoch": 0.09718859889381704, |
| "grad_norm": 2.2340879440307617, |
| "learning_rate": 4.857773851590106e-05, |
| "loss": 0.5258, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.09807213161103355, |
| "grad_norm": 3.2808139324188232, |
| "learning_rate": 4.901943462897527e-05, |
| "loss": 0.4851, |
| "step": 5550 |
| }, |
| { |
| "epoch": 0.09895566432825008, |
| "grad_norm": 2.4828484058380127, |
| "learning_rate": 4.946113074204947e-05, |
| "loss": 0.5311, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.0998391970454666, |
| "grad_norm": 1.7307246923446655, |
| "learning_rate": 4.990282685512368e-05, |
| "loss": 0.411, |
| "step": 5650 |
| }, |
| { |
| "epoch": 0.10072272976268311, |
| "grad_norm": 1.9073278903961182, |
| "learning_rate": 4.996171290569595e-05, |
| "loss": 0.4184, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.10160626247989964, |
| "grad_norm": 1.8571208715438843, |
| "learning_rate": 4.9912626887357406e-05, |
| "loss": 0.4071, |
| "step": 5750 |
| }, |
| { |
| "epoch": 0.10248979519711615, |
| "grad_norm": 1.7524621486663818, |
| "learning_rate": 4.986354086901887e-05, |
| "loss": 0.4712, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.10337332791433267, |
| "grad_norm": 4.2943434715271, |
| "learning_rate": 4.9814454850680335e-05, |
| "loss": 0.4912, |
| "step": 5850 |
| }, |
| { |
| "epoch": 0.10425686063154918, |
| "grad_norm": 2.398043632507324, |
| "learning_rate": 4.97653688323418e-05, |
| "loss": 0.5589, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.10514039334876571, |
| "grad_norm": 1.9587973356246948, |
| "learning_rate": 4.9716282814003265e-05, |
| "loss": 0.4507, |
| "step": 5950 |
| }, |
| { |
| "epoch": 0.10602392606598222, |
| "grad_norm": 2.0629475116729736, |
| "learning_rate": 4.966719679566473e-05, |
| "loss": 0.5429, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.10690745878319874, |
| "grad_norm": 1.6127039194107056, |
| "learning_rate": 4.961811077732619e-05, |
| "loss": 0.3789, |
| "step": 6050 |
| }, |
| { |
| "epoch": 0.10779099150041525, |
| "grad_norm": 2.230015993118286, |
| "learning_rate": 4.956902475898765e-05, |
| "loss": 0.3949, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.10867452421763178, |
| "grad_norm": 1.9963310956954956, |
| "learning_rate": 4.9519938740649116e-05, |
| "loss": 0.4491, |
| "step": 6150 |
| }, |
| { |
| "epoch": 0.1095580569348483, |
| "grad_norm": 2.2731542587280273, |
| "learning_rate": 4.947085272231058e-05, |
| "loss": 0.435, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.11044158965206481, |
| "grad_norm": 2.447551727294922, |
| "learning_rate": 4.9421766703972046e-05, |
| "loss": 0.3865, |
| "step": 6250 |
| }, |
| { |
| "epoch": 0.11132512236928134, |
| "grad_norm": 2.126950740814209, |
| "learning_rate": 4.9372680685633504e-05, |
| "loss": 0.4175, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.11220865508649785, |
| "grad_norm": 2.22995924949646, |
| "learning_rate": 4.932359466729497e-05, |
| "loss": 0.4387, |
| "step": 6350 |
| }, |
| { |
| "epoch": 0.11309218780371437, |
| "grad_norm": 1.5801736116409302, |
| "learning_rate": 4.927450864895643e-05, |
| "loss": 0.4554, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.11397572052093088, |
| "grad_norm": 4.113645553588867, |
| "learning_rate": 4.92254226306179e-05, |
| "loss": 0.581, |
| "step": 6450 |
| }, |
| { |
| "epoch": 0.11485925323814741, |
| "grad_norm": 1.6027569770812988, |
| "learning_rate": 4.917633661227936e-05, |
| "loss": 0.4746, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.11574278595536393, |
| "grad_norm": 2.0555272102355957, |
| "learning_rate": 4.912725059394083e-05, |
| "loss": 0.4511, |
| "step": 6550 |
| }, |
| { |
| "epoch": 0.11662631867258044, |
| "grad_norm": 2.6827495098114014, |
| "learning_rate": 4.9078164575602285e-05, |
| "loss": 0.3871, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.11750985138979697, |
| "grad_norm": 1.969202995300293, |
| "learning_rate": 4.902907855726375e-05, |
| "loss": 0.449, |
| "step": 6650 |
| }, |
| { |
| "epoch": 0.11839338410701349, |
| "grad_norm": 1.9535086154937744, |
| "learning_rate": 4.8979992538925214e-05, |
| "loss": 0.3458, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.11927691682423, |
| "grad_norm": 1.7251821756362915, |
| "learning_rate": 4.893090652058668e-05, |
| "loss": 0.4791, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.12016044954144652, |
| "grad_norm": 1.7175688743591309, |
| "learning_rate": 4.8881820502248144e-05, |
| "loss": 0.4445, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.12104398225866304, |
| "grad_norm": 3.1055896282196045, |
| "learning_rate": 4.88327344839096e-05, |
| "loss": 0.4907, |
| "step": 6850 |
| }, |
| { |
| "epoch": 0.12192751497587956, |
| "grad_norm": 3.251380681991577, |
| "learning_rate": 4.8783648465571066e-05, |
| "loss": 0.5377, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.12281104769309607, |
| "grad_norm": 2.909510850906372, |
| "learning_rate": 4.873456244723254e-05, |
| "loss": 0.5275, |
| "step": 6950 |
| }, |
| { |
| "epoch": 0.12369458041031259, |
| "grad_norm": 2.0700035095214844, |
| "learning_rate": 4.8685476428893995e-05, |
| "loss": 0.5489, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.12457811312752912, |
| "grad_norm": 1.9759315252304077, |
| "learning_rate": 4.863639041055546e-05, |
| "loss": 0.3931, |
| "step": 7050 |
| }, |
| { |
| "epoch": 0.12546164584474562, |
| "grad_norm": 1.9036837816238403, |
| "learning_rate": 4.8587304392216925e-05, |
| "loss": 0.5155, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.12634517856196215, |
| "grad_norm": 3.4224536418914795, |
| "learning_rate": 4.853821837387838e-05, |
| "loss": 0.4282, |
| "step": 7150 |
| }, |
| { |
| "epoch": 0.12722871127917867, |
| "grad_norm": 3.1725916862487793, |
| "learning_rate": 4.8489132355539854e-05, |
| "loss": 0.4639, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.12811224399639518, |
| "grad_norm": 1.7154817581176758, |
| "learning_rate": 4.844004633720131e-05, |
| "loss": 0.5294, |
| "step": 7250 |
| }, |
| { |
| "epoch": 0.1289957767136117, |
| "grad_norm": 2.130659580230713, |
| "learning_rate": 4.839096031886278e-05, |
| "loss": 0.4121, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.12987930943082823, |
| "grad_norm": 1.8878060579299927, |
| "learning_rate": 4.834187430052424e-05, |
| "loss": 0.4139, |
| "step": 7350 |
| }, |
| { |
| "epoch": 0.13076284214804473, |
| "grad_norm": 1.9885565042495728, |
| "learning_rate": 4.82927882821857e-05, |
| "loss": 0.4311, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.13164637486526126, |
| "grad_norm": 2.3639650344848633, |
| "learning_rate": 4.824370226384717e-05, |
| "loss": 0.4025, |
| "step": 7450 |
| }, |
| { |
| "epoch": 0.1325299075824778, |
| "grad_norm": 3.4997270107269287, |
| "learning_rate": 4.8194616245508635e-05, |
| "loss": 0.4791, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.1334134402996943, |
| "grad_norm": 1.644084095954895, |
| "learning_rate": 4.814553022717009e-05, |
| "loss": 0.4498, |
| "step": 7550 |
| }, |
| { |
| "epoch": 0.13429697301691082, |
| "grad_norm": 1.8292336463928223, |
| "learning_rate": 4.809644420883156e-05, |
| "loss": 0.4538, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.13518050573412735, |
| "grad_norm": 3.380443572998047, |
| "learning_rate": 4.804735819049302e-05, |
| "loss": 0.4596, |
| "step": 7650 |
| }, |
| { |
| "epoch": 0.13606403845134385, |
| "grad_norm": 1.6248747110366821, |
| "learning_rate": 4.799827217215449e-05, |
| "loss": 0.3508, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.13694757116856038, |
| "grad_norm": 1.6644774675369263, |
| "learning_rate": 4.794918615381595e-05, |
| "loss": 0.5145, |
| "step": 7750 |
| }, |
| { |
| "epoch": 0.13783110388577688, |
| "grad_norm": 1.8441638946533203, |
| "learning_rate": 4.790010013547741e-05, |
| "loss": 0.3505, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.1387146366029934, |
| "grad_norm": 1.761982798576355, |
| "learning_rate": 4.7851014117138874e-05, |
| "loss": 0.3354, |
| "step": 7850 |
| }, |
| { |
| "epoch": 0.13959816932020994, |
| "grad_norm": 3.417602777481079, |
| "learning_rate": 4.780192809880034e-05, |
| "loss": 0.4474, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.14048170203742644, |
| "grad_norm": 1.7687017917633057, |
| "learning_rate": 4.7752842080461804e-05, |
| "loss": 0.3524, |
| "step": 7950 |
| }, |
| { |
| "epoch": 0.14136523475464297, |
| "grad_norm": 3.2442593574523926, |
| "learning_rate": 4.770375606212327e-05, |
| "loss": 0.4957, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.1422487674718595, |
| "grad_norm": 1.813818335533142, |
| "learning_rate": 4.765467004378473e-05, |
| "loss": 0.4461, |
| "step": 8050 |
| }, |
| { |
| "epoch": 0.143132300189076, |
| "grad_norm": 1.936123013496399, |
| "learning_rate": 4.760558402544619e-05, |
| "loss": 0.4983, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.14401583290629252, |
| "grad_norm": 2.0068929195404053, |
| "learning_rate": 4.7556498007107656e-05, |
| "loss": 0.4535, |
| "step": 8150 |
| }, |
| { |
| "epoch": 0.14489936562350905, |
| "grad_norm": 1.6743545532226562, |
| "learning_rate": 4.750741198876913e-05, |
| "loss": 0.3668, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.14578289834072555, |
| "grad_norm": 1.9963476657867432, |
| "learning_rate": 4.7458325970430585e-05, |
| "loss": 0.4688, |
| "step": 8250 |
| }, |
| { |
| "epoch": 0.14666643105794208, |
| "grad_norm": 1.7402074337005615, |
| "learning_rate": 4.740923995209205e-05, |
| "loss": 0.3967, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.14754996377515858, |
| "grad_norm": 2.0074145793914795, |
| "learning_rate": 4.736015393375351e-05, |
| "loss": 0.4911, |
| "step": 8350 |
| }, |
| { |
| "epoch": 0.1484334964923751, |
| "grad_norm": 1.7804876565933228, |
| "learning_rate": 4.731106791541497e-05, |
| "loss": 0.4076, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.14931702920959164, |
| "grad_norm": 2.1234054565429688, |
| "learning_rate": 4.7261981897076444e-05, |
| "loss": 0.398, |
| "step": 8450 |
| }, |
| { |
| "epoch": 0.15020056192680814, |
| "grad_norm": 2.1532113552093506, |
| "learning_rate": 4.72128958787379e-05, |
| "loss": 0.4203, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.15108409464402467, |
| "grad_norm": 1.8909550905227661, |
| "learning_rate": 4.7163809860399366e-05, |
| "loss": 0.414, |
| "step": 8550 |
| }, |
| { |
| "epoch": 0.1519676273612412, |
| "grad_norm": 1.9415462017059326, |
| "learning_rate": 4.711472384206083e-05, |
| "loss": 0.3436, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.1528511600784577, |
| "grad_norm": 2.2018544673919678, |
| "learning_rate": 4.706563782372229e-05, |
| "loss": 0.436, |
| "step": 8650 |
| }, |
| { |
| "epoch": 0.15373469279567423, |
| "grad_norm": 1.5418767929077148, |
| "learning_rate": 4.701655180538376e-05, |
| "loss": 0.3761, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.15461822551289076, |
| "grad_norm": 4.974616050720215, |
| "learning_rate": 4.6967465787045225e-05, |
| "loss": 0.5579, |
| "step": 8750 |
| }, |
| { |
| "epoch": 0.15550175823010726, |
| "grad_norm": 1.8653486967086792, |
| "learning_rate": 4.691837976870668e-05, |
| "loss": 0.441, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.15638529094732379, |
| "grad_norm": 2.2241523265838623, |
| "learning_rate": 4.686929375036815e-05, |
| "loss": 0.5877, |
| "step": 8850 |
| }, |
| { |
| "epoch": 0.15726882366454029, |
| "grad_norm": 1.8084393739700317, |
| "learning_rate": 4.6820207732029605e-05, |
| "loss": 0.4081, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.15815235638175681, |
| "grad_norm": 1.5464160442352295, |
| "learning_rate": 4.677112171369108e-05, |
| "loss": 0.4648, |
| "step": 8950 |
| }, |
| { |
| "epoch": 0.15903588909897334, |
| "grad_norm": 1.7731395959854126, |
| "learning_rate": 4.672203569535254e-05, |
| "loss": 0.4321, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.15991942181618984, |
| "grad_norm": 1.8130481243133545, |
| "learning_rate": 4.6672949677014e-05, |
| "loss": 0.4226, |
| "step": 9050 |
| }, |
| { |
| "epoch": 0.16080295453340637, |
| "grad_norm": 2.4127371311187744, |
| "learning_rate": 4.6623863658675464e-05, |
| "loss": 0.3634, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.1616864872506229, |
| "grad_norm": 2.362494707107544, |
| "learning_rate": 4.657477764033693e-05, |
| "loss": 0.4252, |
| "step": 9150 |
| }, |
| { |
| "epoch": 0.1625700199678394, |
| "grad_norm": 1.855000615119934, |
| "learning_rate": 4.6525691621998393e-05, |
| "loss": 0.3899, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.16345355268505593, |
| "grad_norm": 1.8728185892105103, |
| "learning_rate": 4.647660560365986e-05, |
| "loss": 0.4335, |
| "step": 9250 |
| }, |
| { |
| "epoch": 0.16433708540227246, |
| "grad_norm": 1.977250576019287, |
| "learning_rate": 4.642751958532132e-05, |
| "loss": 0.4204, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.16522061811948896, |
| "grad_norm": 4.992434978485107, |
| "learning_rate": 4.637843356698278e-05, |
| "loss": 0.5576, |
| "step": 9350 |
| }, |
| { |
| "epoch": 0.1661041508367055, |
| "grad_norm": 1.673086166381836, |
| "learning_rate": 4.6329347548644245e-05, |
| "loss": 0.4712, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.166987683553922, |
| "grad_norm": 1.8109374046325684, |
| "learning_rate": 4.628026153030571e-05, |
| "loss": 0.366, |
| "step": 9450 |
| }, |
| { |
| "epoch": 0.16787121627113852, |
| "grad_norm": 1.9352269172668457, |
| "learning_rate": 4.6231175511967175e-05, |
| "loss": 0.3932, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.16875474898835505, |
| "grad_norm": 1.7740451097488403, |
| "learning_rate": 4.618208949362864e-05, |
| "loss": 0.4836, |
| "step": 9550 |
| }, |
| { |
| "epoch": 0.16963828170557155, |
| "grad_norm": 2.0106916427612305, |
| "learning_rate": 4.61330034752901e-05, |
| "loss": 0.3989, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.17052181442278808, |
| "grad_norm": 1.5831292867660522, |
| "learning_rate": 4.608391745695156e-05, |
| "loss": 0.4025, |
| "step": 9650 |
| }, |
| { |
| "epoch": 0.1714053471400046, |
| "grad_norm": 5.1861371994018555, |
| "learning_rate": 4.6034831438613027e-05, |
| "loss": 0.467, |
| "step": 9700 |
| }, |
| { |
| "epoch": 0.1722888798572211, |
| "grad_norm": 3.7466721534729004, |
| "learning_rate": 4.598574542027449e-05, |
| "loss": 0.3558, |
| "step": 9750 |
| }, |
| { |
| "epoch": 0.17317241257443763, |
| "grad_norm": 2.143721342086792, |
| "learning_rate": 4.5936659401935956e-05, |
| "loss": 0.3623, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.17405594529165416, |
| "grad_norm": 2.1482434272766113, |
| "learning_rate": 4.588757338359742e-05, |
| "loss": 0.3438, |
| "step": 9850 |
| }, |
| { |
| "epoch": 0.17493947800887066, |
| "grad_norm": 1.458309531211853, |
| "learning_rate": 4.583848736525888e-05, |
| "loss": 0.4193, |
| "step": 9900 |
| }, |
| { |
| "epoch": 0.1758230107260872, |
| "grad_norm": 1.8698090314865112, |
| "learning_rate": 4.578940134692034e-05, |
| "loss": 0.3173, |
| "step": 9950 |
| }, |
| { |
| "epoch": 0.1767065434433037, |
| "grad_norm": 2.087970018386841, |
| "learning_rate": 4.574031532858181e-05, |
| "loss": 0.4569, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.17759007616052022, |
| "grad_norm": 1.6226812601089478, |
| "learning_rate": 4.569122931024327e-05, |
| "loss": 0.4538, |
| "step": 10050 |
| }, |
| { |
| "epoch": 0.17847360887773675, |
| "grad_norm": 1.9845385551452637, |
| "learning_rate": 4.564214329190474e-05, |
| "loss": 0.4422, |
| "step": 10100 |
| }, |
| { |
| "epoch": 0.17935714159495325, |
| "grad_norm": 1.7016047239303589, |
| "learning_rate": 4.5593057273566195e-05, |
| "loss": 0.3747, |
| "step": 10150 |
| }, |
| { |
| "epoch": 0.18024067431216978, |
| "grad_norm": 2.2167670726776123, |
| "learning_rate": 4.5543971255227666e-05, |
| "loss": 0.3989, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.1811242070293863, |
| "grad_norm": 1.464385747909546, |
| "learning_rate": 4.549488523688913e-05, |
| "loss": 0.5315, |
| "step": 10250 |
| }, |
| { |
| "epoch": 0.1820077397466028, |
| "grad_norm": 1.2073971033096313, |
| "learning_rate": 4.544579921855059e-05, |
| "loss": 0.3565, |
| "step": 10300 |
| }, |
| { |
| "epoch": 0.18289127246381934, |
| "grad_norm": 1.1773017644882202, |
| "learning_rate": 4.5396713200212054e-05, |
| "loss": 0.4409, |
| "step": 10350 |
| }, |
| { |
| "epoch": 0.18377480518103587, |
| "grad_norm": 2.4389290809631348, |
| "learning_rate": 4.534762718187352e-05, |
| "loss": 0.3762, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.18465833789825237, |
| "grad_norm": 3.560997247695923, |
| "learning_rate": 4.529854116353498e-05, |
| "loss": 0.4571, |
| "step": 10450 |
| }, |
| { |
| "epoch": 0.1855418706154689, |
| "grad_norm": 2.0075438022613525, |
| "learning_rate": 4.524945514519645e-05, |
| "loss": 0.3561, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.1864254033326854, |
| "grad_norm": 2.405439853668213, |
| "learning_rate": 4.5200369126857906e-05, |
| "loss": 0.4595, |
| "step": 10550 |
| }, |
| { |
| "epoch": 0.18730893604990193, |
| "grad_norm": 1.6211732625961304, |
| "learning_rate": 4.515128310851937e-05, |
| "loss": 0.4576, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.18819246876711845, |
| "grad_norm": 1.7272285223007202, |
| "learning_rate": 4.5102197090180835e-05, |
| "loss": 0.4957, |
| "step": 10650 |
| }, |
| { |
| "epoch": 0.18907600148433495, |
| "grad_norm": 1.529583215713501, |
| "learning_rate": 4.50531110718423e-05, |
| "loss": 0.3533, |
| "step": 10700 |
| }, |
| { |
| "epoch": 0.18995953420155148, |
| "grad_norm": 1.3267425298690796, |
| "learning_rate": 4.5004025053503764e-05, |
| "loss": 0.5213, |
| "step": 10750 |
| }, |
| { |
| "epoch": 0.190843066918768, |
| "grad_norm": 2.40889573097229, |
| "learning_rate": 4.495493903516523e-05, |
| "loss": 0.4372, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.1917265996359845, |
| "grad_norm": 2.532017230987549, |
| "learning_rate": 4.4906834737193457e-05, |
| "loss": 0.3286, |
| "step": 10850 |
| }, |
| { |
| "epoch": 0.19261013235320104, |
| "grad_norm": 3.721505641937256, |
| "learning_rate": 4.485774871885493e-05, |
| "loss": 0.4082, |
| "step": 10900 |
| }, |
| { |
| "epoch": 0.19349366507041757, |
| "grad_norm": 2.2368271350860596, |
| "learning_rate": 4.4808662700516386e-05, |
| "loss": 0.4056, |
| "step": 10950 |
| }, |
| { |
| "epoch": 0.19437719778763407, |
| "grad_norm": 2.2011897563934326, |
| "learning_rate": 4.475957668217785e-05, |
| "loss": 0.4435, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.1952607305048506, |
| "grad_norm": 2.1512463092803955, |
| "learning_rate": 4.4710490663839315e-05, |
| "loss": 0.4272, |
| "step": 11050 |
| }, |
| { |
| "epoch": 0.1961442632220671, |
| "grad_norm": 1.5526123046875, |
| "learning_rate": 4.466140464550077e-05, |
| "loss": 0.4334, |
| "step": 11100 |
| }, |
| { |
| "epoch": 0.19702779593928363, |
| "grad_norm": 1.4258567094802856, |
| "learning_rate": 4.4612318627162245e-05, |
| "loss": 0.4479, |
| "step": 11150 |
| }, |
| { |
| "epoch": 0.19791132865650016, |
| "grad_norm": 3.2408463954925537, |
| "learning_rate": 4.456323260882371e-05, |
| "loss": 0.3545, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.19879486137371666, |
| "grad_norm": 2.1903252601623535, |
| "learning_rate": 4.451414659048517e-05, |
| "loss": 0.3192, |
| "step": 11250 |
| }, |
| { |
| "epoch": 0.1996783940909332, |
| "grad_norm": 1.9699974060058594, |
| "learning_rate": 4.446506057214663e-05, |
| "loss": 0.3883, |
| "step": 11300 |
| }, |
| { |
| "epoch": 0.20056192680814972, |
| "grad_norm": 1.7133831977844238, |
| "learning_rate": 4.441597455380809e-05, |
| "loss": 0.3312, |
| "step": 11350 |
| }, |
| { |
| "epoch": 0.20144545952536622, |
| "grad_norm": 3.0174543857574463, |
| "learning_rate": 4.436688853546956e-05, |
| "loss": 0.4888, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.20232899224258274, |
| "grad_norm": 2.010566473007202, |
| "learning_rate": 4.4317802517131026e-05, |
| "loss": 0.5102, |
| "step": 11450 |
| }, |
| { |
| "epoch": 0.20321252495979927, |
| "grad_norm": 2.093271493911743, |
| "learning_rate": 4.4268716498792484e-05, |
| "loss": 0.4133, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.20409605767701577, |
| "grad_norm": 1.9231561422348022, |
| "learning_rate": 4.421963048045395e-05, |
| "loss": 0.4255, |
| "step": 11550 |
| }, |
| { |
| "epoch": 0.2049795903942323, |
| "grad_norm": 1.561781644821167, |
| "learning_rate": 4.417054446211541e-05, |
| "loss": 0.3766, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.2058631231114488, |
| "grad_norm": 2.006748676300049, |
| "learning_rate": 4.412145844377688e-05, |
| "loss": 0.3651, |
| "step": 11650 |
| }, |
| { |
| "epoch": 0.20674665582866533, |
| "grad_norm": 1.5192091464996338, |
| "learning_rate": 4.407237242543834e-05, |
| "loss": 0.4562, |
| "step": 11700 |
| }, |
| { |
| "epoch": 0.20763018854588186, |
| "grad_norm": 1.820331335067749, |
| "learning_rate": 4.402328640709981e-05, |
| "loss": 0.3946, |
| "step": 11750 |
| }, |
| { |
| "epoch": 0.20851372126309836, |
| "grad_norm": 3.302582025527954, |
| "learning_rate": 4.3974200388761265e-05, |
| "loss": 0.4075, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.2093972539803149, |
| "grad_norm": 2.601897716522217, |
| "learning_rate": 4.392511437042273e-05, |
| "loss": 0.4304, |
| "step": 11850 |
| }, |
| { |
| "epoch": 0.21028078669753142, |
| "grad_norm": 1.58085036277771, |
| "learning_rate": 4.3876028352084194e-05, |
| "loss": 0.3404, |
| "step": 11900 |
| }, |
| { |
| "epoch": 0.21116431941474792, |
| "grad_norm": 1.7569571733474731, |
| "learning_rate": 4.382694233374566e-05, |
| "loss": 0.4013, |
| "step": 11950 |
| }, |
| { |
| "epoch": 0.21204785213196445, |
| "grad_norm": 1.9872467517852783, |
| "learning_rate": 4.3777856315407124e-05, |
| "loss": 0.4278, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.21293138484918098, |
| "grad_norm": 1.4981114864349365, |
| "learning_rate": 4.372877029706858e-05, |
| "loss": 0.3905, |
| "step": 12050 |
| }, |
| { |
| "epoch": 0.21381491756639748, |
| "grad_norm": 1.6444882154464722, |
| "learning_rate": 4.3679684278730046e-05, |
| "loss": 0.4082, |
| "step": 12100 |
| }, |
| { |
| "epoch": 0.214698450283614, |
| "grad_norm": 1.9731707572937012, |
| "learning_rate": 4.363059826039151e-05, |
| "loss": 0.3855, |
| "step": 12150 |
| }, |
| { |
| "epoch": 0.2155819830008305, |
| "grad_norm": 2.66648268699646, |
| "learning_rate": 4.3581512242052976e-05, |
| "loss": 0.4567, |
| "step": 12200 |
| }, |
| { |
| "epoch": 0.21646551571804704, |
| "grad_norm": 2.0770373344421387, |
| "learning_rate": 4.353242622371444e-05, |
| "loss": 0.4368, |
| "step": 12250 |
| }, |
| { |
| "epoch": 0.21734904843526356, |
| "grad_norm": 1.4739536046981812, |
| "learning_rate": 4.3483340205375905e-05, |
| "loss": 0.3686, |
| "step": 12300 |
| }, |
| { |
| "epoch": 0.21823258115248007, |
| "grad_norm": 1.8857239484786987, |
| "learning_rate": 4.343425418703736e-05, |
| "loss": 0.4163, |
| "step": 12350 |
| }, |
| { |
| "epoch": 0.2191161138696966, |
| "grad_norm": 1.722424030303955, |
| "learning_rate": 4.3385168168698834e-05, |
| "loss": 0.3595, |
| "step": 12400 |
| }, |
| { |
| "epoch": 0.21999964658691312, |
| "grad_norm": 1.5602166652679443, |
| "learning_rate": 4.333608215036029e-05, |
| "loss": 0.3326, |
| "step": 12450 |
| }, |
| { |
| "epoch": 0.22088317930412962, |
| "grad_norm": 1.7230535745620728, |
| "learning_rate": 4.328699613202176e-05, |
| "loss": 0.3775, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.22176671202134615, |
| "grad_norm": 1.8666094541549683, |
| "learning_rate": 4.323791011368322e-05, |
| "loss": 0.3695, |
| "step": 12550 |
| }, |
| { |
| "epoch": 0.22265024473856268, |
| "grad_norm": 3.1689233779907227, |
| "learning_rate": 4.318882409534468e-05, |
| "loss": 0.3545, |
| "step": 12600 |
| }, |
| { |
| "epoch": 0.22353377745577918, |
| "grad_norm": 1.8885284662246704, |
| "learning_rate": 4.313973807700615e-05, |
| "loss": 0.3548, |
| "step": 12650 |
| }, |
| { |
| "epoch": 0.2244173101729957, |
| "grad_norm": 1.8508330583572388, |
| "learning_rate": 4.3090652058667615e-05, |
| "loss": 0.4847, |
| "step": 12700 |
| }, |
| { |
| "epoch": 0.22530084289021224, |
| "grad_norm": 2.1445882320404053, |
| "learning_rate": 4.304156604032907e-05, |
| "loss": 0.4, |
| "step": 12750 |
| }, |
| { |
| "epoch": 0.22618437560742874, |
| "grad_norm": 1.721024990081787, |
| "learning_rate": 4.299248002199054e-05, |
| "loss": 0.4755, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.22706790832464527, |
| "grad_norm": 1.7713844776153564, |
| "learning_rate": 4.2943394003652e-05, |
| "loss": 0.3399, |
| "step": 12850 |
| }, |
| { |
| "epoch": 0.22795144104186177, |
| "grad_norm": 1.2936394214630127, |
| "learning_rate": 4.289528970568024e-05, |
| "loss": 0.3297, |
| "step": 12900 |
| }, |
| { |
| "epoch": 0.2288349737590783, |
| "grad_norm": 1.6622658967971802, |
| "learning_rate": 4.28462036873417e-05, |
| "loss": 0.4071, |
| "step": 12950 |
| }, |
| { |
| "epoch": 0.22971850647629483, |
| "grad_norm": 1.3949196338653564, |
| "learning_rate": 4.279711766900316e-05, |
| "loss": 0.4069, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.23060203919351133, |
| "grad_norm": 1.8681453466415405, |
| "learning_rate": 4.2748031650664624e-05, |
| "loss": 0.5156, |
| "step": 13050 |
| }, |
| { |
| "epoch": 0.23148557191072786, |
| "grad_norm": 1.6242793798446655, |
| "learning_rate": 4.2698945632326096e-05, |
| "loss": 0.4359, |
| "step": 13100 |
| }, |
| { |
| "epoch": 0.23236910462794438, |
| "grad_norm": 2.897428035736084, |
| "learning_rate": 4.2649859613987554e-05, |
| "loss": 0.3702, |
| "step": 13150 |
| }, |
| { |
| "epoch": 0.23325263734516088, |
| "grad_norm": 1.855938196182251, |
| "learning_rate": 4.260077359564902e-05, |
| "loss": 0.5026, |
| "step": 13200 |
| }, |
| { |
| "epoch": 0.2341361700623774, |
| "grad_norm": 1.818076252937317, |
| "learning_rate": 4.2551687577310476e-05, |
| "loss": 0.5201, |
| "step": 13250 |
| }, |
| { |
| "epoch": 0.23501970277959394, |
| "grad_norm": 1.9688682556152344, |
| "learning_rate": 4.250260155897194e-05, |
| "loss": 0.3857, |
| "step": 13300 |
| }, |
| { |
| "epoch": 0.23590323549681044, |
| "grad_norm": 2.4908297061920166, |
| "learning_rate": 4.245351554063341e-05, |
| "loss": 0.3555, |
| "step": 13350 |
| }, |
| { |
| "epoch": 0.23678676821402697, |
| "grad_norm": 1.9015276432037354, |
| "learning_rate": 4.240442952229487e-05, |
| "loss": 0.381, |
| "step": 13400 |
| }, |
| { |
| "epoch": 0.23767030093124347, |
| "grad_norm": 3.011683225631714, |
| "learning_rate": 4.2355343503956335e-05, |
| "loss": 0.3804, |
| "step": 13450 |
| }, |
| { |
| "epoch": 0.23855383364846, |
| "grad_norm": 3.5077691078186035, |
| "learning_rate": 4.23062574856178e-05, |
| "loss": 0.3666, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.23943736636567653, |
| "grad_norm": 2.875953197479248, |
| "learning_rate": 4.225717146727926e-05, |
| "loss": 0.3792, |
| "step": 13550 |
| }, |
| { |
| "epoch": 0.24032089908289303, |
| "grad_norm": 2.3432717323303223, |
| "learning_rate": 4.220808544894073e-05, |
| "loss": 0.3341, |
| "step": 13600 |
| }, |
| { |
| "epoch": 0.24120443180010956, |
| "grad_norm": 1.6648529767990112, |
| "learning_rate": 4.2158999430602194e-05, |
| "loss": 0.4906, |
| "step": 13650 |
| }, |
| { |
| "epoch": 0.2420879645173261, |
| "grad_norm": 2.034646987915039, |
| "learning_rate": 4.210991341226365e-05, |
| "loss": 0.541, |
| "step": 13700 |
| }, |
| { |
| "epoch": 0.2429714972345426, |
| "grad_norm": 1.2273883819580078, |
| "learning_rate": 4.2060827393925116e-05, |
| "loss": 0.3936, |
| "step": 13750 |
| }, |
| { |
| "epoch": 0.24385502995175912, |
| "grad_norm": 1.6031947135925293, |
| "learning_rate": 4.201174137558658e-05, |
| "loss": 0.3871, |
| "step": 13800 |
| }, |
| { |
| "epoch": 0.24473856266897565, |
| "grad_norm": 1.7289350032806396, |
| "learning_rate": 4.1962655357248045e-05, |
| "loss": 0.2983, |
| "step": 13850 |
| }, |
| { |
| "epoch": 0.24562209538619215, |
| "grad_norm": 1.792413592338562, |
| "learning_rate": 4.191356933890951e-05, |
| "loss": 0.4071, |
| "step": 13900 |
| }, |
| { |
| "epoch": 0.24650562810340867, |
| "grad_norm": 1.5456571578979492, |
| "learning_rate": 4.186448332057097e-05, |
| "loss": 0.3434, |
| "step": 13950 |
| }, |
| { |
| "epoch": 0.24738916082062518, |
| "grad_norm": 1.9666177034378052, |
| "learning_rate": 4.181539730223243e-05, |
| "loss": 0.3885, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.2482726935378417, |
| "grad_norm": 2.5290989875793457, |
| "learning_rate": 4.17663112838939e-05, |
| "loss": 0.4296, |
| "step": 14050 |
| }, |
| { |
| "epoch": 0.24915622625505823, |
| "grad_norm": 1.9654839038848877, |
| "learning_rate": 4.171722526555536e-05, |
| "loss": 0.3853, |
| "step": 14100 |
| }, |
| { |
| "epoch": 0.25003975897227476, |
| "grad_norm": 1.68603515625, |
| "learning_rate": 4.166813924721683e-05, |
| "loss": 0.4068, |
| "step": 14150 |
| }, |
| { |
| "epoch": 0.25092329168949123, |
| "grad_norm": 1.9062405824661255, |
| "learning_rate": 4.161905322887829e-05, |
| "loss": 0.4071, |
| "step": 14200 |
| }, |
| { |
| "epoch": 0.25180682440670776, |
| "grad_norm": 1.7028473615646362, |
| "learning_rate": 4.156996721053975e-05, |
| "loss": 0.3588, |
| "step": 14250 |
| }, |
| { |
| "epoch": 0.2526903571239243, |
| "grad_norm": 1.6032434701919556, |
| "learning_rate": 4.1520881192201214e-05, |
| "loss": 0.4161, |
| "step": 14300 |
| }, |
| { |
| "epoch": 0.2535738898411408, |
| "grad_norm": 1.6103026866912842, |
| "learning_rate": 4.147179517386268e-05, |
| "loss": 0.3431, |
| "step": 14350 |
| }, |
| { |
| "epoch": 0.25445742255835735, |
| "grad_norm": 3.727078914642334, |
| "learning_rate": 4.142270915552414e-05, |
| "loss": 0.3576, |
| "step": 14400 |
| }, |
| { |
| "epoch": 0.2553409552755739, |
| "grad_norm": 1.3540493249893188, |
| "learning_rate": 4.137362313718561e-05, |
| "loss": 0.3563, |
| "step": 14450 |
| }, |
| { |
| "epoch": 0.25622448799279035, |
| "grad_norm": 1.7373064756393433, |
| "learning_rate": 4.1324537118847066e-05, |
| "loss": 0.3406, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.2571080207100069, |
| "grad_norm": 2.6311392784118652, |
| "learning_rate": 4.127545110050853e-05, |
| "loss": 0.4397, |
| "step": 14550 |
| }, |
| { |
| "epoch": 0.2579915534272234, |
| "grad_norm": 1.845186471939087, |
| "learning_rate": 4.122636508217e-05, |
| "loss": 0.411, |
| "step": 14600 |
| }, |
| { |
| "epoch": 0.25887508614443994, |
| "grad_norm": 1.5897334814071655, |
| "learning_rate": 4.117727906383146e-05, |
| "loss": 0.3742, |
| "step": 14650 |
| }, |
| { |
| "epoch": 0.25975861886165647, |
| "grad_norm": 3.667428970336914, |
| "learning_rate": 4.1128193045492924e-05, |
| "loss": 0.3622, |
| "step": 14700 |
| }, |
| { |
| "epoch": 0.26064215157887294, |
| "grad_norm": 1.7393996715545654, |
| "learning_rate": 4.107910702715439e-05, |
| "loss": 0.2782, |
| "step": 14750 |
| }, |
| { |
| "epoch": 0.26152568429608947, |
| "grad_norm": 1.6495802402496338, |
| "learning_rate": 4.103002100881585e-05, |
| "loss": 0.36, |
| "step": 14800 |
| }, |
| { |
| "epoch": 0.262409217013306, |
| "grad_norm": 1.5133942365646362, |
| "learning_rate": 4.098093499047732e-05, |
| "loss": 0.486, |
| "step": 14850 |
| }, |
| { |
| "epoch": 0.2632927497305225, |
| "grad_norm": 1.848177194595337, |
| "learning_rate": 4.0932830692505546e-05, |
| "loss": 0.406, |
| "step": 14900 |
| }, |
| { |
| "epoch": 0.26417628244773905, |
| "grad_norm": 3.320469379425049, |
| "learning_rate": 4.088374467416701e-05, |
| "loss": 0.357, |
| "step": 14950 |
| }, |
| { |
| "epoch": 0.2650598151649556, |
| "grad_norm": 1.417015790939331, |
| "learning_rate": 4.0834658655828475e-05, |
| "loss": 0.2855, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.26594334788217205, |
| "grad_norm": 1.8597488403320312, |
| "learning_rate": 4.078557263748994e-05, |
| "loss": 0.4424, |
| "step": 15050 |
| }, |
| { |
| "epoch": 0.2668268805993886, |
| "grad_norm": 1.651663899421692, |
| "learning_rate": 4.0736486619151405e-05, |
| "loss": 0.352, |
| "step": 15100 |
| }, |
| { |
| "epoch": 0.2677104133166051, |
| "grad_norm": 1.452006459236145, |
| "learning_rate": 4.068740060081286e-05, |
| "loss": 0.3638, |
| "step": 15150 |
| }, |
| { |
| "epoch": 0.26859394603382164, |
| "grad_norm": 2.7887187004089355, |
| "learning_rate": 4.063831458247433e-05, |
| "loss": 0.3727, |
| "step": 15200 |
| }, |
| { |
| "epoch": 0.26947747875103817, |
| "grad_norm": 1.9209206104278564, |
| "learning_rate": 4.058922856413579e-05, |
| "loss": 0.3842, |
| "step": 15250 |
| }, |
| { |
| "epoch": 0.2703610114682547, |
| "grad_norm": 1.946022868156433, |
| "learning_rate": 4.054014254579726e-05, |
| "loss": 0.3625, |
| "step": 15300 |
| }, |
| { |
| "epoch": 0.27124454418547117, |
| "grad_norm": 1.4893426895141602, |
| "learning_rate": 4.049105652745872e-05, |
| "loss": 0.4088, |
| "step": 15350 |
| }, |
| { |
| "epoch": 0.2721280769026877, |
| "grad_norm": 1.7391968965530396, |
| "learning_rate": 4.0441970509120186e-05, |
| "loss": 0.4126, |
| "step": 15400 |
| }, |
| { |
| "epoch": 0.2730116096199042, |
| "grad_norm": 1.7254865169525146, |
| "learning_rate": 4.0392884490781644e-05, |
| "loss": 0.4662, |
| "step": 15450 |
| }, |
| { |
| "epoch": 0.27389514233712076, |
| "grad_norm": 4.502954483032227, |
| "learning_rate": 4.034379847244311e-05, |
| "loss": 0.3889, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.2747786750543373, |
| "grad_norm": 2.4406206607818604, |
| "learning_rate": 4.029471245410458e-05, |
| "loss": 0.3618, |
| "step": 15550 |
| }, |
| { |
| "epoch": 0.27566220777155376, |
| "grad_norm": 1.6272777318954468, |
| "learning_rate": 4.024562643576604e-05, |
| "loss": 0.4126, |
| "step": 15600 |
| }, |
| { |
| "epoch": 0.2765457404887703, |
| "grad_norm": 1.5262032747268677, |
| "learning_rate": 4.01965404174275e-05, |
| "loss": 0.3771, |
| "step": 15650 |
| }, |
| { |
| "epoch": 0.2774292732059868, |
| "grad_norm": 1.8245854377746582, |
| "learning_rate": 4.014745439908896e-05, |
| "loss": 0.4377, |
| "step": 15700 |
| }, |
| { |
| "epoch": 0.27831280592320334, |
| "grad_norm": 2.8566267490386963, |
| "learning_rate": 4.0098368380750425e-05, |
| "loss": 0.4041, |
| "step": 15750 |
| }, |
| { |
| "epoch": 0.27919633864041987, |
| "grad_norm": 2.0167641639709473, |
| "learning_rate": 4.00492823624119e-05, |
| "loss": 0.375, |
| "step": 15800 |
| }, |
| { |
| "epoch": 0.2800798713576364, |
| "grad_norm": 1.9363830089569092, |
| "learning_rate": 4.0000196344073355e-05, |
| "loss": 0.3339, |
| "step": 15850 |
| }, |
| { |
| "epoch": 0.2809634040748529, |
| "grad_norm": 2.208641767501831, |
| "learning_rate": 3.995111032573482e-05, |
| "loss": 0.348, |
| "step": 15900 |
| }, |
| { |
| "epoch": 0.2818469367920694, |
| "grad_norm": 1.5789657831192017, |
| "learning_rate": 3.9902024307396284e-05, |
| "loss": 0.367, |
| "step": 15950 |
| }, |
| { |
| "epoch": 0.28273046950928593, |
| "grad_norm": 1.6666336059570312, |
| "learning_rate": 3.985293828905775e-05, |
| "loss": 0.3427, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.28361400222650246, |
| "grad_norm": 3.725020170211792, |
| "learning_rate": 3.980385227071921e-05, |
| "loss": 0.3637, |
| "step": 16050 |
| }, |
| { |
| "epoch": 0.284497534943719, |
| "grad_norm": 1.5958735942840576, |
| "learning_rate": 3.975476625238068e-05, |
| "loss": 0.3489, |
| "step": 16100 |
| }, |
| { |
| "epoch": 0.28538106766093546, |
| "grad_norm": 1.3779951333999634, |
| "learning_rate": 3.9705680234042136e-05, |
| "loss": 0.4209, |
| "step": 16150 |
| }, |
| { |
| "epoch": 0.286264600378152, |
| "grad_norm": 1.6636724472045898, |
| "learning_rate": 3.96565942157036e-05, |
| "loss": 0.2984, |
| "step": 16200 |
| }, |
| { |
| "epoch": 0.2871481330953685, |
| "grad_norm": 1.705592155456543, |
| "learning_rate": 3.9607508197365065e-05, |
| "loss": 0.3877, |
| "step": 16250 |
| }, |
| { |
| "epoch": 0.28803166581258505, |
| "grad_norm": 1.5367944240570068, |
| "learning_rate": 3.955842217902653e-05, |
| "loss": 0.3508, |
| "step": 16300 |
| }, |
| { |
| "epoch": 0.2889151985298016, |
| "grad_norm": 3.140960693359375, |
| "learning_rate": 3.9509336160687994e-05, |
| "loss": 0.3443, |
| "step": 16350 |
| }, |
| { |
| "epoch": 0.2897987312470181, |
| "grad_norm": 1.2341272830963135, |
| "learning_rate": 3.946025014234945e-05, |
| "loss": 0.4346, |
| "step": 16400 |
| }, |
| { |
| "epoch": 0.2906822639642346, |
| "grad_norm": 1.9500783681869507, |
| "learning_rate": 3.941116412401092e-05, |
| "loss": 0.4262, |
| "step": 16450 |
| }, |
| { |
| "epoch": 0.2915657966814511, |
| "grad_norm": 1.344519853591919, |
| "learning_rate": 3.936207810567238e-05, |
| "loss": 0.3065, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.29244932939866763, |
| "grad_norm": 1.4747456312179565, |
| "learning_rate": 3.9312992087333846e-05, |
| "loss": 0.4003, |
| "step": 16550 |
| }, |
| { |
| "epoch": 0.29333286211588416, |
| "grad_norm": 1.5639158487319946, |
| "learning_rate": 3.926390606899531e-05, |
| "loss": 0.5295, |
| "step": 16600 |
| }, |
| { |
| "epoch": 0.2942163948331007, |
| "grad_norm": 1.9425716400146484, |
| "learning_rate": 3.9214820050656776e-05, |
| "loss": 0.3582, |
| "step": 16650 |
| }, |
| { |
| "epoch": 0.29509992755031716, |
| "grad_norm": 3.003871440887451, |
| "learning_rate": 3.9165734032318234e-05, |
| "loss": 0.3299, |
| "step": 16700 |
| }, |
| { |
| "epoch": 0.2959834602675337, |
| "grad_norm": 3.689194679260254, |
| "learning_rate": 3.91166480139797e-05, |
| "loss": 0.3493, |
| "step": 16750 |
| }, |
| { |
| "epoch": 0.2968669929847502, |
| "grad_norm": 1.9439842700958252, |
| "learning_rate": 3.906756199564116e-05, |
| "loss": 0.2752, |
| "step": 16800 |
| }, |
| { |
| "epoch": 0.29775052570196675, |
| "grad_norm": 1.8846018314361572, |
| "learning_rate": 3.901847597730263e-05, |
| "loss": 0.3254, |
| "step": 16850 |
| }, |
| { |
| "epoch": 0.2986340584191833, |
| "grad_norm": 2.9167964458465576, |
| "learning_rate": 3.896938995896409e-05, |
| "loss": 0.3352, |
| "step": 16900 |
| }, |
| { |
| "epoch": 0.2995175911363998, |
| "grad_norm": 2.6470940113067627, |
| "learning_rate": 3.892128566099233e-05, |
| "loss": 0.3812, |
| "step": 16950 |
| }, |
| { |
| "epoch": 0.3004011238536163, |
| "grad_norm": 2.1021623611450195, |
| "learning_rate": 3.887219964265379e-05, |
| "loss": 0.3332, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.3012846565708328, |
| "grad_norm": 1.9923433065414429, |
| "learning_rate": 3.882311362431525e-05, |
| "loss": 0.3472, |
| "step": 17050 |
| }, |
| { |
| "epoch": 0.30216818928804934, |
| "grad_norm": 1.5736125707626343, |
| "learning_rate": 3.8774027605976714e-05, |
| "loss": 0.4207, |
| "step": 17100 |
| }, |
| { |
| "epoch": 0.30305172200526587, |
| "grad_norm": 2.2181496620178223, |
| "learning_rate": 3.872494158763818e-05, |
| "loss": 0.3849, |
| "step": 17150 |
| }, |
| { |
| "epoch": 0.3039352547224824, |
| "grad_norm": 1.5112169981002808, |
| "learning_rate": 3.867585556929964e-05, |
| "loss": 0.3272, |
| "step": 17200 |
| }, |
| { |
| "epoch": 0.30481878743969887, |
| "grad_norm": 1.5218919515609741, |
| "learning_rate": 3.862676955096111e-05, |
| "loss": 0.3037, |
| "step": 17250 |
| }, |
| { |
| "epoch": 0.3057023201569154, |
| "grad_norm": 1.5864076614379883, |
| "learning_rate": 3.857768353262257e-05, |
| "loss": 0.2924, |
| "step": 17300 |
| }, |
| { |
| "epoch": 0.3065858528741319, |
| "grad_norm": 1.8895894289016724, |
| "learning_rate": 3.852859751428403e-05, |
| "loss": 0.4029, |
| "step": 17350 |
| }, |
| { |
| "epoch": 0.30746938559134845, |
| "grad_norm": 1.4156498908996582, |
| "learning_rate": 3.8479511495945495e-05, |
| "loss": 0.5016, |
| "step": 17400 |
| }, |
| { |
| "epoch": 0.308352918308565, |
| "grad_norm": 1.4788236618041992, |
| "learning_rate": 3.843042547760696e-05, |
| "loss": 0.3648, |
| "step": 17450 |
| }, |
| { |
| "epoch": 0.3092364510257815, |
| "grad_norm": 1.7631937265396118, |
| "learning_rate": 3.8381339459268424e-05, |
| "loss": 0.3045, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.310119983742998, |
| "grad_norm": 1.9122941493988037, |
| "learning_rate": 3.833225344092989e-05, |
| "loss": 0.3271, |
| "step": 17550 |
| }, |
| { |
| "epoch": 0.3110035164602145, |
| "grad_norm": 1.6838266849517822, |
| "learning_rate": 3.828316742259135e-05, |
| "loss": 0.519, |
| "step": 17600 |
| }, |
| { |
| "epoch": 0.31188704917743104, |
| "grad_norm": 4.507582187652588, |
| "learning_rate": 3.823408140425281e-05, |
| "loss": 0.341, |
| "step": 17650 |
| }, |
| { |
| "epoch": 0.31277058189464757, |
| "grad_norm": 1.3272327184677124, |
| "learning_rate": 3.8184995385914276e-05, |
| "loss": 0.3352, |
| "step": 17700 |
| }, |
| { |
| "epoch": 0.3136541146118641, |
| "grad_norm": 2.516676664352417, |
| "learning_rate": 3.813590936757574e-05, |
| "loss": 0.4406, |
| "step": 17750 |
| }, |
| { |
| "epoch": 0.31453764732908057, |
| "grad_norm": 1.8230887651443481, |
| "learning_rate": 3.8086823349237206e-05, |
| "loss": 0.3822, |
| "step": 17800 |
| }, |
| { |
| "epoch": 0.3154211800462971, |
| "grad_norm": 1.5267698764801025, |
| "learning_rate": 3.803773733089867e-05, |
| "loss": 0.287, |
| "step": 17850 |
| }, |
| { |
| "epoch": 0.31630471276351363, |
| "grad_norm": 2.647895574569702, |
| "learning_rate": 3.798865131256013e-05, |
| "loss": 0.4349, |
| "step": 17900 |
| }, |
| { |
| "epoch": 0.31718824548073016, |
| "grad_norm": 1.5159648656845093, |
| "learning_rate": 3.793956529422159e-05, |
| "loss": 0.3633, |
| "step": 17950 |
| }, |
| { |
| "epoch": 0.3180717781979467, |
| "grad_norm": 1.9135470390319824, |
| "learning_rate": 3.7890479275883064e-05, |
| "loss": 0.3431, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.3189553109151632, |
| "grad_norm": 1.6438477039337158, |
| "learning_rate": 3.784139325754452e-05, |
| "loss": 0.3986, |
| "step": 18050 |
| }, |
| { |
| "epoch": 0.3198388436323797, |
| "grad_norm": 1.6794339418411255, |
| "learning_rate": 3.779230723920599e-05, |
| "loss": 0.3279, |
| "step": 18100 |
| }, |
| { |
| "epoch": 0.3207223763495962, |
| "grad_norm": 1.5067431926727295, |
| "learning_rate": 3.7743221220867445e-05, |
| "loss": 0.3062, |
| "step": 18150 |
| }, |
| { |
| "epoch": 0.32160590906681275, |
| "grad_norm": 1.6953719854354858, |
| "learning_rate": 3.7694135202528916e-05, |
| "loss": 0.2973, |
| "step": 18200 |
| }, |
| { |
| "epoch": 0.3224894417840293, |
| "grad_norm": 2.819748640060425, |
| "learning_rate": 3.764504918419038e-05, |
| "loss": 0.4078, |
| "step": 18250 |
| }, |
| { |
| "epoch": 0.3233729745012458, |
| "grad_norm": 1.5743447542190552, |
| "learning_rate": 3.759596316585184e-05, |
| "loss": 0.31, |
| "step": 18300 |
| }, |
| { |
| "epoch": 0.3242565072184623, |
| "grad_norm": 1.8966853618621826, |
| "learning_rate": 3.7546877147513303e-05, |
| "loss": 0.306, |
| "step": 18350 |
| }, |
| { |
| "epoch": 0.3251400399356788, |
| "grad_norm": 2.7652056217193604, |
| "learning_rate": 3.749779112917477e-05, |
| "loss": 0.3426, |
| "step": 18400 |
| }, |
| { |
| "epoch": 0.32602357265289533, |
| "grad_norm": 3.006504535675049, |
| "learning_rate": 3.744870511083623e-05, |
| "loss": 0.2807, |
| "step": 18450 |
| }, |
| { |
| "epoch": 0.32690710537011186, |
| "grad_norm": 1.5666753053665161, |
| "learning_rate": 3.73996190924977e-05, |
| "loss": 0.3856, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.3277906380873284, |
| "grad_norm": 1.9692752361297607, |
| "learning_rate": 3.735053307415916e-05, |
| "loss": 0.3575, |
| "step": 18550 |
| }, |
| { |
| "epoch": 0.3286741708045449, |
| "grad_norm": 3.517622232437134, |
| "learning_rate": 3.730144705582062e-05, |
| "loss": 0.347, |
| "step": 18600 |
| }, |
| { |
| "epoch": 0.3295577035217614, |
| "grad_norm": 1.8076531887054443, |
| "learning_rate": 3.7252361037482085e-05, |
| "loss": 0.3195, |
| "step": 18650 |
| }, |
| { |
| "epoch": 0.3304412362389779, |
| "grad_norm": 1.8082791566848755, |
| "learning_rate": 3.720327501914355e-05, |
| "loss": 0.3543, |
| "step": 18700 |
| }, |
| { |
| "epoch": 0.33132476895619445, |
| "grad_norm": 1.3712306022644043, |
| "learning_rate": 3.7154189000805014e-05, |
| "loss": 0.3642, |
| "step": 18750 |
| }, |
| { |
| "epoch": 0.332208301673411, |
| "grad_norm": 1.5654476881027222, |
| "learning_rate": 3.710510298246648e-05, |
| "loss": 0.3415, |
| "step": 18800 |
| }, |
| { |
| "epoch": 0.3330918343906275, |
| "grad_norm": 1.4388914108276367, |
| "learning_rate": 3.7056016964127937e-05, |
| "loss": 0.3069, |
| "step": 18850 |
| }, |
| { |
| "epoch": 0.333975367107844, |
| "grad_norm": 1.5527664422988892, |
| "learning_rate": 3.70069309457894e-05, |
| "loss": 0.2962, |
| "step": 18900 |
| }, |
| { |
| "epoch": 0.3348588998250605, |
| "grad_norm": 1.6680736541748047, |
| "learning_rate": 3.6957844927450866e-05, |
| "loss": 0.3156, |
| "step": 18950 |
| }, |
| { |
| "epoch": 0.33574243254227704, |
| "grad_norm": 2.266108274459839, |
| "learning_rate": 3.69097406294791e-05, |
| "loss": 0.3791, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.33662596525949356, |
| "grad_norm": 1.4146838188171387, |
| "learning_rate": 3.6860654611140565e-05, |
| "loss": 0.3287, |
| "step": 19050 |
| }, |
| { |
| "epoch": 0.3375094979767101, |
| "grad_norm": 1.640153169631958, |
| "learning_rate": 3.681156859280202e-05, |
| "loss": 0.4034, |
| "step": 19100 |
| }, |
| { |
| "epoch": 0.3383930306939266, |
| "grad_norm": 1.670589804649353, |
| "learning_rate": 3.6762482574463494e-05, |
| "loss": 0.3476, |
| "step": 19150 |
| }, |
| { |
| "epoch": 0.3392765634111431, |
| "grad_norm": 3.375941753387451, |
| "learning_rate": 3.671339655612496e-05, |
| "loss": 0.363, |
| "step": 19200 |
| }, |
| { |
| "epoch": 0.3401600961283596, |
| "grad_norm": 1.965834379196167, |
| "learning_rate": 3.666431053778642e-05, |
| "loss": 0.3182, |
| "step": 19250 |
| }, |
| { |
| "epoch": 0.34104362884557615, |
| "grad_norm": 1.607900857925415, |
| "learning_rate": 3.661522451944788e-05, |
| "loss": 0.3238, |
| "step": 19300 |
| }, |
| { |
| "epoch": 0.3419271615627927, |
| "grad_norm": 1.4051165580749512, |
| "learning_rate": 3.6566138501109346e-05, |
| "loss": 0.3043, |
| "step": 19350 |
| }, |
| { |
| "epoch": 0.3428106942800092, |
| "grad_norm": 1.4679523706436157, |
| "learning_rate": 3.651705248277081e-05, |
| "loss": 0.3902, |
| "step": 19400 |
| }, |
| { |
| "epoch": 0.3436942269972257, |
| "grad_norm": 1.5135536193847656, |
| "learning_rate": 3.6467966464432276e-05, |
| "loss": 0.3085, |
| "step": 19450 |
| }, |
| { |
| "epoch": 0.3445777597144422, |
| "grad_norm": 2.2533581256866455, |
| "learning_rate": 3.6418880446093734e-05, |
| "loss": 0.3162, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.34546129243165874, |
| "grad_norm": 1.625067949295044, |
| "learning_rate": 3.63697944277552e-05, |
| "loss": 0.345, |
| "step": 19550 |
| }, |
| { |
| "epoch": 0.34634482514887527, |
| "grad_norm": 1.1573612689971924, |
| "learning_rate": 3.632070840941666e-05, |
| "loss": 0.3017, |
| "step": 19600 |
| }, |
| { |
| "epoch": 0.3472283578660918, |
| "grad_norm": 3.46663498878479, |
| "learning_rate": 3.627162239107813e-05, |
| "loss": 0.4232, |
| "step": 19650 |
| }, |
| { |
| "epoch": 0.3481118905833083, |
| "grad_norm": 1.5614382028579712, |
| "learning_rate": 3.622253637273959e-05, |
| "loss": 0.3363, |
| "step": 19700 |
| }, |
| { |
| "epoch": 0.3489954233005248, |
| "grad_norm": 1.3841484785079956, |
| "learning_rate": 3.617345035440106e-05, |
| "loss": 0.3484, |
| "step": 19750 |
| }, |
| { |
| "epoch": 0.3498789560177413, |
| "grad_norm": 1.941517949104309, |
| "learning_rate": 3.6124364336062515e-05, |
| "loss": 0.3719, |
| "step": 19800 |
| }, |
| { |
| "epoch": 0.35076248873495786, |
| "grad_norm": 4.908963680267334, |
| "learning_rate": 3.607527831772398e-05, |
| "loss": 0.3226, |
| "step": 19850 |
| }, |
| { |
| "epoch": 0.3516460214521744, |
| "grad_norm": 1.5221627950668335, |
| "learning_rate": 3.6026192299385444e-05, |
| "loss": 0.3636, |
| "step": 19900 |
| }, |
| { |
| "epoch": 0.3525295541693909, |
| "grad_norm": 1.8089814186096191, |
| "learning_rate": 3.597710628104691e-05, |
| "loss": 0.3704, |
| "step": 19950 |
| }, |
| { |
| "epoch": 0.3534130868866074, |
| "grad_norm": 2.786560535430908, |
| "learning_rate": 3.5928020262708373e-05, |
| "loss": 0.3459, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.3542966196038239, |
| "grad_norm": 2.97851824760437, |
| "learning_rate": 3.587893424436983e-05, |
| "loss": 0.3226, |
| "step": 20050 |
| }, |
| { |
| "epoch": 0.35518015232104044, |
| "grad_norm": 2.1979775428771973, |
| "learning_rate": 3.5829848226031296e-05, |
| "loss": 0.3256, |
| "step": 20100 |
| }, |
| { |
| "epoch": 0.35606368503825697, |
| "grad_norm": 1.762453556060791, |
| "learning_rate": 3.578076220769276e-05, |
| "loss": 0.3179, |
| "step": 20150 |
| }, |
| { |
| "epoch": 0.3569472177554735, |
| "grad_norm": 1.4908533096313477, |
| "learning_rate": 3.5731676189354225e-05, |
| "loss": 0.4226, |
| "step": 20200 |
| }, |
| { |
| "epoch": 0.35783075047269003, |
| "grad_norm": 1.3192092180252075, |
| "learning_rate": 3.568259017101569e-05, |
| "loss": 0.4196, |
| "step": 20250 |
| }, |
| { |
| "epoch": 0.3587142831899065, |
| "grad_norm": 1.421736717224121, |
| "learning_rate": 3.5633504152677155e-05, |
| "loss": 0.3618, |
| "step": 20300 |
| }, |
| { |
| "epoch": 0.35959781590712303, |
| "grad_norm": 2.0631330013275146, |
| "learning_rate": 3.558441813433861e-05, |
| "loss": 0.4093, |
| "step": 20350 |
| }, |
| { |
| "epoch": 0.36048134862433956, |
| "grad_norm": 1.6250920295715332, |
| "learning_rate": 3.5535332116000084e-05, |
| "loss": 0.3051, |
| "step": 20400 |
| }, |
| { |
| "epoch": 0.3613648813415561, |
| "grad_norm": 1.4659417867660522, |
| "learning_rate": 3.548624609766155e-05, |
| "loss": 0.3379, |
| "step": 20450 |
| }, |
| { |
| "epoch": 0.3622484140587726, |
| "grad_norm": 1.520573616027832, |
| "learning_rate": 3.5437160079323007e-05, |
| "loss": 0.3582, |
| "step": 20500 |
| }, |
| { |
| "epoch": 0.3631319467759891, |
| "grad_norm": 2.158830165863037, |
| "learning_rate": 3.538807406098447e-05, |
| "loss": 0.4004, |
| "step": 20550 |
| }, |
| { |
| "epoch": 0.3640154794932056, |
| "grad_norm": 1.7503968477249146, |
| "learning_rate": 3.533898804264593e-05, |
| "loss": 0.33, |
| "step": 20600 |
| }, |
| { |
| "epoch": 0.36489901221042215, |
| "grad_norm": 1.5064153671264648, |
| "learning_rate": 3.52899020243074e-05, |
| "loss": 0.3072, |
| "step": 20650 |
| }, |
| { |
| "epoch": 0.3657825449276387, |
| "grad_norm": 3.5023598670959473, |
| "learning_rate": 3.5240816005968865e-05, |
| "loss": 0.35, |
| "step": 20700 |
| }, |
| { |
| "epoch": 0.3666660776448552, |
| "grad_norm": 1.7911083698272705, |
| "learning_rate": 3.519172998763032e-05, |
| "loss": 0.3241, |
| "step": 20750 |
| }, |
| { |
| "epoch": 0.36754961036207173, |
| "grad_norm": 1.50026273727417, |
| "learning_rate": 3.514264396929179e-05, |
| "loss": 0.37, |
| "step": 20800 |
| }, |
| { |
| "epoch": 0.3684331430792882, |
| "grad_norm": 1.5556259155273438, |
| "learning_rate": 3.509355795095325e-05, |
| "loss": 0.2689, |
| "step": 20850 |
| }, |
| { |
| "epoch": 0.36931667579650473, |
| "grad_norm": 1.6530933380126953, |
| "learning_rate": 3.504447193261472e-05, |
| "loss": 0.4061, |
| "step": 20900 |
| }, |
| { |
| "epoch": 0.37020020851372126, |
| "grad_norm": 1.250317931175232, |
| "learning_rate": 3.499538591427618e-05, |
| "loss": 0.3412, |
| "step": 20950 |
| }, |
| { |
| "epoch": 0.3710837412309378, |
| "grad_norm": 1.9599151611328125, |
| "learning_rate": 3.494728161630441e-05, |
| "loss": 0.3619, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.3719672739481543, |
| "grad_norm": 1.3728086948394775, |
| "learning_rate": 3.4898195597965874e-05, |
| "loss": 0.314, |
| "step": 21050 |
| }, |
| { |
| "epoch": 0.3728508066653708, |
| "grad_norm": 1.6389710903167725, |
| "learning_rate": 3.4849109579627346e-05, |
| "loss": 0.2912, |
| "step": 21100 |
| }, |
| { |
| "epoch": 0.3737343393825873, |
| "grad_norm": 3.552582025527954, |
| "learning_rate": 3.4800023561288803e-05, |
| "loss": 0.3402, |
| "step": 21150 |
| }, |
| { |
| "epoch": 0.37461787209980385, |
| "grad_norm": 1.6479156017303467, |
| "learning_rate": 3.475093754295027e-05, |
| "loss": 0.3462, |
| "step": 21200 |
| }, |
| { |
| "epoch": 0.3755014048170204, |
| "grad_norm": 1.593705415725708, |
| "learning_rate": 3.470185152461173e-05, |
| "loss": 0.2775, |
| "step": 21250 |
| }, |
| { |
| "epoch": 0.3763849375342369, |
| "grad_norm": 2.1807069778442383, |
| "learning_rate": 3.465276550627319e-05, |
| "loss": 0.3825, |
| "step": 21300 |
| }, |
| { |
| "epoch": 0.37726847025145344, |
| "grad_norm": 1.6359409093856812, |
| "learning_rate": 3.460367948793466e-05, |
| "loss": 0.3931, |
| "step": 21350 |
| }, |
| { |
| "epoch": 0.3781520029686699, |
| "grad_norm": 1.5960018634796143, |
| "learning_rate": 3.455459346959612e-05, |
| "loss": 0.4059, |
| "step": 21400 |
| }, |
| { |
| "epoch": 0.37903553568588644, |
| "grad_norm": 3.367835283279419, |
| "learning_rate": 3.4505507451257585e-05, |
| "loss": 0.3264, |
| "step": 21450 |
| }, |
| { |
| "epoch": 0.37991906840310297, |
| "grad_norm": 1.5965161323547363, |
| "learning_rate": 3.445642143291905e-05, |
| "loss": 0.2605, |
| "step": 21500 |
| }, |
| { |
| "epoch": 0.3808026011203195, |
| "grad_norm": 1.5011396408081055, |
| "learning_rate": 3.440733541458051e-05, |
| "loss": 0.3658, |
| "step": 21550 |
| }, |
| { |
| "epoch": 0.381686133837536, |
| "grad_norm": 1.5021259784698486, |
| "learning_rate": 3.435824939624198e-05, |
| "loss": 0.3274, |
| "step": 21600 |
| }, |
| { |
| "epoch": 0.3825696665547525, |
| "grad_norm": 1.5224860906600952, |
| "learning_rate": 3.430916337790344e-05, |
| "loss": 0.3094, |
| "step": 21650 |
| }, |
| { |
| "epoch": 0.383453199271969, |
| "grad_norm": 3.36433482170105, |
| "learning_rate": 3.42600773595649e-05, |
| "loss": 0.3556, |
| "step": 21700 |
| }, |
| { |
| "epoch": 0.38433673198918555, |
| "grad_norm": 1.9824773073196411, |
| "learning_rate": 3.4210991341226366e-05, |
| "loss": 0.2877, |
| "step": 21750 |
| }, |
| { |
| "epoch": 0.3852202647064021, |
| "grad_norm": 1.5103614330291748, |
| "learning_rate": 3.416190532288783e-05, |
| "loss": 0.3203, |
| "step": 21800 |
| }, |
| { |
| "epoch": 0.3861037974236186, |
| "grad_norm": 1.1625959873199463, |
| "learning_rate": 3.4112819304549295e-05, |
| "loss": 0.2553, |
| "step": 21850 |
| }, |
| { |
| "epoch": 0.38698733014083514, |
| "grad_norm": 1.5695985555648804, |
| "learning_rate": 3.406373328621076e-05, |
| "loss": 0.4425, |
| "step": 21900 |
| }, |
| { |
| "epoch": 0.3878708628580516, |
| "grad_norm": 1.6758594512939453, |
| "learning_rate": 3.401464726787222e-05, |
| "loss": 0.3249, |
| "step": 21950 |
| }, |
| { |
| "epoch": 0.38875439557526814, |
| "grad_norm": 3.6129748821258545, |
| "learning_rate": 3.396556124953368e-05, |
| "loss": 0.3649, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.38963792829248467, |
| "grad_norm": 1.6155461072921753, |
| "learning_rate": 3.391647523119515e-05, |
| "loss": 0.3621, |
| "step": 22050 |
| }, |
| { |
| "epoch": 0.3905214610097012, |
| "grad_norm": 1.7477047443389893, |
| "learning_rate": 3.386738921285661e-05, |
| "loss": 0.4232, |
| "step": 22100 |
| }, |
| { |
| "epoch": 0.3914049937269177, |
| "grad_norm": 3.0512797832489014, |
| "learning_rate": 3.3818303194518076e-05, |
| "loss": 0.266, |
| "step": 22150 |
| }, |
| { |
| "epoch": 0.3922885264441342, |
| "grad_norm": 1.4074236154556274, |
| "learning_rate": 3.376921717617954e-05, |
| "loss": 0.3767, |
| "step": 22200 |
| }, |
| { |
| "epoch": 0.39317205916135073, |
| "grad_norm": 1.7168455123901367, |
| "learning_rate": 3.3720131157841e-05, |
| "loss": 0.366, |
| "step": 22250 |
| }, |
| { |
| "epoch": 0.39405559187856726, |
| "grad_norm": 3.360104560852051, |
| "learning_rate": 3.3671045139502464e-05, |
| "loss": 0.3211, |
| "step": 22300 |
| }, |
| { |
| "epoch": 0.3949391245957838, |
| "grad_norm": 1.527031660079956, |
| "learning_rate": 3.3621959121163935e-05, |
| "loss": 0.2505, |
| "step": 22350 |
| }, |
| { |
| "epoch": 0.3958226573130003, |
| "grad_norm": 1.7586029767990112, |
| "learning_rate": 3.357287310282539e-05, |
| "loss": 0.3824, |
| "step": 22400 |
| }, |
| { |
| "epoch": 0.39670619003021684, |
| "grad_norm": 2.3490004539489746, |
| "learning_rate": 3.352378708448686e-05, |
| "loss": 0.331, |
| "step": 22450 |
| }, |
| { |
| "epoch": 0.3975897227474333, |
| "grad_norm": 1.5686146020889282, |
| "learning_rate": 3.3474701066148316e-05, |
| "loss": 0.3136, |
| "step": 22500 |
| }, |
| { |
| "epoch": 0.39847325546464984, |
| "grad_norm": 1.5068285465240479, |
| "learning_rate": 3.342561504780978e-05, |
| "loss": 0.297, |
| "step": 22550 |
| }, |
| { |
| "epoch": 0.3993567881818664, |
| "grad_norm": 1.81602942943573, |
| "learning_rate": 3.337652902947125e-05, |
| "loss": 0.2933, |
| "step": 22600 |
| }, |
| { |
| "epoch": 0.4002403208990829, |
| "grad_norm": 3.4516189098358154, |
| "learning_rate": 3.332744301113271e-05, |
| "loss": 0.4026, |
| "step": 22650 |
| }, |
| { |
| "epoch": 0.40112385361629943, |
| "grad_norm": 1.5759230852127075, |
| "learning_rate": 3.3278356992794174e-05, |
| "loss": 0.3567, |
| "step": 22700 |
| }, |
| { |
| "epoch": 0.4020073863335159, |
| "grad_norm": 1.9385254383087158, |
| "learning_rate": 3.322927097445564e-05, |
| "loss": 0.3711, |
| "step": 22750 |
| }, |
| { |
| "epoch": 0.40289091905073243, |
| "grad_norm": 1.6334116458892822, |
| "learning_rate": 3.31801849561171e-05, |
| "loss": 0.378, |
| "step": 22800 |
| }, |
| { |
| "epoch": 0.40377445176794896, |
| "grad_norm": 2.0981173515319824, |
| "learning_rate": 3.313109893777857e-05, |
| "loss": 0.355, |
| "step": 22850 |
| }, |
| { |
| "epoch": 0.4046579844851655, |
| "grad_norm": 1.6996448040008545, |
| "learning_rate": 3.308201291944003e-05, |
| "loss": 0.3044, |
| "step": 22900 |
| }, |
| { |
| "epoch": 0.405541517202382, |
| "grad_norm": 1.3511463403701782, |
| "learning_rate": 3.303292690110149e-05, |
| "loss": 0.357, |
| "step": 22950 |
| }, |
| { |
| "epoch": 0.40642504991959855, |
| "grad_norm": 1.7596737146377563, |
| "learning_rate": 3.2983840882762956e-05, |
| "loss": 0.3616, |
| "step": 23000 |
| }, |
| { |
| "epoch": 0.407308582636815, |
| "grad_norm": 2.8382747173309326, |
| "learning_rate": 3.2934754864424413e-05, |
| "loss": 0.3139, |
| "step": 23050 |
| }, |
| { |
| "epoch": 0.40819211535403155, |
| "grad_norm": 3.052281618118286, |
| "learning_rate": 3.2885668846085885e-05, |
| "loss": 0.3474, |
| "step": 23100 |
| }, |
| { |
| "epoch": 0.4090756480712481, |
| "grad_norm": 1.373552680015564, |
| "learning_rate": 3.283756454811412e-05, |
| "loss": 0.3208, |
| "step": 23150 |
| }, |
| { |
| "epoch": 0.4099591807884646, |
| "grad_norm": 1.6797386407852173, |
| "learning_rate": 3.278847852977558e-05, |
| "loss": 0.3798, |
| "step": 23200 |
| }, |
| { |
| "epoch": 0.41084271350568113, |
| "grad_norm": 1.8930203914642334, |
| "learning_rate": 3.273939251143704e-05, |
| "loss": 0.3282, |
| "step": 23250 |
| }, |
| { |
| "epoch": 0.4117262462228976, |
| "grad_norm": 1.256135106086731, |
| "learning_rate": 3.2690306493098507e-05, |
| "loss": 0.3302, |
| "step": 23300 |
| }, |
| { |
| "epoch": 0.41260977894011414, |
| "grad_norm": 1.952988862991333, |
| "learning_rate": 3.264122047475997e-05, |
| "loss": 0.3599, |
| "step": 23350 |
| }, |
| { |
| "epoch": 0.41349331165733066, |
| "grad_norm": 1.3686082363128662, |
| "learning_rate": 3.2592134456421436e-05, |
| "loss": 0.3608, |
| "step": 23400 |
| }, |
| { |
| "epoch": 0.4143768443745472, |
| "grad_norm": 1.56107759475708, |
| "learning_rate": 3.2543048438082894e-05, |
| "loss": 0.3387, |
| "step": 23450 |
| }, |
| { |
| "epoch": 0.4152603770917637, |
| "grad_norm": 1.823240876197815, |
| "learning_rate": 3.249396241974436e-05, |
| "loss": 0.3987, |
| "step": 23500 |
| }, |
| { |
| "epoch": 0.41614390980898025, |
| "grad_norm": 1.2912514209747314, |
| "learning_rate": 3.244487640140583e-05, |
| "loss": 0.3387, |
| "step": 23550 |
| }, |
| { |
| "epoch": 0.4170274425261967, |
| "grad_norm": 1.5520604848861694, |
| "learning_rate": 3.239579038306729e-05, |
| "loss": 0.2989, |
| "step": 23600 |
| }, |
| { |
| "epoch": 0.41791097524341325, |
| "grad_norm": 1.4236600399017334, |
| "learning_rate": 3.234670436472875e-05, |
| "loss": 0.2629, |
| "step": 23650 |
| }, |
| { |
| "epoch": 0.4187945079606298, |
| "grad_norm": 3.2101380825042725, |
| "learning_rate": 3.229761834639022e-05, |
| "loss": 0.2905, |
| "step": 23700 |
| }, |
| { |
| "epoch": 0.4196780406778463, |
| "grad_norm": 1.3380919694900513, |
| "learning_rate": 3.2248532328051675e-05, |
| "loss": 0.3234, |
| "step": 23750 |
| }, |
| { |
| "epoch": 0.42056157339506284, |
| "grad_norm": 1.5015414953231812, |
| "learning_rate": 3.2199446309713146e-05, |
| "loss": 0.3063, |
| "step": 23800 |
| }, |
| { |
| "epoch": 0.4214451061122793, |
| "grad_norm": 1.289444923400879, |
| "learning_rate": 3.2150360291374604e-05, |
| "loss": 0.3386, |
| "step": 23850 |
| }, |
| { |
| "epoch": 0.42232863882949584, |
| "grad_norm": 2.95922589302063, |
| "learning_rate": 3.210127427303607e-05, |
| "loss": 0.3431, |
| "step": 23900 |
| }, |
| { |
| "epoch": 0.42321217154671237, |
| "grad_norm": 1.6753530502319336, |
| "learning_rate": 3.2052188254697534e-05, |
| "loss": 0.2902, |
| "step": 23950 |
| }, |
| { |
| "epoch": 0.4240957042639289, |
| "grad_norm": 1.6901003122329712, |
| "learning_rate": 3.2003102236359e-05, |
| "loss": 0.3136, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.4249792369811454, |
| "grad_norm": 4.797271251678467, |
| "learning_rate": 3.195401621802046e-05, |
| "loss": 0.4001, |
| "step": 24050 |
| }, |
| { |
| "epoch": 0.42586276969836195, |
| "grad_norm": 1.4796360731124878, |
| "learning_rate": 3.190493019968193e-05, |
| "loss": 0.285, |
| "step": 24100 |
| }, |
| { |
| "epoch": 0.4267463024155784, |
| "grad_norm": 1.4410722255706787, |
| "learning_rate": 3.1855844181343386e-05, |
| "loss": 0.4717, |
| "step": 24150 |
| }, |
| { |
| "epoch": 0.42762983513279496, |
| "grad_norm": 1.398037075996399, |
| "learning_rate": 3.180675816300485e-05, |
| "loss": 0.3391, |
| "step": 24200 |
| }, |
| { |
| "epoch": 0.4285133678500115, |
| "grad_norm": 1.3054397106170654, |
| "learning_rate": 3.1757672144666315e-05, |
| "loss": 0.2913, |
| "step": 24250 |
| }, |
| { |
| "epoch": 0.429396900567228, |
| "grad_norm": 1.7768748998641968, |
| "learning_rate": 3.170858612632778e-05, |
| "loss": 0.3417, |
| "step": 24300 |
| }, |
| { |
| "epoch": 0.43028043328444454, |
| "grad_norm": 1.2682479619979858, |
| "learning_rate": 3.1659500107989244e-05, |
| "loss": 0.2909, |
| "step": 24350 |
| }, |
| { |
| "epoch": 0.431163966001661, |
| "grad_norm": 1.791175365447998, |
| "learning_rate": 3.16104140896507e-05, |
| "loss": 0.2871, |
| "step": 24400 |
| }, |
| { |
| "epoch": 0.43204749871887754, |
| "grad_norm": 1.5249110460281372, |
| "learning_rate": 3.156132807131217e-05, |
| "loss": 0.3929, |
| "step": 24450 |
| }, |
| { |
| "epoch": 0.43293103143609407, |
| "grad_norm": 1.2778598070144653, |
| "learning_rate": 3.151224205297363e-05, |
| "loss": 0.278, |
| "step": 24500 |
| }, |
| { |
| "epoch": 0.4338145641533106, |
| "grad_norm": 3.55033278465271, |
| "learning_rate": 3.1463156034635096e-05, |
| "loss": 0.4386, |
| "step": 24550 |
| }, |
| { |
| "epoch": 0.43469809687052713, |
| "grad_norm": 1.4700381755828857, |
| "learning_rate": 3.141407001629656e-05, |
| "loss": 0.4193, |
| "step": 24600 |
| }, |
| { |
| "epoch": 0.43558162958774366, |
| "grad_norm": 1.150854468345642, |
| "learning_rate": 3.1364983997958025e-05, |
| "loss": 0.367, |
| "step": 24650 |
| }, |
| { |
| "epoch": 0.43646516230496013, |
| "grad_norm": 1.6972355842590332, |
| "learning_rate": 3.131589797961948e-05, |
| "loss": 0.3474, |
| "step": 24700 |
| }, |
| { |
| "epoch": 0.43734869502217666, |
| "grad_norm": 1.355474829673767, |
| "learning_rate": 3.126681196128095e-05, |
| "loss": 0.3116, |
| "step": 24750 |
| }, |
| { |
| "epoch": 0.4382322277393932, |
| "grad_norm": 1.4246526956558228, |
| "learning_rate": 3.121772594294242e-05, |
| "loss": 0.2733, |
| "step": 24800 |
| }, |
| { |
| "epoch": 0.4391157604566097, |
| "grad_norm": 1.5642348527908325, |
| "learning_rate": 3.116863992460388e-05, |
| "loss": 0.3046, |
| "step": 24850 |
| }, |
| { |
| "epoch": 0.43999929317382624, |
| "grad_norm": 1.5843394994735718, |
| "learning_rate": 3.111955390626534e-05, |
| "loss": 0.3627, |
| "step": 24900 |
| }, |
| { |
| "epoch": 0.4408828258910427, |
| "grad_norm": 1.6260349750518799, |
| "learning_rate": 3.10704678879268e-05, |
| "loss": 0.3403, |
| "step": 24950 |
| }, |
| { |
| "epoch": 0.44176635860825925, |
| "grad_norm": 1.7742459774017334, |
| "learning_rate": 3.1021381869588265e-05, |
| "loss": 0.349, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.4426498913254758, |
| "grad_norm": 1.4080630540847778, |
| "learning_rate": 3.0972295851249736e-05, |
| "loss": 0.3527, |
| "step": 25050 |
| }, |
| { |
| "epoch": 0.4435334240426923, |
| "grad_norm": 1.7197438478469849, |
| "learning_rate": 3.0923209832911194e-05, |
| "loss": 0.3773, |
| "step": 25100 |
| }, |
| { |
| "epoch": 0.44441695675990883, |
| "grad_norm": 1.5831055641174316, |
| "learning_rate": 3.087510553493943e-05, |
| "loss": 0.3372, |
| "step": 25150 |
| }, |
| { |
| "epoch": 0.44530048947712536, |
| "grad_norm": 1.7535090446472168, |
| "learning_rate": 3.082601951660089e-05, |
| "loss": 0.3178, |
| "step": 25200 |
| }, |
| { |
| "epoch": 0.44618402219434183, |
| "grad_norm": 1.6131466627120972, |
| "learning_rate": 3.077693349826236e-05, |
| "loss": 0.2745, |
| "step": 25250 |
| }, |
| { |
| "epoch": 0.44706755491155836, |
| "grad_norm": 1.5419201850891113, |
| "learning_rate": 3.072784747992382e-05, |
| "loss": 0.2773, |
| "step": 25300 |
| }, |
| { |
| "epoch": 0.4479510876287749, |
| "grad_norm": 1.6418931484222412, |
| "learning_rate": 3.067876146158528e-05, |
| "loss": 0.3822, |
| "step": 25350 |
| }, |
| { |
| "epoch": 0.4488346203459914, |
| "grad_norm": 1.288121223449707, |
| "learning_rate": 3.0629675443246745e-05, |
| "loss": 0.3851, |
| "step": 25400 |
| }, |
| { |
| "epoch": 0.44971815306320795, |
| "grad_norm": 1.9523035287857056, |
| "learning_rate": 3.058058942490821e-05, |
| "loss": 0.3805, |
| "step": 25450 |
| }, |
| { |
| "epoch": 0.4506016857804245, |
| "grad_norm": 3.3735404014587402, |
| "learning_rate": 3.0531503406569674e-05, |
| "loss": 0.3245, |
| "step": 25500 |
| }, |
| { |
| "epoch": 0.45148521849764095, |
| "grad_norm": 1.4013001918792725, |
| "learning_rate": 3.048241738823114e-05, |
| "loss": 0.2978, |
| "step": 25550 |
| }, |
| { |
| "epoch": 0.4523687512148575, |
| "grad_norm": 1.9055225849151611, |
| "learning_rate": 3.0433331369892604e-05, |
| "loss": 0.3397, |
| "step": 25600 |
| }, |
| { |
| "epoch": 0.453252283932074, |
| "grad_norm": 3.319705009460449, |
| "learning_rate": 3.0384245351554065e-05, |
| "loss": 0.4655, |
| "step": 25650 |
| }, |
| { |
| "epoch": 0.45413581664929054, |
| "grad_norm": 1.3729950189590454, |
| "learning_rate": 3.033515933321553e-05, |
| "loss": 0.2669, |
| "step": 25700 |
| }, |
| { |
| "epoch": 0.45501934936650706, |
| "grad_norm": 1.3527820110321045, |
| "learning_rate": 3.028607331487699e-05, |
| "loss": 0.3316, |
| "step": 25750 |
| }, |
| { |
| "epoch": 0.45590288208372354, |
| "grad_norm": 1.4500503540039062, |
| "learning_rate": 3.0236987296538455e-05, |
| "loss": 0.3395, |
| "step": 25800 |
| }, |
| { |
| "epoch": 0.45678641480094007, |
| "grad_norm": 2.8250796794891357, |
| "learning_rate": 3.018790127819992e-05, |
| "loss": 0.3631, |
| "step": 25850 |
| }, |
| { |
| "epoch": 0.4576699475181566, |
| "grad_norm": 1.1532173156738281, |
| "learning_rate": 3.013881525986138e-05, |
| "loss": 0.3418, |
| "step": 25900 |
| }, |
| { |
| "epoch": 0.4585534802353731, |
| "grad_norm": 1.687465786933899, |
| "learning_rate": 3.0089729241522846e-05, |
| "loss": 0.3351, |
| "step": 25950 |
| }, |
| { |
| "epoch": 0.45943701295258965, |
| "grad_norm": 4.05789852142334, |
| "learning_rate": 3.004064322318431e-05, |
| "loss": 0.3117, |
| "step": 26000 |
| }, |
| { |
| "epoch": 0.4603205456698062, |
| "grad_norm": 1.4303230047225952, |
| "learning_rate": 2.9991557204845772e-05, |
| "loss": 0.3197, |
| "step": 26050 |
| }, |
| { |
| "epoch": 0.46120407838702265, |
| "grad_norm": 3.692739248275757, |
| "learning_rate": 2.9942471186507237e-05, |
| "loss": 0.2856, |
| "step": 26100 |
| }, |
| { |
| "epoch": 0.4620876111042392, |
| "grad_norm": 2.6494288444519043, |
| "learning_rate": 2.98933851681687e-05, |
| "loss": 0.3668, |
| "step": 26150 |
| }, |
| { |
| "epoch": 0.4629711438214557, |
| "grad_norm": 1.832560420036316, |
| "learning_rate": 2.9844299149830163e-05, |
| "loss": 0.4672, |
| "step": 26200 |
| }, |
| { |
| "epoch": 0.46385467653867224, |
| "grad_norm": 3.4169373512268066, |
| "learning_rate": 2.9795213131491627e-05, |
| "loss": 0.373, |
| "step": 26250 |
| }, |
| { |
| "epoch": 0.46473820925588877, |
| "grad_norm": 1.5430257320404053, |
| "learning_rate": 2.974612711315309e-05, |
| "loss": 0.3232, |
| "step": 26300 |
| }, |
| { |
| "epoch": 0.46562174197310524, |
| "grad_norm": 1.674177646636963, |
| "learning_rate": 2.9697041094814553e-05, |
| "loss": 0.3461, |
| "step": 26350 |
| }, |
| { |
| "epoch": 0.46650527469032177, |
| "grad_norm": 1.7116457223892212, |
| "learning_rate": 2.9647955076476018e-05, |
| "loss": 0.2937, |
| "step": 26400 |
| }, |
| { |
| "epoch": 0.4673888074075383, |
| "grad_norm": 1.3711694478988647, |
| "learning_rate": 2.9599850778504252e-05, |
| "loss": 0.3511, |
| "step": 26450 |
| }, |
| { |
| "epoch": 0.4682723401247548, |
| "grad_norm": 3.0807628631591797, |
| "learning_rate": 2.9550764760165717e-05, |
| "loss": 0.3204, |
| "step": 26500 |
| }, |
| { |
| "epoch": 0.46915587284197136, |
| "grad_norm": 1.5949090719223022, |
| "learning_rate": 2.950167874182718e-05, |
| "loss": 0.2698, |
| "step": 26550 |
| }, |
| { |
| "epoch": 0.4700394055591879, |
| "grad_norm": 1.6748404502868652, |
| "learning_rate": 2.9452592723488643e-05, |
| "loss": 0.3019, |
| "step": 26600 |
| }, |
| { |
| "epoch": 0.47092293827640436, |
| "grad_norm": 1.6362017393112183, |
| "learning_rate": 2.9403506705150108e-05, |
| "loss": 0.276, |
| "step": 26650 |
| }, |
| { |
| "epoch": 0.4718064709936209, |
| "grad_norm": 1.5143210887908936, |
| "learning_rate": 2.935442068681157e-05, |
| "loss": 0.2572, |
| "step": 26700 |
| }, |
| { |
| "epoch": 0.4726900037108374, |
| "grad_norm": 2.1000730991363525, |
| "learning_rate": 2.9305334668473034e-05, |
| "loss": 0.2821, |
| "step": 26750 |
| }, |
| { |
| "epoch": 0.47357353642805394, |
| "grad_norm": 1.9400396347045898, |
| "learning_rate": 2.9256248650134498e-05, |
| "loss": 0.2753, |
| "step": 26800 |
| }, |
| { |
| "epoch": 0.47445706914527047, |
| "grad_norm": 1.7398908138275146, |
| "learning_rate": 2.920716263179596e-05, |
| "loss": 0.2789, |
| "step": 26850 |
| }, |
| { |
| "epoch": 0.47534060186248694, |
| "grad_norm": 1.456929087638855, |
| "learning_rate": 2.9158076613457424e-05, |
| "loss": 0.5175, |
| "step": 26900 |
| }, |
| { |
| "epoch": 0.4762241345797035, |
| "grad_norm": 1.4763001203536987, |
| "learning_rate": 2.910899059511889e-05, |
| "loss": 0.3398, |
| "step": 26950 |
| }, |
| { |
| "epoch": 0.47710766729692, |
| "grad_norm": 1.3316082954406738, |
| "learning_rate": 2.905990457678035e-05, |
| "loss": 0.3683, |
| "step": 27000 |
| }, |
| { |
| "epoch": 0.47799120001413653, |
| "grad_norm": 1.1095103025436401, |
| "learning_rate": 2.9010818558441815e-05, |
| "loss": 0.341, |
| "step": 27050 |
| }, |
| { |
| "epoch": 0.47887473273135306, |
| "grad_norm": 1.5168321132659912, |
| "learning_rate": 2.8961732540103276e-05, |
| "loss": 0.2753, |
| "step": 27100 |
| }, |
| { |
| "epoch": 0.4797582654485696, |
| "grad_norm": 1.9980124235153198, |
| "learning_rate": 2.891264652176474e-05, |
| "loss": 0.35, |
| "step": 27150 |
| }, |
| { |
| "epoch": 0.48064179816578606, |
| "grad_norm": 1.6252918243408203, |
| "learning_rate": 2.8863560503426205e-05, |
| "loss": 0.3143, |
| "step": 27200 |
| }, |
| { |
| "epoch": 0.4815253308830026, |
| "grad_norm": 1.6409038305282593, |
| "learning_rate": 2.8814474485087667e-05, |
| "loss": 0.3968, |
| "step": 27250 |
| }, |
| { |
| "epoch": 0.4824088636002191, |
| "grad_norm": 1.4830607175827026, |
| "learning_rate": 2.876538846674913e-05, |
| "loss": 0.3246, |
| "step": 27300 |
| }, |
| { |
| "epoch": 0.48329239631743565, |
| "grad_norm": 1.6359367370605469, |
| "learning_rate": 2.87163024484106e-05, |
| "loss": 0.3131, |
| "step": 27350 |
| }, |
| { |
| "epoch": 0.4841759290346522, |
| "grad_norm": 1.1834681034088135, |
| "learning_rate": 2.8667216430072057e-05, |
| "loss": 0.3078, |
| "step": 27400 |
| }, |
| { |
| "epoch": 0.48505946175186865, |
| "grad_norm": 1.3667497634887695, |
| "learning_rate": 2.8618130411733522e-05, |
| "loss": 0.419, |
| "step": 27450 |
| }, |
| { |
| "epoch": 0.4859429944690852, |
| "grad_norm": 4.66032075881958, |
| "learning_rate": 2.856904439339499e-05, |
| "loss": 0.3959, |
| "step": 27500 |
| }, |
| { |
| "epoch": 0.4868265271863017, |
| "grad_norm": 1.530393362045288, |
| "learning_rate": 2.8519958375056448e-05, |
| "loss": 0.3754, |
| "step": 27550 |
| }, |
| { |
| "epoch": 0.48771005990351823, |
| "grad_norm": 0.9399372935295105, |
| "learning_rate": 2.8470872356717916e-05, |
| "loss": 0.3163, |
| "step": 27600 |
| }, |
| { |
| "epoch": 0.48859359262073476, |
| "grad_norm": 1.654520869255066, |
| "learning_rate": 2.8421786338379374e-05, |
| "loss": 0.35, |
| "step": 27650 |
| }, |
| { |
| "epoch": 0.4894771253379513, |
| "grad_norm": 1.5777958631515503, |
| "learning_rate": 2.8372700320040842e-05, |
| "loss": 0.3397, |
| "step": 27700 |
| }, |
| { |
| "epoch": 0.49036065805516776, |
| "grad_norm": 1.4474226236343384, |
| "learning_rate": 2.8323614301702307e-05, |
| "loss": 0.3853, |
| "step": 27750 |
| }, |
| { |
| "epoch": 0.4912441907723843, |
| "grad_norm": 1.603667140007019, |
| "learning_rate": 2.8274528283363765e-05, |
| "loss": 0.2568, |
| "step": 27800 |
| }, |
| { |
| "epoch": 0.4921277234896008, |
| "grad_norm": 1.727280855178833, |
| "learning_rate": 2.8225442265025233e-05, |
| "loss": 0.3108, |
| "step": 27850 |
| }, |
| { |
| "epoch": 0.49301125620681735, |
| "grad_norm": 1.4632737636566162, |
| "learning_rate": 2.8176356246686697e-05, |
| "loss": 0.4098, |
| "step": 27900 |
| }, |
| { |
| "epoch": 0.4938947889240339, |
| "grad_norm": 1.5443991422653198, |
| "learning_rate": 2.812727022834816e-05, |
| "loss": 0.3364, |
| "step": 27950 |
| }, |
| { |
| "epoch": 0.49477832164125035, |
| "grad_norm": 1.7304097414016724, |
| "learning_rate": 2.8078184210009623e-05, |
| "loss": 0.3354, |
| "step": 28000 |
| }, |
| { |
| "epoch": 0.4956618543584669, |
| "grad_norm": 1.141662359237671, |
| "learning_rate": 2.8029098191671088e-05, |
| "loss": 0.2879, |
| "step": 28050 |
| }, |
| { |
| "epoch": 0.4965453870756834, |
| "grad_norm": 1.5769354104995728, |
| "learning_rate": 2.798001217333255e-05, |
| "loss": 0.3604, |
| "step": 28100 |
| }, |
| { |
| "epoch": 0.49742891979289994, |
| "grad_norm": 2.3104453086853027, |
| "learning_rate": 2.7930926154994014e-05, |
| "loss": 0.2612, |
| "step": 28150 |
| }, |
| { |
| "epoch": 0.49831245251011647, |
| "grad_norm": 0.764305830001831, |
| "learning_rate": 2.7881840136655475e-05, |
| "loss": 0.3593, |
| "step": 28200 |
| }, |
| { |
| "epoch": 0.499195985227333, |
| "grad_norm": 1.1693766117095947, |
| "learning_rate": 2.783275411831694e-05, |
| "loss": 0.2961, |
| "step": 28250 |
| }, |
| { |
| "epoch": 0.5000795179445495, |
| "grad_norm": 1.65450918674469, |
| "learning_rate": 2.7783668099978404e-05, |
| "loss": 0.3338, |
| "step": 28300 |
| }, |
| { |
| "epoch": 0.500963050661766, |
| "grad_norm": 1.438693642616272, |
| "learning_rate": 2.7734582081639866e-05, |
| "loss": 0.3109, |
| "step": 28350 |
| }, |
| { |
| "epoch": 0.5018465833789825, |
| "grad_norm": 1.5170999765396118, |
| "learning_rate": 2.768549606330133e-05, |
| "loss": 0.3234, |
| "step": 28400 |
| }, |
| { |
| "epoch": 0.502730116096199, |
| "grad_norm": 1.497454285621643, |
| "learning_rate": 2.7636410044962795e-05, |
| "loss": 0.3257, |
| "step": 28450 |
| }, |
| { |
| "epoch": 0.5036136488134155, |
| "grad_norm": 3.3886194229125977, |
| "learning_rate": 2.7587324026624256e-05, |
| "loss": 0.4675, |
| "step": 28500 |
| }, |
| { |
| "epoch": 0.504497181530632, |
| "grad_norm": 1.6604270935058594, |
| "learning_rate": 2.753823800828572e-05, |
| "loss": 0.4318, |
| "step": 28550 |
| }, |
| { |
| "epoch": 0.5053807142478486, |
| "grad_norm": 1.7005223035812378, |
| "learning_rate": 2.7489151989947186e-05, |
| "loss": 0.3594, |
| "step": 28600 |
| }, |
| { |
| "epoch": 0.5062642469650651, |
| "grad_norm": 1.109703540802002, |
| "learning_rate": 2.7440065971608647e-05, |
| "loss": 0.3214, |
| "step": 28650 |
| }, |
| { |
| "epoch": 0.5071477796822816, |
| "grad_norm": 1.9164469242095947, |
| "learning_rate": 2.739097995327011e-05, |
| "loss": 0.2856, |
| "step": 28700 |
| }, |
| { |
| "epoch": 0.5080313123994982, |
| "grad_norm": 1.3944114446640015, |
| "learning_rate": 2.7341893934931573e-05, |
| "loss": 0.3094, |
| "step": 28750 |
| }, |
| { |
| "epoch": 0.5089148451167147, |
| "grad_norm": 1.3844256401062012, |
| "learning_rate": 2.7292807916593038e-05, |
| "loss": 0.3933, |
| "step": 28800 |
| }, |
| { |
| "epoch": 0.5097983778339312, |
| "grad_norm": 3.18278431892395, |
| "learning_rate": 2.7243721898254506e-05, |
| "loss": 0.3432, |
| "step": 28850 |
| }, |
| { |
| "epoch": 0.5106819105511478, |
| "grad_norm": 1.7024506330490112, |
| "learning_rate": 2.7194635879915964e-05, |
| "loss": 0.3766, |
| "step": 28900 |
| }, |
| { |
| "epoch": 0.5115654432683642, |
| "grad_norm": 1.4224214553833008, |
| "learning_rate": 2.7145549861577428e-05, |
| "loss": 0.3308, |
| "step": 28950 |
| }, |
| { |
| "epoch": 0.5124489759855807, |
| "grad_norm": 1.5428136587142944, |
| "learning_rate": 2.7096463843238896e-05, |
| "loss": 0.3453, |
| "step": 29000 |
| }, |
| { |
| "epoch": 0.5133325087027972, |
| "grad_norm": 1.4710556268692017, |
| "learning_rate": 2.7047377824900354e-05, |
| "loss": 0.2904, |
| "step": 29050 |
| }, |
| { |
| "epoch": 0.5142160414200138, |
| "grad_norm": 1.5080032348632812, |
| "learning_rate": 2.6998291806561822e-05, |
| "loss": 0.2647, |
| "step": 29100 |
| }, |
| { |
| "epoch": 0.5150995741372303, |
| "grad_norm": 1.7176605463027954, |
| "learning_rate": 2.6949205788223287e-05, |
| "loss": 0.4395, |
| "step": 29150 |
| }, |
| { |
| "epoch": 0.5159831068544468, |
| "grad_norm": 1.4339267015457153, |
| "learning_rate": 2.6900119769884745e-05, |
| "loss": 0.295, |
| "step": 29200 |
| }, |
| { |
| "epoch": 0.5168666395716633, |
| "grad_norm": 1.1258848905563354, |
| "learning_rate": 2.6851033751546213e-05, |
| "loss": 0.3927, |
| "step": 29250 |
| }, |
| { |
| "epoch": 0.5177501722888799, |
| "grad_norm": 2.5667836666107178, |
| "learning_rate": 2.680194773320767e-05, |
| "loss": 0.3492, |
| "step": 29300 |
| }, |
| { |
| "epoch": 0.5186337050060964, |
| "grad_norm": 1.7218468189239502, |
| "learning_rate": 2.675286171486914e-05, |
| "loss": 0.3304, |
| "step": 29350 |
| }, |
| { |
| "epoch": 0.5195172377233129, |
| "grad_norm": 2.4908971786499023, |
| "learning_rate": 2.6703775696530603e-05, |
| "loss": 0.3557, |
| "step": 29400 |
| }, |
| { |
| "epoch": 0.5204007704405295, |
| "grad_norm": 1.787463665008545, |
| "learning_rate": 2.665468967819206e-05, |
| "loss": 0.3389, |
| "step": 29450 |
| }, |
| { |
| "epoch": 0.5212843031577459, |
| "grad_norm": 3.174107789993286, |
| "learning_rate": 2.660560365985353e-05, |
| "loss": 0.3322, |
| "step": 29500 |
| }, |
| { |
| "epoch": 0.5221678358749624, |
| "grad_norm": 1.648913025856018, |
| "learning_rate": 2.6556517641514994e-05, |
| "loss": 0.3053, |
| "step": 29550 |
| }, |
| { |
| "epoch": 0.5230513685921789, |
| "grad_norm": 1.648561954498291, |
| "learning_rate": 2.6507431623176455e-05, |
| "loss": 0.2486, |
| "step": 29600 |
| }, |
| { |
| "epoch": 0.5239349013093955, |
| "grad_norm": 1.199449062347412, |
| "learning_rate": 2.645834560483792e-05, |
| "loss": 0.282, |
| "step": 29650 |
| }, |
| { |
| "epoch": 0.524818434026612, |
| "grad_norm": 0.9432544112205505, |
| "learning_rate": 2.6409259586499385e-05, |
| "loss": 0.3791, |
| "step": 29700 |
| }, |
| { |
| "epoch": 0.5257019667438285, |
| "grad_norm": 2.9582953453063965, |
| "learning_rate": 2.6360173568160846e-05, |
| "loss": 0.3346, |
| "step": 29750 |
| }, |
| { |
| "epoch": 0.526585499461045, |
| "grad_norm": 1.5263501405715942, |
| "learning_rate": 2.631108754982231e-05, |
| "loss": 0.2743, |
| "step": 29800 |
| }, |
| { |
| "epoch": 0.5274690321782616, |
| "grad_norm": 1.63582181930542, |
| "learning_rate": 2.6262001531483772e-05, |
| "loss": 0.2927, |
| "step": 29850 |
| }, |
| { |
| "epoch": 0.5283525648954781, |
| "grad_norm": 1.843386173248291, |
| "learning_rate": 2.6212915513145237e-05, |
| "loss": 0.3775, |
| "step": 29900 |
| }, |
| { |
| "epoch": 0.5292360976126946, |
| "grad_norm": 1.236327886581421, |
| "learning_rate": 2.61638294948067e-05, |
| "loss": 0.3114, |
| "step": 29950 |
| }, |
| { |
| "epoch": 0.5301196303299112, |
| "grad_norm": 1.5327879190444946, |
| "learning_rate": 2.6114743476468162e-05, |
| "loss": 0.2383, |
| "step": 30000 |
| }, |
| { |
| "epoch": 0.5310031630471276, |
| "grad_norm": 1.6281217336654663, |
| "learning_rate": 2.6065657458129627e-05, |
| "loss": 0.3798, |
| "step": 30050 |
| }, |
| { |
| "epoch": 0.5318866957643441, |
| "grad_norm": 1.1688692569732666, |
| "learning_rate": 2.6016571439791092e-05, |
| "loss": 0.3204, |
| "step": 30100 |
| }, |
| { |
| "epoch": 0.5327702284815606, |
| "grad_norm": 1.354048490524292, |
| "learning_rate": 2.5967485421452553e-05, |
| "loss": 0.2496, |
| "step": 30150 |
| }, |
| { |
| "epoch": 0.5336537611987772, |
| "grad_norm": 2.8124821186065674, |
| "learning_rate": 2.5918399403114018e-05, |
| "loss": 0.4147, |
| "step": 30200 |
| }, |
| { |
| "epoch": 0.5345372939159937, |
| "grad_norm": 1.886425495147705, |
| "learning_rate": 2.5869313384775486e-05, |
| "loss": 0.3021, |
| "step": 30250 |
| }, |
| { |
| "epoch": 0.5354208266332102, |
| "grad_norm": 1.6316314935684204, |
| "learning_rate": 2.5820227366436944e-05, |
| "loss": 0.2758, |
| "step": 30300 |
| }, |
| { |
| "epoch": 0.5363043593504268, |
| "grad_norm": 1.3990044593811035, |
| "learning_rate": 2.577114134809841e-05, |
| "loss": 0.3166, |
| "step": 30350 |
| }, |
| { |
| "epoch": 0.5371878920676433, |
| "grad_norm": 2.1562857627868652, |
| "learning_rate": 2.572205532975987e-05, |
| "loss": 0.35, |
| "step": 30400 |
| }, |
| { |
| "epoch": 0.5380714247848598, |
| "grad_norm": 1.1287676095962524, |
| "learning_rate": 2.5672969311421334e-05, |
| "loss": 0.3391, |
| "step": 30450 |
| }, |
| { |
| "epoch": 0.5389549575020763, |
| "grad_norm": 1.7524675130844116, |
| "learning_rate": 2.5623883293082802e-05, |
| "loss": 0.3576, |
| "step": 30500 |
| }, |
| { |
| "epoch": 0.5398384902192929, |
| "grad_norm": 1.1238594055175781, |
| "learning_rate": 2.5575778995111033e-05, |
| "loss": 0.295, |
| "step": 30550 |
| }, |
| { |
| "epoch": 0.5407220229365094, |
| "grad_norm": 0.9298042058944702, |
| "learning_rate": 2.5526692976772498e-05, |
| "loss": 0.3449, |
| "step": 30600 |
| }, |
| { |
| "epoch": 0.5416055556537258, |
| "grad_norm": 1.5093685388565063, |
| "learning_rate": 2.547760695843396e-05, |
| "loss": 0.3274, |
| "step": 30650 |
| }, |
| { |
| "epoch": 0.5424890883709423, |
| "grad_norm": 1.4606502056121826, |
| "learning_rate": 2.5428520940095424e-05, |
| "loss": 0.3094, |
| "step": 30700 |
| }, |
| { |
| "epoch": 0.5433726210881589, |
| "grad_norm": 1.7957881689071655, |
| "learning_rate": 2.537943492175689e-05, |
| "loss": 0.3077, |
| "step": 30750 |
| }, |
| { |
| "epoch": 0.5442561538053754, |
| "grad_norm": 1.4665497541427612, |
| "learning_rate": 2.533034890341835e-05, |
| "loss": 0.3505, |
| "step": 30800 |
| }, |
| { |
| "epoch": 0.5451396865225919, |
| "grad_norm": 1.785367488861084, |
| "learning_rate": 2.5281262885079815e-05, |
| "loss": 0.3485, |
| "step": 30850 |
| }, |
| { |
| "epoch": 0.5460232192398085, |
| "grad_norm": 4.639885425567627, |
| "learning_rate": 2.523217686674128e-05, |
| "loss": 0.331, |
| "step": 30900 |
| }, |
| { |
| "epoch": 0.546906751957025, |
| "grad_norm": 1.308772325515747, |
| "learning_rate": 2.518309084840274e-05, |
| "loss": 0.2846, |
| "step": 30950 |
| }, |
| { |
| "epoch": 0.5477902846742415, |
| "grad_norm": 1.3961265087127686, |
| "learning_rate": 2.5134004830064205e-05, |
| "loss": 0.3647, |
| "step": 31000 |
| }, |
| { |
| "epoch": 0.548673817391458, |
| "grad_norm": 1.0688265562057495, |
| "learning_rate": 2.5084918811725673e-05, |
| "loss": 0.3475, |
| "step": 31050 |
| }, |
| { |
| "epoch": 0.5495573501086746, |
| "grad_norm": 1.7052621841430664, |
| "learning_rate": 2.503583279338713e-05, |
| "loss": 0.2833, |
| "step": 31100 |
| }, |
| { |
| "epoch": 0.5504408828258911, |
| "grad_norm": 1.5378305912017822, |
| "learning_rate": 2.4986746775048596e-05, |
| "loss": 0.3, |
| "step": 31150 |
| }, |
| { |
| "epoch": 0.5513244155431075, |
| "grad_norm": 3.8670883178710938, |
| "learning_rate": 2.493766075671006e-05, |
| "loss": 0.3568, |
| "step": 31200 |
| }, |
| { |
| "epoch": 0.552207948260324, |
| "grad_norm": 1.8015788793563843, |
| "learning_rate": 2.4888574738371522e-05, |
| "loss": 0.3268, |
| "step": 31250 |
| }, |
| { |
| "epoch": 0.5530914809775406, |
| "grad_norm": 2.7606303691864014, |
| "learning_rate": 2.4839488720032987e-05, |
| "loss": 0.4005, |
| "step": 31300 |
| }, |
| { |
| "epoch": 0.5539750136947571, |
| "grad_norm": 1.3418834209442139, |
| "learning_rate": 2.479040270169445e-05, |
| "loss": 0.2993, |
| "step": 31350 |
| }, |
| { |
| "epoch": 0.5548585464119736, |
| "grad_norm": 1.3790879249572754, |
| "learning_rate": 2.4741316683355912e-05, |
| "loss": 0.3463, |
| "step": 31400 |
| }, |
| { |
| "epoch": 0.5557420791291902, |
| "grad_norm": 1.5994555950164795, |
| "learning_rate": 2.4692230665017377e-05, |
| "loss": 0.3654, |
| "step": 31450 |
| }, |
| { |
| "epoch": 0.5566256118464067, |
| "grad_norm": 1.528947114944458, |
| "learning_rate": 2.4643144646678842e-05, |
| "loss": 0.3329, |
| "step": 31500 |
| }, |
| { |
| "epoch": 0.5575091445636232, |
| "grad_norm": 1.4391777515411377, |
| "learning_rate": 2.4594058628340306e-05, |
| "loss": 0.2794, |
| "step": 31550 |
| }, |
| { |
| "epoch": 0.5583926772808397, |
| "grad_norm": 4.419312953948975, |
| "learning_rate": 2.4544972610001768e-05, |
| "loss": 0.4189, |
| "step": 31600 |
| }, |
| { |
| "epoch": 0.5592762099980563, |
| "grad_norm": 1.5030118227005005, |
| "learning_rate": 2.4495886591663232e-05, |
| "loss": 0.3643, |
| "step": 31650 |
| }, |
| { |
| "epoch": 0.5601597427152728, |
| "grad_norm": 1.3483951091766357, |
| "learning_rate": 2.4446800573324697e-05, |
| "loss": 0.3578, |
| "step": 31700 |
| }, |
| { |
| "epoch": 0.5610432754324892, |
| "grad_norm": 1.5314035415649414, |
| "learning_rate": 2.439771455498616e-05, |
| "loss": 0.3193, |
| "step": 31750 |
| }, |
| { |
| "epoch": 0.5619268081497057, |
| "grad_norm": 1.1020389795303345, |
| "learning_rate": 2.4348628536647623e-05, |
| "loss": 0.327, |
| "step": 31800 |
| }, |
| { |
| "epoch": 0.5628103408669223, |
| "grad_norm": 1.445654034614563, |
| "learning_rate": 2.4299542518309084e-05, |
| "loss": 0.3429, |
| "step": 31850 |
| }, |
| { |
| "epoch": 0.5636938735841388, |
| "grad_norm": 1.3795325756072998, |
| "learning_rate": 2.425045649997055e-05, |
| "loss": 0.2994, |
| "step": 31900 |
| }, |
| { |
| "epoch": 0.5645774063013553, |
| "grad_norm": 1.7217411994934082, |
| "learning_rate": 2.4201370481632014e-05, |
| "loss": 0.3219, |
| "step": 31950 |
| }, |
| { |
| "epoch": 0.5654609390185719, |
| "grad_norm": 1.3482351303100586, |
| "learning_rate": 2.4152284463293475e-05, |
| "loss": 0.2902, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.5663444717357884, |
| "grad_norm": 2.785452365875244, |
| "learning_rate": 2.4103198444954943e-05, |
| "loss": 0.3896, |
| "step": 32050 |
| }, |
| { |
| "epoch": 0.5672280044530049, |
| "grad_norm": 2.5383968353271484, |
| "learning_rate": 2.4054112426616404e-05, |
| "loss": 0.2491, |
| "step": 32100 |
| }, |
| { |
| "epoch": 0.5681115371702214, |
| "grad_norm": 1.584861397743225, |
| "learning_rate": 2.4005026408277866e-05, |
| "loss": 0.2663, |
| "step": 32150 |
| }, |
| { |
| "epoch": 0.568995069887438, |
| "grad_norm": 1.5586644411087036, |
| "learning_rate": 2.395594038993933e-05, |
| "loss": 0.3433, |
| "step": 32200 |
| }, |
| { |
| "epoch": 0.5698786026046545, |
| "grad_norm": 1.4697036743164062, |
| "learning_rate": 2.3906854371600795e-05, |
| "loss": 0.3375, |
| "step": 32250 |
| }, |
| { |
| "epoch": 0.5707621353218709, |
| "grad_norm": 2.39277720451355, |
| "learning_rate": 2.385776835326226e-05, |
| "loss": 0.2891, |
| "step": 32300 |
| }, |
| { |
| "epoch": 0.5716456680390875, |
| "grad_norm": 1.5755674839019775, |
| "learning_rate": 2.380868233492372e-05, |
| "loss": 0.296, |
| "step": 32350 |
| }, |
| { |
| "epoch": 0.572529200756304, |
| "grad_norm": 1.5802369117736816, |
| "learning_rate": 2.3759596316585182e-05, |
| "loss": 0.2478, |
| "step": 32400 |
| }, |
| { |
| "epoch": 0.5734127334735205, |
| "grad_norm": 2.731212615966797, |
| "learning_rate": 2.371051029824665e-05, |
| "loss": 0.3514, |
| "step": 32450 |
| }, |
| { |
| "epoch": 0.574296266190737, |
| "grad_norm": 1.70058274269104, |
| "learning_rate": 2.366142427990811e-05, |
| "loss": 0.2741, |
| "step": 32500 |
| }, |
| { |
| "epoch": 0.5751797989079536, |
| "grad_norm": 3.394753932952881, |
| "learning_rate": 2.3612338261569576e-05, |
| "loss": 0.3546, |
| "step": 32550 |
| }, |
| { |
| "epoch": 0.5760633316251701, |
| "grad_norm": 2.7270805835723877, |
| "learning_rate": 2.356423396359781e-05, |
| "loss": 0.3927, |
| "step": 32600 |
| }, |
| { |
| "epoch": 0.5769468643423866, |
| "grad_norm": 2.3731272220611572, |
| "learning_rate": 2.3515147945259272e-05, |
| "loss": 0.2725, |
| "step": 32650 |
| }, |
| { |
| "epoch": 0.5778303970596032, |
| "grad_norm": 1.4900075197219849, |
| "learning_rate": 2.3466061926920737e-05, |
| "loss": 0.3167, |
| "step": 32700 |
| }, |
| { |
| "epoch": 0.5787139297768197, |
| "grad_norm": 1.2145545482635498, |
| "learning_rate": 2.34169759085822e-05, |
| "loss": 0.3249, |
| "step": 32750 |
| }, |
| { |
| "epoch": 0.5795974624940362, |
| "grad_norm": 1.725298285484314, |
| "learning_rate": 2.3367889890243662e-05, |
| "loss": 0.2443, |
| "step": 32800 |
| }, |
| { |
| "epoch": 0.5804809952112526, |
| "grad_norm": 1.316084384918213, |
| "learning_rate": 2.331880387190513e-05, |
| "loss": 0.4113, |
| "step": 32850 |
| }, |
| { |
| "epoch": 0.5813645279284692, |
| "grad_norm": 1.8195414543151855, |
| "learning_rate": 2.3269717853566592e-05, |
| "loss": 0.3106, |
| "step": 32900 |
| }, |
| { |
| "epoch": 0.5822480606456857, |
| "grad_norm": 1.1715435981750488, |
| "learning_rate": 2.3220631835228053e-05, |
| "loss": 0.2841, |
| "step": 32950 |
| }, |
| { |
| "epoch": 0.5831315933629022, |
| "grad_norm": 1.3928303718566895, |
| "learning_rate": 2.3171545816889518e-05, |
| "loss": 0.2786, |
| "step": 33000 |
| }, |
| { |
| "epoch": 0.5840151260801187, |
| "grad_norm": 1.4881165027618408, |
| "learning_rate": 2.3122459798550982e-05, |
| "loss": 0.3576, |
| "step": 33050 |
| }, |
| { |
| "epoch": 0.5848986587973353, |
| "grad_norm": 2.8615384101867676, |
| "learning_rate": 2.3073373780212447e-05, |
| "loss": 0.2475, |
| "step": 33100 |
| }, |
| { |
| "epoch": 0.5857821915145518, |
| "grad_norm": 1.819924235343933, |
| "learning_rate": 2.302428776187391e-05, |
| "loss": 0.348, |
| "step": 33150 |
| }, |
| { |
| "epoch": 0.5866657242317683, |
| "grad_norm": 1.5402089357376099, |
| "learning_rate": 2.297520174353537e-05, |
| "loss": 0.2779, |
| "step": 33200 |
| }, |
| { |
| "epoch": 0.5875492569489849, |
| "grad_norm": 1.7234498262405396, |
| "learning_rate": 2.2926115725196838e-05, |
| "loss": 0.3166, |
| "step": 33250 |
| }, |
| { |
| "epoch": 0.5884327896662014, |
| "grad_norm": 1.4789388179779053, |
| "learning_rate": 2.28770297068583e-05, |
| "loss": 0.3448, |
| "step": 33300 |
| }, |
| { |
| "epoch": 0.5893163223834179, |
| "grad_norm": 0.8780321478843689, |
| "learning_rate": 2.2827943688519764e-05, |
| "loss": 0.2409, |
| "step": 33350 |
| }, |
| { |
| "epoch": 0.5901998551006343, |
| "grad_norm": 1.9462053775787354, |
| "learning_rate": 2.277885767018123e-05, |
| "loss": 0.3313, |
| "step": 33400 |
| }, |
| { |
| "epoch": 0.5910833878178509, |
| "grad_norm": 1.6026935577392578, |
| "learning_rate": 2.272977165184269e-05, |
| "loss": 0.2981, |
| "step": 33450 |
| }, |
| { |
| "epoch": 0.5919669205350674, |
| "grad_norm": 2.3030807971954346, |
| "learning_rate": 2.2680685633504154e-05, |
| "loss": 0.269, |
| "step": 33500 |
| }, |
| { |
| "epoch": 0.5928504532522839, |
| "grad_norm": 2.8911454677581787, |
| "learning_rate": 2.2631599615165616e-05, |
| "loss": 0.4098, |
| "step": 33550 |
| }, |
| { |
| "epoch": 0.5937339859695004, |
| "grad_norm": 1.4643045663833618, |
| "learning_rate": 2.258251359682708e-05, |
| "loss": 0.2924, |
| "step": 33600 |
| }, |
| { |
| "epoch": 0.594617518686717, |
| "grad_norm": 2.0076584815979004, |
| "learning_rate": 2.2533427578488545e-05, |
| "loss": 0.2952, |
| "step": 33650 |
| }, |
| { |
| "epoch": 0.5955010514039335, |
| "grad_norm": 1.203574299812317, |
| "learning_rate": 2.2484341560150006e-05, |
| "loss": 0.2754, |
| "step": 33700 |
| }, |
| { |
| "epoch": 0.59638458412115, |
| "grad_norm": 2.815420150756836, |
| "learning_rate": 2.243525554181147e-05, |
| "loss": 0.3434, |
| "step": 33750 |
| }, |
| { |
| "epoch": 0.5972681168383666, |
| "grad_norm": 1.487236499786377, |
| "learning_rate": 2.2386169523472935e-05, |
| "loss": 0.2541, |
| "step": 33800 |
| }, |
| { |
| "epoch": 0.5981516495555831, |
| "grad_norm": 1.532326102256775, |
| "learning_rate": 2.23370835051344e-05, |
| "loss": 0.2923, |
| "step": 33850 |
| }, |
| { |
| "epoch": 0.5990351822727996, |
| "grad_norm": 1.543256402015686, |
| "learning_rate": 2.228799748679586e-05, |
| "loss": 0.2361, |
| "step": 33900 |
| }, |
| { |
| "epoch": 0.599918714990016, |
| "grad_norm": 1.5733423233032227, |
| "learning_rate": 2.2238911468457326e-05, |
| "loss": 0.4459, |
| "step": 33950 |
| }, |
| { |
| "epoch": 0.6008022477072326, |
| "grad_norm": 1.2398439645767212, |
| "learning_rate": 2.218982545011879e-05, |
| "loss": 0.3169, |
| "step": 34000 |
| }, |
| { |
| "epoch": 0.6016857804244491, |
| "grad_norm": 1.1555734872817993, |
| "learning_rate": 2.2140739431780252e-05, |
| "loss": 0.3146, |
| "step": 34050 |
| }, |
| { |
| "epoch": 0.6025693131416656, |
| "grad_norm": 1.4827885627746582, |
| "learning_rate": 2.2091653413441717e-05, |
| "loss": 0.3125, |
| "step": 34100 |
| }, |
| { |
| "epoch": 0.6034528458588821, |
| "grad_norm": 1.5724104642868042, |
| "learning_rate": 2.204256739510318e-05, |
| "loss": 0.2738, |
| "step": 34150 |
| }, |
| { |
| "epoch": 0.6043363785760987, |
| "grad_norm": 1.5903054475784302, |
| "learning_rate": 2.1993481376764643e-05, |
| "loss": 0.3062, |
| "step": 34200 |
| }, |
| { |
| "epoch": 0.6052199112933152, |
| "grad_norm": 1.5402554273605347, |
| "learning_rate": 2.1944395358426107e-05, |
| "loss": 0.3566, |
| "step": 34250 |
| }, |
| { |
| "epoch": 0.6061034440105317, |
| "grad_norm": 1.7631182670593262, |
| "learning_rate": 2.189530934008757e-05, |
| "loss": 0.3466, |
| "step": 34300 |
| }, |
| { |
| "epoch": 0.6069869767277483, |
| "grad_norm": 1.2873070240020752, |
| "learning_rate": 2.1846223321749033e-05, |
| "loss": 0.2608, |
| "step": 34350 |
| }, |
| { |
| "epoch": 0.6078705094449648, |
| "grad_norm": 1.8117417097091675, |
| "learning_rate": 2.1797137303410498e-05, |
| "loss": 0.3239, |
| "step": 34400 |
| }, |
| { |
| "epoch": 0.6087540421621813, |
| "grad_norm": 1.5316294431686401, |
| "learning_rate": 2.174805128507196e-05, |
| "loss": 0.3557, |
| "step": 34450 |
| }, |
| { |
| "epoch": 0.6096375748793977, |
| "grad_norm": 1.539382815361023, |
| "learning_rate": 2.1698965266733427e-05, |
| "loss": 0.4175, |
| "step": 34500 |
| }, |
| { |
| "epoch": 0.6105211075966143, |
| "grad_norm": 1.6773380041122437, |
| "learning_rate": 2.164987924839489e-05, |
| "loss": 0.3104, |
| "step": 34550 |
| }, |
| { |
| "epoch": 0.6114046403138308, |
| "grad_norm": 1.3534982204437256, |
| "learning_rate": 2.1601774950423123e-05, |
| "loss": 0.3305, |
| "step": 34600 |
| }, |
| { |
| "epoch": 0.6122881730310473, |
| "grad_norm": 1.416923999786377, |
| "learning_rate": 2.1552688932084584e-05, |
| "loss": 0.2832, |
| "step": 34650 |
| }, |
| { |
| "epoch": 0.6131717057482639, |
| "grad_norm": 1.7992863655090332, |
| "learning_rate": 2.150360291374605e-05, |
| "loss": 0.3007, |
| "step": 34700 |
| }, |
| { |
| "epoch": 0.6140552384654804, |
| "grad_norm": 1.3988946676254272, |
| "learning_rate": 2.1454516895407514e-05, |
| "loss": 0.3932, |
| "step": 34750 |
| }, |
| { |
| "epoch": 0.6149387711826969, |
| "grad_norm": 1.7125048637390137, |
| "learning_rate": 2.1405430877068978e-05, |
| "loss": 0.3, |
| "step": 34800 |
| }, |
| { |
| "epoch": 0.6158223038999134, |
| "grad_norm": 1.4415560960769653, |
| "learning_rate": 2.135634485873044e-05, |
| "loss": 0.2785, |
| "step": 34850 |
| }, |
| { |
| "epoch": 0.61670583661713, |
| "grad_norm": 1.8688596487045288, |
| "learning_rate": 2.1307258840391904e-05, |
| "loss": 0.3015, |
| "step": 34900 |
| }, |
| { |
| "epoch": 0.6175893693343465, |
| "grad_norm": 3.085685968399048, |
| "learning_rate": 2.125817282205337e-05, |
| "loss": 0.3291, |
| "step": 34950 |
| }, |
| { |
| "epoch": 0.618472902051563, |
| "grad_norm": 1.3053193092346191, |
| "learning_rate": 2.120908680371483e-05, |
| "loss": 0.2634, |
| "step": 35000 |
| }, |
| { |
| "epoch": 0.6193564347687794, |
| "grad_norm": 1.4780889749526978, |
| "learning_rate": 2.1160000785376295e-05, |
| "loss": 0.3212, |
| "step": 35050 |
| }, |
| { |
| "epoch": 0.620239967485996, |
| "grad_norm": 1.699916124343872, |
| "learning_rate": 2.1110914767037756e-05, |
| "loss": 0.2965, |
| "step": 35100 |
| }, |
| { |
| "epoch": 0.6211235002032125, |
| "grad_norm": 1.6198956966400146, |
| "learning_rate": 2.106182874869922e-05, |
| "loss": 0.3557, |
| "step": 35150 |
| }, |
| { |
| "epoch": 0.622007032920429, |
| "grad_norm": 1.2697581052780151, |
| "learning_rate": 2.1012742730360685e-05, |
| "loss": 0.3535, |
| "step": 35200 |
| }, |
| { |
| "epoch": 0.6228905656376456, |
| "grad_norm": 1.9256399869918823, |
| "learning_rate": 2.0963656712022147e-05, |
| "loss": 0.4183, |
| "step": 35250 |
| }, |
| { |
| "epoch": 0.6237740983548621, |
| "grad_norm": 1.4346308708190918, |
| "learning_rate": 2.0914570693683615e-05, |
| "loss": 0.3355, |
| "step": 35300 |
| }, |
| { |
| "epoch": 0.6246576310720786, |
| "grad_norm": 1.3797852993011475, |
| "learning_rate": 2.0865484675345076e-05, |
| "loss": 0.3626, |
| "step": 35350 |
| }, |
| { |
| "epoch": 0.6255411637892951, |
| "grad_norm": 3.1976869106292725, |
| "learning_rate": 2.0816398657006537e-05, |
| "loss": 0.2542, |
| "step": 35400 |
| }, |
| { |
| "epoch": 0.6264246965065117, |
| "grad_norm": 1.4315252304077148, |
| "learning_rate": 2.0767312638668002e-05, |
| "loss": 0.2555, |
| "step": 35450 |
| }, |
| { |
| "epoch": 0.6273082292237282, |
| "grad_norm": 2.861154079437256, |
| "learning_rate": 2.0718226620329467e-05, |
| "loss": 0.3418, |
| "step": 35500 |
| }, |
| { |
| "epoch": 0.6281917619409447, |
| "grad_norm": 1.378416895866394, |
| "learning_rate": 2.066914060199093e-05, |
| "loss": 0.3118, |
| "step": 35550 |
| }, |
| { |
| "epoch": 0.6290752946581611, |
| "grad_norm": 4.129642486572266, |
| "learning_rate": 2.0620054583652393e-05, |
| "loss": 0.2889, |
| "step": 35600 |
| }, |
| { |
| "epoch": 0.6299588273753777, |
| "grad_norm": 1.478084683418274, |
| "learning_rate": 2.0570968565313857e-05, |
| "loss": 0.319, |
| "step": 35650 |
| }, |
| { |
| "epoch": 0.6308423600925942, |
| "grad_norm": 3.230463743209839, |
| "learning_rate": 2.0521882546975322e-05, |
| "loss": 0.312, |
| "step": 35700 |
| }, |
| { |
| "epoch": 0.6317258928098107, |
| "grad_norm": 1.2029914855957031, |
| "learning_rate": 2.0472796528636783e-05, |
| "loss": 0.2776, |
| "step": 35750 |
| }, |
| { |
| "epoch": 0.6326094255270273, |
| "grad_norm": 1.6909867525100708, |
| "learning_rate": 2.0423710510298248e-05, |
| "loss": 0.2574, |
| "step": 35800 |
| }, |
| { |
| "epoch": 0.6334929582442438, |
| "grad_norm": 1.6969387531280518, |
| "learning_rate": 2.0374624491959713e-05, |
| "loss": 0.2816, |
| "step": 35850 |
| }, |
| { |
| "epoch": 0.6343764909614603, |
| "grad_norm": 2.4319510459899902, |
| "learning_rate": 2.0325538473621174e-05, |
| "loss": 0.3392, |
| "step": 35900 |
| }, |
| { |
| "epoch": 0.6352600236786768, |
| "grad_norm": 1.4081567525863647, |
| "learning_rate": 2.027645245528264e-05, |
| "loss": 0.2856, |
| "step": 35950 |
| }, |
| { |
| "epoch": 0.6361435563958934, |
| "grad_norm": 1.2967078685760498, |
| "learning_rate": 2.02273664369441e-05, |
| "loss": 0.286, |
| "step": 36000 |
| }, |
| { |
| "epoch": 0.6370270891131099, |
| "grad_norm": 1.3550012111663818, |
| "learning_rate": 2.0178280418605568e-05, |
| "loss": 0.3555, |
| "step": 36050 |
| }, |
| { |
| "epoch": 0.6379106218303264, |
| "grad_norm": 1.9244177341461182, |
| "learning_rate": 2.012919440026703e-05, |
| "loss": 0.3065, |
| "step": 36100 |
| }, |
| { |
| "epoch": 0.6387941545475428, |
| "grad_norm": 2.1921980381011963, |
| "learning_rate": 2.008010838192849e-05, |
| "loss": 0.3696, |
| "step": 36150 |
| }, |
| { |
| "epoch": 0.6396776872647594, |
| "grad_norm": 1.6438093185424805, |
| "learning_rate": 2.0031022363589955e-05, |
| "loss": 0.2626, |
| "step": 36200 |
| }, |
| { |
| "epoch": 0.6405612199819759, |
| "grad_norm": 1.4499566555023193, |
| "learning_rate": 1.998193634525142e-05, |
| "loss": 0.2681, |
| "step": 36250 |
| }, |
| { |
| "epoch": 0.6414447526991924, |
| "grad_norm": 1.7677289247512817, |
| "learning_rate": 1.9932850326912884e-05, |
| "loss": 0.3015, |
| "step": 36300 |
| }, |
| { |
| "epoch": 0.642328285416409, |
| "grad_norm": 2.6856095790863037, |
| "learning_rate": 1.9883764308574346e-05, |
| "loss": 0.3587, |
| "step": 36350 |
| }, |
| { |
| "epoch": 0.6432118181336255, |
| "grad_norm": 1.9422292709350586, |
| "learning_rate": 1.983467829023581e-05, |
| "loss": 0.4138, |
| "step": 36400 |
| }, |
| { |
| "epoch": 0.644095350850842, |
| "grad_norm": 0.9389033317565918, |
| "learning_rate": 1.9785592271897275e-05, |
| "loss": 0.3785, |
| "step": 36450 |
| }, |
| { |
| "epoch": 0.6449788835680585, |
| "grad_norm": 1.297255516052246, |
| "learning_rate": 1.9736506253558736e-05, |
| "loss": 0.3385, |
| "step": 36500 |
| }, |
| { |
| "epoch": 0.6458624162852751, |
| "grad_norm": 1.0876415967941284, |
| "learning_rate": 1.96874202352202e-05, |
| "loss": 0.3423, |
| "step": 36550 |
| }, |
| { |
| "epoch": 0.6467459490024916, |
| "grad_norm": 1.2366421222686768, |
| "learning_rate": 1.9638334216881666e-05, |
| "loss": 0.3793, |
| "step": 36600 |
| }, |
| { |
| "epoch": 0.6476294817197081, |
| "grad_norm": 4.349328517913818, |
| "learning_rate": 1.95902299189099e-05, |
| "loss": 0.3819, |
| "step": 36650 |
| }, |
| { |
| "epoch": 0.6485130144369246, |
| "grad_norm": 1.804661750793457, |
| "learning_rate": 1.954114390057136e-05, |
| "loss": 0.3111, |
| "step": 36700 |
| }, |
| { |
| "epoch": 0.6493965471541411, |
| "grad_norm": 2.6138484477996826, |
| "learning_rate": 1.9492057882232826e-05, |
| "loss": 0.3299, |
| "step": 36750 |
| }, |
| { |
| "epoch": 0.6502800798713576, |
| "grad_norm": 1.8608500957489014, |
| "learning_rate": 1.9442971863894287e-05, |
| "loss": 0.3994, |
| "step": 36800 |
| }, |
| { |
| "epoch": 0.6511636125885741, |
| "grad_norm": 2.2977466583251953, |
| "learning_rate": 1.9393885845555755e-05, |
| "loss": 0.3595, |
| "step": 36850 |
| }, |
| { |
| "epoch": 0.6520471453057907, |
| "grad_norm": 1.6370161771774292, |
| "learning_rate": 1.9344799827217217e-05, |
| "loss": 0.3572, |
| "step": 36900 |
| }, |
| { |
| "epoch": 0.6529306780230072, |
| "grad_norm": 1.4357324838638306, |
| "learning_rate": 1.9295713808878678e-05, |
| "loss": 0.3085, |
| "step": 36950 |
| }, |
| { |
| "epoch": 0.6538142107402237, |
| "grad_norm": 1.8057055473327637, |
| "learning_rate": 1.9246627790540143e-05, |
| "loss": 0.2647, |
| "step": 37000 |
| }, |
| { |
| "epoch": 0.6546977434574403, |
| "grad_norm": 1.230721354484558, |
| "learning_rate": 1.9197541772201607e-05, |
| "loss": 0.2622, |
| "step": 37050 |
| }, |
| { |
| "epoch": 0.6555812761746568, |
| "grad_norm": 1.6303822994232178, |
| "learning_rate": 1.9148455753863072e-05, |
| "loss": 0.3814, |
| "step": 37100 |
| }, |
| { |
| "epoch": 0.6564648088918733, |
| "grad_norm": 1.2327115535736084, |
| "learning_rate": 1.9099369735524533e-05, |
| "loss": 0.4174, |
| "step": 37150 |
| }, |
| { |
| "epoch": 0.6573483416090898, |
| "grad_norm": 1.4918360710144043, |
| "learning_rate": 1.9050283717185998e-05, |
| "loss": 0.3473, |
| "step": 37200 |
| }, |
| { |
| "epoch": 0.6582318743263063, |
| "grad_norm": 1.6960564851760864, |
| "learning_rate": 1.9001197698847463e-05, |
| "loss": 0.3598, |
| "step": 37250 |
| }, |
| { |
| "epoch": 0.6591154070435228, |
| "grad_norm": 1.8127328157424927, |
| "learning_rate": 1.8952111680508924e-05, |
| "loss": 0.2822, |
| "step": 37300 |
| }, |
| { |
| "epoch": 0.6599989397607393, |
| "grad_norm": 1.7553006410598755, |
| "learning_rate": 1.890302566217039e-05, |
| "loss": 0.2538, |
| "step": 37350 |
| }, |
| { |
| "epoch": 0.6608824724779558, |
| "grad_norm": 1.111005187034607, |
| "learning_rate": 1.8853939643831853e-05, |
| "loss": 0.2212, |
| "step": 37400 |
| }, |
| { |
| "epoch": 0.6617660051951724, |
| "grad_norm": 1.2916769981384277, |
| "learning_rate": 1.8804853625493314e-05, |
| "loss": 0.2687, |
| "step": 37450 |
| }, |
| { |
| "epoch": 0.6626495379123889, |
| "grad_norm": 1.5212571620941162, |
| "learning_rate": 1.875576760715478e-05, |
| "loss": 0.3288, |
| "step": 37500 |
| }, |
| { |
| "epoch": 0.6635330706296054, |
| "grad_norm": 1.5829190015792847, |
| "learning_rate": 1.870668158881624e-05, |
| "loss": 0.4221, |
| "step": 37550 |
| }, |
| { |
| "epoch": 0.664416603346822, |
| "grad_norm": 1.4784077405929565, |
| "learning_rate": 1.8657595570477705e-05, |
| "loss": 0.3711, |
| "step": 37600 |
| }, |
| { |
| "epoch": 0.6653001360640385, |
| "grad_norm": 1.907202959060669, |
| "learning_rate": 1.860850955213917e-05, |
| "loss": 0.2418, |
| "step": 37650 |
| }, |
| { |
| "epoch": 0.666183668781255, |
| "grad_norm": 1.4358186721801758, |
| "learning_rate": 1.855942353380063e-05, |
| "loss": 0.3, |
| "step": 37700 |
| }, |
| { |
| "epoch": 0.6670672014984715, |
| "grad_norm": 1.4791388511657715, |
| "learning_rate": 1.85103375154621e-05, |
| "loss": 0.3147, |
| "step": 37750 |
| }, |
| { |
| "epoch": 0.667950734215688, |
| "grad_norm": 1.383799433708191, |
| "learning_rate": 1.846125149712356e-05, |
| "loss": 0.2895, |
| "step": 37800 |
| }, |
| { |
| "epoch": 0.6688342669329045, |
| "grad_norm": 1.7297286987304688, |
| "learning_rate": 1.8412165478785025e-05, |
| "loss": 0.2649, |
| "step": 37850 |
| }, |
| { |
| "epoch": 0.669717799650121, |
| "grad_norm": 1.2361524105072021, |
| "learning_rate": 1.8363079460446486e-05, |
| "loss": 0.287, |
| "step": 37900 |
| }, |
| { |
| "epoch": 0.6706013323673375, |
| "grad_norm": 1.472721815109253, |
| "learning_rate": 1.831399344210795e-05, |
| "loss": 0.267, |
| "step": 37950 |
| }, |
| { |
| "epoch": 0.6714848650845541, |
| "grad_norm": 1.7498071193695068, |
| "learning_rate": 1.8264907423769416e-05, |
| "loss": 0.2862, |
| "step": 38000 |
| }, |
| { |
| "epoch": 0.6723683978017706, |
| "grad_norm": 1.389864444732666, |
| "learning_rate": 1.8215821405430877e-05, |
| "loss": 0.2931, |
| "step": 38050 |
| }, |
| { |
| "epoch": 0.6732519305189871, |
| "grad_norm": 1.2709695100784302, |
| "learning_rate": 1.816673538709234e-05, |
| "loss": 0.3039, |
| "step": 38100 |
| }, |
| { |
| "epoch": 0.6741354632362037, |
| "grad_norm": 1.2036606073379517, |
| "learning_rate": 1.8117649368753806e-05, |
| "loss": 0.3067, |
| "step": 38150 |
| }, |
| { |
| "epoch": 0.6750189959534202, |
| "grad_norm": 1.3336296081542969, |
| "learning_rate": 1.8068563350415268e-05, |
| "loss": 0.3072, |
| "step": 38200 |
| }, |
| { |
| "epoch": 0.6759025286706367, |
| "grad_norm": 1.8485578298568726, |
| "learning_rate": 1.8019477332076732e-05, |
| "loss": 0.2744, |
| "step": 38250 |
| }, |
| { |
| "epoch": 0.6767860613878532, |
| "grad_norm": 1.350595235824585, |
| "learning_rate": 1.7970391313738197e-05, |
| "loss": 0.3098, |
| "step": 38300 |
| }, |
| { |
| "epoch": 0.6776695941050697, |
| "grad_norm": 1.8860970735549927, |
| "learning_rate": 1.7921305295399658e-05, |
| "loss": 0.2837, |
| "step": 38350 |
| }, |
| { |
| "epoch": 0.6785531268222862, |
| "grad_norm": 1.3870184421539307, |
| "learning_rate": 1.7872219277061123e-05, |
| "loss": 0.2998, |
| "step": 38400 |
| }, |
| { |
| "epoch": 0.6794366595395027, |
| "grad_norm": 1.5092830657958984, |
| "learning_rate": 1.7823133258722584e-05, |
| "loss": 0.2477, |
| "step": 38450 |
| }, |
| { |
| "epoch": 0.6803201922567192, |
| "grad_norm": 1.4017945528030396, |
| "learning_rate": 1.7774047240384052e-05, |
| "loss": 0.3343, |
| "step": 38500 |
| }, |
| { |
| "epoch": 0.6812037249739358, |
| "grad_norm": 1.5817060470581055, |
| "learning_rate": 1.7724961222045513e-05, |
| "loss": 0.2411, |
| "step": 38550 |
| }, |
| { |
| "epoch": 0.6820872576911523, |
| "grad_norm": 1.4471608400344849, |
| "learning_rate": 1.7675875203706975e-05, |
| "loss": 0.2641, |
| "step": 38600 |
| }, |
| { |
| "epoch": 0.6829707904083688, |
| "grad_norm": 1.6398324966430664, |
| "learning_rate": 1.762777090573521e-05, |
| "loss": 0.3673, |
| "step": 38650 |
| }, |
| { |
| "epoch": 0.6838543231255854, |
| "grad_norm": 1.5645078420639038, |
| "learning_rate": 1.7578684887396674e-05, |
| "loss": 0.3456, |
| "step": 38700 |
| }, |
| { |
| "epoch": 0.6847378558428019, |
| "grad_norm": 1.4957185983657837, |
| "learning_rate": 1.752959886905814e-05, |
| "loss": 0.2554, |
| "step": 38750 |
| }, |
| { |
| "epoch": 0.6856213885600184, |
| "grad_norm": 1.5689042806625366, |
| "learning_rate": 1.7480512850719603e-05, |
| "loss": 0.2289, |
| "step": 38800 |
| }, |
| { |
| "epoch": 0.686504921277235, |
| "grad_norm": 1.4531927108764648, |
| "learning_rate": 1.7431426832381064e-05, |
| "loss": 0.2409, |
| "step": 38850 |
| }, |
| { |
| "epoch": 0.6873884539944514, |
| "grad_norm": 3.168332576751709, |
| "learning_rate": 1.738234081404253e-05, |
| "loss": 0.3395, |
| "step": 38900 |
| }, |
| { |
| "epoch": 0.6882719867116679, |
| "grad_norm": 1.5215202569961548, |
| "learning_rate": 1.7333254795703994e-05, |
| "loss": 0.3296, |
| "step": 38950 |
| }, |
| { |
| "epoch": 0.6891555194288844, |
| "grad_norm": 1.9889358282089233, |
| "learning_rate": 1.7284168777365455e-05, |
| "loss": 0.349, |
| "step": 39000 |
| }, |
| { |
| "epoch": 0.690039052146101, |
| "grad_norm": 1.6141583919525146, |
| "learning_rate": 1.723508275902692e-05, |
| "loss": 0.2841, |
| "step": 39050 |
| }, |
| { |
| "epoch": 0.6909225848633175, |
| "grad_norm": 1.3648995161056519, |
| "learning_rate": 1.7185996740688384e-05, |
| "loss": 0.3101, |
| "step": 39100 |
| }, |
| { |
| "epoch": 0.691806117580534, |
| "grad_norm": 1.897626280784607, |
| "learning_rate": 1.7136910722349846e-05, |
| "loss": 0.3094, |
| "step": 39150 |
| }, |
| { |
| "epoch": 0.6926896502977505, |
| "grad_norm": 1.4272273778915405, |
| "learning_rate": 1.708782470401131e-05, |
| "loss": 0.2485, |
| "step": 39200 |
| }, |
| { |
| "epoch": 0.6935731830149671, |
| "grad_norm": 1.1664527654647827, |
| "learning_rate": 1.703873868567277e-05, |
| "loss": 0.2985, |
| "step": 39250 |
| }, |
| { |
| "epoch": 0.6944567157321836, |
| "grad_norm": 1.5030759572982788, |
| "learning_rate": 1.698965266733424e-05, |
| "loss": 0.3039, |
| "step": 39300 |
| }, |
| { |
| "epoch": 0.6953402484494001, |
| "grad_norm": 1.2608274221420288, |
| "learning_rate": 1.69405666489957e-05, |
| "loss": 0.2616, |
| "step": 39350 |
| }, |
| { |
| "epoch": 0.6962237811666167, |
| "grad_norm": 1.173496961593628, |
| "learning_rate": 1.6891480630657162e-05, |
| "loss": 0.2932, |
| "step": 39400 |
| }, |
| { |
| "epoch": 0.6971073138838331, |
| "grad_norm": 1.3213509321212769, |
| "learning_rate": 1.6842394612318627e-05, |
| "loss": 0.3707, |
| "step": 39450 |
| }, |
| { |
| "epoch": 0.6979908466010496, |
| "grad_norm": 1.9010616540908813, |
| "learning_rate": 1.679330859398009e-05, |
| "loss": 0.2728, |
| "step": 39500 |
| }, |
| { |
| "epoch": 0.6988743793182661, |
| "grad_norm": 1.143967866897583, |
| "learning_rate": 1.6744222575641556e-05, |
| "loss": 0.2482, |
| "step": 39550 |
| }, |
| { |
| "epoch": 0.6997579120354827, |
| "grad_norm": 1.108268141746521, |
| "learning_rate": 1.6695136557303018e-05, |
| "loss": 0.3475, |
| "step": 39600 |
| }, |
| { |
| "epoch": 0.7006414447526992, |
| "grad_norm": 0.6908143758773804, |
| "learning_rate": 1.6646050538964482e-05, |
| "loss": 0.3447, |
| "step": 39650 |
| }, |
| { |
| "epoch": 0.7015249774699157, |
| "grad_norm": 1.5447782278060913, |
| "learning_rate": 1.6596964520625947e-05, |
| "loss": 0.2503, |
| "step": 39700 |
| }, |
| { |
| "epoch": 0.7024085101871322, |
| "grad_norm": 1.515202283859253, |
| "learning_rate": 1.6547878502287408e-05, |
| "loss": 0.3593, |
| "step": 39750 |
| }, |
| { |
| "epoch": 0.7032920429043488, |
| "grad_norm": 4.640558242797852, |
| "learning_rate": 1.6498792483948873e-05, |
| "loss": 0.2933, |
| "step": 39800 |
| }, |
| { |
| "epoch": 0.7041755756215653, |
| "grad_norm": 1.1238136291503906, |
| "learning_rate": 1.6449706465610338e-05, |
| "loss": 0.3565, |
| "step": 39850 |
| }, |
| { |
| "epoch": 0.7050591083387818, |
| "grad_norm": 1.5694066286087036, |
| "learning_rate": 1.64006204472718e-05, |
| "loss": 0.2838, |
| "step": 39900 |
| }, |
| { |
| "epoch": 0.7059426410559984, |
| "grad_norm": 1.808310866355896, |
| "learning_rate": 1.6351534428933263e-05, |
| "loss": 0.2604, |
| "step": 39950 |
| }, |
| { |
| "epoch": 0.7068261737732148, |
| "grad_norm": 1.6668068170547485, |
| "learning_rate": 1.6302448410594725e-05, |
| "loss": 0.2286, |
| "step": 40000 |
| }, |
| { |
| "epoch": 0.7077097064904313, |
| "grad_norm": 1.541528344154358, |
| "learning_rate": 1.6253362392256193e-05, |
| "loss": 0.2766, |
| "step": 40050 |
| }, |
| { |
| "epoch": 0.7085932392076478, |
| "grad_norm": 1.4408469200134277, |
| "learning_rate": 1.6204276373917654e-05, |
| "loss": 0.2649, |
| "step": 40100 |
| }, |
| { |
| "epoch": 0.7094767719248644, |
| "grad_norm": 1.57314932346344, |
| "learning_rate": 1.6155190355579115e-05, |
| "loss": 0.3184, |
| "step": 40150 |
| }, |
| { |
| "epoch": 0.7103603046420809, |
| "grad_norm": 2.2145802974700928, |
| "learning_rate": 1.6106104337240583e-05, |
| "loss": 0.334, |
| "step": 40200 |
| }, |
| { |
| "epoch": 0.7112438373592974, |
| "grad_norm": 1.348560094833374, |
| "learning_rate": 1.6057018318902045e-05, |
| "loss": 0.3609, |
| "step": 40250 |
| }, |
| { |
| "epoch": 0.7121273700765139, |
| "grad_norm": 0.9920164942741394, |
| "learning_rate": 1.600793230056351e-05, |
| "loss": 0.306, |
| "step": 40300 |
| }, |
| { |
| "epoch": 0.7130109027937305, |
| "grad_norm": 1.3321669101715088, |
| "learning_rate": 1.595884628222497e-05, |
| "loss": 0.2629, |
| "step": 40350 |
| }, |
| { |
| "epoch": 0.713894435510947, |
| "grad_norm": 1.257283329963684, |
| "learning_rate": 1.5909760263886435e-05, |
| "loss": 0.2311, |
| "step": 40400 |
| }, |
| { |
| "epoch": 0.7147779682281635, |
| "grad_norm": 1.3199965953826904, |
| "learning_rate": 1.58606742455479e-05, |
| "loss": 0.2909, |
| "step": 40450 |
| }, |
| { |
| "epoch": 0.7156615009453801, |
| "grad_norm": 1.4839483499526978, |
| "learning_rate": 1.581158822720936e-05, |
| "loss": 0.343, |
| "step": 40500 |
| }, |
| { |
| "epoch": 0.7165450336625965, |
| "grad_norm": 1.0020859241485596, |
| "learning_rate": 1.5762502208870826e-05, |
| "loss": 0.3013, |
| "step": 40550 |
| }, |
| { |
| "epoch": 0.717428566379813, |
| "grad_norm": 1.3758106231689453, |
| "learning_rate": 1.571341619053229e-05, |
| "loss": 0.3128, |
| "step": 40600 |
| }, |
| { |
| "epoch": 0.7183120990970295, |
| "grad_norm": 1.5661957263946533, |
| "learning_rate": 1.5664330172193752e-05, |
| "loss": 0.2073, |
| "step": 40650 |
| }, |
| { |
| "epoch": 0.7191956318142461, |
| "grad_norm": 1.4217487573623657, |
| "learning_rate": 1.5615244153855217e-05, |
| "loss": 0.2873, |
| "step": 40700 |
| }, |
| { |
| "epoch": 0.7200791645314626, |
| "grad_norm": 1.6037381887435913, |
| "learning_rate": 1.556713985588345e-05, |
| "loss": 0.3705, |
| "step": 40750 |
| }, |
| { |
| "epoch": 0.7209626972486791, |
| "grad_norm": 1.7782158851623535, |
| "learning_rate": 1.5518053837544916e-05, |
| "loss": 0.2985, |
| "step": 40800 |
| }, |
| { |
| "epoch": 0.7218462299658956, |
| "grad_norm": 1.8306645154953003, |
| "learning_rate": 1.546896781920638e-05, |
| "loss": 0.3355, |
| "step": 40850 |
| }, |
| { |
| "epoch": 0.7227297626831122, |
| "grad_norm": 1.4840078353881836, |
| "learning_rate": 1.541988180086784e-05, |
| "loss": 0.3322, |
| "step": 40900 |
| }, |
| { |
| "epoch": 0.7236132954003287, |
| "grad_norm": 2.7773265838623047, |
| "learning_rate": 1.5370795782529303e-05, |
| "loss": 0.3424, |
| "step": 40950 |
| }, |
| { |
| "epoch": 0.7244968281175452, |
| "grad_norm": 1.3592840433120728, |
| "learning_rate": 1.532170976419077e-05, |
| "loss": 0.2834, |
| "step": 41000 |
| }, |
| { |
| "epoch": 0.7253803608347618, |
| "grad_norm": 2.0050973892211914, |
| "learning_rate": 1.5272623745852232e-05, |
| "loss": 0.2776, |
| "step": 41050 |
| }, |
| { |
| "epoch": 0.7262638935519782, |
| "grad_norm": 1.4293886423110962, |
| "learning_rate": 1.5223537727513695e-05, |
| "loss": 0.2924, |
| "step": 41100 |
| }, |
| { |
| "epoch": 0.7271474262691947, |
| "grad_norm": 1.5391188859939575, |
| "learning_rate": 1.5174451709175158e-05, |
| "loss": 0.3982, |
| "step": 41150 |
| }, |
| { |
| "epoch": 0.7280309589864112, |
| "grad_norm": 1.4493207931518555, |
| "learning_rate": 1.5125365690836625e-05, |
| "loss": 0.3379, |
| "step": 41200 |
| }, |
| { |
| "epoch": 0.7289144917036278, |
| "grad_norm": 2.0417702198028564, |
| "learning_rate": 1.5076279672498086e-05, |
| "loss": 0.2876, |
| "step": 41250 |
| }, |
| { |
| "epoch": 0.7297980244208443, |
| "grad_norm": 1.7424287796020508, |
| "learning_rate": 1.5027193654159549e-05, |
| "loss": 0.3289, |
| "step": 41300 |
| }, |
| { |
| "epoch": 0.7306815571380608, |
| "grad_norm": 1.1353446245193481, |
| "learning_rate": 1.4978107635821015e-05, |
| "loss": 0.3119, |
| "step": 41350 |
| }, |
| { |
| "epoch": 0.7315650898552774, |
| "grad_norm": 1.4479618072509766, |
| "learning_rate": 1.4929021617482478e-05, |
| "loss": 0.3007, |
| "step": 41400 |
| }, |
| { |
| "epoch": 0.7324486225724939, |
| "grad_norm": 2.910383939743042, |
| "learning_rate": 1.4879935599143941e-05, |
| "loss": 0.3499, |
| "step": 41450 |
| }, |
| { |
| "epoch": 0.7333321552897104, |
| "grad_norm": 1.0191997289657593, |
| "learning_rate": 1.4830849580805404e-05, |
| "loss": 0.3016, |
| "step": 41500 |
| }, |
| { |
| "epoch": 0.7342156880069269, |
| "grad_norm": 0.9859305620193481, |
| "learning_rate": 1.4781763562466869e-05, |
| "loss": 0.3232, |
| "step": 41550 |
| }, |
| { |
| "epoch": 0.7350992207241435, |
| "grad_norm": 1.4275975227355957, |
| "learning_rate": 1.4732677544128332e-05, |
| "loss": 0.2577, |
| "step": 41600 |
| }, |
| { |
| "epoch": 0.7359827534413599, |
| "grad_norm": 1.0362133979797363, |
| "learning_rate": 1.4683591525789795e-05, |
| "loss": 0.2928, |
| "step": 41650 |
| }, |
| { |
| "epoch": 0.7368662861585764, |
| "grad_norm": 2.07706618309021, |
| "learning_rate": 1.4634505507451258e-05, |
| "loss": 0.2814, |
| "step": 41700 |
| }, |
| { |
| "epoch": 0.7377498188757929, |
| "grad_norm": 1.6817320585250854, |
| "learning_rate": 1.4585419489112722e-05, |
| "loss": 0.2962, |
| "step": 41750 |
| }, |
| { |
| "epoch": 0.7386333515930095, |
| "grad_norm": 2.021404981613159, |
| "learning_rate": 1.4536333470774185e-05, |
| "loss": 0.2756, |
| "step": 41800 |
| }, |
| { |
| "epoch": 0.739516884310226, |
| "grad_norm": 1.3860830068588257, |
| "learning_rate": 1.4487247452435648e-05, |
| "loss": 0.3287, |
| "step": 41850 |
| }, |
| { |
| "epoch": 0.7404004170274425, |
| "grad_norm": 1.266453742980957, |
| "learning_rate": 1.4438161434097111e-05, |
| "loss": 0.34, |
| "step": 41900 |
| }, |
| { |
| "epoch": 0.741283949744659, |
| "grad_norm": 1.5601640939712524, |
| "learning_rate": 1.4389075415758576e-05, |
| "loss": 0.3402, |
| "step": 41950 |
| }, |
| { |
| "epoch": 0.7421674824618756, |
| "grad_norm": 1.6014955043792725, |
| "learning_rate": 1.4339989397420039e-05, |
| "loss": 0.2404, |
| "step": 42000 |
| }, |
| { |
| "epoch": 0.7430510151790921, |
| "grad_norm": 1.4757792949676514, |
| "learning_rate": 1.4290903379081502e-05, |
| "loss": 0.2655, |
| "step": 42050 |
| }, |
| { |
| "epoch": 0.7439345478963086, |
| "grad_norm": 1.6618765592575073, |
| "learning_rate": 1.4241817360742968e-05, |
| "loss": 0.3288, |
| "step": 42100 |
| }, |
| { |
| "epoch": 0.7448180806135252, |
| "grad_norm": 1.3700001239776611, |
| "learning_rate": 1.4192731342404431e-05, |
| "loss": 0.3333, |
| "step": 42150 |
| }, |
| { |
| "epoch": 0.7457016133307416, |
| "grad_norm": 1.4557344913482666, |
| "learning_rate": 1.4143645324065894e-05, |
| "loss": 0.3602, |
| "step": 42200 |
| }, |
| { |
| "epoch": 0.7465851460479581, |
| "grad_norm": 1.546533226966858, |
| "learning_rate": 1.4094559305727355e-05, |
| "loss": 0.3631, |
| "step": 42250 |
| }, |
| { |
| "epoch": 0.7474686787651746, |
| "grad_norm": 1.3664302825927734, |
| "learning_rate": 1.4045473287388822e-05, |
| "loss": 0.2374, |
| "step": 42300 |
| }, |
| { |
| "epoch": 0.7483522114823912, |
| "grad_norm": 4.161416053771973, |
| "learning_rate": 1.3996387269050285e-05, |
| "loss": 0.3347, |
| "step": 42350 |
| }, |
| { |
| "epoch": 0.7492357441996077, |
| "grad_norm": 1.3883721828460693, |
| "learning_rate": 1.3947301250711748e-05, |
| "loss": 0.3031, |
| "step": 42400 |
| }, |
| { |
| "epoch": 0.7501192769168242, |
| "grad_norm": 2.6039016246795654, |
| "learning_rate": 1.389821523237321e-05, |
| "loss": 0.2817, |
| "step": 42450 |
| }, |
| { |
| "epoch": 0.7510028096340408, |
| "grad_norm": 4.557380676269531, |
| "learning_rate": 1.3849129214034675e-05, |
| "loss": 0.3433, |
| "step": 42500 |
| }, |
| { |
| "epoch": 0.7518863423512573, |
| "grad_norm": 2.938749074935913, |
| "learning_rate": 1.3800043195696138e-05, |
| "loss": 0.3273, |
| "step": 42550 |
| }, |
| { |
| "epoch": 0.7527698750684738, |
| "grad_norm": 1.3836658000946045, |
| "learning_rate": 1.3750957177357601e-05, |
| "loss": 0.3123, |
| "step": 42600 |
| }, |
| { |
| "epoch": 0.7536534077856903, |
| "grad_norm": 4.600383758544922, |
| "learning_rate": 1.3701871159019066e-05, |
| "loss": 0.29, |
| "step": 42650 |
| }, |
| { |
| "epoch": 0.7545369405029069, |
| "grad_norm": 4.080932140350342, |
| "learning_rate": 1.3652785140680529e-05, |
| "loss": 0.3352, |
| "step": 42700 |
| }, |
| { |
| "epoch": 0.7554204732201233, |
| "grad_norm": 1.4026703834533691, |
| "learning_rate": 1.3603699122341992e-05, |
| "loss": 0.3143, |
| "step": 42750 |
| }, |
| { |
| "epoch": 0.7563040059373398, |
| "grad_norm": 1.4037362337112427, |
| "learning_rate": 1.3555594824370226e-05, |
| "loss": 0.2833, |
| "step": 42800 |
| }, |
| { |
| "epoch": 0.7571875386545563, |
| "grad_norm": 1.5535756349563599, |
| "learning_rate": 1.350650880603169e-05, |
| "loss": 0.2851, |
| "step": 42850 |
| }, |
| { |
| "epoch": 0.7580710713717729, |
| "grad_norm": 1.3919951915740967, |
| "learning_rate": 1.3457422787693156e-05, |
| "loss": 0.2956, |
| "step": 42900 |
| }, |
| { |
| "epoch": 0.7589546040889894, |
| "grad_norm": 1.531242847442627, |
| "learning_rate": 1.3408336769354619e-05, |
| "loss": 0.3038, |
| "step": 42950 |
| }, |
| { |
| "epoch": 0.7598381368062059, |
| "grad_norm": 1.4396170377731323, |
| "learning_rate": 1.335925075101608e-05, |
| "loss": 0.2195, |
| "step": 43000 |
| }, |
| { |
| "epoch": 0.7607216695234225, |
| "grad_norm": 1.4077396392822266, |
| "learning_rate": 1.3310164732677543e-05, |
| "loss": 0.2588, |
| "step": 43050 |
| }, |
| { |
| "epoch": 0.761605202240639, |
| "grad_norm": 2.881322145462036, |
| "learning_rate": 1.326107871433901e-05, |
| "loss": 0.3065, |
| "step": 43100 |
| }, |
| { |
| "epoch": 0.7624887349578555, |
| "grad_norm": 1.5936981439590454, |
| "learning_rate": 1.3211992696000472e-05, |
| "loss": 0.3252, |
| "step": 43150 |
| }, |
| { |
| "epoch": 0.763372267675072, |
| "grad_norm": 1.4670791625976562, |
| "learning_rate": 1.3162906677661935e-05, |
| "loss": 0.2418, |
| "step": 43200 |
| }, |
| { |
| "epoch": 0.7642558003922886, |
| "grad_norm": 1.6417291164398193, |
| "learning_rate": 1.31138206593234e-05, |
| "loss": 0.4002, |
| "step": 43250 |
| }, |
| { |
| "epoch": 0.765139333109505, |
| "grad_norm": 1.5653693675994873, |
| "learning_rate": 1.3064734640984863e-05, |
| "loss": 0.3379, |
| "step": 43300 |
| }, |
| { |
| "epoch": 0.7660228658267215, |
| "grad_norm": 1.279615879058838, |
| "learning_rate": 1.3015648622646326e-05, |
| "loss": 0.3574, |
| "step": 43350 |
| }, |
| { |
| "epoch": 0.766906398543938, |
| "grad_norm": 1.693057894706726, |
| "learning_rate": 1.2966562604307789e-05, |
| "loss": 0.3664, |
| "step": 43400 |
| }, |
| { |
| "epoch": 0.7677899312611546, |
| "grad_norm": 2.931711196899414, |
| "learning_rate": 1.2917476585969254e-05, |
| "loss": 0.3274, |
| "step": 43450 |
| }, |
| { |
| "epoch": 0.7686734639783711, |
| "grad_norm": 1.3495726585388184, |
| "learning_rate": 1.2868390567630716e-05, |
| "loss": 0.2443, |
| "step": 43500 |
| }, |
| { |
| "epoch": 0.7695569966955876, |
| "grad_norm": 1.4437354803085327, |
| "learning_rate": 1.281930454929218e-05, |
| "loss": 0.2998, |
| "step": 43550 |
| }, |
| { |
| "epoch": 0.7704405294128042, |
| "grad_norm": 4.394979000091553, |
| "learning_rate": 1.2770218530953642e-05, |
| "loss": 0.3619, |
| "step": 43600 |
| }, |
| { |
| "epoch": 0.7713240621300207, |
| "grad_norm": 2.726393461227417, |
| "learning_rate": 1.2721132512615109e-05, |
| "loss": 0.2586, |
| "step": 43650 |
| }, |
| { |
| "epoch": 0.7722075948472372, |
| "grad_norm": 1.146583080291748, |
| "learning_rate": 1.2672046494276572e-05, |
| "loss": 0.251, |
| "step": 43700 |
| }, |
| { |
| "epoch": 0.7730911275644538, |
| "grad_norm": 1.2839117050170898, |
| "learning_rate": 1.2622960475938033e-05, |
| "loss": 0.3154, |
| "step": 43750 |
| }, |
| { |
| "epoch": 0.7739746602816703, |
| "grad_norm": 1.3681036233901978, |
| "learning_rate": 1.25738744575995e-05, |
| "loss": 0.3309, |
| "step": 43800 |
| }, |
| { |
| "epoch": 0.7748581929988867, |
| "grad_norm": 1.3661130666732788, |
| "learning_rate": 1.2524788439260962e-05, |
| "loss": 0.2768, |
| "step": 43850 |
| }, |
| { |
| "epoch": 0.7757417257161032, |
| "grad_norm": 1.3001888990402222, |
| "learning_rate": 1.2475702420922425e-05, |
| "loss": 0.3049, |
| "step": 43900 |
| }, |
| { |
| "epoch": 0.7766252584333198, |
| "grad_norm": 1.4377065896987915, |
| "learning_rate": 1.2426616402583888e-05, |
| "loss": 0.2939, |
| "step": 43950 |
| }, |
| { |
| "epoch": 0.7775087911505363, |
| "grad_norm": 2.084547519683838, |
| "learning_rate": 1.2377530384245351e-05, |
| "loss": 0.2909, |
| "step": 44000 |
| }, |
| { |
| "epoch": 0.7783923238677528, |
| "grad_norm": 1.421271562576294, |
| "learning_rate": 1.2328444365906816e-05, |
| "loss": 0.2157, |
| "step": 44050 |
| }, |
| { |
| "epoch": 0.7792758565849693, |
| "grad_norm": 1.2172672748565674, |
| "learning_rate": 1.2279358347568279e-05, |
| "loss": 0.2785, |
| "step": 44100 |
| }, |
| { |
| "epoch": 0.7801593893021859, |
| "grad_norm": 1.5359545946121216, |
| "learning_rate": 1.2230272329229744e-05, |
| "loss": 0.2386, |
| "step": 44150 |
| }, |
| { |
| "epoch": 0.7810429220194024, |
| "grad_norm": 1.3057314157485962, |
| "learning_rate": 1.2181186310891207e-05, |
| "loss": 0.3297, |
| "step": 44200 |
| }, |
| { |
| "epoch": 0.7819264547366189, |
| "grad_norm": 1.0489422082901, |
| "learning_rate": 1.213210029255267e-05, |
| "loss": 0.2576, |
| "step": 44250 |
| }, |
| { |
| "epoch": 0.7828099874538355, |
| "grad_norm": 2.2906908988952637, |
| "learning_rate": 1.2083014274214133e-05, |
| "loss": 0.2678, |
| "step": 44300 |
| }, |
| { |
| "epoch": 0.783693520171052, |
| "grad_norm": 1.5887507200241089, |
| "learning_rate": 1.2033928255875597e-05, |
| "loss": 0.3625, |
| "step": 44350 |
| }, |
| { |
| "epoch": 0.7845770528882684, |
| "grad_norm": 1.592004418373108, |
| "learning_rate": 1.1984842237537062e-05, |
| "loss": 0.311, |
| "step": 44400 |
| }, |
| { |
| "epoch": 0.7854605856054849, |
| "grad_norm": 1.2201918363571167, |
| "learning_rate": 1.1935756219198523e-05, |
| "loss": 0.2924, |
| "step": 44450 |
| }, |
| { |
| "epoch": 0.7863441183227015, |
| "grad_norm": 1.6248202323913574, |
| "learning_rate": 1.1886670200859988e-05, |
| "loss": 0.2602, |
| "step": 44500 |
| }, |
| { |
| "epoch": 0.787227651039918, |
| "grad_norm": 2.032122850418091, |
| "learning_rate": 1.183758418252145e-05, |
| "loss": 0.3936, |
| "step": 44550 |
| }, |
| { |
| "epoch": 0.7881111837571345, |
| "grad_norm": 1.149383783340454, |
| "learning_rate": 1.1788498164182915e-05, |
| "loss": 0.229, |
| "step": 44600 |
| }, |
| { |
| "epoch": 0.788994716474351, |
| "grad_norm": 4.3157148361206055, |
| "learning_rate": 1.1739412145844378e-05, |
| "loss": 0.3017, |
| "step": 44650 |
| }, |
| { |
| "epoch": 0.7898782491915676, |
| "grad_norm": 1.8925341367721558, |
| "learning_rate": 1.1690326127505841e-05, |
| "loss": 0.2795, |
| "step": 44700 |
| }, |
| { |
| "epoch": 0.7907617819087841, |
| "grad_norm": 1.678846001625061, |
| "learning_rate": 1.1641240109167304e-05, |
| "loss": 0.279, |
| "step": 44750 |
| }, |
| { |
| "epoch": 0.7916453146260006, |
| "grad_norm": 1.3051953315734863, |
| "learning_rate": 1.1592154090828769e-05, |
| "loss": 0.227, |
| "step": 44800 |
| }, |
| { |
| "epoch": 0.7925288473432172, |
| "grad_norm": 1.5428054332733154, |
| "learning_rate": 1.1543068072490232e-05, |
| "loss": 0.25, |
| "step": 44850 |
| }, |
| { |
| "epoch": 0.7934123800604337, |
| "grad_norm": 4.3441057205200195, |
| "learning_rate": 1.1494963774518466e-05, |
| "loss": 0.3381, |
| "step": 44900 |
| }, |
| { |
| "epoch": 0.7942959127776501, |
| "grad_norm": 1.589739203453064, |
| "learning_rate": 1.1445877756179931e-05, |
| "loss": 0.2914, |
| "step": 44950 |
| }, |
| { |
| "epoch": 0.7951794454948666, |
| "grad_norm": 1.8874093294143677, |
| "learning_rate": 1.1396791737841392e-05, |
| "loss": 0.282, |
| "step": 45000 |
| }, |
| { |
| "epoch": 0.7960629782120832, |
| "grad_norm": 1.1699323654174805, |
| "learning_rate": 1.1347705719502857e-05, |
| "loss": 0.2596, |
| "step": 45050 |
| }, |
| { |
| "epoch": 0.7969465109292997, |
| "grad_norm": 1.4282245635986328, |
| "learning_rate": 1.129861970116432e-05, |
| "loss": 0.311, |
| "step": 45100 |
| }, |
| { |
| "epoch": 0.7978300436465162, |
| "grad_norm": 1.4583613872528076, |
| "learning_rate": 1.1249533682825785e-05, |
| "loss": 0.2368, |
| "step": 45150 |
| }, |
| { |
| "epoch": 0.7987135763637327, |
| "grad_norm": 1.3440780639648438, |
| "learning_rate": 1.120044766448725e-05, |
| "loss": 0.3147, |
| "step": 45200 |
| }, |
| { |
| "epoch": 0.7995971090809493, |
| "grad_norm": 1.4544724225997925, |
| "learning_rate": 1.115136164614871e-05, |
| "loss": 0.2964, |
| "step": 45250 |
| }, |
| { |
| "epoch": 0.8004806417981658, |
| "grad_norm": 1.5149188041687012, |
| "learning_rate": 1.1102275627810175e-05, |
| "loss": 0.34, |
| "step": 45300 |
| }, |
| { |
| "epoch": 0.8013641745153823, |
| "grad_norm": 1.4456932544708252, |
| "learning_rate": 1.1053189609471638e-05, |
| "loss": 0.3249, |
| "step": 45350 |
| }, |
| { |
| "epoch": 0.8022477072325989, |
| "grad_norm": 4.4482574462890625, |
| "learning_rate": 1.1004103591133103e-05, |
| "loss": 0.4092, |
| "step": 45400 |
| }, |
| { |
| "epoch": 0.8031312399498154, |
| "grad_norm": 1.7785700559616089, |
| "learning_rate": 1.0955017572794566e-05, |
| "loss": 0.2601, |
| "step": 45450 |
| }, |
| { |
| "epoch": 0.8040147726670318, |
| "grad_norm": 1.560614824295044, |
| "learning_rate": 1.0905931554456029e-05, |
| "loss": 0.306, |
| "step": 45500 |
| }, |
| { |
| "epoch": 0.8048983053842483, |
| "grad_norm": 1.2510974407196045, |
| "learning_rate": 1.0856845536117492e-05, |
| "loss": 0.2888, |
| "step": 45550 |
| }, |
| { |
| "epoch": 0.8057818381014649, |
| "grad_norm": 2.532653331756592, |
| "learning_rate": 1.0807759517778957e-05, |
| "loss": 0.3101, |
| "step": 45600 |
| }, |
| { |
| "epoch": 0.8066653708186814, |
| "grad_norm": 1.2750502824783325, |
| "learning_rate": 1.075867349944042e-05, |
| "loss": 0.3148, |
| "step": 45650 |
| }, |
| { |
| "epoch": 0.8075489035358979, |
| "grad_norm": 2.6815085411071777, |
| "learning_rate": 1.0709587481101884e-05, |
| "loss": 0.3975, |
| "step": 45700 |
| }, |
| { |
| "epoch": 0.8084324362531145, |
| "grad_norm": 1.3361300230026245, |
| "learning_rate": 1.0660501462763347e-05, |
| "loss": 0.391, |
| "step": 45750 |
| }, |
| { |
| "epoch": 0.809315968970331, |
| "grad_norm": 1.438496470451355, |
| "learning_rate": 1.061141544442481e-05, |
| "loss": 0.3055, |
| "step": 45800 |
| }, |
| { |
| "epoch": 0.8101995016875475, |
| "grad_norm": 1.3673596382141113, |
| "learning_rate": 1.0562329426086275e-05, |
| "loss": 0.2434, |
| "step": 45850 |
| }, |
| { |
| "epoch": 0.811083034404764, |
| "grad_norm": 2.5049281120300293, |
| "learning_rate": 1.0513243407747738e-05, |
| "loss": 0.285, |
| "step": 45900 |
| }, |
| { |
| "epoch": 0.8119665671219806, |
| "grad_norm": 4.577225208282471, |
| "learning_rate": 1.04641573894092e-05, |
| "loss": 0.3952, |
| "step": 45950 |
| }, |
| { |
| "epoch": 0.8128500998391971, |
| "grad_norm": 1.4778873920440674, |
| "learning_rate": 1.0415071371070664e-05, |
| "loss": 0.2802, |
| "step": 46000 |
| }, |
| { |
| "epoch": 0.8137336325564135, |
| "grad_norm": 0.8188498020172119, |
| "learning_rate": 1.0365985352732128e-05, |
| "loss": 0.2687, |
| "step": 46050 |
| }, |
| { |
| "epoch": 0.81461716527363, |
| "grad_norm": 1.3656600713729858, |
| "learning_rate": 1.0316899334393591e-05, |
| "loss": 0.2787, |
| "step": 46100 |
| }, |
| { |
| "epoch": 0.8155006979908466, |
| "grad_norm": 1.3830708265304565, |
| "learning_rate": 1.0267813316055056e-05, |
| "loss": 0.2943, |
| "step": 46150 |
| }, |
| { |
| "epoch": 0.8163842307080631, |
| "grad_norm": 1.866623878479004, |
| "learning_rate": 1.0218727297716519e-05, |
| "loss": 0.3386, |
| "step": 46200 |
| }, |
| { |
| "epoch": 0.8172677634252796, |
| "grad_norm": 1.5368878841400146, |
| "learning_rate": 1.0169641279377982e-05, |
| "loss": 0.3227, |
| "step": 46250 |
| }, |
| { |
| "epoch": 0.8181512961424962, |
| "grad_norm": 3.281324625015259, |
| "learning_rate": 1.0120555261039447e-05, |
| "loss": 0.2582, |
| "step": 46300 |
| }, |
| { |
| "epoch": 0.8190348288597127, |
| "grad_norm": 1.4477012157440186, |
| "learning_rate": 1.007146924270091e-05, |
| "loss": 0.285, |
| "step": 46350 |
| }, |
| { |
| "epoch": 0.8199183615769292, |
| "grad_norm": 2.9229135513305664, |
| "learning_rate": 1.0022383224362374e-05, |
| "loss": 0.2989, |
| "step": 46400 |
| }, |
| { |
| "epoch": 0.8208018942941457, |
| "grad_norm": 1.5713211297988892, |
| "learning_rate": 9.973297206023836e-06, |
| "loss": 0.2761, |
| "step": 46450 |
| }, |
| { |
| "epoch": 0.8216854270113623, |
| "grad_norm": 1.5968650579452515, |
| "learning_rate": 9.9242111876853e-06, |
| "loss": 0.299, |
| "step": 46500 |
| }, |
| { |
| "epoch": 0.8225689597285788, |
| "grad_norm": 1.5933504104614258, |
| "learning_rate": 9.875125169346763e-06, |
| "loss": 0.2908, |
| "step": 46550 |
| }, |
| { |
| "epoch": 0.8234524924457952, |
| "grad_norm": 0.9819146990776062, |
| "learning_rate": 9.826039151008228e-06, |
| "loss": 0.3134, |
| "step": 46600 |
| }, |
| { |
| "epoch": 0.8243360251630117, |
| "grad_norm": 1.4541929960250854, |
| "learning_rate": 9.776953132669691e-06, |
| "loss": 0.2603, |
| "step": 46650 |
| }, |
| { |
| "epoch": 0.8252195578802283, |
| "grad_norm": 1.499306559562683, |
| "learning_rate": 9.727867114331154e-06, |
| "loss": 0.2965, |
| "step": 46700 |
| }, |
| { |
| "epoch": 0.8261030905974448, |
| "grad_norm": 2.7763867378234863, |
| "learning_rate": 9.678781095992617e-06, |
| "loss": 0.2506, |
| "step": 46750 |
| }, |
| { |
| "epoch": 0.8269866233146613, |
| "grad_norm": 1.4240554571151733, |
| "learning_rate": 9.629695077654082e-06, |
| "loss": 0.2258, |
| "step": 46800 |
| }, |
| { |
| "epoch": 0.8278701560318779, |
| "grad_norm": 1.71811842918396, |
| "learning_rate": 9.580609059315546e-06, |
| "loss": 0.2209, |
| "step": 46850 |
| }, |
| { |
| "epoch": 0.8287536887490944, |
| "grad_norm": 3.2347002029418945, |
| "learning_rate": 9.53152304097701e-06, |
| "loss": 0.2756, |
| "step": 46900 |
| }, |
| { |
| "epoch": 0.8296372214663109, |
| "grad_norm": 2.865858554840088, |
| "learning_rate": 9.483418743005244e-06, |
| "loss": 0.3345, |
| "step": 46950 |
| }, |
| { |
| "epoch": 0.8305207541835274, |
| "grad_norm": 1.4720476865768433, |
| "learning_rate": 9.434332724666705e-06, |
| "loss": 0.3016, |
| "step": 47000 |
| }, |
| { |
| "epoch": 0.831404286900744, |
| "grad_norm": 2.7783117294311523, |
| "learning_rate": 9.38524670632817e-06, |
| "loss": 0.2712, |
| "step": 47050 |
| }, |
| { |
| "epoch": 0.8322878196179605, |
| "grad_norm": 4.626585483551025, |
| "learning_rate": 9.336160687989634e-06, |
| "loss": 0.3721, |
| "step": 47100 |
| }, |
| { |
| "epoch": 0.8331713523351769, |
| "grad_norm": 1.097589135169983, |
| "learning_rate": 9.287074669651097e-06, |
| "loss": 0.3483, |
| "step": 47150 |
| }, |
| { |
| "epoch": 0.8340548850523934, |
| "grad_norm": 1.6428859233856201, |
| "learning_rate": 9.237988651312562e-06, |
| "loss": 0.3002, |
| "step": 47200 |
| }, |
| { |
| "epoch": 0.83493841776961, |
| "grad_norm": 1.0670841932296753, |
| "learning_rate": 9.188902632974023e-06, |
| "loss": 0.2333, |
| "step": 47250 |
| }, |
| { |
| "epoch": 0.8358219504868265, |
| "grad_norm": 1.9052667617797852, |
| "learning_rate": 9.139816614635488e-06, |
| "loss": 0.3043, |
| "step": 47300 |
| }, |
| { |
| "epoch": 0.836705483204043, |
| "grad_norm": 2.9311811923980713, |
| "learning_rate": 9.09073059629695e-06, |
| "loss": 0.2992, |
| "step": 47350 |
| }, |
| { |
| "epoch": 0.8375890159212596, |
| "grad_norm": 1.2520331144332886, |
| "learning_rate": 9.041644577958415e-06, |
| "loss": 0.3367, |
| "step": 47400 |
| }, |
| { |
| "epoch": 0.8384725486384761, |
| "grad_norm": 1.483476996421814, |
| "learning_rate": 8.992558559619878e-06, |
| "loss": 0.3135, |
| "step": 47450 |
| }, |
| { |
| "epoch": 0.8393560813556926, |
| "grad_norm": 1.5691540241241455, |
| "learning_rate": 8.943472541281341e-06, |
| "loss": 0.2681, |
| "step": 47500 |
| }, |
| { |
| "epoch": 0.8402396140729091, |
| "grad_norm": 1.2460750341415405, |
| "learning_rate": 8.894386522942804e-06, |
| "loss": 0.2764, |
| "step": 47550 |
| }, |
| { |
| "epoch": 0.8411231467901257, |
| "grad_norm": 1.3095312118530273, |
| "learning_rate": 8.845300504604269e-06, |
| "loss": 0.3211, |
| "step": 47600 |
| }, |
| { |
| "epoch": 0.8420066795073422, |
| "grad_norm": 1.5162594318389893, |
| "learning_rate": 8.796214486265734e-06, |
| "loss": 0.3081, |
| "step": 47650 |
| }, |
| { |
| "epoch": 0.8428902122245586, |
| "grad_norm": 1.3636444807052612, |
| "learning_rate": 8.747128467927197e-06, |
| "loss": 0.2929, |
| "step": 47700 |
| }, |
| { |
| "epoch": 0.8437737449417752, |
| "grad_norm": 1.2207202911376953, |
| "learning_rate": 8.69804244958866e-06, |
| "loss": 0.3466, |
| "step": 47750 |
| }, |
| { |
| "epoch": 0.8446572776589917, |
| "grad_norm": 1.294301152229309, |
| "learning_rate": 8.648956431250123e-06, |
| "loss": 0.3012, |
| "step": 47800 |
| }, |
| { |
| "epoch": 0.8455408103762082, |
| "grad_norm": 1.188514232635498, |
| "learning_rate": 8.599870412911587e-06, |
| "loss": 0.2953, |
| "step": 47850 |
| }, |
| { |
| "epoch": 0.8464243430934247, |
| "grad_norm": 1.5537595748901367, |
| "learning_rate": 8.55078439457305e-06, |
| "loss": 0.2914, |
| "step": 47900 |
| }, |
| { |
| "epoch": 0.8473078758106413, |
| "grad_norm": 1.640060544013977, |
| "learning_rate": 8.501698376234513e-06, |
| "loss": 0.3219, |
| "step": 47950 |
| }, |
| { |
| "epoch": 0.8481914085278578, |
| "grad_norm": 1.896763801574707, |
| "learning_rate": 8.452612357895976e-06, |
| "loss": 0.3104, |
| "step": 48000 |
| }, |
| { |
| "epoch": 0.8490749412450743, |
| "grad_norm": 1.4819157123565674, |
| "learning_rate": 8.403526339557441e-06, |
| "loss": 0.3676, |
| "step": 48050 |
| }, |
| { |
| "epoch": 0.8499584739622909, |
| "grad_norm": 1.882551670074463, |
| "learning_rate": 8.354440321218904e-06, |
| "loss": 0.2989, |
| "step": 48100 |
| }, |
| { |
| "epoch": 0.8508420066795074, |
| "grad_norm": 1.3162806034088135, |
| "learning_rate": 8.305354302880369e-06, |
| "loss": 0.3259, |
| "step": 48150 |
| }, |
| { |
| "epoch": 0.8517255393967239, |
| "grad_norm": 1.8228886127471924, |
| "learning_rate": 8.256268284541832e-06, |
| "loss": 0.2949, |
| "step": 48200 |
| }, |
| { |
| "epoch": 0.8526090721139403, |
| "grad_norm": 1.490918517112732, |
| "learning_rate": 8.207182266203294e-06, |
| "loss": 0.3914, |
| "step": 48250 |
| }, |
| { |
| "epoch": 0.8534926048311569, |
| "grad_norm": 1.5268231630325317, |
| "learning_rate": 8.158096247864759e-06, |
| "loss": 0.2645, |
| "step": 48300 |
| }, |
| { |
| "epoch": 0.8543761375483734, |
| "grad_norm": 1.9607213735580444, |
| "learning_rate": 8.109010229526222e-06, |
| "loss": 0.3358, |
| "step": 48350 |
| }, |
| { |
| "epoch": 0.8552596702655899, |
| "grad_norm": 1.4697561264038086, |
| "learning_rate": 8.059924211187687e-06, |
| "loss": 0.2939, |
| "step": 48400 |
| }, |
| { |
| "epoch": 0.8561432029828064, |
| "grad_norm": 4.159787654876709, |
| "learning_rate": 8.010838192849148e-06, |
| "loss": 0.4063, |
| "step": 48450 |
| }, |
| { |
| "epoch": 0.857026735700023, |
| "grad_norm": 2.129241466522217, |
| "learning_rate": 7.961752174510613e-06, |
| "loss": 0.3492, |
| "step": 48500 |
| }, |
| { |
| "epoch": 0.8579102684172395, |
| "grad_norm": 1.48981511592865, |
| "learning_rate": 7.912666156172076e-06, |
| "loss": 0.2794, |
| "step": 48550 |
| }, |
| { |
| "epoch": 0.858793801134456, |
| "grad_norm": 2.017918348312378, |
| "learning_rate": 7.86358013783354e-06, |
| "loss": 0.2854, |
| "step": 48600 |
| }, |
| { |
| "epoch": 0.8596773338516726, |
| "grad_norm": 1.4077606201171875, |
| "learning_rate": 7.814494119495003e-06, |
| "loss": 0.2664, |
| "step": 48650 |
| }, |
| { |
| "epoch": 0.8605608665688891, |
| "grad_norm": 1.417729139328003, |
| "learning_rate": 7.765408101156466e-06, |
| "loss": 0.3028, |
| "step": 48700 |
| }, |
| { |
| "epoch": 0.8614443992861056, |
| "grad_norm": 1.0813167095184326, |
| "learning_rate": 7.716322082817931e-06, |
| "loss": 0.2579, |
| "step": 48750 |
| }, |
| { |
| "epoch": 0.862327932003322, |
| "grad_norm": 2.008650064468384, |
| "learning_rate": 7.667236064479394e-06, |
| "loss": 0.3404, |
| "step": 48800 |
| }, |
| { |
| "epoch": 0.8632114647205386, |
| "grad_norm": 1.3516128063201904, |
| "learning_rate": 7.618150046140858e-06, |
| "loss": 0.3535, |
| "step": 48850 |
| }, |
| { |
| "epoch": 0.8640949974377551, |
| "grad_norm": 1.4083527326583862, |
| "learning_rate": 7.569064027802321e-06, |
| "loss": 0.2224, |
| "step": 48900 |
| }, |
| { |
| "epoch": 0.8649785301549716, |
| "grad_norm": 1.421423077583313, |
| "learning_rate": 7.5199780094637854e-06, |
| "loss": 0.2701, |
| "step": 48950 |
| }, |
| { |
| "epoch": 0.8658620628721881, |
| "grad_norm": 2.421118974685669, |
| "learning_rate": 7.4708919911252476e-06, |
| "loss": 0.2506, |
| "step": 49000 |
| }, |
| { |
| "epoch": 0.8667455955894047, |
| "grad_norm": 1.3858048915863037, |
| "learning_rate": 7.421805972786712e-06, |
| "loss": 0.3032, |
| "step": 49050 |
| }, |
| { |
| "epoch": 0.8676291283066212, |
| "grad_norm": 1.2791121006011963, |
| "learning_rate": 7.373701674814947e-06, |
| "loss": 0.3881, |
| "step": 49100 |
| }, |
| { |
| "epoch": 0.8685126610238377, |
| "grad_norm": 1.6947706937789917, |
| "learning_rate": 7.32461565647641e-06, |
| "loss": 0.253, |
| "step": 49150 |
| }, |
| { |
| "epoch": 0.8693961937410543, |
| "grad_norm": 1.4587703943252563, |
| "learning_rate": 7.2755296381378734e-06, |
| "loss": 0.2827, |
| "step": 49200 |
| }, |
| { |
| "epoch": 0.8702797264582708, |
| "grad_norm": 1.333967924118042, |
| "learning_rate": 7.2264436197993364e-06, |
| "loss": 0.266, |
| "step": 49250 |
| }, |
| { |
| "epoch": 0.8711632591754873, |
| "grad_norm": 1.3817411661148071, |
| "learning_rate": 7.1773576014608e-06, |
| "loss": 0.3008, |
| "step": 49300 |
| }, |
| { |
| "epoch": 0.8720467918927037, |
| "grad_norm": 1.269362211227417, |
| "learning_rate": 7.128271583122263e-06, |
| "loss": 0.3406, |
| "step": 49350 |
| }, |
| { |
| "epoch": 0.8729303246099203, |
| "grad_norm": 1.5153824090957642, |
| "learning_rate": 7.079185564783727e-06, |
| "loss": 0.3348, |
| "step": 49400 |
| }, |
| { |
| "epoch": 0.8738138573271368, |
| "grad_norm": 1.6337603330612183, |
| "learning_rate": 7.03009954644519e-06, |
| "loss": 0.3021, |
| "step": 49450 |
| }, |
| { |
| "epoch": 0.8746973900443533, |
| "grad_norm": 1.4994523525238037, |
| "learning_rate": 6.981013528106655e-06, |
| "loss": 0.3182, |
| "step": 49500 |
| }, |
| { |
| "epoch": 0.8755809227615698, |
| "grad_norm": 1.2485002279281616, |
| "learning_rate": 6.9319275097681185e-06, |
| "loss": 0.3063, |
| "step": 49550 |
| }, |
| { |
| "epoch": 0.8764644554787864, |
| "grad_norm": 1.538524866104126, |
| "learning_rate": 6.8828414914295815e-06, |
| "loss": 0.2385, |
| "step": 49600 |
| }, |
| { |
| "epoch": 0.8773479881960029, |
| "grad_norm": 1.3927173614501953, |
| "learning_rate": 6.833755473091045e-06, |
| "loss": 0.3152, |
| "step": 49650 |
| }, |
| { |
| "epoch": 0.8782315209132194, |
| "grad_norm": 1.4090054035186768, |
| "learning_rate": 6.784669454752508e-06, |
| "loss": 0.267, |
| "step": 49700 |
| }, |
| { |
| "epoch": 0.879115053630436, |
| "grad_norm": 1.5765697956085205, |
| "learning_rate": 6.735583436413972e-06, |
| "loss": 0.2599, |
| "step": 49750 |
| }, |
| { |
| "epoch": 0.8799985863476525, |
| "grad_norm": 1.617443323135376, |
| "learning_rate": 6.686497418075435e-06, |
| "loss": 0.3226, |
| "step": 49800 |
| }, |
| { |
| "epoch": 0.880882119064869, |
| "grad_norm": 1.385986089706421, |
| "learning_rate": 6.6374113997369e-06, |
| "loss": 0.3516, |
| "step": 49850 |
| }, |
| { |
| "epoch": 0.8817656517820854, |
| "grad_norm": 1.4890649318695068, |
| "learning_rate": 6.588325381398362e-06, |
| "loss": 0.2912, |
| "step": 49900 |
| }, |
| { |
| "epoch": 0.882649184499302, |
| "grad_norm": 2.459829807281494, |
| "learning_rate": 6.5392393630598265e-06, |
| "loss": 0.2853, |
| "step": 49950 |
| }, |
| { |
| "epoch": 0.8835327172165185, |
| "grad_norm": 1.6274219751358032, |
| "learning_rate": 6.4901533447212895e-06, |
| "loss": 0.3212, |
| "step": 50000 |
| }, |
| { |
| "epoch": 0.884416249933735, |
| "grad_norm": 2.2164740562438965, |
| "learning_rate": 6.441067326382753e-06, |
| "loss": 0.3399, |
| "step": 50050 |
| }, |
| { |
| "epoch": 0.8852997826509515, |
| "grad_norm": 3.567988157272339, |
| "learning_rate": 6.391981308044218e-06, |
| "loss": 0.3104, |
| "step": 50100 |
| }, |
| { |
| "epoch": 0.8861833153681681, |
| "grad_norm": 1.5539664030075073, |
| "learning_rate": 6.34289528970568e-06, |
| "loss": 0.368, |
| "step": 50150 |
| }, |
| { |
| "epoch": 0.8870668480853846, |
| "grad_norm": 1.6674470901489258, |
| "learning_rate": 6.293809271367145e-06, |
| "loss": 0.2848, |
| "step": 50200 |
| }, |
| { |
| "epoch": 0.8879503808026011, |
| "grad_norm": 1.1558799743652344, |
| "learning_rate": 6.244723253028607e-06, |
| "loss": 0.4137, |
| "step": 50250 |
| }, |
| { |
| "epoch": 0.8888339135198177, |
| "grad_norm": 1.2852174043655396, |
| "learning_rate": 6.195637234690072e-06, |
| "loss": 0.3597, |
| "step": 50300 |
| }, |
| { |
| "epoch": 0.8897174462370342, |
| "grad_norm": 2.747140407562256, |
| "learning_rate": 6.146551216351535e-06, |
| "loss": 0.3246, |
| "step": 50350 |
| }, |
| { |
| "epoch": 0.8906009789542507, |
| "grad_norm": 1.5731008052825928, |
| "learning_rate": 6.097465198012998e-06, |
| "loss": 0.2658, |
| "step": 50400 |
| }, |
| { |
| "epoch": 0.8914845116714671, |
| "grad_norm": 1.7012232542037964, |
| "learning_rate": 6.048379179674462e-06, |
| "loss": 0.2954, |
| "step": 50450 |
| }, |
| { |
| "epoch": 0.8923680443886837, |
| "grad_norm": 1.2959450483322144, |
| "learning_rate": 5.999293161335925e-06, |
| "loss": 0.3035, |
| "step": 50500 |
| }, |
| { |
| "epoch": 0.8932515771059002, |
| "grad_norm": 1.6592167615890503, |
| "learning_rate": 5.950207142997389e-06, |
| "loss": 0.2654, |
| "step": 50550 |
| }, |
| { |
| "epoch": 0.8941351098231167, |
| "grad_norm": 1.2229481935501099, |
| "learning_rate": 5.901121124658853e-06, |
| "loss": 0.2742, |
| "step": 50600 |
| }, |
| { |
| "epoch": 0.8950186425403333, |
| "grad_norm": 1.4973150491714478, |
| "learning_rate": 5.852035106320316e-06, |
| "loss": 0.3435, |
| "step": 50650 |
| }, |
| { |
| "epoch": 0.8959021752575498, |
| "grad_norm": 1.2695672512054443, |
| "learning_rate": 5.80294908798178e-06, |
| "loss": 0.2947, |
| "step": 50700 |
| }, |
| { |
| "epoch": 0.8967857079747663, |
| "grad_norm": 0.9303974509239197, |
| "learning_rate": 5.753863069643243e-06, |
| "loss": 0.3013, |
| "step": 50750 |
| }, |
| { |
| "epoch": 0.8976692406919828, |
| "grad_norm": 1.5696642398834229, |
| "learning_rate": 5.704777051304706e-06, |
| "loss": 0.3845, |
| "step": 50800 |
| }, |
| { |
| "epoch": 0.8985527734091994, |
| "grad_norm": 1.9302955865859985, |
| "learning_rate": 5.65569103296617e-06, |
| "loss": 0.2882, |
| "step": 50850 |
| }, |
| { |
| "epoch": 0.8994363061264159, |
| "grad_norm": 2.9837305545806885, |
| "learning_rate": 5.606605014627634e-06, |
| "loss": 0.3639, |
| "step": 50900 |
| }, |
| { |
| "epoch": 0.9003198388436324, |
| "grad_norm": 1.3305821418762207, |
| "learning_rate": 5.557518996289098e-06, |
| "loss": 0.2332, |
| "step": 50950 |
| }, |
| { |
| "epoch": 0.901203371560849, |
| "grad_norm": 1.2136187553405762, |
| "learning_rate": 5.508432977950561e-06, |
| "loss": 0.3363, |
| "step": 51000 |
| }, |
| { |
| "epoch": 0.9020869042780654, |
| "grad_norm": 1.782301664352417, |
| "learning_rate": 5.459346959612025e-06, |
| "loss": 0.2674, |
| "step": 51050 |
| }, |
| { |
| "epoch": 0.9029704369952819, |
| "grad_norm": 1.5983684062957764, |
| "learning_rate": 5.411242661640259e-06, |
| "loss": 0.331, |
| "step": 51100 |
| }, |
| { |
| "epoch": 0.9038539697124984, |
| "grad_norm": 1.749089002609253, |
| "learning_rate": 5.362156643301722e-06, |
| "loss": 0.2881, |
| "step": 51150 |
| }, |
| { |
| "epoch": 0.904737502429715, |
| "grad_norm": 1.2461782693862915, |
| "learning_rate": 5.313070624963186e-06, |
| "loss": 0.3553, |
| "step": 51200 |
| }, |
| { |
| "epoch": 0.9056210351469315, |
| "grad_norm": 2.317101001739502, |
| "learning_rate": 5.26398460662465e-06, |
| "loss": 0.2936, |
| "step": 51250 |
| }, |
| { |
| "epoch": 0.906504567864148, |
| "grad_norm": 1.3416547775268555, |
| "learning_rate": 5.214898588286113e-06, |
| "loss": 0.2987, |
| "step": 51300 |
| }, |
| { |
| "epoch": 0.9073881005813645, |
| "grad_norm": 3.7747082710266113, |
| "learning_rate": 5.1658125699475765e-06, |
| "loss": 0.3343, |
| "step": 51350 |
| }, |
| { |
| "epoch": 0.9082716332985811, |
| "grad_norm": 1.4777984619140625, |
| "learning_rate": 5.1167265516090395e-06, |
| "loss": 0.2919, |
| "step": 51400 |
| }, |
| { |
| "epoch": 0.9091551660157976, |
| "grad_norm": 1.3142715692520142, |
| "learning_rate": 5.067640533270503e-06, |
| "loss": 0.2997, |
| "step": 51450 |
| }, |
| { |
| "epoch": 0.9100386987330141, |
| "grad_norm": 1.3387079238891602, |
| "learning_rate": 5.018554514931967e-06, |
| "loss": 0.2247, |
| "step": 51500 |
| }, |
| { |
| "epoch": 0.9109222314502307, |
| "grad_norm": 1.9581636190414429, |
| "learning_rate": 4.96946849659343e-06, |
| "loss": 0.2918, |
| "step": 51550 |
| }, |
| { |
| "epoch": 0.9118057641674471, |
| "grad_norm": 1.3822007179260254, |
| "learning_rate": 4.920382478254894e-06, |
| "loss": 0.3295, |
| "step": 51600 |
| }, |
| { |
| "epoch": 0.9126892968846636, |
| "grad_norm": 1.4896866083145142, |
| "learning_rate": 4.871296459916358e-06, |
| "loss": 0.2493, |
| "step": 51650 |
| }, |
| { |
| "epoch": 0.9135728296018801, |
| "grad_norm": 4.590723037719727, |
| "learning_rate": 4.8222104415778216e-06, |
| "loss": 0.3088, |
| "step": 51700 |
| }, |
| { |
| "epoch": 0.9144563623190967, |
| "grad_norm": 1.653506875038147, |
| "learning_rate": 4.7731244232392845e-06, |
| "loss": 0.2287, |
| "step": 51750 |
| }, |
| { |
| "epoch": 0.9153398950363132, |
| "grad_norm": 1.7086869478225708, |
| "learning_rate": 4.724038404900748e-06, |
| "loss": 0.2067, |
| "step": 51800 |
| }, |
| { |
| "epoch": 0.9162234277535297, |
| "grad_norm": 1.1146478652954102, |
| "learning_rate": 4.674952386562212e-06, |
| "loss": 0.2735, |
| "step": 51850 |
| }, |
| { |
| "epoch": 0.9171069604707462, |
| "grad_norm": 2.2454397678375244, |
| "learning_rate": 4.625866368223675e-06, |
| "loss": 0.3976, |
| "step": 51900 |
| }, |
| { |
| "epoch": 0.9179904931879628, |
| "grad_norm": 1.902377724647522, |
| "learning_rate": 4.576780349885139e-06, |
| "loss": 0.2939, |
| "step": 51950 |
| }, |
| { |
| "epoch": 0.9188740259051793, |
| "grad_norm": 4.320808410644531, |
| "learning_rate": 4.527694331546602e-06, |
| "loss": 0.3193, |
| "step": 52000 |
| }, |
| { |
| "epoch": 0.9197575586223958, |
| "grad_norm": 1.4950217008590698, |
| "learning_rate": 4.478608313208066e-06, |
| "loss": 0.2988, |
| "step": 52050 |
| }, |
| { |
| "epoch": 0.9206410913396124, |
| "grad_norm": 1.5405720472335815, |
| "learning_rate": 4.42952229486953e-06, |
| "loss": 0.2286, |
| "step": 52100 |
| }, |
| { |
| "epoch": 0.9215246240568288, |
| "grad_norm": 1.5918203592300415, |
| "learning_rate": 4.3804362765309926e-06, |
| "loss": 0.371, |
| "step": 52150 |
| }, |
| { |
| "epoch": 0.9224081567740453, |
| "grad_norm": 1.2329323291778564, |
| "learning_rate": 4.331350258192457e-06, |
| "loss": 0.2825, |
| "step": 52200 |
| }, |
| { |
| "epoch": 0.9232916894912618, |
| "grad_norm": 1.2270597219467163, |
| "learning_rate": 4.28226423985392e-06, |
| "loss": 0.3728, |
| "step": 52250 |
| }, |
| { |
| "epoch": 0.9241752222084784, |
| "grad_norm": 1.8672150373458862, |
| "learning_rate": 4.233178221515384e-06, |
| "loss": 0.3196, |
| "step": 52300 |
| }, |
| { |
| "epoch": 0.9250587549256949, |
| "grad_norm": 1.6005786657333374, |
| "learning_rate": 4.184092203176848e-06, |
| "loss": 0.3322, |
| "step": 52350 |
| }, |
| { |
| "epoch": 0.9259422876429114, |
| "grad_norm": 1.4158750772476196, |
| "learning_rate": 4.135006184838311e-06, |
| "loss": 0.3258, |
| "step": 52400 |
| }, |
| { |
| "epoch": 0.926825820360128, |
| "grad_norm": 4.394749164581299, |
| "learning_rate": 4.085920166499775e-06, |
| "loss": 0.3419, |
| "step": 52450 |
| }, |
| { |
| "epoch": 0.9277093530773445, |
| "grad_norm": 0.856221616268158, |
| "learning_rate": 4.036834148161238e-06, |
| "loss": 0.2997, |
| "step": 52500 |
| }, |
| { |
| "epoch": 0.928592885794561, |
| "grad_norm": 1.1520658731460571, |
| "learning_rate": 3.9877481298227014e-06, |
| "loss": 0.2425, |
| "step": 52550 |
| }, |
| { |
| "epoch": 0.9294764185117775, |
| "grad_norm": 1.2415558099746704, |
| "learning_rate": 3.938662111484165e-06, |
| "loss": 0.2708, |
| "step": 52600 |
| }, |
| { |
| "epoch": 0.9303599512289941, |
| "grad_norm": 2.747580051422119, |
| "learning_rate": 3.889576093145628e-06, |
| "loss": 0.2201, |
| "step": 52650 |
| }, |
| { |
| "epoch": 0.9312434839462105, |
| "grad_norm": 2.005228281021118, |
| "learning_rate": 3.840490074807092e-06, |
| "loss": 0.2958, |
| "step": 52700 |
| }, |
| { |
| "epoch": 0.932127016663427, |
| "grad_norm": 1.6022164821624756, |
| "learning_rate": 3.791404056468556e-06, |
| "loss": 0.2647, |
| "step": 52750 |
| }, |
| { |
| "epoch": 0.9330105493806435, |
| "grad_norm": 1.7913720607757568, |
| "learning_rate": 3.7423180381300193e-06, |
| "loss": 0.2329, |
| "step": 52800 |
| }, |
| { |
| "epoch": 0.9338940820978601, |
| "grad_norm": 2.5619053840637207, |
| "learning_rate": 3.693232019791483e-06, |
| "loss": 0.2984, |
| "step": 52850 |
| }, |
| { |
| "epoch": 0.9347776148150766, |
| "grad_norm": 1.545856237411499, |
| "learning_rate": 3.6441460014529465e-06, |
| "loss": 0.2546, |
| "step": 52900 |
| }, |
| { |
| "epoch": 0.9356611475322931, |
| "grad_norm": 2.4920833110809326, |
| "learning_rate": 3.59505998311441e-06, |
| "loss": 0.2433, |
| "step": 52950 |
| }, |
| { |
| "epoch": 0.9365446802495097, |
| "grad_norm": 1.65108323097229, |
| "learning_rate": 3.5459739647758733e-06, |
| "loss": 0.3632, |
| "step": 53000 |
| }, |
| { |
| "epoch": 0.9374282129667262, |
| "grad_norm": 2.5942931175231934, |
| "learning_rate": 3.496887946437337e-06, |
| "loss": 0.2602, |
| "step": 53050 |
| }, |
| { |
| "epoch": 0.9383117456839427, |
| "grad_norm": 1.256638526916504, |
| "learning_rate": 3.4478019280988005e-06, |
| "loss": 0.2066, |
| "step": 53100 |
| }, |
| { |
| "epoch": 0.9391952784011592, |
| "grad_norm": 3.677544593811035, |
| "learning_rate": 3.399697630127035e-06, |
| "loss": 0.2957, |
| "step": 53150 |
| }, |
| { |
| "epoch": 0.9400788111183758, |
| "grad_norm": 1.3518919944763184, |
| "learning_rate": 3.3506116117884983e-06, |
| "loss": 0.3931, |
| "step": 53200 |
| }, |
| { |
| "epoch": 0.9409623438355922, |
| "grad_norm": 1.065996766090393, |
| "learning_rate": 3.3015255934499617e-06, |
| "loss": 0.3384, |
| "step": 53250 |
| }, |
| { |
| "epoch": 0.9418458765528087, |
| "grad_norm": 1.57516610622406, |
| "learning_rate": 3.252439575111425e-06, |
| "loss": 0.234, |
| "step": 53300 |
| }, |
| { |
| "epoch": 0.9427294092700252, |
| "grad_norm": 1.2013062238693237, |
| "learning_rate": 3.203353556772889e-06, |
| "loss": 0.2244, |
| "step": 53350 |
| }, |
| { |
| "epoch": 0.9436129419872418, |
| "grad_norm": 1.448370099067688, |
| "learning_rate": 3.1542675384343524e-06, |
| "loss": 0.2736, |
| "step": 53400 |
| }, |
| { |
| "epoch": 0.9444964747044583, |
| "grad_norm": 1.7333183288574219, |
| "learning_rate": 3.105181520095816e-06, |
| "loss": 0.3088, |
| "step": 53450 |
| }, |
| { |
| "epoch": 0.9453800074216748, |
| "grad_norm": 1.5718059539794922, |
| "learning_rate": 3.0560955017572796e-06, |
| "loss": 0.3135, |
| "step": 53500 |
| }, |
| { |
| "epoch": 0.9462635401388914, |
| "grad_norm": 1.3086848258972168, |
| "learning_rate": 3.007009483418743e-06, |
| "loss": 0.2813, |
| "step": 53550 |
| }, |
| { |
| "epoch": 0.9471470728561079, |
| "grad_norm": 1.3118650913238525, |
| "learning_rate": 2.9579234650802064e-06, |
| "loss": 0.2333, |
| "step": 53600 |
| }, |
| { |
| "epoch": 0.9480306055733244, |
| "grad_norm": 3.0708839893341064, |
| "learning_rate": 2.9088374467416706e-06, |
| "loss": 0.3022, |
| "step": 53650 |
| }, |
| { |
| "epoch": 0.9489141382905409, |
| "grad_norm": 1.637635588645935, |
| "learning_rate": 2.859751428403134e-06, |
| "loss": 0.303, |
| "step": 53700 |
| }, |
| { |
| "epoch": 0.9497976710077575, |
| "grad_norm": 2.5479607582092285, |
| "learning_rate": 2.8106654100645974e-06, |
| "loss": 0.2651, |
| "step": 53750 |
| }, |
| { |
| "epoch": 0.9506812037249739, |
| "grad_norm": 4.394486427307129, |
| "learning_rate": 2.761579391726061e-06, |
| "loss": 0.3384, |
| "step": 53800 |
| }, |
| { |
| "epoch": 0.9515647364421904, |
| "grad_norm": 3.1194252967834473, |
| "learning_rate": 2.712493373387524e-06, |
| "loss": 0.3324, |
| "step": 53850 |
| }, |
| { |
| "epoch": 0.952448269159407, |
| "grad_norm": 1.082737684249878, |
| "learning_rate": 2.663407355048988e-06, |
| "loss": 0.2253, |
| "step": 53900 |
| }, |
| { |
| "epoch": 0.9533318018766235, |
| "grad_norm": 1.0127415657043457, |
| "learning_rate": 2.614321336710452e-06, |
| "loss": 0.2942, |
| "step": 53950 |
| }, |
| { |
| "epoch": 0.95421533459384, |
| "grad_norm": 4.512701988220215, |
| "learning_rate": 2.5652353183719152e-06, |
| "loss": 0.2997, |
| "step": 54000 |
| }, |
| { |
| "epoch": 0.9550988673110565, |
| "grad_norm": 1.0720359086990356, |
| "learning_rate": 2.5161493000333786e-06, |
| "loss": 0.3954, |
| "step": 54050 |
| }, |
| { |
| "epoch": 0.9559824000282731, |
| "grad_norm": 1.608279824256897, |
| "learning_rate": 2.467063281694842e-06, |
| "loss": 0.3496, |
| "step": 54100 |
| }, |
| { |
| "epoch": 0.9568659327454896, |
| "grad_norm": 1.2330106496810913, |
| "learning_rate": 2.4179772633563054e-06, |
| "loss": 0.2609, |
| "step": 54150 |
| }, |
| { |
| "epoch": 0.9577494654627061, |
| "grad_norm": 1.4279929399490356, |
| "learning_rate": 2.3688912450177693e-06, |
| "loss": 0.3942, |
| "step": 54200 |
| }, |
| { |
| "epoch": 0.9586329981799226, |
| "grad_norm": 1.4870383739471436, |
| "learning_rate": 2.319805226679233e-06, |
| "loss": 0.3794, |
| "step": 54250 |
| }, |
| { |
| "epoch": 0.9595165308971392, |
| "grad_norm": 3.1990461349487305, |
| "learning_rate": 2.2707192083406965e-06, |
| "loss": 0.2834, |
| "step": 54300 |
| }, |
| { |
| "epoch": 0.9604000636143556, |
| "grad_norm": 1.178895115852356, |
| "learning_rate": 2.22163319000216e-06, |
| "loss": 0.3271, |
| "step": 54350 |
| }, |
| { |
| "epoch": 0.9612835963315721, |
| "grad_norm": 1.724674105644226, |
| "learning_rate": 2.1725471716636233e-06, |
| "loss": 0.3048, |
| "step": 54400 |
| }, |
| { |
| "epoch": 0.9621671290487886, |
| "grad_norm": 1.5154780149459839, |
| "learning_rate": 2.1234611533250867e-06, |
| "loss": 0.2813, |
| "step": 54450 |
| }, |
| { |
| "epoch": 0.9630506617660052, |
| "grad_norm": 1.3216954469680786, |
| "learning_rate": 2.0743751349865505e-06, |
| "loss": 0.3229, |
| "step": 54500 |
| }, |
| { |
| "epoch": 0.9639341944832217, |
| "grad_norm": 1.5333393812179565, |
| "learning_rate": 2.0252891166480143e-06, |
| "loss": 0.3064, |
| "step": 54550 |
| }, |
| { |
| "epoch": 0.9648177272004382, |
| "grad_norm": 1.3715639114379883, |
| "learning_rate": 1.9762030983094777e-06, |
| "loss": 0.2925, |
| "step": 54600 |
| }, |
| { |
| "epoch": 0.9657012599176548, |
| "grad_norm": 3.4723856449127197, |
| "learning_rate": 1.927117079970941e-06, |
| "loss": 0.301, |
| "step": 54650 |
| }, |
| { |
| "epoch": 0.9665847926348713, |
| "grad_norm": 3.3657915592193604, |
| "learning_rate": 1.8780310616324047e-06, |
| "loss": 0.2484, |
| "step": 54700 |
| }, |
| { |
| "epoch": 0.9674683253520878, |
| "grad_norm": 3.2125537395477295, |
| "learning_rate": 1.8289450432938681e-06, |
| "loss": 0.3228, |
| "step": 54750 |
| }, |
| { |
| "epoch": 0.9683518580693043, |
| "grad_norm": 3.5145859718322754, |
| "learning_rate": 1.779859024955332e-06, |
| "loss": 0.2935, |
| "step": 54800 |
| }, |
| { |
| "epoch": 0.9692353907865209, |
| "grad_norm": 1.5993742942810059, |
| "learning_rate": 1.7307730066167953e-06, |
| "loss": 0.3085, |
| "step": 54850 |
| }, |
| { |
| "epoch": 0.9701189235037373, |
| "grad_norm": 4.223308086395264, |
| "learning_rate": 1.681686988278259e-06, |
| "loss": 0.3112, |
| "step": 54900 |
| }, |
| { |
| "epoch": 0.9710024562209538, |
| "grad_norm": 1.7939913272857666, |
| "learning_rate": 1.6326009699397223e-06, |
| "loss": 0.2889, |
| "step": 54950 |
| }, |
| { |
| "epoch": 0.9718859889381704, |
| "grad_norm": 1.1405465602874756, |
| "learning_rate": 1.583514951601186e-06, |
| "loss": 0.2746, |
| "step": 55000 |
| }, |
| { |
| "epoch": 0.9727695216553869, |
| "grad_norm": 1.8150931596755981, |
| "learning_rate": 1.5344289332626496e-06, |
| "loss": 0.2772, |
| "step": 55050 |
| }, |
| { |
| "epoch": 0.9736530543726034, |
| "grad_norm": 1.4807177782058716, |
| "learning_rate": 1.485342914924113e-06, |
| "loss": 0.2965, |
| "step": 55100 |
| }, |
| { |
| "epoch": 0.9745365870898199, |
| "grad_norm": 1.4012283086776733, |
| "learning_rate": 1.4362568965855766e-06, |
| "loss": 0.2382, |
| "step": 55150 |
| }, |
| { |
| "epoch": 0.9754201198070365, |
| "grad_norm": 1.435829520225525, |
| "learning_rate": 1.388152598613811e-06, |
| "loss": 0.3863, |
| "step": 55200 |
| }, |
| { |
| "epoch": 0.976303652524253, |
| "grad_norm": 1.0731230974197388, |
| "learning_rate": 1.3390665802752744e-06, |
| "loss": 0.2909, |
| "step": 55250 |
| }, |
| { |
| "epoch": 0.9771871852414695, |
| "grad_norm": 1.6253186464309692, |
| "learning_rate": 1.289980561936738e-06, |
| "loss": 0.3787, |
| "step": 55300 |
| }, |
| { |
| "epoch": 0.978070717958686, |
| "grad_norm": 1.9667285680770874, |
| "learning_rate": 1.2408945435982016e-06, |
| "loss": 0.3196, |
| "step": 55350 |
| }, |
| { |
| "epoch": 0.9789542506759026, |
| "grad_norm": 1.1798194646835327, |
| "learning_rate": 1.191808525259665e-06, |
| "loss": 0.3738, |
| "step": 55400 |
| }, |
| { |
| "epoch": 0.979837783393119, |
| "grad_norm": 1.5018582344055176, |
| "learning_rate": 1.1427225069211286e-06, |
| "loss": 0.3588, |
| "step": 55450 |
| }, |
| { |
| "epoch": 0.9807213161103355, |
| "grad_norm": 1.1979721784591675, |
| "learning_rate": 1.0936364885825922e-06, |
| "loss": 0.3559, |
| "step": 55500 |
| }, |
| { |
| "epoch": 0.9816048488275521, |
| "grad_norm": 3.014507532119751, |
| "learning_rate": 1.0445504702440556e-06, |
| "loss": 0.4382, |
| "step": 55550 |
| }, |
| { |
| "epoch": 0.9824883815447686, |
| "grad_norm": 1.5364562273025513, |
| "learning_rate": 9.954644519055192e-07, |
| "loss": 0.2588, |
| "step": 55600 |
| }, |
| { |
| "epoch": 0.9833719142619851, |
| "grad_norm": 1.011873483657837, |
| "learning_rate": 9.463784335669829e-07, |
| "loss": 0.3132, |
| "step": 55650 |
| }, |
| { |
| "epoch": 0.9842554469792016, |
| "grad_norm": 2.5110092163085938, |
| "learning_rate": 8.972924152284464e-07, |
| "loss": 0.2922, |
| "step": 55700 |
| }, |
| { |
| "epoch": 0.9851389796964182, |
| "grad_norm": 1.2086411714553833, |
| "learning_rate": 8.4820639688991e-07, |
| "loss": 0.2409, |
| "step": 55750 |
| }, |
| { |
| "epoch": 0.9860225124136347, |
| "grad_norm": 1.5035746097564697, |
| "learning_rate": 7.991203785513735e-07, |
| "loss": 0.2704, |
| "step": 55800 |
| }, |
| { |
| "epoch": 0.9869060451308512, |
| "grad_norm": 1.3643758296966553, |
| "learning_rate": 7.50034360212837e-07, |
| "loss": 0.2855, |
| "step": 55850 |
| }, |
| { |
| "epoch": 0.9877895778480678, |
| "grad_norm": 1.2211904525756836, |
| "learning_rate": 7.009483418743006e-07, |
| "loss": 0.241, |
| "step": 55900 |
| }, |
| { |
| "epoch": 0.9886731105652843, |
| "grad_norm": 3.049858570098877, |
| "learning_rate": 6.518623235357641e-07, |
| "loss": 0.2616, |
| "step": 55950 |
| }, |
| { |
| "epoch": 0.9895566432825007, |
| "grad_norm": 1.8196197748184204, |
| "learning_rate": 6.027763051972277e-07, |
| "loss": 0.2572, |
| "step": 56000 |
| }, |
| { |
| "epoch": 0.9904401759997172, |
| "grad_norm": 1.814112663269043, |
| "learning_rate": 5.536902868586912e-07, |
| "loss": 0.3437, |
| "step": 56050 |
| }, |
| { |
| "epoch": 0.9913237087169338, |
| "grad_norm": 2.0368192195892334, |
| "learning_rate": 5.046042685201547e-07, |
| "loss": 0.2681, |
| "step": 56100 |
| }, |
| { |
| "epoch": 0.9922072414341503, |
| "grad_norm": 1.4389891624450684, |
| "learning_rate": 4.555182501816183e-07, |
| "loss": 0.2366, |
| "step": 56150 |
| }, |
| { |
| "epoch": 0.9930907741513668, |
| "grad_norm": 2.772890567779541, |
| "learning_rate": 4.0643223184308187e-07, |
| "loss": 0.2505, |
| "step": 56200 |
| }, |
| { |
| "epoch": 0.9939743068685833, |
| "grad_norm": 1.548779010772705, |
| "learning_rate": 3.5734621350454537e-07, |
| "loss": 0.3341, |
| "step": 56250 |
| }, |
| { |
| "epoch": 0.9948578395857999, |
| "grad_norm": 1.6362569332122803, |
| "learning_rate": 3.0826019516600893e-07, |
| "loss": 0.3292, |
| "step": 56300 |
| }, |
| { |
| "epoch": 0.9957413723030164, |
| "grad_norm": 1.707270622253418, |
| "learning_rate": 2.591741768274725e-07, |
| "loss": 0.3199, |
| "step": 56350 |
| }, |
| { |
| "epoch": 0.9966249050202329, |
| "grad_norm": 2.1296205520629883, |
| "learning_rate": 2.1008815848893604e-07, |
| "loss": 0.284, |
| "step": 56400 |
| }, |
| { |
| "epoch": 0.9975084377374495, |
| "grad_norm": 1.6319339275360107, |
| "learning_rate": 1.6100214015039955e-07, |
| "loss": 0.3286, |
| "step": 56450 |
| }, |
| { |
| "epoch": 0.998391970454666, |
| "grad_norm": 1.348299503326416, |
| "learning_rate": 1.119161218118631e-07, |
| "loss": 0.3321, |
| "step": 56500 |
| }, |
| { |
| "epoch": 0.9992755031718824, |
| "grad_norm": 1.5483179092407227, |
| "learning_rate": 6.283010347332666e-08, |
| "loss": 0.3073, |
| "step": 56550 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 56591, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 200, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 256, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|