| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 1024, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0009765625, |
| "grad_norm": 0.6541444063186646, |
| "learning_rate": 0.0, |
| "loss": 1.0280990600585938, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.001953125, |
| "grad_norm": 0.4356674551963806, |
| "learning_rate": 4e-05, |
| "loss": 0.8305179476737976, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0029296875, |
| "grad_norm": 0.3900858759880066, |
| "learning_rate": 8e-05, |
| "loss": 0.7835474014282227, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.00390625, |
| "grad_norm": 0.3717947006225586, |
| "learning_rate": 0.00012, |
| "loss": 1.1571688652038574, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0048828125, |
| "grad_norm": 0.2760661542415619, |
| "learning_rate": 0.00016, |
| "loss": 0.8141135573387146, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.005859375, |
| "grad_norm": 0.24524882435798645, |
| "learning_rate": 0.0002, |
| "loss": 0.29919666051864624, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0068359375, |
| "grad_norm": 0.3155483305454254, |
| "learning_rate": 0.00019980372914622178, |
| "loss": 0.916366696357727, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0078125, |
| "grad_norm": 1.0419310331344604, |
| "learning_rate": 0.00019960745829244357, |
| "loss": 0.986505389213562, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0087890625, |
| "grad_norm": 0.32395845651626587, |
| "learning_rate": 0.00019941118743866537, |
| "loss": 0.7845190167427063, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.009765625, |
| "grad_norm": 0.564084529876709, |
| "learning_rate": 0.00019921491658488717, |
| "loss": 1.0922366380691528, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0107421875, |
| "grad_norm": 0.4066593647003174, |
| "learning_rate": 0.00019901864573110893, |
| "loss": 1.0279463529586792, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.01171875, |
| "grad_norm": 0.43442535400390625, |
| "learning_rate": 0.00019882237487733073, |
| "loss": 0.9713175892829895, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.0126953125, |
| "grad_norm": 0.26689526438713074, |
| "learning_rate": 0.0001986261040235525, |
| "loss": 0.38461241126060486, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.013671875, |
| "grad_norm": 0.41254541277885437, |
| "learning_rate": 0.0001984298331697743, |
| "loss": 0.7746479511260986, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.0146484375, |
| "grad_norm": 0.39432424306869507, |
| "learning_rate": 0.0001982335623159961, |
| "loss": 0.7843194603919983, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.015625, |
| "grad_norm": 0.4303337037563324, |
| "learning_rate": 0.0001980372914622179, |
| "loss": 0.6613403558731079, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.0166015625, |
| "grad_norm": 0.875269889831543, |
| "learning_rate": 0.00019784102060843966, |
| "loss": 1.0992671251296997, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.017578125, |
| "grad_norm": 0.21415413916110992, |
| "learning_rate": 0.00019764474975466145, |
| "loss": 0.2784216105937958, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.0185546875, |
| "grad_norm": 0.4318086504936218, |
| "learning_rate": 0.00019744847890088322, |
| "loss": 0.6146124005317688, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.01953125, |
| "grad_norm": 0.20149515569210052, |
| "learning_rate": 0.00019725220804710502, |
| "loss": 0.3920556306838989, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0205078125, |
| "grad_norm": 0.358688622713089, |
| "learning_rate": 0.0001970559371933268, |
| "loss": 0.6672685742378235, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.021484375, |
| "grad_norm": 0.5916730165481567, |
| "learning_rate": 0.00019685966633954858, |
| "loss": 1.0804443359375, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.0224609375, |
| "grad_norm": 0.3139825761318207, |
| "learning_rate": 0.00019666339548577038, |
| "loss": 0.7358766794204712, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.0234375, |
| "grad_norm": 0.4019712805747986, |
| "learning_rate": 0.00019646712463199215, |
| "loss": 0.7362902164459229, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.0244140625, |
| "grad_norm": 0.2874290347099304, |
| "learning_rate": 0.00019627085377821394, |
| "loss": 0.6446189284324646, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.025390625, |
| "grad_norm": 0.357494592666626, |
| "learning_rate": 0.0001960745829244357, |
| "loss": 0.2820976972579956, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.0263671875, |
| "grad_norm": 0.22216391563415527, |
| "learning_rate": 0.00019587831207065753, |
| "loss": 0.6020435094833374, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.02734375, |
| "grad_norm": 0.23284995555877686, |
| "learning_rate": 0.0001956820412168793, |
| "loss": 0.44151532649993896, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.0283203125, |
| "grad_norm": 0.3594605028629303, |
| "learning_rate": 0.0001954857703631011, |
| "loss": 0.9414041042327881, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.029296875, |
| "grad_norm": 0.4460504353046417, |
| "learning_rate": 0.00019528949950932287, |
| "loss": 0.7148531079292297, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0302734375, |
| "grad_norm": 0.3392362892627716, |
| "learning_rate": 0.00019509322865554466, |
| "loss": 0.7185512781143188, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.03125, |
| "grad_norm": 0.3340625464916229, |
| "learning_rate": 0.00019489695780176643, |
| "loss": 0.6613262891769409, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.0322265625, |
| "grad_norm": 0.26223355531692505, |
| "learning_rate": 0.00019470068694798826, |
| "loss": 0.590149462223053, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.033203125, |
| "grad_norm": 0.3481689691543579, |
| "learning_rate": 0.00019450441609421002, |
| "loss": 0.5590913891792297, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.0341796875, |
| "grad_norm": 0.4775488078594208, |
| "learning_rate": 0.00019430814524043182, |
| "loss": 0.927351176738739, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.03515625, |
| "grad_norm": 0.4474835693836212, |
| "learning_rate": 0.0001941118743866536, |
| "loss": 0.7719380855560303, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.0361328125, |
| "grad_norm": 0.3538999855518341, |
| "learning_rate": 0.00019391560353287536, |
| "loss": 1.0287561416625977, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.037109375, |
| "grad_norm": 0.5018237233161926, |
| "learning_rate": 0.00019371933267909715, |
| "loss": 1.049814224243164, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.0380859375, |
| "grad_norm": 0.5052743554115295, |
| "learning_rate": 0.00019352306182531895, |
| "loss": 0.39767658710479736, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.0390625, |
| "grad_norm": 0.46170520782470703, |
| "learning_rate": 0.00019332679097154075, |
| "loss": 0.9849376678466797, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0400390625, |
| "grad_norm": 0.5961291193962097, |
| "learning_rate": 0.00019313052011776251, |
| "loss": 0.8527336716651917, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.041015625, |
| "grad_norm": 0.4002876579761505, |
| "learning_rate": 0.0001929342492639843, |
| "loss": 0.7445047497749329, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.0419921875, |
| "grad_norm": 0.6382992267608643, |
| "learning_rate": 0.00019273797841020608, |
| "loss": 0.7587878704071045, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.04296875, |
| "grad_norm": 0.4204530715942383, |
| "learning_rate": 0.00019254170755642788, |
| "loss": 0.943995475769043, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.0439453125, |
| "grad_norm": 0.29038068652153015, |
| "learning_rate": 0.00019234543670264967, |
| "loss": 0.4540131688117981, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.044921875, |
| "grad_norm": 0.41968628764152527, |
| "learning_rate": 0.00019214916584887147, |
| "loss": 0.3900204300880432, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.0458984375, |
| "grad_norm": 0.5870251059532166, |
| "learning_rate": 0.00019195289499509324, |
| "loss": 0.8700598478317261, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.046875, |
| "grad_norm": 0.3120124042034149, |
| "learning_rate": 0.00019175662414131503, |
| "loss": 0.2866731882095337, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.0478515625, |
| "grad_norm": 0.31891942024230957, |
| "learning_rate": 0.0001915603532875368, |
| "loss": 0.7711223363876343, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.048828125, |
| "grad_norm": 0.4250207543373108, |
| "learning_rate": 0.0001913640824337586, |
| "loss": 0.7499758005142212, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.0498046875, |
| "grad_norm": 0.4769924581050873, |
| "learning_rate": 0.0001911678115799804, |
| "loss": 0.8479812145233154, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.05078125, |
| "grad_norm": 0.2966979146003723, |
| "learning_rate": 0.00019097154072620216, |
| "loss": 0.8125182390213013, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.0517578125, |
| "grad_norm": 0.4924452006816864, |
| "learning_rate": 0.00019077526987242396, |
| "loss": 1.006331443786621, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.052734375, |
| "grad_norm": 0.5558736324310303, |
| "learning_rate": 0.00019057899901864573, |
| "loss": 0.8218062520027161, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.0537109375, |
| "grad_norm": 0.488903284072876, |
| "learning_rate": 0.00019038272816486752, |
| "loss": 0.7451006770133972, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.0546875, |
| "grad_norm": 0.6092124581336975, |
| "learning_rate": 0.00019018645731108932, |
| "loss": 0.3371097445487976, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.0556640625, |
| "grad_norm": 0.34885621070861816, |
| "learning_rate": 0.00018999018645731111, |
| "loss": 0.9263520836830139, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.056640625, |
| "grad_norm": 0.41470521688461304, |
| "learning_rate": 0.00018979391560353288, |
| "loss": 0.8741390109062195, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.0576171875, |
| "grad_norm": 0.32286664843559265, |
| "learning_rate": 0.00018959764474975468, |
| "loss": 0.6128658056259155, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.05859375, |
| "grad_norm": 0.43667954206466675, |
| "learning_rate": 0.00018940137389597645, |
| "loss": 0.822106122970581, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.0595703125, |
| "grad_norm": 0.5501149892807007, |
| "learning_rate": 0.00018920510304219824, |
| "loss": 0.2981743812561035, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.060546875, |
| "grad_norm": 0.5234649777412415, |
| "learning_rate": 0.00018900883218842004, |
| "loss": 0.710310161113739, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.0615234375, |
| "grad_norm": 0.5040559768676758, |
| "learning_rate": 0.00018881256133464184, |
| "loss": 1.0355676412582397, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.0625, |
| "grad_norm": 0.4435643255710602, |
| "learning_rate": 0.0001886162904808636, |
| "loss": 1.031105399131775, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.0634765625, |
| "grad_norm": 0.4987465441226959, |
| "learning_rate": 0.0001884200196270854, |
| "loss": 0.7753915190696716, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.064453125, |
| "grad_norm": 0.3633696436882019, |
| "learning_rate": 0.00018822374877330717, |
| "loss": 1.2376799583435059, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.0654296875, |
| "grad_norm": 1.0342258214950562, |
| "learning_rate": 0.00018802747791952894, |
| "loss": 0.6145737171173096, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.06640625, |
| "grad_norm": 0.47045138478279114, |
| "learning_rate": 0.00018783120706575076, |
| "loss": 0.8622407913208008, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.0673828125, |
| "grad_norm": 0.47864851355552673, |
| "learning_rate": 0.00018763493621197253, |
| "loss": 0.6727300882339478, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.068359375, |
| "grad_norm": 0.38102060556411743, |
| "learning_rate": 0.00018743866535819433, |
| "loss": 0.7417519092559814, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.0693359375, |
| "grad_norm": 0.4229515492916107, |
| "learning_rate": 0.0001872423945044161, |
| "loss": 0.46951866149902344, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.0703125, |
| "grad_norm": 0.4868115186691284, |
| "learning_rate": 0.0001870461236506379, |
| "loss": 0.32457292079925537, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.0712890625, |
| "grad_norm": 0.298020601272583, |
| "learning_rate": 0.00018684985279685966, |
| "loss": 0.2501494288444519, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.072265625, |
| "grad_norm": 0.49870651960372925, |
| "learning_rate": 0.00018665358194308145, |
| "loss": 0.5599403381347656, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.0732421875, |
| "grad_norm": 0.5717479586601257, |
| "learning_rate": 0.00018645731108930325, |
| "loss": 0.4725653827190399, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.07421875, |
| "grad_norm": 0.5230128765106201, |
| "learning_rate": 0.00018626104023552505, |
| "loss": 1.0607699155807495, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.0751953125, |
| "grad_norm": 0.4279435873031616, |
| "learning_rate": 0.00018606476938174682, |
| "loss": 0.5628142952919006, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.076171875, |
| "grad_norm": 0.6166331171989441, |
| "learning_rate": 0.0001858684985279686, |
| "loss": 0.44837141036987305, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.0771484375, |
| "grad_norm": 0.6329861879348755, |
| "learning_rate": 0.00018567222767419038, |
| "loss": 0.5013883709907532, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.078125, |
| "grad_norm": 0.2921103239059448, |
| "learning_rate": 0.00018547595682041218, |
| "loss": 0.541824996471405, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.0791015625, |
| "grad_norm": 0.36744800209999084, |
| "learning_rate": 0.00018527968596663397, |
| "loss": 0.3878925144672394, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.080078125, |
| "grad_norm": 0.34045904874801636, |
| "learning_rate": 0.00018508341511285574, |
| "loss": 0.33476194739341736, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.0810546875, |
| "grad_norm": 0.48908546566963196, |
| "learning_rate": 0.00018488714425907754, |
| "loss": 1.003555178642273, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.08203125, |
| "grad_norm": 0.4683694839477539, |
| "learning_rate": 0.0001846908734052993, |
| "loss": 0.7300649285316467, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.0830078125, |
| "grad_norm": 0.3560928404331207, |
| "learning_rate": 0.0001844946025515211, |
| "loss": 0.4525097608566284, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.083984375, |
| "grad_norm": 1.481307864189148, |
| "learning_rate": 0.0001842983316977429, |
| "loss": 0.5444833040237427, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.0849609375, |
| "grad_norm": 0.42610403895378113, |
| "learning_rate": 0.0001841020608439647, |
| "loss": 0.7340827584266663, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.0859375, |
| "grad_norm": 0.6035026907920837, |
| "learning_rate": 0.00018390578999018646, |
| "loss": 0.5589049458503723, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.0869140625, |
| "grad_norm": 0.6075074076652527, |
| "learning_rate": 0.00018370951913640826, |
| "loss": 0.4969009757041931, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.087890625, |
| "grad_norm": 0.6751372814178467, |
| "learning_rate": 0.00018351324828263003, |
| "loss": 0.46451041102409363, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.0888671875, |
| "grad_norm": 0.5816373229026794, |
| "learning_rate": 0.00018331697742885182, |
| "loss": 1.024427056312561, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.08984375, |
| "grad_norm": 0.6644161939620972, |
| "learning_rate": 0.00018312070657507362, |
| "loss": 0.778592586517334, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.0908203125, |
| "grad_norm": 0.652209997177124, |
| "learning_rate": 0.00018292443572129541, |
| "loss": 0.8565710783004761, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.091796875, |
| "grad_norm": 0.9109074473381042, |
| "learning_rate": 0.00018272816486751718, |
| "loss": 0.6693978309631348, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.0927734375, |
| "grad_norm": 0.5235186219215393, |
| "learning_rate": 0.00018253189401373895, |
| "loss": 0.8255172967910767, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.09375, |
| "grad_norm": 0.8362122178077698, |
| "learning_rate": 0.00018233562315996075, |
| "loss": 0.5858157873153687, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.0947265625, |
| "grad_norm": 0.6753116846084595, |
| "learning_rate": 0.00018213935230618254, |
| "loss": 0.6682421565055847, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.095703125, |
| "grad_norm": 0.5394794940948486, |
| "learning_rate": 0.00018194308145240434, |
| "loss": 0.3218158781528473, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.0966796875, |
| "grad_norm": 3.2796010971069336, |
| "learning_rate": 0.0001817468105986261, |
| "loss": 0.681085467338562, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.09765625, |
| "grad_norm": 0.38390907645225525, |
| "learning_rate": 0.0001815505397448479, |
| "loss": 0.39554187655448914, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0986328125, |
| "grad_norm": 0.5289499759674072, |
| "learning_rate": 0.00018135426889106967, |
| "loss": 1.0264520645141602, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.099609375, |
| "grad_norm": 0.8211148977279663, |
| "learning_rate": 0.00018115799803729147, |
| "loss": 0.8588113784790039, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.1005859375, |
| "grad_norm": 0.4771063029766083, |
| "learning_rate": 0.00018096172718351327, |
| "loss": 0.7471244931221008, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.1015625, |
| "grad_norm": 0.6326794624328613, |
| "learning_rate": 0.00018076545632973506, |
| "loss": 0.6081597805023193, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.1025390625, |
| "grad_norm": 0.7229248285293579, |
| "learning_rate": 0.00018056918547595683, |
| "loss": 0.8315082788467407, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.103515625, |
| "grad_norm": 0.6803163290023804, |
| "learning_rate": 0.00018037291462217863, |
| "loss": 0.8308911323547363, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.1044921875, |
| "grad_norm": 0.5268850326538086, |
| "learning_rate": 0.0001801766437684004, |
| "loss": 0.8480656743049622, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.10546875, |
| "grad_norm": 0.7849289178848267, |
| "learning_rate": 0.0001799803729146222, |
| "loss": 0.8200575113296509, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.1064453125, |
| "grad_norm": 0.4259982407093048, |
| "learning_rate": 0.00017978410206084396, |
| "loss": 0.44367721676826477, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.107421875, |
| "grad_norm": 0.4788619577884674, |
| "learning_rate": 0.00017958783120706576, |
| "loss": 0.6017763018608093, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.1083984375, |
| "grad_norm": 0.34434452652931213, |
| "learning_rate": 0.00017939156035328755, |
| "loss": 0.29681769013404846, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.109375, |
| "grad_norm": 1.1506884098052979, |
| "learning_rate": 0.00017919528949950932, |
| "loss": 0.6520863771438599, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.1103515625, |
| "grad_norm": 0.8348999619483948, |
| "learning_rate": 0.00017899901864573112, |
| "loss": 0.6035414934158325, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.111328125, |
| "grad_norm": 0.5550518035888672, |
| "learning_rate": 0.00017880274779195289, |
| "loss": 0.7711564302444458, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.1123046875, |
| "grad_norm": 0.28814634680747986, |
| "learning_rate": 0.00017860647693817468, |
| "loss": 0.8325987458229065, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.11328125, |
| "grad_norm": 0.3833630084991455, |
| "learning_rate": 0.00017841020608439648, |
| "loss": 0.3345921039581299, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.1142578125, |
| "grad_norm": 0.8784507513046265, |
| "learning_rate": 0.00017821393523061827, |
| "loss": 0.4186948239803314, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.115234375, |
| "grad_norm": 0.7263842225074768, |
| "learning_rate": 0.00017801766437684004, |
| "loss": 0.5570493936538696, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.1162109375, |
| "grad_norm": 0.6391569972038269, |
| "learning_rate": 0.00017782139352306184, |
| "loss": 1.0257431268692017, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.1171875, |
| "grad_norm": 0.6025450229644775, |
| "learning_rate": 0.0001776251226692836, |
| "loss": 0.8676729202270508, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.1181640625, |
| "grad_norm": 0.3776579201221466, |
| "learning_rate": 0.0001774288518155054, |
| "loss": 0.5870720148086548, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.119140625, |
| "grad_norm": 0.40912336111068726, |
| "learning_rate": 0.0001772325809617272, |
| "loss": 0.9210044145584106, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.1201171875, |
| "grad_norm": 0.5036085247993469, |
| "learning_rate": 0.000177036310107949, |
| "loss": 0.47378072142601013, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.12109375, |
| "grad_norm": 0.5508134961128235, |
| "learning_rate": 0.00017684003925417076, |
| "loss": 0.8295834064483643, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.1220703125, |
| "grad_norm": 0.5522392392158508, |
| "learning_rate": 0.00017664376840039253, |
| "loss": 0.793156087398529, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.123046875, |
| "grad_norm": 1.0098820924758911, |
| "learning_rate": 0.00017644749754661433, |
| "loss": 0.5780155658721924, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.1240234375, |
| "grad_norm": 0.6178780198097229, |
| "learning_rate": 0.00017625122669283612, |
| "loss": 0.5129156708717346, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.125, |
| "grad_norm": 0.6224352121353149, |
| "learning_rate": 0.00017605495583905792, |
| "loss": 0.8498928546905518, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.1259765625, |
| "grad_norm": 0.7869983315467834, |
| "learning_rate": 0.0001758586849852797, |
| "loss": 0.9180670976638794, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.126953125, |
| "grad_norm": 0.4122680127620697, |
| "learning_rate": 0.00017566241413150148, |
| "loss": 0.510919988155365, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.1279296875, |
| "grad_norm": 0.7221843004226685, |
| "learning_rate": 0.00017546614327772325, |
| "loss": 0.3977488875389099, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.12890625, |
| "grad_norm": 1.155800461769104, |
| "learning_rate": 0.00017526987242394505, |
| "loss": 0.6549078226089478, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.1298828125, |
| "grad_norm": 0.7164724469184875, |
| "learning_rate": 0.00017507360157016685, |
| "loss": 0.8306566476821899, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.130859375, |
| "grad_norm": 0.7600284814834595, |
| "learning_rate": 0.00017487733071638864, |
| "loss": 0.34278520941734314, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.1318359375, |
| "grad_norm": 0.8636081218719482, |
| "learning_rate": 0.0001746810598626104, |
| "loss": 0.8881778717041016, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.1328125, |
| "grad_norm": 1.0904357433319092, |
| "learning_rate": 0.0001744847890088322, |
| "loss": 0.4423227310180664, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.1337890625, |
| "grad_norm": 0.5639862418174744, |
| "learning_rate": 0.00017428851815505397, |
| "loss": 0.8610935211181641, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.134765625, |
| "grad_norm": 1.05929696559906, |
| "learning_rate": 0.00017409224730127577, |
| "loss": 1.1729753017425537, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.1357421875, |
| "grad_norm": 1.0731761455535889, |
| "learning_rate": 0.00017389597644749757, |
| "loss": 0.6459341049194336, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.13671875, |
| "grad_norm": 0.7464702725410461, |
| "learning_rate": 0.00017369970559371934, |
| "loss": 0.5368601083755493, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.1376953125, |
| "grad_norm": 0.5722304582595825, |
| "learning_rate": 0.00017350343473994113, |
| "loss": 0.9642695784568787, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.138671875, |
| "grad_norm": 0.5044945478439331, |
| "learning_rate": 0.0001733071638861629, |
| "loss": 0.49555253982543945, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.1396484375, |
| "grad_norm": 0.8069168329238892, |
| "learning_rate": 0.0001731108930323847, |
| "loss": 0.8796389698982239, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.140625, |
| "grad_norm": 0.5269959568977356, |
| "learning_rate": 0.00017291462217860646, |
| "loss": 0.9928920269012451, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.1416015625, |
| "grad_norm": 0.6606360077857971, |
| "learning_rate": 0.0001727183513248283, |
| "loss": 1.0528640747070312, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.142578125, |
| "grad_norm": 0.7145242691040039, |
| "learning_rate": 0.00017252208047105006, |
| "loss": 1.1252766847610474, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.1435546875, |
| "grad_norm": 0.5808660984039307, |
| "learning_rate": 0.00017232580961727185, |
| "loss": 0.24914072453975677, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.14453125, |
| "grad_norm": 0.8544529676437378, |
| "learning_rate": 0.00017212953876349362, |
| "loss": 0.4420434832572937, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.1455078125, |
| "grad_norm": 0.899334728717804, |
| "learning_rate": 0.00017193326790971542, |
| "loss": 0.7128512263298035, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.146484375, |
| "grad_norm": 0.36327579617500305, |
| "learning_rate": 0.00017173699705593719, |
| "loss": 0.5503419637680054, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.1474609375, |
| "grad_norm": 0.553255021572113, |
| "learning_rate": 0.000171540726202159, |
| "loss": 0.5796535015106201, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.1484375, |
| "grad_norm": 0.41036659479141235, |
| "learning_rate": 0.00017134445534838078, |
| "loss": 0.8935849666595459, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.1494140625, |
| "grad_norm": 0.3723013997077942, |
| "learning_rate": 0.00017114818449460257, |
| "loss": 0.39106485247612, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.150390625, |
| "grad_norm": 0.654262900352478, |
| "learning_rate": 0.00017095191364082434, |
| "loss": 1.0176405906677246, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.1513671875, |
| "grad_norm": 0.5707812309265137, |
| "learning_rate": 0.0001707556427870461, |
| "loss": 0.6580768823623657, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.15234375, |
| "grad_norm": 0.35879406332969666, |
| "learning_rate": 0.0001705593719332679, |
| "loss": 0.4050876200199127, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.1533203125, |
| "grad_norm": 0.5701449513435364, |
| "learning_rate": 0.0001703631010794897, |
| "loss": 0.9737375974655151, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.154296875, |
| "grad_norm": 0.4461202919483185, |
| "learning_rate": 0.0001701668302257115, |
| "loss": 0.9864733815193176, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.1552734375, |
| "grad_norm": 0.6229621767997742, |
| "learning_rate": 0.00016997055937193327, |
| "loss": 0.35883933305740356, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.15625, |
| "grad_norm": 0.5390028357505798, |
| "learning_rate": 0.00016977428851815506, |
| "loss": 0.5791765451431274, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.1572265625, |
| "grad_norm": 0.7851611375808716, |
| "learning_rate": 0.00016957801766437683, |
| "loss": 0.9032300114631653, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.158203125, |
| "grad_norm": 0.6211395263671875, |
| "learning_rate": 0.00016938174681059863, |
| "loss": 0.5069928765296936, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.1591796875, |
| "grad_norm": 0.8290377855300903, |
| "learning_rate": 0.00016918547595682042, |
| "loss": 0.8917738795280457, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.16015625, |
| "grad_norm": 0.42707324028015137, |
| "learning_rate": 0.00016898920510304222, |
| "loss": 0.606585681438446, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.1611328125, |
| "grad_norm": 0.49472010135650635, |
| "learning_rate": 0.000168792934249264, |
| "loss": 1.0100075006484985, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.162109375, |
| "grad_norm": 0.48441267013549805, |
| "learning_rate": 0.00016859666339548579, |
| "loss": 0.7145558595657349, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.1630859375, |
| "grad_norm": 0.5181763172149658, |
| "learning_rate": 0.00016840039254170755, |
| "loss": 0.8088749647140503, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.1640625, |
| "grad_norm": 0.4702328145503998, |
| "learning_rate": 0.00016820412168792935, |
| "loss": 0.5631542801856995, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.1650390625, |
| "grad_norm": 0.35454344749450684, |
| "learning_rate": 0.00016800785083415115, |
| "loss": 0.31744396686553955, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.166015625, |
| "grad_norm": 0.5193122029304504, |
| "learning_rate": 0.00016781157998037291, |
| "loss": 0.7338438034057617, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.1669921875, |
| "grad_norm": 0.49799400568008423, |
| "learning_rate": 0.0001676153091265947, |
| "loss": 0.7910654544830322, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.16796875, |
| "grad_norm": 0.4855571389198303, |
| "learning_rate": 0.00016741903827281648, |
| "loss": 0.38415610790252686, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.1689453125, |
| "grad_norm": 0.8796041011810303, |
| "learning_rate": 0.00016722276741903828, |
| "loss": 0.6042807102203369, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.169921875, |
| "grad_norm": 0.6005135774612427, |
| "learning_rate": 0.00016702649656526007, |
| "loss": 0.6617047786712646, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.1708984375, |
| "grad_norm": 0.6359293460845947, |
| "learning_rate": 0.00016683022571148187, |
| "loss": 0.5227914452552795, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.171875, |
| "grad_norm": 0.46007266640663147, |
| "learning_rate": 0.00016663395485770364, |
| "loss": 0.6881235837936401, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.1728515625, |
| "grad_norm": 0.37411797046661377, |
| "learning_rate": 0.00016643768400392543, |
| "loss": 0.7384200096130371, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.173828125, |
| "grad_norm": 0.4021860659122467, |
| "learning_rate": 0.0001662414131501472, |
| "loss": 1.1738500595092773, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.1748046875, |
| "grad_norm": 0.3674755096435547, |
| "learning_rate": 0.000166045142296369, |
| "loss": 0.37539663910865784, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.17578125, |
| "grad_norm": 0.5051441788673401, |
| "learning_rate": 0.0001658488714425908, |
| "loss": 0.6273016333580017, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.1767578125, |
| "grad_norm": 0.6807597279548645, |
| "learning_rate": 0.0001656526005888126, |
| "loss": 0.4195510447025299, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.177734375, |
| "grad_norm": 0.3345419466495514, |
| "learning_rate": 0.00016545632973503436, |
| "loss": 0.8546851873397827, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.1787109375, |
| "grad_norm": 0.33821800351142883, |
| "learning_rate": 0.00016526005888125615, |
| "loss": 0.522655725479126, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.1796875, |
| "grad_norm": 0.3145562708377838, |
| "learning_rate": 0.00016506378802747792, |
| "loss": 0.3799128532409668, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.1806640625, |
| "grad_norm": 0.44908636808395386, |
| "learning_rate": 0.0001648675171736997, |
| "loss": 0.6263326406478882, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.181640625, |
| "grad_norm": 0.7736865282058716, |
| "learning_rate": 0.00016467124631992151, |
| "loss": 0.3385460078716278, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.1826171875, |
| "grad_norm": 0.5184527635574341, |
| "learning_rate": 0.00016447497546614328, |
| "loss": 0.7980771064758301, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.18359375, |
| "grad_norm": 0.41774502396583557, |
| "learning_rate": 0.00016427870461236508, |
| "loss": 0.7745299339294434, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.1845703125, |
| "grad_norm": 0.43824154138565063, |
| "learning_rate": 0.00016408243375858685, |
| "loss": 0.9190135598182678, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.185546875, |
| "grad_norm": 0.4037880301475525, |
| "learning_rate": 0.00016388616290480864, |
| "loss": 0.5671911239624023, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.1865234375, |
| "grad_norm": 0.3757816255092621, |
| "learning_rate": 0.0001636898920510304, |
| "loss": 0.39916592836380005, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.1875, |
| "grad_norm": 0.4747844636440277, |
| "learning_rate": 0.00016349362119725224, |
| "loss": 0.9217299818992615, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.1884765625, |
| "grad_norm": 0.42307209968566895, |
| "learning_rate": 0.000163297350343474, |
| "loss": 0.8852982521057129, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.189453125, |
| "grad_norm": 0.47294488549232483, |
| "learning_rate": 0.0001631010794896958, |
| "loss": 1.0635476112365723, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.1904296875, |
| "grad_norm": 0.3519342243671417, |
| "learning_rate": 0.00016290480863591757, |
| "loss": 0.33460623025894165, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.19140625, |
| "grad_norm": 0.418151319026947, |
| "learning_rate": 0.00016270853778213936, |
| "loss": 0.8776851296424866, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.1923828125, |
| "grad_norm": 0.3954712152481079, |
| "learning_rate": 0.00016251226692836113, |
| "loss": 0.9358173608779907, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.193359375, |
| "grad_norm": 0.35646897554397583, |
| "learning_rate": 0.00016231599607458293, |
| "loss": 0.43795716762542725, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.1943359375, |
| "grad_norm": 0.41675063967704773, |
| "learning_rate": 0.00016211972522080473, |
| "loss": 0.8348654508590698, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.1953125, |
| "grad_norm": 0.5800544023513794, |
| "learning_rate": 0.0001619234543670265, |
| "loss": 0.5580507516860962, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.1962890625, |
| "grad_norm": 0.44925832748413086, |
| "learning_rate": 0.0001617271835132483, |
| "loss": 0.47444453835487366, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.197265625, |
| "grad_norm": 0.48447439074516296, |
| "learning_rate": 0.00016153091265947006, |
| "loss": 0.5927308797836304, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.1982421875, |
| "grad_norm": 0.37814846634864807, |
| "learning_rate": 0.00016133464180569186, |
| "loss": 0.8504298329353333, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.19921875, |
| "grad_norm": 0.4171026051044464, |
| "learning_rate": 0.00016113837095191365, |
| "loss": 1.0796414613723755, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.2001953125, |
| "grad_norm": 0.4570372402667999, |
| "learning_rate": 0.00016094210009813545, |
| "loss": 0.6229358315467834, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.201171875, |
| "grad_norm": 0.6294324994087219, |
| "learning_rate": 0.00016074582924435722, |
| "loss": 0.8749011158943176, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.2021484375, |
| "grad_norm": 0.42371129989624023, |
| "learning_rate": 0.000160549558390579, |
| "loss": 0.9866290092468262, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.203125, |
| "grad_norm": 0.5329370498657227, |
| "learning_rate": 0.00016035328753680078, |
| "loss": 0.7568405270576477, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.2041015625, |
| "grad_norm": 0.37205901741981506, |
| "learning_rate": 0.00016015701668302258, |
| "loss": 0.7115534543991089, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.205078125, |
| "grad_norm": 0.4536517262458801, |
| "learning_rate": 0.00015996074582924437, |
| "loss": 0.5152509808540344, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.2060546875, |
| "grad_norm": 2.319321393966675, |
| "learning_rate": 0.00015976447497546617, |
| "loss": 0.2915653586387634, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.20703125, |
| "grad_norm": 0.7047526836395264, |
| "learning_rate": 0.00015956820412168794, |
| "loss": 0.3070187568664551, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.2080078125, |
| "grad_norm": 0.6068500280380249, |
| "learning_rate": 0.0001593719332679097, |
| "loss": 0.8103427290916443, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.208984375, |
| "grad_norm": 0.3588794469833374, |
| "learning_rate": 0.0001591756624141315, |
| "loss": 0.4655485153198242, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.2099609375, |
| "grad_norm": 0.6561040878295898, |
| "learning_rate": 0.0001589793915603533, |
| "loss": 0.5353362560272217, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.2109375, |
| "grad_norm": 0.6485084891319275, |
| "learning_rate": 0.0001587831207065751, |
| "loss": 0.8601769804954529, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.2119140625, |
| "grad_norm": 0.4718208909034729, |
| "learning_rate": 0.00015858684985279686, |
| "loss": 0.6897189617156982, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.212890625, |
| "grad_norm": 0.7453560829162598, |
| "learning_rate": 0.00015839057899901866, |
| "loss": 1.0387171506881714, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.2138671875, |
| "grad_norm": 0.41157087683677673, |
| "learning_rate": 0.00015819430814524043, |
| "loss": 0.4910873770713806, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.21484375, |
| "grad_norm": 0.4198990762233734, |
| "learning_rate": 0.00015799803729146222, |
| "loss": 0.588080108165741, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.2158203125, |
| "grad_norm": 0.7791650295257568, |
| "learning_rate": 0.00015780176643768402, |
| "loss": 0.754984974861145, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.216796875, |
| "grad_norm": 1.4430909156799316, |
| "learning_rate": 0.00015760549558390581, |
| "loss": 0.5313946008682251, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.2177734375, |
| "grad_norm": 0.4399142861366272, |
| "learning_rate": 0.00015740922473012758, |
| "loss": 0.523280918598175, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.21875, |
| "grad_norm": 0.4177611470222473, |
| "learning_rate": 0.00015721295387634938, |
| "loss": 0.7598159313201904, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.2197265625, |
| "grad_norm": 0.4408816397190094, |
| "learning_rate": 0.00015701668302257115, |
| "loss": 0.8131666779518127, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.220703125, |
| "grad_norm": 0.4228694438934326, |
| "learning_rate": 0.00015682041216879294, |
| "loss": 1.0456180572509766, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.2216796875, |
| "grad_norm": 0.6313449144363403, |
| "learning_rate": 0.00015662414131501474, |
| "loss": 0.496864914894104, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.22265625, |
| "grad_norm": 0.48103493452072144, |
| "learning_rate": 0.0001564278704612365, |
| "loss": 0.5967347621917725, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.2236328125, |
| "grad_norm": 0.3548172116279602, |
| "learning_rate": 0.0001562315996074583, |
| "loss": 0.3325611650943756, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.224609375, |
| "grad_norm": 0.41543763875961304, |
| "learning_rate": 0.00015603532875368007, |
| "loss": 0.9223452806472778, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.2255859375, |
| "grad_norm": 0.6072061061859131, |
| "learning_rate": 0.00015583905789990187, |
| "loss": 0.2860236167907715, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.2265625, |
| "grad_norm": 0.3232869505882263, |
| "learning_rate": 0.00015564278704612364, |
| "loss": 0.7308738231658936, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.2275390625, |
| "grad_norm": 0.5271327495574951, |
| "learning_rate": 0.00015544651619234546, |
| "loss": 1.0354498624801636, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.228515625, |
| "grad_norm": 0.626105546951294, |
| "learning_rate": 0.00015525024533856723, |
| "loss": 1.0841856002807617, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.2294921875, |
| "grad_norm": 0.5628311634063721, |
| "learning_rate": 0.00015505397448478903, |
| "loss": 0.8868529200553894, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.23046875, |
| "grad_norm": 0.4290577471256256, |
| "learning_rate": 0.0001548577036310108, |
| "loss": 0.5887943506240845, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.2314453125, |
| "grad_norm": 0.743786096572876, |
| "learning_rate": 0.0001546614327772326, |
| "loss": 0.8314348459243774, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.232421875, |
| "grad_norm": 0.34498658776283264, |
| "learning_rate": 0.00015446516192345436, |
| "loss": 0.6171099543571472, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.2333984375, |
| "grad_norm": 0.7894997596740723, |
| "learning_rate": 0.00015426889106967616, |
| "loss": 0.614283561706543, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.234375, |
| "grad_norm": 0.4631381034851074, |
| "learning_rate": 0.00015407262021589795, |
| "loss": 0.6744101047515869, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.2353515625, |
| "grad_norm": 0.44523295760154724, |
| "learning_rate": 0.00015387634936211975, |
| "loss": 0.7094103097915649, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.236328125, |
| "grad_norm": 0.7059242725372314, |
| "learning_rate": 0.00015368007850834152, |
| "loss": 0.6856737732887268, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.2373046875, |
| "grad_norm": 1.0360506772994995, |
| "learning_rate": 0.00015348380765456329, |
| "loss": 1.101341962814331, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.23828125, |
| "grad_norm": 0.6630859375, |
| "learning_rate": 0.00015328753680078508, |
| "loss": 0.8815068006515503, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.2392578125, |
| "grad_norm": 0.4162105321884155, |
| "learning_rate": 0.00015309126594700688, |
| "loss": 0.39801689982414246, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.240234375, |
| "grad_norm": 0.5786510109901428, |
| "learning_rate": 0.00015289499509322867, |
| "loss": 0.5399383902549744, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.2412109375, |
| "grad_norm": 0.5430185794830322, |
| "learning_rate": 0.00015269872423945044, |
| "loss": 0.5432325601577759, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.2421875, |
| "grad_norm": 0.3750382959842682, |
| "learning_rate": 0.00015250245338567224, |
| "loss": 0.49265092611312866, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.2431640625, |
| "grad_norm": 0.5081580877304077, |
| "learning_rate": 0.000152306182531894, |
| "loss": 0.8720104098320007, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.244140625, |
| "grad_norm": 0.5619673728942871, |
| "learning_rate": 0.0001521099116781158, |
| "loss": 0.4022529125213623, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.2451171875, |
| "grad_norm": 0.3996225893497467, |
| "learning_rate": 0.0001519136408243376, |
| "loss": 0.443879097700119, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.24609375, |
| "grad_norm": 0.4688915014266968, |
| "learning_rate": 0.0001517173699705594, |
| "loss": 0.47562721371650696, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.2470703125, |
| "grad_norm": 1.7595641613006592, |
| "learning_rate": 0.00015152109911678116, |
| "loss": 0.5174474716186523, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.248046875, |
| "grad_norm": 0.47813650965690613, |
| "learning_rate": 0.00015132482826300296, |
| "loss": 0.8565359711647034, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.2490234375, |
| "grad_norm": 0.49612802267074585, |
| "learning_rate": 0.00015112855740922473, |
| "loss": 0.4736977815628052, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.4370449483394623, |
| "learning_rate": 0.00015093228655544652, |
| "loss": 0.7566809058189392, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.2509765625, |
| "grad_norm": 0.43916988372802734, |
| "learning_rate": 0.00015073601570166832, |
| "loss": 0.8396226763725281, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.251953125, |
| "grad_norm": 0.7745673060417175, |
| "learning_rate": 0.0001505397448478901, |
| "loss": 0.3085971772670746, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.2529296875, |
| "grad_norm": 0.4097643792629242, |
| "learning_rate": 0.00015034347399411188, |
| "loss": 0.2730502188205719, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.25390625, |
| "grad_norm": 0.4131183624267578, |
| "learning_rate": 0.00015014720314033365, |
| "loss": 0.5422588586807251, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.2548828125, |
| "grad_norm": 0.469498872756958, |
| "learning_rate": 0.00014995093228655545, |
| "loss": 0.6572885513305664, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.255859375, |
| "grad_norm": 0.3662133514881134, |
| "learning_rate": 0.00014975466143277725, |
| "loss": 0.9272421598434448, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.2568359375, |
| "grad_norm": 0.38194844126701355, |
| "learning_rate": 0.00014955839057899904, |
| "loss": 0.6010634303092957, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.2578125, |
| "grad_norm": 0.3645467758178711, |
| "learning_rate": 0.0001493621197252208, |
| "loss": 0.9131143093109131, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.2587890625, |
| "grad_norm": 0.3304290771484375, |
| "learning_rate": 0.0001491658488714426, |
| "loss": 0.4593530297279358, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.259765625, |
| "grad_norm": 0.7529020309448242, |
| "learning_rate": 0.00014896957801766437, |
| "loss": 0.5219628810882568, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.2607421875, |
| "grad_norm": 0.4974548816680908, |
| "learning_rate": 0.00014877330716388617, |
| "loss": 0.7617945075035095, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.26171875, |
| "grad_norm": 0.28884655237197876, |
| "learning_rate": 0.00014857703631010797, |
| "loss": 0.4288986921310425, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.2626953125, |
| "grad_norm": 0.5195730328559875, |
| "learning_rate": 0.00014838076545632976, |
| "loss": 0.830593466758728, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.263671875, |
| "grad_norm": 0.40689924359321594, |
| "learning_rate": 0.00014818449460255153, |
| "loss": 0.7528857588768005, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.2646484375, |
| "grad_norm": 0.33955928683280945, |
| "learning_rate": 0.00014798822374877333, |
| "loss": 0.5274187326431274, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.265625, |
| "grad_norm": 1.0572726726531982, |
| "learning_rate": 0.0001477919528949951, |
| "loss": 0.7389089465141296, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.2666015625, |
| "grad_norm": 0.5191348791122437, |
| "learning_rate": 0.00014759568204121686, |
| "loss": 0.4842514991760254, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.267578125, |
| "grad_norm": 0.3779315650463104, |
| "learning_rate": 0.00014739941118743866, |
| "loss": 0.7406666278839111, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.2685546875, |
| "grad_norm": 0.6065999865531921, |
| "learning_rate": 0.00014720314033366046, |
| "loss": 0.6771246790885925, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.26953125, |
| "grad_norm": 0.537529468536377, |
| "learning_rate": 0.00014700686947988225, |
| "loss": 0.861257791519165, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.2705078125, |
| "grad_norm": 0.3961732089519501, |
| "learning_rate": 0.00014681059862610402, |
| "loss": 0.9672999382019043, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.271484375, |
| "grad_norm": 0.45974740386009216, |
| "learning_rate": 0.00014661432777232582, |
| "loss": 0.5789016485214233, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.2724609375, |
| "grad_norm": 0.7211292386054993, |
| "learning_rate": 0.00014641805691854759, |
| "loss": 0.867314338684082, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.2734375, |
| "grad_norm": 0.6938930749893188, |
| "learning_rate": 0.00014622178606476938, |
| "loss": 0.4570122957229614, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.2744140625, |
| "grad_norm": 0.5093329548835754, |
| "learning_rate": 0.00014602551521099118, |
| "loss": 0.9487482309341431, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.275390625, |
| "grad_norm": 0.4403358995914459, |
| "learning_rate": 0.00014582924435721297, |
| "loss": 0.5330759286880493, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.2763671875, |
| "grad_norm": 0.5305198431015015, |
| "learning_rate": 0.00014563297350343474, |
| "loss": 0.8727459907531738, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.27734375, |
| "grad_norm": 0.49577099084854126, |
| "learning_rate": 0.00014543670264965654, |
| "loss": 0.6166709065437317, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.2783203125, |
| "grad_norm": 0.4856763780117035, |
| "learning_rate": 0.0001452404317958783, |
| "loss": 0.920722484588623, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.279296875, |
| "grad_norm": 0.3397112786769867, |
| "learning_rate": 0.0001450441609421001, |
| "loss": 1.001542329788208, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.2802734375, |
| "grad_norm": 0.591691792011261, |
| "learning_rate": 0.0001448478900883219, |
| "loss": 0.4898494780063629, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.28125, |
| "grad_norm": 0.45293164253234863, |
| "learning_rate": 0.00014465161923454367, |
| "loss": 0.4958389401435852, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.2822265625, |
| "grad_norm": 0.38414305448532104, |
| "learning_rate": 0.00014445534838076546, |
| "loss": 0.3971215784549713, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.283203125, |
| "grad_norm": 0.5568608045578003, |
| "learning_rate": 0.00014425907752698723, |
| "loss": 0.7953230142593384, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.2841796875, |
| "grad_norm": 0.3680984377861023, |
| "learning_rate": 0.00014406280667320903, |
| "loss": 0.703729510307312, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.28515625, |
| "grad_norm": 0.4263870120048523, |
| "learning_rate": 0.00014386653581943082, |
| "loss": 0.7433100938796997, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.2861328125, |
| "grad_norm": 1.3262213468551636, |
| "learning_rate": 0.00014367026496565262, |
| "loss": 0.8011248111724854, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.287109375, |
| "grad_norm": 0.44766104221343994, |
| "learning_rate": 0.0001434739941118744, |
| "loss": 0.6682827472686768, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.2880859375, |
| "grad_norm": 0.7399169206619263, |
| "learning_rate": 0.00014327772325809619, |
| "loss": 0.8356127142906189, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.2890625, |
| "grad_norm": 0.3582242727279663, |
| "learning_rate": 0.00014308145240431795, |
| "loss": 0.7127545475959778, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.2900390625, |
| "grad_norm": 0.5251145958900452, |
| "learning_rate": 0.00014288518155053975, |
| "loss": 0.7467620968818665, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.291015625, |
| "grad_norm": 0.639377772808075, |
| "learning_rate": 0.00014268891069676155, |
| "loss": 0.434887170791626, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.2919921875, |
| "grad_norm": 0.5007404685020447, |
| "learning_rate": 0.00014249263984298334, |
| "loss": 1.028229832649231, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.29296875, |
| "grad_norm": 0.41101035475730896, |
| "learning_rate": 0.0001422963689892051, |
| "loss": 0.8766242265701294, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.2939453125, |
| "grad_norm": 0.3938690721988678, |
| "learning_rate": 0.0001421000981354269, |
| "loss": 0.7176960706710815, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.294921875, |
| "grad_norm": 0.5939344763755798, |
| "learning_rate": 0.00014190382728164868, |
| "loss": 0.6655953526496887, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.2958984375, |
| "grad_norm": 0.47224998474121094, |
| "learning_rate": 0.00014170755642787047, |
| "loss": 0.9155608415603638, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.296875, |
| "grad_norm": 0.41344454884529114, |
| "learning_rate": 0.00014151128557409227, |
| "loss": 0.6017557382583618, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.2978515625, |
| "grad_norm": 0.514320969581604, |
| "learning_rate": 0.00014131501472031404, |
| "loss": 0.6184566617012024, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.298828125, |
| "grad_norm": 0.5005887150764465, |
| "learning_rate": 0.00014111874386653583, |
| "loss": 0.6652892231941223, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.2998046875, |
| "grad_norm": 0.5872619152069092, |
| "learning_rate": 0.0001409224730127576, |
| "loss": 0.8618959784507751, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.30078125, |
| "grad_norm": 0.5114542245864868, |
| "learning_rate": 0.0001407262021589794, |
| "loss": 0.6637990474700928, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.3017578125, |
| "grad_norm": 1.141750693321228, |
| "learning_rate": 0.00014052993130520117, |
| "loss": 0.6234999299049377, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.302734375, |
| "grad_norm": 0.4786873459815979, |
| "learning_rate": 0.000140333660451423, |
| "loss": 0.9601540565490723, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.3037109375, |
| "grad_norm": 0.6048462390899658, |
| "learning_rate": 0.00014013738959764476, |
| "loss": 0.5895652770996094, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.3046875, |
| "grad_norm": 0.7435188889503479, |
| "learning_rate": 0.00013994111874386655, |
| "loss": 1.196149468421936, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.3056640625, |
| "grad_norm": 0.7936303019523621, |
| "learning_rate": 0.00013974484789008832, |
| "loss": 0.6073983907699585, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.306640625, |
| "grad_norm": 0.5199156403541565, |
| "learning_rate": 0.00013954857703631012, |
| "loss": 0.2734944224357605, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.3076171875, |
| "grad_norm": 0.38845276832580566, |
| "learning_rate": 0.0001393523061825319, |
| "loss": 0.604506254196167, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.30859375, |
| "grad_norm": 0.6925122737884521, |
| "learning_rate": 0.0001391560353287537, |
| "loss": 1.0446012020111084, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.3095703125, |
| "grad_norm": 0.4950433075428009, |
| "learning_rate": 0.00013895976447497548, |
| "loss": 1.027349591255188, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.310546875, |
| "grad_norm": 0.36179935932159424, |
| "learning_rate": 0.00013876349362119725, |
| "loss": 0.6760075688362122, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.3115234375, |
| "grad_norm": 0.3730153739452362, |
| "learning_rate": 0.00013856722276741904, |
| "loss": 0.47779884934425354, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.3125, |
| "grad_norm": 0.6181739568710327, |
| "learning_rate": 0.0001383709519136408, |
| "loss": 0.4747524857521057, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.3134765625, |
| "grad_norm": 0.8233240246772766, |
| "learning_rate": 0.0001381746810598626, |
| "loss": 0.490276575088501, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.314453125, |
| "grad_norm": 0.6492604613304138, |
| "learning_rate": 0.0001379784102060844, |
| "loss": 0.44847172498703003, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.3154296875, |
| "grad_norm": 0.5506369471549988, |
| "learning_rate": 0.0001377821393523062, |
| "loss": 0.47955968976020813, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.31640625, |
| "grad_norm": 0.4187554717063904, |
| "learning_rate": 0.00013758586849852797, |
| "loss": 0.6466250419616699, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.3173828125, |
| "grad_norm": 0.3976380527019501, |
| "learning_rate": 0.00013738959764474977, |
| "loss": 0.756473183631897, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.318359375, |
| "grad_norm": 0.6089552044868469, |
| "learning_rate": 0.00013719332679097153, |
| "loss": 0.9309840202331543, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.3193359375, |
| "grad_norm": 0.31628501415252686, |
| "learning_rate": 0.00013699705593719333, |
| "loss": 0.7739764451980591, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.3203125, |
| "grad_norm": 0.6984357237815857, |
| "learning_rate": 0.00013680078508341513, |
| "loss": 1.0047030448913574, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.3212890625, |
| "grad_norm": 0.42705219984054565, |
| "learning_rate": 0.00013660451422963692, |
| "loss": 0.5215034484863281, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.322265625, |
| "grad_norm": 0.3548984229564667, |
| "learning_rate": 0.0001364082433758587, |
| "loss": 0.777184009552002, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.3232421875, |
| "grad_norm": 0.6042805314064026, |
| "learning_rate": 0.00013621197252208046, |
| "loss": 0.469806432723999, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.32421875, |
| "grad_norm": 0.43482446670532227, |
| "learning_rate": 0.00013601570166830226, |
| "loss": 0.8123322129249573, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.3251953125, |
| "grad_norm": 0.4851783812046051, |
| "learning_rate": 0.00013581943081452405, |
| "loss": 1.1560527086257935, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.326171875, |
| "grad_norm": 0.681423008441925, |
| "learning_rate": 0.00013562315996074585, |
| "loss": 0.5681013464927673, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.3271484375, |
| "grad_norm": 0.43838411569595337, |
| "learning_rate": 0.00013542688910696762, |
| "loss": 0.8758999109268188, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.328125, |
| "grad_norm": 0.5508302450180054, |
| "learning_rate": 0.0001352306182531894, |
| "loss": 0.7725740671157837, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.3291015625, |
| "grad_norm": 0.2603519856929779, |
| "learning_rate": 0.00013503434739941118, |
| "loss": 0.357033908367157, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.330078125, |
| "grad_norm": 0.38098394870758057, |
| "learning_rate": 0.00013483807654563298, |
| "loss": 0.41752922534942627, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.3310546875, |
| "grad_norm": 0.5308575630187988, |
| "learning_rate": 0.00013464180569185477, |
| "loss": 0.6187021732330322, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.33203125, |
| "grad_norm": 0.4033392369747162, |
| "learning_rate": 0.00013444553483807657, |
| "loss": 0.9481551647186279, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.3330078125, |
| "grad_norm": 0.3999135494232178, |
| "learning_rate": 0.00013424926398429834, |
| "loss": 0.6853100657463074, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.333984375, |
| "grad_norm": 0.4521353840827942, |
| "learning_rate": 0.00013405299313052013, |
| "loss": 1.0335659980773926, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.3349609375, |
| "grad_norm": 0.3538281321525574, |
| "learning_rate": 0.0001338567222767419, |
| "loss": 0.821506142616272, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.3359375, |
| "grad_norm": 0.49575889110565186, |
| "learning_rate": 0.0001336604514229637, |
| "loss": 0.6124354004859924, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.3369140625, |
| "grad_norm": 0.37985700368881226, |
| "learning_rate": 0.0001334641805691855, |
| "loss": 0.6803320646286011, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.337890625, |
| "grad_norm": 0.3533600866794586, |
| "learning_rate": 0.00013326790971540726, |
| "loss": 0.7260403037071228, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.3388671875, |
| "grad_norm": 0.49213504791259766, |
| "learning_rate": 0.00013307163886162906, |
| "loss": 0.9051091074943542, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.33984375, |
| "grad_norm": 0.37704166769981384, |
| "learning_rate": 0.00013287536800785083, |
| "loss": 0.4471222460269928, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.3408203125, |
| "grad_norm": 0.4309573471546173, |
| "learning_rate": 0.00013267909715407262, |
| "loss": 0.749025285243988, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.341796875, |
| "grad_norm": 0.7491689920425415, |
| "learning_rate": 0.0001324828263002944, |
| "loss": 1.1318167448043823, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.3427734375, |
| "grad_norm": 0.3965498208999634, |
| "learning_rate": 0.00013228655544651622, |
| "loss": 0.8451839685440063, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.34375, |
| "grad_norm": 0.4446418285369873, |
| "learning_rate": 0.00013209028459273798, |
| "loss": 0.7875360250473022, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.3447265625, |
| "grad_norm": 0.3396705985069275, |
| "learning_rate": 0.00013189401373895978, |
| "loss": 0.8446518182754517, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.345703125, |
| "grad_norm": 0.3436250388622284, |
| "learning_rate": 0.00013169774288518155, |
| "loss": 0.8995112180709839, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.3466796875, |
| "grad_norm": 0.33643823862075806, |
| "learning_rate": 0.00013150147203140334, |
| "loss": 0.6253601312637329, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.34765625, |
| "grad_norm": 0.39978718757629395, |
| "learning_rate": 0.0001313052011776251, |
| "loss": 0.31882500648498535, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.3486328125, |
| "grad_norm": 0.3054925799369812, |
| "learning_rate": 0.00013110893032384694, |
| "loss": 0.3698769807815552, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.349609375, |
| "grad_norm": 0.3789948523044586, |
| "learning_rate": 0.0001309126594700687, |
| "loss": 0.9039162397384644, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.3505859375, |
| "grad_norm": 0.4192582964897156, |
| "learning_rate": 0.0001307163886162905, |
| "loss": 0.7852678298950195, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.3515625, |
| "grad_norm": 0.5130710601806641, |
| "learning_rate": 0.00013052011776251227, |
| "loss": 0.7745686769485474, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.3525390625, |
| "grad_norm": 0.39334234595298767, |
| "learning_rate": 0.00013032384690873404, |
| "loss": 0.7644802331924438, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.353515625, |
| "grad_norm": 0.6141180992126465, |
| "learning_rate": 0.00013012757605495583, |
| "loss": 0.6028044819831848, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.3544921875, |
| "grad_norm": 0.33263200521469116, |
| "learning_rate": 0.00012993130520117763, |
| "loss": 0.6908546090126038, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.35546875, |
| "grad_norm": 0.3901807367801666, |
| "learning_rate": 0.00012973503434739943, |
| "loss": 0.8896909952163696, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.3564453125, |
| "grad_norm": 0.3889808654785156, |
| "learning_rate": 0.0001295387634936212, |
| "loss": 0.622492790222168, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.357421875, |
| "grad_norm": 0.41004979610443115, |
| "learning_rate": 0.000129342492639843, |
| "loss": 0.6293104887008667, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.3583984375, |
| "grad_norm": 0.32929369807243347, |
| "learning_rate": 0.00012914622178606476, |
| "loss": 0.7049382925033569, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.359375, |
| "grad_norm": 0.5189999341964722, |
| "learning_rate": 0.00012894995093228656, |
| "loss": 0.9230547547340393, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.3603515625, |
| "grad_norm": 0.290991872549057, |
| "learning_rate": 0.00012875368007850835, |
| "loss": 0.5716772079467773, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.361328125, |
| "grad_norm": 0.3976893126964569, |
| "learning_rate": 0.00012855740922473015, |
| "loss": 0.4593455493450165, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.3623046875, |
| "grad_norm": 0.38385459780693054, |
| "learning_rate": 0.00012836113837095192, |
| "loss": 0.4766542315483093, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.36328125, |
| "grad_norm": 0.45652449131011963, |
| "learning_rate": 0.0001281648675171737, |
| "loss": 0.9292062520980835, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.3642578125, |
| "grad_norm": 0.384463906288147, |
| "learning_rate": 0.00012796859666339548, |
| "loss": 0.7896109223365784, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.365234375, |
| "grad_norm": 0.43412724137306213, |
| "learning_rate": 0.00012777232580961728, |
| "loss": 0.6185650825500488, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.3662109375, |
| "grad_norm": 0.4574507772922516, |
| "learning_rate": 0.00012757605495583907, |
| "loss": 0.5614027380943298, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.3671875, |
| "grad_norm": 0.2921536862850189, |
| "learning_rate": 0.00012737978410206084, |
| "loss": 0.26786333322525024, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.3681640625, |
| "grad_norm": 0.5887529850006104, |
| "learning_rate": 0.00012718351324828264, |
| "loss": 0.4167410433292389, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.369140625, |
| "grad_norm": 0.3651127815246582, |
| "learning_rate": 0.0001269872423945044, |
| "loss": 1.0140016078948975, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.3701171875, |
| "grad_norm": 0.47206228971481323, |
| "learning_rate": 0.0001267909715407262, |
| "loss": 0.8293377757072449, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.37109375, |
| "grad_norm": 0.6319689154624939, |
| "learning_rate": 0.000126594700686948, |
| "loss": 0.7301446795463562, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.3720703125, |
| "grad_norm": 0.5163951516151428, |
| "learning_rate": 0.0001263984298331698, |
| "loss": 0.9944421648979187, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.373046875, |
| "grad_norm": 0.519072949886322, |
| "learning_rate": 0.00012620215897939156, |
| "loss": 0.6176541447639465, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.3740234375, |
| "grad_norm": 3.0750813484191895, |
| "learning_rate": 0.00012600588812561336, |
| "loss": 0.7531320452690125, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.375, |
| "grad_norm": 0.3246331512928009, |
| "learning_rate": 0.00012580961727183513, |
| "loss": 0.3269459903240204, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.3759765625, |
| "grad_norm": 1.1105197668075562, |
| "learning_rate": 0.00012561334641805692, |
| "loss": 0.4228656589984894, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.376953125, |
| "grad_norm": 0.6776182055473328, |
| "learning_rate": 0.00012541707556427872, |
| "loss": 0.791953980922699, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.3779296875, |
| "grad_norm": 0.4413786828517914, |
| "learning_rate": 0.00012522080471050052, |
| "loss": 0.7953442335128784, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.37890625, |
| "grad_norm": 0.4036264419555664, |
| "learning_rate": 0.00012502453385672228, |
| "loss": 0.6062744855880737, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.3798828125, |
| "grad_norm": 1.0638166666030884, |
| "learning_rate": 0.00012482826300294408, |
| "loss": 1.0578093528747559, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.380859375, |
| "grad_norm": 0.2518276572227478, |
| "learning_rate": 0.00012463199214916585, |
| "loss": 0.5070685148239136, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.3818359375, |
| "grad_norm": 0.3338214159011841, |
| "learning_rate": 0.00012443572129538762, |
| "loss": 0.7665579915046692, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.3828125, |
| "grad_norm": 0.4730507433414459, |
| "learning_rate": 0.00012423945044160944, |
| "loss": 0.48353517055511475, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.3837890625, |
| "grad_norm": 0.3488924503326416, |
| "learning_rate": 0.0001240431795878312, |
| "loss": 0.4422420561313629, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.384765625, |
| "grad_norm": 0.2397361695766449, |
| "learning_rate": 0.000123846908734053, |
| "loss": 0.7025644183158875, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.3857421875, |
| "grad_norm": 0.3638167679309845, |
| "learning_rate": 0.00012365063788027478, |
| "loss": 0.5372107625007629, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.38671875, |
| "grad_norm": 0.4088346064090729, |
| "learning_rate": 0.00012345436702649657, |
| "loss": 0.7636011838912964, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.3876953125, |
| "grad_norm": 0.36985111236572266, |
| "learning_rate": 0.00012325809617271834, |
| "loss": 0.6720612645149231, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.388671875, |
| "grad_norm": 0.37556055188179016, |
| "learning_rate": 0.00012306182531894016, |
| "loss": 0.8087592124938965, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.3896484375, |
| "grad_norm": 0.6851724982261658, |
| "learning_rate": 0.00012286555446516193, |
| "loss": 0.780835747718811, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.390625, |
| "grad_norm": 0.3453989326953888, |
| "learning_rate": 0.00012266928361138373, |
| "loss": 0.8235517740249634, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.3916015625, |
| "grad_norm": 0.43622198700904846, |
| "learning_rate": 0.0001224730127576055, |
| "loss": 0.3758167028427124, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.392578125, |
| "grad_norm": 0.4364018142223358, |
| "learning_rate": 0.0001222767419038273, |
| "loss": 0.7123017907142639, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.3935546875, |
| "grad_norm": 0.24169716238975525, |
| "learning_rate": 0.00012208047105004906, |
| "loss": 0.48390328884124756, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.39453125, |
| "grad_norm": 3.4902851581573486, |
| "learning_rate": 0.00012188420019627087, |
| "loss": 0.8519951105117798, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.3955078125, |
| "grad_norm": 0.8332751989364624, |
| "learning_rate": 0.00012168792934249264, |
| "loss": 0.7562370896339417, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.396484375, |
| "grad_norm": 0.3582589030265808, |
| "learning_rate": 0.00012149165848871442, |
| "loss": 0.3723471164703369, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.3974609375, |
| "grad_norm": 0.48302146792411804, |
| "learning_rate": 0.00012129538763493622, |
| "loss": 1.0008171796798706, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.3984375, |
| "grad_norm": 0.3510138988494873, |
| "learning_rate": 0.000121099116781158, |
| "loss": 0.30772703886032104, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.3994140625, |
| "grad_norm": 0.2771015763282776, |
| "learning_rate": 0.0001209028459273798, |
| "loss": 0.4403090178966522, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.400390625, |
| "grad_norm": 0.42239415645599365, |
| "learning_rate": 0.00012070657507360156, |
| "loss": 0.5451241731643677, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.4013671875, |
| "grad_norm": 0.27876874804496765, |
| "learning_rate": 0.00012051030421982336, |
| "loss": 0.3590753972530365, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.40234375, |
| "grad_norm": 0.42854824662208557, |
| "learning_rate": 0.00012031403336604514, |
| "loss": 1.0192680358886719, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.4033203125, |
| "grad_norm": 0.32980695366859436, |
| "learning_rate": 0.00012011776251226694, |
| "loss": 0.6476566195487976, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.404296875, |
| "grad_norm": 0.45046037435531616, |
| "learning_rate": 0.00011992149165848872, |
| "loss": 0.9548048973083496, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.4052734375, |
| "grad_norm": 0.4176082909107208, |
| "learning_rate": 0.00011972522080471052, |
| "loss": 0.3793225586414337, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.40625, |
| "grad_norm": 0.335823118686676, |
| "learning_rate": 0.00011952894995093229, |
| "loss": 0.5807560086250305, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.4072265625, |
| "grad_norm": 0.4758591651916504, |
| "learning_rate": 0.00011933267909715408, |
| "loss": 0.3924551010131836, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.408203125, |
| "grad_norm": 0.21527709066867828, |
| "learning_rate": 0.00011913640824337586, |
| "loss": 0.1651245653629303, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.4091796875, |
| "grad_norm": 0.31255391240119934, |
| "learning_rate": 0.00011894013738959766, |
| "loss": 0.6133516430854797, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.41015625, |
| "grad_norm": 0.40668365359306335, |
| "learning_rate": 0.00011874386653581944, |
| "loss": 0.894720196723938, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.4111328125, |
| "grad_norm": 0.35574087500572205, |
| "learning_rate": 0.00011854759568204121, |
| "loss": 0.9017484188079834, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.412109375, |
| "grad_norm": 0.3389612138271332, |
| "learning_rate": 0.00011835132482826301, |
| "loss": 0.7961660623550415, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.4130859375, |
| "grad_norm": 0.8334202766418457, |
| "learning_rate": 0.00011815505397448479, |
| "loss": 0.8654063940048218, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.4140625, |
| "grad_norm": 0.5917571187019348, |
| "learning_rate": 0.00011795878312070659, |
| "loss": 0.631730318069458, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.4150390625, |
| "grad_norm": 0.4908443093299866, |
| "learning_rate": 0.00011776251226692835, |
| "loss": 0.3205869495868683, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.416015625, |
| "grad_norm": 0.8349789381027222, |
| "learning_rate": 0.00011756624141315016, |
| "loss": 0.8526176810264587, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.4169921875, |
| "grad_norm": 0.38712671399116516, |
| "learning_rate": 0.00011736997055937193, |
| "loss": 0.6580482125282288, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.41796875, |
| "grad_norm": 0.766034722328186, |
| "learning_rate": 0.00011717369970559373, |
| "loss": 0.5494309663772583, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.4189453125, |
| "grad_norm": 0.33322349190711975, |
| "learning_rate": 0.00011697742885181551, |
| "loss": 0.38351887464523315, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.419921875, |
| "grad_norm": 0.411155641078949, |
| "learning_rate": 0.00011678115799803731, |
| "loss": 0.8139836192131042, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.4208984375, |
| "grad_norm": 0.5857217907905579, |
| "learning_rate": 0.00011658488714425908, |
| "loss": 0.5668150186538696, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.421875, |
| "grad_norm": 0.8849710822105408, |
| "learning_rate": 0.00011638861629048087, |
| "loss": 0.5478008985519409, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.4228515625, |
| "grad_norm": 0.6771020293235779, |
| "learning_rate": 0.00011619234543670265, |
| "loss": 0.608709454536438, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.423828125, |
| "grad_norm": 0.30138713121414185, |
| "learning_rate": 0.00011599607458292445, |
| "loss": 0.8240669369697571, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.4248046875, |
| "grad_norm": 0.3273598253726959, |
| "learning_rate": 0.00011579980372914623, |
| "loss": 0.6287229657173157, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.42578125, |
| "grad_norm": 0.5044806003570557, |
| "learning_rate": 0.000115603532875368, |
| "loss": 0.735835075378418, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.4267578125, |
| "grad_norm": 0.34495776891708374, |
| "learning_rate": 0.0001154072620215898, |
| "loss": 0.7688421010971069, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.427734375, |
| "grad_norm": 0.41923069953918457, |
| "learning_rate": 0.00011521099116781158, |
| "loss": 0.679617166519165, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.4287109375, |
| "grad_norm": 0.3509843945503235, |
| "learning_rate": 0.00011501472031403338, |
| "loss": 0.7478575110435486, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.4296875, |
| "grad_norm": 0.4758707582950592, |
| "learning_rate": 0.00011481844946025514, |
| "loss": 0.48871147632598877, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.4306640625, |
| "grad_norm": 0.30272597074508667, |
| "learning_rate": 0.00011462217860647695, |
| "loss": 0.4311315715312958, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.431640625, |
| "grad_norm": 0.5226417779922485, |
| "learning_rate": 0.00011442590775269872, |
| "loss": 0.8198300004005432, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.4326171875, |
| "grad_norm": 0.41183850169181824, |
| "learning_rate": 0.00011422963689892052, |
| "loss": 0.9958367347717285, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.43359375, |
| "grad_norm": 0.384048193693161, |
| "learning_rate": 0.0001140333660451423, |
| "loss": 0.3194778859615326, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.4345703125, |
| "grad_norm": 0.5035115480422974, |
| "learning_rate": 0.0001138370951913641, |
| "loss": 0.6455928683280945, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.435546875, |
| "grad_norm": 0.4875551462173462, |
| "learning_rate": 0.00011364082433758587, |
| "loss": 0.799978494644165, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.4365234375, |
| "grad_norm": 0.3395763337612152, |
| "learning_rate": 0.00011344455348380768, |
| "loss": 0.47672414779663086, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.4375, |
| "grad_norm": 0.5594314932823181, |
| "learning_rate": 0.00011324828263002944, |
| "loss": 0.4325803518295288, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.4384765625, |
| "grad_norm": 0.44647228717803955, |
| "learning_rate": 0.00011305201177625124, |
| "loss": 0.8119433522224426, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.439453125, |
| "grad_norm": 0.3190518915653229, |
| "learning_rate": 0.00011285574092247302, |
| "loss": 0.4949466288089752, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.4404296875, |
| "grad_norm": 0.5943452715873718, |
| "learning_rate": 0.00011265947006869479, |
| "loss": 0.8245764374732971, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.44140625, |
| "grad_norm": 0.8067309260368347, |
| "learning_rate": 0.00011246319921491659, |
| "loss": 0.39331740140914917, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.4423828125, |
| "grad_norm": 0.4130857288837433, |
| "learning_rate": 0.00011226692836113837, |
| "loss": 1.0005946159362793, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.443359375, |
| "grad_norm": 0.6839224100112915, |
| "learning_rate": 0.00011207065750736017, |
| "loss": 0.453269362449646, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.4443359375, |
| "grad_norm": 0.6282085180282593, |
| "learning_rate": 0.00011187438665358195, |
| "loss": 0.7137607932090759, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.4453125, |
| "grad_norm": 0.49894508719444275, |
| "learning_rate": 0.00011167811579980374, |
| "loss": 0.6289803981781006, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.4462890625, |
| "grad_norm": 0.3570895493030548, |
| "learning_rate": 0.00011148184494602551, |
| "loss": 0.3711976110935211, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.447265625, |
| "grad_norm": 0.28931114077568054, |
| "learning_rate": 0.00011128557409224731, |
| "loss": 0.5629679560661316, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.4482421875, |
| "grad_norm": 1.2492791414260864, |
| "learning_rate": 0.00011108930323846909, |
| "loss": 0.5821082592010498, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.44921875, |
| "grad_norm": 0.29861876368522644, |
| "learning_rate": 0.00011089303238469089, |
| "loss": 0.4129573106765747, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.4501953125, |
| "grad_norm": 0.5244950652122498, |
| "learning_rate": 0.00011069676153091267, |
| "loss": 0.8300201296806335, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.451171875, |
| "grad_norm": 0.446435809135437, |
| "learning_rate": 0.00011050049067713446, |
| "loss": 0.7500958442687988, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.4521484375, |
| "grad_norm": 0.4531306028366089, |
| "learning_rate": 0.00011030421982335623, |
| "loss": 0.8492609262466431, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.453125, |
| "grad_norm": 0.46944308280944824, |
| "learning_rate": 0.00011010794896957802, |
| "loss": 0.6209090948104858, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.4541015625, |
| "grad_norm": 0.5465651154518127, |
| "learning_rate": 0.00010991167811579981, |
| "loss": 0.5176469087600708, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.455078125, |
| "grad_norm": 0.36550402641296387, |
| "learning_rate": 0.00010971540726202158, |
| "loss": 0.6358295679092407, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.4560546875, |
| "grad_norm": 0.48919910192489624, |
| "learning_rate": 0.00010951913640824338, |
| "loss": 0.5903019905090332, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.45703125, |
| "grad_norm": 0.4378332793712616, |
| "learning_rate": 0.00010932286555446516, |
| "loss": 0.6710047721862793, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.4580078125, |
| "grad_norm": 0.3095405101776123, |
| "learning_rate": 0.00010912659470068696, |
| "loss": 0.6787213683128357, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.458984375, |
| "grad_norm": 0.40901967883110046, |
| "learning_rate": 0.00010893032384690874, |
| "loss": 0.6371384859085083, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.4599609375, |
| "grad_norm": 0.3962486982345581, |
| "learning_rate": 0.00010873405299313053, |
| "loss": 0.5823498964309692, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.4609375, |
| "grad_norm": 0.4094708263874054, |
| "learning_rate": 0.0001085377821393523, |
| "loss": 1.0396480560302734, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.4619140625, |
| "grad_norm": 0.5117614269256592, |
| "learning_rate": 0.0001083415112855741, |
| "loss": 0.6320610642433167, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.462890625, |
| "grad_norm": 0.28345227241516113, |
| "learning_rate": 0.00010814524043179588, |
| "loss": 0.33279290795326233, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.4638671875, |
| "grad_norm": 0.5475791096687317, |
| "learning_rate": 0.00010794896957801768, |
| "loss": 0.359570175409317, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.46484375, |
| "grad_norm": 0.44176843762397766, |
| "learning_rate": 0.00010775269872423946, |
| "loss": 0.7576714158058167, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.4658203125, |
| "grad_norm": 0.473562628030777, |
| "learning_rate": 0.00010755642787046125, |
| "loss": 0.8758799433708191, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.466796875, |
| "grad_norm": 0.41919219493865967, |
| "learning_rate": 0.00010736015701668302, |
| "loss": 0.863654375076294, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.4677734375, |
| "grad_norm": 0.4215691089630127, |
| "learning_rate": 0.0001071638861629048, |
| "loss": 0.5004569292068481, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.46875, |
| "grad_norm": 0.36801034212112427, |
| "learning_rate": 0.0001069676153091266, |
| "loss": 0.9330754280090332, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.4697265625, |
| "grad_norm": 0.42489972710609436, |
| "learning_rate": 0.00010677134445534837, |
| "loss": 1.0529820919036865, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.470703125, |
| "grad_norm": 0.4067368507385254, |
| "learning_rate": 0.00010657507360157018, |
| "loss": 0.5453970432281494, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.4716796875, |
| "grad_norm": 0.28611162304878235, |
| "learning_rate": 0.00010637880274779195, |
| "loss": 0.2348572313785553, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.47265625, |
| "grad_norm": 0.40047627687454224, |
| "learning_rate": 0.00010618253189401374, |
| "loss": 0.4776308834552765, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.4736328125, |
| "grad_norm": 0.5168628692626953, |
| "learning_rate": 0.00010598626104023553, |
| "loss": 0.9922167062759399, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.474609375, |
| "grad_norm": 0.3620246946811676, |
| "learning_rate": 0.00010578999018645732, |
| "loss": 0.7285036444664001, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.4755859375, |
| "grad_norm": 0.42711782455444336, |
| "learning_rate": 0.00010559371933267909, |
| "loss": 0.6387231349945068, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.4765625, |
| "grad_norm": 0.2139827311038971, |
| "learning_rate": 0.0001053974484789009, |
| "loss": 0.4295338988304138, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.4775390625, |
| "grad_norm": 0.31191739439964294, |
| "learning_rate": 0.00010520117762512267, |
| "loss": 0.42860671877861023, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.478515625, |
| "grad_norm": 0.2909379303455353, |
| "learning_rate": 0.00010500490677134447, |
| "loss": 0.47065097093582153, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.4794921875, |
| "grad_norm": 0.48990437388420105, |
| "learning_rate": 0.00010480863591756625, |
| "loss": 0.8870656490325928, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.48046875, |
| "grad_norm": 0.5662127733230591, |
| "learning_rate": 0.00010461236506378804, |
| "loss": 0.8007984161376953, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.4814453125, |
| "grad_norm": 0.3656634986400604, |
| "learning_rate": 0.00010441609421000981, |
| "loss": 0.41389334201812744, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.482421875, |
| "grad_norm": 0.39840465784072876, |
| "learning_rate": 0.0001042198233562316, |
| "loss": 0.6927056908607483, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.4833984375, |
| "grad_norm": 0.641647219657898, |
| "learning_rate": 0.00010402355250245339, |
| "loss": 0.7912976145744324, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.484375, |
| "grad_norm": 0.4522266685962677, |
| "learning_rate": 0.00010382728164867517, |
| "loss": 0.615374743938446, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.4853515625, |
| "grad_norm": 0.415444016456604, |
| "learning_rate": 0.00010363101079489697, |
| "loss": 0.8559135794639587, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.486328125, |
| "grad_norm": 0.4477578401565552, |
| "learning_rate": 0.00010343473994111874, |
| "loss": 0.6109384298324585, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.4873046875, |
| "grad_norm": 0.33097633719444275, |
| "learning_rate": 0.00010323846908734053, |
| "loss": 0.6325762271881104, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.48828125, |
| "grad_norm": 0.38771572709083557, |
| "learning_rate": 0.00010304219823356232, |
| "loss": 0.5979640483856201, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.4892578125, |
| "grad_norm": 0.3339928984642029, |
| "learning_rate": 0.00010284592737978411, |
| "loss": 0.6619001626968384, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.490234375, |
| "grad_norm": 0.6400135159492493, |
| "learning_rate": 0.00010264965652600588, |
| "loss": 0.28338727355003357, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.4912109375, |
| "grad_norm": 0.35763970017433167, |
| "learning_rate": 0.00010245338567222769, |
| "loss": 0.6373124122619629, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.4921875, |
| "grad_norm": 0.2136622965335846, |
| "learning_rate": 0.00010225711481844946, |
| "loss": 0.2315329760313034, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.4931640625, |
| "grad_norm": 0.6324110627174377, |
| "learning_rate": 0.00010206084396467126, |
| "loss": 1.0045514106750488, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.494140625, |
| "grad_norm": 0.4471307694911957, |
| "learning_rate": 0.00010186457311089304, |
| "loss": 0.5188390016555786, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.4951171875, |
| "grad_norm": 0.38222211599349976, |
| "learning_rate": 0.00010166830225711483, |
| "loss": 0.7351740598678589, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.49609375, |
| "grad_norm": 0.41885000467300415, |
| "learning_rate": 0.0001014720314033366, |
| "loss": 0.9071688055992126, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.4970703125, |
| "grad_norm": 0.8193621635437012, |
| "learning_rate": 0.00010127576054955839, |
| "loss": 0.7240473031997681, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.498046875, |
| "grad_norm": 0.2846645712852478, |
| "learning_rate": 0.00010107948969578018, |
| "loss": 0.351628839969635, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.4990234375, |
| "grad_norm": 0.4778954088687897, |
| "learning_rate": 0.00010088321884200196, |
| "loss": 0.7705833911895752, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.3384702503681183, |
| "learning_rate": 0.00010068694798822376, |
| "loss": 0.5467265248298645, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.5009765625, |
| "grad_norm": 0.43917056918144226, |
| "learning_rate": 0.00010049067713444553, |
| "loss": 0.9810686707496643, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.501953125, |
| "grad_norm": 0.4351615607738495, |
| "learning_rate": 0.00010029440628066732, |
| "loss": 0.9716764688491821, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.5029296875, |
| "grad_norm": 0.49873459339141846, |
| "learning_rate": 0.00010009813542688911, |
| "loss": 0.9183788299560547, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.50390625, |
| "grad_norm": 0.36710789799690247, |
| "learning_rate": 9.990186457311089e-05, |
| "loss": 0.49884548783302307, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.5048828125, |
| "grad_norm": 0.5402531623840332, |
| "learning_rate": 9.970559371933269e-05, |
| "loss": 0.6645570993423462, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.505859375, |
| "grad_norm": 0.4990559220314026, |
| "learning_rate": 9.950932286555447e-05, |
| "loss": 1.0321924686431885, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.5068359375, |
| "grad_norm": 0.4634752869606018, |
| "learning_rate": 9.931305201177625e-05, |
| "loss": 0.8484972715377808, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.5078125, |
| "grad_norm": 0.38584330677986145, |
| "learning_rate": 9.911678115799805e-05, |
| "loss": 0.3424939513206482, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.5087890625, |
| "grad_norm": 0.41148415207862854, |
| "learning_rate": 9.892051030421983e-05, |
| "loss": 0.7890703678131104, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.509765625, |
| "grad_norm": 0.35891374945640564, |
| "learning_rate": 9.872423945044161e-05, |
| "loss": 0.7387750744819641, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.5107421875, |
| "grad_norm": 0.4174203872680664, |
| "learning_rate": 9.85279685966634e-05, |
| "loss": 0.5610706806182861, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.51171875, |
| "grad_norm": 0.4062010645866394, |
| "learning_rate": 9.833169774288519e-05, |
| "loss": 0.6016039252281189, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.5126953125, |
| "grad_norm": 0.35915061831474304, |
| "learning_rate": 9.813542688910697e-05, |
| "loss": 0.37933990359306335, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.513671875, |
| "grad_norm": 0.49826234579086304, |
| "learning_rate": 9.793915603532877e-05, |
| "loss": 0.9650976657867432, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.5146484375, |
| "grad_norm": 0.4122180938720703, |
| "learning_rate": 9.774288518155055e-05, |
| "loss": 0.5477824211120605, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.515625, |
| "grad_norm": 0.3824058175086975, |
| "learning_rate": 9.754661432777233e-05, |
| "loss": 0.5163108706474304, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.5166015625, |
| "grad_norm": 0.4485555589199066, |
| "learning_rate": 9.735034347399413e-05, |
| "loss": 0.9402418732643127, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.517578125, |
| "grad_norm": 0.4053209722042084, |
| "learning_rate": 9.715407262021591e-05, |
| "loss": 0.9314478039741516, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.5185546875, |
| "grad_norm": 0.3183811604976654, |
| "learning_rate": 9.695780176643768e-05, |
| "loss": 0.6706205606460571, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.51953125, |
| "grad_norm": 0.40083932876586914, |
| "learning_rate": 9.676153091265947e-05, |
| "loss": 1.102424144744873, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.5205078125, |
| "grad_norm": 0.5949054956436157, |
| "learning_rate": 9.656526005888126e-05, |
| "loss": 0.8396608829498291, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.521484375, |
| "grad_norm": 0.41966959834098816, |
| "learning_rate": 9.636898920510304e-05, |
| "loss": 0.5641101002693176, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.5224609375, |
| "grad_norm": 0.448281466960907, |
| "learning_rate": 9.617271835132484e-05, |
| "loss": 0.44873932003974915, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.5234375, |
| "grad_norm": 0.47785645723342896, |
| "learning_rate": 9.597644749754662e-05, |
| "loss": 0.8799008131027222, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.5244140625, |
| "grad_norm": 0.45459261536598206, |
| "learning_rate": 9.57801766437684e-05, |
| "loss": 0.8261788487434387, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.525390625, |
| "grad_norm": 0.6168074607849121, |
| "learning_rate": 9.55839057899902e-05, |
| "loss": 0.9762136936187744, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.5263671875, |
| "grad_norm": 0.6500818133354187, |
| "learning_rate": 9.538763493621198e-05, |
| "loss": 0.9044640064239502, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.52734375, |
| "grad_norm": 0.31668490171432495, |
| "learning_rate": 9.519136408243376e-05, |
| "loss": 0.42503029108047485, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.5283203125, |
| "grad_norm": 0.4041314721107483, |
| "learning_rate": 9.499509322865556e-05, |
| "loss": 0.6643175482749939, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.529296875, |
| "grad_norm": 1.011020541191101, |
| "learning_rate": 9.479882237487734e-05, |
| "loss": 0.7636033892631531, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.5302734375, |
| "grad_norm": 0.3690396845340729, |
| "learning_rate": 9.460255152109912e-05, |
| "loss": 1.0516947507858276, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.53125, |
| "grad_norm": 0.288604199886322, |
| "learning_rate": 9.440628066732092e-05, |
| "loss": 0.3806208372116089, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.5322265625, |
| "grad_norm": 0.4247501790523529, |
| "learning_rate": 9.42100098135427e-05, |
| "loss": 0.8651745319366455, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.533203125, |
| "grad_norm": 1.1893255710601807, |
| "learning_rate": 9.401373895976447e-05, |
| "loss": 0.28601521253585815, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.5341796875, |
| "grad_norm": 0.3229619562625885, |
| "learning_rate": 9.381746810598626e-05, |
| "loss": 0.8316909670829773, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.53515625, |
| "grad_norm": 0.390278160572052, |
| "learning_rate": 9.362119725220805e-05, |
| "loss": 0.7263185977935791, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.5361328125, |
| "grad_norm": 0.2949998378753662, |
| "learning_rate": 9.342492639842983e-05, |
| "loss": 0.5417062044143677, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.537109375, |
| "grad_norm": 0.47482210397720337, |
| "learning_rate": 9.322865554465163e-05, |
| "loss": 0.6505849361419678, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.5380859375, |
| "grad_norm": 0.3653123676776886, |
| "learning_rate": 9.303238469087341e-05, |
| "loss": 0.7270935773849487, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.5390625, |
| "grad_norm": 0.5652351975440979, |
| "learning_rate": 9.283611383709519e-05, |
| "loss": 0.8330069780349731, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.5400390625, |
| "grad_norm": 0.448408842086792, |
| "learning_rate": 9.263984298331699e-05, |
| "loss": 0.8804951310157776, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.541015625, |
| "grad_norm": 0.7700690031051636, |
| "learning_rate": 9.244357212953877e-05, |
| "loss": 0.6466813087463379, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.5419921875, |
| "grad_norm": 0.45755863189697266, |
| "learning_rate": 9.224730127576055e-05, |
| "loss": 0.5548572540283203, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.54296875, |
| "grad_norm": 0.4113846719264984, |
| "learning_rate": 9.205103042198235e-05, |
| "loss": 0.9286736845970154, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.5439453125, |
| "grad_norm": 0.4555431604385376, |
| "learning_rate": 9.185475956820413e-05, |
| "loss": 0.8332977890968323, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.544921875, |
| "grad_norm": 0.5103408098220825, |
| "learning_rate": 9.165848871442591e-05, |
| "loss": 1.0110094547271729, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.5458984375, |
| "grad_norm": 0.299912691116333, |
| "learning_rate": 9.146221786064771e-05, |
| "loss": 0.3136459290981293, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.546875, |
| "grad_norm": 0.40499091148376465, |
| "learning_rate": 9.126594700686948e-05, |
| "loss": 0.6785961389541626, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.5478515625, |
| "grad_norm": 0.4190375804901123, |
| "learning_rate": 9.106967615309127e-05, |
| "loss": 0.9891744256019592, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.548828125, |
| "grad_norm": 0.6265519261360168, |
| "learning_rate": 9.087340529931305e-05, |
| "loss": 0.48712462186813354, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.5498046875, |
| "grad_norm": 0.466420978307724, |
| "learning_rate": 9.067713444553484e-05, |
| "loss": 0.5573943257331848, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.55078125, |
| "grad_norm": 0.3990301191806793, |
| "learning_rate": 9.048086359175663e-05, |
| "loss": 0.5893411040306091, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.5517578125, |
| "grad_norm": 0.31471043825149536, |
| "learning_rate": 9.028459273797842e-05, |
| "loss": 0.593424379825592, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.552734375, |
| "grad_norm": 0.46789905428886414, |
| "learning_rate": 9.00883218842002e-05, |
| "loss": 0.9398684501647949, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.5537109375, |
| "grad_norm": 0.48358282446861267, |
| "learning_rate": 8.989205103042198e-05, |
| "loss": 0.895098865032196, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.5546875, |
| "grad_norm": 0.25878453254699707, |
| "learning_rate": 8.969578017664378e-05, |
| "loss": 0.4817226231098175, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.5556640625, |
| "grad_norm": 0.5319378972053528, |
| "learning_rate": 8.949950932286556e-05, |
| "loss": 0.6119651794433594, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.556640625, |
| "grad_norm": 0.3002898097038269, |
| "learning_rate": 8.930323846908734e-05, |
| "loss": 0.28599199652671814, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.5576171875, |
| "grad_norm": 0.37161317467689514, |
| "learning_rate": 8.910696761530914e-05, |
| "loss": 0.3307079076766968, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.55859375, |
| "grad_norm": 0.4755436182022095, |
| "learning_rate": 8.891069676153092e-05, |
| "loss": 0.5868921279907227, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.5595703125, |
| "grad_norm": 0.3264123499393463, |
| "learning_rate": 8.87144259077527e-05, |
| "loss": 0.6682905554771423, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.560546875, |
| "grad_norm": 0.43468573689460754, |
| "learning_rate": 8.85181550539745e-05, |
| "loss": 0.6316066980361938, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.5615234375, |
| "grad_norm": 0.5759416222572327, |
| "learning_rate": 8.832188420019627e-05, |
| "loss": 0.5687480568885803, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.5625, |
| "grad_norm": 0.39352041482925415, |
| "learning_rate": 8.812561334641806e-05, |
| "loss": 0.3803275525569916, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.5634765625, |
| "grad_norm": 0.4155440926551819, |
| "learning_rate": 8.792934249263984e-05, |
| "loss": 0.3923049569129944, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.564453125, |
| "grad_norm": 0.34934133291244507, |
| "learning_rate": 8.773307163886163e-05, |
| "loss": 0.7100962996482849, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.5654296875, |
| "grad_norm": 0.3993069529533386, |
| "learning_rate": 8.753680078508342e-05, |
| "loss": 0.6711176037788391, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.56640625, |
| "grad_norm": 0.3445776700973511, |
| "learning_rate": 8.73405299313052e-05, |
| "loss": 0.6986067295074463, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.5673828125, |
| "grad_norm": 0.45837292075157166, |
| "learning_rate": 8.714425907752699e-05, |
| "loss": 0.9020513892173767, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.568359375, |
| "grad_norm": 0.3630208671092987, |
| "learning_rate": 8.694798822374878e-05, |
| "loss": 0.42499858140945435, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.5693359375, |
| "grad_norm": 0.41205838322639465, |
| "learning_rate": 8.675171736997057e-05, |
| "loss": 0.5535018444061279, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.5703125, |
| "grad_norm": 0.2596284747123718, |
| "learning_rate": 8.655544651619235e-05, |
| "loss": 0.3234618902206421, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.5712890625, |
| "grad_norm": 0.3716956079006195, |
| "learning_rate": 8.635917566241414e-05, |
| "loss": 0.7567611932754517, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.572265625, |
| "grad_norm": 0.42999619245529175, |
| "learning_rate": 8.616290480863593e-05, |
| "loss": 0.8695427179336548, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.5732421875, |
| "grad_norm": 0.3309305012226105, |
| "learning_rate": 8.596663395485771e-05, |
| "loss": 0.989714503288269, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.57421875, |
| "grad_norm": 0.40024474263191223, |
| "learning_rate": 8.57703631010795e-05, |
| "loss": 1.0608711242675781, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.5751953125, |
| "grad_norm": 0.453950434923172, |
| "learning_rate": 8.557409224730129e-05, |
| "loss": 0.7340632677078247, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.576171875, |
| "grad_norm": 0.4473342299461365, |
| "learning_rate": 8.537782139352306e-05, |
| "loss": 0.7264219522476196, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.5771484375, |
| "grad_norm": 0.420469731092453, |
| "learning_rate": 8.518155053974485e-05, |
| "loss": 0.8141539692878723, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.578125, |
| "grad_norm": 0.4068243205547333, |
| "learning_rate": 8.498527968596663e-05, |
| "loss": 0.5802872180938721, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.5791015625, |
| "grad_norm": 0.4243272840976715, |
| "learning_rate": 8.478900883218842e-05, |
| "loss": 0.350595086812973, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.580078125, |
| "grad_norm": 0.4519834518432617, |
| "learning_rate": 8.459273797841021e-05, |
| "loss": 0.7131458520889282, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.5810546875, |
| "grad_norm": 0.34145745635032654, |
| "learning_rate": 8.4396467124632e-05, |
| "loss": 0.7618221640586853, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.58203125, |
| "grad_norm": 0.46494174003601074, |
| "learning_rate": 8.420019627085378e-05, |
| "loss": 0.5102145075798035, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.5830078125, |
| "grad_norm": 0.3305060565471649, |
| "learning_rate": 8.400392541707557e-05, |
| "loss": 0.7812811732292175, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.583984375, |
| "grad_norm": 0.47092583775520325, |
| "learning_rate": 8.380765456329736e-05, |
| "loss": 0.7497634887695312, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.5849609375, |
| "grad_norm": 0.38902655243873596, |
| "learning_rate": 8.361138370951914e-05, |
| "loss": 0.4198119640350342, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.5859375, |
| "grad_norm": 0.43659287691116333, |
| "learning_rate": 8.341511285574093e-05, |
| "loss": 0.824333667755127, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.5869140625, |
| "grad_norm": 0.4277879595756531, |
| "learning_rate": 8.321884200196272e-05, |
| "loss": 0.445267915725708, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.587890625, |
| "grad_norm": 0.3186829090118408, |
| "learning_rate": 8.30225711481845e-05, |
| "loss": 0.9906235337257385, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.5888671875, |
| "grad_norm": 0.2983294427394867, |
| "learning_rate": 8.28263002944063e-05, |
| "loss": 0.5342146754264832, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.58984375, |
| "grad_norm": 0.4127228856086731, |
| "learning_rate": 8.263002944062808e-05, |
| "loss": 0.41288450360298157, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.5908203125, |
| "grad_norm": 0.3961617052555084, |
| "learning_rate": 8.243375858684985e-05, |
| "loss": 0.43576663732528687, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.591796875, |
| "grad_norm": 0.4124387502670288, |
| "learning_rate": 8.223748773307164e-05, |
| "loss": 0.5837401747703552, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.5927734375, |
| "grad_norm": 0.4274151921272278, |
| "learning_rate": 8.204121687929342e-05, |
| "loss": 0.8666547536849976, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.59375, |
| "grad_norm": 0.3881700932979584, |
| "learning_rate": 8.18449460255152e-05, |
| "loss": 0.9063656330108643, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.5947265625, |
| "grad_norm": 0.46216556429862976, |
| "learning_rate": 8.1648675171737e-05, |
| "loss": 0.4573599696159363, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.595703125, |
| "grad_norm": 0.3843960762023926, |
| "learning_rate": 8.145240431795878e-05, |
| "loss": 0.6214632391929626, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.5966796875, |
| "grad_norm": 0.538301408290863, |
| "learning_rate": 8.125613346418057e-05, |
| "loss": 0.8800979852676392, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.59765625, |
| "grad_norm": 0.49643319845199585, |
| "learning_rate": 8.105986261040236e-05, |
| "loss": 0.48715031147003174, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.5986328125, |
| "grad_norm": 0.4753062427043915, |
| "learning_rate": 8.086359175662415e-05, |
| "loss": 0.8127011060714722, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.599609375, |
| "grad_norm": 0.7572022676467896, |
| "learning_rate": 8.066732090284593e-05, |
| "loss": 0.7151535153388977, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.6005859375, |
| "grad_norm": 0.35117295384407043, |
| "learning_rate": 8.047105004906772e-05, |
| "loss": 0.9221618175506592, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.6015625, |
| "grad_norm": 0.2643633186817169, |
| "learning_rate": 8.02747791952895e-05, |
| "loss": 0.5025840401649475, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.6025390625, |
| "grad_norm": 0.45553916692733765, |
| "learning_rate": 8.007850834151129e-05, |
| "loss": 0.452494740486145, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.603515625, |
| "grad_norm": 0.386594295501709, |
| "learning_rate": 7.988223748773308e-05, |
| "loss": 0.7942792773246765, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.6044921875, |
| "grad_norm": 0.3616650700569153, |
| "learning_rate": 7.968596663395485e-05, |
| "loss": 0.5697340965270996, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.60546875, |
| "grad_norm": 0.3885051906108856, |
| "learning_rate": 7.948969578017665e-05, |
| "loss": 0.7082506418228149, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.6064453125, |
| "grad_norm": 0.4484117329120636, |
| "learning_rate": 7.929342492639843e-05, |
| "loss": 0.5993860960006714, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.607421875, |
| "grad_norm": 0.44654563069343567, |
| "learning_rate": 7.909715407262021e-05, |
| "loss": 0.5804839134216309, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.6083984375, |
| "grad_norm": 0.3943687081336975, |
| "learning_rate": 7.890088321884201e-05, |
| "loss": 0.6422688364982605, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.609375, |
| "grad_norm": 0.4153381288051605, |
| "learning_rate": 7.870461236506379e-05, |
| "loss": 0.6437400579452515, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.6103515625, |
| "grad_norm": 0.38221171498298645, |
| "learning_rate": 7.850834151128557e-05, |
| "loss": 0.8738820552825928, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.611328125, |
| "grad_norm": 0.339599609375, |
| "learning_rate": 7.831207065750737e-05, |
| "loss": 0.517478883266449, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.6123046875, |
| "grad_norm": 0.7177076935768127, |
| "learning_rate": 7.811579980372915e-05, |
| "loss": 0.7372115254402161, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.61328125, |
| "grad_norm": 0.47573140263557434, |
| "learning_rate": 7.791952894995093e-05, |
| "loss": 0.649010181427002, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.6142578125, |
| "grad_norm": 0.44851094484329224, |
| "learning_rate": 7.772325809617273e-05, |
| "loss": 0.6269842386245728, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.615234375, |
| "grad_norm": 0.3544669449329376, |
| "learning_rate": 7.752698724239451e-05, |
| "loss": 0.8870983123779297, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.6162109375, |
| "grad_norm": 0.4103491008281708, |
| "learning_rate": 7.73307163886163e-05, |
| "loss": 0.8711034059524536, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.6171875, |
| "grad_norm": 0.3651062548160553, |
| "learning_rate": 7.713444553483808e-05, |
| "loss": 0.8420337438583374, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.6181640625, |
| "grad_norm": 0.4135638475418091, |
| "learning_rate": 7.693817468105987e-05, |
| "loss": 0.601078450679779, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.619140625, |
| "grad_norm": 0.5965299010276794, |
| "learning_rate": 7.674190382728164e-05, |
| "loss": 0.604471743106842, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.6201171875, |
| "grad_norm": 0.4340416491031647, |
| "learning_rate": 7.654563297350344e-05, |
| "loss": 0.905183732509613, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.62109375, |
| "grad_norm": 0.361518919467926, |
| "learning_rate": 7.634936211972522e-05, |
| "loss": 0.6569675207138062, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.6220703125, |
| "grad_norm": 1.04604971408844, |
| "learning_rate": 7.6153091265947e-05, |
| "loss": 0.7399482727050781, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.623046875, |
| "grad_norm": 0.8039460778236389, |
| "learning_rate": 7.59568204121688e-05, |
| "loss": 0.6003617644309998, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.6240234375, |
| "grad_norm": 0.5462118983268738, |
| "learning_rate": 7.576054955839058e-05, |
| "loss": 0.7750217914581299, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.625, |
| "grad_norm": 0.29333505034446716, |
| "learning_rate": 7.556427870461236e-05, |
| "loss": 0.47371456027030945, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.6259765625, |
| "grad_norm": 0.2468312531709671, |
| "learning_rate": 7.536800785083416e-05, |
| "loss": 0.4615188241004944, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.626953125, |
| "grad_norm": 0.48467332124710083, |
| "learning_rate": 7.517173699705594e-05, |
| "loss": 0.6456693410873413, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.6279296875, |
| "grad_norm": 0.5471943020820618, |
| "learning_rate": 7.497546614327772e-05, |
| "loss": 0.5899155139923096, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.62890625, |
| "grad_norm": 0.3715604841709137, |
| "learning_rate": 7.477919528949952e-05, |
| "loss": 0.7910970449447632, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.6298828125, |
| "grad_norm": 0.3298327922821045, |
| "learning_rate": 7.45829244357213e-05, |
| "loss": 0.5769776701927185, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.630859375, |
| "grad_norm": 0.44131916761398315, |
| "learning_rate": 7.438665358194309e-05, |
| "loss": 0.8805806636810303, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.6318359375, |
| "grad_norm": 0.4686948359012604, |
| "learning_rate": 7.419038272816488e-05, |
| "loss": 0.7262091636657715, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.6328125, |
| "grad_norm": 0.48123931884765625, |
| "learning_rate": 7.399411187438666e-05, |
| "loss": 0.8481992483139038, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.6337890625, |
| "grad_norm": 0.5582646131515503, |
| "learning_rate": 7.379784102060843e-05, |
| "loss": 0.4963653087615967, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.634765625, |
| "grad_norm": 0.30464881658554077, |
| "learning_rate": 7.360157016683023e-05, |
| "loss": 0.6772556900978088, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.6357421875, |
| "grad_norm": 0.44710803031921387, |
| "learning_rate": 7.340529931305201e-05, |
| "loss": 0.5476983189582825, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.63671875, |
| "grad_norm": 0.35922887921333313, |
| "learning_rate": 7.320902845927379e-05, |
| "loss": 0.8256508111953735, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.6376953125, |
| "grad_norm": 0.40085500478744507, |
| "learning_rate": 7.301275760549559e-05, |
| "loss": 0.5783500671386719, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.638671875, |
| "grad_norm": 0.47579512000083923, |
| "learning_rate": 7.281648675171737e-05, |
| "loss": 0.5591031908988953, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.6396484375, |
| "grad_norm": 0.5594353675842285, |
| "learning_rate": 7.262021589793915e-05, |
| "loss": 0.8133666515350342, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.640625, |
| "grad_norm": 0.44030821323394775, |
| "learning_rate": 7.242394504416095e-05, |
| "loss": 1.0282940864562988, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.6416015625, |
| "grad_norm": 0.7038627862930298, |
| "learning_rate": 7.222767419038273e-05, |
| "loss": 0.2322971373796463, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.642578125, |
| "grad_norm": 0.223698228597641, |
| "learning_rate": 7.203140333660451e-05, |
| "loss": 0.7056642174720764, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.6435546875, |
| "grad_norm": 0.3815765976905823, |
| "learning_rate": 7.183513248282631e-05, |
| "loss": 1.074477195739746, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.64453125, |
| "grad_norm": 0.35606271028518677, |
| "learning_rate": 7.163886162904809e-05, |
| "loss": 0.4300801753997803, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.6455078125, |
| "grad_norm": 0.32899999618530273, |
| "learning_rate": 7.144259077526988e-05, |
| "loss": 0.5923078060150146, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.646484375, |
| "grad_norm": 0.49968358874320984, |
| "learning_rate": 7.124631992149167e-05, |
| "loss": 0.8295183181762695, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.6474609375, |
| "grad_norm": 0.3393777012825012, |
| "learning_rate": 7.105004906771345e-05, |
| "loss": 0.30383622646331787, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.6484375, |
| "grad_norm": 0.24977968633174896, |
| "learning_rate": 7.085377821393524e-05, |
| "loss": 0.429612934589386, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.6494140625, |
| "grad_norm": 0.35886242985725403, |
| "learning_rate": 7.065750736015702e-05, |
| "loss": 0.9189084768295288, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.650390625, |
| "grad_norm": 0.3856249153614044, |
| "learning_rate": 7.04612365063788e-05, |
| "loss": 0.4880048930644989, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.6513671875, |
| "grad_norm": 0.4439884424209595, |
| "learning_rate": 7.026496565260058e-05, |
| "loss": 0.7537186145782471, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.65234375, |
| "grad_norm": 0.29563215374946594, |
| "learning_rate": 7.006869479882238e-05, |
| "loss": 0.38701343536376953, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.6533203125, |
| "grad_norm": 0.1909576952457428, |
| "learning_rate": 6.987242394504416e-05, |
| "loss": 0.15140604972839355, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.654296875, |
| "grad_norm": 0.3344849944114685, |
| "learning_rate": 6.967615309126594e-05, |
| "loss": 0.527427077293396, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.6552734375, |
| "grad_norm": 0.3609422743320465, |
| "learning_rate": 6.947988223748774e-05, |
| "loss": 0.29116177558898926, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.65625, |
| "grad_norm": 0.4419811964035034, |
| "learning_rate": 6.928361138370952e-05, |
| "loss": 0.7166855931282043, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.6572265625, |
| "grad_norm": 0.31890806555747986, |
| "learning_rate": 6.90873405299313e-05, |
| "loss": 0.5259425640106201, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.658203125, |
| "grad_norm": 0.39572352170944214, |
| "learning_rate": 6.88910696761531e-05, |
| "loss": 0.5964791774749756, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.6591796875, |
| "grad_norm": 0.4501058757305145, |
| "learning_rate": 6.869479882237488e-05, |
| "loss": 0.2289922833442688, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.66015625, |
| "grad_norm": 0.2884235680103302, |
| "learning_rate": 6.849852796859666e-05, |
| "loss": 0.2730886936187744, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.6611328125, |
| "grad_norm": 0.32970431447029114, |
| "learning_rate": 6.830225711481846e-05, |
| "loss": 0.4283568859100342, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.662109375, |
| "grad_norm": 0.39025789499282837, |
| "learning_rate": 6.810598626104023e-05, |
| "loss": 0.9361288547515869, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.6630859375, |
| "grad_norm": 0.48386886715888977, |
| "learning_rate": 6.790971540726203e-05, |
| "loss": 0.4907494783401489, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.6640625, |
| "grad_norm": 0.41783151030540466, |
| "learning_rate": 6.771344455348381e-05, |
| "loss": 0.7485824823379517, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.6650390625, |
| "grad_norm": 0.4826144278049469, |
| "learning_rate": 6.751717369970559e-05, |
| "loss": 0.6413211226463318, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.666015625, |
| "grad_norm": 0.27521079778671265, |
| "learning_rate": 6.732090284592739e-05, |
| "loss": 0.5747159123420715, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.6669921875, |
| "grad_norm": 0.3745660185813904, |
| "learning_rate": 6.712463199214917e-05, |
| "loss": 0.414341002702713, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.66796875, |
| "grad_norm": 0.45048731565475464, |
| "learning_rate": 6.692836113837095e-05, |
| "loss": 0.3665570318698883, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.6689453125, |
| "grad_norm": 0.5048633217811584, |
| "learning_rate": 6.673209028459275e-05, |
| "loss": 0.5923498272895813, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.669921875, |
| "grad_norm": 0.46423155069351196, |
| "learning_rate": 6.653581943081453e-05, |
| "loss": 0.7506915330886841, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.6708984375, |
| "grad_norm": 0.42965108156204224, |
| "learning_rate": 6.633954857703631e-05, |
| "loss": 0.7576399445533752, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.671875, |
| "grad_norm": 0.48331597447395325, |
| "learning_rate": 6.614327772325811e-05, |
| "loss": 0.5249682068824768, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.6728515625, |
| "grad_norm": 0.4685790240764618, |
| "learning_rate": 6.594700686947989e-05, |
| "loss": 0.8056750297546387, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.673828125, |
| "grad_norm": 0.46440044045448303, |
| "learning_rate": 6.575073601570167e-05, |
| "loss": 0.9252493381500244, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.6748046875, |
| "grad_norm": 0.46564289927482605, |
| "learning_rate": 6.555446516192347e-05, |
| "loss": 0.8182022571563721, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.67578125, |
| "grad_norm": 0.4397750496864319, |
| "learning_rate": 6.535819430814525e-05, |
| "loss": 0.7928388118743896, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.6767578125, |
| "grad_norm": 0.3233174681663513, |
| "learning_rate": 6.516192345436702e-05, |
| "loss": 0.5252426862716675, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.677734375, |
| "grad_norm": 0.6012148857116699, |
| "learning_rate": 6.496565260058882e-05, |
| "loss": 0.44195663928985596, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.6787109375, |
| "grad_norm": 0.6329052448272705, |
| "learning_rate": 6.47693817468106e-05, |
| "loss": 0.5354570150375366, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.6796875, |
| "grad_norm": 0.47926270961761475, |
| "learning_rate": 6.457311089303238e-05, |
| "loss": 0.4950491786003113, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.6806640625, |
| "grad_norm": 0.5051383972167969, |
| "learning_rate": 6.437684003925418e-05, |
| "loss": 0.6795849204063416, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.681640625, |
| "grad_norm": 0.4022398591041565, |
| "learning_rate": 6.418056918547596e-05, |
| "loss": 1.0388166904449463, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.6826171875, |
| "grad_norm": 0.4309573471546173, |
| "learning_rate": 6.398429833169774e-05, |
| "loss": 0.6022897362709045, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.68359375, |
| "grad_norm": 0.3301983177661896, |
| "learning_rate": 6.378802747791954e-05, |
| "loss": 0.6451660394668579, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.6845703125, |
| "grad_norm": 0.6647156476974487, |
| "learning_rate": 6.359175662414132e-05, |
| "loss": 0.9699732661247253, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.685546875, |
| "grad_norm": 0.37545597553253174, |
| "learning_rate": 6.33954857703631e-05, |
| "loss": 0.43181508779525757, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.6865234375, |
| "grad_norm": 0.40882429480552673, |
| "learning_rate": 6.31992149165849e-05, |
| "loss": 0.665264368057251, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.6875, |
| "grad_norm": 0.46597936749458313, |
| "learning_rate": 6.300294406280668e-05, |
| "loss": 0.8813620209693909, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.6884765625, |
| "grad_norm": 0.4355461597442627, |
| "learning_rate": 6.280667320902846e-05, |
| "loss": 0.595770537853241, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.689453125, |
| "grad_norm": 0.45896056294441223, |
| "learning_rate": 6.261040235525026e-05, |
| "loss": 0.7571601271629333, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.6904296875, |
| "grad_norm": 0.37643495202064514, |
| "learning_rate": 6.241413150147204e-05, |
| "loss": 0.47930869460105896, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.69140625, |
| "grad_norm": 0.49690738320350647, |
| "learning_rate": 6.221786064769381e-05, |
| "loss": 0.3727263808250427, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.6923828125, |
| "grad_norm": 0.44111907482147217, |
| "learning_rate": 6.20215897939156e-05, |
| "loss": 0.7276532649993896, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.693359375, |
| "grad_norm": 0.44872644543647766, |
| "learning_rate": 6.182531894013739e-05, |
| "loss": 0.5082123279571533, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.6943359375, |
| "grad_norm": 0.3345314562320709, |
| "learning_rate": 6.162904808635917e-05, |
| "loss": 0.5472716093063354, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.6953125, |
| "grad_norm": 0.4269154667854309, |
| "learning_rate": 6.143277723258097e-05, |
| "loss": 0.7036910057067871, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.6962890625, |
| "grad_norm": 0.5314676761627197, |
| "learning_rate": 6.123650637880275e-05, |
| "loss": 0.8663474917411804, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.697265625, |
| "grad_norm": 0.2820166349411011, |
| "learning_rate": 6.104023552502453e-05, |
| "loss": 0.6397068500518799, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.6982421875, |
| "grad_norm": 0.40954726934432983, |
| "learning_rate": 6.084396467124632e-05, |
| "loss": 0.5477964282035828, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.69921875, |
| "grad_norm": 0.6858615279197693, |
| "learning_rate": 6.064769381746811e-05, |
| "loss": 0.694764256477356, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.7001953125, |
| "grad_norm": 2.901998281478882, |
| "learning_rate": 6.04514229636899e-05, |
| "loss": 0.5803335309028625, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.701171875, |
| "grad_norm": 0.6065869927406311, |
| "learning_rate": 6.025515210991168e-05, |
| "loss": 0.49790292978286743, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.7021484375, |
| "grad_norm": 0.3678690195083618, |
| "learning_rate": 6.005888125613347e-05, |
| "loss": 0.38595882058143616, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.703125, |
| "grad_norm": 0.32496991753578186, |
| "learning_rate": 5.986261040235526e-05, |
| "loss": 0.3554360866546631, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.7041015625, |
| "grad_norm": 0.5348960161209106, |
| "learning_rate": 5.966633954857704e-05, |
| "loss": 1.0386948585510254, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.705078125, |
| "grad_norm": 0.42248818278312683, |
| "learning_rate": 5.947006869479883e-05, |
| "loss": 0.4950508177280426, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.7060546875, |
| "grad_norm": 0.36575669050216675, |
| "learning_rate": 5.9273797841020606e-05, |
| "loss": 0.8793643712997437, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.70703125, |
| "grad_norm": 0.30802977085113525, |
| "learning_rate": 5.9077526987242395e-05, |
| "loss": 0.7557331919670105, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.7080078125, |
| "grad_norm": 0.36057788133621216, |
| "learning_rate": 5.888125613346418e-05, |
| "loss": 0.793386697769165, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.708984375, |
| "grad_norm": 0.5049283504486084, |
| "learning_rate": 5.8684985279685966e-05, |
| "loss": 0.3805343210697174, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.7099609375, |
| "grad_norm": 0.4448167681694031, |
| "learning_rate": 5.8488714425907756e-05, |
| "loss": 0.8297110199928284, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.7109375, |
| "grad_norm": 0.5144803524017334, |
| "learning_rate": 5.829244357212954e-05, |
| "loss": 0.8582932949066162, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.7119140625, |
| "grad_norm": 0.48559248447418213, |
| "learning_rate": 5.809617271835133e-05, |
| "loss": 0.851997971534729, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.712890625, |
| "grad_norm": 0.5277959704399109, |
| "learning_rate": 5.7899901864573116e-05, |
| "loss": 0.8560271859169006, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.7138671875, |
| "grad_norm": 0.39055025577545166, |
| "learning_rate": 5.77036310107949e-05, |
| "loss": 0.5023626685142517, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.71484375, |
| "grad_norm": 0.4014328718185425, |
| "learning_rate": 5.750736015701669e-05, |
| "loss": 0.7782986760139465, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.7158203125, |
| "grad_norm": 0.9840988516807556, |
| "learning_rate": 5.731108930323848e-05, |
| "loss": 0.5097107887268066, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.716796875, |
| "grad_norm": 0.512140691280365, |
| "learning_rate": 5.711481844946026e-05, |
| "loss": 0.5448895692825317, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.7177734375, |
| "grad_norm": 0.45195046067237854, |
| "learning_rate": 5.691854759568205e-05, |
| "loss": 0.7583330273628235, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.71875, |
| "grad_norm": 0.4155009090900421, |
| "learning_rate": 5.672227674190384e-05, |
| "loss": 0.5220797061920166, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.7197265625, |
| "grad_norm": 0.552148699760437, |
| "learning_rate": 5.652600588812562e-05, |
| "loss": 0.8043540716171265, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.720703125, |
| "grad_norm": 0.30510297417640686, |
| "learning_rate": 5.6329735034347396e-05, |
| "loss": 0.5110808610916138, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.7216796875, |
| "grad_norm": 0.522339940071106, |
| "learning_rate": 5.6133464180569185e-05, |
| "loss": 1.0245096683502197, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.72265625, |
| "grad_norm": 0.27751341462135315, |
| "learning_rate": 5.5937193326790974e-05, |
| "loss": 0.6376601457595825, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.7236328125, |
| "grad_norm": 0.4283340573310852, |
| "learning_rate": 5.5740922473012756e-05, |
| "loss": 1.1317777633666992, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.724609375, |
| "grad_norm": 0.541248619556427, |
| "learning_rate": 5.5544651619234545e-05, |
| "loss": 0.8086187839508057, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.7255859375, |
| "grad_norm": 0.24750906229019165, |
| "learning_rate": 5.5348380765456335e-05, |
| "loss": 0.4873177409172058, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.7265625, |
| "grad_norm": 0.42374616861343384, |
| "learning_rate": 5.515210991167812e-05, |
| "loss": 0.41606956720352173, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.7275390625, |
| "grad_norm": 0.35455161333084106, |
| "learning_rate": 5.4955839057899906e-05, |
| "loss": 0.49936947226524353, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.728515625, |
| "grad_norm": 0.4243617653846741, |
| "learning_rate": 5.475956820412169e-05, |
| "loss": 0.6650359630584717, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.7294921875, |
| "grad_norm": 0.4106060862541199, |
| "learning_rate": 5.456329735034348e-05, |
| "loss": 0.37870654463768005, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.73046875, |
| "grad_norm": 0.3536394536495209, |
| "learning_rate": 5.436702649656527e-05, |
| "loss": 1.0944924354553223, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.7314453125, |
| "grad_norm": 0.3067559003829956, |
| "learning_rate": 5.417075564278705e-05, |
| "loss": 0.6380996704101562, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.732421875, |
| "grad_norm": 0.40423691272735596, |
| "learning_rate": 5.397448478900884e-05, |
| "loss": 0.712358295917511, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.7333984375, |
| "grad_norm": 0.451038658618927, |
| "learning_rate": 5.377821393523063e-05, |
| "loss": 0.6221305727958679, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.734375, |
| "grad_norm": 0.32606229186058044, |
| "learning_rate": 5.35819430814524e-05, |
| "loss": 0.6600078344345093, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.7353515625, |
| "grad_norm": 0.746896505355835, |
| "learning_rate": 5.3385672227674185e-05, |
| "loss": 0.5533967614173889, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.736328125, |
| "grad_norm": 0.403277724981308, |
| "learning_rate": 5.3189401373895974e-05, |
| "loss": 0.7483388185501099, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.7373046875, |
| "grad_norm": 0.6016709208488464, |
| "learning_rate": 5.2993130520117764e-05, |
| "loss": 0.539909839630127, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.73828125, |
| "grad_norm": 0.39885231852531433, |
| "learning_rate": 5.2796859666339546e-05, |
| "loss": 0.7900533676147461, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.7392578125, |
| "grad_norm": 0.3245362639427185, |
| "learning_rate": 5.2600588812561335e-05, |
| "loss": 0.42862433195114136, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.740234375, |
| "grad_norm": 0.47334104776382446, |
| "learning_rate": 5.2404317958783124e-05, |
| "loss": 0.3249909281730652, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.7412109375, |
| "grad_norm": 0.3029737174510956, |
| "learning_rate": 5.220804710500491e-05, |
| "loss": 0.4264957308769226, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.7421875, |
| "grad_norm": 0.33878564834594727, |
| "learning_rate": 5.2011776251226696e-05, |
| "loss": 0.4446904957294464, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.7431640625, |
| "grad_norm": 0.3307798206806183, |
| "learning_rate": 5.1815505397448485e-05, |
| "loss": 0.461605966091156, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.744140625, |
| "grad_norm": 0.4146850109100342, |
| "learning_rate": 5.161923454367027e-05, |
| "loss": 0.758568525314331, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.7451171875, |
| "grad_norm": 0.3531327545642853, |
| "learning_rate": 5.1422963689892056e-05, |
| "loss": 0.4580535292625427, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.74609375, |
| "grad_norm": 0.3952695429325104, |
| "learning_rate": 5.1226692836113846e-05, |
| "loss": 0.333244651556015, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.7470703125, |
| "grad_norm": 0.5774162411689758, |
| "learning_rate": 5.103042198233563e-05, |
| "loss": 0.6433362364768982, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.748046875, |
| "grad_norm": 0.49668964743614197, |
| "learning_rate": 5.083415112855742e-05, |
| "loss": 0.8478100895881653, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.7490234375, |
| "grad_norm": 0.3303810954093933, |
| "learning_rate": 5.063788027477919e-05, |
| "loss": 0.7296837568283081, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 0.27652832865715027, |
| "learning_rate": 5.044160942100098e-05, |
| "loss": 0.6442312598228455, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.7509765625, |
| "grad_norm": 1.0828924179077148, |
| "learning_rate": 5.0245338567222764e-05, |
| "loss": 0.9848635196685791, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.751953125, |
| "grad_norm": 0.38959333300590515, |
| "learning_rate": 5.0049067713444553e-05, |
| "loss": 0.722776472568512, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.7529296875, |
| "grad_norm": 0.3470323383808136, |
| "learning_rate": 4.985279685966634e-05, |
| "loss": 0.6584157943725586, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.75390625, |
| "grad_norm": 0.4060254693031311, |
| "learning_rate": 4.9656526005888125e-05, |
| "loss": 0.6276923418045044, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.7548828125, |
| "grad_norm": 0.34566962718963623, |
| "learning_rate": 4.9460255152109914e-05, |
| "loss": 0.972516655921936, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.755859375, |
| "grad_norm": 0.41829708218574524, |
| "learning_rate": 4.92639842983317e-05, |
| "loss": 0.6937177181243896, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.7568359375, |
| "grad_norm": 0.7653974294662476, |
| "learning_rate": 4.9067713444553486e-05, |
| "loss": 0.6027823090553284, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.7578125, |
| "grad_norm": 1.0477155447006226, |
| "learning_rate": 4.8871442590775275e-05, |
| "loss": 0.925806999206543, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.7587890625, |
| "grad_norm": 0.43484824895858765, |
| "learning_rate": 4.8675171736997064e-05, |
| "loss": 0.7783142328262329, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.759765625, |
| "grad_norm": 0.33719849586486816, |
| "learning_rate": 4.847890088321884e-05, |
| "loss": 0.6108527779579163, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.7607421875, |
| "grad_norm": 0.3983028531074524, |
| "learning_rate": 4.828263002944063e-05, |
| "loss": 0.9976012706756592, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.76171875, |
| "grad_norm": 0.3278787136077881, |
| "learning_rate": 4.808635917566242e-05, |
| "loss": 0.5754845142364502, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.7626953125, |
| "grad_norm": 0.42433467507362366, |
| "learning_rate": 4.78900883218842e-05, |
| "loss": 0.8455826640129089, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.763671875, |
| "grad_norm": 0.33245334029197693, |
| "learning_rate": 4.769381746810599e-05, |
| "loss": 0.5207083225250244, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.7646484375, |
| "grad_norm": 0.4390372931957245, |
| "learning_rate": 4.749754661432778e-05, |
| "loss": 0.7208432555198669, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.765625, |
| "grad_norm": 0.325720876455307, |
| "learning_rate": 4.730127576054956e-05, |
| "loss": 0.3017955422401428, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.7666015625, |
| "grad_norm": 0.3036203980445862, |
| "learning_rate": 4.710500490677135e-05, |
| "loss": 0.47869423031806946, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.767578125, |
| "grad_norm": 0.4316065013408661, |
| "learning_rate": 4.690873405299313e-05, |
| "loss": 0.7984920740127563, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.7685546875, |
| "grad_norm": 0.46907728910446167, |
| "learning_rate": 4.6712463199214915e-05, |
| "loss": 0.7288491725921631, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.76953125, |
| "grad_norm": 0.38269418478012085, |
| "learning_rate": 4.6516192345436704e-05, |
| "loss": 0.46745771169662476, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.7705078125, |
| "grad_norm": 0.6045718193054199, |
| "learning_rate": 4.631992149165849e-05, |
| "loss": 0.5405256152153015, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.771484375, |
| "grad_norm": 0.3303053677082062, |
| "learning_rate": 4.6123650637880275e-05, |
| "loss": 0.6721948981285095, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.7724609375, |
| "grad_norm": 0.42014074325561523, |
| "learning_rate": 4.5927379784102065e-05, |
| "loss": 0.9322581887245178, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.7734375, |
| "grad_norm": 0.3720149099826813, |
| "learning_rate": 4.5731108930323854e-05, |
| "loss": 0.7807843685150146, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.7744140625, |
| "grad_norm": 0.31559938192367554, |
| "learning_rate": 4.5534838076545636e-05, |
| "loss": 0.8503724336624146, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.775390625, |
| "grad_norm": 0.4096013903617859, |
| "learning_rate": 4.533856722276742e-05, |
| "loss": 0.6950633525848389, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.7763671875, |
| "grad_norm": 0.3791837990283966, |
| "learning_rate": 4.514229636898921e-05, |
| "loss": 0.7583197951316833, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.77734375, |
| "grad_norm": 0.5274584889411926, |
| "learning_rate": 4.494602551521099e-05, |
| "loss": 0.4712093770503998, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.7783203125, |
| "grad_norm": 0.29654791951179504, |
| "learning_rate": 4.474975466143278e-05, |
| "loss": 0.552979588508606, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.779296875, |
| "grad_norm": 0.25629475712776184, |
| "learning_rate": 4.455348380765457e-05, |
| "loss": 0.5225521922111511, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.7802734375, |
| "grad_norm": 0.2676495611667633, |
| "learning_rate": 4.435721295387635e-05, |
| "loss": 0.4382556080818176, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.78125, |
| "grad_norm": 0.4117366075515747, |
| "learning_rate": 4.416094210009813e-05, |
| "loss": 0.5639417767524719, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.7822265625, |
| "grad_norm": 0.26305386424064636, |
| "learning_rate": 4.396467124631992e-05, |
| "loss": 0.28840768337249756, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.783203125, |
| "grad_norm": 0.7253789305686951, |
| "learning_rate": 4.376840039254171e-05, |
| "loss": 0.4104336202144623, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.7841796875, |
| "grad_norm": 0.371288001537323, |
| "learning_rate": 4.3572129538763494e-05, |
| "loss": 0.609147310256958, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.78515625, |
| "grad_norm": 0.634273111820221, |
| "learning_rate": 4.337585868498528e-05, |
| "loss": 0.5141665935516357, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.7861328125, |
| "grad_norm": 0.4442044496536255, |
| "learning_rate": 4.317958783120707e-05, |
| "loss": 0.4882044494152069, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.787109375, |
| "grad_norm": 0.3099007308483124, |
| "learning_rate": 4.2983316977428854e-05, |
| "loss": 0.3148588538169861, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.7880859375, |
| "grad_norm": 0.41893890500068665, |
| "learning_rate": 4.2787046123650643e-05, |
| "loss": 0.6678078174591064, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.7890625, |
| "grad_norm": 0.47682809829711914, |
| "learning_rate": 4.2590775269872426e-05, |
| "loss": 0.46614763140678406, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.7900390625, |
| "grad_norm": 0.25193366408348083, |
| "learning_rate": 4.239450441609421e-05, |
| "loss": 0.3707652986049652, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.791015625, |
| "grad_norm": 0.3425232768058777, |
| "learning_rate": 4.2198233562316e-05, |
| "loss": 0.604179859161377, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.7919921875, |
| "grad_norm": 0.31459808349609375, |
| "learning_rate": 4.2001962708537786e-05, |
| "loss": 0.748989999294281, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.79296875, |
| "grad_norm": 0.3478514850139618, |
| "learning_rate": 4.180569185475957e-05, |
| "loss": 0.6651142835617065, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.7939453125, |
| "grad_norm": 0.3951675295829773, |
| "learning_rate": 4.160942100098136e-05, |
| "loss": 0.7293418049812317, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.794921875, |
| "grad_norm": 0.26888158917427063, |
| "learning_rate": 4.141315014720315e-05, |
| "loss": 0.2181730419397354, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.7958984375, |
| "grad_norm": 0.17496585845947266, |
| "learning_rate": 4.121687929342492e-05, |
| "loss": 0.18257993459701538, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.796875, |
| "grad_norm": 0.3386918306350708, |
| "learning_rate": 4.102060843964671e-05, |
| "loss": 0.43010956048965454, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.7978515625, |
| "grad_norm": 0.5185137987136841, |
| "learning_rate": 4.08243375858685e-05, |
| "loss": 0.9117882251739502, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.798828125, |
| "grad_norm": 0.499529093503952, |
| "learning_rate": 4.0628066732090283e-05, |
| "loss": 0.8601939678192139, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.7998046875, |
| "grad_norm": 0.44401317834854126, |
| "learning_rate": 4.043179587831207e-05, |
| "loss": 0.8643960356712341, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.80078125, |
| "grad_norm": 0.30553653836250305, |
| "learning_rate": 4.023552502453386e-05, |
| "loss": 0.7741817235946655, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.8017578125, |
| "grad_norm": 0.443541944026947, |
| "learning_rate": 4.0039254170755644e-05, |
| "loss": 0.9571224451065063, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.802734375, |
| "grad_norm": 0.2611587643623352, |
| "learning_rate": 3.9842983316977426e-05, |
| "loss": 0.4755222201347351, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.8037109375, |
| "grad_norm": 0.38695722818374634, |
| "learning_rate": 3.9646712463199216e-05, |
| "loss": 0.9597996473312378, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.8046875, |
| "grad_norm": 0.505346953868866, |
| "learning_rate": 3.9450441609421005e-05, |
| "loss": 0.328266441822052, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.8056640625, |
| "grad_norm": 0.38910478353500366, |
| "learning_rate": 3.925417075564279e-05, |
| "loss": 0.4758382737636566, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.806640625, |
| "grad_norm": 0.4268342852592468, |
| "learning_rate": 3.9057899901864576e-05, |
| "loss": 0.6131553649902344, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.8076171875, |
| "grad_norm": 0.32205328345298767, |
| "learning_rate": 3.8861629048086365e-05, |
| "loss": 0.6047544479370117, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.80859375, |
| "grad_norm": 0.6975948214530945, |
| "learning_rate": 3.866535819430815e-05, |
| "loss": 0.7599061727523804, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.8095703125, |
| "grad_norm": 0.20186780393123627, |
| "learning_rate": 3.846908734052994e-05, |
| "loss": 0.3639545738697052, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.810546875, |
| "grad_norm": 0.443435937166214, |
| "learning_rate": 3.827281648675172e-05, |
| "loss": 0.6933274269104004, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.8115234375, |
| "grad_norm": 0.44157811999320984, |
| "learning_rate": 3.80765456329735e-05, |
| "loss": 0.5135524272918701, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.8125, |
| "grad_norm": 0.3959600031375885, |
| "learning_rate": 3.788027477919529e-05, |
| "loss": 0.6713152527809143, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.8134765625, |
| "grad_norm": 0.5439519882202148, |
| "learning_rate": 3.768400392541708e-05, |
| "loss": 0.3603706359863281, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.814453125, |
| "grad_norm": 0.36693719029426575, |
| "learning_rate": 3.748773307163886e-05, |
| "loss": 0.8574247360229492, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.8154296875, |
| "grad_norm": 0.3476804792881012, |
| "learning_rate": 3.729146221786065e-05, |
| "loss": 0.6845530867576599, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.81640625, |
| "grad_norm": 0.48850229382514954, |
| "learning_rate": 3.709519136408244e-05, |
| "loss": 0.788569450378418, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.8173828125, |
| "grad_norm": 0.5997111797332764, |
| "learning_rate": 3.6898920510304216e-05, |
| "loss": 0.5885312557220459, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.818359375, |
| "grad_norm": 0.43312472105026245, |
| "learning_rate": 3.6702649656526005e-05, |
| "loss": 0.5300126075744629, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.8193359375, |
| "grad_norm": 0.6505857110023499, |
| "learning_rate": 3.6506378802747795e-05, |
| "loss": 0.7164736986160278, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.8203125, |
| "grad_norm": 0.34061765670776367, |
| "learning_rate": 3.631010794896958e-05, |
| "loss": 0.5405696034431458, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.8212890625, |
| "grad_norm": 0.4188057780265808, |
| "learning_rate": 3.6113837095191366e-05, |
| "loss": 1.0057684183120728, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.822265625, |
| "grad_norm": 0.392007052898407, |
| "learning_rate": 3.5917566241413155e-05, |
| "loss": 0.6687936782836914, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.8232421875, |
| "grad_norm": 0.44254210591316223, |
| "learning_rate": 3.572129538763494e-05, |
| "loss": 0.39150726795196533, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.82421875, |
| "grad_norm": 0.41756534576416016, |
| "learning_rate": 3.552502453385673e-05, |
| "loss": 0.764665961265564, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.8251953125, |
| "grad_norm": 0.9839560985565186, |
| "learning_rate": 3.532875368007851e-05, |
| "loss": 0.45259296894073486, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.826171875, |
| "grad_norm": 0.3465111553668976, |
| "learning_rate": 3.513248282630029e-05, |
| "loss": 0.5895928740501404, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.8271484375, |
| "grad_norm": 0.4883447289466858, |
| "learning_rate": 3.493621197252208e-05, |
| "loss": 0.8401346802711487, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.828125, |
| "grad_norm": 0.3590312898159027, |
| "learning_rate": 3.473994111874387e-05, |
| "loss": 0.6134470105171204, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.8291015625, |
| "grad_norm": 0.48273324966430664, |
| "learning_rate": 3.454367026496565e-05, |
| "loss": 0.6351644992828369, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.830078125, |
| "grad_norm": 0.32156500220298767, |
| "learning_rate": 3.434739941118744e-05, |
| "loss": 0.5098355412483215, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.8310546875, |
| "grad_norm": 0.38239747285842896, |
| "learning_rate": 3.415112855740923e-05, |
| "loss": 1.0178660154342651, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.83203125, |
| "grad_norm": 0.6875290274620056, |
| "learning_rate": 3.395485770363101e-05, |
| "loss": 0.4496825337409973, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.8330078125, |
| "grad_norm": 0.27034860849380493, |
| "learning_rate": 3.3758586849852795e-05, |
| "loss": 0.41253381967544556, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.833984375, |
| "grad_norm": 0.5166223049163818, |
| "learning_rate": 3.3562315996074584e-05, |
| "loss": 0.7344639897346497, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.8349609375, |
| "grad_norm": 0.39597758650779724, |
| "learning_rate": 3.3366045142296373e-05, |
| "loss": 0.6066821217536926, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.8359375, |
| "grad_norm": 0.44033098220825195, |
| "learning_rate": 3.3169774288518156e-05, |
| "loss": 0.7928174734115601, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.8369140625, |
| "grad_norm": 0.3340597450733185, |
| "learning_rate": 3.2973503434739945e-05, |
| "loss": 0.4783233404159546, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.837890625, |
| "grad_norm": 0.5634653568267822, |
| "learning_rate": 3.2777232580961734e-05, |
| "loss": 0.785845935344696, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.8388671875, |
| "grad_norm": 0.24581296741962433, |
| "learning_rate": 3.258096172718351e-05, |
| "loss": 0.36480462551116943, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.83984375, |
| "grad_norm": 0.316773384809494, |
| "learning_rate": 3.23846908734053e-05, |
| "loss": 0.886894941329956, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.8408203125, |
| "grad_norm": 0.4605409502983093, |
| "learning_rate": 3.218842001962709e-05, |
| "loss": 0.7125131487846375, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.841796875, |
| "grad_norm": 0.5473557114601135, |
| "learning_rate": 3.199214916584887e-05, |
| "loss": 0.45582157373428345, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.8427734375, |
| "grad_norm": 0.4604926109313965, |
| "learning_rate": 3.179587831207066e-05, |
| "loss": 0.5392733812332153, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.84375, |
| "grad_norm": 0.3192322552204132, |
| "learning_rate": 3.159960745829245e-05, |
| "loss": 0.3216538727283478, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.8447265625, |
| "grad_norm": 0.4225713610649109, |
| "learning_rate": 3.140333660451423e-05, |
| "loss": 0.36403900384902954, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.845703125, |
| "grad_norm": 0.7738484740257263, |
| "learning_rate": 3.120706575073602e-05, |
| "loss": 0.5428112149238586, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.8466796875, |
| "grad_norm": 0.7795976400375366, |
| "learning_rate": 3.10107948969578e-05, |
| "loss": 0.838668704032898, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.84765625, |
| "grad_norm": 0.4240044355392456, |
| "learning_rate": 3.0814524043179585e-05, |
| "loss": 0.5039677023887634, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.8486328125, |
| "grad_norm": 0.7870606780052185, |
| "learning_rate": 3.0618253189401374e-05, |
| "loss": 0.2639703154563904, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.849609375, |
| "grad_norm": 4.898192405700684, |
| "learning_rate": 3.042198233562316e-05, |
| "loss": 0.9641809463500977, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.8505859375, |
| "grad_norm": 0.4090663194656372, |
| "learning_rate": 3.022571148184495e-05, |
| "loss": 0.5249053835868835, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.8515625, |
| "grad_norm": 0.5761129856109619, |
| "learning_rate": 3.0029440628066735e-05, |
| "loss": 0.8987921476364136, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.8525390625, |
| "grad_norm": 0.2440023124217987, |
| "learning_rate": 2.983316977428852e-05, |
| "loss": 0.3279159367084503, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.853515625, |
| "grad_norm": 0.438519150018692, |
| "learning_rate": 2.9636898920510303e-05, |
| "loss": 0.8272308111190796, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.8544921875, |
| "grad_norm": 0.4011988639831543, |
| "learning_rate": 2.944062806673209e-05, |
| "loss": 0.3140803873538971, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.85546875, |
| "grad_norm": 0.5748201012611389, |
| "learning_rate": 2.9244357212953878e-05, |
| "loss": 0.6699116230010986, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.8564453125, |
| "grad_norm": 0.3001462519168854, |
| "learning_rate": 2.9048086359175664e-05, |
| "loss": 0.19382989406585693, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.857421875, |
| "grad_norm": 0.40844887495040894, |
| "learning_rate": 2.885181550539745e-05, |
| "loss": 0.6494845747947693, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.8583984375, |
| "grad_norm": 0.3480914235115051, |
| "learning_rate": 2.865554465161924e-05, |
| "loss": 0.5555131435394287, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.859375, |
| "grad_norm": 0.3903101682662964, |
| "learning_rate": 2.8459273797841024e-05, |
| "loss": 0.6830955147743225, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.8603515625, |
| "grad_norm": 0.3058629333972931, |
| "learning_rate": 2.826300294406281e-05, |
| "loss": 0.3747236728668213, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.861328125, |
| "grad_norm": 0.49275287985801697, |
| "learning_rate": 2.8066732090284592e-05, |
| "loss": 1.0192487239837646, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.8623046875, |
| "grad_norm": 0.4016769826412201, |
| "learning_rate": 2.7870461236506378e-05, |
| "loss": 0.4012300372123718, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.86328125, |
| "grad_norm": 0.4790811240673065, |
| "learning_rate": 2.7674190382728167e-05, |
| "loss": 0.6936056613922119, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.8642578125, |
| "grad_norm": 0.39931413531303406, |
| "learning_rate": 2.7477919528949953e-05, |
| "loss": 0.3612633943557739, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.865234375, |
| "grad_norm": 0.3250795006752014, |
| "learning_rate": 2.728164867517174e-05, |
| "loss": 0.5146504640579224, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.8662109375, |
| "grad_norm": 0.5216737985610962, |
| "learning_rate": 2.7085377821393525e-05, |
| "loss": 0.6185201406478882, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.8671875, |
| "grad_norm": 0.5681923031806946, |
| "learning_rate": 2.6889106967615314e-05, |
| "loss": 0.9492973685264587, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.8681640625, |
| "grad_norm": 0.5284391045570374, |
| "learning_rate": 2.6692836113837093e-05, |
| "loss": 0.7801765203475952, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.869140625, |
| "grad_norm": 0.42510825395584106, |
| "learning_rate": 2.6496565260058882e-05, |
| "loss": 0.4871942102909088, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.8701171875, |
| "grad_norm": 0.39092326164245605, |
| "learning_rate": 2.6300294406280668e-05, |
| "loss": 0.5123960375785828, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.87109375, |
| "grad_norm": 0.37694281339645386, |
| "learning_rate": 2.6104023552502453e-05, |
| "loss": 0.3543451428413391, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.8720703125, |
| "grad_norm": 0.26519376039505005, |
| "learning_rate": 2.5907752698724242e-05, |
| "loss": 0.2388455718755722, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.873046875, |
| "grad_norm": 0.6303861141204834, |
| "learning_rate": 2.5711481844946028e-05, |
| "loss": 0.7195224761962891, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.8740234375, |
| "grad_norm": 0.4436159133911133, |
| "learning_rate": 2.5515210991167814e-05, |
| "loss": 0.8888048529624939, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.875, |
| "grad_norm": 0.6473313570022583, |
| "learning_rate": 2.5318940137389596e-05, |
| "loss": 0.8557075262069702, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.8759765625, |
| "grad_norm": 0.6625436544418335, |
| "learning_rate": 2.5122669283611382e-05, |
| "loss": 0.7132158279418945, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.876953125, |
| "grad_norm": 0.7241202592849731, |
| "learning_rate": 2.492639842983317e-05, |
| "loss": 0.9367854595184326, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.8779296875, |
| "grad_norm": 0.5321157574653625, |
| "learning_rate": 2.4730127576054957e-05, |
| "loss": 1.0013937950134277, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.87890625, |
| "grad_norm": 0.3287423253059387, |
| "learning_rate": 2.4533856722276743e-05, |
| "loss": 0.4560258984565735, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.8798828125, |
| "grad_norm": 0.5040727257728577, |
| "learning_rate": 2.4337585868498532e-05, |
| "loss": 0.5655212998390198, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.880859375, |
| "grad_norm": 0.4150228202342987, |
| "learning_rate": 2.4141315014720314e-05, |
| "loss": 0.43106216192245483, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.8818359375, |
| "grad_norm": 0.4006192684173584, |
| "learning_rate": 2.39450441609421e-05, |
| "loss": 0.4401901364326477, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.8828125, |
| "grad_norm": 0.5145865678787231, |
| "learning_rate": 2.374877330716389e-05, |
| "loss": 0.9345691800117493, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.8837890625, |
| "grad_norm": 0.7273013591766357, |
| "learning_rate": 2.3552502453385675e-05, |
| "loss": 0.27768659591674805, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.884765625, |
| "grad_norm": 0.3039482831954956, |
| "learning_rate": 2.3356231599607457e-05, |
| "loss": 0.6196010112762451, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.8857421875, |
| "grad_norm": 0.35697150230407715, |
| "learning_rate": 2.3159960745829247e-05, |
| "loss": 0.34777021408081055, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.88671875, |
| "grad_norm": 0.356717050075531, |
| "learning_rate": 2.2963689892051032e-05, |
| "loss": 0.4651508331298828, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.8876953125, |
| "grad_norm": 0.485963374376297, |
| "learning_rate": 2.2767419038272818e-05, |
| "loss": 0.3906201720237732, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.888671875, |
| "grad_norm": 0.38827836513519287, |
| "learning_rate": 2.2571148184494604e-05, |
| "loss": 0.48782849311828613, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.8896484375, |
| "grad_norm": 0.39589494466781616, |
| "learning_rate": 2.237487733071639e-05, |
| "loss": 0.5089969635009766, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.890625, |
| "grad_norm": 0.6619493365287781, |
| "learning_rate": 2.2178606476938175e-05, |
| "loss": 0.9266189932823181, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.8916015625, |
| "grad_norm": 0.407817542552948, |
| "learning_rate": 2.198233562315996e-05, |
| "loss": 0.3518386483192444, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.892578125, |
| "grad_norm": 0.4645719826221466, |
| "learning_rate": 2.1786064769381747e-05, |
| "loss": 0.9297075271606445, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.8935546875, |
| "grad_norm": 0.434517502784729, |
| "learning_rate": 2.1589793915603536e-05, |
| "loss": 0.7716128826141357, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.89453125, |
| "grad_norm": 0.49387747049331665, |
| "learning_rate": 2.1393523061825322e-05, |
| "loss": 0.5475488901138306, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.8955078125, |
| "grad_norm": 0.5593905448913574, |
| "learning_rate": 2.1197252208047104e-05, |
| "loss": 0.7304456233978271, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.896484375, |
| "grad_norm": 0.3386078178882599, |
| "learning_rate": 2.1000981354268893e-05, |
| "loss": 0.7872465252876282, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.8974609375, |
| "grad_norm": 0.2872868478298187, |
| "learning_rate": 2.080471050049068e-05, |
| "loss": 0.3295198976993561, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.8984375, |
| "grad_norm": 0.4897945523262024, |
| "learning_rate": 2.060843964671246e-05, |
| "loss": 0.3939395546913147, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.8994140625, |
| "grad_norm": 0.5068129897117615, |
| "learning_rate": 2.041216879293425e-05, |
| "loss": 0.4646037817001343, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.900390625, |
| "grad_norm": 0.3769625425338745, |
| "learning_rate": 2.0215897939156036e-05, |
| "loss": 0.811498761177063, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.9013671875, |
| "grad_norm": 0.380655974149704, |
| "learning_rate": 2.0019627085377822e-05, |
| "loss": 0.6260181665420532, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.90234375, |
| "grad_norm": 0.5810602903366089, |
| "learning_rate": 1.9823356231599608e-05, |
| "loss": 0.7125158309936523, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.9033203125, |
| "grad_norm": 0.4367387592792511, |
| "learning_rate": 1.9627085377821394e-05, |
| "loss": 0.7728107571601868, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.904296875, |
| "grad_norm": 0.604702353477478, |
| "learning_rate": 1.9430814524043183e-05, |
| "loss": 0.5136534571647644, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.9052734375, |
| "grad_norm": 0.40865615010261536, |
| "learning_rate": 1.923454367026497e-05, |
| "loss": 0.5040115714073181, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.90625, |
| "grad_norm": 0.3602078855037689, |
| "learning_rate": 1.903827281648675e-05, |
| "loss": 0.4498569965362549, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.9072265625, |
| "grad_norm": 0.46351152658462524, |
| "learning_rate": 1.884200196270854e-05, |
| "loss": 0.8635745644569397, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.908203125, |
| "grad_norm": 0.5490495562553406, |
| "learning_rate": 1.8645731108930326e-05, |
| "loss": 0.9265761375427246, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.9091796875, |
| "grad_norm": 0.4198157489299774, |
| "learning_rate": 1.8449460255152108e-05, |
| "loss": 0.8148217797279358, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.91015625, |
| "grad_norm": 0.5183578729629517, |
| "learning_rate": 1.8253189401373897e-05, |
| "loss": 0.7837534546852112, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.9111328125, |
| "grad_norm": 0.41839340329170227, |
| "learning_rate": 1.8056918547595683e-05, |
| "loss": 0.7239848971366882, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.912109375, |
| "grad_norm": 0.49158063530921936, |
| "learning_rate": 1.786064769381747e-05, |
| "loss": 0.7751527428627014, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.9130859375, |
| "grad_norm": 0.20171599090099335, |
| "learning_rate": 1.7664376840039255e-05, |
| "loss": 0.181843563914299, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.9140625, |
| "grad_norm": 0.36237961053848267, |
| "learning_rate": 1.746810598626104e-05, |
| "loss": 0.5150234699249268, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.9150390625, |
| "grad_norm": 0.4587535858154297, |
| "learning_rate": 1.7271835132482826e-05, |
| "loss": 0.6178685426712036, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.916015625, |
| "grad_norm": 0.392635703086853, |
| "learning_rate": 1.7075564278704615e-05, |
| "loss": 0.7002321481704712, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.9169921875, |
| "grad_norm": 0.28255772590637207, |
| "learning_rate": 1.6879293424926398e-05, |
| "loss": 0.6161627769470215, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.91796875, |
| "grad_norm": 0.31382182240486145, |
| "learning_rate": 1.6683022571148187e-05, |
| "loss": 0.6143029928207397, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.9189453125, |
| "grad_norm": 0.5099475383758545, |
| "learning_rate": 1.6486751717369972e-05, |
| "loss": 0.9116108417510986, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.919921875, |
| "grad_norm": 0.4015892446041107, |
| "learning_rate": 1.6290480863591755e-05, |
| "loss": 0.7331390380859375, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.9208984375, |
| "grad_norm": 0.4519053101539612, |
| "learning_rate": 1.6094210009813544e-05, |
| "loss": 0.6662384867668152, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.921875, |
| "grad_norm": 0.5565328598022461, |
| "learning_rate": 1.589793915603533e-05, |
| "loss": 0.37386590242385864, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.9228515625, |
| "grad_norm": 0.398419588804245, |
| "learning_rate": 1.5701668302257116e-05, |
| "loss": 0.9127399325370789, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.923828125, |
| "grad_norm": 0.37491804361343384, |
| "learning_rate": 1.55053974484789e-05, |
| "loss": 0.47025924921035767, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.9248046875, |
| "grad_norm": 0.49557894468307495, |
| "learning_rate": 1.5309126594700687e-05, |
| "loss": 0.6349594593048096, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.92578125, |
| "grad_norm": 0.2361314743757248, |
| "learning_rate": 1.5112855740922475e-05, |
| "loss": 0.3594982922077179, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.9267578125, |
| "grad_norm": 0.40022003650665283, |
| "learning_rate": 1.491658488714426e-05, |
| "loss": 0.41701436042785645, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.927734375, |
| "grad_norm": 0.349528431892395, |
| "learning_rate": 1.4720314033366044e-05, |
| "loss": 0.2943156063556671, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.9287109375, |
| "grad_norm": 0.4660559892654419, |
| "learning_rate": 1.4524043179587832e-05, |
| "loss": 0.3633948564529419, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.9296875, |
| "grad_norm": 0.28590673208236694, |
| "learning_rate": 1.432777232580962e-05, |
| "loss": 0.4886907935142517, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.9306640625, |
| "grad_norm": 0.4388448894023895, |
| "learning_rate": 1.4131501472031405e-05, |
| "loss": 0.6123654246330261, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.931640625, |
| "grad_norm": 0.4807531237602234, |
| "learning_rate": 1.3935230618253189e-05, |
| "loss": 0.32400381565093994, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.9326171875, |
| "grad_norm": 0.3903636932373047, |
| "learning_rate": 1.3738959764474977e-05, |
| "loss": 0.6839208006858826, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.93359375, |
| "grad_norm": 0.2925507426261902, |
| "learning_rate": 1.3542688910696762e-05, |
| "loss": 0.5898708701133728, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.9345703125, |
| "grad_norm": 0.39300912618637085, |
| "learning_rate": 1.3346418056918546e-05, |
| "loss": 0.3898833692073822, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.935546875, |
| "grad_norm": 0.4321513772010803, |
| "learning_rate": 1.3150147203140334e-05, |
| "loss": 0.5717346668243408, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.9365234375, |
| "grad_norm": 0.47681212425231934, |
| "learning_rate": 1.2953876349362121e-05, |
| "loss": 0.9711145162582397, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.9375, |
| "grad_norm": 0.524958610534668, |
| "learning_rate": 1.2757605495583907e-05, |
| "loss": 0.6577808260917664, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.9384765625, |
| "grad_norm": 0.40814298391342163, |
| "learning_rate": 1.2561334641805691e-05, |
| "loss": 0.5148733258247375, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.939453125, |
| "grad_norm": 0.3122687041759491, |
| "learning_rate": 1.2365063788027479e-05, |
| "loss": 0.884072482585907, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.9404296875, |
| "grad_norm": 0.4473840594291687, |
| "learning_rate": 1.2168792934249266e-05, |
| "loss": 0.660685658454895, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.94140625, |
| "grad_norm": 0.3491450548171997, |
| "learning_rate": 1.197252208047105e-05, |
| "loss": 0.8680378794670105, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.9423828125, |
| "grad_norm": 0.6323879957199097, |
| "learning_rate": 1.1776251226692837e-05, |
| "loss": 0.8196921348571777, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.943359375, |
| "grad_norm": 0.354900062084198, |
| "learning_rate": 1.1579980372914623e-05, |
| "loss": 0.5380838513374329, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.9443359375, |
| "grad_norm": 0.3235265612602234, |
| "learning_rate": 1.1383709519136409e-05, |
| "loss": 0.39993464946746826, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.9453125, |
| "grad_norm": 0.3700491786003113, |
| "learning_rate": 1.1187438665358195e-05, |
| "loss": 0.6613435745239258, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.9462890625, |
| "grad_norm": 0.29880228638648987, |
| "learning_rate": 1.099116781157998e-05, |
| "loss": 0.5756196975708008, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.947265625, |
| "grad_norm": 0.4585433304309845, |
| "learning_rate": 1.0794896957801768e-05, |
| "loss": 0.5012968182563782, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.9482421875, |
| "grad_norm": 0.5275799632072449, |
| "learning_rate": 1.0598626104023552e-05, |
| "loss": 0.4986013174057007, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.94921875, |
| "grad_norm": 0.30642619729042053, |
| "learning_rate": 1.040235525024534e-05, |
| "loss": 0.29793277382850647, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.9501953125, |
| "grad_norm": 0.7356166243553162, |
| "learning_rate": 1.0206084396467125e-05, |
| "loss": 0.6518126726150513, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.951171875, |
| "grad_norm": 0.6069150567054749, |
| "learning_rate": 1.0009813542688911e-05, |
| "loss": 0.7005544900894165, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.9521484375, |
| "grad_norm": 0.500067949295044, |
| "learning_rate": 9.813542688910697e-06, |
| "loss": 0.5567950010299683, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.953125, |
| "grad_norm": 0.5926097631454468, |
| "learning_rate": 9.617271835132484e-06, |
| "loss": 0.6974345445632935, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.9541015625, |
| "grad_norm": 0.28873002529144287, |
| "learning_rate": 9.42100098135427e-06, |
| "loss": 0.28231939673423767, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.955078125, |
| "grad_norm": 0.6644822359085083, |
| "learning_rate": 9.224730127576054e-06, |
| "loss": 0.46575701236724854, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.9560546875, |
| "grad_norm": 0.34748774766921997, |
| "learning_rate": 9.028459273797842e-06, |
| "loss": 0.7192713022232056, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.95703125, |
| "grad_norm": 0.4444558024406433, |
| "learning_rate": 8.832188420019627e-06, |
| "loss": 0.34014150500297546, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.9580078125, |
| "grad_norm": 0.4814091920852661, |
| "learning_rate": 8.635917566241413e-06, |
| "loss": 0.8042552471160889, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.958984375, |
| "grad_norm": 0.5443412661552429, |
| "learning_rate": 8.439646712463199e-06, |
| "loss": 0.6534023880958557, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.9599609375, |
| "grad_norm": 0.40025195479393005, |
| "learning_rate": 8.243375858684986e-06, |
| "loss": 0.9056930541992188, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.9609375, |
| "grad_norm": 0.41958069801330566, |
| "learning_rate": 8.047105004906772e-06, |
| "loss": 0.5610394477844238, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.9619140625, |
| "grad_norm": 0.33056482672691345, |
| "learning_rate": 7.850834151128558e-06, |
| "loss": 0.5796000361442566, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.962890625, |
| "grad_norm": 0.5056169629096985, |
| "learning_rate": 7.654563297350344e-06, |
| "loss": 0.7795373201370239, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.9638671875, |
| "grad_norm": 0.4030667543411255, |
| "learning_rate": 7.45829244357213e-06, |
| "loss": 0.761528491973877, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.96484375, |
| "grad_norm": 0.22716952860355377, |
| "learning_rate": 7.262021589793916e-06, |
| "loss": 0.21712671220302582, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.9658203125, |
| "grad_norm": 0.4826786518096924, |
| "learning_rate": 7.0657507360157025e-06, |
| "loss": 0.6192560791969299, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.966796875, |
| "grad_norm": 0.3611379861831665, |
| "learning_rate": 6.869479882237488e-06, |
| "loss": 0.5660407543182373, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.9677734375, |
| "grad_norm": 0.44197750091552734, |
| "learning_rate": 6.673209028459273e-06, |
| "loss": 0.8223164081573486, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.96875, |
| "grad_norm": 0.45650866627693176, |
| "learning_rate": 6.476938174681061e-06, |
| "loss": 0.5810177326202393, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.9697265625, |
| "grad_norm": 0.6275922060012817, |
| "learning_rate": 6.2806673209028455e-06, |
| "loss": 0.46302127838134766, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.970703125, |
| "grad_norm": 0.29163289070129395, |
| "learning_rate": 6.084396467124633e-06, |
| "loss": 0.49744415283203125, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.9716796875, |
| "grad_norm": 0.4289768934249878, |
| "learning_rate": 5.888125613346419e-06, |
| "loss": 0.39710360765457153, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.97265625, |
| "grad_norm": 0.43311089277267456, |
| "learning_rate": 5.6918547595682045e-06, |
| "loss": 0.4934995174407959, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.9736328125, |
| "grad_norm": 0.4249640703201294, |
| "learning_rate": 5.49558390578999e-06, |
| "loss": 0.6822129487991333, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.974609375, |
| "grad_norm": 0.4080635607242584, |
| "learning_rate": 5.299313052011776e-06, |
| "loss": 0.2851019501686096, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.9755859375, |
| "grad_norm": 0.3082174062728882, |
| "learning_rate": 5.103042198233563e-06, |
| "loss": 0.8851650357246399, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.9765625, |
| "grad_norm": 0.5285578370094299, |
| "learning_rate": 4.906771344455348e-06, |
| "loss": 0.5684286952018738, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.9775390625, |
| "grad_norm": 0.37052616477012634, |
| "learning_rate": 4.710500490677135e-06, |
| "loss": 0.8170924782752991, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.978515625, |
| "grad_norm": 0.46926191449165344, |
| "learning_rate": 4.514229636898921e-06, |
| "loss": 0.665911853313446, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.9794921875, |
| "grad_norm": 0.38110095262527466, |
| "learning_rate": 4.3179587831207065e-06, |
| "loss": 0.9365942478179932, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.98046875, |
| "grad_norm": 0.3803754150867462, |
| "learning_rate": 4.121687929342493e-06, |
| "loss": 0.756361722946167, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.9814453125, |
| "grad_norm": 0.6576887965202332, |
| "learning_rate": 3.925417075564279e-06, |
| "loss": 0.6846331357955933, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.982421875, |
| "grad_norm": 0.6425113081932068, |
| "learning_rate": 3.729146221786065e-06, |
| "loss": 0.7665562629699707, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.9833984375, |
| "grad_norm": 0.28858375549316406, |
| "learning_rate": 3.5328753680078512e-06, |
| "loss": 0.2748746871948242, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.984375, |
| "grad_norm": 0.38693365454673767, |
| "learning_rate": 3.3366045142296366e-06, |
| "loss": 0.6602081060409546, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.9853515625, |
| "grad_norm": 0.39297735691070557, |
| "learning_rate": 3.1403336604514228e-06, |
| "loss": 0.43784576654434204, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.986328125, |
| "grad_norm": 0.4182215929031372, |
| "learning_rate": 2.9440628066732094e-06, |
| "loss": 0.7852948307991028, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.9873046875, |
| "grad_norm": 0.4079328775405884, |
| "learning_rate": 2.747791952894995e-06, |
| "loss": 0.5413305759429932, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.98828125, |
| "grad_norm": 0.41826963424682617, |
| "learning_rate": 2.5515210991167813e-06, |
| "loss": 0.449452668428421, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.9892578125, |
| "grad_norm": 0.31969836354255676, |
| "learning_rate": 2.3552502453385675e-06, |
| "loss": 0.26595592498779297, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.990234375, |
| "grad_norm": 0.466192364692688, |
| "learning_rate": 2.1589793915603533e-06, |
| "loss": 0.6175995469093323, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.9912109375, |
| "grad_norm": 0.4734349846839905, |
| "learning_rate": 1.9627085377821394e-06, |
| "loss": 0.6440984010696411, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.9921875, |
| "grad_norm": 0.4446095824241638, |
| "learning_rate": 1.7664376840039256e-06, |
| "loss": 0.5738557577133179, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.9931640625, |
| "grad_norm": 0.24098840355873108, |
| "learning_rate": 1.5701668302257114e-06, |
| "loss": 0.6320365071296692, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.994140625, |
| "grad_norm": 0.5342791676521301, |
| "learning_rate": 1.3738959764474976e-06, |
| "loss": 0.9431695938110352, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.9951171875, |
| "grad_norm": 0.31406712532043457, |
| "learning_rate": 1.1776251226692837e-06, |
| "loss": 0.6406105160713196, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.99609375, |
| "grad_norm": 0.5162865519523621, |
| "learning_rate": 9.813542688910697e-07, |
| "loss": 0.7935853004455566, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.9970703125, |
| "grad_norm": 0.4624859690666199, |
| "learning_rate": 7.850834151128557e-07, |
| "loss": 0.9667851328849792, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.998046875, |
| "grad_norm": 0.43549951910972595, |
| "learning_rate": 5.888125613346419e-07, |
| "loss": 0.73248291015625, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.9990234375, |
| "grad_norm": 0.6080308556556702, |
| "learning_rate": 3.9254170755642785e-07, |
| "loss": 0.5045021772384644, |
| "step": 1023 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.3927266299724579, |
| "learning_rate": 1.9627085377821392e-07, |
| "loss": 0.37262263894081116, |
| "step": 1024 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1024, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6.871410239702333e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |