| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.79247152055473, |
| "eval_steps": 500.0, |
| "global_step": 2000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00039623576027736503, |
| "grad_norm": 23.5, |
| "learning_rate": 2.6315789473684213e-07, |
| "loss": 1.1837007999420166, |
| "step": 1, |
| "token_acc": 0.8159329621764334 |
| }, |
| { |
| "epoch": 0.00396235760277365, |
| "grad_norm": 21.375, |
| "learning_rate": 2.631578947368421e-06, |
| "loss": 1.123257319132487, |
| "step": 10, |
| "token_acc": 0.8210087927828165 |
| }, |
| { |
| "epoch": 0.0079247152055473, |
| "grad_norm": 7.09375, |
| "learning_rate": 5.263157894736842e-06, |
| "loss": 0.9101140975952149, |
| "step": 20, |
| "token_acc": 0.8285831313786395 |
| }, |
| { |
| "epoch": 0.01188707280832095, |
| "grad_norm": 2.8125, |
| "learning_rate": 7.894736842105265e-06, |
| "loss": 0.5795128822326661, |
| "step": 30, |
| "token_acc": 0.8505745886743207 |
| }, |
| { |
| "epoch": 0.0158494304110946, |
| "grad_norm": 1.7578125, |
| "learning_rate": 1.0526315789473684e-05, |
| "loss": 0.42301692962646487, |
| "step": 40, |
| "token_acc": 0.8767900103104593 |
| }, |
| { |
| "epoch": 0.01981178801386825, |
| "grad_norm": 1.4921875, |
| "learning_rate": 1.3157894736842108e-05, |
| "loss": 0.35302650928497314, |
| "step": 50, |
| "token_acc": 0.8897820845537251 |
| }, |
| { |
| "epoch": 0.0237741456166419, |
| "grad_norm": 1.375, |
| "learning_rate": 1.578947368421053e-05, |
| "loss": 0.33079302310943604, |
| "step": 60, |
| "token_acc": 0.8967102736745091 |
| }, |
| { |
| "epoch": 0.02773650321941555, |
| "grad_norm": 1.25, |
| "learning_rate": 1.8421052631578947e-05, |
| "loss": 0.31292335987091063, |
| "step": 70, |
| "token_acc": 0.9010626512129326 |
| }, |
| { |
| "epoch": 0.0316988608221892, |
| "grad_norm": 1.109375, |
| "learning_rate": 1.999986824534997e-05, |
| "loss": 0.3141467094421387, |
| "step": 80, |
| "token_acc": 0.9005368650633087 |
| }, |
| { |
| "epoch": 0.03566121842496285, |
| "grad_norm": 1.296875, |
| "learning_rate": 1.9998386045408938e-05, |
| "loss": 0.29496400356292723, |
| "step": 90, |
| "token_acc": 0.9055861965123218 |
| }, |
| { |
| "epoch": 0.0396235760277365, |
| "grad_norm": 1.28125, |
| "learning_rate": 1.999525719713366e-05, |
| "loss": 0.2913074970245361, |
| "step": 100, |
| "token_acc": 0.9076149509114921 |
| }, |
| { |
| "epoch": 0.04358593363051015, |
| "grad_norm": 1.6015625, |
| "learning_rate": 1.999048221581858e-05, |
| "loss": 0.2880474805831909, |
| "step": 110, |
| "token_acc": 0.9073922051522615 |
| }, |
| { |
| "epoch": 0.0475482912332838, |
| "grad_norm": 1.390625, |
| "learning_rate": 1.9984061887862118e-05, |
| "loss": 0.27746291160583497, |
| "step": 120, |
| "token_acc": 0.9101783276777932 |
| }, |
| { |
| "epoch": 0.05151064883605745, |
| "grad_norm": 1.171875, |
| "learning_rate": 1.9975997270637172e-05, |
| "loss": 0.273817777633667, |
| "step": 130, |
| "token_acc": 0.909736600422787 |
| }, |
| { |
| "epoch": 0.0554730064388311, |
| "grad_norm": 1.2578125, |
| "learning_rate": 1.9966289692316944e-05, |
| "loss": 0.2767889976501465, |
| "step": 140, |
| "token_acc": 0.9082912026144594 |
| }, |
| { |
| "epoch": 0.05943536404160475, |
| "grad_norm": 1.4921875, |
| "learning_rate": 1.9954940751656245e-05, |
| "loss": 0.27089781761169435, |
| "step": 150, |
| "token_acc": 0.9099060425408418 |
| }, |
| { |
| "epoch": 0.0633977216443784, |
| "grad_norm": 1.171875, |
| "learning_rate": 1.994195231772815e-05, |
| "loss": 0.25421991348266604, |
| "step": 160, |
| "token_acc": 0.9162766481231006 |
| }, |
| { |
| "epoch": 0.06736007924715205, |
| "grad_norm": 1.359375, |
| "learning_rate": 1.9927326529616203e-05, |
| "loss": 0.2611961841583252, |
| "step": 170, |
| "token_acc": 0.9147679722152482 |
| }, |
| { |
| "epoch": 0.0713224368499257, |
| "grad_norm": 1.4296875, |
| "learning_rate": 1.9911065796062137e-05, |
| "loss": 0.264358377456665, |
| "step": 180, |
| "token_acc": 0.9137104702605277 |
| }, |
| { |
| "epoch": 0.07528479445269935, |
| "grad_norm": 1.2265625, |
| "learning_rate": 1.9893172795069144e-05, |
| "loss": 0.27645695209503174, |
| "step": 190, |
| "token_acc": 0.9085774438661551 |
| }, |
| { |
| "epoch": 0.079247152055473, |
| "grad_norm": 1.3125, |
| "learning_rate": 1.9873650473460862e-05, |
| "loss": 0.2564415693283081, |
| "step": 200, |
| "token_acc": 0.9148068228524455 |
| }, |
| { |
| "epoch": 0.08320950965824665, |
| "grad_norm": 1.2265625, |
| "learning_rate": 1.9852502046396035e-05, |
| "loss": 0.2584503650665283, |
| "step": 210, |
| "token_acc": 0.9148747112137906 |
| }, |
| { |
| "epoch": 0.0871718672610203, |
| "grad_norm": 1.1171875, |
| "learning_rate": 1.982973099683902e-05, |
| "loss": 0.25623598098754885, |
| "step": 220, |
| "token_acc": 0.916684382955295 |
| }, |
| { |
| "epoch": 0.09113422486379395, |
| "grad_norm": 1.4296875, |
| "learning_rate": 1.980534107498616e-05, |
| "loss": 0.2456662178039551, |
| "step": 230, |
| "token_acc": 0.9188118082346068 |
| }, |
| { |
| "epoch": 0.0950965824665676, |
| "grad_norm": 1.4609375, |
| "learning_rate": 1.977933629764817e-05, |
| "loss": 0.2530802249908447, |
| "step": 240, |
| "token_acc": 0.9152495545682131 |
| }, |
| { |
| "epoch": 0.09905894006934125, |
| "grad_norm": 1.3203125, |
| "learning_rate": 1.9751720947588603e-05, |
| "loss": 0.24223690032958983, |
| "step": 250, |
| "token_acc": 0.9186887231706855 |
| }, |
| { |
| "epoch": 0.1030212976721149, |
| "grad_norm": 1.21875, |
| "learning_rate": 1.9722499572818496e-05, |
| "loss": 0.23485193252563477, |
| "step": 260, |
| "token_acc": 0.9216265054055996 |
| }, |
| { |
| "epoch": 0.10698365527488855, |
| "grad_norm": 1.5, |
| "learning_rate": 1.969167698584738e-05, |
| "loss": 0.24744803905487062, |
| "step": 270, |
| "token_acc": 0.9177383756974582 |
| }, |
| { |
| "epoch": 0.1109460128776622, |
| "grad_norm": 1.234375, |
| "learning_rate": 1.9659258262890683e-05, |
| "loss": 0.25014376640319824, |
| "step": 280, |
| "token_acc": 0.9170167948905685 |
| }, |
| { |
| "epoch": 0.11490837048043585, |
| "grad_norm": 1.5078125, |
| "learning_rate": 1.9625248743033725e-05, |
| "loss": 0.23340215682983398, |
| "step": 290, |
| "token_acc": 0.9214856049225197 |
| }, |
| { |
| "epoch": 0.1188707280832095, |
| "grad_norm": 1.171875, |
| "learning_rate": 1.9589654027352412e-05, |
| "loss": 0.24289028644561766, |
| "step": 300, |
| "token_acc": 0.9185406963850078 |
| }, |
| { |
| "epoch": 0.12283308568598315, |
| "grad_norm": 1.0625, |
| "learning_rate": 1.9552479977990802e-05, |
| "loss": 0.24520406723022461, |
| "step": 310, |
| "token_acc": 0.9184403422069023 |
| }, |
| { |
| "epoch": 0.1267954432887568, |
| "grad_norm": 1.359375, |
| "learning_rate": 1.9513732717195638e-05, |
| "loss": 0.2427917242050171, |
| "step": 320, |
| "token_acc": 0.9178514285714285 |
| }, |
| { |
| "epoch": 0.13075780089153047, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.9473418626308086e-05, |
| "loss": 0.21972455978393554, |
| "step": 330, |
| "token_acc": 0.9259012550960103 |
| }, |
| { |
| "epoch": 0.1347201584943041, |
| "grad_norm": 1.3984375, |
| "learning_rate": 1.9431544344712776e-05, |
| "loss": 0.2463603973388672, |
| "step": 340, |
| "token_acc": 0.9171354320865818 |
| }, |
| { |
| "epoch": 0.13868251609707777, |
| "grad_norm": 1.25, |
| "learning_rate": 1.9388116768744344e-05, |
| "loss": 0.23121447563171388, |
| "step": 350, |
| "token_acc": 0.9208610209876757 |
| }, |
| { |
| "epoch": 0.1426448736998514, |
| "grad_norm": 1.0546875, |
| "learning_rate": 1.9343143050551684e-05, |
| "loss": 0.2372572898864746, |
| "step": 360, |
| "token_acc": 0.9205740491816241 |
| }, |
| { |
| "epoch": 0.14660723130262507, |
| "grad_norm": 1.6328125, |
| "learning_rate": 1.929663059692002e-05, |
| "loss": 0.23370888233184814, |
| "step": 370, |
| "token_acc": 0.9218769547078884 |
| }, |
| { |
| "epoch": 0.1505695889053987, |
| "grad_norm": 1.3515625, |
| "learning_rate": 1.924858706805112e-05, |
| "loss": 0.22563014030456544, |
| "step": 380, |
| "token_acc": 0.9239206109486627 |
| }, |
| { |
| "epoch": 0.15453194650817237, |
| "grad_norm": 1.2890625, |
| "learning_rate": 1.9199020376301666e-05, |
| "loss": 0.22754812240600586, |
| "step": 390, |
| "token_acc": 0.923770752222635 |
| }, |
| { |
| "epoch": 0.158494304110946, |
| "grad_norm": 1.2109375, |
| "learning_rate": 1.9147938684880213e-05, |
| "loss": 0.233451247215271, |
| "step": 400, |
| "token_acc": 0.9208578517882449 |
| }, |
| { |
| "epoch": 0.16245666171371967, |
| "grad_norm": 1.359375, |
| "learning_rate": 1.9095350406502736e-05, |
| "loss": 0.22117164134979247, |
| "step": 410, |
| "token_acc": 0.9251948698253339 |
| }, |
| { |
| "epoch": 0.1664190193164933, |
| "grad_norm": 1.265625, |
| "learning_rate": 1.9041264202007158e-05, |
| "loss": 0.23051214218139648, |
| "step": 420, |
| "token_acc": 0.9227009356565836 |
| }, |
| { |
| "epoch": 0.17038137691926697, |
| "grad_norm": 1.296875, |
| "learning_rate": 1.8985688978926972e-05, |
| "loss": 0.22384767532348632, |
| "step": 430, |
| "token_acc": 0.9254292644524351 |
| }, |
| { |
| "epoch": 0.1743437345220406, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.892863389002424e-05, |
| "loss": 0.22796776294708251, |
| "step": 440, |
| "token_acc": 0.9236655948553054 |
| }, |
| { |
| "epoch": 0.17830609212481427, |
| "grad_norm": 1.1015625, |
| "learning_rate": 1.887010833178222e-05, |
| "loss": 0.2255650520324707, |
| "step": 450, |
| "token_acc": 0.9233627684120709 |
| }, |
| { |
| "epoch": 0.1822684497275879, |
| "grad_norm": 1.3828125, |
| "learning_rate": 1.8810121942857848e-05, |
| "loss": 0.21253745555877684, |
| "step": 460, |
| "token_acc": 0.9272634714542769 |
| }, |
| { |
| "epoch": 0.18623080733036157, |
| "grad_norm": 1.171875, |
| "learning_rate": 1.8748684602494327e-05, |
| "loss": 0.22184033393859864, |
| "step": 470, |
| "token_acc": 0.9256473357586134 |
| }, |
| { |
| "epoch": 0.1901931649331352, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.8685806428894113e-05, |
| "loss": 0.2163544178009033, |
| "step": 480, |
| "token_acc": 0.92641120988206 |
| }, |
| { |
| "epoch": 0.19415552253590887, |
| "grad_norm": 1.234375, |
| "learning_rate": 1.8621497777552508e-05, |
| "loss": 0.2326265335083008, |
| "step": 490, |
| "token_acc": 0.9219484631704639 |
| }, |
| { |
| "epoch": 0.1981178801386825, |
| "grad_norm": 1.1328125, |
| "learning_rate": 1.8555769239552232e-05, |
| "loss": 0.21914072036743165, |
| "step": 500, |
| "token_acc": 0.9266210447862321 |
| }, |
| { |
| "epoch": 0.20208023774145617, |
| "grad_norm": 1.546875, |
| "learning_rate": 1.848863163981914e-05, |
| "loss": 0.22959327697753906, |
| "step": 510, |
| "token_acc": 0.9215090641842234 |
| }, |
| { |
| "epoch": 0.2060425953442298, |
| "grad_norm": 1.109375, |
| "learning_rate": 1.8420096035339454e-05, |
| "loss": 0.21052975654602052, |
| "step": 520, |
| "token_acc": 0.9286930380232219 |
| }, |
| { |
| "epoch": 0.21000495294700347, |
| "grad_norm": 1.0703125, |
| "learning_rate": 1.8350173713338777e-05, |
| "loss": 0.22955830097198487, |
| "step": 530, |
| "token_acc": 0.9225931053342221 |
| }, |
| { |
| "epoch": 0.2139673105497771, |
| "grad_norm": 1.2890625, |
| "learning_rate": 1.827887618942318e-05, |
| "loss": 0.21942346096038817, |
| "step": 540, |
| "token_acc": 0.9257000477242205 |
| }, |
| { |
| "epoch": 0.21792966815255077, |
| "grad_norm": 1.234375, |
| "learning_rate": 1.8206215205682683e-05, |
| "loss": 0.21607930660247804, |
| "step": 550, |
| "token_acc": 0.9265396164644921 |
| }, |
| { |
| "epoch": 0.2218920257553244, |
| "grad_norm": 1.359375, |
| "learning_rate": 1.8132202728757428e-05, |
| "loss": 0.21843266487121582, |
| "step": 560, |
| "token_acc": 0.9258849850056328 |
| }, |
| { |
| "epoch": 0.22585438335809807, |
| "grad_norm": 1.3671875, |
| "learning_rate": 1.805685094786689e-05, |
| "loss": 0.21874871253967285, |
| "step": 570, |
| "token_acc": 0.9250736338016231 |
| }, |
| { |
| "epoch": 0.2298167409608717, |
| "grad_norm": 1.1640625, |
| "learning_rate": 1.7980172272802398e-05, |
| "loss": 0.22817540168762207, |
| "step": 580, |
| "token_acc": 0.9221536778365731 |
| }, |
| { |
| "epoch": 0.23377909856364537, |
| "grad_norm": 1.3671875, |
| "learning_rate": 1.790217933188336e-05, |
| "loss": 0.20628876686096193, |
| "step": 590, |
| "token_acc": 0.9291559217209775 |
| }, |
| { |
| "epoch": 0.237741456166419, |
| "grad_norm": 1.2265625, |
| "learning_rate": 1.7822884969877493e-05, |
| "loss": 0.22458946704864502, |
| "step": 600, |
| "token_acc": 0.9231406464867372 |
| }, |
| { |
| "epoch": 0.24170381376919267, |
| "grad_norm": 1.3046875, |
| "learning_rate": 1.7742302245885384e-05, |
| "loss": 0.20527830123901367, |
| "step": 610, |
| "token_acc": 0.9306424304540271 |
| }, |
| { |
| "epoch": 0.2456661713719663, |
| "grad_norm": 1.1171875, |
| "learning_rate": 1.766044443118978e-05, |
| "loss": 0.2055346965789795, |
| "step": 620, |
| "token_acc": 0.9294153185205075 |
| }, |
| { |
| "epoch": 0.24962852897473997, |
| "grad_norm": 1.1640625, |
| "learning_rate": 1.7577325007069927e-05, |
| "loss": 0.21000022888183595, |
| "step": 630, |
| "token_acc": 0.9276756514760238 |
| }, |
| { |
| "epoch": 0.2535908865775136, |
| "grad_norm": 1.2265625, |
| "learning_rate": 1.7492957662581297e-05, |
| "loss": 0.20726590156555175, |
| "step": 640, |
| "token_acc": 0.9288681287625508 |
| }, |
| { |
| "epoch": 0.25755324418028724, |
| "grad_norm": 1.1328125, |
| "learning_rate": 1.7407356292301134e-05, |
| "loss": 0.20893335342407227, |
| "step": 650, |
| "token_acc": 0.9287459199802928 |
| }, |
| { |
| "epoch": 0.26151560178306094, |
| "grad_norm": 1.46875, |
| "learning_rate": 1.7320534994040148e-05, |
| "loss": 0.2122333526611328, |
| "step": 660, |
| "token_acc": 0.9268251113697004 |
| }, |
| { |
| "epoch": 0.26547795938583457, |
| "grad_norm": 2.21875, |
| "learning_rate": 1.7232508066520702e-05, |
| "loss": 0.2119227170944214, |
| "step": 670, |
| "token_acc": 0.9272324174995067 |
| }, |
| { |
| "epoch": 0.2694403169886082, |
| "grad_norm": 1.2890625, |
| "learning_rate": 1.7143290007021942e-05, |
| "loss": 0.2144456148147583, |
| "step": 680, |
| "token_acc": 0.9266572858854115 |
| }, |
| { |
| "epoch": 0.27340267459138184, |
| "grad_norm": 1.3515625, |
| "learning_rate": 1.7052895508992236e-05, |
| "loss": 0.20908637046813966, |
| "step": 690, |
| "token_acc": 0.9279253384640653 |
| }, |
| { |
| "epoch": 0.27736503219415554, |
| "grad_norm": 1.2734375, |
| "learning_rate": 1.696133945962927e-05, |
| "loss": 0.21407780647277833, |
| "step": 700, |
| "token_acc": 0.9275297697109584 |
| }, |
| { |
| "epoch": 0.2813273897969292, |
| "grad_norm": 1.3984375, |
| "learning_rate": 1.6868636937428254e-05, |
| "loss": 0.20272161960601806, |
| "step": 710, |
| "token_acc": 0.9313989228518674 |
| }, |
| { |
| "epoch": 0.2852897473997028, |
| "grad_norm": 1.359375, |
| "learning_rate": 1.677480320969865e-05, |
| "loss": 0.20830063819885253, |
| "step": 720, |
| "token_acc": 0.9284670505715276 |
| }, |
| { |
| "epoch": 0.2892521050024765, |
| "grad_norm": 1.3046875, |
| "learning_rate": 1.6679853730049743e-05, |
| "loss": 0.20571448802947997, |
| "step": 730, |
| "token_acc": 0.9288137503522119 |
| }, |
| { |
| "epoch": 0.29321446260525014, |
| "grad_norm": 1.4375, |
| "learning_rate": 1.6583804135845582e-05, |
| "loss": 0.207275128364563, |
| "step": 740, |
| "token_acc": 0.9295052506473598 |
| }, |
| { |
| "epoch": 0.2971768202080238, |
| "grad_norm": 1.1640625, |
| "learning_rate": 1.648667024562963e-05, |
| "loss": 0.2059840202331543, |
| "step": 750, |
| "token_acc": 0.9303702716282313 |
| }, |
| { |
| "epoch": 0.3011391778107974, |
| "grad_norm": 1.4296875, |
| "learning_rate": 1.638846805651961e-05, |
| "loss": 0.20929555892944335, |
| "step": 760, |
| "token_acc": 0.9285013576720667 |
| }, |
| { |
| "epoch": 0.3051015354135711, |
| "grad_norm": 1.2265625, |
| "learning_rate": 1.62892137415729e-05, |
| "loss": 0.2164773464202881, |
| "step": 770, |
| "token_acc": 0.9268445872201972 |
| }, |
| { |
| "epoch": 0.30906389301634474, |
| "grad_norm": 1.078125, |
| "learning_rate": 1.6188923647122946e-05, |
| "loss": 0.20146725177764893, |
| "step": 780, |
| "token_acc": 0.9308608962964089 |
| }, |
| { |
| "epoch": 0.3130262506191184, |
| "grad_norm": 1.1796875, |
| "learning_rate": 1.608761429008721e-05, |
| "loss": 0.19116392135620117, |
| "step": 790, |
| "token_acc": 0.9360810066351728 |
| }, |
| { |
| "epoch": 0.316988608221892, |
| "grad_norm": 1.40625, |
| "learning_rate": 1.5985302355246932e-05, |
| "loss": 0.19471538066864014, |
| "step": 800, |
| "token_acc": 0.9334035945789697 |
| }, |
| { |
| "epoch": 0.3209509658246657, |
| "grad_norm": 1.3984375, |
| "learning_rate": 1.5882004692499324e-05, |
| "loss": 0.20449495315551758, |
| "step": 810, |
| "token_acc": 0.9296946281131374 |
| }, |
| { |
| "epoch": 0.32491332342743934, |
| "grad_norm": 1.4140625, |
| "learning_rate": 1.5777738314082514e-05, |
| "loss": 0.2058267116546631, |
| "step": 820, |
| "token_acc": 0.930226312581988 |
| }, |
| { |
| "epoch": 0.328875681030213, |
| "grad_norm": 1.4296875, |
| "learning_rate": 1.567252039177378e-05, |
| "loss": 0.19794673919677735, |
| "step": 830, |
| "token_acc": 0.931884692988862 |
| }, |
| { |
| "epoch": 0.3328380386329866, |
| "grad_norm": 1.390625, |
| "learning_rate": 1.5566368254061505e-05, |
| "loss": 0.20482149124145507, |
| "step": 840, |
| "token_acc": 0.9305290785274152 |
| }, |
| { |
| "epoch": 0.3368003962357603, |
| "grad_norm": 1.53125, |
| "learning_rate": 1.5459299383291347e-05, |
| "loss": 0.19639644622802735, |
| "step": 850, |
| "token_acc": 0.9322417158382036 |
| }, |
| { |
| "epoch": 0.34076275383853394, |
| "grad_norm": 1.4375, |
| "learning_rate": 1.5351331412787004e-05, |
| "loss": 0.2021495819091797, |
| "step": 860, |
| "token_acc": 0.9298179216523921 |
| }, |
| { |
| "epoch": 0.3447251114413076, |
| "grad_norm": 1.1875, |
| "learning_rate": 1.52424821239462e-05, |
| "loss": 0.20063307285308837, |
| "step": 870, |
| "token_acc": 0.9313979538110527 |
| }, |
| { |
| "epoch": 0.3486874690440812, |
| "grad_norm": 1.6328125, |
| "learning_rate": 1.5132769443312207e-05, |
| "loss": 0.20427477359771729, |
| "step": 880, |
| "token_acc": 0.9299313715863092 |
| }, |
| { |
| "epoch": 0.3526498266468549, |
| "grad_norm": 1.4609375, |
| "learning_rate": 1.5022211439621521e-05, |
| "loss": 0.20063276290893556, |
| "step": 890, |
| "token_acc": 0.9309864789183134 |
| }, |
| { |
| "epoch": 0.35661218424962854, |
| "grad_norm": 1.328125, |
| "learning_rate": 1.4910826320828085e-05, |
| "loss": 0.19403212070465087, |
| "step": 900, |
| "token_acc": 0.9340383217142124 |
| }, |
| { |
| "epoch": 0.3605745418524022, |
| "grad_norm": 1.21875, |
| "learning_rate": 1.4798632431104591e-05, |
| "loss": 0.1897117853164673, |
| "step": 910, |
| "token_acc": 0.9360307874252368 |
| }, |
| { |
| "epoch": 0.3645368994551758, |
| "grad_norm": 1.4296875, |
| "learning_rate": 1.4685648247821376e-05, |
| "loss": 0.19313969612121581, |
| "step": 920, |
| "token_acc": 0.9329953036961753 |
| }, |
| { |
| "epoch": 0.3684992570579495, |
| "grad_norm": 1.0859375, |
| "learning_rate": 1.457189237850332e-05, |
| "loss": 0.203882098197937, |
| "step": 930, |
| "token_acc": 0.9312272344443193 |
| }, |
| { |
| "epoch": 0.37246161466072314, |
| "grad_norm": 0.875, |
| "learning_rate": 1.4457383557765385e-05, |
| "loss": 0.1886841893196106, |
| "step": 940, |
| "token_acc": 0.9355444372139664 |
| }, |
| { |
| "epoch": 0.3764239722634968, |
| "grad_norm": 1.359375, |
| "learning_rate": 1.4342140644227151e-05, |
| "loss": 0.1905367612838745, |
| "step": 950, |
| "token_acc": 0.9352085303078055 |
| }, |
| { |
| "epoch": 0.3803863298662704, |
| "grad_norm": 1.140625, |
| "learning_rate": 1.4226182617406996e-05, |
| "loss": 0.19780998229980468, |
| "step": 960, |
| "token_acc": 0.9324879595849204 |
| }, |
| { |
| "epoch": 0.3843486874690441, |
| "grad_norm": 1.3203125, |
| "learning_rate": 1.41095285745963e-05, |
| "loss": 0.19177125692367553, |
| "step": 970, |
| "token_acc": 0.9343932834841926 |
| }, |
| { |
| "epoch": 0.38831104507181774, |
| "grad_norm": 1.7578125, |
| "learning_rate": 1.399219772771431e-05, |
| "loss": 0.1960275650024414, |
| "step": 980, |
| "token_acc": 0.9329073312723757 |
| }, |
| { |
| "epoch": 0.3922734026745914, |
| "grad_norm": 1.34375, |
| "learning_rate": 1.3874209400144092e-05, |
| "loss": 0.18507509231567382, |
| "step": 990, |
| "token_acc": 0.9359859759133133 |
| }, |
| { |
| "epoch": 0.396235760277365, |
| "grad_norm": 1.2734375, |
| "learning_rate": 1.3755583023550128e-05, |
| "loss": 0.1876603364944458, |
| "step": 1000, |
| "token_acc": 0.9350970511384845 |
| }, |
| { |
| "epoch": 0.4001981178801387, |
| "grad_norm": 1.28125, |
| "learning_rate": 1.3636338134678104e-05, |
| "loss": 0.17850277423858643, |
| "step": 1010, |
| "token_acc": 0.9377877376733048 |
| }, |
| { |
| "epoch": 0.40416047548291234, |
| "grad_norm": 1.234375, |
| "learning_rate": 1.3516494372137368e-05, |
| "loss": 0.1958215355873108, |
| "step": 1020, |
| "token_acc": 0.9318651647470785 |
| }, |
| { |
| "epoch": 0.408122833085686, |
| "grad_norm": 1.6640625, |
| "learning_rate": 1.3396071473166614e-05, |
| "loss": 0.18602523803710938, |
| "step": 1030, |
| "token_acc": 0.9359838557500786 |
| }, |
| { |
| "epoch": 0.4120851906884596, |
| "grad_norm": 1.2109375, |
| "learning_rate": 1.327508927038334e-05, |
| "loss": 0.18929693698883057, |
| "step": 1040, |
| "token_acc": 0.9350099237438629 |
| }, |
| { |
| "epoch": 0.4160475482912333, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.3153567688517567e-05, |
| "loss": 0.18981436491012574, |
| "step": 1050, |
| "token_acc": 0.934143741104814 |
| }, |
| { |
| "epoch": 0.42000990589400694, |
| "grad_norm": 1.3359375, |
| "learning_rate": 1.3031526741130435e-05, |
| "loss": 0.1816575288772583, |
| "step": 1060, |
| "token_acc": 0.9370538611291369 |
| }, |
| { |
| "epoch": 0.4239722634967806, |
| "grad_norm": 1.3203125, |
| "learning_rate": 1.2908986527318121e-05, |
| "loss": 0.19676063060760499, |
| "step": 1070, |
| "token_acc": 0.932801285003426 |
| }, |
| { |
| "epoch": 0.4279346210995542, |
| "grad_norm": 1.2734375, |
| "learning_rate": 1.2785967228401688e-05, |
| "loss": 0.19254275560379028, |
| "step": 1080, |
| "token_acc": 0.9333315147712704 |
| }, |
| { |
| "epoch": 0.4318969787023279, |
| "grad_norm": 1.5078125, |
| "learning_rate": 1.266248910460341e-05, |
| "loss": 0.18717528581619264, |
| "step": 1090, |
| "token_acc": 0.9360305301291446 |
| }, |
| { |
| "epoch": 0.43585933630510154, |
| "grad_norm": 1.734375, |
| "learning_rate": 1.2538572491710079e-05, |
| "loss": 0.1824967622756958, |
| "step": 1100, |
| "token_acc": 0.9372006812944594 |
| }, |
| { |
| "epoch": 0.4398216939078752, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.2414237797723876e-05, |
| "loss": 0.17919249534606935, |
| "step": 1110, |
| "token_acc": 0.9387596071733562 |
| }, |
| { |
| "epoch": 0.4437840515106488, |
| "grad_norm": 1.296875, |
| "learning_rate": 1.2289505499501341e-05, |
| "loss": 0.18926095962524414, |
| "step": 1120, |
| "token_acc": 0.9342525248667318 |
| }, |
| { |
| "epoch": 0.4477464091134225, |
| "grad_norm": 1.28125, |
| "learning_rate": 1.2164396139381029e-05, |
| "loss": 0.20064361095428468, |
| "step": 1130, |
| "token_acc": 0.9315847075431296 |
| }, |
| { |
| "epoch": 0.45170876671619614, |
| "grad_norm": 1.484375, |
| "learning_rate": 1.2038930321800346e-05, |
| "loss": 0.1895804524421692, |
| "step": 1140, |
| "token_acc": 0.9349271790531848 |
| }, |
| { |
| "epoch": 0.4556711243189698, |
| "grad_norm": 1.5703125, |
| "learning_rate": 1.1913128709902182e-05, |
| "loss": 0.1807018995285034, |
| "step": 1150, |
| "token_acc": 0.9369057628872647 |
| }, |
| { |
| "epoch": 0.4596334819217434, |
| "grad_norm": 1.4140625, |
| "learning_rate": 1.1787012022131863e-05, |
| "loss": 0.1842559814453125, |
| "step": 1160, |
| "token_acc": 0.9362108645620739 |
| }, |
| { |
| "epoch": 0.4635958395245171, |
| "grad_norm": 1.3203125, |
| "learning_rate": 1.1660601028825013e-05, |
| "loss": 0.19840482473373414, |
| "step": 1170, |
| "token_acc": 0.9314812356169233 |
| }, |
| { |
| "epoch": 0.46755819712729074, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.1533916548786856e-05, |
| "loss": 0.1772662878036499, |
| "step": 1180, |
| "token_acc": 0.9394712189028833 |
| }, |
| { |
| "epoch": 0.4715205547300644, |
| "grad_norm": 1.2734375, |
| "learning_rate": 1.1406979445863515e-05, |
| "loss": 0.18831554651260377, |
| "step": 1190, |
| "token_acc": 0.935608596292791 |
| }, |
| { |
| "epoch": 0.475482912332838, |
| "grad_norm": 1.34375, |
| "learning_rate": 1.127981062550595e-05, |
| "loss": 0.18489151000976561, |
| "step": 1200, |
| "token_acc": 0.9360608419277421 |
| }, |
| { |
| "epoch": 0.4794452699356117, |
| "grad_norm": 1.3984375, |
| "learning_rate": 1.1152431031326978e-05, |
| "loss": 0.17761152982711792, |
| "step": 1210, |
| "token_acc": 0.9386175400572799 |
| }, |
| { |
| "epoch": 0.48340762753838534, |
| "grad_norm": 3.109375, |
| "learning_rate": 1.102486164165207e-05, |
| "loss": 0.18663549423217773, |
| "step": 1220, |
| "token_acc": 0.9355476517845982 |
| }, |
| { |
| "epoch": 0.487369985141159, |
| "grad_norm": 1.25, |
| "learning_rate": 1.0897123466064376e-05, |
| "loss": 0.18886669874191284, |
| "step": 1230, |
| "token_acc": 0.9356319723508901 |
| }, |
| { |
| "epoch": 0.4913323427439326, |
| "grad_norm": 1.34375, |
| "learning_rate": 1.0769237541944639e-05, |
| "loss": 0.18777060508728027, |
| "step": 1240, |
| "token_acc": 0.9354588236528564 |
| }, |
| { |
| "epoch": 0.4952947003467063, |
| "grad_norm": 1.5, |
| "learning_rate": 1.0641224931006518e-05, |
| "loss": 0.17902556657791138, |
| "step": 1250, |
| "token_acc": 0.9375767442118891 |
| }, |
| { |
| "epoch": 0.49925705794947994, |
| "grad_norm": 1.15625, |
| "learning_rate": 1.0513106715827897e-05, |
| "loss": 0.18400684595108033, |
| "step": 1260, |
| "token_acc": 0.9370039916704695 |
| }, |
| { |
| "epoch": 0.5032194155522536, |
| "grad_norm": 1.40625, |
| "learning_rate": 1.0384903996378784e-05, |
| "loss": 0.17728078365325928, |
| "step": 1270, |
| "token_acc": 0.9389623546976645 |
| }, |
| { |
| "epoch": 0.5071817731550272, |
| "grad_norm": 1.21875, |
| "learning_rate": 1.02566378865463e-05, |
| "loss": 0.18042536973953247, |
| "step": 1280, |
| "token_acc": 0.9374939011828994 |
| }, |
| { |
| "epoch": 0.5111441307578009, |
| "grad_norm": 1.4609375, |
| "learning_rate": 1.0128329510657426e-05, |
| "loss": 0.18618935346603394, |
| "step": 1290, |
| "token_acc": 0.9355284924654325 |
| }, |
| { |
| "epoch": 0.5151064883605745, |
| "grad_norm": 1.453125, |
| "learning_rate": 1e-05, |
| "loss": 0.19038233757019044, |
| "step": 1300, |
| "token_acc": 0.9348978046934141 |
| }, |
| { |
| "epoch": 0.5190688459633482, |
| "grad_norm": 1.921875, |
| "learning_rate": 9.871670489342577e-06, |
| "loss": 0.18166159391403197, |
| "step": 1310, |
| "token_acc": 0.9376118246059261 |
| }, |
| { |
| "epoch": 0.5230312035661219, |
| "grad_norm": 1.3828125, |
| "learning_rate": 9.743362113453705e-06, |
| "loss": 0.18087191581726075, |
| "step": 1320, |
| "token_acc": 0.9368352123903884 |
| }, |
| { |
| "epoch": 0.5269935611688955, |
| "grad_norm": 1.7890625, |
| "learning_rate": 9.615096003621221e-06, |
| "loss": 0.17757006883621215, |
| "step": 1330, |
| "token_acc": 0.9385874468359324 |
| }, |
| { |
| "epoch": 0.5309559187716691, |
| "grad_norm": 1.5, |
| "learning_rate": 9.486893284172103e-06, |
| "loss": 0.1725843906402588, |
| "step": 1340, |
| "token_acc": 0.9396233946138856 |
| }, |
| { |
| "epoch": 0.5349182763744428, |
| "grad_norm": 1.1953125, |
| "learning_rate": 9.358775068993484e-06, |
| "loss": 0.17776031494140626, |
| "step": 1350, |
| "token_acc": 0.9395069576186172 |
| }, |
| { |
| "epoch": 0.5388806339772164, |
| "grad_norm": 1.2890625, |
| "learning_rate": 9.230762458055363e-06, |
| "loss": 0.18048588037490845, |
| "step": 1360, |
| "token_acc": 0.9376439779197635 |
| }, |
| { |
| "epoch": 0.54284299157999, |
| "grad_norm": 1.34375, |
| "learning_rate": 9.102876533935626e-06, |
| "loss": 0.1871953248977661, |
| "step": 1370, |
| "token_acc": 0.9352319496539852 |
| }, |
| { |
| "epoch": 0.5468053491827637, |
| "grad_norm": 1.8203125, |
| "learning_rate": 8.975138358347931e-06, |
| "loss": 0.17401375770568847, |
| "step": 1380, |
| "token_acc": 0.9395100063574127 |
| }, |
| { |
| "epoch": 0.5507677067855374, |
| "grad_norm": 1.5625, |
| "learning_rate": 8.847568968673025e-06, |
| "loss": 0.1821776032447815, |
| "step": 1390, |
| "token_acc": 0.9382589568204417 |
| }, |
| { |
| "epoch": 0.5547300643883111, |
| "grad_norm": 1.3203125, |
| "learning_rate": 8.720189374494055e-06, |
| "loss": 0.18482091426849365, |
| "step": 1400, |
| "token_acc": 0.9366791672453971 |
| }, |
| { |
| "epoch": 0.5586924219910847, |
| "grad_norm": 1.140625, |
| "learning_rate": 8.593020554136491e-06, |
| "loss": 0.17976686954498292, |
| "step": 1410, |
| "token_acc": 0.938686745381246 |
| }, |
| { |
| "epoch": 0.5626547795938583, |
| "grad_norm": 1.7734375, |
| "learning_rate": 8.466083451213145e-06, |
| "loss": 0.16887048482894898, |
| "step": 1420, |
| "token_acc": 0.9413505379807353 |
| }, |
| { |
| "epoch": 0.566617137196632, |
| "grad_norm": 1.53125, |
| "learning_rate": 8.339398971174987e-06, |
| "loss": 0.181710684299469, |
| "step": 1430, |
| "token_acc": 0.9381945571057755 |
| }, |
| { |
| "epoch": 0.5705794947994056, |
| "grad_norm": 1.421875, |
| "learning_rate": 8.212987977868138e-06, |
| "loss": 0.192651104927063, |
| "step": 1440, |
| "token_acc": 0.9346234811416059 |
| }, |
| { |
| "epoch": 0.5745418524021793, |
| "grad_norm": 1.4375, |
| "learning_rate": 8.086871290097822e-06, |
| "loss": 0.1725835084915161, |
| "step": 1450, |
| "token_acc": 0.9401547502340085 |
| }, |
| { |
| "epoch": 0.578504210004953, |
| "grad_norm": 1.390625, |
| "learning_rate": 7.961069678199658e-06, |
| "loss": 0.18463332653045655, |
| "step": 1460, |
| "token_acc": 0.9356090428523226 |
| }, |
| { |
| "epoch": 0.5824665676077266, |
| "grad_norm": 1.390625, |
| "learning_rate": 7.835603860618973e-06, |
| "loss": 0.18219418525695802, |
| "step": 1470, |
| "token_acc": 0.9381983863723681 |
| }, |
| { |
| "epoch": 0.5864289252105003, |
| "grad_norm": 1.3125, |
| "learning_rate": 7.710494500498662e-06, |
| "loss": 0.17673687934875487, |
| "step": 1480, |
| "token_acc": 0.9390315988583202 |
| }, |
| { |
| "epoch": 0.5903912828132739, |
| "grad_norm": 1.6796875, |
| "learning_rate": 7.585762202276129e-06, |
| "loss": 0.1698865532875061, |
| "step": 1490, |
| "token_acc": 0.9415788913714225 |
| }, |
| { |
| "epoch": 0.5943536404160475, |
| "grad_norm": 1.53125, |
| "learning_rate": 7.461427508289922e-06, |
| "loss": 0.17974636554718018, |
| "step": 1500, |
| "token_acc": 0.9385133263736498 |
| }, |
| { |
| "epoch": 0.5983159980188212, |
| "grad_norm": 1.0859375, |
| "learning_rate": 7.337510895396591e-06, |
| "loss": 0.1787565231323242, |
| "step": 1510, |
| "token_acc": 0.9384560906515581 |
| }, |
| { |
| "epoch": 0.6022783556215948, |
| "grad_norm": 1.4609375, |
| "learning_rate": 7.214032771598316e-06, |
| "loss": 0.1744428515434265, |
| "step": 1520, |
| "token_acc": 0.9401470564435646 |
| }, |
| { |
| "epoch": 0.6062407132243685, |
| "grad_norm": 1.265625, |
| "learning_rate": 7.091013472681883e-06, |
| "loss": 0.17123017311096192, |
| "step": 1530, |
| "token_acc": 0.9405837916975914 |
| }, |
| { |
| "epoch": 0.6102030708271422, |
| "grad_norm": 1.5234375, |
| "learning_rate": 6.968473258869566e-06, |
| "loss": 0.1690650463104248, |
| "step": 1540, |
| "token_acc": 0.941058213231226 |
| }, |
| { |
| "epoch": 0.6141654284299158, |
| "grad_norm": 1.453125, |
| "learning_rate": 6.846432311482436e-06, |
| "loss": 0.18313372135162354, |
| "step": 1550, |
| "token_acc": 0.9371285854342504 |
| }, |
| { |
| "epoch": 0.6181277860326895, |
| "grad_norm": 1.7890625, |
| "learning_rate": 6.724910729616665e-06, |
| "loss": 0.17572647333145142, |
| "step": 1560, |
| "token_acc": 0.939426531245842 |
| }, |
| { |
| "epoch": 0.6220901436354631, |
| "grad_norm": 1.25, |
| "learning_rate": 6.603928526833386e-06, |
| "loss": 0.16190264225006104, |
| "step": 1570, |
| "token_acc": 0.9443632366772048 |
| }, |
| { |
| "epoch": 0.6260525012382367, |
| "grad_norm": 1.265625, |
| "learning_rate": 6.483505627862632e-06, |
| "loss": 0.1694807767868042, |
| "step": 1580, |
| "token_acc": 0.9416789717779672 |
| }, |
| { |
| "epoch": 0.6300148588410104, |
| "grad_norm": 1.5, |
| "learning_rate": 6.363661865321898e-06, |
| "loss": 0.17748751640319824, |
| "step": 1590, |
| "token_acc": 0.9385861686705892 |
| }, |
| { |
| "epoch": 0.633977216443784, |
| "grad_norm": 1.3984375, |
| "learning_rate": 6.244416976449875e-06, |
| "loss": 0.17347029447555543, |
| "step": 1600, |
| "token_acc": 0.9403739289918152 |
| }, |
| { |
| "epoch": 0.6379395740465577, |
| "grad_norm": 1.171875, |
| "learning_rate": 6.125790599855912e-06, |
| "loss": 0.1826688289642334, |
| "step": 1610, |
| "token_acc": 0.9372320591550186 |
| }, |
| { |
| "epoch": 0.6419019316493314, |
| "grad_norm": 1.2890625, |
| "learning_rate": 6.007802272285693e-06, |
| "loss": 0.17403693199157716, |
| "step": 1620, |
| "token_acc": 0.9401551062440614 |
| }, |
| { |
| "epoch": 0.645864289252105, |
| "grad_norm": 1.625, |
| "learning_rate": 5.890471425403703e-06, |
| "loss": 0.18286362886428834, |
| "step": 1630, |
| "token_acc": 0.9368950000596794 |
| }, |
| { |
| "epoch": 0.6498266468548787, |
| "grad_norm": 1.4375, |
| "learning_rate": 5.773817382593008e-06, |
| "loss": 0.1804821014404297, |
| "step": 1640, |
| "token_acc": 0.9376396973396319 |
| }, |
| { |
| "epoch": 0.6537890044576523, |
| "grad_norm": 1.375, |
| "learning_rate": 5.65785935577285e-06, |
| "loss": 0.17369402647018434, |
| "step": 1650, |
| "token_acc": 0.9392859770259903 |
| }, |
| { |
| "epoch": 0.657751362060426, |
| "grad_norm": 1.3515625, |
| "learning_rate": 5.542616442234618e-06, |
| "loss": 0.1656261920928955, |
| "step": 1660, |
| "token_acc": 0.943150599230765 |
| }, |
| { |
| "epoch": 0.6617137196631996, |
| "grad_norm": 1.2890625, |
| "learning_rate": 5.428107621496681e-06, |
| "loss": 0.17441051006317138, |
| "step": 1670, |
| "token_acc": 0.9392566132136696 |
| }, |
| { |
| "epoch": 0.6656760772659732, |
| "grad_norm": 1.3125, |
| "learning_rate": 5.3143517521786255e-06, |
| "loss": 0.17141460180282592, |
| "step": 1680, |
| "token_acc": 0.9404770520787022 |
| }, |
| { |
| "epoch": 0.6696384348687469, |
| "grad_norm": 1.4921875, |
| "learning_rate": 5.201367568895408e-06, |
| "loss": 0.1779789924621582, |
| "step": 1690, |
| "token_acc": 0.9389050144048604 |
| }, |
| { |
| "epoch": 0.6736007924715206, |
| "grad_norm": 1.2890625, |
| "learning_rate": 5.089173679171922e-06, |
| "loss": 0.1696174383163452, |
| "step": 1700, |
| "token_acc": 0.9415787866940171 |
| }, |
| { |
| "epoch": 0.6775631500742942, |
| "grad_norm": 1.265625, |
| "learning_rate": 4.977788560378484e-06, |
| "loss": 0.17647080421447753, |
| "step": 1710, |
| "token_acc": 0.9402322070530992 |
| }, |
| { |
| "epoch": 0.6815255076770679, |
| "grad_norm": 1.3203125, |
| "learning_rate": 4.867230556687797e-06, |
| "loss": 0.17825334072113036, |
| "step": 1720, |
| "token_acc": 0.9382623548644003 |
| }, |
| { |
| "epoch": 0.6854878652798415, |
| "grad_norm": 1.390625, |
| "learning_rate": 4.7575178760538e-06, |
| "loss": 0.1728861927986145, |
| "step": 1730, |
| "token_acc": 0.939594911427579 |
| }, |
| { |
| "epoch": 0.6894502228826151, |
| "grad_norm": 1.5234375, |
| "learning_rate": 4.648668587212998e-06, |
| "loss": 0.179952073097229, |
| "step": 1740, |
| "token_acc": 0.9381945052060547 |
| }, |
| { |
| "epoch": 0.6934125804853888, |
| "grad_norm": 1.28125, |
| "learning_rate": 4.5407006167086575e-06, |
| "loss": 0.17567566633224488, |
| "step": 1750, |
| "token_acc": 0.9399701307689505 |
| }, |
| { |
| "epoch": 0.6973749380881624, |
| "grad_norm": 1.328125, |
| "learning_rate": 4.433631745938497e-06, |
| "loss": 0.17287354469299315, |
| "step": 1760, |
| "token_acc": 0.9405146011104378 |
| }, |
| { |
| "epoch": 0.7013372956909361, |
| "grad_norm": 1.5859375, |
| "learning_rate": 4.327479608226226e-06, |
| "loss": 0.17426562309265137, |
| "step": 1770, |
| "token_acc": 0.9401683220236025 |
| }, |
| { |
| "epoch": 0.7052996532937098, |
| "grad_norm": 1.375, |
| "learning_rate": 4.222261685917489e-06, |
| "loss": 0.1734224557876587, |
| "step": 1780, |
| "token_acc": 0.9401309334234104 |
| }, |
| { |
| "epoch": 0.7092620108964834, |
| "grad_norm": 1.2578125, |
| "learning_rate": 4.117995307500677e-06, |
| "loss": 0.17531417608261107, |
| "step": 1790, |
| "token_acc": 0.9409358352138655 |
| }, |
| { |
| "epoch": 0.7132243684992571, |
| "grad_norm": 1.1171875, |
| "learning_rate": 4.014697644753069e-06, |
| "loss": 0.17481131553649903, |
| "step": 1800, |
| "token_acc": 0.9396957170350632 |
| }, |
| { |
| "epoch": 0.7171867261020307, |
| "grad_norm": 1.3046875, |
| "learning_rate": 3.912385709912794e-06, |
| "loss": 0.17085225582122804, |
| "step": 1810, |
| "token_acc": 0.9413413462722593 |
| }, |
| { |
| "epoch": 0.7211490837048044, |
| "grad_norm": 1.765625, |
| "learning_rate": 3.8110763528770543e-06, |
| "loss": 0.18243337869644166, |
| "step": 1820, |
| "token_acc": 0.9380230355884426 |
| }, |
| { |
| "epoch": 0.725111441307578, |
| "grad_norm": 1.2109375, |
| "learning_rate": 3.7107862584271016e-06, |
| "loss": 0.16808085441589354, |
| "step": 1830, |
| "token_acc": 0.9410576758514462 |
| }, |
| { |
| "epoch": 0.7290737989103516, |
| "grad_norm": 1.28125, |
| "learning_rate": 3.6115319434803897e-06, |
| "loss": 0.16966335773468016, |
| "step": 1840, |
| "token_acc": 0.9421915175440875 |
| }, |
| { |
| "epoch": 0.7330361565131253, |
| "grad_norm": 1.484375, |
| "learning_rate": 3.5133297543703724e-06, |
| "loss": 0.18466969728469848, |
| "step": 1850, |
| "token_acc": 0.9361129341986922 |
| }, |
| { |
| "epoch": 0.736998514115899, |
| "grad_norm": 1.34375, |
| "learning_rate": 3.416195864154426e-06, |
| "loss": 0.17389074563980103, |
| "step": 1860, |
| "token_acc": 0.9401176608095999 |
| }, |
| { |
| "epoch": 0.7409608717186726, |
| "grad_norm": 1.8046875, |
| "learning_rate": 3.3201462699502606e-06, |
| "loss": 0.18031821250915528, |
| "step": 1870, |
| "token_acc": 0.9371710958652052 |
| }, |
| { |
| "epoch": 0.7449232293214463, |
| "grad_norm": 1.3828125, |
| "learning_rate": 3.2251967903013515e-06, |
| "loss": 0.16321947574615478, |
| "step": 1880, |
| "token_acc": 0.9434022207870669 |
| }, |
| { |
| "epoch": 0.7488855869242199, |
| "grad_norm": 1.40625, |
| "learning_rate": 3.1313630625717462e-06, |
| "loss": 0.165952730178833, |
| "step": 1890, |
| "token_acc": 0.9433088620870477 |
| }, |
| { |
| "epoch": 0.7528479445269936, |
| "grad_norm": 1.6953125, |
| "learning_rate": 3.0386605403707347e-06, |
| "loss": 0.17759935855865477, |
| "step": 1900, |
| "token_acc": 0.9391445944776249 |
| }, |
| { |
| "epoch": 0.7568103021297672, |
| "grad_norm": 1.5625, |
| "learning_rate": 2.947104491007766e-06, |
| "loss": 0.17080872058868407, |
| "step": 1910, |
| "token_acc": 0.9417546272928465 |
| }, |
| { |
| "epoch": 0.7607726597325408, |
| "grad_norm": 1.296875, |
| "learning_rate": 2.8567099929780596e-06, |
| "loss": 0.17588542699813842, |
| "step": 1920, |
| "token_acc": 0.9384443609064742 |
| }, |
| { |
| "epoch": 0.7647350173353145, |
| "grad_norm": 1.671875, |
| "learning_rate": 2.767491933479304e-06, |
| "loss": 0.17596354484558105, |
| "step": 1930, |
| "token_acc": 0.9390227163544026 |
| }, |
| { |
| "epoch": 0.7686973749380882, |
| "grad_norm": 1.4921875, |
| "learning_rate": 2.679465005959856e-06, |
| "loss": 0.1740294098854065, |
| "step": 1940, |
| "token_acc": 0.9401780685412244 |
| }, |
| { |
| "epoch": 0.7726597325408618, |
| "grad_norm": 1.359375, |
| "learning_rate": 2.5926437076988685e-06, |
| "loss": 0.16495332717895508, |
| "step": 1950, |
| "token_acc": 0.9424596695186467 |
| }, |
| { |
| "epoch": 0.7766220901436355, |
| "grad_norm": 1.359375, |
| "learning_rate": 2.507042337418707e-06, |
| "loss": 0.17244219779968262, |
| "step": 1960, |
| "token_acc": 0.940576109936575 |
| }, |
| { |
| "epoch": 0.7805844477464091, |
| "grad_norm": 1.7109375, |
| "learning_rate": 2.4226749929300774e-06, |
| "loss": 0.17762508392333984, |
| "step": 1970, |
| "token_acc": 0.9379922601444852 |
| }, |
| { |
| "epoch": 0.7845468053491828, |
| "grad_norm": 1.5078125, |
| "learning_rate": 2.339555568810221e-06, |
| "loss": 0.16768510341644288, |
| "step": 1980, |
| "token_acc": 0.9424666806336723 |
| }, |
| { |
| "epoch": 0.7885091629519564, |
| "grad_norm": 1.2734375, |
| "learning_rate": 2.2576977541146193e-06, |
| "loss": 0.1687544584274292, |
| "step": 1990, |
| "token_acc": 0.941843418155467 |
| }, |
| { |
| "epoch": 0.79247152055473, |
| "grad_norm": 1.3984375, |
| "learning_rate": 2.1771150301225097e-06, |
| "loss": 0.17961428165435792, |
| "step": 2000, |
| "token_acc": 0.9382880764646055 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 2524, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.3391808311045652e+19, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|