{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.999242079733212,
  "eval_steps": 500,
  "global_step": 1030,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004850689707442777,
      "grad_norm": 6.339065858846749,
      "learning_rate": 7.766990291262136e-07,
      "loss": 1.0147,
      "step": 1
    },
    {
      "epoch": 0.009701379414885554,
      "grad_norm": 6.370901874406726,
      "learning_rate": 1.5533980582524272e-06,
      "loss": 1.0217,
      "step": 2
    },
    {
      "epoch": 0.01455206912232833,
      "grad_norm": 6.253076167725343,
      "learning_rate": 2.330097087378641e-06,
      "loss": 1.0059,
      "step": 3
    },
    {
      "epoch": 0.01940275882977111,
      "grad_norm": 5.8627331903052005,
      "learning_rate": 3.1067961165048544e-06,
      "loss": 0.9987,
      "step": 4
    },
    {
      "epoch": 0.024253448537213885,
      "grad_norm": 4.624724616975588,
      "learning_rate": 3.883495145631068e-06,
      "loss": 0.9654,
      "step": 5
    },
    {
      "epoch": 0.02910413824465666,
      "grad_norm": 2.701973440331149,
      "learning_rate": 4.660194174757282e-06,
      "loss": 0.9221,
      "step": 6
    },
    {
      "epoch": 0.03395482795209944,
      "grad_norm": 2.5821400220833683,
      "learning_rate": 5.436893203883496e-06,
      "loss": 0.9118,
      "step": 7
    },
    {
      "epoch": 0.03880551765954222,
      "grad_norm": 3.1473551148693146,
      "learning_rate": 6.213592233009709e-06,
      "loss": 0.8818,
      "step": 8
    },
    {
      "epoch": 0.04365620736698499,
      "grad_norm": 3.8230150062051638,
      "learning_rate": 6.990291262135923e-06,
      "loss": 0.8912,
      "step": 9
    },
    {
      "epoch": 0.04850689707442777,
      "grad_norm": 3.4669236063777715,
      "learning_rate": 7.766990291262136e-06,
      "loss": 0.8779,
      "step": 10
    },
    {
      "epoch": 0.053357586781870546,
      "grad_norm": 2.712055876575345,
      "learning_rate": 8.54368932038835e-06,
      "loss": 0.8328,
      "step": 11
    },
    {
      "epoch": 0.05820827648931332,
      "grad_norm": 2.574263966000136,
      "learning_rate": 9.320388349514565e-06,
      "loss": 0.8149,
      "step": 12
    },
    {
      "epoch": 0.0630589661967561,
      "grad_norm": 1.8016385817876701,
      "learning_rate": 1.0097087378640778e-05,
      "loss": 0.7941,
      "step": 13
    },
    {
      "epoch": 0.06790965590419888,
      "grad_norm": 1.16110696712433,
      "learning_rate": 1.0873786407766991e-05,
      "loss": 0.7751,
      "step": 14
    },
    {
      "epoch": 0.07276034561164166,
      "grad_norm": 1.5441894400920566,
      "learning_rate": 1.1650485436893204e-05,
      "loss": 0.7603,
      "step": 15
    },
    {
      "epoch": 0.07761103531908443,
      "grad_norm": 1.3570174190036193,
      "learning_rate": 1.2427184466019418e-05,
      "loss": 0.7456,
      "step": 16
    },
    {
      "epoch": 0.08246172502652721,
      "grad_norm": 1.0160879152766609,
      "learning_rate": 1.3203883495145633e-05,
      "loss": 0.74,
      "step": 17
    },
    {
      "epoch": 0.08731241473396999,
      "grad_norm": 1.195923679791525,
      "learning_rate": 1.3980582524271846e-05,
      "loss": 0.7223,
      "step": 18
    },
    {
      "epoch": 0.09216310444141276,
      "grad_norm": 1.0381307779091873,
      "learning_rate": 1.475728155339806e-05,
      "loss": 0.7149,
      "step": 19
    },
    {
      "epoch": 0.09701379414885554,
      "grad_norm": 0.9191697728302082,
      "learning_rate": 1.5533980582524273e-05,
      "loss": 0.7032,
      "step": 20
    },
    {
      "epoch": 0.10186448385629832,
      "grad_norm": 1.0389109685950821,
      "learning_rate": 1.6310679611650486e-05,
      "loss": 0.697,
      "step": 21
    },
    {
      "epoch": 0.10671517356374109,
      "grad_norm": 0.8528569833940303,
      "learning_rate": 1.70873786407767e-05,
      "loss": 0.6913,
      "step": 22
    },
    {
      "epoch": 0.11156586327118387,
      "grad_norm": 0.7397574673832126,
      "learning_rate": 1.7864077669902916e-05,
      "loss": 0.6844,
      "step": 23
    },
    {
      "epoch": 0.11641655297862664,
      "grad_norm": 0.6762376097915315,
      "learning_rate": 1.864077669902913e-05,
      "loss": 0.6807,
      "step": 24
    },
    {
      "epoch": 0.12126724268606942,
      "grad_norm": 0.6801312007046909,
      "learning_rate": 1.9417475728155343e-05,
      "loss": 0.6651,
      "step": 25
    },
    {
      "epoch": 0.1261179323935122,
      "grad_norm": 0.5373129321939298,
      "learning_rate": 2.0194174757281556e-05,
      "loss": 0.6661,
      "step": 26
    },
    {
      "epoch": 0.13096862210095497,
      "grad_norm": 0.6155691125010336,
      "learning_rate": 2.097087378640777e-05,
      "loss": 0.6655,
      "step": 27
    },
    {
      "epoch": 0.13581931180839776,
      "grad_norm": 0.5373412410981904,
      "learning_rate": 2.1747572815533982e-05,
      "loss": 0.6541,
      "step": 28
    },
    {
      "epoch": 0.14067000151584053,
      "grad_norm": 0.5722094683121568,
      "learning_rate": 2.2524271844660196e-05,
      "loss": 0.6534,
      "step": 29
    },
    {
      "epoch": 0.14552069122328332,
      "grad_norm": 0.6369873796903149,
      "learning_rate": 2.330097087378641e-05,
      "loss": 0.6536,
      "step": 30
    },
    {
      "epoch": 0.15037138093072608,
      "grad_norm": 0.5246684440675834,
      "learning_rate": 2.4077669902912622e-05,
      "loss": 0.6545,
      "step": 31
    },
    {
      "epoch": 0.15522207063816887,
      "grad_norm": 0.504247506683658,
      "learning_rate": 2.4854368932038836e-05,
      "loss": 0.6314,
      "step": 32
    },
    {
      "epoch": 0.16007276034561163,
      "grad_norm": 0.5566944063536889,
      "learning_rate": 2.5631067961165052e-05,
      "loss": 0.6373,
      "step": 33
    },
    {
      "epoch": 0.16492345005305442,
      "grad_norm": 0.8570989167580252,
      "learning_rate": 2.6407766990291266e-05,
      "loss": 0.63,
      "step": 34
    },
    {
      "epoch": 0.16977413976049718,
      "grad_norm": 1.8667995731915865,
      "learning_rate": 2.718446601941748e-05,
      "loss": 0.6456,
      "step": 35
    },
    {
      "epoch": 0.17462482946793997,
      "grad_norm": 0.5164194413667431,
      "learning_rate": 2.7961165048543692e-05,
      "loss": 0.6308,
      "step": 36
    },
    {
      "epoch": 0.17947551917538274,
      "grad_norm": 1.6098330943831782,
      "learning_rate": 2.8737864077669905e-05,
      "loss": 0.6398,
      "step": 37
    },
    {
      "epoch": 0.18432620888282553,
      "grad_norm": 1.1492129920694993,
      "learning_rate": 2.951456310679612e-05,
      "loss": 0.6276,
      "step": 38
    },
    {
      "epoch": 0.18917689859026832,
      "grad_norm": 0.7127614761088336,
      "learning_rate": 3.0291262135922332e-05,
      "loss": 0.6218,
      "step": 39
    },
    {
      "epoch": 0.19402758829771108,
      "grad_norm": 1.0312378150228299,
      "learning_rate": 3.1067961165048545e-05,
      "loss": 0.6253,
      "step": 40
    },
    {
      "epoch": 0.19887827800515387,
      "grad_norm": 1.7574387168351864,
      "learning_rate": 3.184466019417476e-05,
      "loss": 0.6281,
      "step": 41
    },
    {
      "epoch": 0.20372896771259663,
      "grad_norm": 0.9416599187328968,
      "learning_rate": 3.262135922330097e-05,
      "loss": 0.6235,
      "step": 42
    },
    {
      "epoch": 0.20857965742003942,
      "grad_norm": 1.8301787236679616,
      "learning_rate": 3.339805825242719e-05,
      "loss": 0.622,
      "step": 43
    },
    {
      "epoch": 0.21343034712748218,
      "grad_norm": 0.9292078210446757,
      "learning_rate": 3.41747572815534e-05,
      "loss": 0.6107,
      "step": 44
    },
    {
      "epoch": 0.21828103683492497,
      "grad_norm": 1.994214362456412,
      "learning_rate": 3.4951456310679615e-05,
      "loss": 0.6151,
      "step": 45
    },
    {
      "epoch": 0.22313172654236774,
      "grad_norm": 1.3619344540131681,
      "learning_rate": 3.572815533980583e-05,
      "loss": 0.6089,
      "step": 46
    },
    {
      "epoch": 0.22798241624981053,
      "grad_norm": 1.6323821476629805,
      "learning_rate": 3.650485436893204e-05,
      "loss": 0.6165,
      "step": 47
    },
    {
      "epoch": 0.2328331059572533,
      "grad_norm": 1.1474300438640261,
      "learning_rate": 3.728155339805826e-05,
      "loss": 0.6104,
      "step": 48
    },
    {
      "epoch": 0.23768379566469608,
      "grad_norm": 1.1936059623728144,
      "learning_rate": 3.805825242718447e-05,
      "loss": 0.6086,
      "step": 49
    },
    {
      "epoch": 0.24253448537213884,
      "grad_norm": 1.4126137333521573,
      "learning_rate": 3.8834951456310685e-05,
      "loss": 0.6123,
      "step": 50
    },
    {
      "epoch": 0.24738517507958163,
      "grad_norm": 0.7835607330331523,
      "learning_rate": 3.9611650485436895e-05,
      "loss": 0.5979,
      "step": 51
    },
    {
      "epoch": 0.2522358647870244,
      "grad_norm": 1.3575257469759314,
      "learning_rate": 4.038834951456311e-05,
      "loss": 0.6134,
      "step": 52
    },
    {
      "epoch": 0.25708655449446716,
      "grad_norm": 0.9580505107131282,
      "learning_rate": 4.116504854368932e-05,
      "loss": 0.5987,
      "step": 53
    },
    {
      "epoch": 0.26193724420190995,
      "grad_norm": 1.1647956860260527,
      "learning_rate": 4.194174757281554e-05,
      "loss": 0.6134,
      "step": 54
    },
    {
      "epoch": 0.26678793390935274,
      "grad_norm": 0.9133078407230598,
      "learning_rate": 4.271844660194175e-05,
      "loss": 0.5978,
      "step": 55
    },
    {
      "epoch": 0.2716386236167955,
      "grad_norm": 1.3505836368180404,
      "learning_rate": 4.3495145631067965e-05,
      "loss": 0.5944,
      "step": 56
    },
    {
      "epoch": 0.2764893133242383,
      "grad_norm": 1.1628512424723636,
      "learning_rate": 4.4271844660194175e-05,
      "loss": 0.6052,
      "step": 57
    },
    {
      "epoch": 0.28134000303168105,
      "grad_norm": 1.6421048813051027,
      "learning_rate": 4.504854368932039e-05,
      "loss": 0.6008,
      "step": 58
    },
    {
      "epoch": 0.28619069273912384,
      "grad_norm": 1.3019802198524983,
      "learning_rate": 4.58252427184466e-05,
      "loss": 0.594,
      "step": 59
    },
    {
      "epoch": 0.29104138244656663,
      "grad_norm": 1.2774402772060065,
      "learning_rate": 4.660194174757282e-05,
      "loss": 0.5892,
      "step": 60
    },
    {
      "epoch": 0.2958920721540094,
      "grad_norm": 1.4435670386305743,
      "learning_rate": 4.737864077669903e-05,
      "loss": 0.5931,
      "step": 61
    },
    {
      "epoch": 0.30074276186145216,
      "grad_norm": 1.0284857540916943,
      "learning_rate": 4.8155339805825245e-05,
      "loss": 0.5932,
      "step": 62
    },
    {
      "epoch": 0.30559345156889495,
      "grad_norm": 1.1698103896183938,
      "learning_rate": 4.8932038834951454e-05,
      "loss": 0.5948,
      "step": 63
    },
    {
      "epoch": 0.31044414127633774,
      "grad_norm": 1.434142725222452,
      "learning_rate": 4.970873786407767e-05,
      "loss": 0.5897,
      "step": 64
    },
    {
      "epoch": 0.3152948309837805,
      "grad_norm": 1.3482179068151203,
      "learning_rate": 5.0485436893203895e-05,
      "loss": 0.5824,
      "step": 65
    },
    {
      "epoch": 0.32014552069122326,
      "grad_norm": 1.3069227496554443,
      "learning_rate": 5.1262135922330105e-05,
      "loss": 0.5802,
      "step": 66
    },
    {
      "epoch": 0.32499621039866605,
      "grad_norm": 1.1366236056516827,
      "learning_rate": 5.203883495145632e-05,
      "loss": 0.5804,
      "step": 67
    },
    {
      "epoch": 0.32984690010610884,
      "grad_norm": 1.6243350603336242,
      "learning_rate": 5.281553398058253e-05,
      "loss": 0.5812,
      "step": 68
    },
    {
      "epoch": 0.33469758981355163,
      "grad_norm": 1.0822457196908746,
      "learning_rate": 5.359223300970875e-05,
      "loss": 0.5862,
      "step": 69
    },
    {
      "epoch": 0.33954827952099437,
      "grad_norm": 1.6360957248140573,
      "learning_rate": 5.436893203883496e-05,
      "loss": 0.5912,
      "step": 70
    },
    {
      "epoch": 0.34439896922843716,
      "grad_norm": 2.3477845490488813,
      "learning_rate": 5.5145631067961174e-05,
      "loss": 0.5906,
      "step": 71
    },
    {
      "epoch": 0.34924965893587995,
      "grad_norm": 1.105543560593242,
      "learning_rate": 5.5922330097087384e-05,
      "loss": 0.5824,
      "step": 72
    },
    {
      "epoch": 0.35410034864332274,
      "grad_norm": 4.137098681881185,
      "learning_rate": 5.66990291262136e-05,
      "loss": 0.6359,
      "step": 73
    },
    {
      "epoch": 0.3589510383507655,
      "grad_norm": 3.8847979837997033,
      "learning_rate": 5.747572815533981e-05,
      "loss": 0.6486,
      "step": 74
    },
    {
      "epoch": 0.36380172805820826,
      "grad_norm": 1.1747128429519862,
      "learning_rate": 5.825242718446603e-05,
      "loss": 0.595,
      "step": 75
    },
    {
      "epoch": 0.36865241776565105,
      "grad_norm": 3.008245900701061,
      "learning_rate": 5.902912621359224e-05,
      "loss": 0.6387,
      "step": 76
    },
    {
      "epoch": 0.37350310747309384,
      "grad_norm": 2.3713075132931554,
      "learning_rate": 5.9805825242718454e-05,
      "loss": 0.6344,
      "step": 77
    },
    {
      "epoch": 0.37835379718053663,
      "grad_norm": 1.6213053074921984,
      "learning_rate": 6.0582524271844664e-05,
      "loss": 0.6048,
      "step": 78
    },
    {
      "epoch": 0.38320448688797937,
      "grad_norm": 1.6024154837501339,
      "learning_rate": 6.135922330097087e-05,
      "loss": 0.6203,
      "step": 79
    },
    {
      "epoch": 0.38805517659542216,
      "grad_norm": 1.1825030677591377,
      "learning_rate": 6.213592233009709e-05,
      "loss": 0.6052,
      "step": 80
    },
    {
      "epoch": 0.39290586630286495,
      "grad_norm": 1.3964525731128163,
      "learning_rate": 6.291262135922331e-05,
      "loss": 0.6112,
      "step": 81
    },
    {
      "epoch": 0.39775655601030774,
      "grad_norm": 1.1205074621871551,
      "learning_rate": 6.368932038834952e-05,
      "loss": 0.5977,
      "step": 82
    },
    {
      "epoch": 0.4026072457177505,
      "grad_norm": 1.045620374565707,
      "learning_rate": 6.446601941747573e-05,
      "loss": 0.5914,
      "step": 83
    },
    {
      "epoch": 0.40745793542519326,
      "grad_norm": 1.3974586249408472,
      "learning_rate": 6.524271844660194e-05,
      "loss": 0.5918,
      "step": 84
    },
    {
      "epoch": 0.41230862513263605,
      "grad_norm": 1.0818483302602913,
      "learning_rate": 6.601941747572816e-05,
      "loss": 0.5948,
      "step": 85
    },
    {
      "epoch": 0.41715931484007884,
      "grad_norm": 0.9808456957793906,
      "learning_rate": 6.679611650485438e-05,
      "loss": 0.5839,
      "step": 86
    },
    {
      "epoch": 0.4220100045475216,
      "grad_norm": 1.2035779456517084,
      "learning_rate": 6.757281553398058e-05,
      "loss": 0.5833,
      "step": 87
    },
    {
      "epoch": 0.42686069425496437,
      "grad_norm": 1.6887623926979713,
      "learning_rate": 6.83495145631068e-05,
      "loss": 0.5818,
      "step": 88
    },
    {
      "epoch": 0.43171138396240716,
      "grad_norm": 0.8023218391013366,
      "learning_rate": 6.912621359223301e-05,
      "loss": 0.5863,
      "step": 89
    },
    {
      "epoch": 0.43656207366984995,
      "grad_norm": 1.574900348178855,
      "learning_rate": 6.990291262135923e-05,
      "loss": 0.5821,
      "step": 90
    },
    {
      "epoch": 0.4414127633772927,
      "grad_norm": 0.9288518542917786,
      "learning_rate": 7.067961165048545e-05,
      "loss": 0.5814,
      "step": 91
    },
    {
      "epoch": 0.4462634530847355,
      "grad_norm": 1.6871845487045471,
      "learning_rate": 7.145631067961166e-05,
      "loss": 0.5819,
      "step": 92
    },
    {
      "epoch": 0.45111414279217826,
      "grad_norm": 1.147490028185953,
      "learning_rate": 7.223300970873787e-05,
      "loss": 0.5752,
      "step": 93
    },
    {
      "epoch": 0.45596483249962105,
      "grad_norm": 1.0734179177901382,
      "learning_rate": 7.300970873786408e-05,
      "loss": 0.5786,
      "step": 94
    },
    {
      "epoch": 0.46081552220706384,
      "grad_norm": 1.2263367009960806,
      "learning_rate": 7.37864077669903e-05,
      "loss": 0.5789,
      "step": 95
    },
    {
      "epoch": 0.4656662119145066,
      "grad_norm": 1.4570032389620742,
      "learning_rate": 7.456310679611652e-05,
      "loss": 0.5745,
      "step": 96
    },
    {
      "epoch": 0.47051690162194937,
      "grad_norm": 1.3246870726440927,
      "learning_rate": 7.533980582524272e-05,
      "loss": 0.5775,
      "step": 97
    },
    {
      "epoch": 0.47536759132939216,
      "grad_norm": 0.9415857506868542,
      "learning_rate": 7.611650485436894e-05,
      "loss": 0.5699,
      "step": 98
    },
    {
      "epoch": 0.48021828103683495,
      "grad_norm": 1.2384384474151087,
      "learning_rate": 7.689320388349515e-05,
      "loss": 0.5733,
      "step": 99
    },
    {
      "epoch": 0.4850689707442777,
      "grad_norm": 1.5627749991572353,
      "learning_rate": 7.766990291262137e-05,
      "loss": 0.5735,
      "step": 100
    },
    {
      "epoch": 0.4899196604517205,
      "grad_norm": 1.0078484211944914,
      "learning_rate": 7.844660194174757e-05,
      "loss": 0.5733,
      "step": 101
    },
    {
      "epoch": 0.49477035015916326,
      "grad_norm": 1.6421211712488573,
      "learning_rate": 7.922330097087379e-05,
      "loss": 0.576,
      "step": 102
    },
    {
      "epoch": 0.49962103986660605,
      "grad_norm": 0.8416126904816602,
      "learning_rate": 8e-05,
      "loss": 0.5697,
      "step": 103
    },
    {
      "epoch": 0.5044717295740488,
      "grad_norm": 1.7540458195736903,
      "learning_rate": 7.999977029531286e-05,
      "loss": 0.5799,
      "step": 104
    },
    {
      "epoch": 0.5093224192814916,
      "grad_norm": 1.050447231844734,
      "learning_rate": 7.999908118388965e-05,
      "loss": 0.5756,
      "step": 105
    },
    {
      "epoch": 0.5141731089889343,
      "grad_norm": 1.394330877960354,
      "learning_rate": 7.999793267364497e-05,
      "loss": 0.5713,
      "step": 106
    },
    {
      "epoch": 0.5190237986963772,
      "grad_norm": 1.5873678777006228,
      "learning_rate": 7.999632477776974e-05,
      "loss": 0.5733,
      "step": 107
    },
    {
      "epoch": 0.5238744884038199,
      "grad_norm": 0.8115732123781836,
      "learning_rate": 7.9994257514731e-05,
      "loss": 0.5661,
      "step": 108
    },
    {
      "epoch": 0.5287251781112627,
      "grad_norm": 1.1857754936411384,
      "learning_rate": 7.999173090827177e-05,
      "loss": 0.5719,
      "step": 109
    },
    {
      "epoch": 0.5335758678187055,
      "grad_norm": 0.8066157800150973,
      "learning_rate": 7.998874498741072e-05,
      "loss": 0.5695,
      "step": 110
    },
    {
      "epoch": 0.5384265575261482,
      "grad_norm": 1.4739854945603235,
      "learning_rate": 7.998529978644183e-05,
      "loss": 0.5712,
      "step": 111
    },
    {
      "epoch": 0.543277247233591,
      "grad_norm": 0.778875512416499,
      "learning_rate": 7.998139534493407e-05,
      "loss": 0.5609,
      "step": 112
    },
    {
      "epoch": 0.5481279369410338,
      "grad_norm": 0.9919152170469479,
      "learning_rate": 7.997703170773084e-05,
      "loss": 0.5648,
      "step": 113
    },
    {
      "epoch": 0.5529786266484766,
      "grad_norm": 1.3093959851041357,
      "learning_rate": 7.997220892494955e-05,
      "loss": 0.5757,
      "step": 114
    },
    {
      "epoch": 0.5578293163559194,
      "grad_norm": 0.9066117120369992,
      "learning_rate": 7.996692705198097e-05,
      "loss": 0.566,
      "step": 115
    },
    {
      "epoch": 0.5626800060633621,
      "grad_norm": 1.2498381901490132,
      "learning_rate": 7.996118614948869e-05,
      "loss": 0.5757,
      "step": 116
    },
    {
      "epoch": 0.567530695770805,
      "grad_norm": 1.0385189418340641,
      "learning_rate": 7.995498628340827e-05,
      "loss": 0.5697,
      "step": 117
    },
    {
      "epoch": 0.5723813854782477,
      "grad_norm": 1.3065564110858372,
      "learning_rate": 7.994832752494667e-05,
      "loss": 0.5672,
      "step": 118
    },
    {
      "epoch": 0.5772320751856904,
      "grad_norm": 0.9524061899396458,
      "learning_rate": 7.994120995058127e-05,
      "loss": 0.5624,
      "step": 119
    },
    {
      "epoch": 0.5820827648931333,
      "grad_norm": 1.018979775317657,
      "learning_rate": 7.993363364205907e-05,
      "loss": 0.554,
      "step": 120
    },
    {
      "epoch": 0.586933454600576,
      "grad_norm": 0.9276898951496916,
      "learning_rate": 7.992559868639576e-05,
      "loss": 0.556,
      "step": 121
    },
    {
      "epoch": 0.5917841443080188,
      "grad_norm": 1.1930439188526805,
      "learning_rate": 7.99171051758747e-05,
      "loss": 0.5526,
      "step": 122
    },
    {
      "epoch": 0.5966348340154616,
      "grad_norm": 0.6826863389197481,
      "learning_rate": 7.990815320804583e-05,
      "loss": 0.5609,
      "step": 123
    },
    {
      "epoch": 0.6014855237229043,
      "grad_norm": 0.8061980858751947,
      "learning_rate": 7.98987428857246e-05,
      "loss": 0.5586,
      "step": 124
    },
    {
      "epoch": 0.6063362134303472,
      "grad_norm": 0.9239948149603757,
      "learning_rate": 7.988887431699079e-05,
      "loss": 0.5507,
      "step": 125
    },
    {
      "epoch": 0.6111869031377899,
      "grad_norm": 1.0465957889843347,
      "learning_rate": 7.987854761518719e-05,
      "loss": 0.5568,
      "step": 126
    },
    {
      "epoch": 0.6160375928452326,
      "grad_norm": 0.8308130357408615,
      "learning_rate": 7.986776289891842e-05,
      "loss": 0.5591,
      "step": 127
    },
    {
      "epoch": 0.6208882825526755,
      "grad_norm": 1.0246744766037437,
      "learning_rate": 7.985652029204946e-05,
      "loss": 0.5563,
      "step": 128
    },
    {
      "epoch": 0.6257389722601182,
      "grad_norm": 1.3238612418839921,
      "learning_rate": 7.984481992370429e-05,
      "loss": 0.5491,
      "step": 129
    },
    {
      "epoch": 0.630589661967561,
      "grad_norm": 0.6948258976249133,
      "learning_rate": 7.983266192826437e-05,
      "loss": 0.5418,
      "step": 130
    },
    {
      "epoch": 0.6354403516750038,
      "grad_norm": 0.7288790333090353,
      "learning_rate": 7.982004644536716e-05,
      "loss": 0.5441,
      "step": 131
    },
    {
      "epoch": 0.6402910413824465,
      "grad_norm": 1.0943204811023435,
      "learning_rate": 7.98069736199044e-05,
      "loss": 0.5493,
      "step": 132
    },
    {
      "epoch": 0.6451417310898894,
      "grad_norm": 0.8706827515570799,
      "learning_rate": 7.979344360202055e-05,
      "loss": 0.5465,
      "step": 133
    },
    {
      "epoch": 0.6499924207973321,
      "grad_norm": 0.9127380693761118,
      "learning_rate": 7.977945654711108e-05,
      "loss": 0.5475,
      "step": 134
    },
    {
      "epoch": 0.654843110504775,
      "grad_norm": 0.9510188119086359,
      "learning_rate": 7.976501261582056e-05,
      "loss": 0.543,
      "step": 135
    },
    {
      "epoch": 0.6596938002122177,
      "grad_norm": 0.9350726421156861,
      "learning_rate": 7.975011197404092e-05,
      "loss": 0.5525,
      "step": 136
    },
    {
      "epoch": 0.6645444899196604,
      "grad_norm": 1.272258773056705,
      "learning_rate": 7.973475479290956e-05,
      "loss": 0.5518,
      "step": 137
    },
    {
      "epoch": 0.6693951796271033,
      "grad_norm": 0.9765265325518906,
      "learning_rate": 7.971894124880727e-05,
      "loss": 0.5417,
      "step": 138
    },
    {
      "epoch": 0.674245869334546,
      "grad_norm": 1.1917501609756302,
      "learning_rate": 7.970267152335632e-05,
      "loss": 0.5464,
      "step": 139
    },
    {
      "epoch": 0.6790965590419887,
      "grad_norm": 0.6719257463868904,
      "learning_rate": 7.968594580341832e-05,
      "loss": 0.544,
      "step": 140
    },
    {
      "epoch": 0.6839472487494316,
      "grad_norm": 0.7137208028607956,
      "learning_rate": 7.966876428109209e-05,
      "loss": 0.5351,
      "step": 141
    },
    {
      "epoch": 0.6887979384568743,
      "grad_norm": 0.7449688014358767,
      "learning_rate": 7.965112715371144e-05,
      "loss": 0.5397,
      "step": 142
    },
    {
      "epoch": 0.6936486281643172,
      "grad_norm": 0.5377305872108858,
      "learning_rate": 7.96330346238429e-05,
      "loss": 0.5346,
      "step": 143
    },
    {
      "epoch": 0.6984993178717599,
      "grad_norm": 0.5856757070627496,
      "learning_rate": 7.961448689928341e-05,
      "loss": 0.5395,
      "step": 144
    },
    {
      "epoch": 0.7033500075792026,
      "grad_norm": 0.6000325566823206,
      "learning_rate": 7.959548419305796e-05,
      "loss": 0.5447,
      "step": 145
    },
    {
      "epoch": 0.7082006972866455,
      "grad_norm": 0.7819963257560868,
      "learning_rate": 7.957602672341707e-05,
      "loss": 0.5364,
      "step": 146
    },
    {
      "epoch": 0.7130513869940882,
      "grad_norm": 1.1223449508846108,
      "learning_rate": 7.955611471383433e-05,
      "loss": 0.5381,
      "step": 147
    },
    {
      "epoch": 0.717902076701531,
      "grad_norm": 1.1418981667975974,
      "learning_rate": 7.953574839300385e-05,
      "loss": 0.5381,
      "step": 148
    },
    {
      "epoch": 0.7227527664089738,
      "grad_norm": 0.758286759296052,
      "learning_rate": 7.95149279948376e-05,
      "loss": 0.5398,
      "step": 149
    },
    {
      "epoch": 0.7276034561164165,
      "grad_norm": 0.7637204957772546,
      "learning_rate": 7.949365375846271e-05,
      "loss": 0.5386,
      "step": 150
    },
    {
      "epoch": 0.7324541458238594,
      "grad_norm": 0.6982030938329856,
      "learning_rate": 7.94719259282188e-05,
      "loss": 0.5328,
      "step": 151
    },
    {
      "epoch": 0.7373048355313021,
      "grad_norm": 0.7115887055025976,
      "learning_rate": 7.944974475365506e-05,
      "loss": 0.5406,
      "step": 152
    },
    {
      "epoch": 0.7421555252387448,
      "grad_norm": 0.6914212445412167,
      "learning_rate": 7.94271104895275e-05,
      "loss": 0.5375,
      "step": 153
    },
    {
      "epoch": 0.7470062149461877,
      "grad_norm": 0.6376946136665823,
      "learning_rate": 7.940402339579596e-05,
      "loss": 0.5322,
      "step": 154
    },
    {
      "epoch": 0.7518569046536304,
      "grad_norm": 0.662468788270689,
      "learning_rate": 7.93804837376211e-05,
      "loss": 0.5312,
      "step": 155
    },
    {
      "epoch": 0.7567075943610733,
      "grad_norm": 0.8116591959883654,
      "learning_rate": 7.935649178536142e-05,
      "loss": 0.5362,
      "step": 156
    },
    {
      "epoch": 0.761558284068516,
      "grad_norm": 1.2251954995336705,
      "learning_rate": 7.93320478145701e-05,
      "loss": 0.5454,
      "step": 157
    },
    {
      "epoch": 0.7664089737759587,
      "grad_norm": 1.1753293382340935,
      "learning_rate": 7.93071521059919e-05,
      "loss": 0.5369,
      "step": 158
    },
    {
      "epoch": 0.7712596634834016,
      "grad_norm": 0.5797209510428332,
      "learning_rate": 7.928180494555983e-05,
      "loss": 0.5255,
      "step": 159
    },
    {
      "epoch": 0.7761103531908443,
      "grad_norm": 0.9260629876609666,
      "learning_rate": 7.925600662439201e-05,
      "loss": 0.535,
      "step": 160
    },
    {
      "epoch": 0.780961042898287,
      "grad_norm": 1.2828815170548864,
      "learning_rate": 7.922975743878817e-05,
      "loss": 0.5293,
      "step": 161
    },
    {
      "epoch": 0.7858117326057299,
      "grad_norm": 0.5171701751512903,
      "learning_rate": 7.92030576902264e-05,
      "loss": 0.5298,
      "step": 162
    },
    {
      "epoch": 0.7906624223131726,
      "grad_norm": 0.9531992162184508,
      "learning_rate": 7.917590768535952e-05,
      "loss": 0.5315,
      "step": 163
    },
    {
      "epoch": 0.7955131120206155,
      "grad_norm": 1.1106333536493764,
      "learning_rate": 7.914830773601173e-05,
      "loss": 0.5279,
      "step": 164
    },
    {
      "epoch": 0.8003638017280582,
      "grad_norm": 0.45675301699833715,
      "learning_rate": 7.912025815917489e-05,
      "loss": 0.5372,
      "step": 165
    },
    {
      "epoch": 0.805214491435501,
      "grad_norm": 0.7189143844280501,
      "learning_rate": 7.909175927700499e-05,
      "loss": 0.535,
      "step": 166
    },
    {
      "epoch": 0.8100651811429438,
      "grad_norm": 0.8686823215812186,
      "learning_rate": 7.906281141681839e-05,
      "loss": 0.5333,
      "step": 167
    },
    {
      "epoch": 0.8149158708503865,
      "grad_norm": 0.588440050652225,
      "learning_rate": 7.903341491108798e-05,
      "loss": 0.5289,
      "step": 168
    },
    {
      "epoch": 0.8197665605578294,
      "grad_norm": 0.563873615495661,
      "learning_rate": 7.900357009743958e-05,
      "loss": 0.5331,
      "step": 169
    },
    {
      "epoch": 0.8246172502652721,
      "grad_norm": 0.545022127050129,
      "learning_rate": 7.897327731864784e-05,
      "loss": 0.5266,
      "step": 170
    },
    {
      "epoch": 0.8294679399727148,
      "grad_norm": 0.5592040867673563,
      "learning_rate": 7.894253692263244e-05,
      "loss": 0.522,
      "step": 171
    },
    {
      "epoch": 0.8343186296801577,
      "grad_norm": 0.6324827822327501,
      "learning_rate": 7.891134926245402e-05,
      "loss": 0.5297,
      "step": 172
    },
    {
      "epoch": 0.8391693193876004,
      "grad_norm": 0.8244822385641454,
      "learning_rate": 7.887971469631016e-05,
      "loss": 0.5319,
      "step": 173
    },
    {
      "epoch": 0.8440200090950432,
      "grad_norm": 1.2087031591715138,
      "learning_rate": 7.884763358753129e-05,
      "loss": 0.5408,
      "step": 174
    },
    {
      "epoch": 0.848870698802486,
      "grad_norm": 0.7427926592130248,
      "learning_rate": 7.881510630457643e-05,
      "loss": 0.5326,
      "step": 175
    },
    {
      "epoch": 0.8537213885099287,
      "grad_norm": 0.5074413186000275,
      "learning_rate": 7.878213322102908e-05,
      "loss": 0.5281,
      "step": 176
    },
    {
      "epoch": 0.8585720782173716,
      "grad_norm": 0.7191422226961306,
      "learning_rate": 7.874871471559282e-05,
      "loss": 0.5269,
      "step": 177
    },
    {
      "epoch": 0.8634227679248143,
      "grad_norm": 0.7351841845171684,
      "learning_rate": 7.8714851172087e-05,
      "loss": 0.5326,
      "step": 178
    },
    {
      "epoch": 0.868273457632257,
      "grad_norm": 0.661668481852005,
      "learning_rate": 7.868054297944237e-05,
      "loss": 0.5312,
      "step": 179
    },
    {
      "epoch": 0.8731241473396999,
      "grad_norm": 0.6257843185205204,
      "learning_rate": 7.864579053169657e-05,
      "loss": 0.5265,
      "step": 180
    },
    {
      "epoch": 0.8779748370471426,
      "grad_norm": 0.6272508186215946,
      "learning_rate": 7.86105942279896e-05,
      "loss": 0.5242,
      "step": 181
    },
    {
      "epoch": 0.8828255267545854,
      "grad_norm": 0.5859524441087289,
      "learning_rate": 7.857495447255925e-05,
      "loss": 0.5117,
      "step": 182
    },
    {
      "epoch": 0.8876762164620282,
      "grad_norm": 0.5314342493506371,
      "learning_rate": 7.853887167473646e-05,
      "loss": 0.5275,
      "step": 183
    },
    {
      "epoch": 0.892526906169471,
      "grad_norm": 0.5894396606474899,
      "learning_rate": 7.850234624894064e-05,
      "loss": 0.5236,
      "step": 184
    },
    {
      "epoch": 0.8973775958769138,
      "grad_norm": 0.7789309413273731,
      "learning_rate": 7.846537861467485e-05,
      "loss": 0.5269,
      "step": 185
    },
    {
      "epoch": 0.9022282855843565,
      "grad_norm": 0.9749399305462054,
      "learning_rate": 7.842796919652104e-05,
      "loss": 0.5177,
      "step": 186
    },
    {
      "epoch": 0.9070789752917993,
      "grad_norm": 0.8944031183061603,
      "learning_rate": 7.839011842413514e-05,
      "loss": 0.5236,
      "step": 187
    },
    {
      "epoch": 0.9119296649992421,
      "grad_norm": 0.6072136307213187,
      "learning_rate": 7.835182673224212e-05,
      "loss": 0.5237,
      "step": 188
    },
    {
      "epoch": 0.9167803547066848,
      "grad_norm": 0.44815995922956803,
      "learning_rate": 7.831309456063107e-05,
      "loss": 0.5193,
      "step": 189
    },
    {
      "epoch": 0.9216310444141277,
      "grad_norm": 0.5362363058315522,
      "learning_rate": 7.827392235415005e-05,
      "loss": 0.5242,
      "step": 190
    },
    {
      "epoch": 0.9264817341215704,
      "grad_norm": 0.5675209533538658,
      "learning_rate": 7.823431056270103e-05,
      "loss": 0.5223,
      "step": 191
    },
    {
      "epoch": 0.9313324238290132,
      "grad_norm": 0.5087214285921436,
      "learning_rate": 7.81942596412347e-05,
      "loss": 0.522,
      "step": 192
    },
    {
      "epoch": 0.936183113536456,
      "grad_norm": 0.46687413642954234,
      "learning_rate": 7.815377004974532e-05,
      "loss": 0.509,
      "step": 193
    },
    {
      "epoch": 0.9410338032438987,
      "grad_norm": 0.43127006261715695,
      "learning_rate": 7.811284225326529e-05,
      "loss": 0.522,
      "step": 194
    },
    {
      "epoch": 0.9458844929513415,
      "grad_norm": 0.43502529088952246,
      "learning_rate": 7.807147672185996e-05,
      "loss": 0.5258,
      "step": 195
    },
    {
      "epoch": 0.9507351826587843,
      "grad_norm": 0.5247431038277521,
      "learning_rate": 7.802967393062219e-05,
      "loss": 0.524,
      "step": 196
    },
    {
      "epoch": 0.955585872366227,
      "grad_norm": 0.6873590048364063,
      "learning_rate": 7.798743435966676e-05,
      "loss": 0.5227,
      "step": 197
    },
    {
      "epoch": 0.9604365620736699,
      "grad_norm": 0.8978763261543932,
      "learning_rate": 7.794475849412512e-05,
      "loss": 0.5143,
      "step": 198
    },
    {
      "epoch": 0.9652872517811126,
      "grad_norm": 0.8491329782406589,
      "learning_rate": 7.790164682413954e-05,
      "loss": 0.5186,
      "step": 199
    },
    {
      "epoch": 0.9701379414885554,
      "grad_norm": 0.5004070361840594,
      "learning_rate": 7.785809984485765e-05,
      "loss": 0.5185,
      "step": 200
    },
    {
      "epoch": 0.9749886311959982,
      "grad_norm": 0.4620691718653308,
      "learning_rate": 7.781411805642675e-05,
      "loss": 0.5179,
      "step": 201
    },
    {
      "epoch": 0.979839320903441,
      "grad_norm": 0.6078145800613444,
      "learning_rate": 7.776970196398795e-05,
      "loss": 0.5185,
      "step": 202
    },
    {
      "epoch": 0.9846900106108837,
      "grad_norm": 0.6744449960922595,
      "learning_rate": 7.77248520776705e-05,
      "loss": 0.5223,
      "step": 203
    },
    {
      "epoch": 0.9895407003183265,
      "grad_norm": 0.769858159356013,
      "learning_rate": 7.767956891258585e-05,
      "loss": 0.514,
      "step": 204
    },
    {
      "epoch": 0.9943913900257693,
      "grad_norm": 0.9703823352556248,
      "learning_rate": 7.763385298882177e-05,
      "loss": 0.5227,
      "step": 205
    },
    {
      "epoch": 0.9992420797332121,
      "grad_norm": 1.0790500827883502,
      "learning_rate": 7.758770483143634e-05,
      "loss": 0.5161,
      "step": 206
    },
    {
      "epoch": 1.0048506897074427,
      "grad_norm": 0.979399858326228,
      "learning_rate": 7.754112497045198e-05,
      "loss": 0.5126,
      "step": 207
    },
    {
      "epoch": 1.0097013794148855,
      "grad_norm": 0.8061955074549858,
      "learning_rate": 7.749411394084931e-05,
      "loss": 0.5093,
      "step": 208
    },
    {
      "epoch": 1.0145520691223284,
      "grad_norm": 0.769561002371883,
      "learning_rate": 7.744667228256102e-05,
      "loss": 0.5129,
      "step": 209
    },
    {
      "epoch": 1.0194027588297712,
      "grad_norm": 0.7563794216226459,
      "learning_rate": 7.739880054046567e-05,
      "loss": 0.504,
      "step": 210
    },
    {
      "epoch": 1.024253448537214,
      "grad_norm": 0.4861095042439798,
      "learning_rate": 7.735049926438143e-05,
      "loss": 0.5008,
      "step": 211
    },
    {
      "epoch": 1.0291041382446566,
      "grad_norm": 0.521763707621908,
      "learning_rate": 7.730176900905978e-05,
      "loss": 0.5039,
      "step": 212
    },
    {
      "epoch": 1.0339548279520994,
      "grad_norm": 0.6847654097095645,
      "learning_rate": 7.725261033417914e-05,
      "loss": 0.4987,
      "step": 213
    },
    {
      "epoch": 1.0388055176595423,
      "grad_norm": 0.45617035587869154,
      "learning_rate": 7.720302380433838e-05,
      "loss": 0.5082,
      "step": 214
    },
    {
      "epoch": 1.043656207366985,
      "grad_norm": 0.5136841747695677,
      "learning_rate": 7.715300998905045e-05,
      "loss": 0.4903,
      "step": 215
    },
    {
      "epoch": 1.0485068970744278,
      "grad_norm": 0.5201029165395914,
      "learning_rate": 7.710256946273572e-05,
      "loss": 0.5061,
      "step": 216
    },
    {
      "epoch": 1.0533575867818705,
      "grad_norm": 0.5369451831716502,
      "learning_rate": 7.705170280471546e-05,
      "loss": 0.4923,
      "step": 217
    },
    {
      "epoch": 1.0582082764893133,
      "grad_norm": 0.5292650633659572,
      "learning_rate": 7.700041059920516e-05,
      "loss": 0.4958,
      "step": 218
    },
    {
      "epoch": 1.0630589661967562,
      "grad_norm": 0.38347803199441816,
      "learning_rate": 7.694869343530781e-05,
      "loss": 0.4949,
      "step": 219
    },
    {
      "epoch": 1.067909655904199,
      "grad_norm": 0.42205008813365347,
      "learning_rate": 7.689655190700719e-05,
      "loss": 0.4958,
      "step": 220
    },
    {
      "epoch": 1.0727603456116417,
      "grad_norm": 0.43326896920779806,
      "learning_rate": 7.684398661316092e-05,
      "loss": 0.5034,
      "step": 221
    },
    {
      "epoch": 1.0776110353190844,
      "grad_norm": 0.3610486041319715,
      "learning_rate": 7.679099815749377e-05,
      "loss": 0.508,
      "step": 222
    },
    {
      "epoch": 1.0824617250265272,
      "grad_norm": 0.4631910613487445,
      "learning_rate": 7.673758714859052e-05,
      "loss": 0.5012,
      "step": 223
    },
    {
      "epoch": 1.08731241473397,
      "grad_norm": 0.5933537887890846,
      "learning_rate": 7.668375419988918e-05,
      "loss": 0.5003,
      "step": 224
    },
    {
      "epoch": 1.0921631044414128,
      "grad_norm": 0.7536616463697116,
      "learning_rate": 7.662949992967375e-05,
      "loss": 0.4984,
      "step": 225
    },
    {
      "epoch": 1.0970137941488556,
      "grad_norm": 0.8908783317227219,
      "learning_rate": 7.657482496106725e-05,
      "loss": 0.499,
      "step": 226
    },
    {
      "epoch": 1.1018644838562983,
      "grad_norm": 0.8949365262495667,
      "learning_rate": 7.651972992202449e-05,
      "loss": 0.4964,
      "step": 227
    },
    {
      "epoch": 1.106715173563741,
      "grad_norm": 0.7666080530207662,
      "learning_rate": 7.646421544532492e-05,
      "loss": 0.501,
      "step": 228
    },
    {
      "epoch": 1.1115658632711838,
      "grad_norm": 0.5974548399149405,
      "learning_rate": 7.640828216856532e-05,
      "loss": 0.5019,
      "step": 229
    },
    {
      "epoch": 1.1164165529786267,
      "grad_norm": 0.5208576739553936,
      "learning_rate": 7.635193073415246e-05,
      "loss": 0.4954,
      "step": 230
    },
    {
      "epoch": 1.1212672426860695,
      "grad_norm": 0.4449218126901089,
      "learning_rate": 7.62951617892958e-05,
      "loss": 0.4966,
      "step": 231
    },
    {
      "epoch": 1.1261179323935122,
      "grad_norm": 0.3413546355163314,
      "learning_rate": 7.623797598599995e-05,
      "loss": 0.4869,
      "step": 232
    },
    {
      "epoch": 1.130968622100955,
      "grad_norm": 0.4021494906350838,
      "learning_rate": 7.618037398105728e-05,
      "loss": 0.4876,
      "step": 233
    },
    {
      "epoch": 1.1358193118083977,
      "grad_norm": 0.5401093810240136,
      "learning_rate": 7.612235643604031e-05,
      "loss": 0.495,
      "step": 234
    },
    {
      "epoch": 1.1406700015158404,
      "grad_norm": 0.6450843987922678,
      "learning_rate": 7.606392401729415e-05,
      "loss": 0.4953,
      "step": 235
    },
    {
      "epoch": 1.1455206912232834,
      "grad_norm": 0.6413460171528268,
      "learning_rate": 7.600507739592879e-05,
      "loss": 0.4972,
      "step": 236
    },
    {
      "epoch": 1.150371380930726,
      "grad_norm": 0.6276158556341717,
      "learning_rate": 7.594581724781152e-05,
      "loss": 0.4957,
      "step": 237
    },
    {
      "epoch": 1.1552220706381688,
      "grad_norm": 0.7553018024510589,
      "learning_rate": 7.588614425355898e-05,
      "loss": 0.4955,
      "step": 238
    },
    {
      "epoch": 1.1600727603456116,
      "grad_norm": 0.8968837770832118,
      "learning_rate": 7.582605909852951e-05,
      "loss": 0.4937,
      "step": 239
    },
    {
      "epoch": 1.1649234500530543,
      "grad_norm": 0.7618870978199445,
      "learning_rate": 7.576556247281522e-05,
      "loss": 0.4969,
      "step": 240
    },
    {
      "epoch": 1.1697741397604973,
      "grad_norm": 0.6531717186517569,
      "learning_rate": 7.570465507123401e-05,
      "loss": 0.5042,
      "step": 241
    },
    {
      "epoch": 1.17462482946794,
      "grad_norm": 0.591515239054212,
      "learning_rate": 7.564333759332167e-05,
      "loss": 0.4906,
      "step": 242
    },
    {
      "epoch": 1.1794755191753827,
      "grad_norm": 0.49797581923871925,
      "learning_rate": 7.558161074332379e-05,
      "loss": 0.4966,
      "step": 243
    },
    {
      "epoch": 1.1843262088828255,
      "grad_norm": 0.5865910810734263,
      "learning_rate": 7.551947523018774e-05,
      "loss": 0.4997,
      "step": 244
    },
    {
      "epoch": 1.1891768985902682,
      "grad_norm": 0.5003536808662635,
      "learning_rate": 7.54569317675544e-05,
      "loss": 0.4954,
      "step": 245
    },
    {
      "epoch": 1.1940275882977112,
      "grad_norm": 0.4328856052217075,
      "learning_rate": 7.539398107375015e-05,
      "loss": 0.4979,
      "step": 246
    },
    {
      "epoch": 1.198878278005154,
      "grad_norm": 0.5623661241326378,
      "learning_rate": 7.533062387177843e-05,
      "loss": 0.4982,
      "step": 247
    },
    {
      "epoch": 1.2037289677125966,
      "grad_norm": 0.542587238982675,
      "learning_rate": 7.526686088931156e-05,
      "loss": 0.4991,
      "step": 248
    },
    {
      "epoch": 1.2085796574200394,
      "grad_norm": 0.4949553127282243,
      "learning_rate": 7.520269285868235e-05,
      "loss": 0.4908,
      "step": 249
    },
    {
      "epoch": 1.213430347127482,
      "grad_norm": 0.39917468694971237,
      "learning_rate": 7.513812051687564e-05,
      "loss": 0.4917,
      "step": 250
    },
    {
      "epoch": 1.218281036834925,
      "grad_norm": 0.44831825611716425,
      "learning_rate": 7.507314460551993e-05,
      "loss": 0.4898,
      "step": 251
    },
    {
      "epoch": 1.2231317265423678,
      "grad_norm": 0.4694728642442923,
      "learning_rate": 7.500776587087878e-05,
      "loss": 0.4929,
      "step": 252
    },
    {
      "epoch": 1.2279824162498105,
      "grad_norm": 0.4548469944052975,
      "learning_rate": 7.494198506384229e-05,
      "loss": 0.4826,
      "step": 253
    },
    {
      "epoch": 1.2328331059572533,
      "grad_norm": 0.6666754202357293,
      "learning_rate": 7.487580293991844e-05,
      "loss": 0.5021,
      "step": 254
    },
    {
      "epoch": 1.237683795664696,
      "grad_norm": 0.6791995766586792,
      "learning_rate": 7.480922025922443e-05,
      "loss": 0.4974,
      "step": 255
    },
    {
      "epoch": 1.242534485372139,
      "grad_norm": 0.48795487898431833,
      "learning_rate": 7.474223778647796e-05,
      "loss": 0.4934,
      "step": 256
    },
    {
      "epoch": 1.2473851750795817,
      "grad_norm": 0.32007742081193336,
      "learning_rate": 7.467485629098842e-05,
      "loss": 0.4907,
      "step": 257
    },
    {
      "epoch": 1.2522358647870244,
      "grad_norm": 0.2948779105297471,
      "learning_rate": 7.460707654664807e-05,
      "loss": 0.4974,
      "step": 258
    },
    {
      "epoch": 1.2570865544944672,
      "grad_norm": 0.2954711132741566,
      "learning_rate": 7.453889933192316e-05,
      "loss": 0.4893,
      "step": 259
    },
    {
      "epoch": 1.26193724420191,
      "grad_norm": 0.3234156565448516,
      "learning_rate": 7.447032542984502e-05,
      "loss": 0.4882,
      "step": 260
    },
    {
      "epoch": 1.2667879339093528,
      "grad_norm": 0.37736907562809757,
      "learning_rate": 7.440135562800093e-05,
      "loss": 0.487,
      "step": 261
    },
    {
      "epoch": 1.2716386236167956,
      "grad_norm": 0.4380964102542513,
      "learning_rate": 7.433199071852526e-05,
      "loss": 0.4965,
      "step": 262
    },
    {
      "epoch": 1.2764893133242383,
      "grad_norm": 0.5377640268616285,
      "learning_rate": 7.426223149809023e-05,
      "loss": 0.4922,
      "step": 263
    },
    {
      "epoch": 1.281340003031681,
      "grad_norm": 0.5447192325063156,
      "learning_rate": 7.419207876789685e-05,
      "loss": 0.4844,
      "step": 264
    },
    {
      "epoch": 1.2861906927391238,
      "grad_norm": 0.5569387883590096,
      "learning_rate": 7.412153333366567e-05,
      "loss": 0.4887,
      "step": 265
    },
    {
      "epoch": 1.2910413824465667,
      "grad_norm": 0.6127559785319273,
      "learning_rate": 7.405059600562751e-05,
      "loss": 0.4974,
      "step": 266
    },
    {
      "epoch": 1.2958920721540095,
      "grad_norm": 0.5372861059475705,
      "learning_rate": 7.397926759851425e-05,
      "loss": 0.4946,
      "step": 267
    },
    {
      "epoch": 1.3007427618614522,
      "grad_norm": 0.3715441206816088,
      "learning_rate": 7.390754893154933e-05,
      "loss": 0.4914,
      "step": 268
    },
    {
      "epoch": 1.305593451568895,
      "grad_norm": 0.3468457624853246,
      "learning_rate": 7.383544082843846e-05,
      "loss": 0.4906,
      "step": 269
    },
    {
      "epoch": 1.3104441412763377,
      "grad_norm": 0.37917435116867076,
      "learning_rate": 7.376294411736009e-05,
      "loss": 0.4877,
      "step": 270
    },
    {
      "epoch": 1.3152948309837806,
      "grad_norm": 0.44133896758740837,
      "learning_rate": 7.369005963095596e-05,
      "loss": 0.4962,
      "step": 271
    },
    {
      "epoch": 1.3201455206912232,
      "grad_norm": 0.5453886771483799,
      "learning_rate": 7.361678820632145e-05,
      "loss": 0.4918,
      "step": 272
    },
    {
      "epoch": 1.324996210398666,
      "grad_norm": 0.6036321305780495,
      "learning_rate": 7.354313068499607e-05,
      "loss": 0.4892,
      "step": 273
    },
    {
      "epoch": 1.3298469001061088,
      "grad_norm": 0.620838391242342,
      "learning_rate": 7.346908791295369e-05,
      "loss": 0.495,
      "step": 274
    },
    {
      "epoch": 1.3346975898135516,
      "grad_norm": 0.5848284090531766,
      "learning_rate": 7.339466074059292e-05,
      "loss": 0.4862,
      "step": 275
    },
    {
      "epoch": 1.3395482795209943,
      "grad_norm": 0.5915616847361488,
      "learning_rate": 7.331985002272726e-05,
      "loss": 0.4986,
      "step": 276
    },
    {
      "epoch": 1.344398969228437,
      "grad_norm": 0.5871009859314463,
      "learning_rate": 7.324465661857534e-05,
      "loss": 0.4868,
      "step": 277
    },
    {
      "epoch": 1.34924965893588,
      "grad_norm": 0.4325881770640798,
      "learning_rate": 7.316908139175105e-05,
      "loss": 0.4886,
      "step": 278
    },
    {
      "epoch": 1.3541003486433227,
      "grad_norm": 0.342133338929163,
      "learning_rate": 7.309312521025356e-05,
      "loss": 0.4909,
      "step": 279
    },
    {
      "epoch": 1.3589510383507655,
      "grad_norm": 0.4415991996544984,
      "learning_rate": 7.301678894645742e-05,
      "loss": 0.4915,
      "step": 280
    },
    {
      "epoch": 1.3638017280582082,
      "grad_norm": 0.510599069072842,
      "learning_rate": 7.294007347710251e-05,
      "loss": 0.4935,
      "step": 281
    },
    {
      "epoch": 1.368652417765651,
      "grad_norm": 0.5220646005978747,
      "learning_rate": 7.286297968328397e-05,
      "loss": 0.4834,
      "step": 282
    },
    {
      "epoch": 1.373503107473094,
      "grad_norm": 0.4836929320122624,
      "learning_rate": 7.27855084504421e-05,
      "loss": 0.4953,
      "step": 283
    },
    {
      "epoch": 1.3783537971805366,
      "grad_norm": 0.4678184328008887,
      "learning_rate": 7.270766066835217e-05,
      "loss": 0.4872,
      "step": 284
    },
    {
      "epoch": 1.3832044868879794,
      "grad_norm": 0.4479819209137706,
      "learning_rate": 7.262943723111419e-05,
      "loss": 0.4916,
      "step": 285
    },
    {
      "epoch": 1.388055176595422,
      "grad_norm": 0.44661460126650243,
      "learning_rate": 7.255083903714266e-05,
      "loss": 0.4866,
      "step": 286
    },
    {
      "epoch": 1.3929058663028648,
      "grad_norm": 0.46700839086853646,
      "learning_rate": 7.247186698915625e-05,
      "loss": 0.4879,
      "step": 287
    },
    {
      "epoch": 1.3977565560103078,
      "grad_norm": 0.5111356334340853,
      "learning_rate": 7.239252199416749e-05,
      "loss": 0.4812,
      "step": 288
    },
    {
      "epoch": 1.4026072457177505,
      "grad_norm": 0.5198819509993116,
      "learning_rate": 7.23128049634722e-05,
      "loss": 0.4809,
      "step": 289
    },
    {
      "epoch": 1.4074579354251933,
      "grad_norm": 0.5071119674882796,
      "learning_rate": 7.223271681263916e-05,
      "loss": 0.4839,
      "step": 290
    },
    {
      "epoch": 1.412308625132636,
      "grad_norm": 0.4871736388487796,
      "learning_rate": 7.215225846149957e-05,
      "loss": 0.4899,
      "step": 291
    },
    {
      "epoch": 1.4171593148400787,
      "grad_norm": 0.4644477177967194,
      "learning_rate": 7.207143083413643e-05,
      "loss": 0.4865,
      "step": 292
    },
    {
      "epoch": 1.4220100045475217,
      "grad_norm": 0.5305010693129898,
      "learning_rate": 7.1990234858874e-05,
      "loss": 0.4876,
      "step": 293
    },
    {
      "epoch": 1.4268606942549644,
      "grad_norm": 0.5381812012245146,
      "learning_rate": 7.190867146826707e-05,
      "loss": 0.4936,
      "step": 294
    },
    {
      "epoch": 1.4317113839624072,
      "grad_norm": 0.42926452605809334,
      "learning_rate": 7.182674159909031e-05,
      "loss": 0.4845,
      "step": 295
    },
    {
      "epoch": 1.43656207366985,
      "grad_norm": 0.42959757228135126,
      "learning_rate": 7.174444619232745e-05,
      "loss": 0.4952,
      "step": 296
    },
    {
      "epoch": 1.4414127633772926,
      "grad_norm": 0.4994771109955089,
      "learning_rate": 7.166178619316056e-05,
      "loss": 0.4912,
      "step": 297
    },
    {
      "epoch": 1.4462634530847356,
      "grad_norm": 0.4676921077663137,
      "learning_rate": 7.157876255095906e-05,
      "loss": 0.4875,
      "step": 298
    },
    {
      "epoch": 1.4511141427921783,
      "grad_norm": 0.430209471088095,
      "learning_rate": 7.149537621926895e-05,
      "loss": 0.4862,
      "step": 299
    },
    {
      "epoch": 1.455964832499621,
      "grad_norm": 0.5267214852499816,
      "learning_rate": 7.14116281558018e-05,
      "loss": 0.4879,
      "step": 300
    },
    {
      "epoch": 1.4608155222070638,
      "grad_norm": 0.5169308603441447,
      "learning_rate": 7.132751932242376e-05,
      "loss": 0.4984,
      "step": 301
    },
    {
      "epoch": 1.4656662119145065,
      "grad_norm": 0.3435076728807633,
      "learning_rate": 7.124305068514444e-05,
      "loss": 0.487,
      "step": 302
    },
    {
      "epoch": 1.4705169016219495,
      "grad_norm": 0.2662958717194974,
      "learning_rate": 7.1158223214106e-05,
      "loss": 0.4878,
      "step": 303
    },
    {
      "epoch": 1.4753675913293922,
      "grad_norm": 0.31770648802942325,
      "learning_rate": 7.107303788357177e-05,
      "loss": 0.4819,
      "step": 304
    },
    {
      "epoch": 1.480218281036835,
      "grad_norm": 0.29599579651368213,
      "learning_rate": 7.098749567191527e-05,
      "loss": 0.4852,
      "step": 305
    },
    {
      "epoch": 1.4850689707442777,
      "grad_norm": 0.3093179833213525,
      "learning_rate": 7.090159756160886e-05,
      "loss": 0.4877,
      "step": 306
    },
    {
      "epoch": 1.4899196604517204,
      "grad_norm": 0.38971280696993216,
      "learning_rate": 7.081534453921242e-05,
      "loss": 0.4852,
      "step": 307
    },
    {
      "epoch": 1.4947703501591634,
      "grad_norm": 0.3825128078756503,
      "learning_rate": 7.072873759536217e-05,
      "loss": 0.4913,
      "step": 308
    },
    {
      "epoch": 1.499621039866606,
      "grad_norm": 0.2886624346515238,
      "learning_rate": 7.064177772475912e-05,
      "loss": 0.4798,
      "step": 309
    },
    {
      "epoch": 1.5044717295740488,
      "grad_norm": 0.3712678777863381,
      "learning_rate": 7.05544659261578e-05,
      "loss": 0.4867,
      "step": 310
    },
    {
      "epoch": 1.5093224192814916,
      "grad_norm": 0.42331161544955054,
      "learning_rate": 7.046680320235466e-05,
      "loss": 0.4871,
      "step": 311
    },
    {
      "epoch": 1.5141731089889343,
      "grad_norm": 0.4018226592697959,
      "learning_rate": 7.037879056017663e-05,
      "loss": 0.4842,
      "step": 312
    },
    {
      "epoch": 1.5190237986963773,
      "grad_norm": 0.3959692369182176,
      "learning_rate": 7.029042901046952e-05,
      "loss": 0.4802,
      "step": 313
    },
    {
      "epoch": 1.5238744884038198,
      "grad_norm": 0.4052319536586186,
      "learning_rate": 7.020171956808645e-05,
      "loss": 0.4859,
      "step": 314
    },
    {
      "epoch": 1.5287251781112627,
      "grad_norm": 0.40673400305291324,
      "learning_rate": 7.011266325187615e-05,
      "loss": 0.496,
      "step": 315
    },
    {
      "epoch": 1.5335758678187055,
      "grad_norm": 0.44258718402573904,
      "learning_rate": 7.002326108467129e-05,
      "loss": 0.4864,
      "step": 316
    },
    {
      "epoch": 1.5384265575261482,
      "grad_norm": 0.4022422201093314,
      "learning_rate": 6.993351409327672e-05,
      "loss": 0.4763,
      "step": 317
    },
    {
      "epoch": 1.5432772472335912,
      "grad_norm": 0.39107692009497685,
      "learning_rate": 6.984342330845764e-05,
      "loss": 0.4952,
      "step": 318
    },
    {
      "epoch": 1.5481279369410337,
      "grad_norm": 0.36401667233363455,
      "learning_rate": 6.975298976492785e-05,
      "loss": 0.4952,
      "step": 319
    },
    {
      "epoch": 1.5529786266484766,
      "grad_norm": 0.3964209144895704,
      "learning_rate": 6.966221450133779e-05,
      "loss": 0.4901,
      "step": 320
    },
    {
      "epoch": 1.5578293163559194,
      "grad_norm": 0.41002635948711413,
      "learning_rate": 6.957109856026261e-05,
      "loss": 0.4917,
      "step": 321
    },
    {
      "epoch": 1.562680006063362,
      "grad_norm": 0.33043042887382146,
      "learning_rate": 6.94796429881903e-05,
      "loss": 0.4771,
      "step": 322
    },
    {
      "epoch": 1.567530695770805,
      "grad_norm": 0.3258167342621945,
      "learning_rate": 6.938784883550948e-05,
      "loss": 0.4889,
      "step": 323
    },
    {
      "epoch": 1.5723813854782476,
      "grad_norm": 0.33864970777234993,
      "learning_rate": 6.929571715649755e-05,
      "loss": 0.4866,
      "step": 324
    },
    {
      "epoch": 1.5772320751856905,
      "grad_norm": 0.3799335436470155,
      "learning_rate": 6.920324900930842e-05,
      "loss": 0.4907,
      "step": 325
    },
    {
      "epoch": 1.5820827648931333,
      "grad_norm": 0.38649667346458383,
      "learning_rate": 6.911044545596042e-05,
      "loss": 0.4854,
      "step": 326
    },
    {
      "epoch": 1.586933454600576,
      "grad_norm": 0.3332457174533195,
      "learning_rate": 6.901730756232411e-05,
      "loss": 0.4895,
      "step": 327
    },
    {
      "epoch": 1.591784144308019,
      "grad_norm": 0.3688581464993942,
      "learning_rate": 6.892383639811005e-05,
      "loss": 0.4958,
      "step": 328
    },
    {
      "epoch": 1.5966348340154615,
      "grad_norm": 0.43127701691860393,
      "learning_rate": 6.883003303685644e-05,
      "loss": 0.4844,
      "step": 329
    },
    {
      "epoch": 1.6014855237229044,
      "grad_norm": 0.5098788849460419,
      "learning_rate": 6.87358985559169e-05,
      "loss": 0.489,
      "step": 330
    },
    {
      "epoch": 1.6063362134303472,
      "grad_norm": 0.531466303384909,
      "learning_rate": 6.864143403644797e-05,
      "loss": 0.4945,
      "step": 331
    },
    {
      "epoch": 1.61118690313779,
      "grad_norm": 0.4575057116450561,
      "learning_rate": 6.85466405633968e-05,
      "loss": 0.4855,
      "step": 332
    },
    {
      "epoch": 1.6160375928452326,
      "grad_norm": 0.42418027914564915,
      "learning_rate": 6.845151922548865e-05,
      "loss": 0.4783,
      "step": 333
    },
    {
      "epoch": 1.6208882825526754,
      "grad_norm": 0.38431703300530295,
      "learning_rate": 6.835607111521439e-05,
      "loss": 0.4796,
      "step": 334
    },
    {
      "epoch": 1.6257389722601183,
      "grad_norm": 0.325044588754403,
      "learning_rate": 6.826029732881793e-05,
      "loss": 0.4928,
      "step": 335
    },
    {
      "epoch": 1.630589661967561,
      "grad_norm": 0.2723847494193817,
      "learning_rate": 6.816419896628363e-05,
      "loss": 0.4851,
      "step": 336
    },
    {
      "epoch": 1.6354403516750038,
      "grad_norm": 0.25325170865464947,
      "learning_rate": 6.806777713132374e-05,
      "loss": 0.4826,
      "step": 337
    },
    {
      "epoch": 1.6402910413824465,
      "grad_norm": 0.2793387163645126,
      "learning_rate": 6.79710329313656e-05,
      "loss": 0.4873,
      "step": 338
    },
    {
      "epoch": 1.6451417310898893,
      "grad_norm": 0.38835730960557174,
      "learning_rate": 6.787396747753903e-05,
      "loss": 0.4744,
      "step": 339
    },
    {
      "epoch": 1.6499924207973322,
      "grad_norm": 0.5896470389457479,
      "learning_rate": 6.777658188466354e-05,
      "loss": 0.4765,
      "step": 340
    },
    {
      "epoch": 1.654843110504775,
      "grad_norm": 0.7717199781637745,
      "learning_rate": 6.767887727123544e-05,
| "loss": 0.4931, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 1.6596938002122177, | |
| "grad_norm": 1.0491183054565791, | |
| "learning_rate": 6.758085475941516e-05, | |
| "loss": 0.4875, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.6645444899196604, | |
| "grad_norm": 1.0963514331569248, | |
| "learning_rate": 6.748251547501418e-05, | |
| "loss": 0.4783, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 1.6693951796271032, | |
| "grad_norm": 0.6008540810802777, | |
| "learning_rate": 6.738386054748226e-05, | |
| "loss": 0.4836, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.674245869334546, | |
| "grad_norm": 0.4448291346545642, | |
| "learning_rate": 6.728489110989434e-05, | |
| "loss": 0.4883, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.6790965590419886, | |
| "grad_norm": 0.4942617693141126, | |
| "learning_rate": 6.718560829893762e-05, | |
| "loss": 0.4799, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.6839472487494316, | |
| "grad_norm": 0.4996733158915889, | |
| "learning_rate": 6.708601325489844e-05, | |
| "loss": 0.4872, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.6887979384568743, | |
| "grad_norm": 0.3715020675799419, | |
| "learning_rate": 6.698610712164924e-05, | |
| "loss": 0.4864, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.693648628164317, | |
| "grad_norm": 0.27977348496462506, | |
| "learning_rate": 6.688589104663536e-05, | |
| "loss": 0.4731, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.69849931787176, | |
| "grad_norm": 0.3329757061053683, | |
| "learning_rate": 6.67853661808619e-05, | |
| "loss": 0.4771, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.7033500075792025, | |
| "grad_norm": 0.35849519050184514, | |
| "learning_rate": 6.668453367888052e-05, | |
| "loss": 0.4867, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.7082006972866455, | |
| "grad_norm": 0.3093064728603477, | |
| "learning_rate": 6.658339469877613e-05, | |
| "loss": 0.478, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.7130513869940882, | |
| "grad_norm": 0.2983932071893944, | |
| "learning_rate": 6.64819504021536e-05, | |
| "loss": 0.4814, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.717902076701531, | |
| "grad_norm": 0.32895472335219694, | |
| "learning_rate": 6.638020195412448e-05, | |
| "loss": 0.4771, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.722752766408974, | |
| "grad_norm": 0.4277599821762847, | |
| "learning_rate": 6.627815052329354e-05, | |
| "loss": 0.4925, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.7276034561164164, | |
| "grad_norm": 0.4690652488456737, | |
| "learning_rate": 6.617579728174535e-05, | |
| "loss": 0.4854, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.7324541458238594, | |
| "grad_norm": 0.43487992599318925, | |
| "learning_rate": 6.60731434050309e-05, | |
| "loss": 0.4777, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.737304835531302, | |
| "grad_norm": 0.4398837483674929, | |
| "learning_rate": 6.597019007215401e-05, | |
| "loss": 0.4783, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.7421555252387448, | |
| "grad_norm": 0.43050412759021583, | |
| "learning_rate": 6.586693846555788e-05, | |
| "loss": 0.4743, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.7470062149461878, | |
| "grad_norm": 0.40202174280112624, | |
| "learning_rate": 6.576338977111134e-05, | |
| "loss": 0.48, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.7518569046536303, | |
| "grad_norm": 0.36259944082265505, | |
| "learning_rate": 6.565954517809543e-05, | |
| "loss": 0.4747, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.7567075943610733, | |
| "grad_norm": 0.28300782787905476, | |
| "learning_rate": 6.555540587918968e-05, | |
| "loss": 0.4778, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.761558284068516, | |
| "grad_norm": 0.27286805994349533, | |
| "learning_rate": 6.545097307045831e-05, | |
| "loss": 0.4795, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.7664089737759587, | |
| "grad_norm": 0.31934491384929364, | |
| "learning_rate": 6.534624795133662e-05, | |
| "loss": 0.4851, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.7712596634834017, | |
| "grad_norm": 0.35245692182661065, | |
| "learning_rate": 6.524123172461711e-05, | |
| "loss": 0.4794, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.7761103531908442, | |
| "grad_norm": 0.3396944161317505, | |
| "learning_rate": 6.51359255964358e-05, | |
| "loss": 0.4774, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.7809610428982872, | |
| "grad_norm": 0.2877754553279699, | |
| "learning_rate": 6.503033077625824e-05, | |
| "loss": 0.4746, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.78581173260573, | |
| "grad_norm": 0.3326775146217252, | |
| "learning_rate": 6.492444847686566e-05, | |
| "loss": 0.4849, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.7906624223131726, | |
| "grad_norm": 0.419702080624426, | |
| "learning_rate": 6.481827991434111e-05, | |
| "loss": 0.4814, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.7955131120206156, | |
| "grad_norm": 0.43780909545325103, | |
| "learning_rate": 6.471182630805538e-05, | |
| "loss": 0.4813, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.800363801728058, | |
| "grad_norm": 0.5080371772111751, | |
| "learning_rate": 6.460508888065314e-05, | |
| "loss": 0.4865, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 1.805214491435501, | |
| "grad_norm": 0.5541771901573003, | |
| "learning_rate": 6.449806885803873e-05, | |
| "loss": 0.4752, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.8100651811429438, | |
| "grad_norm": 0.5077930985424878, | |
| "learning_rate": 6.439076746936219e-05, | |
| "loss": 0.4776, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 1.8149158708503865, | |
| "grad_norm": 0.3974055597915673, | |
| "learning_rate": 6.428318594700509e-05, | |
| "loss": 0.4833, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.8197665605578295, | |
| "grad_norm": 0.27838793632255576, | |
| "learning_rate": 6.417532552656647e-05, | |
| "loss": 0.4808, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.824617250265272, | |
| "grad_norm": 0.25531453372215107, | |
| "learning_rate": 6.406718744684851e-05, | |
| "loss": 0.475, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.829467939972715, | |
| "grad_norm": 0.37619306197372576, | |
| "learning_rate": 6.395877294984241e-05, | |
| "loss": 0.4718, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 1.8343186296801577, | |
| "grad_norm": 0.45314622671289495, | |
| "learning_rate": 6.385008328071406e-05, | |
| "loss": 0.4858, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.8391693193876004, | |
| "grad_norm": 0.4091697946782105, | |
| "learning_rate": 6.374111968778982e-05, | |
| "loss": 0.4797, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 1.8440200090950432, | |
| "grad_norm": 0.31229108750141443, | |
| "learning_rate": 6.363188342254206e-05, | |
| "loss": 0.4819, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.848870698802486, | |
| "grad_norm": 0.29389799920114323, | |
| "learning_rate": 6.352237573957488e-05, | |
| "loss": 0.4777, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 1.8537213885099288, | |
| "grad_norm": 0.31830405845533455, | |
| "learning_rate": 6.341259789660969e-05, | |
| "loss": 0.4812, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.8585720782173716, | |
| "grad_norm": 0.2783419658252336, | |
| "learning_rate": 6.330255115447076e-05, | |
| "loss": 0.4722, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 1.8634227679248143, | |
| "grad_norm": 0.252195902565345, | |
| "learning_rate": 6.319223677707069e-05, | |
| "loss": 0.4786, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.868273457632257, | |
| "grad_norm": 0.24931429544692238, | |
| "learning_rate": 6.308165603139598e-05, | |
| "loss": 0.4766, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.8731241473396998, | |
| "grad_norm": 0.343056469344131, | |
| "learning_rate": 6.29708101874924e-05, | |
| "loss": 0.4746, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.8779748370471427, | |
| "grad_norm": 0.39163026769861153, | |
| "learning_rate": 6.285970051845045e-05, | |
| "loss": 0.4777, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 1.8828255267545853, | |
| "grad_norm": 0.3925083138992103, | |
| "learning_rate": 6.274832830039071e-05, | |
| "loss": 0.4762, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.8876762164620282, | |
| "grad_norm": 0.3908015146094751, | |
| "learning_rate": 6.26366948124492e-05, | |
| "loss": 0.4882, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 1.892526906169471, | |
| "grad_norm": 0.38768255703534454, | |
| "learning_rate": 6.25248013367627e-05, | |
| "loss": 0.4746, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.8973775958769137, | |
| "grad_norm": 0.34797592572865116, | |
| "learning_rate": 6.241264915845401e-05, | |
| "loss": 0.4863, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 1.9022282855843566, | |
| "grad_norm": 0.2939167601152598, | |
| "learning_rate": 6.230023956561716e-05, | |
| "loss": 0.4803, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.9070789752917991, | |
| "grad_norm": 0.2683598696324213, | |
| "learning_rate": 6.218757384930268e-05, | |
| "loss": 0.4769, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.911929664999242, | |
| "grad_norm": 0.2963955328357841, | |
| "learning_rate": 6.207465330350273e-05, | |
| "loss": 0.4798, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.9167803547066848, | |
| "grad_norm": 0.34936564317737695, | |
| "learning_rate": 6.196147922513623e-05, | |
| "loss": 0.479, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.9216310444141276, | |
| "grad_norm": 0.3023947037005716, | |
| "learning_rate": 6.184805291403402e-05, | |
| "loss": 0.4776, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.9264817341215705, | |
| "grad_norm": 0.23472105439779495, | |
| "learning_rate": 6.173437567292383e-05, | |
| "loss": 0.475, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 1.931332423829013, | |
| "grad_norm": 0.30151702935680424, | |
| "learning_rate": 6.162044880741544e-05, | |
| "loss": 0.4719, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.936183113536456, | |
| "grad_norm": 0.4380813843564537, | |
| "learning_rate": 6.150627362598557e-05, | |
| "loss": 0.4871, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.9410338032438987, | |
| "grad_norm": 0.5034940901090467, | |
| "learning_rate": 6.139185143996298e-05, | |
| "loss": 0.4806, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.9458844929513415, | |
| "grad_norm": 0.44831770290541656, | |
| "learning_rate": 6.127718356351326e-05, | |
| "loss": 0.478, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 1.9507351826587844, | |
| "grad_norm": 0.3881020850233725, | |
| "learning_rate": 6.116227131362385e-05, | |
| "loss": 0.4714, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.955585872366227, | |
| "grad_norm": 0.3382903208345561, | |
| "learning_rate": 6.104711601008888e-05, | |
| "loss": 0.4779, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 1.96043656207367, | |
| "grad_norm": 0.2830781275191087, | |
| "learning_rate": 6.0931718975493985e-05, | |
| "loss": 0.4846, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.9652872517811126, | |
| "grad_norm": 0.27767803820547865, | |
| "learning_rate": 6.081608153520117e-05, | |
| "loss": 0.4691, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.9701379414885554, | |
| "grad_norm": 0.35527234014372044, | |
| "learning_rate": 6.0700205017333525e-05, | |
| "loss": 0.4787, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.9749886311959983, | |
| "grad_norm": 0.34300192919407774, | |
| "learning_rate": 6.058409075276002e-05, | |
| "loss": 0.4689, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 1.9798393209034408, | |
| "grad_norm": 0.2964726842271146, | |
| "learning_rate": 6.046774007508019e-05, | |
| "loss": 0.475, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.9846900106108838, | |
| "grad_norm": 0.2502113324820329, | |
| "learning_rate": 6.035115432060883e-05, | |
| "loss": 0.4747, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 1.9895407003183265, | |
| "grad_norm": 0.23471156048166733, | |
| "learning_rate": 6.0234334828360655e-05, | |
| "loss": 0.4786, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.9943913900257693, | |
| "grad_norm": 0.27137410563019304, | |
| "learning_rate": 6.011728294003494e-05, | |
| "loss": 0.4802, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 1.9992420797332122, | |
| "grad_norm": 0.2960970008273601, | |
| "learning_rate": 6.000000000000001e-05, | |
| "loss": 0.4768, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 2.004850689707443, | |
| "grad_norm": 0.33644070901215145, | |
| "learning_rate": 5.988248735527793e-05, | |
| "loss": 0.4473, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 2.0097013794148855, | |
| "grad_norm": 0.39546749871672404, | |
| "learning_rate": 5.9764746355528994e-05, | |
| "loss": 0.4501, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 2.0145520691223284, | |
| "grad_norm": 0.47967552460987467, | |
| "learning_rate": 5.964677835303615e-05, | |
| "loss": 0.4483, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 2.019402758829771, | |
| "grad_norm": 0.43081443925289625, | |
| "learning_rate": 5.952858470268955e-05, | |
| "loss": 0.4468, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 2.024253448537214, | |
| "grad_norm": 0.42692273173821377, | |
| "learning_rate": 5.941016676197098e-05, | |
| "loss": 0.4499, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 2.029104138244657, | |
| "grad_norm": 0.5203095933335755, | |
| "learning_rate": 5.929152589093825e-05, | |
| "loss": 0.4498, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 2.0339548279520994, | |
| "grad_norm": 0.5390774215298352, | |
| "learning_rate": 5.9172663452209554e-05, | |
| "loss": 0.449, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 2.0388055176595423, | |
| "grad_norm": 0.5618164866931807, | |
| "learning_rate": 5.9053580810947845e-05, | |
| "loss": 0.4555, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 2.043656207366985, | |
| "grad_norm": 0.6473211411897097, | |
| "learning_rate": 5.89342793348452e-05, | |
| "loss": 0.4537, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 2.048506897074428, | |
| "grad_norm": 0.7641174380933736, | |
| "learning_rate": 5.881476039410699e-05, | |
| "loss": 0.4579, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 2.0533575867818707, | |
| "grad_norm": 0.6589276143277975, | |
| "learning_rate": 5.869502536143629e-05, | |
| "loss": 0.4478, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 2.0582082764893133, | |
| "grad_norm": 0.4055360078631115, | |
| "learning_rate": 5.857507561201802e-05, | |
| "loss": 0.4501, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 2.063058966196756, | |
| "grad_norm": 0.3576976481492298, | |
| "learning_rate": 5.845491252350312e-05, | |
| "loss": 0.4479, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 2.0679096559041987, | |
| "grad_norm": 0.444390387775824, | |
| "learning_rate": 5.833453747599286e-05, | |
| "loss": 0.4466, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 2.0727603456116417, | |
| "grad_norm": 0.38706062123939283, | |
| "learning_rate": 5.821395185202285e-05, | |
| "loss": 0.449, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 2.0776110353190846, | |
| "grad_norm": 0.3243650769978332, | |
| "learning_rate": 5.809315703654726e-05, | |
| "loss": 0.4581, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 2.082461725026527, | |
| "grad_norm": 0.35862489527477903, | |
| "learning_rate": 5.797215441692284e-05, | |
| "loss": 0.4534, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 2.08731241473397, | |
| "grad_norm": 0.3353344776943914, | |
| "learning_rate": 5.785094538289304e-05, | |
| "loss": 0.4537, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 2.0921631044414126, | |
| "grad_norm": 0.3620686259692887, | |
| "learning_rate": 5.772953132657202e-05, | |
| "loss": 0.4553, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 2.0970137941488556, | |
| "grad_norm": 0.307403523851727, | |
| "learning_rate": 5.7607913642428666e-05, | |
| "loss": 0.4424, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 2.101864483856298, | |
| "grad_norm": 0.28710201149568576, | |
| "learning_rate": 5.7486093727270606e-05, | |
| "loss": 0.4462, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 2.106715173563741, | |
| "grad_norm": 0.31451691469735704, | |
| "learning_rate": 5.736407298022809e-05, | |
| "loss": 0.4434, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 2.111565863271184, | |
| "grad_norm": 0.3105010146819863, | |
| "learning_rate": 5.7241852802738e-05, | |
| "loss": 0.4533, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 2.1164165529786265, | |
| "grad_norm": 0.2878408032383936, | |
| "learning_rate": 5.711943459852772e-05, | |
| "loss": 0.4427, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 2.1212672426860695, | |
| "grad_norm": 0.3026608573456508, | |
| "learning_rate": 5.699681977359902e-05, | |
| "loss": 0.4385, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 2.1261179323935124, | |
| "grad_norm": 0.2857208984253648, | |
| "learning_rate": 5.6874009736211896e-05, | |
| "loss": 0.4465, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 2.130968622100955, | |
| "grad_norm": 0.28786201316205207, | |
| "learning_rate": 5.675100589686839e-05, | |
| "loss": 0.4472, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 2.135819311808398, | |
| "grad_norm": 0.3397455432854385, | |
| "learning_rate": 5.662780966829646e-05, | |
| "loss": 0.4486, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 2.1406700015158404, | |
| "grad_norm": 0.3060713829784068, | |
| "learning_rate": 5.650442246543364e-05, | |
| "loss": 0.4525, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 2.1455206912232834, | |
| "grad_norm": 0.2554715156912105, | |
| "learning_rate": 5.638084570541088e-05, | |
| "loss": 0.4451, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 2.150371380930726, | |
| "grad_norm": 0.31763468046624377, | |
| "learning_rate": 5.625708080753621e-05, | |
| "loss": 0.455, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 2.155222070638169, | |
| "grad_norm": 0.3288505103534938, | |
| "learning_rate": 5.6133129193278525e-05, | |
| "loss": 0.4453, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 2.160072760345612, | |
| "grad_norm": 0.27302062189682574, | |
| "learning_rate": 5.600899228625112e-05, | |
| "loss": 0.4523, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 2.1649234500530543, | |
| "grad_norm": 0.2367917713116079, | |
| "learning_rate": 5.588467151219549e-05, | |
| "loss": 0.4481, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 2.1697741397604973, | |
| "grad_norm": 0.27892881990044693, | |
| "learning_rate": 5.5760168298964874e-05, | |
| "loss": 0.4397, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 2.17462482946794, | |
| "grad_norm": 0.3659363982261399, | |
| "learning_rate": 5.563548407650782e-05, | |
| "loss": 0.4464, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 2.1794755191753827, | |
| "grad_norm": 0.3139974928184246, | |
| "learning_rate": 5.551062027685187e-05, | |
| "loss": 0.4487, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 2.1843262088828257, | |
| "grad_norm": 0.25894377780084493, | |
| "learning_rate": 5.5385578334087006e-05, | |
| "loss": 0.4481, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 2.189176898590268, | |
| "grad_norm": 0.17951964246489394, | |
| "learning_rate": 5.526035968434927e-05, | |
| "loss": 0.4469, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 2.194027588297711, | |
| "grad_norm": 0.21554515294483917, | |
| "learning_rate": 5.513496576580418e-05, | |
| "loss": 0.4573, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 2.1988782780051537, | |
| "grad_norm": 0.24799983544152385, | |
| "learning_rate": 5.5009398018630276e-05, | |
| "loss": 0.4498, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 2.2037289677125966, | |
| "grad_norm": 0.186973375547793, | |
| "learning_rate": 5.4883657885002575e-05, | |
| "loss": 0.4449, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 2.2085796574200396, | |
| "grad_norm": 0.1888483688795653, | |
| "learning_rate": 5.475774680907597e-05, | |
| "loss": 0.443, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 2.213430347127482, | |
| "grad_norm": 0.18914695692387654, | |
| "learning_rate": 5.463166623696868e-05, | |
| "loss": 0.4434, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 2.218281036834925, | |
| "grad_norm": 0.19070268199404652, | |
| "learning_rate": 5.450541761674562e-05, | |
| "loss": 0.4445, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 2.2231317265423676, | |
| "grad_norm": 0.2259939851217927, | |
| "learning_rate": 5.437900239840179e-05, | |
| "loss": 0.4465, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 2.2279824162498105, | |
| "grad_norm": 0.18023421500987896, | |
| "learning_rate": 5.42524220338456e-05, | |
| "loss": 0.4453, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 2.2328331059572535, | |
| "grad_norm": 0.16598518940751159, | |
| "learning_rate": 5.412567797688219e-05, | |
| "loss": 0.4498, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 2.237683795664696, | |
| "grad_norm": 0.16004882092407235, | |
| "learning_rate": 5.3998771683196754e-05, | |
| "loss": 0.4527, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 2.242534485372139, | |
| "grad_norm": 0.17890202722776521, | |
| "learning_rate": 5.3871704610337836e-05, | |
| "loss": 0.4444, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 2.2473851750795815, | |
| "grad_norm": 0.182744670257566, | |
| "learning_rate": 5.374447821770053e-05, | |
| "loss": 0.4431, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 2.2522358647870244, | |
| "grad_norm": 0.19342699163866056, | |
| "learning_rate": 5.361709396650977e-05, | |
| "loss": 0.4404, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 2.2570865544944674, | |
| "grad_norm": 0.20442779398031627, | |
| "learning_rate": 5.3489553319803566e-05, | |
| "loss": 0.4496, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 2.26193724420191, | |
| "grad_norm": 0.23359410284964036, | |
| "learning_rate": 5.336185774241609e-05, | |
| "loss": 0.4469, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 2.266787933909353, | |
| "grad_norm": 0.23295417695606166, | |
| "learning_rate": 5.3234008700961e-05, | |
| "loss": 0.4505, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 2.2716386236167954, | |
| "grad_norm": 0.20207094824496044, | |
| "learning_rate": 5.3106007663814505e-05, | |
| "loss": 0.4406, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 2.2764893133242383, | |
| "grad_norm": 0.1850252157104855, | |
| "learning_rate": 5.2977856101098484e-05, | |
| "loss": 0.4525, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 2.281340003031681, | |
| "grad_norm": 0.1821206965545461, | |
| "learning_rate": 5.284955548466371e-05, | |
| "loss": 0.4592, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 2.286190692739124, | |
| "grad_norm": 0.1913435003815255, | |
| "learning_rate": 5.272110728807279e-05, | |
| "loss": 0.4459, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 2.2910413824465667, | |
| "grad_norm": 0.17908151714339782, | |
| "learning_rate": 5.25925129865834e-05, | |
| "loss": 0.4523, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 2.2958920721540093, | |
| "grad_norm": 0.17796456682985312, | |
| "learning_rate": 5.246377405713121e-05, | |
| "loss": 0.4426, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 2.300742761861452, | |
| "grad_norm": 0.16950491734508644, | |
| "learning_rate": 5.2334891978313006e-05, | |
| "loss": 0.4426, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 2.305593451568895, | |
| "grad_norm": 0.18036359667208995, | |
| "learning_rate": 5.220586823036966e-05, | |
| "loss": 0.4458, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 2.3104441412763377, | |
| "grad_norm": 0.1873060682555774, | |
| "learning_rate": 5.207670429516915e-05, | |
| "loss": 0.4433, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 2.3152948309837806, | |
| "grad_norm": 0.1986452939709168, | |
| "learning_rate": 5.1947401656189546e-05, | |
| "loss": 0.4593, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 2.320145520691223, | |
| "grad_norm": 0.20494449288937291, | |
| "learning_rate": 5.181796179850197e-05, | |
| "loss": 0.4424, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 2.324996210398666, | |
| "grad_norm": 0.17952910191793728, | |
| "learning_rate": 5.168838620875352e-05, | |
| "loss": 0.4503, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 2.3298469001061086, | |
| "grad_norm": 0.1909701609269039, | |
| "learning_rate": 5.155867637515019e-05, | |
| "loss": 0.4506, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 2.3346975898135516, | |
| "grad_norm": 0.1838298898954926, | |
| "learning_rate": 5.142883378743984e-05, | |
| "loss": 0.4513, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 2.3395482795209945, | |
| "grad_norm": 0.1818267641683358, | |
| "learning_rate": 5.129885993689502e-05, | |
| "loss": 0.4488, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 2.344398969228437, | |
| "grad_norm": 0.21197375880432345, | |
| "learning_rate": 5.116875631629585e-05, | |
| "loss": 0.4456, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 2.34924965893588, | |
| "grad_norm": 0.21240893965447508, | |
| "learning_rate": 5.10385244199129e-05, | |
| "loss": 0.4386, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 2.354100348643323, | |
| "grad_norm": 0.19244612255162405, | |
| "learning_rate": 5.0908165743490047e-05, | |
| "loss": 0.4482, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 2.3589510383507655, | |
| "grad_norm": 0.22440529731925618, | |
| "learning_rate": 5.0777681784227224e-05, | |
| "loss": 0.4496, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 2.3638017280582084, | |
| "grad_norm": 0.249440062974833, | |
| "learning_rate": 5.064707404076327e-05, | |
| "loss": 0.4502, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 2.368652417765651, | |
| "grad_norm": 0.2374206142112278, | |
| "learning_rate": 5.051634401315875e-05, | |
| "loss": 0.448, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 2.373503107473094, | |
| "grad_norm": 0.21044332969367502, | |
| "learning_rate": 5.0385493202878656e-05, | |
| "loss": 0.4416, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 2.3783537971805364, | |
| "grad_norm": 0.15343545111269605, | |
| "learning_rate": 5.025452311277522e-05, | |
| "loss": 0.4413, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 2.3832044868879794, | |
| "grad_norm": 0.17472771019103053, | |
| "learning_rate": 5.01234352470706e-05, | |
| "loss": 0.4472, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 2.3880551765954223, | |
| "grad_norm": 0.2225509747823868, | |
| "learning_rate": 4.999223111133968e-05, | |
| "loss": 0.4405, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 2.392905866302865, | |
| "grad_norm": 0.27110633436791925, | |
| "learning_rate": 4.986091221249269e-05, | |
| "loss": 0.44, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 2.397756556010308, | |
| "grad_norm": 0.24255464597168586, | |
| "learning_rate": 4.972948005875796e-05, | |
| "loss": 0.4432, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 2.4026072457177503, | |
| "grad_norm": 0.2496648842091371, | |
| "learning_rate": 4.959793615966459e-05, | |
| "loss": 0.4401, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 2.4074579354251933, | |
| "grad_norm": 0.24806426439634907, | |
| "learning_rate": 4.946628202602508e-05, | |
| "loss": 0.4526, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 2.412308625132636, | |
| "grad_norm": 0.21808090914084832, | |
| "learning_rate": 4.933451916991802e-05, | |
| "loss": 0.4474, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 2.4171593148400787, | |
| "grad_norm": 0.19833835766366836, | |
| "learning_rate": 4.920264910467066e-05, | |
| "loss": 0.4485, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 2.4220100045475217, | |
| "grad_norm": 0.1904056029579938, | |
| "learning_rate": 4.9070673344841645e-05, | |
| "loss": 0.4471, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 2.426860694254964, | |
| "grad_norm": 0.17821880940044135, | |
| "learning_rate": 4.893859340620348e-05, | |
| "loss": 0.4518, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.431711383962407, | |
| "grad_norm": 0.16242846601925154, | |
| "learning_rate": 4.880641080572522e-05, | |
| "loss": 0.4426, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 2.43656207366985, | |
| "grad_norm": 0.16230843192633562, | |
| "learning_rate": 4.8674127061555025e-05, | |
| "loss": 0.4492, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 2.4414127633772926, | |
| "grad_norm": 0.18692985874064466, | |
| "learning_rate": 4.8541743693002676e-05, | |
| "loss": 0.4576, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 2.4462634530847356, | |
| "grad_norm": 0.17489245993778632, | |
| "learning_rate": 4.8409262220522196e-05, | |
| "loss": 0.4476, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 2.451114142792178, | |
| "grad_norm": 0.14061959670906948, | |
| "learning_rate": 4.8276684165694336e-05, | |
| "loss": 0.4479, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 2.455964832499621, | |
| "grad_norm": 0.17289206898304424, | |
| "learning_rate": 4.814401105120914e-05, | |
| "loss": 0.4479, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 2.460815522207064, | |
| "grad_norm": 0.21063084112901795, | |
| "learning_rate": 4.8011244400848414e-05, | |
| "loss": 0.4466, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 2.4656662119145065, | |
| "grad_norm": 0.2134287283629687, | |
| "learning_rate": 4.787838573946825e-05, | |
| "loss": 0.4503, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 2.4705169016219495, | |
| "grad_norm": 0.19387557882251144, | |
| "learning_rate": 4.774543659298152e-05, | |
| "loss": 0.4419, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 2.475367591329392, | |
| "grad_norm": 0.1690053079886072, | |
| "learning_rate": 4.761239848834031e-05, | |
| "loss": 0.4443, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 2.480218281036835, | |
| "grad_norm": 0.17406180900609755, | |
| "learning_rate": 4.747927295351845e-05, | |
| "loss": 0.4474, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 2.485068970744278, | |
| "grad_norm": 0.2024050850623432, | |
| "learning_rate": 4.734606151749389e-05, | |
| "loss": 0.4473, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 2.4899196604517204, | |
| "grad_norm": 0.22659357369802574, | |
| "learning_rate": 4.7212765710231204e-05, | |
| "loss": 0.4481, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 2.4947703501591634, | |
| "grad_norm": 0.23595309939097722, | |
| "learning_rate": 4.707938706266397e-05, | |
| "loss": 0.4484, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 2.499621039866606, | |
| "grad_norm": 0.19918584163751257, | |
| "learning_rate": 4.694592710667723e-05, | |
| "loss": 0.444, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 2.504471729574049, | |
| "grad_norm": 0.18418670752131802, | |
| "learning_rate": 4.681238737508983e-05, | |
| "loss": 0.4424, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 2.5093224192814914, | |
| "grad_norm": 0.1926237495649244, | |
| "learning_rate": 4.6678769401636894e-05, | |
| "loss": 0.4444, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 2.5141731089889343, | |
| "grad_norm": 0.20706125086296728, | |
| "learning_rate": 4.6545074720952166e-05, | |
| "loss": 0.456, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 2.5190237986963773, | |
| "grad_norm": 0.17699664563372686, | |
| "learning_rate": 4.641130486855038e-05, | |
| "loss": 0.4396, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 2.52387448840382, | |
| "grad_norm": 0.18317752602670304, | |
| "learning_rate": 4.627746138080966e-05, | |
| "loss": 0.4432, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.5287251781112627, | |
| "grad_norm": 0.2190424482227647, | |
| "learning_rate": 4.614354579495379e-05, | |
| "loss": 0.4448, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 2.5335758678187057, | |
| "grad_norm": 0.20135719119048615, | |
| "learning_rate": 4.6009559649034695e-05, | |
| "loss": 0.4432, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 2.538426557526148, | |
| "grad_norm": 0.20782420010728125, | |
| "learning_rate": 4.587550448191465e-05, | |
| "loss": 0.4474, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 2.543277247233591, | |
| "grad_norm": 0.18668296726800496, | |
| "learning_rate": 4.5741381833248655e-05, | |
| "loss": 0.455, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 2.5481279369410337, | |
| "grad_norm": 0.17935132627421838, | |
| "learning_rate": 4.560719324346677e-05, | |
| "loss": 0.4457, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 2.5529786266484766, | |
| "grad_norm": 0.16835981784522308, | |
| "learning_rate": 4.547294025375641e-05, | |
| "loss": 0.4478, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 2.557829316355919, | |
| "grad_norm": 0.18084487512355504, | |
| "learning_rate": 4.533862440604461e-05, | |
| "loss": 0.447, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 2.562680006063362, | |
| "grad_norm": 0.17384784743298828, | |
| "learning_rate": 4.520424724298036e-05, | |
| "loss": 0.4408, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 2.567530695770805, | |
| "grad_norm": 0.20150460275113774, | |
| "learning_rate": 4.5069810307916874e-05, | |
| "loss": 0.4441, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 2.5723813854782476, | |
| "grad_norm": 0.2240004858996321, | |
| "learning_rate": 4.493531514489385e-05, | |
| "loss": 0.4425, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.5772320751856905, | |
| "grad_norm": 0.2286831099325836, | |
| "learning_rate": 4.480076329861977e-05, | |
| "loss": 0.4433, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 2.5820827648931335, | |
| "grad_norm": 0.2048648110357608, | |
| "learning_rate": 4.46661563144541e-05, | |
| "loss": 0.4487, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 2.586933454600576, | |
| "grad_norm": 0.2121488428415987, | |
| "learning_rate": 4.453149573838962e-05, | |
| "loss": 0.4445, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 2.591784144308019, | |
| "grad_norm": 0.18541474138380978, | |
| "learning_rate": 4.43967831170346e-05, | |
| "loss": 0.4494, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 2.5966348340154615, | |
| "grad_norm": 0.17452762694525445, | |
| "learning_rate": 4.426201999759505e-05, | |
| "loss": 0.4484, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 2.6014855237229044, | |
| "grad_norm": 0.21944224068377363, | |
| "learning_rate": 4.4127207927857e-05, | |
| "loss": 0.4419, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 2.606336213430347, | |
| "grad_norm": 0.17656272332454842, | |
| "learning_rate": 4.3992348456168666e-05, | |
| "loss": 0.4568, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 2.61118690313779, | |
| "grad_norm": 0.1892562653364182, | |
| "learning_rate": 4.385744313142267e-05, | |
| "loss": 0.4427, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 2.616037592845233, | |
| "grad_norm": 0.21611454670373548, | |
| "learning_rate": 4.372249350303828e-05, | |
| "loss": 0.4418, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 2.6208882825526754, | |
| "grad_norm": 0.17168747953224547, | |
| "learning_rate": 4.358750112094363e-05, | |
| "loss": 0.4544, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.6257389722601183, | |
| "grad_norm": 0.17941819138400728, | |
| "learning_rate": 4.3452467535557846e-05, | |
| "loss": 0.4372, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 2.6305896619675613, | |
| "grad_norm": 0.2025265834742146, | |
| "learning_rate": 4.3317394297773304e-05, | |
| "loss": 0.4517, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 2.635440351675004, | |
| "grad_norm": 0.20441246530938206, | |
| "learning_rate": 4.3182282958937816e-05, | |
| "loss": 0.4333, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 2.6402910413824463, | |
| "grad_norm": 0.2334105452950634, | |
| "learning_rate": 4.304713507083673e-05, | |
| "loss": 0.4481, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 2.6451417310898893, | |
| "grad_norm": 0.26291969340773214, | |
| "learning_rate": 4.291195218567523e-05, | |
| "loss": 0.4466, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 2.649992420797332, | |
| "grad_norm": 0.1863631298156993, | |
| "learning_rate": 4.277673585606046e-05, | |
| "loss": 0.4405, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 2.6548431105047747, | |
| "grad_norm": 0.23226855973797117, | |
| "learning_rate": 4.264148763498364e-05, | |
| "loss": 0.4566, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 2.6596938002122177, | |
| "grad_norm": 0.30482274820740174, | |
| "learning_rate": 4.250620907580226e-05, | |
| "loss": 0.4407, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 2.6645444899196606, | |
| "grad_norm": 0.23781311620065457, | |
| "learning_rate": 4.237090173222231e-05, | |
| "loss": 0.4493, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 2.669395179627103, | |
| "grad_norm": 0.1808214801234254, | |
| "learning_rate": 4.223556715828033e-05, | |
| "loss": 0.4511, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.674245869334546, | |
| "grad_norm": 0.26315804734468673, | |
| "learning_rate": 4.2100206908325603e-05, | |
| "loss": 0.447, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 2.6790965590419886, | |
| "grad_norm": 0.25781234163394623, | |
| "learning_rate": 4.196482253700235e-05, | |
| "loss": 0.4415, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 2.6839472487494316, | |
| "grad_norm": 0.17133762584152984, | |
| "learning_rate": 4.182941559923179e-05, | |
| "loss": 0.4457, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 2.688797938456874, | |
| "grad_norm": 0.2266803612041648, | |
| "learning_rate": 4.169398765019433e-05, | |
| "loss": 0.4422, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 2.693648628164317, | |
| "grad_norm": 0.23286738752123257, | |
| "learning_rate": 4.15585402453117e-05, | |
| "loss": 0.4429, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 2.69849931787176, | |
| "grad_norm": 0.20226496811604636, | |
| "learning_rate": 4.14230749402291e-05, | |
| "loss": 0.4421, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 2.7033500075792025, | |
| "grad_norm": 0.21746634743317236, | |
| "learning_rate": 4.128759329079732e-05, | |
| "loss": 0.4318, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 2.7082006972866455, | |
| "grad_norm": 0.24285493960537577, | |
| "learning_rate": 4.115209685305482e-05, | |
| "loss": 0.4374, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 2.7130513869940884, | |
| "grad_norm": 0.20035101285126697, | |
| "learning_rate": 4.101658718320998e-05, | |
| "loss": 0.4429, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 2.717902076701531, | |
| "grad_norm": 0.1733102653989901, | |
| "learning_rate": 4.088106583762309e-05, | |
| "loss": 0.4456, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 2.722752766408974, | |
| "grad_norm": 0.25116764609287723, | |
| "learning_rate": 4.074553437278857e-05, | |
| "loss": 0.4494, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 2.7276034561164164, | |
| "grad_norm": 0.19896329775589092, | |
| "learning_rate": 4.060999434531704e-05, | |
| "loss": 0.4449, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 2.7324541458238594, | |
| "grad_norm": 0.16517536428811208, | |
| "learning_rate": 4.047444731191751e-05, | |
| "loss": 0.4426, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 2.737304835531302, | |
| "grad_norm": 0.1656807626865065, | |
| "learning_rate": 4.033889482937943e-05, | |
| "loss": 0.4445, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 2.742155525238745, | |
| "grad_norm": 0.15103159619749504, | |
| "learning_rate": 4.020333845455478e-05, | |
| "loss": 0.4565, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 2.747006214946188, | |
| "grad_norm": 0.16996820086522443, | |
| "learning_rate": 4.0067779744340345e-05, | |
| "loss": 0.4459, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 2.7518569046536303, | |
| "grad_norm": 0.1495970266083701, | |
| "learning_rate": 3.993222025565966e-05, | |
| "loss": 0.4447, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 2.7567075943610733, | |
| "grad_norm": 0.15458974892236554, | |
| "learning_rate": 3.979666154544522e-05, | |
| "loss": 0.4452, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 2.7615582840685162, | |
| "grad_norm": 0.1741093401099396, | |
| "learning_rate": 3.96611051706206e-05, | |
| "loss": 0.4421, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 2.7664089737759587, | |
| "grad_norm": 0.1819530197226333, | |
| "learning_rate": 3.9525552688082494e-05, | |
| "loss": 0.4509, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 2.7712596634834017, | |
| "grad_norm": 0.14996389947080183, | |
| "learning_rate": 3.939000565468297e-05, | |
| "loss": 0.4442, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 2.776110353190844, | |
| "grad_norm": 0.19015205226216172, | |
| "learning_rate": 3.9254465627211444e-05, | |
| "loss": 0.4458, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 2.780961042898287, | |
| "grad_norm": 0.2147271939320094, | |
| "learning_rate": 3.911893416237693e-05, | |
| "loss": 0.4423, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 2.7858117326057297, | |
| "grad_norm": 0.17368493707493848, | |
| "learning_rate": 3.8983412816790045e-05, | |
| "loss": 0.4415, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 2.7906624223131726, | |
| "grad_norm": 0.18366782229193682, | |
| "learning_rate": 3.8847903146945186e-05, | |
| "loss": 0.4419, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 2.7955131120206156, | |
| "grad_norm": 0.1770373720928735, | |
| "learning_rate": 3.871240670920269e-05, | |
| "loss": 0.4477, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 2.800363801728058, | |
| "grad_norm": 0.15508137985676013, | |
| "learning_rate": 3.85769250597709e-05, | |
| "loss": 0.4458, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 2.805214491435501, | |
| "grad_norm": 0.15518418619016236, | |
| "learning_rate": 3.844145975468832e-05, | |
| "loss": 0.4403, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 2.810065181142944, | |
| "grad_norm": 0.13016021632650948, | |
| "learning_rate": 3.830601234980569e-05, | |
| "loss": 0.4509, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 2.8149158708503865, | |
| "grad_norm": 0.17125198843453068, | |
| "learning_rate": 3.8170584400768224e-05, | |
| "loss": 0.4492, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 2.8197665605578295, | |
| "grad_norm": 0.18182599605587274, | |
| "learning_rate": 3.8035177462997664e-05, | |
| "loss": 0.4475, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 2.824617250265272, | |
| "grad_norm": 0.16612208849559923, | |
| "learning_rate": 3.7899793091674396e-05, | |
| "loss": 0.4419, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 2.829467939972715, | |
| "grad_norm": 0.15504382692612345, | |
| "learning_rate": 3.776443284171969e-05, | |
| "loss": 0.4421, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 2.8343186296801575, | |
| "grad_norm": 0.14158493240403466, | |
| "learning_rate": 3.7629098267777706e-05, | |
| "loss": 0.4399, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 2.8391693193876004, | |
| "grad_norm": 0.14521790840725082, | |
| "learning_rate": 3.7493790924197746e-05, | |
| "loss": 0.4328, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 2.8440200090950434, | |
| "grad_norm": 0.14223117063886642, | |
| "learning_rate": 3.735851236501637e-05, | |
| "loss": 0.4403, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 2.848870698802486, | |
| "grad_norm": 0.13716153813556975, | |
| "learning_rate": 3.722326414393954e-05, | |
| "loss": 0.4375, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 2.853721388509929, | |
| "grad_norm": 0.13481835773066367, | |
| "learning_rate": 3.708804781432478e-05, | |
| "loss": 0.4465, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 2.858572078217372, | |
| "grad_norm": 0.15098861155914894, | |
| "learning_rate": 3.6952864929163286e-05, | |
| "loss": 0.4478, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 2.8634227679248143, | |
| "grad_norm": 0.14428029096945075, | |
| "learning_rate": 3.6817717041062204e-05, | |
| "loss": 0.4433, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 2.868273457632257, | |
| "grad_norm": 0.14941685817838865, | |
| "learning_rate": 3.66826057022267e-05, | |
| "loss": 0.4426, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 2.8731241473397, | |
| "grad_norm": 0.15814915640950794, | |
| "learning_rate": 3.654753246444217e-05, | |
| "loss": 0.437, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 2.8779748370471427, | |
| "grad_norm": 0.1666350361712344, | |
| "learning_rate": 3.641249887905638e-05, | |
| "loss": 0.4404, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 2.8828255267545853, | |
| "grad_norm": 0.14761581189993908, | |
| "learning_rate": 3.627750649696173e-05, | |
| "loss": 0.4418, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 2.887676216462028, | |
| "grad_norm": 0.16433140815176697, | |
| "learning_rate": 3.614255686857734e-05, | |
| "loss": 0.4482, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 2.892526906169471, | |
| "grad_norm": 0.14898117787478918, | |
| "learning_rate": 3.600765154383134e-05, | |
| "loss": 0.4407, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 2.8973775958769137, | |
| "grad_norm": 0.1576311183401583, | |
| "learning_rate": 3.587279207214301e-05, | |
| "loss": 0.4502, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 2.9022282855843566, | |
| "grad_norm": 0.12713739329535242, | |
| "learning_rate": 3.5737980002404965e-05, | |
| "loss": 0.4504, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 2.907078975291799, | |
| "grad_norm": 0.1407162797770253, | |
| "learning_rate": 3.5603216882965415e-05, | |
| "loss": 0.4444, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 2.911929664999242, | |
| "grad_norm": 0.15096239269549241, | |
| "learning_rate": 3.5468504261610387e-05, | |
| "loss": 0.4497, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.9167803547066846, | |
| "grad_norm": 0.1341187523707445, | |
| "learning_rate": 3.5333843685545914e-05, | |
| "loss": 0.4449, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 2.9216310444141276, | |
| "grad_norm": 0.1619335943958044, | |
| "learning_rate": 3.519923670138025e-05, | |
| "loss": 0.4434, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 2.9264817341215705, | |
| "grad_norm": 0.16211928956809254, | |
| "learning_rate": 3.506468485510616e-05, | |
| "loss": 0.4394, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 2.931332423829013, | |
| "grad_norm": 0.13565985896740923, | |
| "learning_rate": 3.493018969208314e-05, | |
| "loss": 0.4513, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 2.936183113536456, | |
| "grad_norm": 0.14551792578562125, | |
| "learning_rate": 3.479575275701965e-05, | |
| "loss": 0.4425, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 2.941033803243899, | |
| "grad_norm": 0.17680002225930672, | |
| "learning_rate": 3.4661375593955405e-05, | |
| "loss": 0.4384, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 2.9458844929513415, | |
| "grad_norm": 0.16447736465809054, | |
| "learning_rate": 3.45270597462436e-05, | |
| "loss": 0.4451, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 2.9507351826587844, | |
| "grad_norm": 0.15299942108774958, | |
| "learning_rate": 3.4392806756533233e-05, | |
| "loss": 0.4383, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 2.955585872366227, | |
| "grad_norm": 0.1536623395643003, | |
| "learning_rate": 3.425861816675135e-05, | |
| "loss": 0.4453, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 2.96043656207367, | |
| "grad_norm": 0.15727509499916784, | |
| "learning_rate": 3.4124495518085366e-05, | |
| "loss": 0.436, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 2.9652872517811124, | |
| "grad_norm": 0.17906600408772821, | |
| "learning_rate": 3.399044035096532e-05, | |
| "loss": 0.4467, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 2.9701379414885554, | |
| "grad_norm": 0.16243556143131102, | |
| "learning_rate": 3.3856454205046223e-05, | |
| "loss": 0.4364, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 2.9749886311959983, | |
| "grad_norm": 0.1608114743453715, | |
| "learning_rate": 3.372253861919036e-05, | |
| "loss": 0.4517, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 2.979839320903441, | |
| "grad_norm": 0.16249676695986184, | |
| "learning_rate": 3.3588695131449626e-05, | |
| "loss": 0.4464, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 2.984690010610884, | |
| "grad_norm": 0.14682205641070967, | |
| "learning_rate": 3.3454925279047854e-05, | |
| "loss": 0.4446, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 2.9895407003183267, | |
| "grad_norm": 0.158750060737996, | |
| "learning_rate": 3.3321230598363126e-05, | |
| "loss": 0.4449, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 2.9943913900257693, | |
| "grad_norm": 0.15103569523913019, | |
| "learning_rate": 3.3187612624910185e-05, | |
| "loss": 0.4457, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 2.999242079733212, | |
| "grad_norm": 0.13279281722008607, | |
| "learning_rate": 3.305407289332279e-05, | |
| "loss": 0.4524, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 3.004850689707443, | |
| "grad_norm": 0.21786343264375474, | |
| "learning_rate": 3.2920612937336035e-05, | |
| "loss": 0.4186, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 3.0097013794148855, | |
| "grad_norm": 0.1940657522375173, | |
| "learning_rate": 3.2787234289768816e-05, | |
| "loss": 0.4198, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 3.0145520691223284, | |
| "grad_norm": 0.21773031084989108, | |
| "learning_rate": 3.2653938482506125e-05, | |
| "loss": 0.4257, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 3.019402758829771, | |
| "grad_norm": 0.20326547030411232, | |
| "learning_rate": 3.252072704648157e-05, | |
| "loss": 0.4165, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 3.024253448537214, | |
| "grad_norm": 0.1952882443077655, | |
| "learning_rate": 3.2387601511659695e-05, | |
| "loss": 0.4099, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 3.029104138244657, | |
| "grad_norm": 0.1918042514566916, | |
| "learning_rate": 3.22545634070185e-05, | |
| "loss": 0.4152, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 3.0339548279520994, | |
| "grad_norm": 0.19705116304049744, | |
| "learning_rate": 3.212161426053177e-05, | |
| "loss": 0.4128, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 3.0388055176595423, | |
| "grad_norm": 0.19649678060258355, | |
| "learning_rate": 3.19887555991516e-05, | |
| "loss": 0.4129, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 3.043656207366985, | |
| "grad_norm": 0.21708556979200094, | |
| "learning_rate": 3.1855988948790866e-05, | |
| "loss": 0.419, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 3.048506897074428, | |
| "grad_norm": 0.1975715363358727, | |
| "learning_rate": 3.172331583430567e-05, | |
| "loss": 0.4179, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 3.0533575867818707, | |
| "grad_norm": 0.23335868145268443, | |
| "learning_rate": 3.1590737779477825e-05, | |
| "loss": 0.4187, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 3.0582082764893133, | |
| "grad_norm": 0.16312780113553543, | |
| "learning_rate": 3.145825630699734e-05, | |
| "loss": 0.4145, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 3.063058966196756, | |
| "grad_norm": 0.1995031684584169, | |
| "learning_rate": 3.1325872938444995e-05, | |
| "loss": 0.4269, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 3.0679096559041987, | |
| "grad_norm": 0.16321617015370155, | |
| "learning_rate": 3.119358919427478e-05, | |
| "loss": 0.4189, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 3.0727603456116417, | |
| "grad_norm": 0.18410001775256316, | |
| "learning_rate": 3.106140659379652e-05, | |
| "loss": 0.4218, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 3.0776110353190846, | |
| "grad_norm": 0.15016856412332835, | |
| "learning_rate": 3.092932665515837e-05, | |
| "loss": 0.4142, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 3.082461725026527, | |
| "grad_norm": 0.1502499895895468, | |
| "learning_rate": 3.079735089532935e-05, | |
| "loss": 0.4092, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 3.08731241473397, | |
| "grad_norm": 0.1564011362374121, | |
| "learning_rate": 3.0665480830082e-05, | |
| "loss": 0.4176, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 3.0921631044414126, | |
| "grad_norm": 0.15490177035198396, | |
| "learning_rate": 3.0533717973974924e-05, | |
| "loss": 0.4171, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 3.0970137941488556, | |
| "grad_norm": 0.16842243538179394, | |
| "learning_rate": 3.040206384033542e-05, | |
| "loss": 0.4142, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 3.101864483856298, | |
| "grad_norm": 0.15647723657119908, | |
| "learning_rate": 3.0270519941242052e-05, | |
| "loss": 0.4045, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 3.106715173563741, | |
| "grad_norm": 0.14187950473293476, | |
| "learning_rate": 3.0139087787507323e-05, | |
| "loss": 0.4162, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 3.111565863271184, | |
| "grad_norm": 0.1486994798612613, | |
| "learning_rate": 3.0007768888660337e-05, | |
| "loss": 0.4162, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 3.1164165529786265, | |
| "grad_norm": 0.14045982542928215, | |
| "learning_rate": 2.9876564752929406e-05, | |
| "loss": 0.423, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 3.1212672426860695, | |
| "grad_norm": 0.14911944405394412, | |
| "learning_rate": 2.9745476887224806e-05, | |
| "loss": 0.4186, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 3.1261179323935124, | |
| "grad_norm": 0.130768356118423, | |
| "learning_rate": 2.961450679712135e-05, | |
| "loss": 0.4149, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 3.130968622100955, | |
| "grad_norm": 0.15344876789210227, | |
| "learning_rate": 2.9483655986841265e-05, | |
| "loss": 0.4185, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 3.135819311808398, | |
| "grad_norm": 0.16373681347456412, | |
| "learning_rate": 2.9352925959236732e-05, | |
| "loss": 0.4199, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 3.1406700015158404, | |
| "grad_norm": 0.13681917154319687, | |
| "learning_rate": 2.92223182157728e-05, | |
| "loss": 0.4187, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 3.1455206912232834, | |
| "grad_norm": 0.16376916195733163, | |
| "learning_rate": 2.909183425650996e-05, | |
| "loss": 0.4144, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 3.150371380930726, | |
| "grad_norm": 0.15231760580025508, | |
| "learning_rate": 2.8961475580087108e-05, | |
| "loss": 0.4065, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 3.155222070638169, | |
| "grad_norm": 0.1482250629623317, | |
| "learning_rate": 2.8831243683704162e-05, | |
| "loss": 0.4167, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 3.160072760345612, | |
| "grad_norm": 0.1279306947970543, | |
| "learning_rate": 2.8701140063104996e-05, | |
| "loss": 0.4163, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 3.1649234500530543, | |
| "grad_norm": 0.14839352063841615, | |
| "learning_rate": 2.857116621256018e-05, | |
| "loss": 0.4066, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 3.1697741397604973, | |
| "grad_norm": 0.14182099741632095, | |
| "learning_rate": 2.8441323624849827e-05, | |
| "loss": 0.4073, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 3.17462482946794, | |
| "grad_norm": 0.14062989686711488, | |
| "learning_rate": 2.83116137912465e-05, | |
| "loss": 0.4148, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 3.1794755191753827, | |
| "grad_norm": 0.13350262763245205, | |
| "learning_rate": 2.8182038201498038e-05, | |
| "loss": 0.4185, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 3.1843262088828257, | |
| "grad_norm": 0.14000623741594295, | |
| "learning_rate": 2.8052598343810474e-05, | |
| "loss": 0.4084, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 3.189176898590268, | |
| "grad_norm": 0.13922618624994615, | |
| "learning_rate": 2.7923295704830868e-05, | |
| "loss": 0.4209, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 3.194027588297711, | |
| "grad_norm": 0.13424360868636123, | |
| "learning_rate": 2.7794131769630355e-05, | |
| "loss": 0.4203, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 3.1988782780051537, | |
| "grad_norm": 0.1410971557670894, | |
| "learning_rate": 2.7665108021687007e-05, | |
| "loss": 0.4229, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 3.2037289677125966, | |
| "grad_norm": 0.15216131334925778, | |
| "learning_rate": 2.753622594286879e-05, | |
| "loss": 0.4145, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 3.2085796574200396, | |
| "grad_norm": 0.1261567069468345, | |
| "learning_rate": 2.7407487013416615e-05, | |
| "loss": 0.4083, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 3.213430347127482, | |
| "grad_norm": 0.15853568579534694, | |
| "learning_rate": 2.727889271192722e-05, | |
| "loss": 0.4187, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 3.218281036834925, | |
| "grad_norm": 0.1151093763460037, | |
| "learning_rate": 2.715044451533631e-05, | |
| "loss": 0.4164, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 3.2231317265423676, | |
| "grad_norm": 0.14296777545925654, | |
| "learning_rate": 2.702214389890152e-05, | |
| "loss": 0.413, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 3.2279824162498105, | |
| "grad_norm": 0.12844538251966497, | |
| "learning_rate": 2.6893992336185512e-05, | |
| "loss": 0.4035, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 3.2328331059572535, | |
| "grad_norm": 0.13036276585432338, | |
| "learning_rate": 2.6765991299039025e-05, | |
| "loss": 0.4145, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 3.237683795664696, | |
| "grad_norm": 0.12243507743746514, | |
| "learning_rate": 2.663814225758393e-05, | |
| "loss": 0.4117, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 3.242534485372139, | |
| "grad_norm": 0.14245972122975337, | |
| "learning_rate": 2.6510446680196448e-05, | |
| "loss": 0.4195, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 3.2473851750795815, | |
| "grad_norm": 0.12799039988824565, | |
| "learning_rate": 2.638290603349023e-05, | |
| "loss": 0.4203, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 3.2522358647870244, | |
| "grad_norm": 0.12961756855915293, | |
| "learning_rate": 2.625552178229949e-05, | |
| "loss": 0.4159, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 3.2570865544944674, | |
| "grad_norm": 0.1296944618260485, | |
| "learning_rate": 2.612829538966218e-05, | |
| "loss": 0.4111, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 3.26193724420191, | |
| "grad_norm": 0.14508116679367689, | |
| "learning_rate": 2.6001228316803256e-05, | |
| "loss": 0.4196, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 3.266787933909353, | |
| "grad_norm": 0.1216509600449271, | |
| "learning_rate": 2.5874322023117824e-05, | |
| "loss": 0.4162, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 3.2716386236167954, | |
| "grad_norm": 0.1407698696044201, | |
| "learning_rate": 2.5747577966154404e-05, | |
| "loss": 0.4165, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 3.2764893133242383, | |
| "grad_norm": 0.1142728140236527, | |
| "learning_rate": 2.5620997601598215e-05, | |
| "loss": 0.4076, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 3.281340003031681, | |
| "grad_norm": 0.14212475123175447, | |
| "learning_rate": 2.5494582383254388e-05, | |
| "loss": 0.4174, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 3.286190692739124, | |
| "grad_norm": 0.11798282133422631, | |
| "learning_rate": 2.5368333763031324e-05, | |
| "loss": 0.4131, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 3.2910413824465667, | |
| "grad_norm": 0.14618443390836333, | |
| "learning_rate": 2.5242253190924034e-05, | |
| "loss": 0.4092, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 3.2958920721540093, | |
| "grad_norm": 0.12815235524330332, | |
| "learning_rate": 2.5116342114997442e-05, | |
| "loss": 0.409, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 3.300742761861452, | |
| "grad_norm": 0.1346079173937725, | |
| "learning_rate": 2.4990601981369737e-05, | |
| "loss": 0.4201, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 3.305593451568895, | |
| "grad_norm": 0.12020401678661803, | |
| "learning_rate": 2.4865034234195834e-05, | |
| "loss": 0.4107, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 3.3104441412763377, | |
| "grad_norm": 0.1342860022847603, | |
| "learning_rate": 2.4739640315650747e-05, | |
| "loss": 0.4145, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 3.3152948309837806, | |
| "grad_norm": 0.11394559661374248, | |
| "learning_rate": 2.4614421665912997e-05, | |
| "loss": 0.4213, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 3.320145520691223, | |
| "grad_norm": 0.126805017438777, | |
| "learning_rate": 2.4489379723148147e-05, | |
| "loss": 0.4129, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 3.324996210398666, | |
| "grad_norm": 0.11817811298525939, | |
| "learning_rate": 2.4364515923492187e-05, | |
| "loss": 0.4193, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 3.3298469001061086, | |
| "grad_norm": 0.1342332761111468, | |
| "learning_rate": 2.4239831701035143e-05, | |
| "loss": 0.418, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 3.3346975898135516, | |
| "grad_norm": 0.12474527460295737, | |
| "learning_rate": 2.411532848780451e-05, | |
| "loss": 0.4166, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 3.3395482795209945, | |
| "grad_norm": 0.1329059048345405, | |
| "learning_rate": 2.399100771374888e-05, | |
| "loss": 0.4138, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 3.344398969228437, | |
| "grad_norm": 0.11628441384991241, | |
| "learning_rate": 2.3866870806721495e-05, | |
| "loss": 0.4111, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 3.34924965893588, | |
| "grad_norm": 0.1453864432398833, | |
| "learning_rate": 2.37429191924638e-05, | |
| "loss": 0.42, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 3.354100348643323, | |
| "grad_norm": 0.11264715608612798, | |
| "learning_rate": 2.361915429458913e-05, | |
| "loss": 0.417, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 3.3589510383507655, | |
| "grad_norm": 0.14248256005839127, | |
| "learning_rate": 2.349557753456637e-05, | |
| "loss": 0.4168, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 3.3638017280582084, | |
| "grad_norm": 0.12398727462550883, | |
| "learning_rate": 2.3372190331703556e-05, | |
| "loss": 0.4189, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 3.368652417765651, | |
| "grad_norm": 0.11869463547338541, | |
| "learning_rate": 2.324899410313161e-05, | |
| "loss": 0.4125, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 3.373503107473094, | |
| "grad_norm": 0.12377753865053726, | |
| "learning_rate": 2.3125990263788118e-05, | |
| "loss": 0.4186, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 3.3783537971805364, | |
| "grad_norm": 0.13141561076446406, | |
| "learning_rate": 2.3003180226400986e-05, | |
| "loss": 0.4123, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 3.3832044868879794, | |
| "grad_norm": 0.11816327910326484, | |
| "learning_rate": 2.288056540147229e-05, | |
| "loss": 0.4129, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 3.3880551765954223, | |
| "grad_norm": 0.12260669143866527, | |
| "learning_rate": 2.275814719726201e-05, | |
| "loss": 0.4133, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 3.392905866302865, | |
| "grad_norm": 0.133084483131333, | |
| "learning_rate": 2.263592701977193e-05, | |
| "loss": 0.4219, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 3.397756556010308, | |
| "grad_norm": 0.11848073628628028, | |
| "learning_rate": 2.2513906272729397e-05, | |
| "loss": 0.4143, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 3.4026072457177503, | |
| "grad_norm": 0.12858950370510128, | |
| "learning_rate": 2.239208635757133e-05, | |
| "loss": 0.4166, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 3.4074579354251933, | |
| "grad_norm": 0.11722692816596028, | |
| "learning_rate": 2.2270468673428004e-05, | |
| "loss": 0.4259, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 3.412308625132636, | |
| "grad_norm": 0.11830608786302087, | |
| "learning_rate": 2.2149054617106974e-05, | |
| "loss": 0.407, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 3.4171593148400787, | |
| "grad_norm": 0.12285165179780408, | |
| "learning_rate": 2.2027845583077175e-05, | |
| "loss": 0.4231, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 3.4220100045475217, | |
| "grad_norm": 0.12884220459818324, | |
| "learning_rate": 2.1906842963452757e-05, | |
| "loss": 0.4069, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 3.426860694254964, | |
| "grad_norm": 0.10700415399515635, | |
| "learning_rate": 2.178604814797715e-05, | |
| "loss": 0.4149, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 3.431711383962407, | |
| "grad_norm": 0.11853515304365536, | |
| "learning_rate": 2.1665462524007162e-05, | |
| "loss": 0.4125, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 3.43656207366985, | |
| "grad_norm": 0.10772813217204756, | |
| "learning_rate": 2.1545087476496903e-05, | |
| "loss": 0.4216, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 3.4414127633772926, | |
| "grad_norm": 0.12946073793938545, | |
| "learning_rate": 2.1424924387981996e-05, | |
| "loss": 0.4227, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 3.4462634530847356, | |
| "grad_norm": 0.11256888467579416, | |
| "learning_rate": 2.1304974638563715e-05, | |
| "loss": 0.4116, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 3.451114142792178, | |
| "grad_norm": 0.12334394272902899, | |
| "learning_rate": 2.1185239605893013e-05, | |
| "loss": 0.4217, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 3.455964832499621, | |
| "grad_norm": 0.11467056354627854, | |
| "learning_rate": 2.106572066515482e-05, | |
| "loss": 0.4193, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 3.460815522207064, | |
| "grad_norm": 0.11248104529388973, | |
| "learning_rate": 2.0946419189052162e-05, | |
| "loss": 0.4147, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 3.4656662119145065, | |
| "grad_norm": 0.11839620237634582, | |
| "learning_rate": 2.0827336547790452e-05, | |
| "loss": 0.4214, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 3.4705169016219495, | |
| "grad_norm": 0.11350680266976185, | |
| "learning_rate": 2.0708474109061752e-05, | |
| "loss": 0.416, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 3.475367591329392, | |
| "grad_norm": 0.11310394993568851, | |
| "learning_rate": 2.0589833238029032e-05, | |
| "loss": 0.4001, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 3.480218281036835, | |
| "grad_norm": 0.1142115014102716, | |
| "learning_rate": 2.0471415297310455e-05, | |
| "loss": 0.4158, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 3.485068970744278, | |
| "grad_norm": 0.12114855144347077, | |
| "learning_rate": 2.0353221646963864e-05, | |
| "loss": 0.41, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 3.4899196604517204, | |
| "grad_norm": 0.11155618948058961, | |
| "learning_rate": 2.0235253644471012e-05, | |
| "loss": 0.4226, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 3.4947703501591634, | |
| "grad_norm": 0.12346483161916652, | |
| "learning_rate": 2.011751264472206e-05, | |
| "loss": 0.4163, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 3.499621039866606, | |
| "grad_norm": 0.1314504891806893, | |
| "learning_rate": 2.0000000000000012e-05, | |
| "loss": 0.4185, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 3.504471729574049, | |
| "grad_norm": 0.11587490363769114, | |
| "learning_rate": 1.9882717059965086e-05, | |
| "loss": 0.4243, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 3.5093224192814914, | |
| "grad_norm": 0.147492515392888, | |
| "learning_rate": 1.9765665171639345e-05, | |
| "loss": 0.4139, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 3.5141731089889343, | |
| "grad_norm": 0.13161876983358525, | |
| "learning_rate": 1.964884567939118e-05, | |
| "loss": 0.4023, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 3.5190237986963773, | |
| "grad_norm": 0.1288012773595963, | |
| "learning_rate": 1.9532259924919823e-05, | |
| "loss": 0.416, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 3.52387448840382, | |
| "grad_norm": 0.10979902623938809, | |
| "learning_rate": 1.9415909247239996e-05, | |
| "loss": 0.4198, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 3.5287251781112627, | |
| "grad_norm": 0.12273321372911124, | |
| "learning_rate": 1.9299794982666485e-05, | |
| "loss": 0.4084, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 3.5335758678187057, | |
| "grad_norm": 0.11485189001172974, | |
| "learning_rate": 1.9183918464798837e-05, | |
| "loss": 0.408, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 3.538426557526148, | |
| "grad_norm": 0.1142263107632135, | |
| "learning_rate": 1.906828102450601e-05, | |
| "loss": 0.4131, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 3.543277247233591, | |
| "grad_norm": 0.11235720950088848, | |
| "learning_rate": 1.895288398991114e-05, | |
| "loss": 0.4102, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 3.5481279369410337, | |
| "grad_norm": 0.11476446551397697, | |
| "learning_rate": 1.8837728686376158e-05, | |
| "loss": 0.4176, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 3.5529786266484766, | |
| "grad_norm": 0.12252689326129268, | |
| "learning_rate": 1.8722816436486754e-05, | |
| "loss": 0.4238, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 3.557829316355919, | |
| "grad_norm": 0.10742367387279236, | |
| "learning_rate": 1.8608148560037036e-05, | |
| "loss": 0.4248, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 3.562680006063362, | |
| "grad_norm": 0.11202485484827082, | |
| "learning_rate": 1.8493726374014442e-05, | |
| "loss": 0.4047, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 3.567530695770805, | |
| "grad_norm": 0.10619927645163178, | |
| "learning_rate": 1.8379551192584588e-05, | |
| "loss": 0.4105, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 3.5723813854782476, | |
| "grad_norm": 0.1104031551739802, | |
| "learning_rate": 1.826562432707619e-05, | |
| "loss": 0.4158, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 3.5772320751856905, | |
| "grad_norm": 0.10920848831138874, | |
| "learning_rate": 1.8151947085965994e-05, | |
| "loss": 0.4157, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 3.5820827648931335, | |
| "grad_norm": 0.10703865180962287, | |
| "learning_rate": 1.803852077486377e-05, | |
| "loss": 0.4144, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 3.586933454600576, | |
| "grad_norm": 0.1043548569854331, | |
| "learning_rate": 1.7925346696497295e-05, | |
| "loss": 0.4082, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 3.591784144308019, | |
| "grad_norm": 0.11138455070453787, | |
| "learning_rate": 1.781242615069733e-05, | |
| "loss": 0.4137, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 3.5966348340154615, | |
| "grad_norm": 0.11820322046978973, | |
| "learning_rate": 1.7699760434382853e-05, | |
| "loss": 0.4108, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 3.6014855237229044, | |
| "grad_norm": 0.11278678223744326, | |
| "learning_rate": 1.758735084154601e-05, | |
| "loss": 0.4189, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 3.606336213430347, | |
| "grad_norm": 0.11323272041806605, | |
| "learning_rate": 1.7475198663237297e-05, | |
| "loss": 0.4123, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 3.61118690313779, | |
| "grad_norm": 0.10904989882061365, | |
| "learning_rate": 1.736330518755082e-05, | |
| "loss": 0.4158, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 3.616037592845233, | |
| "grad_norm": 0.11229772775053595, | |
| "learning_rate": 1.7251671699609313e-05, | |
| "loss": 0.4182, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 3.6208882825526754, | |
| "grad_norm": 0.10951334908001022, | |
| "learning_rate": 1.7140299481549557e-05, | |
| "loss": 0.4213, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 3.6257389722601183, | |
| "grad_norm": 0.11434493342191, | |
| "learning_rate": 1.7029189812507603e-05, | |
| "loss": 0.4224, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 3.6305896619675613, | |
| "grad_norm": 0.10344385368037658, | |
| "learning_rate": 1.6918343968604027e-05, | |
| "loss": 0.4106, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 3.635440351675004, | |
| "grad_norm": 0.11168549168378746, | |
| "learning_rate": 1.6807763222929315e-05, | |
| "loss": 0.408, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 3.6402910413824463, | |
| "grad_norm": 0.11594040802990377, | |
| "learning_rate": 1.669744884552926e-05, | |
| "loss": 0.4169, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 3.6451417310898893, | |
| "grad_norm": 0.11452998735643415, | |
| "learning_rate": 1.6587402103390314e-05, | |
| "loss": 0.4162, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 3.649992420797332, | |
| "grad_norm": 0.11033578052460767, | |
| "learning_rate": 1.6477624260425137e-05, | |
| "loss": 0.4179, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 3.6548431105047747, | |
| "grad_norm": 0.1198668006175803, | |
| "learning_rate": 1.6368116577457973e-05, | |
| "loss": 0.4124, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 3.6596938002122177, | |
| "grad_norm": 0.10884921001547737, | |
| "learning_rate": 1.6258880312210195e-05, | |
| "loss": 0.4152, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 3.6645444899196606, | |
| "grad_norm": 0.12009675101549108, | |
| "learning_rate": 1.6149916719285942e-05, | |
| "loss": 0.4147, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 3.669395179627103, | |
| "grad_norm": 0.11742205741912104, | |
| "learning_rate": 1.6041227050157607e-05, | |
| "loss": 0.4096, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 3.674245869334546, | |
| "grad_norm": 0.1123251375365744, | |
| "learning_rate": 1.5932812553151506e-05, | |
| "loss": 0.4128, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 3.6790965590419886, | |
| "grad_norm": 0.12009728599563303, | |
| "learning_rate": 1.582467447343355e-05, | |
| "loss": 0.4179, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 3.6839472487494316, | |
| "grad_norm": 0.10679356522304362, | |
| "learning_rate": 1.5716814052994928e-05, | |
| "loss": 0.4153, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 3.688797938456874, | |
| "grad_norm": 0.11287526417521035, | |
| "learning_rate": 1.5609232530637827e-05, | |
| "loss": 0.4065, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 3.693648628164317, | |
| "grad_norm": 0.10604174975295146, | |
| "learning_rate": 1.5501931141961278e-05, | |
| "loss": 0.4135, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 3.69849931787176, | |
| "grad_norm": 0.10895447717123838, | |
| "learning_rate": 1.539491111934686e-05, | |
| "loss": 0.4102, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 3.7033500075792025, | |
| "grad_norm": 0.11522106117016057, | |
| "learning_rate": 1.5288173691944613e-05, | |
| "loss": 0.4193, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 3.7082006972866455, | |
| "grad_norm": 0.11668799730823959, | |
| "learning_rate": 1.5181720085658906e-05, | |
| "loss": 0.4131, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 3.7130513869940884, | |
| "grad_norm": 0.1179757070897769, | |
| "learning_rate": 1.5075551523134358e-05, | |
| "loss": 0.4107, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 3.717902076701531, | |
| "grad_norm": 0.11352197320512793, | |
| "learning_rate": 1.4969669223741771e-05, | |
| "loss": 0.4093, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 3.722752766408974, | |
| "grad_norm": 0.11945491424386492, | |
| "learning_rate": 1.4864074403564216e-05, | |
| "loss": 0.4142, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 3.7276034561164164, | |
| "grad_norm": 0.10892285815783607, | |
| "learning_rate": 1.4758768275382887e-05, | |
| "loss": 0.4205, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 3.7324541458238594, | |
| "grad_norm": 0.12765157069597566, | |
| "learning_rate": 1.4653752048663394e-05, | |
| "loss": 0.412, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 3.737304835531302, | |
| "grad_norm": 0.10956587187939422, | |
| "learning_rate": 1.4549026929541693e-05, | |
| "loss": 0.4148, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 3.742155525238745, | |
| "grad_norm": 0.12138544117757244, | |
| "learning_rate": 1.4444594120810326e-05, | |
| "loss": 0.4115, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 3.747006214946188, | |
| "grad_norm": 0.10697996518645103, | |
| "learning_rate": 1.4340454821904573e-05, | |
| "loss": 0.4194, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 3.7518569046536303, | |
| "grad_norm": 0.10998535306961314, | |
| "learning_rate": 1.4236610228888683e-05, | |
| "loss": 0.4143, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 3.7567075943610733, | |
| "grad_norm": 0.10998459629638649, | |
| "learning_rate": 1.4133061534442133e-05, | |
| "loss": 0.4121, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 3.7615582840685162, | |
| "grad_norm": 0.1056241184630642, | |
| "learning_rate": 1.4029809927845981e-05, | |
| "loss": 0.4146, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 3.7664089737759587, | |
| "grad_norm": 0.11617390368347923, | |
| "learning_rate": 1.3926856594969115e-05, | |
| "loss": 0.4125, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 3.7712596634834017, | |
| "grad_norm": 0.10105741678809248, | |
| "learning_rate": 1.3824202718254655e-05, | |
| "loss": 0.4081, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 3.776110353190844, | |
| "grad_norm": 0.12303653861990478, | |
| "learning_rate": 1.3721849476706477e-05, | |
| "loss": 0.413, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 3.780961042898287, | |
| "grad_norm": 0.10128855312982177, | |
| "learning_rate": 1.3619798045875529e-05, | |
| "loss": 0.4117, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 3.7858117326057297, | |
| "grad_norm": 0.1118697000457396, | |
| "learning_rate": 1.3518049597846412e-05, | |
| "loss": 0.4097, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 3.7906624223131726, | |
| "grad_norm": 0.10689143155371568, | |
| "learning_rate": 1.3416605301223893e-05, | |
| "loss": 0.4146, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 3.7955131120206156, | |
| "grad_norm": 0.09942573069367382, | |
| "learning_rate": 1.3315466321119486e-05, | |
| "loss": 0.415, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 3.800363801728058, | |
| "grad_norm": 0.112223020708749, | |
| "learning_rate": 1.3214633819138105e-05, | |
| "loss": 0.4187, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 3.805214491435501, | |
| "grad_norm": 0.09509376603334437, | |
| "learning_rate": 1.3114108953364655e-05, | |
| "loss": 0.4083, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 3.810065181142944, | |
| "grad_norm": 0.1039694903593983, | |
| "learning_rate": 1.3013892878350771e-05, | |
| "loss": 0.415, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 3.8149158708503865, | |
| "grad_norm": 0.09799457517015436, | |
| "learning_rate": 1.2913986745101567e-05, | |
| "loss": 0.4082, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 3.8197665605578295, | |
| "grad_norm": 0.0994744466895985, | |
| "learning_rate": 1.2814391701062392e-05, | |
| "loss": 0.416, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 3.824617250265272, | |
| "grad_norm": 0.09937882084294014, | |
| "learning_rate": 1.2715108890105663e-05, | |
| "loss": 0.4118, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 3.829467939972715, | |
| "grad_norm": 0.09366936781008306, | |
| "learning_rate": 1.2616139452517748e-05, | |
| "loss": 0.4202, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 3.8343186296801575, | |
| "grad_norm": 0.10103247526201467, | |
| "learning_rate": 1.2517484524985836e-05, | |
| "loss": 0.414, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 3.8391693193876004, | |
| "grad_norm": 0.10154281976161658, | |
| "learning_rate": 1.2419145240584856e-05, | |
| "loss": 0.4169, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 3.8440200090950434, | |
| "grad_norm": 0.09917419975099584, | |
| "learning_rate": 1.2321122728764566e-05, | |
| "loss": 0.4121, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 3.848870698802486, | |
| "grad_norm": 0.10067017561953691, | |
| "learning_rate": 1.222341811533648e-05, | |
| "loss": 0.4177, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 3.853721388509929, | |
| "grad_norm": 0.10318548830281854, | |
| "learning_rate": 1.2126032522460975e-05, | |
| "loss": 0.4211, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 3.858572078217372, | |
| "grad_norm": 0.09616344349182201, | |
| "learning_rate": 1.2028967068634417e-05, | |
| "loss": 0.4204, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 3.8634227679248143, | |
| "grad_norm": 0.10489574786705688, | |
| "learning_rate": 1.193222286867628e-05, | |
| "loss": 0.4119, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 3.868273457632257, | |
| "grad_norm": 0.1008569871750799, | |
| "learning_rate": 1.1835801033716372e-05, | |
| "loss": 0.4086, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 3.8731241473397, | |
| "grad_norm": 0.09122317803788128, | |
| "learning_rate": 1.1739702671182083e-05, | |
| "loss": 0.4214, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 3.8779748370471427, | |
| "grad_norm": 0.0991904412555539, | |
| "learning_rate": 1.1643928884785618e-05, | |
| "loss": 0.4095, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 3.8828255267545853, | |
| "grad_norm": 0.0959491624589887, | |
| "learning_rate": 1.1548480774511353e-05, | |
| "loss": 0.4218, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 3.887676216462028, | |
| "grad_norm": 0.09541302821779325, | |
| "learning_rate": 1.1453359436603213e-05, | |
| "loss": 0.4218, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 3.892526906169471, | |
| "grad_norm": 0.09054297930670004, | |
| "learning_rate": 1.1358565963552039e-05, | |
| "loss": 0.421, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 3.8973775958769137, | |
| "grad_norm": 0.09175434223796135, | |
| "learning_rate": 1.126410144408312e-05, | |
| "loss": 0.4088, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 3.9022282855843566, | |
| "grad_norm": 0.0933968969265613, | |
| "learning_rate": 1.1169966963143568e-05, | |
| "loss": 0.4105, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 3.907078975291799, | |
| "grad_norm": 0.09827723734859882, | |
| "learning_rate": 1.1076163601889953e-05, | |
| "loss": 0.4114, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 3.911929664999242, | |
| "grad_norm": 0.09185288445531155, | |
| "learning_rate": 1.098269243767589e-05, | |
| "loss": 0.4099, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 3.9167803547066846, | |
| "grad_norm": 0.10344811490009215, | |
| "learning_rate": 1.0889554544039593e-05, | |
| "loss": 0.417, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 3.9216310444141276, | |
| "grad_norm": 0.09584223413813131, | |
| "learning_rate": 1.0796750990691596e-05, | |
| "loss": 0.4092, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 3.9264817341215705, | |
| "grad_norm": 0.10225202685968936, | |
| "learning_rate": 1.0704282843502459e-05, | |
| "loss": 0.4156, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 3.931332423829013, | |
| "grad_norm": 0.10087206295646214, | |
| "learning_rate": 1.0612151164490525e-05, | |
| "loss": 0.4209, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 3.936183113536456, | |
| "grad_norm": 0.09495839512641314, | |
| "learning_rate": 1.0520357011809707e-05, | |
| "loss": 0.4193, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 3.941033803243899, | |
| "grad_norm": 0.10115287872844174, | |
| "learning_rate": 1.0428901439737387e-05, | |
| "loss": 0.415, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 3.9458844929513415, | |
| "grad_norm": 0.09538434619439141, | |
| "learning_rate": 1.0337785498662223e-05, | |
| "loss": 0.4152, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 3.9507351826587844, | |
| "grad_norm": 0.0955931932252973, | |
| "learning_rate": 1.024701023507216e-05, | |
| "loss": 0.4153, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 3.955585872366227, | |
| "grad_norm": 0.10045702643945939, | |
| "learning_rate": 1.015657669154237e-05, | |
| "loss": 0.4156, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 3.96043656207367, | |
| "grad_norm": 0.09335473216661304, | |
| "learning_rate": 1.00664859067233e-05, | |
| "loss": 0.4109, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 3.9652872517811124, | |
| "grad_norm": 0.09645722262367523, | |
| "learning_rate": 9.976738915328719e-06, | |
| "loss": 0.4107, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 3.9701379414885554, | |
| "grad_norm": 0.09446841411423582, | |
| "learning_rate": 9.887336748123864e-06, | |
| "loss": 0.4178, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 3.9749886311959983, | |
| "grad_norm": 0.0903235693705833, | |
| "learning_rate": 9.798280431913558e-06, | |
| "loss": 0.4202, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 3.979839320903441, | |
| "grad_norm": 0.0948091272844266, | |
| "learning_rate": 9.709570989530493e-06, | |
| "loss": 0.4123, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 3.984690010610884, | |
| "grad_norm": 0.10103242493534337, | |
| "learning_rate": 9.621209439823388e-06, | |
| "loss": 0.4132, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 3.9895407003183267, | |
| "grad_norm": 0.09199808648298305, | |
| "learning_rate": 9.533196797645354e-06, | |
| "loss": 0.4101, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 3.9943913900257693, | |
| "grad_norm": 0.09866422487619428, | |
| "learning_rate": 9.44553407384221e-06, | |
| "loss": 0.412, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 3.999242079733212, | |
| "grad_norm": 0.09950215009808663, | |
| "learning_rate": 9.358222275240884e-06, | |
| "loss": 0.4113, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 4.0048506897074425, | |
| "grad_norm": 0.16227668213643984, | |
| "learning_rate": 9.271262404637835e-06, | |
| "loss": 0.4032, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 4.009701379414886, | |
| "grad_norm": 0.11430363283166092, | |
| "learning_rate": 9.184655460787591e-06, | |
| "loss": 0.3988, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 4.014552069122328, | |
| "grad_norm": 0.11751760908320912, | |
| "learning_rate": 9.098402438391161e-06, | |
| "loss": 0.3943, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 4.019402758829771, | |
| "grad_norm": 0.13225951402360853, | |
| "learning_rate": 9.012504328084724e-06, | |
| "loss": 0.4024, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 4.0242534485372135, | |
| "grad_norm": 0.1269725744791709, | |
| "learning_rate": 8.926962116428228e-06, | |
| "loss": 0.4, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 4.029104138244657, | |
| "grad_norm": 0.12436207204015083, | |
| "learning_rate": 8.841776785894014e-06, | |
| "loss": 0.3994, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 4.033954827952099, | |
| "grad_norm": 0.12882961013897004, | |
| "learning_rate": 8.756949314855565e-06, | |
| "loss": 0.3977, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 4.038805517659542, | |
| "grad_norm": 0.11685196489455994, | |
| "learning_rate": 8.672480677576267e-06, | |
| "loss": 0.3906, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 4.043656207366985, | |
| "grad_norm": 0.106568471380297, | |
| "learning_rate": 8.58837184419821e-06, | |
| "loss": 0.388, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 4.048506897074428, | |
| "grad_norm": 0.1134401036269532, | |
| "learning_rate": 8.504623780731056e-06, | |
| "loss": 0.3918, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 4.05335758678187, | |
| "grad_norm": 0.12093411095557142, | |
| "learning_rate": 8.421237449040962e-06, | |
| "loss": 0.397, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 4.058208276489314, | |
| "grad_norm": 0.11674197768223235, | |
| "learning_rate": 8.338213806839453e-06, | |
| "loss": 0.393, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 4.063058966196756, | |
| "grad_norm": 0.10082248897884766, | |
| "learning_rate": 8.255553807672547e-06, | |
| "loss": 0.3897, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 4.067909655904199, | |
| "grad_norm": 0.11069126863357161, | |
| "learning_rate": 8.1732584009097e-06, | |
| "loss": 0.3882, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 4.072760345611641, | |
| "grad_norm": 0.11738423041846735, | |
| "learning_rate": 8.091328531732925e-06, | |
| "loss": 0.3959, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 4.077611035319085, | |
| "grad_norm": 0.10387395387726432, | |
| "learning_rate": 8.009765141126014e-06, | |
| "loss": 0.3891, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 4.082461725026527, | |
| "grad_norm": 0.10003982048853301, | |
| "learning_rate": 7.928569165863584e-06, | |
| "loss": 0.3909, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 4.08731241473397, | |
| "grad_norm": 0.10549262492820612, | |
| "learning_rate": 7.847741538500439e-06, | |
| "loss": 0.3875, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 4.092163104441413, | |
| "grad_norm": 0.10986857014715488, | |
| "learning_rate": 7.767283187360846e-06, | |
| "loss": 0.3929, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 4.097013794148856, | |
| "grad_norm": 0.09674503227632227, | |
| "learning_rate": 7.687195036527813e-06, | |
| "loss": 0.3928, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 4.101864483856298, | |
| "grad_norm": 0.10305617355184506, | |
| "learning_rate": 7.60747800583252e-06, | |
| "loss": 0.4002, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 4.1067151735637415, | |
| "grad_norm": 0.09835145173574618, | |
| "learning_rate": 7.52813301084375e-06, | |
| "loss": 0.3961, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 4.111565863271184, | |
| "grad_norm": 0.09426867245890704, | |
| "learning_rate": 7.449160962857358e-06, | |
| "loss": 0.3946, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 4.1164165529786265, | |
| "grad_norm": 0.0993123791236732, | |
| "learning_rate": 7.370562768885823e-06, | |
| "loss": 0.3937, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 4.121267242686069, | |
| "grad_norm": 0.09692548506896993, | |
| "learning_rate": 7.292339331647848e-06, | |
| "loss": 0.3957, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 4.126117932393512, | |
| "grad_norm": 0.0977639150148905, | |
| "learning_rate": 7.214491549557898e-06, | |
| "loss": 0.3969, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 4.130968622100955, | |
| "grad_norm": 0.10100358672328608, | |
| "learning_rate": 7.1370203167160326e-06, | |
| "loss": 0.3937, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 4.1358193118083975, | |
| "grad_norm": 0.0961508995875077, | |
| "learning_rate": 7.0599265228975e-06, | |
| "loss": 0.3965, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 4.140670001515841, | |
| "grad_norm": 0.09450992963252156, | |
| "learning_rate": 6.983211053542591e-06, | |
| "loss": 0.4008, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 4.145520691223283, | |
| "grad_norm": 0.10411768031595499, | |
| "learning_rate": 6.9068747897464535e-06, | |
| "loss": 0.4032, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 4.150371380930726, | |
| "grad_norm": 0.09981896937364662, | |
| "learning_rate": 6.830918608248964e-06, | |
| "loss": 0.4002, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 4.155222070638169, | |
| "grad_norm": 0.08754517232013051, | |
| "learning_rate": 6.755343381424659e-06, | |
| "loss": 0.3976, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 4.160072760345612, | |
| "grad_norm": 0.09520671649907132, | |
| "learning_rate": 6.68014997727275e-06, | |
| "loss": 0.3921, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 4.164923450053054, | |
| "grad_norm": 0.09719121632502135, | |
| "learning_rate": 6.605339259407104e-06, | |
| "loss": 0.3852, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 4.169774139760497, | |
| "grad_norm": 0.08870928687089744, | |
| "learning_rate": 6.530912087046317e-06, | |
| "loss": 0.395, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 4.17462482946794, | |
| "grad_norm": 0.08685694037296439, | |
| "learning_rate": 6.456869315003946e-06, | |
| "loss": 0.3941, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 4.179475519175383, | |
| "grad_norm": 0.08904997313439429, | |
| "learning_rate": 6.3832117936785564e-06, | |
| "loss": 0.3997, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 4.184326208882825, | |
| "grad_norm": 0.0910482544729166, | |
| "learning_rate": 6.309940369044047e-06, | |
| "loss": 0.3945, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 4.189176898590269, | |
| "grad_norm": 0.0887134263309384, | |
| "learning_rate": 6.23705588263992e-06, | |
| "loss": 0.3881, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 4.194027588297711, | |
| "grad_norm": 0.08977043998387933, | |
| "learning_rate": 6.164559171561553e-06, | |
| "loss": 0.3957, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 4.198878278005154, | |
| "grad_norm": 0.09216637353138708, | |
| "learning_rate": 6.092451068450671e-06, | |
| "loss": 0.3969, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 4.203728967712596, | |
| "grad_norm": 0.08464526503746563, | |
| "learning_rate": 6.020732401485751e-06, | |
| "loss": 0.3896, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 4.20857965742004, | |
| "grad_norm": 0.08983668958241023, | |
| "learning_rate": 5.9494039943724845e-06, | |
| "loss": 0.4025, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 4.213430347127482, | |
| "grad_norm": 0.08753368121579265, | |
| "learning_rate": 5.878466666334341e-06, | |
| "loss": 0.3954, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 4.218281036834925, | |
| "grad_norm": 0.08687937990016806, | |
| "learning_rate": 5.80792123210316e-06, | |
| "loss": 0.391, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 4.223131726542368, | |
| "grad_norm": 0.08701962255264487, | |
| "learning_rate": 5.737768501909773e-06, | |
| "loss": 0.3965, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 4.2279824162498105, | |
| "grad_norm": 0.08811499746627446, | |
| "learning_rate": 5.668009281474751e-06, | |
| "loss": 0.3846, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 4.232833105957253, | |
| "grad_norm": 0.08561633996061273, | |
| "learning_rate": 5.598644371999085e-06, | |
| "loss": 0.3919, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 4.237683795664696, | |
| "grad_norm": 0.08321419945310787, | |
| "learning_rate": 5.5296745701549906e-06, | |
| "loss": 0.394, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 4.242534485372139, | |
| "grad_norm": 0.08724643182123827, | |
| "learning_rate": 5.4611006680768305e-06, | |
| "loss": 0.3994, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 4.2473851750795815, | |
| "grad_norm": 0.08361301460174729, | |
| "learning_rate": 5.3929234533519345e-06, | |
| "loss": 0.388, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 4.252235864787025, | |
| "grad_norm": 0.0862576560444418, | |
| "learning_rate": 5.325143709011587e-06, | |
| "loss": 0.3942, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 4.257086554494467, | |
| "grad_norm": 0.082606577377262, | |
| "learning_rate": 5.257762213522055e-06, | |
| "loss": 0.3907, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 4.26193724420191, | |
| "grad_norm": 0.08601520095290771, | |
| "learning_rate": 5.19077974077558e-06, | |
| "loss": 0.3927, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 4.266787933909352, | |
| "grad_norm": 0.08144847498379731, | |
| "learning_rate": 5.124197060081564e-06, | |
| "loss": 0.3981, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 4.271638623616796, | |
| "grad_norm": 0.08369648223405692, | |
| "learning_rate": 5.058014936157714e-06, | |
| "loss": 0.3987, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 4.276489313324238, | |
| "grad_norm": 0.08487450387282452, | |
| "learning_rate": 4.992234129121225e-06, | |
| "loss": 0.3993, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 4.281340003031681, | |
| "grad_norm": 0.0835928002976722, | |
| "learning_rate": 4.926855394480079e-06, | |
| "loss": 0.394, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 4.286190692739124, | |
| "grad_norm": 0.0877663910781713, | |
| "learning_rate": 4.861879483124372e-06, | |
| "loss": 0.3888, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 4.291041382446567, | |
| "grad_norm": 0.08201902566316537, | |
| "learning_rate": 4.797307141317666e-06, | |
| "loss": 0.3978, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 4.295892072154009, | |
| "grad_norm": 0.08252391090570044, | |
| "learning_rate": 4.7331391106884364e-06, | |
| "loss": 0.3949, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 4.300742761861452, | |
| "grad_norm": 0.08121494056766716, | |
| "learning_rate": 4.6693761282215766e-06, | |
| "loss": 0.3922, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 4.305593451568895, | |
| "grad_norm": 0.08414265476669228, | |
| "learning_rate": 4.606018926249851e-06, | |
| "loss": 0.389, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 4.310444141276338, | |
| "grad_norm": 0.08528892500629966, | |
| "learning_rate": 4.543068232445596e-06, | |
| "loss": 0.3956, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 4.31529483098378, | |
| "grad_norm": 0.08280199210655267, | |
| "learning_rate": 4.480524769812276e-06, | |
| "loss": 0.3938, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 4.320145520691224, | |
| "grad_norm": 0.08884076809121294, | |
| "learning_rate": 4.418389256676206e-06, | |
| "loss": 0.3947, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 4.324996210398666, | |
| "grad_norm": 0.08902762031211295, | |
| "learning_rate": 4.35666240667834e-06, | |
| "loss": 0.3907, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 4.329846900106109, | |
| "grad_norm": 0.08464410835355753, | |
| "learning_rate": 4.295344928765999e-06, | |
| "loss": 0.3939, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 4.334697589813552, | |
| "grad_norm": 0.08347421805827179, | |
| "learning_rate": 4.234437527184785e-06, | |
| "loss": 0.3985, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 4.3395482795209945, | |
| "grad_norm": 0.08770395898608876, | |
| "learning_rate": 4.173940901470488e-06, | |
| "loss": 0.395, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 4.344398969228437, | |
| "grad_norm": 0.08139356257791996, | |
| "learning_rate": 4.11385574644104e-06, | |
| "loss": 0.39, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 4.34924965893588, | |
| "grad_norm": 0.0818047074224124, | |
| "learning_rate": 4.054182752188501e-06, | |
| "loss": 0.3948, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 4.354100348643323, | |
| "grad_norm": 0.08370905382784635, | |
| "learning_rate": 3.994922604071217e-06, | |
| "loss": 0.3941, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 4.3589510383507655, | |
| "grad_norm": 0.08543140680473625, | |
| "learning_rate": 3.936075982705871e-06, | |
| "loss": 0.3982, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 4.363801728058208, | |
| "grad_norm": 0.08330546263387466, | |
| "learning_rate": 3.877643563959694e-06, | |
| "loss": 0.3986, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 4.368652417765651, | |
| "grad_norm": 0.08181359063571672, | |
| "learning_rate": 3.819626018942732e-06, | |
| "loss": 0.3962, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 4.373503107473094, | |
| "grad_norm": 0.08458157848919085, | |
| "learning_rate": 3.762024014000054e-06, | |
| "loss": 0.3974, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 4.378353797180536, | |
| "grad_norm": 0.0797245693813596, | |
| "learning_rate": 3.7048382107042113e-06, | |
| "loss": 0.3849, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 4.38320448688798, | |
| "grad_norm": 0.08552325047075819, | |
| "learning_rate": 3.6480692658475446e-06, | |
| "loss": 0.3908, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 4.388055176595422, | |
| "grad_norm": 0.08191614537028945, | |
| "learning_rate": 3.5917178314346955e-06, | |
| "loss": 0.398, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 4.392905866302865, | |
| "grad_norm": 0.08324472646577967, | |
| "learning_rate": 3.535784554675088e-06, | |
| "loss": 0.3941, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 4.397756556010307, | |
| "grad_norm": 0.08386476178169076, | |
| "learning_rate": 3.480270077975525e-06, | |
| "loss": 0.395, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 4.402607245717751, | |
| "grad_norm": 0.08759720428686872, | |
| "learning_rate": 3.42517503893276e-06, | |
| "loss": 0.3879, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 4.407457935425193, | |
| "grad_norm": 0.0834695533465509, | |
| "learning_rate": 3.370500070326257e-06, | |
| "loss": 0.3832, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 4.412308625132636, | |
| "grad_norm": 0.080547419306159, | |
| "learning_rate": 3.3162458001108332e-06, | |
| "loss": 0.3858, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 4.417159314840079, | |
| "grad_norm": 0.08406690651169581, | |
| "learning_rate": 3.2624128514094778e-06, | |
| "loss": 0.3923, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 4.422010004547522, | |
| "grad_norm": 0.08417275645743631, | |
| "learning_rate": 3.20900184250625e-06, | |
| "loss": 0.3933, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 4.426860694254964, | |
| "grad_norm": 0.08104238775596306, | |
| "learning_rate": 3.1560133868390895e-06, | |
| "loss": 0.4023, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 4.431711383962407, | |
| "grad_norm": 0.08376361425082632, | |
| "learning_rate": 3.1034480929928333e-06, | |
| "loss": 0.399, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 4.43656207366985, | |
| "grad_norm": 0.08058350379685782, | |
| "learning_rate": 3.0513065646921957e-06, | |
| "loss": 0.3946, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 4.441412763377293, | |
| "grad_norm": 0.08013760556674378, | |
| "learning_rate": 2.999589400794851e-06, | |
| "loss": 0.392, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 4.446263453084735, | |
| "grad_norm": 0.08082216615145961, | |
| "learning_rate": 2.948297195284546e-06, | |
| "loss": 0.3916, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 4.4511141427921785, | |
| "grad_norm": 0.09007717527915819, | |
| "learning_rate": 2.897430537264283e-06, | |
| "loss": 0.3947, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 4.455964832499621, | |
| "grad_norm": 0.0866390993026153, | |
| "learning_rate": 2.8469900109495553e-06, | |
| "loss": 0.3942, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 4.460815522207064, | |
| "grad_norm": 0.08034663815912857, | |
| "learning_rate": 2.79697619566162e-06, | |
| "loss": 0.3888, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 4.465666211914507, | |
| "grad_norm": 0.08423465438674188, | |
| "learning_rate": 2.7473896658208743e-06, | |
| "loss": 0.391, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 4.4705169016219495, | |
| "grad_norm": 0.08769412223500794, | |
| "learning_rate": 2.6982309909402293e-06, | |
| "loss": 0.3936, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 4.475367591329392, | |
| "grad_norm": 0.0794092004599977, | |
| "learning_rate": 2.649500735618582e-06, | |
| "loss": 0.3993, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 4.480218281036835, | |
| "grad_norm": 0.08321318361659834, | |
| "learning_rate": 2.6011994595343516e-06, | |
| "loss": 0.3965, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 4.485068970744278, | |
| "grad_norm": 0.08400144661884966, | |
| "learning_rate": 2.5533277174389916e-06, | |
| "loss": 0.3927, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 4.48991966045172, | |
| "grad_norm": 0.08039773750467258, | |
| "learning_rate": 2.5058860591506973e-06, | |
| "loss": 0.3927, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 4.494770350159163, | |
| "grad_norm": 0.08132175209524008, | |
| "learning_rate": 2.4588750295480246e-06, | |
| "loss": 0.3888, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 4.499621039866606, | |
| "grad_norm": 0.07920562392368859, | |
| "learning_rate": 2.4122951685636674e-06, | |
| "loss": 0.3896, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 4.504471729574049, | |
| "grad_norm": 0.07833571502263627, | |
| "learning_rate": 2.366147011178246e-06, | |
| "loss": 0.398, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 4.509322419281491, | |
| "grad_norm": 0.08232920047142565, | |
| "learning_rate": 2.320431087414159e-06, | |
| "loss": 0.3838, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 4.514173108988935, | |
| "grad_norm": 0.08136833742803433, | |
| "learning_rate": 2.275147922329506e-06, | |
| "loss": 0.3935, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 4.519023798696377, | |
| "grad_norm": 0.08383396678636225, | |
| "learning_rate": 2.230298036012055e-06, | |
| "loss": 0.3913, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 4.52387448840382, | |
| "grad_norm": 0.08000391439211, | |
| "learning_rate": 2.1858819435732583e-06, | |
| "loss": 0.395, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 4.528725178111262, | |
| "grad_norm": 0.080147749223563, | |
| "learning_rate": 2.141900155142351e-06, | |
| "loss": 0.3951, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 4.533575867818706, | |
| "grad_norm": 0.07978655829531454, | |
| "learning_rate": 2.0983531758604726e-06, | |
| "loss": 0.4011, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 4.538426557526148, | |
| "grad_norm": 0.08386421814730027, | |
| "learning_rate": 2.055241505874892e-06, | |
| "loss": 0.3999, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 4.543277247233591, | |
| "grad_norm": 0.08174904946083562, | |
| "learning_rate": 2.0125656403332396e-06, | |
| "loss": 0.3968, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 4.548127936941034, | |
| "grad_norm": 0.07927025601942647, | |
| "learning_rate": 1.970326069377828e-06, | |
| "loss": 0.4001, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 4.552978626648477, | |
| "grad_norm": 0.0783638905343862, | |
| "learning_rate": 1.928523278140033e-06, | |
| "loss": 0.3943, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 4.557829316355919, | |
| "grad_norm": 0.08012821453349714, | |
| "learning_rate": 1.887157746734718e-06, | |
| "loss": 0.3972, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 4.562680006063362, | |
| "grad_norm": 0.08022389886437123, | |
| "learning_rate": 1.846229950254692e-06, | |
| "loss": 0.3888, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 4.567530695770805, | |
| "grad_norm": 0.07938938767749412, | |
| "learning_rate": 1.8057403587652977e-06, | |
| "loss": 0.3892, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 4.572381385478248, | |
| "grad_norm": 0.07979264917062728, | |
| "learning_rate": 1.7656894372989785e-06, | |
| "loss": 0.3991, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 4.57723207518569, | |
| "grad_norm": 0.07900416574598519, | |
| "learning_rate": 1.726077645849955e-06, | |
| "loss": 0.4033, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 4.5820827648931335, | |
| "grad_norm": 0.0816244017270053, | |
| "learning_rate": 1.6869054393689265e-06, | |
| "loss": 0.394, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 4.586933454600576, | |
| "grad_norm": 0.07678354405433441, | |
| "learning_rate": 1.6481732677578798e-06, | |
| "loss": 0.4026, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 4.5917841443080185, | |
| "grad_norm": 0.07913436605637802, | |
| "learning_rate": 1.60988157586488e-06, | |
| "loss": 0.378, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 4.596634834015462, | |
| "grad_norm": 0.0793297266554538, | |
| "learning_rate": 1.5720308034789721e-06, | |
| "loss": 0.391, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 4.601485523722904, | |
| "grad_norm": 0.07767980761029898, | |
| "learning_rate": 1.5346213853251546e-06, | |
| "loss": 0.3978, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 4.606336213430347, | |
| "grad_norm": 0.08494507410525762, | |
| "learning_rate": 1.4976537510593646e-06, | |
| "loss": 0.3995, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 4.61118690313779, | |
| "grad_norm": 0.08237577571806884, | |
| "learning_rate": 1.4611283252635412e-06, | |
| "loss": 0.4038, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 4.616037592845233, | |
| "grad_norm": 0.07590194142960284, | |
| "learning_rate": 1.425045527440756e-06, | |
| "loss": 0.3956, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 4.620888282552675, | |
| "grad_norm": 0.07954982455169565, | |
| "learning_rate": 1.3894057720104104e-06, | |
| "loss": 0.399, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 4.625738972260118, | |
| "grad_norm": 0.07731849776688487, | |
| "learning_rate": 1.354209468303429e-06, | |
| "loss": 0.3828, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 4.630589661967561, | |
| "grad_norm": 0.07970299041030604, | |
| "learning_rate": 1.3194570205576284e-06, | |
| "loss": 0.3954, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 4.635440351675004, | |
| "grad_norm": 0.07874124868840192, | |
| "learning_rate": 1.2851488279130053e-06, | |
| "loss": 0.3876, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 4.640291041382446, | |
| "grad_norm": 0.07691612719760402, | |
| "learning_rate": 1.2512852844071933e-06, | |
| "loss": 0.3949, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 4.64514173108989, | |
| "grad_norm": 0.08126016720695953, | |
| "learning_rate": 1.2178667789709287e-06, | |
| "loss": 0.3919, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 4.649992420797332, | |
| "grad_norm": 0.08002643782323322, | |
| "learning_rate": 1.1848936954235702e-06, | |
| "loss": 0.395, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 4.654843110504775, | |
| "grad_norm": 0.08026194265591635, | |
| "learning_rate": 1.1523664124687284e-06, | |
| "loss": 0.3997, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 4.659693800212217, | |
| "grad_norm": 0.07660352848559206, | |
| "learning_rate": 1.1202853036898476e-06, | |
| "loss": 0.3974, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 4.664544489919661, | |
| "grad_norm": 0.08113236962163348, | |
| "learning_rate": 1.0886507375459908e-06, | |
| "loss": 0.3981, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 4.669395179627103, | |
| "grad_norm": 0.07701309756204706, | |
| "learning_rate": 1.0574630773675687e-06, | |
| "loss": 0.3839, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 4.674245869334546, | |
| "grad_norm": 0.07891978078549244, | |
| "learning_rate": 1.0267226813521635e-06, | |
| "loss": 0.3877, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 4.679096559041989, | |
| "grad_norm": 0.07852321868608765, | |
| "learning_rate": 9.964299025604274e-07, | |
| "loss": 0.3921, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 4.683947248749432, | |
| "grad_norm": 0.07818107376466682, | |
| "learning_rate": 9.66585088912022e-07, | |
| "loss": 0.3967, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 4.688797938456874, | |
| "grad_norm": 0.076600770628096, | |
| "learning_rate": 9.371885831816319e-07, | |
| "loss": 0.395, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 4.6936486281643175, | |
| "grad_norm": 0.07627250485843899, | |
| "learning_rate": 9.082407229950018e-07, | |
| "loss": 0.3976, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 4.69849931787176, | |
| "grad_norm": 0.07905168555654667, | |
| "learning_rate": 8.797418408251101e-07, | |
| "loss": 0.3918, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 4.7033500075792025, | |
| "grad_norm": 0.07625038396328788, | |
| "learning_rate": 8.516922639882819e-07, | |
| "loss": 0.3897, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 4.708200697286646, | |
| "grad_norm": 0.08055085830124646, | |
| "learning_rate": 8.2409231464049e-07, | |
| "loss": 0.3901, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 4.713051386994088, | |
| "grad_norm": 0.07599467665769233, | |
| "learning_rate": 7.969423097736162e-07, | |
| "loss": 0.3931, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 4.717902076701531, | |
| "grad_norm": 0.08049982038815076, | |
| "learning_rate": 7.702425612118269e-07, | |
| "loss": 0.3962, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 4.7227527664089735, | |
| "grad_norm": 0.07817439712988589, | |
| "learning_rate": 7.439933756079942e-07, | |
| "loss": 0.3913, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 4.727603456116417, | |
| "grad_norm": 0.08001209525722262, | |
| "learning_rate": 7.181950544401695e-07, | |
| "loss": 0.3935, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 4.732454145823859, | |
| "grad_norm": 0.07544329449164154, | |
| "learning_rate": 6.928478940081107e-07, | |
| "loss": 0.3984, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 4.737304835531302, | |
| "grad_norm": 0.07792056514995602, | |
| "learning_rate": 6.679521854299032e-07, | |
| "loss": 0.3914, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 4.742155525238745, | |
| "grad_norm": 0.07777159770613211, | |
| "learning_rate": 6.435082146385885e-07, | |
| "loss": 0.3901, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 4.747006214946188, | |
| "grad_norm": 0.07684966968716175, | |
| "learning_rate": 6.195162623789052e-07, | |
| "loss": 0.393, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 4.75185690465363, | |
| "grad_norm": 0.07663626226452194, | |
| "learning_rate": 5.959766042040426e-07, | |
| "loss": 0.3879, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 4.756707594361073, | |
| "grad_norm": 0.07653692258850123, | |
| "learning_rate": 5.728895104724963e-07, | |
| "loss": 0.3959, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 4.761558284068516, | |
| "grad_norm": 0.07731590848782262, | |
| "learning_rate": 5.502552463449418e-07, | |
| "loss": 0.3984, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 4.766408973775959, | |
| "grad_norm": 0.07715742201671594, | |
| "learning_rate": 5.280740717812149e-07, | |
| "loss": 0.3977, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 4.771259663483401, | |
| "grad_norm": 0.07733908081214975, | |
| "learning_rate": 5.063462415372967e-07, | |
| "loss": 0.3948, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 4.776110353190845, | |
| "grad_norm": 0.07797117300936077, | |
| "learning_rate": 4.850720051624124e-07, | |
| "loss": 0.3914, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 4.780961042898287, | |
| "grad_norm": 0.07721734718928677, | |
| "learning_rate": 4.642516069961556e-07, | |
| "loss": 0.392, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 4.78581173260573, | |
| "grad_norm": 0.07648551851063208, | |
| "learning_rate": 4.438852861656751e-07, | |
| "loss": 0.3951, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 4.790662422313172, | |
| "grad_norm": 0.07521119037010907, | |
| "learning_rate": 4.2397327658294076e-07, | |
| "loss": 0.3899, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 4.795513112020616, | |
| "grad_norm": 0.07655879860272995, | |
| "learning_rate": 4.045158069420474e-07, | |
| "loss": 0.3963, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 4.800363801728058, | |
| "grad_norm": 0.07622760847546149, | |
| "learning_rate": 3.8551310071659023e-07, | |
| "loss": 0.3975, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 4.805214491435501, | |
| "grad_norm": 0.0750428400819878, | |
| "learning_rate": 3.6696537615711124e-07, | |
| "loss": 0.3968, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 4.810065181142944, | |
| "grad_norm": 0.07852243689578567, | |
| "learning_rate": 3.4887284628857266e-07, | |
| "loss": 0.3932, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 4.8149158708503865, | |
| "grad_norm": 0.0771348195411035, | |
| "learning_rate": 3.3123571890791405e-07, | |
| "loss": 0.3887, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 4.819766560557829, | |
| "grad_norm": 0.07614331045752255, | |
| "learning_rate": 3.1405419658168125e-07, | |
| "loss": 0.394, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 4.824617250265272, | |
| "grad_norm": 0.07563944264490313, | |
| "learning_rate": 2.973284766436857e-07, | |
| "loss": 0.3917, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 4.829467939972715, | |
| "grad_norm": 0.07503164664083585, | |
| "learning_rate": 2.810587511927354e-07, | |
| "loss": 0.3901, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 4.8343186296801575, | |
| "grad_norm": 0.07663595734772072, | |
| "learning_rate": 2.652452070904499e-07, | |
| "loss": 0.3923, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 4.839169319387601, | |
| "grad_norm": 0.07606961706547127, | |
| "learning_rate": 2.498880259590797e-07, | |
| "loss": 0.3944, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 4.844020009095043, | |
| "grad_norm": 0.0761182148197196, | |
| "learning_rate": 2.3498738417945034e-07, | |
| "loss": 0.3975, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 4.848870698802486, | |
| "grad_norm": 0.08039104985634041, | |
| "learning_rate": 2.205434528889283e-07, | |
| "loss": 0.3971, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 4.853721388509928, | |
| "grad_norm": 0.07775606427920397, | |
| "learning_rate": 2.0655639797944937e-07, | |
| "loss": 0.3903, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 4.858572078217372, | |
| "grad_norm": 0.07610940999590161, | |
| "learning_rate": 1.9302638009561782e-07, | |
| "loss": 0.396, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 4.863422767924814, | |
| "grad_norm": 0.07569450912275462, | |
| "learning_rate": 1.7995355463285457e-07, | |
| "loss": 0.3965, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 4.868273457632257, | |
| "grad_norm": 0.07643720956773195, | |
| "learning_rate": 1.6733807173562988e-07, | |
| "loss": 0.3913, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 4.8731241473397, | |
| "grad_norm": 0.07571887859535044, | |
| "learning_rate": 1.5518007629571342e-07, | |
| "loss": 0.3995, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 4.877974837047143, | |
| "grad_norm": 0.07657090064859913, | |
| "learning_rate": 1.4347970795054456e-07, | |
| "loss": 0.3967, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 4.882825526754585, | |
| "grad_norm": 0.07475277823538463, | |
| "learning_rate": 1.3223710108158483e-07, | |
| "loss": 0.3965, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 4.887676216462028, | |
| "grad_norm": 0.07465351831333361, | |
| "learning_rate": 1.214523848128124e-07, | |
| "loss": 0.3971, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 4.892526906169471, | |
| "grad_norm": 0.07637388169909817, | |
| "learning_rate": 1.111256830092211e-07, | |
| "loss": 0.3996, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 4.897377595876914, | |
| "grad_norm": 0.07639665552172381, | |
| "learning_rate": 1.0125711427540374e-07, | |
| "loss": 0.3949, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 4.902228285584356, | |
| "grad_norm": 0.07373092012101537, | |
| "learning_rate": 9.184679195417989e-08, | |
| "loss": 0.3889, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 4.9070789752918, | |
| "grad_norm": 0.07583698163385665, | |
| "learning_rate": 8.289482412531246e-08, | |
| "loss": 0.3984, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 4.911929664999242, | |
| "grad_norm": 0.07647787343235872, | |
| "learning_rate": 7.440131360424652e-08, | |
| "loss": 0.3887, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 4.916780354706685, | |
| "grad_norm": 0.07751142464425709, | |
| "learning_rate": 6.636635794094126e-08, | |
| "loss": 0.3908, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 4.921631044414128, | |
| "grad_norm": 0.07884353160777417, | |
| "learning_rate": 5.879004941874655e-08, | |
| "loss": 0.3951, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 4.9264817341215705, | |
| "grad_norm": 0.07661585400143268, | |
| "learning_rate": 5.16724750533415e-08, | |
| "loss": 0.398, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 4.931332423829013, | |
| "grad_norm": 0.07506936516416932, | |
| "learning_rate": 4.5013716591730815e-08, | |
| "loss": 0.3944, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 4.9361831135364564, | |
| "grad_norm": 0.07625120541429038, | |
| "learning_rate": 3.881385051132114e-08, | |
| "loss": 0.3997, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 4.941033803243899, | |
| "grad_norm": 0.0754548388472603, | |
| "learning_rate": 3.307294801902838e-08, | |
| "loss": 0.3968, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 4.9458844929513415, | |
| "grad_norm": 0.07563871272528049, | |
| "learning_rate": 2.7791075050460636e-08, | |
| "loss": 0.3977, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 4.950735182658784, | |
| "grad_norm": 0.0760975342061979, | |
| "learning_rate": 2.2968292269167637e-08, | |
| "loss": 0.3978, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 4.955585872366227, | |
| "grad_norm": 0.07484229156933196, | |
| "learning_rate": 1.8604655065939116e-08, | |
| "loss": 0.3888, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 4.96043656207367, | |
| "grad_norm": 0.07489129192172529, | |
| "learning_rate": 1.470021355816975e-08, | |
| "loss": 0.3952, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 4.965287251781112, | |
| "grad_norm": 0.07561234415729748, | |
| "learning_rate": 1.1255012589286297e-08, | |
| "loss": 0.3988, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 4.970137941488556, | |
| "grad_norm": 0.07589712959511802, | |
| "learning_rate": 8.269091728232426e-09, | |
| "loss": 0.3858, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 4.974988631195998, | |
| "grad_norm": 0.07684966203710471, | |
| "learning_rate": 5.742485269006892e-09, | |
| "loss": 0.3976, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 4.979839320903441, | |
| "grad_norm": 0.07450707523151534, | |
| "learning_rate": 3.6752222302727238e-09, | |
| "loss": 0.3922, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 4.984690010610883, | |
| "grad_norm": 0.0754634711281639, | |
| "learning_rate": 2.06732635503748e-09, | |
| "loss": 0.3894, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 4.989540700318327, | |
| "grad_norm": 0.07460842756020492, | |
| "learning_rate": 9.188161103557136e-10, | |
| "loss": 0.3893, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 4.994391390025769, | |
| "grad_norm": 0.07411367814575705, | |
| "learning_rate": 2.2970468714245132e-10, | |
| "loss": 0.3934, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 4.999242079733212, | |
| "grad_norm": 0.07533391242414708, | |
| "learning_rate": 0.0, | |
| "loss": 0.3877, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 4.999242079733212, | |
| "step": 1030, | |
| "total_flos": 2.739131934768418e+19, | |
| "train_loss": 0.07882811409755817, | |
| "train_runtime": 48155.7887, | |
| "train_samples_per_second": 10.958, | |
| "train_steps_per_second": 0.021 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1030, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.739131934768418e+19, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
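
The JSON above is a Hugging Face `transformers` Trainer state file (`trainer_state.json`): `log_history` holds one record per optimizer step, and the final record carries the run totals. Below is a minimal sketch for loading it and cross-checking the reported throughput; the filename is an assumption, while the field names are taken from the file itself.

```python
import json

# Assumed local path; this is the file reproduced above.
with open("trainer_state.json") as f:
    state = json.load(f)

# Per-step records carry "loss"; the last record carries run totals.
step_logs = [e for e in state["log_history"] if "loss" in e]
totals = state["log_history"][-1]

print(f"steps logged:    {len(step_logs)}")
print(f"loss trajectory: {step_logs[0]['loss']:.4f} -> {step_logs[-1]['loss']:.4f}")

# Throughput cross-check: max_steps / train_runtime reproduces
# train_steps_per_second (1030 / 48155.79 s = 0.0214), and samples/sec
# divided by steps/sec gives the effective batch size:
# 10.958 / 0.0214 = 512, i.e. per-device batch 1 ("train_batch_size")
# times gradient accumulation and data parallelism.
steps_per_sec = state["max_steps"] / totals["train_runtime"]
print(f"steps/sec:       {steps_per_sec:.4f} (logged {totals['train_steps_per_second']})")
print(f"effective batch: {totals['train_samples_per_second'] / steps_per_sec:.0f}")
```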
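
The logged `learning_rate` column is consistent with linear warmup followed by cosine decay to zero, the shape produced by `transformers`' `get_cosine_schedule_with_warmup`. The peak learning rate (8e-5) and warmup length (103 steps, ~10% of `max_steps`) are inferred by fitting the logged values, not stated anywhere in the file, so treat this sketch as a reconstruction rather than the training configuration:

```python
import math

# Inferred, not stated in the file: peak LR and warmup length are
# reverse-engineered from the logged "learning_rate" values.
PEAK_LR = 8e-5
WARMUP_STEPS = 103      # ~10% of max_steps
MAX_STEPS = 1030

def lr_at(step: int) -> float:
    """Linear warmup, then cosine decay to zero (the shape of
    transformers' get_cosine_schedule_with_warmup)."""
    if step < WARMUP_STEPS:
        return PEAK_LR * step / WARMUP_STEPS
    progress = (step - WARMUP_STEPS) / (MAX_STEPS - WARMUP_STEPS)
    return PEAK_LR * 0.5 * (1.0 + math.cos(math.pi * progress))

# Spot-checks against the log above:
print(lr_at(950))   # ~1.4611e-06, matching step 950
print(lr_at(1028))  # ~9.1882e-10, matching step 1028
print(lr_at(1030))  # 0.0, matching the final step
```

The tail of the log (2.297e-10 at step 1029, then exactly 0.0 at step 1030) confirms the schedule anneals fully to zero rather than to a floor.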