| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.9958368026644462, | |
| "eval_steps": 500, | |
| "global_step": 600, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0033305578684429643, | |
| "grad_norm": 2.1135175063937415, | |
| "learning_rate": 1.3333333333333334e-07, | |
| "loss": 1.6413, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.01, | |
| "memory/max_mem_allocated(gib)": 56.7, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.006661115736885929, | |
| "grad_norm": 2.0196598114735065, | |
| "learning_rate": 2.6666666666666667e-07, | |
| "loss": 1.6382, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.009991673605328892, | |
| "grad_norm": 2.037892565480129, | |
| "learning_rate": 4.0000000000000003e-07, | |
| "loss": 1.6536, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.013322231473771857, | |
| "grad_norm": 1.980939710918612, | |
| "learning_rate": 5.333333333333333e-07, | |
| "loss": 1.6712, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.01665278934221482, | |
| "grad_norm": 2.0553629965683196, | |
| "learning_rate": 6.666666666666666e-07, | |
| "loss": 1.591, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.019983347210657785, | |
| "grad_norm": 2.1321442384194493, | |
| "learning_rate": 8.000000000000001e-07, | |
| "loss": 1.6275, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.02331390507910075, | |
| "grad_norm": 2.0224554441924147, | |
| "learning_rate": 9.333333333333333e-07, | |
| "loss": 1.6802, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.026644462947543714, | |
| "grad_norm": 2.0657857283218144, | |
| "learning_rate": 1.0666666666666667e-06, | |
| "loss": 1.5768, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.02997502081598668, | |
| "grad_norm": 2.0104233987359206, | |
| "learning_rate": 1.2e-06, | |
| "loss": 1.6026, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.03330557868442964, | |
| "grad_norm": 2.098692014200002, | |
| "learning_rate": 1.3333333333333332e-06, | |
| "loss": 1.682, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.03663613655287261, | |
| "grad_norm": 2.0879014611232116, | |
| "learning_rate": 1.4666666666666667e-06, | |
| "loss": 1.6368, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.03996669442131557, | |
| "grad_norm": 2.0701872996726443, | |
| "learning_rate": 1.6000000000000001e-06, | |
| "loss": 1.629, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.04329725228975854, | |
| "grad_norm": 2.105064067100562, | |
| "learning_rate": 1.7333333333333332e-06, | |
| "loss": 1.6568, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.0466278101582015, | |
| "grad_norm": 2.1084872575258733, | |
| "learning_rate": 1.8666666666666667e-06, | |
| "loss": 1.597, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.04995836802664446, | |
| "grad_norm": 2.0616161807879965, | |
| "learning_rate": 2e-06, | |
| "loss": 1.6008, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.05328892589508743, | |
| "grad_norm": 1.92970469468585, | |
| "learning_rate": 2.1333333333333334e-06, | |
| "loss": 1.6815, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.05661948376353039, | |
| "grad_norm": 2.0527427262697855, | |
| "learning_rate": 2.266666666666667e-06, | |
| "loss": 1.6873, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.05995004163197336, | |
| "grad_norm": 1.9622305052083537, | |
| "learning_rate": 2.4e-06, | |
| "loss": 1.6334, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.06328059950041633, | |
| "grad_norm": 1.9979390122219929, | |
| "learning_rate": 2.533333333333333e-06, | |
| "loss": 1.6623, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.06661115736885928, | |
| "grad_norm": 2.0311968068371367, | |
| "learning_rate": 2.6666666666666664e-06, | |
| "loss": 1.607, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.06994171523730225, | |
| "grad_norm": 1.968344786501615, | |
| "learning_rate": 2.8e-06, | |
| "loss": 1.6087, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.07327227310574522, | |
| "grad_norm": 2.1145916019697952, | |
| "learning_rate": 2.9333333333333333e-06, | |
| "loss": 1.5926, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.07660283097418817, | |
| "grad_norm": 2.0129475295050496, | |
| "learning_rate": 3.066666666666667e-06, | |
| "loss": 1.6171, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.07993338884263114, | |
| "grad_norm": 1.8817164699193898, | |
| "learning_rate": 3.2000000000000003e-06, | |
| "loss": 1.6552, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.08326394671107411, | |
| "grad_norm": 1.9306634203997992, | |
| "learning_rate": 3.333333333333333e-06, | |
| "loss": 1.6288, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.08659450457951708, | |
| "grad_norm": 1.8839715974459492, | |
| "learning_rate": 3.4666666666666664e-06, | |
| "loss": 1.5772, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.08992506244796003, | |
| "grad_norm": 1.9004207576591563, | |
| "learning_rate": 3.6e-06, | |
| "loss": 1.6019, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.093255620316403, | |
| "grad_norm": 1.8508009396241183, | |
| "learning_rate": 3.7333333333333333e-06, | |
| "loss": 1.6347, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.09658617818484597, | |
| "grad_norm": 1.6521817439090796, | |
| "learning_rate": 3.866666666666666e-06, | |
| "loss": 1.6425, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.09991673605328892, | |
| "grad_norm": 1.5825237347457706, | |
| "learning_rate": 4e-06, | |
| "loss": 1.4999, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.10324729392173189, | |
| "grad_norm": 1.4406934972277887, | |
| "learning_rate": 4.133333333333333e-06, | |
| "loss": 1.537, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.10657785179017486, | |
| "grad_norm": 1.3686252476380623, | |
| "learning_rate": 4.266666666666667e-06, | |
| "loss": 1.5054, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.10990840965861781, | |
| "grad_norm": 1.190989973623068, | |
| "learning_rate": 4.399999999999999e-06, | |
| "loss": 1.5673, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.11323896752706078, | |
| "grad_norm": 1.0921718147815354, | |
| "learning_rate": 4.533333333333334e-06, | |
| "loss": 1.5383, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.11656952539550375, | |
| "grad_norm": 0.9720091603452963, | |
| "learning_rate": 4.666666666666666e-06, | |
| "loss": 1.5698, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.11990008326394672, | |
| "grad_norm": 0.8634677699569875, | |
| "learning_rate": 4.8e-06, | |
| "loss": 1.5286, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.12323064113238967, | |
| "grad_norm": 0.7720350215206407, | |
| "learning_rate": 4.933333333333333e-06, | |
| "loss": 1.5897, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.12656119900083265, | |
| "grad_norm": 0.7351438783567595, | |
| "learning_rate": 5.066666666666666e-06, | |
| "loss": 1.471, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.1298917568692756, | |
| "grad_norm": 0.6436527036047347, | |
| "learning_rate": 5.2e-06, | |
| "loss": 1.5523, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.13322231473771856, | |
| "grad_norm": 0.5914433909472115, | |
| "learning_rate": 5.333333333333333e-06, | |
| "loss": 1.5169, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.13655287260616153, | |
| "grad_norm": 0.5708899134928395, | |
| "learning_rate": 5.466666666666667e-06, | |
| "loss": 1.4727, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.1398834304746045, | |
| "grad_norm": 0.562979308505682, | |
| "learning_rate": 5.6e-06, | |
| "loss": 1.5101, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.14321398834304747, | |
| "grad_norm": 0.5333098859373814, | |
| "learning_rate": 5.733333333333332e-06, | |
| "loss": 1.5053, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.14654454621149043, | |
| "grad_norm": 0.518700589700869, | |
| "learning_rate": 5.866666666666667e-06, | |
| "loss": 1.5522, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.1498751040799334, | |
| "grad_norm": 0.5123134702021855, | |
| "learning_rate": 5.999999999999999e-06, | |
| "loss": 1.4581, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.15320566194837634, | |
| "grad_norm": 0.5233023339387923, | |
| "learning_rate": 6.133333333333334e-06, | |
| "loss": 1.4503, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.1565362198168193, | |
| "grad_norm": 0.4984395351799732, | |
| "learning_rate": 6.266666666666666e-06, | |
| "loss": 1.4698, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.15986677768526228, | |
| "grad_norm": 0.48116733820243823, | |
| "learning_rate": 6.4000000000000006e-06, | |
| "loss": 1.5399, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.16319733555370525, | |
| "grad_norm": 0.4917159508967155, | |
| "learning_rate": 6.533333333333333e-06, | |
| "loss": 1.4674, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.16652789342214822, | |
| "grad_norm": 0.4631697484027289, | |
| "learning_rate": 6.666666666666666e-06, | |
| "loss": 1.5063, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.16985845129059118, | |
| "grad_norm": 0.4506097490342786, | |
| "learning_rate": 6.8e-06, | |
| "loss": 1.4787, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.17318900915903415, | |
| "grad_norm": 0.4808943580292107, | |
| "learning_rate": 6.933333333333333e-06, | |
| "loss": 1.5355, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.1765195670274771, | |
| "grad_norm": 0.4353655566788618, | |
| "learning_rate": 7.066666666666667e-06, | |
| "loss": 1.4545, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.17985012489592006, | |
| "grad_norm": 0.42881276266179474, | |
| "learning_rate": 7.2e-06, | |
| "loss": 1.4726, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.18318068276436303, | |
| "grad_norm": 0.4243886425058161, | |
| "learning_rate": 7.333333333333332e-06, | |
| "loss": 1.5364, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.186511240632806, | |
| "grad_norm": 0.4078516232902407, | |
| "learning_rate": 7.466666666666667e-06, | |
| "loss": 1.5441, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.18984179850124897, | |
| "grad_norm": 0.39819776399963164, | |
| "learning_rate": 7.599999999999999e-06, | |
| "loss": 1.5394, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.19317235636969193, | |
| "grad_norm": 0.3993196408685462, | |
| "learning_rate": 7.733333333333333e-06, | |
| "loss": 1.4883, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.1965029142381349, | |
| "grad_norm": 0.3992257742362516, | |
| "learning_rate": 7.866666666666667e-06, | |
| "loss": 1.4933, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.19983347210657784, | |
| "grad_norm": 0.39782096872195477, | |
| "learning_rate": 8e-06, | |
| "loss": 1.4729, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.2031640299750208, | |
| "grad_norm": 0.38436305350726707, | |
| "learning_rate": 7.99851604526062e-06, | |
| "loss": 1.4777, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.20649458784346378, | |
| "grad_norm": 0.3782583438569582, | |
| "learning_rate": 7.99702712746191e-06, | |
| "loss": 1.535, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.20982514571190675, | |
| "grad_norm": 0.3910775225847348, | |
| "learning_rate": 7.995533221663874e-06, | |
| "loss": 1.4643, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.21315570358034971, | |
| "grad_norm": 0.37376830993433585, | |
| "learning_rate": 7.994034302759135e-06, | |
| "loss": 1.4265, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.21648626144879268, | |
| "grad_norm": 0.37205196740456564, | |
| "learning_rate": 7.99253034547152e-06, | |
| "loss": 1.484, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.21981681931723562, | |
| "grad_norm": 0.37012462931708767, | |
| "learning_rate": 7.991021324354658e-06, | |
| "loss": 1.4668, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.2231473771856786, | |
| "grad_norm": 0.36609254541987934, | |
| "learning_rate": 7.989507213790519e-06, | |
| "loss": 1.4512, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.22647793505412156, | |
| "grad_norm": 0.36389643029866026, | |
| "learning_rate": 7.987987987987988e-06, | |
| "loss": 1.4666, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.22980849292256453, | |
| "grad_norm": 0.3835942907380993, | |
| "learning_rate": 7.986463620981386e-06, | |
| "loss": 1.5581, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.2331390507910075, | |
| "grad_norm": 0.3709505537460329, | |
| "learning_rate": 7.984934086629002e-06, | |
| "loss": 1.4942, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.23646960865945046, | |
| "grad_norm": 0.3734284694232727, | |
| "learning_rate": 7.983399358611582e-06, | |
| "loss": 1.5449, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.23980016652789343, | |
| "grad_norm": 0.38168285139161445, | |
| "learning_rate": 7.981859410430838e-06, | |
| "loss": 1.4972, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.24313072439633637, | |
| "grad_norm": 0.4166060644404285, | |
| "learning_rate": 7.98031421540791e-06, | |
| "loss": 1.5273, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.24646128226477934, | |
| "grad_norm": 0.3721773268353121, | |
| "learning_rate": 7.978763746681835e-06, | |
| "loss": 1.5459, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.2497918401332223, | |
| "grad_norm": 0.3785109036596187, | |
| "learning_rate": 7.977207977207977e-06, | |
| "loss": 1.5221, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.2531223980016653, | |
| "grad_norm": 0.3798544993330551, | |
| "learning_rate": 7.975646879756469e-06, | |
| "loss": 1.447, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.25645295587010825, | |
| "grad_norm": 0.3676458399839075, | |
| "learning_rate": 7.974080426910615e-06, | |
| "loss": 1.552, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.2597835137385512, | |
| "grad_norm": 0.3906392619486636, | |
| "learning_rate": 7.972508591065292e-06, | |
| "loss": 1.5524, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.2631140716069942, | |
| "grad_norm": 0.4086059406499793, | |
| "learning_rate": 7.97093134442532e-06, | |
| "loss": 1.5347, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.2664446294754371, | |
| "grad_norm": 0.37866878925235237, | |
| "learning_rate": 7.969348659003832e-06, | |
| "loss": 1.4263, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.2697751873438801, | |
| "grad_norm": 0.36630829174020924, | |
| "learning_rate": 7.96776050662061e-06, | |
| "loss": 1.4882, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.27310574521232306, | |
| "grad_norm": 0.3572953266857883, | |
| "learning_rate": 7.966166858900421e-06, | |
| "loss": 1.4996, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.27643630308076605, | |
| "grad_norm": 0.37034991529495037, | |
| "learning_rate": 7.964567687271327e-06, | |
| "loss": 1.4558, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.279766860949209, | |
| "grad_norm": 0.39454254411893813, | |
| "learning_rate": 7.962962962962963e-06, | |
| "loss": 1.481, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.28309741881765194, | |
| "grad_norm": 0.36598142148212737, | |
| "learning_rate": 7.961352657004831e-06, | |
| "loss": 1.4647, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.28642797668609493, | |
| "grad_norm": 0.40131025635004997, | |
| "learning_rate": 7.959736740224545e-06, | |
| "loss": 1.486, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.28975853455453787, | |
| "grad_norm": 0.36813808559204136, | |
| "learning_rate": 7.958115183246073e-06, | |
| "loss": 1.5104, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.29308909242298087, | |
| "grad_norm": 0.4399054897841581, | |
| "learning_rate": 7.956487956487956e-06, | |
| "loss": 1.5511, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.2964196502914238, | |
| "grad_norm": 0.4137480663423791, | |
| "learning_rate": 7.95485503016151e-06, | |
| "loss": 1.5431, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.2997502081598668, | |
| "grad_norm": 0.39082659570701933, | |
| "learning_rate": 7.953216374269006e-06, | |
| "loss": 1.5094, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.30308076602830974, | |
| "grad_norm": 0.4222547479458042, | |
| "learning_rate": 7.951571958601836e-06, | |
| "loss": 1.528, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.3064113238967527, | |
| "grad_norm": 0.4565848989524497, | |
| "learning_rate": 7.949921752738653e-06, | |
| "loss": 1.4345, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.3097418817651957, | |
| "grad_norm": 0.3909465393349193, | |
| "learning_rate": 7.948265726043504e-06, | |
| "loss": 1.4885, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.3130724396336386, | |
| "grad_norm": 0.40399439020361494, | |
| "learning_rate": 7.946603847663918e-06, | |
| "loss": 1.4836, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.3164029975020816, | |
| "grad_norm": 0.3940685084379771, | |
| "learning_rate": 7.944936086529007e-06, | |
| "loss": 1.4894, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.31973355537052456, | |
| "grad_norm": 0.36455393248573603, | |
| "learning_rate": 7.943262411347517e-06, | |
| "loss": 1.4765, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.32306411323896755, | |
| "grad_norm": 0.42216219555871026, | |
| "learning_rate": 7.94158279060588e-06, | |
| "loss": 1.505, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.3263946711074105, | |
| "grad_norm": 0.3833612688097333, | |
| "learning_rate": 7.93989719256623e-06, | |
| "loss": 1.4803, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.32972522897585343, | |
| "grad_norm": 0.3793312412105176, | |
| "learning_rate": 7.938205585264408e-06, | |
| "loss": 1.4721, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.33305578684429643, | |
| "grad_norm": 0.6231405275420779, | |
| "learning_rate": 7.936507936507936e-06, | |
| "loss": 1.4941, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.33638634471273937, | |
| "grad_norm": 0.39916108511305454, | |
| "learning_rate": 7.934804213873981e-06, | |
| "loss": 1.5113, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.33971690258118237, | |
| "grad_norm": 0.39832888981715536, | |
| "learning_rate": 7.933094384707288e-06, | |
| "loss": 1.4616, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.3430474604496253, | |
| "grad_norm": 0.35554379353616694, | |
| "learning_rate": 7.931378416118093e-06, | |
| "loss": 1.4754, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.3463780183180683, | |
| "grad_norm": 0.3778786204869107, | |
| "learning_rate": 7.929656274980016e-06, | |
| "loss": 1.5204, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.34970857618651124, | |
| "grad_norm": 0.3979509981477904, | |
| "learning_rate": 7.927927927927927e-06, | |
| "loss": 1.4972, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.3530391340549542, | |
| "grad_norm": 0.3829152377900939, | |
| "learning_rate": 7.926193341355797e-06, | |
| "loss": 1.4852, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.3563696919233972, | |
| "grad_norm": 0.3783230292732417, | |
| "learning_rate": 7.924452481414507e-06, | |
| "loss": 1.4605, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.3597002497918401, | |
| "grad_norm": 0.3702225917786687, | |
| "learning_rate": 7.922705314009662e-06, | |
| "loss": 1.4751, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.3630308076602831, | |
| "grad_norm": 0.423076463648796, | |
| "learning_rate": 7.920951804799353e-06, | |
| "loss": 1.5043, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.36636136552872606, | |
| "grad_norm": 0.4015775298544568, | |
| "learning_rate": 7.919191919191919e-06, | |
| "loss": 1.4993, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.36969192339716905, | |
| "grad_norm": 0.395772531232646, | |
| "learning_rate": 7.917425622343655e-06, | |
| "loss": 1.5074, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.373022481265612, | |
| "grad_norm": 0.4063035915678222, | |
| "learning_rate": 7.915652879156528e-06, | |
| "loss": 1.5005, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.37635303913405493, | |
| "grad_norm": 0.3749049780160411, | |
| "learning_rate": 7.913873654275848e-06, | |
| "loss": 1.5016, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.37968359700249793, | |
| "grad_norm": 0.40207184709524446, | |
| "learning_rate": 7.912087912087911e-06, | |
| "loss": 1.5112, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.38301415487094087, | |
| "grad_norm": 0.3761596500147066, | |
| "learning_rate": 7.910295616717634e-06, | |
| "loss": 1.4226, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.38634471273938387, | |
| "grad_norm": 0.34919530357614503, | |
| "learning_rate": 7.908496732026144e-06, | |
| "loss": 1.454, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.3896752706078268, | |
| "grad_norm": 0.3783249892281946, | |
| "learning_rate": 7.906691221608348e-06, | |
| "loss": 1.3926, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.3930058284762698, | |
| "grad_norm": 0.38789047851939196, | |
| "learning_rate": 7.904879048790487e-06, | |
| "loss": 1.5148, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.39633638634471274, | |
| "grad_norm": 0.38028310552438055, | |
| "learning_rate": 7.903060176627645e-06, | |
| "loss": 1.5512, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.3996669442131557, | |
| "grad_norm": 0.3557857851285413, | |
| "learning_rate": 7.901234567901235e-06, | |
| "loss": 1.5145, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.4029975020815987, | |
| "grad_norm": 0.3648126505851961, | |
| "learning_rate": 7.89940218511647e-06, | |
| "loss": 1.4616, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.4063280599500416, | |
| "grad_norm": 0.3518641114757544, | |
| "learning_rate": 7.897562990499793e-06, | |
| "loss": 1.4444, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.4096586178184846, | |
| "grad_norm": 0.3812409352914946, | |
| "learning_rate": 7.895716945996276e-06, | |
| "loss": 1.4524, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.41298917568692756, | |
| "grad_norm": 0.37136499335096407, | |
| "learning_rate": 7.893864013266998e-06, | |
| "loss": 1.4495, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.4163197335553705, | |
| "grad_norm": 0.36965696298407785, | |
| "learning_rate": 7.892004153686396e-06, | |
| "loss": 1.454, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.4196502914238135, | |
| "grad_norm": 0.37625883797439813, | |
| "learning_rate": 7.890137328339575e-06, | |
| "loss": 1.4738, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.42298084929225643, | |
| "grad_norm": 0.3891305395409707, | |
| "learning_rate": 7.888263498019595e-06, | |
| "loss": 1.4336, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.42631140716069943, | |
| "grad_norm": 0.33836499033668194, | |
| "learning_rate": 7.886382623224727e-06, | |
| "loss": 1.4435, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.42964196502914237, | |
| "grad_norm": 0.39084535016086686, | |
| "learning_rate": 7.88449466415568e-06, | |
| "loss": 1.4598, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.43297252289758537, | |
| "grad_norm": 0.3896756879145717, | |
| "learning_rate": 7.882599580712787e-06, | |
| "loss": 1.5065, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.4363030807660283, | |
| "grad_norm": 0.4252827004151611, | |
| "learning_rate": 7.880697332493174e-06, | |
| "loss": 1.4083, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.43963363863447125, | |
| "grad_norm": 0.3608365697753635, | |
| "learning_rate": 7.878787878787878e-06, | |
| "loss": 1.441, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.44296419650291424, | |
| "grad_norm": 0.403123415092978, | |
| "learning_rate": 7.876871178578958e-06, | |
| "loss": 1.4627, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.4462947543713572, | |
| "grad_norm": 0.40013457143727, | |
| "learning_rate": 7.874947190536545e-06, | |
| "loss": 1.4955, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.4496253122398002, | |
| "grad_norm": 0.3883976625001682, | |
| "learning_rate": 7.873015873015873e-06, | |
| "loss": 1.4298, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.4529558701082431, | |
| "grad_norm": 0.47893723454397114, | |
| "learning_rate": 7.871077184054283e-06, | |
| "loss": 1.4706, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.4562864279766861, | |
| "grad_norm": 0.3939594731251799, | |
| "learning_rate": 7.869131081368174e-06, | |
| "loss": 1.4659, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.45961698584512906, | |
| "grad_norm": 0.39872483940488357, | |
| "learning_rate": 7.867177522349935e-06, | |
| "loss": 1.4428, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.462947543713572, | |
| "grad_norm": 0.41681968734219343, | |
| "learning_rate": 7.865216464064831e-06, | |
| "loss": 1.5116, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.466278101582015, | |
| "grad_norm": 0.3950334535334994, | |
| "learning_rate": 7.863247863247863e-06, | |
| "loss": 1.4453, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.46960865945045793, | |
| "grad_norm": 0.3569883912128034, | |
| "learning_rate": 7.861271676300578e-06, | |
| "loss": 1.462, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.47293921731890093, | |
| "grad_norm": 0.3784473417547298, | |
| "learning_rate": 7.85928785928786e-06, | |
| "loss": 1.4961, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.47626977518734387, | |
| "grad_norm": 0.35459480974078084, | |
| "learning_rate": 7.857296367934665e-06, | |
| "loss": 1.5362, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.47960033305578686, | |
| "grad_norm": 0.3662426670901604, | |
| "learning_rate": 7.85529715762274e-06, | |
| "loss": 1.3832, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.4829308909242298, | |
| "grad_norm": 0.4066610425979986, | |
| "learning_rate": 7.85329018338727e-06, | |
| "loss": 1.4641, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.48626144879267275, | |
| "grad_norm": 0.3545713986492447, | |
| "learning_rate": 7.851275399913532e-06, | |
| "loss": 1.4675, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.48959200666111574, | |
| "grad_norm": 0.3664688735051096, | |
| "learning_rate": 7.849252761533463e-06, | |
| "loss": 1.4683, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.4929225645295587, | |
| "grad_norm": 0.3733605661751341, | |
| "learning_rate": 7.847222222222221e-06, | |
| "loss": 1.4315, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.4962531223980017, | |
| "grad_norm": 0.3380374141462393, | |
| "learning_rate": 7.845183735594695e-06, | |
| "loss": 1.4401, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.4995836802664446, | |
| "grad_norm": 0.407518229964574, | |
| "learning_rate": 7.84313725490196e-06, | |
| "loss": 1.4437, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.5029142381348876, | |
| "grad_norm": 0.36142186690399497, | |
| "learning_rate": 7.841082733027723e-06, | |
| "loss": 1.4444, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.5062447960033306, | |
| "grad_norm": 0.35245555484230673, | |
| "learning_rate": 7.839020122484688e-06, | |
| "loss": 1.4013, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.5095753538717736, | |
| "grad_norm": 0.3751518274944043, | |
| "learning_rate": 7.836949375410913e-06, | |
| "loss": 1.4325, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.5129059117402165, | |
| "grad_norm": 0.364299926744196, | |
| "learning_rate": 7.834870443566096e-06, | |
| "loss": 1.4757, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.5162364696086594, | |
| "grad_norm": 0.352709296353453, | |
| "learning_rate": 7.832783278327833e-06, | |
| "loss": 1.4405, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.5195670274771024, | |
| "grad_norm": 0.3595882748979197, | |
| "learning_rate": 7.830687830687831e-06, | |
| "loss": 1.5005, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.5228975853455454, | |
| "grad_norm": 0.3663062717533196, | |
| "learning_rate": 7.828584051248068e-06, | |
| "loss": 1.4916, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.5262281432139884, | |
| "grad_norm": 0.39230898190550817, | |
| "learning_rate": 7.82647189021691e-06, | |
| "loss": 1.5925, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.5295587010824313, | |
| "grad_norm": 0.34764213510621217, | |
| "learning_rate": 7.824351297405189e-06, | |
| "loss": 1.533, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.5328892589508742, | |
| "grad_norm": 0.4356036173061448, | |
| "learning_rate": 7.822222222222222e-06, | |
| "loss": 1.4768, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.5362198168193172, | |
| "grad_norm": 0.3650633676087402, | |
| "learning_rate": 7.820084613671788e-06, | |
| "loss": 1.4834, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.5395503746877602, | |
| "grad_norm": 0.36003662026404476, | |
| "learning_rate": 7.81793842034806e-06, | |
| "loss": 1.4745, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.5428809325562032, | |
| "grad_norm": 0.45089712637002705, | |
| "learning_rate": 7.815783590431477e-06, | |
| "loss": 1.4762, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.5462114904246461, | |
| "grad_norm": 0.3727325869359898, | |
| "learning_rate": 7.813620071684589e-06, | |
| "loss": 1.4605, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.5495420482930891, | |
| "grad_norm": 0.3396845072209277, | |
| "learning_rate": 7.81144781144781e-06, | |
| "loss": 1.4793, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.5528726061615321, | |
| "grad_norm": 0.35005093334327886, | |
| "learning_rate": 7.809266756635177e-06, | |
| "loss": 1.4699, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.556203164029975, | |
| "grad_norm": 0.3836826797224187, | |
| "learning_rate": 7.807076853729998e-06, | |
| "loss": 1.4727, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.559533721898418, | |
| "grad_norm": 0.3866747204941054, | |
| "learning_rate": 7.804878048780487e-06, | |
| "loss": 1.4656, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.5628642797668609, | |
| "grad_norm": 0.3754060351606817, | |
| "learning_rate": 7.802670287395338e-06, | |
| "loss": 1.4427, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.5661948376353039, | |
| "grad_norm": 0.3560392764595894, | |
| "learning_rate": 7.80045351473923e-06, | |
| "loss": 1.469, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.5695253955037469, | |
| "grad_norm": 0.38308734497417124, | |
| "learning_rate": 7.79822767552829e-06, | |
| "loss": 1.5086, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.5728559533721899, | |
| "grad_norm": 0.359072776955667, | |
| "learning_rate": 7.7959927140255e-06, | |
| "loss": 1.4531, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.5761865112406328, | |
| "grad_norm": 0.3922686356985507, | |
| "learning_rate": 7.793748574036049e-06, | |
| "loss": 1.5004, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.5795170691090757, | |
| "grad_norm": 0.38139432931110967, | |
| "learning_rate": 7.791495198902606e-06, | |
| "loss": 1.4596, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.5828476269775187, | |
| "grad_norm": 0.386725308323352, | |
| "learning_rate": 7.789232531500573e-06, | |
| "loss": 1.4107, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.5861781848459617, | |
| "grad_norm": 0.3590860738790805, | |
| "learning_rate": 7.786960514233242e-06, | |
| "loss": 1.4748, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.5895087427144047, | |
| "grad_norm": 0.38618438471328675, | |
| "learning_rate": 7.784679089026915e-06, | |
| "loss": 1.481, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.5928393005828476, | |
| "grad_norm": 0.35271293932202913, | |
| "learning_rate": 7.782388197325957e-06, | |
| "loss": 1.4445, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.5961698584512906, | |
| "grad_norm": 0.3587339394337467, | |
| "learning_rate": 7.78008778008778e-06, | |
| "loss": 1.482, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.5995004163197336, | |
| "grad_norm": 0.4051854093619042, | |
| "learning_rate": 7.777777777777777e-06, | |
| "loss": 1.4538, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.6028309741881765, | |
| "grad_norm": 0.3624830177676393, | |
| "learning_rate": 7.775458130364185e-06, | |
| "loss": 1.3882, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.6061615320566195, | |
| "grad_norm": 0.40327439887058536, | |
| "learning_rate": 7.773128777312878e-06, | |
| "loss": 1.4439, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.6094920899250624, | |
| "grad_norm": 0.3849485884337272, | |
| "learning_rate": 7.77078965758211e-06, | |
| "loss": 1.4598, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.6128226477935054, | |
| "grad_norm": 0.3800546336095655, | |
| "learning_rate": 7.76844070961718e-06, | |
| "loss": 1.5077, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.6161532056619484, | |
| "grad_norm": 0.4058514640829756, | |
| "learning_rate": 7.766081871345029e-06, | |
| "loss": 1.4557, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.6194837635303914, | |
| "grad_norm": 0.3547116281420189, | |
| "learning_rate": 7.763713080168775e-06, | |
| "loss": 1.4465, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.6228143213988343, | |
| "grad_norm": 0.36935786461716674, | |
| "learning_rate": 7.76133427296218e-06, | |
| "loss": 1.3674, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.6261448792672772, | |
| "grad_norm": 0.3465066682351456, | |
| "learning_rate": 7.75894538606403e-06, | |
| "loss": 1.5018, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.6294754371357202, | |
| "grad_norm": 0.37821929789921876, | |
| "learning_rate": 7.75654635527247e-06, | |
| "loss": 1.46, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.6328059950041632, | |
| "grad_norm": 0.42147975033129337, | |
| "learning_rate": 7.754137115839244e-06, | |
| "loss": 1.4324, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.6361365528726062, | |
| "grad_norm": 0.395112631651776, | |
| "learning_rate": 7.751717602463872e-06, | |
| "loss": 1.4682, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.6394671107410491, | |
| "grad_norm": 0.3866087697502269, | |
| "learning_rate": 7.749287749287749e-06, | |
| "loss": 1.4845, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.6427976686094921, | |
| "grad_norm": 0.39380953384339784, | |
| "learning_rate": 7.746847489888173e-06, | |
| "loss": 1.4628, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.6461282264779351, | |
| "grad_norm": 0.38499086799547566, | |
| "learning_rate": 7.744396757272294e-06, | |
| "loss": 1.4485, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.649458784346378, | |
| "grad_norm": 0.3628021970554608, | |
| "learning_rate": 7.741935483870966e-06, | |
| "loss": 1.4306, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.652789342214821, | |
| "grad_norm": 0.37886204626432507, | |
| "learning_rate": 7.739463601532567e-06, | |
| "loss": 1.4178, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.6561199000832639, | |
| "grad_norm": 0.36347566586862995, | |
| "learning_rate": 7.736981041516678e-06, | |
| "loss": 1.3917, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.6594504579517069, | |
| "grad_norm": 0.3808525608826558, | |
| "learning_rate": 7.734487734487733e-06, | |
| "loss": 1.425, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.6627810158201499, | |
| "grad_norm": 0.36703672958616185, | |
| "learning_rate": 7.731983610508556e-06, | |
| "loss": 1.3963, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.6661115736885929, | |
| "grad_norm": 0.3449284331155099, | |
| "learning_rate": 7.729468599033817e-06, | |
| "loss": 1.5389, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.6694421315570358, | |
| "grad_norm": 0.38098871722055255, | |
| "learning_rate": 7.726942628903412e-06, | |
| "loss": 1.4354, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.6727726894254787, | |
| "grad_norm": 0.37447535098026113, | |
| "learning_rate": 7.72440562833576e-06, | |
| "loss": 1.4238, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.6761032472939217, | |
| "grad_norm": 0.3815996192127943, | |
| "learning_rate": 7.721857524920983e-06, | |
| "loss": 1.4465, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.6794338051623647, | |
| "grad_norm": 0.43830167523580127, | |
| "learning_rate": 7.719298245614036e-06, | |
| "loss": 1.4464, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.6827643630308077, | |
| "grad_norm": 0.49374391843463344, | |
| "learning_rate": 7.716727716727717e-06, | |
| "loss": 1.4326, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.6860949208992506, | |
| "grad_norm": 0.40611516537871767, | |
| "learning_rate": 7.714145863925599e-06, | |
| "loss": 1.4867, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.6894254787676936, | |
| "grad_norm": 0.39306412548059455, | |
| "learning_rate": 7.711552612214863e-06, | |
| "loss": 1.4879, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.6927560366361366, | |
| "grad_norm": 0.3732547746311456, | |
| "learning_rate": 7.708947885939036e-06, | |
| "loss": 1.5305, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.6960865945045795, | |
| "grad_norm": 0.3749992070235647, | |
| "learning_rate": 7.706331608770632e-06, | |
| "loss": 1.4422, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.6994171523730225, | |
| "grad_norm": 0.4236632648954227, | |
| "learning_rate": 7.703703703703702e-06, | |
| "loss": 1.4362, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.7027477102414654, | |
| "grad_norm": 0.3799687473741569, | |
| "learning_rate": 7.701064093046274e-06, | |
| "loss": 1.512, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.7060782681099084, | |
| "grad_norm": 0.3724271784543797, | |
| "learning_rate": 7.698412698412699e-06, | |
| "loss": 1.469, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.7094088259783514, | |
| "grad_norm": 0.364477503994216, | |
| "learning_rate": 7.695749440715883e-06, | |
| "loss": 1.4811, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.7127393838467944, | |
| "grad_norm": 0.3925520005032744, | |
| "learning_rate": 7.693074240159441e-06, | |
| "loss": 1.5027, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.7160699417152373, | |
| "grad_norm": 0.40921223587397654, | |
| "learning_rate": 7.690387016229713e-06, | |
| "loss": 1.488, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.7194004995836802, | |
| "grad_norm": 0.3981162315328969, | |
| "learning_rate": 7.687687687687688e-06, | |
| "loss": 1.4343, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.7227310574521232, | |
| "grad_norm": 0.35388766488814566, | |
| "learning_rate": 7.684976172560823e-06, | |
| "loss": 1.4599, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.7260616153205662, | |
| "grad_norm": 0.3449802535833205, | |
| "learning_rate": 7.682252388134742e-06, | |
| "loss": 1.442, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.7293921731890092, | |
| "grad_norm": 0.34627676487411824, | |
| "learning_rate": 7.679516250944822e-06, | |
| "loss": 1.4461, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.7327227310574521, | |
| "grad_norm": 0.35799089084524466, | |
| "learning_rate": 7.676767676767677e-06, | |
| "loss": 1.4731, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.736053288925895, | |
| "grad_norm": 0.3820520257947768, | |
| "learning_rate": 7.674006580612503e-06, | |
| "loss": 1.4566, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.7393838467943381, | |
| "grad_norm": 0.3641120307221186, | |
| "learning_rate": 7.671232876712327e-06, | |
| "loss": 1.4525, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.742714404662781, | |
| "grad_norm": 0.37136269720782134, | |
| "learning_rate": 7.668446478515128e-06, | |
| "loss": 1.4548, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.746044962531224, | |
| "grad_norm": 0.4138383130083843, | |
| "learning_rate": 7.665647298674822e-06, | |
| "loss": 1.5395, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.7493755203996669, | |
| "grad_norm": 0.37512729325167443, | |
| "learning_rate": 7.662835249042145e-06, | |
| "loss": 1.4348, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.7527060782681099, | |
| "grad_norm": 0.3574220209010036, | |
| "learning_rate": 7.660010240655401e-06, | |
| "loss": 1.4205, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.7560366361365529, | |
| "grad_norm": 0.3509015504877034, | |
| "learning_rate": 7.657172183731076e-06, | |
| "loss": 1.4074, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.7593671940049959, | |
| "grad_norm": 0.4191818637620366, | |
| "learning_rate": 7.654320987654322e-06, | |
| "loss": 1.434, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.7626977518734388, | |
| "grad_norm": 0.38073125720358314, | |
| "learning_rate": 7.651456560969322e-06, | |
| "loss": 1.4315, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.7660283097418817, | |
| "grad_norm": 0.3489534004367162, | |
| "learning_rate": 7.648578811369509e-06, | |
| "loss": 1.4292, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.7693588676103247, | |
| "grad_norm": 0.39880199669766575, | |
| "learning_rate": 7.645687645687645e-06, | |
| "loss": 1.4797, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.7726894254787677, | |
| "grad_norm": 0.3377554646810836, | |
| "learning_rate": 7.642782969885774e-06, | |
| "loss": 1.3638, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.7760199833472107, | |
| "grad_norm": 0.45577113603344144, | |
| "learning_rate": 7.639864689045015e-06, | |
| "loss": 1.5272, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.7793505412156536, | |
| "grad_norm": 0.3872639106321951, | |
| "learning_rate": 7.636932707355241e-06, | |
| "loss": 1.5223, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.7826810990840966, | |
| "grad_norm": 0.41241615465906434, | |
| "learning_rate": 7.633986928104575e-06, | |
| "loss": 1.4047, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.7860116569525396, | |
| "grad_norm": 0.350902547985464, | |
| "learning_rate": 7.631027253668762e-06, | |
| "loss": 1.4599, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.7893422148209825, | |
| "grad_norm": 0.36780129033305325, | |
| "learning_rate": 7.6280535855003936e-06, | |
| "loss": 1.4872, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.7926727726894255, | |
| "grad_norm": 0.3504301681190647, | |
| "learning_rate": 7.625065824117956e-06, | |
| "loss": 1.4508, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.7960033305578684, | |
| "grad_norm": 0.425786005279154, | |
| "learning_rate": 7.622063869094748e-06, | |
| "loss": 1.5359, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.7993338884263114, | |
| "grad_norm": 0.3423914333711706, | |
| "learning_rate": 7.619047619047619e-06, | |
| "loss": 1.5116, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.8026644462947544, | |
| "grad_norm": 0.39752748882813016, | |
| "learning_rate": 7.616016971625564e-06, | |
| "loss": 1.3967, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.8059950041631974, | |
| "grad_norm": 0.35349720101513005, | |
| "learning_rate": 7.61297182349814e-06, | |
| "loss": 1.428, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.8093255620316403, | |
| "grad_norm": 0.3592529486243108, | |
| "learning_rate": 7.609912070343725e-06, | |
| "loss": 1.4716, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.8126561199000832, | |
| "grad_norm": 0.41007914987868593, | |
| "learning_rate": 7.606837606837607e-06, | |
| "loss": 1.4601, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.8159866777685262, | |
| "grad_norm": 0.4368820717106569, | |
| "learning_rate": 7.603748326639893e-06, | |
| "loss": 1.4299, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.8193172356369692, | |
| "grad_norm": 0.34781376516299506, | |
| "learning_rate": 7.600644122383253e-06, | |
| "loss": 1.3832, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.8226477935054122, | |
| "grad_norm": 0.4378928638690168, | |
| "learning_rate": 7.597524885660478e-06, | |
| "loss": 1.5006, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.8259783513738551, | |
| "grad_norm": 0.38866511125189074, | |
| "learning_rate": 7.594390507011865e-06, | |
| "loss": 1.3808, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.829308909242298, | |
| "grad_norm": 0.3796151796802332, | |
| "learning_rate": 7.591240875912408e-06, | |
| "loss": 1.4048, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.832639467110741, | |
| "grad_norm": 0.47512939093169254, | |
| "learning_rate": 7.588075880758807e-06, | |
| "loss": 1.4533, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.835970024979184, | |
| "grad_norm": 0.4002177494781384, | |
| "learning_rate": 7.584895408856289e-06, | |
| "loss": 1.4364, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.839300582847627, | |
| "grad_norm": 0.41628446885968545, | |
| "learning_rate": 7.581699346405228e-06, | |
| "loss": 1.5213, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.8426311407160699, | |
| "grad_norm": 0.41586597700526384, | |
| "learning_rate": 7.578487578487578e-06, | |
| "loss": 1.4163, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.8459616985845129, | |
| "grad_norm": 0.37080164887555395, | |
| "learning_rate": 7.575259989053093e-06, | |
| "loss": 1.4262, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.8492922564529559, | |
| "grad_norm": 0.44276862899193814, | |
| "learning_rate": 7.57201646090535e-06, | |
| "loss": 1.4434, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.8526228143213989, | |
| "grad_norm": 0.3565514945143501, | |
| "learning_rate": 7.568756875687569e-06, | |
| "loss": 1.4628, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.8559533721898418, | |
| "grad_norm": 0.3424453222650746, | |
| "learning_rate": 7.565481113868211e-06, | |
| "loss": 1.4397, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.8592839300582847, | |
| "grad_norm": 0.36361177745212486, | |
| "learning_rate": 7.562189054726368e-06, | |
| "loss": 1.434, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.8626144879267277, | |
| "grad_norm": 0.39961369778575284, | |
| "learning_rate": 7.558880576336936e-06, | |
| "loss": 1.3263, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.8659450457951707, | |
| "grad_norm": 0.3694683835624918, | |
| "learning_rate": 7.555555555555556e-06, | |
| "loss": 1.465, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.8692756036636137, | |
| "grad_norm": 0.38926907075141, | |
| "learning_rate": 7.552213868003341e-06, | |
| "loss": 1.4639, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.8726061615320566, | |
| "grad_norm": 0.41002402289266, | |
| "learning_rate": 7.548855388051367e-06, | |
| "loss": 1.4583, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.8759367194004996, | |
| "grad_norm": 0.39476689396263037, | |
| "learning_rate": 7.545479988804925e-06, | |
| "loss": 1.5369, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.8792672772689425, | |
| "grad_norm": 0.42338165790994337, | |
| "learning_rate": 7.542087542087541e-06, | |
| "loss": 1.4149, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.8825978351373855, | |
| "grad_norm": 0.37580056414171503, | |
| "learning_rate": 7.538677918424753e-06, | |
| "loss": 1.4767, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.8859283930058285, | |
| "grad_norm": 0.35524395112624974, | |
| "learning_rate": 7.535250987027637e-06, | |
| "loss": 1.4565, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.8892589508742714, | |
| "grad_norm": 0.333001020301385, | |
| "learning_rate": 7.531806615776081e-06, | |
| "loss": 1.4653, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.8925895087427144, | |
| "grad_norm": 0.325740334034441, | |
| "learning_rate": 7.5283446712018136e-06, | |
| "loss": 1.4583, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.8959200666111574, | |
| "grad_norm": 0.3579186582787629, | |
| "learning_rate": 7.524865018471157e-06, | |
| "loss": 1.441, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.8992506244796004, | |
| "grad_norm": 0.38100337783570354, | |
| "learning_rate": 7.521367521367521e-06, | |
| "loss": 1.4725, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.9025811823480433, | |
| "grad_norm": 0.4439224251441086, | |
| "learning_rate": 7.5178520422736365e-06, | |
| "loss": 1.4433, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.9059117402164862, | |
| "grad_norm": 0.36404138775247186, | |
| "learning_rate": 7.514318442153494e-06, | |
| "loss": 1.4502, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.9092422980849292, | |
| "grad_norm": 0.3739016590981095, | |
| "learning_rate": 7.5107665805340226e-06, | |
| "loss": 1.5614, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.9125728559533722, | |
| "grad_norm": 0.34618213523089303, | |
| "learning_rate": 7.5071963154864715e-06, | |
| "loss": 1.4818, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.9159034138218152, | |
| "grad_norm": 0.3516565079552471, | |
| "learning_rate": 7.5036075036075024e-06, | |
| "loss": 1.4811, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.9192339716902581, | |
| "grad_norm": 0.372847709765313, | |
| "learning_rate": 7.499999999999999e-06, | |
| "loss": 1.4314, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.922564529558701, | |
| "grad_norm": 0.33633722585110437, | |
| "learning_rate": 7.496373658253553e-06, | |
| "loss": 1.467, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.925895087427144, | |
| "grad_norm": 0.3502677047499933, | |
| "learning_rate": 7.4927283304246645e-06, | |
| "loss": 1.465, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.929225645295587, | |
| "grad_norm": 0.3301559549021256, | |
| "learning_rate": 7.4890638670166225e-06, | |
| "loss": 1.4345, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.93255620316403, | |
| "grad_norm": 0.3995138440783666, | |
| "learning_rate": 7.485380116959064e-06, | |
| "loss": 1.3383, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.9358867610324729, | |
| "grad_norm": 0.3871081436839965, | |
| "learning_rate": 7.481676927587217e-06, | |
| "loss": 1.4796, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.9392173189009159, | |
| "grad_norm": 0.4376413636164512, | |
| "learning_rate": 7.4779541446208115e-06, | |
| "loss": 1.4644, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.9425478767693589, | |
| "grad_norm": 0.43206377875237645, | |
| "learning_rate": 7.474211612142647e-06, | |
| "loss": 1.4107, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.9458784346378019, | |
| "grad_norm": 0.4025303715871277, | |
| "learning_rate": 7.470449172576832e-06, | |
| "loss": 1.4318, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.9492089925062448, | |
| "grad_norm": 0.37724819780360036, | |
| "learning_rate": 7.466666666666667e-06, | |
| "loss": 1.4454, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.9525395503746877, | |
| "grad_norm": 0.35328542805788227, | |
| "learning_rate": 7.462863933452169e-06, | |
| "loss": 1.4175, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.9558701082431307, | |
| "grad_norm": 0.3788093515621439, | |
| "learning_rate": 7.459040810247245e-06, | |
| "loss": 1.4429, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.9592006661115737, | |
| "grad_norm": 0.4018461850957888, | |
| "learning_rate": 7.455197132616486e-06, | |
| "loss": 1.4679, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.9625312239800167, | |
| "grad_norm": 0.3792573314031364, | |
| "learning_rate": 7.451332734351601e-06, | |
| "loss": 1.5191, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.9658617818484596, | |
| "grad_norm": 0.4173737668171256, | |
| "learning_rate": 7.447447447447447e-06, | |
| "loss": 1.4999, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.9691923397169026, | |
| "grad_norm": 0.34073643176316165, | |
| "learning_rate": 7.443541102077687e-06, | |
| "loss": 1.3667, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.9725228975853455, | |
| "grad_norm": 0.37255255694817807, | |
| "learning_rate": 7.439613526570048e-06, | |
| "loss": 1.4196, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.9758534554537885, | |
| "grad_norm": 0.3751657637349412, | |
| "learning_rate": 7.435664547381168e-06, | |
| "loss": 1.4545, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.9791840133222315, | |
| "grad_norm": 0.45200307278108437, | |
| "learning_rate": 7.431693989071039e-06, | |
| "loss": 1.4199, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.9825145711906744, | |
| "grad_norm": 0.3889800375255201, | |
| "learning_rate": 7.427701674277017e-06, | |
| "loss": 1.4801, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.9858451290591174, | |
| "grad_norm": 0.3473904572951369, | |
| "learning_rate": 7.4236874236874235e-06, | |
| "loss": 1.393, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.9891756869275604, | |
| "grad_norm": 0.3581161377664693, | |
| "learning_rate": 7.419651056014692e-06, | |
| "loss": 1.4073, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.9925062447960034, | |
| "grad_norm": 0.35620326479274533, | |
| "learning_rate": 7.415592387968079e-06, | |
| "loss": 1.43, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.9958368026644463, | |
| "grad_norm": 0.3676482591149261, | |
| "learning_rate": 7.4115112342259155e-06, | |
| "loss": 1.4148, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.9991673605328892, | |
| "grad_norm": 0.43615077194471996, | |
| "learning_rate": 7.407407407407408e-06, | |
| "loss": 1.4154, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.5905255990041776, | |
| "learning_rate": 7.403280718043948e-06, | |
| "loss": 1.4231, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 1.003330557868443, | |
| "grad_norm": 0.4020383385971024, | |
| "learning_rate": 7.399130974549968e-06, | |
| "loss": 1.4394, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 1.0066611157368859, | |
| "grad_norm": 0.3865309278317666, | |
| "learning_rate": 7.394957983193276e-06, | |
| "loss": 1.4413, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 1.009991673605329, | |
| "grad_norm": 0.40327921143010825, | |
| "learning_rate": 7.390761548064918e-06, | |
| "loss": 1.4576, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 1.0133222314737718, | |
| "grad_norm": 0.4070700680157323, | |
| "learning_rate": 7.386541471048513e-06, | |
| "loss": 1.4817, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 1.0166527893422148, | |
| "grad_norm": 0.43460108634631706, | |
| "learning_rate": 7.382297551789077e-06, | |
| "loss": 1.3939, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 1.0199833472106579, | |
| "grad_norm": 0.42277608172713, | |
| "learning_rate": 7.378029587661315e-06, | |
| "loss": 1.4239, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 1.0233139050791007, | |
| "grad_norm": 0.37583795807106635, | |
| "learning_rate": 7.373737373737374e-06, | |
| "loss": 1.4878, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 1.0266444629475437, | |
| "grad_norm": 0.4013147771199415, | |
| "learning_rate": 7.3694207027540355e-06, | |
| "loss": 1.3758, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 1.0299750208159866, | |
| "grad_norm": 0.38150977748656323, | |
| "learning_rate": 7.365079365079365e-06, | |
| "loss": 1.4122, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.0333055786844296, | |
| "grad_norm": 0.36288526161353013, | |
| "learning_rate": 7.360713148678764e-06, | |
| "loss": 1.4775, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 1.0366361365528727, | |
| "grad_norm": 0.5122154405495047, | |
| "learning_rate": 7.35632183908046e-06, | |
| "loss": 1.4385, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 1.0399666944213155, | |
| "grad_norm": 0.4907680124574417, | |
| "learning_rate": 7.351905219340377e-06, | |
| "loss": 1.4321, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 1.0432972522897586, | |
| "grad_norm": 0.3750039319171418, | |
| "learning_rate": 7.347463070006422e-06, | |
| "loss": 1.4609, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 1.0466278101582014, | |
| "grad_norm": 0.415847010986813, | |
| "learning_rate": 7.342995169082125e-06, | |
| "loss": 1.399, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 1.0499583680266444, | |
| "grad_norm": 0.40484373034787197, | |
| "learning_rate": 7.338501291989663e-06, | |
| "loss": 1.4082, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 1.0532889258950875, | |
| "grad_norm": 0.3556695114896482, | |
| "learning_rate": 7.333981211532231e-06, | |
| "loss": 1.5045, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 1.0566194837635303, | |
| "grad_norm": 0.4217178250762373, | |
| "learning_rate": 7.329434697855749e-06, | |
| "loss": 1.5051, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 1.0599500416319734, | |
| "grad_norm": 0.44541116731095065, | |
| "learning_rate": 7.324861518409905e-06, | |
| "loss": 1.454, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 1.0632805995004164, | |
| "grad_norm": 0.3722519430085194, | |
| "learning_rate": 7.320261437908496e-06, | |
| "loss": 1.4864, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.0666111573688593, | |
| "grad_norm": 0.49955941789670055, | |
| "learning_rate": 7.315634218289086e-06, | |
| "loss": 1.4328, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 1.0699417152373023, | |
| "grad_norm": 0.559077472675475, | |
| "learning_rate": 7.310979618671926e-06, | |
| "loss": 1.4387, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 1.0732722731057451, | |
| "grad_norm": 0.38492000673298576, | |
| "learning_rate": 7.306297395318167e-06, | |
| "loss": 1.4173, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 1.0766028309741882, | |
| "grad_norm": 0.46264263086480695, | |
| "learning_rate": 7.301587301587301e-06, | |
| "loss": 1.458, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 1.0799333888426312, | |
| "grad_norm": 0.48393689092527553, | |
| "learning_rate": 7.296849087893865e-06, | |
| "loss": 1.4984, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 1.083263946711074, | |
| "grad_norm": 0.3833552546352091, | |
| "learning_rate": 7.29208250166334e-06, | |
| "loss": 1.4801, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 1.0865945045795171, | |
| "grad_norm": 0.45636346067253053, | |
| "learning_rate": 7.287287287287286e-06, | |
| "loss": 1.4335, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 1.08992506244796, | |
| "grad_norm": 0.5170759134234261, | |
| "learning_rate": 7.282463186077643e-06, | |
| "loss": 1.4619, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 1.093255620316403, | |
| "grad_norm": 0.41413948804668765, | |
| "learning_rate": 7.277609936220207e-06, | |
| "loss": 1.4976, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 1.096586178184846, | |
| "grad_norm": 0.47675818666743885, | |
| "learning_rate": 7.272727272727272e-06, | |
| "loss": 1.5166, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.0999167360532889, | |
| "grad_norm": 0.4446284191251516, | |
| "learning_rate": 7.267814927389396e-06, | |
| "loss": 1.3756, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 1.103247293921732, | |
| "grad_norm": 0.3712448236233366, | |
| "learning_rate": 7.262872628726287e-06, | |
| "loss": 1.4177, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 1.1065778517901748, | |
| "grad_norm": 0.354780077610888, | |
| "learning_rate": 7.257900101936799e-06, | |
| "loss": 1.3888, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 1.1099084096586178, | |
| "grad_norm": 0.4287556354375581, | |
| "learning_rate": 7.252897068847988e-06, | |
| "loss": 1.463, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 1.1132389675270609, | |
| "grad_norm": 0.43855718184558823, | |
| "learning_rate": 7.247863247863247e-06, | |
| "loss": 1.4375, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 1.1165695253955037, | |
| "grad_norm": 0.36652449074881177, | |
| "learning_rate": 7.242798353909463e-06, | |
| "loss": 1.4724, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 1.1199000832639467, | |
| "grad_norm": 0.41471476618444547, | |
| "learning_rate": 7.237702098383213e-06, | |
| "loss": 1.4368, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 1.1232306411323896, | |
| "grad_norm": 0.3584246686612814, | |
| "learning_rate": 7.2325741890959285e-06, | |
| "loss": 1.507, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 1.1265611990008326, | |
| "grad_norm": 0.35472951006324893, | |
| "learning_rate": 7.227414330218068e-06, | |
| "loss": 1.3847, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 1.1298917568692757, | |
| "grad_norm": 0.40770232084467445, | |
| "learning_rate": 7.222222222222222e-06, | |
| "loss": 1.4722, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.1332223147377185, | |
| "grad_norm": 0.3854760192656062, | |
| "learning_rate": 7.216997561825147e-06, | |
| "loss": 1.4397, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 1.1365528726061616, | |
| "grad_norm": 0.3425435570180868, | |
| "learning_rate": 7.211740041928721e-06, | |
| "loss": 1.3917, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.1398834304746046, | |
| "grad_norm": 0.3629363871231361, | |
| "learning_rate": 7.206449351559762e-06, | |
| "loss": 1.4329, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 1.1432139883430474, | |
| "grad_norm": 0.3746351865474382, | |
| "learning_rate": 7.20112517580872e-06, | |
| "loss": 1.4325, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.1465445462114905, | |
| "grad_norm": 0.35633065876642767, | |
| "learning_rate": 7.195767195767195e-06, | |
| "loss": 1.4802, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.1498751040799333, | |
| "grad_norm": 0.41086591430313346, | |
| "learning_rate": 7.1903750884642605e-06, | |
| "loss": 1.386, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.1532056619483764, | |
| "grad_norm": 0.4248601636564269, | |
| "learning_rate": 7.184948526801562e-06, | |
| "loss": 1.3764, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.1565362198168194, | |
| "grad_norm": 0.3677689809276377, | |
| "learning_rate": 7.179487179487179e-06, | |
| "loss": 1.3977, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.1598667776852623, | |
| "grad_norm": 0.4562607243713519, | |
| "learning_rate": 7.173990710968203e-06, | |
| "loss": 1.4743, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.1631973355537053, | |
| "grad_norm": 0.36851546433374166, | |
| "learning_rate": 7.168458781362006e-06, | |
| "loss": 1.3998, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.1665278934221481, | |
| "grad_norm": 0.3440172529786023, | |
| "learning_rate": 7.1628910463861915e-06, | |
| "loss": 1.4388, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.1698584512905912, | |
| "grad_norm": 0.34899417175359176, | |
| "learning_rate": 7.157287157287158e-06, | |
| "loss": 1.4109, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.1731890091590342, | |
| "grad_norm": 0.3369095274891404, | |
| "learning_rate": 7.151646760767281e-06, | |
| "loss": 1.4721, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.176519567027477, | |
| "grad_norm": 0.3373083746918916, | |
| "learning_rate": 7.145969498910675e-06, | |
| "loss": 1.3879, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.1798501248959201, | |
| "grad_norm": 0.3127699546260214, | |
| "learning_rate": 7.140255009107467e-06, | |
| "loss": 1.4067, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.183180682764363, | |
| "grad_norm": 0.3455714541263257, | |
| "learning_rate": 7.1345029239766076e-06, | |
| "loss": 1.4729, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.186511240632806, | |
| "grad_norm": 0.35459135197814406, | |
| "learning_rate": 7.128712871287129e-06, | |
| "loss": 1.4845, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.189841798501249, | |
| "grad_norm": 0.3382392537839561, | |
| "learning_rate": 7.122884473877851e-06, | |
| "loss": 1.4796, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.1931723563696919, | |
| "grad_norm": 0.3229617810865785, | |
| "learning_rate": 7.117017349575488e-06, | |
| "loss": 1.4258, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.196502914238135, | |
| "grad_norm": 0.4132878845320615, | |
| "learning_rate": 7.11111111111111e-06, | |
| "loss": 1.4344, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.1998334721065778, | |
| "grad_norm": 0.3909252234186588, | |
| "learning_rate": 7.105165366034932e-06, | |
| "loss": 1.4136, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.2031640299750208, | |
| "grad_norm": 0.4166542946239009, | |
| "learning_rate": 7.0991797166293805e-06, | |
| "loss": 1.418, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.2064945878434639, | |
| "grad_norm": 0.3978265092622875, | |
| "learning_rate": 7.093153759820426e-06, | |
| "loss": 1.4778, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.2098251457119067, | |
| "grad_norm": 0.3362476483926624, | |
| "learning_rate": 7.087087087087086e-06, | |
| "loss": 1.4045, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.2131557035803497, | |
| "grad_norm": 0.34227618124914144, | |
| "learning_rate": 7.0809792843691135e-06, | |
| "loss": 1.3691, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.2164862614487926, | |
| "grad_norm": 0.4154500404546309, | |
| "learning_rate": 7.074829931972789e-06, | |
| "loss": 1.4296, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.2198168193172356, | |
| "grad_norm": 0.3910386371341375, | |
| "learning_rate": 7.068638604474782e-06, | |
| "loss": 1.4128, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.2231473771856787, | |
| "grad_norm": 0.32973016037230485, | |
| "learning_rate": 7.062404870624048e-06, | |
| "loss": 1.3952, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.2264779350541215, | |
| "grad_norm": 0.3476414929125133, | |
| "learning_rate": 7.056128293241695e-06, | |
| "loss": 1.4132, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.2298084929225646, | |
| "grad_norm": 0.3490075847160727, | |
| "learning_rate": 7.0498084291187725e-06, | |
| "loss": 1.5034, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.2331390507910074, | |
| "grad_norm": 0.4213209017684047, | |
| "learning_rate": 7.043444828911956e-06, | |
| "loss": 1.4407, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 1.2364696086594504, | |
| "grad_norm": 0.41156726116014214, | |
| "learning_rate": 7.037037037037037e-06, | |
| "loss": 1.4922, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.2398001665278935, | |
| "grad_norm": 0.3274736563867899, | |
| "learning_rate": 7.0305845915602e-06, | |
| "loss": 1.4443, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 1.2431307243963363, | |
| "grad_norm": 0.4016877039684572, | |
| "learning_rate": 7.024087024087023e-06, | |
| "loss": 1.4765, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.2464612822647794, | |
| "grad_norm": 0.37926187648963133, | |
| "learning_rate": 7.017543859649123e-06, | |
| "loss": 1.4944, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.2497918401332222, | |
| "grad_norm": 0.3995775555374175, | |
| "learning_rate": 7.0109546165884185e-06, | |
| "loss": 1.4737, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.2531223980016652, | |
| "grad_norm": 0.4179330927454956, | |
| "learning_rate": 7.0043188064389475e-06, | |
| "loss": 1.396, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 1.2564529558701083, | |
| "grad_norm": 0.4026676583822718, | |
| "learning_rate": 6.997635933806146e-06, | |
| "loss": 1.5024, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.2597835137385511, | |
| "grad_norm": 0.3729935293489866, | |
| "learning_rate": 6.9909054962435735e-06, | |
| "loss": 1.5035, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 1.2631140716069942, | |
| "grad_norm": 0.37785861617292904, | |
| "learning_rate": 6.984126984126983e-06, | |
| "loss": 1.4859, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.266444629475437, | |
| "grad_norm": 0.34618072727066834, | |
| "learning_rate": 6.977299880525687e-06, | |
| "loss": 1.3753, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 1.26977518734388, | |
| "grad_norm": 0.3603657688818211, | |
| "learning_rate": 6.970423661071143e-06, | |
| "loss": 1.4396, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.2731057452123231, | |
| "grad_norm": 0.31695078316874364, | |
| "learning_rate": 6.963497793822704e-06, | |
| "loss": 1.4512, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 1.2764363030807662, | |
| "grad_norm": 0.3998834526312468, | |
| "learning_rate": 6.956521739130433e-06, | |
| "loss": 1.4068, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.279766860949209, | |
| "grad_norm": 0.40218592316674945, | |
| "learning_rate": 6.949494949494949e-06, | |
| "loss": 1.4314, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.2830974188176518, | |
| "grad_norm": 0.4377216092057675, | |
| "learning_rate": 6.942416869424169e-06, | |
| "loss": 1.4159, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.2864279766860949, | |
| "grad_norm": 0.3806613338175727, | |
| "learning_rate": 6.935286935286935e-06, | |
| "loss": 1.4383, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 1.289758534554538, | |
| "grad_norm": 0.41315217581288083, | |
| "learning_rate": 6.928104575163398e-06, | |
| "loss": 1.4639, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.293089092422981, | |
| "grad_norm": 0.4242068360276873, | |
| "learning_rate": 6.920869208692086e-06, | |
| "loss": 1.5043, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 1.2964196502914238, | |
| "grad_norm": 0.40526133848179174, | |
| "learning_rate": 6.913580246913578e-06, | |
| "loss": 1.4969, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.2997502081598669, | |
| "grad_norm": 0.4390648977103527, | |
| "learning_rate": 6.9062370921106965e-06, | |
| "loss": 1.4634, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 1.3030807660283097, | |
| "grad_norm": 0.3293053257002483, | |
| "learning_rate": 6.898839137645108e-06, | |
| "loss": 1.4837, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.3064113238967527, | |
| "grad_norm": 0.3741205703169676, | |
| "learning_rate": 6.891385767790261e-06, | |
| "loss": 1.3888, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.3097418817651958, | |
| "grad_norm": 0.36736277922290345, | |
| "learning_rate": 6.883876357560567e-06, | |
| "loss": 1.4422, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.3130724396336386, | |
| "grad_norm": 0.34987451065304387, | |
| "learning_rate": 6.876310272536688e-06, | |
| "loss": 1.4384, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.3164029975020817, | |
| "grad_norm": 0.3574591374681954, | |
| "learning_rate": 6.868686868686868e-06, | |
| "loss": 1.4453, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.3197335553705245, | |
| "grad_norm": 0.31108139602911883, | |
| "learning_rate": 6.861005492184199e-06, | |
| "loss": 1.4302, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 1.3230641132389676, | |
| "grad_norm": 0.3317920901111113, | |
| "learning_rate": 6.853265479219677e-06, | |
| "loss": 1.4599, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.3263946711074106, | |
| "grad_norm": 0.3319586529185681, | |
| "learning_rate": 6.8454661558109825e-06, | |
| "loss": 1.4349, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.3297252289758534, | |
| "grad_norm": 0.35385561486286676, | |
| "learning_rate": 6.837606837606837e-06, | |
| "loss": 1.4262, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.3330557868442965, | |
| "grad_norm": 0.36123629997437273, | |
| "learning_rate": 6.82968682968683e-06, | |
| "loss": 1.4475, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 1.3363863447127393, | |
| "grad_norm": 0.3678679588945442, | |
| "learning_rate": 6.821705426356589e-06, | |
| "loss": 1.4662, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.3397169025811824, | |
| "grad_norm": 0.35623277676543963, | |
| "learning_rate": 6.813661910938175e-06, | |
| "loss": 1.4157, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 1.3430474604496254, | |
| "grad_norm": 0.3670378130601921, | |
| "learning_rate": 6.805555555555554e-06, | |
| "loss": 1.4289, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.3463780183180682, | |
| "grad_norm": 0.37365353793241013, | |
| "learning_rate": 6.797385620915031e-06, | |
| "loss": 1.4758, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.3497085761865113, | |
| "grad_norm": 0.4227767618895852, | |
| "learning_rate": 6.78915135608049e-06, | |
| "loss": 1.4522, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.3530391340549541, | |
| "grad_norm": 0.392419028331304, | |
| "learning_rate": 6.780851998243303e-06, | |
| "loss": 1.4386, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 1.3563696919233972, | |
| "grad_norm": 0.34332622194519336, | |
| "learning_rate": 6.772486772486772e-06, | |
| "loss": 1.4143, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.3597002497918402, | |
| "grad_norm": 0.39554534288670906, | |
| "learning_rate": 6.76405489154493e-06, | |
| "loss": 1.4289, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 1.363030807660283, | |
| "grad_norm": 0.3680781980427255, | |
| "learning_rate": 6.7555555555555545e-06, | |
| "loss": 1.4604, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.3663613655287261, | |
| "grad_norm": 0.359696703224119, | |
| "learning_rate": 6.7469879518072274e-06, | |
| "loss": 1.4552, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 1.369691923397169, | |
| "grad_norm": 0.43946142988468057, | |
| "learning_rate": 6.738351254480287e-06, | |
| "loss": 1.4649, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.373022481265612, | |
| "grad_norm": 0.38158471914984216, | |
| "learning_rate": 6.729644624381466e-06, | |
| "loss": 1.4553, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 1.376353039134055, | |
| "grad_norm": 0.3785264160376015, | |
| "learning_rate": 6.720867208672086e-06, | |
| "loss": 1.457, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.3796835970024979, | |
| "grad_norm": 0.40275915279313634, | |
| "learning_rate": 6.712018140589569e-06, | |
| "loss": 1.4665, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 1.383014154870941, | |
| "grad_norm": 0.37268382461278277, | |
| "learning_rate": 6.703096539162113e-06, | |
| "loss": 1.377, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.3863447127393838, | |
| "grad_norm": 0.3362832443073036, | |
| "learning_rate": 6.694101508916324e-06, | |
| "loss": 1.4122, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 1.3896752706078268, | |
| "grad_norm": 0.3428291854645596, | |
| "learning_rate": 6.6850321395775945e-06, | |
| "loss": 1.3466, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.3930058284762699, | |
| "grad_norm": 0.38976496538071015, | |
| "learning_rate": 6.675887505763023e-06, | |
| "loss": 1.4711, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 1.3963363863447127, | |
| "grad_norm": 0.4034367524201395, | |
| "learning_rate": 6.666666666666666e-06, | |
| "loss": 1.5079, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.3996669442131557, | |
| "grad_norm": 0.38251655422807695, | |
| "learning_rate": 6.657368665736867e-06, | |
| "loss": 1.4715, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 1.4029975020815986, | |
| "grad_norm": 0.3636615753904805, | |
| "learning_rate": 6.647992530345471e-06, | |
| "loss": 1.4175, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.4063280599500416, | |
| "grad_norm": 0.3543871236347375, | |
| "learning_rate": 6.6385372714486634e-06, | |
| "loss": 1.4008, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 1.4096586178184847, | |
| "grad_norm": 0.4270397824248548, | |
| "learning_rate": 6.6290018832391705e-06, | |
| "loss": 1.4082, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.4129891756869275, | |
| "grad_norm": 0.41956585580281563, | |
| "learning_rate": 6.6193853427895966e-06, | |
| "loss": 1.4075, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 1.4163197335553706, | |
| "grad_norm": 0.47761484099497725, | |
| "learning_rate": 6.60968660968661e-06, | |
| "loss": 1.4104, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.4196502914238134, | |
| "grad_norm": 0.4078261265233408, | |
| "learning_rate": 6.599904625655699e-06, | |
| "loss": 1.4317, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 1.4229808492922564, | |
| "grad_norm": 0.37691692681004796, | |
| "learning_rate": 6.590038314176245e-06, | |
| "loss": 1.391, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.4263114071606995, | |
| "grad_norm": 0.3288508827565593, | |
| "learning_rate": 6.580086580086579e-06, | |
| "loss": 1.401, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 1.4296419650291423, | |
| "grad_norm": 0.338319616372442, | |
| "learning_rate": 6.570048309178745e-06, | |
| "loss": 1.4178, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.4329725228975854, | |
| "grad_norm": 0.414862604672987, | |
| "learning_rate": 6.559922367782628e-06, | |
| "loss": 1.4642, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 1.4363030807660282, | |
| "grad_norm": 0.3949017633125201, | |
| "learning_rate": 6.54970760233918e-06, | |
| "loss": 1.3643, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.4396336386344712, | |
| "grad_norm": 0.3872663647349424, | |
| "learning_rate": 6.53940283896231e-06, | |
| "loss": 1.3998, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 1.4429641965029143, | |
| "grad_norm": 0.3778182716944692, | |
| "learning_rate": 6.529006882989183e-06, | |
| "loss": 1.421, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.4462947543713571, | |
| "grad_norm": 0.3368637084806252, | |
| "learning_rate": 6.518518518518519e-06, | |
| "loss": 1.4562, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 1.4496253122398002, | |
| "grad_norm": 0.37088068849156625, | |
| "learning_rate": 6.507936507936509e-06, | |
| "loss": 1.389, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.452955870108243, | |
| "grad_norm": 0.4171977510324979, | |
| "learning_rate": 6.497259591429994e-06, | |
| "loss": 1.4314, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 1.456286427976686, | |
| "grad_norm": 0.36493233792748947, | |
| "learning_rate": 6.486486486486486e-06, | |
| "loss": 1.4239, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.4596169858451291, | |
| "grad_norm": 0.36409025362836434, | |
| "learning_rate": 6.475615887380592e-06, | |
| "loss": 1.4011, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 1.462947543713572, | |
| "grad_norm": 0.3432633374051585, | |
| "learning_rate": 6.464646464646463e-06, | |
| "loss": 1.4706, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.466278101582015, | |
| "grad_norm": 0.36918146681400343, | |
| "learning_rate": 6.453576864535769e-06, | |
| "loss": 1.4048, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 1.4696086594504578, | |
| "grad_norm": 0.3558974109435063, | |
| "learning_rate": 6.442405708460755e-06, | |
| "loss": 1.4233, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.4729392173189009, | |
| "grad_norm": 0.3319935101093491, | |
| "learning_rate": 6.431131592421914e-06, | |
| "loss": 1.4557, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 1.476269775187344, | |
| "grad_norm": 0.3957835276431251, | |
| "learning_rate": 6.419753086419752e-06, | |
| "loss": 1.4974, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.479600333055787, | |
| "grad_norm": 0.46743126820019115, | |
| "learning_rate": 6.408268733850127e-06, | |
| "loss": 1.3428, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 1.4829308909242298, | |
| "grad_norm": 0.32072751511352704, | |
| "learning_rate": 6.396677050882658e-06, | |
| "loss": 1.4252, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.4862614487926726, | |
| "grad_norm": 0.3691624108782593, | |
| "learning_rate": 6.384976525821596e-06, | |
| "loss": 1.4288, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 1.4895920066611157, | |
| "grad_norm": 0.41832466518878647, | |
| "learning_rate": 6.373165618448636e-06, | |
| "loss": 1.4287, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.4929225645295587, | |
| "grad_norm": 0.3728200914294547, | |
| "learning_rate": 6.361242759347024e-06, | |
| "loss": 1.391, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 1.4962531223980018, | |
| "grad_norm": 0.3489172461380398, | |
| "learning_rate": 6.349206349206349e-06, | |
| "loss": 1.4012, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.4995836802664446, | |
| "grad_norm": 0.45831242097179337, | |
| "learning_rate": 6.337054758107389e-06, | |
| "loss": 1.4062, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 1.5029142381348874, | |
| "grad_norm": 0.4485083988308969, | |
| "learning_rate": 6.324786324786324e-06, | |
| "loss": 1.4077, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.5062447960033305, | |
| "grad_norm": 0.3469124587165823, | |
| "learning_rate": 6.312399355877616e-06, | |
| "loss": 1.3635, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 1.5095753538717736, | |
| "grad_norm": 0.4359834643536742, | |
| "learning_rate": 6.299892125134842e-06, | |
| "loss": 1.3951, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.5129059117402166, | |
| "grad_norm": 0.43347338145656295, | |
| "learning_rate": 6.287262872628726e-06, | |
| "loss": 1.438, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 1.5162364696086594, | |
| "grad_norm": 0.3544519721589859, | |
| "learning_rate": 6.274509803921569e-06, | |
| "loss": 1.4028, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.5195670274771023, | |
| "grad_norm": 0.4175623558211923, | |
| "learning_rate": 6.261631089217296e-06, | |
| "loss": 1.4649, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 1.5228975853455453, | |
| "grad_norm": 0.47794327593006264, | |
| "learning_rate": 6.248624862486248e-06, | |
| "loss": 1.4552, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.5262281432139884, | |
| "grad_norm": 0.5102221497723193, | |
| "learning_rate": 6.235489220563847e-06, | |
| "loss": 1.5577, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 1.5295587010824314, | |
| "grad_norm": 0.361727454686882, | |
| "learning_rate": 6.2222222222222215e-06, | |
| "loss": 1.4977, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.5328892589508742, | |
| "grad_norm": 0.43568797487755334, | |
| "learning_rate": 6.208821887213847e-06, | |
| "loss": 1.4417, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 1.536219816819317, | |
| "grad_norm": 0.39795557103291623, | |
| "learning_rate": 6.195286195286195e-06, | |
| "loss": 1.4479, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.5395503746877601, | |
| "grad_norm": 0.3699426752838303, | |
| "learning_rate": 6.181613085166384e-06, | |
| "loss": 1.4379, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 1.5428809325562032, | |
| "grad_norm": 0.5138765482501748, | |
| "learning_rate": 6.167800453514738e-06, | |
| "loss": 1.4433, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.5462114904246462, | |
| "grad_norm": 0.5597671339637968, | |
| "learning_rate": 6.153846153846153e-06, | |
| "loss": 1.4255, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.549542048293089, | |
| "grad_norm": 0.4443208189107028, | |
| "learning_rate": 6.1397479954180976e-06, | |
| "loss": 1.4458, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.552872606161532, | |
| "grad_norm": 0.41782304334586917, | |
| "learning_rate": 6.125503742084053e-06, | |
| "loss": 1.4362, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 1.556203164029975, | |
| "grad_norm": 0.511701451750574, | |
| "learning_rate": 6.11111111111111e-06, | |
| "loss": 1.4378, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.559533721898418, | |
| "grad_norm": 0.4272528437058103, | |
| "learning_rate": 6.096567771960442e-06, | |
| "loss": 1.4315, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 1.562864279766861, | |
| "grad_norm": 0.42099653002903337, | |
| "learning_rate": 6.0818713450292395e-06, | |
| "loss": 1.4092, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.5661948376353039, | |
| "grad_norm": 0.4635591149261861, | |
| "learning_rate": 6.067019400352732e-06, | |
| "loss": 1.4357, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 1.569525395503747, | |
| "grad_norm": 0.5318262046494987, | |
| "learning_rate": 6.052009456264775e-06, | |
| "loss": 1.4753, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.5728559533721898, | |
| "grad_norm": 0.4098578230232083, | |
| "learning_rate": 6.036838978015449e-06, | |
| "loss": 1.4192, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 1.5761865112406328, | |
| "grad_norm": 0.4563174114919455, | |
| "learning_rate": 6.021505376344085e-06, | |
| "loss": 1.4676, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.5795170691090759, | |
| "grad_norm": 0.5270544922424331, | |
| "learning_rate": 6.006006006006005e-06, | |
| "loss": 1.4267, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.5828476269775187, | |
| "grad_norm": 0.3910787909582668, | |
| "learning_rate": 5.990338164251208e-06, | |
| "loss": 1.3766, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.5861781848459617, | |
| "grad_norm": 0.4736515430850208, | |
| "learning_rate": 5.974499089253187e-06, | |
| "loss": 1.4437, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 1.5895087427144046, | |
| "grad_norm": 0.5430796464569592, | |
| "learning_rate": 5.958485958485957e-06, | |
| "loss": 1.4482, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.5928393005828476, | |
| "grad_norm": 0.38226206389298173, | |
| "learning_rate": 5.942295887047268e-06, | |
| "loss": 1.412, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 1.5961698584512907, | |
| "grad_norm": 0.3721223079028304, | |
| "learning_rate": 5.925925925925925e-06, | |
| "loss": 1.45, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.5995004163197337, | |
| "grad_norm": 0.3827064109331823, | |
| "learning_rate": 5.909373060211049e-06, | |
| "loss": 1.4217, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 1.6028309741881765, | |
| "grad_norm": 0.33684324932641296, | |
| "learning_rate": 5.892634207240949e-06, | |
| "loss": 1.3557, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.6061615320566194, | |
| "grad_norm": 0.31468847211788964, | |
| "learning_rate": 5.875706214689265e-06, | |
| "loss": 1.4122, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 1.6094920899250624, | |
| "grad_norm": 0.4442799216781044, | |
| "learning_rate": 5.858585858585859e-06, | |
| "loss": 1.4285, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.6128226477935055, | |
| "grad_norm": 0.4567121702156198, | |
| "learning_rate": 5.841269841269841e-06, | |
| "loss": 1.4764, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 1.6161532056619485, | |
| "grad_norm": 0.3590206566567271, | |
| "learning_rate": 5.82375478927203e-06, | |
| "loss": 1.4229, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.6194837635303914, | |
| "grad_norm": 0.3652198930331244, | |
| "learning_rate": 5.806037251123956e-06, | |
| "loss": 1.4151, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 1.6228143213988342, | |
| "grad_norm": 0.35866861963268476, | |
| "learning_rate": 5.7881136950904385e-06, | |
| "loss": 1.3369, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.6261448792672772, | |
| "grad_norm": 0.4750936045573692, | |
| "learning_rate": 5.7699805068226105e-06, | |
| "loss": 1.4715, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 1.6294754371357203, | |
| "grad_norm": 0.3613198830707804, | |
| "learning_rate": 5.7516339869281045e-06, | |
| "loss": 1.4291, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.6328059950041633, | |
| "grad_norm": 0.43606379412430957, | |
| "learning_rate": 5.733070348454964e-06, | |
| "loss": 1.4011, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 1.6361365528726062, | |
| "grad_norm": 0.35042984426925494, | |
| "learning_rate": 5.7142857142857145e-06, | |
| "loss": 1.4368, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.639467110741049, | |
| "grad_norm": 0.31661366243629, | |
| "learning_rate": 5.695276114437791e-06, | |
| "loss": 1.4541, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 1.642797668609492, | |
| "grad_norm": 0.3561358967067642, | |
| "learning_rate": 5.676037483266399e-06, | |
| "loss": 1.433, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.646128226477935, | |
| "grad_norm": 0.3931637346563919, | |
| "learning_rate": 5.656565656565656e-06, | |
| "loss": 1.4193, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 1.6494587843463782, | |
| "grad_norm": 0.48631366553960975, | |
| "learning_rate": 5.6368563685636855e-06, | |
| "loss": 1.4012, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.652789342214821, | |
| "grad_norm": 0.41348933242163105, | |
| "learning_rate": 5.616905248807089e-06, | |
| "loss": 1.3883, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 1.6561199000832638, | |
| "grad_norm": 0.3541766139316355, | |
| "learning_rate": 5.59670781893004e-06, | |
| "loss": 1.363, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.6594504579517069, | |
| "grad_norm": 0.410383164470969, | |
| "learning_rate": 5.576259489302967e-06, | |
| "loss": 1.3955, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 1.66278101582015, | |
| "grad_norm": 0.4100549908496841, | |
| "learning_rate": 5.555555555555555e-06, | |
| "loss": 1.3663, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.666111573688593, | |
| "grad_norm": 0.4122832272958553, | |
| "learning_rate": 5.534591194968553e-06, | |
| "loss": 1.5108, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 1.6694421315570358, | |
| "grad_norm": 0.33209617039282874, | |
| "learning_rate": 5.513361462728551e-06, | |
| "loss": 1.4069, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.6727726894254786, | |
| "grad_norm": 0.34650064809899755, | |
| "learning_rate": 5.491861288039631e-06, | |
| "loss": 1.3953, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 1.6761032472939217, | |
| "grad_norm": 0.3583592015376779, | |
| "learning_rate": 5.47008547008547e-06, | |
| "loss": 1.4181, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.6794338051623647, | |
| "grad_norm": 0.34343414571245584, | |
| "learning_rate": 5.448028673835125e-06, | |
| "loss": 1.418, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 1.6827643630308078, | |
| "grad_norm": 0.35638669107128673, | |
| "learning_rate": 5.425685425685425e-06, | |
| "loss": 1.4052, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.6860949208992506, | |
| "grad_norm": 0.3467424021658532, | |
| "learning_rate": 5.403050108932461e-06, | |
| "loss": 1.4581, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 1.6894254787676934, | |
| "grad_norm": 0.32381127071831955, | |
| "learning_rate": 5.3801169590643285e-06, | |
| "loss": 1.459, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.6927560366361365, | |
| "grad_norm": 0.3811936086039866, | |
| "learning_rate": 5.356880058866813e-06, | |
| "loss": 1.5033, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 1.6960865945045795, | |
| "grad_norm": 0.3612050754686712, | |
| "learning_rate": 5.333333333333333e-06, | |
| "loss": 1.4137, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.6994171523730226, | |
| "grad_norm": 0.35765265665477713, | |
| "learning_rate": 5.309470544369873e-06, | |
| "loss": 1.4087, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 1.7027477102414654, | |
| "grad_norm": 0.3357163323849947, | |
| "learning_rate": 5.285285285285285e-06, | |
| "loss": 1.4851, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.7060782681099083, | |
| "grad_norm": 0.3449646759899252, | |
| "learning_rate": 5.260770975056689e-06, | |
| "loss": 1.442, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 1.7094088259783513, | |
| "grad_norm": 0.3335919341097906, | |
| "learning_rate": 5.235920852359208e-06, | |
| "loss": 1.454, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.7127393838467944, | |
| "grad_norm": 0.3414007515866483, | |
| "learning_rate": 5.210727969348659e-06, | |
| "loss": 1.4762, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 1.7160699417152374, | |
| "grad_norm": 0.37174665041283544, | |
| "learning_rate": 5.185185185185185e-06, | |
| "loss": 1.4615, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.7194004995836802, | |
| "grad_norm": 0.37265087217053033, | |
| "learning_rate": 5.159285159285159e-06, | |
| "loss": 1.4072, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 1.722731057452123, | |
| "grad_norm": 0.3445160578098801, | |
| "learning_rate": 5.1330203442879505e-06, | |
| "loss": 1.4337, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.7260616153205661, | |
| "grad_norm": 0.3675807887019101, | |
| "learning_rate": 5.106382978723403e-06, | |
| "loss": 1.4147, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 1.7293921731890092, | |
| "grad_norm": 0.34584285856367675, | |
| "learning_rate": 5.079365079365079e-06, | |
| "loss": 1.4193, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.7327227310574522, | |
| "grad_norm": 0.3685778739128953, | |
| "learning_rate": 5.051958433253396e-06, | |
| "loss": 1.4466, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 1.736053288925895, | |
| "grad_norm": 0.35632916296360506, | |
| "learning_rate": 5.02415458937198e-06, | |
| "loss": 1.4299, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.739383846794338, | |
| "grad_norm": 0.3941128956001842, | |
| "learning_rate": 4.995944849959448e-06, | |
| "loss": 1.4264, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 1.742714404662781, | |
| "grad_norm": 0.3481786883352737, | |
| "learning_rate": 4.967320261437908e-06, | |
| "loss": 1.4279, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.746044962531224, | |
| "grad_norm": 0.3627527951339854, | |
| "learning_rate": 4.938271604938271e-06, | |
| "loss": 1.5152, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.749375520399667, | |
| "grad_norm": 0.34527513358988937, | |
| "learning_rate": 4.9087893864013265e-06, | |
| "loss": 1.4088, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.7527060782681099, | |
| "grad_norm": 0.368823414133051, | |
| "learning_rate": 4.878863826232247e-06, | |
| "loss": 1.3944, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 1.756036636136553, | |
| "grad_norm": 0.3471938836863914, | |
| "learning_rate": 4.848484848484849e-06, | |
| "loss": 1.3809, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.7593671940049957, | |
| "grad_norm": 0.39760839658681035, | |
| "learning_rate": 4.817642069550467e-06, | |
| "loss": 1.4081, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 1.7626977518734388, | |
| "grad_norm": 0.35630999152948084, | |
| "learning_rate": 4.786324786324786e-06, | |
| "loss": 1.4049, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.7660283097418819, | |
| "grad_norm": 0.3123127862091999, | |
| "learning_rate": 4.754521963824289e-06, | |
| "loss": 1.4033, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 1.7693588676103247, | |
| "grad_norm": 0.3565716669933871, | |
| "learning_rate": 4.722222222222222e-06, | |
| "loss": 1.4548, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.7726894254787677, | |
| "grad_norm": 0.3415824605451111, | |
| "learning_rate": 4.68941382327209e-06, | |
| "loss": 1.3379, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 1.7760199833472106, | |
| "grad_norm": 0.37445157627374487, | |
| "learning_rate": 4.6560846560846555e-06, | |
| "loss": 1.5023, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.7793505412156536, | |
| "grad_norm": 0.4140970552339397, | |
| "learning_rate": 4.622222222222222e-06, | |
| "loss": 1.4982, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 1.7826810990840967, | |
| "grad_norm": 0.3696216853055909, | |
| "learning_rate": 4.587813620071684e-06, | |
| "loss": 1.3795, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.7860116569525397, | |
| "grad_norm": 0.3374796769034963, | |
| "learning_rate": 4.552845528455284e-06, | |
| "loss": 1.4356, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 1.7893422148209825, | |
| "grad_norm": 0.4227610049072286, | |
| "learning_rate": 4.517304189435337e-06, | |
| "loss": 1.4625, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.7926727726894254, | |
| "grad_norm": 0.36612259553982557, | |
| "learning_rate": 4.4811753902663e-06, | |
| "loss": 1.4274, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 1.7960033305578684, | |
| "grad_norm": 0.4222638209328834, | |
| "learning_rate": 4.444444444444443e-06, | |
| "loss": 1.5129, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.7993338884263115, | |
| "grad_norm": 0.41009576553628174, | |
| "learning_rate": 4.407096171802053e-06, | |
| "loss": 1.4873, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 1.8026644462947545, | |
| "grad_norm": 0.35086922544434007, | |
| "learning_rate": 4.369114877589454e-06, | |
| "loss": 1.3718, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.8059950041631974, | |
| "grad_norm": 0.35855015526438827, | |
| "learning_rate": 4.33048433048433e-06, | |
| "loss": 1.4031, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 1.8093255620316402, | |
| "grad_norm": 0.42477533100459036, | |
| "learning_rate": 4.291187739463601e-06, | |
| "loss": 1.4473, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.8126561199000832, | |
| "grad_norm": 0.39791782472493653, | |
| "learning_rate": 4.251207729468599e-06, | |
| "loss": 1.4374, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 1.8159866777685263, | |
| "grad_norm": 0.3444343384513091, | |
| "learning_rate": 4.210526315789473e-06, | |
| "loss": 1.4048, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.8193172356369693, | |
| "grad_norm": 0.3453119165966736, | |
| "learning_rate": 4.169124877089478e-06, | |
| "loss": 1.3581, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 1.8226477935054122, | |
| "grad_norm": 0.38186414289634574, | |
| "learning_rate": 4.126984126984126e-06, | |
| "loss": 1.4774, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.825978351373855, | |
| "grad_norm": 0.3371300332212375, | |
| "learning_rate": 4.084084084084084e-06, | |
| "loss": 1.3565, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 1.829308909242298, | |
| "grad_norm": 0.32042065002080106, | |
| "learning_rate": 4.0404040404040395e-06, | |
| "loss": 1.3807, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.832639467110741, | |
| "grad_norm": 0.3776475075214216, | |
| "learning_rate": 3.995922528032619e-06, | |
| "loss": 1.4305, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 1.8359700249791842, | |
| "grad_norm": 0.3351717661136717, | |
| "learning_rate": 3.9506172839506175e-06, | |
| "loss": 1.4133, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.839300582847627, | |
| "grad_norm": 0.37528610178789024, | |
| "learning_rate": 3.904465212876428e-06, | |
| "loss": 1.4994, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 1.8426311407160698, | |
| "grad_norm": 0.38936785329254486, | |
| "learning_rate": 3.857442348008385e-06, | |
| "loss": 1.393, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.8459616985845129, | |
| "grad_norm": 0.40525496168183883, | |
| "learning_rate": 3.8095238095238094e-06, | |
| "loss": 1.4019, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 1.849292256452956, | |
| "grad_norm": 0.4169994094961459, | |
| "learning_rate": 3.7606837606837604e-06, | |
| "loss": 1.4208, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.852622814321399, | |
| "grad_norm": 0.4093560262894869, | |
| "learning_rate": 3.710895361380798e-06, | |
| "loss": 1.44, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 1.8559533721898418, | |
| "grad_norm": 0.35662577286334196, | |
| "learning_rate": 3.660130718954248e-06, | |
| "loss": 1.4168, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.8592839300582846, | |
| "grad_norm": 0.3469062029498766, | |
| "learning_rate": 3.6083608360836084e-06, | |
| "loss": 1.4109, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 1.8626144879267277, | |
| "grad_norm": 0.35913894186601036, | |
| "learning_rate": 3.5555555555555546e-06, | |
| "loss": 1.3026, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.8659450457951707, | |
| "grad_norm": 0.3601783041537011, | |
| "learning_rate": 3.501683501683501e-06, | |
| "loss": 1.4429, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 1.8692756036636138, | |
| "grad_norm": 0.4301246312907219, | |
| "learning_rate": 3.4467120181405894e-06, | |
| "loss": 1.4415, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.8726061615320566, | |
| "grad_norm": 0.44543619950365937, | |
| "learning_rate": 3.390607101947308e-06, | |
| "loss": 1.4354, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 1.8759367194004994, | |
| "grad_norm": 0.4110006980910609, | |
| "learning_rate": 3.333333333333333e-06, | |
| "loss": 1.5156, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.8792672772689425, | |
| "grad_norm": 0.36681035057341954, | |
| "learning_rate": 3.27485380116959e-06, | |
| "loss": 1.3926, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 1.8825978351373855, | |
| "grad_norm": 0.3639552416710322, | |
| "learning_rate": 3.215130023640661e-06, | |
| "loss": 1.4537, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.8859283930058286, | |
| "grad_norm": 0.3809201109198225, | |
| "learning_rate": 3.154121863799283e-06, | |
| "loss": 1.4344, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 1.8892589508742714, | |
| "grad_norm": 0.5067748995958425, | |
| "learning_rate": 3.0917874396135263e-06, | |
| "loss": 1.444, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.8925895087427143, | |
| "grad_norm": 0.39339545860925257, | |
| "learning_rate": 3.028083028083028e-06, | |
| "loss": 1.4368, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 1.8959200666111573, | |
| "grad_norm": 0.3645143242760266, | |
| "learning_rate": 2.9629629629629625e-06, | |
| "loss": 1.4189, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.8992506244796004, | |
| "grad_norm": 0.41301404150023885, | |
| "learning_rate": 2.8963795255930087e-06, | |
| "loss": 1.4513, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 1.9025811823480434, | |
| "grad_norm": 0.35445322756534786, | |
| "learning_rate": 2.828282828282828e-06, | |
| "loss": 1.4212, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.9059117402164862, | |
| "grad_norm": 0.31609898679838344, | |
| "learning_rate": 2.758620689655172e-06, | |
| "loss": 1.4282, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 1.909242298084929, | |
| "grad_norm": 0.38641815454972966, | |
| "learning_rate": 2.6873385012919895e-06, | |
| "loss": 1.5401, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.9125728559533721, | |
| "grad_norm": 0.38729985084754753, | |
| "learning_rate": 2.6143790849673204e-06, | |
| "loss": 1.461, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 1.9159034138218152, | |
| "grad_norm": 0.4550081298663739, | |
| "learning_rate": 2.5396825396825395e-06, | |
| "loss": 1.4602, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.9192339716902582, | |
| "grad_norm": 0.3605173725442084, | |
| "learning_rate": 2.4631860776439087e-06, | |
| "loss": 1.4104, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 1.922564529558701, | |
| "grad_norm": 0.38548981376382463, | |
| "learning_rate": 2.384823848238482e-06, | |
| "loss": 1.4465, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.9258950874271439, | |
| "grad_norm": 0.39748551935246357, | |
| "learning_rate": 2.304526748971193e-06, | |
| "loss": 1.443, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 1.929225645295587, | |
| "grad_norm": 0.3638886379639791, | |
| "learning_rate": 2.222222222222222e-06, | |
| "loss": 1.4129, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.93255620316403, | |
| "grad_norm": 0.36953734209449074, | |
| "learning_rate": 2.1378340365682133e-06, | |
| "loss": 1.3176, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 1.935886761032473, | |
| "grad_norm": 0.3267944344034355, | |
| "learning_rate": 2.051282051282051e-06, | |
| "loss": 1.4588, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.9392173189009159, | |
| "grad_norm": 0.3915434082543582, | |
| "learning_rate": 1.962481962481962e-06, | |
| "loss": 1.4441, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 1.942547876769359, | |
| "grad_norm": 0.3556155258308632, | |
| "learning_rate": 1.871345029239766e-06, | |
| "loss": 1.3898, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.9458784346378017, | |
| "grad_norm": 0.35583427100431714, | |
| "learning_rate": 1.7777777777777775e-06, | |
| "loss": 1.4117, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 1.9492089925062448, | |
| "grad_norm": 0.3312617219719275, | |
| "learning_rate": 1.6816816816816814e-06, | |
| "loss": 1.4243, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.9525395503746878, | |
| "grad_norm": 0.3171322439070156, | |
| "learning_rate": 1.582952815829528e-06, | |
| "loss": 1.3974, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 1.9558701082431307, | |
| "grad_norm": 0.2931034713127486, | |
| "learning_rate": 1.4814814814814812e-06, | |
| "loss": 1.4232, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.9592006661115737, | |
| "grad_norm": 0.31803832338980526, | |
| "learning_rate": 1.3771517996870107e-06, | |
| "loss": 1.4475, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 1.9625312239800166, | |
| "grad_norm": 0.3102745879861819, | |
| "learning_rate": 1.2698412698412697e-06, | |
| "loss": 1.4991, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.9658617818484596, | |
| "grad_norm": 0.35190966791382605, | |
| "learning_rate": 1.1594202898550724e-06, | |
| "loss": 1.4806, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 1.9691923397169027, | |
| "grad_norm": 0.3133274529689738, | |
| "learning_rate": 1.045751633986928e-06, | |
| "loss": 1.347, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.9725228975853455, | |
| "grad_norm": 0.30605048339614954, | |
| "learning_rate": 9.286898839137644e-07, | |
| "loss": 1.3999, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 1.9758534554537885, | |
| "grad_norm": 0.3151090112991302, | |
| "learning_rate": 8.08080808080808e-07, | |
| "loss": 1.4339, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.9791840133222314, | |
| "grad_norm": 0.35650599063479166, | |
| "learning_rate": 6.837606837606837e-07, | |
| "loss": 1.4009, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 1.9825145711906744, | |
| "grad_norm": 0.3384958491564326, | |
| "learning_rate": 5.555555555555555e-07, | |
| "loss": 1.4611, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.9858451290591175, | |
| "grad_norm": 0.3335636198476521, | |
| "learning_rate": 4.2328042328042324e-07, | |
| "loss": 1.3728, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 1.9891756869275605, | |
| "grad_norm": 0.32277296814250667, | |
| "learning_rate": 2.8673835125448024e-07, | |
| "loss": 1.387, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.9925062447960034, | |
| "grad_norm": 0.3467254801927619, | |
| "learning_rate": 1.4571948998178507e-07, | |
| "loss": 1.4114, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 1.9958368026644462, | |
| "grad_norm": 0.3207639144479259, | |
| "learning_rate": 0, | |
| "loss": 1.3956, | |
| "memory/device_mem_reserved(gib)": 59.75, | |
| "memory/max_mem_active(gib)": 57.09, | |
| "memory/max_mem_allocated(gib)": 56.77, | |
| "step": 600 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 600, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 150, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.439031159441326e+18, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |