{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9958368026644462, "eval_steps": 500, "global_step": 600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0033305578684429643, "grad_norm": 2.1135175063937415, "learning_rate": 1.3333333333333334e-07, "loss": 1.6413, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.01, "memory/max_mem_allocated(gib)": 56.7, "step": 1 }, { "epoch": 0.006661115736885929, "grad_norm": 2.0196598114735065, "learning_rate": 2.6666666666666667e-07, "loss": 1.6382, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 2 }, { "epoch": 0.009991673605328892, "grad_norm": 2.037892565480129, "learning_rate": 4.0000000000000003e-07, "loss": 1.6536, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 3 }, { "epoch": 0.013322231473771857, "grad_norm": 1.980939710918612, "learning_rate": 5.333333333333333e-07, "loss": 1.6712, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 4 }, { "epoch": 0.01665278934221482, "grad_norm": 2.0553629965683196, "learning_rate": 6.666666666666666e-07, "loss": 1.591, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 5 }, { "epoch": 0.019983347210657785, "grad_norm": 2.1321442384194493, "learning_rate": 8.000000000000001e-07, "loss": 1.6275, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 6 }, { "epoch": 0.02331390507910075, "grad_norm": 2.0224554441924147, "learning_rate": 9.333333333333333e-07, "loss": 1.6802, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 7 }, { "epoch": 0.026644462947543714, "grad_norm": 2.0657857283218144, "learning_rate": 1.0666666666666667e-06, "loss": 1.5768, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 8 }, { "epoch": 0.02997502081598668, "grad_norm": 2.0104233987359206, "learning_rate": 1.2e-06, "loss": 1.6026, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 9 }, { "epoch": 0.03330557868442964, "grad_norm": 2.098692014200002, "learning_rate": 1.3333333333333332e-06, "loss": 1.682, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 10 }, { "epoch": 0.03663613655287261, "grad_norm": 2.0879014611232116, "learning_rate": 1.4666666666666667e-06, "loss": 1.6368, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 11 }, { "epoch": 0.03996669442131557, "grad_norm": 2.0701872996726443, "learning_rate": 1.6000000000000001e-06, "loss": 1.629, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 12 }, { "epoch": 0.04329725228975854, "grad_norm": 2.105064067100562, "learning_rate": 1.7333333333333332e-06, "loss": 1.6568, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 13 }, { "epoch": 0.0466278101582015, "grad_norm": 2.1084872575258733, "learning_rate": 1.8666666666666667e-06, "loss": 1.597, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 14 }, { "epoch": 0.04995836802664446, "grad_norm": 2.0616161807879965, "learning_rate": 2e-06, "loss": 1.6008, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 15 }, { "epoch": 0.05328892589508743, "grad_norm": 1.92970469468585, "learning_rate": 2.1333333333333334e-06, "loss": 1.6815, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 16 }, { "epoch": 0.05661948376353039, "grad_norm": 2.0527427262697855, "learning_rate": 2.266666666666667e-06, "loss": 1.6873, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 17 }, { "epoch": 0.05995004163197336, "grad_norm": 1.9622305052083537, "learning_rate": 2.4e-06, "loss": 1.6334, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 18 }, { "epoch": 0.06328059950041633, "grad_norm": 1.9979390122219929, "learning_rate": 2.533333333333333e-06, "loss": 1.6623, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 19 }, { "epoch": 0.06661115736885928, "grad_norm": 2.0311968068371367, "learning_rate": 2.6666666666666664e-06, "loss": 1.607, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 20 }, { "epoch": 0.06994171523730225, "grad_norm": 1.968344786501615, "learning_rate": 2.8e-06, "loss": 1.6087, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 21 }, { "epoch": 0.07327227310574522, "grad_norm": 2.1145916019697952, "learning_rate": 2.9333333333333333e-06, "loss": 1.5926, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 22 }, { "epoch": 0.07660283097418817, "grad_norm": 2.0129475295050496, "learning_rate": 3.066666666666667e-06, "loss": 1.6171, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 23 }, { "epoch": 0.07993338884263114, "grad_norm": 1.8817164699193898, "learning_rate": 3.2000000000000003e-06, "loss": 1.6552, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 24 }, { "epoch": 0.08326394671107411, "grad_norm": 1.9306634203997992, "learning_rate": 3.333333333333333e-06, "loss": 1.6288, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 25 }, { "epoch": 0.08659450457951708, "grad_norm": 1.8839715974459492, "learning_rate": 3.4666666666666664e-06, "loss": 1.5772, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 26 }, { "epoch": 0.08992506244796003, "grad_norm": 1.9004207576591563, "learning_rate": 3.6e-06, "loss": 1.6019, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 27 }, { "epoch": 0.093255620316403, "grad_norm": 1.8508009396241183, "learning_rate": 3.7333333333333333e-06, "loss": 1.6347, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 28 }, { "epoch": 0.09658617818484597, "grad_norm": 1.6521817439090796, "learning_rate": 3.866666666666666e-06, "loss": 1.6425, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 29 }, { "epoch": 0.09991673605328892, "grad_norm": 1.5825237347457706, "learning_rate": 4e-06, "loss": 1.4999, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 30 }, { "epoch": 0.10324729392173189, "grad_norm": 1.4406934972277887, "learning_rate": 4.133333333333333e-06, "loss": 1.537, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 31 }, { "epoch": 0.10657785179017486, "grad_norm": 1.3686252476380623, "learning_rate": 4.266666666666667e-06, "loss": 1.5054, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 32 }, { "epoch": 0.10990840965861781, "grad_norm": 1.190989973623068, "learning_rate": 4.399999999999999e-06, "loss": 1.5673, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 33 }, { "epoch": 0.11323896752706078, "grad_norm": 1.0921718147815354, "learning_rate": 4.533333333333334e-06, "loss": 1.5383, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 34 }, { "epoch": 0.11656952539550375, "grad_norm": 0.9720091603452963, "learning_rate": 4.666666666666666e-06, "loss": 1.5698, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 35 }, { "epoch": 0.11990008326394672, "grad_norm": 0.8634677699569875, "learning_rate": 4.8e-06, "loss": 1.5286, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 36 }, { "epoch": 0.12323064113238967, "grad_norm": 0.7720350215206407, "learning_rate": 4.933333333333333e-06, "loss": 1.5897, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 37 }, { "epoch": 0.12656119900083265, "grad_norm": 0.7351438783567595, "learning_rate": 5.066666666666666e-06, "loss": 1.471, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 38 }, { "epoch": 0.1298917568692756, "grad_norm": 0.6436527036047347, "learning_rate": 5.2e-06, "loss": 1.5523, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 39 }, { "epoch": 0.13322231473771856, "grad_norm": 0.5914433909472115, "learning_rate": 5.333333333333333e-06, "loss": 1.5169, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 40 }, { "epoch": 0.13655287260616153, "grad_norm": 0.5708899134928395, "learning_rate": 5.466666666666667e-06, "loss": 1.4727, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 41 }, { "epoch": 0.1398834304746045, "grad_norm": 0.562979308505682, "learning_rate": 5.6e-06, "loss": 1.5101, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 42 }, { "epoch": 0.14321398834304747, "grad_norm": 0.5333098859373814, "learning_rate": 5.733333333333332e-06, "loss": 1.5053, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 43 }, { "epoch": 0.14654454621149043, "grad_norm": 0.518700589700869, "learning_rate": 5.866666666666667e-06, "loss": 1.5522, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 44 }, { "epoch": 0.1498751040799334, "grad_norm": 0.5123134702021855, "learning_rate": 5.999999999999999e-06, "loss": 1.4581, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 45 }, { "epoch": 0.15320566194837634, "grad_norm": 0.5233023339387923, "learning_rate": 6.133333333333334e-06, "loss": 1.4503, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 46 }, { "epoch": 0.1565362198168193, "grad_norm": 0.4984395351799732, "learning_rate": 6.266666666666666e-06, "loss": 1.4698, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 47 }, { "epoch": 0.15986677768526228, "grad_norm": 0.48116733820243823, "learning_rate": 6.4000000000000006e-06, "loss": 1.5399, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 48 }, { "epoch": 0.16319733555370525, "grad_norm": 0.4917159508967155, "learning_rate": 6.533333333333333e-06, "loss": 1.4674, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 49 }, { "epoch": 0.16652789342214822, "grad_norm": 0.4631697484027289, "learning_rate": 6.666666666666666e-06, "loss": 1.5063, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 50 }, { "epoch": 0.16985845129059118, "grad_norm": 0.4506097490342786, "learning_rate": 6.8e-06, "loss": 1.4787, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 51 }, { "epoch": 0.17318900915903415, "grad_norm": 0.4808943580292107, "learning_rate": 6.933333333333333e-06, "loss": 1.5355, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 52 }, { "epoch": 0.1765195670274771, "grad_norm": 0.4353655566788618, "learning_rate": 7.066666666666667e-06, "loss": 1.4545, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 53 }, { "epoch": 0.17985012489592006, "grad_norm": 0.42881276266179474, "learning_rate": 7.2e-06, "loss": 1.4726, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 54 }, { "epoch": 0.18318068276436303, "grad_norm": 0.4243886425058161, "learning_rate": 7.333333333333332e-06, "loss": 1.5364, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 55 }, { "epoch": 0.186511240632806, "grad_norm": 0.4078516232902407, "learning_rate": 7.466666666666667e-06, "loss": 1.5441, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 56 }, { "epoch": 0.18984179850124897, "grad_norm": 0.39819776399963164, "learning_rate": 7.599999999999999e-06, "loss": 1.5394, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 57 }, { "epoch": 0.19317235636969193, "grad_norm": 0.3993196408685462, "learning_rate": 7.733333333333333e-06, "loss": 1.4883, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 58 }, { "epoch": 0.1965029142381349, "grad_norm": 0.3992257742362516, "learning_rate": 7.866666666666667e-06, "loss": 1.4933, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 59 }, { "epoch": 0.19983347210657784, "grad_norm": 0.39782096872195477, "learning_rate": 8e-06, "loss": 1.4729, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 60 }, { "epoch": 0.2031640299750208, "grad_norm": 0.38436305350726707, "learning_rate": 7.99851604526062e-06, "loss": 1.4777, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 61 }, { "epoch": 0.20649458784346378, "grad_norm": 0.3782583438569582, "learning_rate": 7.99702712746191e-06, "loss": 1.535, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 62 }, { "epoch": 0.20982514571190675, "grad_norm": 0.3910775225847348, "learning_rate": 7.995533221663874e-06, "loss": 1.4643, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 63 }, { "epoch": 0.21315570358034971, "grad_norm": 0.37376830993433585, "learning_rate": 7.994034302759135e-06, "loss": 1.4265, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 64 }, { "epoch": 0.21648626144879268, "grad_norm": 0.37205196740456564, "learning_rate": 7.99253034547152e-06, "loss": 1.484, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 65 }, { "epoch": 0.21981681931723562, "grad_norm": 0.37012462931708767, "learning_rate": 7.991021324354658e-06, "loss": 1.4668, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 66 }, { "epoch": 0.2231473771856786, "grad_norm": 0.36609254541987934, "learning_rate": 7.989507213790519e-06, "loss": 1.4512, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 67 }, { "epoch": 0.22647793505412156, "grad_norm": 0.36389643029866026, "learning_rate": 7.987987987987988e-06, "loss": 1.4666, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 68 }, { "epoch": 0.22980849292256453, "grad_norm": 0.3835942907380993, "learning_rate": 7.986463620981386e-06, "loss": 1.5581, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 69 }, { "epoch": 0.2331390507910075, "grad_norm": 0.3709505537460329, "learning_rate": 7.984934086629002e-06, "loss": 1.4942, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 70 }, { "epoch": 0.23646960865945046, "grad_norm": 0.3734284694232727, "learning_rate": 7.983399358611582e-06, "loss": 1.5449, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 71 }, { "epoch": 0.23980016652789343, "grad_norm": 0.38168285139161445, "learning_rate": 7.981859410430838e-06, "loss": 1.4972, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 72 }, { "epoch": 0.24313072439633637, "grad_norm": 0.4166060644404285, "learning_rate": 7.98031421540791e-06, "loss": 1.5273, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 73 }, { "epoch": 0.24646128226477934, "grad_norm": 0.3721773268353121, "learning_rate": 7.978763746681835e-06, "loss": 1.5459, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 74 }, { "epoch": 0.2497918401332223, "grad_norm": 0.3785109036596187, "learning_rate": 7.977207977207977e-06, "loss": 1.5221, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 75 }, { "epoch": 0.2531223980016653, "grad_norm": 0.3798544993330551, "learning_rate": 7.975646879756469e-06, "loss": 1.447, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 76 }, { "epoch": 0.25645295587010825, "grad_norm": 0.3676458399839075, "learning_rate": 7.974080426910615e-06, "loss": 1.552, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 77 }, { "epoch": 0.2597835137385512, "grad_norm": 0.3906392619486636, "learning_rate": 7.972508591065292e-06, "loss": 1.5524, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 78 }, { "epoch": 0.2631140716069942, "grad_norm": 0.4086059406499793, "learning_rate": 7.97093134442532e-06, "loss": 1.5347, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 79 }, { "epoch": 0.2664446294754371, "grad_norm": 0.37866878925235237, "learning_rate": 7.969348659003832e-06, "loss": 1.4263, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 80 }, { "epoch": 0.2697751873438801, "grad_norm": 0.36630829174020924, "learning_rate": 7.96776050662061e-06, "loss": 1.4882, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 81 }, { "epoch": 0.27310574521232306, "grad_norm": 0.3572953266857883, "learning_rate": 7.966166858900421e-06, "loss": 1.4996, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 82 }, { "epoch": 0.27643630308076605, "grad_norm": 0.37034991529495037, "learning_rate": 7.964567687271327e-06, "loss": 1.4558, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 83 }, { "epoch": 0.279766860949209, "grad_norm": 0.39454254411893813, "learning_rate": 7.962962962962963e-06, "loss": 1.481, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 84 }, { "epoch": 0.28309741881765194, "grad_norm": 0.36598142148212737, "learning_rate": 7.961352657004831e-06, "loss": 1.4647, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 85 }, { "epoch": 0.28642797668609493, "grad_norm": 0.40131025635004997, "learning_rate": 7.959736740224545e-06, "loss": 1.486, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 86 }, { "epoch": 0.28975853455453787, "grad_norm": 0.36813808559204136, "learning_rate": 7.958115183246073e-06, "loss": 1.5104, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 87 }, { "epoch": 0.29308909242298087, "grad_norm": 0.4399054897841581, "learning_rate": 7.956487956487956e-06, "loss": 1.5511, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 88 }, { "epoch": 0.2964196502914238, "grad_norm": 0.4137480663423791, "learning_rate": 7.95485503016151e-06, "loss": 1.5431, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 89 }, { "epoch": 0.2997502081598668, "grad_norm": 0.39082659570701933, "learning_rate": 7.953216374269006e-06, "loss": 1.5094, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 90 }, { "epoch": 0.30308076602830974, "grad_norm": 0.4222547479458042, "learning_rate": 7.951571958601836e-06, "loss": 1.528, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 91 }, { "epoch": 0.3064113238967527, "grad_norm": 0.4565848989524497, "learning_rate": 7.949921752738653e-06, "loss": 1.4345, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 92 }, { "epoch": 0.3097418817651957, "grad_norm": 0.3909465393349193, "learning_rate": 7.948265726043504e-06, "loss": 1.4885, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 93 }, { "epoch": 0.3130724396336386, "grad_norm": 0.40399439020361494, "learning_rate": 7.946603847663918e-06, "loss": 1.4836, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 94 }, { "epoch": 0.3164029975020816, "grad_norm": 0.3940685084379771, "learning_rate": 7.944936086529007e-06, "loss": 1.4894, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 95 }, { "epoch": 0.31973355537052456, "grad_norm": 0.36455393248573603, "learning_rate": 7.943262411347517e-06, "loss": 1.4765, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 96 }, { "epoch": 0.32306411323896755, "grad_norm": 0.42216219555871026, "learning_rate": 7.94158279060588e-06, "loss": 1.505, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 97 }, { "epoch": 0.3263946711074105, "grad_norm": 0.3833612688097333, "learning_rate": 7.93989719256623e-06, "loss": 1.4803, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 98 }, { "epoch": 0.32972522897585343, "grad_norm": 0.3793312412105176, "learning_rate": 7.938205585264408e-06, "loss": 1.4721, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 99 }, { "epoch": 0.33305578684429643, "grad_norm": 0.6231405275420779, "learning_rate": 7.936507936507936e-06, "loss": 1.4941, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 100 }, { "epoch": 0.33638634471273937, "grad_norm": 0.39916108511305454, "learning_rate": 7.934804213873981e-06, "loss": 1.5113, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 101 }, { "epoch": 0.33971690258118237, "grad_norm": 0.39832888981715536, "learning_rate": 7.933094384707288e-06, "loss": 1.4616, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 102 }, { "epoch": 0.3430474604496253, "grad_norm": 0.35554379353616694, "learning_rate": 7.931378416118093e-06, "loss": 1.4754, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 103 }, { "epoch": 0.3463780183180683, "grad_norm": 0.3778786204869107, "learning_rate": 7.929656274980016e-06, "loss": 1.5204, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 104 }, { "epoch": 0.34970857618651124, "grad_norm": 0.3979509981477904, "learning_rate": 7.927927927927927e-06, "loss": 1.4972, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 105 }, { "epoch": 0.3530391340549542, "grad_norm": 0.3829152377900939, "learning_rate": 7.926193341355797e-06, "loss": 1.4852, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 106 }, { "epoch": 0.3563696919233972, "grad_norm": 0.3783230292732417, "learning_rate": 7.924452481414507e-06, "loss": 1.4605, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 107 }, { "epoch": 0.3597002497918401, "grad_norm": 0.3702225917786687, "learning_rate": 7.922705314009662e-06, "loss": 1.4751, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 108 }, { "epoch": 0.3630308076602831, "grad_norm": 0.423076463648796, "learning_rate": 7.920951804799353e-06, "loss": 1.5043, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 109 }, { "epoch": 0.36636136552872606, "grad_norm": 0.4015775298544568, "learning_rate": 7.919191919191919e-06, "loss": 1.4993, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 110 }, { "epoch": 0.36969192339716905, "grad_norm": 0.395772531232646, "learning_rate": 7.917425622343655e-06, "loss": 1.5074, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 111 }, { "epoch": 0.373022481265612, "grad_norm": 0.4063035915678222, "learning_rate": 7.915652879156528e-06, "loss": 1.5005, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 112 }, { "epoch": 0.37635303913405493, "grad_norm": 0.3749049780160411, "learning_rate": 7.913873654275848e-06, "loss": 1.5016, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 113 }, { "epoch": 0.37968359700249793, "grad_norm": 0.40207184709524446, "learning_rate": 7.912087912087911e-06, "loss": 1.5112, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 114 }, { "epoch": 0.38301415487094087, "grad_norm": 0.3761596500147066, "learning_rate": 7.910295616717634e-06, "loss": 1.4226, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 115 }, { "epoch": 0.38634471273938387, "grad_norm": 0.34919530357614503, "learning_rate": 7.908496732026144e-06, "loss": 1.454, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 116 }, { "epoch": 0.3896752706078268, "grad_norm": 0.3783249892281946, "learning_rate": 7.906691221608348e-06, "loss": 1.3926, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 117 }, { "epoch": 0.3930058284762698, "grad_norm": 0.38789047851939196, "learning_rate": 7.904879048790487e-06, "loss": 1.5148, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 118 }, { "epoch": 0.39633638634471274, "grad_norm": 0.38028310552438055, "learning_rate": 7.903060176627645e-06, "loss": 1.5512, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 119 }, { "epoch": 0.3996669442131557, "grad_norm": 0.3557857851285413, "learning_rate": 7.901234567901235e-06, "loss": 1.5145, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 120 }, { "epoch": 0.4029975020815987, "grad_norm": 0.3648126505851961, "learning_rate": 7.89940218511647e-06, "loss": 1.4616, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 121 }, { "epoch": 0.4063280599500416, "grad_norm": 0.3518641114757544, "learning_rate": 7.897562990499793e-06, "loss": 1.4444, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 122 }, { "epoch": 0.4096586178184846, "grad_norm": 0.3812409352914946, "learning_rate": 7.895716945996276e-06, "loss": 1.4524, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 123 }, { "epoch": 0.41298917568692756, "grad_norm": 0.37136499335096407, "learning_rate": 7.893864013266998e-06, "loss": 1.4495, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 124 }, { "epoch": 0.4163197335553705, "grad_norm": 0.36965696298407785, "learning_rate": 7.892004153686396e-06, "loss": 1.454, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 125 }, { "epoch": 0.4196502914238135, "grad_norm": 0.37625883797439813, "learning_rate": 7.890137328339575e-06, "loss": 1.4738, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 126 }, { "epoch": 0.42298084929225643, "grad_norm": 0.3891305395409707, "learning_rate": 7.888263498019595e-06, "loss": 1.4336, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 127 }, { "epoch": 0.42631140716069943, "grad_norm": 0.33836499033668194, "learning_rate": 7.886382623224727e-06, "loss": 1.4435, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 128 }, { "epoch": 0.42964196502914237, "grad_norm": 0.39084535016086686, "learning_rate": 7.88449466415568e-06, "loss": 1.4598, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 129 }, { "epoch": 0.43297252289758537, "grad_norm": 0.3896756879145717, "learning_rate": 7.882599580712787e-06, "loss": 1.5065, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 130 }, { "epoch": 0.4363030807660283, "grad_norm": 0.4252827004151611, "learning_rate": 7.880697332493174e-06, "loss": 1.4083, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 131 }, { "epoch": 0.43963363863447125, "grad_norm": 0.3608365697753635, "learning_rate": 7.878787878787878e-06, "loss": 1.441, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 132 }, { "epoch": 0.44296419650291424, "grad_norm": 0.403123415092978, "learning_rate": 7.876871178578958e-06, "loss": 1.4627, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 133 }, { "epoch": 0.4462947543713572, "grad_norm": 0.40013457143727, "learning_rate": 7.874947190536545e-06, "loss": 1.4955, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 134 }, { "epoch": 0.4496253122398002, "grad_norm": 0.3883976625001682, "learning_rate": 7.873015873015873e-06, "loss": 1.4298, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 135 }, { "epoch": 0.4529558701082431, "grad_norm": 0.47893723454397114, "learning_rate": 7.871077184054283e-06, "loss": 1.4706, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 136 }, { "epoch": 0.4562864279766861, "grad_norm": 0.3939594731251799, "learning_rate": 7.869131081368174e-06, "loss": 1.4659, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 137 }, { "epoch": 0.45961698584512906, "grad_norm": 0.39872483940488357, "learning_rate": 7.867177522349935e-06, "loss": 1.4428, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 138 }, { "epoch": 0.462947543713572, "grad_norm": 0.41681968734219343, "learning_rate": 7.865216464064831e-06, "loss": 1.5116, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 139 }, { "epoch": 0.466278101582015, "grad_norm": 0.3950334535334994, "learning_rate": 7.863247863247863e-06, "loss": 1.4453, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 140 }, { "epoch": 0.46960865945045793, "grad_norm": 0.3569883912128034, "learning_rate": 7.861271676300578e-06, "loss": 1.462, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 141 }, { "epoch": 0.47293921731890093, "grad_norm": 0.3784473417547298, "learning_rate": 7.85928785928786e-06, "loss": 1.4961, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 142 }, { "epoch": 0.47626977518734387, "grad_norm": 0.35459480974078084, "learning_rate": 7.857296367934665e-06, "loss": 1.5362, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 143 }, { "epoch": 0.47960033305578686, "grad_norm": 0.3662426670901604, "learning_rate": 7.85529715762274e-06, "loss": 1.3832, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 144 }, { "epoch": 0.4829308909242298, "grad_norm": 0.4066610425979986, "learning_rate": 7.85329018338727e-06, "loss": 1.4641, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 145 }, { "epoch": 0.48626144879267275, "grad_norm": 0.3545713986492447, "learning_rate": 7.851275399913532e-06, "loss": 1.4675, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 146 }, { "epoch": 0.48959200666111574, "grad_norm": 0.3664688735051096, "learning_rate": 7.849252761533463e-06, "loss": 1.4683, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 147 }, { "epoch": 0.4929225645295587, "grad_norm": 0.3733605661751341, "learning_rate": 7.847222222222221e-06, "loss": 1.4315, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 148 }, { "epoch": 0.4962531223980017, "grad_norm": 0.3380374141462393, "learning_rate": 7.845183735594695e-06, "loss": 1.4401, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 149 }, { "epoch": 0.4995836802664446, "grad_norm": 0.407518229964574, "learning_rate": 7.84313725490196e-06, "loss": 1.4437, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 150 }, { "epoch": 0.5029142381348876, "grad_norm": 0.36142186690399497, "learning_rate": 7.841082733027723e-06, "loss": 1.4444, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 151 }, { "epoch": 0.5062447960033306, "grad_norm": 0.35245555484230673, "learning_rate": 7.839020122484688e-06, "loss": 1.4013, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 152 }, { "epoch": 0.5095753538717736, "grad_norm": 0.3751518274944043, "learning_rate": 7.836949375410913e-06, "loss": 1.4325, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 153 }, { "epoch": 0.5129059117402165, "grad_norm": 0.364299926744196, "learning_rate": 7.834870443566096e-06, "loss": 1.4757, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 154 }, { "epoch": 0.5162364696086594, "grad_norm": 0.352709296353453, "learning_rate": 7.832783278327833e-06, "loss": 1.4405, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 155 }, { "epoch": 0.5195670274771024, "grad_norm": 0.3595882748979197, "learning_rate": 7.830687830687831e-06, "loss": 1.5005, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 156 }, { "epoch": 0.5228975853455454, "grad_norm": 0.3663062717533196, "learning_rate": 7.828584051248068e-06, "loss": 1.4916, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 157 }, { "epoch": 0.5262281432139884, "grad_norm": 0.39230898190550817, "learning_rate": 7.82647189021691e-06, "loss": 1.5925, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 158 }, { "epoch": 0.5295587010824313, "grad_norm": 0.34764213510621217, "learning_rate": 7.824351297405189e-06, "loss": 1.533, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 159 }, { "epoch": 0.5328892589508742, "grad_norm": 0.4356036173061448, "learning_rate": 7.822222222222222e-06, "loss": 1.4768, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 160 }, { "epoch": 0.5362198168193172, "grad_norm": 0.3650633676087402, "learning_rate": 7.820084613671788e-06, "loss": 1.4834, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 161 }, { "epoch": 0.5395503746877602, "grad_norm": 0.36003662026404476, "learning_rate": 7.81793842034806e-06, "loss": 1.4745, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 162 }, { "epoch": 0.5428809325562032, "grad_norm": 0.45089712637002705, "learning_rate": 7.815783590431477e-06, "loss": 1.4762, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 163 }, { "epoch": 0.5462114904246461, "grad_norm": 0.3727325869359898, "learning_rate": 7.813620071684589e-06, "loss": 1.4605, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 164 }, { "epoch": 0.5495420482930891, "grad_norm": 0.3396845072209277, "learning_rate": 7.81144781144781e-06, "loss": 1.4793, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 165 }, { "epoch": 0.5528726061615321, "grad_norm": 0.35005093334327886, "learning_rate": 7.809266756635177e-06, "loss": 1.4699, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 166 }, { "epoch": 0.556203164029975, "grad_norm": 0.3836826797224187, "learning_rate": 7.807076853729998e-06, "loss": 1.4727, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 167 }, { "epoch": 0.559533721898418, "grad_norm": 0.3866747204941054, "learning_rate": 7.804878048780487e-06, "loss": 1.4656, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 168 }, { "epoch": 0.5628642797668609, "grad_norm": 0.3754060351606817, "learning_rate": 7.802670287395338e-06, "loss": 1.4427, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 169 }, { "epoch": 0.5661948376353039, "grad_norm": 0.3560392764595894, "learning_rate": 7.80045351473923e-06, "loss": 1.469, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 170 }, { "epoch": 0.5695253955037469, "grad_norm": 0.38308734497417124, "learning_rate": 7.79822767552829e-06, "loss": 1.5086, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 171 }, { "epoch": 0.5728559533721899, "grad_norm": 0.359072776955667, "learning_rate": 7.7959927140255e-06, "loss": 1.4531, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 172 }, { "epoch": 0.5761865112406328, "grad_norm": 0.3922686356985507, "learning_rate": 7.793748574036049e-06, "loss": 1.5004, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 173 }, { "epoch": 0.5795170691090757, "grad_norm": 0.38139432931110967, "learning_rate": 7.791495198902606e-06, "loss": 1.4596, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 174 }, { "epoch": 0.5828476269775187, "grad_norm": 0.386725308323352, "learning_rate": 7.789232531500573e-06, "loss": 1.4107, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 175 }, { "epoch": 0.5861781848459617, "grad_norm": 0.3590860738790805, "learning_rate": 7.786960514233242e-06, "loss": 1.4748, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 176 }, { "epoch": 0.5895087427144047, "grad_norm": 0.38618438471328675, "learning_rate": 7.784679089026915e-06, "loss": 1.481, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 177 }, { "epoch": 0.5928393005828476, "grad_norm": 0.35271293932202913, "learning_rate": 7.782388197325957e-06, "loss": 1.4445, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 178 }, { "epoch": 0.5961698584512906, "grad_norm": 0.3587339394337467, "learning_rate": 7.78008778008778e-06, "loss": 1.482, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 179 }, { "epoch": 0.5995004163197336, "grad_norm": 0.4051854093619042, "learning_rate": 7.777777777777777e-06, "loss": 1.4538, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 180 }, { "epoch": 0.6028309741881765, "grad_norm": 0.3624830177676393, "learning_rate": 7.775458130364185e-06, "loss": 1.3882, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 181 }, { "epoch": 0.6061615320566195, "grad_norm": 0.40327439887058536, "learning_rate": 7.773128777312878e-06, "loss": 1.4439, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 182 }, { "epoch": 0.6094920899250624, "grad_norm": 0.3849485884337272, "learning_rate": 7.77078965758211e-06, "loss": 1.4598, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 183 }, { "epoch": 0.6128226477935054, "grad_norm": 0.3800546336095655, "learning_rate": 7.76844070961718e-06, "loss": 1.5077, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 184 }, { "epoch": 0.6161532056619484, "grad_norm": 0.4058514640829756, "learning_rate": 7.766081871345029e-06, "loss": 1.4557, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 185 }, { "epoch": 0.6194837635303914, "grad_norm": 0.3547116281420189, "learning_rate": 7.763713080168775e-06, "loss": 1.4465, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 186 }, { "epoch": 0.6228143213988343, "grad_norm": 0.36935786461716674, "learning_rate": 7.76133427296218e-06, "loss": 1.3674, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 187 }, { "epoch": 0.6261448792672772, "grad_norm": 0.3465066682351456, "learning_rate": 7.75894538606403e-06, "loss": 1.5018, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 188 }, { "epoch": 0.6294754371357202, "grad_norm": 0.37821929789921876, "learning_rate": 7.75654635527247e-06, "loss": 1.46, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 189 }, { "epoch": 0.6328059950041632, "grad_norm": 0.42147975033129337, "learning_rate": 7.754137115839244e-06, "loss": 1.4324, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 190 }, { "epoch": 0.6361365528726062, "grad_norm": 0.395112631651776, "learning_rate": 7.751717602463872e-06, "loss": 1.4682, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 191 }, { "epoch": 0.6394671107410491, "grad_norm": 0.3866087697502269, "learning_rate": 7.749287749287749e-06, "loss": 1.4845, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 192 }, { "epoch": 0.6427976686094921, "grad_norm": 0.39380953384339784, "learning_rate": 7.746847489888173e-06, "loss": 1.4628, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 193 }, { "epoch": 0.6461282264779351, "grad_norm": 0.38499086799547566, "learning_rate": 7.744396757272294e-06, "loss": 1.4485, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 194 }, { "epoch": 0.649458784346378, "grad_norm": 0.3628021970554608, "learning_rate": 7.741935483870966e-06, "loss": 1.4306, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 195 }, { "epoch": 0.652789342214821, "grad_norm": 0.37886204626432507, "learning_rate": 7.739463601532567e-06, "loss": 1.4178, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 196 }, { "epoch": 0.6561199000832639, "grad_norm": 0.36347566586862995, "learning_rate": 7.736981041516678e-06, "loss": 1.3917, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 197 }, { "epoch": 0.6594504579517069, "grad_norm": 0.3808525608826558, "learning_rate": 7.734487734487733e-06, "loss": 1.425, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 198 }, { "epoch": 0.6627810158201499, "grad_norm": 0.36703672958616185, "learning_rate": 7.731983610508556e-06, "loss": 1.3963, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 199 }, { "epoch": 0.6661115736885929, "grad_norm": 0.3449284331155099, "learning_rate": 7.729468599033817e-06, "loss": 1.5389, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 200 }, { "epoch": 0.6694421315570358, "grad_norm": 0.38098871722055255, "learning_rate": 7.726942628903412e-06, "loss": 1.4354, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 201 }, { "epoch": 0.6727726894254787, "grad_norm": 0.37447535098026113, "learning_rate": 7.72440562833576e-06, "loss": 1.4238, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 202 }, { "epoch": 0.6761032472939217, "grad_norm": 0.3815996192127943, "learning_rate": 7.721857524920983e-06, "loss": 1.4465, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 203 }, { "epoch": 0.6794338051623647, "grad_norm": 0.43830167523580127, "learning_rate": 7.719298245614036e-06, "loss": 1.4464, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 204 }, { "epoch": 0.6827643630308077, "grad_norm": 0.49374391843463344, "learning_rate": 7.716727716727717e-06, "loss": 1.4326, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 205 }, { "epoch": 0.6860949208992506, "grad_norm": 0.40611516537871767, "learning_rate": 7.714145863925599e-06, "loss": 1.4867, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 206 }, { "epoch": 0.6894254787676936, "grad_norm": 0.39306412548059455, "learning_rate": 7.711552612214863e-06, "loss": 1.4879, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 207 }, { "epoch": 0.6927560366361366, "grad_norm": 0.3732547746311456, "learning_rate": 7.708947885939036e-06, "loss": 1.5305, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 208 }, { "epoch": 0.6960865945045795, "grad_norm": 0.3749992070235647, "learning_rate": 7.706331608770632e-06, "loss": 1.4422, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 209 }, { "epoch": 0.6994171523730225, "grad_norm": 0.4236632648954227, "learning_rate": 7.703703703703702e-06, "loss": 1.4362, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 210 }, { "epoch": 0.7027477102414654, "grad_norm": 0.3799687473741569, "learning_rate": 7.701064093046274e-06, "loss": 1.512, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 211 }, { "epoch": 0.7060782681099084, "grad_norm": 0.3724271784543797, "learning_rate": 7.698412698412699e-06, "loss": 1.469, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 212 }, { "epoch": 0.7094088259783514, "grad_norm": 0.364477503994216, "learning_rate": 7.695749440715883e-06, "loss": 1.4811, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 213 }, { "epoch": 0.7127393838467944, "grad_norm": 0.3925520005032744, "learning_rate": 7.693074240159441e-06, "loss": 1.5027, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 214 }, { "epoch": 0.7160699417152373, "grad_norm": 0.40921223587397654, "learning_rate": 7.690387016229713e-06, "loss": 1.488, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 215 }, { "epoch": 0.7194004995836802, "grad_norm": 0.3981162315328969, "learning_rate": 7.687687687687688e-06, "loss": 1.4343, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 216 }, { "epoch": 0.7227310574521232, "grad_norm": 0.35388766488814566, "learning_rate": 7.684976172560823e-06, "loss": 1.4599, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 217 }, { "epoch": 0.7260616153205662, "grad_norm": 0.3449802535833205, "learning_rate": 7.682252388134742e-06, "loss": 1.442, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 218 }, { "epoch": 0.7293921731890092, "grad_norm": 0.34627676487411824, "learning_rate": 7.679516250944822e-06, "loss": 1.4461, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 219 }, { "epoch": 0.7327227310574521, "grad_norm": 0.35799089084524466, "learning_rate": 7.676767676767677e-06, "loss": 1.4731, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 220 }, { "epoch": 0.736053288925895, "grad_norm": 0.3820520257947768, "learning_rate": 7.674006580612503e-06, "loss": 1.4566, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 221 }, { "epoch": 0.7393838467943381, "grad_norm": 0.3641120307221186, "learning_rate": 7.671232876712327e-06, "loss": 1.4525, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 222 }, { "epoch": 0.742714404662781, "grad_norm": 0.37136269720782134, "learning_rate": 7.668446478515128e-06, "loss": 1.4548, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 223 }, { "epoch": 0.746044962531224, "grad_norm": 0.4138383130083843, "learning_rate": 7.665647298674822e-06, "loss": 1.5395, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 224 }, { "epoch": 0.7493755203996669, "grad_norm": 0.37512729325167443, "learning_rate": 7.662835249042145e-06, "loss": 1.4348, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 225 }, { "epoch": 0.7527060782681099, "grad_norm": 0.3574220209010036, "learning_rate": 7.660010240655401e-06, "loss": 1.4205, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 226 }, { "epoch": 0.7560366361365529, "grad_norm": 0.3509015504877034, "learning_rate": 7.657172183731076e-06, "loss": 1.4074, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 227 }, { "epoch": 0.7593671940049959, "grad_norm": 0.4191818637620366, "learning_rate": 7.654320987654322e-06, "loss": 1.434, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 228 }, { "epoch": 0.7626977518734388, "grad_norm": 0.38073125720358314, "learning_rate": 7.651456560969322e-06, "loss": 1.4315, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 229 }, { "epoch": 0.7660283097418817, "grad_norm": 0.3489534004367162, "learning_rate": 7.648578811369509e-06, "loss": 1.4292, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 230 }, { "epoch": 0.7693588676103247, "grad_norm": 0.39880199669766575, "learning_rate": 7.645687645687645e-06, "loss": 1.4797, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 231 }, { "epoch": 0.7726894254787677, "grad_norm": 0.3377554646810836, "learning_rate": 7.642782969885774e-06, "loss": 1.3638, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 232 }, { "epoch": 0.7760199833472107, "grad_norm": 0.45577113603344144, "learning_rate": 7.639864689045015e-06, "loss": 1.5272, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 233 }, { "epoch": 0.7793505412156536, "grad_norm": 0.3872639106321951, "learning_rate": 7.636932707355241e-06, "loss": 1.5223, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 234 }, { "epoch": 0.7826810990840966, "grad_norm": 0.41241615465906434, "learning_rate": 7.633986928104575e-06, "loss": 1.4047, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 235 }, { "epoch": 0.7860116569525396, "grad_norm": 0.350902547985464, "learning_rate": 7.631027253668762e-06, "loss": 1.4599, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 236 }, { "epoch": 0.7893422148209825, "grad_norm": 0.36780129033305325, "learning_rate": 7.6280535855003936e-06, "loss": 1.4872, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 237 }, { "epoch": 0.7926727726894255, "grad_norm": 0.3504301681190647, "learning_rate": 7.625065824117956e-06, "loss": 1.4508, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 238 }, { "epoch": 0.7960033305578684, "grad_norm": 0.425786005279154, "learning_rate": 7.622063869094748e-06, "loss": 1.5359, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 239 }, { "epoch": 0.7993338884263114, "grad_norm": 0.3423914333711706, "learning_rate": 7.619047619047619e-06, "loss": 1.5116, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 240 }, { "epoch": 0.8026644462947544, "grad_norm": 0.39752748882813016, "learning_rate": 7.616016971625564e-06, "loss": 1.3967, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 241 }, { "epoch": 0.8059950041631974, "grad_norm": 0.35349720101513005, "learning_rate": 7.61297182349814e-06, "loss": 1.428, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 242 }, { "epoch": 0.8093255620316403, "grad_norm": 0.3592529486243108, "learning_rate": 7.609912070343725e-06, "loss": 1.4716, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 243 }, { "epoch": 0.8126561199000832, "grad_norm": 0.41007914987868593, "learning_rate": 7.606837606837607e-06, "loss": 1.4601, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 244 }, { "epoch": 0.8159866777685262, "grad_norm": 0.4368820717106569, "learning_rate": 7.603748326639893e-06, "loss": 1.4299, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 245 }, { "epoch": 0.8193172356369692, "grad_norm": 0.34781376516299506, "learning_rate": 7.600644122383253e-06, "loss": 1.3832, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 246 }, { "epoch": 0.8226477935054122, "grad_norm": 0.4378928638690168, "learning_rate": 7.597524885660478e-06, "loss": 1.5006, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 247 }, { "epoch": 0.8259783513738551, "grad_norm": 0.38866511125189074, "learning_rate": 7.594390507011865e-06, "loss": 1.3808, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 248 }, { "epoch": 0.829308909242298, "grad_norm": 0.3796151796802332, "learning_rate": 7.591240875912408e-06, "loss": 1.4048, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 249 }, { "epoch": 0.832639467110741, "grad_norm": 0.47512939093169254, "learning_rate": 7.588075880758807e-06, "loss": 1.4533, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 250 }, { "epoch": 0.835970024979184, "grad_norm": 0.4002177494781384, "learning_rate": 7.584895408856289e-06, "loss": 1.4364, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 251 }, { "epoch": 0.839300582847627, "grad_norm": 0.41628446885968545, "learning_rate": 7.581699346405228e-06, "loss": 1.5213, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 252 }, { "epoch": 0.8426311407160699, "grad_norm": 0.41586597700526384, "learning_rate": 7.578487578487578e-06, "loss": 1.4163, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 253 }, { "epoch": 0.8459616985845129, "grad_norm": 0.37080164887555395, "learning_rate": 7.575259989053093e-06, "loss": 1.4262, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 254 }, { "epoch": 0.8492922564529559, "grad_norm": 0.44276862899193814, "learning_rate": 7.57201646090535e-06, "loss": 1.4434, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 255 }, { "epoch": 0.8526228143213989, "grad_norm": 0.3565514945143501, "learning_rate": 7.568756875687569e-06, "loss": 1.4628, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 256 }, { "epoch": 0.8559533721898418, "grad_norm": 0.3424453222650746, "learning_rate": 7.565481113868211e-06, "loss": 1.4397, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 257 }, { "epoch": 0.8592839300582847, "grad_norm": 0.36361177745212486, "learning_rate": 7.562189054726368e-06, "loss": 1.434, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 258 }, { "epoch": 0.8626144879267277, "grad_norm": 0.39961369778575284, "learning_rate": 7.558880576336936e-06, "loss": 1.3263, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 259 }, { "epoch": 0.8659450457951707, "grad_norm": 0.3694683835624918, "learning_rate": 7.555555555555556e-06, "loss": 1.465, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 260 }, { "epoch": 0.8692756036636137, "grad_norm": 0.38926907075141, "learning_rate": 7.552213868003341e-06, "loss": 1.4639, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 261 }, { "epoch": 0.8726061615320566, "grad_norm": 0.41002402289266, "learning_rate": 7.548855388051367e-06, "loss": 1.4583, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 262 }, { "epoch": 0.8759367194004996, "grad_norm": 0.39476689396263037, "learning_rate": 7.545479988804925e-06, "loss": 1.5369, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 263 }, { "epoch": 0.8792672772689425, "grad_norm": 0.42338165790994337, "learning_rate": 7.542087542087541e-06, "loss": 1.4149, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 264 }, { "epoch": 0.8825978351373855, "grad_norm": 0.37580056414171503, "learning_rate": 7.538677918424753e-06, "loss": 1.4767, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 265 }, { "epoch": 0.8859283930058285, "grad_norm": 0.35524395112624974, "learning_rate": 7.535250987027637e-06, "loss": 1.4565, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 266 }, { "epoch": 0.8892589508742714, "grad_norm": 0.333001020301385, "learning_rate": 7.531806615776081e-06, "loss": 1.4653, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 267 }, { "epoch": 0.8925895087427144, "grad_norm": 0.325740334034441, "learning_rate": 7.5283446712018136e-06, "loss": 1.4583, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 268 }, { "epoch": 0.8959200666111574, "grad_norm": 0.3579186582787629, "learning_rate": 7.524865018471157e-06, "loss": 1.441, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 269 }, { "epoch": 0.8992506244796004, "grad_norm": 0.38100337783570354, "learning_rate": 7.521367521367521e-06, "loss": 1.4725, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 270 }, { "epoch": 0.9025811823480433, "grad_norm": 0.4439224251441086, "learning_rate": 7.5178520422736365e-06, "loss": 1.4433, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 271 }, { "epoch": 0.9059117402164862, "grad_norm": 0.36404138775247186, "learning_rate": 7.514318442153494e-06, "loss": 1.4502, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 272 }, { "epoch": 0.9092422980849292, "grad_norm": 0.3739016590981095, "learning_rate": 7.5107665805340226e-06, "loss": 1.5614, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 273 }, { "epoch": 0.9125728559533722, "grad_norm": 0.34618213523089303, "learning_rate": 7.5071963154864715e-06, "loss": 1.4818, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 274 }, { "epoch": 0.9159034138218152, "grad_norm": 0.3516565079552471, "learning_rate": 7.5036075036075024e-06, "loss": 1.4811, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 275 }, { "epoch": 0.9192339716902581, "grad_norm": 0.372847709765313, "learning_rate": 7.499999999999999e-06, "loss": 1.4314, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 276 }, { "epoch": 0.922564529558701, "grad_norm": 0.33633722585110437, "learning_rate": 7.496373658253553e-06, "loss": 1.467, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 277 }, { "epoch": 0.925895087427144, "grad_norm": 0.3502677047499933, "learning_rate": 7.4927283304246645e-06, "loss": 1.465, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 278 }, { "epoch": 0.929225645295587, "grad_norm": 0.3301559549021256, "learning_rate": 7.4890638670166225e-06, "loss": 1.4345, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 279 }, { "epoch": 0.93255620316403, "grad_norm": 0.3995138440783666, "learning_rate": 7.485380116959064e-06, "loss": 1.3383, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 280 }, { "epoch": 0.9358867610324729, "grad_norm": 0.3871081436839965, "learning_rate": 7.481676927587217e-06, "loss": 1.4796, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 281 }, { "epoch": 0.9392173189009159, "grad_norm": 0.4376413636164512, "learning_rate": 7.4779541446208115e-06, "loss": 1.4644, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 282 }, { "epoch": 0.9425478767693589, "grad_norm": 0.43206377875237645, "learning_rate": 7.474211612142647e-06, "loss": 1.4107, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 283 }, { "epoch": 0.9458784346378019, "grad_norm": 0.4025303715871277, "learning_rate": 7.470449172576832e-06, "loss": 1.4318, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 284 }, { "epoch": 0.9492089925062448, "grad_norm": 0.37724819780360036, "learning_rate": 7.466666666666667e-06, "loss": 1.4454, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 285 }, { "epoch": 0.9525395503746877, "grad_norm": 0.35328542805788227, "learning_rate": 7.462863933452169e-06, "loss": 1.4175, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 286 }, { "epoch": 0.9558701082431307, "grad_norm": 0.3788093515621439, "learning_rate": 7.459040810247245e-06, "loss": 1.4429, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 287 }, { "epoch": 0.9592006661115737, "grad_norm": 0.4018461850957888, "learning_rate": 7.455197132616486e-06, "loss": 1.4679, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 288 }, { "epoch": 0.9625312239800167, "grad_norm": 0.3792573314031364, "learning_rate": 7.451332734351601e-06, "loss": 1.5191, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 289 }, { "epoch": 0.9658617818484596, "grad_norm": 0.4173737668171256, "learning_rate": 7.447447447447447e-06, "loss": 1.4999, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 290 }, { "epoch": 0.9691923397169026, "grad_norm": 0.34073643176316165, "learning_rate": 7.443541102077687e-06, "loss": 1.3667, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 291 }, { "epoch": 0.9725228975853455, "grad_norm": 0.37255255694817807, "learning_rate": 7.439613526570048e-06, "loss": 1.4196, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 292 }, { "epoch": 0.9758534554537885, "grad_norm": 0.3751657637349412, "learning_rate": 7.435664547381168e-06, "loss": 1.4545, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 293 }, { "epoch": 0.9791840133222315, "grad_norm": 0.45200307278108437, "learning_rate": 7.431693989071039e-06, "loss": 1.4199, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 294 }, { "epoch": 0.9825145711906744, "grad_norm": 0.3889800375255201, "learning_rate": 7.427701674277017e-06, "loss": 1.4801, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 295 }, { "epoch": 0.9858451290591174, "grad_norm": 0.3473904572951369, "learning_rate": 7.4236874236874235e-06, "loss": 1.393, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 296 }, { "epoch": 0.9891756869275604, "grad_norm": 0.3581161377664693, "learning_rate": 7.419651056014692e-06, "loss": 1.4073, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 297 }, { "epoch": 0.9925062447960034, "grad_norm": 0.35620326479274533, "learning_rate": 7.415592387968079e-06, "loss": 1.43, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 298 }, { "epoch": 0.9958368026644463, "grad_norm": 0.3676482591149261, "learning_rate": 7.4115112342259155e-06, "loss": 1.4148, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 299 }, { "epoch": 0.9991673605328892, "grad_norm": 0.43615077194471996, "learning_rate": 7.407407407407408e-06, "loss": 1.4154, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 300 }, { "epoch": 1.0, "grad_norm": 0.5905255990041776, "learning_rate": 7.403280718043948e-06, "loss": 1.4231, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 301 }, { "epoch": 1.003330557868443, "grad_norm": 0.4020383385971024, "learning_rate": 7.399130974549968e-06, "loss": 1.4394, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 302 }, { "epoch": 1.0066611157368859, "grad_norm": 0.3865309278317666, "learning_rate": 7.394957983193276e-06, "loss": 1.4413, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 303 }, { "epoch": 1.009991673605329, "grad_norm": 0.40327921143010825, "learning_rate": 7.390761548064918e-06, "loss": 1.4576, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 304 }, { "epoch": 1.0133222314737718, "grad_norm": 0.4070700680157323, "learning_rate": 7.386541471048513e-06, "loss": 1.4817, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 305 }, { "epoch": 1.0166527893422148, "grad_norm": 0.43460108634631706, "learning_rate": 7.382297551789077e-06, "loss": 1.3939, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 306 }, { "epoch": 1.0199833472106579, "grad_norm": 0.42277608172713, "learning_rate": 7.378029587661315e-06, "loss": 1.4239, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 307 }, { "epoch": 1.0233139050791007, "grad_norm": 0.37583795807106635, "learning_rate": 7.373737373737374e-06, "loss": 1.4878, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 308 }, { "epoch": 1.0266444629475437, "grad_norm": 0.4013147771199415, "learning_rate": 7.3694207027540355e-06, "loss": 1.3758, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 309 }, { "epoch": 1.0299750208159866, "grad_norm": 0.38150977748656323, "learning_rate": 7.365079365079365e-06, "loss": 1.4122, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 310 }, { "epoch": 1.0333055786844296, "grad_norm": 0.36288526161353013, "learning_rate": 7.360713148678764e-06, "loss": 1.4775, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 311 }, { "epoch": 1.0366361365528727, "grad_norm": 0.5122154405495047, "learning_rate": 7.35632183908046e-06, "loss": 1.4385, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 312 }, { "epoch": 1.0399666944213155, "grad_norm": 0.4907680124574417, "learning_rate": 7.351905219340377e-06, "loss": 1.4321, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 313 }, { "epoch": 1.0432972522897586, "grad_norm": 0.3750039319171418, "learning_rate": 7.347463070006422e-06, "loss": 1.4609, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 314 }, { "epoch": 1.0466278101582014, "grad_norm": 0.415847010986813, "learning_rate": 7.342995169082125e-06, "loss": 1.399, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 315 }, { "epoch": 1.0499583680266444, "grad_norm": 0.40484373034787197, "learning_rate": 7.338501291989663e-06, "loss": 1.4082, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 316 }, { "epoch": 1.0532889258950875, "grad_norm": 0.3556695114896482, "learning_rate": 7.333981211532231e-06, "loss": 1.5045, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 317 }, { "epoch": 1.0566194837635303, "grad_norm": 0.4217178250762373, "learning_rate": 7.329434697855749e-06, "loss": 1.5051, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 318 }, { "epoch": 1.0599500416319734, "grad_norm": 0.44541116731095065, "learning_rate": 7.324861518409905e-06, "loss": 1.454, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 319 }, { "epoch": 1.0632805995004164, "grad_norm": 0.3722519430085194, "learning_rate": 7.320261437908496e-06, "loss": 1.4864, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 320 }, { "epoch": 1.0666111573688593, "grad_norm": 0.49955941789670055, "learning_rate": 7.315634218289086e-06, "loss": 1.4328, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 321 }, { "epoch": 1.0699417152373023, "grad_norm": 0.559077472675475, "learning_rate": 7.310979618671926e-06, "loss": 1.4387, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 322 }, { "epoch": 1.0732722731057451, "grad_norm": 0.38492000673298576, "learning_rate": 7.306297395318167e-06, "loss": 1.4173, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 323 }, { "epoch": 1.0766028309741882, "grad_norm": 0.46264263086480695, "learning_rate": 7.301587301587301e-06, "loss": 1.458, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 324 }, { "epoch": 1.0799333888426312, "grad_norm": 0.48393689092527553, "learning_rate": 7.296849087893865e-06, "loss": 1.4984, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 325 }, { "epoch": 1.083263946711074, "grad_norm": 0.3833552546352091, "learning_rate": 7.29208250166334e-06, "loss": 1.4801, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 326 }, { "epoch": 1.0865945045795171, "grad_norm": 0.45636346067253053, "learning_rate": 7.287287287287286e-06, "loss": 1.4335, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 327 }, { "epoch": 1.08992506244796, "grad_norm": 0.5170759134234261, "learning_rate": 7.282463186077643e-06, "loss": 1.4619, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 328 }, { "epoch": 1.093255620316403, "grad_norm": 0.41413948804668765, "learning_rate": 7.277609936220207e-06, "loss": 1.4976, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 329 }, { "epoch": 1.096586178184846, "grad_norm": 0.47675818666743885, "learning_rate": 7.272727272727272e-06, "loss": 1.5166, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 330 }, { "epoch": 1.0999167360532889, "grad_norm": 0.4446284191251516, "learning_rate": 7.267814927389396e-06, "loss": 1.3756, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 331 }, { "epoch": 1.103247293921732, "grad_norm": 0.3712448236233366, "learning_rate": 7.262872628726287e-06, "loss": 1.4177, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 332 }, { "epoch": 1.1065778517901748, "grad_norm": 0.354780077610888, "learning_rate": 7.257900101936799e-06, "loss": 1.3888, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 333 }, { "epoch": 1.1099084096586178, "grad_norm": 0.4287556354375581, "learning_rate": 7.252897068847988e-06, "loss": 1.463, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 334 }, { "epoch": 1.1132389675270609, "grad_norm": 0.43855718184558823, "learning_rate": 7.247863247863247e-06, "loss": 1.4375, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 335 }, { "epoch": 1.1165695253955037, "grad_norm": 0.36652449074881177, "learning_rate": 7.242798353909463e-06, "loss": 1.4724, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 336 }, { "epoch": 1.1199000832639467, "grad_norm": 0.41471476618444547, "learning_rate": 7.237702098383213e-06, "loss": 1.4368, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 337 }, { "epoch": 1.1232306411323896, "grad_norm": 0.3584246686612814, "learning_rate": 7.2325741890959285e-06, "loss": 1.507, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 338 }, { "epoch": 1.1265611990008326, "grad_norm": 0.35472951006324893, "learning_rate": 7.227414330218068e-06, "loss": 1.3847, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 339 }, { "epoch": 1.1298917568692757, "grad_norm": 0.40770232084467445, "learning_rate": 7.222222222222222e-06, "loss": 1.4722, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 340 }, { "epoch": 1.1332223147377185, "grad_norm": 0.3854760192656062, "learning_rate": 7.216997561825147e-06, "loss": 1.4397, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 341 }, { "epoch": 1.1365528726061616, "grad_norm": 0.3425435570180868, "learning_rate": 7.211740041928721e-06, "loss": 1.3917, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 342 }, { "epoch": 1.1398834304746046, "grad_norm": 0.3629363871231361, "learning_rate": 7.206449351559762e-06, "loss": 1.4329, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 343 }, { "epoch": 1.1432139883430474, "grad_norm": 0.3746351865474382, "learning_rate": 7.20112517580872e-06, "loss": 1.4325, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 344 }, { "epoch": 1.1465445462114905, "grad_norm": 0.35633065876642767, "learning_rate": 7.195767195767195e-06, "loss": 1.4802, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 345 }, { "epoch": 1.1498751040799333, "grad_norm": 0.41086591430313346, "learning_rate": 7.1903750884642605e-06, "loss": 1.386, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 346 }, { "epoch": 1.1532056619483764, "grad_norm": 0.4248601636564269, "learning_rate": 7.184948526801562e-06, "loss": 1.3764, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 347 }, { "epoch": 1.1565362198168194, "grad_norm": 0.3677689809276377, "learning_rate": 7.179487179487179e-06, "loss": 1.3977, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 348 }, { "epoch": 1.1598667776852623, "grad_norm": 0.4562607243713519, "learning_rate": 7.173990710968203e-06, "loss": 1.4743, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 349 }, { "epoch": 1.1631973355537053, "grad_norm": 0.36851546433374166, "learning_rate": 7.168458781362006e-06, "loss": 1.3998, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 350 }, { "epoch": 1.1665278934221481, "grad_norm": 0.3440172529786023, "learning_rate": 7.1628910463861915e-06, "loss": 1.4388, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 351 }, { "epoch": 1.1698584512905912, "grad_norm": 0.34899417175359176, "learning_rate": 7.157287157287158e-06, "loss": 1.4109, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 352 }, { "epoch": 1.1731890091590342, "grad_norm": 0.3369095274891404, "learning_rate": 7.151646760767281e-06, "loss": 1.4721, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 353 }, { "epoch": 1.176519567027477, "grad_norm": 0.3373083746918916, "learning_rate": 7.145969498910675e-06, "loss": 1.3879, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 354 }, { "epoch": 1.1798501248959201, "grad_norm": 0.3127699546260214, "learning_rate": 7.140255009107467e-06, "loss": 1.4067, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 355 }, { "epoch": 1.183180682764363, "grad_norm": 0.3455714541263257, "learning_rate": 7.1345029239766076e-06, "loss": 1.4729, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 356 }, { "epoch": 1.186511240632806, "grad_norm": 0.35459135197814406, "learning_rate": 7.128712871287129e-06, "loss": 1.4845, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 357 }, { "epoch": 1.189841798501249, "grad_norm": 0.3382392537839561, "learning_rate": 7.122884473877851e-06, "loss": 1.4796, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 358 }, { "epoch": 1.1931723563696919, "grad_norm": 0.3229617810865785, "learning_rate": 7.117017349575488e-06, "loss": 1.4258, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 359 }, { "epoch": 1.196502914238135, "grad_norm": 0.4132878845320615, "learning_rate": 7.11111111111111e-06, "loss": 1.4344, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 360 }, { "epoch": 1.1998334721065778, "grad_norm": 0.3909252234186588, "learning_rate": 7.105165366034932e-06, "loss": 1.4136, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 361 }, { "epoch": 1.2031640299750208, "grad_norm": 0.4166542946239009, "learning_rate": 7.0991797166293805e-06, "loss": 1.418, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 362 }, { "epoch": 1.2064945878434639, "grad_norm": 0.3978265092622875, "learning_rate": 7.093153759820426e-06, "loss": 1.4778, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 363 }, { "epoch": 1.2098251457119067, "grad_norm": 0.3362476483926624, "learning_rate": 7.087087087087086e-06, "loss": 1.4045, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 364 }, { "epoch": 1.2131557035803497, "grad_norm": 0.34227618124914144, "learning_rate": 7.0809792843691135e-06, "loss": 1.3691, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 365 }, { "epoch": 1.2164862614487926, "grad_norm": 0.4154500404546309, "learning_rate": 7.074829931972789e-06, "loss": 1.4296, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 366 }, { "epoch": 1.2198168193172356, "grad_norm": 0.3910386371341375, "learning_rate": 7.068638604474782e-06, "loss": 1.4128, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 367 }, { "epoch": 1.2231473771856787, "grad_norm": 0.32973016037230485, "learning_rate": 7.062404870624048e-06, "loss": 1.3952, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 368 }, { "epoch": 1.2264779350541215, "grad_norm": 0.3476414929125133, "learning_rate": 7.056128293241695e-06, "loss": 1.4132, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 369 }, { "epoch": 1.2298084929225646, "grad_norm": 0.3490075847160727, "learning_rate": 7.0498084291187725e-06, "loss": 1.5034, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 370 }, { "epoch": 1.2331390507910074, "grad_norm": 0.4213209017684047, "learning_rate": 7.043444828911956e-06, "loss": 1.4407, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 371 }, { "epoch": 1.2364696086594504, "grad_norm": 0.41156726116014214, "learning_rate": 7.037037037037037e-06, "loss": 1.4922, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 372 }, { "epoch": 1.2398001665278935, "grad_norm": 0.3274736563867899, "learning_rate": 7.0305845915602e-06, "loss": 1.4443, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 373 }, { "epoch": 1.2431307243963363, "grad_norm": 0.4016877039684572, "learning_rate": 7.024087024087023e-06, "loss": 1.4765, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 374 }, { "epoch": 1.2464612822647794, "grad_norm": 0.37926187648963133, "learning_rate": 7.017543859649123e-06, "loss": 1.4944, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 375 }, { "epoch": 1.2497918401332222, "grad_norm": 0.3995775555374175, "learning_rate": 7.0109546165884185e-06, "loss": 1.4737, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 376 }, { "epoch": 1.2531223980016652, "grad_norm": 0.4179330927454956, "learning_rate": 7.0043188064389475e-06, "loss": 1.396, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 377 }, { "epoch": 1.2564529558701083, "grad_norm": 0.4026676583822718, "learning_rate": 6.997635933806146e-06, "loss": 1.5024, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 378 }, { "epoch": 1.2597835137385511, "grad_norm": 0.3729935293489866, "learning_rate": 6.9909054962435735e-06, "loss": 1.5035, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 379 }, { "epoch": 1.2631140716069942, "grad_norm": 0.37785861617292904, "learning_rate": 6.984126984126983e-06, "loss": 1.4859, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 380 }, { "epoch": 1.266444629475437, "grad_norm": 0.34618072727066834, "learning_rate": 6.977299880525687e-06, "loss": 1.3753, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 381 }, { "epoch": 1.26977518734388, "grad_norm": 0.3603657688818211, "learning_rate": 6.970423661071143e-06, "loss": 1.4396, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 382 }, { "epoch": 1.2731057452123231, "grad_norm": 0.31695078316874364, "learning_rate": 6.963497793822704e-06, "loss": 1.4512, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 383 }, { "epoch": 1.2764363030807662, "grad_norm": 0.3998834526312468, "learning_rate": 6.956521739130433e-06, "loss": 1.4068, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 384 }, { "epoch": 1.279766860949209, "grad_norm": 0.40218592316674945, "learning_rate": 6.949494949494949e-06, "loss": 1.4314, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 385 }, { "epoch": 1.2830974188176518, "grad_norm": 0.4377216092057675, "learning_rate": 6.942416869424169e-06, "loss": 1.4159, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 386 }, { "epoch": 1.2864279766860949, "grad_norm": 0.3806613338175727, "learning_rate": 6.935286935286935e-06, "loss": 1.4383, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 387 }, { "epoch": 1.289758534554538, "grad_norm": 0.41315217581288083, "learning_rate": 6.928104575163398e-06, "loss": 1.4639, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 388 }, { "epoch": 1.293089092422981, "grad_norm": 0.4242068360276873, "learning_rate": 6.920869208692086e-06, "loss": 1.5043, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 389 }, { "epoch": 1.2964196502914238, "grad_norm": 0.40526133848179174, "learning_rate": 6.913580246913578e-06, "loss": 1.4969, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 390 }, { "epoch": 1.2997502081598669, "grad_norm": 0.4390648977103527, "learning_rate": 6.9062370921106965e-06, "loss": 1.4634, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 391 }, { "epoch": 1.3030807660283097, "grad_norm": 0.3293053257002483, "learning_rate": 6.898839137645108e-06, "loss": 1.4837, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 392 }, { "epoch": 1.3064113238967527, "grad_norm": 0.3741205703169676, "learning_rate": 6.891385767790261e-06, "loss": 1.3888, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 393 }, { "epoch": 1.3097418817651958, "grad_norm": 0.36736277922290345, "learning_rate": 6.883876357560567e-06, "loss": 1.4422, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 394 }, { "epoch": 1.3130724396336386, "grad_norm": 0.34987451065304387, "learning_rate": 6.876310272536688e-06, "loss": 1.4384, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 395 }, { "epoch": 1.3164029975020817, "grad_norm": 0.3574591374681954, "learning_rate": 6.868686868686868e-06, "loss": 1.4453, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 396 }, { "epoch": 1.3197335553705245, "grad_norm": 0.31108139602911883, "learning_rate": 6.861005492184199e-06, "loss": 1.4302, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 397 }, { "epoch": 1.3230641132389676, "grad_norm": 0.3317920901111113, "learning_rate": 6.853265479219677e-06, "loss": 1.4599, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 398 }, { "epoch": 1.3263946711074106, "grad_norm": 0.3319586529185681, "learning_rate": 6.8454661558109825e-06, "loss": 1.4349, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 399 }, { "epoch": 1.3297252289758534, "grad_norm": 0.35385561486286676, "learning_rate": 6.837606837606837e-06, "loss": 1.4262, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 400 }, { "epoch": 1.3330557868442965, "grad_norm": 0.36123629997437273, "learning_rate": 6.82968682968683e-06, "loss": 1.4475, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 401 }, { "epoch": 1.3363863447127393, "grad_norm": 0.3678679588945442, "learning_rate": 6.821705426356589e-06, "loss": 1.4662, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 402 }, { "epoch": 1.3397169025811824, "grad_norm": 0.35623277676543963, "learning_rate": 6.813661910938175e-06, "loss": 1.4157, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 403 }, { "epoch": 1.3430474604496254, "grad_norm": 0.3670378130601921, "learning_rate": 6.805555555555554e-06, "loss": 1.4289, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 404 }, { "epoch": 1.3463780183180682, "grad_norm": 0.37365353793241013, "learning_rate": 6.797385620915031e-06, "loss": 1.4758, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 405 }, { "epoch": 1.3497085761865113, "grad_norm": 0.4227767618895852, "learning_rate": 6.78915135608049e-06, "loss": 1.4522, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 406 }, { "epoch": 1.3530391340549541, "grad_norm": 0.392419028331304, "learning_rate": 6.780851998243303e-06, "loss": 1.4386, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 407 }, { "epoch": 1.3563696919233972, "grad_norm": 0.34332622194519336, "learning_rate": 6.772486772486772e-06, "loss": 1.4143, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 408 }, { "epoch": 1.3597002497918402, "grad_norm": 0.39554534288670906, "learning_rate": 6.76405489154493e-06, "loss": 1.4289, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 409 }, { "epoch": 1.363030807660283, "grad_norm": 0.3680781980427255, "learning_rate": 6.7555555555555545e-06, "loss": 1.4604, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 410 }, { "epoch": 1.3663613655287261, "grad_norm": 0.359696703224119, "learning_rate": 6.7469879518072274e-06, "loss": 1.4552, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 411 }, { "epoch": 1.369691923397169, "grad_norm": 0.43946142988468057, "learning_rate": 6.738351254480287e-06, "loss": 1.4649, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 412 }, { "epoch": 1.373022481265612, "grad_norm": 0.38158471914984216, "learning_rate": 6.729644624381466e-06, "loss": 1.4553, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 413 }, { "epoch": 1.376353039134055, "grad_norm": 0.3785264160376015, "learning_rate": 6.720867208672086e-06, "loss": 1.457, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 414 }, { "epoch": 1.3796835970024979, "grad_norm": 0.40275915279313634, "learning_rate": 6.712018140589569e-06, "loss": 1.4665, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 415 }, { "epoch": 1.383014154870941, "grad_norm": 0.37268382461278277, "learning_rate": 6.703096539162113e-06, "loss": 1.377, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 416 }, { "epoch": 1.3863447127393838, "grad_norm": 0.3362832443073036, "learning_rate": 6.694101508916324e-06, "loss": 1.4122, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 417 }, { "epoch": 1.3896752706078268, "grad_norm": 0.3428291854645596, "learning_rate": 6.6850321395775945e-06, "loss": 1.3466, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 418 }, { "epoch": 1.3930058284762699, "grad_norm": 0.38976496538071015, "learning_rate": 6.675887505763023e-06, "loss": 1.4711, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 419 }, { "epoch": 1.3963363863447127, "grad_norm": 0.4034367524201395, "learning_rate": 6.666666666666666e-06, "loss": 1.5079, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 420 }, { "epoch": 1.3996669442131557, "grad_norm": 0.38251655422807695, "learning_rate": 6.657368665736867e-06, "loss": 1.4715, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 421 }, { "epoch": 1.4029975020815986, "grad_norm": 0.3636615753904805, "learning_rate": 6.647992530345471e-06, "loss": 1.4175, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 422 }, { "epoch": 1.4063280599500416, "grad_norm": 0.3543871236347375, "learning_rate": 6.6385372714486634e-06, "loss": 1.4008, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 423 }, { "epoch": 1.4096586178184847, "grad_norm": 0.4270397824248548, "learning_rate": 6.6290018832391705e-06, "loss": 1.4082, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 424 }, { "epoch": 1.4129891756869275, "grad_norm": 0.41956585580281563, "learning_rate": 6.6193853427895966e-06, "loss": 1.4075, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 425 }, { "epoch": 1.4163197335553706, "grad_norm": 0.47761484099497725, "learning_rate": 6.60968660968661e-06, "loss": 1.4104, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 426 }, { "epoch": 1.4196502914238134, "grad_norm": 0.4078261265233408, "learning_rate": 6.599904625655699e-06, "loss": 1.4317, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 427 }, { "epoch": 1.4229808492922564, "grad_norm": 0.37691692681004796, "learning_rate": 6.590038314176245e-06, "loss": 1.391, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 428 }, { "epoch": 1.4263114071606995, "grad_norm": 0.3288508827565593, "learning_rate": 6.580086580086579e-06, "loss": 1.401, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 429 }, { "epoch": 1.4296419650291423, "grad_norm": 0.338319616372442, "learning_rate": 6.570048309178745e-06, "loss": 1.4178, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 430 }, { "epoch": 1.4329725228975854, "grad_norm": 0.414862604672987, "learning_rate": 6.559922367782628e-06, "loss": 1.4642, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 431 }, { "epoch": 1.4363030807660282, "grad_norm": 0.3949017633125201, "learning_rate": 6.54970760233918e-06, "loss": 1.3643, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 432 }, { "epoch": 1.4396336386344712, "grad_norm": 0.3872663647349424, "learning_rate": 6.53940283896231e-06, "loss": 1.3998, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 433 }, { "epoch": 1.4429641965029143, "grad_norm": 0.3778182716944692, "learning_rate": 6.529006882989183e-06, "loss": 1.421, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 434 }, { "epoch": 1.4462947543713571, "grad_norm": 0.3368637084806252, "learning_rate": 6.518518518518519e-06, "loss": 1.4562, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 435 }, { "epoch": 1.4496253122398002, "grad_norm": 0.37088068849156625, "learning_rate": 6.507936507936509e-06, "loss": 1.389, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 436 }, { "epoch": 1.452955870108243, "grad_norm": 0.4171977510324979, "learning_rate": 6.497259591429994e-06, "loss": 1.4314, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 437 }, { "epoch": 1.456286427976686, "grad_norm": 0.36493233792748947, "learning_rate": 6.486486486486486e-06, "loss": 1.4239, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 438 }, { "epoch": 1.4596169858451291, "grad_norm": 0.36409025362836434, "learning_rate": 6.475615887380592e-06, "loss": 1.4011, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 439 }, { "epoch": 1.462947543713572, "grad_norm": 0.3432633374051585, "learning_rate": 6.464646464646463e-06, "loss": 1.4706, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 440 }, { "epoch": 1.466278101582015, "grad_norm": 0.36918146681400343, "learning_rate": 6.453576864535769e-06, "loss": 1.4048, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 441 }, { "epoch": 1.4696086594504578, "grad_norm": 0.3558974109435063, "learning_rate": 6.442405708460755e-06, "loss": 1.4233, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 442 }, { "epoch": 1.4729392173189009, "grad_norm": 0.3319935101093491, "learning_rate": 6.431131592421914e-06, "loss": 1.4557, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 443 }, { "epoch": 1.476269775187344, "grad_norm": 0.3957835276431251, "learning_rate": 6.419753086419752e-06, "loss": 1.4974, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 444 }, { "epoch": 1.479600333055787, "grad_norm": 0.46743126820019115, "learning_rate": 6.408268733850127e-06, "loss": 1.3428, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 445 }, { "epoch": 1.4829308909242298, "grad_norm": 0.32072751511352704, "learning_rate": 6.396677050882658e-06, "loss": 1.4252, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 446 }, { "epoch": 1.4862614487926726, "grad_norm": 0.3691624108782593, "learning_rate": 6.384976525821596e-06, "loss": 1.4288, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 447 }, { "epoch": 1.4895920066611157, "grad_norm": 0.41832466518878647, "learning_rate": 6.373165618448636e-06, "loss": 1.4287, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 448 }, { "epoch": 1.4929225645295587, "grad_norm": 0.3728200914294547, "learning_rate": 6.361242759347024e-06, "loss": 1.391, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 449 }, { "epoch": 1.4962531223980018, "grad_norm": 0.3489172461380398, "learning_rate": 6.349206349206349e-06, "loss": 1.4012, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 450 }, { "epoch": 1.4995836802664446, "grad_norm": 0.45831242097179337, "learning_rate": 6.337054758107389e-06, "loss": 1.4062, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 451 }, { "epoch": 1.5029142381348874, "grad_norm": 0.4485083988308969, "learning_rate": 6.324786324786324e-06, "loss": 1.4077, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 452 }, { "epoch": 1.5062447960033305, "grad_norm": 0.3469124587165823, "learning_rate": 6.312399355877616e-06, "loss": 1.3635, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 453 }, { "epoch": 1.5095753538717736, "grad_norm": 0.4359834643536742, "learning_rate": 6.299892125134842e-06, "loss": 1.3951, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 454 }, { "epoch": 1.5129059117402166, "grad_norm": 0.43347338145656295, "learning_rate": 6.287262872628726e-06, "loss": 1.438, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 455 }, { "epoch": 1.5162364696086594, "grad_norm": 0.3544519721589859, "learning_rate": 6.274509803921569e-06, "loss": 1.4028, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 456 }, { "epoch": 1.5195670274771023, "grad_norm": 0.4175623558211923, "learning_rate": 6.261631089217296e-06, "loss": 1.4649, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 457 }, { "epoch": 1.5228975853455453, "grad_norm": 0.47794327593006264, "learning_rate": 6.248624862486248e-06, "loss": 1.4552, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 458 }, { "epoch": 1.5262281432139884, "grad_norm": 0.5102221497723193, "learning_rate": 6.235489220563847e-06, "loss": 1.5577, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 459 }, { "epoch": 1.5295587010824314, "grad_norm": 0.361727454686882, "learning_rate": 6.2222222222222215e-06, "loss": 1.4977, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 460 }, { "epoch": 1.5328892589508742, "grad_norm": 0.43568797487755334, "learning_rate": 6.208821887213847e-06, "loss": 1.4417, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 461 }, { "epoch": 1.536219816819317, "grad_norm": 0.39795557103291623, "learning_rate": 6.195286195286195e-06, "loss": 1.4479, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 462 }, { "epoch": 1.5395503746877601, "grad_norm": 0.3699426752838303, "learning_rate": 6.181613085166384e-06, "loss": 1.4379, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 463 }, { "epoch": 1.5428809325562032, "grad_norm": 0.5138765482501748, "learning_rate": 6.167800453514738e-06, "loss": 1.4433, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 464 }, { "epoch": 1.5462114904246462, "grad_norm": 0.5597671339637968, "learning_rate": 6.153846153846153e-06, "loss": 1.4255, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 465 }, { "epoch": 1.549542048293089, "grad_norm": 0.4443208189107028, "learning_rate": 6.1397479954180976e-06, "loss": 1.4458, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 466 }, { "epoch": 1.552872606161532, "grad_norm": 0.41782304334586917, "learning_rate": 6.125503742084053e-06, "loss": 1.4362, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 467 }, { "epoch": 1.556203164029975, "grad_norm": 0.511701451750574, "learning_rate": 6.11111111111111e-06, "loss": 1.4378, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 468 }, { "epoch": 1.559533721898418, "grad_norm": 0.4272528437058103, "learning_rate": 6.096567771960442e-06, "loss": 1.4315, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 469 }, { "epoch": 1.562864279766861, "grad_norm": 0.42099653002903337, "learning_rate": 6.0818713450292395e-06, "loss": 1.4092, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 470 }, { "epoch": 1.5661948376353039, "grad_norm": 0.4635591149261861, "learning_rate": 6.067019400352732e-06, "loss": 1.4357, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 471 }, { "epoch": 1.569525395503747, "grad_norm": 0.5318262046494987, "learning_rate": 6.052009456264775e-06, "loss": 1.4753, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 472 }, { "epoch": 1.5728559533721898, "grad_norm": 0.4098578230232083, "learning_rate": 6.036838978015449e-06, "loss": 1.4192, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 473 }, { "epoch": 1.5761865112406328, "grad_norm": 0.4563174114919455, "learning_rate": 6.021505376344085e-06, "loss": 1.4676, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 474 }, { "epoch": 1.5795170691090759, "grad_norm": 0.5270544922424331, "learning_rate": 6.006006006006005e-06, "loss": 1.4267, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 475 }, { "epoch": 1.5828476269775187, "grad_norm": 0.3910787909582668, "learning_rate": 5.990338164251208e-06, "loss": 1.3766, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 476 }, { "epoch": 1.5861781848459617, "grad_norm": 0.4736515430850208, "learning_rate": 5.974499089253187e-06, "loss": 1.4437, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 477 }, { "epoch": 1.5895087427144046, "grad_norm": 0.5430796464569592, "learning_rate": 5.958485958485957e-06, "loss": 1.4482, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 478 }, { "epoch": 1.5928393005828476, "grad_norm": 0.38226206389298173, "learning_rate": 5.942295887047268e-06, "loss": 1.412, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 479 }, { "epoch": 1.5961698584512907, "grad_norm": 0.3721223079028304, "learning_rate": 5.925925925925925e-06, "loss": 1.45, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 480 }, { "epoch": 1.5995004163197337, "grad_norm": 0.3827064109331823, "learning_rate": 5.909373060211049e-06, "loss": 1.4217, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 481 }, { "epoch": 1.6028309741881765, "grad_norm": 0.33684324932641296, "learning_rate": 5.892634207240949e-06, "loss": 1.3557, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 482 }, { "epoch": 1.6061615320566194, "grad_norm": 0.31468847211788964, "learning_rate": 5.875706214689265e-06, "loss": 1.4122, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 483 }, { "epoch": 1.6094920899250624, "grad_norm": 0.4442799216781044, "learning_rate": 5.858585858585859e-06, "loss": 1.4285, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 484 }, { "epoch": 1.6128226477935055, "grad_norm": 0.4567121702156198, "learning_rate": 5.841269841269841e-06, "loss": 1.4764, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 485 }, { "epoch": 1.6161532056619485, "grad_norm": 0.3590206566567271, "learning_rate": 5.82375478927203e-06, "loss": 1.4229, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 486 }, { "epoch": 1.6194837635303914, "grad_norm": 0.3652198930331244, "learning_rate": 5.806037251123956e-06, "loss": 1.4151, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 487 }, { "epoch": 1.6228143213988342, "grad_norm": 0.35866861963268476, "learning_rate": 5.7881136950904385e-06, "loss": 1.3369, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 488 }, { "epoch": 1.6261448792672772, "grad_norm": 0.4750936045573692, "learning_rate": 5.7699805068226105e-06, "loss": 1.4715, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 489 }, { "epoch": 1.6294754371357203, "grad_norm": 0.3613198830707804, "learning_rate": 5.7516339869281045e-06, "loss": 1.4291, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 490 }, { "epoch": 1.6328059950041633, "grad_norm": 0.43606379412430957, "learning_rate": 5.733070348454964e-06, "loss": 1.4011, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 491 }, { "epoch": 1.6361365528726062, "grad_norm": 0.35042984426925494, "learning_rate": 5.7142857142857145e-06, "loss": 1.4368, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 492 }, { "epoch": 1.639467110741049, "grad_norm": 0.31661366243629, "learning_rate": 5.695276114437791e-06, "loss": 1.4541, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 493 }, { "epoch": 1.642797668609492, "grad_norm": 0.3561358967067642, "learning_rate": 5.676037483266399e-06, "loss": 1.433, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 494 }, { "epoch": 1.646128226477935, "grad_norm": 0.3931637346563919, "learning_rate": 5.656565656565656e-06, "loss": 1.4193, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 495 }, { "epoch": 1.6494587843463782, "grad_norm": 0.48631366553960975, "learning_rate": 5.6368563685636855e-06, "loss": 1.4012, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 496 }, { "epoch": 1.652789342214821, "grad_norm": 0.41348933242163105, "learning_rate": 5.616905248807089e-06, "loss": 1.3883, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 497 }, { "epoch": 1.6561199000832638, "grad_norm": 0.3541766139316355, "learning_rate": 5.59670781893004e-06, "loss": 1.363, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 498 }, { "epoch": 1.6594504579517069, "grad_norm": 0.410383164470969, "learning_rate": 5.576259489302967e-06, "loss": 1.3955, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 499 }, { "epoch": 1.66278101582015, "grad_norm": 0.4100549908496841, "learning_rate": 5.555555555555555e-06, "loss": 1.3663, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 500 }, { "epoch": 1.666111573688593, "grad_norm": 0.4122832272958553, "learning_rate": 5.534591194968553e-06, "loss": 1.5108, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 501 }, { "epoch": 1.6694421315570358, "grad_norm": 0.33209617039282874, "learning_rate": 5.513361462728551e-06, "loss": 1.4069, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 502 }, { "epoch": 1.6727726894254786, "grad_norm": 0.34650064809899755, "learning_rate": 5.491861288039631e-06, "loss": 1.3953, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 503 }, { "epoch": 1.6761032472939217, "grad_norm": 0.3583592015376779, "learning_rate": 5.47008547008547e-06, "loss": 1.4181, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 504 }, { "epoch": 1.6794338051623647, "grad_norm": 0.34343414571245584, "learning_rate": 5.448028673835125e-06, "loss": 1.418, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 505 }, { "epoch": 1.6827643630308078, "grad_norm": 0.35638669107128673, "learning_rate": 5.425685425685425e-06, "loss": 1.4052, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 506 }, { "epoch": 1.6860949208992506, "grad_norm": 0.3467424021658532, "learning_rate": 5.403050108932461e-06, "loss": 1.4581, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 507 }, { "epoch": 1.6894254787676934, "grad_norm": 0.32381127071831955, "learning_rate": 5.3801169590643285e-06, "loss": 1.459, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 508 }, { "epoch": 1.6927560366361365, "grad_norm": 0.3811936086039866, "learning_rate": 5.356880058866813e-06, "loss": 1.5033, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 509 }, { "epoch": 1.6960865945045795, "grad_norm": 0.3612050754686712, "learning_rate": 5.333333333333333e-06, "loss": 1.4137, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 510 }, { "epoch": 1.6994171523730226, "grad_norm": 0.35765265665477713, "learning_rate": 5.309470544369873e-06, "loss": 1.4087, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 511 }, { "epoch": 1.7027477102414654, "grad_norm": 0.3357163323849947, "learning_rate": 5.285285285285285e-06, "loss": 1.4851, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 512 }, { "epoch": 1.7060782681099083, "grad_norm": 0.3449646759899252, "learning_rate": 5.260770975056689e-06, "loss": 1.442, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 513 }, { "epoch": 1.7094088259783513, "grad_norm": 0.3335919341097906, "learning_rate": 5.235920852359208e-06, "loss": 1.454, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 514 }, { "epoch": 1.7127393838467944, "grad_norm": 0.3414007515866483, "learning_rate": 5.210727969348659e-06, "loss": 1.4762, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 515 }, { "epoch": 1.7160699417152374, "grad_norm": 0.37174665041283544, "learning_rate": 5.185185185185185e-06, "loss": 1.4615, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 516 }, { "epoch": 1.7194004995836802, "grad_norm": 0.37265087217053033, "learning_rate": 5.159285159285159e-06, "loss": 1.4072, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 517 }, { "epoch": 1.722731057452123, "grad_norm": 0.3445160578098801, "learning_rate": 5.1330203442879505e-06, "loss": 1.4337, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 518 }, { "epoch": 1.7260616153205661, "grad_norm": 0.3675807887019101, "learning_rate": 5.106382978723403e-06, "loss": 1.4147, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 519 }, { "epoch": 1.7293921731890092, "grad_norm": 0.34584285856367675, "learning_rate": 5.079365079365079e-06, "loss": 1.4193, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 520 }, { "epoch": 1.7327227310574522, "grad_norm": 0.3685778739128953, "learning_rate": 5.051958433253396e-06, "loss": 1.4466, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 521 }, { "epoch": 1.736053288925895, "grad_norm": 0.35632916296360506, "learning_rate": 5.02415458937198e-06, "loss": 1.4299, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 522 }, { "epoch": 1.739383846794338, "grad_norm": 0.3941128956001842, "learning_rate": 4.995944849959448e-06, "loss": 1.4264, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 523 }, { "epoch": 1.742714404662781, "grad_norm": 0.3481786883352737, "learning_rate": 4.967320261437908e-06, "loss": 1.4279, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 524 }, { "epoch": 1.746044962531224, "grad_norm": 0.3627527951339854, "learning_rate": 4.938271604938271e-06, "loss": 1.5152, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 525 }, { "epoch": 1.749375520399667, "grad_norm": 0.34527513358988937, "learning_rate": 4.9087893864013265e-06, "loss": 1.4088, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 526 }, { "epoch": 1.7527060782681099, "grad_norm": 0.368823414133051, "learning_rate": 4.878863826232247e-06, "loss": 1.3944, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 527 }, { "epoch": 1.756036636136553, "grad_norm": 0.3471938836863914, "learning_rate": 4.848484848484849e-06, "loss": 1.3809, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 528 }, { "epoch": 1.7593671940049957, "grad_norm": 0.39760839658681035, "learning_rate": 4.817642069550467e-06, "loss": 1.4081, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 529 }, { "epoch": 1.7626977518734388, "grad_norm": 0.35630999152948084, "learning_rate": 4.786324786324786e-06, "loss": 1.4049, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 530 }, { "epoch": 1.7660283097418819, "grad_norm": 0.3123127862091999, "learning_rate": 4.754521963824289e-06, "loss": 1.4033, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 531 }, { "epoch": 1.7693588676103247, "grad_norm": 0.3565716669933871, "learning_rate": 4.722222222222222e-06, "loss": 1.4548, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 532 }, { "epoch": 1.7726894254787677, "grad_norm": 0.3415824605451111, "learning_rate": 4.68941382327209e-06, "loss": 1.3379, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 533 }, { "epoch": 1.7760199833472106, "grad_norm": 0.37445157627374487, "learning_rate": 4.6560846560846555e-06, "loss": 1.5023, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 534 }, { "epoch": 1.7793505412156536, "grad_norm": 0.4140970552339397, "learning_rate": 4.622222222222222e-06, "loss": 1.4982, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 535 }, { "epoch": 1.7826810990840967, "grad_norm": 0.3696216853055909, "learning_rate": 4.587813620071684e-06, "loss": 1.3795, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 536 }, { "epoch": 1.7860116569525397, "grad_norm": 0.3374796769034963, "learning_rate": 4.552845528455284e-06, "loss": 1.4356, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 537 }, { "epoch": 1.7893422148209825, "grad_norm": 0.4227610049072286, "learning_rate": 4.517304189435337e-06, "loss": 1.4625, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 538 }, { "epoch": 1.7926727726894254, "grad_norm": 0.36612259553982557, "learning_rate": 4.4811753902663e-06, "loss": 1.4274, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 539 }, { "epoch": 1.7960033305578684, "grad_norm": 0.4222638209328834, "learning_rate": 4.444444444444443e-06, "loss": 1.5129, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 540 }, { "epoch": 1.7993338884263115, "grad_norm": 0.41009576553628174, "learning_rate": 4.407096171802053e-06, "loss": 1.4873, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 541 }, { "epoch": 1.8026644462947545, "grad_norm": 0.35086922544434007, "learning_rate": 4.369114877589454e-06, "loss": 1.3718, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 542 }, { "epoch": 1.8059950041631974, "grad_norm": 0.35855015526438827, "learning_rate": 4.33048433048433e-06, "loss": 1.4031, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 543 }, { "epoch": 1.8093255620316402, "grad_norm": 0.42477533100459036, "learning_rate": 4.291187739463601e-06, "loss": 1.4473, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 544 }, { "epoch": 1.8126561199000832, "grad_norm": 0.39791782472493653, "learning_rate": 4.251207729468599e-06, "loss": 1.4374, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 545 }, { "epoch": 1.8159866777685263, "grad_norm": 0.3444343384513091, "learning_rate": 4.210526315789473e-06, "loss": 1.4048, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 546 }, { "epoch": 1.8193172356369693, "grad_norm": 0.3453119165966736, "learning_rate": 4.169124877089478e-06, "loss": 1.3581, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 547 }, { "epoch": 1.8226477935054122, "grad_norm": 0.38186414289634574, "learning_rate": 4.126984126984126e-06, "loss": 1.4774, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 548 }, { "epoch": 1.825978351373855, "grad_norm": 0.3371300332212375, "learning_rate": 4.084084084084084e-06, "loss": 1.3565, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 549 }, { "epoch": 1.829308909242298, "grad_norm": 0.32042065002080106, "learning_rate": 4.0404040404040395e-06, "loss": 1.3807, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 550 }, { "epoch": 1.832639467110741, "grad_norm": 0.3776475075214216, "learning_rate": 3.995922528032619e-06, "loss": 1.4305, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 551 }, { "epoch": 1.8359700249791842, "grad_norm": 0.3351717661136717, "learning_rate": 3.9506172839506175e-06, "loss": 1.4133, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 552 }, { "epoch": 1.839300582847627, "grad_norm": 0.37528610178789024, "learning_rate": 3.904465212876428e-06, "loss": 1.4994, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 553 }, { "epoch": 1.8426311407160698, "grad_norm": 0.38936785329254486, "learning_rate": 3.857442348008385e-06, "loss": 1.393, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 554 }, { "epoch": 1.8459616985845129, "grad_norm": 0.40525496168183883, "learning_rate": 3.8095238095238094e-06, "loss": 1.4019, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 555 }, { "epoch": 1.849292256452956, "grad_norm": 0.4169994094961459, "learning_rate": 3.7606837606837604e-06, "loss": 1.4208, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 556 }, { "epoch": 1.852622814321399, "grad_norm": 0.4093560262894869, "learning_rate": 3.710895361380798e-06, "loss": 1.44, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 557 }, { "epoch": 1.8559533721898418, "grad_norm": 0.35662577286334196, "learning_rate": 3.660130718954248e-06, "loss": 1.4168, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 558 }, { "epoch": 1.8592839300582846, "grad_norm": 0.3469062029498766, "learning_rate": 3.6083608360836084e-06, "loss": 1.4109, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 559 }, { "epoch": 1.8626144879267277, "grad_norm": 0.35913894186601036, "learning_rate": 3.5555555555555546e-06, "loss": 1.3026, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 560 }, { "epoch": 1.8659450457951707, "grad_norm": 0.3601783041537011, "learning_rate": 3.501683501683501e-06, "loss": 1.4429, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 561 }, { "epoch": 1.8692756036636138, "grad_norm": 0.4301246312907219, "learning_rate": 3.4467120181405894e-06, "loss": 1.4415, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 562 }, { "epoch": 1.8726061615320566, "grad_norm": 0.44543619950365937, "learning_rate": 3.390607101947308e-06, "loss": 1.4354, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 563 }, { "epoch": 1.8759367194004994, "grad_norm": 0.4110006980910609, "learning_rate": 3.333333333333333e-06, "loss": 1.5156, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 564 }, { "epoch": 1.8792672772689425, "grad_norm": 0.36681035057341954, "learning_rate": 3.27485380116959e-06, "loss": 1.3926, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 565 }, { "epoch": 1.8825978351373855, "grad_norm": 0.3639552416710322, "learning_rate": 3.215130023640661e-06, "loss": 1.4537, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 566 }, { "epoch": 1.8859283930058286, "grad_norm": 0.3809201109198225, "learning_rate": 3.154121863799283e-06, "loss": 1.4344, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 567 }, { "epoch": 1.8892589508742714, "grad_norm": 0.5067748995958425, "learning_rate": 3.0917874396135263e-06, "loss": 1.444, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 568 }, { "epoch": 1.8925895087427143, "grad_norm": 0.39339545860925257, "learning_rate": 3.028083028083028e-06, "loss": 1.4368, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 569 }, { "epoch": 1.8959200666111573, "grad_norm": 0.3645143242760266, "learning_rate": 2.9629629629629625e-06, "loss": 1.4189, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 570 }, { "epoch": 1.8992506244796004, "grad_norm": 0.41301404150023885, "learning_rate": 2.8963795255930087e-06, "loss": 1.4513, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 571 }, { "epoch": 1.9025811823480434, "grad_norm": 0.35445322756534786, "learning_rate": 2.828282828282828e-06, "loss": 1.4212, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 572 }, { "epoch": 1.9059117402164862, "grad_norm": 0.31609898679838344, "learning_rate": 2.758620689655172e-06, "loss": 1.4282, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 573 }, { "epoch": 1.909242298084929, "grad_norm": 0.38641815454972966, "learning_rate": 2.6873385012919895e-06, "loss": 1.5401, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 574 }, { "epoch": 1.9125728559533721, "grad_norm": 0.38729985084754753, "learning_rate": 2.6143790849673204e-06, "loss": 1.461, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 575 }, { "epoch": 1.9159034138218152, "grad_norm": 0.4550081298663739, "learning_rate": 2.5396825396825395e-06, "loss": 1.4602, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 576 }, { "epoch": 1.9192339716902582, "grad_norm": 0.3605173725442084, "learning_rate": 2.4631860776439087e-06, "loss": 1.4104, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 577 }, { "epoch": 1.922564529558701, "grad_norm": 0.38548981376382463, "learning_rate": 2.384823848238482e-06, "loss": 1.4465, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 578 }, { "epoch": 1.9258950874271439, "grad_norm": 0.39748551935246357, "learning_rate": 2.304526748971193e-06, "loss": 1.443, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 579 }, { "epoch": 1.929225645295587, "grad_norm": 0.3638886379639791, "learning_rate": 2.222222222222222e-06, "loss": 1.4129, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 580 }, { "epoch": 1.93255620316403, "grad_norm": 0.36953734209449074, "learning_rate": 2.1378340365682133e-06, "loss": 1.3176, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 581 }, { "epoch": 1.935886761032473, "grad_norm": 0.3267944344034355, "learning_rate": 2.051282051282051e-06, "loss": 1.4588, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 582 }, { "epoch": 1.9392173189009159, "grad_norm": 0.3915434082543582, "learning_rate": 1.962481962481962e-06, "loss": 1.4441, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 583 }, { "epoch": 1.942547876769359, "grad_norm": 0.3556155258308632, "learning_rate": 1.871345029239766e-06, "loss": 1.3898, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 584 }, { "epoch": 1.9458784346378017, "grad_norm": 0.35583427100431714, "learning_rate": 1.7777777777777775e-06, "loss": 1.4117, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 585 }, { "epoch": 1.9492089925062448, "grad_norm": 0.3312617219719275, "learning_rate": 1.6816816816816814e-06, "loss": 1.4243, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 586 }, { "epoch": 1.9525395503746878, "grad_norm": 0.3171322439070156, "learning_rate": 1.582952815829528e-06, "loss": 1.3974, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 587 }, { "epoch": 1.9558701082431307, "grad_norm": 0.2931034713127486, "learning_rate": 1.4814814814814812e-06, "loss": 1.4232, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 588 }, { "epoch": 1.9592006661115737, "grad_norm": 0.31803832338980526, "learning_rate": 1.3771517996870107e-06, "loss": 1.4475, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 589 }, { "epoch": 1.9625312239800166, "grad_norm": 0.3102745879861819, "learning_rate": 1.2698412698412697e-06, "loss": 1.4991, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 590 }, { "epoch": 1.9658617818484596, "grad_norm": 0.35190966791382605, "learning_rate": 1.1594202898550724e-06, "loss": 1.4806, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 591 }, { "epoch": 1.9691923397169027, "grad_norm": 0.3133274529689738, "learning_rate": 1.045751633986928e-06, "loss": 1.347, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 592 }, { "epoch": 1.9725228975853455, "grad_norm": 0.30605048339614954, "learning_rate": 9.286898839137644e-07, "loss": 1.3999, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 593 }, { "epoch": 1.9758534554537885, "grad_norm": 0.3151090112991302, "learning_rate": 8.08080808080808e-07, "loss": 1.4339, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 594 }, { "epoch": 1.9791840133222314, "grad_norm": 0.35650599063479166, "learning_rate": 6.837606837606837e-07, "loss": 1.4009, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 595 }, { "epoch": 1.9825145711906744, "grad_norm": 0.3384958491564326, "learning_rate": 5.555555555555555e-07, "loss": 1.4611, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 596 }, { "epoch": 1.9858451290591175, "grad_norm": 0.3335636198476521, "learning_rate": 4.2328042328042324e-07, "loss": 1.3728, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 597 }, { "epoch": 1.9891756869275605, "grad_norm": 0.32277296814250667, "learning_rate": 2.8673835125448024e-07, "loss": 1.387, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 598 }, { "epoch": 1.9925062447960034, "grad_norm": 0.3467254801927619, "learning_rate": 1.4571948998178507e-07, "loss": 1.4114, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 599 }, { "epoch": 1.9958368026644462, "grad_norm": 0.3207639144479259, "learning_rate": 0, "loss": 1.3956, "memory/device_mem_reserved(gib)": 59.75, "memory/max_mem_active(gib)": 57.09, "memory/max_mem_allocated(gib)": 56.77, "step": 600 } ], "logging_steps": 1, "max_steps": 600, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 150, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.439031159441326e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }