Elenchus / trainer_state.json
Delta-Vector's picture
Upload folder using huggingface_hub
c2c88d3 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9958368026644462,
"eval_steps": 500,
"global_step": 600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0033305578684429643,
"grad_norm": 2.1135175063937415,
"learning_rate": 1.3333333333333334e-07,
"loss": 1.6413,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.01,
"memory/max_mem_allocated(gib)": 56.7,
"step": 1
},
{
"epoch": 0.006661115736885929,
"grad_norm": 2.0196598114735065,
"learning_rate": 2.6666666666666667e-07,
"loss": 1.6382,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 2
},
{
"epoch": 0.009991673605328892,
"grad_norm": 2.037892565480129,
"learning_rate": 4.0000000000000003e-07,
"loss": 1.6536,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 3
},
{
"epoch": 0.013322231473771857,
"grad_norm": 1.980939710918612,
"learning_rate": 5.333333333333333e-07,
"loss": 1.6712,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 4
},
{
"epoch": 0.01665278934221482,
"grad_norm": 2.0553629965683196,
"learning_rate": 6.666666666666666e-07,
"loss": 1.591,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 5
},
{
"epoch": 0.019983347210657785,
"grad_norm": 2.1321442384194493,
"learning_rate": 8.000000000000001e-07,
"loss": 1.6275,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 6
},
{
"epoch": 0.02331390507910075,
"grad_norm": 2.0224554441924147,
"learning_rate": 9.333333333333333e-07,
"loss": 1.6802,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 7
},
{
"epoch": 0.026644462947543714,
"grad_norm": 2.0657857283218144,
"learning_rate": 1.0666666666666667e-06,
"loss": 1.5768,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 8
},
{
"epoch": 0.02997502081598668,
"grad_norm": 2.0104233987359206,
"learning_rate": 1.2e-06,
"loss": 1.6026,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 9
},
{
"epoch": 0.03330557868442964,
"grad_norm": 2.098692014200002,
"learning_rate": 1.3333333333333332e-06,
"loss": 1.682,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 10
},
{
"epoch": 0.03663613655287261,
"grad_norm": 2.0879014611232116,
"learning_rate": 1.4666666666666667e-06,
"loss": 1.6368,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 11
},
{
"epoch": 0.03996669442131557,
"grad_norm": 2.0701872996726443,
"learning_rate": 1.6000000000000001e-06,
"loss": 1.629,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 12
},
{
"epoch": 0.04329725228975854,
"grad_norm": 2.105064067100562,
"learning_rate": 1.7333333333333332e-06,
"loss": 1.6568,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 13
},
{
"epoch": 0.0466278101582015,
"grad_norm": 2.1084872575258733,
"learning_rate": 1.8666666666666667e-06,
"loss": 1.597,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 14
},
{
"epoch": 0.04995836802664446,
"grad_norm": 2.0616161807879965,
"learning_rate": 2e-06,
"loss": 1.6008,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 15
},
{
"epoch": 0.05328892589508743,
"grad_norm": 1.92970469468585,
"learning_rate": 2.1333333333333334e-06,
"loss": 1.6815,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 16
},
{
"epoch": 0.05661948376353039,
"grad_norm": 2.0527427262697855,
"learning_rate": 2.266666666666667e-06,
"loss": 1.6873,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 17
},
{
"epoch": 0.05995004163197336,
"grad_norm": 1.9622305052083537,
"learning_rate": 2.4e-06,
"loss": 1.6334,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 18
},
{
"epoch": 0.06328059950041633,
"grad_norm": 1.9979390122219929,
"learning_rate": 2.533333333333333e-06,
"loss": 1.6623,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 19
},
{
"epoch": 0.06661115736885928,
"grad_norm": 2.0311968068371367,
"learning_rate": 2.6666666666666664e-06,
"loss": 1.607,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 20
},
{
"epoch": 0.06994171523730225,
"grad_norm": 1.968344786501615,
"learning_rate": 2.8e-06,
"loss": 1.6087,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 21
},
{
"epoch": 0.07327227310574522,
"grad_norm": 2.1145916019697952,
"learning_rate": 2.9333333333333333e-06,
"loss": 1.5926,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 22
},
{
"epoch": 0.07660283097418817,
"grad_norm": 2.0129475295050496,
"learning_rate": 3.066666666666667e-06,
"loss": 1.6171,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 23
},
{
"epoch": 0.07993338884263114,
"grad_norm": 1.8817164699193898,
"learning_rate": 3.2000000000000003e-06,
"loss": 1.6552,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 24
},
{
"epoch": 0.08326394671107411,
"grad_norm": 1.9306634203997992,
"learning_rate": 3.333333333333333e-06,
"loss": 1.6288,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 25
},
{
"epoch": 0.08659450457951708,
"grad_norm": 1.8839715974459492,
"learning_rate": 3.4666666666666664e-06,
"loss": 1.5772,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 26
},
{
"epoch": 0.08992506244796003,
"grad_norm": 1.9004207576591563,
"learning_rate": 3.6e-06,
"loss": 1.6019,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 27
},
{
"epoch": 0.093255620316403,
"grad_norm": 1.8508009396241183,
"learning_rate": 3.7333333333333333e-06,
"loss": 1.6347,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 28
},
{
"epoch": 0.09658617818484597,
"grad_norm": 1.6521817439090796,
"learning_rate": 3.866666666666666e-06,
"loss": 1.6425,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 29
},
{
"epoch": 0.09991673605328892,
"grad_norm": 1.5825237347457706,
"learning_rate": 4e-06,
"loss": 1.4999,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 30
},
{
"epoch": 0.10324729392173189,
"grad_norm": 1.4406934972277887,
"learning_rate": 4.133333333333333e-06,
"loss": 1.537,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 31
},
{
"epoch": 0.10657785179017486,
"grad_norm": 1.3686252476380623,
"learning_rate": 4.266666666666667e-06,
"loss": 1.5054,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 32
},
{
"epoch": 0.10990840965861781,
"grad_norm": 1.190989973623068,
"learning_rate": 4.399999999999999e-06,
"loss": 1.5673,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 33
},
{
"epoch": 0.11323896752706078,
"grad_norm": 1.0921718147815354,
"learning_rate": 4.533333333333334e-06,
"loss": 1.5383,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 34
},
{
"epoch": 0.11656952539550375,
"grad_norm": 0.9720091603452963,
"learning_rate": 4.666666666666666e-06,
"loss": 1.5698,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 35
},
{
"epoch": 0.11990008326394672,
"grad_norm": 0.8634677699569875,
"learning_rate": 4.8e-06,
"loss": 1.5286,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 36
},
{
"epoch": 0.12323064113238967,
"grad_norm": 0.7720350215206407,
"learning_rate": 4.933333333333333e-06,
"loss": 1.5897,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 37
},
{
"epoch": 0.12656119900083265,
"grad_norm": 0.7351438783567595,
"learning_rate": 5.066666666666666e-06,
"loss": 1.471,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 38
},
{
"epoch": 0.1298917568692756,
"grad_norm": 0.6436527036047347,
"learning_rate": 5.2e-06,
"loss": 1.5523,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 39
},
{
"epoch": 0.13322231473771856,
"grad_norm": 0.5914433909472115,
"learning_rate": 5.333333333333333e-06,
"loss": 1.5169,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 40
},
{
"epoch": 0.13655287260616153,
"grad_norm": 0.5708899134928395,
"learning_rate": 5.466666666666667e-06,
"loss": 1.4727,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 41
},
{
"epoch": 0.1398834304746045,
"grad_norm": 0.562979308505682,
"learning_rate": 5.6e-06,
"loss": 1.5101,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 42
},
{
"epoch": 0.14321398834304747,
"grad_norm": 0.5333098859373814,
"learning_rate": 5.733333333333332e-06,
"loss": 1.5053,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 43
},
{
"epoch": 0.14654454621149043,
"grad_norm": 0.518700589700869,
"learning_rate": 5.866666666666667e-06,
"loss": 1.5522,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 44
},
{
"epoch": 0.1498751040799334,
"grad_norm": 0.5123134702021855,
"learning_rate": 5.999999999999999e-06,
"loss": 1.4581,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 45
},
{
"epoch": 0.15320566194837634,
"grad_norm": 0.5233023339387923,
"learning_rate": 6.133333333333334e-06,
"loss": 1.4503,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 46
},
{
"epoch": 0.1565362198168193,
"grad_norm": 0.4984395351799732,
"learning_rate": 6.266666666666666e-06,
"loss": 1.4698,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 47
},
{
"epoch": 0.15986677768526228,
"grad_norm": 0.48116733820243823,
"learning_rate": 6.4000000000000006e-06,
"loss": 1.5399,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 48
},
{
"epoch": 0.16319733555370525,
"grad_norm": 0.4917159508967155,
"learning_rate": 6.533333333333333e-06,
"loss": 1.4674,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 49
},
{
"epoch": 0.16652789342214822,
"grad_norm": 0.4631697484027289,
"learning_rate": 6.666666666666666e-06,
"loss": 1.5063,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 50
},
{
"epoch": 0.16985845129059118,
"grad_norm": 0.4506097490342786,
"learning_rate": 6.8e-06,
"loss": 1.4787,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 51
},
{
"epoch": 0.17318900915903415,
"grad_norm": 0.4808943580292107,
"learning_rate": 6.933333333333333e-06,
"loss": 1.5355,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 52
},
{
"epoch": 0.1765195670274771,
"grad_norm": 0.4353655566788618,
"learning_rate": 7.066666666666667e-06,
"loss": 1.4545,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 53
},
{
"epoch": 0.17985012489592006,
"grad_norm": 0.42881276266179474,
"learning_rate": 7.2e-06,
"loss": 1.4726,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 54
},
{
"epoch": 0.18318068276436303,
"grad_norm": 0.4243886425058161,
"learning_rate": 7.333333333333332e-06,
"loss": 1.5364,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 55
},
{
"epoch": 0.186511240632806,
"grad_norm": 0.4078516232902407,
"learning_rate": 7.466666666666667e-06,
"loss": 1.5441,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 56
},
{
"epoch": 0.18984179850124897,
"grad_norm": 0.39819776399963164,
"learning_rate": 7.599999999999999e-06,
"loss": 1.5394,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 57
},
{
"epoch": 0.19317235636969193,
"grad_norm": 0.3993196408685462,
"learning_rate": 7.733333333333333e-06,
"loss": 1.4883,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 58
},
{
"epoch": 0.1965029142381349,
"grad_norm": 0.3992257742362516,
"learning_rate": 7.866666666666667e-06,
"loss": 1.4933,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 59
},
{
"epoch": 0.19983347210657784,
"grad_norm": 0.39782096872195477,
"learning_rate": 8e-06,
"loss": 1.4729,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 60
},
{
"epoch": 0.2031640299750208,
"grad_norm": 0.38436305350726707,
"learning_rate": 7.99851604526062e-06,
"loss": 1.4777,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 61
},
{
"epoch": 0.20649458784346378,
"grad_norm": 0.3782583438569582,
"learning_rate": 7.99702712746191e-06,
"loss": 1.535,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 62
},
{
"epoch": 0.20982514571190675,
"grad_norm": 0.3910775225847348,
"learning_rate": 7.995533221663874e-06,
"loss": 1.4643,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 63
},
{
"epoch": 0.21315570358034971,
"grad_norm": 0.37376830993433585,
"learning_rate": 7.994034302759135e-06,
"loss": 1.4265,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 64
},
{
"epoch": 0.21648626144879268,
"grad_norm": 0.37205196740456564,
"learning_rate": 7.99253034547152e-06,
"loss": 1.484,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 65
},
{
"epoch": 0.21981681931723562,
"grad_norm": 0.37012462931708767,
"learning_rate": 7.991021324354658e-06,
"loss": 1.4668,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 66
},
{
"epoch": 0.2231473771856786,
"grad_norm": 0.36609254541987934,
"learning_rate": 7.989507213790519e-06,
"loss": 1.4512,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 67
},
{
"epoch": 0.22647793505412156,
"grad_norm": 0.36389643029866026,
"learning_rate": 7.987987987987988e-06,
"loss": 1.4666,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 68
},
{
"epoch": 0.22980849292256453,
"grad_norm": 0.3835942907380993,
"learning_rate": 7.986463620981386e-06,
"loss": 1.5581,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 69
},
{
"epoch": 0.2331390507910075,
"grad_norm": 0.3709505537460329,
"learning_rate": 7.984934086629002e-06,
"loss": 1.4942,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 70
},
{
"epoch": 0.23646960865945046,
"grad_norm": 0.3734284694232727,
"learning_rate": 7.983399358611582e-06,
"loss": 1.5449,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 71
},
{
"epoch": 0.23980016652789343,
"grad_norm": 0.38168285139161445,
"learning_rate": 7.981859410430838e-06,
"loss": 1.4972,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 72
},
{
"epoch": 0.24313072439633637,
"grad_norm": 0.4166060644404285,
"learning_rate": 7.98031421540791e-06,
"loss": 1.5273,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 73
},
{
"epoch": 0.24646128226477934,
"grad_norm": 0.3721773268353121,
"learning_rate": 7.978763746681835e-06,
"loss": 1.5459,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 74
},
{
"epoch": 0.2497918401332223,
"grad_norm": 0.3785109036596187,
"learning_rate": 7.977207977207977e-06,
"loss": 1.5221,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 75
},
{
"epoch": 0.2531223980016653,
"grad_norm": 0.3798544993330551,
"learning_rate": 7.975646879756469e-06,
"loss": 1.447,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 76
},
{
"epoch": 0.25645295587010825,
"grad_norm": 0.3676458399839075,
"learning_rate": 7.974080426910615e-06,
"loss": 1.552,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 77
},
{
"epoch": 0.2597835137385512,
"grad_norm": 0.3906392619486636,
"learning_rate": 7.972508591065292e-06,
"loss": 1.5524,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 78
},
{
"epoch": 0.2631140716069942,
"grad_norm": 0.4086059406499793,
"learning_rate": 7.97093134442532e-06,
"loss": 1.5347,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 79
},
{
"epoch": 0.2664446294754371,
"grad_norm": 0.37866878925235237,
"learning_rate": 7.969348659003832e-06,
"loss": 1.4263,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 80
},
{
"epoch": 0.2697751873438801,
"grad_norm": 0.36630829174020924,
"learning_rate": 7.96776050662061e-06,
"loss": 1.4882,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 81
},
{
"epoch": 0.27310574521232306,
"grad_norm": 0.3572953266857883,
"learning_rate": 7.966166858900421e-06,
"loss": 1.4996,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 82
},
{
"epoch": 0.27643630308076605,
"grad_norm": 0.37034991529495037,
"learning_rate": 7.964567687271327e-06,
"loss": 1.4558,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 83
},
{
"epoch": 0.279766860949209,
"grad_norm": 0.39454254411893813,
"learning_rate": 7.962962962962963e-06,
"loss": 1.481,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 84
},
{
"epoch": 0.28309741881765194,
"grad_norm": 0.36598142148212737,
"learning_rate": 7.961352657004831e-06,
"loss": 1.4647,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 85
},
{
"epoch": 0.28642797668609493,
"grad_norm": 0.40131025635004997,
"learning_rate": 7.959736740224545e-06,
"loss": 1.486,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 86
},
{
"epoch": 0.28975853455453787,
"grad_norm": 0.36813808559204136,
"learning_rate": 7.958115183246073e-06,
"loss": 1.5104,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 87
},
{
"epoch": 0.29308909242298087,
"grad_norm": 0.4399054897841581,
"learning_rate": 7.956487956487956e-06,
"loss": 1.5511,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 88
},
{
"epoch": 0.2964196502914238,
"grad_norm": 0.4137480663423791,
"learning_rate": 7.95485503016151e-06,
"loss": 1.5431,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 89
},
{
"epoch": 0.2997502081598668,
"grad_norm": 0.39082659570701933,
"learning_rate": 7.953216374269006e-06,
"loss": 1.5094,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 90
},
{
"epoch": 0.30308076602830974,
"grad_norm": 0.4222547479458042,
"learning_rate": 7.951571958601836e-06,
"loss": 1.528,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 91
},
{
"epoch": 0.3064113238967527,
"grad_norm": 0.4565848989524497,
"learning_rate": 7.949921752738653e-06,
"loss": 1.4345,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 92
},
{
"epoch": 0.3097418817651957,
"grad_norm": 0.3909465393349193,
"learning_rate": 7.948265726043504e-06,
"loss": 1.4885,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 93
},
{
"epoch": 0.3130724396336386,
"grad_norm": 0.40399439020361494,
"learning_rate": 7.946603847663918e-06,
"loss": 1.4836,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 94
},
{
"epoch": 0.3164029975020816,
"grad_norm": 0.3940685084379771,
"learning_rate": 7.944936086529007e-06,
"loss": 1.4894,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 95
},
{
"epoch": 0.31973355537052456,
"grad_norm": 0.36455393248573603,
"learning_rate": 7.943262411347517e-06,
"loss": 1.4765,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 96
},
{
"epoch": 0.32306411323896755,
"grad_norm": 0.42216219555871026,
"learning_rate": 7.94158279060588e-06,
"loss": 1.505,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 97
},
{
"epoch": 0.3263946711074105,
"grad_norm": 0.3833612688097333,
"learning_rate": 7.93989719256623e-06,
"loss": 1.4803,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 98
},
{
"epoch": 0.32972522897585343,
"grad_norm": 0.3793312412105176,
"learning_rate": 7.938205585264408e-06,
"loss": 1.4721,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 99
},
{
"epoch": 0.33305578684429643,
"grad_norm": 0.6231405275420779,
"learning_rate": 7.936507936507936e-06,
"loss": 1.4941,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 100
},
{
"epoch": 0.33638634471273937,
"grad_norm": 0.39916108511305454,
"learning_rate": 7.934804213873981e-06,
"loss": 1.5113,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 101
},
{
"epoch": 0.33971690258118237,
"grad_norm": 0.39832888981715536,
"learning_rate": 7.933094384707288e-06,
"loss": 1.4616,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 102
},
{
"epoch": 0.3430474604496253,
"grad_norm": 0.35554379353616694,
"learning_rate": 7.931378416118093e-06,
"loss": 1.4754,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 103
},
{
"epoch": 0.3463780183180683,
"grad_norm": 0.3778786204869107,
"learning_rate": 7.929656274980016e-06,
"loss": 1.5204,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 104
},
{
"epoch": 0.34970857618651124,
"grad_norm": 0.3979509981477904,
"learning_rate": 7.927927927927927e-06,
"loss": 1.4972,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 105
},
{
"epoch": 0.3530391340549542,
"grad_norm": 0.3829152377900939,
"learning_rate": 7.926193341355797e-06,
"loss": 1.4852,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 106
},
{
"epoch": 0.3563696919233972,
"grad_norm": 0.3783230292732417,
"learning_rate": 7.924452481414507e-06,
"loss": 1.4605,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 107
},
{
"epoch": 0.3597002497918401,
"grad_norm": 0.3702225917786687,
"learning_rate": 7.922705314009662e-06,
"loss": 1.4751,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 108
},
{
"epoch": 0.3630308076602831,
"grad_norm": 0.423076463648796,
"learning_rate": 7.920951804799353e-06,
"loss": 1.5043,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 109
},
{
"epoch": 0.36636136552872606,
"grad_norm": 0.4015775298544568,
"learning_rate": 7.919191919191919e-06,
"loss": 1.4993,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 110
},
{
"epoch": 0.36969192339716905,
"grad_norm": 0.395772531232646,
"learning_rate": 7.917425622343655e-06,
"loss": 1.5074,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 111
},
{
"epoch": 0.373022481265612,
"grad_norm": 0.4063035915678222,
"learning_rate": 7.915652879156528e-06,
"loss": 1.5005,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 112
},
{
"epoch": 0.37635303913405493,
"grad_norm": 0.3749049780160411,
"learning_rate": 7.913873654275848e-06,
"loss": 1.5016,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 113
},
{
"epoch": 0.37968359700249793,
"grad_norm": 0.40207184709524446,
"learning_rate": 7.912087912087911e-06,
"loss": 1.5112,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 114
},
{
"epoch": 0.38301415487094087,
"grad_norm": 0.3761596500147066,
"learning_rate": 7.910295616717634e-06,
"loss": 1.4226,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 115
},
{
"epoch": 0.38634471273938387,
"grad_norm": 0.34919530357614503,
"learning_rate": 7.908496732026144e-06,
"loss": 1.454,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 116
},
{
"epoch": 0.3896752706078268,
"grad_norm": 0.3783249892281946,
"learning_rate": 7.906691221608348e-06,
"loss": 1.3926,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 117
},
{
"epoch": 0.3930058284762698,
"grad_norm": 0.38789047851939196,
"learning_rate": 7.904879048790487e-06,
"loss": 1.5148,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 118
},
{
"epoch": 0.39633638634471274,
"grad_norm": 0.38028310552438055,
"learning_rate": 7.903060176627645e-06,
"loss": 1.5512,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 119
},
{
"epoch": 0.3996669442131557,
"grad_norm": 0.3557857851285413,
"learning_rate": 7.901234567901235e-06,
"loss": 1.5145,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 120
},
{
"epoch": 0.4029975020815987,
"grad_norm": 0.3648126505851961,
"learning_rate": 7.89940218511647e-06,
"loss": 1.4616,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 121
},
{
"epoch": 0.4063280599500416,
"grad_norm": 0.3518641114757544,
"learning_rate": 7.897562990499793e-06,
"loss": 1.4444,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 122
},
{
"epoch": 0.4096586178184846,
"grad_norm": 0.3812409352914946,
"learning_rate": 7.895716945996276e-06,
"loss": 1.4524,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 123
},
{
"epoch": 0.41298917568692756,
"grad_norm": 0.37136499335096407,
"learning_rate": 7.893864013266998e-06,
"loss": 1.4495,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 124
},
{
"epoch": 0.4163197335553705,
"grad_norm": 0.36965696298407785,
"learning_rate": 7.892004153686396e-06,
"loss": 1.454,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 125
},
{
"epoch": 0.4196502914238135,
"grad_norm": 0.37625883797439813,
"learning_rate": 7.890137328339575e-06,
"loss": 1.4738,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 126
},
{
"epoch": 0.42298084929225643,
"grad_norm": 0.3891305395409707,
"learning_rate": 7.888263498019595e-06,
"loss": 1.4336,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 127
},
{
"epoch": 0.42631140716069943,
"grad_norm": 0.33836499033668194,
"learning_rate": 7.886382623224727e-06,
"loss": 1.4435,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 128
},
{
"epoch": 0.42964196502914237,
"grad_norm": 0.39084535016086686,
"learning_rate": 7.88449466415568e-06,
"loss": 1.4598,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 129
},
{
"epoch": 0.43297252289758537,
"grad_norm": 0.3896756879145717,
"learning_rate": 7.882599580712787e-06,
"loss": 1.5065,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 130
},
{
"epoch": 0.4363030807660283,
"grad_norm": 0.4252827004151611,
"learning_rate": 7.880697332493174e-06,
"loss": 1.4083,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 131
},
{
"epoch": 0.43963363863447125,
"grad_norm": 0.3608365697753635,
"learning_rate": 7.878787878787878e-06,
"loss": 1.441,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 132
},
{
"epoch": 0.44296419650291424,
"grad_norm": 0.403123415092978,
"learning_rate": 7.876871178578958e-06,
"loss": 1.4627,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 133
},
{
"epoch": 0.4462947543713572,
"grad_norm": 0.40013457143727,
"learning_rate": 7.874947190536545e-06,
"loss": 1.4955,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 134
},
{
"epoch": 0.4496253122398002,
"grad_norm": 0.3883976625001682,
"learning_rate": 7.873015873015873e-06,
"loss": 1.4298,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 135
},
{
"epoch": 0.4529558701082431,
"grad_norm": 0.47893723454397114,
"learning_rate": 7.871077184054283e-06,
"loss": 1.4706,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 136
},
{
"epoch": 0.4562864279766861,
"grad_norm": 0.3939594731251799,
"learning_rate": 7.869131081368174e-06,
"loss": 1.4659,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 137
},
{
"epoch": 0.45961698584512906,
"grad_norm": 0.39872483940488357,
"learning_rate": 7.867177522349935e-06,
"loss": 1.4428,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 138
},
{
"epoch": 0.462947543713572,
"grad_norm": 0.41681968734219343,
"learning_rate": 7.865216464064831e-06,
"loss": 1.5116,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 139
},
{
"epoch": 0.466278101582015,
"grad_norm": 0.3950334535334994,
"learning_rate": 7.863247863247863e-06,
"loss": 1.4453,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 140
},
{
"epoch": 0.46960865945045793,
"grad_norm": 0.3569883912128034,
"learning_rate": 7.861271676300578e-06,
"loss": 1.462,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 141
},
{
"epoch": 0.47293921731890093,
"grad_norm": 0.3784473417547298,
"learning_rate": 7.85928785928786e-06,
"loss": 1.4961,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 142
},
{
"epoch": 0.47626977518734387,
"grad_norm": 0.35459480974078084,
"learning_rate": 7.857296367934665e-06,
"loss": 1.5362,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 143
},
{
"epoch": 0.47960033305578686,
"grad_norm": 0.3662426670901604,
"learning_rate": 7.85529715762274e-06,
"loss": 1.3832,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 144
},
{
"epoch": 0.4829308909242298,
"grad_norm": 0.4066610425979986,
"learning_rate": 7.85329018338727e-06,
"loss": 1.4641,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 145
},
{
"epoch": 0.48626144879267275,
"grad_norm": 0.3545713986492447,
"learning_rate": 7.851275399913532e-06,
"loss": 1.4675,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 146
},
{
"epoch": 0.48959200666111574,
"grad_norm": 0.3664688735051096,
"learning_rate": 7.849252761533463e-06,
"loss": 1.4683,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 147
},
{
"epoch": 0.4929225645295587,
"grad_norm": 0.3733605661751341,
"learning_rate": 7.847222222222221e-06,
"loss": 1.4315,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 148
},
{
"epoch": 0.4962531223980017,
"grad_norm": 0.3380374141462393,
"learning_rate": 7.845183735594695e-06,
"loss": 1.4401,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 149
},
{
"epoch": 0.4995836802664446,
"grad_norm": 0.407518229964574,
"learning_rate": 7.84313725490196e-06,
"loss": 1.4437,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 150
},
{
"epoch": 0.5029142381348876,
"grad_norm": 0.36142186690399497,
"learning_rate": 7.841082733027723e-06,
"loss": 1.4444,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 151
},
{
"epoch": 0.5062447960033306,
"grad_norm": 0.35245555484230673,
"learning_rate": 7.839020122484688e-06,
"loss": 1.4013,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 152
},
{
"epoch": 0.5095753538717736,
"grad_norm": 0.3751518274944043,
"learning_rate": 7.836949375410913e-06,
"loss": 1.4325,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 153
},
{
"epoch": 0.5129059117402165,
"grad_norm": 0.364299926744196,
"learning_rate": 7.834870443566096e-06,
"loss": 1.4757,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 154
},
{
"epoch": 0.5162364696086594,
"grad_norm": 0.352709296353453,
"learning_rate": 7.832783278327833e-06,
"loss": 1.4405,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 155
},
{
"epoch": 0.5195670274771024,
"grad_norm": 0.3595882748979197,
"learning_rate": 7.830687830687831e-06,
"loss": 1.5005,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 156
},
{
"epoch": 0.5228975853455454,
"grad_norm": 0.3663062717533196,
"learning_rate": 7.828584051248068e-06,
"loss": 1.4916,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 157
},
{
"epoch": 0.5262281432139884,
"grad_norm": 0.39230898190550817,
"learning_rate": 7.82647189021691e-06,
"loss": 1.5925,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 158
},
{
"epoch": 0.5295587010824313,
"grad_norm": 0.34764213510621217,
"learning_rate": 7.824351297405189e-06,
"loss": 1.533,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 159
},
{
"epoch": 0.5328892589508742,
"grad_norm": 0.4356036173061448,
"learning_rate": 7.822222222222222e-06,
"loss": 1.4768,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 160
},
{
"epoch": 0.5362198168193172,
"grad_norm": 0.3650633676087402,
"learning_rate": 7.820084613671788e-06,
"loss": 1.4834,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 161
},
{
"epoch": 0.5395503746877602,
"grad_norm": 0.36003662026404476,
"learning_rate": 7.81793842034806e-06,
"loss": 1.4745,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 162
},
{
"epoch": 0.5428809325562032,
"grad_norm": 0.45089712637002705,
"learning_rate": 7.815783590431477e-06,
"loss": 1.4762,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 163
},
{
"epoch": 0.5462114904246461,
"grad_norm": 0.3727325869359898,
"learning_rate": 7.813620071684589e-06,
"loss": 1.4605,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 164
},
{
"epoch": 0.5495420482930891,
"grad_norm": 0.3396845072209277,
"learning_rate": 7.81144781144781e-06,
"loss": 1.4793,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 165
},
{
"epoch": 0.5528726061615321,
"grad_norm": 0.35005093334327886,
"learning_rate": 7.809266756635177e-06,
"loss": 1.4699,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 166
},
{
"epoch": 0.556203164029975,
"grad_norm": 0.3836826797224187,
"learning_rate": 7.807076853729998e-06,
"loss": 1.4727,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 167
},
{
"epoch": 0.559533721898418,
"grad_norm": 0.3866747204941054,
"learning_rate": 7.804878048780487e-06,
"loss": 1.4656,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 168
},
{
"epoch": 0.5628642797668609,
"grad_norm": 0.3754060351606817,
"learning_rate": 7.802670287395338e-06,
"loss": 1.4427,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 169
},
{
"epoch": 0.5661948376353039,
"grad_norm": 0.3560392764595894,
"learning_rate": 7.80045351473923e-06,
"loss": 1.469,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 170
},
{
"epoch": 0.5695253955037469,
"grad_norm": 0.38308734497417124,
"learning_rate": 7.79822767552829e-06,
"loss": 1.5086,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 171
},
{
"epoch": 0.5728559533721899,
"grad_norm": 0.359072776955667,
"learning_rate": 7.7959927140255e-06,
"loss": 1.4531,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 172
},
{
"epoch": 0.5761865112406328,
"grad_norm": 0.3922686356985507,
"learning_rate": 7.793748574036049e-06,
"loss": 1.5004,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 173
},
{
"epoch": 0.5795170691090757,
"grad_norm": 0.38139432931110967,
"learning_rate": 7.791495198902606e-06,
"loss": 1.4596,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 174
},
{
"epoch": 0.5828476269775187,
"grad_norm": 0.386725308323352,
"learning_rate": 7.789232531500573e-06,
"loss": 1.4107,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 175
},
{
"epoch": 0.5861781848459617,
"grad_norm": 0.3590860738790805,
"learning_rate": 7.786960514233242e-06,
"loss": 1.4748,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 176
},
{
"epoch": 0.5895087427144047,
"grad_norm": 0.38618438471328675,
"learning_rate": 7.784679089026915e-06,
"loss": 1.481,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 177
},
{
"epoch": 0.5928393005828476,
"grad_norm": 0.35271293932202913,
"learning_rate": 7.782388197325957e-06,
"loss": 1.4445,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 178
},
{
"epoch": 0.5961698584512906,
"grad_norm": 0.3587339394337467,
"learning_rate": 7.78008778008778e-06,
"loss": 1.482,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 179
},
{
"epoch": 0.5995004163197336,
"grad_norm": 0.4051854093619042,
"learning_rate": 7.777777777777777e-06,
"loss": 1.4538,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 180
},
{
"epoch": 0.6028309741881765,
"grad_norm": 0.3624830177676393,
"learning_rate": 7.775458130364185e-06,
"loss": 1.3882,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 181
},
{
"epoch": 0.6061615320566195,
"grad_norm": 0.40327439887058536,
"learning_rate": 7.773128777312878e-06,
"loss": 1.4439,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 182
},
{
"epoch": 0.6094920899250624,
"grad_norm": 0.3849485884337272,
"learning_rate": 7.77078965758211e-06,
"loss": 1.4598,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 183
},
{
"epoch": 0.6128226477935054,
"grad_norm": 0.3800546336095655,
"learning_rate": 7.76844070961718e-06,
"loss": 1.5077,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 184
},
{
"epoch": 0.6161532056619484,
"grad_norm": 0.4058514640829756,
"learning_rate": 7.766081871345029e-06,
"loss": 1.4557,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 185
},
{
"epoch": 0.6194837635303914,
"grad_norm": 0.3547116281420189,
"learning_rate": 7.763713080168775e-06,
"loss": 1.4465,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 186
},
{
"epoch": 0.6228143213988343,
"grad_norm": 0.36935786461716674,
"learning_rate": 7.76133427296218e-06,
"loss": 1.3674,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 187
},
{
"epoch": 0.6261448792672772,
"grad_norm": 0.3465066682351456,
"learning_rate": 7.75894538606403e-06,
"loss": 1.5018,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 188
},
{
"epoch": 0.6294754371357202,
"grad_norm": 0.37821929789921876,
"learning_rate": 7.75654635527247e-06,
"loss": 1.46,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 189
},
{
"epoch": 0.6328059950041632,
"grad_norm": 0.42147975033129337,
"learning_rate": 7.754137115839244e-06,
"loss": 1.4324,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 190
},
{
"epoch": 0.6361365528726062,
"grad_norm": 0.395112631651776,
"learning_rate": 7.751717602463872e-06,
"loss": 1.4682,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 191
},
{
"epoch": 0.6394671107410491,
"grad_norm": 0.3866087697502269,
"learning_rate": 7.749287749287749e-06,
"loss": 1.4845,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 192
},
{
"epoch": 0.6427976686094921,
"grad_norm": 0.39380953384339784,
"learning_rate": 7.746847489888173e-06,
"loss": 1.4628,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 193
},
{
"epoch": 0.6461282264779351,
"grad_norm": 0.38499086799547566,
"learning_rate": 7.744396757272294e-06,
"loss": 1.4485,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 194
},
{
"epoch": 0.649458784346378,
"grad_norm": 0.3628021970554608,
"learning_rate": 7.741935483870966e-06,
"loss": 1.4306,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 195
},
{
"epoch": 0.652789342214821,
"grad_norm": 0.37886204626432507,
"learning_rate": 7.739463601532567e-06,
"loss": 1.4178,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 196
},
{
"epoch": 0.6561199000832639,
"grad_norm": 0.36347566586862995,
"learning_rate": 7.736981041516678e-06,
"loss": 1.3917,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 197
},
{
"epoch": 0.6594504579517069,
"grad_norm": 0.3808525608826558,
"learning_rate": 7.734487734487733e-06,
"loss": 1.425,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 198
},
{
"epoch": 0.6627810158201499,
"grad_norm": 0.36703672958616185,
"learning_rate": 7.731983610508556e-06,
"loss": 1.3963,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 199
},
{
"epoch": 0.6661115736885929,
"grad_norm": 0.3449284331155099,
"learning_rate": 7.729468599033817e-06,
"loss": 1.5389,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 200
},
{
"epoch": 0.6694421315570358,
"grad_norm": 0.38098871722055255,
"learning_rate": 7.726942628903412e-06,
"loss": 1.4354,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 201
},
{
"epoch": 0.6727726894254787,
"grad_norm": 0.37447535098026113,
"learning_rate": 7.72440562833576e-06,
"loss": 1.4238,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 202
},
{
"epoch": 0.6761032472939217,
"grad_norm": 0.3815996192127943,
"learning_rate": 7.721857524920983e-06,
"loss": 1.4465,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 203
},
{
"epoch": 0.6794338051623647,
"grad_norm": 0.43830167523580127,
"learning_rate": 7.719298245614036e-06,
"loss": 1.4464,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 204
},
{
"epoch": 0.6827643630308077,
"grad_norm": 0.49374391843463344,
"learning_rate": 7.716727716727717e-06,
"loss": 1.4326,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 205
},
{
"epoch": 0.6860949208992506,
"grad_norm": 0.40611516537871767,
"learning_rate": 7.714145863925599e-06,
"loss": 1.4867,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 206
},
{
"epoch": 0.6894254787676936,
"grad_norm": 0.39306412548059455,
"learning_rate": 7.711552612214863e-06,
"loss": 1.4879,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 207
},
{
"epoch": 0.6927560366361366,
"grad_norm": 0.3732547746311456,
"learning_rate": 7.708947885939036e-06,
"loss": 1.5305,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 208
},
{
"epoch": 0.6960865945045795,
"grad_norm": 0.3749992070235647,
"learning_rate": 7.706331608770632e-06,
"loss": 1.4422,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 209
},
{
"epoch": 0.6994171523730225,
"grad_norm": 0.4236632648954227,
"learning_rate": 7.703703703703702e-06,
"loss": 1.4362,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 210
},
{
"epoch": 0.7027477102414654,
"grad_norm": 0.3799687473741569,
"learning_rate": 7.701064093046274e-06,
"loss": 1.512,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 211
},
{
"epoch": 0.7060782681099084,
"grad_norm": 0.3724271784543797,
"learning_rate": 7.698412698412699e-06,
"loss": 1.469,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 212
},
{
"epoch": 0.7094088259783514,
"grad_norm": 0.364477503994216,
"learning_rate": 7.695749440715883e-06,
"loss": 1.4811,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 213
},
{
"epoch": 0.7127393838467944,
"grad_norm": 0.3925520005032744,
"learning_rate": 7.693074240159441e-06,
"loss": 1.5027,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 214
},
{
"epoch": 0.7160699417152373,
"grad_norm": 0.40921223587397654,
"learning_rate": 7.690387016229713e-06,
"loss": 1.488,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 215
},
{
"epoch": 0.7194004995836802,
"grad_norm": 0.3981162315328969,
"learning_rate": 7.687687687687688e-06,
"loss": 1.4343,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 216
},
{
"epoch": 0.7227310574521232,
"grad_norm": 0.35388766488814566,
"learning_rate": 7.684976172560823e-06,
"loss": 1.4599,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 217
},
{
"epoch": 0.7260616153205662,
"grad_norm": 0.3449802535833205,
"learning_rate": 7.682252388134742e-06,
"loss": 1.442,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 218
},
{
"epoch": 0.7293921731890092,
"grad_norm": 0.34627676487411824,
"learning_rate": 7.679516250944822e-06,
"loss": 1.4461,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 219
},
{
"epoch": 0.7327227310574521,
"grad_norm": 0.35799089084524466,
"learning_rate": 7.676767676767677e-06,
"loss": 1.4731,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 220
},
{
"epoch": 0.736053288925895,
"grad_norm": 0.3820520257947768,
"learning_rate": 7.674006580612503e-06,
"loss": 1.4566,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 221
},
{
"epoch": 0.7393838467943381,
"grad_norm": 0.3641120307221186,
"learning_rate": 7.671232876712327e-06,
"loss": 1.4525,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 222
},
{
"epoch": 0.742714404662781,
"grad_norm": 0.37136269720782134,
"learning_rate": 7.668446478515128e-06,
"loss": 1.4548,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 223
},
{
"epoch": 0.746044962531224,
"grad_norm": 0.4138383130083843,
"learning_rate": 7.665647298674822e-06,
"loss": 1.5395,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 224
},
{
"epoch": 0.7493755203996669,
"grad_norm": 0.37512729325167443,
"learning_rate": 7.662835249042145e-06,
"loss": 1.4348,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 225
},
{
"epoch": 0.7527060782681099,
"grad_norm": 0.3574220209010036,
"learning_rate": 7.660010240655401e-06,
"loss": 1.4205,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 226
},
{
"epoch": 0.7560366361365529,
"grad_norm": 0.3509015504877034,
"learning_rate": 7.657172183731076e-06,
"loss": 1.4074,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 227
},
{
"epoch": 0.7593671940049959,
"grad_norm": 0.4191818637620366,
"learning_rate": 7.654320987654322e-06,
"loss": 1.434,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 228
},
{
"epoch": 0.7626977518734388,
"grad_norm": 0.38073125720358314,
"learning_rate": 7.651456560969322e-06,
"loss": 1.4315,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 229
},
{
"epoch": 0.7660283097418817,
"grad_norm": 0.3489534004367162,
"learning_rate": 7.648578811369509e-06,
"loss": 1.4292,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 230
},
{
"epoch": 0.7693588676103247,
"grad_norm": 0.39880199669766575,
"learning_rate": 7.645687645687645e-06,
"loss": 1.4797,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 231
},
{
"epoch": 0.7726894254787677,
"grad_norm": 0.3377554646810836,
"learning_rate": 7.642782969885774e-06,
"loss": 1.3638,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 232
},
{
"epoch": 0.7760199833472107,
"grad_norm": 0.45577113603344144,
"learning_rate": 7.639864689045015e-06,
"loss": 1.5272,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 233
},
{
"epoch": 0.7793505412156536,
"grad_norm": 0.3872639106321951,
"learning_rate": 7.636932707355241e-06,
"loss": 1.5223,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 234
},
{
"epoch": 0.7826810990840966,
"grad_norm": 0.41241615465906434,
"learning_rate": 7.633986928104575e-06,
"loss": 1.4047,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 235
},
{
"epoch": 0.7860116569525396,
"grad_norm": 0.350902547985464,
"learning_rate": 7.631027253668762e-06,
"loss": 1.4599,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 236
},
{
"epoch": 0.7893422148209825,
"grad_norm": 0.36780129033305325,
"learning_rate": 7.6280535855003936e-06,
"loss": 1.4872,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 237
},
{
"epoch": 0.7926727726894255,
"grad_norm": 0.3504301681190647,
"learning_rate": 7.625065824117956e-06,
"loss": 1.4508,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 238
},
{
"epoch": 0.7960033305578684,
"grad_norm": 0.425786005279154,
"learning_rate": 7.622063869094748e-06,
"loss": 1.5359,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 239
},
{
"epoch": 0.7993338884263114,
"grad_norm": 0.3423914333711706,
"learning_rate": 7.619047619047619e-06,
"loss": 1.5116,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 240
},
{
"epoch": 0.8026644462947544,
"grad_norm": 0.39752748882813016,
"learning_rate": 7.616016971625564e-06,
"loss": 1.3967,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 241
},
{
"epoch": 0.8059950041631974,
"grad_norm": 0.35349720101513005,
"learning_rate": 7.61297182349814e-06,
"loss": 1.428,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 242
},
{
"epoch": 0.8093255620316403,
"grad_norm": 0.3592529486243108,
"learning_rate": 7.609912070343725e-06,
"loss": 1.4716,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 243
},
{
"epoch": 0.8126561199000832,
"grad_norm": 0.41007914987868593,
"learning_rate": 7.606837606837607e-06,
"loss": 1.4601,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 244
},
{
"epoch": 0.8159866777685262,
"grad_norm": 0.4368820717106569,
"learning_rate": 7.603748326639893e-06,
"loss": 1.4299,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 245
},
{
"epoch": 0.8193172356369692,
"grad_norm": 0.34781376516299506,
"learning_rate": 7.600644122383253e-06,
"loss": 1.3832,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 246
},
{
"epoch": 0.8226477935054122,
"grad_norm": 0.4378928638690168,
"learning_rate": 7.597524885660478e-06,
"loss": 1.5006,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 247
},
{
"epoch": 0.8259783513738551,
"grad_norm": 0.38866511125189074,
"learning_rate": 7.594390507011865e-06,
"loss": 1.3808,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 248
},
{
"epoch": 0.829308909242298,
"grad_norm": 0.3796151796802332,
"learning_rate": 7.591240875912408e-06,
"loss": 1.4048,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 249
},
{
"epoch": 0.832639467110741,
"grad_norm": 0.47512939093169254,
"learning_rate": 7.588075880758807e-06,
"loss": 1.4533,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 250
},
{
"epoch": 0.835970024979184,
"grad_norm": 0.4002177494781384,
"learning_rate": 7.584895408856289e-06,
"loss": 1.4364,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 251
},
{
"epoch": 0.839300582847627,
"grad_norm": 0.41628446885968545,
"learning_rate": 7.581699346405228e-06,
"loss": 1.5213,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 252
},
{
"epoch": 0.8426311407160699,
"grad_norm": 0.41586597700526384,
"learning_rate": 7.578487578487578e-06,
"loss": 1.4163,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 253
},
{
"epoch": 0.8459616985845129,
"grad_norm": 0.37080164887555395,
"learning_rate": 7.575259989053093e-06,
"loss": 1.4262,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 254
},
{
"epoch": 0.8492922564529559,
"grad_norm": 0.44276862899193814,
"learning_rate": 7.57201646090535e-06,
"loss": 1.4434,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 255
},
{
"epoch": 0.8526228143213989,
"grad_norm": 0.3565514945143501,
"learning_rate": 7.568756875687569e-06,
"loss": 1.4628,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 256
},
{
"epoch": 0.8559533721898418,
"grad_norm": 0.3424453222650746,
"learning_rate": 7.565481113868211e-06,
"loss": 1.4397,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 257
},
{
"epoch": 0.8592839300582847,
"grad_norm": 0.36361177745212486,
"learning_rate": 7.562189054726368e-06,
"loss": 1.434,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 258
},
{
"epoch": 0.8626144879267277,
"grad_norm": 0.39961369778575284,
"learning_rate": 7.558880576336936e-06,
"loss": 1.3263,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 259
},
{
"epoch": 0.8659450457951707,
"grad_norm": 0.3694683835624918,
"learning_rate": 7.555555555555556e-06,
"loss": 1.465,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 260
},
{
"epoch": 0.8692756036636137,
"grad_norm": 0.38926907075141,
"learning_rate": 7.552213868003341e-06,
"loss": 1.4639,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 261
},
{
"epoch": 0.8726061615320566,
"grad_norm": 0.41002402289266,
"learning_rate": 7.548855388051367e-06,
"loss": 1.4583,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 262
},
{
"epoch": 0.8759367194004996,
"grad_norm": 0.39476689396263037,
"learning_rate": 7.545479988804925e-06,
"loss": 1.5369,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 263
},
{
"epoch": 0.8792672772689425,
"grad_norm": 0.42338165790994337,
"learning_rate": 7.542087542087541e-06,
"loss": 1.4149,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 264
},
{
"epoch": 0.8825978351373855,
"grad_norm": 0.37580056414171503,
"learning_rate": 7.538677918424753e-06,
"loss": 1.4767,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 265
},
{
"epoch": 0.8859283930058285,
"grad_norm": 0.35524395112624974,
"learning_rate": 7.535250987027637e-06,
"loss": 1.4565,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 266
},
{
"epoch": 0.8892589508742714,
"grad_norm": 0.333001020301385,
"learning_rate": 7.531806615776081e-06,
"loss": 1.4653,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 267
},
{
"epoch": 0.8925895087427144,
"grad_norm": 0.325740334034441,
"learning_rate": 7.5283446712018136e-06,
"loss": 1.4583,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 268
},
{
"epoch": 0.8959200666111574,
"grad_norm": 0.3579186582787629,
"learning_rate": 7.524865018471157e-06,
"loss": 1.441,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 269
},
{
"epoch": 0.8992506244796004,
"grad_norm": 0.38100337783570354,
"learning_rate": 7.521367521367521e-06,
"loss": 1.4725,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 270
},
{
"epoch": 0.9025811823480433,
"grad_norm": 0.4439224251441086,
"learning_rate": 7.5178520422736365e-06,
"loss": 1.4433,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 271
},
{
"epoch": 0.9059117402164862,
"grad_norm": 0.36404138775247186,
"learning_rate": 7.514318442153494e-06,
"loss": 1.4502,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 272
},
{
"epoch": 0.9092422980849292,
"grad_norm": 0.3739016590981095,
"learning_rate": 7.5107665805340226e-06,
"loss": 1.5614,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 273
},
{
"epoch": 0.9125728559533722,
"grad_norm": 0.34618213523089303,
"learning_rate": 7.5071963154864715e-06,
"loss": 1.4818,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 274
},
{
"epoch": 0.9159034138218152,
"grad_norm": 0.3516565079552471,
"learning_rate": 7.5036075036075024e-06,
"loss": 1.4811,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 275
},
{
"epoch": 0.9192339716902581,
"grad_norm": 0.372847709765313,
"learning_rate": 7.499999999999999e-06,
"loss": 1.4314,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 276
},
{
"epoch": 0.922564529558701,
"grad_norm": 0.33633722585110437,
"learning_rate": 7.496373658253553e-06,
"loss": 1.467,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 277
},
{
"epoch": 0.925895087427144,
"grad_norm": 0.3502677047499933,
"learning_rate": 7.4927283304246645e-06,
"loss": 1.465,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 278
},
{
"epoch": 0.929225645295587,
"grad_norm": 0.3301559549021256,
"learning_rate": 7.4890638670166225e-06,
"loss": 1.4345,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 279
},
{
"epoch": 0.93255620316403,
"grad_norm": 0.3995138440783666,
"learning_rate": 7.485380116959064e-06,
"loss": 1.3383,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 280
},
{
"epoch": 0.9358867610324729,
"grad_norm": 0.3871081436839965,
"learning_rate": 7.481676927587217e-06,
"loss": 1.4796,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 281
},
{
"epoch": 0.9392173189009159,
"grad_norm": 0.4376413636164512,
"learning_rate": 7.4779541446208115e-06,
"loss": 1.4644,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 282
},
{
"epoch": 0.9425478767693589,
"grad_norm": 0.43206377875237645,
"learning_rate": 7.474211612142647e-06,
"loss": 1.4107,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 283
},
{
"epoch": 0.9458784346378019,
"grad_norm": 0.4025303715871277,
"learning_rate": 7.470449172576832e-06,
"loss": 1.4318,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 284
},
{
"epoch": 0.9492089925062448,
"grad_norm": 0.37724819780360036,
"learning_rate": 7.466666666666667e-06,
"loss": 1.4454,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 285
},
{
"epoch": 0.9525395503746877,
"grad_norm": 0.35328542805788227,
"learning_rate": 7.462863933452169e-06,
"loss": 1.4175,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 286
},
{
"epoch": 0.9558701082431307,
"grad_norm": 0.3788093515621439,
"learning_rate": 7.459040810247245e-06,
"loss": 1.4429,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 287
},
{
"epoch": 0.9592006661115737,
"grad_norm": 0.4018461850957888,
"learning_rate": 7.455197132616486e-06,
"loss": 1.4679,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 288
},
{
"epoch": 0.9625312239800167,
"grad_norm": 0.3792573314031364,
"learning_rate": 7.451332734351601e-06,
"loss": 1.5191,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 289
},
{
"epoch": 0.9658617818484596,
"grad_norm": 0.4173737668171256,
"learning_rate": 7.447447447447447e-06,
"loss": 1.4999,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 290
},
{
"epoch": 0.9691923397169026,
"grad_norm": 0.34073643176316165,
"learning_rate": 7.443541102077687e-06,
"loss": 1.3667,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 291
},
{
"epoch": 0.9725228975853455,
"grad_norm": 0.37255255694817807,
"learning_rate": 7.439613526570048e-06,
"loss": 1.4196,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 292
},
{
"epoch": 0.9758534554537885,
"grad_norm": 0.3751657637349412,
"learning_rate": 7.435664547381168e-06,
"loss": 1.4545,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 293
},
{
"epoch": 0.9791840133222315,
"grad_norm": 0.45200307278108437,
"learning_rate": 7.431693989071039e-06,
"loss": 1.4199,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 294
},
{
"epoch": 0.9825145711906744,
"grad_norm": 0.3889800375255201,
"learning_rate": 7.427701674277017e-06,
"loss": 1.4801,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 295
},
{
"epoch": 0.9858451290591174,
"grad_norm": 0.3473904572951369,
"learning_rate": 7.4236874236874235e-06,
"loss": 1.393,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 296
},
{
"epoch": 0.9891756869275604,
"grad_norm": 0.3581161377664693,
"learning_rate": 7.419651056014692e-06,
"loss": 1.4073,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 297
},
{
"epoch": 0.9925062447960034,
"grad_norm": 0.35620326479274533,
"learning_rate": 7.415592387968079e-06,
"loss": 1.43,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 298
},
{
"epoch": 0.9958368026644463,
"grad_norm": 0.3676482591149261,
"learning_rate": 7.4115112342259155e-06,
"loss": 1.4148,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 299
},
{
"epoch": 0.9991673605328892,
"grad_norm": 0.43615077194471996,
"learning_rate": 7.407407407407408e-06,
"loss": 1.4154,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 300
},
{
"epoch": 1.0,
"grad_norm": 0.5905255990041776,
"learning_rate": 7.403280718043948e-06,
"loss": 1.4231,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 301
},
{
"epoch": 1.003330557868443,
"grad_norm": 0.4020383385971024,
"learning_rate": 7.399130974549968e-06,
"loss": 1.4394,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 302
},
{
"epoch": 1.0066611157368859,
"grad_norm": 0.3865309278317666,
"learning_rate": 7.394957983193276e-06,
"loss": 1.4413,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 303
},
{
"epoch": 1.009991673605329,
"grad_norm": 0.40327921143010825,
"learning_rate": 7.390761548064918e-06,
"loss": 1.4576,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 304
},
{
"epoch": 1.0133222314737718,
"grad_norm": 0.4070700680157323,
"learning_rate": 7.386541471048513e-06,
"loss": 1.4817,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 305
},
{
"epoch": 1.0166527893422148,
"grad_norm": 0.43460108634631706,
"learning_rate": 7.382297551789077e-06,
"loss": 1.3939,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 306
},
{
"epoch": 1.0199833472106579,
"grad_norm": 0.42277608172713,
"learning_rate": 7.378029587661315e-06,
"loss": 1.4239,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 307
},
{
"epoch": 1.0233139050791007,
"grad_norm": 0.37583795807106635,
"learning_rate": 7.373737373737374e-06,
"loss": 1.4878,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 308
},
{
"epoch": 1.0266444629475437,
"grad_norm": 0.4013147771199415,
"learning_rate": 7.3694207027540355e-06,
"loss": 1.3758,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 309
},
{
"epoch": 1.0299750208159866,
"grad_norm": 0.38150977748656323,
"learning_rate": 7.365079365079365e-06,
"loss": 1.4122,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 310
},
{
"epoch": 1.0333055786844296,
"grad_norm": 0.36288526161353013,
"learning_rate": 7.360713148678764e-06,
"loss": 1.4775,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 311
},
{
"epoch": 1.0366361365528727,
"grad_norm": 0.5122154405495047,
"learning_rate": 7.35632183908046e-06,
"loss": 1.4385,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 312
},
{
"epoch": 1.0399666944213155,
"grad_norm": 0.4907680124574417,
"learning_rate": 7.351905219340377e-06,
"loss": 1.4321,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 313
},
{
"epoch": 1.0432972522897586,
"grad_norm": 0.3750039319171418,
"learning_rate": 7.347463070006422e-06,
"loss": 1.4609,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 314
},
{
"epoch": 1.0466278101582014,
"grad_norm": 0.415847010986813,
"learning_rate": 7.342995169082125e-06,
"loss": 1.399,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 315
},
{
"epoch": 1.0499583680266444,
"grad_norm": 0.40484373034787197,
"learning_rate": 7.338501291989663e-06,
"loss": 1.4082,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 316
},
{
"epoch": 1.0532889258950875,
"grad_norm": 0.3556695114896482,
"learning_rate": 7.333981211532231e-06,
"loss": 1.5045,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 317
},
{
"epoch": 1.0566194837635303,
"grad_norm": 0.4217178250762373,
"learning_rate": 7.329434697855749e-06,
"loss": 1.5051,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 318
},
{
"epoch": 1.0599500416319734,
"grad_norm": 0.44541116731095065,
"learning_rate": 7.324861518409905e-06,
"loss": 1.454,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 319
},
{
"epoch": 1.0632805995004164,
"grad_norm": 0.3722519430085194,
"learning_rate": 7.320261437908496e-06,
"loss": 1.4864,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 320
},
{
"epoch": 1.0666111573688593,
"grad_norm": 0.49955941789670055,
"learning_rate": 7.315634218289086e-06,
"loss": 1.4328,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 321
},
{
"epoch": 1.0699417152373023,
"grad_norm": 0.559077472675475,
"learning_rate": 7.310979618671926e-06,
"loss": 1.4387,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 322
},
{
"epoch": 1.0732722731057451,
"grad_norm": 0.38492000673298576,
"learning_rate": 7.306297395318167e-06,
"loss": 1.4173,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 323
},
{
"epoch": 1.0766028309741882,
"grad_norm": 0.46264263086480695,
"learning_rate": 7.301587301587301e-06,
"loss": 1.458,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 324
},
{
"epoch": 1.0799333888426312,
"grad_norm": 0.48393689092527553,
"learning_rate": 7.296849087893865e-06,
"loss": 1.4984,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 325
},
{
"epoch": 1.083263946711074,
"grad_norm": 0.3833552546352091,
"learning_rate": 7.29208250166334e-06,
"loss": 1.4801,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 326
},
{
"epoch": 1.0865945045795171,
"grad_norm": 0.45636346067253053,
"learning_rate": 7.287287287287286e-06,
"loss": 1.4335,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 327
},
{
"epoch": 1.08992506244796,
"grad_norm": 0.5170759134234261,
"learning_rate": 7.282463186077643e-06,
"loss": 1.4619,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 328
},
{
"epoch": 1.093255620316403,
"grad_norm": 0.41413948804668765,
"learning_rate": 7.277609936220207e-06,
"loss": 1.4976,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 329
},
{
"epoch": 1.096586178184846,
"grad_norm": 0.47675818666743885,
"learning_rate": 7.272727272727272e-06,
"loss": 1.5166,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 330
},
{
"epoch": 1.0999167360532889,
"grad_norm": 0.4446284191251516,
"learning_rate": 7.267814927389396e-06,
"loss": 1.3756,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 331
},
{
"epoch": 1.103247293921732,
"grad_norm": 0.3712448236233366,
"learning_rate": 7.262872628726287e-06,
"loss": 1.4177,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 332
},
{
"epoch": 1.1065778517901748,
"grad_norm": 0.354780077610888,
"learning_rate": 7.257900101936799e-06,
"loss": 1.3888,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 333
},
{
"epoch": 1.1099084096586178,
"grad_norm": 0.4287556354375581,
"learning_rate": 7.252897068847988e-06,
"loss": 1.463,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 334
},
{
"epoch": 1.1132389675270609,
"grad_norm": 0.43855718184558823,
"learning_rate": 7.247863247863247e-06,
"loss": 1.4375,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 335
},
{
"epoch": 1.1165695253955037,
"grad_norm": 0.36652449074881177,
"learning_rate": 7.242798353909463e-06,
"loss": 1.4724,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 336
},
{
"epoch": 1.1199000832639467,
"grad_norm": 0.41471476618444547,
"learning_rate": 7.237702098383213e-06,
"loss": 1.4368,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 337
},
{
"epoch": 1.1232306411323896,
"grad_norm": 0.3584246686612814,
"learning_rate": 7.2325741890959285e-06,
"loss": 1.507,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 338
},
{
"epoch": 1.1265611990008326,
"grad_norm": 0.35472951006324893,
"learning_rate": 7.227414330218068e-06,
"loss": 1.3847,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 339
},
{
"epoch": 1.1298917568692757,
"grad_norm": 0.40770232084467445,
"learning_rate": 7.222222222222222e-06,
"loss": 1.4722,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 340
},
{
"epoch": 1.1332223147377185,
"grad_norm": 0.3854760192656062,
"learning_rate": 7.216997561825147e-06,
"loss": 1.4397,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 341
},
{
"epoch": 1.1365528726061616,
"grad_norm": 0.3425435570180868,
"learning_rate": 7.211740041928721e-06,
"loss": 1.3917,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 342
},
{
"epoch": 1.1398834304746046,
"grad_norm": 0.3629363871231361,
"learning_rate": 7.206449351559762e-06,
"loss": 1.4329,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 343
},
{
"epoch": 1.1432139883430474,
"grad_norm": 0.3746351865474382,
"learning_rate": 7.20112517580872e-06,
"loss": 1.4325,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 344
},
{
"epoch": 1.1465445462114905,
"grad_norm": 0.35633065876642767,
"learning_rate": 7.195767195767195e-06,
"loss": 1.4802,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 345
},
{
"epoch": 1.1498751040799333,
"grad_norm": 0.41086591430313346,
"learning_rate": 7.1903750884642605e-06,
"loss": 1.386,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 346
},
{
"epoch": 1.1532056619483764,
"grad_norm": 0.4248601636564269,
"learning_rate": 7.184948526801562e-06,
"loss": 1.3764,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 347
},
{
"epoch": 1.1565362198168194,
"grad_norm": 0.3677689809276377,
"learning_rate": 7.179487179487179e-06,
"loss": 1.3977,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 348
},
{
"epoch": 1.1598667776852623,
"grad_norm": 0.4562607243713519,
"learning_rate": 7.173990710968203e-06,
"loss": 1.4743,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 349
},
{
"epoch": 1.1631973355537053,
"grad_norm": 0.36851546433374166,
"learning_rate": 7.168458781362006e-06,
"loss": 1.3998,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 350
},
{
"epoch": 1.1665278934221481,
"grad_norm": 0.3440172529786023,
"learning_rate": 7.1628910463861915e-06,
"loss": 1.4388,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 351
},
{
"epoch": 1.1698584512905912,
"grad_norm": 0.34899417175359176,
"learning_rate": 7.157287157287158e-06,
"loss": 1.4109,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 352
},
{
"epoch": 1.1731890091590342,
"grad_norm": 0.3369095274891404,
"learning_rate": 7.151646760767281e-06,
"loss": 1.4721,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 353
},
{
"epoch": 1.176519567027477,
"grad_norm": 0.3373083746918916,
"learning_rate": 7.145969498910675e-06,
"loss": 1.3879,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 354
},
{
"epoch": 1.1798501248959201,
"grad_norm": 0.3127699546260214,
"learning_rate": 7.140255009107467e-06,
"loss": 1.4067,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 355
},
{
"epoch": 1.183180682764363,
"grad_norm": 0.3455714541263257,
"learning_rate": 7.1345029239766076e-06,
"loss": 1.4729,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 356
},
{
"epoch": 1.186511240632806,
"grad_norm": 0.35459135197814406,
"learning_rate": 7.128712871287129e-06,
"loss": 1.4845,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 357
},
{
"epoch": 1.189841798501249,
"grad_norm": 0.3382392537839561,
"learning_rate": 7.122884473877851e-06,
"loss": 1.4796,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 358
},
{
"epoch": 1.1931723563696919,
"grad_norm": 0.3229617810865785,
"learning_rate": 7.117017349575488e-06,
"loss": 1.4258,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 359
},
{
"epoch": 1.196502914238135,
"grad_norm": 0.4132878845320615,
"learning_rate": 7.11111111111111e-06,
"loss": 1.4344,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 360
},
{
"epoch": 1.1998334721065778,
"grad_norm": 0.3909252234186588,
"learning_rate": 7.105165366034932e-06,
"loss": 1.4136,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 361
},
{
"epoch": 1.2031640299750208,
"grad_norm": 0.4166542946239009,
"learning_rate": 7.0991797166293805e-06,
"loss": 1.418,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 362
},
{
"epoch": 1.2064945878434639,
"grad_norm": 0.3978265092622875,
"learning_rate": 7.093153759820426e-06,
"loss": 1.4778,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 363
},
{
"epoch": 1.2098251457119067,
"grad_norm": 0.3362476483926624,
"learning_rate": 7.087087087087086e-06,
"loss": 1.4045,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 364
},
{
"epoch": 1.2131557035803497,
"grad_norm": 0.34227618124914144,
"learning_rate": 7.0809792843691135e-06,
"loss": 1.3691,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 365
},
{
"epoch": 1.2164862614487926,
"grad_norm": 0.4154500404546309,
"learning_rate": 7.074829931972789e-06,
"loss": 1.4296,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 366
},
{
"epoch": 1.2198168193172356,
"grad_norm": 0.3910386371341375,
"learning_rate": 7.068638604474782e-06,
"loss": 1.4128,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 367
},
{
"epoch": 1.2231473771856787,
"grad_norm": 0.32973016037230485,
"learning_rate": 7.062404870624048e-06,
"loss": 1.3952,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 368
},
{
"epoch": 1.2264779350541215,
"grad_norm": 0.3476414929125133,
"learning_rate": 7.056128293241695e-06,
"loss": 1.4132,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 369
},
{
"epoch": 1.2298084929225646,
"grad_norm": 0.3490075847160727,
"learning_rate": 7.0498084291187725e-06,
"loss": 1.5034,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 370
},
{
"epoch": 1.2331390507910074,
"grad_norm": 0.4213209017684047,
"learning_rate": 7.043444828911956e-06,
"loss": 1.4407,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 371
},
{
"epoch": 1.2364696086594504,
"grad_norm": 0.41156726116014214,
"learning_rate": 7.037037037037037e-06,
"loss": 1.4922,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 372
},
{
"epoch": 1.2398001665278935,
"grad_norm": 0.3274736563867899,
"learning_rate": 7.0305845915602e-06,
"loss": 1.4443,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 373
},
{
"epoch": 1.2431307243963363,
"grad_norm": 0.4016877039684572,
"learning_rate": 7.024087024087023e-06,
"loss": 1.4765,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 374
},
{
"epoch": 1.2464612822647794,
"grad_norm": 0.37926187648963133,
"learning_rate": 7.017543859649123e-06,
"loss": 1.4944,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 375
},
{
"epoch": 1.2497918401332222,
"grad_norm": 0.3995775555374175,
"learning_rate": 7.0109546165884185e-06,
"loss": 1.4737,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 376
},
{
"epoch": 1.2531223980016652,
"grad_norm": 0.4179330927454956,
"learning_rate": 7.0043188064389475e-06,
"loss": 1.396,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 377
},
{
"epoch": 1.2564529558701083,
"grad_norm": 0.4026676583822718,
"learning_rate": 6.997635933806146e-06,
"loss": 1.5024,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 378
},
{
"epoch": 1.2597835137385511,
"grad_norm": 0.3729935293489866,
"learning_rate": 6.9909054962435735e-06,
"loss": 1.5035,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 379
},
{
"epoch": 1.2631140716069942,
"grad_norm": 0.37785861617292904,
"learning_rate": 6.984126984126983e-06,
"loss": 1.4859,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 380
},
{
"epoch": 1.266444629475437,
"grad_norm": 0.34618072727066834,
"learning_rate": 6.977299880525687e-06,
"loss": 1.3753,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 381
},
{
"epoch": 1.26977518734388,
"grad_norm": 0.3603657688818211,
"learning_rate": 6.970423661071143e-06,
"loss": 1.4396,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 382
},
{
"epoch": 1.2731057452123231,
"grad_norm": 0.31695078316874364,
"learning_rate": 6.963497793822704e-06,
"loss": 1.4512,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 383
},
{
"epoch": 1.2764363030807662,
"grad_norm": 0.3998834526312468,
"learning_rate": 6.956521739130433e-06,
"loss": 1.4068,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 384
},
{
"epoch": 1.279766860949209,
"grad_norm": 0.40218592316674945,
"learning_rate": 6.949494949494949e-06,
"loss": 1.4314,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 385
},
{
"epoch": 1.2830974188176518,
"grad_norm": 0.4377216092057675,
"learning_rate": 6.942416869424169e-06,
"loss": 1.4159,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 386
},
{
"epoch": 1.2864279766860949,
"grad_norm": 0.3806613338175727,
"learning_rate": 6.935286935286935e-06,
"loss": 1.4383,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 387
},
{
"epoch": 1.289758534554538,
"grad_norm": 0.41315217581288083,
"learning_rate": 6.928104575163398e-06,
"loss": 1.4639,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 388
},
{
"epoch": 1.293089092422981,
"grad_norm": 0.4242068360276873,
"learning_rate": 6.920869208692086e-06,
"loss": 1.5043,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 389
},
{
"epoch": 1.2964196502914238,
"grad_norm": 0.40526133848179174,
"learning_rate": 6.913580246913578e-06,
"loss": 1.4969,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 390
},
{
"epoch": 1.2997502081598669,
"grad_norm": 0.4390648977103527,
"learning_rate": 6.9062370921106965e-06,
"loss": 1.4634,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 391
},
{
"epoch": 1.3030807660283097,
"grad_norm": 0.3293053257002483,
"learning_rate": 6.898839137645108e-06,
"loss": 1.4837,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 392
},
{
"epoch": 1.3064113238967527,
"grad_norm": 0.3741205703169676,
"learning_rate": 6.891385767790261e-06,
"loss": 1.3888,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 393
},
{
"epoch": 1.3097418817651958,
"grad_norm": 0.36736277922290345,
"learning_rate": 6.883876357560567e-06,
"loss": 1.4422,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 394
},
{
"epoch": 1.3130724396336386,
"grad_norm": 0.34987451065304387,
"learning_rate": 6.876310272536688e-06,
"loss": 1.4384,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 395
},
{
"epoch": 1.3164029975020817,
"grad_norm": 0.3574591374681954,
"learning_rate": 6.868686868686868e-06,
"loss": 1.4453,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 396
},
{
"epoch": 1.3197335553705245,
"grad_norm": 0.31108139602911883,
"learning_rate": 6.861005492184199e-06,
"loss": 1.4302,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 397
},
{
"epoch": 1.3230641132389676,
"grad_norm": 0.3317920901111113,
"learning_rate": 6.853265479219677e-06,
"loss": 1.4599,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 398
},
{
"epoch": 1.3263946711074106,
"grad_norm": 0.3319586529185681,
"learning_rate": 6.8454661558109825e-06,
"loss": 1.4349,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 399
},
{
"epoch": 1.3297252289758534,
"grad_norm": 0.35385561486286676,
"learning_rate": 6.837606837606837e-06,
"loss": 1.4262,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 400
},
{
"epoch": 1.3330557868442965,
"grad_norm": 0.36123629997437273,
"learning_rate": 6.82968682968683e-06,
"loss": 1.4475,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 401
},
{
"epoch": 1.3363863447127393,
"grad_norm": 0.3678679588945442,
"learning_rate": 6.821705426356589e-06,
"loss": 1.4662,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 402
},
{
"epoch": 1.3397169025811824,
"grad_norm": 0.35623277676543963,
"learning_rate": 6.813661910938175e-06,
"loss": 1.4157,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 403
},
{
"epoch": 1.3430474604496254,
"grad_norm": 0.3670378130601921,
"learning_rate": 6.805555555555554e-06,
"loss": 1.4289,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 404
},
{
"epoch": 1.3463780183180682,
"grad_norm": 0.37365353793241013,
"learning_rate": 6.797385620915031e-06,
"loss": 1.4758,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 405
},
{
"epoch": 1.3497085761865113,
"grad_norm": 0.4227767618895852,
"learning_rate": 6.78915135608049e-06,
"loss": 1.4522,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 406
},
{
"epoch": 1.3530391340549541,
"grad_norm": 0.392419028331304,
"learning_rate": 6.780851998243303e-06,
"loss": 1.4386,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 407
},
{
"epoch": 1.3563696919233972,
"grad_norm": 0.34332622194519336,
"learning_rate": 6.772486772486772e-06,
"loss": 1.4143,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 408
},
{
"epoch": 1.3597002497918402,
"grad_norm": 0.39554534288670906,
"learning_rate": 6.76405489154493e-06,
"loss": 1.4289,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 409
},
{
"epoch": 1.363030807660283,
"grad_norm": 0.3680781980427255,
"learning_rate": 6.7555555555555545e-06,
"loss": 1.4604,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 410
},
{
"epoch": 1.3663613655287261,
"grad_norm": 0.359696703224119,
"learning_rate": 6.7469879518072274e-06,
"loss": 1.4552,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 411
},
{
"epoch": 1.369691923397169,
"grad_norm": 0.43946142988468057,
"learning_rate": 6.738351254480287e-06,
"loss": 1.4649,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 412
},
{
"epoch": 1.373022481265612,
"grad_norm": 0.38158471914984216,
"learning_rate": 6.729644624381466e-06,
"loss": 1.4553,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 413
},
{
"epoch": 1.376353039134055,
"grad_norm": 0.3785264160376015,
"learning_rate": 6.720867208672086e-06,
"loss": 1.457,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 414
},
{
"epoch": 1.3796835970024979,
"grad_norm": 0.40275915279313634,
"learning_rate": 6.712018140589569e-06,
"loss": 1.4665,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 415
},
{
"epoch": 1.383014154870941,
"grad_norm": 0.37268382461278277,
"learning_rate": 6.703096539162113e-06,
"loss": 1.377,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 416
},
{
"epoch": 1.3863447127393838,
"grad_norm": 0.3362832443073036,
"learning_rate": 6.694101508916324e-06,
"loss": 1.4122,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 417
},
{
"epoch": 1.3896752706078268,
"grad_norm": 0.3428291854645596,
"learning_rate": 6.6850321395775945e-06,
"loss": 1.3466,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 418
},
{
"epoch": 1.3930058284762699,
"grad_norm": 0.38976496538071015,
"learning_rate": 6.675887505763023e-06,
"loss": 1.4711,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 419
},
{
"epoch": 1.3963363863447127,
"grad_norm": 0.4034367524201395,
"learning_rate": 6.666666666666666e-06,
"loss": 1.5079,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 420
},
{
"epoch": 1.3996669442131557,
"grad_norm": 0.38251655422807695,
"learning_rate": 6.657368665736867e-06,
"loss": 1.4715,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 421
},
{
"epoch": 1.4029975020815986,
"grad_norm": 0.3636615753904805,
"learning_rate": 6.647992530345471e-06,
"loss": 1.4175,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 422
},
{
"epoch": 1.4063280599500416,
"grad_norm": 0.3543871236347375,
"learning_rate": 6.6385372714486634e-06,
"loss": 1.4008,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 423
},
{
"epoch": 1.4096586178184847,
"grad_norm": 0.4270397824248548,
"learning_rate": 6.6290018832391705e-06,
"loss": 1.4082,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 424
},
{
"epoch": 1.4129891756869275,
"grad_norm": 0.41956585580281563,
"learning_rate": 6.6193853427895966e-06,
"loss": 1.4075,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 425
},
{
"epoch": 1.4163197335553706,
"grad_norm": 0.47761484099497725,
"learning_rate": 6.60968660968661e-06,
"loss": 1.4104,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 426
},
{
"epoch": 1.4196502914238134,
"grad_norm": 0.4078261265233408,
"learning_rate": 6.599904625655699e-06,
"loss": 1.4317,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 427
},
{
"epoch": 1.4229808492922564,
"grad_norm": 0.37691692681004796,
"learning_rate": 6.590038314176245e-06,
"loss": 1.391,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 428
},
{
"epoch": 1.4263114071606995,
"grad_norm": 0.3288508827565593,
"learning_rate": 6.580086580086579e-06,
"loss": 1.401,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 429
},
{
"epoch": 1.4296419650291423,
"grad_norm": 0.338319616372442,
"learning_rate": 6.570048309178745e-06,
"loss": 1.4178,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 430
},
{
"epoch": 1.4329725228975854,
"grad_norm": 0.414862604672987,
"learning_rate": 6.559922367782628e-06,
"loss": 1.4642,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 431
},
{
"epoch": 1.4363030807660282,
"grad_norm": 0.3949017633125201,
"learning_rate": 6.54970760233918e-06,
"loss": 1.3643,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 432
},
{
"epoch": 1.4396336386344712,
"grad_norm": 0.3872663647349424,
"learning_rate": 6.53940283896231e-06,
"loss": 1.3998,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 433
},
{
"epoch": 1.4429641965029143,
"grad_norm": 0.3778182716944692,
"learning_rate": 6.529006882989183e-06,
"loss": 1.421,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 434
},
{
"epoch": 1.4462947543713571,
"grad_norm": 0.3368637084806252,
"learning_rate": 6.518518518518519e-06,
"loss": 1.4562,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 435
},
{
"epoch": 1.4496253122398002,
"grad_norm": 0.37088068849156625,
"learning_rate": 6.507936507936509e-06,
"loss": 1.389,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 436
},
{
"epoch": 1.452955870108243,
"grad_norm": 0.4171977510324979,
"learning_rate": 6.497259591429994e-06,
"loss": 1.4314,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 437
},
{
"epoch": 1.456286427976686,
"grad_norm": 0.36493233792748947,
"learning_rate": 6.486486486486486e-06,
"loss": 1.4239,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 438
},
{
"epoch": 1.4596169858451291,
"grad_norm": 0.36409025362836434,
"learning_rate": 6.475615887380592e-06,
"loss": 1.4011,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 439
},
{
"epoch": 1.462947543713572,
"grad_norm": 0.3432633374051585,
"learning_rate": 6.464646464646463e-06,
"loss": 1.4706,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 440
},
{
"epoch": 1.466278101582015,
"grad_norm": 0.36918146681400343,
"learning_rate": 6.453576864535769e-06,
"loss": 1.4048,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 441
},
{
"epoch": 1.4696086594504578,
"grad_norm": 0.3558974109435063,
"learning_rate": 6.442405708460755e-06,
"loss": 1.4233,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 442
},
{
"epoch": 1.4729392173189009,
"grad_norm": 0.3319935101093491,
"learning_rate": 6.431131592421914e-06,
"loss": 1.4557,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 443
},
{
"epoch": 1.476269775187344,
"grad_norm": 0.3957835276431251,
"learning_rate": 6.419753086419752e-06,
"loss": 1.4974,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 444
},
{
"epoch": 1.479600333055787,
"grad_norm": 0.46743126820019115,
"learning_rate": 6.408268733850127e-06,
"loss": 1.3428,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 445
},
{
"epoch": 1.4829308909242298,
"grad_norm": 0.32072751511352704,
"learning_rate": 6.396677050882658e-06,
"loss": 1.4252,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 446
},
{
"epoch": 1.4862614487926726,
"grad_norm": 0.3691624108782593,
"learning_rate": 6.384976525821596e-06,
"loss": 1.4288,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 447
},
{
"epoch": 1.4895920066611157,
"grad_norm": 0.41832466518878647,
"learning_rate": 6.373165618448636e-06,
"loss": 1.4287,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 448
},
{
"epoch": 1.4929225645295587,
"grad_norm": 0.3728200914294547,
"learning_rate": 6.361242759347024e-06,
"loss": 1.391,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 449
},
{
"epoch": 1.4962531223980018,
"grad_norm": 0.3489172461380398,
"learning_rate": 6.349206349206349e-06,
"loss": 1.4012,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 450
},
{
"epoch": 1.4995836802664446,
"grad_norm": 0.45831242097179337,
"learning_rate": 6.337054758107389e-06,
"loss": 1.4062,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 451
},
{
"epoch": 1.5029142381348874,
"grad_norm": 0.4485083988308969,
"learning_rate": 6.324786324786324e-06,
"loss": 1.4077,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 452
},
{
"epoch": 1.5062447960033305,
"grad_norm": 0.3469124587165823,
"learning_rate": 6.312399355877616e-06,
"loss": 1.3635,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 453
},
{
"epoch": 1.5095753538717736,
"grad_norm": 0.4359834643536742,
"learning_rate": 6.299892125134842e-06,
"loss": 1.3951,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 454
},
{
"epoch": 1.5129059117402166,
"grad_norm": 0.43347338145656295,
"learning_rate": 6.287262872628726e-06,
"loss": 1.438,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 455
},
{
"epoch": 1.5162364696086594,
"grad_norm": 0.3544519721589859,
"learning_rate": 6.274509803921569e-06,
"loss": 1.4028,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 456
},
{
"epoch": 1.5195670274771023,
"grad_norm": 0.4175623558211923,
"learning_rate": 6.261631089217296e-06,
"loss": 1.4649,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 457
},
{
"epoch": 1.5228975853455453,
"grad_norm": 0.47794327593006264,
"learning_rate": 6.248624862486248e-06,
"loss": 1.4552,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 458
},
{
"epoch": 1.5262281432139884,
"grad_norm": 0.5102221497723193,
"learning_rate": 6.235489220563847e-06,
"loss": 1.5577,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 459
},
{
"epoch": 1.5295587010824314,
"grad_norm": 0.361727454686882,
"learning_rate": 6.2222222222222215e-06,
"loss": 1.4977,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 460
},
{
"epoch": 1.5328892589508742,
"grad_norm": 0.43568797487755334,
"learning_rate": 6.208821887213847e-06,
"loss": 1.4417,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 461
},
{
"epoch": 1.536219816819317,
"grad_norm": 0.39795557103291623,
"learning_rate": 6.195286195286195e-06,
"loss": 1.4479,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 462
},
{
"epoch": 1.5395503746877601,
"grad_norm": 0.3699426752838303,
"learning_rate": 6.181613085166384e-06,
"loss": 1.4379,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 463
},
{
"epoch": 1.5428809325562032,
"grad_norm": 0.5138765482501748,
"learning_rate": 6.167800453514738e-06,
"loss": 1.4433,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 464
},
{
"epoch": 1.5462114904246462,
"grad_norm": 0.5597671339637968,
"learning_rate": 6.153846153846153e-06,
"loss": 1.4255,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 465
},
{
"epoch": 1.549542048293089,
"grad_norm": 0.4443208189107028,
"learning_rate": 6.1397479954180976e-06,
"loss": 1.4458,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 466
},
{
"epoch": 1.552872606161532,
"grad_norm": 0.41782304334586917,
"learning_rate": 6.125503742084053e-06,
"loss": 1.4362,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 467
},
{
"epoch": 1.556203164029975,
"grad_norm": 0.511701451750574,
"learning_rate": 6.11111111111111e-06,
"loss": 1.4378,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 468
},
{
"epoch": 1.559533721898418,
"grad_norm": 0.4272528437058103,
"learning_rate": 6.096567771960442e-06,
"loss": 1.4315,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 469
},
{
"epoch": 1.562864279766861,
"grad_norm": 0.42099653002903337,
"learning_rate": 6.0818713450292395e-06,
"loss": 1.4092,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 470
},
{
"epoch": 1.5661948376353039,
"grad_norm": 0.4635591149261861,
"learning_rate": 6.067019400352732e-06,
"loss": 1.4357,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 471
},
{
"epoch": 1.569525395503747,
"grad_norm": 0.5318262046494987,
"learning_rate": 6.052009456264775e-06,
"loss": 1.4753,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 472
},
{
"epoch": 1.5728559533721898,
"grad_norm": 0.4098578230232083,
"learning_rate": 6.036838978015449e-06,
"loss": 1.4192,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 473
},
{
"epoch": 1.5761865112406328,
"grad_norm": 0.4563174114919455,
"learning_rate": 6.021505376344085e-06,
"loss": 1.4676,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 474
},
{
"epoch": 1.5795170691090759,
"grad_norm": 0.5270544922424331,
"learning_rate": 6.006006006006005e-06,
"loss": 1.4267,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 475
},
{
"epoch": 1.5828476269775187,
"grad_norm": 0.3910787909582668,
"learning_rate": 5.990338164251208e-06,
"loss": 1.3766,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 476
},
{
"epoch": 1.5861781848459617,
"grad_norm": 0.4736515430850208,
"learning_rate": 5.974499089253187e-06,
"loss": 1.4437,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 477
},
{
"epoch": 1.5895087427144046,
"grad_norm": 0.5430796464569592,
"learning_rate": 5.958485958485957e-06,
"loss": 1.4482,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 478
},
{
"epoch": 1.5928393005828476,
"grad_norm": 0.38226206389298173,
"learning_rate": 5.942295887047268e-06,
"loss": 1.412,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 479
},
{
"epoch": 1.5961698584512907,
"grad_norm": 0.3721223079028304,
"learning_rate": 5.925925925925925e-06,
"loss": 1.45,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 480
},
{
"epoch": 1.5995004163197337,
"grad_norm": 0.3827064109331823,
"learning_rate": 5.909373060211049e-06,
"loss": 1.4217,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 481
},
{
"epoch": 1.6028309741881765,
"grad_norm": 0.33684324932641296,
"learning_rate": 5.892634207240949e-06,
"loss": 1.3557,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 482
},
{
"epoch": 1.6061615320566194,
"grad_norm": 0.31468847211788964,
"learning_rate": 5.875706214689265e-06,
"loss": 1.4122,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 483
},
{
"epoch": 1.6094920899250624,
"grad_norm": 0.4442799216781044,
"learning_rate": 5.858585858585859e-06,
"loss": 1.4285,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 484
},
{
"epoch": 1.6128226477935055,
"grad_norm": 0.4567121702156198,
"learning_rate": 5.841269841269841e-06,
"loss": 1.4764,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 485
},
{
"epoch": 1.6161532056619485,
"grad_norm": 0.3590206566567271,
"learning_rate": 5.82375478927203e-06,
"loss": 1.4229,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 486
},
{
"epoch": 1.6194837635303914,
"grad_norm": 0.3652198930331244,
"learning_rate": 5.806037251123956e-06,
"loss": 1.4151,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 487
},
{
"epoch": 1.6228143213988342,
"grad_norm": 0.35866861963268476,
"learning_rate": 5.7881136950904385e-06,
"loss": 1.3369,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 488
},
{
"epoch": 1.6261448792672772,
"grad_norm": 0.4750936045573692,
"learning_rate": 5.7699805068226105e-06,
"loss": 1.4715,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 489
},
{
"epoch": 1.6294754371357203,
"grad_norm": 0.3613198830707804,
"learning_rate": 5.7516339869281045e-06,
"loss": 1.4291,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 490
},
{
"epoch": 1.6328059950041633,
"grad_norm": 0.43606379412430957,
"learning_rate": 5.733070348454964e-06,
"loss": 1.4011,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 491
},
{
"epoch": 1.6361365528726062,
"grad_norm": 0.35042984426925494,
"learning_rate": 5.7142857142857145e-06,
"loss": 1.4368,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 492
},
{
"epoch": 1.639467110741049,
"grad_norm": 0.31661366243629,
"learning_rate": 5.695276114437791e-06,
"loss": 1.4541,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 493
},
{
"epoch": 1.642797668609492,
"grad_norm": 0.3561358967067642,
"learning_rate": 5.676037483266399e-06,
"loss": 1.433,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 494
},
{
"epoch": 1.646128226477935,
"grad_norm": 0.3931637346563919,
"learning_rate": 5.656565656565656e-06,
"loss": 1.4193,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 495
},
{
"epoch": 1.6494587843463782,
"grad_norm": 0.48631366553960975,
"learning_rate": 5.6368563685636855e-06,
"loss": 1.4012,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 496
},
{
"epoch": 1.652789342214821,
"grad_norm": 0.41348933242163105,
"learning_rate": 5.616905248807089e-06,
"loss": 1.3883,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 497
},
{
"epoch": 1.6561199000832638,
"grad_norm": 0.3541766139316355,
"learning_rate": 5.59670781893004e-06,
"loss": 1.363,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 498
},
{
"epoch": 1.6594504579517069,
"grad_norm": 0.410383164470969,
"learning_rate": 5.576259489302967e-06,
"loss": 1.3955,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 499
},
{
"epoch": 1.66278101582015,
"grad_norm": 0.4100549908496841,
"learning_rate": 5.555555555555555e-06,
"loss": 1.3663,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 500
},
{
"epoch": 1.666111573688593,
"grad_norm": 0.4122832272958553,
"learning_rate": 5.534591194968553e-06,
"loss": 1.5108,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 501
},
{
"epoch": 1.6694421315570358,
"grad_norm": 0.33209617039282874,
"learning_rate": 5.513361462728551e-06,
"loss": 1.4069,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 502
},
{
"epoch": 1.6727726894254786,
"grad_norm": 0.34650064809899755,
"learning_rate": 5.491861288039631e-06,
"loss": 1.3953,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 503
},
{
"epoch": 1.6761032472939217,
"grad_norm": 0.3583592015376779,
"learning_rate": 5.47008547008547e-06,
"loss": 1.4181,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 504
},
{
"epoch": 1.6794338051623647,
"grad_norm": 0.34343414571245584,
"learning_rate": 5.448028673835125e-06,
"loss": 1.418,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 505
},
{
"epoch": 1.6827643630308078,
"grad_norm": 0.35638669107128673,
"learning_rate": 5.425685425685425e-06,
"loss": 1.4052,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 506
},
{
"epoch": 1.6860949208992506,
"grad_norm": 0.3467424021658532,
"learning_rate": 5.403050108932461e-06,
"loss": 1.4581,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 507
},
{
"epoch": 1.6894254787676934,
"grad_norm": 0.32381127071831955,
"learning_rate": 5.3801169590643285e-06,
"loss": 1.459,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 508
},
{
"epoch": 1.6927560366361365,
"grad_norm": 0.3811936086039866,
"learning_rate": 5.356880058866813e-06,
"loss": 1.5033,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 509
},
{
"epoch": 1.6960865945045795,
"grad_norm": 0.3612050754686712,
"learning_rate": 5.333333333333333e-06,
"loss": 1.4137,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 510
},
{
"epoch": 1.6994171523730226,
"grad_norm": 0.35765265665477713,
"learning_rate": 5.309470544369873e-06,
"loss": 1.4087,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 511
},
{
"epoch": 1.7027477102414654,
"grad_norm": 0.3357163323849947,
"learning_rate": 5.285285285285285e-06,
"loss": 1.4851,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 512
},
{
"epoch": 1.7060782681099083,
"grad_norm": 0.3449646759899252,
"learning_rate": 5.260770975056689e-06,
"loss": 1.442,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 513
},
{
"epoch": 1.7094088259783513,
"grad_norm": 0.3335919341097906,
"learning_rate": 5.235920852359208e-06,
"loss": 1.454,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 514
},
{
"epoch": 1.7127393838467944,
"grad_norm": 0.3414007515866483,
"learning_rate": 5.210727969348659e-06,
"loss": 1.4762,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 515
},
{
"epoch": 1.7160699417152374,
"grad_norm": 0.37174665041283544,
"learning_rate": 5.185185185185185e-06,
"loss": 1.4615,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 516
},
{
"epoch": 1.7194004995836802,
"grad_norm": 0.37265087217053033,
"learning_rate": 5.159285159285159e-06,
"loss": 1.4072,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 517
},
{
"epoch": 1.722731057452123,
"grad_norm": 0.3445160578098801,
"learning_rate": 5.1330203442879505e-06,
"loss": 1.4337,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 518
},
{
"epoch": 1.7260616153205661,
"grad_norm": 0.3675807887019101,
"learning_rate": 5.106382978723403e-06,
"loss": 1.4147,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 519
},
{
"epoch": 1.7293921731890092,
"grad_norm": 0.34584285856367675,
"learning_rate": 5.079365079365079e-06,
"loss": 1.4193,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 520
},
{
"epoch": 1.7327227310574522,
"grad_norm": 0.3685778739128953,
"learning_rate": 5.051958433253396e-06,
"loss": 1.4466,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 521
},
{
"epoch": 1.736053288925895,
"grad_norm": 0.35632916296360506,
"learning_rate": 5.02415458937198e-06,
"loss": 1.4299,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 522
},
{
"epoch": 1.739383846794338,
"grad_norm": 0.3941128956001842,
"learning_rate": 4.995944849959448e-06,
"loss": 1.4264,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 523
},
{
"epoch": 1.742714404662781,
"grad_norm": 0.3481786883352737,
"learning_rate": 4.967320261437908e-06,
"loss": 1.4279,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 524
},
{
"epoch": 1.746044962531224,
"grad_norm": 0.3627527951339854,
"learning_rate": 4.938271604938271e-06,
"loss": 1.5152,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 525
},
{
"epoch": 1.749375520399667,
"grad_norm": 0.34527513358988937,
"learning_rate": 4.9087893864013265e-06,
"loss": 1.4088,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 526
},
{
"epoch": 1.7527060782681099,
"grad_norm": 0.368823414133051,
"learning_rate": 4.878863826232247e-06,
"loss": 1.3944,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 527
},
{
"epoch": 1.756036636136553,
"grad_norm": 0.3471938836863914,
"learning_rate": 4.848484848484849e-06,
"loss": 1.3809,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 528
},
{
"epoch": 1.7593671940049957,
"grad_norm": 0.39760839658681035,
"learning_rate": 4.817642069550467e-06,
"loss": 1.4081,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 529
},
{
"epoch": 1.7626977518734388,
"grad_norm": 0.35630999152948084,
"learning_rate": 4.786324786324786e-06,
"loss": 1.4049,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 530
},
{
"epoch": 1.7660283097418819,
"grad_norm": 0.3123127862091999,
"learning_rate": 4.754521963824289e-06,
"loss": 1.4033,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 531
},
{
"epoch": 1.7693588676103247,
"grad_norm": 0.3565716669933871,
"learning_rate": 4.722222222222222e-06,
"loss": 1.4548,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 532
},
{
"epoch": 1.7726894254787677,
"grad_norm": 0.3415824605451111,
"learning_rate": 4.68941382327209e-06,
"loss": 1.3379,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 533
},
{
"epoch": 1.7760199833472106,
"grad_norm": 0.37445157627374487,
"learning_rate": 4.6560846560846555e-06,
"loss": 1.5023,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 534
},
{
"epoch": 1.7793505412156536,
"grad_norm": 0.4140970552339397,
"learning_rate": 4.622222222222222e-06,
"loss": 1.4982,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 535
},
{
"epoch": 1.7826810990840967,
"grad_norm": 0.3696216853055909,
"learning_rate": 4.587813620071684e-06,
"loss": 1.3795,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 536
},
{
"epoch": 1.7860116569525397,
"grad_norm": 0.3374796769034963,
"learning_rate": 4.552845528455284e-06,
"loss": 1.4356,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 537
},
{
"epoch": 1.7893422148209825,
"grad_norm": 0.4227610049072286,
"learning_rate": 4.517304189435337e-06,
"loss": 1.4625,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 538
},
{
"epoch": 1.7926727726894254,
"grad_norm": 0.36612259553982557,
"learning_rate": 4.4811753902663e-06,
"loss": 1.4274,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 539
},
{
"epoch": 1.7960033305578684,
"grad_norm": 0.4222638209328834,
"learning_rate": 4.444444444444443e-06,
"loss": 1.5129,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 540
},
{
"epoch": 1.7993338884263115,
"grad_norm": 0.41009576553628174,
"learning_rate": 4.407096171802053e-06,
"loss": 1.4873,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 541
},
{
"epoch": 1.8026644462947545,
"grad_norm": 0.35086922544434007,
"learning_rate": 4.369114877589454e-06,
"loss": 1.3718,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 542
},
{
"epoch": 1.8059950041631974,
"grad_norm": 0.35855015526438827,
"learning_rate": 4.33048433048433e-06,
"loss": 1.4031,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 543
},
{
"epoch": 1.8093255620316402,
"grad_norm": 0.42477533100459036,
"learning_rate": 4.291187739463601e-06,
"loss": 1.4473,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 544
},
{
"epoch": 1.8126561199000832,
"grad_norm": 0.39791782472493653,
"learning_rate": 4.251207729468599e-06,
"loss": 1.4374,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 545
},
{
"epoch": 1.8159866777685263,
"grad_norm": 0.3444343384513091,
"learning_rate": 4.210526315789473e-06,
"loss": 1.4048,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 546
},
{
"epoch": 1.8193172356369693,
"grad_norm": 0.3453119165966736,
"learning_rate": 4.169124877089478e-06,
"loss": 1.3581,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 547
},
{
"epoch": 1.8226477935054122,
"grad_norm": 0.38186414289634574,
"learning_rate": 4.126984126984126e-06,
"loss": 1.4774,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 548
},
{
"epoch": 1.825978351373855,
"grad_norm": 0.3371300332212375,
"learning_rate": 4.084084084084084e-06,
"loss": 1.3565,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 549
},
{
"epoch": 1.829308909242298,
"grad_norm": 0.32042065002080106,
"learning_rate": 4.0404040404040395e-06,
"loss": 1.3807,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 550
},
{
"epoch": 1.832639467110741,
"grad_norm": 0.3776475075214216,
"learning_rate": 3.995922528032619e-06,
"loss": 1.4305,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 551
},
{
"epoch": 1.8359700249791842,
"grad_norm": 0.3351717661136717,
"learning_rate": 3.9506172839506175e-06,
"loss": 1.4133,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 552
},
{
"epoch": 1.839300582847627,
"grad_norm": 0.37528610178789024,
"learning_rate": 3.904465212876428e-06,
"loss": 1.4994,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 553
},
{
"epoch": 1.8426311407160698,
"grad_norm": 0.38936785329254486,
"learning_rate": 3.857442348008385e-06,
"loss": 1.393,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 554
},
{
"epoch": 1.8459616985845129,
"grad_norm": 0.40525496168183883,
"learning_rate": 3.8095238095238094e-06,
"loss": 1.4019,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 555
},
{
"epoch": 1.849292256452956,
"grad_norm": 0.4169994094961459,
"learning_rate": 3.7606837606837604e-06,
"loss": 1.4208,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 556
},
{
"epoch": 1.852622814321399,
"grad_norm": 0.4093560262894869,
"learning_rate": 3.710895361380798e-06,
"loss": 1.44,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 557
},
{
"epoch": 1.8559533721898418,
"grad_norm": 0.35662577286334196,
"learning_rate": 3.660130718954248e-06,
"loss": 1.4168,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 558
},
{
"epoch": 1.8592839300582846,
"grad_norm": 0.3469062029498766,
"learning_rate": 3.6083608360836084e-06,
"loss": 1.4109,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 559
},
{
"epoch": 1.8626144879267277,
"grad_norm": 0.35913894186601036,
"learning_rate": 3.5555555555555546e-06,
"loss": 1.3026,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 560
},
{
"epoch": 1.8659450457951707,
"grad_norm": 0.3601783041537011,
"learning_rate": 3.501683501683501e-06,
"loss": 1.4429,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 561
},
{
"epoch": 1.8692756036636138,
"grad_norm": 0.4301246312907219,
"learning_rate": 3.4467120181405894e-06,
"loss": 1.4415,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 562
},
{
"epoch": 1.8726061615320566,
"grad_norm": 0.44543619950365937,
"learning_rate": 3.390607101947308e-06,
"loss": 1.4354,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 563
},
{
"epoch": 1.8759367194004994,
"grad_norm": 0.4110006980910609,
"learning_rate": 3.333333333333333e-06,
"loss": 1.5156,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 564
},
{
"epoch": 1.8792672772689425,
"grad_norm": 0.36681035057341954,
"learning_rate": 3.27485380116959e-06,
"loss": 1.3926,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 565
},
{
"epoch": 1.8825978351373855,
"grad_norm": 0.3639552416710322,
"learning_rate": 3.215130023640661e-06,
"loss": 1.4537,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 566
},
{
"epoch": 1.8859283930058286,
"grad_norm": 0.3809201109198225,
"learning_rate": 3.154121863799283e-06,
"loss": 1.4344,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 567
},
{
"epoch": 1.8892589508742714,
"grad_norm": 0.5067748995958425,
"learning_rate": 3.0917874396135263e-06,
"loss": 1.444,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 568
},
{
"epoch": 1.8925895087427143,
"grad_norm": 0.39339545860925257,
"learning_rate": 3.028083028083028e-06,
"loss": 1.4368,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 569
},
{
"epoch": 1.8959200666111573,
"grad_norm": 0.3645143242760266,
"learning_rate": 2.9629629629629625e-06,
"loss": 1.4189,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 570
},
{
"epoch": 1.8992506244796004,
"grad_norm": 0.41301404150023885,
"learning_rate": 2.8963795255930087e-06,
"loss": 1.4513,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 571
},
{
"epoch": 1.9025811823480434,
"grad_norm": 0.35445322756534786,
"learning_rate": 2.828282828282828e-06,
"loss": 1.4212,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 572
},
{
"epoch": 1.9059117402164862,
"grad_norm": 0.31609898679838344,
"learning_rate": 2.758620689655172e-06,
"loss": 1.4282,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 573
},
{
"epoch": 1.909242298084929,
"grad_norm": 0.38641815454972966,
"learning_rate": 2.6873385012919895e-06,
"loss": 1.5401,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 574
},
{
"epoch": 1.9125728559533721,
"grad_norm": 0.38729985084754753,
"learning_rate": 2.6143790849673204e-06,
"loss": 1.461,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 575
},
{
"epoch": 1.9159034138218152,
"grad_norm": 0.4550081298663739,
"learning_rate": 2.5396825396825395e-06,
"loss": 1.4602,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 576
},
{
"epoch": 1.9192339716902582,
"grad_norm": 0.3605173725442084,
"learning_rate": 2.4631860776439087e-06,
"loss": 1.4104,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 577
},
{
"epoch": 1.922564529558701,
"grad_norm": 0.38548981376382463,
"learning_rate": 2.384823848238482e-06,
"loss": 1.4465,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 578
},
{
"epoch": 1.9258950874271439,
"grad_norm": 0.39748551935246357,
"learning_rate": 2.304526748971193e-06,
"loss": 1.443,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 579
},
{
"epoch": 1.929225645295587,
"grad_norm": 0.3638886379639791,
"learning_rate": 2.222222222222222e-06,
"loss": 1.4129,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 580
},
{
"epoch": 1.93255620316403,
"grad_norm": 0.36953734209449074,
"learning_rate": 2.1378340365682133e-06,
"loss": 1.3176,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 581
},
{
"epoch": 1.935886761032473,
"grad_norm": 0.3267944344034355,
"learning_rate": 2.051282051282051e-06,
"loss": 1.4588,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 582
},
{
"epoch": 1.9392173189009159,
"grad_norm": 0.3915434082543582,
"learning_rate": 1.962481962481962e-06,
"loss": 1.4441,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 583
},
{
"epoch": 1.942547876769359,
"grad_norm": 0.3556155258308632,
"learning_rate": 1.871345029239766e-06,
"loss": 1.3898,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 584
},
{
"epoch": 1.9458784346378017,
"grad_norm": 0.35583427100431714,
"learning_rate": 1.7777777777777775e-06,
"loss": 1.4117,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 585
},
{
"epoch": 1.9492089925062448,
"grad_norm": 0.3312617219719275,
"learning_rate": 1.6816816816816814e-06,
"loss": 1.4243,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 586
},
{
"epoch": 1.9525395503746878,
"grad_norm": 0.3171322439070156,
"learning_rate": 1.582952815829528e-06,
"loss": 1.3974,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 587
},
{
"epoch": 1.9558701082431307,
"grad_norm": 0.2931034713127486,
"learning_rate": 1.4814814814814812e-06,
"loss": 1.4232,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 588
},
{
"epoch": 1.9592006661115737,
"grad_norm": 0.31803832338980526,
"learning_rate": 1.3771517996870107e-06,
"loss": 1.4475,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 589
},
{
"epoch": 1.9625312239800166,
"grad_norm": 0.3102745879861819,
"learning_rate": 1.2698412698412697e-06,
"loss": 1.4991,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 590
},
{
"epoch": 1.9658617818484596,
"grad_norm": 0.35190966791382605,
"learning_rate": 1.1594202898550724e-06,
"loss": 1.4806,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 591
},
{
"epoch": 1.9691923397169027,
"grad_norm": 0.3133274529689738,
"learning_rate": 1.045751633986928e-06,
"loss": 1.347,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 592
},
{
"epoch": 1.9725228975853455,
"grad_norm": 0.30605048339614954,
"learning_rate": 9.286898839137644e-07,
"loss": 1.3999,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 593
},
{
"epoch": 1.9758534554537885,
"grad_norm": 0.3151090112991302,
"learning_rate": 8.08080808080808e-07,
"loss": 1.4339,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 594
},
{
"epoch": 1.9791840133222314,
"grad_norm": 0.35650599063479166,
"learning_rate": 6.837606837606837e-07,
"loss": 1.4009,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 595
},
{
"epoch": 1.9825145711906744,
"grad_norm": 0.3384958491564326,
"learning_rate": 5.555555555555555e-07,
"loss": 1.4611,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 596
},
{
"epoch": 1.9858451290591175,
"grad_norm": 0.3335636198476521,
"learning_rate": 4.2328042328042324e-07,
"loss": 1.3728,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 597
},
{
"epoch": 1.9891756869275605,
"grad_norm": 0.32277296814250667,
"learning_rate": 2.8673835125448024e-07,
"loss": 1.387,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 598
},
{
"epoch": 1.9925062447960034,
"grad_norm": 0.3467254801927619,
"learning_rate": 1.4571948998178507e-07,
"loss": 1.4114,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 599
},
{
"epoch": 1.9958368026644462,
"grad_norm": 0.3207639144479259,
"learning_rate": 0,
"loss": 1.3956,
"memory/device_mem_reserved(gib)": 59.75,
"memory/max_mem_active(gib)": 57.09,
"memory/max_mem_allocated(gib)": 56.77,
"step": 600
}
],
"logging_steps": 1,
"max_steps": 600,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 150,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.439031159441326e+18,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}