| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 4017, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0007472445357743321, | |
| "grad_norm": 7.656607564918146, | |
| "learning_rate": 5.999999770634896e-06, | |
| "loss": 0.880859375, | |
| "memory(GiB)": 4.97, | |
| "step": 1, | |
| "train_speed(iter/s)": 0.036844 | |
| }, | |
| { | |
| "epoch": 0.0037362226788716607, | |
| "grad_norm": 1.3240812663129038, | |
| "learning_rate": 5.999994265874156e-06, | |
| "loss": 0.6390380859375, | |
| "memory(GiB)": 5.26, | |
| "step": 5, | |
| "train_speed(iter/s)": 0.138065 | |
| }, | |
| { | |
| "epoch": 0.007472445357743321, | |
| "grad_norm": 0.7284755993919946, | |
| "learning_rate": 5.999977063518543e-06, | |
| "loss": 0.5041015625, | |
| "memory(GiB)": 5.26, | |
| "step": 10, | |
| "train_speed(iter/s)": 0.213363 | |
| }, | |
| { | |
| "epoch": 0.011208668036614982, | |
| "grad_norm": 0.6580943903426335, | |
| "learning_rate": 5.999948392998923e-06, | |
| "loss": 0.40029296875, | |
| "memory(GiB)": 5.26, | |
| "step": 15, | |
| "train_speed(iter/s)": 0.261004 | |
| }, | |
| { | |
| "epoch": 0.014944890715486643, | |
| "grad_norm": 0.7857583426949194, | |
| "learning_rate": 5.999908254424895e-06, | |
| "loss": 0.33583984375, | |
| "memory(GiB)": 6.59, | |
| "step": 20, | |
| "train_speed(iter/s)": 0.293459 | |
| }, | |
| { | |
| "epoch": 0.018681113394358302, | |
| "grad_norm": 0.6568799760311678, | |
| "learning_rate": 5.999856647949899e-06, | |
| "loss": 0.29228515625, | |
| "memory(GiB)": 6.59, | |
| "step": 25, | |
| "train_speed(iter/s)": 0.318359 | |
| }, | |
| { | |
| "epoch": 0.022417336073229963, | |
| "grad_norm": 0.6029155858817358, | |
| "learning_rate": 5.999793573771213e-06, | |
| "loss": 0.316943359375, | |
| "memory(GiB)": 6.59, | |
| "step": 30, | |
| "train_speed(iter/s)": 0.337313 | |
| }, | |
| { | |
| "epoch": 0.026153558752101624, | |
| "grad_norm": 0.5753306542956114, | |
| "learning_rate": 5.999719032129956e-06, | |
| "loss": 0.2943359375, | |
| "memory(GiB)": 6.59, | |
| "step": 35, | |
| "train_speed(iter/s)": 0.348738 | |
| }, | |
| { | |
| "epoch": 0.029889781430973286, | |
| "grad_norm": 0.4408689781290305, | |
| "learning_rate": 5.999633023311079e-06, | |
| "loss": 0.2607421875, | |
| "memory(GiB)": 6.59, | |
| "step": 40, | |
| "train_speed(iter/s)": 0.361231 | |
| }, | |
| { | |
| "epoch": 0.03362600410984495, | |
| "grad_norm": 0.48969938792644213, | |
| "learning_rate": 5.999535547643375e-06, | |
| "loss": 0.274072265625, | |
| "memory(GiB)": 6.59, | |
| "step": 45, | |
| "train_speed(iter/s)": 0.371707 | |
| }, | |
| { | |
| "epoch": 0.037362226788716604, | |
| "grad_norm": 0.44261950794528854, | |
| "learning_rate": 5.999426605499469e-06, | |
| "loss": 0.26669921875, | |
| "memory(GiB)": 6.59, | |
| "step": 50, | |
| "train_speed(iter/s)": 0.379995 | |
| }, | |
| { | |
| "epoch": 0.04109844946758827, | |
| "grad_norm": 0.598637540279772, | |
| "learning_rate": 5.999306197295818e-06, | |
| "loss": 0.231982421875, | |
| "memory(GiB)": 6.59, | |
| "step": 55, | |
| "train_speed(iter/s)": 0.387849 | |
| }, | |
| { | |
| "epoch": 0.04483467214645993, | |
| "grad_norm": 0.590172951597349, | |
| "learning_rate": 5.999174323492712e-06, | |
| "loss": 0.2291015625, | |
| "memory(GiB)": 6.59, | |
| "step": 60, | |
| "train_speed(iter/s)": 0.394021 | |
| }, | |
| { | |
| "epoch": 0.04857089482533159, | |
| "grad_norm": 0.3946331437032985, | |
| "learning_rate": 5.999030984594274e-06, | |
| "loss": 0.2292236328125, | |
| "memory(GiB)": 7.12, | |
| "step": 65, | |
| "train_speed(iter/s)": 0.398312 | |
| }, | |
| { | |
| "epoch": 0.05230711750420325, | |
| "grad_norm": 0.47975474179770955, | |
| "learning_rate": 5.998876181148451e-06, | |
| "loss": 0.244677734375, | |
| "memory(GiB)": 7.12, | |
| "step": 70, | |
| "train_speed(iter/s)": 0.403159 | |
| }, | |
| { | |
| "epoch": 0.05604334018307491, | |
| "grad_norm": 0.42828328321417347, | |
| "learning_rate": 5.99870991374702e-06, | |
| "loss": 0.241357421875, | |
| "memory(GiB)": 7.12, | |
| "step": 75, | |
| "train_speed(iter/s)": 0.40778 | |
| }, | |
| { | |
| "epoch": 0.05977956286194657, | |
| "grad_norm": 0.42785803136464096, | |
| "learning_rate": 5.9985321830255785e-06, | |
| "loss": 0.19462890625, | |
| "memory(GiB)": 7.12, | |
| "step": 80, | |
| "train_speed(iter/s)": 0.411773 | |
| }, | |
| { | |
| "epoch": 0.06351578554081823, | |
| "grad_norm": 0.508984996055907, | |
| "learning_rate": 5.998342989663546e-06, | |
| "loss": 0.2152587890625, | |
| "memory(GiB)": 7.12, | |
| "step": 85, | |
| "train_speed(iter/s)": 0.415122 | |
| }, | |
| { | |
| "epoch": 0.0672520082196899, | |
| "grad_norm": 0.40314104128835676, | |
| "learning_rate": 5.998142334384162e-06, | |
| "loss": 0.2130859375, | |
| "memory(GiB)": 7.12, | |
| "step": 90, | |
| "train_speed(iter/s)": 0.417064 | |
| }, | |
| { | |
| "epoch": 0.07098823089856156, | |
| "grad_norm": 0.4958145558390914, | |
| "learning_rate": 5.997930217954482e-06, | |
| "loss": 0.20390625, | |
| "memory(GiB)": 7.12, | |
| "step": 95, | |
| "train_speed(iter/s)": 0.419957 | |
| }, | |
| { | |
| "epoch": 0.07472445357743321, | |
| "grad_norm": 0.41222740097614996, | |
| "learning_rate": 5.997706641185376e-06, | |
| "loss": 0.2318359375, | |
| "memory(GiB)": 7.68, | |
| "step": 100, | |
| "train_speed(iter/s)": 0.422692 | |
| }, | |
| { | |
| "epoch": 0.07846067625630487, | |
| "grad_norm": 0.3568824010450547, | |
| "learning_rate": 5.997471604931518e-06, | |
| "loss": 0.21181640625, | |
| "memory(GiB)": 7.68, | |
| "step": 105, | |
| "train_speed(iter/s)": 0.425586 | |
| }, | |
| { | |
| "epoch": 0.08219689893517654, | |
| "grad_norm": 0.5279562949874639, | |
| "learning_rate": 5.997225110091396e-06, | |
| "loss": 0.2095947265625, | |
| "memory(GiB)": 7.68, | |
| "step": 110, | |
| "train_speed(iter/s)": 0.428419 | |
| }, | |
| { | |
| "epoch": 0.0859331216140482, | |
| "grad_norm": 0.4919839298671231, | |
| "learning_rate": 5.996967157607298e-06, | |
| "loss": 0.187939453125, | |
| "memory(GiB)": 7.68, | |
| "step": 115, | |
| "train_speed(iter/s)": 0.430818 | |
| }, | |
| { | |
| "epoch": 0.08966934429291985, | |
| "grad_norm": 0.3706866470661083, | |
| "learning_rate": 5.99669774846531e-06, | |
| "loss": 0.2244140625, | |
| "memory(GiB)": 7.68, | |
| "step": 120, | |
| "train_speed(iter/s)": 0.432015 | |
| }, | |
| { | |
| "epoch": 0.09340556697179152, | |
| "grad_norm": 0.39636987044245997, | |
| "learning_rate": 5.9964168836953194e-06, | |
| "loss": 0.206689453125, | |
| "memory(GiB)": 7.68, | |
| "step": 125, | |
| "train_speed(iter/s)": 0.434132 | |
| }, | |
| { | |
| "epoch": 0.09714178965066318, | |
| "grad_norm": 0.4441200958244795, | |
| "learning_rate": 5.996124564371e-06, | |
| "loss": 0.17958984375, | |
| "memory(GiB)": 7.68, | |
| "step": 130, | |
| "train_speed(iter/s)": 0.435878 | |
| }, | |
| { | |
| "epoch": 0.10087801232953485, | |
| "grad_norm": 0.5703220339704642, | |
| "learning_rate": 5.995820791609815e-06, | |
| "loss": 0.1775390625, | |
| "memory(GiB)": 7.68, | |
| "step": 135, | |
| "train_speed(iter/s)": 0.437848 | |
| }, | |
| { | |
| "epoch": 0.1046142350084065, | |
| "grad_norm": 0.4384590937574754, | |
| "learning_rate": 5.995505566573013e-06, | |
| "loss": 0.166064453125, | |
| "memory(GiB)": 7.68, | |
| "step": 140, | |
| "train_speed(iter/s)": 0.438804 | |
| }, | |
| { | |
| "epoch": 0.10835045768727816, | |
| "grad_norm": 0.39708135180108495, | |
| "learning_rate": 5.995178890465622e-06, | |
| "loss": 0.1685302734375, | |
| "memory(GiB)": 7.68, | |
| "step": 145, | |
| "train_speed(iter/s)": 0.440584 | |
| }, | |
| { | |
| "epoch": 0.11208668036614983, | |
| "grad_norm": 0.4525405723559605, | |
| "learning_rate": 5.99484076453644e-06, | |
| "loss": 0.19501953125, | |
| "memory(GiB)": 7.68, | |
| "step": 150, | |
| "train_speed(iter/s)": 0.441918 | |
| }, | |
| { | |
| "epoch": 0.11582290304502148, | |
| "grad_norm": 0.285652037586189, | |
| "learning_rate": 5.99449119007804e-06, | |
| "loss": 0.1964111328125, | |
| "memory(GiB)": 7.68, | |
| "step": 155, | |
| "train_speed(iter/s)": 0.442742 | |
| }, | |
| { | |
| "epoch": 0.11955912572389314, | |
| "grad_norm": 0.37436551218621555, | |
| "learning_rate": 5.994130168426758e-06, | |
| "loss": 0.17265625, | |
| "memory(GiB)": 7.68, | |
| "step": 160, | |
| "train_speed(iter/s)": 0.444294 | |
| }, | |
| { | |
| "epoch": 0.1232953484027648, | |
| "grad_norm": 0.4319611112269015, | |
| "learning_rate": 5.993757700962691e-06, | |
| "loss": 0.1605712890625, | |
| "memory(GiB)": 7.68, | |
| "step": 165, | |
| "train_speed(iter/s)": 0.445095 | |
| }, | |
| { | |
| "epoch": 0.12703157108163646, | |
| "grad_norm": 0.4679153709762584, | |
| "learning_rate": 5.993373789109686e-06, | |
| "loss": 0.165673828125, | |
| "memory(GiB)": 7.68, | |
| "step": 170, | |
| "train_speed(iter/s)": 0.446127 | |
| }, | |
| { | |
| "epoch": 0.13076779376050812, | |
| "grad_norm": 0.371562107209469, | |
| "learning_rate": 5.992978434335345e-06, | |
| "loss": 0.2007080078125, | |
| "memory(GiB)": 7.68, | |
| "step": 175, | |
| "train_speed(iter/s)": 0.447213 | |
| }, | |
| { | |
| "epoch": 0.1345040164393798, | |
| "grad_norm": 0.41362103389091964, | |
| "learning_rate": 5.992571638151009e-06, | |
| "loss": 0.189794921875, | |
| "memory(GiB)": 7.68, | |
| "step": 180, | |
| "train_speed(iter/s)": 0.447752 | |
| }, | |
| { | |
| "epoch": 0.13824023911825145, | |
| "grad_norm": 0.44521680263908975, | |
| "learning_rate": 5.992153402111759e-06, | |
| "loss": 0.1886474609375, | |
| "memory(GiB)": 7.68, | |
| "step": 185, | |
| "train_speed(iter/s)": 0.448523 | |
| }, | |
| { | |
| "epoch": 0.14197646179712312, | |
| "grad_norm": 0.3574382830191666, | |
| "learning_rate": 5.991723727816408e-06, | |
| "loss": 0.2037109375, | |
| "memory(GiB)": 7.68, | |
| "step": 190, | |
| "train_speed(iter/s)": 0.449759 | |
| }, | |
| { | |
| "epoch": 0.14571268447599478, | |
| "grad_norm": 0.384417458292917, | |
| "learning_rate": 5.991282616907493e-06, | |
| "loss": 0.182666015625, | |
| "memory(GiB)": 7.68, | |
| "step": 195, | |
| "train_speed(iter/s)": 0.450424 | |
| }, | |
| { | |
| "epoch": 0.14944890715486642, | |
| "grad_norm": 0.30564363786555343, | |
| "learning_rate": 5.990830071071269e-06, | |
| "loss": 0.1610107421875, | |
| "memory(GiB)": 7.68, | |
| "step": 200, | |
| "train_speed(iter/s)": 0.45118 | |
| }, | |
| { | |
| "epoch": 0.15318512983373808, | |
| "grad_norm": 0.34594889167069637, | |
| "learning_rate": 5.990366092037709e-06, | |
| "loss": 0.1712890625, | |
| "memory(GiB)": 7.68, | |
| "step": 205, | |
| "train_speed(iter/s)": 0.451796 | |
| }, | |
| { | |
| "epoch": 0.15692135251260975, | |
| "grad_norm": 0.26609760325798565, | |
| "learning_rate": 5.9898906815804865e-06, | |
| "loss": 0.1716552734375, | |
| "memory(GiB)": 8.72, | |
| "step": 210, | |
| "train_speed(iter/s)": 0.451737 | |
| }, | |
| { | |
| "epoch": 0.1606575751914814, | |
| "grad_norm": 0.4326902726320289, | |
| "learning_rate": 5.989403841516979e-06, | |
| "loss": 0.1868408203125, | |
| "memory(GiB)": 9.45, | |
| "step": 215, | |
| "train_speed(iter/s)": 0.452203 | |
| }, | |
| { | |
| "epoch": 0.16439379787035308, | |
| "grad_norm": 0.31305519468747833, | |
| "learning_rate": 5.9889055737082535e-06, | |
| "loss": 0.1808837890625, | |
| "memory(GiB)": 9.45, | |
| "step": 220, | |
| "train_speed(iter/s)": 0.452465 | |
| }, | |
| { | |
| "epoch": 0.16813002054922474, | |
| "grad_norm": 0.337929954898332, | |
| "learning_rate": 5.988395880059065e-06, | |
| "loss": 0.1795166015625, | |
| "memory(GiB)": 9.45, | |
| "step": 225, | |
| "train_speed(iter/s)": 0.452759 | |
| }, | |
| { | |
| "epoch": 0.1718662432280964, | |
| "grad_norm": 0.39047122531072104, | |
| "learning_rate": 5.987874762517843e-06, | |
| "loss": 0.169384765625, | |
| "memory(GiB)": 9.45, | |
| "step": 230, | |
| "train_speed(iter/s)": 0.453624 | |
| }, | |
| { | |
| "epoch": 0.17560246590696804, | |
| "grad_norm": 0.29442955032080625, | |
| "learning_rate": 5.987342223076692e-06, | |
| "loss": 0.15751953125, | |
| "memory(GiB)": 9.45, | |
| "step": 235, | |
| "train_speed(iter/s)": 0.453993 | |
| }, | |
| { | |
| "epoch": 0.1793386885858397, | |
| "grad_norm": 0.4050248335175831, | |
| "learning_rate": 5.986798263771375e-06, | |
| "loss": 0.1673095703125, | |
| "memory(GiB)": 9.45, | |
| "step": 240, | |
| "train_speed(iter/s)": 0.454589 | |
| }, | |
| { | |
| "epoch": 0.18307491126471137, | |
| "grad_norm": 0.39189087307596043, | |
| "learning_rate": 5.9862428866813155e-06, | |
| "loss": 0.17457275390625, | |
| "memory(GiB)": 9.45, | |
| "step": 245, | |
| "train_speed(iter/s)": 0.455097 | |
| }, | |
| { | |
| "epoch": 0.18681113394358304, | |
| "grad_norm": 0.2995268823777092, | |
| "learning_rate": 5.985676093929579e-06, | |
| "loss": 0.1733154296875, | |
| "memory(GiB)": 9.45, | |
| "step": 250, | |
| "train_speed(iter/s)": 0.455559 | |
| }, | |
| { | |
| "epoch": 0.1905473566224547, | |
| "grad_norm": 0.35042188317088824, | |
| "learning_rate": 5.985097887682876e-06, | |
| "loss": 0.18154296875, | |
| "memory(GiB)": 9.45, | |
| "step": 255, | |
| "train_speed(iter/s)": 0.456072 | |
| }, | |
| { | |
| "epoch": 0.19428357930132636, | |
| "grad_norm": 0.3402952343617486, | |
| "learning_rate": 5.984508270151542e-06, | |
| "loss": 0.1767578125, | |
| "memory(GiB)": 9.45, | |
| "step": 260, | |
| "train_speed(iter/s)": 0.456723 | |
| }, | |
| { | |
| "epoch": 0.19801980198019803, | |
| "grad_norm": 0.2789400887911893, | |
| "learning_rate": 5.983907243589537e-06, | |
| "loss": 0.16141357421875, | |
| "memory(GiB)": 9.45, | |
| "step": 265, | |
| "train_speed(iter/s)": 0.456772 | |
| }, | |
| { | |
| "epoch": 0.2017560246590697, | |
| "grad_norm": 0.33400251489865246, | |
| "learning_rate": 5.983294810294439e-06, | |
| "loss": 0.158544921875, | |
| "memory(GiB)": 9.45, | |
| "step": 270, | |
| "train_speed(iter/s)": 0.457152 | |
| }, | |
| { | |
| "epoch": 0.20549224733794133, | |
| "grad_norm": 0.4225006545766808, | |
| "learning_rate": 5.982670972607426e-06, | |
| "loss": 0.1498046875, | |
| "memory(GiB)": 9.45, | |
| "step": 275, | |
| "train_speed(iter/s)": 0.457743 | |
| }, | |
| { | |
| "epoch": 0.209228470016813, | |
| "grad_norm": 0.43474965051646863, | |
| "learning_rate": 5.982035732913273e-06, | |
| "loss": 0.1770263671875, | |
| "memory(GiB)": 9.45, | |
| "step": 280, | |
| "train_speed(iter/s)": 0.457807 | |
| }, | |
| { | |
| "epoch": 0.21296469269568466, | |
| "grad_norm": 0.36173927443406817, | |
| "learning_rate": 5.981389093640344e-06, | |
| "loss": 0.1758056640625, | |
| "memory(GiB)": 9.45, | |
| "step": 285, | |
| "train_speed(iter/s)": 0.458088 | |
| }, | |
| { | |
| "epoch": 0.21670091537455632, | |
| "grad_norm": 0.25308312315237813, | |
| "learning_rate": 5.980731057260579e-06, | |
| "loss": 0.173388671875, | |
| "memory(GiB)": 9.45, | |
| "step": 290, | |
| "train_speed(iter/s)": 0.457498 | |
| }, | |
| { | |
| "epoch": 0.220437138053428, | |
| "grad_norm": 0.29470555914634394, | |
| "learning_rate": 5.980061626289489e-06, | |
| "loss": 0.15411376953125, | |
| "memory(GiB)": 9.45, | |
| "step": 295, | |
| "train_speed(iter/s)": 0.457387 | |
| }, | |
| { | |
| "epoch": 0.22417336073229965, | |
| "grad_norm": 0.35624287307171026, | |
| "learning_rate": 5.9793808032861385e-06, | |
| "loss": 0.1614501953125, | |
| "memory(GiB)": 9.45, | |
| "step": 300, | |
| "train_speed(iter/s)": 0.457895 | |
| }, | |
| { | |
| "epoch": 0.22790958341117132, | |
| "grad_norm": 0.2504855752959934, | |
| "learning_rate": 5.9786885908531455e-06, | |
| "loss": 0.15517578125, | |
| "memory(GiB)": 9.45, | |
| "step": 305, | |
| "train_speed(iter/s)": 0.458265 | |
| }, | |
| { | |
| "epoch": 0.23164580609004295, | |
| "grad_norm": 0.33904923734016645, | |
| "learning_rate": 5.977984991636665e-06, | |
| "loss": 0.1745361328125, | |
| "memory(GiB)": 9.45, | |
| "step": 310, | |
| "train_speed(iter/s)": 0.458658 | |
| }, | |
| { | |
| "epoch": 0.23538202876891462, | |
| "grad_norm": 0.3551555191841338, | |
| "learning_rate": 5.977270008326383e-06, | |
| "loss": 0.157275390625, | |
| "memory(GiB)": 9.45, | |
| "step": 315, | |
| "train_speed(iter/s)": 0.459103 | |
| }, | |
| { | |
| "epoch": 0.23911825144778628, | |
| "grad_norm": 0.4587798002581139, | |
| "learning_rate": 5.9765436436555e-06, | |
| "loss": 0.1659423828125, | |
| "memory(GiB)": 9.45, | |
| "step": 320, | |
| "train_speed(iter/s)": 0.459434 | |
| }, | |
| { | |
| "epoch": 0.24285447412665795, | |
| "grad_norm": 0.3505254508815674, | |
| "learning_rate": 5.975805900400728e-06, | |
| "loss": 0.1699951171875, | |
| "memory(GiB)": 9.45, | |
| "step": 325, | |
| "train_speed(iter/s)": 0.459396 | |
| }, | |
| { | |
| "epoch": 0.2465906968055296, | |
| "grad_norm": 0.3234531871867349, | |
| "learning_rate": 5.9750567813822766e-06, | |
| "loss": 0.15689697265625, | |
| "memory(GiB)": 9.45, | |
| "step": 330, | |
| "train_speed(iter/s)": 0.459815 | |
| }, | |
| { | |
| "epoch": 0.2503269194844013, | |
| "grad_norm": 0.2847235822528394, | |
| "learning_rate": 5.974296289463838e-06, | |
| "loss": 0.1782470703125, | |
| "memory(GiB)": 9.45, | |
| "step": 335, | |
| "train_speed(iter/s)": 0.460005 | |
| }, | |
| { | |
| "epoch": 0.2540631421632729, | |
| "grad_norm": 0.19887321720781595, | |
| "learning_rate": 5.973524427552586e-06, | |
| "loss": 0.1454345703125, | |
| "memory(GiB)": 9.45, | |
| "step": 340, | |
| "train_speed(iter/s)": 0.46045 | |
| }, | |
| { | |
| "epoch": 0.2577993648421446, | |
| "grad_norm": 0.35609582881164253, | |
| "learning_rate": 5.972741198599155e-06, | |
| "loss": 0.15576171875, | |
| "memory(GiB)": 9.45, | |
| "step": 345, | |
| "train_speed(iter/s)": 0.460808 | |
| }, | |
| { | |
| "epoch": 0.26153558752101624, | |
| "grad_norm": 0.3260335257305967, | |
| "learning_rate": 5.971946605597634e-06, | |
| "loss": 0.1542236328125, | |
| "memory(GiB)": 9.45, | |
| "step": 350, | |
| "train_speed(iter/s)": 0.461081 | |
| }, | |
| { | |
| "epoch": 0.26527181019988794, | |
| "grad_norm": 0.3000956082136632, | |
| "learning_rate": 5.9711406515855535e-06, | |
| "loss": 0.1672119140625, | |
| "memory(GiB)": 9.45, | |
| "step": 355, | |
| "train_speed(iter/s)": 0.461632 | |
| }, | |
| { | |
| "epoch": 0.2690080328787596, | |
| "grad_norm": 0.5003356531721083, | |
| "learning_rate": 5.970323339643875e-06, | |
| "loss": 0.141943359375, | |
| "memory(GiB)": 9.45, | |
| "step": 360, | |
| "train_speed(iter/s)": 0.46182 | |
| }, | |
| { | |
| "epoch": 0.2727442555576312, | |
| "grad_norm": 0.3898278569959764, | |
| "learning_rate": 5.969494672896979e-06, | |
| "loss": 0.1525146484375, | |
| "memory(GiB)": 9.45, | |
| "step": 365, | |
| "train_speed(iter/s)": 0.461906 | |
| }, | |
| { | |
| "epoch": 0.2764804782365029, | |
| "grad_norm": 0.3453310818742678, | |
| "learning_rate": 5.96865465451265e-06, | |
| "loss": 0.178564453125, | |
| "memory(GiB)": 9.45, | |
| "step": 370, | |
| "train_speed(iter/s)": 0.46223 | |
| }, | |
| { | |
| "epoch": 0.28021670091537454, | |
| "grad_norm": 0.38009861005791173, | |
| "learning_rate": 5.9678032877020705e-06, | |
| "loss": 0.1583251953125, | |
| "memory(GiB)": 9.45, | |
| "step": 375, | |
| "train_speed(iter/s)": 0.46236 | |
| }, | |
| { | |
| "epoch": 0.28395292359424623, | |
| "grad_norm": 0.3337227144486021, | |
| "learning_rate": 5.966940575719802e-06, | |
| "loss": 0.164697265625, | |
| "memory(GiB)": 9.45, | |
| "step": 380, | |
| "train_speed(iter/s)": 0.462583 | |
| }, | |
| { | |
| "epoch": 0.28768914627311787, | |
| "grad_norm": 0.34344615999699735, | |
| "learning_rate": 5.966066521863778e-06, | |
| "loss": 0.155126953125, | |
| "memory(GiB)": 9.45, | |
| "step": 385, | |
| "train_speed(iter/s)": 0.462936 | |
| }, | |
| { | |
| "epoch": 0.29142536895198956, | |
| "grad_norm": 0.3782402092083932, | |
| "learning_rate": 5.9651811294752885e-06, | |
| "loss": 0.161767578125, | |
| "memory(GiB)": 9.45, | |
| "step": 390, | |
| "train_speed(iter/s)": 0.463287 | |
| }, | |
| { | |
| "epoch": 0.2951615916308612, | |
| "grad_norm": 0.3820929493431576, | |
| "learning_rate": 5.964284401938968e-06, | |
| "loss": 0.1547119140625, | |
| "memory(GiB)": 9.45, | |
| "step": 395, | |
| "train_speed(iter/s)": 0.463312 | |
| }, | |
| { | |
| "epoch": 0.29889781430973283, | |
| "grad_norm": 0.37254277787709306, | |
| "learning_rate": 5.96337634268278e-06, | |
| "loss": 0.1453125, | |
| "memory(GiB)": 9.45, | |
| "step": 400, | |
| "train_speed(iter/s)": 0.463552 | |
| }, | |
| { | |
| "epoch": 0.3026340369886045, | |
| "grad_norm": 0.3771270351369902, | |
| "learning_rate": 5.9624569551780115e-06, | |
| "loss": 0.1693603515625, | |
| "memory(GiB)": 9.45, | |
| "step": 405, | |
| "train_speed(iter/s)": 0.463665 | |
| }, | |
| { | |
| "epoch": 0.30637025966747616, | |
| "grad_norm": 0.3169810724128572, | |
| "learning_rate": 5.961526242939251e-06, | |
| "loss": 0.143310546875, | |
| "memory(GiB)": 9.45, | |
| "step": 410, | |
| "train_speed(iter/s)": 0.463774 | |
| }, | |
| { | |
| "epoch": 0.31010648234634786, | |
| "grad_norm": 0.39276892682897285, | |
| "learning_rate": 5.960584209524377e-06, | |
| "loss": 0.12626953125, | |
| "memory(GiB)": 9.45, | |
| "step": 415, | |
| "train_speed(iter/s)": 0.463772 | |
| }, | |
| { | |
| "epoch": 0.3138427050252195, | |
| "grad_norm": 0.30248041554648486, | |
| "learning_rate": 5.95963085853455e-06, | |
| "loss": 0.1291259765625, | |
| "memory(GiB)": 9.45, | |
| "step": 420, | |
| "train_speed(iter/s)": 0.464062 | |
| }, | |
| { | |
| "epoch": 0.3175789277040912, | |
| "grad_norm": 0.31139734130517427, | |
| "learning_rate": 5.958666193614194e-06, | |
| "loss": 0.1403564453125, | |
| "memory(GiB)": 9.45, | |
| "step": 425, | |
| "train_speed(iter/s)": 0.46431 | |
| }, | |
| { | |
| "epoch": 0.3213151503829628, | |
| "grad_norm": 0.29672071282145907, | |
| "learning_rate": 5.95769021845098e-06, | |
| "loss": 0.1619140625, | |
| "memory(GiB)": 9.45, | |
| "step": 430, | |
| "train_speed(iter/s)": 0.464574 | |
| }, | |
| { | |
| "epoch": 0.32505137306183446, | |
| "grad_norm": 0.3245553447126267, | |
| "learning_rate": 5.956702936775819e-06, | |
| "loss": 0.149169921875, | |
| "memory(GiB)": 9.45, | |
| "step": 435, | |
| "train_speed(iter/s)": 0.464656 | |
| }, | |
| { | |
| "epoch": 0.32878759574070615, | |
| "grad_norm": 0.37942479273965346, | |
| "learning_rate": 5.955704352362843e-06, | |
| "loss": 0.1540283203125, | |
| "memory(GiB)": 9.45, | |
| "step": 440, | |
| "train_speed(iter/s)": 0.464866 | |
| }, | |
| { | |
| "epoch": 0.3325238184195778, | |
| "grad_norm": 0.4722961848658832, | |
| "learning_rate": 5.954694469029391e-06, | |
| "loss": 0.146875, | |
| "memory(GiB)": 9.45, | |
| "step": 445, | |
| "train_speed(iter/s)": 0.46511 | |
| }, | |
| { | |
| "epoch": 0.3362600410984495, | |
| "grad_norm": 0.32208483256209325, | |
| "learning_rate": 5.9536732906359936e-06, | |
| "loss": 0.1362060546875, | |
| "memory(GiB)": 9.45, | |
| "step": 450, | |
| "train_speed(iter/s)": 0.465444 | |
| }, | |
| { | |
| "epoch": 0.3399962637773211, | |
| "grad_norm": 0.39468565724302457, | |
| "learning_rate": 5.952640821086362e-06, | |
| "loss": 0.14046630859375, | |
| "memory(GiB)": 9.45, | |
| "step": 455, | |
| "train_speed(iter/s)": 0.465502 | |
| }, | |
| { | |
| "epoch": 0.3437324864561928, | |
| "grad_norm": 0.2923449968980904, | |
| "learning_rate": 5.951597064327371e-06, | |
| "loss": 0.14259033203125, | |
| "memory(GiB)": 9.45, | |
| "step": 460, | |
| "train_speed(iter/s)": 0.465768 | |
| }, | |
| { | |
| "epoch": 0.34746870913506445, | |
| "grad_norm": 0.2526312937320368, | |
| "learning_rate": 5.95054202434904e-06, | |
| "loss": 0.154150390625, | |
| "memory(GiB)": 9.45, | |
| "step": 465, | |
| "train_speed(iter/s)": 0.465477 | |
| }, | |
| { | |
| "epoch": 0.3512049318139361, | |
| "grad_norm": 0.25397429668673016, | |
| "learning_rate": 5.949475705184526e-06, | |
| "loss": 0.145068359375, | |
| "memory(GiB)": 9.45, | |
| "step": 470, | |
| "train_speed(iter/s)": 0.465793 | |
| }, | |
| { | |
| "epoch": 0.3549411544928078, | |
| "grad_norm": 0.2889099964297901, | |
| "learning_rate": 5.948398110910099e-06, | |
| "loss": 0.14326171875, | |
| "memory(GiB)": 9.45, | |
| "step": 475, | |
| "train_speed(iter/s)": 0.465718 | |
| }, | |
| { | |
| "epoch": 0.3586773771716794, | |
| "grad_norm": 0.30650880945183995, | |
| "learning_rate": 5.947309245645134e-06, | |
| "loss": 0.17294921875, | |
| "memory(GiB)": 9.45, | |
| "step": 480, | |
| "train_speed(iter/s)": 0.465738 | |
| }, | |
| { | |
| "epoch": 0.3624135998505511, | |
| "grad_norm": 0.23874814446464385, | |
| "learning_rate": 5.946209113552092e-06, | |
| "loss": 0.1577880859375, | |
| "memory(GiB)": 9.45, | |
| "step": 485, | |
| "train_speed(iter/s)": 0.465905 | |
| }, | |
| { | |
| "epoch": 0.36614982252942274, | |
| "grad_norm": 0.26737529230375395, | |
| "learning_rate": 5.945097718836503e-06, | |
| "loss": 0.13236083984375, | |
| "memory(GiB)": 9.45, | |
| "step": 490, | |
| "train_speed(iter/s)": 0.466159 | |
| }, | |
| { | |
| "epoch": 0.36988604520829443, | |
| "grad_norm": 0.34648783089300494, | |
| "learning_rate": 5.9439750657469524e-06, | |
| "loss": 0.166064453125, | |
| "memory(GiB)": 9.45, | |
| "step": 495, | |
| "train_speed(iter/s)": 0.466248 | |
| }, | |
| { | |
| "epoch": 0.37362226788716607, | |
| "grad_norm": 0.3711374814276351, | |
| "learning_rate": 5.942841158575061e-06, | |
| "loss": 0.15181884765625, | |
| "memory(GiB)": 9.45, | |
| "step": 500, | |
| "train_speed(iter/s)": 0.46631 | |
| }, | |
| { | |
| "epoch": 0.37735849056603776, | |
| "grad_norm": 0.26122017355859195, | |
| "learning_rate": 5.941696001655475e-06, | |
| "loss": 0.1420654296875, | |
| "memory(GiB)": 9.45, | |
| "step": 505, | |
| "train_speed(iter/s)": 0.466356 | |
| }, | |
| { | |
| "epoch": 0.3810947132449094, | |
| "grad_norm": 0.30129945797313573, | |
| "learning_rate": 5.940539599365843e-06, | |
| "loss": 0.15704345703125, | |
| "memory(GiB)": 9.45, | |
| "step": 510, | |
| "train_speed(iter/s)": 0.466088 | |
| }, | |
| { | |
| "epoch": 0.38483093592378104, | |
| "grad_norm": 0.27115019497623694, | |
| "learning_rate": 5.939371956126803e-06, | |
| "loss": 0.1350341796875, | |
| "memory(GiB)": 9.45, | |
| "step": 515, | |
| "train_speed(iter/s)": 0.466144 | |
| }, | |
| { | |
| "epoch": 0.38856715860265273, | |
| "grad_norm": 0.3323988811121097, | |
| "learning_rate": 5.938193076401964e-06, | |
| "loss": 0.149072265625, | |
| "memory(GiB)": 9.45, | |
| "step": 520, | |
| "train_speed(iter/s)": 0.466125 | |
| }, | |
| { | |
| "epoch": 0.39230338128152437, | |
| "grad_norm": 0.36151939711979136, | |
| "learning_rate": 5.937002964697888e-06, | |
| "loss": 0.13743896484375, | |
| "memory(GiB)": 9.45, | |
| "step": 525, | |
| "train_speed(iter/s)": 0.466282 | |
| }, | |
| { | |
| "epoch": 0.39603960396039606, | |
| "grad_norm": 0.28979409508186516, | |
| "learning_rate": 5.935801625564074e-06, | |
| "loss": 0.15244140625, | |
| "memory(GiB)": 9.45, | |
| "step": 530, | |
| "train_speed(iter/s)": 0.466375 | |
| }, | |
| { | |
| "epoch": 0.3997758266392677, | |
| "grad_norm": 0.3296511480431298, | |
| "learning_rate": 5.934589063592946e-06, | |
| "loss": 0.15579833984375, | |
| "memory(GiB)": 9.45, | |
| "step": 535, | |
| "train_speed(iter/s)": 0.466122 | |
| }, | |
| { | |
| "epoch": 0.4035120493181394, | |
| "grad_norm": 0.20344435673525696, | |
| "learning_rate": 5.933365283419823e-06, | |
| "loss": 0.151953125, | |
| "memory(GiB)": 9.45, | |
| "step": 540, | |
| "train_speed(iter/s)": 0.466272 | |
| }, | |
| { | |
| "epoch": 0.407248271997011, | |
| "grad_norm": 0.2633323321431179, | |
| "learning_rate": 5.932130289722912e-06, | |
| "loss": 0.15283203125, | |
| "memory(GiB)": 9.45, | |
| "step": 545, | |
| "train_speed(iter/s)": 0.466106 | |
| }, | |
| { | |
| "epoch": 0.41098449467588266, | |
| "grad_norm": 0.3221514229815824, | |
| "learning_rate": 5.9308840872232845e-06, | |
| "loss": 0.16361083984375, | |
| "memory(GiB)": 9.45, | |
| "step": 550, | |
| "train_speed(iter/s)": 0.466076 | |
| }, | |
| { | |
| "epoch": 0.41472071735475435, | |
| "grad_norm": 0.2957653300069589, | |
| "learning_rate": 5.929626680684864e-06, | |
| "loss": 0.1420654296875, | |
| "memory(GiB)": 9.45, | |
| "step": 555, | |
| "train_speed(iter/s)": 0.466246 | |
| }, | |
| { | |
| "epoch": 0.418456940033626, | |
| "grad_norm": 0.27433097372254944, | |
| "learning_rate": 5.928358074914402e-06, | |
| "loss": 0.133544921875, | |
| "memory(GiB)": 9.45, | |
| "step": 560, | |
| "train_speed(iter/s)": 0.466388 | |
| }, | |
| { | |
| "epoch": 0.4221931627124977, | |
| "grad_norm": 0.30811252206856754, | |
| "learning_rate": 5.927078274761459e-06, | |
| "loss": 0.13226318359375, | |
| "memory(GiB)": 9.45, | |
| "step": 565, | |
| "train_speed(iter/s)": 0.46622 | |
| }, | |
| { | |
| "epoch": 0.4259293853913693, | |
| "grad_norm": 0.4343588364932629, | |
| "learning_rate": 5.925787285118395e-06, | |
| "loss": 0.132061767578125, | |
| "memory(GiB)": 9.45, | |
| "step": 570, | |
| "train_speed(iter/s)": 0.466434 | |
| }, | |
| { | |
| "epoch": 0.429665608070241, | |
| "grad_norm": 0.28429865872011917, | |
| "learning_rate": 5.9244851109203404e-06, | |
| "loss": 0.1482177734375, | |
| "memory(GiB)": 9.45, | |
| "step": 575, | |
| "train_speed(iter/s)": 0.466569 | |
| }, | |
| { | |
| "epoch": 0.43340183074911265, | |
| "grad_norm": 0.27884340279867387, | |
| "learning_rate": 5.923171757145182e-06, | |
| "loss": 0.14344482421875, | |
| "memory(GiB)": 9.45, | |
| "step": 580, | |
| "train_speed(iter/s)": 0.46672 | |
| }, | |
| { | |
| "epoch": 0.4371380534279843, | |
| "grad_norm": 0.32622048115497765, | |
| "learning_rate": 5.921847228813543e-06, | |
| "loss": 0.146728515625, | |
| "memory(GiB)": 9.45, | |
| "step": 585, | |
| "train_speed(iter/s)": 0.466879 | |
| }, | |
| { | |
| "epoch": 0.440874276106856, | |
| "grad_norm": 0.43240131198466947, | |
| "learning_rate": 5.9205115309887666e-06, | |
| "loss": 0.1595458984375, | |
| "memory(GiB)": 9.45, | |
| "step": 590, | |
| "train_speed(iter/s)": 0.466944 | |
| }, | |
| { | |
| "epoch": 0.4446104987857276, | |
| "grad_norm": 0.31277968889979835, | |
| "learning_rate": 5.919164668776891e-06, | |
| "loss": 0.1449462890625, | |
| "memory(GiB)": 9.45, | |
| "step": 595, | |
| "train_speed(iter/s)": 0.467155 | |
| }, | |
| { | |
| "epoch": 0.4483467214645993, | |
| "grad_norm": 0.2719906915702348, | |
| "learning_rate": 5.917806647326636e-06, | |
| "loss": 0.1359130859375, | |
| "memory(GiB)": 9.45, | |
| "step": 600, | |
| "train_speed(iter/s)": 0.467399 | |
| }, | |
| { | |
| "epoch": 0.45208294414347094, | |
| "grad_norm": 0.2958357180656749, | |
| "learning_rate": 5.9164374718293764e-06, | |
| "loss": 0.1510498046875, | |
| "memory(GiB)": 9.45, | |
| "step": 605, | |
| "train_speed(iter/s)": 0.467309 | |
| }, | |
| { | |
| "epoch": 0.45581916682234264, | |
| "grad_norm": 0.323840801916129, | |
| "learning_rate": 5.91505714751913e-06, | |
| "loss": 0.1556884765625, | |
| "memory(GiB)": 9.45, | |
| "step": 610, | |
| "train_speed(iter/s)": 0.467461 | |
| }, | |
| { | |
| "epoch": 0.4595553895012143, | |
| "grad_norm": 0.25540153277044697, | |
| "learning_rate": 5.913665679672533e-06, | |
| "loss": 0.1478271484375, | |
| "memory(GiB)": 9.45, | |
| "step": 615, | |
| "train_speed(iter/s)": 0.467614 | |
| }, | |
| { | |
| "epoch": 0.4632916121800859, | |
| "grad_norm": 0.3216539204738006, | |
| "learning_rate": 5.912263073608819e-06, | |
| "loss": 0.14404296875, | |
| "memory(GiB)": 9.45, | |
| "step": 620, | |
| "train_speed(iter/s)": 0.46759 | |
| }, | |
| { | |
| "epoch": 0.4670278348589576, | |
| "grad_norm": 0.3564645954321089, | |
| "learning_rate": 5.9108493346898014e-06, | |
| "loss": 0.1556640625, | |
| "memory(GiB)": 9.45, | |
| "step": 625, | |
| "train_speed(iter/s)": 0.467777 | |
| }, | |
| { | |
| "epoch": 0.47076405753782924, | |
| "grad_norm": 0.3234498866245867, | |
| "learning_rate": 5.9094244683198514e-06, | |
| "loss": 0.130474853515625, | |
| "memory(GiB)": 9.45, | |
| "step": 630, | |
| "train_speed(iter/s)": 0.467657 | |
| }, | |
| { | |
| "epoch": 0.47450028021670093, | |
| "grad_norm": 0.27930245162799133, | |
| "learning_rate": 5.907988479945878e-06, | |
| "loss": 0.1467529296875, | |
| "memory(GiB)": 9.45, | |
| "step": 635, | |
| "train_speed(iter/s)": 0.467501 | |
| }, | |
| { | |
| "epoch": 0.47823650289557257, | |
| "grad_norm": 0.2831117651967566, | |
| "learning_rate": 5.906541375057305e-06, | |
| "loss": 0.135107421875, | |
| "memory(GiB)": 9.45, | |
| "step": 640, | |
| "train_speed(iter/s)": 0.467611 | |
| }, | |
| { | |
| "epoch": 0.48197272557444426, | |
| "grad_norm": 0.27115323391313917, | |
| "learning_rate": 5.905083159186056e-06, | |
| "loss": 0.128759765625, | |
| "memory(GiB)": 9.45, | |
| "step": 645, | |
| "train_speed(iter/s)": 0.46749 | |
| }, | |
| { | |
| "epoch": 0.4857089482533159, | |
| "grad_norm": 0.32564068860731793, | |
| "learning_rate": 5.903613837906525e-06, | |
| "loss": 0.1319580078125, | |
| "memory(GiB)": 9.45, | |
| "step": 650, | |
| "train_speed(iter/s)": 0.467686 | |
| }, | |
| { | |
| "epoch": 0.48944517093218753, | |
| "grad_norm": 0.2387399044673888, | |
| "learning_rate": 5.902133416835561e-06, | |
| "loss": 0.1338134765625, | |
| "memory(GiB)": 9.45, | |
| "step": 655, | |
| "train_speed(iter/s)": 0.467441 | |
| }, | |
| { | |
| "epoch": 0.4931813936110592, | |
| "grad_norm": 0.24117814539801136, | |
| "learning_rate": 5.900641901632444e-06, | |
| "loss": 0.1324462890625, | |
| "memory(GiB)": 9.45, | |
| "step": 660, | |
| "train_speed(iter/s)": 0.46725 | |
| }, | |
| { | |
| "epoch": 0.49691761628993086, | |
| "grad_norm": 0.2806056999325975, | |
| "learning_rate": 5.899139297998865e-06, | |
| "loss": 0.14583740234375, | |
| "memory(GiB)": 9.45, | |
| "step": 665, | |
| "train_speed(iter/s)": 0.467303 | |
| }, | |
| { | |
| "epoch": 0.5006538389688026, | |
| "grad_norm": 0.3602595784462823, | |
| "learning_rate": 5.897625611678904e-06, | |
| "loss": 0.16168212890625, | |
| "memory(GiB)": 9.45, | |
| "step": 670, | |
| "train_speed(iter/s)": 0.467465 | |
| }, | |
| { | |
| "epoch": 0.5043900616476742, | |
| "grad_norm": 0.2892704455949438, | |
| "learning_rate": 5.896100848459004e-06, | |
| "loss": 0.14654541015625, | |
| "memory(GiB)": 9.45, | |
| "step": 675, | |
| "train_speed(iter/s)": 0.467417 | |
| }, | |
| { | |
| "epoch": 0.5081262843265458, | |
| "grad_norm": 0.30864116070274367, | |
| "learning_rate": 5.894565014167955e-06, | |
| "loss": 0.1387451171875, | |
| "memory(GiB)": 9.45, | |
| "step": 680, | |
| "train_speed(iter/s)": 0.467388 | |
| }, | |
| { | |
| "epoch": 0.5118625070054176, | |
| "grad_norm": 0.23741861823114724, | |
| "learning_rate": 5.89301811467687e-06, | |
| "loss": 0.14443359375, | |
| "memory(GiB)": 9.45, | |
| "step": 685, | |
| "train_speed(iter/s)": 0.467619 | |
| }, | |
| { | |
| "epoch": 0.5155987296842892, | |
| "grad_norm": 0.3704119854549676, | |
| "learning_rate": 5.891460155899159e-06, | |
| "loss": 0.1429931640625, | |
| "memory(GiB)": 9.45, | |
| "step": 690, | |
| "train_speed(iter/s)": 0.467553 | |
| }, | |
| { | |
| "epoch": 0.5193349523631609, | |
| "grad_norm": 0.3371956586173727, | |
| "learning_rate": 5.88989114379051e-06, | |
| "loss": 0.122119140625, | |
| "memory(GiB)": 9.45, | |
| "step": 695, | |
| "train_speed(iter/s)": 0.467568 | |
| }, | |
| { | |
| "epoch": 0.5230711750420325, | |
| "grad_norm": 0.23061580193263015, | |
| "learning_rate": 5.888311084348865e-06, | |
| "loss": 0.1429931640625, | |
| "memory(GiB)": 9.45, | |
| "step": 700, | |
| "train_speed(iter/s)": 0.467617 | |
| }, | |
| { | |
| "epoch": 0.5268073977209041, | |
| "grad_norm": 0.2357495758457104, | |
| "learning_rate": 5.886719983614396e-06, | |
| "loss": 0.1326904296875, | |
| "memory(GiB)": 9.45, | |
| "step": 705, | |
| "train_speed(iter/s)": 0.467672 | |
| }, | |
| { | |
| "epoch": 0.5305436203997759, | |
| "grad_norm": 0.20506003694806352, | |
| "learning_rate": 5.885117847669485e-06, | |
| "loss": 0.1441650390625, | |
| "memory(GiB)": 9.45, | |
| "step": 710, | |
| "train_speed(iter/s)": 0.467709 | |
| }, | |
| { | |
| "epoch": 0.5342798430786475, | |
| "grad_norm": 0.3366909550119504, | |
| "learning_rate": 5.883504682638699e-06, | |
| "loss": 0.1407958984375, | |
| "memory(GiB)": 9.45, | |
| "step": 715, | |
| "train_speed(iter/s)": 0.467852 | |
| }, | |
| { | |
| "epoch": 0.5380160657575191, | |
| "grad_norm": 0.2909847266005631, | |
| "learning_rate": 5.881880494688763e-06, | |
| "loss": 0.1455322265625, | |
| "memory(GiB)": 9.45, | |
| "step": 720, | |
| "train_speed(iter/s)": 0.467893 | |
| }, | |
| { | |
| "epoch": 0.5417522884363908, | |
| "grad_norm": 0.2386052769018931, | |
| "learning_rate": 5.880245290028545e-06, | |
| "loss": 0.140478515625, | |
| "memory(GiB)": 9.45, | |
| "step": 725, | |
| "train_speed(iter/s)": 0.467751 | |
| }, | |
| { | |
| "epoch": 0.5454885111152624, | |
| "grad_norm": 0.2645707847366404, | |
| "learning_rate": 5.878599074909023e-06, | |
| "loss": 0.1463134765625, | |
| "memory(GiB)": 9.45, | |
| "step": 730, | |
| "train_speed(iter/s)": 0.467878 | |
| }, | |
| { | |
| "epoch": 0.5492247337941342, | |
| "grad_norm": 0.31563029908522805, | |
| "learning_rate": 5.876941855623268e-06, | |
| "loss": 0.1530029296875, | |
| "memory(GiB)": 9.45, | |
| "step": 735, | |
| "train_speed(iter/s)": 0.467974 | |
| }, | |
| { | |
| "epoch": 0.5529609564730058, | |
| "grad_norm": 0.26319413448836815, | |
| "learning_rate": 5.8752736385064145e-06, | |
| "loss": 0.12587890625, | |
| "memory(GiB)": 9.45, | |
| "step": 740, | |
| "train_speed(iter/s)": 0.467961 | |
| }, | |
| { | |
| "epoch": 0.5566971791518774, | |
| "grad_norm": 0.3858440978882179, | |
| "learning_rate": 5.873594429935642e-06, | |
| "loss": 0.1377197265625, | |
| "memory(GiB)": 9.45, | |
| "step": 745, | |
| "train_speed(iter/s)": 0.468054 | |
| }, | |
| { | |
| "epoch": 0.5604334018307491, | |
| "grad_norm": 0.20276433188895907, | |
| "learning_rate": 5.871904236330144e-06, | |
| "loss": 0.12718505859375, | |
| "memory(GiB)": 9.45, | |
| "step": 750, | |
| "train_speed(iter/s)": 0.468081 | |
| }, | |
| { | |
| "epoch": 0.5641696245096208, | |
| "grad_norm": 0.22243564217533868, | |
| "learning_rate": 5.870203064151111e-06, | |
| "loss": 0.1421630859375, | |
| "memory(GiB)": 9.45, | |
| "step": 755, | |
| "train_speed(iter/s)": 0.468228 | |
| }, | |
| { | |
| "epoch": 0.5679058471884925, | |
| "grad_norm": 0.2924186985340597, | |
| "learning_rate": 5.8684909199017e-06, | |
| "loss": 0.145458984375, | |
| "memory(GiB)": 9.45, | |
| "step": 760, | |
| "train_speed(iter/s)": 0.468279 | |
| }, | |
| { | |
| "epoch": 0.5716420698673641, | |
| "grad_norm": 0.22056169438669584, | |
| "learning_rate": 5.866767810127009e-06, | |
| "loss": 0.128564453125, | |
| "memory(GiB)": 9.45, | |
| "step": 765, | |
| "train_speed(iter/s)": 0.468225 | |
| }, | |
| { | |
| "epoch": 0.5753782925462357, | |
| "grad_norm": 0.2740803532217515, | |
| "learning_rate": 5.86503374141406e-06, | |
| "loss": 0.1392822265625, | |
| "memory(GiB)": 9.45, | |
| "step": 770, | |
| "train_speed(iter/s)": 0.468416 | |
| }, | |
| { | |
| "epoch": 0.5791145152251074, | |
| "grad_norm": 0.3606780255005757, | |
| "learning_rate": 5.863288720391763e-06, | |
| "loss": 0.155615234375, | |
| "memory(GiB)": 9.45, | |
| "step": 775, | |
| "train_speed(iter/s)": 0.468411 | |
| }, | |
| { | |
| "epoch": 0.5828507379039791, | |
| "grad_norm": 0.21221894282841508, | |
| "learning_rate": 5.861532753730898e-06, | |
| "loss": 0.1374755859375, | |
| "memory(GiB)": 9.45, | |
| "step": 780, | |
| "train_speed(iter/s)": 0.468088 | |
| }, | |
| { | |
| "epoch": 0.5865869605828508, | |
| "grad_norm": 0.2660755841560947, | |
| "learning_rate": 5.859765848144089e-06, | |
| "loss": 0.13995361328125, | |
| "memory(GiB)": 9.45, | |
| "step": 785, | |
| "train_speed(iter/s)": 0.467999 | |
| }, | |
| { | |
| "epoch": 0.5903231832617224, | |
| "grad_norm": 0.22600558934152162, | |
| "learning_rate": 5.857988010385774e-06, | |
| "loss": 0.128515625, | |
| "memory(GiB)": 9.45, | |
| "step": 790, | |
| "train_speed(iter/s)": 0.468097 | |
| }, | |
| { | |
| "epoch": 0.594059405940594, | |
| "grad_norm": 0.24882069836354315, | |
| "learning_rate": 5.856199247252184e-06, | |
| "loss": 0.1505126953125, | |
| "memory(GiB)": 9.45, | |
| "step": 795, | |
| "train_speed(iter/s)": 0.468257 | |
| }, | |
| { | |
| "epoch": 0.5977956286194657, | |
| "grad_norm": 0.2541011112429318, | |
| "learning_rate": 5.854399565581314e-06, | |
| "loss": 0.13427734375, | |
| "memory(GiB)": 10.57, | |
| "step": 800, | |
| "train_speed(iter/s)": 0.468265 | |
| }, | |
| { | |
| "epoch": 0.6015318512983374, | |
| "grad_norm": 0.2872004300607469, | |
| "learning_rate": 5.8525889722528985e-06, | |
| "loss": 0.1360595703125, | |
| "memory(GiB)": 10.57, | |
| "step": 805, | |
| "train_speed(iter/s)": 0.468185 | |
| }, | |
| { | |
| "epoch": 0.605268073977209, | |
| "grad_norm": 0.323332001257219, | |
| "learning_rate": 5.850767474188383e-06, | |
| "loss": 0.1507080078125, | |
| "memory(GiB)": 10.57, | |
| "step": 810, | |
| "train_speed(iter/s)": 0.468123 | |
| }, | |
| { | |
| "epoch": 0.6090042966560807, | |
| "grad_norm": 0.20912945195997415, | |
| "learning_rate": 5.8489350783509025e-06, | |
| "loss": 0.13023681640625, | |
| "memory(GiB)": 10.57, | |
| "step": 815, | |
| "train_speed(iter/s)": 0.46818 | |
| }, | |
| { | |
| "epoch": 0.6127405193349523, | |
| "grad_norm": 0.2939854808777276, | |
| "learning_rate": 5.847091791745247e-06, | |
| "loss": 0.14840087890625, | |
| "memory(GiB)": 10.57, | |
| "step": 820, | |
| "train_speed(iter/s)": 0.468357 | |
| }, | |
| { | |
| "epoch": 0.6164767420138241, | |
| "grad_norm": 0.24988955332399215, | |
| "learning_rate": 5.8452376214178426e-06, | |
| "loss": 0.12974853515625, | |
| "memory(GiB)": 10.57, | |
| "step": 825, | |
| "train_speed(iter/s)": 0.468499 | |
| }, | |
| { | |
| "epoch": 0.6202129646926957, | |
| "grad_norm": 0.21781664947497836, | |
| "learning_rate": 5.84337257445672e-06, | |
| "loss": 0.1396484375, | |
| "memory(GiB)": 10.57, | |
| "step": 830, | |
| "train_speed(iter/s)": 0.468441 | |
| }, | |
| { | |
| "epoch": 0.6239491873715673, | |
| "grad_norm": 0.25082480295038034, | |
| "learning_rate": 5.841496657991487e-06, | |
| "loss": 0.135546875, | |
| "memory(GiB)": 10.57, | |
| "step": 835, | |
| "train_speed(iter/s)": 0.468446 | |
| }, | |
| { | |
| "epoch": 0.627685410050439, | |
| "grad_norm": 0.2686863733874229, | |
| "learning_rate": 5.8396098791933055e-06, | |
| "loss": 0.11251220703125, | |
| "memory(GiB)": 10.57, | |
| "step": 840, | |
| "train_speed(iter/s)": 0.46852 | |
| }, | |
| { | |
| "epoch": 0.6314216327293106, | |
| "grad_norm": 0.2710369755059897, | |
| "learning_rate": 5.837712245274861e-06, | |
| "loss": 0.11365966796875, | |
| "memory(GiB)": 10.57, | |
| "step": 845, | |
| "train_speed(iter/s)": 0.468573 | |
| }, | |
| { | |
| "epoch": 0.6351578554081824, | |
| "grad_norm": 0.34487595194525544, | |
| "learning_rate": 5.835803763490333e-06, | |
| "loss": 0.1312255859375, | |
| "memory(GiB)": 10.57, | |
| "step": 850, | |
| "train_speed(iter/s)": 0.468679 | |
| }, | |
| { | |
| "epoch": 0.638894078087054, | |
| "grad_norm": 0.2557913641225529, | |
| "learning_rate": 5.833884441135373e-06, | |
| "loss": 0.150701904296875, | |
| "memory(GiB)": 10.57, | |
| "step": 855, | |
| "train_speed(iter/s)": 0.468713 | |
| }, | |
| { | |
| "epoch": 0.6426303007659256, | |
| "grad_norm": 0.2492246452188681, | |
| "learning_rate": 5.831954285547071e-06, | |
| "loss": 0.1027587890625, | |
| "memory(GiB)": 10.57, | |
| "step": 860, | |
| "train_speed(iter/s)": 0.468697 | |
| }, | |
| { | |
| "epoch": 0.6463665234447973, | |
| "grad_norm": 0.20962556058124304, | |
| "learning_rate": 5.830013304103929e-06, | |
| "loss": 0.13544921875, | |
| "memory(GiB)": 10.57, | |
| "step": 865, | |
| "train_speed(iter/s)": 0.468784 | |
| }, | |
| { | |
| "epoch": 0.6501027461236689, | |
| "grad_norm": 0.26313981696050626, | |
| "learning_rate": 5.828061504225837e-06, | |
| "loss": 0.13037109375, | |
| "memory(GiB)": 10.57, | |
| "step": 870, | |
| "train_speed(iter/s)": 0.468837 | |
| }, | |
| { | |
| "epoch": 0.6538389688025407, | |
| "grad_norm": 0.3459843916573515, | |
| "learning_rate": 5.826098893374037e-06, | |
| "loss": 0.1420654296875, | |
| "memory(GiB)": 10.57, | |
| "step": 875, | |
| "train_speed(iter/s)": 0.468901 | |
| }, | |
| { | |
| "epoch": 0.6575751914814123, | |
| "grad_norm": 0.27792291827470583, | |
| "learning_rate": 5.824125479051103e-06, | |
| "loss": 0.13037109375, | |
| "memory(GiB)": 10.57, | |
| "step": 880, | |
| "train_speed(iter/s)": 0.468895 | |
| }, | |
| { | |
| "epoch": 0.6613114141602839, | |
| "grad_norm": 0.2607823555214958, | |
| "learning_rate": 5.8221412688009034e-06, | |
| "loss": 0.135107421875, | |
| "memory(GiB)": 10.57, | |
| "step": 885, | |
| "train_speed(iter/s)": 0.468908 | |
| }, | |
| { | |
| "epoch": 0.6650476368391556, | |
| "grad_norm": 0.2501794810871831, | |
| "learning_rate": 5.820146270208581e-06, | |
| "loss": 0.12391357421875, | |
| "memory(GiB)": 10.57, | |
| "step": 890, | |
| "train_speed(iter/s)": 0.468941 | |
| }, | |
| { | |
| "epoch": 0.6687838595180273, | |
| "grad_norm": 0.2564710519025842, | |
| "learning_rate": 5.8181404909005175e-06, | |
| "loss": 0.14501953125, | |
| "memory(GiB)": 10.57, | |
| "step": 895, | |
| "train_speed(iter/s)": 0.46911 | |
| }, | |
| { | |
| "epoch": 0.672520082196899, | |
| "grad_norm": 0.2715014489679807, | |
| "learning_rate": 5.816123938544305e-06, | |
| "loss": 0.132275390625, | |
| "memory(GiB)": 10.57, | |
| "step": 900, | |
| "train_speed(iter/s)": 0.469243 | |
| }, | |
| { | |
| "epoch": 0.6762563048757706, | |
| "grad_norm": 0.321526690011715, | |
| "learning_rate": 5.814096620848723e-06, | |
| "loss": 0.14796142578125, | |
| "memory(GiB)": 10.57, | |
| "step": 905, | |
| "train_speed(iter/s)": 0.469369 | |
| }, | |
| { | |
| "epoch": 0.6799925275546422, | |
| "grad_norm": 0.26737616140516984, | |
| "learning_rate": 5.8120585455636975e-06, | |
| "loss": 0.1335205078125, | |
| "memory(GiB)": 10.57, | |
| "step": 910, | |
| "train_speed(iter/s)": 0.469536 | |
| }, | |
| { | |
| "epoch": 0.6837287502335139, | |
| "grad_norm": 0.23441075691954993, | |
| "learning_rate": 5.8100097204802854e-06, | |
| "loss": 0.13460693359375, | |
| "memory(GiB)": 10.57, | |
| "step": 915, | |
| "train_speed(iter/s)": 0.469558 | |
| }, | |
| { | |
| "epoch": 0.6874649729123856, | |
| "grad_norm": 0.3004174037886124, | |
| "learning_rate": 5.807950153430634e-06, | |
| "loss": 0.13314208984375, | |
| "memory(GiB)": 10.57, | |
| "step": 920, | |
| "train_speed(iter/s)": 0.469494 | |
| }, | |
| { | |
| "epoch": 0.6912011955912573, | |
| "grad_norm": 0.3511989299300596, | |
| "learning_rate": 5.805879852287953e-06, | |
| "loss": 0.11871337890625, | |
| "memory(GiB)": 10.57, | |
| "step": 925, | |
| "train_speed(iter/s)": 0.469497 | |
| }, | |
| { | |
| "epoch": 0.6949374182701289, | |
| "grad_norm": 0.22941841038115351, | |
| "learning_rate": 5.803798824966487e-06, | |
| "loss": 0.12340087890625, | |
| "memory(GiB)": 10.57, | |
| "step": 930, | |
| "train_speed(iter/s)": 0.469442 | |
| }, | |
| { | |
| "epoch": 0.6986736409490005, | |
| "grad_norm": 0.2296737881416939, | |
| "learning_rate": 5.801707079421485e-06, | |
| "loss": 0.115618896484375, | |
| "memory(GiB)": 10.57, | |
| "step": 935, | |
| "train_speed(iter/s)": 0.469455 | |
| }, | |
| { | |
| "epoch": 0.7024098636278722, | |
| "grad_norm": 0.2832572168234479, | |
| "learning_rate": 5.799604623649168e-06, | |
| "loss": 0.1305908203125, | |
| "memory(GiB)": 10.57, | |
| "step": 940, | |
| "train_speed(iter/s)": 0.46963 | |
| }, | |
| { | |
| "epoch": 0.7061460863067439, | |
| "grad_norm": 0.316216648821189, | |
| "learning_rate": 5.7974914656867004e-06, | |
| "loss": 0.123193359375, | |
| "memory(GiB)": 10.57, | |
| "step": 945, | |
| "train_speed(iter/s)": 0.46966 | |
| }, | |
| { | |
| "epoch": 0.7098823089856156, | |
| "grad_norm": 0.26883224736976363, | |
| "learning_rate": 5.795367613612158e-06, | |
| "loss": 0.12900390625, | |
| "memory(GiB)": 10.57, | |
| "step": 950, | |
| "train_speed(iter/s)": 0.469682 | |
| }, | |
| { | |
| "epoch": 0.7136185316644872, | |
| "grad_norm": 0.2965748829854584, | |
| "learning_rate": 5.793233075544498e-06, | |
| "loss": 0.11947021484375, | |
| "memory(GiB)": 10.57, | |
| "step": 955, | |
| "train_speed(iter/s)": 0.469772 | |
| }, | |
| { | |
| "epoch": 0.7173547543433588, | |
| "grad_norm": 0.23063703167824398, | |
| "learning_rate": 5.791087859643525e-06, | |
| "loss": 0.15511474609375, | |
| "memory(GiB)": 10.57, | |
| "step": 960, | |
| "train_speed(iter/s)": 0.469872 | |
| }, | |
| { | |
| "epoch": 0.7210909770222306, | |
| "grad_norm": 0.3034417815586922, | |
| "learning_rate": 5.788931974109867e-06, | |
| "loss": 0.1328369140625, | |
| "memory(GiB)": 10.57, | |
| "step": 965, | |
| "train_speed(iter/s)": 0.469955 | |
| }, | |
| { | |
| "epoch": 0.7248271997011022, | |
| "grad_norm": 0.2606666501840904, | |
| "learning_rate": 5.7867654271849355e-06, | |
| "loss": 0.1348388671875, | |
| "memory(GiB)": 10.57, | |
| "step": 970, | |
| "train_speed(iter/s)": 0.470006 | |
| }, | |
| { | |
| "epoch": 0.7285634223799738, | |
| "grad_norm": 0.26581107097992346, | |
| "learning_rate": 5.7845882271508975e-06, | |
| "loss": 0.133349609375, | |
| "memory(GiB)": 10.57, | |
| "step": 975, | |
| "train_speed(iter/s)": 0.470064 | |
| }, | |
| { | |
| "epoch": 0.7322996450588455, | |
| "grad_norm": 0.3223256775646686, | |
| "learning_rate": 5.7824003823306484e-06, | |
| "loss": 0.13079833984375, | |
| "memory(GiB)": 10.57, | |
| "step": 980, | |
| "train_speed(iter/s)": 0.469962 | |
| }, | |
| { | |
| "epoch": 0.7360358677377171, | |
| "grad_norm": 0.3011414890652826, | |
| "learning_rate": 5.780201901087771e-06, | |
| "loss": 0.1345947265625, | |
| "memory(GiB)": 10.57, | |
| "step": 985, | |
| "train_speed(iter/s)": 0.470102 | |
| }, | |
| { | |
| "epoch": 0.7397720904165889, | |
| "grad_norm": 0.30876194543952196, | |
| "learning_rate": 5.777992791826512e-06, | |
| "loss": 0.14576416015625, | |
| "memory(GiB)": 10.57, | |
| "step": 990, | |
| "train_speed(iter/s)": 0.470252 | |
| }, | |
| { | |
| "epoch": 0.7435083130954605, | |
| "grad_norm": 0.2584420531503668, | |
| "learning_rate": 5.775773062991744e-06, | |
| "loss": 0.1373291015625, | |
| "memory(GiB)": 10.57, | |
| "step": 995, | |
| "train_speed(iter/s)": 0.470366 | |
| }, | |
| { | |
| "epoch": 0.7472445357743321, | |
| "grad_norm": 0.30020605961844676, | |
| "learning_rate": 5.773542723068937e-06, | |
| "loss": 0.1499267578125, | |
| "memory(GiB)": 10.57, | |
| "step": 1000, | |
| "train_speed(iter/s)": 0.470476 | |
| }, | |
| { | |
| "epoch": 0.7509807584532038, | |
| "grad_norm": 0.3190463063150355, | |
| "learning_rate": 5.771301780584126e-06, | |
| "loss": 0.13701171875, | |
| "memory(GiB)": 10.57, | |
| "step": 1005, | |
| "train_speed(iter/s)": 0.470413 | |
| }, | |
| { | |
| "epoch": 0.7547169811320755, | |
| "grad_norm": 0.27457688934495655, | |
| "learning_rate": 5.769050244103872e-06, | |
| "loss": 0.14490966796875, | |
| "memory(GiB)": 10.57, | |
| "step": 1010, | |
| "train_speed(iter/s)": 0.470476 | |
| }, | |
| { | |
| "epoch": 0.7584532038109472, | |
| "grad_norm": 0.2676875855097265, | |
| "learning_rate": 5.76678812223524e-06, | |
| "loss": 0.1295654296875, | |
| "memory(GiB)": 10.57, | |
| "step": 1015, | |
| "train_speed(iter/s)": 0.470519 | |
| }, | |
| { | |
| "epoch": 0.7621894264898188, | |
| "grad_norm": 0.35808053452995126, | |
| "learning_rate": 5.764515423625757e-06, | |
| "loss": 0.148779296875, | |
| "memory(GiB)": 10.57, | |
| "step": 1020, | |
| "train_speed(iter/s)": 0.470638 | |
| }, | |
| { | |
| "epoch": 0.7659256491686904, | |
| "grad_norm": 0.2842904674611216, | |
| "learning_rate": 5.762232156963381e-06, | |
| "loss": 0.142138671875, | |
| "memory(GiB)": 10.57, | |
| "step": 1025, | |
| "train_speed(iter/s)": 0.47066 | |
| }, | |
| { | |
| "epoch": 0.7696618718475621, | |
| "grad_norm": 0.24187187753830167, | |
| "learning_rate": 5.759938330976473e-06, | |
| "loss": 0.12486572265625, | |
| "memory(GiB)": 10.57, | |
| "step": 1030, | |
| "train_speed(iter/s)": 0.470767 | |
| }, | |
| { | |
| "epoch": 0.7733980945264338, | |
| "grad_norm": 0.17249284757124964, | |
| "learning_rate": 5.757633954433757e-06, | |
| "loss": 0.13060302734375, | |
| "memory(GiB)": 10.57, | |
| "step": 1035, | |
| "train_speed(iter/s)": 0.470832 | |
| }, | |
| { | |
| "epoch": 0.7771343172053055, | |
| "grad_norm": 0.2299915320848999, | |
| "learning_rate": 5.755319036144289e-06, | |
| "loss": 0.1218017578125, | |
| "memory(GiB)": 10.57, | |
| "step": 1040, | |
| "train_speed(iter/s)": 0.470946 | |
| }, | |
| { | |
| "epoch": 0.7808705398841771, | |
| "grad_norm": 0.19120763093823928, | |
| "learning_rate": 5.752993584957426e-06, | |
| "loss": 0.11143798828125, | |
| "memory(GiB)": 10.57, | |
| "step": 1045, | |
| "train_speed(iter/s)": 0.470952 | |
| }, | |
| { | |
| "epoch": 0.7846067625630487, | |
| "grad_norm": 0.23296209254061714, | |
| "learning_rate": 5.750657609762787e-06, | |
| "loss": 0.12412109375, | |
| "memory(GiB)": 10.57, | |
| "step": 1050, | |
| "train_speed(iter/s)": 0.471065 | |
| }, | |
| { | |
| "epoch": 0.7883429852419204, | |
| "grad_norm": 0.23478728691916106, | |
| "learning_rate": 5.74831111949022e-06, | |
| "loss": 0.119873046875, | |
| "memory(GiB)": 10.57, | |
| "step": 1055, | |
| "train_speed(iter/s)": 0.471177 | |
| }, | |
| { | |
| "epoch": 0.7920792079207921, | |
| "grad_norm": 0.3162727585321945, | |
| "learning_rate": 5.745954123109776e-06, | |
| "loss": 0.1431884765625, | |
| "memory(GiB)": 10.57, | |
| "step": 1060, | |
| "train_speed(iter/s)": 0.471178 | |
| }, | |
| { | |
| "epoch": 0.7958154305996638, | |
| "grad_norm": 0.2554843268036968, | |
| "learning_rate": 5.743586629631663e-06, | |
| "loss": 0.13331298828125, | |
| "memory(GiB)": 10.57, | |
| "step": 1065, | |
| "train_speed(iter/s)": 0.471177 | |
| }, | |
| { | |
| "epoch": 0.7995516532785354, | |
| "grad_norm": 0.26771118158424334, | |
| "learning_rate": 5.741208648106216e-06, | |
| "loss": 0.12591552734375, | |
| "memory(GiB)": 10.57, | |
| "step": 1070, | |
| "train_speed(iter/s)": 0.471319 | |
| }, | |
| { | |
| "epoch": 0.803287875957407, | |
| "grad_norm": 0.22210847866208316, | |
| "learning_rate": 5.7388201876238665e-06, | |
| "loss": 0.13048095703125, | |
| "memory(GiB)": 10.57, | |
| "step": 1075, | |
| "train_speed(iter/s)": 0.471098 | |
| }, | |
| { | |
| "epoch": 0.8070240986362788, | |
| "grad_norm": 0.2090698541733704, | |
| "learning_rate": 5.736421257315105e-06, | |
| "loss": 0.128466796875, | |
| "memory(GiB)": 10.57, | |
| "step": 1080, | |
| "train_speed(iter/s)": 0.471054 | |
| }, | |
| { | |
| "epoch": 0.8107603213151504, | |
| "grad_norm": 0.25643857813943166, | |
| "learning_rate": 5.734011866350441e-06, | |
| "loss": 0.1285400390625, | |
| "memory(GiB)": 10.57, | |
| "step": 1085, | |
| "train_speed(iter/s)": 0.471162 | |
| }, | |
| { | |
| "epoch": 0.814496543994022, | |
| "grad_norm": 0.21115359067598077, | |
| "learning_rate": 5.731592023940377e-06, | |
| "loss": 0.1183837890625, | |
| "memory(GiB)": 10.57, | |
| "step": 1090, | |
| "train_speed(iter/s)": 0.471227 | |
| }, | |
| { | |
| "epoch": 0.8182327666728937, | |
| "grad_norm": 0.2712675993739738, | |
| "learning_rate": 5.7291617393353644e-06, | |
| "loss": 0.13204345703125, | |
| "memory(GiB)": 10.57, | |
| "step": 1095, | |
| "train_speed(iter/s)": 0.471252 | |
| }, | |
| { | |
| "epoch": 0.8219689893517653, | |
| "grad_norm": 0.2084250099258058, | |
| "learning_rate": 5.726721021825778e-06, | |
| "loss": 0.11478271484375, | |
| "memory(GiB)": 10.57, | |
| "step": 1100, | |
| "train_speed(iter/s)": 0.471379 | |
| }, | |
| { | |
| "epoch": 0.8257052120306371, | |
| "grad_norm": 0.2830458697450999, | |
| "learning_rate": 5.724269880741871e-06, | |
| "loss": 0.126416015625, | |
| "memory(GiB)": 10.57, | |
| "step": 1105, | |
| "train_speed(iter/s)": 0.471405 | |
| }, | |
| { | |
| "epoch": 0.8294414347095087, | |
| "grad_norm": 0.3346887940418336, | |
| "learning_rate": 5.721808325453744e-06, | |
| "loss": 0.120458984375, | |
| "memory(GiB)": 10.57, | |
| "step": 1110, | |
| "train_speed(iter/s)": 0.471343 | |
| }, | |
| { | |
| "epoch": 0.8331776573883803, | |
| "grad_norm": 0.36391607206986826, | |
| "learning_rate": 5.719336365371309e-06, | |
| "loss": 0.12493896484375, | |
| "memory(GiB)": 10.57, | |
| "step": 1115, | |
| "train_speed(iter/s)": 0.471475 | |
| }, | |
| { | |
| "epoch": 0.836913880067252, | |
| "grad_norm": 0.24337137317648888, | |
| "learning_rate": 5.716854009944253e-06, | |
| "loss": 0.116259765625, | |
| "memory(GiB)": 10.57, | |
| "step": 1120, | |
| "train_speed(iter/s)": 0.471472 | |
| }, | |
| { | |
| "epoch": 0.8406501027461236, | |
| "grad_norm": 0.26926385581419715, | |
| "learning_rate": 5.714361268662001e-06, | |
| "loss": 0.12049560546875, | |
| "memory(GiB)": 10.57, | |
| "step": 1125, | |
| "train_speed(iter/s)": 0.471483 | |
| }, | |
| { | |
| "epoch": 0.8443863254249954, | |
| "grad_norm": 0.21579011337181153, | |
| "learning_rate": 5.711858151053681e-06, | |
| "loss": 0.13843994140625, | |
| "memory(GiB)": 10.57, | |
| "step": 1130, | |
| "train_speed(iter/s)": 0.471564 | |
| }, | |
| { | |
| "epoch": 0.848122548103867, | |
| "grad_norm": 0.20161584440361865, | |
| "learning_rate": 5.7093446666880895e-06, | |
| "loss": 0.109716796875, | |
| "memory(GiB)": 10.57, | |
| "step": 1135, | |
| "train_speed(iter/s)": 0.471594 | |
| }, | |
| { | |
| "epoch": 0.8518587707827386, | |
| "grad_norm": 0.188476182825962, | |
| "learning_rate": 5.7068208251736475e-06, | |
| "loss": 0.126171875, | |
| "memory(GiB)": 10.57, | |
| "step": 1140, | |
| "train_speed(iter/s)": 0.471689 | |
| }, | |
| { | |
| "epoch": 0.8555949934616103, | |
| "grad_norm": 0.27427824113320737, | |
| "learning_rate": 5.704286636158373e-06, | |
| "loss": 0.12137451171875, | |
| "memory(GiB)": 10.57, | |
| "step": 1145, | |
| "train_speed(iter/s)": 0.471734 | |
| }, | |
| { | |
| "epoch": 0.859331216140482, | |
| "grad_norm": 0.22763746167838253, | |
| "learning_rate": 5.701742109329838e-06, | |
| "loss": 0.13856201171875, | |
| "memory(GiB)": 10.57, | |
| "step": 1150, | |
| "train_speed(iter/s)": 0.471726 | |
| }, | |
| { | |
| "epoch": 0.8630674388193537, | |
| "grad_norm": 0.17131970459498547, | |
| "learning_rate": 5.6991872544151335e-06, | |
| "loss": 0.14425048828125, | |
| "memory(GiB)": 10.57, | |
| "step": 1155, | |
| "train_speed(iter/s)": 0.471788 | |
| }, | |
| { | |
| "epoch": 0.8668036614982253, | |
| "grad_norm": 0.23048712465348178, | |
| "learning_rate": 5.696622081180834e-06, | |
| "loss": 0.153955078125, | |
| "memory(GiB)": 10.57, | |
| "step": 1160, | |
| "train_speed(iter/s)": 0.471828 | |
| }, | |
| { | |
| "epoch": 0.8705398841770969, | |
| "grad_norm": 0.2737430014057503, | |
| "learning_rate": 5.694046599432956e-06, | |
| "loss": 0.116259765625, | |
| "memory(GiB)": 10.57, | |
| "step": 1165, | |
| "train_speed(iter/s)": 0.471811 | |
| }, | |
| { | |
| "epoch": 0.8742761068559686, | |
| "grad_norm": 0.23626021988375195, | |
| "learning_rate": 5.691460819016923e-06, | |
| "loss": 0.1245849609375, | |
| "memory(GiB)": 10.57, | |
| "step": 1170, | |
| "train_speed(iter/s)": 0.471906 | |
| }, | |
| { | |
| "epoch": 0.8780123295348403, | |
| "grad_norm": 0.27390563050373423, | |
| "learning_rate": 5.68886474981753e-06, | |
| "loss": 0.12216796875, | |
| "memory(GiB)": 10.57, | |
| "step": 1175, | |
| "train_speed(iter/s)": 0.471894 | |
| }, | |
| { | |
| "epoch": 0.881748552213712, | |
| "grad_norm": 0.3598824701234181, | |
| "learning_rate": 5.686258401758901e-06, | |
| "loss": 0.1288818359375, | |
| "memory(GiB)": 10.57, | |
| "step": 1180, | |
| "train_speed(iter/s)": 0.471866 | |
| }, | |
| { | |
| "epoch": 0.8854847748925836, | |
| "grad_norm": 0.2803403042160743, | |
| "learning_rate": 5.683641784804454e-06, | |
| "loss": 0.119970703125, | |
| "memory(GiB)": 10.57, | |
| "step": 1185, | |
| "train_speed(iter/s)": 0.471838 | |
| }, | |
| { | |
| "epoch": 0.8892209975714552, | |
| "grad_norm": 0.24011469363238191, | |
| "learning_rate": 5.681014908956866e-06, | |
| "loss": 0.12734375, | |
| "memory(GiB)": 10.57, | |
| "step": 1190, | |
| "train_speed(iter/s)": 0.471876 | |
| }, | |
| { | |
| "epoch": 0.8929572202503269, | |
| "grad_norm": 0.23680884380834868, | |
| "learning_rate": 5.6783777842580245e-06, | |
| "loss": 0.131884765625, | |
| "memory(GiB)": 10.57, | |
| "step": 1195, | |
| "train_speed(iter/s)": 0.471946 | |
| }, | |
| { | |
| "epoch": 0.8966934429291986, | |
| "grad_norm": 0.25067555757294774, | |
| "learning_rate": 5.6757304207890006e-06, | |
| "loss": 0.11749267578125, | |
| "memory(GiB)": 10.57, | |
| "step": 1200, | |
| "train_speed(iter/s)": 0.471919 | |
| }, | |
| { | |
| "epoch": 0.9004296656080703, | |
| "grad_norm": 0.25663340180554484, | |
| "learning_rate": 5.673072828670005e-06, | |
| "loss": 0.1390380859375, | |
| "memory(GiB)": 10.57, | |
| "step": 1205, | |
| "train_speed(iter/s)": 0.471961 | |
| }, | |
| { | |
| "epoch": 0.9041658882869419, | |
| "grad_norm": 0.34196712108358773, | |
| "learning_rate": 5.670405018060349e-06, | |
| "loss": 0.1314453125, | |
| "memory(GiB)": 10.57, | |
| "step": 1210, | |
| "train_speed(iter/s)": 0.472017 | |
| }, | |
| { | |
| "epoch": 0.9079021109658135, | |
| "grad_norm": 0.25320961648503115, | |
| "learning_rate": 5.667726999158408e-06, | |
| "loss": 0.11199951171875, | |
| "memory(GiB)": 10.57, | |
| "step": 1215, | |
| "train_speed(iter/s)": 0.472063 | |
| }, | |
| { | |
| "epoch": 0.9116383336446853, | |
| "grad_norm": 0.2895202800969726, | |
| "learning_rate": 5.665038782201579e-06, | |
| "loss": 0.11494140625, | |
| "memory(GiB)": 10.57, | |
| "step": 1220, | |
| "train_speed(iter/s)": 0.472149 | |
| }, | |
| { | |
| "epoch": 0.9153745563235569, | |
| "grad_norm": 0.24353976208363304, | |
| "learning_rate": 5.662340377466246e-06, | |
| "loss": 0.13350830078125, | |
| "memory(GiB)": 10.57, | |
| "step": 1225, | |
| "train_speed(iter/s)": 0.472205 | |
| }, | |
| { | |
| "epoch": 0.9191107790024285, | |
| "grad_norm": 0.21343931443362257, | |
| "learning_rate": 5.659631795267736e-06, | |
| "loss": 0.1358642578125, | |
| "memory(GiB)": 10.57, | |
| "step": 1230, | |
| "train_speed(iter/s)": 0.472292 | |
| }, | |
| { | |
| "epoch": 0.9228470016813002, | |
| "grad_norm": 0.18836231763075187, | |
| "learning_rate": 5.656913045960284e-06, | |
| "loss": 0.1275634765625, | |
| "memory(GiB)": 10.57, | |
| "step": 1235, | |
| "train_speed(iter/s)": 0.472331 | |
| }, | |
| { | |
| "epoch": 0.9265832243601718, | |
| "grad_norm": 0.4626722838861778, | |
| "learning_rate": 5.65418413993699e-06, | |
| "loss": 0.12288818359375, | |
| "memory(GiB)": 10.57, | |
| "step": 1240, | |
| "train_speed(iter/s)": 0.472384 | |
| }, | |
| { | |
| "epoch": 0.9303194470390436, | |
| "grad_norm": 0.2791944611984056, | |
| "learning_rate": 5.651445087629781e-06, | |
| "loss": 0.12313232421875, | |
| "memory(GiB)": 10.57, | |
| "step": 1245, | |
| "train_speed(iter/s)": 0.47236 | |
| }, | |
| { | |
| "epoch": 0.9340556697179152, | |
| "grad_norm": 0.2351927769190445, | |
| "learning_rate": 5.648695899509373e-06, | |
| "loss": 0.12640380859375, | |
| "memory(GiB)": 10.57, | |
| "step": 1250, | |
| "train_speed(iter/s)": 0.472318 | |
| }, | |
| { | |
| "epoch": 0.9377918923967868, | |
| "grad_norm": 0.29167608891344404, | |
| "learning_rate": 5.6459365860852225e-06, | |
| "loss": 0.1332763671875, | |
| "memory(GiB)": 10.57, | |
| "step": 1255, | |
| "train_speed(iter/s)": 0.472324 | |
| }, | |
| { | |
| "epoch": 0.9415281150756585, | |
| "grad_norm": 0.3389174699822604, | |
| "learning_rate": 5.643167157905499e-06, | |
| "loss": 0.1290771484375, | |
| "memory(GiB)": 10.57, | |
| "step": 1260, | |
| "train_speed(iter/s)": 0.472422 | |
| }, | |
| { | |
| "epoch": 0.9452643377545301, | |
| "grad_norm": 0.19240685493137236, | |
| "learning_rate": 5.640387625557036e-06, | |
| "loss": 0.11680908203125, | |
| "memory(GiB)": 10.57, | |
| "step": 1265, | |
| "train_speed(iter/s)": 0.472422 | |
| }, | |
| { | |
| "epoch": 0.9490005604334019, | |
| "grad_norm": 0.26444195491643885, | |
| "learning_rate": 5.63759799966529e-06, | |
| "loss": 0.139111328125, | |
| "memory(GiB)": 10.57, | |
| "step": 1270, | |
| "train_speed(iter/s)": 0.4725 | |
| }, | |
| { | |
| "epoch": 0.9527367831122735, | |
| "grad_norm": 0.2630005422058253, | |
| "learning_rate": 5.634798290894306e-06, | |
| "loss": 0.1197265625, | |
| "memory(GiB)": 10.57, | |
| "step": 1275, | |
| "train_speed(iter/s)": 0.472495 | |
| }, | |
| { | |
| "epoch": 0.9564730057911451, | |
| "grad_norm": 0.23145820253558871, | |
| "learning_rate": 5.631988509946674e-06, | |
| "loss": 0.1113037109375, | |
| "memory(GiB)": 10.57, | |
| "step": 1280, | |
| "train_speed(iter/s)": 0.472494 | |
| }, | |
| { | |
| "epoch": 0.9602092284700168, | |
| "grad_norm": 0.2899148756938717, | |
| "learning_rate": 5.629168667563484e-06, | |
| "loss": 0.121484375, | |
| "memory(GiB)": 10.57, | |
| "step": 1285, | |
| "train_speed(iter/s)": 0.472497 | |
| }, | |
| { | |
| "epoch": 0.9639454511488885, | |
| "grad_norm": 0.36548878879119173, | |
| "learning_rate": 5.62633877452429e-06, | |
| "loss": 0.12415771484375, | |
| "memory(GiB)": 10.57, | |
| "step": 1290, | |
| "train_speed(iter/s)": 0.472442 | |
| }, | |
| { | |
| "epoch": 0.9676816738277602, | |
| "grad_norm": 0.25528341367700647, | |
| "learning_rate": 5.623498841647067e-06, | |
| "loss": 0.1307861328125, | |
| "memory(GiB)": 10.57, | |
| "step": 1295, | |
| "train_speed(iter/s)": 0.472498 | |
| }, | |
| { | |
| "epoch": 0.9714178965066318, | |
| "grad_norm": 0.1948509154997499, | |
| "learning_rate": 5.620648879788172e-06, | |
| "loss": 0.122802734375, | |
| "memory(GiB)": 10.57, | |
| "step": 1300, | |
| "train_speed(iter/s)": 0.47247 | |
| }, | |
| { | |
| "epoch": 0.9751541191855034, | |
| "grad_norm": 0.23395818708390523, | |
| "learning_rate": 5.617788899842296e-06, | |
| "loss": 0.1336181640625, | |
| "memory(GiB)": 10.57, | |
| "step": 1305, | |
| "train_speed(iter/s)": 0.472478 | |
| }, | |
| { | |
| "epoch": 0.9788903418643751, | |
| "grad_norm": 0.22129751131979117, | |
| "learning_rate": 5.61491891274243e-06, | |
| "loss": 0.11290283203125, | |
| "memory(GiB)": 10.57, | |
| "step": 1310, | |
| "train_speed(iter/s)": 0.47253 | |
| }, | |
| { | |
| "epoch": 0.9826265645432468, | |
| "grad_norm": 0.38768444008771463, | |
| "learning_rate": 5.6120389294598185e-06, | |
| "loss": 0.1374267578125, | |
| "memory(GiB)": 10.57, | |
| "step": 1315, | |
| "train_speed(iter/s)": 0.472627 | |
| }, | |
| { | |
| "epoch": 0.9863627872221185, | |
| "grad_norm": 0.2634727672178905, | |
| "learning_rate": 5.609148961003919e-06, | |
| "loss": 0.10865478515625, | |
| "memory(GiB)": 10.57, | |
| "step": 1320, | |
| "train_speed(iter/s)": 0.472642 | |
| }, | |
| { | |
| "epoch": 0.9900990099009901, | |
| "grad_norm": 0.2693250349909997, | |
| "learning_rate": 5.606249018422361e-06, | |
| "loss": 0.121435546875, | |
| "memory(GiB)": 10.57, | |
| "step": 1325, | |
| "train_speed(iter/s)": 0.472693 | |
| }, | |
| { | |
| "epoch": 0.9938352325798617, | |
| "grad_norm": 0.3142833629244817, | |
| "learning_rate": 5.603339112800902e-06, | |
| "loss": 0.127587890625, | |
| "memory(GiB)": 10.57, | |
| "step": 1330, | |
| "train_speed(iter/s)": 0.472723 | |
| }, | |
| { | |
| "epoch": 0.9975714552587335, | |
| "grad_norm": 0.260475289320075, | |
| "learning_rate": 5.600419255263382e-06, | |
| "loss": 0.12655029296875, | |
| "memory(GiB)": 10.57, | |
| "step": 1335, | |
| "train_speed(iter/s)": 0.472744 | |
| }, | |
| { | |
| "epoch": 1.0007472445357743, | |
| "grad_norm": 0.1995569301298896, | |
| "learning_rate": 5.5974894569716925e-06, | |
| "loss": 0.11612548828125, | |
| "memory(GiB)": 10.57, | |
| "step": 1340, | |
| "train_speed(iter/s)": 0.472443 | |
| }, | |
| { | |
| "epoch": 1.004483467214646, | |
| "grad_norm": 0.19232697366661053, | |
| "learning_rate": 5.594549729125718e-06, | |
| "loss": 0.09854736328125, | |
| "memory(GiB)": 10.57, | |
| "step": 1345, | |
| "train_speed(iter/s)": 0.472505 | |
| }, | |
| { | |
| "epoch": 1.0082196898935176, | |
| "grad_norm": 0.33732827706175905, | |
| "learning_rate": 5.591600082963308e-06, | |
| "loss": 0.115234375, | |
| "memory(GiB)": 10.57, | |
| "step": 1350, | |
| "train_speed(iter/s)": 0.472424 | |
| }, | |
| { | |
| "epoch": 1.0119559125723894, | |
| "grad_norm": 0.18176928497159583, | |
| "learning_rate": 5.58864052976022e-06, | |
| "loss": 0.10379638671875, | |
| "memory(GiB)": 10.57, | |
| "step": 1355, | |
| "train_speed(iter/s)": 0.472477 | |
| }, | |
| { | |
| "epoch": 1.015692135251261, | |
| "grad_norm": 0.357592464764428, | |
| "learning_rate": 5.585671080830091e-06, | |
| "loss": 0.1013671875, | |
| "memory(GiB)": 10.57, | |
| "step": 1360, | |
| "train_speed(iter/s)": 0.472527 | |
| }, | |
| { | |
| "epoch": 1.0194283579301326, | |
| "grad_norm": 0.2808114053275493, | |
| "learning_rate": 5.5826917475243834e-06, | |
| "loss": 0.097698974609375, | |
| "memory(GiB)": 10.57, | |
| "step": 1365, | |
| "train_speed(iter/s)": 0.47262 | |
| }, | |
| { | |
| "epoch": 1.0231645806090044, | |
| "grad_norm": 0.17182129292938358, | |
| "learning_rate": 5.579702541232344e-06, | |
| "loss": 0.10177001953125, | |
| "memory(GiB)": 10.57, | |
| "step": 1370, | |
| "train_speed(iter/s)": 0.472515 | |
| }, | |
| { | |
| "epoch": 1.026900803287876, | |
| "grad_norm": 0.36516118234394773, | |
| "learning_rate": 5.576703473380963e-06, | |
| "loss": 0.10394287109375, | |
| "memory(GiB)": 10.57, | |
| "step": 1375, | |
| "train_speed(iter/s)": 0.472485 | |
| }, | |
| { | |
| "epoch": 1.0306370259667477, | |
| "grad_norm": 0.12994331570229598, | |
| "learning_rate": 5.573694555434929e-06, | |
| "loss": 0.09647216796875, | |
| "memory(GiB)": 10.57, | |
| "step": 1380, | |
| "train_speed(iter/s)": 0.472423 | |
| }, | |
| { | |
| "epoch": 1.0343732486456192, | |
| "grad_norm": 0.2439500781039156, | |
| "learning_rate": 5.570675798896584e-06, | |
| "loss": 0.09913330078125, | |
| "memory(GiB)": 10.57, | |
| "step": 1385, | |
| "train_speed(iter/s)": 0.472472 | |
| }, | |
| { | |
| "epoch": 1.038109471324491, | |
| "grad_norm": 0.24510427147093836, | |
| "learning_rate": 5.567647215305884e-06, | |
| "loss": 0.10660400390625, | |
| "memory(GiB)": 10.57, | |
| "step": 1390, | |
| "train_speed(iter/s)": 0.472502 | |
| }, | |
| { | |
| "epoch": 1.0418456940033627, | |
| "grad_norm": 0.3224514432487436, | |
| "learning_rate": 5.564608816240345e-06, | |
| "loss": 0.1132080078125, | |
| "memory(GiB)": 10.57, | |
| "step": 1395, | |
| "train_speed(iter/s)": 0.472569 | |
| }, | |
| { | |
| "epoch": 1.0455819166822342, | |
| "grad_norm": 0.23587230778852436, | |
| "learning_rate": 5.56156061331501e-06, | |
| "loss": 0.0916259765625, | |
| "memory(GiB)": 10.57, | |
| "step": 1400, | |
| "train_speed(iter/s)": 0.472605 | |
| }, | |
| { | |
| "epoch": 1.049318139361106, | |
| "grad_norm": 0.22597459572374368, | |
| "learning_rate": 5.5585026181823994e-06, | |
| "loss": 0.10594482421875, | |
| "memory(GiB)": 10.57, | |
| "step": 1405, | |
| "train_speed(iter/s)": 0.472556 | |
| }, | |
| { | |
| "epoch": 1.0530543620399775, | |
| "grad_norm": 0.19096269961906193, | |
| "learning_rate": 5.555434842532465e-06, | |
| "loss": 0.089910888671875, | |
| "memory(GiB)": 10.57, | |
| "step": 1410, | |
| "train_speed(iter/s)": 0.472594 | |
| }, | |
| { | |
| "epoch": 1.0567905847188492, | |
| "grad_norm": 0.21611547990188876, | |
| "learning_rate": 5.552357298092549e-06, | |
| "loss": 0.10777587890625, | |
| "memory(GiB)": 10.57, | |
| "step": 1415, | |
| "train_speed(iter/s)": 0.472614 | |
| }, | |
| { | |
| "epoch": 1.060526807397721, | |
| "grad_norm": 0.2651855509481471, | |
| "learning_rate": 5.549269996627335e-06, | |
| "loss": 0.104296875, | |
| "memory(GiB)": 10.57, | |
| "step": 1420, | |
| "train_speed(iter/s)": 0.472711 | |
| }, | |
| { | |
| "epoch": 1.0642630300765925, | |
| "grad_norm": 0.2884021435709037, | |
| "learning_rate": 5.546172949938806e-06, | |
| "loss": 0.09815673828125, | |
| "memory(GiB)": 10.57, | |
| "step": 1425, | |
| "train_speed(iter/s)": 0.472743 | |
| }, | |
| { | |
| "epoch": 1.0679992527554643, | |
| "grad_norm": 0.3272777127266579, | |
| "learning_rate": 5.5430661698661995e-06, | |
| "loss": 0.09771728515625, | |
| "memory(GiB)": 10.57, | |
| "step": 1430, | |
| "train_speed(iter/s)": 0.472793 | |
| }, | |
| { | |
| "epoch": 1.0717354754343358, | |
| "grad_norm": 0.22908749879031715, | |
| "learning_rate": 5.539949668285962e-06, | |
| "loss": 0.11275634765625, | |
| "memory(GiB)": 10.57, | |
| "step": 1435, | |
| "train_speed(iter/s)": 0.472759 | |
| }, | |
| { | |
| "epoch": 1.0754716981132075, | |
| "grad_norm": 0.20839037146203993, | |
| "learning_rate": 5.5368234571117e-06, | |
| "loss": 0.1127685546875, | |
| "memory(GiB)": 10.57, | |
| "step": 1440, | |
| "train_speed(iter/s)": 0.472726 | |
| }, | |
| { | |
| "epoch": 1.0792079207920793, | |
| "grad_norm": 0.21433788637796058, | |
| "learning_rate": 5.533687548294139e-06, | |
| "loss": 0.102685546875, | |
| "memory(GiB)": 10.57, | |
| "step": 1445, | |
| "train_speed(iter/s)": 0.472795 | |
| }, | |
| { | |
| "epoch": 1.0829441434709508, | |
| "grad_norm": 0.2194852609411041, | |
| "learning_rate": 5.530541953821078e-06, | |
| "loss": 0.1194580078125, | |
| "memory(GiB)": 10.57, | |
| "step": 1450, | |
| "train_speed(iter/s)": 0.472644 | |
| }, | |
| { | |
| "epoch": 1.0866803661498226, | |
| "grad_norm": 0.2119142735733801, | |
| "learning_rate": 5.5273866857173375e-06, | |
| "loss": 0.09979248046875, | |
| "memory(GiB)": 10.57, | |
| "step": 1455, | |
| "train_speed(iter/s)": 0.472692 | |
| }, | |
| { | |
| "epoch": 1.090416588828694, | |
| "grad_norm": 0.18271859704191354, | |
| "learning_rate": 5.524221756044723e-06, | |
| "loss": 0.10120849609375, | |
| "memory(GiB)": 10.57, | |
| "step": 1460, | |
| "train_speed(iter/s)": 0.472786 | |
| }, | |
| { | |
| "epoch": 1.0941528115075658, | |
| "grad_norm": 0.1965749879154183, | |
| "learning_rate": 5.521047176901968e-06, | |
| "loss": 0.09178466796875, | |
| "memory(GiB)": 10.57, | |
| "step": 1465, | |
| "train_speed(iter/s)": 0.472753 | |
| }, | |
| { | |
| "epoch": 1.0978890341864376, | |
| "grad_norm": 0.3529079661879815, | |
| "learning_rate": 5.5178629604247e-06, | |
| "loss": 0.099200439453125, | |
| "memory(GiB)": 10.57, | |
| "step": 1470, | |
| "train_speed(iter/s)": 0.4728 | |
| }, | |
| { | |
| "epoch": 1.101625256865309, | |
| "grad_norm": 0.23509583771318013, | |
| "learning_rate": 5.514669118785383e-06, | |
| "loss": 0.10716552734375, | |
| "memory(GiB)": 10.57, | |
| "step": 1475, | |
| "train_speed(iter/s)": 0.47282 | |
| }, | |
| { | |
| "epoch": 1.1053614795441808, | |
| "grad_norm": 0.22191044730282325, | |
| "learning_rate": 5.511465664193278e-06, | |
| "loss": 0.1013671875, | |
| "memory(GiB)": 10.57, | |
| "step": 1480, | |
| "train_speed(iter/s)": 0.472823 | |
| }, | |
| { | |
| "epoch": 1.1090977022230526, | |
| "grad_norm": 0.2697952102289562, | |
| "learning_rate": 5.50825260889439e-06, | |
| "loss": 0.110107421875, | |
| "memory(GiB)": 10.57, | |
| "step": 1485, | |
| "train_speed(iter/s)": 0.47279 | |
| }, | |
| { | |
| "epoch": 1.1128339249019241, | |
| "grad_norm": 0.34041111425327863, | |
| "learning_rate": 5.505029965171431e-06, | |
| "loss": 0.10975341796875, | |
| "memory(GiB)": 10.57, | |
| "step": 1490, | |
| "train_speed(iter/s)": 0.472756 | |
| }, | |
| { | |
| "epoch": 1.1165701475807959, | |
| "grad_norm": 0.26883268648527414, | |
| "learning_rate": 5.501797745343762e-06, | |
| "loss": 0.09005126953125, | |
| "memory(GiB)": 10.57, | |
| "step": 1495, | |
| "train_speed(iter/s)": 0.472749 | |
| }, | |
| { | |
| "epoch": 1.1203063702596674, | |
| "grad_norm": 0.2591995651189346, | |
| "learning_rate": 5.498555961767353e-06, | |
| "loss": 0.1026611328125, | |
| "memory(GiB)": 10.57, | |
| "step": 1500, | |
| "train_speed(iter/s)": 0.472741 | |
| }, | |
| { | |
| "epoch": 1.1240425929385391, | |
| "grad_norm": 0.20112516368334774, | |
| "learning_rate": 5.495304626834737e-06, | |
| "loss": 0.10999755859375, | |
| "memory(GiB)": 10.57, | |
| "step": 1505, | |
| "train_speed(iter/s)": 0.472822 | |
| }, | |
| { | |
| "epoch": 1.127778815617411, | |
| "grad_norm": 0.31243387047085314, | |
| "learning_rate": 5.492043752974954e-06, | |
| "loss": 0.1138671875, | |
| "memory(GiB)": 10.57, | |
| "step": 1510, | |
| "train_speed(iter/s)": 0.472881 | |
| }, | |
| { | |
| "epoch": 1.1315150382962824, | |
| "grad_norm": 0.2885339464617385, | |
| "learning_rate": 5.488773352653511e-06, | |
| "loss": 0.103564453125, | |
| "memory(GiB)": 10.57, | |
| "step": 1515, | |
| "train_speed(iter/s)": 0.47291 | |
| }, | |
| { | |
| "epoch": 1.1352512609751542, | |
| "grad_norm": 0.2769596342571021, | |
| "learning_rate": 5.485493438372334e-06, | |
| "loss": 0.11546630859375, | |
| "memory(GiB)": 10.57, | |
| "step": 1520, | |
| "train_speed(iter/s)": 0.472812 | |
| }, | |
| { | |
| "epoch": 1.1389874836540257, | |
| "grad_norm": 0.3103063506129397, | |
| "learning_rate": 5.482204022669716e-06, | |
| "loss": 0.108837890625, | |
| "memory(GiB)": 10.57, | |
| "step": 1525, | |
| "train_speed(iter/s)": 0.472794 | |
| }, | |
| { | |
| "epoch": 1.1427237063328974, | |
| "grad_norm": 0.2913972666378632, | |
| "learning_rate": 5.478905118120274e-06, | |
| "loss": 0.1053466796875, | |
| "memory(GiB)": 10.57, | |
| "step": 1530, | |
| "train_speed(iter/s)": 0.472759 | |
| }, | |
| { | |
| "epoch": 1.1464599290117692, | |
| "grad_norm": 0.1805358057399443, | |
| "learning_rate": 5.475596737334896e-06, | |
| "loss": 0.10556640625, | |
| "memory(GiB)": 10.57, | |
| "step": 1535, | |
| "train_speed(iter/s)": 0.472764 | |
| }, | |
| { | |
| "epoch": 1.1501961516906407, | |
| "grad_norm": 0.18027571971615952, | |
| "learning_rate": 5.472278892960697e-06, | |
| "loss": 0.10286865234375, | |
| "memory(GiB)": 10.57, | |
| "step": 1540, | |
| "train_speed(iter/s)": 0.472798 | |
| }, | |
| { | |
| "epoch": 1.1539323743695125, | |
| "grad_norm": 0.26360451822838044, | |
| "learning_rate": 5.468951597680969e-06, | |
| "loss": 0.10518798828125, | |
| "memory(GiB)": 10.57, | |
| "step": 1545, | |
| "train_speed(iter/s)": 0.472811 | |
| }, | |
| { | |
| "epoch": 1.1576685970483842, | |
| "grad_norm": 0.27825790440429315, | |
| "learning_rate": 5.4656148642151315e-06, | |
| "loss": 0.1068115234375, | |
| "memory(GiB)": 10.57, | |
| "step": 1550, | |
| "train_speed(iter/s)": 0.472822 | |
| }, | |
| { | |
| "epoch": 1.1614048197272557, | |
| "grad_norm": 0.37841928985976586, | |
| "learning_rate": 5.462268705318685e-06, | |
| "loss": 0.105902099609375, | |
| "memory(GiB)": 10.57, | |
| "step": 1555, | |
| "train_speed(iter/s)": 0.472796 | |
| }, | |
| { | |
| "epoch": 1.1651410424061275, | |
| "grad_norm": 0.20072772545318748, | |
| "learning_rate": 5.458913133783158e-06, | |
| "loss": 0.096240234375, | |
| "memory(GiB)": 10.57, | |
| "step": 1560, | |
| "train_speed(iter/s)": 0.472849 | |
| }, | |
| { | |
| "epoch": 1.168877265084999, | |
| "grad_norm": 0.2782224873229787, | |
| "learning_rate": 5.455548162436066e-06, | |
| "loss": 0.10538330078125, | |
| "memory(GiB)": 10.57, | |
| "step": 1565, | |
| "train_speed(iter/s)": 0.472866 | |
| }, | |
| { | |
| "epoch": 1.1726134877638708, | |
| "grad_norm": 0.2611062382021719, | |
| "learning_rate": 5.4521738041408535e-06, | |
| "loss": 0.102545166015625, | |
| "memory(GiB)": 10.57, | |
| "step": 1570, | |
| "train_speed(iter/s)": 0.472925 | |
| }, | |
| { | |
| "epoch": 1.1763497104427425, | |
| "grad_norm": 0.36681796736657335, | |
| "learning_rate": 5.448790071796851e-06, | |
| "loss": 0.10877685546875, | |
| "memory(GiB)": 10.57, | |
| "step": 1575, | |
| "train_speed(iter/s)": 0.472947 | |
| }, | |
| { | |
| "epoch": 1.180085933121614, | |
| "grad_norm": 0.31412178351944464, | |
| "learning_rate": 5.445396978339223e-06, | |
| "loss": 0.1108642578125, | |
| "memory(GiB)": 10.57, | |
| "step": 1580, | |
| "train_speed(iter/s)": 0.472961 | |
| }, | |
| { | |
| "epoch": 1.1838221558004858, | |
| "grad_norm": 0.2945745909309181, | |
| "learning_rate": 5.4419945367389204e-06, | |
| "loss": 0.104638671875, | |
| "memory(GiB)": 10.57, | |
| "step": 1585, | |
| "train_speed(iter/s)": 0.472997 | |
| }, | |
| { | |
| "epoch": 1.1875583784793573, | |
| "grad_norm": 0.2005694453013891, | |
| "learning_rate": 5.438582760002628e-06, | |
| "loss": 0.11466064453125, | |
| "memory(GiB)": 10.57, | |
| "step": 1590, | |
| "train_speed(iter/s)": 0.472995 | |
| }, | |
| { | |
| "epoch": 1.191294601158229, | |
| "grad_norm": 0.21815942040257993, | |
| "learning_rate": 5.4351616611727174e-06, | |
| "loss": 0.09090576171875, | |
| "memory(GiB)": 10.57, | |
| "step": 1595, | |
| "train_speed(iter/s)": 0.472983 | |
| }, | |
| { | |
| "epoch": 1.1950308238371008, | |
| "grad_norm": 0.3254149926280658, | |
| "learning_rate": 5.431731253327197e-06, | |
| "loss": 0.09832763671875, | |
| "memory(GiB)": 10.57, | |
| "step": 1600, | |
| "train_speed(iter/s)": 0.472989 | |
| }, | |
| { | |
| "epoch": 1.1987670465159723, | |
| "grad_norm": 0.21539039093948628, | |
| "learning_rate": 5.428291549579658e-06, | |
| "loss": 0.0917236328125, | |
| "memory(GiB)": 10.57, | |
| "step": 1605, | |
| "train_speed(iter/s)": 0.472999 | |
| }, | |
| { | |
| "epoch": 1.202503269194844, | |
| "grad_norm": 0.3980763574441828, | |
| "learning_rate": 5.424842563079231e-06, | |
| "loss": 0.1013427734375, | |
| "memory(GiB)": 10.57, | |
| "step": 1610, | |
| "train_speed(iter/s)": 0.473002 | |
| }, | |
| { | |
| "epoch": 1.2062394918737156, | |
| "grad_norm": 0.2562644399270751, | |
| "learning_rate": 5.421384307010532e-06, | |
| "loss": 0.12611083984375, | |
| "memory(GiB)": 10.57, | |
| "step": 1615, | |
| "train_speed(iter/s)": 0.473001 | |
| }, | |
| { | |
| "epoch": 1.2099757145525873, | |
| "grad_norm": 0.21063963603050906, | |
| "learning_rate": 5.41791679459361e-06, | |
| "loss": 0.09677734375, | |
| "memory(GiB)": 10.57, | |
| "step": 1620, | |
| "train_speed(iter/s)": 0.473087 | |
| }, | |
| { | |
| "epoch": 1.213711937231459, | |
| "grad_norm": 0.26589295201735347, | |
| "learning_rate": 5.4144400390839014e-06, | |
| "loss": 0.10716552734375, | |
| "memory(GiB)": 10.57, | |
| "step": 1625, | |
| "train_speed(iter/s)": 0.473137 | |
| }, | |
| { | |
| "epoch": 1.2174481599103306, | |
| "grad_norm": 0.3159674300444183, | |
| "learning_rate": 5.410954053772174e-06, | |
| "loss": 0.117822265625, | |
| "memory(GiB)": 10.57, | |
| "step": 1630, | |
| "train_speed(iter/s)": 0.473161 | |
| }, | |
| { | |
| "epoch": 1.2211843825892024, | |
| "grad_norm": 0.3257909348870682, | |
| "learning_rate": 5.407458851984481e-06, | |
| "loss": 0.105908203125, | |
| "memory(GiB)": 10.57, | |
| "step": 1635, | |
| "train_speed(iter/s)": 0.473064 | |
| }, | |
| { | |
| "epoch": 1.224920605268074, | |
| "grad_norm": 0.25594963311057084, | |
| "learning_rate": 5.403954447082107e-06, | |
| "loss": 0.1008544921875, | |
| "memory(GiB)": 10.57, | |
| "step": 1640, | |
| "train_speed(iter/s)": 0.473138 | |
| }, | |
| { | |
| "epoch": 1.2286568279469456, | |
| "grad_norm": 0.27760936809640124, | |
| "learning_rate": 5.400440852461517e-06, | |
| "loss": 0.08446044921875, | |
| "memory(GiB)": 10.57, | |
| "step": 1645, | |
| "train_speed(iter/s)": 0.473198 | |
| }, | |
| { | |
| "epoch": 1.2323930506258174, | |
| "grad_norm": 0.30926667434610317, | |
| "learning_rate": 5.3969180815543075e-06, | |
| "loss": 0.0973876953125, | |
| "memory(GiB)": 10.57, | |
| "step": 1650, | |
| "train_speed(iter/s)": 0.473211 | |
| }, | |
| { | |
| "epoch": 1.236129273304689, | |
| "grad_norm": 0.22376369134309534, | |
| "learning_rate": 5.393386147827153e-06, | |
| "loss": 0.08917236328125, | |
| "memory(GiB)": 10.57, | |
| "step": 1655, | |
| "train_speed(iter/s)": 0.473219 | |
| }, | |
| { | |
| "epoch": 1.2398654959835607, | |
| "grad_norm": 0.3060981242994768, | |
| "learning_rate": 5.3898450647817534e-06, | |
| "loss": 0.095660400390625, | |
| "memory(GiB)": 10.57, | |
| "step": 1660, | |
| "train_speed(iter/s)": 0.47326 | |
| }, | |
| { | |
| "epoch": 1.2436017186624322, | |
| "grad_norm": 0.2824418483688286, | |
| "learning_rate": 5.386294845954789e-06, | |
| "loss": 0.093310546875, | |
| "memory(GiB)": 10.57, | |
| "step": 1665, | |
| "train_speed(iter/s)": 0.473272 | |
| }, | |
| { | |
| "epoch": 1.247337941341304, | |
| "grad_norm": 0.36318507390627536, | |
| "learning_rate": 5.382735504917859e-06, | |
| "loss": 0.09969482421875, | |
| "memory(GiB)": 10.57, | |
| "step": 1670, | |
| "train_speed(iter/s)": 0.473338 | |
| }, | |
| { | |
| "epoch": 1.2510741640201757, | |
| "grad_norm": 0.25998406554963555, | |
| "learning_rate": 5.379167055277436e-06, | |
| "loss": 0.0906982421875, | |
| "memory(GiB)": 10.57, | |
| "step": 1675, | |
| "train_speed(iter/s)": 0.473289 | |
| }, | |
| { | |
| "epoch": 1.2548103866990472, | |
| "grad_norm": 0.3053060614623874, | |
| "learning_rate": 5.3755895106748135e-06, | |
| "loss": 0.1009033203125, | |
| "memory(GiB)": 10.57, | |
| "step": 1680, | |
| "train_speed(iter/s)": 0.473278 | |
| }, | |
| { | |
| "epoch": 1.258546609377919, | |
| "grad_norm": 0.3304211891993834, | |
| "learning_rate": 5.372002884786053e-06, | |
| "loss": 0.080206298828125, | |
| "memory(GiB)": 10.57, | |
| "step": 1685, | |
| "train_speed(iter/s)": 0.473247 | |
| }, | |
| { | |
| "epoch": 1.2622828320567905, | |
| "grad_norm": 0.3786132572419238, | |
| "learning_rate": 5.368407191321929e-06, | |
| "loss": 0.11483154296875, | |
| "memory(GiB)": 10.57, | |
| "step": 1690, | |
| "train_speed(iter/s)": 0.473224 | |
| }, | |
| { | |
| "epoch": 1.2660190547356622, | |
| "grad_norm": 0.4098142898034233, | |
| "learning_rate": 5.364802444027881e-06, | |
| "loss": 0.11900634765625, | |
| "memory(GiB)": 10.57, | |
| "step": 1695, | |
| "train_speed(iter/s)": 0.473255 | |
| }, | |
| { | |
| "epoch": 1.269755277414534, | |
| "grad_norm": 0.31832382239724993, | |
| "learning_rate": 5.36118865668396e-06, | |
| "loss": 0.100079345703125, | |
| "memory(GiB)": 10.57, | |
| "step": 1700, | |
| "train_speed(iter/s)": 0.473256 | |
| }, | |
| { | |
| "epoch": 1.2734915000934055, | |
| "grad_norm": 0.21787448497633385, | |
| "learning_rate": 5.357565843104772e-06, | |
| "loss": 0.1089111328125, | |
| "memory(GiB)": 10.57, | |
| "step": 1705, | |
| "train_speed(iter/s)": 0.473319 | |
| }, | |
| { | |
| "epoch": 1.2772277227722773, | |
| "grad_norm": 0.24048814888237727, | |
| "learning_rate": 5.3539340171394315e-06, | |
| "loss": 0.103173828125, | |
| "memory(GiB)": 10.57, | |
| "step": 1710, | |
| "train_speed(iter/s)": 0.473382 | |
| }, | |
| { | |
| "epoch": 1.2809639454511488, | |
| "grad_norm": 0.2628088064912976, | |
| "learning_rate": 5.350293192671502e-06, | |
| "loss": 0.1017578125, | |
| "memory(GiB)": 10.57, | |
| "step": 1715, | |
| "train_speed(iter/s)": 0.473392 | |
| }, | |
| { | |
| "epoch": 1.2847001681300205, | |
| "grad_norm": 0.19682320473371387, | |
| "learning_rate": 5.3466433836189466e-06, | |
| "loss": 0.10618896484375, | |
| "memory(GiB)": 10.57, | |
| "step": 1720, | |
| "train_speed(iter/s)": 0.473367 | |
| }, | |
| { | |
| "epoch": 1.2884363908088923, | |
| "grad_norm": 0.31166282334428463, | |
| "learning_rate": 5.342984603934075e-06, | |
| "loss": 0.0931884765625, | |
| "memory(GiB)": 10.57, | |
| "step": 1725, | |
| "train_speed(iter/s)": 0.473394 | |
| }, | |
| { | |
| "epoch": 1.2921726134877638, | |
| "grad_norm": 0.4426055463824898, | |
| "learning_rate": 5.3393168676034925e-06, | |
| "loss": 0.10029296875, | |
| "memory(GiB)": 10.57, | |
| "step": 1730, | |
| "train_speed(iter/s)": 0.473469 | |
| }, | |
| { | |
| "epoch": 1.2959088361666355, | |
| "grad_norm": 0.19012257878940111, | |
| "learning_rate": 5.335640188648036e-06, | |
| "loss": 0.0994873046875, | |
| "memory(GiB)": 10.57, | |
| "step": 1735, | |
| "train_speed(iter/s)": 0.473506 | |
| }, | |
| { | |
| "epoch": 1.299645058845507, | |
| "grad_norm": 0.2509436471905221, | |
| "learning_rate": 5.3319545811227345e-06, | |
| "loss": 0.10556640625, | |
| "memory(GiB)": 10.57, | |
| "step": 1740, | |
| "train_speed(iter/s)": 0.47352 | |
| }, | |
| { | |
| "epoch": 1.3033812815243788, | |
| "grad_norm": 0.30945571438082825, | |
| "learning_rate": 5.328260059116746e-06, | |
| "loss": 0.10347900390625, | |
| "memory(GiB)": 10.57, | |
| "step": 1745, | |
| "train_speed(iter/s)": 0.473584 | |
| }, | |
| { | |
| "epoch": 1.3071175042032506, | |
| "grad_norm": 0.2842323038315994, | |
| "learning_rate": 5.324556636753305e-06, | |
| "loss": 0.0927490234375, | |
| "memory(GiB)": 10.57, | |
| "step": 1750, | |
| "train_speed(iter/s)": 0.473591 | |
| }, | |
| { | |
| "epoch": 1.310853726882122, | |
| "grad_norm": 0.22529076559497616, | |
| "learning_rate": 5.320844328189674e-06, | |
| "loss": 0.10736083984375, | |
| "memory(GiB)": 10.57, | |
| "step": 1755, | |
| "train_speed(iter/s)": 0.473593 | |
| }, | |
| { | |
| "epoch": 1.3145899495609938, | |
| "grad_norm": 0.25966109665415044, | |
| "learning_rate": 5.31712314761708e-06, | |
| "loss": 0.09718017578125, | |
| "memory(GiB)": 10.57, | |
| "step": 1760, | |
| "train_speed(iter/s)": 0.473634 | |
| }, | |
| { | |
| "epoch": 1.3183261722398654, | |
| "grad_norm": 0.2824285315852678, | |
| "learning_rate": 5.31339310926067e-06, | |
| "loss": 0.1147216796875, | |
| "memory(GiB)": 10.57, | |
| "step": 1765, | |
| "train_speed(iter/s)": 0.473682 | |
| }, | |
| { | |
| "epoch": 1.3220623949187371, | |
| "grad_norm": 0.29212776422688475, | |
| "learning_rate": 5.30965422737945e-06, | |
| "loss": 0.106103515625, | |
| "memory(GiB)": 10.57, | |
| "step": 1770, | |
| "train_speed(iter/s)": 0.473711 | |
| }, | |
| { | |
| "epoch": 1.3257986175976089, | |
| "grad_norm": 0.21770600045083738, | |
| "learning_rate": 5.305906516266232e-06, | |
| "loss": 0.09356689453125, | |
| "memory(GiB)": 10.57, | |
| "step": 1775, | |
| "train_speed(iter/s)": 0.473749 | |
| }, | |
| { | |
| "epoch": 1.3295348402764806, | |
| "grad_norm": 0.22535805175359133, | |
| "learning_rate": 5.302149990247581e-06, | |
| "loss": 0.09854736328125, | |
| "memory(GiB)": 10.57, | |
| "step": 1780, | |
| "train_speed(iter/s)": 0.47377 | |
| }, | |
| { | |
| "epoch": 1.3332710629553521, | |
| "grad_norm": 0.3731424208017629, | |
| "learning_rate": 5.298384663683759e-06, | |
| "loss": 0.10096435546875, | |
| "memory(GiB)": 10.57, | |
| "step": 1785, | |
| "train_speed(iter/s)": 0.473814 | |
| }, | |
| { | |
| "epoch": 1.3370072856342237, | |
| "grad_norm": 0.19409382195361594, | |
| "learning_rate": 5.29461055096867e-06, | |
| "loss": 0.0933837890625, | |
| "memory(GiB)": 10.57, | |
| "step": 1790, | |
| "train_speed(iter/s)": 0.473846 | |
| }, | |
| { | |
| "epoch": 1.3407435083130954, | |
| "grad_norm": 0.20858019331443553, | |
| "learning_rate": 5.290827666529807e-06, | |
| "loss": 0.09691162109375, | |
| "memory(GiB)": 10.57, | |
| "step": 1795, | |
| "train_speed(iter/s)": 0.473812 | |
| }, | |
| { | |
| "epoch": 1.3444797309919672, | |
| "grad_norm": 0.21508957217260072, | |
| "learning_rate": 5.287036024828191e-06, | |
| "loss": 0.112396240234375, | |
| "memory(GiB)": 10.57, | |
| "step": 1800, | |
| "train_speed(iter/s)": 0.473874 | |
| }, | |
| { | |
| "epoch": 1.348215953670839, | |
| "grad_norm": 0.21088809922179003, | |
| "learning_rate": 5.283235640358326e-06, | |
| "loss": 0.10013427734375, | |
| "memory(GiB)": 10.57, | |
| "step": 1805, | |
| "train_speed(iter/s)": 0.473898 | |
| }, | |
| { | |
| "epoch": 1.3519521763497104, | |
| "grad_norm": 0.2980687891825392, | |
| "learning_rate": 5.27942652764813e-06, | |
| "loss": 0.12469482421875, | |
| "memory(GiB)": 10.57, | |
| "step": 1810, | |
| "train_speed(iter/s)": 0.473908 | |
| }, | |
| { | |
| "epoch": 1.3556883990285822, | |
| "grad_norm": 0.26579488787728855, | |
| "learning_rate": 5.275608701258893e-06, | |
| "loss": 0.09619140625, | |
| "memory(GiB)": 10.57, | |
| "step": 1815, | |
| "train_speed(iter/s)": 0.473922 | |
| }, | |
| { | |
| "epoch": 1.3594246217074537, | |
| "grad_norm": 0.18737292024034827, | |
| "learning_rate": 5.271782175785213e-06, | |
| "loss": 0.08944091796875, | |
| "memory(GiB)": 10.57, | |
| "step": 1820, | |
| "train_speed(iter/s)": 0.473933 | |
| }, | |
| { | |
| "epoch": 1.3631608443863255, | |
| "grad_norm": 0.24782345412701354, | |
| "learning_rate": 5.2679469658549425e-06, | |
| "loss": 0.09827880859375, | |
| "memory(GiB)": 10.57, | |
| "step": 1825, | |
| "train_speed(iter/s)": 0.473873 | |
| }, | |
| { | |
| "epoch": 1.3668970670651972, | |
| "grad_norm": 0.32532596436786243, | |
| "learning_rate": 5.26410308612913e-06, | |
| "loss": 0.09747314453125, | |
| "memory(GiB)": 10.57, | |
| "step": 1830, | |
| "train_speed(iter/s)": 0.473915 | |
| }, | |
| { | |
| "epoch": 1.3706332897440687, | |
| "grad_norm": 0.31097616250716587, | |
| "learning_rate": 5.2602505513019725e-06, | |
| "loss": 0.1041748046875, | |
| "memory(GiB)": 10.57, | |
| "step": 1835, | |
| "train_speed(iter/s)": 0.473886 | |
| }, | |
| { | |
| "epoch": 1.3743695124229405, | |
| "grad_norm": 0.3233980057122036, | |
| "learning_rate": 5.256389376100747e-06, | |
| "loss": 0.10128173828125, | |
| "memory(GiB)": 10.57, | |
| "step": 1840, | |
| "train_speed(iter/s)": 0.473889 | |
| }, | |
| { | |
| "epoch": 1.378105735101812, | |
| "grad_norm": 0.2838217794938913, | |
| "learning_rate": 5.252519575285765e-06, | |
| "loss": 0.10989990234375, | |
| "memory(GiB)": 10.57, | |
| "step": 1845, | |
| "train_speed(iter/s)": 0.473918 | |
| }, | |
| { | |
| "epoch": 1.3818419577806837, | |
| "grad_norm": 0.2857844265885774, | |
| "learning_rate": 5.248641163650309e-06, | |
| "loss": 0.101458740234375, | |
| "memory(GiB)": 10.57, | |
| "step": 1850, | |
| "train_speed(iter/s)": 0.473944 | |
| }, | |
| { | |
| "epoch": 1.3855781804595555, | |
| "grad_norm": 0.3117055756844236, | |
| "learning_rate": 5.244754156020577e-06, | |
| "loss": 0.10926513671875, | |
| "memory(GiB)": 10.57, | |
| "step": 1855, | |
| "train_speed(iter/s)": 0.473967 | |
| }, | |
| { | |
| "epoch": 1.389314403138427, | |
| "grad_norm": 0.1920114429204594, | |
| "learning_rate": 5.240858567255634e-06, | |
| "loss": 0.110009765625, | |
| "memory(GiB)": 10.57, | |
| "step": 1860, | |
| "train_speed(iter/s)": 0.473962 | |
| }, | |
| { | |
| "epoch": 1.3930506258172988, | |
| "grad_norm": 0.3502090927498937, | |
| "learning_rate": 5.236954412247341e-06, | |
| "loss": 0.11763916015625, | |
| "memory(GiB)": 10.57, | |
| "step": 1865, | |
| "train_speed(iter/s)": 0.473991 | |
| }, | |
| { | |
| "epoch": 1.3967868484961703, | |
| "grad_norm": 0.23316922643496588, | |
| "learning_rate": 5.2330417059203095e-06, | |
| "loss": 0.1151123046875, | |
| "memory(GiB)": 10.57, | |
| "step": 1870, | |
| "train_speed(iter/s)": 0.474034 | |
| }, | |
| { | |
| "epoch": 1.400523071175042, | |
| "grad_norm": 0.2549951722054464, | |
| "learning_rate": 5.22912046323184e-06, | |
| "loss": 0.110504150390625, | |
| "memory(GiB)": 10.57, | |
| "step": 1875, | |
| "train_speed(iter/s)": 0.474059 | |
| }, | |
| { | |
| "epoch": 1.4042592938539138, | |
| "grad_norm": 0.1708829919522614, | |
| "learning_rate": 5.225190699171865e-06, | |
| "loss": 0.08787841796875, | |
| "memory(GiB)": 10.57, | |
| "step": 1880, | |
| "train_speed(iter/s)": 0.474029 | |
| }, | |
| { | |
| "epoch": 1.4079955165327853, | |
| "grad_norm": 0.27196811779503416, | |
| "learning_rate": 5.221252428762893e-06, | |
| "loss": 0.11351318359375, | |
| "memory(GiB)": 10.57, | |
| "step": 1885, | |
| "train_speed(iter/s)": 0.474046 | |
| }, | |
| { | |
| "epoch": 1.411731739211657, | |
| "grad_norm": 0.23328619371671638, | |
| "learning_rate": 5.217305667059948e-06, | |
| "loss": 0.101446533203125, | |
| "memory(GiB)": 10.57, | |
| "step": 1890, | |
| "train_speed(iter/s)": 0.474076 | |
| }, | |
| { | |
| "epoch": 1.4154679618905286, | |
| "grad_norm": 0.18762276770097455, | |
| "learning_rate": 5.213350429150517e-06, | |
| "loss": 0.10950927734375, | |
| "memory(GiB)": 10.57, | |
| "step": 1895, | |
| "train_speed(iter/s)": 0.474023 | |
| }, | |
| { | |
| "epoch": 1.4192041845694003, | |
| "grad_norm": 0.26686273546353123, | |
| "learning_rate": 5.209386730154487e-06, | |
| "loss": 0.10045166015625, | |
| "memory(GiB)": 10.57, | |
| "step": 1900, | |
| "train_speed(iter/s)": 0.474076 | |
| }, | |
| { | |
| "epoch": 1.422940407248272, | |
| "grad_norm": 0.3085786825020616, | |
| "learning_rate": 5.205414585224091e-06, | |
| "loss": 0.10711669921875, | |
| "memory(GiB)": 10.57, | |
| "step": 1905, | |
| "train_speed(iter/s)": 0.474098 | |
| }, | |
| { | |
| "epoch": 1.4266766299271436, | |
| "grad_norm": 0.3905887360768796, | |
| "learning_rate": 5.2014340095438476e-06, | |
| "loss": 0.118505859375, | |
| "memory(GiB)": 10.57, | |
| "step": 1910, | |
| "train_speed(iter/s)": 0.474116 | |
| }, | |
| { | |
| "epoch": 1.4304128526060154, | |
| "grad_norm": 0.2752084354347657, | |
| "learning_rate": 5.197445018330506e-06, | |
| "loss": 0.09713134765625, | |
| "memory(GiB)": 10.57, | |
| "step": 1915, | |
| "train_speed(iter/s)": 0.47414 | |
| }, | |
| { | |
| "epoch": 1.4341490752848869, | |
| "grad_norm": 0.25638122340507086, | |
| "learning_rate": 5.193447626832984e-06, | |
| "loss": 0.1004638671875, | |
| "memory(GiB)": 10.57, | |
| "step": 1920, | |
| "train_speed(iter/s)": 0.474127 | |
| }, | |
| { | |
| "epoch": 1.4378852979637586, | |
| "grad_norm": 0.3365573737926719, | |
| "learning_rate": 5.189441850332312e-06, | |
| "loss": 0.096502685546875, | |
| "memory(GiB)": 10.57, | |
| "step": 1925, | |
| "train_speed(iter/s)": 0.474083 | |
| }, | |
| { | |
| "epoch": 1.4416215206426304, | |
| "grad_norm": 0.1924187499510245, | |
| "learning_rate": 5.185427704141573e-06, | |
| "loss": 0.124609375, | |
| "memory(GiB)": 10.57, | |
| "step": 1930, | |
| "train_speed(iter/s)": 0.474111 | |
| }, | |
| { | |
| "epoch": 1.445357743321502, | |
| "grad_norm": 0.28660368393049557, | |
| "learning_rate": 5.181405203605849e-06, | |
| "loss": 0.10279541015625, | |
| "memory(GiB)": 10.57, | |
| "step": 1935, | |
| "train_speed(iter/s)": 0.474107 | |
| }, | |
| { | |
| "epoch": 1.4490939660003737, | |
| "grad_norm": 0.26275748472823024, | |
| "learning_rate": 5.177374364102156e-06, | |
| "loss": 0.1211669921875, | |
| "memory(GiB)": 10.57, | |
| "step": 1940, | |
| "train_speed(iter/s)": 0.474117 | |
| }, | |
| { | |
| "epoch": 1.4528301886792452, | |
| "grad_norm": 0.30473179680325724, | |
| "learning_rate": 5.1733352010393855e-06, | |
| "loss": 0.1116455078125, | |
| "memory(GiB)": 10.57, | |
| "step": 1945, | |
| "train_speed(iter/s)": 0.474149 | |
| }, | |
| { | |
| "epoch": 1.456566411358117, | |
| "grad_norm": 0.2980857699329149, | |
| "learning_rate": 5.169287729858254e-06, | |
| "loss": 0.09521484375, | |
| "memory(GiB)": 10.57, | |
| "step": 1950, | |
| "train_speed(iter/s)": 0.474117 | |
| }, | |
| { | |
| "epoch": 1.4603026340369887, | |
| "grad_norm": 0.3892418519621433, | |
| "learning_rate": 5.165231966031231e-06, | |
| "loss": 0.10706787109375, | |
| "memory(GiB)": 10.57, | |
| "step": 1955, | |
| "train_speed(iter/s)": 0.474167 | |
| }, | |
| { | |
| "epoch": 1.4640388567158602, | |
| "grad_norm": 0.26876863290437225, | |
| "learning_rate": 5.161167925062492e-06, | |
| "loss": 0.0955810546875, | |
| "memory(GiB)": 10.57, | |
| "step": 1960, | |
| "train_speed(iter/s)": 0.474231 | |
| }, | |
| { | |
| "epoch": 1.467775079394732, | |
| "grad_norm": 0.23766298983672868, | |
| "learning_rate": 5.15709562248785e-06, | |
| "loss": 0.1157470703125, | |
| "memory(GiB)": 10.57, | |
| "step": 1965, | |
| "train_speed(iter/s)": 0.474264 | |
| }, | |
| { | |
| "epoch": 1.4715113020736035, | |
| "grad_norm": 0.2475077256620063, | |
| "learning_rate": 5.153015073874704e-06, | |
| "loss": 0.103997802734375, | |
| "memory(GiB)": 10.57, | |
| "step": 1970, | |
| "train_speed(iter/s)": 0.474248 | |
| }, | |
| { | |
| "epoch": 1.4752475247524752, | |
| "grad_norm": 0.2529463798672503, | |
| "learning_rate": 5.148926294821973e-06, | |
| "loss": 0.09212646484375, | |
| "memory(GiB)": 10.57, | |
| "step": 1975, | |
| "train_speed(iter/s)": 0.474282 | |
| }, | |
| { | |
| "epoch": 1.478983747431347, | |
| "grad_norm": 0.34121952234096015, | |
| "learning_rate": 5.144829300960038e-06, | |
| "loss": 0.09998779296875, | |
| "memory(GiB)": 10.57, | |
| "step": 1980, | |
| "train_speed(iter/s)": 0.474279 | |
| }, | |
| { | |
| "epoch": 1.4827199701102185, | |
| "grad_norm": 0.26555171567768715, | |
| "learning_rate": 5.140724107950687e-06, | |
| "loss": 0.10701904296875, | |
| "memory(GiB)": 10.57, | |
| "step": 1985, | |
| "train_speed(iter/s)": 0.474325 | |
| }, | |
| { | |
| "epoch": 1.4864561927890902, | |
| "grad_norm": 0.3012526382519, | |
| "learning_rate": 5.136610731487047e-06, | |
| "loss": 0.10223388671875, | |
| "memory(GiB)": 10.57, | |
| "step": 1990, | |
| "train_speed(iter/s)": 0.474388 | |
| }, | |
| { | |
| "epoch": 1.4901924154679618, | |
| "grad_norm": 0.2585567492074306, | |
| "learning_rate": 5.13248918729353e-06, | |
| "loss": 0.110015869140625, | |
| "memory(GiB)": 10.57, | |
| "step": 1995, | |
| "train_speed(iter/s)": 0.474458 | |
| }, | |
| { | |
| "epoch": 1.4939286381468335, | |
| "grad_norm": 0.21553275657329446, | |
| "learning_rate": 5.128359491125772e-06, | |
| "loss": 0.10537109375, | |
| "memory(GiB)": 10.57, | |
| "step": 2000, | |
| "train_speed(iter/s)": 0.474436 | |
| }, | |
| { | |
| "epoch": 1.4976648608257053, | |
| "grad_norm": 0.23393892148099255, | |
| "learning_rate": 5.1242216587705726e-06, | |
| "loss": 0.09471435546875, | |
| "memory(GiB)": 10.57, | |
| "step": 2005, | |
| "train_speed(iter/s)": 0.474455 | |
| }, | |
| { | |
| "epoch": 1.501401083504577, | |
| "grad_norm": 0.1982523301744199, | |
| "learning_rate": 5.1200757060458305e-06, | |
| "loss": 0.094744873046875, | |
| "memory(GiB)": 10.57, | |
| "step": 2010, | |
| "train_speed(iter/s)": 0.47441 | |
| }, | |
| { | |
| "epoch": 1.5051373061834485, | |
| "grad_norm": 0.24897221547603635, | |
| "learning_rate": 5.11592164880049e-06, | |
| "loss": 0.094281005859375, | |
| "memory(GiB)": 10.57, | |
| "step": 2015, | |
| "train_speed(iter/s)": 0.474368 | |
| }, | |
| { | |
| "epoch": 1.50887352886232, | |
| "grad_norm": 0.2524388493286587, | |
| "learning_rate": 5.111759502914477e-06, | |
| "loss": 0.10567626953125, | |
| "memory(GiB)": 10.57, | |
| "step": 2020, | |
| "train_speed(iter/s)": 0.474413 | |
| }, | |
| { | |
| "epoch": 1.5126097515411918, | |
| "grad_norm": 0.2821918241104093, | |
| "learning_rate": 5.107589284298635e-06, | |
| "loss": 0.10643310546875, | |
| "memory(GiB)": 10.57, | |
| "step": 2025, | |
| "train_speed(iter/s)": 0.474445 | |
| }, | |
| { | |
| "epoch": 1.5163459742200636, | |
| "grad_norm": 0.1949063316633063, | |
| "learning_rate": 5.10341100889467e-06, | |
| "loss": 0.10220947265625, | |
| "memory(GiB)": 10.57, | |
| "step": 2030, | |
| "train_speed(iter/s)": 0.474452 | |
| }, | |
| { | |
| "epoch": 1.5200821968989353, | |
| "grad_norm": 0.2664640714650226, | |
| "learning_rate": 5.0992246926750866e-06, | |
| "loss": 0.1039306640625, | |
| "memory(GiB)": 10.57, | |
| "step": 2035, | |
| "train_speed(iter/s)": 0.474496 | |
| }, | |
| { | |
| "epoch": 1.5238184195778068, | |
| "grad_norm": 0.15706118788240764, | |
| "learning_rate": 5.095030351643129e-06, | |
| "loss": 0.0922210693359375, | |
| "memory(GiB)": 10.57, | |
| "step": 2040, | |
| "train_speed(iter/s)": 0.474486 | |
| }, | |
| { | |
| "epoch": 1.5275546422566784, | |
| "grad_norm": 0.2929327425758124, | |
| "learning_rate": 5.090828001832715e-06, | |
| "loss": 0.1028076171875, | |
| "memory(GiB)": 10.57, | |
| "step": 2045, | |
| "train_speed(iter/s)": 0.474519 | |
| }, | |
| { | |
| "epoch": 1.5312908649355501, | |
| "grad_norm": 0.21976773396934837, | |
| "learning_rate": 5.0866176593083805e-06, | |
| "loss": 0.1067626953125, | |
| "memory(GiB)": 10.57, | |
| "step": 2050, | |
| "train_speed(iter/s)": 0.474561 | |
| }, | |
| { | |
| "epoch": 1.5350270876144219, | |
| "grad_norm": 0.22682626802364397, | |
| "learning_rate": 5.082399340165214e-06, | |
| "loss": 0.10389404296875, | |
| "memory(GiB)": 10.57, | |
| "step": 2055, | |
| "train_speed(iter/s)": 0.474594 | |
| }, | |
| { | |
| "epoch": 1.5387633102932936, | |
| "grad_norm": 0.2279293975450204, | |
| "learning_rate": 5.0781730605287985e-06, | |
| "loss": 0.102423095703125, | |
| "memory(GiB)": 10.57, | |
| "step": 2060, | |
| "train_speed(iter/s)": 0.474651 | |
| }, | |
| { | |
| "epoch": 1.5424995329721651, | |
| "grad_norm": 0.21127637298228888, | |
| "learning_rate": 5.073938836555145e-06, | |
| "loss": 0.11668701171875, | |
| "memory(GiB)": 10.57, | |
| "step": 2065, | |
| "train_speed(iter/s)": 0.474653 | |
| }, | |
| { | |
| "epoch": 1.5462357556510367, | |
| "grad_norm": 0.23883103143189194, | |
| "learning_rate": 5.069696684430639e-06, | |
| "loss": 0.10777587890625, | |
| "memory(GiB)": 10.57, | |
| "step": 2070, | |
| "train_speed(iter/s)": 0.474569 | |
| }, | |
| { | |
| "epoch": 1.5499719783299084, | |
| "grad_norm": 0.19708822331757736, | |
| "learning_rate": 5.065446620371966e-06, | |
| "loss": 0.10965576171875, | |
| "memory(GiB)": 10.57, | |
| "step": 2075, | |
| "train_speed(iter/s)": 0.474611 | |
| }, | |
| { | |
| "epoch": 1.5537082010087802, | |
| "grad_norm": 0.22428614901572544, | |
| "learning_rate": 5.061188660626064e-06, | |
| "loss": 0.08321533203125, | |
| "memory(GiB)": 10.57, | |
| "step": 2080, | |
| "train_speed(iter/s)": 0.474592 | |
| }, | |
| { | |
| "epoch": 1.557444423687652, | |
| "grad_norm": 0.23095421524064055, | |
| "learning_rate": 5.056922821470048e-06, | |
| "loss": 0.1009521484375, | |
| "memory(GiB)": 10.57, | |
| "step": 2085, | |
| "train_speed(iter/s)": 0.474622 | |
| }, | |
| { | |
| "epoch": 1.5611806463665234, | |
| "grad_norm": 0.2871546333696532, | |
| "learning_rate": 5.052649119211159e-06, | |
| "loss": 0.1187744140625, | |
| "memory(GiB)": 10.57, | |
| "step": 2090, | |
| "train_speed(iter/s)": 0.474601 | |
| }, | |
| { | |
| "epoch": 1.564916869045395, | |
| "grad_norm": 0.21613086763978323, | |
| "learning_rate": 5.048367570186694e-06, | |
| "loss": 0.1031494140625, | |
| "memory(GiB)": 10.57, | |
| "step": 2095, | |
| "train_speed(iter/s)": 0.474627 | |
| }, | |
| { | |
| "epoch": 1.5686530917242667, | |
| "grad_norm": 0.2485805730125251, | |
| "learning_rate": 5.044078190763949e-06, | |
| "loss": 0.09178466796875, | |
| "memory(GiB)": 10.57, | |
| "step": 2100, | |
| "train_speed(iter/s)": 0.474608 | |
| }, | |
| { | |
| "epoch": 1.5723893144031384, | |
| "grad_norm": 0.2501433468360814, | |
| "learning_rate": 5.039780997340148e-06, | |
| "loss": 0.096502685546875, | |
| "memory(GiB)": 10.57, | |
| "step": 2105, | |
| "train_speed(iter/s)": 0.474639 | |
| }, | |
| { | |
| "epoch": 1.5761255370820102, | |
| "grad_norm": 0.2625314288905634, | |
| "learning_rate": 5.035476006342392e-06, | |
| "loss": 0.12071533203125, | |
| "memory(GiB)": 10.57, | |
| "step": 2110, | |
| "train_speed(iter/s)": 0.474666 | |
| }, | |
| { | |
| "epoch": 1.5798617597608817, | |
| "grad_norm": 0.25070127552544946, | |
| "learning_rate": 5.031163234227587e-06, | |
| "loss": 0.102880859375, | |
| "memory(GiB)": 10.57, | |
| "step": 2115, | |
| "train_speed(iter/s)": 0.474731 | |
| }, | |
| { | |
| "epoch": 1.5835979824397532, | |
| "grad_norm": 0.2730775843332172, | |
| "learning_rate": 5.026842697482386e-06, | |
| "loss": 0.107745361328125, | |
| "memory(GiB)": 10.57, | |
| "step": 2120, | |
| "train_speed(iter/s)": 0.47469 | |
| }, | |
| { | |
| "epoch": 1.587334205118625, | |
| "grad_norm": 0.3168533915295129, | |
| "learning_rate": 5.022514412623122e-06, | |
| "loss": 0.10606689453125, | |
| "memory(GiB)": 10.57, | |
| "step": 2125, | |
| "train_speed(iter/s)": 0.474712 | |
| }, | |
| { | |
| "epoch": 1.5910704277974967, | |
| "grad_norm": 0.26414617810461144, | |
| "learning_rate": 5.018178396195749e-06, | |
| "loss": 0.114739990234375, | |
| "memory(GiB)": 10.57, | |
| "step": 2130, | |
| "train_speed(iter/s)": 0.474667 | |
| }, | |
| { | |
| "epoch": 1.5948066504763685, | |
| "grad_norm": 0.2884403060168701, | |
| "learning_rate": 5.013834664775775e-06, | |
| "loss": 0.09578857421875, | |
| "memory(GiB)": 10.57, | |
| "step": 2135, | |
| "train_speed(iter/s)": 0.474686 | |
| }, | |
| { | |
| "epoch": 1.59854287315524, | |
| "grad_norm": 0.17316814005290654, | |
| "learning_rate": 5.009483234968204e-06, | |
| "loss": 0.09461669921875, | |
| "memory(GiB)": 10.57, | |
| "step": 2140, | |
| "train_speed(iter/s)": 0.474718 | |
| }, | |
| { | |
| "epoch": 1.6022790958341118, | |
| "grad_norm": 0.20180870823591296, | |
| "learning_rate": 5.005124123407466e-06, | |
| "loss": 0.1016357421875, | |
| "memory(GiB)": 10.57, | |
| "step": 2145, | |
| "train_speed(iter/s)": 0.474763 | |
| }, | |
| { | |
| "epoch": 1.6060153185129833, | |
| "grad_norm": 0.28225684517263877, | |
| "learning_rate": 5.0007573467573556e-06, | |
| "loss": 0.0999755859375, | |
| "memory(GiB)": 10.57, | |
| "step": 2150, | |
| "train_speed(iter/s)": 0.474781 | |
| }, | |
| { | |
| "epoch": 1.609751541191855, | |
| "grad_norm": 0.11744325613491245, | |
| "learning_rate": 4.996382921710973e-06, | |
| "loss": 0.088720703125, | |
| "memory(GiB)": 10.57, | |
| "step": 2155, | |
| "train_speed(iter/s)": 0.474755 | |
| }, | |
| { | |
| "epoch": 1.6134877638707268, | |
| "grad_norm": 0.34760100976149216, | |
| "learning_rate": 4.992000864990652e-06, | |
| "loss": 0.112939453125, | |
| "memory(GiB)": 10.57, | |
| "step": 2160, | |
| "train_speed(iter/s)": 0.474772 | |
| }, | |
| { | |
| "epoch": 1.6172239865495983, | |
| "grad_norm": 0.22604747445071158, | |
| "learning_rate": 4.987611193347903e-06, | |
| "loss": 0.089892578125, | |
| "memory(GiB)": 10.57, | |
| "step": 2165, | |
| "train_speed(iter/s)": 0.474717 | |
| }, | |
| { | |
| "epoch": 1.62096020922847, | |
| "grad_norm": 0.28280682170193416, | |
| "learning_rate": 4.983213923563347e-06, | |
| "loss": 0.0989990234375, | |
| "memory(GiB)": 10.57, | |
| "step": 2170, | |
| "train_speed(iter/s)": 0.474738 | |
| }, | |
| { | |
| "epoch": 1.6246964319073416, | |
| "grad_norm": 0.22814666006274306, | |
| "learning_rate": 4.978809072446648e-06, | |
| "loss": 0.0938232421875, | |
| "memory(GiB)": 10.57, | |
| "step": 2175, | |
| "train_speed(iter/s)": 0.474723 | |
| }, | |
| { | |
| "epoch": 1.6284326545862133, | |
| "grad_norm": 0.26304826342931886, | |
| "learning_rate": 4.974396656836454e-06, | |
| "loss": 0.09578857421875, | |
| "memory(GiB)": 10.57, | |
| "step": 2180, | |
| "train_speed(iter/s)": 0.474661 | |
| }, | |
| { | |
| "epoch": 1.632168877265085, | |
| "grad_norm": 0.3174530542273234, | |
| "learning_rate": 4.969976693600328e-06, | |
| "loss": 0.08758544921875, | |
| "memory(GiB)": 10.57, | |
| "step": 2185, | |
| "train_speed(iter/s)": 0.474686 | |
| }, | |
| { | |
| "epoch": 1.6359050999439566, | |
| "grad_norm": 0.2533342016854265, | |
| "learning_rate": 4.965549199634688e-06, | |
| "loss": 0.095849609375, | |
| "memory(GiB)": 10.57, | |
| "step": 2190, | |
| "train_speed(iter/s)": 0.474707 | |
| }, | |
| { | |
| "epoch": 1.6396413226228284, | |
| "grad_norm": 0.2795419703573222, | |
| "learning_rate": 4.96111419186474e-06, | |
| "loss": 0.09959716796875, | |
| "memory(GiB)": 10.57, | |
| "step": 2195, | |
| "train_speed(iter/s)": 0.474746 | |
| }, | |
| { | |
| "epoch": 1.6433775453016999, | |
| "grad_norm": 0.2244253656669392, | |
| "learning_rate": 4.95667168724441e-06, | |
| "loss": 0.103564453125, | |
| "memory(GiB)": 10.57, | |
| "step": 2200, | |
| "train_speed(iter/s)": 0.474702 | |
| }, | |
| { | |
| "epoch": 1.6471137679805716, | |
| "grad_norm": 0.2568324687784542, | |
| "learning_rate": 4.952221702756288e-06, | |
| "loss": 0.1037445068359375, | |
| "memory(GiB)": 10.57, | |
| "step": 2205, | |
| "train_speed(iter/s)": 0.474722 | |
| }, | |
| { | |
| "epoch": 1.6508499906594434, | |
| "grad_norm": 0.3956651516840788, | |
| "learning_rate": 4.947764255411551e-06, | |
| "loss": 0.11588134765625, | |
| "memory(GiB)": 10.57, | |
| "step": 2210, | |
| "train_speed(iter/s)": 0.474738 | |
| }, | |
| { | |
| "epoch": 1.6545862133383151, | |
| "grad_norm": 0.20985100077876295, | |
| "learning_rate": 4.943299362249912e-06, | |
| "loss": 0.099951171875, | |
| "memory(GiB)": 10.57, | |
| "step": 2215, | |
| "train_speed(iter/s)": 0.474773 | |
| }, | |
| { | |
| "epoch": 1.6583224360171867, | |
| "grad_norm": 0.1962140667346041, | |
| "learning_rate": 4.9388270403395415e-06, | |
| "loss": 0.10343017578125, | |
| "memory(GiB)": 10.57, | |
| "step": 2220, | |
| "train_speed(iter/s)": 0.474776 | |
| }, | |
| { | |
| "epoch": 1.6620586586960582, | |
| "grad_norm": 0.22503137462618433, | |
| "learning_rate": 4.934347306777012e-06, | |
| "loss": 0.1007568359375, | |
| "memory(GiB)": 10.57, | |
| "step": 2225, | |
| "train_speed(iter/s)": 0.474752 | |
| }, | |
| { | |
| "epoch": 1.66579488137493, | |
| "grad_norm": 0.22195673002837232, | |
| "learning_rate": 4.929860178687226e-06, | |
| "loss": 0.091131591796875, | |
| "memory(GiB)": 10.57, | |
| "step": 2230, | |
| "train_speed(iter/s)": 0.474771 | |
| }, | |
| { | |
| "epoch": 1.6695311040538017, | |
| "grad_norm": 0.3168855098173885, | |
| "learning_rate": 4.9253656732233564e-06, | |
| "loss": 0.11160888671875, | |
| "memory(GiB)": 10.57, | |
| "step": 2235, | |
| "train_speed(iter/s)": 0.474768 | |
| }, | |
| { | |
| "epoch": 1.6732673267326734, | |
| "grad_norm": 0.1738888875381385, | |
| "learning_rate": 4.920863807566776e-06, | |
| "loss": 0.0958465576171875, | |
| "memory(GiB)": 10.57, | |
| "step": 2240, | |
| "train_speed(iter/s)": 0.474725 | |
| }, | |
| { | |
| "epoch": 1.677003549411545, | |
| "grad_norm": 0.2552273932950652, | |
| "learning_rate": 4.9163545989269944e-06, | |
| "loss": 0.09219970703125, | |
| "memory(GiB)": 10.57, | |
| "step": 2245, | |
| "train_speed(iter/s)": 0.474729 | |
| }, | |
| { | |
| "epoch": 1.6807397720904165, | |
| "grad_norm": 0.3060989271500881, | |
| "learning_rate": 4.9118380645415905e-06, | |
| "loss": 0.100439453125, | |
| "memory(GiB)": 10.57, | |
| "step": 2250, | |
| "train_speed(iter/s)": 0.474737 | |
| }, | |
| { | |
| "epoch": 1.6844759947692882, | |
| "grad_norm": 0.2949704093412238, | |
| "learning_rate": 4.907314221676149e-06, | |
| "loss": 0.102716064453125, | |
| "memory(GiB)": 10.57, | |
| "step": 2255, | |
| "train_speed(iter/s)": 0.474753 | |
| }, | |
| { | |
| "epoch": 1.68821221744816, | |
| "grad_norm": 0.28246484565713104, | |
| "learning_rate": 4.902783087624195e-06, | |
| "loss": 0.104339599609375, | |
| "memory(GiB)": 10.57, | |
| "step": 2260, | |
| "train_speed(iter/s)": 0.474772 | |
| }, | |
| { | |
| "epoch": 1.6919484401270317, | |
| "grad_norm": 0.2912739109964812, | |
| "learning_rate": 4.89824467970712e-06, | |
| "loss": 0.09698486328125, | |
| "memory(GiB)": 10.57, | |
| "step": 2265, | |
| "train_speed(iter/s)": 0.474766 | |
| }, | |
| { | |
| "epoch": 1.6956846628059032, | |
| "grad_norm": 0.20297905907906486, | |
| "learning_rate": 4.8936990152741276e-06, | |
| "loss": 0.10142822265625, | |
| "memory(GiB)": 10.57, | |
| "step": 2270, | |
| "train_speed(iter/s)": 0.474788 | |
| }, | |
| { | |
| "epoch": 1.6994208854847748, | |
| "grad_norm": 0.27675872548007086, | |
| "learning_rate": 4.88914611170216e-06, | |
| "loss": 0.11038818359375, | |
| "memory(GiB)": 10.57, | |
| "step": 2275, | |
| "train_speed(iter/s)": 0.474807 | |
| }, | |
| { | |
| "epoch": 1.7031571081636465, | |
| "grad_norm": 0.26312724669069576, | |
| "learning_rate": 4.88458598639583e-06, | |
| "loss": 0.10172119140625, | |
| "memory(GiB)": 10.57, | |
| "step": 2280, | |
| "train_speed(iter/s)": 0.474842 | |
| }, | |
| { | |
| "epoch": 1.7068933308425183, | |
| "grad_norm": 0.2905331610134025, | |
| "learning_rate": 4.880018656787359e-06, | |
| "loss": 0.09381103515625, | |
| "memory(GiB)": 10.57, | |
| "step": 2285, | |
| "train_speed(iter/s)": 0.474842 | |
| }, | |
| { | |
| "epoch": 1.71062955352139, | |
| "grad_norm": 0.34444149002078045, | |
| "learning_rate": 4.8754441403365105e-06, | |
| "loss": 0.1239501953125, | |
| "memory(GiB)": 10.57, | |
| "step": 2290, | |
| "train_speed(iter/s)": 0.47486 | |
| }, | |
| { | |
| "epoch": 1.7143657762002615, | |
| "grad_norm": 0.2738462078711773, | |
| "learning_rate": 4.8708624545305185e-06, | |
| "loss": 0.0885498046875, | |
| "memory(GiB)": 10.57, | |
| "step": 2295, | |
| "train_speed(iter/s)": 0.474827 | |
| }, | |
| { | |
| "epoch": 1.718101998879133, | |
| "grad_norm": 0.28959854575833754, | |
| "learning_rate": 4.866273616884027e-06, | |
| "loss": 0.11025390625, | |
| "memory(GiB)": 10.57, | |
| "step": 2300, | |
| "train_speed(iter/s)": 0.474849 | |
| }, | |
| { | |
| "epoch": 1.7218382215580048, | |
| "grad_norm": 0.20588142938995796, | |
| "learning_rate": 4.861677644939015e-06, | |
| "loss": 0.08424072265625, | |
| "memory(GiB)": 10.57, | |
| "step": 2305, | |
| "train_speed(iter/s)": 0.474856 | |
| }, | |
| { | |
| "epoch": 1.7255744442368766, | |
| "grad_norm": 0.3354441601677246, | |
| "learning_rate": 4.857074556264738e-06, | |
| "loss": 0.1094970703125, | |
| "memory(GiB)": 10.57, | |
| "step": 2310, | |
| "train_speed(iter/s)": 0.474867 | |
| }, | |
| { | |
| "epoch": 1.7293106669157483, | |
| "grad_norm": 0.20426806575301326, | |
| "learning_rate": 4.852464368457656e-06, | |
| "loss": 0.10550537109375, | |
| "memory(GiB)": 10.57, | |
| "step": 2315, | |
| "train_speed(iter/s)": 0.474874 | |
| }, | |
| { | |
| "epoch": 1.7330468895946198, | |
| "grad_norm": 0.23904264143395532, | |
| "learning_rate": 4.8478470991413675e-06, | |
| "loss": 0.086602783203125, | |
| "memory(GiB)": 10.57, | |
| "step": 2320, | |
| "train_speed(iter/s)": 0.474876 | |
| }, | |
| { | |
| "epoch": 1.7367831122734914, | |
| "grad_norm": 0.22442760094437317, | |
| "learning_rate": 4.84322276596654e-06, | |
| "loss": 0.10830078125, | |
| "memory(GiB)": 10.57, | |
| "step": 2325, | |
| "train_speed(iter/s)": 0.4749 | |
| }, | |
| { | |
| "epoch": 1.740519334952363, | |
| "grad_norm": 0.22627089113762092, | |
| "learning_rate": 4.838591386610846e-06, | |
| "loss": 0.0934814453125, | |
| "memory(GiB)": 10.57, | |
| "step": 2330, | |
| "train_speed(iter/s)": 0.474923 | |
| }, | |
| { | |
| "epoch": 1.7442555576312349, | |
| "grad_norm": 0.212873273345035, | |
| "learning_rate": 4.833952978778896e-06, | |
| "loss": 0.10042724609375, | |
| "memory(GiB)": 10.57, | |
| "step": 2335, | |
| "train_speed(iter/s)": 0.474953 | |
| }, | |
| { | |
| "epoch": 1.7479917803101066, | |
| "grad_norm": 0.310168401865503, | |
| "learning_rate": 4.829307560202164e-06, | |
| "loss": 0.090283203125, | |
| "memory(GiB)": 10.57, | |
| "step": 2340, | |
| "train_speed(iter/s)": 0.47497 | |
| }, | |
| { | |
| "epoch": 1.7517280029889781, | |
| "grad_norm": 0.25363080821630596, | |
| "learning_rate": 4.824655148638925e-06, | |
| "loss": 0.09075927734375, | |
| "memory(GiB)": 10.57, | |
| "step": 2345, | |
| "train_speed(iter/s)": 0.474997 | |
| }, | |
| { | |
| "epoch": 1.7554642256678497, | |
| "grad_norm": 0.2287201903267125, | |
| "learning_rate": 4.81999576187419e-06, | |
| "loss": 0.122119140625, | |
| "memory(GiB)": 10.57, | |
| "step": 2350, | |
| "train_speed(iter/s)": 0.474997 | |
| }, | |
| { | |
| "epoch": 1.7592004483467214, | |
| "grad_norm": 0.360999021305386, | |
| "learning_rate": 4.815329417719632e-06, | |
| "loss": 0.11300048828125, | |
| "memory(GiB)": 10.57, | |
| "step": 2355, | |
| "train_speed(iter/s)": 0.474979 | |
| }, | |
| { | |
| "epoch": 1.7629366710255931, | |
| "grad_norm": 0.2535783044832626, | |
| "learning_rate": 4.810656134013522e-06, | |
| "loss": 0.108135986328125, | |
| "memory(GiB)": 10.57, | |
| "step": 2360, | |
| "train_speed(iter/s)": 0.474978 | |
| }, | |
| { | |
| "epoch": 1.766672893704465, | |
| "grad_norm": 0.32574474831453987, | |
| "learning_rate": 4.805975928620656e-06, | |
| "loss": 0.10255126953125, | |
| "memory(GiB)": 10.57, | |
| "step": 2365, | |
| "train_speed(iter/s)": 0.47493 | |
| }, | |
| { | |
| "epoch": 1.7704091163833364, | |
| "grad_norm": 0.19234656846328618, | |
| "learning_rate": 4.801288819432292e-06, | |
| "loss": 0.10970458984375, | |
| "memory(GiB)": 10.57, | |
| "step": 2370, | |
| "train_speed(iter/s)": 0.474954 | |
| }, | |
| { | |
| "epoch": 1.774145339062208, | |
| "grad_norm": 0.2139672272846014, | |
| "learning_rate": 4.79659482436608e-06, | |
| "loss": 0.09434814453125, | |
| "memory(GiB)": 10.57, | |
| "step": 2375, | |
| "train_speed(iter/s)": 0.474927 | |
| }, | |
| { | |
| "epoch": 1.7778815617410797, | |
| "grad_norm": 0.2978805049656468, | |
| "learning_rate": 4.791893961365992e-06, | |
| "loss": 0.11248779296875, | |
| "memory(GiB)": 10.57, | |
| "step": 2380, | |
| "train_speed(iter/s)": 0.474937 | |
| }, | |
| { | |
| "epoch": 1.7816177844199514, | |
| "grad_norm": 0.20130959752649452, | |
| "learning_rate": 4.787186248402255e-06, | |
| "loss": 0.0978759765625, | |
| "memory(GiB)": 10.57, | |
| "step": 2385, | |
| "train_speed(iter/s)": 0.474949 | |
| }, | |
| { | |
| "epoch": 1.7853540070988232, | |
| "grad_norm": 0.29180997165297434, | |
| "learning_rate": 4.782471703471281e-06, | |
| "loss": 0.112115478515625, | |
| "memory(GiB)": 10.57, | |
| "step": 2390, | |
| "train_speed(iter/s)": 0.475004 | |
| }, | |
| { | |
| "epoch": 1.7890902297776947, | |
| "grad_norm": 0.35716522757327235, | |
| "learning_rate": 4.777750344595599e-06, | |
| "loss": 0.111859130859375, | |
| "memory(GiB)": 10.57, | |
| "step": 2395, | |
| "train_speed(iter/s)": 0.475038 | |
| }, | |
| { | |
| "epoch": 1.7928264524565665, | |
| "grad_norm": 0.20213639606383335, | |
| "learning_rate": 4.773022189823787e-06, | |
| "loss": 0.09229736328125, | |
| "memory(GiB)": 10.57, | |
| "step": 2400, | |
| "train_speed(iter/s)": 0.475057 | |
| }, | |
| { | |
| "epoch": 1.796562675135438, | |
| "grad_norm": 0.2865105053142085, | |
| "learning_rate": 4.768287257230401e-06, | |
| "loss": 0.097021484375, | |
| "memory(GiB)": 10.57, | |
| "step": 2405, | |
| "train_speed(iter/s)": 0.475109 | |
| }, | |
| { | |
| "epoch": 1.8002988978143097, | |
| "grad_norm": 0.21308993463861362, | |
| "learning_rate": 4.763545564915908e-06, | |
| "loss": 0.0991943359375, | |
| "memory(GiB)": 10.57, | |
| "step": 2410, | |
| "train_speed(iter/s)": 0.475081 | |
| }, | |
| { | |
| "epoch": 1.8040351204931815, | |
| "grad_norm": 0.23525035418815923, | |
| "learning_rate": 4.758797131006613e-06, | |
| "loss": 0.0963623046875, | |
| "memory(GiB)": 10.57, | |
| "step": 2415, | |
| "train_speed(iter/s)": 0.475099 | |
| }, | |
| { | |
| "epoch": 1.807771343172053, | |
| "grad_norm": 0.21883109136220677, | |
| "learning_rate": 4.754041973654596e-06, | |
| "loss": 0.092449951171875, | |
| "memory(GiB)": 10.57, | |
| "step": 2420, | |
| "train_speed(iter/s)": 0.475037 | |
| }, | |
| { | |
| "epoch": 1.8115075658509248, | |
| "grad_norm": 0.3077520982362397, | |
| "learning_rate": 4.749280111037637e-06, | |
| "loss": 0.113623046875, | |
| "memory(GiB)": 10.57, | |
| "step": 2425, | |
| "train_speed(iter/s)": 0.475075 | |
| }, | |
| { | |
| "epoch": 1.8152437885297963, | |
| "grad_norm": 0.32425955991836447, | |
| "learning_rate": 4.7445115613591496e-06, | |
| "loss": 0.09962158203125, | |
| "memory(GiB)": 10.57, | |
| "step": 2430, | |
| "train_speed(iter/s)": 0.475116 | |
| }, | |
| { | |
| "epoch": 1.818980011208668, | |
| "grad_norm": 0.32297534935048733, | |
| "learning_rate": 4.739736342848108e-06, | |
| "loss": 0.09112548828125, | |
| "memory(GiB)": 10.57, | |
| "step": 2435, | |
| "train_speed(iter/s)": 0.475123 | |
| }, | |
| { | |
| "epoch": 1.8227162338875398, | |
| "grad_norm": 0.21046232051363747, | |
| "learning_rate": 4.734954473758984e-06, | |
| "loss": 0.08634033203125, | |
| "memory(GiB)": 10.57, | |
| "step": 2440, | |
| "train_speed(iter/s)": 0.47511 | |
| }, | |
| { | |
| "epoch": 1.8264524565664113, | |
| "grad_norm": 0.1757652117500697, | |
| "learning_rate": 4.730165972371668e-06, | |
| "loss": 0.1082275390625, | |
| "memory(GiB)": 10.57, | |
| "step": 2445, | |
| "train_speed(iter/s)": 0.475149 | |
| }, | |
| { | |
| "epoch": 1.830188679245283, | |
| "grad_norm": 0.25911116090794284, | |
| "learning_rate": 4.725370856991408e-06, | |
| "loss": 0.1029541015625, | |
| "memory(GiB)": 10.57, | |
| "step": 2450, | |
| "train_speed(iter/s)": 0.475184 | |
| }, | |
| { | |
| "epoch": 1.8339249019241546, | |
| "grad_norm": 0.34390479485101666, | |
| "learning_rate": 4.720569145948732e-06, | |
| "loss": 0.11917724609375, | |
| "memory(GiB)": 10.57, | |
| "step": 2455, | |
| "train_speed(iter/s)": 0.475229 | |
| }, | |
| { | |
| "epoch": 1.8376611246030263, | |
| "grad_norm": 0.2682881042332428, | |
| "learning_rate": 4.715760857599386e-06, | |
| "loss": 0.09146728515625, | |
| "memory(GiB)": 10.57, | |
| "step": 2460, | |
| "train_speed(iter/s)": 0.475248 | |
| }, | |
| { | |
| "epoch": 1.841397347281898, | |
| "grad_norm": 0.19430110744207282, | |
| "learning_rate": 4.710946010324257e-06, | |
| "loss": 0.10311279296875, | |
| "memory(GiB)": 10.57, | |
| "step": 2465, | |
| "train_speed(iter/s)": 0.475206 | |
| }, | |
| { | |
| "epoch": 1.8451335699607698, | |
| "grad_norm": 0.27883436818284973, | |
| "learning_rate": 4.706124622529303e-06, | |
| "loss": 0.10494384765625, | |
| "memory(GiB)": 10.57, | |
| "step": 2470, | |
| "train_speed(iter/s)": 0.475183 | |
| }, | |
| { | |
| "epoch": 1.8488697926396414, | |
| "grad_norm": 0.31596787268028487, | |
| "learning_rate": 4.7012967126454875e-06, | |
| "loss": 0.08948974609375, | |
| "memory(GiB)": 10.57, | |
| "step": 2475, | |
| "train_speed(iter/s)": 0.47521 | |
| }, | |
| { | |
| "epoch": 1.8526060153185129, | |
| "grad_norm": 0.31069646386041977, | |
| "learning_rate": 4.696462299128708e-06, | |
| "loss": 0.08408203125, | |
| "memory(GiB)": 10.57, | |
| "step": 2480, | |
| "train_speed(iter/s)": 0.475194 | |
| }, | |
| { | |
| "epoch": 1.8563422379973846, | |
| "grad_norm": 0.2061030284127865, | |
| "learning_rate": 4.691621400459718e-06, | |
| "loss": 0.09312744140625, | |
| "memory(GiB)": 10.57, | |
| "step": 2485, | |
| "train_speed(iter/s)": 0.475183 | |
| }, | |
| { | |
| "epoch": 1.8600784606762564, | |
| "grad_norm": 0.2927277785286754, | |
| "learning_rate": 4.686774035144067e-06, | |
| "loss": 0.104736328125, | |
| "memory(GiB)": 10.57, | |
| "step": 2490, | |
| "train_speed(iter/s)": 0.475219 | |
| }, | |
| { | |
| "epoch": 1.8638146833551281, | |
| "grad_norm": 0.27419348046623093, | |
| "learning_rate": 4.681920221712026e-06, | |
| "loss": 0.10330810546875, | |
| "memory(GiB)": 10.57, | |
| "step": 2495, | |
| "train_speed(iter/s)": 0.475193 | |
| }, | |
| { | |
| "epoch": 1.8675509060339996, | |
| "grad_norm": 0.2618512568544601, | |
| "learning_rate": 4.67705997871851e-06, | |
| "loss": 0.09486083984375, | |
| "memory(GiB)": 10.57, | |
| "step": 2500, | |
| "train_speed(iter/s)": 0.475193 | |
| }, | |
| { | |
| "epoch": 1.8712871287128712, | |
| "grad_norm": 0.2616692317535369, | |
| "learning_rate": 4.6721933247430155e-06, | |
| "loss": 0.10108642578125, | |
| "memory(GiB)": 10.57, | |
| "step": 2505, | |
| "train_speed(iter/s)": 0.475234 | |
| }, | |
| { | |
| "epoch": 1.875023351391743, | |
| "grad_norm": 0.37832147071618105, | |
| "learning_rate": 4.667320278389548e-06, | |
| "loss": 0.094085693359375, | |
| "memory(GiB)": 10.57, | |
| "step": 2510, | |
| "train_speed(iter/s)": 0.475221 | |
| }, | |
| { | |
| "epoch": 1.8787595740706147, | |
| "grad_norm": 0.24687088782500174, | |
| "learning_rate": 4.662440858286548e-06, | |
| "loss": 0.09676513671875, | |
| "memory(GiB)": 10.57, | |
| "step": 2515, | |
| "train_speed(iter/s)": 0.475216 | |
| }, | |
| { | |
| "epoch": 1.8824957967494864, | |
| "grad_norm": 0.234016616688346, | |
| "learning_rate": 4.657555083086823e-06, | |
| "loss": 0.10130615234375, | |
| "memory(GiB)": 10.57, | |
| "step": 2520, | |
| "train_speed(iter/s)": 0.475251 | |
| }, | |
| { | |
| "epoch": 1.886232019428358, | |
| "grad_norm": 0.238817474808307, | |
| "learning_rate": 4.65266297146747e-06, | |
| "loss": 0.097900390625, | |
| "memory(GiB)": 10.57, | |
| "step": 2525, | |
| "train_speed(iter/s)": 0.475255 | |
| }, | |
| { | |
| "epoch": 1.8899682421072295, | |
| "grad_norm": 0.207645191573174, | |
| "learning_rate": 4.647764542129812e-06, | |
| "loss": 0.091064453125, | |
| "memory(GiB)": 10.57, | |
| "step": 2530, | |
| "train_speed(iter/s)": 0.475271 | |
| }, | |
| { | |
| "epoch": 1.8937044647861012, | |
| "grad_norm": 0.38113365892750667, | |
| "learning_rate": 4.642859813799324e-06, | |
| "loss": 0.118853759765625, | |
| "memory(GiB)": 10.57, | |
| "step": 2535, | |
| "train_speed(iter/s)": 0.475293 | |
| }, | |
| { | |
| "epoch": 1.897440687464973, | |
| "grad_norm": 0.19816679538437149, | |
| "learning_rate": 4.637948805225559e-06, | |
| "loss": 0.08568115234375, | |
| "memory(GiB)": 10.57, | |
| "step": 2540, | |
| "train_speed(iter/s)": 0.475228 | |
| }, | |
| { | |
| "epoch": 1.9011769101438447, | |
| "grad_norm": 0.23604249041392467, | |
| "learning_rate": 4.633031535182075e-06, | |
| "loss": 0.11710205078125, | |
| "memory(GiB)": 10.57, | |
| "step": 2545, | |
| "train_speed(iter/s)": 0.47526 | |
| }, | |
| { | |
| "epoch": 1.9049131328227162, | |
| "grad_norm": 0.24670385102759632, | |
| "learning_rate": 4.6281080224663716e-06, | |
| "loss": 0.087890625, | |
| "memory(GiB)": 10.57, | |
| "step": 2550, | |
| "train_speed(iter/s)": 0.475273 | |
| }, | |
| { | |
| "epoch": 1.9086493555015878, | |
| "grad_norm": 0.2847144171201072, | |
| "learning_rate": 4.62317828589981e-06, | |
| "loss": 0.104248046875, | |
| "memory(GiB)": 10.57, | |
| "step": 2555, | |
| "train_speed(iter/s)": 0.475257 | |
| }, | |
| { | |
| "epoch": 1.9123855781804595, | |
| "grad_norm": 0.3178684000074, | |
| "learning_rate": 4.618242344327542e-06, | |
| "loss": 0.0997802734375, | |
| "memory(GiB)": 10.57, | |
| "step": 2560, | |
| "train_speed(iter/s)": 0.475268 | |
| }, | |
| { | |
| "epoch": 1.9161218008593313, | |
| "grad_norm": 0.2554865843964831, | |
| "learning_rate": 4.613300216618441e-06, | |
| "loss": 0.097015380859375, | |
| "memory(GiB)": 10.57, | |
| "step": 2565, | |
| "train_speed(iter/s)": 0.475299 | |
| }, | |
| { | |
| "epoch": 1.919858023538203, | |
| "grad_norm": 0.2965767135219661, | |
| "learning_rate": 4.608351921665029e-06, | |
| "loss": 0.10614013671875, | |
| "memory(GiB)": 10.57, | |
| "step": 2570, | |
| "train_speed(iter/s)": 0.475332 | |
| }, | |
| { | |
| "epoch": 1.9235942462170745, | |
| "grad_norm": 0.4039822442089598, | |
| "learning_rate": 4.603397478383403e-06, | |
| "loss": 0.10904541015625, | |
| "memory(GiB)": 10.57, | |
| "step": 2575, | |
| "train_speed(iter/s)": 0.475287 | |
| }, | |
| { | |
| "epoch": 1.927330468895946, | |
| "grad_norm": 0.25628472854278145, | |
| "learning_rate": 4.5984369057131656e-06, | |
| "loss": 0.0983642578125, | |
| "memory(GiB)": 10.57, | |
| "step": 2580, | |
| "train_speed(iter/s)": 0.475305 | |
| }, | |
| { | |
| "epoch": 1.9310666915748178, | |
| "grad_norm": 0.2779068338896975, | |
| "learning_rate": 4.5934702226173455e-06, | |
| "loss": 0.098095703125, | |
| "memory(GiB)": 10.57, | |
| "step": 2585, | |
| "train_speed(iter/s)": 0.475271 | |
| }, | |
| { | |
| "epoch": 1.9348029142536896, | |
| "grad_norm": 0.281249239607163, | |
| "learning_rate": 4.588497448082336e-06, | |
| "loss": 0.129345703125, | |
| "memory(GiB)": 10.57, | |
| "step": 2590, | |
| "train_speed(iter/s)": 0.475263 | |
| }, | |
| { | |
| "epoch": 1.9385391369325613, | |
| "grad_norm": 0.18136865279150907, | |
| "learning_rate": 4.583518601117812e-06, | |
| "loss": 0.089013671875, | |
| "memory(GiB)": 10.57, | |
| "step": 2595, | |
| "train_speed(iter/s)": 0.47529 | |
| }, | |
| { | |
| "epoch": 1.9422753596114328, | |
| "grad_norm": 0.3240659543460739, | |
| "learning_rate": 4.578533700756666e-06, | |
| "loss": 0.11053466796875, | |
| "memory(GiB)": 10.57, | |
| "step": 2600, | |
| "train_speed(iter/s)": 0.475327 | |
| }, | |
| { | |
| "epoch": 1.9460115822903044, | |
| "grad_norm": 0.19903277137682823, | |
| "learning_rate": 4.573542766054926e-06, | |
| "loss": 0.1120361328125, | |
| "memory(GiB)": 10.57, | |
| "step": 2605, | |
| "train_speed(iter/s)": 0.475344 | |
| }, | |
| { | |
| "epoch": 1.949747804969176, | |
| "grad_norm": 0.24138123028972722, | |
| "learning_rate": 4.568545816091691e-06, | |
| "loss": 0.08602294921875, | |
| "memory(GiB)": 10.57, | |
| "step": 2610, | |
| "train_speed(iter/s)": 0.475337 | |
| }, | |
| { | |
| "epoch": 1.9534840276480478, | |
| "grad_norm": 0.28322280343269146, | |
| "learning_rate": 4.563542869969055e-06, | |
| "loss": 0.08720703125, | |
| "memory(GiB)": 10.57, | |
| "step": 2615, | |
| "train_speed(iter/s)": 0.475316 | |
| }, | |
| { | |
| "epoch": 1.9572202503269196, | |
| "grad_norm": 0.249240836739657, | |
| "learning_rate": 4.558533946812034e-06, | |
| "loss": 0.093548583984375, | |
| "memory(GiB)": 10.57, | |
| "step": 2620, | |
| "train_speed(iter/s)": 0.475334 | |
| }, | |
| { | |
| "epoch": 1.9609564730057911, | |
| "grad_norm": 0.26762802652785167, | |
| "learning_rate": 4.55351906576849e-06, | |
| "loss": 0.08345947265625, | |
| "memory(GiB)": 10.57, | |
| "step": 2625, | |
| "train_speed(iter/s)": 0.475361 | |
| }, | |
| { | |
| "epoch": 1.9646926956846626, | |
| "grad_norm": 0.22273584638151617, | |
| "learning_rate": 4.548498246009062e-06, | |
| "loss": 0.10457763671875, | |
| "memory(GiB)": 10.57, | |
| "step": 2630, | |
| "train_speed(iter/s)": 0.475364 | |
| }, | |
| { | |
| "epoch": 1.9684289183635344, | |
| "grad_norm": 0.3304879364377937, | |
| "learning_rate": 4.543471506727094e-06, | |
| "loss": 0.1021240234375, | |
| "memory(GiB)": 10.57, | |
| "step": 2635, | |
| "train_speed(iter/s)": 0.475354 | |
| }, | |
| { | |
| "epoch": 1.9721651410424061, | |
| "grad_norm": 0.29863906262334294, | |
| "learning_rate": 4.538438867138554e-06, | |
| "loss": 0.10843505859375, | |
| "memory(GiB)": 10.57, | |
| "step": 2640, | |
| "train_speed(iter/s)": 0.475332 | |
| }, | |
| { | |
| "epoch": 1.975901363721278, | |
| "grad_norm": 0.2714963446386557, | |
| "learning_rate": 4.533400346481969e-06, | |
| "loss": 0.097955322265625, | |
| "memory(GiB)": 10.57, | |
| "step": 2645, | |
| "train_speed(iter/s)": 0.475329 | |
| }, | |
| { | |
| "epoch": 1.9796375864001494, | |
| "grad_norm": 0.3336618360843215, | |
| "learning_rate": 4.528355964018347e-06, | |
| "loss": 0.09144287109375, | |
| "memory(GiB)": 10.57, | |
| "step": 2650, | |
| "train_speed(iter/s)": 0.475305 | |
| }, | |
| { | |
| "epoch": 1.983373809079021, | |
| "grad_norm": 0.2980584550792422, | |
| "learning_rate": 4.523305739031104e-06, | |
| "loss": 0.0895965576171875, | |
| "memory(GiB)": 10.57, | |
| "step": 2655, | |
| "train_speed(iter/s)": 0.475329 | |
| }, | |
| { | |
| "epoch": 1.9871100317578927, | |
| "grad_norm": 0.2720629310615164, | |
| "learning_rate": 4.518249690825988e-06, | |
| "loss": 0.1112548828125, | |
| "memory(GiB)": 10.57, | |
| "step": 2660, | |
| "train_speed(iter/s)": 0.475346 | |
| }, | |
| { | |
| "epoch": 1.9908462544367644, | |
| "grad_norm": 0.3546253789825318, | |
| "learning_rate": 4.5131878387310135e-06, | |
| "loss": 0.12337646484375, | |
| "memory(GiB)": 10.57, | |
| "step": 2665, | |
| "train_speed(iter/s)": 0.475357 | |
| }, | |
| { | |
| "epoch": 1.9945824771156362, | |
| "grad_norm": 0.28424801518849385, | |
| "learning_rate": 4.508120202096376e-06, | |
| "loss": 0.109814453125, | |
| "memory(GiB)": 10.57, | |
| "step": 2670, | |
| "train_speed(iter/s)": 0.475325 | |
| }, | |
| { | |
| "epoch": 1.9983186997945077, | |
| "grad_norm": 0.21729292843099146, | |
| "learning_rate": 4.5030468002943874e-06, | |
| "loss": 0.0903076171875, | |
| "memory(GiB)": 10.57, | |
| "step": 2675, | |
| "train_speed(iter/s)": 0.475307 | |
| }, | |
| { | |
| "epoch": 2.0014944890715487, | |
| "grad_norm": 0.2345088903539215, | |
| "learning_rate": 4.497967652719397e-06, | |
| "loss": 0.08399658203125, | |
| "memory(GiB)": 10.57, | |
| "step": 2680, | |
| "train_speed(iter/s)": 0.475181 | |
| }, | |
| { | |
| "epoch": 2.0052307117504204, | |
| "grad_norm": 0.15184847590072537, | |
| "learning_rate": 4.492882778787718e-06, | |
| "loss": 0.07313232421875, | |
| "memory(GiB)": 10.57, | |
| "step": 2685, | |
| "train_speed(iter/s)": 0.475183 | |
| }, | |
| { | |
| "epoch": 2.008966934429292, | |
| "grad_norm": 0.19979306629529392, | |
| "learning_rate": 4.487792197937558e-06, | |
| "loss": 0.0822509765625, | |
| "memory(GiB)": 10.57, | |
| "step": 2690, | |
| "train_speed(iter/s)": 0.475199 | |
| }, | |
| { | |
| "epoch": 2.0127031571081635, | |
| "grad_norm": 0.2757404700372733, | |
| "learning_rate": 4.482695929628936e-06, | |
| "loss": 0.083453369140625, | |
| "memory(GiB)": 10.57, | |
| "step": 2695, | |
| "train_speed(iter/s)": 0.475215 | |
| }, | |
| { | |
| "epoch": 2.0164393797870352, | |
| "grad_norm": 0.2560396040817178, | |
| "learning_rate": 4.477593993343614e-06, | |
| "loss": 0.0873291015625, | |
| "memory(GiB)": 10.57, | |
| "step": 2700, | |
| "train_speed(iter/s)": 0.475205 | |
| }, | |
| { | |
| "epoch": 2.020175602465907, | |
| "grad_norm": 0.26086772363802274, | |
| "learning_rate": 4.472486408585022e-06, | |
| "loss": 0.084521484375, | |
| "memory(GiB)": 10.57, | |
| "step": 2705, | |
| "train_speed(iter/s)": 0.475236 | |
| }, | |
| { | |
| "epoch": 2.0239118251447787, | |
| "grad_norm": 0.2694766103158065, | |
| "learning_rate": 4.467373194878183e-06, | |
| "loss": 0.0845458984375, | |
| "memory(GiB)": 10.57, | |
| "step": 2710, | |
| "train_speed(iter/s)": 0.475254 | |
| }, | |
| { | |
| "epoch": 2.0276480478236505, | |
| "grad_norm": 0.36339657819849375, | |
| "learning_rate": 4.462254371769637e-06, | |
| "loss": 0.08817138671875, | |
| "memory(GiB)": 10.57, | |
| "step": 2715, | |
| "train_speed(iter/s)": 0.475273 | |
| }, | |
| { | |
| "epoch": 2.031384270502522, | |
| "grad_norm": 0.1574529728668933, | |
| "learning_rate": 4.457129958827369e-06, | |
| "loss": 0.07781982421875, | |
| "memory(GiB)": 10.57, | |
| "step": 2720, | |
| "train_speed(iter/s)": 0.475304 | |
| }, | |
| { | |
| "epoch": 2.0351204931813935, | |
| "grad_norm": 0.2776966602079697, | |
| "learning_rate": 4.451999975640731e-06, | |
| "loss": 0.079388427734375, | |
| "memory(GiB)": 10.57, | |
| "step": 2725, | |
| "train_speed(iter/s)": 0.47531 | |
| }, | |
| { | |
| "epoch": 2.0388567158602653, | |
| "grad_norm": 0.1584963725015156, | |
| "learning_rate": 4.446864441820368e-06, | |
| "loss": 0.09000244140625, | |
| "memory(GiB)": 10.57, | |
| "step": 2730, | |
| "train_speed(iter/s)": 0.475323 | |
| }, | |
| { | |
| "epoch": 2.042592938539137, | |
| "grad_norm": 0.22327421262837086, | |
| "learning_rate": 4.441723376998147e-06, | |
| "loss": 0.0762939453125, | |
| "memory(GiB)": 10.57, | |
| "step": 2735, | |
| "train_speed(iter/s)": 0.475273 | |
| }, | |
| { | |
| "epoch": 2.046329161218009, | |
| "grad_norm": 0.33057687684074827, | |
| "learning_rate": 4.436576800827074e-06, | |
| "loss": 0.06875762939453126, | |
| "memory(GiB)": 10.57, | |
| "step": 2740, | |
| "train_speed(iter/s)": 0.475284 | |
| }, | |
| { | |
| "epoch": 2.05006538389688, | |
| "grad_norm": 0.29923227392853685, | |
| "learning_rate": 4.431424732981228e-06, | |
| "loss": 0.06706466674804687, | |
| "memory(GiB)": 10.57, | |
| "step": 2745, | |
| "train_speed(iter/s)": 0.475292 | |
| }, | |
| { | |
| "epoch": 2.053801606575752, | |
| "grad_norm": 0.4030927309740962, | |
| "learning_rate": 4.426267193155678e-06, | |
| "loss": 0.075927734375, | |
| "memory(GiB)": 10.57, | |
| "step": 2750, | |
| "train_speed(iter/s)": 0.475316 | |
| }, | |
| { | |
| "epoch": 2.0575378292546236, | |
| "grad_norm": 0.37117244198948085, | |
| "learning_rate": 4.4211042010664135e-06, | |
| "loss": 0.07960205078125, | |
| "memory(GiB)": 10.57, | |
| "step": 2755, | |
| "train_speed(iter/s)": 0.475314 | |
| }, | |
| { | |
| "epoch": 2.0612740519334953, | |
| "grad_norm": 0.31391095462008983, | |
| "learning_rate": 4.415935776450264e-06, | |
| "loss": 0.09554443359375, | |
| "memory(GiB)": 10.57, | |
| "step": 2760, | |
| "train_speed(iter/s)": 0.475317 | |
| }, | |
| { | |
| "epoch": 2.065010274612367, | |
| "grad_norm": 0.17975702587106152, | |
| "learning_rate": 4.410761939064827e-06, | |
| "loss": 0.07388916015625, | |
| "memory(GiB)": 10.57, | |
| "step": 2765, | |
| "train_speed(iter/s)": 0.475337 | |
| }, | |
| { | |
| "epoch": 2.0687464972912384, | |
| "grad_norm": 0.3396889402601098, | |
| "learning_rate": 4.405582708688395e-06, | |
| "loss": 0.084979248046875, | |
| "memory(GiB)": 10.57, | |
| "step": 2770, | |
| "train_speed(iter/s)": 0.475352 | |
| }, | |
| { | |
| "epoch": 2.07248271997011, | |
| "grad_norm": 0.24563175886180283, | |
| "learning_rate": 4.400398105119872e-06, | |
| "loss": 0.08388671875, | |
| "memory(GiB)": 10.57, | |
| "step": 2775, | |
| "train_speed(iter/s)": 0.475388 | |
| }, | |
| { | |
| "epoch": 2.076218942648982, | |
| "grad_norm": 0.2558763668832394, | |
| "learning_rate": 4.395208148178704e-06, | |
| "loss": 0.0897216796875, | |
| "memory(GiB)": 10.57, | |
| "step": 2780, | |
| "train_speed(iter/s)": 0.475404 | |
| }, | |
| { | |
| "epoch": 2.0799551653278536, | |
| "grad_norm": 0.3548268406619161, | |
| "learning_rate": 4.390012857704802e-06, | |
| "loss": 0.08565673828125, | |
| "memory(GiB)": 10.57, | |
| "step": 2785, | |
| "train_speed(iter/s)": 0.4754 | |
| }, | |
| { | |
| "epoch": 2.0836913880067254, | |
| "grad_norm": 0.326064743718348, | |
| "learning_rate": 4.384812253558467e-06, | |
| "loss": 0.08856201171875, | |
| "memory(GiB)": 10.57, | |
| "step": 2790, | |
| "train_speed(iter/s)": 0.47541 | |
| }, | |
| { | |
| "epoch": 2.0874276106855967, | |
| "grad_norm": 0.3250783826701612, | |
| "learning_rate": 4.37960635562031e-06, | |
| "loss": 0.083563232421875, | |
| "memory(GiB)": 10.57, | |
| "step": 2795, | |
| "train_speed(iter/s)": 0.475407 | |
| }, | |
| { | |
| "epoch": 2.0911638333644684, | |
| "grad_norm": 0.1928343549830928, | |
| "learning_rate": 4.3743951837911804e-06, | |
| "loss": 0.0770751953125, | |
| "memory(GiB)": 10.57, | |
| "step": 2800, | |
| "train_speed(iter/s)": 0.475418 | |
| }, | |
| { | |
| "epoch": 2.09490005604334, | |
| "grad_norm": 0.3314940438350291, | |
| "learning_rate": 4.3691787579920886e-06, | |
| "loss": 0.0668182373046875, | |
| "memory(GiB)": 10.57, | |
| "step": 2805, | |
| "train_speed(iter/s)": 0.475443 | |
| }, | |
| { | |
| "epoch": 2.098636278722212, | |
| "grad_norm": 0.25557946764887945, | |
| "learning_rate": 4.363957098164129e-06, | |
| "loss": 0.09249267578125, | |
| "memory(GiB)": 10.57, | |
| "step": 2810, | |
| "train_speed(iter/s)": 0.475472 | |
| }, | |
| { | |
| "epoch": 2.1023725014010837, | |
| "grad_norm": 0.2834236723948582, | |
| "learning_rate": 4.358730224268404e-06, | |
| "loss": 0.076348876953125, | |
| "memory(GiB)": 10.57, | |
| "step": 2815, | |
| "train_speed(iter/s)": 0.475457 | |
| }, | |
| { | |
| "epoch": 2.106108724079955, | |
| "grad_norm": 0.17913726646319922, | |
| "learning_rate": 4.353498156285951e-06, | |
| "loss": 0.0684478759765625, | |
| "memory(GiB)": 10.57, | |
| "step": 2820, | |
| "train_speed(iter/s)": 0.475474 | |
| }, | |
| { | |
| "epoch": 2.1098449467588267, | |
| "grad_norm": 0.39181628904806004, | |
| "learning_rate": 4.3482609142176585e-06, | |
| "loss": 0.08323974609375, | |
| "memory(GiB)": 10.57, | |
| "step": 2825, | |
| "train_speed(iter/s)": 0.475472 | |
| }, | |
| { | |
| "epoch": 2.1135811694376985, | |
| "grad_norm": 0.3689042584118628, | |
| "learning_rate": 4.343018518084197e-06, | |
| "loss": 0.08089599609375, | |
| "memory(GiB)": 10.57, | |
| "step": 2830, | |
| "train_speed(iter/s)": 0.475507 | |
| }, | |
| { | |
| "epoch": 2.11731739211657, | |
| "grad_norm": 0.30944635697905426, | |
| "learning_rate": 4.337770987925941e-06, | |
| "loss": 0.074566650390625, | |
| "memory(GiB)": 10.57, | |
| "step": 2835, | |
| "train_speed(iter/s)": 0.475485 | |
| }, | |
| { | |
| "epoch": 2.121053614795442, | |
| "grad_norm": 0.20965343005966453, | |
| "learning_rate": 4.332518343802886e-06, | |
| "loss": 0.0746063232421875, | |
| "memory(GiB)": 10.57, | |
| "step": 2840, | |
| "train_speed(iter/s)": 0.475453 | |
| }, | |
| { | |
| "epoch": 2.1247898374743133, | |
| "grad_norm": 0.24055286896299563, | |
| "learning_rate": 4.327260605794583e-06, | |
| "loss": 0.0832275390625, | |
| "memory(GiB)": 10.57, | |
| "step": 2845, | |
| "train_speed(iter/s)": 0.475488 | |
| }, | |
| { | |
| "epoch": 2.128526060153185, | |
| "grad_norm": 0.30278392143378924, | |
| "learning_rate": 4.321997794000053e-06, | |
| "loss": 0.09150390625, | |
| "memory(GiB)": 10.57, | |
| "step": 2850, | |
| "train_speed(iter/s)": 0.475506 | |
| }, | |
| { | |
| "epoch": 2.1322622828320568, | |
| "grad_norm": 0.3357493665071166, | |
| "learning_rate": 4.316729928537712e-06, | |
| "loss": 0.077264404296875, | |
| "memory(GiB)": 10.57, | |
| "step": 2855, | |
| "train_speed(iter/s)": 0.475505 | |
| }, | |
| { | |
| "epoch": 2.1359985055109285, | |
| "grad_norm": 0.28839246476160085, | |
| "learning_rate": 4.311457029545295e-06, | |
| "loss": 0.07557373046875, | |
| "memory(GiB)": 10.57, | |
| "step": 2860, | |
| "train_speed(iter/s)": 0.475494 | |
| }, | |
| { | |
| "epoch": 2.1397347281898003, | |
| "grad_norm": 0.3587645451871882, | |
| "learning_rate": 4.30617911717978e-06, | |
| "loss": 0.08240966796875, | |
| "memory(GiB)": 10.57, | |
| "step": 2865, | |
| "train_speed(iter/s)": 0.475522 | |
| }, | |
| { | |
| "epoch": 2.1434709508686716, | |
| "grad_norm": 0.21348435074986552, | |
| "learning_rate": 4.3008962116173105e-06, | |
| "loss": 0.06397705078125, | |
| "memory(GiB)": 10.57, | |
| "step": 2870, | |
| "train_speed(iter/s)": 0.47546 | |
| }, | |
| { | |
| "epoch": 2.1472071735475433, | |
| "grad_norm": 0.24044644726569717, | |
| "learning_rate": 4.295608333053115e-06, | |
| "loss": 0.0892333984375, | |
| "memory(GiB)": 10.57, | |
| "step": 2875, | |
| "train_speed(iter/s)": 0.475493 | |
| }, | |
| { | |
| "epoch": 2.150943396226415, | |
| "grad_norm": 0.271844882428932, | |
| "learning_rate": 4.290315501701436e-06, | |
| "loss": 0.07017822265625, | |
| "memory(GiB)": 10.57, | |
| "step": 2880, | |
| "train_speed(iter/s)": 0.475506 | |
| }, | |
| { | |
| "epoch": 2.154679618905287, | |
| "grad_norm": 0.32275562789715756, | |
| "learning_rate": 4.285017737795447e-06, | |
| "loss": 0.094970703125, | |
| "memory(GiB)": 10.57, | |
| "step": 2885, | |
| "train_speed(iter/s)": 0.475543 | |
| }, | |
| { | |
| "epoch": 2.1584158415841586, | |
| "grad_norm": 0.19204227011392838, | |
| "learning_rate": 4.279715061587176e-06, | |
| "loss": 0.082275390625, | |
| "memory(GiB)": 10.57, | |
| "step": 2890, | |
| "train_speed(iter/s)": 0.475515 | |
| }, | |
| { | |
| "epoch": 2.1621520642630303, | |
| "grad_norm": 0.3187374981569435, | |
| "learning_rate": 4.274407493347435e-06, | |
| "loss": 0.073956298828125, | |
| "memory(GiB)": 10.57, | |
| "step": 2895, | |
| "train_speed(iter/s)": 0.475498 | |
| }, | |
| { | |
| "epoch": 2.1658882869419016, | |
| "grad_norm": 0.31518550432451825, | |
| "learning_rate": 4.26909505336573e-06, | |
| "loss": 0.08779296875, | |
| "memory(GiB)": 10.57, | |
| "step": 2900, | |
| "train_speed(iter/s)": 0.475501 | |
| }, | |
| { | |
| "epoch": 2.1696245096207734, | |
| "grad_norm": 0.25742777623976215, | |
| "learning_rate": 4.2637777619501955e-06, | |
| "loss": 0.068133544921875, | |
| "memory(GiB)": 10.57, | |
| "step": 2905, | |
| "train_speed(iter/s)": 0.475526 | |
| }, | |
| { | |
| "epoch": 2.173360732299645, | |
| "grad_norm": 0.327461904975564, | |
| "learning_rate": 4.258455639427512e-06, | |
| "loss": 0.07855224609375, | |
| "memory(GiB)": 10.57, | |
| "step": 2910, | |
| "train_speed(iter/s)": 0.475516 | |
| }, | |
| { | |
| "epoch": 2.177096954978517, | |
| "grad_norm": 0.2947045587842032, | |
| "learning_rate": 4.253128706142823e-06, | |
| "loss": 0.078759765625, | |
| "memory(GiB)": 10.57, | |
| "step": 2915, | |
| "train_speed(iter/s)": 0.475556 | |
| }, | |
| { | |
| "epoch": 2.180833177657388, | |
| "grad_norm": 0.24106474434323896, | |
| "learning_rate": 4.2477969824596675e-06, | |
| "loss": 0.0806396484375, | |
| "memory(GiB)": 10.57, | |
| "step": 2920, | |
| "train_speed(iter/s)": 0.475576 | |
| }, | |
| { | |
| "epoch": 2.18456940033626, | |
| "grad_norm": 0.35498053988232225, | |
| "learning_rate": 4.2424604887598956e-06, | |
| "loss": 0.08232421875, | |
| "memory(GiB)": 10.57, | |
| "step": 2925, | |
| "train_speed(iter/s)": 0.475536 | |
| }, | |
| { | |
| "epoch": 2.1883056230151317, | |
| "grad_norm": 0.30444021185040904, | |
| "learning_rate": 4.237119245443591e-06, | |
| "loss": 0.08363037109375, | |
| "memory(GiB)": 10.57, | |
| "step": 2930, | |
| "train_speed(iter/s)": 0.475537 | |
| }, | |
| { | |
| "epoch": 2.1920418456940034, | |
| "grad_norm": 0.2844894921351017, | |
| "learning_rate": 4.231773272928995e-06, | |
| "loss": 0.0828857421875, | |
| "memory(GiB)": 10.57, | |
| "step": 2935, | |
| "train_speed(iter/s)": 0.475519 | |
| }, | |
| { | |
| "epoch": 2.195778068372875, | |
| "grad_norm": 0.3680515586014792, | |
| "learning_rate": 4.226422591652426e-06, | |
| "loss": 0.0849609375, | |
| "memory(GiB)": 10.57, | |
| "step": 2940, | |
| "train_speed(iter/s)": 0.475527 | |
| }, | |
| { | |
| "epoch": 2.199514291051747, | |
| "grad_norm": 0.3347584264458827, | |
| "learning_rate": 4.221067222068204e-06, | |
| "loss": 0.07615966796875, | |
| "memory(GiB)": 10.57, | |
| "step": 2945, | |
| "train_speed(iter/s)": 0.475506 | |
| }, | |
| { | |
| "epoch": 2.203250513730618, | |
| "grad_norm": 0.24357214909557126, | |
| "learning_rate": 4.215707184648571e-06, | |
| "loss": 0.071929931640625, | |
| "memory(GiB)": 10.57, | |
| "step": 2950, | |
| "train_speed(iter/s)": 0.475535 | |
| }, | |
| { | |
| "epoch": 2.20698673640949, | |
| "grad_norm": 0.2969870033632324, | |
| "learning_rate": 4.2103424998836166e-06, | |
| "loss": 0.0795166015625, | |
| "memory(GiB)": 10.57, | |
| "step": 2955, | |
| "train_speed(iter/s)": 0.475539 | |
| }, | |
| { | |
| "epoch": 2.2107229590883617, | |
| "grad_norm": 0.2597821857641748, | |
| "learning_rate": 4.204973188281187e-06, | |
| "loss": 0.078076171875, | |
| "memory(GiB)": 10.57, | |
| "step": 2960, | |
| "train_speed(iter/s)": 0.475554 | |
| }, | |
| { | |
| "epoch": 2.2144591817672334, | |
| "grad_norm": 0.345560787249567, | |
| "learning_rate": 4.199599270366825e-06, | |
| "loss": 0.085748291015625, | |
| "memory(GiB)": 10.57, | |
| "step": 2965, | |
| "train_speed(iter/s)": 0.47555 | |
| }, | |
| { | |
| "epoch": 2.218195404446105, | |
| "grad_norm": 0.30970032428526245, | |
| "learning_rate": 4.1942207666836765e-06, | |
| "loss": 0.082818603515625, | |
| "memory(GiB)": 10.57, | |
| "step": 2970, | |
| "train_speed(iter/s)": 0.475506 | |
| }, | |
| { | |
| "epoch": 2.2219316271249765, | |
| "grad_norm": 0.3183590391694136, | |
| "learning_rate": 4.188837697792421e-06, | |
| "loss": 0.0791748046875, | |
| "memory(GiB)": 10.57, | |
| "step": 2975, | |
| "train_speed(iter/s)": 0.475502 | |
| }, | |
| { | |
| "epoch": 2.2256678498038482, | |
| "grad_norm": 0.40743149107649224, | |
| "learning_rate": 4.183450084271186e-06, | |
| "loss": 0.085736083984375, | |
| "memory(GiB)": 10.57, | |
| "step": 2980, | |
| "train_speed(iter/s)": 0.475528 | |
| }, | |
| { | |
| "epoch": 2.22940407248272, | |
| "grad_norm": 0.36574069885687205, | |
| "learning_rate": 4.178057946715476e-06, | |
| "loss": 0.08839111328125, | |
| "memory(GiB)": 10.57, | |
| "step": 2985, | |
| "train_speed(iter/s)": 0.475523 | |
| }, | |
| { | |
| "epoch": 2.2331402951615917, | |
| "grad_norm": 0.29949255358893273, | |
| "learning_rate": 4.172661305738086e-06, | |
| "loss": 0.076226806640625, | |
| "memory(GiB)": 10.57, | |
| "step": 2990, | |
| "train_speed(iter/s)": 0.4755 | |
| }, | |
| { | |
| "epoch": 2.2368765178404635, | |
| "grad_norm": 0.2645783347146312, | |
| "learning_rate": 4.167260181969031e-06, | |
| "loss": 0.0787109375, | |
| "memory(GiB)": 10.57, | |
| "step": 2995, | |
| "train_speed(iter/s)": 0.475505 | |
| }, | |
| { | |
| "epoch": 2.240612740519335, | |
| "grad_norm": 0.35590583986728974, | |
| "learning_rate": 4.161854596055458e-06, | |
| "loss": 0.082958984375, | |
| "memory(GiB)": 10.57, | |
| "step": 3000, | |
| "train_speed(iter/s)": 0.475522 | |
| }, | |
| { | |
| "epoch": 2.2443489631982065, | |
| "grad_norm": 0.2855462271704881, | |
| "learning_rate": 4.156444568661574e-06, | |
| "loss": 0.0782135009765625, | |
| "memory(GiB)": 10.57, | |
| "step": 3005, | |
| "train_speed(iter/s)": 0.475556 | |
| }, | |
| { | |
| "epoch": 2.2480851858770783, | |
| "grad_norm": 0.23189643301309532, | |
| "learning_rate": 4.151030120468563e-06, | |
| "loss": 0.08284912109375, | |
| "memory(GiB)": 10.57, | |
| "step": 3010, | |
| "train_speed(iter/s)": 0.475525 | |
| }, | |
| { | |
| "epoch": 2.25182140855595, | |
| "grad_norm": 0.2823549603550444, | |
| "learning_rate": 4.145611272174513e-06, | |
| "loss": 0.1001220703125, | |
| "memory(GiB)": 10.57, | |
| "step": 3015, | |
| "train_speed(iter/s)": 0.47551 | |
| }, | |
| { | |
| "epoch": 2.255557631234822, | |
| "grad_norm": 0.3123102072825862, | |
| "learning_rate": 4.140188044494328e-06, | |
| "loss": 0.0789306640625, | |
| "memory(GiB)": 10.57, | |
| "step": 3020, | |
| "train_speed(iter/s)": 0.475473 | |
| }, | |
| { | |
| "epoch": 2.259293853913693, | |
| "grad_norm": 0.34390166190396304, | |
| "learning_rate": 4.134760458159652e-06, | |
| "loss": 0.088250732421875, | |
| "memory(GiB)": 10.57, | |
| "step": 3025, | |
| "train_speed(iter/s)": 0.475474 | |
| }, | |
| { | |
| "epoch": 2.263030076592565, | |
| "grad_norm": 0.4471657878322189, | |
| "learning_rate": 4.1293285339187975e-06, | |
| "loss": 0.08520050048828125, | |
| "memory(GiB)": 10.57, | |
| "step": 3030, | |
| "train_speed(iter/s)": 0.475502 | |
| }, | |
| { | |
| "epoch": 2.2667662992714366, | |
| "grad_norm": 0.29627009892222517, | |
| "learning_rate": 4.123892292536655e-06, | |
| "loss": 0.0954498291015625, | |
| "memory(GiB)": 10.57, | |
| "step": 3035, | |
| "train_speed(iter/s)": 0.475527 | |
| }, | |
| { | |
| "epoch": 2.2705025219503083, | |
| "grad_norm": 0.2103142983370086, | |
| "learning_rate": 4.118451754794616e-06, | |
| "loss": 0.079296875, | |
| "memory(GiB)": 10.57, | |
| "step": 3040, | |
| "train_speed(iter/s)": 0.47555 | |
| }, | |
| { | |
| "epoch": 2.27423874462918, | |
| "grad_norm": 0.29094874204231086, | |
| "learning_rate": 4.113006941490504e-06, | |
| "loss": 0.07890625, | |
| "memory(GiB)": 10.57, | |
| "step": 3045, | |
| "train_speed(iter/s)": 0.475543 | |
| }, | |
| { | |
| "epoch": 2.2779749673080514, | |
| "grad_norm": 0.2944500502582637, | |
| "learning_rate": 4.1075578734384796e-06, | |
| "loss": 0.07510986328125, | |
| "memory(GiB)": 10.57, | |
| "step": 3050, | |
| "train_speed(iter/s)": 0.475485 | |
| }, | |
| { | |
| "epoch": 2.281711189986923, | |
| "grad_norm": 0.247526345569416, | |
| "learning_rate": 4.1021045714689715e-06, | |
| "loss": 0.062725830078125, | |
| "memory(GiB)": 10.57, | |
| "step": 3055, | |
| "train_speed(iter/s)": 0.4755 | |
| }, | |
| { | |
| "epoch": 2.285447412665795, | |
| "grad_norm": 0.2223509166017715, | |
| "learning_rate": 4.096647056428591e-06, | |
| "loss": 0.08511962890625, | |
| "memory(GiB)": 10.57, | |
| "step": 3060, | |
| "train_speed(iter/s)": 0.475511 | |
| }, | |
| { | |
| "epoch": 2.2891836353446666, | |
| "grad_norm": 0.40394852915768165, | |
| "learning_rate": 4.0911853491800606e-06, | |
| "loss": 0.078338623046875, | |
| "memory(GiB)": 10.57, | |
| "step": 3065, | |
| "train_speed(iter/s)": 0.475523 | |
| }, | |
| { | |
| "epoch": 2.2929198580235384, | |
| "grad_norm": 0.3262435355040092, | |
| "learning_rate": 4.085719470602121e-06, | |
| "loss": 0.085260009765625, | |
| "memory(GiB)": 10.57, | |
| "step": 3070, | |
| "train_speed(iter/s)": 0.475506 | |
| }, | |
| { | |
| "epoch": 2.2966560807024097, | |
| "grad_norm": 0.30731468388186667, | |
| "learning_rate": 4.080249441589465e-06, | |
| "loss": 0.081439208984375, | |
| "memory(GiB)": 10.57, | |
| "step": 3075, | |
| "train_speed(iter/s)": 0.47553 | |
| }, | |
| { | |
| "epoch": 2.3003923033812814, | |
| "grad_norm": 0.2619319232654712, | |
| "learning_rate": 4.074775283052647e-06, | |
| "loss": 0.07823486328125, | |
| "memory(GiB)": 10.57, | |
| "step": 3080, | |
| "train_speed(iter/s)": 0.475536 | |
| }, | |
| { | |
| "epoch": 2.304128526060153, | |
| "grad_norm": 0.28997697963247854, | |
| "learning_rate": 4.069297015918012e-06, | |
| "loss": 0.080047607421875, | |
| "memory(GiB)": 10.57, | |
| "step": 3085, | |
| "train_speed(iter/s)": 0.475543 | |
| }, | |
| { | |
| "epoch": 2.307864748739025, | |
| "grad_norm": 0.3041055152853103, | |
| "learning_rate": 4.063814661127607e-06, | |
| "loss": 0.085015869140625, | |
| "memory(GiB)": 10.57, | |
| "step": 3090, | |
| "train_speed(iter/s)": 0.475538 | |
| }, | |
| { | |
| "epoch": 2.3116009714178967, | |
| "grad_norm": 0.28074738714998865, | |
| "learning_rate": 4.058328239639108e-06, | |
| "loss": 0.0771240234375, | |
| "memory(GiB)": 10.57, | |
| "step": 3095, | |
| "train_speed(iter/s)": 0.475537 | |
| }, | |
| { | |
| "epoch": 2.3153371940967684, | |
| "grad_norm": 0.2742208472612064, | |
| "learning_rate": 4.052837772425735e-06, | |
| "loss": 0.071533203125, | |
| "memory(GiB)": 10.57, | |
| "step": 3100, | |
| "train_speed(iter/s)": 0.475526 | |
| }, | |
| { | |
| "epoch": 2.3190734167756397, | |
| "grad_norm": 0.2738394747920133, | |
| "learning_rate": 4.0473432804761745e-06, | |
| "loss": 0.074151611328125, | |
| "memory(GiB)": 10.57, | |
| "step": 3105, | |
| "train_speed(iter/s)": 0.475533 | |
| }, | |
| { | |
| "epoch": 2.3228096394545115, | |
| "grad_norm": 0.3325363093754662, | |
| "learning_rate": 4.0418447847945e-06, | |
| "loss": 0.07762451171875, | |
| "memory(GiB)": 10.57, | |
| "step": 3110, | |
| "train_speed(iter/s)": 0.475573 | |
| }, | |
| { | |
| "epoch": 2.326545862133383, | |
| "grad_norm": 0.29208910041820724, | |
| "learning_rate": 4.036342306400087e-06, | |
| "loss": 0.08729248046875, | |
| "memory(GiB)": 10.57, | |
| "step": 3115, | |
| "train_speed(iter/s)": 0.475557 | |
| }, | |
| { | |
| "epoch": 2.330282084812255, | |
| "grad_norm": 0.2986291580987787, | |
| "learning_rate": 4.03083586632754e-06, | |
| "loss": 0.070965576171875, | |
| "memory(GiB)": 10.57, | |
| "step": 3120, | |
| "train_speed(iter/s)": 0.475591 | |
| }, | |
| { | |
| "epoch": 2.3340183074911263, | |
| "grad_norm": 0.2715172245264193, | |
| "learning_rate": 4.025325485626604e-06, | |
| "loss": 0.07711181640625, | |
| "memory(GiB)": 10.57, | |
| "step": 3125, | |
| "train_speed(iter/s)": 0.475607 | |
| }, | |
| { | |
| "epoch": 2.337754530169998, | |
| "grad_norm": 0.28383527690267557, | |
| "learning_rate": 4.01981118536209e-06, | |
| "loss": 0.073974609375, | |
| "memory(GiB)": 10.57, | |
| "step": 3130, | |
| "train_speed(iter/s)": 0.475608 | |
| }, | |
| { | |
| "epoch": 2.3414907528488698, | |
| "grad_norm": 0.4294056030563819, | |
| "learning_rate": 4.014292986613795e-06, | |
| "loss": 0.09591064453125, | |
| "memory(GiB)": 10.57, | |
| "step": 3135, | |
| "train_speed(iter/s)": 0.475616 | |
| }, | |
| { | |
| "epoch": 2.3452269755277415, | |
| "grad_norm": 0.324672085272647, | |
| "learning_rate": 4.008770910476415e-06, | |
| "loss": 0.073956298828125, | |
| "memory(GiB)": 10.57, | |
| "step": 3140, | |
| "train_speed(iter/s)": 0.475642 | |
| }, | |
| { | |
| "epoch": 2.3489631982066133, | |
| "grad_norm": 0.33039068217728207, | |
| "learning_rate": 4.003244978059466e-06, | |
| "loss": 0.082257080078125, | |
| "memory(GiB)": 10.57, | |
| "step": 3145, | |
| "train_speed(iter/s)": 0.475644 | |
| }, | |
| { | |
| "epoch": 2.352699420885485, | |
| "grad_norm": 0.25727097167399077, | |
| "learning_rate": 3.997715210487215e-06, | |
| "loss": 0.078131103515625, | |
| "memory(GiB)": 10.57, | |
| "step": 3150, | |
| "train_speed(iter/s)": 0.475682 | |
| }, | |
| { | |
| "epoch": 2.3564356435643563, | |
| "grad_norm": 0.3005461408551253, | |
| "learning_rate": 3.992181628898582e-06, | |
| "loss": 0.0718292236328125, | |
| "memory(GiB)": 10.57, | |
| "step": 3155, | |
| "train_speed(iter/s)": 0.475677 | |
| }, | |
| { | |
| "epoch": 2.360171866243228, | |
| "grad_norm": 0.21717097651290396, | |
| "learning_rate": 3.986644254447067e-06, | |
| "loss": 0.084930419921875, | |
| "memory(GiB)": 10.57, | |
| "step": 3160, | |
| "train_speed(iter/s)": 0.475668 | |
| }, | |
| { | |
| "epoch": 2.3639080889221, | |
| "grad_norm": 0.2740183483391346, | |
| "learning_rate": 3.981103108300674e-06, | |
| "loss": 0.08662109375, | |
| "memory(GiB)": 10.57, | |
| "step": 3165, | |
| "train_speed(iter/s)": 0.475671 | |
| }, | |
| { | |
| "epoch": 2.3676443116009716, | |
| "grad_norm": 0.23952800833281973, | |
| "learning_rate": 3.975558211641822e-06, | |
| "loss": 0.085614013671875, | |
| "memory(GiB)": 10.57, | |
| "step": 3170, | |
| "train_speed(iter/s)": 0.475681 | |
| }, | |
| { | |
| "epoch": 2.371380534279843, | |
| "grad_norm": 0.20740773834062282, | |
| "learning_rate": 3.970009585667267e-06, | |
| "loss": 0.0666015625, | |
| "memory(GiB)": 10.57, | |
| "step": 3175, | |
| "train_speed(iter/s)": 0.475702 | |
| }, | |
| { | |
| "epoch": 2.3751167569587146, | |
| "grad_norm": 0.3093587039146876, | |
| "learning_rate": 3.964457251588023e-06, | |
| "loss": 0.07269287109375, | |
| "memory(GiB)": 10.57, | |
| "step": 3180, | |
| "train_speed(iter/s)": 0.475703 | |
| }, | |
| { | |
| "epoch": 2.3788529796375864, | |
| "grad_norm": 0.3535470284455733, | |
| "learning_rate": 3.958901230629277e-06, | |
| "loss": 0.0844482421875, | |
| "memory(GiB)": 10.57, | |
| "step": 3185, | |
| "train_speed(iter/s)": 0.475708 | |
| }, | |
| { | |
| "epoch": 2.382589202316458, | |
| "grad_norm": 0.3279555931100402, | |
| "learning_rate": 3.953341544030311e-06, | |
| "loss": 0.08740234375, | |
| "memory(GiB)": 10.57, | |
| "step": 3190, | |
| "train_speed(iter/s)": 0.475712 | |
| }, | |
| { | |
| "epoch": 2.38632542499533, | |
| "grad_norm": 0.37799827875806785, | |
| "learning_rate": 3.947778213044423e-06, | |
| "loss": 0.06464996337890624, | |
| "memory(GiB)": 10.57, | |
| "step": 3195, | |
| "train_speed(iter/s)": 0.475685 | |
| }, | |
| { | |
| "epoch": 2.3900616476742016, | |
| "grad_norm": 0.21175755993638834, | |
| "learning_rate": 3.942211258938837e-06, | |
| "loss": 0.079998779296875, | |
| "memory(GiB)": 10.57, | |
| "step": 3200, | |
| "train_speed(iter/s)": 0.475655 | |
| }, | |
| { | |
| "epoch": 2.393797870353073, | |
| "grad_norm": 0.3983514672863944, | |
| "learning_rate": 3.936640702994629e-06, | |
| "loss": 0.07978515625, | |
| "memory(GiB)": 10.57, | |
| "step": 3205, | |
| "train_speed(iter/s)": 0.475627 | |
| }, | |
| { | |
| "epoch": 2.3975340930319446, | |
| "grad_norm": 0.3407681935903124, | |
| "learning_rate": 3.931066566506648e-06, | |
| "loss": 0.08079833984375, | |
| "memory(GiB)": 10.57, | |
| "step": 3210, | |
| "train_speed(iter/s)": 0.475614 | |
| }, | |
| { | |
| "epoch": 2.4012703157108164, | |
| "grad_norm": 0.1829141400287362, | |
| "learning_rate": 3.925488870783426e-06, | |
| "loss": 0.08177490234375, | |
| "memory(GiB)": 10.57, | |
| "step": 3215, | |
| "train_speed(iter/s)": 0.475612 | |
| }, | |
| { | |
| "epoch": 2.405006538389688, | |
| "grad_norm": 0.24647777146358466, | |
| "learning_rate": 3.919907637147102e-06, | |
| "loss": 0.081903076171875, | |
| "memory(GiB)": 10.57, | |
| "step": 3220, | |
| "train_speed(iter/s)": 0.475609 | |
| }, | |
| { | |
| "epoch": 2.4087427610685594, | |
| "grad_norm": 0.38090689812957224, | |
| "learning_rate": 3.914322886933341e-06, | |
| "loss": 0.064569091796875, | |
| "memory(GiB)": 10.57, | |
| "step": 3225, | |
| "train_speed(iter/s)": 0.475619 | |
| }, | |
| { | |
| "epoch": 2.412478983747431, | |
| "grad_norm": 0.2666319657744909, | |
| "learning_rate": 3.908734641491248e-06, | |
| "loss": 0.077764892578125, | |
| "memory(GiB)": 10.57, | |
| "step": 3230, | |
| "train_speed(iter/s)": 0.475645 | |
| }, | |
| { | |
| "epoch": 2.416215206426303, | |
| "grad_norm": 0.22804209432893346, | |
| "learning_rate": 3.903142922183294e-06, | |
| "loss": 0.070025634765625, | |
| "memory(GiB)": 10.57, | |
| "step": 3235, | |
| "train_speed(iter/s)": 0.475584 | |
| }, | |
| { | |
| "epoch": 2.4199514291051747, | |
| "grad_norm": 0.23685896651720773, | |
| "learning_rate": 3.897547750385226e-06, | |
| "loss": 0.0831634521484375, | |
| "memory(GiB)": 10.57, | |
| "step": 3240, | |
| "train_speed(iter/s)": 0.475578 | |
| }, | |
| { | |
| "epoch": 2.4236876517840464, | |
| "grad_norm": 0.2355129405846085, | |
| "learning_rate": 3.891949147485989e-06, | |
| "loss": 0.077679443359375, | |
| "memory(GiB)": 10.57, | |
| "step": 3245, | |
| "train_speed(iter/s)": 0.47556 | |
| }, | |
| { | |
| "epoch": 2.427423874462918, | |
| "grad_norm": 0.38970162877110276, | |
| "learning_rate": 3.886347134887647e-06, | |
| "loss": 0.0797607421875, | |
| "memory(GiB)": 10.57, | |
| "step": 3250, | |
| "train_speed(iter/s)": 0.475557 | |
| }, | |
| { | |
| "epoch": 2.4311600971417895, | |
| "grad_norm": 0.2697647074819102, | |
| "learning_rate": 3.8807417340052964e-06, | |
| "loss": 0.0737060546875, | |
| "memory(GiB)": 10.57, | |
| "step": 3255, | |
| "train_speed(iter/s)": 0.475577 | |
| }, | |
| { | |
| "epoch": 2.4348963198206612, | |
| "grad_norm": 0.19920837434880515, | |
| "learning_rate": 3.875132966266987e-06, | |
| "loss": 0.0791748046875, | |
| "memory(GiB)": 10.57, | |
| "step": 3260, | |
| "train_speed(iter/s)": 0.475596 | |
| }, | |
| { | |
| "epoch": 2.438632542499533, | |
| "grad_norm": 0.22217603367413016, | |
| "learning_rate": 3.869520853113637e-06, | |
| "loss": 0.07099609375, | |
| "memory(GiB)": 10.57, | |
| "step": 3265, | |
| "train_speed(iter/s)": 0.475601 | |
| }, | |
| { | |
| "epoch": 2.4423687651784047, | |
| "grad_norm": 0.310354028282849, | |
| "learning_rate": 3.863905415998958e-06, | |
| "loss": 0.075830078125, | |
| "memory(GiB)": 10.57, | |
| "step": 3270, | |
| "train_speed(iter/s)": 0.475595 | |
| }, | |
| { | |
| "epoch": 2.4461049878572765, | |
| "grad_norm": 0.2904199442330529, | |
| "learning_rate": 3.858286676389363e-06, | |
| "loss": 0.07169189453125, | |
| "memory(GiB)": 10.57, | |
| "step": 3275, | |
| "train_speed(iter/s)": 0.475577 | |
| }, | |
| { | |
| "epoch": 2.449841210536148, | |
| "grad_norm": 0.2671154417988313, | |
| "learning_rate": 3.852664655763891e-06, | |
| "loss": 0.0576446533203125, | |
| "memory(GiB)": 10.57, | |
| "step": 3280, | |
| "train_speed(iter/s)": 0.475573 | |
| }, | |
| { | |
| "epoch": 2.4535774332150195, | |
| "grad_norm": 0.2117803221633462, | |
| "learning_rate": 3.8470393756141285e-06, | |
| "loss": 0.070208740234375, | |
| "memory(GiB)": 10.57, | |
| "step": 3285, | |
| "train_speed(iter/s)": 0.475569 | |
| }, | |
| { | |
| "epoch": 2.4573136558938913, | |
| "grad_norm": 0.28365805075568284, | |
| "learning_rate": 3.8414108574441155e-06, | |
| "loss": 0.07728271484375, | |
| "memory(GiB)": 10.57, | |
| "step": 3290, | |
| "train_speed(iter/s)": 0.475604 | |
| }, | |
| { | |
| "epoch": 2.461049878572763, | |
| "grad_norm": 0.26559512910109384, | |
| "learning_rate": 3.835779122770274e-06, | |
| "loss": 0.07513427734375, | |
| "memory(GiB)": 10.57, | |
| "step": 3295, | |
| "train_speed(iter/s)": 0.475628 | |
| }, | |
| { | |
| "epoch": 2.4647861012516348, | |
| "grad_norm": 0.31583700464598574, | |
| "learning_rate": 3.830144193121321e-06, | |
| "loss": 0.0657806396484375, | |
| "memory(GiB)": 10.57, | |
| "step": 3300, | |
| "train_speed(iter/s)": 0.475643 | |
| }, | |
| { | |
| "epoch": 2.468522323930506, | |
| "grad_norm": 0.2884092438790019, | |
| "learning_rate": 3.824506090038185e-06, | |
| "loss": 0.091070556640625, | |
| "memory(GiB)": 10.57, | |
| "step": 3305, | |
| "train_speed(iter/s)": 0.475667 | |
| }, | |
| { | |
| "epoch": 2.472258546609378, | |
| "grad_norm": 0.3977319977360202, | |
| "learning_rate": 3.818864835073931e-06, | |
| "loss": 0.0851806640625, | |
| "memory(GiB)": 10.57, | |
| "step": 3310, | |
| "train_speed(iter/s)": 0.475693 | |
| }, | |
| { | |
| "epoch": 2.4759947692882496, | |
| "grad_norm": 0.3494999636811868, | |
| "learning_rate": 3.813220449793667e-06, | |
| "loss": 0.064434814453125, | |
| "memory(GiB)": 10.57, | |
| "step": 3315, | |
| "train_speed(iter/s)": 0.475688 | |
| }, | |
| { | |
| "epoch": 2.4797309919671213, | |
| "grad_norm": 0.17667298355698585, | |
| "learning_rate": 3.8075729557744706e-06, | |
| "loss": 0.06602783203125, | |
| "memory(GiB)": 10.57, | |
| "step": 3320, | |
| "train_speed(iter/s)": 0.475718 | |
| }, | |
| { | |
| "epoch": 2.483467214645993, | |
| "grad_norm": 0.2847260138841454, | |
| "learning_rate": 3.8019223746053037e-06, | |
| "loss": 0.0813232421875, | |
| "memory(GiB)": 10.57, | |
| "step": 3325, | |
| "train_speed(iter/s)": 0.47572 | |
| }, | |
| { | |
| "epoch": 2.4872034373248644, | |
| "grad_norm": 0.3276391701017016, | |
| "learning_rate": 3.7962687278869266e-06, | |
| "loss": 0.084173583984375, | |
| "memory(GiB)": 10.57, | |
| "step": 3330, | |
| "train_speed(iter/s)": 0.47573 | |
| }, | |
| { | |
| "epoch": 2.490939660003736, | |
| "grad_norm": 0.20750116064295474, | |
| "learning_rate": 3.7906120372318237e-06, | |
| "loss": 0.055908203125, | |
| "memory(GiB)": 10.57, | |
| "step": 3335, | |
| "train_speed(iter/s)": 0.475771 | |
| }, | |
| { | |
| "epoch": 2.494675882682608, | |
| "grad_norm": 0.21852160072540378, | |
| "learning_rate": 3.784952324264109e-06, | |
| "loss": 0.075030517578125, | |
| "memory(GiB)": 10.57, | |
| "step": 3340, | |
| "train_speed(iter/s)": 0.475804 | |
| }, | |
| { | |
| "epoch": 2.4984121053614796, | |
| "grad_norm": 0.24279228051631654, | |
| "learning_rate": 3.779289610619455e-06, | |
| "loss": 0.07666015625, | |
| "memory(GiB)": 10.57, | |
| "step": 3345, | |
| "train_speed(iter/s)": 0.475805 | |
| }, | |
| { | |
| "epoch": 2.5021483280403514, | |
| "grad_norm": 0.2904472098375547, | |
| "learning_rate": 3.773623917945004e-06, | |
| "loss": 0.092840576171875, | |
| "memory(GiB)": 10.57, | |
| "step": 3350, | |
| "train_speed(iter/s)": 0.475809 | |
| }, | |
| { | |
| "epoch": 2.505884550719223, | |
| "grad_norm": 0.3311881989863495, | |
| "learning_rate": 3.7679552678992854e-06, | |
| "loss": 0.07431640625, | |
| "memory(GiB)": 10.57, | |
| "step": 3355, | |
| "train_speed(iter/s)": 0.475802 | |
| }, | |
| { | |
| "epoch": 2.5096207733980944, | |
| "grad_norm": 0.347020365516737, | |
| "learning_rate": 3.7622836821521346e-06, | |
| "loss": 0.083404541015625, | |
| "memory(GiB)": 10.57, | |
| "step": 3360, | |
| "train_speed(iter/s)": 0.475755 | |
| }, | |
| { | |
| "epoch": 2.513356996076966, | |
| "grad_norm": 0.30218078744076704, | |
| "learning_rate": 3.7566091823846082e-06, | |
| "loss": 0.080633544921875, | |
| "memory(GiB)": 10.57, | |
| "step": 3365, | |
| "train_speed(iter/s)": 0.475751 | |
| }, | |
| { | |
| "epoch": 2.517093218755838, | |
| "grad_norm": 0.19250830743626035, | |
| "learning_rate": 3.750931790288904e-06, | |
| "loss": 0.070989990234375, | |
| "memory(GiB)": 10.57, | |
| "step": 3370, | |
| "train_speed(iter/s)": 0.475766 | |
| }, | |
| { | |
| "epoch": 2.5208294414347097, | |
| "grad_norm": 0.3140116665074889, | |
| "learning_rate": 3.745251527568276e-06, | |
| "loss": 0.08988037109375, | |
| "memory(GiB)": 10.57, | |
| "step": 3375, | |
| "train_speed(iter/s)": 0.475765 | |
| }, | |
| { | |
| "epoch": 2.524565664113581, | |
| "grad_norm": 0.27965921080609724, | |
| "learning_rate": 3.7395684159369515e-06, | |
| "loss": 0.0727783203125, | |
| "memory(GiB)": 10.57, | |
| "step": 3380, | |
| "train_speed(iter/s)": 0.475783 | |
| }, | |
| { | |
| "epoch": 2.5283018867924527, | |
| "grad_norm": 0.2825039712001602, | |
| "learning_rate": 3.733882477120049e-06, | |
| "loss": 0.07235107421875, | |
| "memory(GiB)": 10.57, | |
| "step": 3385, | |
| "train_speed(iter/s)": 0.475777 | |
| }, | |
| { | |
| "epoch": 2.5320381094713245, | |
| "grad_norm": 0.2817704189737431, | |
| "learning_rate": 3.7281937328534927e-06, | |
| "loss": 0.07215576171875, | |
| "memory(GiB)": 10.57, | |
| "step": 3390, | |
| "train_speed(iter/s)": 0.475785 | |
| }, | |
| { | |
| "epoch": 2.535774332150196, | |
| "grad_norm": 0.2984895644961484, | |
| "learning_rate": 3.7225022048839364e-06, | |
| "loss": 0.07979736328125, | |
| "memory(GiB)": 10.57, | |
| "step": 3395, | |
| "train_speed(iter/s)": 0.475804 | |
| }, | |
| { | |
| "epoch": 2.539510554829068, | |
| "grad_norm": 0.4297688864469516, | |
| "learning_rate": 3.716807914968669e-06, | |
| "loss": 0.0768310546875, | |
| "memory(GiB)": 10.57, | |
| "step": 3400, | |
| "train_speed(iter/s)": 0.475802 | |
| }, | |
| { | |
| "epoch": 2.5432467775079397, | |
| "grad_norm": 0.2540092842763994, | |
| "learning_rate": 3.7111108848755407e-06, | |
| "loss": 0.080731201171875, | |
| "memory(GiB)": 10.57, | |
| "step": 3405, | |
| "train_speed(iter/s)": 0.475804 | |
| }, | |
| { | |
| "epoch": 2.546983000186811, | |
| "grad_norm": 0.218855865695132, | |
| "learning_rate": 3.705411136382877e-06, | |
| "loss": 0.07509765625, | |
| "memory(GiB)": 10.57, | |
| "step": 3410, | |
| "train_speed(iter/s)": 0.475824 | |
| }, | |
| { | |
| "epoch": 2.5507192228656828, | |
| "grad_norm": 0.31386617014735185, | |
| "learning_rate": 3.6997086912793953e-06, | |
| "loss": 0.08365478515625, | |
| "memory(GiB)": 10.57, | |
| "step": 3415, | |
| "train_speed(iter/s)": 0.475796 | |
| }, | |
| { | |
| "epoch": 2.5544554455445545, | |
| "grad_norm": 0.2888393651203557, | |
| "learning_rate": 3.69400357136412e-06, | |
| "loss": 0.08245849609375, | |
| "memory(GiB)": 10.57, | |
| "step": 3420, | |
| "train_speed(iter/s)": 0.475804 | |
| }, | |
| { | |
| "epoch": 2.5581916682234263, | |
| "grad_norm": 0.518767980813791, | |
| "learning_rate": 3.6882957984463014e-06, | |
| "loss": 0.084869384765625, | |
| "memory(GiB)": 10.57, | |
| "step": 3425, | |
| "train_speed(iter/s)": 0.475798 | |
| }, | |
| { | |
| "epoch": 2.5619278909022976, | |
| "grad_norm": 0.24055934018386763, | |
| "learning_rate": 3.6825853943453326e-06, | |
| "loss": 0.07509765625, | |
| "memory(GiB)": 10.57, | |
| "step": 3430, | |
| "train_speed(iter/s)": 0.475815 | |
| }, | |
| { | |
| "epoch": 2.5656641135811693, | |
| "grad_norm": 0.11607703015154515, | |
| "learning_rate": 3.6768723808906624e-06, | |
| "loss": 0.0733642578125, | |
| "memory(GiB)": 10.57, | |
| "step": 3435, | |
| "train_speed(iter/s)": 0.475839 | |
| }, | |
| { | |
| "epoch": 2.569400336260041, | |
| "grad_norm": 0.2621128311109813, | |
| "learning_rate": 3.6711567799217177e-06, | |
| "loss": 0.07127685546875, | |
| "memory(GiB)": 10.57, | |
| "step": 3440, | |
| "train_speed(iter/s)": 0.475869 | |
| }, | |
| { | |
| "epoch": 2.573136558938913, | |
| "grad_norm": 0.4650255643831401, | |
| "learning_rate": 3.6654386132878153e-06, | |
| "loss": 0.07940673828125, | |
| "memory(GiB)": 10.57, | |
| "step": 3445, | |
| "train_speed(iter/s)": 0.475873 | |
| }, | |
| { | |
| "epoch": 2.5768727816177845, | |
| "grad_norm": 0.3724024885268326, | |
| "learning_rate": 3.659717902848079e-06, | |
| "loss": 0.07889404296875, | |
| "memory(GiB)": 10.57, | |
| "step": 3450, | |
| "train_speed(iter/s)": 0.475871 | |
| }, | |
| { | |
| "epoch": 2.5806090042966563, | |
| "grad_norm": 0.23714008480261214, | |
| "learning_rate": 3.653994670471358e-06, | |
| "loss": 0.062042236328125, | |
| "memory(GiB)": 10.57, | |
| "step": 3455, | |
| "train_speed(iter/s)": 0.475898 | |
| }, | |
| { | |
| "epoch": 2.5843452269755276, | |
| "grad_norm": 0.38138493209988716, | |
| "learning_rate": 3.6482689380361434e-06, | |
| "loss": 0.078564453125, | |
| "memory(GiB)": 10.57, | |
| "step": 3460, | |
| "train_speed(iter/s)": 0.47589 | |
| }, | |
| { | |
| "epoch": 2.5880814496543993, | |
| "grad_norm": 0.2790205903786827, | |
| "learning_rate": 3.6425407274304794e-06, | |
| "loss": 0.07850341796875, | |
| "memory(GiB)": 10.57, | |
| "step": 3465, | |
| "train_speed(iter/s)": 0.475897 | |
| }, | |
| { | |
| "epoch": 2.591817672333271, | |
| "grad_norm": 0.28268894066227623, | |
| "learning_rate": 3.6368100605518895e-06, | |
| "loss": 0.080084228515625, | |
| "memory(GiB)": 10.57, | |
| "step": 3470, | |
| "train_speed(iter/s)": 0.4759 | |
| }, | |
| { | |
| "epoch": 2.595553895012143, | |
| "grad_norm": 0.40313615278345716, | |
| "learning_rate": 3.631076959307282e-06, | |
| "loss": 0.085107421875, | |
| "memory(GiB)": 10.57, | |
| "step": 3475, | |
| "train_speed(iter/s)": 0.475908 | |
| }, | |
| { | |
| "epoch": 2.599290117691014, | |
| "grad_norm": 0.2734351199877751, | |
| "learning_rate": 3.625341445612872e-06, | |
| "loss": 0.084490966796875, | |
| "memory(GiB)": 10.57, | |
| "step": 3480, | |
| "train_speed(iter/s)": 0.475939 | |
| }, | |
| { | |
| "epoch": 2.603026340369886, | |
| "grad_norm": 0.24165164144941384, | |
| "learning_rate": 3.6196035413941004e-06, | |
| "loss": 0.075732421875, | |
| "memory(GiB)": 10.57, | |
| "step": 3485, | |
| "train_speed(iter/s)": 0.475926 | |
| }, | |
| { | |
| "epoch": 2.6067625630487576, | |
| "grad_norm": 0.22587276792049774, | |
| "learning_rate": 3.6138632685855416e-06, | |
| "loss": 0.06920166015625, | |
| "memory(GiB)": 10.57, | |
| "step": 3490, | |
| "train_speed(iter/s)": 0.47595 | |
| }, | |
| { | |
| "epoch": 2.6104987857276294, | |
| "grad_norm": 0.26274757578605296, | |
| "learning_rate": 3.608120649130827e-06, | |
| "loss": 0.06964111328125, | |
| "memory(GiB)": 10.57, | |
| "step": 3495, | |
| "train_speed(iter/s)": 0.475958 | |
| }, | |
| { | |
| "epoch": 2.614235008406501, | |
| "grad_norm": 0.2791749381588521, | |
| "learning_rate": 3.602375704982559e-06, | |
| "loss": 0.082159423828125, | |
| "memory(GiB)": 10.57, | |
| "step": 3500, | |
| "train_speed(iter/s)": 0.475942 | |
| }, | |
| { | |
| "epoch": 2.617971231085373, | |
| "grad_norm": 0.19097386934636804, | |
| "learning_rate": 3.5966284581022256e-06, | |
| "loss": 0.071124267578125, | |
| "memory(GiB)": 10.57, | |
| "step": 3505, | |
| "train_speed(iter/s)": 0.475946 | |
| }, | |
| { | |
| "epoch": 2.621707453764244, | |
| "grad_norm": 0.30489359623246215, | |
| "learning_rate": 3.5908789304601187e-06, | |
| "loss": 0.0773193359375, | |
| "memory(GiB)": 10.57, | |
| "step": 3510, | |
| "train_speed(iter/s)": 0.475924 | |
| }, | |
| { | |
| "epoch": 2.625443676443116, | |
| "grad_norm": 0.3251670210353117, | |
| "learning_rate": 3.585127144035247e-06, | |
| "loss": 0.0652557373046875, | |
| "memory(GiB)": 10.57, | |
| "step": 3515, | |
| "train_speed(iter/s)": 0.475915 | |
| }, | |
| { | |
| "epoch": 2.6291798991219877, | |
| "grad_norm": 0.47973710424124294, | |
| "learning_rate": 3.579373120815257e-06, | |
| "loss": 0.0652099609375, | |
| "memory(GiB)": 10.57, | |
| "step": 3520, | |
| "train_speed(iter/s)": 0.47582 | |
| }, | |
| { | |
| "epoch": 2.6329161218008594, | |
| "grad_norm": 0.251813320258894, | |
| "learning_rate": 3.5736168827963423e-06, | |
| "loss": 0.0735595703125, | |
| "memory(GiB)": 10.57, | |
| "step": 3525, | |
| "train_speed(iter/s)": 0.475822 | |
| }, | |
| { | |
| "epoch": 2.6366523444797307, | |
| "grad_norm": 0.16642948523661447, | |
| "learning_rate": 3.567858451983167e-06, | |
| "loss": 0.0711456298828125, | |
| "memory(GiB)": 10.57, | |
| "step": 3530, | |
| "train_speed(iter/s)": 0.475776 | |
| }, | |
| { | |
| "epoch": 2.6403885671586025, | |
| "grad_norm": 0.2232206082433094, | |
| "learning_rate": 3.562097850388775e-06, | |
| "loss": 0.08082275390625, | |
| "memory(GiB)": 10.57, | |
| "step": 3535, | |
| "train_speed(iter/s)": 0.475792 | |
| }, | |
| { | |
| "epoch": 2.6441247898374742, | |
| "grad_norm": 0.29955499401855273, | |
| "learning_rate": 3.5563351000345077e-06, | |
| "loss": 0.06729736328125, | |
| "memory(GiB)": 10.57, | |
| "step": 3540, | |
| "train_speed(iter/s)": 0.475806 | |
| }, | |
| { | |
| "epoch": 2.647861012516346, | |
| "grad_norm": 0.3399121760483779, | |
| "learning_rate": 3.5505702229499243e-06, | |
| "loss": 0.0638671875, | |
| "memory(GiB)": 10.57, | |
| "step": 3545, | |
| "train_speed(iter/s)": 0.475786 | |
| }, | |
| { | |
| "epoch": 2.6515972351952177, | |
| "grad_norm": 0.24813478944145864, | |
| "learning_rate": 3.5448032411727123e-06, | |
| "loss": 0.073760986328125, | |
| "memory(GiB)": 10.57, | |
| "step": 3550, | |
| "train_speed(iter/s)": 0.475775 | |
| }, | |
| { | |
| "epoch": 2.6553334578740895, | |
| "grad_norm": 0.20754012538401892, | |
| "learning_rate": 3.539034176748602e-06, | |
| "loss": 0.069378662109375, | |
| "memory(GiB)": 10.57, | |
| "step": 3555, | |
| "train_speed(iter/s)": 0.475759 | |
| }, | |
| { | |
| "epoch": 2.6590696805529612, | |
| "grad_norm": 0.3300071479044449, | |
| "learning_rate": 3.53326305173129e-06, | |
| "loss": 0.0831787109375, | |
| "memory(GiB)": 10.57, | |
| "step": 3560, | |
| "train_speed(iter/s)": 0.475747 | |
| }, | |
| { | |
| "epoch": 2.6628059032318325, | |
| "grad_norm": 0.2418845408277716, | |
| "learning_rate": 3.5274898881823466e-06, | |
| "loss": 0.0650390625, | |
| "memory(GiB)": 10.57, | |
| "step": 3565, | |
| "train_speed(iter/s)": 0.475754 | |
| }, | |
| { | |
| "epoch": 2.6665421259107043, | |
| "grad_norm": 0.191875325205025, | |
| "learning_rate": 3.5217147081711363e-06, | |
| "loss": 0.07650146484375, | |
| "memory(GiB)": 10.57, | |
| "step": 3570, | |
| "train_speed(iter/s)": 0.475774 | |
| }, | |
| { | |
| "epoch": 2.670278348589576, | |
| "grad_norm": 0.2918403056701858, | |
| "learning_rate": 3.515937533774732e-06, | |
| "loss": 0.0787841796875, | |
| "memory(GiB)": 10.57, | |
| "step": 3575, | |
| "train_speed(iter/s)": 0.475801 | |
| }, | |
| { | |
| "epoch": 2.6740145712684473, | |
| "grad_norm": 0.2103497141365804, | |
| "learning_rate": 3.51015838707783e-06, | |
| "loss": 0.083331298828125, | |
| "memory(GiB)": 10.57, | |
| "step": 3580, | |
| "train_speed(iter/s)": 0.475836 | |
| }, | |
| { | |
| "epoch": 2.677750793947319, | |
| "grad_norm": 0.15535646417219773, | |
| "learning_rate": 3.504377290172666e-06, | |
| "loss": 0.0805419921875, | |
| "memory(GiB)": 10.57, | |
| "step": 3585, | |
| "train_speed(iter/s)": 0.475811 | |
| }, | |
| { | |
| "epoch": 2.681487016626191, | |
| "grad_norm": 0.2156487636541889, | |
| "learning_rate": 3.498594265158933e-06, | |
| "loss": 0.0731689453125, | |
| "memory(GiB)": 10.57, | |
| "step": 3590, | |
| "train_speed(iter/s)": 0.47582 | |
| }, | |
| { | |
| "epoch": 2.6852232393050626, | |
| "grad_norm": 0.31756593216849865, | |
| "learning_rate": 3.4928093341436915e-06, | |
| "loss": 0.08016357421875, | |
| "memory(GiB)": 10.57, | |
| "step": 3595, | |
| "train_speed(iter/s)": 0.475826 | |
| }, | |
| { | |
| "epoch": 2.6889594619839343, | |
| "grad_norm": 0.17993011176812954, | |
| "learning_rate": 3.4870225192412908e-06, | |
| "loss": 0.068292236328125, | |
| "memory(GiB)": 10.57, | |
| "step": 3600, | |
| "train_speed(iter/s)": 0.475817 | |
| }, | |
| { | |
| "epoch": 2.692695684662806, | |
| "grad_norm": 0.2563812995989066, | |
| "learning_rate": 3.4812338425732808e-06, | |
| "loss": 0.09036865234375, | |
| "memory(GiB)": 10.57, | |
| "step": 3605, | |
| "train_speed(iter/s)": 0.475841 | |
| }, | |
| { | |
| "epoch": 2.696431907341678, | |
| "grad_norm": 0.21729858304510458, | |
| "learning_rate": 3.4754433262683286e-06, | |
| "loss": 0.070880126953125, | |
| "memory(GiB)": 10.57, | |
| "step": 3610, | |
| "train_speed(iter/s)": 0.475864 | |
| }, | |
| { | |
| "epoch": 2.700168130020549, | |
| "grad_norm": 0.4448881083896266, | |
| "learning_rate": 3.4696509924621324e-06, | |
| "loss": 0.090478515625, | |
| "memory(GiB)": 10.57, | |
| "step": 3615, | |
| "train_speed(iter/s)": 0.475831 | |
| }, | |
| { | |
| "epoch": 2.703904352699421, | |
| "grad_norm": 0.29692075196588846, | |
| "learning_rate": 3.463856863297341e-06, | |
| "loss": 0.078076171875, | |
| "memory(GiB)": 10.57, | |
| "step": 3620, | |
| "train_speed(iter/s)": 0.475848 | |
| }, | |
| { | |
| "epoch": 2.7076405753782926, | |
| "grad_norm": 0.31954279997414836, | |
| "learning_rate": 3.4580609609234648e-06, | |
| "loss": 0.07919921875, | |
| "memory(GiB)": 10.57, | |
| "step": 3625, | |
| "train_speed(iter/s)": 0.475834 | |
| }, | |
| { | |
| "epoch": 2.7113767980571644, | |
| "grad_norm": 0.1723702450513143, | |
| "learning_rate": 3.4522633074967915e-06, | |
| "loss": 0.074517822265625, | |
| "memory(GiB)": 10.57, | |
| "step": 3630, | |
| "train_speed(iter/s)": 0.475811 | |
| }, | |
| { | |
| "epoch": 2.7151130207360357, | |
| "grad_norm": 0.22262320422842827, | |
| "learning_rate": 3.4464639251803052e-06, | |
| "loss": 0.070367431640625, | |
| "memory(GiB)": 10.57, | |
| "step": 3635, | |
| "train_speed(iter/s)": 0.475826 | |
| }, | |
| { | |
| "epoch": 2.7188492434149074, | |
| "grad_norm": 0.28450955603049155, | |
| "learning_rate": 3.4406628361435986e-06, | |
| "loss": 0.08800048828125, | |
| "memory(GiB)": 10.57, | |
| "step": 3640, | |
| "train_speed(iter/s)": 0.475849 | |
| }, | |
| { | |
| "epoch": 2.722585466093779, | |
| "grad_norm": 0.3537764688990701, | |
| "learning_rate": 3.4348600625627853e-06, | |
| "loss": 0.08115081787109375, | |
| "memory(GiB)": 10.57, | |
| "step": 3645, | |
| "train_speed(iter/s)": 0.475856 | |
| }, | |
| { | |
| "epoch": 2.726321688772651, | |
| "grad_norm": 0.2717562915869466, | |
| "learning_rate": 3.4290556266204255e-06, | |
| "loss": 0.06995849609375, | |
| "memory(GiB)": 10.57, | |
| "step": 3650, | |
| "train_speed(iter/s)": 0.475855 | |
| }, | |
| { | |
| "epoch": 2.7300579114515227, | |
| "grad_norm": 0.22750796325018738, | |
| "learning_rate": 3.4232495505054263e-06, | |
| "loss": 0.071771240234375, | |
| "memory(GiB)": 10.57, | |
| "step": 3655, | |
| "train_speed(iter/s)": 0.475875 | |
| }, | |
| { | |
| "epoch": 2.7337941341303944, | |
| "grad_norm": 0.15412260555395027, | |
| "learning_rate": 3.4174418564129683e-06, | |
| "loss": 0.07366943359375, | |
| "memory(GiB)": 10.57, | |
| "step": 3660, | |
| "train_speed(iter/s)": 0.475851 | |
| }, | |
| { | |
| "epoch": 2.7375303568092657, | |
| "grad_norm": 0.22006647714355373, | |
| "learning_rate": 3.4116325665444205e-06, | |
| "loss": 0.07138671875, | |
| "memory(GiB)": 10.57, | |
| "step": 3665, | |
| "train_speed(iter/s)": 0.475871 | |
| }, | |
| { | |
| "epoch": 2.7412665794881375, | |
| "grad_norm": 0.42373302378912014, | |
| "learning_rate": 3.405821703107247e-06, | |
| "loss": 0.081640625, | |
| "memory(GiB)": 10.57, | |
| "step": 3670, | |
| "train_speed(iter/s)": 0.475866 | |
| }, | |
| { | |
| "epoch": 2.745002802167009, | |
| "grad_norm": 0.25034251347665165, | |
| "learning_rate": 3.4000092883149293e-06, | |
| "loss": 0.07459716796875, | |
| "memory(GiB)": 10.57, | |
| "step": 3675, | |
| "train_speed(iter/s)": 0.475862 | |
| }, | |
| { | |
| "epoch": 2.748739024845881, | |
| "grad_norm": 0.26815460719783096, | |
| "learning_rate": 3.3941953443868794e-06, | |
| "loss": 0.0758056640625, | |
| "memory(GiB)": 10.57, | |
| "step": 3680, | |
| "train_speed(iter/s)": 0.475869 | |
| }, | |
| { | |
| "epoch": 2.7524752475247523, | |
| "grad_norm": 0.3488626865913072, | |
| "learning_rate": 3.388379893548356e-06, | |
| "loss": 0.076416015625, | |
| "memory(GiB)": 10.57, | |
| "step": 3685, | |
| "train_speed(iter/s)": 0.475889 | |
| }, | |
| { | |
| "epoch": 2.756211470203624, | |
| "grad_norm": 0.2927879301365204, | |
| "learning_rate": 3.382562958030375e-06, | |
| "loss": 0.072265625, | |
| "memory(GiB)": 10.57, | |
| "step": 3690, | |
| "train_speed(iter/s)": 0.475894 | |
| }, | |
| { | |
| "epoch": 2.7599476928824958, | |
| "grad_norm": 0.39819039701808595, | |
| "learning_rate": 3.376744560069631e-06, | |
| "loss": 0.0801025390625, | |
| "memory(GiB)": 10.57, | |
| "step": 3695, | |
| "train_speed(iter/s)": 0.475889 | |
| }, | |
| { | |
| "epoch": 2.7636839155613675, | |
| "grad_norm": 0.27836721809953646, | |
| "learning_rate": 3.370924721908408e-06, | |
| "loss": 0.081817626953125, | |
| "memory(GiB)": 10.57, | |
| "step": 3700, | |
| "train_speed(iter/s)": 0.475851 | |
| }, | |
| { | |
| "epoch": 2.7674201382402392, | |
| "grad_norm": 0.3159510466408062, | |
| "learning_rate": 3.3651034657944944e-06, | |
| "loss": 0.09007568359375, | |
| "memory(GiB)": 10.57, | |
| "step": 3705, | |
| "train_speed(iter/s)": 0.475839 | |
| }, | |
| { | |
| "epoch": 2.771156360919111, | |
| "grad_norm": 0.2482343530491869, | |
| "learning_rate": 3.3592808139811034e-06, | |
| "loss": 0.08701171875, | |
| "memory(GiB)": 10.57, | |
| "step": 3710, | |
| "train_speed(iter/s)": 0.475854 | |
| }, | |
| { | |
| "epoch": 2.7748925835979823, | |
| "grad_norm": 0.2212717362508163, | |
| "learning_rate": 3.353456788726778e-06, | |
| "loss": 0.089019775390625, | |
| "memory(GiB)": 10.57, | |
| "step": 3715, | |
| "train_speed(iter/s)": 0.475852 | |
| }, | |
| { | |
| "epoch": 2.778628806276854, | |
| "grad_norm": 0.3180240539309867, | |
| "learning_rate": 3.347631412295314e-06, | |
| "loss": 0.078448486328125, | |
| "memory(GiB)": 10.57, | |
| "step": 3720, | |
| "train_speed(iter/s)": 0.475768 | |
| }, | |
| { | |
| "epoch": 2.782365028955726, | |
| "grad_norm": 0.19694686614220888, | |
| "learning_rate": 3.341804706955673e-06, | |
| "loss": 0.071771240234375, | |
| "memory(GiB)": 10.57, | |
| "step": 3725, | |
| "train_speed(iter/s)": 0.475772 | |
| }, | |
| { | |
| "epoch": 2.7861012516345975, | |
| "grad_norm": 0.27207148460273645, | |
| "learning_rate": 3.335976694981898e-06, | |
| "loss": 0.071990966796875, | |
| "memory(GiB)": 10.57, | |
| "step": 3730, | |
| "train_speed(iter/s)": 0.475786 | |
| }, | |
| { | |
| "epoch": 2.789837474313469, | |
| "grad_norm": 0.2784440972147361, | |
| "learning_rate": 3.3301473986530204e-06, | |
| "loss": 0.08033447265625, | |
| "memory(GiB)": 10.57, | |
| "step": 3735, | |
| "train_speed(iter/s)": 0.475803 | |
| }, | |
| { | |
| "epoch": 2.7935736969923406, | |
| "grad_norm": 0.384630172157372, | |
| "learning_rate": 3.3243168402529903e-06, | |
| "loss": 0.07603759765625, | |
| "memory(GiB)": 10.57, | |
| "step": 3740, | |
| "train_speed(iter/s)": 0.475835 | |
| }, | |
| { | |
| "epoch": 2.7973099196712123, | |
| "grad_norm": 0.3015764425828606, | |
| "learning_rate": 3.318485042070576e-06, | |
| "loss": 0.070220947265625, | |
| "memory(GiB)": 10.57, | |
| "step": 3745, | |
| "train_speed(iter/s)": 0.475867 | |
| }, | |
| { | |
| "epoch": 2.801046142350084, | |
| "grad_norm": 0.33638080331152426, | |
| "learning_rate": 3.3126520263992883e-06, | |
| "loss": 0.078277587890625, | |
| "memory(GiB)": 10.57, | |
| "step": 3750, | |
| "train_speed(iter/s)": 0.475859 | |
| }, | |
| { | |
| "epoch": 2.804782365028956, | |
| "grad_norm": 0.2624352148398618, | |
| "learning_rate": 3.306817815537291e-06, | |
| "loss": 0.0696044921875, | |
| "memory(GiB)": 10.57, | |
| "step": 3755, | |
| "train_speed(iter/s)": 0.47588 | |
| }, | |
| { | |
| "epoch": 2.8085185877078276, | |
| "grad_norm": 0.27781369223511193, | |
| "learning_rate": 3.3009824317873164e-06, | |
| "loss": 0.058050537109375, | |
| "memory(GiB)": 10.57, | |
| "step": 3760, | |
| "train_speed(iter/s)": 0.475896 | |
| }, | |
| { | |
| "epoch": 2.812254810386699, | |
| "grad_norm": 0.1340015202269091, | |
| "learning_rate": 3.2951458974565808e-06, | |
| "loss": 0.08018798828125, | |
| "memory(GiB)": 10.57, | |
| "step": 3765, | |
| "train_speed(iter/s)": 0.475889 | |
| }, | |
| { | |
| "epoch": 2.8159910330655706, | |
| "grad_norm": 0.20980509524344693, | |
| "learning_rate": 3.2893082348567e-06, | |
| "loss": 0.069110107421875, | |
| "memory(GiB)": 10.57, | |
| "step": 3770, | |
| "train_speed(iter/s)": 0.475909 | |
| }, | |
| { | |
| "epoch": 2.8197272557444424, | |
| "grad_norm": 0.2501876137757298, | |
| "learning_rate": 3.2834694663036016e-06, | |
| "loss": 0.07905120849609375, | |
| "memory(GiB)": 10.57, | |
| "step": 3775, | |
| "train_speed(iter/s)": 0.475912 | |
| }, | |
| { | |
| "epoch": 2.823463478423314, | |
| "grad_norm": 0.23719398237463618, | |
| "learning_rate": 3.2776296141174405e-06, | |
| "loss": 0.07977294921875, | |
| "memory(GiB)": 10.57, | |
| "step": 3780, | |
| "train_speed(iter/s)": 0.475894 | |
| }, | |
| { | |
| "epoch": 2.8271997011021854, | |
| "grad_norm": 0.2019112294748815, | |
| "learning_rate": 3.271788700622517e-06, | |
| "loss": 0.067169189453125, | |
| "memory(GiB)": 10.57, | |
| "step": 3785, | |
| "train_speed(iter/s)": 0.475868 | |
| }, | |
| { | |
| "epoch": 2.830935923781057, | |
| "grad_norm": 0.2569708345412305, | |
| "learning_rate": 3.265946748147185e-06, | |
| "loss": 0.08135986328125, | |
| "memory(GiB)": 10.57, | |
| "step": 3790, | |
| "train_speed(iter/s)": 0.475837 | |
| }, | |
| { | |
| "epoch": 2.834672146459929, | |
| "grad_norm": 0.3039925539901921, | |
| "learning_rate": 3.2601037790237713e-06, | |
| "loss": 0.0752685546875, | |
| "memory(GiB)": 10.57, | |
| "step": 3795, | |
| "train_speed(iter/s)": 0.475822 | |
| }, | |
| { | |
| "epoch": 2.8384083691388007, | |
| "grad_norm": 0.31263171782477395, | |
| "learning_rate": 3.2542598155884905e-06, | |
| "loss": 0.079345703125, | |
| "memory(GiB)": 10.57, | |
| "step": 3800, | |
| "train_speed(iter/s)": 0.475843 | |
| }, | |
| { | |
| "epoch": 2.8421445918176724, | |
| "grad_norm": 0.3194717938532269, | |
| "learning_rate": 3.2484148801813564e-06, | |
| "loss": 0.0697540283203125, | |
| "memory(GiB)": 10.57, | |
| "step": 3805, | |
| "train_speed(iter/s)": 0.475787 | |
| }, | |
| { | |
| "epoch": 2.845880814496544, | |
| "grad_norm": 0.2414957673117366, | |
| "learning_rate": 3.242568995146099e-06, | |
| "loss": 0.079833984375, | |
| "memory(GiB)": 10.57, | |
| "step": 3810, | |
| "train_speed(iter/s)": 0.475812 | |
| }, | |
| { | |
| "epoch": 2.849617037175416, | |
| "grad_norm": 0.29766797126278466, | |
| "learning_rate": 3.2367221828300797e-06, | |
| "loss": 0.07156982421875, | |
| "memory(GiB)": 10.57, | |
| "step": 3815, | |
| "train_speed(iter/s)": 0.475822 | |
| }, | |
| { | |
| "epoch": 2.8533532598542872, | |
| "grad_norm": 0.33562960678102366, | |
| "learning_rate": 3.2308744655842023e-06, | |
| "loss": 0.07691650390625, | |
| "memory(GiB)": 10.57, | |
| "step": 3820, | |
| "train_speed(iter/s)": 0.475844 | |
| }, | |
| { | |
| "epoch": 2.857089482533159, | |
| "grad_norm": 0.23249083043588517, | |
| "learning_rate": 3.2250258657628317e-06, | |
| "loss": 0.0674591064453125, | |
| "memory(GiB)": 10.57, | |
| "step": 3825, | |
| "train_speed(iter/s)": 0.475864 | |
| }, | |
| { | |
| "epoch": 2.8608257052120307, | |
| "grad_norm": 0.3868842372829782, | |
| "learning_rate": 3.2191764057237057e-06, | |
| "loss": 0.0788818359375, | |
| "memory(GiB)": 10.57, | |
| "step": 3830, | |
| "train_speed(iter/s)": 0.475867 | |
| }, | |
| { | |
| "epoch": 2.864561927890902, | |
| "grad_norm": 0.2721643307415772, | |
| "learning_rate": 3.2133261078278516e-06, | |
| "loss": 0.076806640625, | |
| "memory(GiB)": 10.57, | |
| "step": 3835, | |
| "train_speed(iter/s)": 0.475878 | |
| }, | |
| { | |
| "epoch": 2.8682981505697738, | |
| "grad_norm": 0.2509409981744641, | |
| "learning_rate": 3.207474994439499e-06, | |
| "loss": 0.07947998046875, | |
| "memory(GiB)": 10.57, | |
| "step": 3840, | |
| "train_speed(iter/s)": 0.475893 | |
| }, | |
| { | |
| "epoch": 2.8720343732486455, | |
| "grad_norm": 0.2985391643876752, | |
| "learning_rate": 3.2016230879259938e-06, | |
| "loss": 0.08131103515625, | |
| "memory(GiB)": 10.57, | |
| "step": 3845, | |
| "train_speed(iter/s)": 0.475879 | |
| }, | |
| { | |
| "epoch": 2.8757705959275173, | |
| "grad_norm": 0.34684741561716165, | |
| "learning_rate": 3.195770410657717e-06, | |
| "loss": 0.082269287109375, | |
| "memory(GiB)": 10.57, | |
| "step": 3850, | |
| "train_speed(iter/s)": 0.475847 | |
| }, | |
| { | |
| "epoch": 2.879506818606389, | |
| "grad_norm": 0.23479279469344572, | |
| "learning_rate": 3.189916985007991e-06, | |
| "loss": 0.09420166015625, | |
| "memory(GiB)": 10.57, | |
| "step": 3855, | |
| "train_speed(iter/s)": 0.475813 | |
| }, | |
| { | |
| "epoch": 2.8832430412852608, | |
| "grad_norm": 0.3907742470555341, | |
| "learning_rate": 3.184062833353005e-06, | |
| "loss": 0.07618408203125, | |
| "memory(GiB)": 10.57, | |
| "step": 3860, | |
| "train_speed(iter/s)": 0.475799 | |
| }, | |
| { | |
| "epoch": 2.8869792639641325, | |
| "grad_norm": 0.19372123225177681, | |
| "learning_rate": 3.178207978071719e-06, | |
| "loss": 0.079144287109375, | |
| "memory(GiB)": 10.57, | |
| "step": 3865, | |
| "train_speed(iter/s)": 0.475828 | |
| }, | |
| { | |
| "epoch": 2.890715486643004, | |
| "grad_norm": 0.3425380929045749, | |
| "learning_rate": 3.1723524415457845e-06, | |
| "loss": 0.085382080078125, | |
| "memory(GiB)": 10.57, | |
| "step": 3870, | |
| "train_speed(iter/s)": 0.475816 | |
| }, | |
| { | |
| "epoch": 2.8944517093218756, | |
| "grad_norm": 0.3609396149048238, | |
| "learning_rate": 3.166496246159457e-06, | |
| "loss": 0.070849609375, | |
| "memory(GiB)": 10.57, | |
| "step": 3875, | |
| "train_speed(iter/s)": 0.475828 | |
| }, | |
| { | |
| "epoch": 2.8981879320007473, | |
| "grad_norm": 0.20183491005738083, | |
| "learning_rate": 3.160639414299511e-06, | |
| "loss": 0.074884033203125, | |
| "memory(GiB)": 10.57, | |
| "step": 3880, | |
| "train_speed(iter/s)": 0.475821 | |
| }, | |
| { | |
| "epoch": 2.901924154679619, | |
| "grad_norm": 0.2467148593569697, | |
| "learning_rate": 3.154781968355153e-06, | |
| "loss": 0.063775634765625, | |
| "memory(GiB)": 10.57, | |
| "step": 3885, | |
| "train_speed(iter/s)": 0.475846 | |
| }, | |
| { | |
| "epoch": 2.9056603773584904, | |
| "grad_norm": 0.2637999747733018, | |
| "learning_rate": 3.148923930717939e-06, | |
| "loss": 0.0755615234375, | |
| "memory(GiB)": 10.57, | |
| "step": 3890, | |
| "train_speed(iter/s)": 0.475849 | |
| }, | |
| { | |
| "epoch": 2.909396600037362, | |
| "grad_norm": 0.25527787190645407, | |
| "learning_rate": 3.143065323781685e-06, | |
| "loss": 0.06624755859375, | |
| "memory(GiB)": 10.57, | |
| "step": 3895, | |
| "train_speed(iter/s)": 0.475865 | |
| }, | |
| { | |
| "epoch": 2.913132822716234, | |
| "grad_norm": 0.30417828277097125, | |
| "learning_rate": 3.137206169942384e-06, | |
| "loss": 0.073992919921875, | |
| "memory(GiB)": 10.57, | |
| "step": 3900, | |
| "train_speed(iter/s)": 0.475832 | |
| }, | |
| { | |
| "epoch": 2.9168690453951056, | |
| "grad_norm": 0.2346109435926227, | |
| "learning_rate": 3.131346491598119e-06, | |
| "loss": 0.07637939453125, | |
| "memory(GiB)": 10.57, | |
| "step": 3905, | |
| "train_speed(iter/s)": 0.47584 | |
| }, | |
| { | |
| "epoch": 2.9206052680739774, | |
| "grad_norm": 0.2353613119236764, | |
| "learning_rate": 3.1254863111489804e-06, | |
| "loss": 0.081158447265625, | |
| "memory(GiB)": 10.57, | |
| "step": 3910, | |
| "train_speed(iter/s)": 0.475845 | |
| }, | |
| { | |
| "epoch": 2.924341490752849, | |
| "grad_norm": 0.3558838314274693, | |
| "learning_rate": 3.119625650996974e-06, | |
| "loss": 0.076300048828125, | |
| "memory(GiB)": 10.57, | |
| "step": 3915, | |
| "train_speed(iter/s)": 0.475836 | |
| }, | |
| { | |
| "epoch": 2.9280777134317204, | |
| "grad_norm": 0.27354688251249265, | |
| "learning_rate": 3.1137645335459434e-06, | |
| "loss": 0.073907470703125, | |
| "memory(GiB)": 10.57, | |
| "step": 3920, | |
| "train_speed(iter/s)": 0.475809 | |
| }, | |
| { | |
| "epoch": 2.931813936110592, | |
| "grad_norm": 0.3327608490083812, | |
| "learning_rate": 3.107902981201478e-06, | |
| "loss": 0.07683868408203125, | |
| "memory(GiB)": 10.57, | |
| "step": 3925, | |
| "train_speed(iter/s)": 0.475779 | |
| }, | |
| { | |
| "epoch": 2.935550158789464, | |
| "grad_norm": 0.3747363988689518, | |
| "learning_rate": 3.1020410163708304e-06, | |
| "loss": 0.074114990234375, | |
| "memory(GiB)": 10.57, | |
| "step": 3930, | |
| "train_speed(iter/s)": 0.475764 | |
| }, | |
| { | |
| "epoch": 2.9392863814683357, | |
| "grad_norm": 0.18606776814447884, | |
| "learning_rate": 3.0961786614628308e-06, | |
| "loss": 0.073858642578125, | |
| "memory(GiB)": 10.57, | |
| "step": 3935, | |
| "train_speed(iter/s)": 0.475783 | |
| }, | |
| { | |
| "epoch": 2.943022604147207, | |
| "grad_norm": 0.22753548240298943, | |
| "learning_rate": 3.0903159388877984e-06, | |
| "loss": 0.07952880859375, | |
| "memory(GiB)": 10.57, | |
| "step": 3940, | |
| "train_speed(iter/s)": 0.475798 | |
| }, | |
| { | |
| "epoch": 2.9467588268260787, | |
| "grad_norm": 0.2665097861133451, | |
| "learning_rate": 3.0844528710574603e-06, | |
| "loss": 0.08333740234375, | |
| "memory(GiB)": 10.57, | |
| "step": 3945, | |
| "train_speed(iter/s)": 0.475797 | |
| }, | |
| { | |
| "epoch": 2.9504950495049505, | |
| "grad_norm": 0.17698058114731188, | |
| "learning_rate": 3.0785894803848617e-06, | |
| "loss": 0.069122314453125, | |
| "memory(GiB)": 10.57, | |
| "step": 3950, | |
| "train_speed(iter/s)": 0.475778 | |
| }, | |
| { | |
| "epoch": 2.954231272183822, | |
| "grad_norm": 0.3104099022805613, | |
| "learning_rate": 3.072725789284282e-06, | |
| "loss": 0.062646484375, | |
| "memory(GiB)": 10.57, | |
| "step": 3955, | |
| "train_speed(iter/s)": 0.475745 | |
| }, | |
| { | |
| "epoch": 2.957967494862694, | |
| "grad_norm": 0.20315634133154128, | |
| "learning_rate": 3.0668618201711517e-06, | |
| "loss": 0.08089599609375, | |
| "memory(GiB)": 10.57, | |
| "step": 3960, | |
| "train_speed(iter/s)": 0.475758 | |
| }, | |
| { | |
| "epoch": 2.9617037175415657, | |
| "grad_norm": 0.25055279371623723, | |
| "learning_rate": 3.0609975954619585e-06, | |
| "loss": 0.070599365234375, | |
| "memory(GiB)": 10.57, | |
| "step": 3965, | |
| "train_speed(iter/s)": 0.475775 | |
| }, | |
| { | |
| "epoch": 2.965439940220437, | |
| "grad_norm": 0.27358700735815494, | |
| "learning_rate": 3.0551331375741753e-06, | |
| "loss": 0.079913330078125, | |
| "memory(GiB)": 10.57, | |
| "step": 3970, | |
| "train_speed(iter/s)": 0.475795 | |
| }, | |
| { | |
| "epoch": 2.9691761628993087, | |
| "grad_norm": 0.2701014272775072, | |
| "learning_rate": 3.0492684689261587e-06, | |
| "loss": 0.069427490234375, | |
| "memory(GiB)": 10.57, | |
| "step": 3975, | |
| "train_speed(iter/s)": 0.475767 | |
| }, | |
| { | |
| "epoch": 2.9729123855781805, | |
| "grad_norm": 0.26839228857427083, | |
| "learning_rate": 3.0434036119370734e-06, | |
| "loss": 0.07572021484375, | |
| "memory(GiB)": 10.57, | |
| "step": 3980, | |
| "train_speed(iter/s)": 0.475785 | |
| }, | |
| { | |
| "epoch": 2.9766486082570522, | |
| "grad_norm": 0.22716855276596393, | |
| "learning_rate": 3.037538589026808e-06, | |
| "loss": 0.08402099609375, | |
| "memory(GiB)": 10.57, | |
| "step": 3985, | |
| "train_speed(iter/s)": 0.475805 | |
| }, | |
| { | |
| "epoch": 2.9803848309359235, | |
| "grad_norm": 0.2867732902522501, | |
| "learning_rate": 3.03167342261588e-06, | |
| "loss": 0.06982421875, | |
| "memory(GiB)": 10.57, | |
| "step": 3990, | |
| "train_speed(iter/s)": 0.475802 | |
| }, | |
| { | |
| "epoch": 2.9841210536147953, | |
| "grad_norm": 0.1859176777096869, | |
| "learning_rate": 3.0258081351253565e-06, | |
| "loss": 0.073046875, | |
| "memory(GiB)": 10.57, | |
| "step": 3995, | |
| "train_speed(iter/s)": 0.475829 | |
| }, | |
| { | |
| "epoch": 2.987857276293667, | |
| "grad_norm": 0.28880199434249176, | |
| "learning_rate": 3.019942748976771e-06, | |
| "loss": 0.092022705078125, | |
| "memory(GiB)": 10.57, | |
| "step": 4000, | |
| "train_speed(iter/s)": 0.475846 | |
| }, | |
| { | |
| "epoch": 2.991593498972539, | |
| "grad_norm": 0.33890813145931753, | |
| "learning_rate": 3.0140772865920308e-06, | |
| "loss": 0.076885986328125, | |
| "memory(GiB)": 10.57, | |
| "step": 4005, | |
| "train_speed(iter/s)": 0.475863 | |
| }, | |
| { | |
| "epoch": 2.9953297216514105, | |
| "grad_norm": 0.237266786675584, | |
| "learning_rate": 3.0082117703933345e-06, | |
| "loss": 0.088226318359375, | |
| "memory(GiB)": 10.57, | |
| "step": 4010, | |
| "train_speed(iter/s)": 0.475845 | |
| }, | |
| { | |
| "epoch": 2.9990659443302823, | |
| "grad_norm": 0.2422362583040606, | |
| "learning_rate": 3.002346222803089e-06, | |
| "loss": 0.0780517578125, | |
| "memory(GiB)": 10.57, | |
| "step": 4015, | |
| "train_speed(iter/s)": 0.475845 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 8034, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 6, | |
| "save_steps": 1339, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 502166754164736.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |